-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcache.go
More file actions
104 lines (96 loc) · 3.42 KB
/
cache.go
File metadata and controls
104 lines (96 loc) · 3.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
package avro
import (
"crypto/sha256"
"encoding/json"
"maps"
"strings"
"sync"
)
// SchemaCache accumulates named types across multiple [SchemaCache.Parse]
// calls, allowing schemas to reference types defined in previously parsed
// schemas. This is useful for Schema Registry integrations where schemas
// have references to other schemas.
//
// Schemas must be parsed in dependency order: referenced types must be
// parsed before the schemas that reference them.
//
// Parsing the same schema string multiple times is allowed and returns the
// previously parsed result. This handles diamond dependencies in schema
// reference graphs (e.g. A→B→D, A→C→D) without requiring callers to
// track which schemas have already been parsed. Deduplication normalizes
// the JSON (whitespace and key order) but not the Avro canonical form:
// schemas that differ only in formatting are deduplicated, but differences
// in non-canonical fields like doc or aliases are not and will return a
// duplicate type error.
//
// The returned [*Schema] from each Parse call is fully resolved and
// independent of the cache — it can be used for [Schema.Encode] and
// [Schema.Decode] without the cache.
//
// The zero value is ready to use. A SchemaCache is safe for concurrent use.
type SchemaCache struct {
// mu is held for the full duration of Parse and guards every map below.
mu sync.Mutex
// named holds the named types collected by successful Parse calls, keyed
// by type name (presumably the Avro full name — confirm against the
// builder). Parse works on a clone and swaps it in only on success.
named map[string]*namedType
// dedup maps the SHA-256 of the normalized schema text to the *Schema
// produced for it. Only parses performed without custom types are stored.
dedup map[[32]byte]*Schema
// customParsed records the SHA-256 of schema texts that were successfully
// parsed with custom types. Those parses bypass dedup, so a later parse of
// the same text must be allowed to re-register its names.
customParsed map[[32]byte]bool
}
// Parse parses a schema string, registering any named types (records, enums,
// fixed) in the cache. Named types from previous Parse calls are available
// for reference resolution. On failure, the cached types and dedup entries
// are left unchanged.
//
// Before parsing, the schema text is normalized by round-tripping it through
// encoding/json (numbers are kept verbatim via json.Number), so inputs that
// differ only in whitespace or key order share one dedup entry. Normalization
// is applied only when the input is a single JSON value followed by nothing
// but whitespace; any other input is handed to the parser untouched so that
// trailing data is never silently discarded.
//
// When no custom types are supplied through opts, a schema whose normalized
// text was parsed before returns the previously built *Schema. Parses that
// use custom types always bypass this lookup.
func (c *SchemaCache) Parse(schema string, opts ...SchemaOpt) (*Schema, error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Lazily initialize so the zero value of SchemaCache is usable.
	if c.named == nil {
		c.named = make(map[string]*namedType)
		c.dedup = make(map[[32]byte]*Schema)
		c.customParsed = make(map[[32]byte]bool)
	}

	// Best-effort normalization: if the input is not valid JSON, leave it
	// as-is and let parse report the error.
	dec := json.NewDecoder(strings.NewReader(schema))
	dec.UseNumber()
	var v any
	if err := dec.Decode(&v); err == nil {
		// Decode stops after the first JSON value. Adopt the normalized form
		// only if the remainder is pure JSON whitespace; otherwise the
		// rewrite would drop trailing content and hide it from the parser.
		if rest := schema[dec.InputOffset():]; strings.Trim(rest, " \t\r\n") == "" {
			if normalized, err := json.Marshal(v); err == nil {
				schema = string(normalized)
			}
		}
	}

	// Clone the cache's map so a failed parse doesn't corrupt the cache.
	cloned := maps.Clone(c.named)
	b := &builder{
		named: cloned,
	}
	applySchemaOpts(b, opts)
	hasCustomTypes := len(b.customTypes) > 0

	// Skip dedup when custom types are registered: custom types produce
	// different compiled schemas for the same schema string.
	h := sha256.Sum256([]byte(schema))
	if !hasCustomTypes {
		if s, ok := c.dedup[h]; ok {
			return s, nil
		}
	}

	// Allow re-registration of inherited names when re-parsing a schema
	// that was previously parsed with custom types (which skipped dedup),
	// or when parsing with custom types now. This preserves the
	// "duplicate named type" error for genuinely conflicting definitions.
	needsCachedNames := hasCustomTypes || c.customParsed[h]
	if needsCachedNames && len(cloned) > 0 {
		b.cachedNames = make(map[string]bool, len(cloned))
		for name := range cloned {
			b.cachedNames[name] = true
		}
	}

	s, err := parse(schema, b)
	if err != nil {
		return nil, err
	}

	// Named types are safe to cache unconditionally: applyCustomTypes
	// wraps b.ser/b.deser without mutating the node's ser/deser, so
	// cached named type nodes keep their unwrapped functions.
	c.named = b.named
	if hasCustomTypes {
		// Remember the hash so a later plain parse of the same text may
		// re-register the names this parse just added.
		c.customParsed[h] = true
	} else {
		c.dedup[h] = s
	}
	return s, nil
}