Skip to content

Commit 84e72c8

Browse files
committed
Added a high level cache.
High memory use is a problem in libopenapi, and this change moves us a step closer to fixing that. Very reference-heavy specs can create an ungodly number of schema proxy calls when the model is being walked. For example, the DocuSign spec creates 4 million proxy calls. This is not required, as it’s the same ref being called over and over and we already have the result after it has been built once. This change adds a high-level cache to prevent the overproduction of schemas.
1 parent 96eaabe commit 84e72c8

8 files changed

Lines changed: 73246 additions & 25 deletions

File tree

datamodel/high/base/schema_proxy.go

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package base
55

66
import (
7+
"fmt"
78
"sync"
89

910
"github.com/pb33f/libopenapi/datamodel/high"
@@ -90,15 +91,45 @@ func (sp *SchemaProxy) Schema() *Schema {
9091
sp.lock.Lock()
9192
if sp.rendered == nil {
9293

94+
//check the high-level cache first.
95+
idx := sp.schema.Value.GetIndex()
96+
if idx != nil && sp.schema.Value != nil {
97+
if sp.schema.Value.IsReference() && sp.schema.Value.GetReferenceNode() != nil && sp.schema.GetValueNode() != nil {
98+
loc := fmt.Sprintf("%s:%d:%d", idx.GetSpecAbsolutePath(), sp.schema.GetValueNode().Line, sp.schema.GetValueNode().Column)
99+
if seen, ok := idx.GetHighCache().Load(loc); ok {
100+
sp.lock.Unlock()
101+
idx.HighCacheHit()
102+
return seen.(*Schema)
103+
} else {
104+
idx.HighCacheMiss()
105+
}
106+
}
107+
}
108+
93109
s := sp.schema.Value.Schema()
94110
if s == nil {
95111
sp.buildError = sp.schema.Value.GetBuildError()
96112
sp.lock.Unlock()
97113
return nil
98114
}
99115
sch := NewSchema(s)
100-
sch.ParentProxy = sp
101116

117+
if idx != nil {
118+
119+
// only store the schema in the cache if it is a reference!
120+
if sp.IsReference() && sp.GetReferenceNode() != nil && sp.schema != nil && sp.schema.GetValueNode() != nil {
121+
//if sp.schema.GetValueNode() != nil {
122+
loc := fmt.Sprintf("%s:%d:%d", idx.GetSpecAbsolutePath(), sp.schema.GetValueNode().Line, sp.schema.GetValueNode().Column)
123+
124+
// caching is only performed on traditional $ref nodes that hold just a reference key and its value node; any
125+
// additional 3.1-style sibling content will not be cached, as libopenapi does not yet support it.
126+
if len(sp.GetReferenceNode().Content) == 2 {
127+
idx.GetHighCache().Store(loc, sch)
128+
}
129+
}
130+
}
131+
132+
sch.ParentProxy = sp
102133
sp.rendered = sch
103134
sp.lock.Unlock()
104135
return sch

datamodel/low/base/schema_proxy.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,3 +179,8 @@ func (sp *SchemaProxy) AddNode(key int, node *yaml.Node) {
179179
sp.Nodes.Store(key, node)
180180
}
181181
}
182+
183+
// GetIndex will return the index.SpecIndex pointer that was passed to the SchemaProxy during build.
184+
func (sp *SchemaProxy) GetIndex() *index.SpecIndex {
185+
return sp.idx
186+
}

document_iteration_test.go

Lines changed: 77 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -23,57 +23,96 @@ type context struct {
2323
stack []loopFrame
2424
}
2525

26+
func BenchmarkMemory_Speakeasy(b *testing.B) {
27+
// Tell the benchmark to report memory allocations
28+
b.ReportAllocs()
29+
30+
// Run the benchmark the specified number of iterations
31+
for i := 0; i < b.N; i++ {
32+
runTest(nil, "test_specs/speakeasy-test.yaml")
33+
}
34+
}
35+
2636
func Test_Speakeasy_Document_Iteration(t *testing.T) {
27-
spec, err := os.ReadFile("test_specs/speakeasy-test.yaml")
28-
require.NoError(t, err)
37+
runTest(t, "test_specs/speakeasy-test.yaml")
38+
}
39+
40+
func runTest(t *testing.T, specLocation string) {
41+
spec, err := os.ReadFile(specLocation)
42+
if t != nil {
43+
require.NoError(t, err)
44+
}
2945

3046
doc, err := NewDocumentWithConfiguration(spec, &datamodel.DocumentConfiguration{
3147
BasePath: "./test_specs",
3248
IgnorePolymorphicCircularReferences: true,
3349
IgnoreArrayCircularReferences: true,
3450
AllowFileReferences: true,
3551
})
36-
require.NoError(t, err)
52+
if t != nil {
53+
require.NoError(t, err)
54+
}
3755

3856
m, errs := doc.BuildV3Model()
39-
require.Empty(t, errs)
57+
if t != nil {
58+
require.Empty(t, errs)
59+
}
4060

4161
for path, pathItem := range m.Model.Paths.PathItems.FromOldest() {
42-
t.Log(path)
62+
if t != nil {
63+
t.Log(path)
64+
}
4365

4466
iterateOperations(t, pathItem.GetOperations())
4567
}
4668

4769
for path, pathItem := range m.Model.Webhooks.FromOldest() {
48-
t.Log(path)
70+
if t != nil {
71+
t.Log(path)
72+
}
4973

5074
iterateOperations(t, pathItem.GetOperations())
5175
}
5276

5377
for name, schemaProxy := range m.Model.Components.Schemas.FromOldest() {
54-
t.Log(name)
78+
if t != nil {
79+
t.Log(name)
80+
}
5581

5682
handleSchema(t, schemaProxy, context{})
5783
}
84+
85+
require.Equal(t, uint64(10), m.Index.GetHighCacheMisses())
86+
require.Equal(t, uint64(11), m.Index.GetHighCacheHits())
87+
require.Equal(t, uint64(101), m.Index.GetRolodex().GetIndexes()[0].GetHighCacheMisses())
88+
require.Equal(t, uint64(206), m.Index.GetRolodex().GetIndexes()[0].GetHighCacheHits())
5889
}
5990

6091
func iterateOperations(t *testing.T, ops *orderedmap.Map[string, *v3.Operation]) {
6192
for method, op := range ops.FromOldest() {
62-
t.Log(method)
93+
if t != nil {
94+
t.Log(method)
95+
}
6396

6497
for i, param := range op.Parameters {
65-
t.Log("param", i, param.Name)
98+
if t != nil {
99+
t.Log("param", i, param.Name)
100+
}
66101

67102
if param.Schema != nil {
68103
handleSchema(t, param.Schema, context{})
69104
}
70105
}
71106

72107
if op.RequestBody != nil {
73-
t.Log("request body")
108+
if t != nil {
109+
t.Log("request body")
110+
}
74111

75112
for contentType, mediaType := range op.RequestBody.Content.FromOldest() {
76-
t.Log(contentType)
113+
if t != nil {
114+
t.Log(contentType)
115+
}
77116

78117
if mediaType.Schema != nil {
79118
handleSchema(t, mediaType.Schema, context{})
@@ -82,14 +121,20 @@ func iterateOperations(t *testing.T, ops *orderedmap.Map[string, *v3.Operation])
82121
}
83122

84123
if orderedmap.Len(op.Responses.Codes) > 0 {
85-
t.Log("responses")
124+
if t != nil {
125+
t.Log("responses")
126+
}
86127
}
87128

88129
for code, response := range op.Responses.Codes.FromOldest() {
89-
t.Log(code)
130+
if t != nil {
131+
t.Log(code)
132+
}
90133

91134
for contentType, mediaType := range response.Content.FromOldest() {
92-
t.Log(contentType)
135+
if t != nil {
136+
t.Log(contentType)
137+
}
93138

94139
if mediaType.Schema != nil {
95140
handleSchema(t, mediaType.Schema, context{})
@@ -98,14 +143,20 @@ func iterateOperations(t *testing.T, ops *orderedmap.Map[string, *v3.Operation])
98143
}
99144

100145
if orderedmap.Len(op.Responses.Codes) > 0 {
101-
t.Log("callbacks")
146+
if t != nil {
147+
t.Log("callbacks")
148+
}
102149
}
103150

104151
for callbackName, callback := range op.Callbacks.FromOldest() {
105-
t.Log(callbackName)
152+
if t != nil {
153+
t.Log(callbackName)
154+
}
106155

107156
for name, pathItem := range callback.Expression.FromOldest() {
108-
t.Log(name)
157+
if t != nil {
158+
t.Log(name)
159+
}
109160

110161
iterateOperations(t, pathItem.GetOperations())
111162
}
@@ -119,11 +170,15 @@ func handleSchema(t *testing.T, schProxy *base.SchemaProxy, ctx context) {
119170
}
120171

121172
sch, err := schProxy.BuildSchema()
122-
require.NoError(t, err)
173+
if t != nil {
174+
require.NoError(t, err)
175+
}
123176

124177
typ, subTypes := getResolvedType(sch)
125178

126-
t.Log("schema", typ, subTypes)
179+
if t != nil {
180+
t.Log("schema", typ, subTypes)
181+
}
127182

128183
if len(sch.Enum) > 0 {
129184
switch typ {
@@ -278,7 +333,9 @@ func checkCircularReference(t *testing.T, ctx *context, schProxy *base.SchemaPro
278333
isRestricted = true
279334
}
280335

281-
require.False(t, isRestricted, "circular reference: %s", append(ctx.visited, loopRef))
336+
if t != nil {
337+
require.False(t, isRestricted, "circular reference: %s", append(ctx.visited, loopRef))
338+
}
282339
return true
283340
}
284341

index/cache.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Copyright 2023-2024 Princess Beef Heavy Industries, LLC / Dave Shanley
2+
// https://pb33f.io
3+
4+
package index
5+
6+
import "sync"
7+
8+
func (index *SpecIndex) SetCache(sync *sync.Map) {
9+
index.cache = sync
10+
}
11+
12+
// HighCacheHit increments the counter of high cache hits by one, and returns the current value of hits.
13+
func (index *SpecIndex) HighCacheHit() uint64 {
14+
index.highModelCacheHits.Add(1)
15+
return index.highModelCacheHits.Load()
16+
}
17+
18+
// HighCacheMiss increments the counter of high cache misses by one, and returns the current value of misses.
19+
func (index *SpecIndex) HighCacheMiss() uint64 {
20+
index.highModelCacheMisses.Add(1)
21+
return index.highModelCacheMisses.Load()
22+
}
23+
24+
// GetHighCacheHits returns the number of hits on the high model cache.
25+
func (index *SpecIndex) GetHighCacheHits() uint64 {
26+
return index.highModelCacheHits.Load()
27+
}
28+
29+
// GetHighCacheMisses returns the number of misses on the high model cache.
30+
func (index *SpecIndex) GetHighCacheMisses() uint64 {
31+
return index.highModelCacheMisses.Load()
32+
}
33+
34+
// GetHighCache returns the high model cache for this index.
35+
func (index *SpecIndex) GetHighCache() *sync.Map {
36+
if index.highModelCache == nil {
37+
index.highModelCache = &sync.Map{}
38+
}
39+
return index.highModelCache
40+
}

0 commit comments

Comments
 (0)