@@ -8,6 +8,7 @@ package overlap
8
8
9
9
import (
10
10
"context"
11
+ "slices"
11
12
12
13
"github.com/cockroachdb/pebble/internal/base"
13
14
"github.com/cockroachdb/pebble/internal/keyspan"
@@ -23,7 +24,8 @@ type WithLSM [manifest.NumLevels]WithLevel
23
24
type WithLevel struct {
24
25
Result Kind
25
26
// SplitFile can be set only when result is OnlyBoundary. If it is set, this
26
- // file can be split to free up the range of interest.
27
+ // file can be split to free up the range of interest. SplitFile is not set
28
+ // for L0 (overlapping tables are allowed in L0).
27
29
SplitFile * manifest.FileMetadata
28
30
}
29
31
@@ -89,6 +91,7 @@ func (c *Checker) LSMOverlap(
89
91
}
90
92
if res .Result == OnlyBoundary {
91
93
result [0 ].Result = OnlyBoundary
94
+ // We don't set SplitFile for L0 (tables in L0 are allowed to overlap).
92
95
}
93
96
}
94
97
for level := 1 ; level < manifest .NumLevels ; level ++ {
@@ -135,11 +138,11 @@ func (c *Checker) LevelOverlap(
135
138
return WithLevel {Result : Data }, nil
136
139
}
137
140
// We have a single file to look at; its boundaries enclose our region.
138
- empty , err := c .EmptyRegion (ctx , region , file )
141
+ overlap , err := c .DataOverlapWithFile (ctx , region , file )
139
142
if err != nil {
140
143
return WithLevel {}, err
141
144
}
142
- if ! empty {
145
+ if overlap {
143
146
return WithLevel {Result : Data }, nil
144
147
}
145
148
return WithLevel {
@@ -148,116 +151,183 @@ func (c *Checker) LevelOverlap(
148
151
}, nil
149
152
}
150
153
151
- // EmptyRegion returns true if the given region doesn't overlap with any keys or
152
- // ranges in the given table.
153
- func (c * Checker ) EmptyRegion (
154
+ // DataOverlapWithFile returns true if the given region overlaps with any keys
155
+ // or spans in the given table.
156
+ func (c * Checker ) DataOverlapWithFile (
154
157
ctx context.Context , region base.UserKeyBounds , m * manifest.FileMetadata ,
155
158
) (bool , error ) {
156
- empty , err := c .emptyRegionPointsAndRangeDels (ctx , region , m )
157
- if err != nil || ! empty {
158
- return empty , err
159
+ if overlap , ok := m .OverlapCache .CheckDataOverlap (c .cmp , region ); ok {
160
+ return overlap , nil
159
161
}
160
- return c .emptyRegionRangeKeys (ctx , region , m )
161
- }
162
+ // We want to check overlap with file, but we also want to update the cache
163
+ // with useful information. We try to find two data regions r1 and r2 with a
164
+ // space in between; r1 ends before region.Start and r2 ends at or after
165
+ // region.Start. See overlapcache.C.ReportEmptyRegion().
166
+ var r1 , r2 base.UserKeyBounds
162
167
163
- // emptyRegionPointsAndRangeDels returns true if the file doesn't contain any
164
- // point keys or range del spans that overlap with region.
165
- func (c * Checker ) emptyRegionPointsAndRangeDels (
166
- ctx context.Context , region base.UserKeyBounds , m * manifest.FileMetadata ,
167
- ) (bool , error ) {
168
- if ! m .HasPointKeys {
169
- return true , nil
168
+ if m .HasPointKeys {
169
+ lt , ge , err := c .pointKeysAroundKey (ctx , region .Start , m )
170
+ if err != nil {
171
+ return false , err
172
+ }
173
+ r1 = base .UserKeyBoundsInclusive (lt , lt )
174
+ r2 = base .UserKeyBoundsInclusive (ge , ge )
175
+
176
+ if err := c .extendRegionsWithSpans (ctx , & r1 , & r2 , region .Start , m , manifest .KeyTypePoint ); err != nil {
177
+ return false , err
178
+ }
170
179
}
171
- pointBounds := m .UserKeyBoundsByType (manifest .KeyTypePoint )
172
- if ! pointBounds .Overlaps (c .cmp , & region ) {
180
+ if m .HasRangeKeys {
181
+ if err := c .extendRegionsWithSpans (ctx , & r1 , & r2 , region .Start , m , manifest .KeyTypeRange ); err != nil {
182
+ return false , err
183
+ }
184
+ }
185
+ // If the regions now overlap or touch, it's all one big data region.
186
+ if r1 .Start != nil && r2 .Start != nil && c .cmp (r1 .End .Key , r2 .Start ) >= 0 {
187
+ m .OverlapCache .ReportDataRegion (c .cmp , base.UserKeyBounds {
188
+ Start : r1 .Start ,
189
+ End : r2 .End ,
190
+ })
173
191
return true , nil
174
192
}
193
+ m .OverlapCache .ReportEmptyRegion (c .cmp , r1 , r2 )
194
+ // There is overlap iff we overlap with r2.
195
+ overlap := r2 .Start != nil && region .End .IsUpperBoundFor (c .cmp , r2 .Start )
196
+ return overlap , nil
197
+ }
198
+
199
+ // pointKeysAroundKey returns two consecutive point keys: the greatest key that
200
+ // is < key and the smallest key that is >= key. If there is no such key, the
201
+ // corresponding return value is nil. Both lt and ge are nil if the file
202
+ // contains no point keys.
203
+ func (c * Checker ) pointKeysAroundKey (
204
+ ctx context.Context , key []byte , m * manifest.FileMetadata ,
205
+ ) (lt , ge []byte , _ error ) {
206
+ pointBounds := m .UserKeyBoundsByType (manifest .KeyTypePoint )
207
+
175
208
points , err := c .iteratorFactory .Points (ctx , m )
176
- if err != nil {
177
- return false , err
209
+ if points == nil || err != nil {
210
+ return nil , nil , err
178
211
}
179
- if points != nil {
180
- defer points .Close ()
181
- var kv * base.InternalKV
182
- if c .cmp (region .Start , pointBounds .Start ) <= 0 {
183
- kv = points .First ()
184
- } else {
185
- kv = points .SeekGE (region .Start , base .SeekGEFlagsNone )
212
+ defer points .Close ()
213
+ switch {
214
+ case c .cmp (key , pointBounds .Start ) <= 0 :
215
+ kv := points .First ()
216
+ if kv != nil {
217
+ ge = slices .Clone (kv .K .UserKey )
186
218
}
187
- if kv == nil && points .Error () != nil {
188
- return false , points .Error ()
219
+ case c .cmp (key , pointBounds .End .Key ) > 0 :
220
+ kv := points .Last ()
221
+ if kv != nil {
222
+ lt = slices .Clone (kv .K .UserKey )
189
223
}
190
- if kv != nil && region .End .IsUpperBoundForInternalKey (c .cmp , kv .K ) {
191
- // Found overlap.
192
- return false , nil
224
+ default :
225
+ kv := points .SeekLT (key , base .SeekLTFlagsNone )
226
+ if kv != nil {
227
+ lt = slices .Clone (kv .K .UserKey )
193
228
}
194
- }
195
- rangeDels , err := c .iteratorFactory .RangeDels (ctx , m )
196
- if err != nil {
197
- return false , err
198
- }
199
- if rangeDels != nil {
200
- defer rangeDels .Close ()
201
- empty , err := c .emptyFragmentRegion (region , pointBounds .Start , rangeDels )
202
- if err != nil || ! empty {
203
- return empty , err
229
+ if kv = points .Next (); kv != nil {
230
+ ge = slices .Clone (kv .K .UserKey )
204
231
}
205
232
}
206
- // Found no overlap.
207
- return true , nil
233
+ return lt , ge , points .Error ()
208
234
}
209
235
210
- // emptyRegionRangeKeys returns true if the file doesn't contain any range key
211
- // spans that overlap with region.
212
- func (c * Checker ) emptyRegionRangeKeys (
213
- ctx context.Context , region base.UserKeyBounds , m * manifest.FileMetadata ,
214
- ) (bool , error ) {
215
- if ! m .HasRangeKeys {
216
- return true , nil
217
- }
218
- rangeKeyBounds := m .UserKeyBoundsByType (manifest .KeyTypeRange )
219
- if ! rangeKeyBounds .Overlaps (c .cmp , & region ) {
220
- return true , nil
236
+ // extendRegionsWithSpans opens a fragment iterator for either range dels or
237
+ // range keys (depending on keyType), finds the last span that ends before key
238
+ // and the following span, and extends/replaces regions r1 and r2.
239
+ func (c * Checker ) extendRegionsWithSpans (
240
+ ctx context.Context ,
241
+ r1 , r2 * base.UserKeyBounds ,
242
+ key []byte ,
243
+ m * manifest.FileMetadata ,
244
+ keyType manifest.KeyType ,
245
+ ) error {
246
+ var iter keyspan.FragmentIterator
247
+ var err error
248
+ if keyType == manifest .KeyTypePoint {
249
+ iter , err = c .iteratorFactory .RangeDels (ctx , m )
250
+ } else {
251
+ iter , err = c .iteratorFactory .RangeKeys (ctx , m )
221
252
}
222
- rangeKeys , err := c .iteratorFactory .RangeKeys (ctx , m )
223
- if err != nil {
224
- return false , err
253
+ if iter == nil || err != nil {
254
+ return err
225
255
}
226
- if rangeKeys != nil {
227
- defer rangeKeys .Close ()
228
- empty , err := c .emptyFragmentRegion (region , rangeKeyBounds .Start , rangeKeys )
229
- if err != nil || ! empty {
230
- return empty , err
256
+ defer iter .Close ()
257
+
258
+ fragmentBounds := m .UserKeyBoundsByType (keyType )
259
+ switch {
260
+ case c .cmp (key , fragmentBounds .Start ) <= 0 :
261
+ span , err := iter .First ()
262
+ if err != nil {
263
+ return err
264
+ }
265
+ c .updateR2 (r2 , span )
266
+
267
+ case ! fragmentBounds .End .IsUpperBoundFor (c .cmp , key ):
268
+ span , err := iter .Last ()
269
+ if err != nil {
270
+ return err
271
+ }
272
+ c .updateR1 (r1 , span )
273
+
274
+ default :
275
+ span , err := iter .SeekGE (key )
276
+ if err != nil {
277
+ return err
231
278
}
279
+ c .updateR2 (r2 , span )
280
+ span , err = iter .Prev ()
281
+ if err != nil {
282
+ return err
283
+ }
284
+ c .updateR1 (r1 , span )
232
285
}
233
- // Found no overlap.
234
- return true , nil
286
+ return nil
235
287
}
236
288
237
- // emptyFragmentRegion returns true if the given iterator doesn't contain any
238
- // spans that overlap with region. The fragmentLowerBounds is a known lower
239
- // bound for all the spans.
240
- func ( c * Checker ) emptyFragmentRegion (
241
- region base. UserKeyBounds , fragmentLowerBound [] byte , fragments keyspan. FragmentIterator ,
242
- ) ( bool , error ) {
243
- var span * keyspan. Span
244
- var err error
245
- if c . cmp ( region . Start , fragmentLowerBound ) <= 0 {
246
- // This is an optimization: we know there are no spans before region.Start,
247
- // so we can use First .
248
- span , err = fragments . First ()
249
- } else {
250
- span , err = fragments . SeekGE ( region . Start )
251
- }
252
- if err != nil {
253
- return false , err
254
- }
255
- if span != nil && span . Empty () {
256
- return false , base . AssertionFailedf ( "fragment iterator produced empty span" )
289
+ // updateR1 updates r1, the region of data that ends before a key of interest.
290
+ func ( c * Checker ) updateR1 ( r1 * base. UserKeyBounds , s * keyspan. Span ) {
291
+ switch {
292
+ case s == nil :
293
+
294
+ case r1 . Start == nil || c . cmp ( r1 . End . Key , s . Start ) < 0 :
295
+ // Region completely to the right of r1.
296
+ * r1 = base . UserKeyBoundsEndExclusive ( slices . Clone ( s . Start ), slices . Clone ( s . End ))
297
+
298
+ case c . cmp ( s . End , r1 . Start ) < 0 :
299
+ // Region completely to the left of r1, nothing to do.
300
+
301
+ default :
302
+ // Regions are overlapping or touching.
303
+ if c . cmp ( s . Start , r1 . Start ) < 0 {
304
+ r1 . Start = slices . Clone ( s . Start )
305
+ }
306
+ if c . cmp ( r1 . End . Key , s . End ) < 0 {
307
+ r1 . End = base . UserKeyExclusive ( slices . Clone ( s . End ))
308
+ }
257
309
}
258
- if span != nil && region .End .IsUpperBoundFor (c .cmp , span .Start ) {
259
- // Found overlap.
260
- return false , nil
310
+ }
311
+
312
+ // updateR2 updates r2, the region of data that ends at or after a key of interest.
313
+ func (c * Checker ) updateR2 (r2 * base.UserKeyBounds , s * keyspan.Span ) {
314
+ switch {
315
+ case s == nil :
316
+
317
+ case r2 .Start == nil || c .cmp (s .End , r2 .Start ) < 0 :
318
+ // Region completely to the left of r2.
319
+ * r2 = base .UserKeyBoundsEndExclusive (slices .Clone (s .Start ), slices .Clone (s .End ))
320
+
321
+ case c .cmp (r2 .End .Key , s .Start ) < 0 :
322
+ // Region completely to the right of r2, nothing to do.
323
+
324
+ default :
325
+ // Regions are overlapping or touching.
326
+ if c .cmp (s .Start , r2 .Start ) < 0 {
327
+ r2 .Start = slices .Clone (s .Start )
328
+ }
329
+ if c .cmp (r2 .End .Key , s .End ) < 0 {
330
+ r2 .End = base .UserKeyExclusive (slices .Clone (s .End ))
331
+ }
261
332
}
262
- return true , nil
263
333
}
0 commit comments