Skip to content

Commit a168e34

Browse files
committed
kvstorage: assert each engine only access its spans
The commit adds engine assertions to assert that Log engine only accesses log engine spans, and state engine only accesses state engine spans.
1 parent 7bb01fd commit a168e34

File tree

8 files changed

+327
-280
lines changed

8 files changed

+327
-280
lines changed

pkg/kv/kvserver/kvstorage/BUILD.bazel

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,13 @@ go_library(
2121
"//pkg/kv/kvserver/kvserverpb",
2222
"//pkg/kv/kvserver/logstore",
2323
"//pkg/kv/kvserver/rditer",
24+
"//pkg/kv/kvserver/spanset",
2425
"//pkg/raft/raftpb",
2526
"//pkg/roachpb",
2627
"//pkg/storage",
2728
"//pkg/storage/enginepb",
2829
"//pkg/storage/fs",
30+
"//pkg/util",
2931
"//pkg/util/buildutil",
3032
"//pkg/util/hlc",
3133
"//pkg/util/iterutil",
@@ -45,6 +47,7 @@ go_test(
4547
"init_test.go",
4648
"initial_test.go",
4749
"stateloader_test.go",
50+
"storage_test.go",
4851
],
4952
data = glob(["testdata/**"]),
5053
embed = [":kvstorage"],
@@ -56,6 +59,7 @@ go_test(
5659
"//pkg/kv/kvserver/kvserverpb",
5760
"//pkg/kv/kvserver/logstore",
5861
"//pkg/kv/kvserver/print",
62+
"//pkg/kv/kvserver/spanset",
5963
"//pkg/raft/raftpb",
6064
"//pkg/roachpb",
6165
"//pkg/settings/cluster",

pkg/kv/kvserver/kvstorage/destroy_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
1919
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/logstore"
2020
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/print"
21+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
2122
"github.com/cockroachdb/cockroach/pkg/raft/raftpb"
2223
"github.com/cockroachdb/cockroach/pkg/roachpb"
2324
"github.com/cockroachdb/cockroach/pkg/storage"
@@ -139,7 +140,7 @@ func (r *replicaInfo) createStateMachine(ctx context.Context, t *testing.T, w st
139140
// TODO(pav-kv): figure out whether LastReplicaGCTimestamp should be in the
140141
// log or state engine.
141142
require.NoError(t, storage.MVCCBlindPutProto(
142-
ctx, w,
143+
ctx, spanset.DisableWriterAssertions(w),
143144
keys.RangeLastReplicaGCTimestampKey(r.id.RangeID),
144145
hlc.Timestamp{}, /* timestamp */
145146
&hlc.Timestamp{WallTime: 12345678},

pkg/kv/kvserver/kvstorage/storage.go

Lines changed: 154 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,13 @@
66
package kvstorage
77

88
import (
9+
"github.com/cockroachdb/cockroach/pkg/keys"
10+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
11+
"github.com/cockroachdb/cockroach/pkg/roachpb"
912
"github.com/cockroachdb/cockroach/pkg/storage"
13+
"github.com/cockroachdb/cockroach/pkg/util"
1014
"github.com/cockroachdb/cockroach/pkg/util/buildutil"
15+
"github.com/cockroachdb/errors"
1116
)
1217

1318
// The following are type aliases that help annotating various storage
@@ -121,8 +126,18 @@ type Engines struct {
121126
// MakeEngines creates an Engines handle in which both state machine and log
122127
// engine reside in the same physical engine.
123128
func MakeEngines(eng storage.Engine) Engines {
124-
// TODO(#158281): in test builds, wrap the engines in a way that allows
125-
// verifying that all accesses touch the correct subset of keys.
129+
if util.RaceEnabled {
130+
// Wrap the engines with span set engines to catch incorrect engine
131+
// accesses.
132+
wrappedStateEngine := spanset.NewSpanSetEngine(eng, validateIsStateEngineSpan)
133+
wrappedLogEngine := spanset.NewSpanSetEngine(eng, validateIsRaftEngineSpan)
134+
return Engines{
135+
stateEngine: wrappedStateEngine,
136+
logEngine: wrappedLogEngine,
137+
todoEngine: eng,
138+
}
139+
}
140+
126141
return Engines{
127142
stateEngine: eng,
128143
todoEngine: eng,
@@ -137,6 +152,18 @@ func MakeSeparatedEnginesForTesting(state, log storage.Engine) Engines {
137152
if !buildutil.CrdbTestBuild {
138153
panic("separated engines are not supported")
139154
}
155+
if util.RaceEnabled {
156+
// Wrap the engines with span set engines to catch incorrect engine
157+
// accesses.
158+
wrappedStateEngine := spanset.NewSpanSetEngine(state, validateIsStateEngineSpan)
159+
wrappedLogEngine := spanset.NewSpanSetEngine(log, validateIsRaftEngineSpan)
160+
return Engines{
161+
stateEngine: wrappedStateEngine,
162+
todoEngine: nil,
163+
logEngine: wrappedLogEngine,
164+
separated: true,
165+
}
166+
}
140167
return Engines{
141168
stateEngine: state,
142169
todoEngine: nil,
@@ -178,3 +205,128 @@ func (e *Engines) TODOEngine() storage.Engine {
178205
func (e *Engines) Separated() bool {
179206
return e.separated
180207
}
208+
209+
// validateIsStateEngineSpan asserts that the provided span only overlaps with
210+
// keys in the State engine and returns an error if not.
211+
// Note that we could receive the span with a nil startKey, which has a special
212+
// meaning that the span represents: [endKey.Prev(), endKey).
213+
func validateIsStateEngineSpan(span spanset.TrickySpan) error {
214+
// If the provided span overlaps with local store span, it cannot be a
215+
// StateEngine span because Store-local keys belong to the LogEngine.
216+
if spanset.Overlaps(roachpb.Span{
217+
Key: keys.LocalStorePrefix,
218+
EndKey: keys.LocalStoreMax,
219+
}, span) {
220+
return errors.Errorf("overlaps with store local keys")
221+
}
222+
223+
// If the provided span is completely outside the rangeID local spans for any
224+
// rangeID, then there is no overlap with any rangeID local keys.
225+
fullRangeIDLocalSpans := roachpb.Span{
226+
Key: keys.LocalRangeIDPrefix.AsRawKey(),
227+
EndKey: keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd(),
228+
}
229+
if !spanset.Overlaps(fullRangeIDLocalSpans, span) {
230+
return nil
231+
}
232+
233+
// At this point, we know that we overlap with fullRangeIDLocalSpans. If we
234+
// are not completely within fullRangeIDLocalSpans, return an error as we
235+
// make an assumption that spans should respect the local RangeID tree
236+
// structure, and that spans that partially overlaps with
237+
// fullRangeIDLocalSpans don't make logical sense.
238+
if !spanset.Contains(fullRangeIDLocalSpans, span) {
239+
return errors.Errorf("overlapping an unreplicated rangeID key")
240+
}
241+
242+
// If the span in inside fullRangeIDLocalSpans, we expect that both start and
243+
// end keys should be in the same rangeID.
244+
rangeIDKey := span.Key
245+
if rangeIDKey == nil {
246+
rangeIDKey = span.EndKey
247+
}
248+
rangeID, err := keys.DecodeRangeIDPrefix(rangeIDKey)
249+
if err != nil {
250+
return errors.NewAssertionErrorWithWrappedErrf(err,
251+
"could not decode range ID for span: %s", span)
252+
}
253+
254+
// If the span is inside RangeIDLocalSpans but outside RangeIDUnreplicated,
255+
// it cannot overlap local raft keys.
256+
rangeIDPrefixBuf := keys.MakeRangeIDPrefixBuf(rangeID)
257+
if !spanset.Overlaps(roachpb.Span{
258+
Key: rangeIDPrefixBuf.UnreplicatedPrefix(),
259+
EndKey: rangeIDPrefixBuf.UnreplicatedPrefix().PrefixEnd(),
260+
}, span) {
261+
return nil
262+
}
263+
264+
// RangeTombstoneKey and RaftReplicaIDKey belong to the StateEngine, and can
265+
// be accessed as point keys.
266+
if roachpb.Span(span).Equal(roachpb.Span{
267+
Key: rangeIDPrefixBuf.RangeTombstoneKey(),
268+
}) {
269+
return nil
270+
}
271+
272+
if roachpb.Span(span).Equal(roachpb.Span{
273+
Key: rangeIDPrefixBuf.RaftReplicaIDKey(),
274+
}) {
275+
return nil
276+
}
277+
278+
return errors.Errorf("overlapping an unreplicated rangeID span")
279+
}
280+
281+
// validateIsRaftEngineSpan asserts that the provided span only overlaps with
282+
// keys in the Raft engine and returns an error if not.
283+
// Note that we could receive the span with a nil startKey, which has a special
284+
// meaning that the span represents: [endKey.Prev(), endKey).
285+
func validateIsRaftEngineSpan(span spanset.TrickySpan) error {
286+
// The LogEngine owns only Store-local and RangeID-local raft keys. A span
287+
// inside Store-local is correct. If it's only partially inside, an error is
288+
// returned below, as part of checking RangeID-local spans.
289+
if spanset.Contains(roachpb.Span{
290+
Key: keys.LocalStorePrefix,
291+
EndKey: keys.LocalStoreMax,
292+
}, span) {
293+
return nil
294+
}
295+
296+
// At this point, the remaining possible LogEngine keys are inside
297+
// LocalRangeID spans. If the span is not completely inside it, it must
298+
// overlap with some StateEngine keys.
299+
if !spanset.Contains(roachpb.Span{
300+
Key: keys.LocalRangeIDPrefix.AsRawKey(),
301+
EndKey: keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd(),
302+
}, span) {
303+
return errors.Errorf("overlaps with state engine keys")
304+
}
305+
306+
// If the span in inside LocalRangeID, we assume that both start and
307+
// end keys should be in the same rangeID.
308+
rangeIDKey := span.Key
309+
if rangeIDKey == nil {
310+
rangeIDKey = span.EndKey
311+
}
312+
rangeID, err := keys.DecodeRangeIDPrefix(rangeIDKey)
313+
if err != nil {
314+
return errors.NewAssertionErrorWithWrappedErrf(err,
315+
"could not decode range ID for span: %s", span)
316+
}
317+
rangeIDPrefixBuf := keys.MakeRangeIDPrefixBuf(rangeID)
318+
if !spanset.Contains(roachpb.Span{
319+
Key: rangeIDPrefixBuf.UnreplicatedPrefix(),
320+
EndKey: rangeIDPrefixBuf.UnreplicatedPrefix().PrefixEnd(),
321+
}, span) {
322+
return errors.Errorf("overlaps with state engine keys")
323+
}
324+
if spanset.Overlaps(roachpb.Span{Key: rangeIDPrefixBuf.RangeTombstoneKey()}, span) {
325+
return errors.Errorf("overlaps with state engine keys")
326+
}
327+
if spanset.Overlaps(roachpb.Span{Key: rangeIDPrefixBuf.RaftReplicaIDKey()}, span) {
328+
return errors.Errorf("overlaps with state engine keys")
329+
}
330+
331+
return nil
332+
}
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package kvstorage
7+
8+
import (
9+
"testing"
10+
11+
"github.com/cockroachdb/cockroach/pkg/keys"
12+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
13+
"github.com/cockroachdb/cockroach/pkg/roachpb"
14+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
15+
"github.com/cockroachdb/cockroach/pkg/util/log"
16+
"github.com/stretchr/testify/require"
17+
)
18+
19+
func TestValidateIsStateEngineSpan(t *testing.T) {
20+
defer leaktest.AfterTest(t)()
21+
defer log.Scope(t).Close(t)
22+
23+
s := func(start, end roachpb.Key) roachpb.Span {
24+
return roachpb.Span{Key: start, EndKey: end}
25+
}
26+
testCases := []struct {
27+
span roachpb.Span
28+
notOk bool
29+
}{
30+
// Full state engine spans.
31+
{span: s(roachpb.KeyMin, keys.LocalRangeIDPrefix.AsRawKey())},
32+
{span: s(keys.RangeForceFlushKey(1), keys.RangeLeaseKey(1))},
33+
{span: s(keys.LocalStoreMax, roachpb.KeyMax)},
34+
35+
// Full non-state engine spans.
36+
{span: s(roachpb.KeyMin, keys.MakeRangeIDUnreplicatedPrefix(1)), notOk: true}, // partial overlap
37+
{span: s(roachpb.KeyMin, roachpb.Key(keys.LocalStorePrefix).Next()), notOk: true},
38+
{span: s(roachpb.KeyMin, keys.RaftTruncatedStateKey(1)), notOk: true},
39+
{span: s(keys.LocalRangeIDPrefix.AsRawKey(), keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd()),
40+
notOk: true},
41+
{span: s(keys.RaftTruncatedStateKey(1), keys.RaftTruncatedStateKey(2)), notOk: true},
42+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd(), roachpb.KeyMax), notOk: true}, // partial overlap
43+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1),
44+
keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd()), notOk: true},
45+
{span: s(keys.RangeTombstoneKey(1), keys.RaftTruncatedStateKey(1)), notOk: true},
46+
{span: s(keys.RaftReplicaIDKey(1), keys.RaftTruncatedStateKey(1)), notOk: true},
47+
{span: s(keys.RaftTruncatedStateKey(1), roachpb.KeyMax), notOk: true},
48+
{span: s(keys.LocalStorePrefix, keys.LocalStoreMax), notOk: true},
49+
{span: s(keys.StoreGossipKey(), keys.StoreIdentKey()), notOk: true},
50+
{span: s(keys.LocalStoreMax.Prevish(1), roachpb.KeyMax), notOk: true},
51+
52+
// Point state engine spans.
53+
{span: s(roachpb.KeyMin, nil)},
54+
{span: s(keys.LocalRangeIDPrefix.AsRawKey().Prevish(1), nil)},
55+
{span: s(keys.RangeForceFlushKey(1), nil)},
56+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1).Prevish(1), nil)},
57+
{span: s(keys.RangeTombstoneKey(1), nil)},
58+
{span: s(keys.RaftReplicaIDKey(1), nil)},
59+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd(), nil)},
60+
{span: s(keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd(), nil)},
61+
{span: s(roachpb.Key(keys.LocalStorePrefix).Prevish(1), nil)},
62+
{span: s(keys.LocalStoreMax, nil)},
63+
{span: s(keys.LocalStoreMax.Next(), nil)},
64+
{span: s(roachpb.KeyMax, nil)},
65+
66+
// Point non-state engine spans.
67+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1), nil), notOk: true},
68+
{span: s(keys.RaftTruncatedStateKey(1), nil), notOk: true},
69+
{span: s(keys.RaftTruncatedStateKey(2), nil), notOk: true},
70+
{span: s(keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd().Prevish(1), nil), notOk: true},
71+
{span: s(keys.LocalStorePrefix, nil), notOk: true},
72+
{span: s(keys.StoreGossipKey(), nil), notOk: true},
73+
{span: s(keys.LocalStoreMax.Prevish(1), nil), notOk: true},
74+
75+
// Tricky state engine spans.
76+
{span: s(nil, keys.LocalRangeIDPrefix.AsRawKey())},
77+
{span: s(nil, keys.MakeRangeIDUnreplicatedPrefix(1))},
78+
{span: s(nil, keys.RangeForceFlushKey(1))},
79+
{span: s(nil, keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd().Next())},
80+
{span: s(nil, keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd().Next())},
81+
{span: s(nil, keys.LocalStorePrefix)},
82+
{span: s(nil, keys.LocalStoreMax.Next())},
83+
84+
// Tricky non-state engine spans.
85+
{span: s(nil, keys.MakeRangeIDUnreplicatedPrefix(1).Next()), notOk: true},
86+
{span: s(nil, keys.RaftTruncatedStateKey(1).Next()), notOk: true},
87+
{span: s(nil, keys.RaftTruncatedStateKey(2).Next()), notOk: true},
88+
{span: s(nil, keys.MakeRangeIDUnreplicatedPrefix(1).PrefixEnd()), notOk: true},
89+
{span: s(nil, keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd()), notOk: true}, // can't decode RangeID.
90+
{span: s(nil, roachpb.Key(keys.LocalStorePrefix).Next()), notOk: true},
91+
{span: s(nil, keys.StoreGossipKey()), notOk: true},
92+
{span: s(nil, keys.LocalStoreMax), notOk: true},
93+
}
94+
95+
for _, tc := range testCases {
96+
t.Run("", func(t *testing.T) {
97+
err := validateIsStateEngineSpan(spanset.TrickySpan(tc.span))
98+
require.Equal(t, tc.notOk, err != nil, tc.span)
99+
})
100+
}
101+
}
102+
103+
func TestValidateIsRaftEngineSpan(t *testing.T) {
104+
defer leaktest.AfterTest(t)()
105+
defer log.Scope(t).Close(t)
106+
107+
s := func(start, end roachpb.Key) roachpb.Span {
108+
return roachpb.Span{Key: start, EndKey: end}
109+
}
110+
testCases := []struct {
111+
span roachpb.Span
112+
notOk bool
113+
}{
114+
// Full spans not overlapping with state engine spans.
115+
{span: s(keys.RangeTombstoneKey(1).Next(), keys.RaftReplicaIDKey(1))},
116+
{span: s(keys.RaftReplicaIDKey(1).Next(), keys.RangeLastReplicaGCTimestampKey(1).Next())},
117+
{span: s(keys.LocalStorePrefix, keys.LocalStoreMax)},
118+
119+
// Full spans overlapping with state engine spans.
120+
{span: s(keys.MinKey, keys.LocalStorePrefix), notOk: true},
121+
{span: s(keys.RangeGCThresholdKey(1), keys.RangeVersionKey(1)), notOk: true},
122+
{span: s(keys.RangeTombstoneKey(1), keys.RaftReplicaIDKey(1)), notOk: true},
123+
{span: s(keys.RaftReplicaIDKey(1), keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd()), notOk: true},
124+
{span: s(keys.RangeGCThresholdKey(1), keys.RangeGCThresholdKey(2)), notOk: true},
125+
{span: s(keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd(), keys.LocalStorePrefix), notOk: true},
126+
{span: s(keys.LocalStoreMax, keys.MaxKey), notOk: true},
127+
128+
// Point spans not overlapping with state engine spans.
129+
{span: s(keys.RaftLogKey(1, 1), nil)},
130+
{span: s(keys.RangeTombstoneKey(1).Next(), nil)},
131+
{span: s(keys.RaftReplicaIDKey(1).Next(), nil)},
132+
{span: s(keys.RaftTruncatedStateKey(1).Next(), nil)},
133+
{span: s(keys.LocalStorePrefix, nil)},
134+
{span: s(roachpb.Key(keys.LocalStorePrefix).Next(), nil)},
135+
{span: s(keys.LocalStoreMax.Prevish(1), nil)},
136+
137+
// Point spans overlapping with state engine spans.
138+
{span: s(keys.LocalRangeIDPrefix.AsRawKey(), nil), notOk: true}, // invalid start key
139+
{span: s(keys.RangeLeaseKey(1), nil), notOk: true},
140+
{span: s(keys.RangeTombstoneKey(1), nil), notOk: true},
141+
{span: s(keys.RaftReplicaIDKey(1), nil), notOk: true},
142+
{span: s(keys.RangeTombstoneKey(2), nil), notOk: true},
143+
{span: s(keys.RaftReplicaIDKey(2), nil), notOk: true},
144+
{span: s(keys.LocalRangeIDPrefix.AsRawKey().PrefixEnd(), nil), notOk: true},
145+
146+
// Tricky spans not overlapping with state engine spans.
147+
{span: s(nil, keys.RaftLogKey(1, 1))},
148+
{span: s(nil, keys.RangeTombstoneKey(1))},
149+
{span: s(nil, keys.RaftReplicaIDKey(1))},
150+
{span: s(nil, roachpb.Key(keys.LocalStorePrefix).PrefixEnd())},
151+
152+
// Tricky spans overlapping with state engine spans.
153+
{span: s(nil, keys.LocalRangeIDPrefix.AsRawKey()), notOk: true},
154+
{span: s(nil, keys.LocalStorePrefix), notOk: true},
155+
{span: s(nil, keys.RangeLeaseKey(1)), notOk: true},
156+
{span: s(nil, keys.RangeTombstoneKey(1).Next()), notOk: true},
157+
{span: s(nil, keys.RaftReplicaIDKey(1).Next()), notOk: true},
158+
{span: s(nil, roachpb.Key(keys.LocalStorePrefix).PrefixEnd().Next()), notOk: true},
159+
}
160+
161+
for _, tc := range testCases {
162+
t.Run("", func(t *testing.T) {
163+
err := validateIsRaftEngineSpan(spanset.TrickySpan(tc.span))
164+
require.Equal(t, tc.notOk, err != nil, tc.span)
165+
})
166+
}
167+
}

0 commit comments

Comments
 (0)