From 95cc9ae91cce6e22f2f287db4930e753e98baea5 Mon Sep 17 00:00:00 2001 From: Han Kang Date: Sun, 23 Jul 2023 13:25:42 -0700 Subject: [PATCH 1/4] add a generic interface for the etcd backend Change-Id: I2dc656d216732582693346a5d79186c73af7c5ec Signed-off-by: Han Kang --- server/interfaces/errors.go | 94 +++++++++++++++++++++++++++++++++ server/interfaces/interfaces.go | 75 ++++++++++++++++++++++++++ 2 files changed, 169 insertions(+) create mode 100644 server/interfaces/errors.go create mode 100644 server/interfaces/interfaces.go diff --git a/server/interfaces/errors.go b/server/interfaces/errors.go new file mode 100644 index 00000000000..6b9d4c64e89 --- /dev/null +++ b/server/interfaces/errors.go @@ -0,0 +1,94 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package interfaces + +import "errors" + +// These errors can be returned when opening or calling methods on a DB. +var ( + // ErrDatabaseNotOpen is returned when a DB instance is accessed before it + // is opened or after it is closed. + ErrDatabaseNotOpen = errors.New("database not open") + + // ErrDatabaseOpen is returned when opening a database that is + // already open. + ErrDatabaseOpen = errors.New("database already open") + + // ErrInvalid is returned when both meta pages on a database are invalid. + // This typically occurs when a file is not a bolt database. 
+ ErrInvalid = errors.New("invalid database") + + // ErrInvalidMapping is returned when the database file fails to get mapped. + ErrInvalidMapping = errors.New("database isn't correctly mapped") + + // ErrVersionMismatch is returned when the data file was created with a + // different version of Bolt. + ErrVersionMismatch = errors.New("version mismatch") + + // ErrChecksum is returned when either meta page checksum does not match. + ErrChecksum = errors.New("checksum error") + + // ErrTimeout is returned when a database cannot obtain an exclusive lock + // on the data file after the timeout passed to Open(). + ErrTimeout = errors.New("timeout") +) + +// These errors can occur when beginning or committing a Tx. +var ( + // ErrTxNotWritable is returned when performing a write operation on a + // read-only transaction. + ErrTxNotWritable = errors.New("tx not writable") + + // ErrTxClosed is returned when committing or rolling back a transaction + // that has already been committed or rolled back. + ErrTxClosed = errors.New("tx closed") + + // ErrDatabaseReadOnly is returned when a mutating transaction is started on a + // read-only database. + ErrDatabaseReadOnly = errors.New("database is in read-only mode") + + // ErrFreePagesNotLoaded is returned when a readonly transaction without + // preloading the free pages is trying to access the free pages. + ErrFreePagesNotLoaded = errors.New("free pages are not pre-loaded") +) + +// These errors can occur when putting or deleting a value or a bucket. +var ( + // ErrBucketNotFound is returned when trying to access a bucket that has + // not been created yet. + ErrBucketNotFound = errors.New("bucket not found") + + // ErrBucketExists is returned when creating a bucket that already exists. + ErrBucketExists = errors.New("bucket already exists") + + // ErrBucketNameRequired is returned when creating a bucket with a blank name. 
+ ErrBucketNameRequired = errors.New("bucket name required") + + // ErrKeyRequired is returned when inserting a zero-length key. + ErrKeyRequired = errors.New("key required") + + // ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize. + ErrKeyTooLarge = errors.New("key too large") + + // ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize. + ErrValueTooLarge = errors.New("value too large") + + // ErrIncompatibleValue is returned when trying create or delete a bucket + // on an existing non-bucket key or when trying to create or delete a + // non-bucket key on an existing bucket key. + ErrIncompatibleValue = errors.New("incompatible value") +) diff --git a/server/interfaces/interfaces.go b/server/interfaces/interfaces.go new file mode 100644 index 00000000000..e77f85f9fb8 --- /dev/null +++ b/server/interfaces/interfaces.go @@ -0,0 +1,75 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package interfaces + +import ( + "io" + + "github.com/prometheus/client_golang/prometheus" + + "go.uber.org/zap" +) + +type DB interface { + Path() string + GoString() string + Buckets() []string + HasBucket(name string) bool + DeleteBucket(name []byte) error + CreateBucket(string) + GetFromBucket(bucket string, key string) []byte + String() string + Close() error + Begin(writable bool) (Tx, error) + Size() (size int64) + Sync() error + Stats() interface{} + Info() interface{} + SetFreelistType(string) + FreelistType() string + DBType() string + HashBuckets(ignores func(bucketName, keyName []byte) bool) (uint32, error) + Defrag(logger *zap.Logger, dbopts interface{}, defragLimit int) error +} + +type Tx interface { + DB() DB + Size() int64 + Writable() bool + Stats() interface{} + Observe(rebalanceHist, spillHist, writeHist prometheus.Histogram) + Bucket(name []byte) Bucket + CreateBucket(name []byte) (Bucket, error) + DeleteBucket(name []byte) error + ForEach(interface{}) error + Commit() error + Rollback() error + WriteTo(w io.Writer) (n int64, err error) + CopyDatabase(lg *zap.Logger, dst string) (err error) +} + +type Bucket interface { + Tx() Tx + Writable() bool + Get(key []byte) []byte + Put(key []byte, value []byte) error + UnsafeRange(key, endKey []byte, limit int64) (keys [][]byte, vs [][]byte) + Delete(key []byte) error + ForEach(func(k []byte, v []byte) error) error + Stats() interface{} + SetFillPercent(float64) +} From ef43598deeceec9fe0012dfe79b852caf8b7b486 Mon Sep 17 00:00:00 2001 From: Han Kang Date: Sun, 23 Jul 2023 13:31:53 -0700 Subject: [PATCH 2/4] first pass at extracting out bolt to use the generic interface Change-Id: Ied315a6089c9ee280d4e2ea33179831c29bc7c0b Signed-off-by: Han Kang --- etcdutl/etcdutl/backup_command.go | 4 +- etcdutl/etcdutl/migrate_command.go | 4 +- etcdutl/snapshot/v3_snapshot.go | 16 +- server/{storage/schema => bucket}/bucket.go | 48 +- server/config/config.go | 6 +- server/databases/bbolt/bbolt.go | 485 
++++++++++++++++++ server/databases/bbolt/config_default.go | 21 + server/databases/bbolt/config_linux.go | 32 ++ server/databases/bbolt/config_windows.go | 24 + server/embed/config.go | 22 + server/embed/etcd.go | 5 +- server/etcdserver/bootstrap_test.go | 5 +- server/etcdserver/cindex/cindex_test.go | 6 +- server/storage/backend.go | 2 + server/storage/backend/backend.go | 257 ++++------ server/storage/backend/backend_bench_test.go | 7 +- server/storage/backend/backend_test.go | 88 ++-- server/storage/backend/batch_tx.go | 93 +--- server/storage/backend/batch_tx_test.go | 68 +-- server/storage/backend/export_test.go | 6 +- server/storage/backend/hooks_test.go | 4 +- server/storage/backend/read_tx.go | 20 +- server/storage/backend/testing/betesting.go | 1 + server/storage/backend/tx_buffer.go | 18 +- server/storage/mvcc/hash.go | 7 +- server/storage/mvcc/kvstore.go | 10 +- server/storage/mvcc/kvstore_compaction.go | 6 +- .../storage/mvcc/kvstore_compaction_test.go | 7 +- server/storage/mvcc/kvstore_test.go | 66 +-- server/storage/mvcc/kvstore_txn.go | 9 +- server/storage/mvcc/store.go | 10 +- server/storage/mvcc/store_test.go | 15 +- server/storage/mvcc/watchable_store.go | 8 +- server/storage/schema/actions.go | 7 +- server/storage/schema/actions_test.go | 28 +- server/storage/schema/alarm.go | 9 +- server/storage/schema/auth.go | 17 +- server/storage/schema/auth_roles.go | 11 +- server/storage/schema/auth_roles_test.go | 8 +- server/storage/schema/auth_test.go | 4 +- server/storage/schema/auth_users.go | 9 +- server/storage/schema/auth_users_test.go | 4 +- server/storage/schema/changes.go | 6 +- server/storage/schema/changes_test.go | 7 +- server/storage/schema/cindex.go | 13 +- server/storage/schema/confstate.go | 7 +- server/storage/schema/lease.go | 11 +- server/storage/schema/lease_test.go | 2 +- server/storage/schema/membership.go | 48 +- server/storage/schema/schema.go | 4 +- server/storage/schema/schema_test.go | 14 +- server/storage/schema/version.go | 
10 +- server/storage/schema/version_test.go | 8 +- server/verify/verify.go | 10 +- tests/e2e/utl_migrate_test.go | 4 +- .../integration/clientv3/maintenance_test.go | 4 +- tests/integration/v3_alarm_test.go | 4 +- tools/etcd-dump-db/backend.go | 14 +- 58 files changed, 1087 insertions(+), 556 deletions(-) rename server/{storage/schema => bucket}/bucket.go (62%) create mode 100644 server/databases/bbolt/bbolt.go create mode 100644 server/databases/bbolt/config_default.go create mode 100644 server/databases/bbolt/config_linux.go create mode 100644 server/databases/bbolt/config_windows.go diff --git a/etcdutl/etcdutl/backup_command.go b/etcdutl/etcdutl/backup_command.go index 89121a37e95..d4e421cdbc3 100644 --- a/etcdutl/etcdutl/backup_command.go +++ b/etcdutl/etcdutl/backup_command.go @@ -48,6 +48,7 @@ var ( backupDir string walDir string backupWalDir string + backendType string ) func NewBackupCommand() *cobra.Command { @@ -61,6 +62,7 @@ func NewBackupCommand() *cobra.Command { cmd.Flags().StringVar(&dataDir, "data-dir", "", "Path to the etcd data dir") cmd.Flags().StringVar(&walDir, "wal-dir", "", "Path to the etcd wal dir") cmd.Flags().StringVar(&backupDir, "backup-dir", "", "Path to the backup dir") + cmd.Flags().StringVar(&backendType, "backend-type", "bolt", "Default backend type") cmd.Flags().StringVar(&backupWalDir, "backup-wal-dir", "", "Path to the backup wal dir") cmd.Flags().BoolVar(&withV3, "with-v3", true, "Backup v3 backend data. Note -with-v3=false is not supported since etcd v3.6. 
Please use v3.5.x client as the last supporting this deprecated functionality.") cmd.MarkFlagRequired("data-dir") @@ -303,7 +305,7 @@ func saveDB(lg *zap.Logger, destDB, srcDB string, idx uint64, term uint64, desir } // trim membership info - be := backend.NewDefaultBackend(lg, destDB) + be := backend.NewDefaultBackend(lg, destDB, backendType) defer be.Close() ms := schema.NewMembershipBackend(lg, be) if err := ms.TrimClusterFromBackend(); err != nil { diff --git a/etcdutl/etcdutl/migrate_command.go b/etcdutl/etcdutl/migrate_command.go index 521cf8ba80c..6c45ec2095e 100644 --- a/etcdutl/etcdutl/migrate_command.go +++ b/etcdutl/etcdutl/migrate_command.go @@ -56,6 +56,7 @@ type migrateOptions struct { dataDir string targetVersion string force bool + backendType string } func newMigrateOptions() *migrateOptions { @@ -64,6 +65,7 @@ func newMigrateOptions() *migrateOptions { func (o *migrateOptions) AddFlags(cmd *cobra.Command) { cmd.Flags().StringVar(&o.dataDir, "data-dir", o.dataDir, "Path to the etcd data dir") + cmd.Flags().StringVar(&o.backendType, "backend-type", "bolt", "Type of backend used by etcd.") cmd.MarkFlagRequired("data-dir") cmd.MarkFlagDirname("data-dir") @@ -92,7 +94,7 @@ func (o *migrateOptions) Config() (*migrateConfig, error) { } dbPath := datadir.ToBackendFileName(o.dataDir) - c.be = backend.NewDefaultBackend(GetLogger(), dbPath) + c.be = backend.NewDefaultBackend(GetLogger(), dbPath, o.backendType) walPath := datadir.ToWalDir(o.dataDir) w, err := wal.OpenForRead(c.lg, walPath, walpb.Snapshot{}) diff --git a/etcdutl/snapshot/v3_snapshot.go b/etcdutl/snapshot/v3_snapshot.go index 00969963d64..b542a9252fa 100644 --- a/etcdutl/snapshot/v3_snapshot.go +++ b/etcdutl/snapshot/v3_snapshot.go @@ -26,6 +26,8 @@ import ( "reflect" "strings" + "go.etcd.io/etcd/server/v3/bucket" + "go.uber.org/zap" bolt "go.etcd.io/bbolt" @@ -50,6 +52,8 @@ import ( "go.etcd.io/raft/v3/raftpb" ) +const defaultBackendType = "bolt" + // Manager defines snapshot methods. 
type Manager interface { // Save fetches snapshot from remote etcd server, saves data @@ -318,7 +322,7 @@ func (s *v3Manager) saveDB() error { return err } - be := backend.NewDefaultBackend(s.lg, s.outDbPath()) + be := backend.NewDefaultBackend(s.lg, s.outDbPath(), defaultBackendType) defer be.Close() err = schema.NewMembershipBackend(s.lg, be).TrimMembershipFromBackend() @@ -332,7 +336,7 @@ func (s *v3Manager) saveDB() error { // modifyLatestRevision can increase the latest revision by the given amount and sets the scheduled compaction // to that revision so that the server will consider this revision compacted. func (s *v3Manager) modifyLatestRevision(bumpAmount uint64) error { - be := backend.NewDefaultBackend(s.lg, s.outDbPath()) + be := backend.NewDefaultBackend(s.lg, s.outDbPath(), defaultBackendType) defer func() { be.ForceCommit() be.Close() @@ -365,7 +369,7 @@ func (s *v3Manager) unsafeBumpRevision(tx backend.UnsafeWriter, latest revision, latest.sub = 0 k := make([]byte, 17) revToBytes(k, latest) - tx.UnsafePut(schema.Key, k, []byte{}) + tx.UnsafePut(bucket.Key, k, []byte{}) return latest } @@ -381,7 +385,7 @@ func (s *v3Manager) unsafeMarkRevisionCompacted(tx backend.UnsafeWriter, latest func (s *v3Manager) unsafeGetLatestRevision(tx backend.UnsafeReader) (revision, error) { var latest revision - err := tx.UnsafeForEach(schema.Key, func(k, _ []byte) (err error) { + err := tx.UnsafeForEach(bucket.Key, func(k, _ []byte) (err error) { rev := bytesToRev(k) if rev.GreaterThan(latest) { @@ -475,7 +479,7 @@ func (s *v3Manager) saveWALAndSnap() (*raftpb.HardState, error) { // add members again to persist them to the store we create. 
st := v2store.New(etcdserver.StoreClusterPrefix, etcdserver.StoreKeysPrefix) s.cl.SetStore(st) - be := backend.NewDefaultBackend(s.lg, s.outDbPath()) + be := backend.NewDefaultBackend(s.lg, s.outDbPath(), defaultBackendType) defer be.Close() s.cl.SetBackend(schema.NewMembershipBackend(s.lg, be)) for _, m := range s.cl.Members() { @@ -558,7 +562,7 @@ func (s *v3Manager) saveWALAndSnap() (*raftpb.HardState, error) { } func (s *v3Manager) updateCIndex(commit uint64, term uint64) error { - be := backend.NewDefaultBackend(s.lg, s.outDbPath()) + be := backend.NewDefaultBackend(s.lg, s.outDbPath(), defaultBackendType) defer be.Close() cindex.UpdateConsistentIndexForce(be.BatchTx(), commit, term) diff --git a/server/storage/schema/bucket.go b/server/bucket/bucket.go similarity index 62% rename from server/storage/schema/bucket.go rename to server/bucket/bucket.go index 5472af3c3b4..cf5de30e573 100644 --- a/server/storage/schema/bucket.go +++ b/server/bucket/bucket.go @@ -12,13 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -package schema +package bucket import ( "bytes" "go.etcd.io/etcd/client/pkg/v3/types" - "go.etcd.io/etcd/server/v3/storage/backend" ) var ( @@ -39,30 +38,49 @@ var ( testBucketName = []byte("test") ) +type BucketID int + +type Bucket interface { + // ID returns a unique identifier of a bucket. + // The id must NOT be persisted and can be used as lightweight identificator + // in the in-memory maps. + ID() BucketID + Name() []byte + // String implements Stringer (human readable name). + String() string + + // IsSafeRangeBucket is a hack to avoid inadvertently reading duplicate keys; + // overwrites on a bucket should only fetch with limit=1, but safeRangeBucket + // is known to never overwrite any key so range is safe. 
+ IsSafeRangeBucket() bool +} + var ( - Key = backend.Bucket(bucket{id: 1, name: keyBucketName, safeRangeBucket: true}) - Meta = backend.Bucket(bucket{id: 2, name: metaBucketName, safeRangeBucket: false}) - Lease = backend.Bucket(bucket{id: 3, name: leaseBucketName, safeRangeBucket: false}) - Alarm = backend.Bucket(bucket{id: 4, name: alarmBucketName, safeRangeBucket: false}) - Cluster = backend.Bucket(bucket{id: 5, name: clusterBucketName, safeRangeBucket: false}) + Key = Bucket(bucket{id: 1, name: keyBucketName, safeRangeBucket: true}) + Meta = Bucket(bucket{id: 2, name: metaBucketName, safeRangeBucket: false}) + Lease = Bucket(bucket{id: 3, name: leaseBucketName, safeRangeBucket: false}) + Alarm = Bucket(bucket{id: 4, name: alarmBucketName, safeRangeBucket: false}) + Cluster = Bucket(bucket{id: 5, name: clusterBucketName, safeRangeBucket: false}) + + Members = Bucket(bucket{id: 10, name: membersBucketName, safeRangeBucket: false}) + MembersRemoved = Bucket(bucket{id: 11, name: membersRemovedBucketName, safeRangeBucket: false}) - Members = backend.Bucket(bucket{id: 10, name: membersBucketName, safeRangeBucket: false}) - MembersRemoved = backend.Bucket(bucket{id: 11, name: membersRemovedBucketName, safeRangeBucket: false}) + Auth = Bucket(bucket{id: 20, name: authBucketName, safeRangeBucket: false}) + AuthUsers = Bucket(bucket{id: 21, name: authUsersBucketName, safeRangeBucket: false}) + AuthRoles = Bucket(bucket{id: 22, name: authRolesBucketName, safeRangeBucket: false}) - Auth = backend.Bucket(bucket{id: 20, name: authBucketName, safeRangeBucket: false}) - AuthUsers = backend.Bucket(bucket{id: 21, name: authUsersBucketName, safeRangeBucket: false}) - AuthRoles = backend.Bucket(bucket{id: 22, name: authRolesBucketName, safeRangeBucket: false}) + Test = Bucket(bucket{id: 100, name: testBucketName, safeRangeBucket: false}) - Test = backend.Bucket(bucket{id: 100, name: testBucketName, safeRangeBucket: false}) + Buckets = []Bucket{Key, Meta, Lease, Alarm, Cluster, 
Members, MembersRemoved, Auth, AuthUsers, AuthRoles, Test} ) type bucket struct { - id backend.BucketID + id BucketID name []byte safeRangeBucket bool } -func (b bucket) ID() backend.BucketID { return b.id } +func (b bucket) ID() BucketID { return b.id } func (b bucket) Name() []byte { return b.name } func (b bucket) String() string { return string(b.Name()) } func (b bucket) IsSafeRangeBucket() bool { return b.safeRangeBucket } diff --git a/server/config/config.go b/server/config/config.go index 48de650b8a1..ee595127ec1 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -30,8 +30,6 @@ import ( "go.etcd.io/etcd/pkg/v3/netutil" "go.etcd.io/etcd/server/v3/etcdserver/api/v3discovery" "go.etcd.io/etcd/server/v3/storage/datadir" - - bolt "go.etcd.io/bbolt" ) // ServerConfig holds the configuration of etcd as taken from the command line or discovery. @@ -42,6 +40,8 @@ type ServerConfig struct { DiscoveryProxy string DiscoveryCfg v3discovery.DiscoveryConfig + BackendType string + ClientURLs types.URLs PeerURLs types.URLs DataDir string @@ -67,7 +67,7 @@ type ServerConfig struct { BackendBatchLimit int // BackendFreelistType is the type of the backend boltdb freelist. - BackendFreelistType bolt.FreelistType + BackendFreelistType string InitialPeerURLsMap types.URLsMap InitialClusterToken string diff --git a/server/databases/bbolt/bbolt.go b/server/databases/bbolt/bbolt.go new file mode 100644 index 00000000000..050e0491c91 --- /dev/null +++ b/server/databases/bbolt/bbolt.go @@ -0,0 +1,485 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package bbolt + +import ( + "bytes" + "fmt" + "hash/crc32" + "io" + "math" + "os" + "path/filepath" + + "github.com/prometheus/client_golang/prometheus" + + "go.uber.org/zap" + + bolt "go.etcd.io/bbolt" + + "go.etcd.io/etcd/server/v3/interfaces" +) + +const ( + defragLimit = 10000 +) + +type DbOpts struct { + MMapSize int + FreelistType string + NoSync bool + NoGrowSync bool + Mlock bool +} + +func SetOptions(opts DbOpts) *bolt.Options { + bopts := &bolt.Options{} + if boltOpenOptions != nil { + *bopts = *boltOpenOptions + } + bopts.InitialMmapSize = opts.MMapSize + if opts.FreelistType != "" { + bopts.FreelistType = bolt.FreelistType(opts.FreelistType) + } + bopts.NoSync = opts.NoSync + bopts.NoGrowSync = opts.NoGrowSync + bopts.Mlock = opts.Mlock + return bopts +} +func Open(path string, mode os.FileMode, options *bolt.Options) (interfaces.DB, error) { + if db, err := bolt.Open(path, mode, options); err != nil { + return nil, err + } else { + return &BBoltDB{db: db}, nil + } +} + +type BBoltDB struct { + db *bolt.DB +} + +func (b *BBoltDB) Path() string { + return b.db.Path() +} + +func (b *BBoltDB) DBType() string { + return "bolt" +} + +func (b *BBoltDB) GoString() string { + return b.db.GoString() +} + +func (b *BBoltDB) String() string { + return b.db.String() +} + +func (b *BBoltDB) Flatten() error { + panic("not implemented for bolt") +} + +func (b *BBoltDB) Close() error { + return b.db.Close() +} + +// Buckets no-opt +func (b *BBoltDB) Buckets() []string { + return nil +} + +// DeleteBucket no-opt +func (b *BBoltDB) DeleteBucket(name []byte) error { + 
return nil +} + +// HasBucket no-opt +func (b *BBoltDB) HasBucket(name string) bool { + return false +} + +// CreateBucket no-opt +func (b *BBoltDB) CreateBucket(name string) { + return +} + +func (b *BBoltDB) Begin(writable bool) (interfaces.Tx, error) { + btx, err := b.db.Begin(writable) + if err != nil { + return nil, err + } + return &BBoltTx{Btx: btx}, nil +} + +func (b *BBoltDB) GetFromBucket(bucket string, key string) (val []byte) { + b.db.View(func(tx *bolt.Tx) error { + b := tx.Bucket([]byte(bucket)) + if b == nil { + return nil + } + v := b.Get([]byte(key)) + val = v + return nil + }) + return val +} + +func (b *BBoltDB) HashBuckets(ignores func(bucketName, keyName []byte) bool) (uint32, error) { + h := crc32.New(crc32.MakeTable(crc32.Castagnoli)) + err := b.db.View(func(tx *bolt.Tx) error { + // get root cursor + c := tx.Cursor() + for next, _ := c.First(); next != nil; next, _ = c.Next() { + b := tx.Bucket(next) + if b == nil { + return fmt.Errorf("cannot get hash of bucket %s", string(next)) + } + b.ForEach(func(k, v []byte) error { + if ignores != nil && !ignores(next, k) { + h.Write(k) + h.Write(v) + } + return nil + }) + } + return nil + }) + if err != nil { + return 0, err + } + return h.Sum32(), nil +} + +func (b *BBoltDB) Size() (lsm int64) { + panic("implement me") +} + +func (b *BBoltDB) Defrag(logger *zap.Logger, dbopts interface{}, defragLimit int) error { + // Create a temporary file to ensure we start with a clean slate. + // Snapshotter.cleanupSnapdir cleans up any of these that are found during startup. 
+ dir := filepath.Dir(b.db.Path()) + temp, err := os.CreateTemp(dir, "db.tmp.*") + if err != nil { + return err + } + options := bolt.Options{} + if boltOpenOptions != nil { + options = *boltOpenOptions + } + options.OpenFile = func(_ string, _ int, _ os.FileMode) (file *os.File, err error) { + return temp, nil + } + // Don't load tmp db into memory regardless of opening options + options.Mlock = false + tdbp := temp.Name() + tmpdb, err := bolt.Open(tdbp, 0600, &options) + if err != nil { + return err + } + + // gofail: var defragBeforeCopy struct{} + derr := b.defrag(b.db, tmpdb, defragLimit) + if derr != nil { + tmpdb.Close() + if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil { + logger.Error("failed to remove db.tmp after defragmentation completed", zap.Error(rmErr)) + } + return derr + } + dbp := b.db.Path() + err = b.db.Close() + if err != nil { + logger.Fatal("failed to close database", zap.Error(err)) + } + err = tmpdb.Close() + if err != nil { + logger.Fatal("failed to close tmp database", zap.Error(err)) + } + + // gofail: var defragBeforeRename struct{} + err = os.Rename(tdbp, dbp) + if err != nil { + logger.Fatal("failed to rename tmp database", zap.Error(err)) + } + b.db, err = bolt.Open(dbp, 0600, dbopts.(*bolt.Options)) + if err != nil { + logger.Fatal("failed to open database", zap.String("path", dbp), zap.Error(err)) + } + return err +} + +func (b *BBoltDB) defrag(odb, tmpdb *bolt.DB, limit int) error { + tmptx, err := tmpdb.Begin(true) + if err != nil { + return err + } + defer func() { + if err != nil { + tmptx.Rollback() + } + }() + + // open a tx on old db for read + tx, err := odb.Begin(false) + if err != nil { + return err + } + defer tx.Rollback() + + c := tx.Cursor() + + count := 0 + for next, _ := c.First(); next != nil; next, _ = c.Next() { + b := tx.Bucket(next) + if b == nil { + return fmt.Errorf("backend: cannot defrag bucket %s", string(next)) + } + + tmpb, berr := tmptx.CreateBucketIfNotExists(next) + if berr != nil { + return 
berr + } + tmpb.FillPercent = 0.9 // for bucket2seq write in for each + + if err = b.ForEach(func(k, v []byte) error { + count++ + if count > limit { // commit and reopen the temp tx once more than `limit` keys are pending + err = tmptx.Commit() + if err != nil { + return err + } + tmptx, err = tmpdb.Begin(true) + if err != nil { + return err + } + tmpb = tmptx.Bucket(next) + tmpb.FillPercent = 0.9 // for bucket2seq write in for each + + count = 0 + } + return tmpb.Put(k, v) + }); err != nil { + return err + } + } + + return tmptx.Commit() +} + +func (b *BBoltDB) Sync() error { + return b.db.Sync() +} + +func (b *BBoltDB) Stats() interface{} { + return b.db.Stats() +} + +func (b *BBoltDB) Info() interface{} { + return b.db.Info() +} + +func (b *BBoltDB) FreelistType() string { + return string(b.db.FreelistType) +} + +func (b *BBoltDB) SetFreelistType(freeListType string) { + b.db.FreelistType = bolt.FreelistType(freeListType) +} + +type BBoltTx struct { + Btx *bolt.Tx +} + +func (b *BBoltTx) DB() interfaces.DB { + return &BBoltDB{db: b.Btx.DB()} +} + +func (b *BBoltTx) Size() int64 { + return b.Btx.Size() +} + +func (b *BBoltTx) Writable() bool { + return b.Btx.Writable() +} + +func (b *BBoltTx) Stats() interface{} { + return b.Btx.Stats() +} + +func (b *BBoltTx) Bucket(name []byte) interfaces.Bucket { + if buck := b.Btx.Bucket(name); buck != nil { + return &BBoltBucket{buck} // reuse the bucket already looked up above + } else { + return nil + } +} + +func (b *BBoltTx) CreateBucket(name []byte) (interfaces.Bucket, error) { + bbuck, err := b.Btx.CreateBucket(name) + if err != nil { + return nil, err + } + return &BBoltBucket{bbuck}, nil +} + +func (b *BBoltTx) Observe(rebalanceHist, spillHist, writeHist prometheus.Histogram) { + txstats := b.Btx.Stats() + rebalanceHist.Observe(txstats.GetRebalanceTime().Seconds()) + spillHist.Observe(txstats.GetSpillTime().Seconds()) + writeHist.Observe(txstats.GetWriteTime().Seconds()) +} + +func (b *BBoltTx) CreateBucketIfNotExists(name []byte) (interfaces.Bucket, error) { + if buck, err := 
b.Btx.CreateBucketIfNotExists(name); err != nil { + return nil, err + } else { + return &BBoltBucket{buck}, nil + } +} + +func (b *BBoltTx) DeleteBucket(name []byte) error { + return b.Btx.DeleteBucket(name) +} + +func (b *BBoltTx) ForEach(fn interface{}) error { + return b.Btx.ForEach(fn.(func(name []byte, b *bolt.Bucket) error)) +} + +func (b *BBoltTx) Commit() error { + return b.Btx.Commit() +} + +func (b *BBoltTx) Rollback() error { + return b.Btx.Rollback() +} + +func (b *BBoltTx) Copy(w io.Writer) error { + return b.Btx.Copy(w) +} + +func (b *BBoltTx) WriteTo(w io.Writer) (n int64, err error) { + return b.Btx.WriteTo(w) +} + +func (b *BBoltTx) CopyDatabase(lg *zap.Logger, dst string) (err error) { + dest, err := os.Create(dst) + if err != nil { + lg.Fatal("creation of destination file failed", zap.String("dest", dst), zap.Error(err)) + } + // write contents to new db + if _, err := b.Btx.WriteTo(dest); err != nil { + lg.Fatal("bbolt write to destination file failed", zap.String("dest", dst), zap.Error(err)) + } + // close dest db + return dest.Close() +} + +type BBoltBucket struct { + bucket *bolt.Bucket +} + +func (b *BBoltBucket) Tx() interfaces.Tx { + if btx := b.bucket.Tx(); btx != nil { + return &BBoltTx{Btx: btx} + } + return nil +} + +func (b *BBoltBucket) Writable() bool { + return b.bucket.Writable() +} + +func (b *BBoltBucket) UnsafeRange(key, endKey []byte, limit int64) (keys [][]byte, vs [][]byte) { + c := b.bucket.Cursor() + if limit <= 0 { + limit = math.MaxInt64 + } + var isMatch func(b []byte) bool + if len(endKey) > 0 { + isMatch = func(b []byte) bool { return bytes.Compare(b, endKey) < 0 } + } else { + isMatch = func(b []byte) bool { return bytes.Equal(b, key) } + limit = 1 + } + + for ck, cv := c.Seek(key); ck != nil && isMatch(ck); ck, cv = c.Next() { + vs = append(vs, cv) + keys = append(keys, ck) + if limit == int64(len(keys)) { + break + } + } + return keys, vs +} + +func (b *BBoltBucket) Bucket(name []byte) interfaces.Bucket { + if 
buck := b.bucket.Bucket(name); buck != nil { + return &BBoltBucket{bucket: buck} + } + return nil +} + +func (b *BBoltBucket) SetFillPercent(fp float64) { + b.bucket.FillPercent = fp +} +func (b *BBoltBucket) CreateBucket(key []byte) (interfaces.Bucket, error) { + if buck, err := b.bucket.CreateBucket(key); err != nil { + return nil, err + } else { + return &BBoltBucket{bucket: buck}, nil + } +} + +func (b *BBoltBucket) CreateBucketIfNotExists(key []byte) (interfaces.Bucket, error) { + if buck, err := b.bucket.CreateBucketIfNotExists(key); err != nil { + return nil, err + } else { + return &BBoltBucket{bucket: buck}, nil + } +} + +func (b *BBoltBucket) DeleteBucket(key []byte) error { + return b.bucket.DeleteBucket(key) +} + +func (b *BBoltBucket) Get(key []byte) []byte { + return b.bucket.Get(key) +} + +func (b *BBoltBucket) Put(key []byte, value []byte) error { + return b.bucket.Put(key, value) +} + +func (b *BBoltBucket) Delete(key []byte) error { + return b.bucket.Delete(key) +} + +func (b *BBoltBucket) ForEach(fn func(k []byte, v []byte) error) error { + wrapfn := func(k []byte, v []byte) error { + return fn(k, v) + } + return b.bucket.ForEach(wrapfn) +} + +func (b *BBoltBucket) ForEachBucket(fn interface{}) error { + return b.bucket.ForEachBucket(fn.(func(k []byte) error)) +} + +func (b *BBoltBucket) Stats() interface{} { + return b.bucket.Stats() +} + +//db, err := bolt.Open(bcfg.Path, 0600, bopts) diff --git a/server/databases/bbolt/config_default.go b/server/databases/bbolt/config_default.go new file mode 100644 index 00000000000..79372189fc2 --- /dev/null +++ b/server/databases/bbolt/config_default.go @@ -0,0 +1,21 @@ +// Copyright 2016 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !linux && !windows + +package bbolt + +import bolt "go.etcd.io/bbolt" + +var boltOpenOptions *bolt.Options diff --git a/server/databases/bbolt/config_linux.go b/server/databases/bbolt/config_linux.go new file mode 100644 index 00000000000..ebef20883c8 --- /dev/null +++ b/server/databases/bbolt/config_linux.go @@ -0,0 +1,32 @@ +// Copyright 2015 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bbolt + +import ( + "syscall" + + bolt "go.etcd.io/bbolt" +) + +// syscall.MAP_POPULATE on linux 2.6.23+ does sequential read-ahead +// which can speed up entire-database read with boltdb. We want to +// enable MAP_POPULATE for faster key-value store recovery in storage +// package. If your kernel version is lower than 2.6.23 +// (https://github.com/torvalds/linux/releases/tag/v2.6.23), mmap might +// silently ignore this flag. Please update your kernel to prevent this. 
+var boltOpenOptions = &bolt.Options{ + MmapFlags: syscall.MAP_POPULATE, + NoFreelistSync: true, +} diff --git a/server/databases/bbolt/config_windows.go b/server/databases/bbolt/config_windows.go new file mode 100644 index 00000000000..fa3058d48c6 --- /dev/null +++ b/server/databases/bbolt/config_windows.go @@ -0,0 +1,24 @@ +// Copyright 2017 The etcd Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build windows + +package bbolt + +import bolt "go.etcd.io/bbolt" + +var boltOpenOptions *bolt.Options = nil + +// setting mmap size != 0 on windows will allocate the entire +// mmap size for the file, instead of growing it. So, force 0. diff --git a/server/embed/config.go b/server/embed/config.go index 0b79ce4bc25..affdf6a4a7c 100644 --- a/server/embed/config.go +++ b/server/embed/config.go @@ -106,6 +106,9 @@ const ( maxElectionMs = 50000 // backend freelist map type freelistArrayType = "array" + + // DefaultBackendType defaults the backend to bolt + DefaultBackendType = "bolt" ) var ( @@ -122,6 +125,8 @@ var ( // indirection for testing getCluster = srv.GetCluster + + SupportedBackendTypes = map[string]struct{}{"bolt": {}, "sqlite": {}} ) var ( @@ -381,6 +386,9 @@ type Config struct { // Defaults to 0. ExperimentalDistributedTracingSamplingRatePerMillion int `json:"experimental-distributed-tracing-sampling-rate"` + // ExperimentalBackendType allows you to set the underlying database to sqlite. 
+ ExperimentalBackendType string `json:"experimental-backend-type"` + // Logger is logger options: currently only supports "zap". // "capnslog" is removed in v3.5. Logger string `json:"logger"` @@ -451,6 +459,8 @@ type configJSON struct { ClientSecurityJSON securityConfig `json:"client-transport-security"` PeerSecurityJSON securityConfig `json:"peer-transport-security"` + + BackendType string `json:"backend-type"` } type securityConfig struct { @@ -463,6 +473,12 @@ type securityConfig struct { AutoTLS bool `json:"auto-tls"` } +func NewSqliteConfig() *Config { + c := NewConfig() + c.ExperimentalBackendType = "sqlite" + return c +} + // NewConfig creates a new Config populated with default values. func NewConfig() *Config { lpurl, _ := url.Parse(DefaultListenPeerURLs) @@ -482,6 +498,7 @@ func NewConfig() *Config { MaxRequestBytes: DefaultMaxRequestBytes, MaxConcurrentStreams: DefaultMaxConcurrentStreams, ExperimentalWarningApplyDuration: DefaultWarningApplyDuration, + ExperimentalBackendType: DefaultBackendType, GRPCKeepAliveMinTime: DefaultGRPCKeepAliveMinTime, GRPCKeepAliveInterval: DefaultGRPCKeepAliveInterval, @@ -584,6 +601,11 @@ func (cfg *configYAML) configFromFile(path string) error { cfg.Config.ListenPeerUrls = u } + // default backend type to bolt + if cfg.configJSON.BackendType == "" { + cfg.BackendType = DefaultBackendType + } + if cfg.configJSON.ListenClientUrls != "" { u, err := types.NewURLs(strings.Split(cfg.configJSON.ListenClientUrls, ",")) if err != nil { diff --git a/server/embed/etcd.go b/server/embed/etcd.go index 594e11ec385..7eeb6aa08e5 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -164,8 +164,6 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { return e, err } - backendFreelistType := parseBackendFreelistType(cfg.BackendFreelistType) - srvcfg := config.ServerConfig{ Name: cfg.Name, ClientURLs: cfg.AdvertiseClientUrls, @@ -191,7 +189,8 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { AutoCompactionMode: 
cfg.AutoCompactionMode, QuotaBackendBytes: cfg.QuotaBackendBytes, BackendBatchLimit: cfg.BackendBatchLimit, - BackendFreelistType: backendFreelistType, + BackendFreelistType: cfg.BackendFreelistType, + BackendType: cfg.ExperimentalBackendType, BackendBatchInterval: cfg.BackendBatchInterval, MaxTxnOps: cfg.MaxTxnOps, MaxRequestBytes: cfg.MaxRequestBytes, diff --git a/server/etcdserver/bootstrap_test.go b/server/etcdserver/bootstrap_test.go index 55a20684fe8..db1dc5c5505 100644 --- a/server/etcdserver/bootstrap_test.go +++ b/server/etcdserver/bootstrap_test.go @@ -46,6 +46,8 @@ import ( "go.etcd.io/raft/v3/raftpb" ) +const defaultBackendType = "bolt" + func TestBootstrapExistingClusterNoWALMaxLearner(t *testing.T) { tests := []struct { name string @@ -185,8 +187,9 @@ func TestBootstrapBackend(t *testing.T) { cfg := config.ServerConfig{ Name: "demoNode", DataDir: dataDir, - BackendFreelistType: bolt.FreelistArrayType, + BackendFreelistType: string(bolt.FreelistArrayType), Logger: zaptest.NewLogger(t), + BackendType: defaultBackendType, } if tt.prepareData != nil { diff --git a/server/etcdserver/cindex/cindex_test.go b/server/etcdserver/cindex/cindex_test.go index a056ac3d759..b740051ad98 100644 --- a/server/etcdserver/cindex/cindex_test.go +++ b/server/etcdserver/cindex/cindex_test.go @@ -28,6 +28,8 @@ import ( "go.etcd.io/etcd/server/v3/storage/schema" ) +const defaultBackendType = "bolt" + // TestConsistentIndex ensures that LoadConsistentIndex/Save/ConsistentIndex and backend.BatchTx can work well together. 
func TestConsistentIndex(t *testing.T) { @@ -56,7 +58,7 @@ func TestConsistentIndex(t *testing.T) { be.ForceCommit() be.Close() - b := backend.NewDefaultBackend(zaptest.NewLogger(t), tmpPath) + b := backend.NewDefaultBackend(zaptest.NewLogger(t), tmpPath, defaultBackendType) defer b.Close() ci.SetBackend(b) index = ci.ConsistentIndex() @@ -108,7 +110,7 @@ func TestConsistentIndexDecrease(t *testing.T) { be.ForceCommit() be.Close() - be = backend.NewDefaultBackend(zaptest.NewLogger(t), tmpPath) + be = backend.NewDefaultBackend(zaptest.NewLogger(t), tmpPath, defaultBackendType) defer be.Close() ci := NewConsistentIndex(be) ci.SetConsistentIndex(tc.index, tc.term) diff --git a/server/storage/backend.go b/server/storage/backend.go index 8edfbc12e50..c6a746b1755 100644 --- a/server/storage/backend.go +++ b/server/storage/backend.go @@ -16,6 +16,7 @@ package storage import ( "fmt" + "os" "time" @@ -52,6 +53,7 @@ func newBackend(cfg config.ServerConfig, hooks backend.Hooks) backend.Backend { } bcfg.Mlock = cfg.ExperimentalMemoryMlock bcfg.Hooks = hooks + bcfg.BackendType = cfg.BackendType return backend.New(bcfg) } diff --git a/server/storage/backend/backend.go b/server/storage/backend/backend.go index e7b951ee7e6..8aad12192de 100644 --- a/server/storage/backend/backend.go +++ b/server/storage/backend/backend.go @@ -15,15 +15,15 @@ package backend import ( - "fmt" - "hash/crc32" "io" - "os" - "path/filepath" "sync" "sync/atomic" "time" + "go.etcd.io/etcd/server/v3/bucket" + "go.etcd.io/etcd/server/v3/databases/bbolt" + "go.etcd.io/etcd/server/v3/interfaces" + humanize "github.com/dustin/go-humanize" "go.uber.org/zap" @@ -34,6 +34,8 @@ var ( defaultBatchLimit = 10000 defaultBatchInterval = 100 * time.Millisecond + defaultBackendType = "bolt" + defragLimit = 10000 // initialMmapSize is the initial size of the mmapped region. Setting this larger than @@ -71,6 +73,9 @@ type Backend interface { // SetTxPostLockInsideApplyHook sets a txPostLockInsideApplyHook. 
+ SetTxPostLockInsideApplyHook(func()) + + // BackendType returns the underlying database + BackendType() string } type Snapshot interface { @@ -103,9 +108,9 @@ type backend struct { // mlock prevents backend database file to be swapped mlock bool - mu sync.RWMutex - bopts *bolt.Options - db *bolt.DB + mu sync.RWMutex + + db interfaces.DB batchInterval time.Duration batchLimit int @@ -127,6 +132,11 @@ type backend struct { txPostLockInsideApplyHook func() lg *zap.Logger + + // backendType is the underlying database + backendType string + // bopts is the bolt options, if the backend is bolt + bopts interface{} } type BackendConfig struct { @@ -137,7 +147,9 @@ type BackendConfig struct { // BatchLimit is the maximum puts before flushing the BatchTx. BatchLimit int // BackendFreelistType is the backend boltdb's freelist type. - BackendFreelistType bolt.FreelistType + BackendFreelistType string + // BackendType is the type of backend + BackendType string // MmapSize is the number of bytes to mmap for the backend. MmapSize uint64 // Logger logs backend-side operations.
@@ -157,31 +169,43 @@ func DefaultBackendConfig(lg *zap.Logger) BackendConfig { BatchLimit: defaultBatchLimit, MmapSize: initialMmapSize, Logger: lg, + BackendType: defaultBackendType, } } func New(bcfg BackendConfig) Backend { - return newBackend(bcfg) + if bcfg.BackendType == "" || bcfg.BackendType == "bolt" { + return newBoltBackend(bcfg) + } + // todo(logicalhan) replace with sqlite + return newBoltBackend(bcfg) } -func NewDefaultBackend(lg *zap.Logger, path string) Backend { +func NewDefaultBackend(lg *zap.Logger, path string, backendType string) Backend { bcfg := DefaultBackendConfig(lg) bcfg.Path = path - return newBackend(bcfg) + bcfg.BackendType = backendType + // todo(logicalhan) remove the "" conditional and find all the places + // where we set and pass this value + if bcfg.BackendType == defaultBackendType || bcfg.BackendType == "" { + return newBoltBackend(bcfg) + } else { + // todo(logicalhan) replace with sqlite + return newBoltBackend(bcfg) + } } -func newBackend(bcfg BackendConfig) *backend { - bopts := &bolt.Options{} - if boltOpenOptions != nil { - *bopts = *boltOpenOptions +func newBoltBackend(bcfg BackendConfig) *backend { + opts := bbolt.DbOpts{ + MMapSize: bcfg.mmapSize(), + FreelistType: bcfg.BackendFreelistType, + NoSync: bcfg.UnsafeNoFsync, + NoGrowSync: bcfg.UnsafeNoFsync, + Mlock: bcfg.Mlock, } - bopts.InitialMmapSize = bcfg.mmapSize() - bopts.FreelistType = bcfg.BackendFreelistType - bopts.NoSync = bcfg.UnsafeNoFsync - bopts.NoGrowSync = bcfg.UnsafeNoFsync - bopts.Mlock = bcfg.Mlock + bopts := bbolt.SetOptions(opts) - db, err := bolt.Open(bcfg.Path, 0600, bopts) + db, err := bbolt.Open(bcfg.Path, 0600, bopts) if err != nil { bcfg.Logger.Panic("failed to open database", zap.String("path", bcfg.Path), zap.Error(err)) } @@ -195,14 +219,15 @@ func newBackend(bcfg BackendConfig) *backend { batchInterval: bcfg.BatchInterval, batchLimit: bcfg.BatchLimit, mlock: bcfg.Mlock, + backendType: bcfg.BackendType, readTx: &readTx{ baseReadTx: 
baseReadTx{ buf: txReadBuffer{ - txBuffer: txBuffer{make(map[BucketID]*bucketBuffer)}, + txBuffer: txBuffer{make(map[bucket.BucketID]*bucketBuffer)}, bufVersion: 0, }, - buckets: make(map[BucketID]*bolt.Bucket), + buckets: make(map[bucket.BucketID]interfaces.Bucket), txWg: new(sync.WaitGroup), txMu: new(sync.RWMutex), }, @@ -372,38 +397,17 @@ func (b *backend) Snapshot() Snapshot { } func (b *backend) Hash(ignores func(bucketName, keyName []byte) bool) (uint32, error) { - h := crc32.New(crc32.MakeTable(crc32.Castagnoli)) - b.mu.RLock() defer b.mu.RUnlock() - err := b.db.View(func(tx *bolt.Tx) error { - c := tx.Cursor() - for next, _ := c.First(); next != nil; next, _ = c.Next() { - b := tx.Bucket(next) - if b == nil { - return fmt.Errorf("cannot get hash of bucket %s", string(next)) - } - h.Write(next) - b.ForEach(func(k, v []byte) error { - if ignores != nil && !ignores(next, k) { - h.Write(k) - h.Write(v) - } - return nil - }) - } - return nil - }) - - if err != nil { - return 0, err - } - - return h.Sum32(), nil + return b.db.HashBuckets(ignores) } func (b *backend) Size() int64 { - return atomic.LoadInt64(&b.size) + if b.BackendType() == "sqlite" { + return b.db.Size() + } else { + return atomic.LoadInt64(&b.size) + } } func (b *backend) SizeInUse() int64 { @@ -441,12 +445,14 @@ func (b *backend) Commits() int64 { return atomic.LoadInt64(&b.commits) } -func (b *backend) Defrag() error { - return b.defrag() +// BackendType returns underlying database +func (b *backend) BackendType() string { + return b.backendType } -func (b *backend) defrag() error { +func (b *backend) Defrag() error { now := time.Now() + isDefragActive.Set(1) defer isDefragActive.Set(0) @@ -468,67 +474,27 @@ func (b *backend) defrag() error { b.batchTx.tx = nil - // Create a temporary file to ensure we start with a clean slate. - // Snapshotter.cleanupSnapdir cleans up any of these that are found during startup. 
- dir := filepath.Dir(b.db.Path()) - temp, err := os.CreateTemp(dir, "db.tmp.*") - if err != nil { - return err + if b.BackendType() == "sqlite" { + size := b.db.Size() + atomic.StoreInt64(&b.size, size) + atomic.StoreInt64(&b.sizeInUse, size) } - options := bolt.Options{} - if boltOpenOptions != nil { - options = *boltOpenOptions - } - options.OpenFile = func(_ string, _ int, _ os.FileMode) (file *os.File, err error) { - return temp, nil - } - // Don't load tmp db into memory regardless of opening options - options.Mlock = false - tdbp := temp.Name() - tmpdb, err := bolt.Open(tdbp, 0600, &options) - if err != nil { - return err - } - - dbp := b.db.Path() size1, sizeInUse1 := b.Size(), b.SizeInUse() + if b.lg != nil { b.lg.Info( "defragmenting", - zap.String("path", dbp), + zap.String("path", b.db.Path()), zap.Int64("current-db-size-bytes", size1), zap.String("current-db-size", humanize.Bytes(uint64(size1))), zap.Int64("current-db-size-in-use-bytes", sizeInUse1), zap.String("current-db-size-in-use", humanize.Bytes(uint64(sizeInUse1))), ) } - // gofail: var defragBeforeCopy struct{} - err = defragdb(b.db, tmpdb, defragLimit) - if err != nil { - tmpdb.Close() - if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil { - b.lg.Error("failed to remove db.tmp after defragmentation completed", zap.Error(rmErr)) - } - return err - } + err := b.db.Defrag(b.lg, b.bopts, defragLimit) - err = b.db.Close() if err != nil { - b.lg.Fatal("failed to close database", zap.Error(err)) - } - err = tmpdb.Close() - if err != nil { - b.lg.Fatal("failed to close tmp database", zap.Error(err)) - } - // gofail: var defragBeforeRename struct{} - err = os.Rename(tdbp, dbp) - if err != nil { - b.lg.Fatal("failed to rename tmp database", zap.Error(err)) - } - - b.db, err = bolt.Open(dbp, 0600, b.bopts) - if err != nil { - b.lg.Fatal("failed to open database", zap.String("path", dbp), zap.Error(err)) + return err } b.batchTx.tx = b.unsafeBegin(true) @@ -538,7 +504,15 @@ func (b *backend) defrag() 
error { size := b.readTx.tx.Size() db := b.readTx.tx.DB() atomic.StoreInt64(&b.size, size) - atomic.StoreInt64(&b.sizeInUse, size-(int64(db.Stats().FreePageN)*int64(db.Info().PageSize))) + if db.DBType() == "bolt" { + stats := db.Stats().(bolt.Stats) + info := db.Info().(*bolt.Info) + atomic.StoreInt64(&b.sizeInUse, size-(int64(stats.FreePageN)*int64(info.PageSize))) + } else { + lsmSize := b.db.Size() + atomic.StoreInt64(&b.size, lsmSize) + atomic.StoreInt64(&b.sizeInUse, lsmSize) + } took := time.Since(now) defragSec.Observe(took.Seconds()) @@ -547,7 +521,7 @@ func (b *backend) defrag() error { if b.lg != nil { b.lg.Info( "finished defragmenting directory", - zap.String("path", dbp), + zap.String("path", db.Path()), zap.Int64("current-db-size-bytes-diff", size2-size1), zap.Int64("current-db-size-bytes", size2), zap.String("current-db-size", humanize.Bytes(uint64(size2))), @@ -557,84 +531,27 @@ func (b *backend) defrag() error { zap.Duration("took", took), ) } - return nil + return err } -func defragdb(odb, tmpdb *bolt.DB, limit int) error { - // open a tx on tmpdb for writes - tmptx, err := tmpdb.Begin(true) - if err != nil { - return err - } - defer func() { - if err != nil { - tmptx.Rollback() - } - }() - - // open a tx on old db for read - tx, err := odb.Begin(false) - if err != nil { - return err - } - defer tx.Rollback() - - c := tx.Cursor() - - count := 0 - for next, _ := c.First(); next != nil; next, _ = c.Next() { - b := tx.Bucket(next) - if b == nil { - return fmt.Errorf("backend: cannot defrag bucket %s", string(next)) - } - - tmpb, berr := tmptx.CreateBucketIfNotExists(next) - if berr != nil { - return berr - } - tmpb.FillPercent = 0.9 // for bucket2seq write in for each - - if err = b.ForEach(func(k, v []byte) error { - count++ - if count > limit { - err = tmptx.Commit() - if err != nil { - return err - } - tmptx, err = tmpdb.Begin(true) - if err != nil { - return err - } - tmpb = tmptx.Bucket(next) - tmpb.FillPercent = 0.9 // for bucket2seq write in 
for each - - count = 0 - } - return tmpb.Put(k, v) - }); err != nil { - return err - } - } - - return tmptx.Commit() -} - -func (b *backend) begin(write bool) *bolt.Tx { +func (b *backend) begin(write bool) interfaces.Tx { b.mu.RLock() tx := b.unsafeBegin(write) b.mu.RUnlock() size := tx.Size() db := tx.DB() - stats := db.Stats() atomic.StoreInt64(&b.size, size) - atomic.StoreInt64(&b.sizeInUse, size-(int64(stats.FreePageN)*int64(db.Info().PageSize))) - atomic.StoreInt64(&b.openReadTxN, int64(stats.OpenTxN)) - + if db.DBType() == "bolt" { + stats := db.Stats().(bolt.Stats) + info := db.Info().(*bolt.Info) + atomic.StoreInt64(&b.sizeInUse, size-(int64(stats.FreePageN)*int64(info.PageSize))) + atomic.StoreInt64(&b.openReadTxN, int64(stats.OpenTxN)) + } return tx } -func (b *backend) unsafeBegin(write bool) *bolt.Tx { +func (b *backend) unsafeBegin(write bool) interfaces.Tx { // gofail: var beforeStartDBTxn struct{} tx, err := b.db.Begin(write) // gofail: var afterStartDBTxn struct{} @@ -649,7 +566,7 @@ func (b *backend) OpenReadTxN() int64 { } type snapshot struct { - *bolt.Tx + interfaces.Tx stopc chan struct{} donec chan struct{} } diff --git a/server/storage/backend/backend_bench_test.go b/server/storage/backend/backend_bench_test.go index 204d7de6157..c0a3acd5a36 100644 --- a/server/storage/backend/backend_bench_test.go +++ b/server/storage/backend/backend_bench_test.go @@ -20,8 +20,9 @@ import ( "time" "github.com/stretchr/testify/assert" + + buck "go.etcd.io/etcd/server/v3/bucket" betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" - "go.etcd.io/etcd/server/v3/storage/schema" ) func BenchmarkBackendPut(b *testing.B) { @@ -42,13 +43,13 @@ func BenchmarkBackendPut(b *testing.B) { batchTx := backend.BatchTx() batchTx.Lock() - batchTx.UnsafeCreateBucket(schema.Test) + batchTx.UnsafeCreateBucket(buck.Test) batchTx.Unlock() b.ResetTimer() for i := 0; i < b.N; i++ { batchTx.Lock() - batchTx.UnsafePut(schema.Test, keys[i], value) + 
batchTx.UnsafePut(buck.Test, keys[i], value) batchTx.Unlock() } } diff --git a/server/storage/backend/backend_test.go b/server/storage/backend/backend_test.go index 0b0b6b8ec9b..c7c8ffce9bf 100644 --- a/server/storage/backend/backend_test.go +++ b/server/storage/backend/backend_test.go @@ -15,6 +15,7 @@ package backend_test import ( + "bytes" "fmt" "os" "reflect" @@ -23,9 +24,9 @@ import ( "github.com/stretchr/testify/assert" bolt "go.etcd.io/bbolt" + buck "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/storage/backend" betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" - "go.etcd.io/etcd/server/v3/storage/schema" "go.uber.org/zap/zaptest" ) @@ -54,8 +55,8 @@ func TestBackendSnapshot(t *testing.T) { tx := b.BatchTx() tx.Lock() - tx.UnsafeCreateBucket(schema.Test) - tx.UnsafePut(schema.Test, []byte("foo"), []byte("bar")) + tx.UnsafeCreateBucket(buck.Test) + tx.UnsafePut(buck.Test, []byte("foo"), []byte("bar")) tx.Unlock() b.ForceCommit() @@ -79,7 +80,7 @@ func TestBackendSnapshot(t *testing.T) { newTx := nb.BatchTx() newTx.Lock() - ks, _ := newTx.UnsafeRange(schema.Test, []byte("foo"), []byte("goo"), 0) + ks, _ := newTx.UnsafeRange(buck.Test, []byte("foo"), []byte("goo"), 0) if len(ks) != 1 { t.Errorf("len(kvs) = %d, want 1", len(ks)) } @@ -96,8 +97,8 @@ func TestBackendBatchIntervalCommit(t *testing.T) { tx := b.BatchTx() tx.Lock() - tx.UnsafeCreateBucket(schema.Test) - tx.UnsafePut(schema.Test, []byte("foo"), []byte("bar")) + tx.UnsafeCreateBucket(buck.Test) + tx.UnsafePut(buck.Test, []byte("foo"), []byte("bar")) tx.Unlock() for i := 0; i < 10; i++ { @@ -107,29 +108,22 @@ func TestBackendBatchIntervalCommit(t *testing.T) { time.Sleep(time.Duration(i*100) * time.Millisecond) } - // check whether put happens via db view - assert.NoError(t, backend.DbFromBackendForTest(b).View(func(tx *bolt.Tx) error { - bucket := tx.Bucket([]byte("test")) - if bucket == nil { - t.Errorf("bucket test does not exit") - return nil - } - v := 
bucket.Get([]byte("foo")) - if v == nil { - t.Errorf("foo key failed to written in backend") - } - return nil - })) + val := backend.DbFromBackendForTest(b).GetFromBucket(string(buck.Test.Name()), "foo") + if val == nil { + t.Errorf("couldn't find foo in bucket test in backend") + } else if !bytes.Equal([]byte("bar"), val) { + t.Errorf("got '%s', want 'bar'", val) + } } func TestBackendDefrag(t *testing.T) { bcfg := backend.DefaultBackendConfig(zaptest.NewLogger(t)) // Make sure we change BackendFreelistType // The goal is to verify that we restore config option after defrag. - if bcfg.BackendFreelistType == bolt.FreelistMapType { - bcfg.BackendFreelistType = bolt.FreelistArrayType + if bcfg.BackendFreelistType == string(bolt.FreelistMapType) { + bcfg.BackendFreelistType = string(bolt.FreelistArrayType) } else { - bcfg.BackendFreelistType = bolt.FreelistMapType + bcfg.BackendFreelistType = string(bolt.FreelistMapType) } b, _ := betesting.NewTmpBackendFromCfg(t, bcfg) @@ -138,9 +132,9 @@ func TestBackendDefrag(t *testing.T) { tx := b.BatchTx() tx.Lock() - tx.UnsafeCreateBucket(schema.Test) + tx.UnsafeCreateBucket(buck.Test) for i := 0; i < backend.DefragLimitForTest()+100; i++ { - tx.UnsafePut(schema.Test, []byte(fmt.Sprintf("foo_%d", i)), []byte("bar")) + tx.UnsafePut(buck.Test, []byte(fmt.Sprintf("foo_%d", i)), []byte("bar")) } tx.Unlock() b.ForceCommit() @@ -149,7 +143,7 @@ func TestBackendDefrag(t *testing.T) { tx = b.BatchTx() tx.Lock() for i := 0; i < 50; i++ { - tx.UnsafeDelete(schema.Test, []byte(fmt.Sprintf("foo_%d", i))) + tx.UnsafeDelete(buck.Test, []byte(fmt.Sprintf("foo_%d", i))) } tx.Unlock() b.ForceCommit() @@ -180,15 +174,15 @@ func TestBackendDefrag(t *testing.T) { t.Errorf("new size = %v, want < %d", nsize, size) } db := backend.DbFromBackendForTest(b) - if db.FreelistType != bcfg.BackendFreelistType { - t.Errorf("db FreelistType = [%v], want [%v]", db.FreelistType, bcfg.BackendFreelistType) + if db.FreelistType() != bcfg.BackendFreelistType { + 
t.Errorf("db FreelistType = [%v], want [%v]", db.FreelistType(), bcfg.BackendFreelistType) } // try put more keys after shrink. tx = b.BatchTx() tx.Lock() - tx.UnsafeCreateBucket(schema.Test) - tx.UnsafePut(schema.Test, []byte("more"), []byte("bar")) + tx.UnsafeCreateBucket(buck.Test) + tx.UnsafePut(buck.Test, []byte("more"), []byte("bar")) tx.Unlock() b.ForceCommit() } @@ -200,15 +194,15 @@ func TestBackendWriteback(t *testing.T) { tx := b.BatchTx() tx.Lock() - tx.UnsafeCreateBucket(schema.Key) - tx.UnsafePut(schema.Key, []byte("abc"), []byte("bar")) - tx.UnsafePut(schema.Key, []byte("def"), []byte("baz")) - tx.UnsafePut(schema.Key, []byte("overwrite"), []byte("1")) + tx.UnsafeCreateBucket(buck.Key) + tx.UnsafePut(buck.Key, []byte("abc"), []byte("bar")) + tx.UnsafePut(buck.Key, []byte("def"), []byte("baz")) + tx.UnsafePut(buck.Key, []byte("overwrite"), []byte("1")) tx.Unlock() // overwrites should be propagated too tx.Lock() - tx.UnsafePut(schema.Key, []byte("overwrite"), []byte("2")) + tx.UnsafePut(buck.Key, []byte("overwrite"), []byte("2")) tx.Unlock() keys := []struct { @@ -261,7 +255,7 @@ func TestBackendWriteback(t *testing.T) { func() { rtx.RLock() defer rtx.RUnlock() - k, v := rtx.UnsafeRange(schema.Key, tt.key, tt.end, tt.limit) + k, v := rtx.UnsafeRange(buck.Key, tt.key, tt.end, tt.limit) if !reflect.DeepEqual(tt.wkey, k) || !reflect.DeepEqual(tt.wval, v) { t.Errorf("#%d: want k=%+v, v=%+v; got k=%+v, v=%+v", i, tt.wkey, tt.wval, k, v) } @@ -276,20 +270,20 @@ func TestConcurrentReadTx(t *testing.T) { wtx1 := b.BatchTx() wtx1.Lock() - wtx1.UnsafeCreateBucket(schema.Key) - wtx1.UnsafePut(schema.Key, []byte("abc"), []byte("ABC")) - wtx1.UnsafePut(schema.Key, []byte("overwrite"), []byte("1")) + wtx1.UnsafeCreateBucket(buck.Key) + wtx1.UnsafePut(buck.Key, []byte("abc"), []byte("ABC")) + wtx1.UnsafePut(buck.Key, []byte("overwrite"), []byte("1")) wtx1.Unlock() wtx2 := b.BatchTx() wtx2.Lock() - wtx2.UnsafePut(schema.Key, []byte("def"), []byte("DEF")) - 
wtx2.UnsafePut(schema.Key, []byte("overwrite"), []byte("2")) + wtx2.UnsafePut(buck.Key, []byte("def"), []byte("DEF")) + wtx2.UnsafePut(buck.Key, []byte("overwrite"), []byte("2")) wtx2.Unlock() rtx := b.ConcurrentReadTx() rtx.RLock() // no-op - k, v := rtx.UnsafeRange(schema.Key, []byte("abc"), []byte("\xff"), 0) + k, v := rtx.UnsafeRange(buck.Key, []byte("abc"), []byte("\xff"), 0) rtx.RUnlock() wKey := [][]byte{[]byte("abc"), []byte("def"), []byte("overwrite")} wVal := [][]byte{[]byte("ABC"), []byte("DEF"), []byte("2")} @@ -306,10 +300,10 @@ func TestBackendWritebackForEach(t *testing.T) { tx := b.BatchTx() tx.Lock() - tx.UnsafeCreateBucket(schema.Key) + tx.UnsafeCreateBucket(buck.Key) for i := 0; i < 5; i++ { k := []byte(fmt.Sprintf("%04d", i)) - tx.UnsafePut(schema.Key, k, []byte("bar")) + tx.UnsafePut(buck.Key, k, []byte("bar")) } tx.Unlock() @@ -317,10 +311,10 @@ func TestBackendWritebackForEach(t *testing.T) { b.ForceCommit() tx.Lock() - tx.UnsafeCreateBucket(schema.Key) + tx.UnsafeCreateBucket(buck.Key) for i := 5; i < 20; i++ { k := []byte(fmt.Sprintf("%04d", i)) - tx.UnsafePut(schema.Key, k, []byte("bar")) + tx.UnsafePut(buck.Key, k, []byte("bar")) } tx.Unlock() @@ -331,7 +325,7 @@ func TestBackendWritebackForEach(t *testing.T) { } rtx := b.ReadTx() rtx.RLock() - assert.NoError(t, rtx.UnsafeForEach(schema.Key, getSeq)) + assert.NoError(t, rtx.UnsafeForEach(buck.Key, getSeq)) rtx.RUnlock() partialSeq := seq @@ -340,7 +334,7 @@ func TestBackendWritebackForEach(t *testing.T) { b.ForceCommit() tx.Lock() - assert.NoError(t, tx.UnsafeForEach(schema.Key, getSeq)) + assert.NoError(t, tx.UnsafeForEach(buck.Key, getSeq)) tx.Unlock() if seq != partialSeq { diff --git a/server/storage/backend/batch_tx.go b/server/storage/backend/batch_tx.go index 60be4ce6d74..926eab2cb51 100644 --- a/server/storage/backend/batch_tx.go +++ b/server/storage/backend/batch_tx.go @@ -15,8 +15,6 @@ package backend import ( - "bytes" - "math" "sync" "sync/atomic" "time" @@ -24,24 +22,10 @@ 
import ( "go.uber.org/zap" bolt "go.etcd.io/bbolt" -) -type BucketID int - -type Bucket interface { - // ID returns a unique identifier of a bucket. - // The id must NOT be persisted and can be used as lightweight identificator - // in the in-memory maps. - ID() BucketID - Name() []byte - // String implements Stringer (human readable name). - String() string - - // IsSafeRangeBucket is a hack to avoid inadvertently reading duplicate keys; - // overwrites on a bucket should only fetch with limit=1, but safeRangeBucket - // is known to never overwrite any key so range is safe. - IsSafeRangeBucket() bool -} + "go.etcd.io/etcd/server/v3/bucket" + "go.etcd.io/etcd/server/v3/interfaces" +) type BatchTx interface { Lock() @@ -61,16 +45,16 @@ type UnsafeReadWriter interface { } type UnsafeWriter interface { - UnsafeCreateBucket(bucket Bucket) - UnsafeDeleteBucket(bucket Bucket) - UnsafePut(bucket Bucket, key []byte, value []byte) - UnsafeSeqPut(bucket Bucket, key []byte, value []byte) - UnsafeDelete(bucket Bucket, key []byte) + UnsafeCreateBucket(bucket bucket.Bucket) + UnsafeDeleteBucket(bucket bucket.Bucket) + UnsafePut(bucket bucket.Bucket, key []byte, value []byte) + UnsafeSeqPut(bucket bucket.Bucket, key []byte, value []byte) + UnsafeDelete(bucket bucket.Bucket, key []byte) } type batchTx struct { sync.Mutex - tx *bolt.Tx + tx interfaces.Tx backend *backend pending int @@ -111,7 +95,7 @@ func (t *batchTx) Unlock() { t.Mutex.Unlock() } -func (t *batchTx) UnsafeCreateBucket(bucket Bucket) { +func (t *batchTx) UnsafeCreateBucket(bucket bucket.Bucket) { _, err := t.tx.CreateBucket(bucket.Name()) if err != nil && err != bolt.ErrBucketExists { t.backend.lg.Fatal( @@ -123,7 +107,7 @@ func (t *batchTx) UnsafeCreateBucket(bucket Bucket) { t.pending++ } -func (t *batchTx) UnsafeDeleteBucket(bucket Bucket) { +func (t *batchTx) UnsafeDeleteBucket(bucket bucket.Bucket) { err := t.tx.DeleteBucket(bucket.Name()) if err != nil && err != bolt.ErrBucketNotFound { t.backend.lg.Fatal( @@ 
-136,17 +120,18 @@ func (t *batchTx) UnsafeDeleteBucket(bucket Bucket) { } // UnsafePut must be called holding the lock on the tx. -func (t *batchTx) UnsafePut(bucket Bucket, key []byte, value []byte) { +func (t *batchTx) UnsafePut(bucket bucket.Bucket, key []byte, value []byte) { t.unsafePut(bucket, key, value, false) } // UnsafeSeqPut must be called holding the lock on the tx. -func (t *batchTx) UnsafeSeqPut(bucket Bucket, key []byte, value []byte) { +func (t *batchTx) UnsafeSeqPut(bucket bucket.Bucket, key []byte, value []byte) { t.unsafePut(bucket, key, value, true) } -func (t *batchTx) unsafePut(bucketType Bucket, key []byte, value []byte, seq bool) { +func (t *batchTx) unsafePut(bucketType bucket.Bucket, key []byte, value []byte, seq bool) { bucket := t.tx.Bucket(bucketType.Name()) + if bucket == nil { t.backend.lg.Fatal( "failed to find a bucket", @@ -157,7 +142,7 @@ func (t *batchTx) unsafePut(bucketType Bucket, key []byte, value []byte, seq boo if seq { // it is useful to increase fill percent when the workloads are mostly append-only. // this can delay the page split and reduce space usage. - bucket.FillPercent = 0.9 + bucket.SetFillPercent(0.9) } if err := bucket.Put(key, value); err != nil { t.backend.lg.Fatal( @@ -170,7 +155,7 @@ func (t *batchTx) unsafePut(bucketType Bucket, key []byte, value []byte, seq boo } // UnsafeRange must be called holding the lock on the tx. 
-func (t *batchTx) UnsafeRange(bucketType Bucket, key, endKey []byte, limit int64) ([][]byte, [][]byte) { +func (t *batchTx) UnsafeRange(bucketType bucket.Bucket, key, endKey []byte, limit int64) ([][]byte, [][]byte) { bucket := t.tx.Bucket(bucketType.Name()) if bucket == nil { t.backend.lg.Fatal( @@ -179,33 +164,11 @@ func (t *batchTx) UnsafeRange(bucketType Bucket, key, endKey []byte, limit int64 zap.Stack("stack"), ) } - return unsafeRange(bucket.Cursor(), key, endKey, limit) -} - -func unsafeRange(c *bolt.Cursor, key, endKey []byte, limit int64) (keys [][]byte, vs [][]byte) { - if limit <= 0 { - limit = math.MaxInt64 - } - var isMatch func(b []byte) bool - if len(endKey) > 0 { - isMatch = func(b []byte) bool { return bytes.Compare(b, endKey) < 0 } - } else { - isMatch = func(b []byte) bool { return bytes.Equal(b, key) } - limit = 1 - } - - for ck, cv := c.Seek(key); ck != nil && isMatch(ck); ck, cv = c.Next() { - vs = append(vs, cv) - keys = append(keys, ck) - if limit == int64(len(keys)) { - break - } - } - return keys, vs + return bucket.UnsafeRange(key, endKey, limit) } // UnsafeDelete must be called holding the lock on the tx. -func (t *batchTx) UnsafeDelete(bucketType Bucket, key []byte) { +func (t *batchTx) UnsafeDelete(bucketType bucket.Bucket, key []byte) { bucket := t.tx.Bucket(bucketType.Name()) if bucket == nil { t.backend.lg.Fatal( @@ -226,11 +189,11 @@ func (t *batchTx) UnsafeDelete(bucketType Bucket, key []byte) { } // UnsafeForEach must be called holding the lock on the tx. 
-func (t *batchTx) UnsafeForEach(bucket Bucket, visitor func(k, v []byte) error) error { +func (t *batchTx) UnsafeForEach(bucket bucket.Bucket, visitor func(k, v []byte) error) error { return unsafeForEach(t.tx, bucket, visitor) } -func unsafeForEach(tx *bolt.Tx, bucket Bucket, visitor func(k, v []byte) error) error { +func unsafeForEach(tx interfaces.Tx, bucket bucket.Bucket, visitor func(k, v []byte) error) error { if b := tx.Bucket(bucket.Name()); b != nil { return b.ForEach(visitor) } @@ -270,9 +233,7 @@ func (t *batchTx) commit(stop bool) { err := t.tx.Commit() // gofail: var afterCommit struct{} - rebalanceSec.Observe(t.tx.Stats().RebalanceTime.Seconds()) - spillSec.Observe(t.tx.Stats().SpillTime.Seconds()) - writeSec.Observe(t.tx.Stats().WriteTime.Seconds()) + t.tx.Observe(rebalanceSec, spillSec, writeSec) commitSec.Observe(time.Since(start).Seconds()) atomic.AddInt64(&t.backend.commits, 1) @@ -295,8 +256,8 @@ func newBatchTxBuffered(backend *backend) *batchTxBuffered { tx := &batchTxBuffered{ batchTx: batchTx{backend: backend}, buf: txWriteBuffer{ - txBuffer: txBuffer{make(map[BucketID]*bucketBuffer)}, - bucket2seq: make(map[BucketID]bool), + txBuffer: txBuffer{make(map[bucket.BucketID]*bucketBuffer)}, + bucket2seq: make(map[bucket.BucketID]bool), }, } tx.Commit() @@ -346,7 +307,7 @@ func (t *batchTxBuffered) unsafeCommit(stop bool) { if t.backend.readTx.tx != nil { // wait all store read transactions using the current boltdb tx to finish, // then close the boltdb tx - go func(tx *bolt.Tx, wg *sync.WaitGroup) { + go func(tx interfaces.Tx, wg *sync.WaitGroup) { wg.Wait() if err := tx.Rollback(); err != nil { t.backend.lg.Fatal("failed to rollback tx", zap.Error(err)) @@ -362,12 +323,12 @@ func (t *batchTxBuffered) unsafeCommit(stop bool) { } } -func (t *batchTxBuffered) UnsafePut(bucket Bucket, key []byte, value []byte) { +func (t *batchTxBuffered) UnsafePut(bucket bucket.Bucket, key []byte, value []byte) { t.batchTx.UnsafePut(bucket, key, value) 
t.buf.put(bucket, key, value) } -func (t *batchTxBuffered) UnsafeSeqPut(bucket Bucket, key []byte, value []byte) { +func (t *batchTxBuffered) UnsafeSeqPut(bucket bucket.Bucket, key []byte, value []byte) { t.batchTx.UnsafeSeqPut(bucket, key, value) t.buf.putSeq(bucket, key, value) } diff --git a/server/storage/backend/batch_tx_test.go b/server/storage/backend/batch_tx_test.go index 6fd2bbae631..88a07771999 100644 --- a/server/storage/backend/batch_tx_test.go +++ b/server/storage/backend/batch_tx_test.go @@ -15,14 +15,14 @@ package backend_test import ( + "bytes" "reflect" "testing" "time" - bolt "go.etcd.io/bbolt" + buck "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/storage/backend" betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" - "go.etcd.io/etcd/server/v3/storage/schema" ) func TestBatchTxPut(t *testing.T) { @@ -34,18 +34,18 @@ func TestBatchTxPut(t *testing.T) { tx.Lock() // create bucket - tx.UnsafeCreateBucket(schema.Test) + tx.UnsafeCreateBucket(buck.Test) // put v := []byte("bar") - tx.UnsafePut(schema.Test, []byte("foo"), v) + tx.UnsafePut(buck.Test, []byte("foo"), v) tx.Unlock() // check put result before and after tx is committed for k := 0; k < 2; k++ { tx.Lock() - _, gv := tx.UnsafeRange(schema.Test, []byte("foo"), nil, 0) + _, gv := tx.UnsafeRange(buck.Test, []byte("foo"), nil, 0) tx.Unlock() if !reflect.DeepEqual(gv[0], v) { t.Errorf("v = %s, want %s", string(gv[0]), string(v)) @@ -62,12 +62,12 @@ func TestBatchTxRange(t *testing.T) { tx.Lock() defer tx.Unlock() - tx.UnsafeCreateBucket(schema.Test) + tx.UnsafeCreateBucket(buck.Test) // put keys allKeys := [][]byte{[]byte("foo"), []byte("foo1"), []byte("foo2")} allVals := [][]byte{[]byte("bar"), []byte("bar1"), []byte("bar2")} for i := range allKeys { - tx.UnsafePut(schema.Test, allKeys[i], allVals[i]) + tx.UnsafePut(buck.Test, allKeys[i], allVals[i]) } tests := []struct { @@ -115,7 +115,7 @@ func TestBatchTxRange(t *testing.T) { }, } for i, tt := range tests { - keys, 
vals := tx.UnsafeRange(schema.Test, tt.key, tt.endKey, tt.limit) + keys, vals := tx.UnsafeRange(buck.Test, tt.key, tt.endKey, tt.limit) if !reflect.DeepEqual(keys, tt.wkeys) { t.Errorf("#%d: keys = %+v, want %+v", i, keys, tt.wkeys) } @@ -132,17 +132,17 @@ func TestBatchTxDelete(t *testing.T) { tx := b.BatchTx() tx.Lock() - tx.UnsafeCreateBucket(schema.Test) - tx.UnsafePut(schema.Test, []byte("foo"), []byte("bar")) + tx.UnsafeCreateBucket(buck.Test) + tx.UnsafePut(buck.Test, []byte("foo"), []byte("bar")) - tx.UnsafeDelete(schema.Test, []byte("foo")) + tx.UnsafeDelete(buck.Test, []byte("foo")) tx.Unlock() // check put result before and after tx is committed for k := 0; k < 2; k++ { tx.Lock() - ks, _ := tx.UnsafeRange(schema.Test, []byte("foo"), nil, 0) + ks, _ := tx.UnsafeRange(buck.Test, []byte("foo"), nil, 0) tx.Unlock() if len(ks) != 0 { t.Errorf("keys on foo = %v, want nil", ks) @@ -154,28 +154,20 @@ func TestBatchTxDelete(t *testing.T) { func TestBatchTxCommit(t *testing.T) { b, _ := betesting.NewTmpBackend(t, time.Hour, 10000) defer betesting.Close(t, b) - + expectedVal := []byte("bar") tx := b.BatchTx() tx.Lock() - tx.UnsafeCreateBucket(schema.Test) - tx.UnsafePut(schema.Test, []byte("foo"), []byte("bar")) + tx.UnsafeCreateBucket(buck.Test) + tx.UnsafePut(buck.Test, []byte("foo"), expectedVal) tx.Unlock() tx.Commit() // check whether put happens via db view - backend.DbFromBackendForTest(b).View(func(tx *bolt.Tx) error { - bucket := tx.Bucket(schema.Test.Name()) - if bucket == nil { - t.Errorf("bucket test does not exit") - return nil - } - v := bucket.Get([]byte("foo")) - if v == nil { - t.Errorf("foo key failed to written in backend") - } - return nil - }) + val := backend.DbFromBackendForTest(b).GetFromBucket(string(buck.Test.Name()), "foo") + if !bytes.Equal(val, expectedVal) { + t.Errorf("got %s, want %s", val, expectedVal) + } } func TestBatchTxBatchLimitCommit(t *testing.T) { @@ -183,25 +175,17 @@ func TestBatchTxBatchLimitCommit(t *testing.T) { // 
trigger a commit b, _ := betesting.NewTmpBackend(t, time.Hour, 1) defer betesting.Close(t, b) - + expectedVal := []byte("bar") tx := b.BatchTx() tx.Lock() - tx.UnsafeCreateBucket(schema.Test) - tx.UnsafePut(schema.Test, []byte("foo"), []byte("bar")) + tx.UnsafeCreateBucket(buck.Test) + tx.UnsafePut(buck.Test, []byte("foo"), expectedVal) tx.Unlock() // batch limit commit should have been triggered // check whether put happens via db view - backend.DbFromBackendForTest(b).View(func(tx *bolt.Tx) error { - bucket := tx.Bucket(schema.Test.Name()) - if bucket == nil { - t.Errorf("bucket test does not exit") - return nil - } - v := bucket.Get([]byte("foo")) - if v == nil { - t.Errorf("foo key failed to written in backend") - } - return nil - }) + val := backend.DbFromBackendForTest(b).GetFromBucket(string(buck.Test.Name()), "foo") + if !bytes.Equal(val, expectedVal) { + t.Errorf("got %s, want %s", val, expectedVal) + } } diff --git a/server/storage/backend/export_test.go b/server/storage/backend/export_test.go index e9f5ad38d6a..1dd0f67b2b9 100644 --- a/server/storage/backend/export_test.go +++ b/server/storage/backend/export_test.go @@ -14,9 +14,11 @@ package backend -import bolt "go.etcd.io/bbolt" +import ( + "go.etcd.io/etcd/server/v3/interfaces" +) -func DbFromBackendForTest(b Backend) *bolt.DB { +func DbFromBackendForTest(b Backend) interfaces.DB { return b.(*backend).db } diff --git a/server/storage/backend/hooks_test.go b/server/storage/backend/hooks_test.go index afc4e883351..dc41f153a51 100644 --- a/server/storage/backend/hooks_test.go +++ b/server/storage/backend/hooks_test.go @@ -20,14 +20,14 @@ import ( "time" "github.com/stretchr/testify/assert" + buck "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/storage/backend" betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" - "go.etcd.io/etcd/server/v3/storage/schema" "go.uber.org/zap/zaptest" ) var ( - bucket = schema.Test + bucket = buck.Test key = []byte("key") ) diff --git 
a/server/storage/backend/read_tx.go b/server/storage/backend/read_tx.go index 4ca2621411c..42f5dad052a 100644 --- a/server/storage/backend/read_tx.go +++ b/server/storage/backend/read_tx.go @@ -18,7 +18,8 @@ import ( "math" "sync" - bolt "go.etcd.io/bbolt" + "go.etcd.io/etcd/server/v3/bucket" + "go.etcd.io/etcd/server/v3/interfaces" ) // IsSafeRangeBucket is a hack to avoid inadvertently reading duplicate keys; @@ -32,8 +33,8 @@ type ReadTx interface { } type UnsafeReader interface { - UnsafeRange(bucket Bucket, key, endKey []byte, limit int64) (keys [][]byte, vals [][]byte) - UnsafeForEach(bucket Bucket, visitor func(k, v []byte) error) error + UnsafeRange(bucket bucket.Bucket, key, endKey []byte, limit int64) (keys [][]byte, vals [][]byte) + UnsafeForEach(bucket bucket.Bucket, visitor func(k, v []byte) error) error } // Base type for readTx and concurrentReadTx to eliminate duplicate functions between these @@ -45,13 +46,13 @@ type baseReadTx struct { // TODO: group and encapsulate {txMu, tx, buckets, txWg}, as they share the same lifecycle. // txMu protects accesses to buckets and tx on Range requests. txMu *sync.RWMutex - tx *bolt.Tx - buckets map[BucketID]*bolt.Bucket + tx interfaces.Tx + buckets map[bucket.BucketID]interfaces.Bucket // txWg protects tx from being rolled back at the end of a batch interval until all reads using this tx are done. 
txWg *sync.WaitGroup } -func (baseReadTx *baseReadTx) UnsafeForEach(bucket Bucket, visitor func(k, v []byte) error) error { +func (baseReadTx *baseReadTx) UnsafeForEach(bucket bucket.Bucket, visitor func(k, v []byte) error) error { dups := make(map[string]struct{}) getDups := func(k, v []byte) error { dups[string(k)] = struct{}{} @@ -75,7 +76,7 @@ func (baseReadTx *baseReadTx) UnsafeForEach(bucket Bucket, visitor func(k, v []b return baseReadTx.buf.ForEach(bucket, visitor) } -func (baseReadTx *baseReadTx) UnsafeRange(bucketType Bucket, key, endKey []byte, limit int64) ([][]byte, [][]byte) { +func (baseReadTx *baseReadTx) UnsafeRange(bucketType bucket.Bucket, key, endKey []byte, limit int64) ([][]byte, [][]byte) { if endKey == nil { // forbid duplicates for single keys limit = 1 @@ -114,10 +115,9 @@ func (baseReadTx *baseReadTx) UnsafeRange(bucketType Bucket, key, endKey []byte, if !lockHeld { baseReadTx.txMu.Lock() } - c := bucket.Cursor() baseReadTx.txMu.Unlock() - k2, v2 := unsafeRange(c, key, endKey, limit-int64(len(keys))) + k2, v2 := bucket.UnsafeRange(key, endKey, limit-int64(len(keys))) return append(k2, keys...), append(v2, vals...) 
} @@ -132,7 +132,7 @@ func (rt *readTx) RUnlock() { rt.mu.RUnlock() } func (rt *readTx) reset() { rt.buf.reset() - rt.buckets = make(map[BucketID]*bolt.Bucket) + rt.buckets = make(map[bucket.BucketID]interfaces.Bucket) rt.tx = nil rt.txWg = new(sync.WaitGroup) } diff --git a/server/storage/backend/testing/betesting.go b/server/storage/backend/testing/betesting.go index e42908f9365..daa9d425a22 100644 --- a/server/storage/backend/testing/betesting.go +++ b/server/storage/backend/testing/betesting.go @@ -34,6 +34,7 @@ func NewTmpBackendFromCfg(t testing.TB, bcfg backend.BackendConfig) (backend.Bac tmpPath := filepath.Join(dir, "database") bcfg.Path = tmpPath bcfg.Logger = zaptest.NewLogger(t) + bcfg.BackendType = "bolt" return backend.New(bcfg), tmpPath } diff --git a/server/storage/backend/tx_buffer.go b/server/storage/backend/tx_buffer.go index 779255b7320..f84665a4206 100644 --- a/server/storage/backend/tx_buffer.go +++ b/server/storage/backend/tx_buffer.go @@ -17,13 +17,15 @@ package backend import ( "bytes" "sort" + + "go.etcd.io/etcd/server/v3/bucket" ) const bucketBufferInitialSize = 512 // txBuffer handles functionality shared between txWriteBuffer and txReadBuffer. type txBuffer struct { - buckets map[BucketID]*bucketBuffer + buckets map[bucket.BucketID]*bucketBuffer } func (txb *txBuffer) reset() { @@ -41,20 +43,20 @@ type txWriteBuffer struct { txBuffer // Map from bucket ID into information whether this bucket is edited // sequentially (i.e. keys are growing monotonically). - bucket2seq map[BucketID]bool + bucket2seq map[bucket.BucketID]bool } -func (txw *txWriteBuffer) put(bucket Bucket, k, v []byte) { +func (txw *txWriteBuffer) put(bucket bucket.Bucket, k, v []byte) { txw.bucket2seq[bucket.ID()] = false txw.putInternal(bucket, k, v) } -func (txw *txWriteBuffer) putSeq(bucket Bucket, k, v []byte) { +func (txw *txWriteBuffer) putSeq(bucket bucket.Bucket, k, v []byte) { // TODO: Add (in tests?) 
verification whether k>b[len(b)] txw.putInternal(bucket, k, v) } -func (txw *txWriteBuffer) putInternal(bucket Bucket, k, v []byte) { +func (txw *txWriteBuffer) putInternal(bucket bucket.Bucket, k, v []byte) { b, ok := txw.buckets[bucket.ID()] if !ok { b = newBucketBuffer() @@ -101,14 +103,14 @@ type txReadBuffer struct { bufVersion uint64 } -func (txr *txReadBuffer) Range(bucket Bucket, key, endKey []byte, limit int64) ([][]byte, [][]byte) { +func (txr *txReadBuffer) Range(bucket bucket.Bucket, key, endKey []byte, limit int64) ([][]byte, [][]byte) { if b := txr.buckets[bucket.ID()]; b != nil { return b.Range(key, endKey, limit) } return nil, nil } -func (txr *txReadBuffer) ForEach(bucket Bucket, visitor func(k, v []byte) error) error { +func (txr *txReadBuffer) ForEach(bucket bucket.Bucket, visitor func(k, v []byte) error) error { if b := txr.buckets[bucket.ID()]; b != nil { return b.ForEach(visitor) } @@ -119,7 +121,7 @@ func (txr *txReadBuffer) ForEach(bucket Bucket, visitor func(k, v []byte) error) func (txr *txReadBuffer) unsafeCopy() txReadBuffer { txrCopy := txReadBuffer{ txBuffer: txBuffer{ - buckets: make(map[BucketID]*bucketBuffer, len(txr.txBuffer.buckets)), + buckets: make(map[bucket.BucketID]*bucketBuffer, len(txr.txBuffer.buckets)), }, bufVersion: 0, } diff --git a/server/storage/mvcc/hash.go b/server/storage/mvcc/hash.go index cf9ffe3c085..8557efe919e 100644 --- a/server/storage/mvcc/hash.go +++ b/server/storage/mvcc/hash.go @@ -20,10 +20,11 @@ import ( "sort" "sync" + "go.etcd.io/etcd/server/v3/bucket" + "go.uber.org/zap" "go.etcd.io/etcd/server/v3/storage/backend" - "go.etcd.io/etcd/server/v3/storage/schema" ) const ( @@ -32,7 +33,7 @@ const ( func unsafeHashByRev(tx backend.UnsafeReader, compactRevision, revision int64, keep map[revision]struct{}) (KeyValueHash, error) { h := newKVHasher(compactRevision, revision, keep) - err := tx.UnsafeForEach(schema.Key, func(k, v []byte) error { + err := tx.UnsafeForEach(bucket.Key, func(k, v []byte) error { 
h.WriteKeyValue(k, v) return nil }) @@ -48,7 +49,7 @@ type kvHasher struct { func newKVHasher(compactRev, rev int64, keep map[revision]struct{}) kvHasher { h := crc32.New(crc32.MakeTable(crc32.Castagnoli)) - h.Write(schema.Key.Name()) + h.Write(bucket.Key.Name()) return kvHasher{ hash: h, compactRevision: compactRev, diff --git a/server/storage/mvcc/kvstore.go b/server/storage/mvcc/kvstore.go index d4c44e42182..52ec44ea01e 100644 --- a/server/storage/mvcc/kvstore.go +++ b/server/storage/mvcc/kvstore.go @@ -22,6 +22,8 @@ import ( "sync" "time" + "go.etcd.io/etcd/server/v3/bucket" + "go.etcd.io/etcd/api/v3/mvccpb" "go.etcd.io/etcd/pkg/v3/schedule" "go.etcd.io/etcd/pkg/v3/traceutil" @@ -123,7 +125,7 @@ func NewStore(lg *zap.Logger, b backend.Backend, le lease.Lessor, cfg StoreConfi tx := s.b.BatchTx() tx.LockOutsideApply() - tx.UnsafeCreateBucket(schema.Key) + tx.UnsafeCreateBucket(bucket.Key) schema.UnsafeCreateMetaBucket(tx) tx.Unlock() s.b.ForceCommit() @@ -162,7 +164,7 @@ func (s *store) hash() (hash uint32, revision int64, err error) { start := time.Now() s.b.ForceCommit() - h, err := s.b.Hash(schema.DefaultIgnores) + h, err := s.b.Hash(bucket.DefaultIgnores) hashSec.Observe(time.Since(start).Seconds()) return h, s.currentRev, err @@ -337,7 +339,7 @@ func (s *store) restore() error { s.lg.Info( "restored last compact revision", - zap.String("meta-bucket-name-key", string(schema.FinishedCompactKeyName)), + zap.String("meta-bucket-name-key", string(bucket.FinishedCompactKeyName)), zap.Int64("restored-compact-revision", s.compactMainRev), ) s.revMu.Unlock() @@ -347,7 +349,7 @@ func (s *store) restore() error { keysGauge.Set(0) rkvc, revc := restoreIntoIndex(s.lg, s.kvindex) for { - keys, vals := tx.UnsafeRange(schema.Key, min, max, int64(restoreChunkKeys)) + keys, vals := tx.UnsafeRange(bucket.Key, min, max, int64(restoreChunkKeys)) if len(keys) == 0 { break } diff --git a/server/storage/mvcc/kvstore_compaction.go b/server/storage/mvcc/kvstore_compaction.go index 
9a0163697a7..c638ba102ce 100644 --- a/server/storage/mvcc/kvstore_compaction.go +++ b/server/storage/mvcc/kvstore_compaction.go @@ -21,7 +21,7 @@ import ( "go.uber.org/zap" - "go.etcd.io/etcd/server/v3/storage/schema" + "go.etcd.io/etcd/server/v3/bucket" ) func (s *store) scheduleCompaction(compactMainRev, prevCompactRev int64) (KeyValueHash, error) { @@ -50,11 +50,11 @@ func (s *store) scheduleCompaction(compactMainRev, prevCompactRev int64) (KeyVal tx := s.b.BatchTx() tx.LockOutsideApply() - keys, values := tx.UnsafeRange(schema.Key, last, end, int64(batchNum)) + keys, values := tx.UnsafeRange(bucket.Key, last, end, int64(batchNum)) for i := range keys { rev = bytesToRev(keys[i]) if _, ok := keep[rev]; !ok { - tx.UnsafeDelete(schema.Key, keys[i]) + tx.UnsafeDelete(bucket.Key, keys[i]) keyCompactions++ } h.WriteKeyValue(keys[i], values[i]) diff --git a/server/storage/mvcc/kvstore_compaction_test.go b/server/storage/mvcc/kvstore_compaction_test.go index dd8837637ae..89282293a92 100644 --- a/server/storage/mvcc/kvstore_compaction_test.go +++ b/server/storage/mvcc/kvstore_compaction_test.go @@ -20,12 +20,13 @@ import ( "testing" "time" + "go.etcd.io/etcd/server/v3/bucket" + "go.uber.org/zap/zaptest" "go.etcd.io/etcd/pkg/v3/traceutil" "go.etcd.io/etcd/server/v3/lease" betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" - "go.etcd.io/etcd/server/v3/storage/schema" ) func TestScheduleCompaction(t *testing.T) { @@ -79,7 +80,7 @@ func TestScheduleCompaction(t *testing.T) { ibytes := newRevBytes() for _, rev := range revs { revToBytes(rev, ibytes) - tx.UnsafePut(schema.Key, ibytes, []byte("bar")) + tx.UnsafePut(bucket.Key, ibytes, []byte("bar")) } tx.Unlock() @@ -91,7 +92,7 @@ func TestScheduleCompaction(t *testing.T) { tx.Lock() for _, rev := range tt.wrevs { revToBytes(rev, ibytes) - keys, _ := tx.UnsafeRange(schema.Key, ibytes, nil, 0) + keys, _ := tx.UnsafeRange(bucket.Key, ibytes, nil, 0) if len(keys) != 1 { t.Errorf("#%d: range on %v = %d, want 1", i, rev, 
len(keys)) } diff --git a/server/storage/mvcc/kvstore_test.go b/server/storage/mvcc/kvstore_test.go index af4c3846c33..7c2afc5fbbb 100644 --- a/server/storage/mvcc/kvstore_test.go +++ b/server/storage/mvcc/kvstore_test.go @@ -29,8 +29,12 @@ import ( "testing" "time" + "go.etcd.io/etcd/server/v3/bucket" + "go.uber.org/zap/zaptest" + "go.uber.org/zap" + "go.etcd.io/etcd/api/v3/mvccpb" "go.etcd.io/etcd/client/pkg/v3/testutil" "go.etcd.io/etcd/pkg/v3/schedule" @@ -38,9 +42,6 @@ import ( "go.etcd.io/etcd/server/v3/lease" "go.etcd.io/etcd/server/v3/storage/backend" betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" - "go.etcd.io/etcd/server/v3/storage/schema" - - "go.uber.org/zap" ) func TestStoreRev(t *testing.T) { @@ -151,12 +152,12 @@ func TestStorePut(t *testing.T) { } wact := []testutil.Action{ - {Name: "seqput", Params: []interface{}{schema.Key, tt.wkey, data}}, + {Name: "seqput", Params: []interface{}{bucket.Key, tt.wkey, data}}, } if tt.rr != nil { wact = []testutil.Action{ - {Name: "seqput", Params: []interface{}{schema.Key, tt.wkey, data}}, + {Name: "seqput", Params: []interface{}{bucket.Key, tt.wkey, data}}, } } @@ -232,7 +233,7 @@ func TestStoreRange(t *testing.T) { wstart := newRevBytes() revToBytes(tt.idxr.revs[0], wstart) wact := []testutil.Action{ - {Name: "range", Params: []interface{}{schema.Key, wstart, []byte(nil), int64(0)}}, + {Name: "range", Params: []interface{}{bucket.Key, wstart, []byte(nil), int64(0)}}, } if g := b.tx.Action(); !reflect.DeepEqual(g, wact) { t.Errorf("#%d: tx action = %+v, want %+v", i, g, wact) @@ -308,7 +309,7 @@ func TestStoreDeleteRange(t *testing.T) { t.Errorf("#%d: marshal err = %v, want nil", i, err) } wact := []testutil.Action{ - {Name: "seqput", Params: []interface{}{schema.Key, tt.wkey, data}}, + {Name: "seqput", Params: []interface{}{bucket.Key, tt.wkey, data}}, } if g := b.tx.Action(); !reflect.DeepEqual(g, wact) { t.Errorf("#%d: tx action = %+v, want %+v", i, g, wact) @@ -351,12 +352,12 @@ func 
TestStoreCompact(t *testing.T) { end := make([]byte, 8) binary.BigEndian.PutUint64(end, uint64(4)) wact := []testutil.Action{ - {Name: "range", Params: []interface{}{schema.Meta, schema.ScheduledCompactKeyName, []uint8(nil), int64(0)}}, - {Name: "range", Params: []interface{}{schema.Meta, schema.FinishedCompactKeyName, []uint8(nil), int64(0)}}, - {Name: "put", Params: []interface{}{schema.Meta, schema.ScheduledCompactKeyName, newTestRevBytes(revision{3, 0})}}, - {Name: "range", Params: []interface{}{schema.Key, make([]byte, 17), end, int64(10000)}}, - {Name: "delete", Params: []interface{}{schema.Key, key2}}, - {Name: "put", Params: []interface{}{schema.Meta, schema.FinishedCompactKeyName, newTestRevBytes(revision{3, 0})}}, + {Name: "range", Params: []interface{}{bucket.Meta, bucket.ScheduledCompactKeyName, []uint8(nil), int64(0)}}, + {Name: "range", Params: []interface{}{bucket.Meta, bucket.FinishedCompactKeyName, []uint8(nil), int64(0)}}, + {Name: "put", Params: []interface{}{bucket.Meta, bucket.ScheduledCompactKeyName, newTestRevBytes(revision{3, 0})}}, + {Name: "range", Params: []interface{}{bucket.Key, make([]byte, 17), end, int64(10000)}}, + {Name: "delete", Params: []interface{}{bucket.Key, key2}}, + {Name: "put", Params: []interface{}{bucket.Meta, bucket.FinishedCompactKeyName, newTestRevBytes(revision{3, 0})}}, } if g := b.tx.Action(); !reflect.DeepEqual(g, wact) { t.Errorf("tx actions = %+v, want %+v", g, wact) @@ -396,8 +397,8 @@ func TestStoreRestore(t *testing.T) { if err != nil { t.Fatal(err) } - b.tx.rangeRespc <- rangeResp{[][]byte{schema.FinishedCompactKeyName}, [][]byte{newTestRevBytes(revision{3, 0})}} - b.tx.rangeRespc <- rangeResp{[][]byte{schema.ScheduledCompactKeyName}, [][]byte{newTestRevBytes(revision{3, 0})}} + b.tx.rangeRespc <- rangeResp{[][]byte{bucket.FinishedCompactKeyName}, [][]byte{newTestRevBytes(revision{3, 0})}} + b.tx.rangeRespc <- rangeResp{[][]byte{bucket.ScheduledCompactKeyName}, [][]byte{newTestRevBytes(revision{3, 0})}} 
b.tx.rangeRespc <- rangeResp{[][]byte{putkey, delkey}, [][]byte{putkvb, delkvb}} b.tx.rangeRespc <- rangeResp{nil, nil} @@ -411,9 +412,9 @@ func TestStoreRestore(t *testing.T) { t.Errorf("current rev = %v, want 5", s.currentRev) } wact := []testutil.Action{ - {Name: "range", Params: []interface{}{schema.Meta, schema.FinishedCompactKeyName, []byte(nil), int64(0)}}, - {Name: "range", Params: []interface{}{schema.Meta, schema.ScheduledCompactKeyName, []byte(nil), int64(0)}}, - {Name: "range", Params: []interface{}{schema.Key, newTestRevBytes(revision{1, 0}), newTestRevBytes(revision{math.MaxInt64, math.MaxInt64}), int64(restoreChunkKeys)}}, + {Name: "range", Params: []interface{}{bucket.Meta, bucket.FinishedCompactKeyName, []byte(nil), int64(0)}}, + {Name: "range", Params: []interface{}{bucket.Meta, bucket.ScheduledCompactKeyName, []byte(nil), int64(0)}}, + {Name: "range", Params: []interface{}{bucket.Key, newTestRevBytes(revision{1, 0}), newTestRevBytes(revision{math.MaxInt64, math.MaxInt64}), int64(restoreChunkKeys)}}, } if g := b.tx.Action(); !reflect.DeepEqual(g, wact) { t.Errorf("tx actions = %+v, want %+v", g, wact) @@ -533,7 +534,7 @@ func TestRestoreContinueUnfinishedCompaction(t *testing.T) { for i := 0; i < 5; i++ { tx := s.b.BatchTx() tx.Lock() - ks, _ := tx.UnsafeRange(schema.Key, revbytes, nil, 0) + ks, _ := tx.UnsafeRange(bucket.Key, revbytes, nil, 0) tx.Unlock() if len(ks) != 0 { time.Sleep(100 * time.Millisecond) @@ -942,29 +943,29 @@ type fakeBatchTx struct { rangeRespc chan rangeResp } -func (b *fakeBatchTx) LockInsideApply() {} -func (b *fakeBatchTx) LockOutsideApply() {} -func (b *fakeBatchTx) Lock() {} -func (b *fakeBatchTx) Unlock() {} -func (b *fakeBatchTx) RLock() {} -func (b *fakeBatchTx) RUnlock() {} -func (b *fakeBatchTx) UnsafeCreateBucket(bucket backend.Bucket) {} -func (b *fakeBatchTx) UnsafeDeleteBucket(bucket backend.Bucket) {} -func (b *fakeBatchTx) UnsafePut(bucket backend.Bucket, key []byte, value []byte) { +func (b *fakeBatchTx) 
LockInsideApply() {} +func (b *fakeBatchTx) LockOutsideApply() {} +func (b *fakeBatchTx) Lock() {} +func (b *fakeBatchTx) Unlock() {} +func (b *fakeBatchTx) RLock() {} +func (b *fakeBatchTx) RUnlock() {} +func (b *fakeBatchTx) UnsafeCreateBucket(bucket bucket.Bucket) {} +func (b *fakeBatchTx) UnsafeDeleteBucket(bucket bucket.Bucket) {} +func (b *fakeBatchTx) UnsafePut(bucket bucket.Bucket, key []byte, value []byte) { b.Recorder.Record(testutil.Action{Name: "put", Params: []interface{}{bucket, key, value}}) } -func (b *fakeBatchTx) UnsafeSeqPut(bucket backend.Bucket, key []byte, value []byte) { +func (b *fakeBatchTx) UnsafeSeqPut(bucket bucket.Bucket, key []byte, value []byte) { b.Recorder.Record(testutil.Action{Name: "seqput", Params: []interface{}{bucket, key, value}}) } -func (b *fakeBatchTx) UnsafeRange(bucket backend.Bucket, key, endKey []byte, limit int64) (keys [][]byte, vals [][]byte) { +func (b *fakeBatchTx) UnsafeRange(bucket bucket.Bucket, key, endKey []byte, limit int64) (keys [][]byte, vals [][]byte) { b.Recorder.Record(testutil.Action{Name: "range", Params: []interface{}{bucket, key, endKey, limit}}) r := <-b.rangeRespc return r.keys, r.vals } -func (b *fakeBatchTx) UnsafeDelete(bucket backend.Bucket, key []byte) { +func (b *fakeBatchTx) UnsafeDelete(bucket bucket.Bucket, key []byte) { b.Recorder.Record(testutil.Action{Name: "delete", Params: []interface{}{bucket, key}}) } -func (b *fakeBatchTx) UnsafeForEach(bucket backend.Bucket, visitor func(k, v []byte) error) error { +func (b *fakeBatchTx) UnsafeForEach(bucket bucket.Bucket, visitor func(k, v []byte) error) error { return nil } func (b *fakeBatchTx) Commit() {} @@ -986,6 +987,7 @@ func (b *fakeBackend) ForceCommit() func (b *fakeBackend) Defrag() error { return nil } func (b *fakeBackend) Close() error { return nil } func (b *fakeBackend) SetTxPostLockInsideApplyHook(func()) {} +func (b *fakeBackend) BackendType() string { return "bolt" } type indexGetResp struct { rev revision diff --git 
a/server/storage/mvcc/kvstore_txn.go b/server/storage/mvcc/kvstore_txn.go index d35d7fbaed0..97868ee4c82 100644 --- a/server/storage/mvcc/kvstore_txn.go +++ b/server/storage/mvcc/kvstore_txn.go @@ -18,13 +18,14 @@ import ( "context" "fmt" + "go.etcd.io/etcd/server/v3/bucket" + "go.uber.org/zap" "go.etcd.io/etcd/api/v3/mvccpb" "go.etcd.io/etcd/pkg/v3/traceutil" "go.etcd.io/etcd/server/v3/lease" "go.etcd.io/etcd/server/v3/storage/backend" - "go.etcd.io/etcd/server/v3/storage/schema" ) type storeTxnRead struct { @@ -105,7 +106,7 @@ func (tr *storeTxnCommon) rangeKeys(ctx context.Context, key, end []byte, curRev default: } revToBytes(revpair, revBytes) - _, vs := tr.tx.UnsafeRange(schema.Key, revBytes, nil, 0) + _, vs := tr.tx.UnsafeRange(bucket.Key, revBytes, nil, 0) if len(vs) != 1 { tr.s.lg.Fatal( "range failed to find revision pair", @@ -229,7 +230,7 @@ func (tw *storeTxnWrite) put(key, value []byte, leaseID lease.LeaseID) { } tw.trace.Step("marshal mvccpb.KeyValue") - tw.tx.UnsafeSeqPut(schema.Key, ibytes, d) + tw.tx.UnsafeSeqPut(bucket.Key, ibytes, d) tw.s.kvindex.Put(key, idxRev) tw.changes = append(tw.changes, kv) tw.trace.Step("store kv pair into bolt db") @@ -295,7 +296,7 @@ func (tw *storeTxnWrite) delete(key []byte) { ) } - tw.tx.UnsafeSeqPut(schema.Key, ibytes, d) + tw.tx.UnsafeSeqPut(bucket.Key, ibytes, d) err = tw.s.kvindex.Tombstone(key, idxRev) if err != nil { tw.storeTxnCommon.s.lg.Fatal( diff --git a/server/storage/mvcc/store.go b/server/storage/mvcc/store.go index 886375ec8c9..c0abb55bdde 100644 --- a/server/storage/mvcc/store.go +++ b/server/storage/mvcc/store.go @@ -15,12 +15,12 @@ package mvcc import ( + "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/storage/backend" - "go.etcd.io/etcd/server/v3/storage/schema" ) func UnsafeReadFinishedCompact(tx backend.UnsafeReader) (finishedComact int64, found bool) { - _, finishedCompactBytes := tx.UnsafeRange(schema.Meta, schema.FinishedCompactKeyName, nil, 0) + _, finishedCompactBytes := 
tx.UnsafeRange(bucket.Meta, bucket.FinishedCompactKeyName, nil, 0) if len(finishedCompactBytes) != 0 { return bytesToRev(finishedCompactBytes[0]).main, true } @@ -28,7 +28,7 @@ func UnsafeReadFinishedCompact(tx backend.UnsafeReader) (finishedComact int64, f } func UnsafeReadScheduledCompact(tx backend.UnsafeReader) (scheduledComact int64, found bool) { - _, scheduledCompactBytes := tx.UnsafeRange(schema.Meta, schema.ScheduledCompactKeyName, nil, 0) + _, scheduledCompactBytes := tx.UnsafeRange(bucket.Meta, bucket.ScheduledCompactKeyName, nil, 0) if len(scheduledCompactBytes) != 0 { return bytesToRev(scheduledCompactBytes[0]).main, true } @@ -44,7 +44,7 @@ func SetScheduledCompact(tx backend.BatchTx, value int64) { func UnsafeSetScheduledCompact(tx backend.UnsafeWriter, value int64) { rbytes := newRevBytes() revToBytes(revision{main: value}, rbytes) - tx.UnsafePut(schema.Meta, schema.ScheduledCompactKeyName, rbytes) + tx.UnsafePut(bucket.Meta, bucket.ScheduledCompactKeyName, rbytes) } func SetFinishedCompact(tx backend.BatchTx, value int64) { @@ -56,5 +56,5 @@ func SetFinishedCompact(tx backend.BatchTx, value int64) { func UnsafeSetFinishedCompact(tx backend.UnsafeWriter, value int64) { rbytes := newRevBytes() revToBytes(revision{main: value}, rbytes) - tx.UnsafePut(schema.Meta, schema.FinishedCompactKeyName, rbytes) + tx.UnsafePut(bucket.Meta, bucket.FinishedCompactKeyName, rbytes) } diff --git a/server/storage/mvcc/store_test.go b/server/storage/mvcc/store_test.go index bd6d25e171f..2c6e560f9f8 100644 --- a/server/storage/mvcc/store_test.go +++ b/server/storage/mvcc/store_test.go @@ -20,12 +20,17 @@ import ( "testing" "time" + "go.etcd.io/etcd/server/v3/bucket" + "github.com/stretchr/testify/assert" "go.uber.org/zap/zaptest" "go.etcd.io/etcd/server/v3/storage/backend" betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" - "go.etcd.io/etcd/server/v3/storage/schema" +) + +const ( + defaultBackendType = "bolt" ) // TestScheduledCompact ensures that 
UnsafeSetScheduledCompact&UnsafeReadScheduledCompact work well together. @@ -55,13 +60,13 @@ func TestScheduledCompact(t *testing.T) { t.Fatal("batch tx is nil") } tx.Lock() - tx.UnsafeCreateBucket(schema.Meta) + tx.UnsafeCreateBucket(bucket.Meta) UnsafeSetScheduledCompact(tx, tc.value) tx.Unlock() be.ForceCommit() be.Close() - b := backend.NewDefaultBackend(lg, tmpPath) + b := backend.NewDefaultBackend(lg, tmpPath, defaultBackendType) defer b.Close() v, found := UnsafeReadScheduledCompact(b.BatchTx()) assert.Equal(t, true, found) @@ -97,13 +102,13 @@ func TestFinishedCompact(t *testing.T) { t.Fatal("batch tx is nil") } tx.Lock() - tx.UnsafeCreateBucket(schema.Meta) + tx.UnsafeCreateBucket(bucket.Meta) UnsafeSetFinishedCompact(tx, tc.value) tx.Unlock() be.ForceCommit() be.Close() - b := backend.NewDefaultBackend(lg, tmpPath) + b := backend.NewDefaultBackend(lg, tmpPath, defaultBackendType) defer b.Close() v, found := UnsafeReadFinishedCompact(b.BatchTx()) assert.Equal(t, true, found) diff --git a/server/storage/mvcc/watchable_store.go b/server/storage/mvcc/watchable_store.go index 4e7b5a71407..b0c5da49b25 100644 --- a/server/storage/mvcc/watchable_store.go +++ b/server/storage/mvcc/watchable_store.go @@ -18,14 +18,14 @@ import ( "sync" "time" + "go.uber.org/zap" + "go.etcd.io/etcd/api/v3/mvccpb" clientv3 "go.etcd.io/etcd/client/v3" "go.etcd.io/etcd/pkg/v3/traceutil" + "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/lease" "go.etcd.io/etcd/server/v3/storage/backend" - "go.etcd.io/etcd/server/v3/storage/schema" - - "go.uber.org/zap" ) // non-const so modifiable by tests @@ -360,7 +360,7 @@ func (s *watchableStore) syncWatchers() int { // values are actual key-value pairs in backend. 
tx := s.store.b.ReadTx() tx.RLock() - revs, vs := tx.UnsafeRange(schema.Key, minBytes, maxBytes, 0) + revs, vs := tx.UnsafeRange(bucket.Key, minBytes, maxBytes, 0) evs := kvsToEvents(s.store.lg, wg, revs, vs) // Must unlock after kvsToEvents, because vs (come from boltdb memory) is not deep copy. // We can only unlock after Unmarshal, which will do deep copy. diff --git a/server/storage/schema/actions.go b/server/storage/schema/actions.go index 415ffcd792f..54a682a0972 100644 --- a/server/storage/schema/actions.go +++ b/server/storage/schema/actions.go @@ -17,6 +17,7 @@ package schema import ( "go.uber.org/zap" + "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/storage/backend" ) @@ -27,7 +28,7 @@ type action interface { } type setKeyAction struct { - Bucket backend.Bucket + Bucket bucket.Bucket FieldName []byte FieldValue []byte } @@ -39,7 +40,7 @@ func (a setKeyAction) unsafeDo(tx backend.UnsafeReadWriter) (action, error) { } type deleteKeyAction struct { - Bucket backend.Bucket + Bucket bucket.Bucket FieldName []byte } @@ -49,7 +50,7 @@ func (a deleteKeyAction) unsafeDo(tx backend.UnsafeReadWriter) (action, error) { return revert, nil } -func restoreFieldValueAction(tx backend.UnsafeReader, bucket backend.Bucket, fieldName []byte) action { +func restoreFieldValueAction(tx backend.UnsafeReader, bucket bucket.Bucket, fieldName []byte) action { _, vs := tx.UnsafeRange(bucket, fieldName, nil, 1) if len(vs) == 1 { return &setKeyAction{ diff --git a/server/storage/schema/actions_test.go b/server/storage/schema/actions_test.go index 4e61a0dac9a..83085848cd7 100644 --- a/server/storage/schema/actions_test.go +++ b/server/storage/schema/actions_test.go @@ -19,6 +19,8 @@ import ( "testing" "time" + "go.etcd.io/etcd/server/v3/bucket" + "github.com/stretchr/testify/assert" "go.uber.org/zap/zaptest" @@ -35,7 +37,7 @@ func TestActionIsReversible(t *testing.T) { { name: "setKeyAction empty state", action: setKeyAction{ - Bucket: Meta, + Bucket: bucket.Meta, 
FieldName: []byte("/test"), FieldValue: []byte("1"), }, @@ -43,7 +45,7 @@ func TestActionIsReversible(t *testing.T) { { name: "setKeyAction with key", action: setKeyAction{ - Bucket: Meta, + Bucket: bucket.Meta, FieldName: []byte("/test"), FieldValue: []byte("1"), }, @@ -52,14 +54,14 @@ func TestActionIsReversible(t *testing.T) { { name: "deleteKeyAction empty state", action: deleteKeyAction{ - Bucket: Meta, + Bucket: bucket.Meta, FieldName: []byte("/test"), }, }, { name: "deleteKeyAction with key", action: deleteKeyAction{ - Bucket: Meta, + Bucket: bucket.Meta, FieldName: []byte("/test"), }, state: map[string]string{"/test": "2"}, @@ -76,9 +78,9 @@ func TestActionIsReversible(t *testing.T) { tx.Lock() defer tx.Unlock() UnsafeCreateMetaBucket(tx) - putKeyValues(tx, Meta, tc.state) + putKeyValues(tx, bucket.Meta, tc.state) - assertBucketState(t, tx, Meta, tc.state) + assertBucketState(t, tx, bucket.Meta, tc.state) reverse, err := tc.action.unsafeDo(tx) if err != nil { t.Errorf("Failed to upgrade, err: %v", err) @@ -87,7 +89,7 @@ func TestActionIsReversible(t *testing.T) { if err != nil { t.Errorf("Failed to downgrade, err: %v", err) } - assertBucketState(t, tx, Meta, tc.state) + assertBucketState(t, tx, bucket.Meta, tc.state) }) } } @@ -103,17 +105,17 @@ func TestActionListRevert(t *testing.T) { { name: "Apply multiple actions", actions: ActionList{ - setKeyAction{Meta, []byte("/testKey1"), []byte("testValue1")}, - setKeyAction{Meta, []byte("/testKey2"), []byte("testValue2")}, + setKeyAction{bucket.Meta, []byte("/testKey1"), []byte("testValue1")}, + setKeyAction{bucket.Meta, []byte("/testKey2"), []byte("testValue2")}, }, expectState: map[string]string{"/testKey1": "testValue1", "/testKey2": "testValue2"}, }, { name: "Broken action should result in changes reverted", actions: ActionList{ - setKeyAction{Meta, []byte("/testKey1"), []byte("testValue1")}, + setKeyAction{bucket.Meta, []byte("/testKey1"), []byte("testValue1")}, brokenAction{}, - setKeyAction{Meta, 
[]byte("/testKey2"), []byte("testValue2")}, + setKeyAction{bucket.Meta, []byte("/testKey2"), []byte("testValue2")}, }, expectState: map[string]string{}, expectError: errBrokenAction, @@ -138,7 +140,7 @@ func TestActionListRevert(t *testing.T) { if err != tc.expectError { t.Errorf("Unexpected error or lack thereof, expected: %v, got: %v", tc.expectError, err) } - assertBucketState(t, tx, Meta, tc.expectState) + assertBucketState(t, tx, bucket.Meta, tc.expectState) }) } } @@ -151,7 +153,7 @@ func (c brokenAction) unsafeDo(tx backend.UnsafeReadWriter) (action, error) { return nil, errBrokenAction } -func putKeyValues(tx backend.UnsafeWriter, bucket backend.Bucket, kvs map[string]string) { +func putKeyValues(tx backend.UnsafeWriter, bucket bucket.Bucket, kvs map[string]string) { for k, v := range kvs { tx.UnsafePut(bucket, []byte(k), []byte(v)) } diff --git a/server/storage/schema/alarm.go b/server/storage/schema/alarm.go index 6e81d0f4671..5645ecf64f2 100644 --- a/server/storage/schema/alarm.go +++ b/server/storage/schema/alarm.go @@ -18,6 +18,7 @@ import ( "go.uber.org/zap" "go.etcd.io/etcd/api/v3/etcdserverpb" + "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/storage/backend" ) @@ -37,7 +38,7 @@ func (s *alarmBackend) CreateAlarmBucket() { tx := s.be.BatchTx() tx.LockOutsideApply() defer tx.Unlock() - tx.UnsafeCreateBucket(Alarm) + tx.UnsafeCreateBucket(bucket.Alarm) } func (s *alarmBackend) MustPutAlarm(alarm *etcdserverpb.AlarmMember) { @@ -53,7 +54,7 @@ func (s *alarmBackend) mustUnsafePutAlarm(tx backend.UnsafeWriter, alarm *etcdse s.lg.Panic("failed to marshal alarm member", zap.Error(err)) } - tx.UnsafePut(Alarm, v, nil) + tx.UnsafePut(bucket.Alarm, v, nil) } func (s *alarmBackend) MustDeleteAlarm(alarm *etcdserverpb.AlarmMember) { @@ -69,7 +70,7 @@ func (s *alarmBackend) mustUnsafeDeleteAlarm(tx backend.UnsafeWriter, alarm *etc s.lg.Panic("failed to marshal alarm member", zap.Error(err)) } - tx.UnsafeDelete(Alarm, v) + 
tx.UnsafeDelete(bucket.Alarm, v) } func (s *alarmBackend) GetAllAlarms() ([]*etcdserverpb.AlarmMember, error) { @@ -81,7 +82,7 @@ func (s *alarmBackend) GetAllAlarms() ([]*etcdserverpb.AlarmMember, error) { func (s *alarmBackend) unsafeGetAllAlarms(tx backend.UnsafeReader) ([]*etcdserverpb.AlarmMember, error) { var ms []*etcdserverpb.AlarmMember - err := tx.UnsafeForEach(Alarm, func(k, v []byte) error { + err := tx.UnsafeForEach(bucket.Alarm, func(k, v []byte) error { var m etcdserverpb.AlarmMember if err := m.Unmarshal(k); err != nil { return err diff --git a/server/storage/schema/auth.go b/server/storage/schema/auth.go index 3bff8412c9f..4548821c9f5 100644 --- a/server/storage/schema/auth.go +++ b/server/storage/schema/auth.go @@ -21,6 +21,7 @@ import ( "go.uber.org/zap" "go.etcd.io/etcd/server/v3/auth" + "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/storage/backend" ) @@ -51,9 +52,9 @@ func (abe *authBackend) CreateAuthBuckets() { tx := abe.be.BatchTx() tx.LockOutsideApply() defer tx.Unlock() - tx.UnsafeCreateBucket(Auth) - tx.UnsafeCreateBucket(AuthUsers) - tx.UnsafeCreateBucket(AuthRoles) + tx.UnsafeCreateBucket(bucket.Auth) + tx.UnsafeCreateBucket(bucket.AuthUsers) + tx.UnsafeCreateBucket(bucket.AuthRoles) } func (abe *authBackend) ForceCommit() { @@ -83,16 +84,16 @@ var _ auth.AuthBatchTx = (*authBatchTx)(nil) func (atx *authBatchTx) UnsafeSaveAuthEnabled(enabled bool) { if enabled { - atx.tx.UnsafePut(Auth, AuthEnabledKeyName, authEnabled) + atx.tx.UnsafePut(bucket.Auth, bucket.AuthEnabledKeyName, authEnabled) } else { - atx.tx.UnsafePut(Auth, AuthEnabledKeyName, authDisabled) + atx.tx.UnsafePut(bucket.Auth, bucket.AuthEnabledKeyName, authDisabled) } } func (atx *authBatchTx) UnsafeSaveAuthRevision(rev uint64) { revBytes := make([]byte, revBytesLen) binary.BigEndian.PutUint64(revBytes, rev) - atx.tx.UnsafePut(Auth, AuthRevisionKeyName, revBytes) + atx.tx.UnsafePut(bucket.Auth, bucket.AuthRevisionKeyName, revBytes) } func (atx *authBatchTx) 
UnsafeReadAuthEnabled() bool { @@ -119,7 +120,7 @@ func (atx *authReadTx) UnsafeReadAuthEnabled() bool { } func unsafeReadAuthEnabled(tx backend.UnsafeReader) bool { - _, vs := tx.UnsafeRange(Auth, AuthEnabledKeyName, nil, 0) + _, vs := tx.UnsafeRange(bucket.Auth, bucket.AuthEnabledKeyName, nil, 0) if len(vs) == 1 { if bytes.Equal(vs[0], authEnabled) { return true @@ -133,7 +134,7 @@ func (atx *authReadTx) UnsafeReadAuthRevision() uint64 { } func unsafeReadAuthRevision(tx backend.UnsafeReader) uint64 { - _, vs := tx.UnsafeRange(Auth, AuthRevisionKeyName, nil, 0) + _, vs := tx.UnsafeRange(bucket.Auth, bucket.AuthRevisionKeyName, nil, 0) if len(vs) != 1 { // this can happen in the initialization phase return 0 diff --git a/server/storage/schema/auth_roles.go b/server/storage/schema/auth_roles.go index 6161a0885a9..364c7c8cc05 100644 --- a/server/storage/schema/auth_roles.go +++ b/server/storage/schema/auth_roles.go @@ -18,11 +18,12 @@ import ( "go.uber.org/zap" "go.etcd.io/etcd/api/v3/authpb" + "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/storage/backend" ) func UnsafeCreateAuthRolesBucket(tx backend.UnsafeWriter) { - tx.UnsafeCreateBucket(AuthRoles) + tx.UnsafeCreateBucket(bucket.AuthRoles) } func (abe *authBackend) GetRole(roleName string) *authpb.Role { @@ -57,11 +58,11 @@ func (atx *authBatchTx) UnsafePutRole(role *authpb.Role) { ) } - atx.tx.UnsafePut(AuthRoles, role.Name, b) + atx.tx.UnsafePut(bucket.AuthRoles, role.Name, b) } func (atx *authBatchTx) UnsafeDeleteRole(rolename string) { - atx.tx.UnsafeDelete(AuthRoles, []byte(rolename)) + atx.tx.UnsafeDelete(bucket.AuthRoles, []byte(rolename)) } func (atx *authReadTx) UnsafeGetRole(roleName string) *authpb.Role { @@ -69,7 +70,7 @@ func (atx *authReadTx) UnsafeGetRole(roleName string) *authpb.Role { } func unsafeGetRole(lg *zap.Logger, tx backend.UnsafeReader, roleName string) *authpb.Role { - _, vs := tx.UnsafeRange(AuthRoles, []byte(roleName), nil, 0) + _, vs := tx.UnsafeRange(bucket.AuthRoles, 
[]byte(roleName), nil, 0) if len(vs) == 0 { return nil } @@ -87,7 +88,7 @@ func (atx *authReadTx) UnsafeGetAllRoles() []*authpb.Role { } func unsafeGetAllRoles(lg *zap.Logger, tx backend.UnsafeReader) []*authpb.Role { - _, vs := tx.UnsafeRange(AuthRoles, []byte{0}, []byte{0xff}, -1) + _, vs := tx.UnsafeRange(bucket.AuthRoles, []byte{0}, []byte{0xff}, -1) if len(vs) == 0 { return nil } diff --git a/server/storage/schema/auth_roles_test.go b/server/storage/schema/auth_roles_test.go index d7323c6f4a6..1089b8008c3 100644 --- a/server/storage/schema/auth_roles_test.go +++ b/server/storage/schema/auth_roles_test.go @@ -27,6 +27,10 @@ import ( betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" ) +const ( + defaultTestBackend = "bolt" +) + func TestGetAllRoles(t *testing.T) { tcs := []struct { name string @@ -122,7 +126,7 @@ func TestGetAllRoles(t *testing.T) { abe.ForceCommit() be.Close() - be2 := backend.NewDefaultBackend(lg, tmpPath) + be2 := backend.NewDefaultBackend(lg, tmpPath, defaultTestBackend) defer be2.Close() abe2 := NewAuthBackend(lg, be2) users := abe2.GetAllRoles() @@ -219,7 +223,7 @@ func TestGetRole(t *testing.T) { abe.ForceCommit() be.Close() - be2 := backend.NewDefaultBackend(lg, tmpPath) + be2 := backend.NewDefaultBackend(lg, tmpPath, defaultTestBackend) defer be2.Close() abe2 := NewAuthBackend(lg, be2) users := abe2.GetRole("role1") diff --git a/server/storage/schema/auth_test.go b/server/storage/schema/auth_test.go index 96174e50ffc..c4fefc4dd78 100644 --- a/server/storage/schema/auth_test.go +++ b/server/storage/schema/auth_test.go @@ -66,7 +66,7 @@ func TestAuthEnabled(t *testing.T) { abe.ForceCommit() be.Close() - be2 := backend.NewDefaultBackend(lg, tmpPath) + be2 := backend.NewDefaultBackend(lg, tmpPath, defaultTestBackend) defer be2.Close() abe2 := NewAuthBackend(lg, be2) tx = abe2.BatchTx() @@ -117,7 +117,7 @@ func TestAuthRevision(t *testing.T) { abe.ForceCommit() be.Close() - be2 := backend.NewDefaultBackend(lg, tmpPath) + be2 := 
backend.NewDefaultBackend(lg, tmpPath, defaultTestBackend) defer be2.Close() abe2 := NewAuthBackend(lg, be2) tx := abe2.BatchTx() diff --git a/server/storage/schema/auth_users.go b/server/storage/schema/auth_users.go index 6ea9dd98a41..b46df46c9b8 100644 --- a/server/storage/schema/auth_users.go +++ b/server/storage/schema/auth_users.go @@ -20,6 +20,7 @@ import ( "go.etcd.io/etcd/server/v3/storage/backend" "go.etcd.io/etcd/api/v3/authpb" + "go.etcd.io/etcd/server/v3/bucket" ) func (abe *authBackend) GetUser(username string) *authpb.User { @@ -42,11 +43,11 @@ func (atx *authBatchTx) UnsafePutUser(user *authpb.User) { if err != nil { atx.lg.Panic("failed to unmarshal 'authpb.User'", zap.Error(err)) } - atx.tx.UnsafePut(AuthUsers, user.Name, b) + atx.tx.UnsafePut(bucket.AuthUsers, user.Name, b) } func (atx *authBatchTx) UnsafeDeleteUser(username string) { - atx.tx.UnsafeDelete(AuthUsers, []byte(username)) + atx.tx.UnsafeDelete(bucket.AuthUsers, []byte(username)) } func (atx *authReadTx) UnsafeGetUser(username string) *authpb.User { @@ -54,7 +55,7 @@ func (atx *authReadTx) UnsafeGetUser(username string) *authpb.User { } func unsafeGetUser(lg *zap.Logger, tx backend.UnsafeReader, username string) *authpb.User { - _, vs := tx.UnsafeRange(AuthUsers, []byte(username), nil, 0) + _, vs := tx.UnsafeRange(bucket.AuthUsers, []byte(username), nil, 0) if len(vs) == 0 { return nil } @@ -84,7 +85,7 @@ func (atx *authReadTx) UnsafeGetAllUsers() []*authpb.User { func unsafeGetAllUsers(lg *zap.Logger, tx backend.UnsafeReader) []*authpb.User { var vs [][]byte - err := tx.UnsafeForEach(AuthUsers, func(k []byte, v []byte) error { + err := tx.UnsafeForEach(bucket.AuthUsers, func(k []byte, v []byte) error { vs = append(vs, v) return nil }) diff --git a/server/storage/schema/auth_users_test.go b/server/storage/schema/auth_users_test.go index 2261e57071b..f2916467370 100644 --- a/server/storage/schema/auth_users_test.go +++ b/server/storage/schema/auth_users_test.go @@ -110,7 +110,7 @@ func 
TestGetAllUsers(t *testing.T) { abe.ForceCommit() be.Close() - be2 := backend.NewDefaultBackend(lg, tmpPath) + be2 := backend.NewDefaultBackend(lg, tmpPath, defaultTestBackend) defer be2.Close() abe2 := NewAuthBackend(lg, be2) users := abe2.ReadTx().UnsafeGetAllUsers() @@ -195,7 +195,7 @@ func TestGetUser(t *testing.T) { abe.ForceCommit() be.Close() - be2 := backend.NewDefaultBackend(lg, tmpPath) + be2 := backend.NewDefaultBackend(lg, tmpPath, defaultTestBackend) defer be2.Close() abe2 := NewAuthBackend(lg, be2) users := abe2.GetUser("alice") diff --git a/server/storage/schema/changes.go b/server/storage/schema/changes.go index 6eb0b751209..c66c45cd363 100644 --- a/server/storage/schema/changes.go +++ b/server/storage/schema/changes.go @@ -14,7 +14,9 @@ package schema -import "go.etcd.io/etcd/server/v3/storage/backend" +import ( + "go.etcd.io/etcd/server/v3/bucket" +) type schemaChange interface { upgradeAction() action @@ -22,7 +24,7 @@ type schemaChange interface { } // addNewField represents adding new field when upgrading. Downgrade will remove the field. 
-func addNewField(bucket backend.Bucket, fieldName []byte, fieldValue []byte) schemaChange { +func addNewField(bucket bucket.Bucket, fieldName []byte, fieldValue []byte) schemaChange { return simpleSchemaChange{ upgrade: setKeyAction{ Bucket: bucket, diff --git a/server/storage/schema/changes_test.go b/server/storage/schema/changes_test.go index 05b8d49cf44..f46be39c340 100644 --- a/server/storage/schema/changes_test.go +++ b/server/storage/schema/changes_test.go @@ -18,6 +18,7 @@ import ( "testing" "time" + "go.etcd.io/etcd/server/v3/bucket" betesting "go.etcd.io/etcd/server/v3/storage/backend/testing" ) @@ -30,7 +31,7 @@ func TestUpgradeDowngrade(t *testing.T) { }{ { name: "addNewField empty", - change: addNewField(Meta, []byte("/test"), []byte("1")), + change: addNewField(bucket.Meta, []byte("/test"), []byte("1")), expectStateAfterUpgrade: map[string]string{"/test": "1"}, }, } @@ -50,12 +51,12 @@ func TestUpgradeDowngrade(t *testing.T) { if err != nil { t.Errorf("Failed to upgrade, err: %v", err) } - assertBucketState(t, tx, Meta, tc.expectStateAfterUpgrade) + assertBucketState(t, tx, bucket.Meta, tc.expectStateAfterUpgrade) _, err = tc.change.downgradeAction().unsafeDo(tx) if err != nil { t.Errorf("Failed to downgrade, err: %v", err) } - assertBucketState(t, tx, Meta, tc.expectStateAfterDowngrade) + assertBucketState(t, tx, bucket.Meta, tc.expectStateAfterDowngrade) }) } } diff --git a/server/storage/schema/cindex.go b/server/storage/schema/cindex.go index cdf938d346c..a30fdb56a92 100644 --- a/server/storage/schema/cindex.go +++ b/server/storage/schema/cindex.go @@ -19,31 +19,32 @@ import ( "fmt" "go.etcd.io/etcd/client/pkg/v3/verify" + "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/storage/backend" ) // UnsafeCreateMetaBucket creates the `meta` bucket (if it does not exist yet). 
func UnsafeCreateMetaBucket(tx backend.UnsafeWriter) { - tx.UnsafeCreateBucket(Meta) + tx.UnsafeCreateBucket(bucket.Meta) } // CreateMetaBucket creates the `meta` bucket (if it does not exist yet). func CreateMetaBucket(tx backend.BatchTx) { tx.LockOutsideApply() defer tx.Unlock() - tx.UnsafeCreateBucket(Meta) + tx.UnsafeCreateBucket(bucket.Meta) } // UnsafeReadConsistentIndex loads consistent index & term from given transaction. // returns 0,0 if the data are not found. // Term is persisted since v3.5. func UnsafeReadConsistentIndex(tx backend.UnsafeReader) (uint64, uint64) { - _, vs := tx.UnsafeRange(Meta, MetaConsistentIndexKeyName, nil, 0) + _, vs := tx.UnsafeRange(bucket.Meta, bucket.MetaConsistentIndexKeyName, nil, 0) if len(vs) == 0 { return 0, 0 } v := binary.BigEndian.Uint64(vs[0]) - _, ts := tx.UnsafeRange(Meta, MetaTermKeyName, nil, 0) + _, ts := tx.UnsafeRange(bucket.Meta, bucket.MetaTermKeyName, nil, 0) if len(ts) == 0 { return v, 0 } @@ -86,10 +87,10 @@ func unsafeUpdateConsistentIndex(tx backend.UnsafeReadWriter, index uint64, term // put the index into the underlying backend // tx has been locked in TxnBegin, so there is no need to lock it again - tx.UnsafePut(Meta, MetaConsistentIndexKeyName, bs1) + tx.UnsafePut(bucket.Meta, bucket.MetaConsistentIndexKeyName, bs1) if term > 0 { bs2 := make([]byte, 8) binary.BigEndian.PutUint64(bs2, term) - tx.UnsafePut(Meta, MetaTermKeyName, bs2) + tx.UnsafePut(bucket.Meta, bucket.MetaTermKeyName, bs2) } } diff --git a/server/storage/schema/confstate.go b/server/storage/schema/confstate.go index ac3a2cc1485..36945958857 100644 --- a/server/storage/schema/confstate.go +++ b/server/storage/schema/confstate.go @@ -20,6 +20,7 @@ import ( "go.uber.org/zap" + "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/storage/backend" "go.etcd.io/raft/v3/raftpb" ) @@ -32,20 +33,20 @@ func MustUnsafeSaveConfStateToBackend(lg *zap.Logger, tx backend.UnsafeWriter, c lg.Panic("Cannot marshal raftpb.ConfState", 
zap.Stringer("conf-state", confState), zap.Error(err)) } - tx.UnsafePut(Meta, MetaConfStateName, confStateBytes) + tx.UnsafePut(bucket.Meta, bucket.MetaConfStateName, confStateBytes) } // UnsafeConfStateFromBackend retrieves ConfState from the backend. // Returns nil if confState in backend is not persisted (e.g. backend writen by Date: Sun, 23 Jul 2023 22:29:01 +0000 Subject: [PATCH 3/4] add directories to gofail Signed-off-by: Han Kang --- tests/robustness/makefile.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/robustness/makefile.mk b/tests/robustness/makefile.mk index fd122d23a92..483ca240835 100644 --- a/tests/robustness/makefile.mk +++ b/tests/robustness/makefile.mk @@ -36,7 +36,7 @@ GOFAIL_VERSION = $(shell cd tools/mod && go list -m -f {{.Version}} go.etcd.io/g .PHONY: gofail-enable gofail-enable: install-gofail - gofail enable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/ + gofail enable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/databases/bbolt/ cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION} cd ./etcdutl && go get go.etcd.io/gofail@${GOFAIL_VERSION} cd ./etcdctl && go get go.etcd.io/gofail@${GOFAIL_VERSION} @@ -44,7 +44,7 @@ gofail-enable: install-gofail .PHONY: gofail-disable gofail-disable: install-gofail - gofail disable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/ + gofail disable server/etcdserver/ server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/databases/bbolt/ cd ./server && go mod tidy cd ./etcdutl && go mod tidy cd ./etcdctl && go mod tidy From 8e6243dda5d708667c49a5ef02eb9be52e7d6dee Mon Sep 17 00:00:00 2001 From: Han Kang Date: Sun, 30 Jul 2023 17:31:00 -0700 Subject: [PATCH 4/4] first pass at sqlite implementation (rebased) use non-cgo implementation first, so that builds continue to function Change-Id: I05aa62ad56a771552e81d397e59570737767346f 
Signed-off-by: Han Kang --- bill-of-materials.json | 72 ++++ etcdutl/go.mod | 8 + etcdutl/go.sum | 19 + go.mod | 7 + go.sum | 16 + server/config/config.go | 3 + server/databases/bbolt/bbolt.go | 3 - server/databases/sqlite/sqlite.go | 536 +++++++++++++++++++++++++ server/databases/sqlite/sqlite_test.go | 175 ++++++++ server/embed/config.go | 20 +- server/embed/etcd.go | 1 + server/go.mod | 8 + server/go.sum | 19 + server/storage/backend/backend.go | 55 ++- tests/go.mod | 7 + tests/go.sum | 16 + 16 files changed, 954 insertions(+), 11 deletions(-) create mode 100644 server/databases/sqlite/sqlite.go create mode 100644 server/databases/sqlite/sqlite_test.go diff --git a/bill-of-materials.json b/bill-of-materials.json index b5cf5d624f0..c3dfefc105e 100644 --- a/bill-of-materials.json +++ b/bill-of-materials.json @@ -125,6 +125,15 @@ } ] }, + { + "project": "github.com/glebarez/go-sqlite", + "licenses": [ + { + "type": "BSD 3-clause \"New\" or \"Revised\" License", + "confidence": 0.9707112970711297 + } + ] + }, { "project": "github.com/go-logr/logr", "licenses": [ @@ -197,6 +206,15 @@ } ] }, + { + "project": "github.com/google/uuid", + "licenses": [ + { + "type": "BSD 3-clause \"New\" or \"Revised\" License", + "confidence": 0.9663865546218487 + } + ] + }, { "project": "github.com/gorilla/websocket", "licenses": [ @@ -350,6 +368,15 @@ } ] }, + { + "project": "github.com/remyoudompheng/bigfft", + "licenses": [ + { + "type": "BSD 3-clause \"New\" or \"Revised\" License", + "confidence": 0.9663865546218487 + } + ] + }, { "project": "github.com/rivo/uniseg", "licenses": [ @@ -759,6 +786,51 @@ } ] }, + { + "project": "modernc.org/libc", + "licenses": [ + { + "type": "BSD 3-clause \"New\" or \"Revised\" License", + "confidence": 0.9617021276595744 + } + ] + }, + { + "project": "modernc.org/libc/honnef.co/go/netdb", + "licenses": [ + { + "type": "MIT License", + "confidence": 0.9891304347826086 + } + ] + }, + { + "project": "modernc.org/mathutil", + "licenses": [ + { + 
"type": "BSD 3-clause \"New\" or \"Revised\" License", + "confidence": 0.9617021276595744 + } + ] + }, + { + "project": "modernc.org/memory", + "licenses": [ + { + "type": "BSD 3-clause \"New\" or \"Revised\" License", + "confidence": 0.9617021276595744 + } + ] + }, + { + "project": "modernc.org/sqlite/lib", + "licenses": [ + { + "type": "BSD 3-clause \"New\" or \"Revised\" License", + "confidence": 0.9707112970711297 + } + ] + }, { "project": "sigs.k8s.io/json", "licenses": [ diff --git a/etcdutl/go.mod b/etcdutl/go.mod index 326abbe8a08..484d8f63808 100644 --- a/etcdutl/go.mod +++ b/etcdutl/go.mod @@ -38,21 +38,25 @@ require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/coreos/go-systemd/v22 v22.5.0 // indirect + github.com/glebarez/go-sqlite v1.21.2 // indirect github.com/go-logr/logr v1.2.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang-jwt/jwt/v4 v4.5.0 // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/btree v1.1.2 // indirect + github.com/google/uuid v1.3.0 // indirect github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jonboulle/clockwork v0.4.0 // indirect + github.com/mattn/go-isatty v0.0.17 // indirect github.com/mattn/go-runewidth v0.0.15 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/prometheus/client_golang v1.16.0 // indirect github.com/prometheus/client_model v0.4.0 // indirect github.com/prometheus/common v0.43.0 // indirect github.com/prometheus/procfs v0.10.1 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect @@ -72,5 +76,9 @@ require ( google.golang.org/genproto 
v0.0.0-20221118155620-16455021b5e6 // indirect google.golang.org/grpc v1.52.0 // indirect google.golang.org/protobuf v1.31.0 // indirect + modernc.org/libc v1.22.5 // indirect + modernc.org/mathutil v1.5.0 // indirect + modernc.org/memory v1.5.0 // indirect + modernc.org/sqlite v1.23.1 // indirect sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6 // indirect ) diff --git a/etcdutl/go.sum b/etcdutl/go.sum index bcee9c2c6e9..3f41f2474bc 100644 --- a/etcdutl/go.sum +++ b/etcdutl/go.sum @@ -33,6 +33,8 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/getsentry/raven-go v0.2.0 h1:no+xWJRb5ZI7eE8TWgIq1jLulQiIoLG0IfYxv5JYMGs= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/glebarez/go-sqlite v1.21.2 h1:3a6LFC4sKahUunAmynQKLZceZCOzUthkRkEAl9gAXWo= +github.com/glebarez/go-sqlite v1.21.2/go.mod h1:sfxdZyhQjTM2Wry3gVYWaW072Ri1WMdWJi0k6+3382k= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -57,7 +59,10 @@ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5a github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod 
h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= @@ -70,6 +75,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= +github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= @@ -89,6 +96,9 @@ github.com/prometheus/common v0.43.0 h1:iq+BVjvYLei5f27wiuNiB1DN6DYQkp1c8Bx0Vykh github.com/prometheus/common v0.43.0/go.mod h1:NCvr5cQIh3Y/gy73/RdVtC9r8xxrxwJnB+2lB3BxrFc= github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod 
h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= @@ -170,6 +180,7 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -219,5 +230,13 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +modernc.org/libc v1.22.5 h1:91BNch/e5B0uPbJFgqbxXuOnxBQjlS//icfQEGmvyjE= +modernc.org/libc v1.22.5/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY= +modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= +modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds= +modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= +modernc.org/sqlite v1.23.1 h1:nrSBg4aRQQwq59JpvGEQ15tNxoO5pX/kUjcRNwSAGQM= +modernc.org/sqlite v1.23.1/go.mod h1:OrDj17Mggn6MhE+iPbBNf7RGKODDE9NFT0f3EwDzJqk= sigs.k8s.io/json 
v0.0.0-20211020170558-c049b76a60c6 h1:fD1pz4yfdADVNfFmcP2aBEtudwUQ1AlLnRBALr33v3s= sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6/go.mod h1:p4QtZmO4uMYipTQNzagwnNoseA6OxSUutVw05NhYDRs= diff --git a/go.mod b/go.mod index 5cd17324401..9f0d7c928ec 100644 --- a/go.mod +++ b/go.mod @@ -47,6 +47,7 @@ require ( github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/fatih/color v1.15.0 // indirect + github.com/glebarez/go-sqlite v1.21.2 // indirect github.com/go-logr/logr v1.2.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -55,6 +56,7 @@ require ( github.com/golang/protobuf v1.5.3 // indirect github.com/google/btree v1.1.2 // indirect github.com/google/go-cmp v0.5.9 // indirect + github.com/google/uuid v1.3.0 // indirect github.com/gorilla/websocket v1.4.2 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect @@ -72,6 +74,7 @@ require ( github.com/prometheus/client_model v0.4.0 // indirect github.com/prometheus/common v0.43.0 // indirect github.com/prometheus/procfs v0.10.1 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/soheilhy/cmux v0.1.5 // indirect @@ -98,6 +101,10 @@ require ( gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + modernc.org/libc v1.22.5 // indirect + modernc.org/mathutil v1.5.0 // indirect + modernc.org/memory v1.5.0 // indirect + modernc.org/sqlite v1.23.1 // indirect sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6 // indirect sigs.k8s.io/yaml v1.3.0 // indirect ) diff --git a/go.sum b/go.sum index fd631d03527..eba9780ee83 100644 --- a/go.sum +++ b/go.sum @@ -91,6 +91,8 @@ github.com/fatih/color v1.15.0 
h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= github.com/getsentry/raven-go v0.2.0 h1:no+xWJRb5ZI7eE8TWgIq1jLulQiIoLG0IfYxv5JYMGs= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/glebarez/go-sqlite v1.21.2 h1:3a6LFC4sKahUunAmynQKLZceZCOzUthkRkEAl9gAXWo= +github.com/glebarez/go-sqlite v1.21.2/go.mod h1:sfxdZyhQjTM2Wry3gVYWaW072Ri1WMdWJi0k6+3382k= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -164,8 +166,11 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/gorilla/websocket v1.4.2 
h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= @@ -222,6 +227,9 @@ github.com/prometheus/common v0.43.0 h1:iq+BVjvYLei5f27wiuNiB1DN6DYQkp1c8Bx0Vykh github.com/prometheus/common v0.43.0/go.mod h1:NCvr5cQIh3Y/gy73/RdVtC9r8xxrxwJnB+2lB3BxrFc= github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= @@ -585,6 +593,14 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +modernc.org/libc v1.22.5 h1:91BNch/e5B0uPbJFgqbxXuOnxBQjlS//icfQEGmvyjE= +modernc.org/libc v1.22.5/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY= +modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= +modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds= +modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= +modernc.org/sqlite v1.23.1 h1:nrSBg4aRQQwq59JpvGEQ15tNxoO5pX/kUjcRNwSAGQM= +modernc.org/sqlite v1.23.1/go.mod 
h1:OrDj17Mggn6MhE+iPbBNf7RGKODDE9NFT0f3EwDzJqk= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/server/config/config.go b/server/config/config.go index ee595127ec1..e285a17bb1d 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -204,6 +204,9 @@ type ServerConfig struct { // ExperimentalMaxLearners sets a limit to the number of learner members that can exist in the cluster membership. ExperimentalMaxLearners int `json:"experimental-max-learners"` + // ExperimentalBackendType sets the backend type. + ExperimentalBackendType string `json:"experimental-backend-type"` + // V2Deprecation defines a phase of v2store deprecation process. V2Deprecation V2DeprecationEnum `json:"v2-deprecation"` } diff --git a/server/databases/bbolt/bbolt.go b/server/databases/bbolt/bbolt.go index 050e0491c91..67726affe10 100644 --- a/server/databases/bbolt/bbolt.go +++ b/server/databases/bbolt/bbolt.go @@ -275,7 +275,6 @@ func (b *BBoltDB) defrag(odb, tmpdb *bolt.DB, limit int) error { return err } } - return tmptx.Commit() } @@ -481,5 +480,3 @@ func (b *BBoltBucket) ForEachBucket(fn interface{}) error { func (b *BBoltBucket) Stats() interface{} { return b.bucket.Stats() } - -//db, err := bolt.Open(bcfg.Path, 0600, bopts) diff --git a/server/databases/sqlite/sqlite.go b/server/databases/sqlite/sqlite.go new file mode 100644 index 00000000000..da66c491eb1 --- /dev/null +++ b/server/databases/sqlite/sqlite.go @@ -0,0 +1,536 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sqlite + +import ( + // sqlite DB driver + + "bufio" + "database/sql" + "fmt" + "hash/crc32" + "io" + "log" + "os" + "strings" + + _ "github.com/glebarez/go-sqlite" + "github.com/prometheus/client_golang/prometheus" + "go.uber.org/zap" + + bolt "go.etcd.io/bbolt" + "go.etcd.io/etcd/server/v3/interfaces" +) + +const ( + hasBucketQuery = `SELECT name FROM sqlite_master WHERE type='table' AND name=?;` + queryTableNames = `SELECT name FROM sqlite_schema WHERE type='table' ORDER BY name;` + dropBucketQuery = `DROP TABLE IF EXISTS ?;` + createBucketQuery = "CREATE TABLE IF NOT EXISTS %s (key STRING PRIMARY KEY, value BLOB);" + genericUnsafeRangeQuery = "select key, value from %s WHERE key >= ? AND key <= ? ORDER BY key limit ?;" + genericUnsafeRangeQueryNoEnd = "select key, value from %s WHERE key >= ? ORDER BY key limit ?;" + genericGet = "SELECT value from %s WHERE key=?;" + genericUpsert = "INSERT INTO %s (key, value) VALUES(?, ?) ON CONFLICT(key) DO UPDATE SET value=excluded.value;" + genericDelete = "DELETE from %s where key = ?;" + genericForEach = "select key, value from %s;" + + sizeQuery = `SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size();` + defragCommand = `VACUUM;` + UpsertKV = `INSERT INTO KVs (key, value) + VALUES(?, ?) 
ON CONFLICT(key) DO UPDATE SET value=excluded.value;` + + dbName = "db" +) + +type SqliteDB struct { + DB *sql.DB + Dir string + dbName string + FreeListType string // no-opts +} + +type BackendBucket interface { + Name() []byte +} + +func NewBlankSqliteDB(dir string) (*SqliteDB, error) { + parts := strings.Split(dir, "/") + subdir := strings.Join(parts[:len(parts)-1], "/") + name := parts[len(parts)-1] + if err := os.MkdirAll(subdir, 0755); err != nil { + fmt.Printf("couldn't make directory: %s", dir) + return nil, err + } + db, err := sql.Open("sqlite", subdir+"/"+dbName) + + if err != nil { + return nil, err + } + db.SetConnMaxLifetime(0) + db.SetMaxIdleConns(50) + db.SetMaxOpenConns(50) + // ensure that DB is functional + if err = db.Ping(); err != nil { + return nil, err + } + if err != nil { + return nil, err + } + sdb := newDB(db, subdir, name) + return sdb, nil +} + +func NewSqliteDB[B BackendBucket](dir string, buckets ...B) (*SqliteDB, error) { + parts := strings.Split(dir, "/") + subdir := strings.Join(parts[:len(parts)-1], "/") + name := parts[len(parts)-1] + db, err := sql.Open("sqlite", dir) + + if err != nil { + return nil, err + } + db.SetConnMaxLifetime(0) + db.SetMaxIdleConns(50) + db.SetMaxOpenConns(50) + // ensure that DB is functional + if err = db.Ping(); err != nil { + return nil, err + } + for _, b := range buckets { + tn := resolveTableName(string(b.Name())) + createTableQuery := fmt.Sprintf("CREATE TABLE IF NOT EXISTS %s (key STRING PRIMARY KEY, value BLOB );", tn) + _, err = db.Exec(createTableQuery) + } + + if err != nil { + return nil, err + } + sdb := newDB(db, subdir, name) + return sdb, nil +} + +func newDB(db *sql.DB, dir string, dbName string) *SqliteDB { + return &SqliteDB{ + DB: db, + Dir: dir, + dbName: dbName, + FreeListType: string(bolt.FreelistMapType), // dummy value + } +} + +func (s *SqliteDB) Path() string { + return s.Dir +} + +func (s *SqliteDB) GoString() string { + return s.Dir + "/" + dbName +} + +func (s *SqliteDB) 
Buckets() []string { + rows, err := s.DB.Query(queryTableNames) + if err != nil { + return nil + } + defer rows.Close() + names := make([]string, 0) + + for rows.Next() { + var name string + if err := rows.Scan(&name); err != nil { + // Check for a scan error. + // Query rows will be closed with defer. + log.Fatal(err) + } + names = append(names, name) + } + return names +} + +func (s *SqliteDB) HasBucket(name string) bool { + tableName := resolveTableName(name) + rows, err := s.DB.Query(hasBucketQuery, tableName) + if err != nil { + return false + } + defer rows.Close() + names := make([]string, 0) + + for rows.Next() { + var n string + if err := rows.Scan(&n); err != nil { + // Check for a scan error. + // Query rows will be closed with defer. + log.Fatal(err) + } + names = append(names, n) + } + if len(names) < 1 { + return false + } else if len(names) > 1 { + panic("too many keys of the same bucket") + } else { + return true + } +} + +func (s *SqliteDB) DeleteBucket(name []byte) error { + tableName := resolveTableName(string(name)) + _, err := s.DB.Exec(dropBucketQuery, tableName) + if err != nil { + return err + } + return nil +} + +func (s *SqliteDB) CreateBucket(s2 string) { + tableName := resolveTableName(s2) + query := fmt.Sprintf(createBucketQuery, tableName) + if _, err := s.DB.Exec(query); err != nil { + panic(err) + } +} + +func (s *SqliteDB) GetFromBucket(bucket string, key string) []byte { + tableName := resolveTableName(bucket) + query := fmt.Sprintf(genericGet, tableName) + r, err := s.DB.Query(query, key) + if err != nil { + return nil + } + defer r.Close() + var val []byte + for r.Next() { + if err := r.Scan(&val); err != nil { + log.Fatal(err) + } + return val + } + return nil +} + +func resolveTableName(bucket string) string { + tableName := bucket + if tableName == "key" { + tableName = "KVs" + } + return tableName +} + +func (s *SqliteDB) String() string { + //TODO implement me + panic("implement me") +} + +func (s *SqliteDB) Close() error { 
+ return s.DB.Close() +} + +func (s *SqliteDB) Begin(writable bool) (interfaces.Tx, error) { + tx, err := s.DB.Begin() + if err != nil { + return nil, err + } + return &SqliteTx{tx: tx, writable: writable, db: s.DB, dir: s.Dir, dbName: s.dbName}, nil +} + +func (s *SqliteDB) Size() (size int64) { + if rows, err := s.DB.Query(sizeQuery); err != nil { + return 0 + } else { + var val int64 + defer rows.Close() + rows.Next() + rows.Scan(&val) + return val + } +} + +func (s *SqliteDB) Sync() error { + // no-opt. + return nil +} + +func (s *SqliteDB) Stats() interface{} { + return s.DB.Stats() +} + +func (s *SqliteDB) Info() interface{} { + return s.DB.Stats() +} + +func (s *SqliteDB) SetFreelistType(freelistType string) { + s.FreeListType = freelistType +} + +func (s *SqliteDB) FreelistType() string { + return s.FreeListType +} + +func (s *SqliteDB) DBType() string { + return "sqlite" +} + +func (s *SqliteDB) HashBuckets(ignores func(bucketName []byte, keyName []byte) bool) (uint32, error) { + h := crc32.New(crc32.MakeTable(crc32.Castagnoli)) + // todo(logicalhan) fixme + return h.Sum32(), nil +} + +func (s *SqliteDB) Defrag(logger *zap.Logger, dbopts interface{}, limit int) error { + _, err := s.DB.Exec(defragCommand) + return err +} + +type SqliteTx struct { + tx *sql.Tx + db *sql.DB + dir string + dbName string + writable bool + size int64 +} + +func (s *SqliteTx) DB() interfaces.DB { + return newDB(s.db, s.dir, s.dbName) +} + +func (s *SqliteTx) Size() int64 { + if s.size == 0 { + return s.DB().Size() + } + return s.size +} + +func (s *SqliteTx) Writable() bool { + return s.writable +} + +func (s *SqliteTx) Stats() interface{} { + panic("implement me") +} + +func (s *SqliteTx) Bucket(name []byte) interfaces.Bucket { + tableName := resolveTableName(string(name)) + return &SqliteBucket{ + name: tableName, + db: s.db, + dbName: s.dbName, + TX: s.tx, + dir: s.dir, + writable: s.writable, + } +} + +func (s *SqliteTx) CreateBucket(name []byte) (interfaces.Bucket, error) { 
+ tableName := resolveTableName(string(name)) + query := fmt.Sprintf(createBucketQuery, tableName) + _, err := s.tx.Exec(query) + if err != nil { + return nil, err + } + return &SqliteBucket{ + name: tableName, + db: s.db, + TX: s.tx, + dbName: s.dbName, + dir: s.dir, + writable: s.writable, + }, nil +} + +func (s *SqliteTx) DeleteBucket(name []byte) error { + _, err := s.tx.Exec(dropBucketQuery, string(name)) + return err +} + +func (s *SqliteTx) ForEach(i interface{}) error { + //TODO implement me + panic("implement me") +} + +func (s *SqliteTx) Observe(rebalanceHist, spillHist, writeHist prometheus.Histogram) { + // no-opt +} + +func (s *SqliteTx) WriteTo(w io.Writer) (n int64, err error) { + tmpdir := os.TempDir() + os.MkdirAll(tmpdir, 0755) + backup := tmpdir + "etcd.sqlite" + os.Remove(backup) + if _, err := s.db.Exec(`VACUUM main INTO ?;`, backup); err != nil { + return 0, err + } + stat, err := os.Stat(backup) + if err != nil { + return 0, err + } + size := stat.Size() + s.size = size + f, err := os.Open(backup) + defer f.Close() + if err != nil { + return 0, err + } + + r := bufio.NewReader(f) + return r.WriteTo(w) +} + +func (s *SqliteTx) CopyDatabase(lg *zap.Logger, dst string) (err error) { + //TODO implement me + panic("implement me") +} + +func (s *SqliteTx) Commit() error { + return s.tx.Commit() +} + +func (s *SqliteTx) Rollback() error { + return s.tx.Rollback() +} + +type SqliteBucket struct { + TX *sql.Tx + name string + dbName string + db *sql.DB + dir string + writable bool +} + +func (s *SqliteBucket) Tx() interfaces.Tx { + return &SqliteTx{ + tx: s.TX, + db: s.db, + dbName: s.dbName, + dir: s.dir, + writable: s.writable, + } +} + +func (s *SqliteBucket) Writable() bool { + return s.writable +} + +func (s *SqliteBucket) Get(key []byte) []byte { + query := fmt.Sprintf(genericGet, s.name) + r, err := s.TX.Query(query, string(key)) + defer r.Close() + if err != nil { + return nil + } + val := []byte{} + + for r.Next() { + var v []byte + if err := 
r.Scan(&v); err != nil { + // Check for a scan error. + // Query rows will be closed with defer. + log.Fatal(err) + } + val = append(val, v...) + } + return val +} + +func (s *SqliteBucket) Put(key []byte, value []byte) error { + query := fmt.Sprintf(genericUpsert, s.name) + _, err := s.TX.Exec(query, string(key), value) + return err +} + +func (s *SqliteBucket) UnsafeRange(key, endKey []byte, limit int64) (keys [][]byte, vs [][]byte) { + if endKey == nil || limit == 0 || limit == 1 { + query := fmt.Sprintf(genericGet, s.name) + r, err := s.TX.Query(query, string(key)) + defer r.Close() + if err != nil { + return + } + for r.Next() { + var val []byte + r.Scan(&val) + keys = append(keys, key) + vs = append(vs, val) + return + } + } + var query string + var r *sql.Rows + var err error + if endKey == nil { + query = fmt.Sprintf(genericUnsafeRangeQueryNoEnd, s.name) + r, err = s.TX.Query(query, string(key), limit) + } else { + query := fmt.Sprintf(genericUnsafeRangeQuery, s.name) + r, err = s.TX.Query(query, string(key), string(endKey), limit) + } + + if err != nil { + return nil, nil + } + defer r.Close() + names := make([][]byte, 0) + values := make([][]byte, 0) + for r.Next() { + var key string + var v []byte + if err := r.Scan(&key, &v); err != nil { + // Check for a scan error. + // Query rows will be closed with defer. 
+ log.Fatal(err) + } + names = append(names, []byte(key)) + values = append(values, v) + } + return names, values +} + +func (s *SqliteBucket) Delete(key []byte) error { + query := fmt.Sprintf(genericDelete, s.name) + _, err := s.TX.Exec(query, string(key)) + return err +} + +func (s *SqliteBucket) ForEach(f func(k []byte, v []byte) error) error { + query := fmt.Sprintf(genericForEach, s.name) + r, err := s.TX.Query(query) + defer r.Close() + if err != nil { + return err + } + for r.Next() { + var key string + var v []byte + if err := r.Scan(&key, &v); err != nil { + return err + } + if err := f([]byte(key), v); err != nil { + return err + } + + } + return err +} + +func (s *SqliteBucket) Stats() interface{} { + return nil +} + +func (s *SqliteBucket) SetFillPercent(f float64) { + return +} diff --git a/server/databases/sqlite/sqlite_test.go b/server/databases/sqlite/sqlite_test.go new file mode 100644 index 00000000000..14bd99e937b --- /dev/null +++ b/server/databases/sqlite/sqlite_test.go @@ -0,0 +1,175 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sqlite_test + +import ( + "bytes" + "strconv" + "testing" + + "go.etcd.io/etcd/server/v3/bucket" + "go.etcd.io/etcd/server/v3/databases/sqlite" +) + +func TestOpen(t *testing.T) { + _, err := sqlite.NewSqliteDB(t.TempDir()+"/db", bucket.Buckets...) 
+ if err != nil { + t.Fatalf("expected no err, got %v", err) + } +} + +func TestBuckets(t *testing.T) { + db, err := sqlite.NewSqliteDB(t.TempDir()+"/db", bucket.Buckets...) + if err != nil { + t.Fatalf("expected no err, got %v", err) + } + // create a extra bucket + db.CreateBucket("foo") + tables := db.Buckets() + + if len(tables) != len(bucket.Buckets)+1 { + t.Errorf("got %v buckets, wanted %d", len(tables), len(bucket.Buckets)+1) + } +} + +func TestSize(t *testing.T) { + db, err := sqlite.NewSqliteDB(t.TempDir()+"/db", bucket.Buckets...) + if err != nil { + t.Fatalf("expected no err, got %v", err) + } + originalSize := db.Size() + for i := 0; i < 100; i++ { + if _, err := db.DB.Exec(sqlite.UpsertKV, "key-"+strconv.Itoa(i), make([]byte, 1000)); err != nil { + t.Fatalf("error inserting %s", strconv.Itoa(i)) + } + } + newSize := db.Size() + if originalSize == newSize { + t.Errorf("got %d size, but want original(%d)!=new(%d)", newSize, originalSize, newSize) + } +} + +func TestPutAndGetFromBucket(t *testing.T) { + db, err := sqlite.NewSqliteDB(t.TempDir()+"/db", bucket.Buckets...) + if err != nil { + t.Fatalf("expected no err, got %v", err) + } + + txn, err := db.Begin(true) + if err != nil { + t.Errorf("expected no err, got %v", err) + } + testBucket := []byte("test") + txn.CreateBucket(testBucket) + b := txn.Bucket(testBucket) + firstVal := []byte("firstval") + firstKey := []byte("firstkey") + b.Put(firstKey, firstVal) + txn.Commit() + txn2, err := db.Begin(true) + if err != nil { + t.Errorf("expected no err, got %v", err) + } + b2 := txn2.Bucket(testBucket) + v2 := b2.Get(firstKey) + + if err := b2.Tx().Commit(); err != nil { + t.Errorf("unexpected err %v", err) + } + if !bytes.Equal(firstVal, v2) { + t.Errorf("got %v, want %v", v2, firstVal) + } +} + +func TestUnsafeRange(t *testing.T) { + db, err := sqlite.NewSqliteDB(t.TempDir()+"/db", bucket.Buckets...) 
+ if err != nil { + t.Fatalf("expected no err, got %v", err) + } + for i := 0; i < 1000; i++ { + if _, err := db.DB.Exec(sqlite.UpsertKV, "key-"+strconv.Itoa(i), make([]byte, 1000)); err != nil { + t.Fatalf("error inserting %s", strconv.Itoa(i)) + } + } + txn, err := db.Begin(true) + if err != nil { + t.Errorf("expected no err, got %v", err) + } + keys, _ := txn.Bucket([]byte("KVs")).UnsafeRange([]byte("key-1"), []byte("key-300"), 100) + if len(keys) != 100 { + t.Errorf("got %d keys, expected %d keys", len(keys), 100) + } +} + +func TestUnsafeRangeUncommitted(t *testing.T) { + db, err := sqlite.NewSqliteDB(t.TempDir()+"/db", bucket.Buckets...) + if err != nil { + t.Fatalf("expected no err, got %v", err) + } + txn, err := db.Begin(true) + if err != nil { + t.Errorf("expected no err, got %v", err) + } + bkt := txn.Bucket([]byte("KVs")) + for i := 0; i < 1000; i++ { + stringNum := strconv.Itoa(i) + if len(stringNum) == 1 { + stringNum = "00" + stringNum + } else if len(stringNum) == 2 { + stringNum = "0" + stringNum + } + if err := bkt.Put([]byte("key-"+stringNum), make([]byte, 1000)); err != nil { + t.Fatalf("error inserting %s", strconv.Itoa(i)) + } + } + if err != nil { + t.Errorf("expected no err, got %v", err) + } + keys, _ := bkt.UnsafeRange([]byte("key-0"), []byte("key-300"), 100) + if len(keys) != 100 { + t.Errorf("got %d keys, expected %d keys", len(keys), 100) + } + if string(keys[0]) != "key-000" { + t.Errorf("got %s, wanted %s", string(keys[0]), "key-000") + } + if string(keys[99]) != "key-099" { + t.Errorf("got %s, wanted %s", string(keys[0]), "key-099") + } +} + +func TestForEach(t *testing.T) { + db, err := sqlite.NewSqliteDB(t.TempDir()+"/db", bucket.Buckets...) 
+ if err != nil { + t.Fatalf("expected no err, got %v", err) + } + + for i := 0; i < 1000; i++ { + if _, err := db.DB.Exec(sqlite.UpsertKV, "key-"+strconv.Itoa(i), "value-"+strconv.Itoa(i)); err != nil { + t.Fatalf("error inserting %s", strconv.Itoa(i)) + } + } + txn, err := db.Begin(true) + if err != nil { + t.Errorf("expected no err, got %v", err) + } + err = txn.Bucket([]byte("KVs")).ForEach(func(k []byte, v []byte) error { + return nil + }) + if err != nil { + t.Errorf("expected no err, got %v", err) + } +} diff --git a/server/embed/config.go b/server/embed/config.go index affdf6a4a7c..dce2f672587 100644 --- a/server/embed/config.go +++ b/server/embed/config.go @@ -109,6 +109,9 @@ const ( // DefaultBackendType defaults the backend to bolt DefaultBackendType = "bolt" + + // SqliteBackendType defaults the backend to sqlite + SqliteBackendType = "sqlite" ) var ( @@ -474,13 +477,10 @@ type securityConfig struct { } func NewSqliteConfig() *Config { - c := NewConfig() - c.ExperimentalBackendType = "sqlite" - return c + return NewGenericConfig(SqliteBackendType) } -// NewConfig creates a new Config populated with default values. -func NewConfig() *Config { +func NewGenericConfig(backendType string) *Config { lpurl, _ := url.Parse(DefaultListenPeerURLs) apurl, _ := url.Parse(DefaultInitialAdvertisePeerURLs) lcurl, _ := url.Parse(DefaultListenClientURLs) @@ -498,7 +498,7 @@ func NewConfig() *Config { MaxRequestBytes: DefaultMaxRequestBytes, MaxConcurrentStreams: DefaultMaxConcurrentStreams, ExperimentalWarningApplyDuration: DefaultWarningApplyDuration, - ExperimentalBackendType: DefaultBackendType, + ExperimentalBackendType: backendType, GRPCKeepAliveMinTime: DefaultGRPCKeepAliveMinTime, GRPCKeepAliveInterval: DefaultGRPCKeepAliveInterval, @@ -571,6 +571,11 @@ func NewConfig() *Config { return cfg } +// NewConfig creates a new Config populated with default values. 
+func NewConfig() *Config { + return NewGenericConfig(DefaultBackendType) +} + func ConfigFromFile(path string) (*Config, error) { cfg := &configYAML{Config: *NewConfig()} if err := cfg.configFromFile(path); err != nil { @@ -810,6 +815,9 @@ func (cfg *Config) Validate() error { return fmt.Errorf("distributed tracing configurition is not valid: (%v)", err) } } + if cfg.ExperimentalBackendType == "sqlite" { + cfg.logger.Warn("Detected sqlite as a backend.") + } if !cfg.ExperimentalEnableLeaseCheckpointPersist && cfg.ExperimentalEnableLeaseCheckpoint { cfg.logger.Warn("Detected that checkpointing is enabled without persistence. Consider enabling experimental-enable-lease-checkpoint-persist") diff --git a/server/embed/etcd.go b/server/embed/etcd.go index 7eeb6aa08e5..2ac782845d6 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -373,6 +373,7 @@ func print(lg *zap.Logger, ec Config, sc config.ServerConfig, memberInitialized zap.String("downgrade-check-interval", sc.DowngradeCheckTime.String()), zap.Int("max-learners", sc.ExperimentalMaxLearners), + zap.String("backend-type", sc.ExperimentalBackendType), ) } diff --git a/server/go.mod b/server/go.mod index cba5b151eb6..4ee4d8130cf 100644 --- a/server/go.mod +++ b/server/go.mod @@ -6,6 +6,7 @@ require ( github.com/coreos/go-semver v0.3.1 github.com/coreos/go-systemd/v22 v22.5.0 github.com/dustin/go-humanize v1.0.1 + github.com/glebarez/go-sqlite v1.21.2 github.com/gogo/protobuf v1.3.2 github.com/golang-jwt/jwt/v4 v4.5.0 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da @@ -53,13 +54,16 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/go-logr/logr v1.2.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/uuid v1.3.0 // indirect github.com/gorilla/websocket v1.4.2 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/mattn/go-isatty v0.0.17 // indirect 
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/common v0.43.0 // indirect github.com/prometheus/procfs v0.10.1 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/pflag v1.0.5 // indirect go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.11.2 // indirect @@ -72,6 +76,10 @@ require ( golang.org/x/text v0.11.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + modernc.org/libc v1.22.5 // indirect + modernc.org/mathutil v1.5.0 // indirect + modernc.org/memory v1.5.0 // indirect + modernc.org/sqlite v1.23.1 // indirect sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6 // indirect ) diff --git a/server/go.sum b/server/go.sum index 0adddd81028..add056052af 100644 --- a/server/go.sum +++ b/server/go.sum @@ -83,6 +83,8 @@ github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go. 
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/getsentry/raven-go v0.2.0 h1:no+xWJRb5ZI7eE8TWgIq1jLulQiIoLG0IfYxv5JYMGs= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/glebarez/go-sqlite v1.21.2 h1:3a6LFC4sKahUunAmynQKLZceZCOzUthkRkEAl9gAXWo= +github.com/glebarez/go-sqlite v1.21.2/go.mod h1:sfxdZyhQjTM2Wry3gVYWaW072Ri1WMdWJi0k6+3382k= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -155,8 +157,11 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= @@ -187,6 +192,8 @@ 
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= +github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= @@ -203,6 +210,9 @@ github.com/prometheus/common v0.43.0 h1:iq+BVjvYLei5f27wiuNiB1DN6DYQkp1c8Bx0Vykh github.com/prometheus/common v0.43.0/go.mod h1:NCvr5cQIh3Y/gy73/RdVtC9r8xxrxwJnB+2lB3BxrFc= github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= @@ -387,6 +397,7 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -559,6 +570,14 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +modernc.org/libc v1.22.5 h1:91BNch/e5B0uPbJFgqbxXuOnxBQjlS//icfQEGmvyjE= +modernc.org/libc v1.22.5/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY= +modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= +modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds= +modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= +modernc.org/sqlite v1.23.1 h1:nrSBg4aRQQwq59JpvGEQ15tNxoO5pX/kUjcRNwSAGQM= +modernc.org/sqlite v1.23.1/go.mod h1:OrDj17Mggn6MhE+iPbBNf7RGKODDE9NFT0f3EwDzJqk= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/server/storage/backend/backend.go b/server/storage/backend/backend.go index 8aad12192de..e72e8ec05b8 100644 --- 
a/server/storage/backend/backend.go +++ b/server/storage/backend/backend.go @@ -20,6 +20,8 @@ import ( "sync/atomic" "time" + "go.etcd.io/etcd/server/v3/databases/sqlite" + "go.etcd.io/etcd/server/v3/bucket" "go.etcd.io/etcd/server/v3/databases/bbolt" "go.etcd.io/etcd/server/v3/interfaces" @@ -177,8 +179,9 @@ func New(bcfg BackendConfig) Backend { if bcfg.BackendType == "" || bcfg.BackendType == "bolt" { return newBoltBackend(bcfg) } + println("Using sqlite") // todo(logicalhan) replace with sqlite - return newBoltBackend(bcfg) + return newSqliteBackend(bcfg) } func NewDefaultBackend(lg *zap.Logger, path string, backendType string) Backend { @@ -191,8 +194,56 @@ func NewDefaultBackend(lg *zap.Logger, path string, backendType string) Backend return newBoltBackend(bcfg) } else { // todo(logicalhan) replace with sqlite - return newBoltBackend(bcfg) + return newSqliteBackend(bcfg) + } +} + +func newSqliteBackend(bcfg BackendConfig) *backend { + + db, err := sqlite.NewSqliteDB(bcfg.Path, bucket.Buckets...) + if err != nil { + bcfg.Logger.Panic("failed to open database", zap.String("path", bcfg.Path), zap.Error(err)) } + + // In future, may want to make buffering optional for low-concurrency systems + // or dynamically swap between buffered/non-buffered depending on workload. 
+ b := &backend{ + db: db, + + batchInterval: bcfg.BatchInterval, + batchLimit: bcfg.BatchLimit, + mlock: bcfg.Mlock, + backendType: bcfg.BackendType, + + readTx: &readTx{ + baseReadTx: baseReadTx{ + buf: txReadBuffer{ + txBuffer: txBuffer{make(map[bucket.BucketID]*bucketBuffer)}, + bufVersion: 0, + }, + buckets: make(map[bucket.BucketID]interfaces.Bucket), + txWg: new(sync.WaitGroup), + txMu: new(sync.RWMutex), + }, + }, + txReadBufferCache: txReadBufferCache{ + mu: sync.Mutex{}, + bufVersion: 0, + buf: nil, + }, + + stopc: make(chan struct{}), + donec: make(chan struct{}), + + lg: bcfg.Logger, + } + + b.batchTx = newBatchTxBuffered(b) + // We set it after newBatchTxBuffered to skip the 'empty' commit. + b.hooks = bcfg.Hooks + + go b.run() + return b } func newBoltBackend(bcfg BackendConfig) *backend { diff --git a/tests/go.mod b/tests/go.mod index aed29b9b773..d10f7786b2a 100644 --- a/tests/go.mod +++ b/tests/go.mod @@ -59,6 +59,7 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/fatih/color v1.15.0 // indirect + github.com/glebarez/go-sqlite v1.21.2 // indirect github.com/go-logr/logr v1.2.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -66,6 +67,7 @@ require ( github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/btree v1.1.2 // indirect + github.com/google/uuid v1.3.0 // indirect github.com/gorilla/websocket v1.4.2 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect @@ -78,6 +80,7 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/prometheus/client_model v0.4.0 // indirect github.com/prometheus/procfs v0.10.1 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.2.0 // indirect 
github.com/sirupsen/logrus v1.8.1 // indirect github.com/spf13/cobra v1.7.0 // indirect @@ -99,6 +102,10 @@ require ( gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + modernc.org/libc v1.22.5 // indirect + modernc.org/mathutil v1.5.0 // indirect + modernc.org/memory v1.5.0 // indirect + modernc.org/sqlite v1.23.1 // indirect sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6 // indirect sigs.k8s.io/yaml v1.3.0 // indirect ) diff --git a/tests/go.sum b/tests/go.sum index ec7fe98fcc6..c65b32c6c44 100644 --- a/tests/go.sum +++ b/tests/go.sum @@ -95,6 +95,8 @@ github.com/fatih/color v1.15.0 h1:kOqh6YHBtK8aywxGerMG2Eq3H6Qgoqeo13Bk2Mv/nBs= github.com/fatih/color v1.15.0/go.mod h1:0h5ZqXfHYED7Bhv2ZJamyIOUej9KtShiJESRwBDUSsw= github.com/getsentry/raven-go v0.2.0 h1:no+xWJRb5ZI7eE8TWgIq1jLulQiIoLG0IfYxv5JYMGs= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/glebarez/go-sqlite v1.21.2 h1:3a6LFC4sKahUunAmynQKLZceZCOzUthkRkEAl9gAXWo= +github.com/glebarez/go-sqlite v1.21.2/go.mod h1:sfxdZyhQjTM2Wry3gVYWaW072Ri1WMdWJi0k6+3382k= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -168,8 +170,11 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof 
v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= +github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= @@ -226,6 +231,9 @@ github.com/prometheus/common v0.43.0 h1:iq+BVjvYLei5f27wiuNiB1DN6DYQkp1c8Bx0Vykh github.com/prometheus/common v0.43.0/go.mod h1:NCvr5cQIh3Y/gy73/RdVtC9r8xxrxwJnB+2lB3BxrFc= github.com/prometheus/procfs v0.10.1 h1:kYK1Va/YMlutzCGazswoHKo//tZVlFpKYh+PymziUAg= github.com/prometheus/procfs v0.10.1/go.mod h1:nwNm2aOCAYw8uTR/9bWRREkZFxAUcWzPHWJq+XBB/FM= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= @@ -590,6 +598,14 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools 
v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +modernc.org/libc v1.22.5 h1:91BNch/e5B0uPbJFgqbxXuOnxBQjlS//icfQEGmvyjE= +modernc.org/libc v1.22.5/go.mod h1:jj+Z7dTNX8fBScMVNRAYZ/jF91K8fdT2hYMThc3YjBY= +modernc.org/mathutil v1.5.0 h1:rV0Ko/6SfM+8G+yKiyI830l3Wuz1zRutdslNoQ0kfiQ= +modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= +modernc.org/memory v1.5.0 h1:N+/8c5rE6EqugZwHii4IFsaJ7MUhoWX07J5tC/iI5Ds= +modernc.org/memory v1.5.0/go.mod h1:PkUhL0Mugw21sHPeskwZW4D6VscE/GQJOnIpCnW6pSU= +modernc.org/sqlite v1.23.1 h1:nrSBg4aRQQwq59JpvGEQ15tNxoO5pX/kUjcRNwSAGQM= +modernc.org/sqlite v1.23.1/go.mod h1:OrDj17Mggn6MhE+iPbBNf7RGKODDE9NFT0f3EwDzJqk= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=