diff --git a/server/config/config.go b/server/config/config.go index 48de650b8a1..d8c9991cdad 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -65,6 +65,8 @@ type ServerConfig struct { BackendBatchInterval time.Duration // BackendBatchLimit is the maximum operations before commit the backend transaction. BackendBatchLimit int + // DefragBatchLimit is the number of keys iterated before committing a transaction during defragmentation. + DefragBatchLimit int // BackendFreelistType is the type of the backend boltdb freelist. BackendFreelistType bolt.FreelistType diff --git a/server/embed/config.go b/server/embed/config.go index 2f4a3416742..ccd675a923a 100644 --- a/server/embed/config.go +++ b/server/embed/config.go @@ -338,6 +338,8 @@ type Config struct { // TODO: Delete in v3.7 ExperimentalEnableLeaseCheckpointPersist bool `json:"experimental-enable-lease-checkpoint-persist"` ExperimentalCompactionBatchLimit int `json:"experimental-compaction-batch-limit"` + // ExperimentalDefragBatchLimit is the number of keys iterated before committing a transaction during defragmentation. + ExperimentalDefragBatchLimit int `json:"experimental-defrag-limit"` // ExperimentalCompactionSleepInterval is the sleep interval between every etcd compaction loop. ExperimentalCompactionSleepInterval time.Duration `json:"experimental-compaction-sleep-interval"` ExperimentalWatchProgressNotifyInterval time.Duration `json:"experimental-watch-progress-notify-interval"` diff --git a/server/embed/etcd.go b/server/embed/etcd.go index 3b0e4db7c01..cbcdf2c468b 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -213,6 +213,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { EnableLeaseCheckpoint: cfg.ExperimentalEnableLeaseCheckpoint, LeaseCheckpointPersist: cfg.ExperimentalEnableLeaseCheckpointPersist, CompactionBatchLimit: cfg.ExperimentalCompactionBatchLimit, + DefragBatchLimit: cfg.ExperimentalDefragBatchLimit, CompactionSleepInterval: cfg.ExperimentalCompactionSleepInterval, WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval, DowngradeCheckTime: cfg.ExperimentalDowngradeCheckTime, diff --git a/server/etcdmain/config.go b/server/etcdmain/config.go index 954a08727d1..0d5ef875e6f 100644 --- a/server/etcdmain/config.go +++ b/server/etcdmain/config.go @@ -283,6 +283,7 @@ func newConfig() *config { fs.IntVar(&cfg.ec.ExperimentalMaxLearners, "experimental-max-learners", membership.DefaultMaxLearners, "Sets the maximum number of learners that can be available in the cluster membership.") fs.DurationVar(&cfg.ec.ExperimentalWaitClusterReadyTimeout, "experimental-wait-cluster-ready-timeout", cfg.ec.ExperimentalWaitClusterReadyTimeout, "Maximum duration to wait for the cluster to be ready.") fs.Uint64Var(&cfg.ec.SnapshotCatchUpEntries, "experimental-snapshot-catchup-entries", cfg.ec.SnapshotCatchUpEntries, "Number of entries for a slow follower to catch up after compacting the the raft storage entries.") + fs.IntVar(&cfg.ec.ExperimentalDefragBatchLimit, "experimental-defrag-limit", cfg.ec.ExperimentalDefragBatchLimit, "Number of keys iterated before committing a transaction during defragmentation.") // unsafe fs.BoolVar(&cfg.ec.UnsafeNoFsync, "unsafe-no-fsync", false, "Disables fsync, unsafe, will cause data loss.") diff --git a/server/etcdmain/help.go b/server/etcdmain/help.go index 67b67add816..242e28aebd0 100644 --- a/server/etcdmain/help.go +++ b/server/etcdmain/help.go @@ -257,6 +257,8 @@ Experimental feature: ExperimentalEnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases. --experimental-compaction-batch-limit 1000 ExperimentalCompactionBatchLimit sets the maximum revisions deleted in each compaction batch. + --experimental-defrag-limit 10000 + ExperimentalDefragBatchLimit sets the number of keys iterated before committing a transaction during defragmentation. --experimental-peer-skip-client-san-verification 'false' Skip verification of SAN field in client certificate for peer connections. --experimental-watch-progress-notify-interval '10m' diff --git a/server/storage/backend.go b/server/storage/backend.go index a93fd8a3f11..fcf0de57932 100644 --- a/server/storage/backend.go +++ b/server/storage/backend.go @@ -44,6 +44,12 @@ func newBackend(cfg config.ServerConfig, hooks backend.Hooks) backend.Backend { cfg.Logger.Info("setting backend batch interval", zap.Duration("batch interval", cfg.BackendBatchInterval)) } } + if cfg.DefragBatchLimit != 0 { + bcfg.DefragBatchLimit = cfg.DefragBatchLimit + if cfg.Logger != nil { + cfg.Logger.Info("setting backend defrag limit", zap.Int("defrag limit", cfg.DefragBatchLimit)) + } + } bcfg.BackendFreelistType = cfg.BackendFreelistType bcfg.Logger = cfg.Logger if cfg.QuotaBackendBytes > 0 && cfg.QuotaBackendBytes != DefaultQuotaBytes { diff --git a/server/storage/backend/backend.go b/server/storage/backend/backend.go index e7b951ee7e6..659b848b6ed 100644 --- a/server/storage/backend/backend.go +++ b/server/storage/backend/backend.go @@ -34,7 +34,7 @@ var ( defaultBatchLimit = 10000 defaultBatchInterval = 100 * time.Millisecond - defragLimit = 10000 + defaultDefragBatchLimit = 10000 // initialMmapSize is the initial size of the mmapped region. Setting this larger than // the potential max db size can prevent writer from blocking reader. @@ -111,6 +111,8 @@ type backend struct { batchLimit int batchTx *batchTxBuffered + defragLimit int + readTx *readTx // txReadBufferCache mirrors "txReadBuffer" within "readTx" -- readTx.baseReadTx.buf. // When creating "concurrentReadTx": @@ -136,6 +138,8 @@ type BackendConfig struct { BatchInterval time.Duration // BatchLimit is the maximum puts before flushing the BatchTx. BatchLimit int + // DefragBatchLimit is the number of keys iterated before committing a transaction during defragmentation. + DefragBatchLimit int // BackendFreelistType is the backend boltdb's freelist type. BackendFreelistType bolt.FreelistType // MmapSize is the number of bytes to mmap for the backend. @@ -155,6 +159,7 @@ func DefaultBackendConfig(lg *zap.Logger) BackendConfig { return BackendConfig{ BatchInterval: defaultBatchInterval, BatchLimit: defaultBatchLimit, + DefragBatchLimit: defaultDefragBatchLimit, MmapSize: initialMmapSize, Logger: lg, } @@ -194,6 +199,7 @@ func newBackend(bcfg BackendConfig) *backend { batchInterval: bcfg.BatchInterval, batchLimit: bcfg.BatchLimit, + defragLimit: bcfg.DefragBatchLimit, mlock: bcfg.Mlock, readTx: &readTx{ @@ -503,7 +509,7 @@ func (b *backend) defrag() error { ) } // gofail: var defragBeforeCopy struct{} - err = defragdb(b.db, tmpdb, defragLimit) + err = defragdb(b.db, tmpdb, b.defragLimit) if err != nil { tmpdb.Close() if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil {