Skip to content

Commit

Permalink
backend: add experimental defrag txn limit flag
Browse files Browse the repository at this point in the history
Signed-off-by: Steven Johnson <[email protected]>
Signed-off-by: sjdot <[email protected]>
  • Loading branch information
sjdot committed Mar 18, 2023
1 parent 736c893 commit 461ebc3
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 2 deletions.
2 changes: 2 additions & 0 deletions server/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ type ServerConfig struct {
BackendBatchInterval time.Duration
// BackendBatchLimit is the maximum number of operations before committing the backend transaction.
BackendBatchLimit int
// DefragLimit is the number of keys iterated before committing a transaction during defragmentation.
DefragLimit int

// BackendFreelistType is the type of the backend boltdb freelist.
BackendFreelistType bolt.FreelistType
Expand Down
2 changes: 2 additions & 0 deletions server/embed/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,8 @@ type Config struct {
// TODO: Delete in v3.7
ExperimentalEnableLeaseCheckpointPersist bool `json:"experimental-enable-lease-checkpoint-persist"`
ExperimentalCompactionBatchLimit int `json:"experimental-compaction-batch-limit"`
// ExperimentalDefragLimit is the number of keys iterated before committing a transaction during defragmentation.
ExperimentalDefragLimit int `json:"experimental-defrag-limit"`
// ExperimentalCompactionSleepInterval is the sleep interval between every etcd compaction loop.
ExperimentalCompactionSleepInterval time.Duration `json:"experimental-compaction-sleep-interval"`
ExperimentalWatchProgressNotifyInterval time.Duration `json:"experimental-watch-progress-notify-interval"`
Expand Down
1 change: 1 addition & 0 deletions server/embed/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {
EnableLeaseCheckpoint: cfg.ExperimentalEnableLeaseCheckpoint,
LeaseCheckpointPersist: cfg.ExperimentalEnableLeaseCheckpointPersist,
CompactionBatchLimit: cfg.ExperimentalCompactionBatchLimit,
DefragLimit: cfg.ExperimentalDefragLimit,
CompactionSleepInterval: cfg.ExperimentalCompactionSleepInterval,
WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval,
DowngradeCheckTime: cfg.ExperimentalDowngradeCheckTime,
Expand Down
1 change: 1 addition & 0 deletions server/etcdmain/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ func newConfig() *config {
fs.IntVar(&cfg.ec.ExperimentalMaxLearners, "experimental-max-learners", membership.DefaultMaxLearners, "Sets the maximum number of learners that can be available in the cluster membership.")
fs.DurationVar(&cfg.ec.ExperimentalWaitClusterReadyTimeout, "experimental-wait-cluster-ready-timeout", cfg.ec.ExperimentalWaitClusterReadyTimeout, "Maximum duration to wait for the cluster to be ready.")
fs.Uint64Var(&cfg.ec.SnapshotCatchUpEntries, "experimental-snapshot-catchup-entries", cfg.ec.SnapshotCatchUpEntries, "Number of entries for a slow follower to catch up after compacting the raft storage entries.")
fs.IntVar(&cfg.ec.ExperimentalDefragLimit, "experimental-defrag-limit", cfg.ec.ExperimentalDefragLimit, "Number of keys iterated before committing a transaction during defragmentation.")

// unsafe
fs.BoolVar(&cfg.ec.UnsafeNoFsync, "unsafe-no-fsync", false, "Disables fsync, unsafe, will cause data loss.")
Expand Down
2 changes: 2 additions & 0 deletions server/etcdmain/help.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,8 @@ Experimental feature:
ExperimentalEnableLeaseCheckpoint enables primary lessor to persist lease remainingTTL to prevent indefinite auto-renewal of long lived leases.
--experimental-compaction-batch-limit 1000
ExperimentalCompactionBatchLimit sets the maximum revisions deleted in each compaction batch.
--experimental-defrag-limit 10000
ExperimentalDefragLimit sets the number of keys iterated before committing a transaction during defragmentation.
--experimental-peer-skip-client-san-verification 'false'
Skip verification of SAN field in client certificate for peer connections.
--experimental-watch-progress-notify-interval '10m'
Expand Down
6 changes: 6 additions & 0 deletions server/storage/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ func newBackend(cfg config.ServerConfig, hooks backend.Hooks) backend.Backend {
cfg.Logger.Info("setting backend batch interval", zap.Duration("batch interval", cfg.BackendBatchInterval))
}
}
if cfg.DefragLimit != 0 {
bcfg.DefragLimit = cfg.DefragLimit
if cfg.Logger != nil {
cfg.Logger.Info("setting backend defrag limit", zap.Int("defrag limit", cfg.DefragLimit))
}
}
bcfg.BackendFreelistType = cfg.BackendFreelistType
bcfg.Logger = cfg.Logger
if cfg.QuotaBackendBytes > 0 && cfg.QuotaBackendBytes != DefaultQuotaBytes {
Expand Down
10 changes: 8 additions & 2 deletions server/storage/backend/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ var (
defaultBatchLimit = 10000
defaultBatchInterval = 100 * time.Millisecond

defragLimit = 10000
defaultDefragLimit = 10000

// initialMmapSize is the initial size of the mmapped region. Setting this larger than
// the potential max db size can prevent writer from blocking reader.
Expand Down Expand Up @@ -111,6 +111,8 @@ type backend struct {
batchLimit int
batchTx *batchTxBuffered

defragLimit int

readTx *readTx
// txReadBufferCache mirrors "txReadBuffer" within "readTx" -- readTx.baseReadTx.buf.
// When creating "concurrentReadTx":
Expand All @@ -136,6 +138,8 @@ type BackendConfig struct {
BatchInterval time.Duration
// BatchLimit is the maximum puts before flushing the BatchTx.
BatchLimit int
// DefragLimit is the number of keys iterated before committing a transaction during defragmentation.
DefragLimit int
// BackendFreelistType is the backend boltdb's freelist type.
BackendFreelistType bolt.FreelistType
// MmapSize is the number of bytes to mmap for the backend.
Expand All @@ -155,6 +159,7 @@ func DefaultBackendConfig(lg *zap.Logger) BackendConfig {
return BackendConfig{
BatchInterval: defaultBatchInterval,
BatchLimit: defaultBatchLimit,
DefragLimit: defaultDefragLimit,
MmapSize: initialMmapSize,
Logger: lg,
}
Expand Down Expand Up @@ -194,6 +199,7 @@ func newBackend(bcfg BackendConfig) *backend {

batchInterval: bcfg.BatchInterval,
batchLimit: bcfg.BatchLimit,
defragLimit: bcfg.DefragLimit,
mlock: bcfg.Mlock,

readTx: &readTx{
Expand Down Expand Up @@ -503,7 +509,7 @@ func (b *backend) defrag() error {
)
}
// gofail: var defragBeforeCopy struct{}
err = defragdb(b.db, tmpdb, defragLimit)
err = defragdb(b.db, tmpdb, b.defragLimit)
if err != nil {
tmpdb.Close()
if rmErr := os.RemoveAll(tmpdb.Path()); rmErr != nil {
Expand Down

0 comments on commit 461ebc3

Please sign in to comment.