Skip to content

Commit cbc8358

Browse files
Fixed OOM during seeding and added progress logging during seed stage
1 parent b8bd337 commit cbc8358

File tree

6 files changed

+112
-42
lines changed

6 files changed

+112
-42
lines changed

README.md

Lines changed: 37 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -117,44 +117,45 @@ Flags:
117117

118118
Environment Variables (Overrides):
119119
[Connection]
120-
PLGM_URI Connection URI
121-
PLGM_USERNAME Database User
122-
PLGM_PASSWORD Database Password (Recommended: Use Prompt)
123-
PLGM_DIRECT_CONNECTION Force direct connection (true/false)
124-
PLGM_REPLICASET_NAME Replica Set name
125-
PLGM_READ_PREFERENCE nearest
120+
PLGM_URI Connection URI
121+
PLGM_USERNAME Database User
122+
PLGM_PASSWORD Database Password (Recommended: Use Prompt)
123+
PLGM_DIRECT_CONNECTION Force direct connection (true/false)
124+
PLGM_REPLICASET_NAME Replica Set name
125+
PLGM_READ_PREFERENCE Read preference (e.g. nearest)
126126

127127
[Workload Core]
128-
PLGM_DEFAULT_WORKLOAD Use built-in workload (true/false)
129-
PLGM_COLLECTIONS_PATH Path to collection JSON
130-
PLGM_QUERIES_PATH Path to query JSON
131-
PLGM_DURATION Test duration (e.g. 60s, 5m)
132-
PLGM_CONCURRENCY Number of active workers
133-
PLGM_DOCUMENTS_COUNT Initial seed document count
134-
PLGM_DROP_COLLECTIONS Drop collections on start (true/false)
135-
PLGM_SKIP_SEED Do not seed initial data on start (true/false)
136-
PLGM_DEBUG_MODE Enable verbose logic logs (true/false)
137-
PLGM_USE_TRANSACTIONS Enable transactional workloads (true/false)
138-
PLGM_MAX_TRANSACTION_OPS Maximum number of operations to group into a single transaction block
128+
PLGM_DEFAULT_WORKLOAD Use built-in workload (true/false)
129+
PLGM_COLLECTIONS_PATH Path to collection JSON
130+
PLGM_QUERIES_PATH Path to query JSON
131+
PLGM_DURATION Test duration (e.g. 60s, 5m)
132+
PLGM_CONCURRENCY Number of active workers
133+
PLGM_DOCUMENTS_COUNT Initial seed document count
134+
PLGM_DROP_COLLECTIONS Drop collections on start (true/false)
135+
PLGM_SKIP_SEED Do not seed initial data on start (true/false)
136+
PLGM_SEED_BATCH_SIZE Number of documents to insert per batch during SEED phase
137+
PLGM_DEBUG_MODE Enable verbose logic logs (true/false)
138+
PLGM_USE_TRANSACTIONS Enable transactional workloads (true/false)
139+
PLGM_MAX_TRANSACTION_OPS Maximum number of operations to group into a single transaction block
139140

140141
[Operation Ratios] (Must sum to ~100)
141-
PLGM_FIND_PERCENT % of ops that are FIND
142-
PLGM_UPDATE_PERCENT % of ops that are UPDATE
143-
PLGM_INSERT_PERCENT % of ops that are INSERT
144-
PLGM_DELETE_PERCENT % of ops that are DELETE
145-
PLGM_AGGREGATE_PERCENT % of ops that are AGGREGATE
146-
PLGM_TRANSACTION_PERCENT % of ops that are TRANSACTIONAL
147-
PLGM_BULK_INSERT_PERCENT % of ops that are BULK INSERTS
142+
PLGM_FIND_PERCENT % of ops that are FIND
143+
PLGM_UPDATE_PERCENT % of ops that are UPDATE
144+
PLGM_INSERT_PERCENT % of ops that are INSERT
145+
PLGM_DELETE_PERCENT % of ops that are DELETE
146+
PLGM_AGGREGATE_PERCENT % of ops that are AGGREGATE
147+
PLGM_TRANSACTION_PERCENT % of ops that are TRANSACTIONAL
148+
PLGM_BULK_INSERT_PERCENT % of ops that are BULK INSERTS
148149

149150
[Performance Optimization]
150-
PLGM_FIND_BATCH_SIZE Docs returned per cursor batch
151-
PLGM_INSERT_BATCH_SIZE Number of docs in batch bulk insert
152-
PLGM_FIND_LIMIT Max docs per Find query
153-
PLGM_INSERT_CACHE_SIZE Generator buffer size
154-
PLGM_OP_TIMEOUT_MS Soft timeout per DB op (ms)
155-
PLGM_RETRY_ATTEMPTS Retry attempts for failures
156-
PLGM_RETRY_BACKOFF_MS Wait time between retries (ms)
157-
PLGM_STATUS_REFRESH_RATE_SEC Status report interval (sec)
151+
PLGM_FIND_BATCH_SIZE Docs returned per cursor batch
152+
PLGM_INSERT_BATCH_SIZE Number of docs in batch bulk insert
153+
PLGM_FIND_LIMIT Max docs per Find query
154+
PLGM_INSERT_CACHE_SIZE Generator buffer size
155+
PLGM_OP_TIMEOUT_MS Soft timeout per DB op (ms)
156+
PLGM_RETRY_ATTEMPTS Retry attempts for failures
157+
PLGM_RETRY_BACKOFF_MS Wait time between retries (ms)
158+
PLGM_STATUS_REFRESH_RATE_SEC Status report interval (sec)
158159
GOMAXPROCS Go Runtime CPU limit
159160
```
160161

@@ -246,6 +247,7 @@ You can override any setting in `config.yaml` using environment variables. This
246247
| `documents_count` | `PLGM_DOCUMENTS_COUNT` | Number of documents to seed initially | `10000` |
247248
| `drop_collections` | `PLGM_DROP_COLLECTIONS` | Drop collections before starting (`true`/`false`) | `true` |
248249
| `skip_seed` | `PLGM_SKIP_SEED` | Do not seed initial data on start (`true`/`false`) | `true` |
250+
| `seed_batch_size` | `PLGM_SEED_BATCH_SIZE` | Number of documents to insert per batch during SEED phase | `1000` |
249251
| `debug_mode` | `PLGM_DEBUG_MODE` | Enable verbose debug logging (`true`/`false`) | `false` |
250252
| `use_transactions` | `PLGM_USE_TRANSACTIONS` | Enable Transactional Workloads (`true`/`false`) | `false` |
251253
| `max_transaction_ops` | `PLGM_MAX_TRANSACTION_OPS` | Maximum number of operations to group into a single transaction block | `5` |
@@ -482,6 +484,9 @@ These settings affect the efficiency of individual database operations and memor
482484
* *Default:* `10`
483485
* **`insert_batch_size`**: The number of documents to be inserted by bulk inserts.
484486
* *Default:* `10`
487+
* **`seed_batch_size`**: The number of documents grouped into a single InsertMany call during the initial data seeding phase.
488+
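* *Tip:* Seeding in batches keeps memory usage stable when inserting millions of documents, because only one batch is held in memory at a time. A value of 1000 is a good balance between insert throughput and memory use.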
489+
* *Default:* `1000`
485490
* **`find_limit`**: The hard limit on documents returned for `find` operations.
486491
* *Default:* `5`
487492
* **`insert_cache_size`**: The buffer size for the document generator channel.

cmd/plgm/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ func main() {
5757
fmt.Fprintf(os.Stderr, " %-35s %s\n", "PLGM_DOCUMENTS_COUNT", "Initial seed document count")
5858
fmt.Fprintf(os.Stderr, " %-35s %s\n", "PLGM_DROP_COLLECTIONS", "Drop collections on start (true/false)")
5959
fmt.Fprintf(os.Stderr, " %-35s %s\n", "PLGM_SKIP_SEED", "Do not seed initial data on start (true/false)")
60+
fmt.Fprintf(os.Stderr, " %-35s %s\n", "PLGM_SEED_BATCH_SIZE", "Number of documents to insert per batch during SEED phase")
6061
fmt.Fprintf(os.Stderr, " %-35s %s\n", "PLGM_DEBUG_MODE", "Enable verbose logic logs (true/false)")
6162
fmt.Fprintf(os.Stderr, " %-35s %s\n", "PLGM_USE_TRANSACTIONS", "Enable transactional workloads (true/false)")
6263
fmt.Fprintf(os.Stderr, " %-35s %s\n", "PLGM_MAX_TRANSACTION_OPS", "Maximum number of operations to group into a single transaction block")

config.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ drop_collections: false
6262
skip_seed: true
6363

6464
# Number of documents to insert during the initialization "Seed" phase.
65-
documents_count: 1000
65+
documents_count: 100000
6666

6767
# Number of concurrent "Active Workers" to spawn.
6868
# Higher = more load, provided the client CPU/Network can handle it.
@@ -105,7 +105,8 @@ use_findone_for_limit_one: true # Optimization: use FindOne() if limit is 1.
105105

106106
# --- Write Operations ---
107107
insert_cache_size: 1000 # Size of the internal buffer for generated documents.
108-
insert_batch_size: 10 # Number of documents per InsertMany call
108+
insert_batch_size: 10 # Number of documents per InsertMany call during WORKLOAD phase.
109+
seed_batch_size: 1000 # Number of documents per InsertMany call during SEED phase.
109110

110111
# --- Monitoring ---
111112
status_refresh_rate_sec: 1 # How often (seconds) to print the "AVG Ops/Sec" log line.

internal/config/config.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ type AppConfig struct {
3030
TransactionPercent int `yaml:"transaction_percent"`
3131
BulkInsertPercent int `yaml:"bulk_insert_percent"`
3232
InsertBatchSize int `yaml:"insert_batch_size"`
33+
SeedBatchSize int `yaml:"seed_batch_size"`
3334
UseTransactions bool `yaml:"use_transactions"`
3435
MaxTransactionOps int `yaml:"max_transaction_ops"`
3536
DebugMode bool `yaml:"debug_mode"`
@@ -91,6 +92,10 @@ func applyDefaults(cfg *AppConfig) {
9192
if cfg.InsertBatchSize <= 0 {
9293
cfg.InsertBatchSize = 10
9394
}
95+
// Default to 1000 for fast seeding
96+
if cfg.SeedBatchSize <= 0 {
97+
cfg.SeedBatchSize = 1000
98+
}
9499
if cfg.FindLimit <= 0 {
95100
cfg.FindLimit = 5
96101
}
@@ -284,6 +289,11 @@ func applyEnvOverrides(cfg *AppConfig) map[string]bool {
284289
cfg.InsertBatchSize = n
285290
}
286291
}
292+
if v := os.Getenv("PLGM_SEED_BATCH_SIZE"); v != "" {
293+
if n, err := strconv.Atoi(v); err == nil && n > 0 {
294+
cfg.SeedBatchSize = n
295+
}
296+
}
287297

288298
return overrides
289299
}

internal/logger/logger.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,3 +54,11 @@ func Header(msg string) {
5454
func Section(msg string) {
5555
fmt.Printf("\n %s %s\n", GreenString(">"), msg)
5656
}
57+
58+
// Debug prints a formatted debug message with the [DEBUG] tag in Purple.
59+
func Debug(format string, args ...interface{}) {
60+
msg := fmt.Sprintf(format, args...)
61+
// Indent multiline messages to align with the text
62+
msg = strings.ReplaceAll(msg, "\n", "\n ")
63+
fmt.Printf(" %s %s%s\n", Purple+"[DEBUG]"+Reset, msg, Reset)
64+
}

internal/mongo/setup.go

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,65 @@ import (
1616

1717
func InsertRandomDocuments(ctx context.Context, db *mongo.Database, col config.CollectionDefinition, count int, cfg *config.AppConfig) error {
1818
logger.Info("Seeding %d documents into '%s.%s'...", count, col.DatabaseName, col.Name)
19-
docs := make([]interface{}, count)
19+
20+
// 1. Configure Batch Size
21+
batchSize := cfg.SeedBatchSize
22+
if batchSize <= 0 {
23+
batchSize = 1000
24+
}
25+
26+
// 2. Configure Progress Reporting
27+
// Calculate 10% interval
28+
modu := int(float32(count) * 0.1)
29+
if modu < 1 {
30+
modu = 1
31+
}
32+
nextLogTarget := modu
33+
34+
logger.Debug("Inserting documents in batches of %d", batchSize)
35+
logger.Debug("Progress reporting every %d documents", modu)
36+
37+
targetDB := db.Client().Database(col.DatabaseName)
38+
collection := targetDB.Collection(col.Name)
39+
40+
// Pre-allocate batch slice
41+
batch := make([]interface{}, 0, batchSize)
42+
totalInserted := 0
2043

2144
for i := 0; i < count; i++ {
22-
docs[i] = workloads.GenerateDocument(col, cfg)
45+
// Generate document
46+
batch = append(batch, workloads.GenerateDocument(col, cfg))
47+
48+
// If batch is full, InsertMany
49+
if len(batch) >= batchSize {
50+
if _, err := collection.InsertMany(ctx, batch); err != nil {
51+
return fmt.Errorf("insert batch into %s.%s: %w", col.DatabaseName, col.Name, err)
52+
}
53+
54+
totalInserted += len(batch)
55+
batch = batch[:0] // Reset batch, keep capacity
56+
57+
// Check if we crossed the 10% threshold
58+
if totalInserted >= nextLogTarget {
59+
logger.Info("-- Inserted %d documents...", totalInserted)
60+
// Advance target to next 10% marker
61+
for totalInserted >= nextLogTarget {
62+
nextLogTarget += modu
63+
}
64+
}
65+
}
2366
}
2467

25-
if len(docs) > 0 {
26-
// Use database from definition
27-
targetDB := db.Client().Database(col.DatabaseName)
28-
_, err := targetDB.Collection(col.Name).InsertMany(ctx, docs)
29-
if err != nil {
30-
return fmt.Errorf("insert documents into %s.%s: %w", col.DatabaseName, col.Name, err)
68+
// Insert any remaining documents
69+
if len(batch) > 0 {
70+
if _, err := collection.InsertMany(ctx, batch); err != nil {
71+
return fmt.Errorf("insert remaining documents into %s.%s: %w", col.DatabaseName, col.Name, err)
3172
}
73+
totalInserted += len(batch)
74+
logger.Info("-- Inserted %d documents (Final)...", totalInserted)
3275
}
76+
77+
logger.Debug("Document generation and seeding complete")
3378
return nil
3479
}
3580

0 commit comments

Comments
 (0)