Skip to content

Commit 901ca32

Browse files
committed
add memberlist settings to the loki ingesters config
Signed-off-by: obetsun <[email protected]> rh-pre-commit.version: 2.3.2 rh-pre-commit.check-secrets: ENABLED
1 parent 61a933c commit 901ca32

File tree

9 files changed

+234
-0
lines changed

9 files changed

+234
-0
lines changed

components/vector-kubearchive-log-collector/production/kflux-ocp-p01/loki-helm-prod-values.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,32 @@ gateway:
1616
loki:
1717
commonConfig:
1818
replication_factor: 3
19+
memberlist:
20+
join_members: []
21+
# How long to wait before reclaiming a dead node's tokens
22+
# Set to 2 minutes for faster cleanup
23+
# This helps remove stale ring instances quickly when pods are restarted
24+
dead_node_reclaim_time: 2m
25+
# How often to gossip with other nodes (lower = faster detection of failures)
26+
# Keep at 2s for quick failure detection
27+
gossip_interval: 2s
28+
# How often to do full state sync with other nodes
29+
# Reduced to 5s so state syncs faster
30+
push_pull_interval: 5s
31+
# Number of random nodes to gossip with per interval
32+
# Set to 1 (carried over from the single-replica development config; replication_factor here is 3 - TODO confirm)
33+
gossip_nodes: 1
34+
# How long to continue gossiping to dead nodes (helps propagate death info)
35+
# Reduced to 10s to propagate death info faster
36+
gossip_to_dead_nodes_time: 10s
37+
# How long to wait for an ingester to gracefully leave before considering it dead
38+
# This should be longer than terminationGracePeriodSeconds to allow graceful shutdown
39+
# Reduced to 60s for faster cleanup
40+
left_ingesters_timeout: 60s
41+
max_join_backoff: 1m
42+
max_join_retries: 10
43+
min_join_backoff: 1s
44+
rejoin_interval: 90s
1945
# Required storage configuration for Helm chart
2046
storage:
2147
type: s3

components/vector-kubearchive-log-collector/production/kflux-osp-p01/loki-helm-prod-values.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,32 @@ gateway:
1717
loki:
1818
commonConfig:
1919
replication_factor: 3
20+
memberlist:
21+
join_members: []
22+
# How long to wait before reclaiming a dead node's tokens
23+
# Set to 2 minutes for faster cleanup
24+
# This helps remove stale ring instances quickly when pods are restarted
25+
dead_node_reclaim_time: 2m
26+
# How often to gossip with other nodes (lower = faster detection of failures)
27+
# Keep at 2s for quick failure detection
28+
gossip_interval: 2s
29+
# How often to do full state sync with other nodes
30+
# Reduced to 5s so state syncs faster
31+
push_pull_interval: 5s
32+
# Number of random nodes to gossip with per interval
33+
# Set to 1 (carried over from the single-replica development config; replication_factor here is 3 - TODO confirm)
34+
gossip_nodes: 1
35+
# How long to continue gossiping to dead nodes (helps propagate death info)
36+
# Reduced to 10s to propagate death info faster
37+
gossip_to_dead_nodes_time: 10s
38+
# How long to wait for an ingester to gracefully leave before considering it dead
39+
# This should be longer than terminationGracePeriodSeconds to allow graceful shutdown
40+
# Reduced to 60s for faster cleanup
41+
left_ingesters_timeout: 60s
42+
max_join_backoff: 1m
43+
max_join_retries: 10
44+
min_join_backoff: 1s
45+
rejoin_interval: 90s
2046
# Required storage configuration for Helm chart
2147
storage:
2248
type: s3

components/vector-kubearchive-log-collector/production/kflux-prd-rh02/loki-helm-prod-values.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,32 @@ gateway:
1717
loki:
1818
commonConfig:
1919
replication_factor: 3
20+
memberlist:
21+
join_members: []
22+
# How long to wait before reclaiming a dead node's tokens
23+
# Set to 2 minutes for faster cleanup
24+
# This helps remove stale ring instances quickly when pods are restarted
25+
dead_node_reclaim_time: 2m
26+
# How often to gossip with other nodes (lower = faster detection of failures)
27+
# Keep at 2s for quick failure detection
28+
gossip_interval: 2s
29+
# How often to do full state sync with other nodes
30+
# Reduced to 5s so state syncs faster
31+
push_pull_interval: 5s
32+
# Number of random nodes to gossip with per interval
33+
# Set to 1 (carried over from the single-replica development config; replication_factor here is 3 - TODO confirm)
34+
gossip_nodes: 1
35+
# How long to continue gossiping to dead nodes (helps propagate death info)
36+
# Reduced to 10s to propagate death info faster
37+
gossip_to_dead_nodes_time: 10s
38+
# How long to wait for an ingester to gracefully leave before considering it dead
39+
# This should be longer than terminationGracePeriodSeconds to allow graceful shutdown
40+
# Reduced to 60s for faster cleanup
41+
left_ingesters_timeout: 60s
42+
max_join_backoff: 1m
43+
max_join_retries: 10
44+
min_join_backoff: 1s
45+
rejoin_interval: 90s
2046
# Required storage configuration for Helm chart
2147
storage:
2248
type: s3

components/vector-kubearchive-log-collector/production/kflux-prd-rh03/loki-helm-prod-values.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,32 @@ gateway:
1717
loki:
1818
commonConfig:
1919
replication_factor: 3
20+
memberlist:
21+
join_members: []
22+
# How long to wait before reclaiming a dead node's tokens
23+
# Set to 2 minutes for faster cleanup
24+
# This helps remove stale ring instances quickly when pods are restarted
25+
dead_node_reclaim_time: 2m
26+
# How often to gossip with other nodes (lower = faster detection of failures)
27+
# Keep at 2s for quick failure detection
28+
gossip_interval: 2s
29+
# How often to do full state sync with other nodes
30+
# Reduced to 5s so state syncs faster
31+
push_pull_interval: 5s
32+
# Number of random nodes to gossip with per interval
33+
# Set to 1 (carried over from the single-replica development config; replication_factor here is 3 - TODO confirm)
34+
gossip_nodes: 1
35+
# How long to continue gossiping to dead nodes (helps propagate death info)
36+
# Reduced to 10s to propagate death info faster
37+
gossip_to_dead_nodes_time: 10s
38+
# How long to wait for an ingester to gracefully leave before considering it dead
39+
# This should be longer than terminationGracePeriodSeconds to allow graceful shutdown
40+
# Reduced to 60s for faster cleanup
41+
left_ingesters_timeout: 60s
42+
max_join_backoff: 1m
43+
max_join_retries: 10
44+
min_join_backoff: 1s
45+
rejoin_interval: 90s
2046
# Required storage configuration for Helm chart
2147
storage:
2248
type: s3

components/vector-kubearchive-log-collector/production/pentest-p01/loki-helm-prod-values.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,32 @@ gateway:
4747
loki:
4848
commonConfig:
4949
replication_factor: 3
50+
memberlist:
51+
join_members: []
52+
# How long to wait before reclaiming a dead node's tokens
53+
# Set to 2 minutes for faster cleanup
54+
# This helps remove stale ring instances quickly when pods are restarted
55+
dead_node_reclaim_time: 2m
56+
# How often to gossip with other nodes (lower = faster detection of failures)
57+
# Keep at 2s for quick failure detection
58+
gossip_interval: 2s
59+
# How often to do full state sync with other nodes
60+
# Reduced to 5s so state syncs faster
61+
push_pull_interval: 5s
62+
# Number of random nodes to gossip with per interval
63+
# Set to 1 (carried over from the single-replica development config; replication_factor here is 3 - TODO confirm)
64+
gossip_nodes: 1
65+
# How long to continue gossiping to dead nodes (helps propagate death info)
66+
# Reduced to 10s to propagate death info faster
67+
gossip_to_dead_nodes_time: 10s
68+
# How long to wait for an ingester to gracefully leave before considering it dead
69+
# This should be longer than terminationGracePeriodSeconds to allow graceful shutdown
70+
# Reduced to 60s for faster cleanup
71+
left_ingesters_timeout: 60s
72+
max_join_backoff: 1m
73+
max_join_retries: 10
74+
min_join_backoff: 1s
75+
rejoin_interval: 90s
5076
# Required storage configuration for Helm chart
5177
storage:
5278
type: s3

components/vector-kubearchive-log-collector/production/stone-prod-p01/loki-helm-prod-values.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,32 @@ gateway:
1717
loki:
1818
commonConfig:
1919
replication_factor: 3
20+
memberlist:
21+
join_members: []
22+
# How long to wait before reclaiming a dead node's tokens
23+
# Set to 2 minutes for faster cleanup
24+
# This helps remove stale ring instances quickly when pods are restarted
25+
dead_node_reclaim_time: 2m
26+
# How often to gossip with other nodes (lower = faster detection of failures)
27+
# Keep at 2s for quick failure detection
28+
gossip_interval: 2s
29+
# How often to do full state sync with other nodes
30+
# Reduced to 5s so state syncs faster
31+
push_pull_interval: 5s
32+
# Number of random nodes to gossip with per interval
33+
# Set to 1 (carried over from the single-replica development config; replication_factor here is 3 - TODO confirm)
34+
gossip_nodes: 1
35+
# How long to continue gossiping to dead nodes (helps propagate death info)
36+
# Reduced to 10s to propagate death info faster
37+
gossip_to_dead_nodes_time: 10s
38+
# How long to wait for an ingester to gracefully leave before considering it dead
39+
# This should be longer than terminationGracePeriodSeconds to allow graceful shutdown
40+
# Reduced to 60s for faster cleanup
41+
left_ingesters_timeout: 60s
42+
max_join_backoff: 1m
43+
max_join_retries: 10
44+
min_join_backoff: 1s
45+
rejoin_interval: 90s
2046
# Required storage configuration for Helm chart
2147
storage:
2248
type: s3

components/vector-kubearchive-log-collector/production/stone-prod-p02/loki-helm-prod-values.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,32 @@ gateway:
1717
loki:
1818
commonConfig:
1919
replication_factor: 3
20+
memberlist:
21+
join_members: []
22+
# How long to wait before reclaiming a dead node's tokens
23+
# Set to 2 minutes for faster cleanup
24+
# This helps remove stale ring instances quickly when pods are restarted
25+
dead_node_reclaim_time: 2m
26+
# How often to gossip with other nodes (lower = faster detection of failures)
27+
# Keep at 2s for quick failure detection
28+
gossip_interval: 2s
29+
# How often to do full state sync with other nodes
30+
# Reduced to 5s so state syncs faster
31+
push_pull_interval: 5s
32+
# Number of random nodes to gossip with per interval
33+
# Set to 1 (carried over from the single-replica development config; replication_factor here is 3 - TODO confirm)
34+
gossip_nodes: 1
35+
# How long to continue gossiping to dead nodes (helps propagate death info)
36+
# Reduced to 10s to propagate death info faster
37+
gossip_to_dead_nodes_time: 10s
38+
# How long to wait for an ingester to gracefully leave before considering it dead
39+
# This should be longer than terminationGracePeriodSeconds to allow graceful shutdown
40+
# Reduced to 60s for faster cleanup
41+
left_ingesters_timeout: 60s
42+
max_join_backoff: 1m
43+
max_join_retries: 10
44+
min_join_backoff: 1s
45+
rejoin_interval: 90s
2046
# Required storage configuration for Helm chart
2147
storage:
2248
type: s3

components/vector-kubearchive-log-collector/staging/stone-stage-p01/loki-helm-stg-values.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,32 @@ gateway:
1616
loki:
1717
commonConfig:
1818
replication_factor: 3
19+
memberlist:
20+
join_members: []
21+
# How long to wait before reclaiming a dead node's tokens
22+
# Set to 2 minutes for faster cleanup
23+
# This helps remove stale ring instances quickly when pods are restarted
24+
dead_node_reclaim_time: 2m
25+
# How often to gossip with other nodes (lower = faster detection of failures)
26+
# Keep at 2s for quick failure detection
27+
gossip_interval: 2s
28+
# How often to do full state sync with other nodes
29+
# Reduced to 5s so state syncs faster
30+
push_pull_interval: 5s
31+
# Number of random nodes to gossip with per interval
32+
# Set to 1 (carried over from the single-replica development config; replication_factor here is 3 - TODO confirm)
33+
gossip_nodes: 1
34+
# How long to continue gossiping to dead nodes (helps propagate death info)
35+
# Reduced to 10s to propagate death info faster
36+
gossip_to_dead_nodes_time: 10s
37+
# How long to wait for an ingester to gracefully leave before considering it dead
38+
# This should be longer than terminationGracePeriodSeconds to allow graceful shutdown
39+
# Reduced to 60s for faster cleanup
40+
left_ingesters_timeout: 60s
41+
max_join_backoff: 1m
42+
max_join_retries: 10
43+
min_join_backoff: 1s
44+
rejoin_interval: 90s
1945
storage:
2046
type: s3
2147
# bucketNames: Fill it on the generator for each cluster

components/vector-kubearchive-log-collector/staging/stone-stg-rh01/loki-helm-stg-values.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,32 @@ gateway:
4848
loki:
4949
commonConfig:
5050
replication_factor: 3
51+
memberlist:
52+
join_members: []
53+
# How long to wait before reclaiming a dead node's tokens
54+
# Set to 2 minutes for faster cleanup
55+
# This helps remove stale ring instances quickly when pods are restarted
56+
dead_node_reclaim_time: 2m
57+
# How often to gossip with other nodes (lower = faster detection of failures)
58+
# Keep at 2s for quick failure detection
59+
gossip_interval: 2s
60+
# How often to do full state sync with other nodes
61+
# Reduced to 5s so state syncs faster
62+
push_pull_interval: 5s
63+
# Number of random nodes to gossip with per interval
64+
# Set to 1 (carried over from the single-replica development config; replication_factor here is 3 - TODO confirm)
65+
gossip_nodes: 1
66+
# How long to continue gossiping to dead nodes (helps propagate death info)
67+
# Reduced to 10s to propagate death info faster
68+
gossip_to_dead_nodes_time: 10s
69+
# How long to wait for an ingester to gracefully leave before considering it dead
70+
# This should be longer than terminationGracePeriodSeconds to allow graceful shutdown
71+
# Reduced to 60s for faster cleanup
72+
left_ingesters_timeout: 60s
73+
max_join_backoff: 1m
74+
max_join_retries: 10
75+
min_join_backoff: 1s
76+
rejoin_interval: 90s
5177
storage:
5278
type: s3
5379
# bucketNames: Fill it on the generator for each cluster

0 commit comments

Comments
 (0)