Skip to content

Commit ac00225

Browse files
Added support for multiple destinations in healthcheck (#2704)
1 parent 7daf590 commit ac00225

File tree

2 files changed

+114
-19
lines changed

2 files changed

+114
-19
lines changed

package/sbin/healthcheck.py

+28-11
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import os
33
import subprocess
4+
import re
45

56
from flask_wtf.csrf import CSRFProtect
67
from flask import Flask, jsonify
@@ -18,11 +19,20 @@ def str_to_bool(value):
1819
'yes'
1920
}
2021

22+
def get_list_of_destinations():
    """Collect the Splunk HEC destination URLs configured in the environment.

    Scans ``os.environ`` for variables whose name has the form
    ``SC4S_DEST_SPLUNK_HEC_<NAME>_URL`` and gathers their values.

    Returns:
        set: The distinct, non-blank values of all matching variables. The
        set is empty when no matching variable is defined.
    """
    destination_key = re.compile(r"^SC4S_DEST_SPLUNK_HEC_(.*)_URL$")

    # Blank values are dropped on purpose: check_queue_size looks a destination
    # up with a substring test against each stats line, and an empty string is
    # a substring of every line, so it would pick up an unrelated queue entry.
    return {
        value
        for key, value in os.environ.items()
        if destination_key.search(key) and value.strip()
    }
30+
2131
class Config:
22-
SC4S_DEST_SPLUNK_HEC_DEFAULT_URL = os.getenv('SC4S_DEST_SPLUNK_HEC_DEFAULT_URL')
2332
HEALTHCHECK_PORT = int(os.getenv('SC4S_LISTEN_STATUS_PORT', '8080'))
2433
CHECK_QUEUE_SIZE = str_to_bool(os.getenv('HEALTHCHECK_CHECK_QUEUE_SIZE', "false"))
2534
MAX_QUEUE_SIZE = int(os.getenv('HEALTHCHECK_MAX_QUEUE_SIZE', '10000'))
35+
DESTINATIONS = get_list_of_destinations()
2636

2737
logging.basicConfig(
2838
format="%(asctime)s - healthcheck.py - %(levelname)s - %(message)s",
@@ -52,11 +62,11 @@ def check_syslog_ng_health() -> bool:
5262
return False
5363

5464
def check_queue_size(
55-
sc4s_dest_splunk_hec_default=Config.SC4S_DEST_SPLUNK_HEC_DEFAULT_URL,
65+
sc4s_dest_splunk_hec_destinations=Config.DESTINATIONS,
5666
max_queue_size=Config.MAX_QUEUE_SIZE
5767
) -> bool:
5868
"""Check syslog-ng queue size and compare it against the configured maximum limit."""
59-
if not sc4s_dest_splunk_hec_default:
69+
if not sc4s_dest_splunk_hec_destinations:
6070
logger.error(
6171
"SC4S_DEST_SPLUNK_HEC_DEFAULT_URL not configured. "
6272
"Ensure the default HEC destination is set, or disable HEALTHCHECK_CHECK_QUEUE_SIZE."
@@ -75,15 +85,22 @@ def check_queue_size(
7585
return False
7686

7787
stats = result.stdout.splitlines()
78-
destination_stat = next(
79-
(s for s in stats if ";queued;" in s and sc4s_dest_splunk_hec_default in s),
80-
None
81-
)
82-
if not destination_stat:
83-
logger.error("No matching queue stats found for the destination URL.")
84-
return False
8588

86-
queue_size = int(destination_stat.split(";")[-1])
89+
queue_sizes_all_destinations = []
90+
91+
for destination in sc4s_dest_splunk_hec_destinations:
92+
destination_stat = next(
93+
(s for s in stats if ";queued;" in s and destination in s),
94+
None
95+
)
96+
97+
if not destination_stat:
98+
logger.error(f"No matching queue stats found for the destination URL {destination}.")
99+
return False
100+
101+
queue_sizes_all_destinations.append(int(destination_stat.split(";")[-1]))
102+
103+
queue_size = max(queue_sizes_all_destinations)
87104
if queue_size > max_queue_size:
88105
logger.warning(
89106
f"Queue size {queue_size} exceeds the maximum limit of {max_queue_size}."

tests/test_healthcheck_unit_tests.py

+86-8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
check_syslog_ng_health,
99
subprocess,
1010
check_queue_size,
11+
get_list_of_destinations,
1112
)
1213

1314
# str_to_bool
@@ -52,9 +53,9 @@ def test_check_syslog_ng_health_exception(mock_run):
5253
# check_queue_size
5354
def test_check_queue_size_no_url():
5455
"""
55-
If sc4s_dest_splunk_hec_default is not set, check_queue_size should fail.
56+
If sc4s_dest_splunk_hec_destinations is not set, check_queue_size should fail.
5657
"""
57-
assert check_queue_size(sc4s_dest_splunk_hec_default=None, max_queue_size=1000) is False
58+
assert check_queue_size(sc4s_dest_splunk_hec_destinations=None, max_queue_size=1000) is False
5859

5960
@patch("subprocess.run")
6061
def test_check_queue_size_stats_fail(mock_run):
@@ -63,7 +64,7 @@ def test_check_queue_size_stats_fail(mock_run):
6364
"""
6465
mock_run.return_value.returncode = 1
6566
mock_run.return_value.stderr = "stats error"
66-
assert check_queue_size(sc4s_dest_splunk_hec_default="http://example.com:8088", max_queue_size=1000) is False
67+
assert check_queue_size(sc4s_dest_splunk_hec_destinations={"http://example.com:8088"}, max_queue_size=1000) is False
6768

6869
@patch("subprocess.run")
6970
def test_check_queue_size_no_matching_stats(mock_run):
@@ -72,7 +73,7 @@ def test_check_queue_size_no_matching_stats(mock_run):
7273
"""
7374
mock_run.return_value.returncode = 0
7475
mock_run.return_value.stdout = "some;other;stat;line\nanother;stat"
75-
assert check_queue_size(sc4s_dest_splunk_hec_default="http://example.com:8088", max_queue_size=1000) is False
76+
assert check_queue_size(sc4s_dest_splunk_hec_destinations={"http://example.com:8088"}, max_queue_size=1000) is False
7677

7778
@patch("subprocess.run")
7879
def test_check_queue_size_exceeds_limit(mock_run):
@@ -84,7 +85,7 @@ def test_check_queue_size_exceeds_limit(mock_run):
8485
"destination;queued;http://example.com:8088;2000\n"
8586
"another;queued;http://other-url.com;1234"
8687
)
87-
assert check_queue_size(sc4s_dest_splunk_hec_default="http://example.com:8088", max_queue_size=1000) is False
88+
assert check_queue_size(sc4s_dest_splunk_hec_destinations={"http://example.com:8088"}, max_queue_size=1000) is False
8889

8990
@patch("subprocess.run")
9091
def test_check_queue_size_under_limit(mock_run):
@@ -96,7 +97,7 @@ def test_check_queue_size_under_limit(mock_run):
9697
"destination;queued;http://example.com:8088;500\n"
9798
"another;queued;http://other-url.com;1234"
9899
)
99-
assert check_queue_size(sc4s_dest_splunk_hec_default="http://example.com:8088", max_queue_size=1000) is True
100+
assert check_queue_size(sc4s_dest_splunk_hec_destinations={"http://example.com:8088"}, max_queue_size=1000) is True
100101

101102
@patch("subprocess.run")
102103
def test_check_queue_size_equals_limit(mock_run):
@@ -108,7 +109,62 @@ def test_check_queue_size_equals_limit(mock_run):
108109
"destination;queued;http://example.com:8088;1000\n"
109110
"another;queued;http://other-url.com;1234"
110111
)
111-
assert check_queue_size(sc4s_dest_splunk_hec_default="http://example.com:8088", max_queue_size=1000) is True
112+
assert check_queue_size(sc4s_dest_splunk_hec_destinations={"http://example.com:8088"}, max_queue_size=1000) is True
113+
114+
@patch("subprocess.run")
def test_check_queue_size_multiple_destinations(mock_run):
    """
    check_queue_size should pass when every configured destination reports a
    queue size that is <= HEALTHCHECK_MAX_QUEUE_SIZE.
    """
    stats_lines = [
        "destination;queued;http://example.com:8088;300",
        "destination;queued;http://another.com:8088;500",
        "another;queued;http://other-url.com;1234",
    ]
    mock_run.return_value.returncode = 0
    mock_run.return_value.stdout = "\n".join(stats_lines)

    destinations = {"http://example.com:8088", "http://another.com:8088"}
    is_healthy = check_queue_size(
        sc4s_dest_splunk_hec_destinations=destinations,
        max_queue_size=1000,
    )

    assert is_healthy is True
127+
128+
@patch("subprocess.run")
def test_check_queue_size_multiple_destinations_over_limit(mock_run):
    """
    check_queue_size should fail as soon as at least one configured
    destination reports a queue size > HEALTHCHECK_MAX_QUEUE_SIZE.
    """
    stats_lines = [
        "destination;queued;http://example.com:8088;1300",
        "destination;queued;http://another.com:8088;500",
        "another;queued;http://other-url.com;1234",
    ]
    mock_run.return_value.returncode = 0
    mock_run.return_value.stdout = "\n".join(stats_lines)

    destinations = {"http://example.com:8088", "http://another.com:8088"}
    is_healthy = check_queue_size(
        sc4s_dest_splunk_hec_destinations=destinations,
        max_queue_size=1000,
    )

    assert is_healthy is False
141+
142+
@patch("subprocess.run")
def test_check_queue_size_multiple_destinations_all_over_limit(mock_run):
    """
    check_queue_size should fail when every configured destination reports a
    queue size > HEALTHCHECK_MAX_QUEUE_SIZE.
    """
    stats_lines = [
        "destination;queued;http://example.com:8088;1300",
        "destination;queued;http://another.com:8088;1500",
        "another;queued;http://other-url.com;1234",
    ]
    mock_run.return_value.returncode = 0
    mock_run.return_value.stdout = "\n".join(stats_lines)

    destinations = {"http://example.com:8088", "http://another.com:8088"}
    is_healthy = check_queue_size(
        sc4s_dest_splunk_hec_destinations=destinations,
        max_queue_size=1000,
    )

    assert is_healthy is False
155+
156+
@patch("subprocess.run")
def test_check_queue_size_multiple_incomplete_info(mock_run):
    """
    check_queue_size should fail when the stats command succeeds but its
    output has no entry for one of the requested destinations.
    """
    stats_lines = [
        "destination;queued;http://example.com:8088;300",
        "another;queued;http://other-url.com;1234",
    ]
    mock_run.return_value.returncode = 0
    mock_run.return_value.stdout = "\n".join(stats_lines)

    # "http://another.com:8088" is requested but absent from the stats above.
    destinations = {"http://example.com:8088", "http://another.com:8088"}
    is_healthy = check_queue_size(
        sc4s_dest_splunk_hec_destinations=destinations,
        max_queue_size=1000,
    )

    assert is_healthy is False
112168

113169
@patch("subprocess.run", side_effect=Exception("some exception"))
114170
def test_check_queue_size_exception(mock_run):
@@ -139,4 +195,26 @@ def test_health_endpoint_no_queue_check(mock_run, client):
139195

140196
response = client.get("/health")
141197
assert response.status_code == 200
142-
assert response.json["status"] == "healthy"
198+
assert response.json["status"] == "healthy"
199+
200+
def test_get_destinations():
    """
    get_list_of_destinations should return exactly the values of the
    SC4S_DEST_SPLUNK_HEC_*_URL environment variables and ignore the rest.
    """
    fake_environment = {
        "SC4S_DEST_SPLUNK_HEC_DEFAULT_URL": "http://my_test_url:1234",
        "SC4S_DEST_SPLUNK_HEC_OTHER_URL": "http://my_hec:1234",
        "SOME_OTHER_URL": "http://my_url/test_url",
        "SOME_OTHER_ENV_VARIABLE": "my_variable",
        "SC4S_LISTEN_STATUS_PORT": "1234",
    }

    # clear=True hides the real environment so only the entries above exist.
    with patch.dict(os.environ, fake_environment, clear=True):
        destinations = get_list_of_destinations()

    assert len(destinations) == 2
    for expected_url in ("http://my_test_url:1234", "http://my_hec:1234"):
        assert expected_url in destinations

0 commit comments

Comments
 (0)