Skip to content

Commit 522b441

Browse files
committed
Avoid pending_restart=True flag flickering
The current Patroni 3.2.2 has wired/flickering behaviour: it temporary flag pending_restart=True on many changes to REST API, which is gone within a second but long enough to be cougth by charm. Sleepping a bit is a necessary evil, until Patroni 3.3.0 upgrade. The previous code sleept for 15 seconds waiting for pg_settings update. Also, the unnecessary restarts could be triggered by missmatch of Patroni config file and in-memory changes coming from REST API, e.g. the slots were undefined in yaml file but set as an empty JSON {} => None. Updating the default template to match the default API PATCHes and avoid restarts. Signed-off-by: Marcelo Henrique Neppel <[email protected]>
1 parent 11fe78b commit 522b441

File tree

5 files changed

+30
-13
lines changed

5 files changed

+30
-13
lines changed

src/charm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2137,7 +2137,7 @@ def update_config(self, is_creating_backup: bool = False) -> bool:
21372137
restore_stanza=self.app_peer_data.get("restore-stanza"),
21382138
parameters=postgresql_parameters,
21392139
user_databases_map=self.relations_user_databases_map,
2140-
slots=replication_slots or None,
2140+
slots=replication_slots,
21412141
)
21422142

21432143
if not self._is_workload_running:

src/patroni.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import logging
88
import os
99
import pwd
10+
from time import sleep
1011
from typing import Any, TypedDict
1112

1213
import requests
@@ -366,18 +367,32 @@ def is_replication_healthy(self) -> bool:
366367

367368
def is_restart_pending(self) -> bool:
368369
"""Returns whether the Patroni/PostgreSQL restart pending."""
369-
patroni_status = requests.get(
370+
pending_restart = self._get_patroni_restart_pending()
371+
if pending_restart:
372+
# The current Patroni 3.2.2 has wired behaviour: it temporarily flags pending_restart=True
373+
# on any changes to REST API, which is gone within a second but long enough to be
374+
# caught by charm. Sleep 2 seconds as a protection here until Patroni 3.3.0 upgrade.
375+
# Repeat the request to make sure pending_restart flag is still here
376+
logger.debug("Enduring restart is pending (to avoid unnecessary rolling restarts)")
377+
sleep(2)
378+
pending_restart = self._get_patroni_restart_pending()
379+
380+
return pending_restart
381+
382+
def _get_patroni_restart_pending(self) -> bool:
383+
"""Returns whether the Patroni flag pending_restart on REST API."""
384+
r = requests.get(
370385
f"{self._patroni_url}/patroni",
371386
verify=self._verify,
372387
timeout=PATRONI_TIMEOUT,
373388
auth=self._patroni_auth,
374389
)
375-
try:
376-
pending_restart = patroni_status.json()["pending_restart"]
377-
except KeyError:
378-
pending_restart = False
379-
pass
380-
logger.debug(f"Patroni API is_restart_pending: {pending_restart}")
390+
pending_restart = r.json().get("pending_restart", False)
391+
logger.debug(
392+
f"API _get_patroni_restart_pending ({pending_restart}): %s (%s)",
393+
r,
394+
r.elapsed.total_seconds(),
395+
)
381396

382397
return pending_restart
383398

@@ -509,7 +524,7 @@ def ensure_slots_controller_by_patroni(self, slots: dict[str, str]) -> None:
509524
auth=self._patroni_auth,
510525
)
511526
slots_patch: dict[str, dict[str, str] | None] = dict.fromkeys(
512-
current_config.json().get("slots", ())
527+
current_config.json().get("slots") or {}
513528
)
514529
for slot, database in slots.items():
515530
slots_patch[slot] = {
@@ -617,6 +632,8 @@ def render_patroni_yml_file(
617632
user_databases_map: map of databases to be accessible by each user.
618633
slots: replication slots (keys) with assigned database name (values).
619634
"""
635+
slots = slots or {}
636+
620637
# Open the template patroni.yml file.
621638
with open("templates/patroni.yml.j2") as file:
622639
template = Template(file.read())

templates/patroni.yml.j2

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,13 @@ bootstrap:
4747
logging_collector: 'on'
4848
wal_level: logical
4949
shared_preload_libraries: 'timescaledb,pgaudit'
50-
{%- if slots %}
5150
slots:
5251
{%- for slot, database in slots.items() %}
5352
{{slot}}:
5453
database: {{database}}
5554
plugin: pgoutput
5655
type: logical
5756
{%- endfor -%}
58-
{% endif %}
5957

6058
{%- if restoring_backup %}
6159
method: pgbackrest

tests/unit/test_charm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1504,7 +1504,7 @@ def test_update_config(harness):
15041504
restore_to_latest=False,
15051505
parameters={"test": "test"},
15061506
user_databases_map={"operator": "all", "replication": "all", "rewind": "all"},
1507-
slots=None,
1507+
slots={},
15081508
)
15091509
_handle_postgresql_restart_need.assert_called_once()
15101510
_restart_metrics_service.assert_called_once()
@@ -1535,7 +1535,7 @@ def test_update_config(harness):
15351535
restore_to_latest=False,
15361536
parameters={"test": "test"},
15371537
user_databases_map={"operator": "all", "replication": "all", "rewind": "all"},
1538-
slots=None,
1538+
slots={},
15391539
)
15401540
_handle_postgresql_restart_need.assert_called_once()
15411541
_restart_metrics_service.assert_called_once()

tests/unit/test_patroni.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ def test_render_patroni_yml_file(harness, patroni):
224224
with open("templates/patroni.yml.j2") as file:
225225
template = Template(file.read())
226226
expected_content = template.render(
227+
slots={},
227228
endpoint=patroni._endpoint,
228229
endpoints=patroni._endpoints,
229230
namespace=patroni._namespace,
@@ -259,6 +260,7 @@ def test_render_patroni_yml_file(harness, patroni):
259260
# Then test the rendering of the file with TLS enabled.
260261
_render_file.reset_mock()
261262
expected_content_with_tls = template.render(
263+
slots={},
262264
enable_tls=True,
263265
endpoint=patroni._endpoint,
264266
endpoints=patroni._endpoints,

0 commit comments

Comments
 (0)