diff --git a/README.md b/README.md index 86afc19..5ea5c6a 100644 --- a/README.md +++ b/README.md @@ -384,6 +384,9 @@ For the replacement of `racadm racreset`, the optional argument `--racreset` was ```bash badfish -H mgmt-your-server.example.com --racreset ``` +* You can also specify `--racreset --wait` and Badfish will poll the iDRAC until the reset completes and keep you updated on progress. + + > [!NOTE] > Dell specific command, for Supermicro servers there is an equivalent of `--bmc-reset` diff --git a/src/badfish/config.py b/src/badfish/config.py index 6974831..f16520f 100644 --- a/src/badfish/config.py +++ b/src/badfish/config.py @@ -1 +1 @@ -RETRIES = 15 +RETRIES = 30 diff --git a/src/badfish/helpers/parser.py b/src/badfish/helpers/parser.py index 6bafc25..ee1bc34 100644 --- a/src/badfish/helpers/parser.py +++ b/src/badfish/helpers/parser.py @@ -70,6 +70,7 @@ def create_parser(): action="store_true", ) parser.add_argument("--racreset", help="Flag for iDRAC reset", action="store_true") + parser.add_argument("--wait", help="Wait for iDRAC to be responsive after reset", action="store_true") parser.add_argument("--bmc-reset", help="Flag for BMC reset", action="store_true") parser.add_argument( "--factory-reset", diff --git a/src/badfish/main.py b/src/badfish/main.py index 27d47ad..862a61e 100755 --- a/src/badfish/main.py +++ b/src/badfish/main.py @@ -1074,7 +1074,7 @@ async def reboot_server(self, graceful=True): await self.send_reset("On") return True - async def reset_idrac(self): + async def reset_idrac(self, wait=False): if self.vendor != "Dell": self.logger.warning("Vendor isn't a Dell, if you are trying this on a Supermicro, use --bmc-reset instead.") return False @@ -1100,8 +1100,17 @@ async def reset_idrac(self): data = await _response.text("utf-8", "ignore") raise BadfishException("Status code %s returned, error is: \n%s." 
% (status_code, data)) - self.logger.info("iDRAC will now reset and be back online within a few minutes.") - return True + if wait: + self.logger.info("iDRAC reset initiated. Waiting for iDRAC to come back online...") + ready = await self.wait_for_idrac_ready() + if ready: + self.logger.info("iDRAC is now responsive.") + else: + self.logger.warning("iDRAC did not respond after %d retry attempts." % self.retries) + return ready + else: + self.logger.info("iDRAC will now reset and be back online within a few minutes.") + return True async def reset_bmc(self): if self.vendor != "Supermicro": @@ -1298,6 +1307,30 @@ async def polling_host_state(self, state, equals=True): return desired_state + async def poll_until_ready(self, check_func, description, sleep_interval=5, clear_cache=False): + self.logger.info("Polling for %s" % description) + for count in range(self.retries): + if clear_cache: + self.http_client.get_request.cache_clear() + ready = await check_func() + if ready: + self.progress_bar(self.retries, self.retries, "Ready") + self.logger.info("%s is ready." % description) + return True + self.progress_bar(count, self.retries, "Not Ready") + await asyncio.sleep(sleep_interval) + self.logger.warning("%s did not become ready after %d retry attempts." 
% (description, self.retries)) + return False + + async def wait_for_idrac_ready(self): + async def check_idrac_responsive(): + response = await self.get_request(self.root_uri, _continue=True) + return response and response.status == 200 + + self.logger.info("Waiting for iDRAC to be ready after reset (this may take a few minutes)...") + await asyncio.sleep(10) + return await self.poll_until_ready(check_idrac_responsive, "iDRAC", sleep_interval=10, clear_cache=True) + async def get_firmware_inventory(self): self.logger.debug("Getting firmware inventory for all devices supported by iDRAC.") @@ -2705,6 +2738,7 @@ async def execute_badfish(_host, _args, logger, format_handler=None): power_cycle = _args["power_cycle"] power_consumed_watts = _args["get_power_consumed"] rac_reset = _args["racreset"] + wait = _args.get("wait", False) bmc_reset = _args["bmc_reset"] factory_reset = _args["factory_reset"] check_boot = _args["check_boot"] @@ -2785,7 +2819,7 @@ async def execute_badfish(_host, _args, logger, format_handler=None): elif host_type: await badfish.change_boot(host_type, interfaces_path, pxe) elif rac_reset: - await badfish.reset_idrac() + await badfish.reset_idrac(wait=wait) elif bmc_reset: await badfish.reset_bmc() elif factory_reset: diff --git a/tests/test_context_manager.py b/tests/test_context_manager.py index 14c25d6..10e4e7d 100644 --- a/tests/test_context_manager.py +++ b/tests/test_context_manager.py @@ -424,6 +424,7 @@ async def test_execute_badfish_session_cleanup_success(self): "power_cycle": False, "get_power_consumed": False, "racreset": False, + "wait": False, "bmc_reset": False, "factory_reset": False, "check_boot": False, @@ -508,6 +509,7 @@ async def test_execute_badfish_session_cleanup_failure(self): "power_cycle": False, "get_power_consumed": False, "racreset": False, + "wait": False, "bmc_reset": False, "factory_reset": False, "check_boot": False, @@ -596,6 +598,7 @@ async def test_execute_badfish_no_session_cleanup(self): "power_cycle": False, 
"get_power_consumed": False, "racreset": False, + "wait": False, "bmc_reset": False, "factory_reset": False, "check_boot": False, @@ -680,6 +683,7 @@ async def test_execute_badfish_no_badfish_instance(self): "power_cycle": False, "get_power_consumed": False, "racreset": False, + "wait": False, "bmc_reset": False, "factory_reset": False, "check_boot": False, diff --git a/tests/test_poll_helpers.py b/tests/test_poll_helpers.py new file mode 100644 index 0000000..6072003 --- /dev/null +++ b/tests/test_poll_helpers.py @@ -0,0 +1,42 @@ +import asyncio +from unittest.mock import MagicMock, patch +from badfish.main import Badfish +from tests.test_base import TestBase + + +class TestPollHelpers(TestBase): + @patch("badfish.main.HTTPClient") + def test_poll_until_ready_timeout(self, mock_http_client): + logger = MagicMock() + badfish = Badfish("test-host", "user", "pass", logger, 2) + + async def always_false(): + return False + + async def run_test(): + return await badfish.poll_until_ready(always_false, "test service", sleep_interval=0) + + result = asyncio.get_event_loop().run_until_complete(run_test()) + + assert result is False + logger.warning.assert_called_once_with("test service did not become ready after 2 retry attempts.") + + @patch("badfish.main.HTTPClient") + def test_poll_until_ready_success(self, mock_http_client): + logger = MagicMock() + badfish = Badfish("test-host", "user", "pass", logger, 5) + + call_count = [0] + + async def check_after_attempts(): + call_count[0] += 1 + return call_count[0] >= 3 + + async def run_test(): + return await badfish.poll_until_ready(check_after_attempts, "test service", sleep_interval=0) + + result = asyncio.get_event_loop().run_until_complete(run_test()) + + assert result is True + logger.info.assert_any_call("Polling for test service") + logger.info.assert_any_call("test service is ready.") diff --git a/tests/test_reset_idrac.py b/tests/test_reset_idrac.py index d1275ce..141437f 100644 --- a/tests/test_reset_idrac.py +++ 
b/tests/test_reset_idrac.py @@ -1,4 +1,4 @@ -from unittest.mock import patch +from unittest.mock import patch, PropertyMock, AsyncMock from tests.config import ( BOOT_SEQ_RESPONSE_DIRECTOR, @@ -8,6 +8,7 @@ RESPONSE_RESET, RESPONSE_RESET_FAIL, RESPONSE_RESET_WRONG_VENDOR, + ROOT_RESP, ) from tests.test_base import TestBase @@ -53,3 +54,57 @@ def test_reset_idrac_wrong_vendor(self, mock_get, mock_post, mock_delete): self.args = [self.option_arg] _, err = self.badfish_call() assert err == RESPONSE_RESET_WRONG_VENDOR % ("Dell", "Supermicro", "--bmc-reset") + + @patch("badfish.main.Badfish.wait_for_idrac_ready", new_callable=AsyncMock) + @patch("aiohttp.ClientSession.delete") + @patch("aiohttp.ClientSession.post") + @patch("aiohttp.ClientSession.get") + def test_reset_idrac_with_wait_timeout(self, mock_get, mock_post, mock_delete, mock_wait): + responses = INIT_RESP + [RESET_TYPE_RESP] + self.set_mock_response(mock_get, 200, responses) + self.set_mock_response(mock_post, [200, 204], "OK", True) + self.set_mock_response(mock_delete, 200, "OK") + mock_wait.return_value = False + + self.boot_seq = BOOT_SEQ_RESPONSE_DIRECTOR + self.args = [self.option_arg, "--wait"] + _, err = self.badfish_call() + assert "Status code 204 returned for POST command to reset iDRAC" in err + assert "iDRAC reset initiated. 
Waiting for iDRAC to come back online" in err + assert "iDRAC did not respond after 30 retry attempts" in err + + @patch("aiohttp.ClientSession.delete") + @patch("aiohttp.ClientSession.post") + @patch("aiohttp.ClientSession.get") + def test_reset_idrac_with_wait_success(self, mock_get, mock_post, mock_delete): + responses = INIT_RESP + [RESET_TYPE_RESP] + [ROOT_RESP] * 15 + self.set_mock_response(mock_get, 200, responses) + self.set_mock_response(mock_post, [200, 204], "OK", True) + self.set_mock_response(mock_delete, 200, "OK") + self.boot_seq = BOOT_SEQ_RESPONSE_DIRECTOR + self.args = [self.option_arg, "--wait"] + _, err = self.badfish_call() + assert "Status code 204 returned for POST command to reset iDRAC" in err + assert "iDRAC reset initiated. Waiting for iDRAC to come back online" in err + assert "Polling for iDRAC" in err + assert "iDRAC is ready" in err + assert "iDRAC is now responsive" in err + + @patch("aiohttp.ClientSession.delete") + @patch("aiohttp.ClientSession.post") + @patch("aiohttp.ClientSession.get") + def test_reset_idrac_with_wait_delayed(self, mock_get, mock_post, mock_delete): + responses = INIT_RESP + [RESET_TYPE_RESP] + ["Not Found"] * 5 + [ROOT_RESP] * 10 + status_list = [200] * 6 + [404] * 5 + [200] * 10 + self.set_mock_response(mock_get, status_list, responses) + self.set_mock_response(mock_post, [200, 204], "OK", True) + self.set_mock_response(mock_delete, 200, "OK") + self.boot_seq = BOOT_SEQ_RESPONSE_DIRECTOR + self.args = [self.option_arg, "--wait"] + _, err = self.badfish_call() + assert "Status code 204 returned for POST command to reset iDRAC" in err + assert "iDRAC reset initiated. Waiting for iDRAC to come back online" in err + assert "Polling for iDRAC" in err + assert "iDRAC is ready" in err + assert "iDRAC is now responsive" in err +