From 9ae989482719bf890232061cf90b8677a788c25c Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Thu, 25 Nov 2021 08:10:28 -0600
Subject: [PATCH] qa/tasks/cephadm_cases/test_cli: fix test_daemon_restart

We cannot schedule a daemon start if there is another daemon action
with a higher priority (including stop) scheduled. However, that state
isn't cleared until *after* the osd goes down, the systemctl command
returns, and mgr/cephadm gets around to updating the inventory
scheduled_daemon_action state.

Semi-fix: (1) wait for the orch status to change, and then (2) wait a
few more seconds after that.

Signed-off-by: Sage Weil
---
 qa/tasks/cephadm_cases/test_cli.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/qa/tasks/cephadm_cases/test_cli.py b/qa/tasks/cephadm_cases/test_cli.py
index 1dcf2f35e69..c05395673c8 100644
--- a/qa/tasks/cephadm_cases/test_cli.py
+++ b/qa/tasks/cephadm_cases/test_cli.py
@@ -1,6 +1,9 @@
+import json
 import logging
+import time
 
 from tasks.mgr.mgr_test_case import MgrTestCase
+from teuthology.contextutil import safe_while
 
 log = logging.getLogger(__name__)
 
@@ -48,6 +51,13 @@ class TestCephadmCLI(MgrTestCase):
     def test_daemon_restart(self):
         self._orch_cmd('daemon', 'stop', 'osd.0')
         self.wait_for_health('OSD_DOWN', 30)
+        with safe_while(sleep=1, tries=30) as proceed:
+            while proceed():
+                j = json.loads(self._orch_cmd('ps', '--format', 'json'))
+                d = {d['daemon_name']: d for d in j}
+                if d['osd.0']['status_desc'] != 'running':
+                    break
+        time.sleep(5)
         self._orch_cmd('daemon', 'start', 'osd.0')
         self.wait_for_health_clear(90)
         self._orch_cmd('daemon', 'restart', 'osd.0')