pybind/mgr/progress: disable pg recovery event by default
The progress module disables the pg recovery event by default, since the
event is expensive and has interrupted other services when OSDs are being
marked in/out of the cluster.

To turn the event on manually:

    ceph config set mgr mgr/progress/allow_pg_recovery_event true

Updated qa/tasks/mgr/test_progress.py to enable the pg recovery event
when testing the progress module.

Signed-off-by: Kamoltat <ksirivad@redhat.com>
parent 6c0c75fa17
commit f06da20dff
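The toggle referenced in the commit message can be inspected as well as set. A quick check, assuming a cluster reachable through the ceph CLI (ceph config get is the read counterpart of the ceph config set command shown above):

    ceph config get mgr mgr/progress/allow_pg_recovery_event
    ceph config set mgr mgr/progress/allow_pg_recovery_event true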
@@ -126,6 +126,13 @@
   https://docs.ceph.com/en/latest/mgr/telemetry/
 
+* MGR: The progress module disables the pg recovery event by default
+  since the event is expensive and has interrupted other services when
+  there are OSDs being marked in/out of the cluster. However,
+  the user may still enable this event anytime. For more details, see:
+
+  https://docs.ceph.com/en/latest/mgr/progress/
+
 >=16.0.0
 --------
 
 * mgr/nfs: ``nfs`` module is moved out of volumes plugin. Prior using the
@@ -45,3 +45,14 @@ Clear all ongoing and completed events:
 .. prompt:: bash #
 
    ceph progress clear
+
+PG Recovery Event
+-----------------
+
+An event for each PG affected by a recovery operation can be shown in
+`ceph progress`. This is completely optional, and it is disabled by
+default due to CPU overhead:
+
+.. prompt:: bash #
+
+   ceph config set mgr mgr/progress/allow_pg_recovery_event true
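Per the new documentation section above, the event stays off until the option is flipped. A minimal end-to-end check, assuming a running cluster (both commands appear in the docs; ceph progress lists the module's current events):

    ceph config set mgr mgr/progress/allow_pg_recovery_event true
    ceph progress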
@@ -280,6 +280,11 @@ class TestProgress(MgrTestCase):
             self.mgr_cluster.mon_manager.raw_cluster_cmd(
                 'osd', 'in', str(osd['osd']))
 
+        # Unset allow_pg_recovery_event in case it's set to true
+        self.mgr_cluster.mon_manager.raw_cluster_cmd(
+            'config', 'set', 'mgr',
+            'mgr/progress/allow_pg_recovery_event', 'false')
+
         super(TestProgress, self).tearDown()
 
     def test_osd_healthy_recovery(self):
@@ -288,6 +293,10 @@ class TestProgress(MgrTestCase):
         placement, and we wait for the PG to get healthy in its new
         locations.
         """
+        self.mgr_cluster.mon_manager.raw_cluster_cmd(
+            'config', 'set', 'mgr',
+            'mgr/progress/allow_pg_recovery_event', 'true')
+
         ev = self._simulate_failure()
 
         # Wait for progress event to ultimately reach completion
@@ -301,6 +310,10 @@ class TestProgress(MgrTestCase):
         progress event to be correctly marked complete once there
         is no more data to move.
         """
+        self.mgr_cluster.mon_manager.raw_cluster_cmd(
+            'config', 'set', 'mgr',
+            'mgr/progress/allow_pg_recovery_event', 'true')
+
         ev = self._simulate_failure()
 
         self.mgr_cluster.mon_manager.remove_pool(self.POOL)
@@ -317,6 +330,10 @@ class TestProgress(MgrTestCase):
         It should create another event when the osd is marked in
         and cancel the one that is still ongoing.
         """
+        self.mgr_cluster.mon_manager.raw_cluster_cmd(
+            'config', 'set', 'mgr',
+            'mgr/progress/allow_pg_recovery_event', 'true')
+
         ev1 = self._simulate_failure()
 
         ev2 = self._simulate_back_in([0], ev1)
@@ -336,6 +353,9 @@ class TestProgress(MgrTestCase):
         coming in from other modules; however, once it is turned
         back on, creating an event should work as before.
         """
+        self.mgr_cluster.mon_manager.raw_cluster_cmd(
+            'config', 'set', 'mgr',
+            'mgr/progress/allow_pg_recovery_event', 'true')
 
         pool_size = 3
         self._setup_pool(size=pool_size)
@@ -378,3 +398,26 @@ class TestProgress(MgrTestCase):
             check_fn=lambda: self._is_inprogress_or_complete(ev1['id']),
             timeout=self.RECOVERY_PERIOD)
         self.assertTrue(self._is_quiet())
+
+    def test_default_progress_test(self):
+        """
+        The progress module disables the pg recovery event by default;
+        we test this to see if the default holds.
+        """
+        pool_size = 3
+        self._setup_pool(size=pool_size)
+        self._write_some_data(self.WRITE_PERIOD)
+
+        with self.recovery_backfill_disabled():
+            self.mgr_cluster.mon_manager.raw_cluster_cmd(
+                'osd', 'out', '0')
+
+        time.sleep(self.EVENT_CREATION_PERIOD/2)
+
+        with self.recovery_backfill_disabled():
+            self.mgr_cluster.mon_manager.raw_cluster_cmd(
+                'osd', 'in', '0')
+
+        time.sleep(self.EVENT_CREATION_PERIOD/2)
+
+        self.assertEqual(self._osd_in_out_events_count(), 0)
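The tests above use helpers such as _osd_in_out_events_count() that this diff does not show. A rough sketch of how such a count could be derived from ceph progress json output; the JSON shape and the message matching here are assumptions for illustration, not the actual test implementation:

    import json

    def osd_in_out_events_count(progress_json: str) -> int:
        # Count ongoing events whose message mentions an OSD being
        # marked in or out (the message wording is an assumption).
        events = json.loads(progress_json).get('events', [])
        return sum(1 for ev in events
                   if 'marked in' in ev.get('message', '')
                   or 'marked out' in ev.get('message', ''))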
@@ -450,6 +450,13 @@ class Module(MgrModule):
             'enabled',
             default=True,
             type='bool',
         ),
+        Option(
+            'allow_pg_recovery_event',
+            default=False,
+            type='bool',
+            desc='allow the module to show pg recovery progress',
+            runtime=True
+        )
     ]
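Declared this way, the new option flows through the standard mgr-module plumbing: config_notify() (whose loop header appears as context in the next hunk) re-reads each declared option and mirrors it onto an instance attribute, which is why self.allow_pg_recovery_event can be tested directly in the serve loop. A minimal sketch of that pattern; the loop body is an assumption based on the single context line visible in the diff:

    def config_notify(self):
        # Mirror each declared option onto self.<name> so the rest
        # of the module can read self.allow_pg_recovery_event directly.
        for opt in self.MODULE_OPTIONS:
            setattr(self,
                    opt['name'],
                    self.get_module_option(opt['name']))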
@@ -476,6 +483,7 @@ class Module(MgrModule):
         self.max_completed_events = 0
         self.sleep_interval = 0
         self.enabled = True
+        self.allow_pg_recovery_event = False
 
     def config_notify(self):
         for opt in self.MODULE_OPTIONS:
@@ -718,7 +726,8 @@ class Module(MgrModule):
                 self._dirty = False
 
             if self.enabled:
-                self._process_osdmap()
+                if self.allow_pg_recovery_event:
+                    self._process_osdmap()
                 self._process_pg_summary()
 
             self._shutdown.wait(timeout=self.sleep_interval)
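Distilled from the hunk above: with the module enabled, the expensive OSD-map scan now runs only when the new flag is set, while PG summary processing is unchanged. A standalone sketch of that gating under simplified names, not the module's actual code:

    import threading

    class ProgressLoop:
        def __init__(self) -> None:
            self.enabled = True
            self.allow_pg_recovery_event = False  # new default: off
            self.sleep_interval = 5
            self._shutdown = threading.Event()

        def _process_osdmap(self) -> None:
            pass  # placeholder: per-PG recovery event bookkeeping

        def _process_pg_summary(self) -> None:
            pass  # placeholder: progress updates for existing events

        def serve(self) -> None:
            while not self._shutdown.is_set():
                if self.enabled:
                    # the expensive scan is now opt-in
                    if self.allow_pg_recovery_event:
                        self._process_osdmap()
                    self._process_pg_summary()
                self._shutdown.wait(timeout=self.sleep_interval)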