diff --git a/src/pybind/mgr/cephadm/autotune.py b/src/pybind/mgr/cephadm/autotune.py
index 72ebcd66064..0365c76a868 100644
--- a/src/pybind/mgr/cephadm/autotune.py
+++ b/src/pybind/mgr/cephadm/autotune.py
@@ -32,24 +32,38 @@ class MemoryAutotuner(object):
     def tune(self) -> Tuple[Optional[int], List[str]]:
         tuned_osds: List[str] = []
         total = self.total_mem
+        logger.debug('Autotuning OSD memory with given parameters:\n'
+                     f'Total memory: {total}\nDaemons: {self.daemons}')
         for d in self.daemons:
             if d.daemon_type == 'mds':
-                total -= self.config_get(d.name(), 'mds_cache_memory_limit')
+                mds_mem = self.config_get(d.name(), 'mds_cache_memory_limit')
+                logger.debug(f'Subtracting {mds_mem} from total for mds daemon')
+                total -= mds_mem
+                logger.debug(f'new total: {total}')
                 continue
             if d.daemon_type != 'osd':
                 assert d.daemon_type
-                total -= max(
+                daemon_mem = max(
                     self.min_size_by_type.get(d.daemon_type, self.default_size),
                     d.memory_usage or 0
                 )
+                logger.debug(f'Subtracting {daemon_mem} from total for {d.daemon_type} daemon')
+                total -= daemon_mem
+                logger.debug(f'new total: {total}')
                 continue
             if not self.config_get(d.name(), 'osd_memory_target_autotune'):
-                total -= self.config_get(d.name(), 'osd_memory_target')
+                osd_mem = self.config_get(d.name(), 'osd_memory_target')
+                logger.debug('osd_memory_target_autotune disabled. '
+                             f'Subtracting {osd_mem} from total for osd daemon')
+                total -= osd_mem
+                logger.debug(f'new total: {total}')
                 continue
             tuned_osds.append(d.name())
         if total < 0:
             return None, []
         if not tuned_osds:
             return None, []
+        logger.debug(f'Final total is {total} to be split among {len(tuned_osds)} OSDs')
         per = total // len(tuned_osds)
+        logger.debug(f'Result is {per} per OSD')
         return int(per), tuned_osds
diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py
index 4c7889bd18f..b8a8d4e5208 100644
--- a/src/pybind/mgr/cephadm/serve.py
+++ b/src/pybind/mgr/cephadm/serve.py
@@ -195,6 +195,9 @@ class CephadmServe:
             val = None
         else:
             total_mem *= 1024   # kb -> bytes
+            self.log.debug(f'Autotuning memory for host {host} with '
+                           f'{total_mem} total bytes of memory and '
+                           f'{self.mgr.autotune_memory_target_ratio} target ratio')
             total_mem *= self.mgr.autotune_memory_target_ratio
             a = MemoryAutotuner(
                 daemons=self.mgr.cache.get_daemons_by_host(host),
@@ -231,6 +234,9 @@ class CephadmServe:
             # options as users may be using them. Since there is no way to set autotuning
             # on/off at a host level, best we can do is check if it is globally on.
             if self.mgr.get_foreign_ceph_option('osd', 'osd_memory_target_autotune'):
+                self.mgr.log.debug(f'Removing osd_memory_target for OSDs on {host}'
+                                   ' as either there were no OSDs to tune or the '
+                                   'per OSD memory calculation result was <= 0')
                 self.mgr.check_mon_command({
                     'prefix': 'config rm',
                     'who': f'osd/host:{host.split(".")[0]}',
diff --git a/src/pybind/mgr/cephadm/tests/test_autotune.py b/src/pybind/mgr/cephadm/tests/test_autotune.py
index 7994c390a7e..bf6f3d5ef59 100644
--- a/src/pybind/mgr/cephadm/tests/test_autotune.py
+++ b/src/pybind/mgr/cephadm/tests/test_autotune.py
@@ -57,7 +57,31 @@ from orchestrator import DaemonDescription
             ],
             {},
             60 * 1024 * 1024 * 1024,
-        )
+        ),
+        (  # Taken from an actual user case
+            int(32827840 * 1024 * 0.7),
+            [
+                DaemonDescription('crash', 'a', 'host1'),
+                DaemonDescription('grafana', 'a', 'host1'),
+                DaemonDescription('mds', 'a', 'host1'),
+                DaemonDescription('mds', 'b', 'host1'),
+                DaemonDescription('mds', 'c', 'host1'),
+                DaemonDescription('mgr', 'a', 'host1'),
+                DaemonDescription('mon', 'a', 'host1'),
+                DaemonDescription('node-exporter', 'a', 'host1'),
+                DaemonDescription('osd', '1', 'host1'),
+                DaemonDescription('osd', '2', 'host1'),
+                DaemonDescription('osd', '3', 'host1'),
+                DaemonDescription('osd', '4', 'host1'),
+                DaemonDescription('prometheus', 'a', 'host1'),
+            ],
+            {
+                'mds.a': 4 * 1024 * 1024 * 1024,  # 4294967296
+                'mds.b': 4 * 1024 * 1024 * 1024,
+                'mds.c': 4 * 1024 * 1024 * 1024,
+            },
+            480485376,
+        ),
     ])
 def test_autotune(total, daemons, config, result):
     def fake_getter(who, opt):
@@ -69,6 +93,6 @@ def test_autotune(total, daemons, config, result):
         if opt == 'osd_memory_target':
             return config.get(who, 4 * 1024 * 1024 * 1024)
         if opt == 'mds_cache_memory_limit':
-            return 16 * 1024 * 1024 * 1024
+            return config.get(who, 16 * 1024 * 1024 * 1024)
 
     a = MemoryAutotuner(