Merge pull request #48574 from adk3798/debug-log-spam

mgr/cephadm: make logging of refresh metadata to debug logs configurable

Reviewed-by: Redouane Kachach <rkachach@redhat.com>
This commit is contained in:
Adam King 2023-01-13 11:59:26 -05:00 committed by GitHub
commit 06206df4ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 48 additions and 29 deletions

View File

@ -436,6 +436,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
default=True,
desc='Pass --cgroups=split when cephadm creates containers (currently podman only)'
),
Option(
'log_refresh_metadata',
type='bool',
default=False,
desc='Log all refresh metadata. Includes daemon, device, and host info collected regularly. Only has effect if logging at debug level'
),
]
def __init__(self, *args: Any, **kwargs: Any):
@ -512,6 +518,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
self.max_osd_draining_count = 10
self.device_enhanced_scan = False
self.cgroups_split = True
self.log_refresh_metadata = False
self.notify(NotifyType.mon_map, None)
self.config_notify()

View File

@ -38,7 +38,7 @@ class OfflineHostWatcher(threading.Thread):
def check_host(self, host: str) -> None:
if host not in self.mgr.offline_hosts:
try:
self.mgr.ssh.check_execute_command(host, ['true'])
self.mgr.ssh.check_execute_command(host, ['true'], log_command=self.mgr.log_refresh_metadata)
except Exception:
logger.debug(f'OfflineHostDetector: detected {host} to be offline')
# kick serve loop in case corrective action must be taken for offline host

View File

@ -327,7 +327,7 @@ class CephadmServe:
addr = self.mgr.inventory.get_addr(host) if host in self.mgr.inventory else host
out, err, code = self.mgr.wait_async(self._run_cephadm(
host, cephadmNoImage, 'check-host', [],
error_ok=True, no_fsid=True))
error_ok=True, no_fsid=True, log_output=self.mgr.log_refresh_metadata))
self.mgr.cache.update_last_host_check(host)
self.mgr.cache.save_host(host)
if code:
@ -343,7 +343,8 @@ class CephadmServe:
def _refresh_host_daemons(self, host: str) -> Optional[str]:
try:
ls = self.mgr.wait_async(self._run_cephadm_json(host, 'mon', 'ls', [], no_fsid=True))
ls = self.mgr.wait_async(self._run_cephadm_json(host, 'mon', 'ls', [],
no_fsid=True, log_output=self.mgr.log_refresh_metadata))
except OrchestratorError as e:
return str(e)
self.mgr._process_ls_output(host, ls)
@ -352,7 +353,7 @@ class CephadmServe:
def _refresh_facts(self, host: str) -> Optional[str]:
try:
val = self.mgr.wait_async(self._run_cephadm_json(
host, cephadmNoImage, 'gather-facts', [], no_fsid=True))
host, cephadmNoImage, 'gather-facts', [], no_fsid=True, log_output=self.mgr.log_refresh_metadata))
except OrchestratorError as e:
return str(e)
@ -371,13 +372,13 @@ class CephadmServe:
try:
try:
devices = self.mgr.wait_async(self._run_cephadm_json(host, 'osd', 'ceph-volume',
inventory_args))
inventory_args, log_output=self.mgr.log_refresh_metadata))
except OrchestratorError as e:
if 'unrecognized arguments: --filter-for-batch' in str(e):
rerun_args = inventory_args.copy()
rerun_args.remove('--filter-for-batch')
devices = self.mgr.wait_async(self._run_cephadm_json(host, 'osd', 'ceph-volume',
rerun_args))
rerun_args, log_output=self.mgr.log_refresh_metadata))
else:
raise
@ -395,7 +396,7 @@ class CephadmServe:
def _refresh_host_networks(self, host: str) -> Optional[str]:
try:
networks = self.mgr.wait_async(self._run_cephadm_json(
host, 'mon', 'list-networks', [], no_fsid=True))
host, 'mon', 'list-networks', [], no_fsid=True, log_output=self.mgr.log_refresh_metadata))
except OrchestratorError as e:
return str(e)
@ -1335,10 +1336,11 @@ class CephadmServe:
args: List[str],
no_fsid: Optional[bool] = False,
image: Optional[str] = "",
log_output: Optional[bool] = True,
) -> Any:
try:
out, err, code = await self._run_cephadm(
host, entity, command, args, no_fsid=no_fsid, image=image)
host, entity, command, args, no_fsid=no_fsid, image=image, log_output=log_output)
if code:
raise OrchestratorError(f'host {host} `cephadm {command}` returned {code}: {err}')
except Exception as e:
@ -1361,6 +1363,7 @@ class CephadmServe:
error_ok: Optional[bool] = False,
image: Optional[str] = "",
env_vars: Optional[List[str]] = None,
log_output: Optional[bool] = True,
) -> Tuple[List[str], List[str], int]:
"""
Run cephadm on the remote host with the given command + args
@ -1425,7 +1428,8 @@ class CephadmServe:
host, cmd, stdin=stdin, addr=addr)
if code == 2:
ls_cmd = ['ls', self.mgr.cephadm_binary_path]
out_ls, err_ls, code_ls = await self.mgr.ssh._execute_command(host, ls_cmd, addr=addr)
out_ls, err_ls, code_ls = await self.mgr.ssh._execute_command(host, ls_cmd, addr=addr,
log_command=log_output)
if code_ls == 2:
await self._deploy_cephadm_binary(host, addr)
out, err, code = await self.mgr.ssh._execute_command(
@ -1455,11 +1459,12 @@ class CephadmServe:
else:
assert False, 'unsupported mode'
self.log.debug(f'code: {code}')
if out:
self.log.debug(f'out: {out}')
if err:
self.log.debug(f'err: {err}')
if log_output:
self.log.debug(f'code: {code}')
if out:
self.log.debug(f'out: {out}')
if err:
self.log.debug(f'err: {err}')
if code and not error_ok:
raise OrchestratorError(
f'cephadm exited with an error code: {code}, stderr: {err}')

View File

@ -134,11 +134,13 @@ class SSHManager:
cmd: List[str],
stdin: Optional[str] = None,
addr: Optional[str] = None,
log_command: Optional[bool] = True,
) -> Tuple[str, str, int]:
conn = await self._remote_connection(host, addr)
sudo_prefix = "sudo " if self.mgr.ssh_user != 'root' else ""
cmd = sudo_prefix + " ".join(quote(x) for x in cmd)
logger.debug(f'Running command: {cmd}')
if log_command:
logger.debug(f'Running command: {cmd}')
try:
r = await conn.run(f'{sudo_prefix}true', check=True, timeout=5)
r = await conn.run(cmd, input=stdin)
@ -171,16 +173,18 @@ class SSHManager:
cmd: List[str],
stdin: Optional[str] = None,
addr: Optional[str] = None,
log_command: Optional[bool] = True
) -> Tuple[str, str, int]:
return self.mgr.wait_async(self._execute_command(host, cmd, stdin, addr))
return self.mgr.wait_async(self._execute_command(host, cmd, stdin, addr, log_command))
async def _check_execute_command(self,
host: str,
cmd: List[str],
stdin: Optional[str] = None,
addr: Optional[str] = None,
log_command: Optional[bool] = True
) -> str:
out, err, code = await self._execute_command(host, cmd, stdin, addr)
out, err, code = await self._execute_command(host, cmd, stdin, addr, log_command)
if code != 0:
msg = f'Command {cmd} failed. {err}'
logger.debug(msg)
@ -192,8 +196,9 @@ class SSHManager:
cmd: List[str],
stdin: Optional[str] = None,
addr: Optional[str] = None,
log_command: Optional[bool] = True,
) -> str:
return self.mgr.wait_async(self._check_execute_command(host, cmd, stdin, addr))
return self.mgr.wait_async(self._check_execute_command(host, cmd, stdin, addr, log_command))
async def _write_remote_file(self,
host: str,

View File

@ -117,13 +117,13 @@ def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str
[host]).stdout == f"Created osd(s) 1 on host '{host}'"
assert _run_cephadm.mock_calls == [
mock.call(host, 'osd', 'ceph-volume',
['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, image=''),
['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, image='', log_output=True),
mock.call(host, f'osd.{osd_id}', 'deploy',
['--name', f'osd.{osd_id}', '--meta-json', mock.ANY,
'--config-json', '-', '--osd-fsid', 'uuid'],
stdin=mock.ANY, image=''),
mock.call(host, 'osd', 'ceph-volume',
['--', 'raw', 'list', '--format', 'json'], no_fsid=False, image=''),
['--', 'raw', 'list', '--format', 'json'], no_fsid=False, image='', log_output=True),
]
dd = cephadm_module.cache.get_daemon(f'osd.{osd_id}', host=host)
assert dd.name() == f'osd.{osd_id}'
@ -792,11 +792,12 @@ class TestCephadm(object):
'test', 'osd', 'ceph-volume',
['--config-json', '-', '--', 'lvm', 'batch',
'--no-auto', '/dev/sdb', '--yes', '--no-systemd'],
env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True, stdin='{"config": "", "keyring": ""}')
env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True,
stdin='{"config": "", "keyring": ""}')
_run_cephadm.assert_any_call(
'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False)
'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
_run_cephadm.assert_any_call(
'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False)
'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
@mock.patch("cephadm.serve.CephadmServe._run_cephadm")
def test_apply_osd_save_non_collocated(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
@ -836,9 +837,9 @@ class TestCephadm(object):
env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=noncollocated'],
error_ok=True, stdin='{"config": "", "keyring": ""}')
_run_cephadm.assert_any_call(
'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False)
'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
_run_cephadm.assert_any_call(
'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False)
'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
@mock.patch("cephadm.module.SpecStore.save")
@ -1831,10 +1832,10 @@ Traceback (most recent call last):
assert _run_cephadm.mock_calls == [
mock.call('test', 'osd', 'ceph-volume',
['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='',
no_fsid=False),
no_fsid=False, log_output=False),
mock.call('test', 'osd', 'ceph-volume',
['--', 'inventory', '--format=json-pretty'], image='',
no_fsid=False),
no_fsid=False, log_output=False),
]
@mock.patch("cephadm.serve.CephadmServe._run_cephadm")

View File

@ -56,6 +56,7 @@ class FakeMgr:
self.tuned_profiles.profiles = profiles
self.ssh = SSHManager(self)
self.offline_hosts = []
self.log_refresh_metadata = False
def set_store(self, what: str, value: str):
raise SaveError(f'{what}: {value}')
@ -138,7 +139,7 @@ class TestTunedProfiles:
tp = TunedProfileUtils(mgr)
tp._remove_stray_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2]))
calls = [
mock.call('a', ['ls', SYSCTL_DIR]),
mock.call('a', ['ls', SYSCTL_DIR], log_command=False),
mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']),
mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']),
mock.call('a', ['sysctl', '--system'])

View File

@ -70,7 +70,7 @@ class TunedProfileUtils():
if host in self.mgr.offline_hosts:
return
cmd = ['ls', SYSCTL_DIR]
found_files = self.mgr.ssh.check_execute_command(host, cmd).split('\n')
found_files = self.mgr.ssh.check_execute_command(host, cmd, log_command=self.mgr.log_refresh_metadata).split('\n')
found_files = [s.strip() for s in found_files]
profile_names: List[str] = sum([[*p] for p in profiles], []) # extract all profiles names
profile_names = list(set(profile_names)) # remove duplicates