Mirror of https://github.com/ceph/ceph (synced 2025-02-19 00:47:49 +00:00)
Merge pull request #48574 from adk3798/debug-log-spam
mgr/cephadm: make logging refresh metadata to debug logs configurable

Reviewed-by: Redouane Kachach <rkachach@redhat.com>
This commit is contained in:
commit
06206df4ea
@ -436,6 +436,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
|
||||
default=True,
|
||||
desc='Pass --cgroups=split when cephadm creates containers (currently podman only)'
|
||||
),
|
||||
Option(
|
||||
'log_refresh_metadata',
|
||||
type='bool',
|
||||
default=False,
|
||||
desc='Log all refresh metadata. Includes daemon, device, and host info collected regularly. Only has effect if logging at debug level'
|
||||
),
|
||||
]
|
||||
|
||||
def __init__(self, *args: Any, **kwargs: Any):
|
||||
@ -512,6 +518,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
|
||||
self.max_osd_draining_count = 10
|
||||
self.device_enhanced_scan = False
|
||||
self.cgroups_split = True
|
||||
self.log_refresh_metadata = False
|
||||
|
||||
self.notify(NotifyType.mon_map, None)
|
||||
self.config_notify()
|
||||
|
@ -38,7 +38,7 @@ class OfflineHostWatcher(threading.Thread):
|
||||
def check_host(self, host: str) -> None:
|
||||
if host not in self.mgr.offline_hosts:
|
||||
try:
|
||||
self.mgr.ssh.check_execute_command(host, ['true'])
|
||||
self.mgr.ssh.check_execute_command(host, ['true'], log_command=self.mgr.log_refresh_metadata)
|
||||
except Exception:
|
||||
logger.debug(f'OfflineHostDetector: detected {host} to be offline')
|
||||
# kick serve loop in case corrective action must be taken for offline host
|
||||
|
@ -327,7 +327,7 @@ class CephadmServe:
|
||||
addr = self.mgr.inventory.get_addr(host) if host in self.mgr.inventory else host
|
||||
out, err, code = self.mgr.wait_async(self._run_cephadm(
|
||||
host, cephadmNoImage, 'check-host', [],
|
||||
error_ok=True, no_fsid=True))
|
||||
error_ok=True, no_fsid=True, log_output=self.mgr.log_refresh_metadata))
|
||||
self.mgr.cache.update_last_host_check(host)
|
||||
self.mgr.cache.save_host(host)
|
||||
if code:
|
||||
@ -343,7 +343,8 @@ class CephadmServe:
|
||||
|
||||
def _refresh_host_daemons(self, host: str) -> Optional[str]:
|
||||
try:
|
||||
ls = self.mgr.wait_async(self._run_cephadm_json(host, 'mon', 'ls', [], no_fsid=True))
|
||||
ls = self.mgr.wait_async(self._run_cephadm_json(host, 'mon', 'ls', [],
|
||||
no_fsid=True, log_output=self.mgr.log_refresh_metadata))
|
||||
except OrchestratorError as e:
|
||||
return str(e)
|
||||
self.mgr._process_ls_output(host, ls)
|
||||
@ -352,7 +353,7 @@ class CephadmServe:
|
||||
def _refresh_facts(self, host: str) -> Optional[str]:
|
||||
try:
|
||||
val = self.mgr.wait_async(self._run_cephadm_json(
|
||||
host, cephadmNoImage, 'gather-facts', [], no_fsid=True))
|
||||
host, cephadmNoImage, 'gather-facts', [], no_fsid=True, log_output=self.mgr.log_refresh_metadata))
|
||||
except OrchestratorError as e:
|
||||
return str(e)
|
||||
|
||||
@ -371,13 +372,13 @@ class CephadmServe:
|
||||
try:
|
||||
try:
|
||||
devices = self.mgr.wait_async(self._run_cephadm_json(host, 'osd', 'ceph-volume',
|
||||
inventory_args))
|
||||
inventory_args, log_output=self.mgr.log_refresh_metadata))
|
||||
except OrchestratorError as e:
|
||||
if 'unrecognized arguments: --filter-for-batch' in str(e):
|
||||
rerun_args = inventory_args.copy()
|
||||
rerun_args.remove('--filter-for-batch')
|
||||
devices = self.mgr.wait_async(self._run_cephadm_json(host, 'osd', 'ceph-volume',
|
||||
rerun_args))
|
||||
rerun_args, log_output=self.mgr.log_refresh_metadata))
|
||||
else:
|
||||
raise
|
||||
|
||||
@ -395,7 +396,7 @@ class CephadmServe:
|
||||
def _refresh_host_networks(self, host: str) -> Optional[str]:
|
||||
try:
|
||||
networks = self.mgr.wait_async(self._run_cephadm_json(
|
||||
host, 'mon', 'list-networks', [], no_fsid=True))
|
||||
host, 'mon', 'list-networks', [], no_fsid=True, log_output=self.mgr.log_refresh_metadata))
|
||||
except OrchestratorError as e:
|
||||
return str(e)
|
||||
|
||||
@ -1335,10 +1336,11 @@ class CephadmServe:
|
||||
args: List[str],
|
||||
no_fsid: Optional[bool] = False,
|
||||
image: Optional[str] = "",
|
||||
log_output: Optional[bool] = True,
|
||||
) -> Any:
|
||||
try:
|
||||
out, err, code = await self._run_cephadm(
|
||||
host, entity, command, args, no_fsid=no_fsid, image=image)
|
||||
host, entity, command, args, no_fsid=no_fsid, image=image, log_output=log_output)
|
||||
if code:
|
||||
raise OrchestratorError(f'host {host} `cephadm {command}` returned {code}: {err}')
|
||||
except Exception as e:
|
||||
@ -1361,6 +1363,7 @@ class CephadmServe:
|
||||
error_ok: Optional[bool] = False,
|
||||
image: Optional[str] = "",
|
||||
env_vars: Optional[List[str]] = None,
|
||||
log_output: Optional[bool] = True,
|
||||
) -> Tuple[List[str], List[str], int]:
|
||||
"""
|
||||
Run cephadm on the remote host with the given command + args
|
||||
@ -1425,7 +1428,8 @@ class CephadmServe:
|
||||
host, cmd, stdin=stdin, addr=addr)
|
||||
if code == 2:
|
||||
ls_cmd = ['ls', self.mgr.cephadm_binary_path]
|
||||
out_ls, err_ls, code_ls = await self.mgr.ssh._execute_command(host, ls_cmd, addr=addr)
|
||||
out_ls, err_ls, code_ls = await self.mgr.ssh._execute_command(host, ls_cmd, addr=addr,
|
||||
log_command=log_output)
|
||||
if code_ls == 2:
|
||||
await self._deploy_cephadm_binary(host, addr)
|
||||
out, err, code = await self.mgr.ssh._execute_command(
|
||||
@ -1455,11 +1459,12 @@ class CephadmServe:
|
||||
else:
|
||||
assert False, 'unsupported mode'
|
||||
|
||||
self.log.debug(f'code: {code}')
|
||||
if out:
|
||||
self.log.debug(f'out: {out}')
|
||||
if err:
|
||||
self.log.debug(f'err: {err}')
|
||||
if log_output:
|
||||
self.log.debug(f'code: {code}')
|
||||
if out:
|
||||
self.log.debug(f'out: {out}')
|
||||
if err:
|
||||
self.log.debug(f'err: {err}')
|
||||
if code and not error_ok:
|
||||
raise OrchestratorError(
|
||||
f'cephadm exited with an error code: {code}, stderr: {err}')
|
||||
|
@ -134,11 +134,13 @@ class SSHManager:
|
||||
cmd: List[str],
|
||||
stdin: Optional[str] = None,
|
||||
addr: Optional[str] = None,
|
||||
log_command: Optional[bool] = True,
|
||||
) -> Tuple[str, str, int]:
|
||||
conn = await self._remote_connection(host, addr)
|
||||
sudo_prefix = "sudo " if self.mgr.ssh_user != 'root' else ""
|
||||
cmd = sudo_prefix + " ".join(quote(x) for x in cmd)
|
||||
logger.debug(f'Running command: {cmd}')
|
||||
if log_command:
|
||||
logger.debug(f'Running command: {cmd}')
|
||||
try:
|
||||
r = await conn.run(f'{sudo_prefix}true', check=True, timeout=5)
|
||||
r = await conn.run(cmd, input=stdin)
|
||||
@ -171,16 +173,18 @@ class SSHManager:
|
||||
cmd: List[str],
|
||||
stdin: Optional[str] = None,
|
||||
addr: Optional[str] = None,
|
||||
log_command: Optional[bool] = True
|
||||
) -> Tuple[str, str, int]:
|
||||
return self.mgr.wait_async(self._execute_command(host, cmd, stdin, addr))
|
||||
return self.mgr.wait_async(self._execute_command(host, cmd, stdin, addr, log_command))
|
||||
|
||||
async def _check_execute_command(self,
|
||||
host: str,
|
||||
cmd: List[str],
|
||||
stdin: Optional[str] = None,
|
||||
addr: Optional[str] = None,
|
||||
log_command: Optional[bool] = True
|
||||
) -> str:
|
||||
out, err, code = await self._execute_command(host, cmd, stdin, addr)
|
||||
out, err, code = await self._execute_command(host, cmd, stdin, addr, log_command)
|
||||
if code != 0:
|
||||
msg = f'Command {cmd} failed. {err}'
|
||||
logger.debug(msg)
|
||||
@ -192,8 +196,9 @@ class SSHManager:
|
||||
cmd: List[str],
|
||||
stdin: Optional[str] = None,
|
||||
addr: Optional[str] = None,
|
||||
log_command: Optional[bool] = True,
|
||||
) -> str:
|
||||
return self.mgr.wait_async(self._check_execute_command(host, cmd, stdin, addr))
|
||||
return self.mgr.wait_async(self._check_execute_command(host, cmd, stdin, addr, log_command))
|
||||
|
||||
async def _write_remote_file(self,
|
||||
host: str,
|
||||
|
@ -117,13 +117,13 @@ def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str
|
||||
[host]).stdout == f"Created osd(s) 1 on host '{host}'"
|
||||
assert _run_cephadm.mock_calls == [
|
||||
mock.call(host, 'osd', 'ceph-volume',
|
||||
['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, image=''),
|
||||
['--', 'lvm', 'list', '--format', 'json'], no_fsid=False, image='', log_output=True),
|
||||
mock.call(host, f'osd.{osd_id}', 'deploy',
|
||||
['--name', f'osd.{osd_id}', '--meta-json', mock.ANY,
|
||||
'--config-json', '-', '--osd-fsid', 'uuid'],
|
||||
stdin=mock.ANY, image=''),
|
||||
mock.call(host, 'osd', 'ceph-volume',
|
||||
['--', 'raw', 'list', '--format', 'json'], no_fsid=False, image=''),
|
||||
['--', 'raw', 'list', '--format', 'json'], no_fsid=False, image='', log_output=True),
|
||||
]
|
||||
dd = cephadm_module.cache.get_daemon(f'osd.{osd_id}', host=host)
|
||||
assert dd.name() == f'osd.{osd_id}'
|
||||
@ -792,11 +792,12 @@ class TestCephadm(object):
|
||||
'test', 'osd', 'ceph-volume',
|
||||
['--config-json', '-', '--', 'lvm', 'batch',
|
||||
'--no-auto', '/dev/sdb', '--yes', '--no-systemd'],
|
||||
env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True, stdin='{"config": "", "keyring": ""}')
|
||||
env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=foo'], error_ok=True,
|
||||
stdin='{"config": "", "keyring": ""}')
|
||||
_run_cephadm.assert_any_call(
|
||||
'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False)
|
||||
'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
|
||||
_run_cephadm.assert_any_call(
|
||||
'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False)
|
||||
'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
|
||||
|
||||
@mock.patch("cephadm.serve.CephadmServe._run_cephadm")
|
||||
def test_apply_osd_save_non_collocated(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
|
||||
@ -836,9 +837,9 @@ class TestCephadm(object):
|
||||
env_vars=['CEPH_VOLUME_OSDSPEC_AFFINITY=noncollocated'],
|
||||
error_ok=True, stdin='{"config": "", "keyring": ""}')
|
||||
_run_cephadm.assert_any_call(
|
||||
'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False)
|
||||
'test', 'osd', 'ceph-volume', ['--', 'lvm', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
|
||||
_run_cephadm.assert_any_call(
|
||||
'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False)
|
||||
'test', 'osd', 'ceph-volume', ['--', 'raw', 'list', '--format', 'json'], image='', no_fsid=False, log_output=True)
|
||||
|
||||
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
|
||||
@mock.patch("cephadm.module.SpecStore.save")
|
||||
@ -1831,10 +1832,10 @@ Traceback (most recent call last):
|
||||
assert _run_cephadm.mock_calls == [
|
||||
mock.call('test', 'osd', 'ceph-volume',
|
||||
['--', 'inventory', '--format=json-pretty', '--filter-for-batch'], image='',
|
||||
no_fsid=False),
|
||||
no_fsid=False, log_output=False),
|
||||
mock.call('test', 'osd', 'ceph-volume',
|
||||
['--', 'inventory', '--format=json-pretty'], image='',
|
||||
no_fsid=False),
|
||||
no_fsid=False, log_output=False),
|
||||
]
|
||||
|
||||
@mock.patch("cephadm.serve.CephadmServe._run_cephadm")
|
||||
|
@ -56,6 +56,7 @@ class FakeMgr:
|
||||
self.tuned_profiles.profiles = profiles
|
||||
self.ssh = SSHManager(self)
|
||||
self.offline_hosts = []
|
||||
self.log_refresh_metadata = False
|
||||
|
||||
def set_store(self, what: str, value: str):
|
||||
raise SaveError(f'{what}: {value}')
|
||||
@ -138,7 +139,7 @@ class TestTunedProfiles:
|
||||
tp = TunedProfileUtils(mgr)
|
||||
tp._remove_stray_tuned_profiles('a', self.profiles_to_calls(tp, [self.tspec1, self.tspec2]))
|
||||
calls = [
|
||||
mock.call('a', ['ls', SYSCTL_DIR]),
|
||||
mock.call('a', ['ls', SYSCTL_DIR], log_command=False),
|
||||
mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/p3-cephadm-tuned-profile.conf']),
|
||||
mock.call('a', ['rm', '-f', f'{SYSCTL_DIR}/who-cephadm-tuned-profile.conf']),
|
||||
mock.call('a', ['sysctl', '--system'])
|
||||
|
@ -70,7 +70,7 @@ class TunedProfileUtils():
|
||||
if host in self.mgr.offline_hosts:
|
||||
return
|
||||
cmd = ['ls', SYSCTL_DIR]
|
||||
found_files = self.mgr.ssh.check_execute_command(host, cmd).split('\n')
|
||||
found_files = self.mgr.ssh.check_execute_command(host, cmd, log_command=self.mgr.log_refresh_metadata).split('\n')
|
||||
found_files = [s.strip() for s in found_files]
|
||||
profile_names: List[str] = sum([[*p] for p in profiles], []) # extract all profiles names
|
||||
profile_names = list(set(profile_names)) # remove duplicates
|
||||
|
Loading…
Reference in New Issue
Block a user