From d20f57000b52d1f17e2cdf666b39fef48cc288cd Mon Sep 17 00:00:00 2001 From: Sridhar Seshasayee Date: Fri, 14 Feb 2020 16:43:52 +0530 Subject: [PATCH] osd/OSD: Log slow ops/types to cluster logs In addition to logging slow ops in mon and osd specific log files, re-introduce logging the same information along with slow op type details to cluster logs as well. The objective is to make debugging slow ops easier. Modify the log whitelisting string to "slow request" within qa suites in order to make the search for the new warning log message within the cluster log successful. This should not cause any issue as it's a substring of the earlier string. Fixes: https://tracker.ceph.com/issues/43975 Signed-off-by: Sridhar Seshasayee --- .../all/mon-memory-target-compliance.yaml.disabled | 2 +- qa/suites/rados/singleton/all/pg-autoscaler.yaml | 2 +- qa/tasks/thrashosds-health.yaml | 2 +- src/osd/OSD.cc | 11 ++++++++--- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled b/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled index 56de322ebc4..7f9dd49592c 100644 --- a/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled +++ b/qa/suites/rados/singleton/all/mon-memory-target-compliance.yaml.disabled @@ -53,7 +53,7 @@ tasks: - \(SLOW_OPS\) - \(REQUEST_SLOW\) - \(TOO_FEW_PGS\) - - slow requests + - slow request - interactive: - parallel: - log-mon-rss diff --git a/qa/suites/rados/singleton/all/pg-autoscaler.yaml b/qa/suites/rados/singleton/all/pg-autoscaler.yaml index c55fc5072f2..72e18d52f68 100644 --- a/qa/suites/rados/singleton/all/pg-autoscaler.yaml +++ b/qa/suites/rados/singleton/all/pg-autoscaler.yaml @@ -31,7 +31,7 @@ tasks: - \(SLOW_OPS\) - \(REQUEST_SLOW\) - \(TOO_FEW_PGS\) - - slow requests + - slow request - workunit: clients: all: diff --git a/qa/tasks/thrashosds-health.yaml b/qa/tasks/thrashosds-health.yaml index d3a954cdab5..914f6e25ee5 100644 --- a/qa/tasks/thrashosds-health.yaml +++ b/qa/tasks/thrashosds-health.yaml @@ -12,4 +12,4 @@ overrides: - \(SLOW_OPS\) - \(REQUEST_SLOW\) - \(TOO_FEW_PGS\) - - slow requests + - slow request diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 76113d11468..1c122163774 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -7596,9 +7596,14 @@ vector OSD::get_health_metrics() TrackedOpRef oldest_op; auto count_slow_ops = [&](TrackedOp& op) { if (op.get_initiated() < too_old) { - lgeneric_subdout(cct,osd,20) << "slow op " << op.get_desc() - << " initiated " - << op.get_initiated() << dendl; + stringstream ss; + ss << "slow request " << op.get_desc() + << " initiated " + << op.get_initiated() + << " currently " + << op.state_string(); + lgeneric_subdout(cct,osd,20) << ss.str() << dendl; + clog->warn() << ss.str(); slow++; if (!oldest_op || op.get_initiated() < oldest_op->get_initiated()) { oldest_op = &op;