mgr: Move get_rates_from_data to mgr_util.py (as get_time_series_rates)

The function calculates the rate of change between consecutive values in a time series list. It previously lived in the dashboard, which is why it has doctests. To be able to run those doctests as part of the manager utilities, a bit of tox magic was added.

Fixes: https://tracker.ceph.com/issues/40365
Signed-off-by: Stephan Müller <smueller@suse.com>
2025-02-23 11:07:35 +00:00 · 2019-06-14 15:45:53 +02:00 · 2019-06-14 15:45:53 +02:00 · 34976bae14
commit 34976bae14
parent f205d38c2c
4 changed files with 139 additions and 58 deletions
--- a/src/pybind/mgr/dashboard/controllers/osd.py
+++ b/src/pybind/mgr/dashboard/controllers/osd.py
@ -2,6 +2,9 @@
 from __future__ import absolute_import
 import json
 import logging
+
+from mgr_util import get_most_recent_rate
+
 from . import ApiController, RESTController, Endpoint, ReadPermission, UpdatePermission
 from .. import mgr
 from ..security import Scope
@ -48,8 +51,9 @@ class Osd(RESTController):
                continue
            for stat in ['osd.op_w', 'osd.op_in_bytes', 'osd.op_r', 'osd.op_out_bytes']:
                prop = stat.split('.')[1]
-                osd['stats'][prop] = CephService.get_rate('osd', osd_spec, stat)
-                osd['stats_history'][prop] = CephService.get_rates('osd', osd_spec, stat)
+                rates = CephService.get_rates('osd', osd_spec, stat)
+                osd['stats'][prop] = get_most_recent_rate(rates)
+                osd['stats_history'][prop] = rates
            # Gauge stats
            for stat in ['osd.numpg', 'osd.stat_bytes', 'osd.stat_bytes_used']:
                osd['stats'][stat.split('.')[1]] = mgr.get_latest('osd', osd_spec, stat)
--- a/src/pybind/mgr/dashboard/services/ceph_service.py
+++ b/src/pybind/mgr/dashboard/services/ceph_service.py
@ -7,15 +7,7 @@ from six.moves import reduce
 import rados

 from mgr_module import CommandResult
-
-try:
-    from more_itertools import pairwise
-except ImportError:
-    def pairwise(iterable):
-        from itertools import tee
-        a, b = tee(iterable)
-        next(b, None)
-        return zip(a, b)
+from mgr_util import get_time_series_rates, get_most_recent_rate

 from .. import mgr

@ -116,16 +108,12 @@ class CephService(object):
            stats = pool_stats[pool['pool']]
            s = {}

-            def get_rate(series):
-                if len(series) >= 2:
-                    return differentiate(*list(series)[-2:])
-                return 0
-
            for stat_name, stat_series in stats.items():
+                rates = get_time_series_rates(stat_series)
                s[stat_name] = {
                    'latest': stat_series[0][1],
-                    'rate': get_rate(stat_series),
-                    'rates': get_rates_from_data(stat_series)
+                    'rate': get_most_recent_rate(rates),
+                    'rates': rates
                }
            pool['stats'] = s
            pools_w_stats.append(pool)
@ -225,16 +213,12 @@ class CephService(object):
        :return: the derivative of mgr.get_counter()
        :rtype: list[tuple[int, float]]"""
        data = mgr.get_counter(svc_type, svc_name, path)[path]
-        return get_rates_from_data(data)
+        return get_time_series_rates(data)

    @classmethod
    def get_rate(cls, svc_type, svc_name, path):
        """returns most recent rate"""
-        data = mgr.get_counter(svc_type, svc_name, path)[path]
-
-        if data and len(data) > 1:
-            return differentiate(*data[-2:])
-        return 0.0
+        return get_most_recent_rate(cls.get_rates(svc_type, svc_name, path))

    @classmethod
    def get_client_perf(cls):
@ -300,33 +284,3 @@ class CephService(object):
            'statuses': pg_summary['all'],
            'pgs_per_osd': pgs_per_osd,
        }
-
-
-def get_rates_from_data(data):
-    """
-    >>> get_rates_from_data([])
-    [(0, 0.0)]
-    >>> get_rates_from_data([[1, 42]])
-    [(1, 0.0)]
-    >>> get_rates_from_data([[0, 100], [2, 101], [3, 100], [4, 100]])
-    [(2, 0.5), (3, 1.0), (4, 0.0)]
-    """
-    if not data:
-        return [(0, 0.0)]
-    if len(data) == 1:
-        return [(data[0][0], 0.0)]
-    return [(data2[0], differentiate(data1, data2)) for data1, data2 in pairwise(data)]
-
-
-def differentiate(data1, data2):
-    """
-    >>> times = [0, 2]
-    >>> values = [100, 101]
-    >>> differentiate(*zip(times, values))
-    0.5
-    >>> times = [0, 2]
-    >>> values = [100, 99]
-    >>> differentiate(*zip(times, values))
-    0.5
-    """
-    return abs((data2[1] - data1[1]) / float(data2[0] - data1[0]))
--- a/src/pybind/mgr/dashboard/services/tcmu_service.py
+++ b/src/pybind/mgr/dashboard/services/tcmu_service.py
@ -1,3 +1,5 @@
+from mgr_util import get_most_recent_rate
+
 from dashboard.services.ceph_service import CephService
 from .. import mgr

@ -60,10 +62,9 @@ class TcmuService(object):
                    image['stats_history'] = {}
                    for s in ['rd', 'wr', 'rd_bytes', 'wr_bytes']:
                        perf_key = "{}{}".format(perf_key_prefix, s)
-                        image['stats'][s] = CephService.get_rate(
-                            'tcmu-runner', service_id, perf_key)
-                        image['stats_history'][s] = CephService.get_rates(
-                            'tcmu-runner', service_id, perf_key)
+                        rates = CephService.get_rates('tcmu-runner', service_id, perf_key)
+                        image['stats'][s] = get_most_recent_rate(rates)
+                        image['stats_history'][s] = rates
            else:
                daemon['non_optimized_paths'] += 1
                image['non_optimized_paths'].append(hostname)
--- a/src/pybind/mgr/mgr_util.py
+++ b/src/pybind/mgr/mgr_util.py
@ -183,3 +183,125 @@ def verify_tls_files(cert_fname, pkey_fname):
        logger.warning(
            'Private key {} and certificate {} do not match up: {}'.format(
                pkey_fname, cert_fname, str(e)))
+
+def get_most_recent_rate(rates):
+    """ Get most recent rate from rates
+
+    :param rates: The derivative between all time series data points [time in seconds, value]
+    :type rates: list[tuple[int, float]]
+
+    :return: The last derivative or 0.0 if none exists
+    :rtype: float
+
+    >>> get_most_recent_rate(None)
+    0.0
+    >>> get_most_recent_rate([])
+    0.0
+    >>> get_most_recent_rate([(1, -2.0)])
+    -2.0
+    >>> get_most_recent_rate([(1, 2.0), (2, 1.5), (3, 5.0)])
+    5.0
+    """
+    if not rates:
+        return 0.0
+    return rates[-1][1]
+
+def get_time_series_rates(data):
+    """ Rates from time series data
+
+    :param data: Time series data [time in seconds, value]
+    :type data: list[tuple[int, float]]
+
+    :return: The derivative between all time series data points [time in seconds, value]
+    :rtype: list[tuple[int, float]]
+
+    >>> logger.debug = lambda s,x,y: print(s % (x,y))
+    >>> get_time_series_rates([])
+    []
+    >>> get_time_series_rates([[0, 1], [1, 3]])
+    [(1, 2.0)]
+    >>> get_time_series_rates([[0, 2], [0, 3], [0, 1], [1, 2], [1, 3]])
+    Duplicate timestamp in time series data: [0, 2], [0, 3]
+    Duplicate timestamp in time series data: [0, 3], [0, 1]
+    Duplicate timestamp in time series data: [1, 2], [1, 3]
+    [(1, 2.0)]
+    >>> get_time_series_rates([[1, 1], [2, 3], [4, 11], [5, 16], [6, 22]])
+    [(2, 2.0), (4, 4.0), (5, 5.0), (6, 6.0)]
+    """
+    data = _filter_time_series(data)
+    if not data:
+        return []
+    return [(data2[0], _derivative(data1, data2)) for data1, data2 in
+            _pairwise(data)]
+
+def _filter_time_series(data):
+    """ Filters time series data
+
+    Filters out samples with the same timestamp in given time series data.
+    It also enforces the list to contain at least two samples.
+
+    All filtered values will be shown in the debug log. If values were filtered it's a bug in the
+    time series data collector, please report it.
+
+    :param data: Time series data [time in seconds, value]
+    :type data: list[tuple[int, float]]
+
+    :return: Filtered time series data [time in seconds, value]
+    :rtype: list[tuple[int, float]]
+
+    >>> logger.debug = lambda s,x,y: print(s % (x,y))
+    >>> _filter_time_series([])
+    []
+    >>> _filter_time_series([[1, 42]])
+    []
+    >>> _filter_time_series([[10, 2], [10, 3]])
+    Duplicate timestamp in time series data: [10, 2], [10, 3]
+    []
+    >>> _filter_time_series([[0, 1], [1, 2]])
+    [[0, 1], [1, 2]]
+    >>> _filter_time_series([[0, 2], [0, 3], [0, 1], [1, 2], [1, 3]])
+    Duplicate timestamp in time series data: [0, 2], [0, 3]
+    Duplicate timestamp in time series data: [0, 3], [0, 1]
+    Duplicate timestamp in time series data: [1, 2], [1, 3]
+    [[0, 1], [1, 3]]
+    >>> _filter_time_series([[1, 1], [2, 3], [4, 11], [5, 16], [6, 22]])
+    [[1, 1], [2, 3], [4, 11], [5, 16], [6, 22]]
+    """
+    filtered = []
+    for i in range(len(data) - 1):
+        if data[i][0] == data[i + 1][0]:  # Same timestamp
+            logger.debug("Duplicate timestamp in time series data: %s, %s", data[i], data[i + 1])
+            continue
+        filtered.append(data[i])
+    if not filtered:
+        return []
+    filtered.append(data[-1])
+    return filtered
+
+def _derivative(p1, p2):
+    """ Derivative between two time series data points
+
+    :param p1: Time series data [time in seconds, value]
+    :type p1: tuple[int, float]
+    :param p2: Time series data [time in seconds, value]
+    :type p2: tuple[int, float]
+
+    :return: Derivative between both points
+    :rtype: float
+
+    >>> _derivative([0, 0], [2, 1])
+    0.5
+    >>> _derivative([0, 1], [2, 0])
+    -0.5
+    >>> _derivative([0, 0], [3, 1])
+    0.3333333333333333
+    """
+    return (p2[1] - p1[1]) / float(p2[0] - p1[0])
+
+def _pairwise(iterable):
+    it = iter(iterable)
+    a = next(it, None)
+
+    for b in it:
+        yield (a, b)
+        a = b