diff --git a/src/mgr/ActivePyModules.cc b/src/mgr/ActivePyModules.cc
index dc10235e70a..a21ecba7643 100644
--- a/src/mgr/ActivePyModules.cc
+++ b/src/mgr/ActivePyModules.cc
@@ -1084,6 +1084,36 @@ void ActivePyModules::set_uri(const std::string& module_name,
   modules.at(module_name)->set_uri(uri);
 }
 
+void ActivePyModules::set_device_wear_level(const std::string& devid,
+                                            float wear_level)
+{
+  // update mgr state
+  map<string,string> meta;
+  daemon_state.with_device(
+    devid,
+    [wear_level, &meta] (DeviceState& dev) {
+      dev.set_wear_level(wear_level);
+      meta = dev.metadata;
+    });
+
+  // tell mon
+  json_spirit::Object json_object;
+  for (auto& i : meta) {
+    json_spirit::Config::add(json_object, i.first, i.second);
+  }
+  bufferlist json;
+  json.append(json_spirit::write(json_object));
+  const string cmd =
+    "{"
+    "\"prefix\": \"config-key set\", "
+    "\"key\": \"device/" + devid + "\""
+    "}";
+
+  Command set_cmd;
+  set_cmd.run(&monc, cmd, json);
+  set_cmd.wait();
+}
+
 MetricQueryID ActivePyModules::add_osd_perf_query(
   const OSDPerfMetricQuery &query,
   const std::optional<OSDPerfMetricLimit> &limit)
diff --git a/src/mgr/ActivePyModules.h b/src/mgr/ActivePyModules.h
index ad6f5021926..893d2174400 100644
--- a/src/mgr/ActivePyModules.h
+++ b/src/mgr/ActivePyModules.h
@@ -151,6 +151,7 @@ public:
 
   void config_notify();
   void set_uri(const std::string& module_name, const std::string &uri);
+  void set_device_wear_level(const std::string& devid, float wear_level);
 
   int handle_command(
     const ModuleCommand& module_command,
diff --git a/src/mgr/BaseMgrModule.cc b/src/mgr/BaseMgrModule.cc
index ede9a809b21..700c7c0700c 100644
--- a/src/mgr/BaseMgrModule.cc
+++ b/src/mgr/BaseMgrModule.cc
@@ -677,6 +677,23 @@ ceph_set_uri(BaseMgrModule *self, PyObject *args)
   Py_RETURN_NONE;
 }
 
+static PyObject*
+ceph_set_wear_level(BaseMgrModule *self, PyObject *args)
+{
+  char *devid = nullptr;
+  float wear_level;
+  if (!PyArg_ParseTuple(args, "sf:ceph_set_wear_level",
+                        &devid, &wear_level)) {
+    return nullptr;
+  }
+
+  PyThreadState *tstate = PyEval_SaveThread();
+  self->py_modules->set_device_wear_level(devid, wear_level);
+  PyEval_RestoreThread(tstate);
+
+  Py_RETURN_NONE;
+}
+
 static PyObject*
 ceph_have_mon_connection(BaseMgrModule *self, PyObject *args)
 {
@@ -1437,6 +1454,9 @@ PyMethodDef BaseMgrModule_methods[] = {
   {"_ceph_set_uri", (PyCFunction)ceph_set_uri, METH_VARARGS,
    "Advertize a service URI served by this module"},
 
+  {"_ceph_set_device_wear_level", (PyCFunction)ceph_set_wear_level, METH_VARARGS,
+   "Set device wear_level value"},
+
   {"_ceph_have_mon_connection", (PyCFunction)ceph_have_mon_connection, METH_NOARGS,
    "Find out whether this mgr daemon currently has "
    "a connection to a monitor"},
diff --git a/src/mgr/MgrContext.h b/src/mgr/MgrContext.h
index c6e647365b5..a5490bef3d6 100644
--- a/src/mgr/MgrContext.h
+++ b/src/mgr/MgrContext.h
@@ -35,6 +35,12 @@ public:
                             &outbl, &outs, &cond);
   }
 
+  void run(MonClient *monc, const std::string &command, const ceph::buffer::list &inbl)
+  {
+    monc->start_mon_command({command}, inbl,
+                            &outbl, &outs, &cond);
+  }
+
   virtual void wait()
   {
     r = cond.wait();
diff --git a/src/pybind/mgr/ceph_module.pyi b/src/pybind/mgr/ceph_module.pyi
index a5622065c79..8659e6dc539 100644
--- a/src/pybind/mgr/ceph_module.pyi
+++ b/src/pybind/mgr/ceph_module.pyi
@@ -61,6 +61,7 @@ class BaseMgrModule(object):
     def _ceph_get_store(self, key):...
     def _ceph_get_osdmap(self):...
     def _ceph_set_uri(self, uri):...
+    def _ceph_set_device_wear_level(self, devid, val):...
     def _ceph_have_mon_connection(self):...
     def _ceph_update_progress_event(self, evid, desc, progress, add_to_ceph_s):...
     def _ceph_complete_progress_event(self, evid):...
diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py
index 4115f410821..7a622e62595 100644
--- a/src/pybind/mgr/devicehealth/module.py
+++ b/src/pybind/mgr/devicehealth/module.py
@@ -25,6 +25,29 @@ HEALTH_MESSAGES = {
 MAX_SAMPLES = 500
 
 
+def get_ata_wear_level(data: Dict[Any,Any]) -> Optional[float]:
+    """
+    Extract wear level (as float) from smartctl -x --json output for SATA SSD
+    """
+    for page in data.get("ata_device_statistics", {}).get("pages", []):
+        if page.get("number") != 7:
+            continue
+        for item in page.get("table", []):
+            if item["offset"] == 8:
+                return item["value"] / 100.0
+    return None
+
+
+def get_nvme_wear_level(data: Dict[Any,Any]) -> Optional[float]:
+    """
+    Extract wear level (as float) from smartctl -x --json output for NVME SSD
+    """
+    pct_used = data.get("nvme_smart_health_information_log", {}).get("percentage_used")
+    if pct_used is None:
+        return None
+    return pct_used / 100.0
+
+
 class Module(MgrModule):
     MODULE_OPTIONS = [
         Option(
@@ -450,6 +473,22 @@
                 ioctx.remove_omap_keys(op, tuple(erase))
             ioctx.operate_write_op(op, devid)
 
+        # extract wear level?
+        wear_level = get_ata_wear_level(data)
+        if wear_level is None:
+            wear_level = get_nvme_wear_level(data)
+        dev_data = self.get(f"device {devid}") or {}
+        if wear_level is not None:
+            if dev_data.get("wear_level") != str(wear_level):
+                dev_data["wear_level"] = str(wear_level)
+                self.log.debug(f"updating {devid} wear level to {wear_level}")
+                self.set_device_wear_level(devid, wear_level)
+        else:
+            if "wear_level" in dev_data:
+                del dev_data["wear_level"]
+                self.log.debug(f"removing {devid} wear level")
+                self.set_device_wear_level(devid, -1.0)
+
     def _get_device_metrics(self, devid: str,
                             sample: Optional[str] = None,
                             min_sample: Optional[str] = None) -> Dict[str, Dict[str, Any]]:
diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py
index 37acf88e3a6..81c205dc996 100644
--- a/src/pybind/mgr/mgr_module.py
+++ b/src/pybind/mgr/mgr_module.py
@@ -1611,6 +1611,9 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
         """
         return self._ceph_set_uri(uri)
 
+    def set_device_wear_level(self, devid: str, wear_level: float) -> None:
+        return self._ceph_set_device_wear_level(devid, wear_level)
+
     def have_mon_connection(self) -> bool:
         """
         Check whether this ceph-mgr daemon has an open connection
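
For reviewers, a minimal standalone sketch (not part of the patch) of the smartctl -x --json
fields the new devicehealth helpers rely on: ATA Device Statistics page 7, offset 8 for SATA
SSDs, and percentage_used from the NVMe SMART health log. The helper bodies are restated from
the diff above so the script runs without the mgr runtime; the sample dictionaries are
trimmed-down assumptions, since real smartctl output carries many more fields.

#!/usr/bin/env python3
# Standalone sketch: exercises the wear-level extraction logic from the
# devicehealth patch against hand-written sample input. Sample dicts are
# illustrative only, not captured smartctl output.
from typing import Any, Dict, Optional


def get_ata_wear_level(data: Dict[Any, Any]) -> Optional[float]:
    # Mirrors the patch: ATA Device Statistics log, page 7, offset 8
    # ("Percentage Used Endurance Indicator"), reported as a percentage.
    for page in data.get("ata_device_statistics", {}).get("pages", []):
        if page.get("number") != 7:
            continue
        for item in page.get("table", []):
            if item["offset"] == 8:
                return item["value"] / 100.0
    return None


def get_nvme_wear_level(data: Dict[Any, Any]) -> Optional[float]:
    # Mirrors the patch: NVMe SMART health log "percentage_used".
    pct_used = data.get("nvme_smart_health_information_log", {}).get("percentage_used")
    if pct_used is None:
        return None
    return pct_used / 100.0


# Hypothetical, trimmed-down samples of the relevant smartctl fields.
sata_sample = {
    "ata_device_statistics": {
        "pages": [
            {"number": 7, "table": [{"offset": 8, "value": 11}]},
        ]
    }
}
nvme_sample = {"nvme_smart_health_information_log": {"percentage_used": 3}}

print(get_ata_wear_level(sata_sample))   # 0.11
print(get_nvme_wear_level(nvme_sample))  # 0.03
print(get_ata_wear_level(nvme_sample))   # None -> caller tries the NVMe helper next

When neither helper yields a value, put_device_metrics in the patch calls
set_device_wear_level(devid, -1.0); the negative value appears to act as a "no wear level
available" sentinel for the mgr/mon state, though that interpretation lives in
DeviceState::set_wear_level outside this diff.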