From 0441dfcd883bd63d2b981164af5d491de46168be Mon Sep 17 00:00:00 2001 From: "yite.gu" Date: Thu, 9 May 2024 14:31:58 +0800 Subject: [PATCH] tools/bluestore: Add command 'trim' to ceph-bluestore-tool Add command 'trim' to ceph-bluestore-tool. Co-authored-by: Igor Fedotov Signed-off-by: Yite Gu --- doc/man/8/ceph-bluestore-tool.rst | 8 ++++++ src/blk/BlockDevice.h | 1 + src/blk/kernel/KernelDevice.cc | 4 +-- src/os/bluestore/BlueFS.cc | 31 +++++++++++++++++++++ src/os/bluestore/BlueFS.h | 1 + src/os/bluestore/BlueStore.cc | 20 ++++++++++++++ src/os/bluestore/BlueStore.h | 1 + src/os/bluestore/bluestore_tool.cc | 44 +++++++++++++++++++++++++++++- 8 files changed, 107 insertions(+), 3 deletions(-) diff --git a/doc/man/8/ceph-bluestore-tool.rst b/doc/man/8/ceph-bluestore-tool.rst index 634d3a8bb6e..fa25facedce 100644 --- a/doc/man/8/ceph-bluestore-tool.rst +++ b/doc/man/8/ceph-bluestore-tool.rst @@ -29,6 +29,7 @@ Synopsis | **ceph-bluestore-tool** free-dump|free-score --path *osd path* [ --allocator block/bluefs-wal/bluefs-db/bluefs-slow ] | **ceph-bluestore-tool** reshard --path *osd path* --sharding *new sharding* [ --sharding-ctrl *control string* ] | **ceph-bluestore-tool** show-sharding --path *osd path* +| **ceph-bluestore-tool** trim --path *osd path* Description @@ -131,6 +132,13 @@ Commands Show sharding that is currently applied to BlueStore's RocksDB. +:command: `trim` --path *osd path* + + An SSD that has been used heavily may experience performance degradation. + This operation uses TRIM / discard to free unused blocks from BlueStore and BlueFS block devices, + and allows the drive to perform more efficient internal housekeeping. + If BlueStore runs with discard enabled, this option may not be useful. + Options ======= diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h index 6c55646fc76..c45c0759c21 100644 --- a/src/blk/BlockDevice.h +++ b/src/blk/BlockDevice.h @@ -233,6 +233,7 @@ public: uint64_t get_size() const { return size; } uint64_t get_block_size() const { return block_size; } uint64_t get_optimal_io_size() const { return optimal_io_size; } + bool is_discard_supported() const { return support_discard; } /// hook to provide utilization of thinly-provisioned device virtual int get_ebd_state(ExtBlkDevState &state) const { diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index 6337292f5de..e77d17e0333 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -1116,8 +1116,8 @@ int KernelDevice::_discard(uint64_t offset, uint64_t len) return 0; } dout(10) << __func__ - << " 0x" << std::hex << offset << "~" << len << std::dec - << dendl; + << " 0x" << std::hex << offset << "~" << len << std::dec + << dendl; r = BlkDev{fd_directs[WRITE_LIFE_NOT_SET]}.discard((int64_t)offset, (int64_t)len); return r; } diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index a8b1fb25ee8..4a287ec3545 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -4738,6 +4738,37 @@ size_t BlueFS::probe_alloc_avail(int dev, uint64_t alloc_size) } return total; } + +void BlueFS::trim_free_space(const string& type, std::ostream& outss) +{ + unsigned bdev_id; + if(type == "bdev-wal") { + bdev_id = BDEV_WAL; + } else if (type == "bdev-db") { + bdev_id = BDEV_DB; + } else { + derr << __func__ << " unknown bdev type " << type << dendl; + return; + } + auto iterated_allocation = [&](size_t off, size_t len) { + ceph_assert(len > 0); + interval_set to_discard; + to_discard.union_insert(off, len); + bdev[bdev_id]->try_discard(to_discard, false); + }; + if (!bdev[bdev_id]) { + outss << "device " << type << " is not configured"; + return; + } + if (alloc[bdev_id] && !is_shared_alloc(bdev_id)) { + if (!bdev[bdev_id]->is_discard_supported()) { + outss << "device " << type << " does not support trim"; + return; + } + alloc[bdev_id]->foreach(iterated_allocation); + outss << "device " << type << " trim done"; + } +} // =============================================== // OriginalVolumeSelector diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index 86fb3cc882e..f57deac5706 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -778,6 +778,7 @@ public: } uint64_t debug_get_dirty_seq(FileWriter *h); bool debug_get_is_dev_dirty(FileWriter *h, uint8_t dev); + void trim_free_space(const std::string& type, std::ostream& outss); private: // Wrappers for BlockDevice::read(...) and BlockDevice::read_random(...) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 286dad7d3de..fec8a9ceb34 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -8605,6 +8605,26 @@ int BlueStore::dump_bluefs_sizes(ostream& out) return r; } +void BlueStore::trim_free_space(const string& type, std::ostream& outss) +{ + auto iterated_allocation = [&](size_t off, size_t len) { + ceph_assert(len > 0); + interval_set to_discard; + to_discard.union_insert(off, len); + bdev->try_discard(to_discard, false); + }; + if (type == "bdev-block") { + if (!bdev->is_discard_supported()) { + outss << "device " << type << " does not support trim"; + return; + } + shared_alloc.a->foreach(iterated_allocation); + outss << "device " << type << " trim done"; + } else { + bluefs->trim_free_space(type, outss); + } +} + void BlueStore::set_cache_shards(unsigned num) { dout(10) << __func__ << " " << num << dendl; diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 1f7f4940208..880fd4d7cea 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -3064,6 +3064,7 @@ public: std::string get_device_path(unsigned id); int dump_bluefs_sizes(std::ostream& out); + void trim_free_space(const std::string& type, std::ostream& outss); public: int statfs(struct store_statfs_t *buf, diff --git a/src/os/bluestore/bluestore_tool.cc b/src/os/bluestore/bluestore_tool.cc index 32cc5ecf4ed..173450d7961 100644 --- a/src/os/bluestore/bluestore_tool.cc +++ b/src/os/bluestore/bluestore_tool.cc @@ -285,9 +285,11 @@ int main(int argc, char **argv) string dest_file; string key, value; vector allocs_name; + vector bdev_type; string empty_sharding(1, '\0'); string new_sharding = empty_sharding; string resharding_ctrl; + string really; int log_level = 30; bool fsck_deep = false; po::options_description po_options("Options"); @@ -309,6 +311,8 @@ int main(int argc, char **argv) ("key,k", po::value(&key), "label metadata key name") ("value,v", po::value(&value), "label metadata value") ("allocator", po::value>(&allocs_name), "allocator to inspect: 'block'/'bluefs-wal'/'bluefs-db'") + ("bdev-type", po::value>(&bdev_type), "bdev type to inspect: 'bdev-block'/'bdev-wal'/'bdev-db'") + ("really", po::value(&really), "--yes-i-really-really-mean-it") ("sharding", po::value(&new_sharding), "new sharding to apply") ("resharding-ctrl", po::value(&resharding_ctrl), "gives control over resharding procedure details") ("op", po::value(&action_aux), @@ -340,7 +344,8 @@ int main(int argc, char **argv) "free-fragmentation, " "bluefs-stats, " "reshard, " - "show-sharding") + "show-sharding, " + "trim") ; po::options_description po_all("All options"); po_all.add(po_options).add(po_positional); @@ -572,6 +577,29 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } } + if (action == "trim") { + if (path.empty()) { + cerr << "must specify bluestore path" << std::endl; + exit(EXIT_FAILURE); + } + if (really.empty() || strcmp(really.c_str(), "--yes-i-really-really-mean-it") != 0) { + cerr << "Trimming a non healthy bluestore is a dangerous operation which could cause data loss, " + << "please run fsck and confirm with --yes-i-really-really-mean-it option" + << std::endl; + exit(EXIT_FAILURE); + } + for (auto type : bdev_type) { + if (!type.empty() && + type != "bdev-block" && + type != "bdev-db" && + type != "bdev-wal") { + cerr << "unknown bdev type '" << type << "'" << std::endl; + exit(EXIT_FAILURE); + } + } + if (bdev_type.empty()) + bdev_type = vector{"bdev-block", "bdev-db", "bdev-wal"}; + } if (action == "restore_cfb") { #ifndef CEPH_BLUESTORE_TOOL_RESTORE_ALLOCATION @@ -1175,6 +1203,20 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } cout << sharding << std::endl; + } else if (action == "trim") { + BlueStore bluestore(cct.get(), path); + int r = bluestore.cold_open(); + if (r < 0) { + cerr << "error from cold_open: " << cpp_strerror(r) << std::endl; + exit(EXIT_FAILURE); + } + for (auto type : bdev_type) { + cout << "trimming: " << type << std::endl; + ostringstream outss; + bluestore.trim_free_space(type, outss); + cout << "status: " << outss.str() << std::endl; + } + bluestore.cold_close(); } else { cerr << "unrecognized action " << action << std::endl; return 1;