Merge pull request #59762 from aclamk/wip-aclamk-cbt-combined

ceph-bluestore-tool: Fixes for multiple bdev labels
This commit is contained in:
Adam Kupczyk 2024-09-25 07:24:45 +02:00 committed by GitHub
commit 274413ffbb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 89 additions and 8 deletions

View File

@ -30,6 +30,7 @@ Synopsis
| **ceph-bluestore-tool** reshard --path *osd path* --sharding *new sharding* [ --sharding-ctrl *control string* ]
| **ceph-bluestore-tool** show-sharding --path *osd path*
| **ceph-bluestore-tool** trim --path *osd path*
| **ceph-bluestore-tool** zap-device --dev *dev path*
Description
@ -108,7 +109,8 @@ Commands
:command:`show-label` --dev *device* [...]
Show device label(s).
Show device label(s).
The label may be printed while an OSD is running.
:command:`free-dump` --path *osd path* [ --allocator block/bluefs-wal/bluefs-db/bluefs-slow ]
@ -141,6 +143,10 @@ Commands
and allows the drive to perform more efficient internal housekeeping.
If BlueStore runs with discard enabled, this option may not be useful.
:command:`zap-device` --dev *dev path*
Zeros all device label locations. This effectively makes the device appear empty.
Options
=======
@ -202,8 +208,8 @@ Useful to provide necessary configuration options when access to monitor/ceph.co
Device labels
=============
Every BlueStore block device has a single block label at the beginning of the
device. You can dump the contents of the label with::
Every BlueStore block device has a block label at the beginning of the device.
You can dump the contents of the label with::
ceph-bluestore-tool show-label --dev *device*
@ -211,6 +217,10 @@ The main device will have a lot of metadata, including information
that used to be stored in small files in the OSD data directory. The
auxiliary devices (db and wal) will only have the minimum required
fields (OSD UUID, size, device type, birth time).
The main device contains additional label copies at offsets: 1G, 10G, 100G and 1000G.
Corrupted labels are fixed as part of repair::
ceph-bluestore-tool repair --dev *device*
OSD directory priming
=====================

View File

@ -6928,6 +6928,10 @@ int BlueStore::read_bdev_label(
{
unique_ptr<BlockDevice> bdev(BlockDevice::create(
cct, path, nullptr, nullptr, nullptr, nullptr));
if (!bdev) {
return -EIO;
}
bdev->set_no_exclusive_lock();
int r = bdev->open(path);
if (r < 0)
return r;
@ -8988,6 +8992,47 @@ void BlueStore::trim_free_space(const string& type, std::ostream& outss)
}
}
// Overwrite every bdev label location on `dev` with zeros.
//
// For each offset in bdev_label_positions, a zero-filled region of
// max(device block size, BDEV_LABEL_BLOCK_SIZE) bytes is written, clipped to
// the device size. Iteration stops at the first position that lies at or
// beyond the end of the device, or at the first write error.
//
// @param cct  context used for logging and device creation
// @param dev  path of the device to zap
// @return 0 on success; -EIO if the block device object could not be created;
//         otherwise the negative error returned by open() or write().
int BlueStore::zap_device(CephContext* cct, const string& dev)
{
  string path = dev; // dummy var for dout
  dout(5) << __func__ << " " << dev << dendl;

  unique_ptr<BlockDevice>
    _bdev(BlockDevice::create(cct, dev, nullptr, nullptr, nullptr, nullptr));
  // Guard against a failed create, mirroring the check in read_bdev_label();
  // without it the open() below would dereference a null pointer.
  if (!_bdev) {
    derr << __func__ << " failed to create block device for " << dev << dendl;
    return -EIO;
  }

  int r = _bdev->open(dev);
  if (r < 0) {
    // Nothing was opened, so there is nothing to close.
    return r;
  }

  // Wipe at least one full device block, even if it exceeds the label size.
  const uint64_t brush_size =
    std::max(_bdev->get_block_size(), BDEV_LABEL_BLOCK_SIZE);

  for (auto off : bdev_label_positions) {
    // Clip the wiped region so it never extends past the end of the device.
    uint64_t end = std::min(off + brush_size, _bdev->get_size());
    if (end <= off) {
      // This label position is beyond the device size; stop here.
      break;
    }
    uint64_t l = end - off;
    bufferlist bl;
    bl.append_zero(l);
    dout(10) << __func__ << " writing 0x"
             << std::hex << off << "~" << l
             << std::dec << " to " << dev
             << dendl;
    r = _bdev->write(off, bl, false);
    if (r < 0) {
      derr << __func__ << " error writing 0x"
           << std::hex << off << "~" << l
           << std::dec << " to " << dev
           << " : " << cpp_strerror(r) << dendl;
      break;
    }
  }

  _bdev->close();
  return r;
}
void BlueStore::set_cache_shards(unsigned num)
{
dout(10) << __func__ << " " << num << dendl;

View File

@ -3192,6 +3192,7 @@ public:
int dump_bluefs_sizes(std::ostream& out);
void trim_free_space(const std::string& type, std::ostream& outss);
static int zap_device(CephContext* cct, const std::string& dev);
public:
int statfs(struct store_statfs_t *buf,

View File

@ -289,7 +289,6 @@ int main(int argc, char **argv)
string empty_sharding(1, '\0');
string new_sharding = empty_sharding;
string resharding_ctrl;
string really;
int log_level = 30;
bool fsck_deep = false;
po::options_description po_options("Options");
@ -312,7 +311,7 @@ int main(int argc, char **argv)
("value,v", po::value<string>(&value), "label metadata value")
("allocator", po::value<vector<string>>(&allocs_name), "allocator to inspect: 'block'/'bluefs-wal'/'bluefs-db'")
("bdev-type", po::value<vector<string>>(&bdev_type), "bdev type to inspect: 'bdev-block'/'bdev-wal'/'bdev-db'")
("really", po::value<string>(&really), "--yes-i-really-really-mean-it")
("yes-i-really-really-mean-it", "additional confirmation for dangerous commands")
("sharding", po::value<string>(&new_sharding), "new sharding to apply")
("resharding-ctrl", po::value<string>(&resharding_ctrl), "gives control over resharding procedure details")
("op", po::value<string>(&action_aux),
@ -345,7 +344,9 @@ int main(int argc, char **argv)
"bluefs-stats, "
"reshard, "
"show-sharding, "
"trim")
"trim, "
"zap-device"
)
;
po::options_description po_all("All options");
po_all.add(po_options).add(po_positional);
@ -354,7 +355,11 @@ int main(int argc, char **argv)
po::variables_map vm;
try {
po::parsed_options parsed =
po::command_line_parser(argc, argv).options(po_all).allow_unregistered().run();
po::command_line_parser(argc, argv).options(po_all)
.allow_unregistered()
.style(po::command_line_style::default_style &
~po::command_line_style::allow_guessing)
.run();
po::store( parsed, vm);
po::notify(vm);
ceph_option_strings = po::collect_unrecognized(parsed.options,
@ -582,7 +587,7 @@ int main(int argc, char **argv)
cerr << "must specify bluestore path" << std::endl;
exit(EXIT_FAILURE);
}
if (really.empty() || strcmp(really.c_str(), "--yes-i-really-really-mean-it") != 0) {
if (!vm.count("yes-i-really-really-mean-it")) {
cerr << "Trimming a non healthy bluestore is a dangerous operation which could cause data loss, "
<< "please run fsck and confirm with --yes-i-really-really-mean-it option"
<< std::endl;
@ -600,6 +605,18 @@ int main(int argc, char **argv)
if (bdev_type.empty())
bdev_type = vector<string>{"bdev-block", "bdev-db", "bdev-wal"};
}
// zap-device wipes the label locations of the given devices, so it needs
// at least one --dev argument and an explicit confirmation flag.
if (action == "zap-device") {
  if (devs.empty()) {
    cerr << "must specify device(s) with --dev option" << std::endl;
    exit(EXIT_FAILURE);
  }
  if (!vm.count("yes-i-really-really-mean-it")) {
    // Name the command as the user typed it ("zap-device", not "zap-osd").
    cerr << "zap-device is a DESTRUCTIVE operation, it causes OSD data loss, "
         << "please confirm with --yes-i-really-really-mean-it option"
         << std::endl;
    exit(EXIT_FAILURE);
  }
}
if (action == "restore_cfb") {
#ifndef CEPH_BLUESTORE_TOOL_RESTORE_ALLOCATION
@ -1249,6 +1266,14 @@ int main(int argc, char **argv)
cout << "status: " << outss.str() << std::endl;
}
bluestore.cold_close();
} else if (action == "zap-device") {
for(auto& dev : devs) {
int r = BlueStore::zap_device(cct.get(), dev);
if (r < 0) {
cerr << "error from zap: " << cpp_strerror(r) << std::endl;
exit(EXIT_FAILURE);
}
}
} else {
cerr << "unrecognized action " << action << std::endl;
return 1;