mirror of https://github.com/ceph/ceph
tools: ceph-objectstore-tool is able to trim pg log dups' entries.
The main assumption is that trimming just the dups doesn't need any update to the corresponding pg_info_t.

Testing:

1. Cluster without the autoscaler:

```
rzarz@ubulap:~/dev/ceph/build$ MON=1 MGR=1 OSD=3 MDS=0 ../src/vstart.sh -l -b -n -o "osd_pg_log_dups_tracked=3000000" -o "osd_pool_default_pg_autoscale_mode=off"
```

2. 8 PGs in the testing pool:

```
rzarz@ubulap:~/dev/ceph/build$ bin/ceph osd pool create test-pool 8 8
```

3. Provisioning dups with rados bench:

```
bin/rados bench -p test-pool 300 write -b 4096 --no-cleanup
...
Total time run:         300.034
Total writes made:      103413
Write size:             4096
Object size:            4096
Bandwidth (MB/sec):     1.34637
Stddev Bandwidth:       0.589071
Max bandwidth (MB/sec): 2.4375
Min bandwidth (MB/sec): 0.902344
Average IOPS:           344
Stddev IOPS:            150.802
Max IOPS:               624
Min IOPS:               231
Average Latency(s):     0.0464151
Stddev Latency(s):      0.0183627
Max latency(s):         0.0928424
Min latency(s):         0.0131932
```

4. Killing osd.0:

```
rzarz@ubulap:~/dev/ceph/build$ kill 2572129 # pid of osd.0
```

5. Listing PGs on osd.0 and counting each PG log's entries and dups:

```
rzarz@ubulap:~/dev/ceph/build$ bin/ceph-objectstore-tool --data-path dev/osd0 --op list-pgs --pgid 2.c > osd0_pgs.txt
rzarz@ubulap:~/dev/ceph/build$ for pgid in `cat osd0_pgs.txt`; do echo $pgid; bin/ceph-objectstore-tool --data-path dev/osd0 --op log --pgid $pgid | jq '(.pg_log_t.log|length),(.pg_log_t.dups|length)'; done
2.7
10020
3100
2.6
10100
3000
2.3
10012
2800
2.1
10049
2900
2.2
10057
2700
2.0
10027
2900
2.5
10077
2700
2.4
10072
2900
1.0
97
0
```

6. Trimming dups:

```
rzarz@ubulap:~/dev/ceph/build$ CEPH_ARGS="--osd_pg_log_dups_tracked 2500 --osd_pg_log_trim_max=100" bin/ceph-objectstore-tool --data-path dev/osd0 --op trim-pg-log-dups --pgid 2.7
max_dup_entries=2500 max_chunk_size=100
Removing keys dup_0000000020.00000000000000000001 - dup_0000000020.00000000000000000100
Removing keys dup_0000000020.00000000000000000101 - dup_0000000020.00000000000000000200
Removing keys dup_0000000020.00000000000000000201 - dup_0000000020.00000000000000000300
Removing keys dup_0000000020.00000000000000000301 - dup_0000000020.00000000000000000400
Removing keys dup_0000000020.00000000000000000401 - dup_0000000020.00000000000000000500
Removing keys dup_0000000020.00000000000000000501 - dup_0000000020.00000000000000000600
Finished trimming, now compacting...
Finished trimming pg log dups
```

7. Checking the number of pg log entries and dups again:

```
rzarz@ubulap:~/dev/ceph/build$ for pgid in `cat osd0_pgs.txt`; do echo $pgid; bin/ceph-objectstore-tool --data-path dev/osd0 --op log --pgid $pgid | jq '(.pg_log_t.log|length),(.pg_log_t.dups|length)'; done
2.7
10020
2500
2.6
10100
3000
2.3
10012
2800
2.1
10049
2900
2.2
10057
2700
2.0
10027
2900
2.5
10077
2700
2.4
10072
2900
1.0
97
0
```

Fixes: https://tracker.ceph.com/issues/53729
Signed-off-by: Radosław Zarzyński <rzarzyns@redhat.com>
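The `Removing keys dup_…` ranges in step 6 follow from how dup entries are keyed: each omap key embeds a zero-padded epoch and version, so plain lexicographic order equals chronological order. Below is a minimal standalone sketch of that idea; the padding widths are inferred from the transcript above, and `make_dup_key` is a hypothetical helper, not a Ceph function.

```cpp
#include <cstdio>
#include <iostream>
#include <set>
#include <string>

// Hypothetical re-creation of the "dup_<epoch>.<version>" key layout seen in
// the transcript (e.g. dup_0000000020.00000000000000000001). The zero-padding
// makes lexicographic order match (epoch, version) order, so a std::set of
// such keys is automatically sorted oldest-first.
std::string make_dup_key(unsigned epoch, unsigned long long version) {
  char buf[64];
  std::snprintf(buf, sizeof(buf), "dup_%010u.%020llu", epoch, version);
  return std::string(buf);
}

int main() {
  std::set<std::string> keys;
  keys.insert(make_dup_key(20, 100));
  keys.insert(make_dup_key(20, 2));
  keys.insert(make_dup_key(20, 30));
  // The oldest entry sorts first, which is what lets a trimmer keep only the
  // newest N keys and evict from the front of the set.
  std::cout << "oldest: " << *keys.begin() << "\n"
            << "newest: " << *keys.rbegin() << "\n";
}
```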
```diff
@@ -736,6 +736,82 @@ int do_trim_pg_log(ObjectStore *store, const coll_t &coll,
   return 0;
 }
 
+int do_trim_pg_log_dups(ObjectStore *store, const coll_t &coll,
+                        pg_info_t &info, const spg_t &pgid,
+                        epoch_t map_epoch,
+                        PastIntervals &past_intervals)
+{
+  ghobject_t oid = pgid.make_pgmeta_oid();
+  struct stat st;
+  auto ch = store->open_collection(coll);
+  int r = store->stat(ch, oid, &st);
+  ceph_assert(r == 0);
+  ceph_assert(st.st_size == 0);
+
+  const size_t max_dup_entries = g_ceph_context->_conf->osd_pg_log_dups_tracked;
+  ceph_assert(max_dup_entries > 0);
+  const size_t max_chunk_size = g_ceph_context->_conf->osd_pg_log_trim_max;
+  ceph_assert(max_chunk_size > 0);
+
+  cout << "max_dup_entries=" << max_dup_entries
+       << " max_chunk_size=" << max_chunk_size << std::endl;
+  if (dry_run) {
+    cout << "Dry run enabled, so when many chunks are needed,"
+         << " the trimming will never stop!" << std::endl;
+  }
+
+  set<string> keys_to_keep;
+  size_t num_removed = 0;
+  do {
+    set<string> keys_to_trim;
+    {
+      ObjectMap::ObjectMapIterator p = store->get_omap_iterator(ch, oid);
+      if (!p)
+        break;
+      for (p->seek_to_first(); p->valid(); p->next()) {
+        if (p->key()[0] == '_')
+          continue;
+        if (p->key() == "can_rollback_to")
+          continue;
+        if (p->key() == "divergent_priors")
+          continue;
+        if (p->key() == "rollback_info_trimmed_to")
+          continue;
+        if (p->key() == "may_include_deletes_in_missing")
+          continue;
+        if (p->key().substr(0, 7) == string("missing"))
+          continue;
+        if (p->key().substr(0, 4) != string("dup_"))
+          continue;
+        keys_to_keep.insert(p->key());
+        if (keys_to_keep.size() > max_dup_entries) {
+          auto oldest_to_keep = keys_to_keep.begin();
+          keys_to_trim.emplace(*oldest_to_keep);
+          keys_to_keep.erase(oldest_to_keep);
+        }
+        if (keys_to_trim.size() >= max_chunk_size) {
+          break;
+        }
+      }
+    } // deconstruct ObjectMapIterator
+    // delete the keys
+    num_removed = keys_to_trim.size();
+    if (!dry_run && !keys_to_trim.empty()) {
+      cout << "Removing keys " << *keys_to_trim.begin() << " - " << *keys_to_trim.rbegin() << std::endl;
+      ObjectStore::Transaction t;
+      t.omap_rmkeys(coll, oid, keys_to_trim);
+      store->queue_transaction(ch, std::move(t));
+      ch->flush();
+    }
+  } while (num_removed == max_chunk_size);
+
+  // compact the db since we just removed a bunch of data
+  cerr << "Finished trimming, now compacting..." << std::endl;
+  if (!dry_run)
+    store->compact();
+  return 0;
+}
+
 const int OMAP_BATCH_SIZE = 25;
 void get_omap_batch(ObjectMap::ObjectMapIterator &iter, map<string, bufferlist> &oset)
 {
```
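The do/while above is the core of the change: `keys_to_keep` is a sliding window holding the newest `max_dup_entries` dup keys, anything pushed off its front is collected into `keys_to_trim`, and a pass stops once `max_chunk_size` keys have been gathered; the outer loop repeats while a full chunk was removed. That loop shape is also why the dry-run message warns about never stopping: with nothing actually deleted, each pass re-reads the same keys, so any PG needing more than one chunk would spin forever. Here is a minimal standalone sketch of the same windowing logic, with a plain vector standing in for the ObjectStore omap (names and data are illustrative only):

```cpp
#include <algorithm>
#include <iostream>
#include <set>
#include <string>
#include <vector>

// Standalone sketch of the trimming window in do_trim_pg_log_dups(): keep the
// newest max_keep keys, spill older ones into a bounded chunk, delete the
// chunk, and repeat while a full chunk was removed.
int main() {
  const size_t max_keep = 3, max_chunk = 2;
  std::vector<std::string> omap = {
    "dup_01", "dup_02", "dup_03", "dup_04", "dup_05", "dup_06"
  };

  size_t num_removed;
  do {
    std::set<std::string> keys_to_keep, keys_to_trim;
    for (const auto& key : omap) {
      keys_to_keep.insert(key);
      if (keys_to_keep.size() > max_keep) {
        // The set is sorted, so begin() is always the oldest key in the window.
        keys_to_trim.insert(*keys_to_keep.begin());
        keys_to_keep.erase(keys_to_keep.begin());
      }
      if (keys_to_trim.size() >= max_chunk)
        break;  // chunk is full; delete it before scanning again
    }
    for (const auto& key : keys_to_trim)  // the "omap_rmkeys" of the sketch
      omap.erase(std::find(omap.begin(), omap.end(), key));
    num_removed = keys_to_trim.size();
    if (num_removed)
      std::cout << "removed a chunk of " << num_removed << " keys\n";
  } while (num_removed == max_chunk);  // a partial chunk means nothing is left

  for (const auto& key : omap)
    std::cout << "kept " << key << "\n";  // the newest max_keep keys survive
}
```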
```diff
@@ -3219,12 +3295,12 @@ int main(int argc, char **argv)
     ("journal-path", po::value<string>(&jpath),
      "path to journal, use if tool can't find it")
     ("pgid", po::value<string>(&pgidstr),
-     "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, and mandatory for apply-layout-settings if --pool is not specified")
+     "PG id, mandatory for info, log, remove, export, export-remove, mark-complete, trim-pg-log, trim-pg-log-dups, and mandatory for apply-layout-settings if --pool is not specified")
     ("pool", po::value<string>(&pool),
      "Pool name, mandatory for apply-layout-settings if --pgid is not specified")
     ("op", po::value<string>(&op),
      "Arg is one of [info, log, remove, mkfs, fsck, repair, fuse, dup, export, export-remove, import, list, list-slow-omap, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
-     "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log, statfs]")
+     "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, apply-layout-settings, update-mon-db, dump-export, trim-pg-log, trim-pg-log-dups, statfs]")
     ("epoch", po::value<unsigned>(&epoch),
      "epoch# for get-osdmap and get-inc-osdmap, the current epoch in use if not specified")
     ("file", po::value<string>(&file),
```
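The string edits in this hunk hint at a design property of the tool: `--op` is parsed as a free-form string by boost::program_options and validated later by hand, so a new op like trim-pg-log-dups has to be threaded through the help text and each validation check separately (the hunks below do exactly that). A minimal sketch of the pattern, with simplified names and validation that are illustrative rather than the tool's actual code:

```cpp
#include <boost/program_options.hpp>
#include <iostream>
#include <string>

namespace po = boost::program_options;

// Sketch of the options pattern ceph-objectstore-tool uses: the op is just a
// string, so every new op must be added to both the help text and the
// hand-written validation below. Simplified and illustrative only.
int main(int argc, char** argv) {
  std::string op, pgidstr;
  po::options_description desc("Allowed options");
  desc.add_options()
    ("op", po::value<std::string>(&op),
     "Arg is one of [..., trim-pg-log, trim-pg-log-dups, statfs]")
    ("pgid", po::value<std::string>(&pgidstr), "PG id");

  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, desc), vm);
  po::notify(vm);

  // Hand-rolled check mirroring the tool's "Must provide pgid" validation.
  if ((op == "trim-pg-log" || op == "trim-pg-log-dups") && pgidstr.empty()) {
    std::cerr << "Must provide pgid" << std::endl;
    return 1;
  }
  std::cout << "op=" << op << " pgid=" << pgidstr << std::endl;
  return 0;
}
```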
```diff
@@ -3793,7 +3869,8 @@ int main(int argc, char **argv)
   if ((op == "info" || op == "log" || op == "remove" || op == "export"
       || op == "export-remove" || op == "mark-complete"
       || op == "reset-last-complete"
-      || op == "trim-pg-log") &&
+      || op == "trim-pg-log"
+      || op == "trim-pg-log-dups") &&
       pgidstr.length() == 0) {
     cerr << "Must provide pgid" << std::endl;
     usage(desc);
```
```diff
@@ -4020,9 +4097,9 @@ int main(int argc, char **argv)
 
   // If not an object command nor any of the ops handled below, then output this usage
   // before complaining about a bad pgid
-  if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log") {
+  if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups") {
     cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
-         "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, statfs)"
+         "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, trim-pg-log-dups, statfs)"
          << std::endl;
     usage(desc);
     ret = 1;
```
```diff
@@ -4375,6 +4452,15 @@ int main(int argc, char **argv)
     }
     cout << "Finished trimming pg log" << std::endl;
     goto out;
+  } else if (op == "trim-pg-log-dups") {
+    ret = do_trim_pg_log_dups(fs.get(), coll, info, pgid,
+                              map_epoch, past_intervals);
+    if (ret < 0) {
+      cerr << "Error trimming pg log dups: " << cpp_strerror(ret) << std::endl;
+      goto out;
+    }
+    cout << "Finished trimming pg log dups" << std::endl;
+    goto out;
   } else if (op == "reset-last-complete") {
     if (!force) {
       std::cerr << "WARNING: reset-last-complete is extremely dangerous and almost "
```