mirror of
https://github.com/ceph/ceph
synced 2025-01-01 00:22:25 +00:00
osdmaptool: Add --upmap-active to simulate active upmap balancing
Signed-off-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
parent
184e9d1ae3
commit
621acf8ce7
@ -17,7 +17,7 @@ Synopsis
|
||||
| **osdmaptool** *mapfilename* [--export-crush *crushmap*]
|
||||
| **osdmaptool** *mapfilename* [--upmap *file*] [--upmap-max *max-optimizations*]
|
||||
[--upmap-deviation *max-deviation*] [--upmap-pool *poolname*]
|
||||
[--upmap-save *file*] [--upmap-save *newosdmap*]
|
||||
[--upmap-save *file*] [--upmap-save *newosdmap*] [--upmap-active]
|
||||
| **osdmaptool** *mapfilename* [--upmap-cleanup] [--upmap-save *newosdmap*]
|
||||
|
||||
|
||||
@ -27,6 +27,8 @@ Description
|
||||
**osdmaptool** is a utility that lets you create, view, and manipulate
|
||||
OSD cluster maps from the Ceph distributed storage system. Notably, it
|
||||
lets you extract the embedded CRUSH map or import a new CRUSH map.
|
||||
It can also simulate the upmap balancer mode so you can get a sense of
|
||||
what is needed to balance your PGs.
|
||||
|
||||
|
||||
Options
|
||||
@ -161,6 +163,10 @@ Options
|
||||
|
||||
write modified OSDMap with upmap changes
|
||||
|
||||
.. option:: --upmap-active
|
||||
|
||||
Act like an active balancer: keep applying changes until the OSDs are balanced
|
||||
|
||||
|
||||
Example
|
||||
=======
|
||||
@ -244,6 +250,56 @@ placement group distribution, whose standard deviation is 1.41421::
|
||||
size 20
|
||||
size 364
|
||||
|
||||
To simulate the active balancer in upmap mode::
|
||||
|
||||
osdmaptool --upmap upmaps.out --upmap-active --upmap-deviation 6 --upmap-max 11 osdmap
|
||||
|
||||
osdmaptool: osdmap file 'osdmap'
|
||||
writing upmap command output to: upmaps.out
|
||||
checking for upmap cleanups
|
||||
upmap, max-count 11, max deviation 6
|
||||
pools movies photos metadata data
|
||||
prepared 11/11 changes
|
||||
Time elapsed 0.00310404 secs
|
||||
pools movies photos metadata data
|
||||
prepared 11/11 changes
|
||||
Time elapsed 0.00283402 secs
|
||||
pools data metadata movies photos
|
||||
prepared 11/11 changes
|
||||
Time elapsed 0.003122 secs
|
||||
pools photos metadata data movies
|
||||
prepared 11/11 changes
|
||||
Time elapsed 0.00324372 secs
|
||||
pools movies metadata data photos
|
||||
prepared 1/11 changes
|
||||
Time elapsed 0.00222609 secs
|
||||
pools data movies photos metadata
|
||||
prepared 0/11 changes
|
||||
Time elapsed 0.00209916 secs
|
||||
Unable to find further optimization, or distribution is already perfect
|
||||
osd.0 pgs 41
|
||||
osd.1 pgs 42
|
||||
osd.2 pgs 42
|
||||
osd.3 pgs 41
|
||||
osd.4 pgs 46
|
||||
osd.5 pgs 39
|
||||
osd.6 pgs 39
|
||||
osd.7 pgs 43
|
||||
osd.8 pgs 41
|
||||
osd.9 pgs 46
|
||||
osd.10 pgs 46
|
||||
osd.11 pgs 46
|
||||
osd.12 pgs 46
|
||||
osd.13 pgs 41
|
||||
osd.14 pgs 40
|
||||
osd.15 pgs 40
|
||||
osd.16 pgs 39
|
||||
osd.17 pgs 46
|
||||
osd.18 pgs 46
|
||||
osd.19 pgs 39
|
||||
osd.20 pgs 42
|
||||
Total time elapsed 0.0167765 secs, 5 rounds
|
||||
|
||||
|
||||
Availability
|
||||
============
|
||||
|
@ -43,6 +43,7 @@ Upmap entries are updated with an offline optimizer built into ``osdmaptool``.
|
||||
|
||||
osdmaptool om --upmap out.txt [--upmap-pool <pool>]
|
||||
[--upmap-max <max-optimizations>] [--upmap-deviation <max-deviation>]
|
||||
[--upmap-active]
|
||||
|
||||
It is highly recommended that optimization be done for each pool
|
||||
individually, or for sets of similarly-utilized pools. You can
|
||||
@ -61,6 +62,12 @@ Upmap entries are updated with an offline optimizer built into ``osdmaptool``.
|
||||
varies from the computed target number by less than or equal
|
||||
to this amount it will be considered perfect.
|
||||
|
||||
The ``--upmap-active`` option simulates the behavior of the active
|
||||
balancer in upmap mode. It keeps cycling until the OSDs are balanced
|
||||
and reports how many rounds were needed and how long each round took. The
|
||||
elapsed time of each round indicates the CPU load ceph-mgr will be
|
||||
consuming when it tries to compute the next optimization plan.
|
||||
|
||||
#. Apply the changes::
|
||||
|
||||
source out.txt
|
||||
|
@ -28,6 +28,7 @@
|
||||
max deviation from target [default: 1]
|
||||
--upmap-pool <poolname> restrict upmap balancing to 1 or more pools
|
||||
--upmap-save write modified OSDMap with upmap changes
|
||||
--upmap-active Act like an active balancer, keep applying changes until balanced
|
||||
--dump <format> displays the map in plain text when <format> is 'plain', 'json' if specified format is not supported
|
||||
--tree displays a tree of the map
|
||||
--test-crush [--range-first <first> --range-last <last>] map pgs to acting osds
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "common/errno.h"
|
||||
#include "common/safe_io.h"
|
||||
#include "mon/health_check.h"
|
||||
#include <time.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "global/global_init.h"
|
||||
@ -56,6 +57,7 @@ void usage()
|
||||
cout << " max deviation from target [default: 1]" << std::endl;
|
||||
cout << " --upmap-pool <poolname> restrict upmap balancing to 1 or more pools" << std::endl;
|
||||
cout << " --upmap-save write modified OSDMap with upmap changes" << std::endl;
|
||||
cout << " --upmap-active Act like an active balancer, keep applying changes until balanced" << std::endl;
|
||||
cout << " --dump <format> displays the map in plain text when <format> is 'plain', 'json' if specified format is not supported" << std::endl;
|
||||
cout << " --tree displays a tree of the map" << std::endl;
|
||||
cout << " --test-crush [--range-first <first> --range-last <last>] map pgs to acting osds" << std::endl;
|
||||
@ -144,6 +146,7 @@ int main(int argc, const char **argv)
|
||||
std::string upmap_file = "-";
|
||||
int upmap_max = 10;
|
||||
int upmap_deviation = 1;
|
||||
bool upmap_active = false;
|
||||
std::set<std::string> upmap_pools;
|
||||
int64_t pg_num = -1;
|
||||
bool test_map_pgs_dump_all = false;
|
||||
@ -184,6 +187,8 @@ int main(int argc, const char **argv)
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
createsimple = true;
|
||||
} else if (ceph_argparse_flag(args, i, "--upmap-active", (char*)NULL)) {
|
||||
upmap_active = true;
|
||||
} else if (ceph_argparse_flag(args, i, "--health", (char*)NULL)) {
|
||||
health = true;
|
||||
} else if (ceph_argparse_flag(args, i, "--with-default-pool", (char*)NULL)) {
|
||||
@ -379,9 +384,8 @@ int main(int argc, const char **argv)
|
||||
cout << "upmap, max-count " << upmap_max
|
||||
<< ", max deviation " << upmap_deviation
|
||||
<< std::endl;
|
||||
OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
|
||||
pending_inc.fsid = osdmap.get_fsid();
|
||||
vector<int64_t> pools;
|
||||
set<int64_t> upmap_pool_nums;
|
||||
for (auto& s : upmap_pools) {
|
||||
int64_t p = osdmap.lookup_pg_pool_name(s);
|
||||
if (p < 0) {
|
||||
@ -389,6 +393,7 @@ int main(int argc, const char **argv)
|
||||
exit(1);
|
||||
}
|
||||
pools.push_back(p);
|
||||
upmap_pool_nums.insert(p);
|
||||
}
|
||||
if (!pools.empty()) {
|
||||
cout << " limiting to pools " << upmap_pools << " (" << pools << ")"
|
||||
@ -403,39 +408,79 @@ int main(int argc, const char **argv)
|
||||
cout << "No pools available" << std::endl;
|
||||
goto skip_upmap;
|
||||
}
|
||||
std::random_device rd;
|
||||
std::shuffle(pools.begin(), pools.end(), std::mt19937{rd()});
|
||||
cout << "pools ";
|
||||
for (auto& i: pools)
|
||||
cout << osdmap.get_pool_name(i) << " ";
|
||||
cout << std::endl;
|
||||
int total_did = 0;
|
||||
int left = upmap_max;
|
||||
for (auto& i: pools) {
|
||||
set<int64_t> one_pool;
|
||||
one_pool.insert(i);
|
||||
int did = osdmap.calc_pg_upmaps(
|
||||
g_ceph_context, upmap_deviation,
|
||||
left, one_pool,
|
||||
&pending_inc);
|
||||
total_did += did;
|
||||
left -= did;
|
||||
if (left <= 0)
|
||||
break;
|
||||
}
|
||||
cout << "prepared " << total_did << "/" << upmap_max << " changes" << std::endl;
|
||||
if (total_did > 0) {
|
||||
print_inc_upmaps(pending_inc, upmap_fd);
|
||||
if (upmap_save) {
|
||||
int r = osdmap.apply_incremental(pending_inc);
|
||||
ceph_assert(r == 0);
|
||||
modified = true;
|
||||
int rounds = 0;
|
||||
struct timespec round_start;
|
||||
int r = clock_gettime(CLOCK_MONOTONIC, &round_start);
|
||||
assert(r == 0);
|
||||
do {
|
||||
std::random_device rd;
|
||||
std::shuffle(pools.begin(), pools.end(), std::mt19937{rd()});
|
||||
cout << "pools ";
|
||||
for (auto& i: pools)
|
||||
cout << osdmap.get_pool_name(i) << " ";
|
||||
cout << std::endl;
|
||||
OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
|
||||
pending_inc.fsid = osdmap.get_fsid();
|
||||
int total_did = 0;
|
||||
int left = upmap_max;
|
||||
struct timespec begin, end;
|
||||
r = clock_gettime(CLOCK_MONOTONIC, &begin);
|
||||
assert(r == 0);
|
||||
for (auto& i: pools) {
|
||||
set<int64_t> one_pool;
|
||||
one_pool.insert(i);
|
||||
int did = osdmap.calc_pg_upmaps(
|
||||
g_ceph_context, upmap_deviation,
|
||||
left, one_pool,
|
||||
&pending_inc);
|
||||
total_did += did;
|
||||
left -= did;
|
||||
if (left <= 0)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
cout << "Unable to find further optimization, "
|
||||
<< "or distribution is already perfect"
|
||||
<< std::endl;
|
||||
}
|
||||
r = clock_gettime(CLOCK_MONOTONIC, &end);
|
||||
assert(r == 0);
|
||||
cout << "prepared " << total_did << "/" << upmap_max << " changes" << std::endl;
|
||||
float elapsed_time = (end.tv_sec - begin.tv_sec) + 1.0e-9*(end.tv_nsec - begin.tv_nsec);
|
||||
if (upmap_active)
|
||||
cout << "Time elapsed " << elapsed_time << " secs" << std::endl;
|
||||
if (total_did > 0) {
|
||||
print_inc_upmaps(pending_inc, upmap_fd);
|
||||
if (upmap_save || upmap_active) {
|
||||
int r = osdmap.apply_incremental(pending_inc);
|
||||
ceph_assert(r == 0);
|
||||
if (upmap_save)
|
||||
modified = true;
|
||||
}
|
||||
} else {
|
||||
cout << "Unable to find further optimization, "
|
||||
<< "or distribution is already perfect"
|
||||
<< std::endl;
|
||||
if (upmap_active) {
|
||||
map<int,set<pg_t>> pgs_by_osd;
|
||||
for (auto& i : osdmap.get_pools()) {
|
||||
if (!upmap_pool_nums.empty() && !upmap_pool_nums.count(i.first))
|
||||
continue;
|
||||
for (unsigned ps = 0; ps < i.second.get_pg_num(); ++ps) {
|
||||
pg_t pg(ps, i.first);
|
||||
vector<int> up;
|
||||
osdmap.pg_to_up_acting_osds(pg, &up, nullptr, nullptr, nullptr);
|
||||
//ldout(cct, 20) << __func__ << " " << pg << " up " << up << dendl;
|
||||
for (auto osd : up) {
|
||||
if (osd != CRUSH_ITEM_NONE)
|
||||
pgs_by_osd[osd].insert(pg);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto& i : pgs_by_osd)
|
||||
cout << "osd." << i.first << " pgs " << i.second.size() << std::endl;
|
||||
float elapsed_time = (end.tv_sec - round_start.tv_sec) + 1.0e-9*(end.tv_nsec - round_start.tv_nsec);
|
||||
cout << "Total time elapsed " << elapsed_time << " secs, " << rounds << " rounds" << std::endl;
|
||||
}
|
||||
break;
|
||||
}
|
||||
++rounds;
|
||||
} while(upmap_active);
|
||||
}
|
||||
skip_upmap:
|
||||
if (upmap_file != "-") {
|
||||
|
Loading…
Reference in New Issue
Block a user