mirror of
https://github.com/ceph/ceph
synced 2025-02-22 02:27:29 +00:00
Merge pull request #32247 from dzafman/wip-43307
Remove use of rules batching for upmap balancer and default for upmap_max_deviation to 5 Reviewed-by: Josh Durgin <jdurgin@redhat.com> Reviewed-by: Neha Ojha <nojha@redhat.com>
This commit is contained in:
commit
3869ee3d7c
@ -13,6 +13,12 @@ Synopsis
|
||||
|
||||
| **osdmaptool** *mapfilename* [--print] [--createsimple *numosd*
|
||||
[--pgbits *bitsperosd* ] ] [--clobber]
|
||||
| **osdmaptool** *mapfilename* [--import-crush *crushmap*]
|
||||
| **osdmaptool** *mapfilename* [--export-crush *crushmap*]
|
||||
| **osdmaptool** *mapfilename* [--upmap *file*] [--upmap-max *max-optimizations*]
|
||||
[--upmap-deviation *max-deviation*] [--upmap-pool *poolname*]
|
||||
[--upmap-save *file*] [--upmap-save *newosdmap*] [--upmap-active]
|
||||
| **osdmaptool** *mapfilename* [--upmap-cleanup] [--upmap-save *newosdmap*]
|
||||
|
||||
|
||||
Description
|
||||
@ -21,6 +27,8 @@ Description
|
||||
**osdmaptool** is a utility that lets you create, view, and manipulate
|
||||
OSD cluster maps from the Ceph distributed storage system. Notably, it
|
||||
lets you extract the embedded CRUSH map or import a new CRUSH map.
|
||||
It can also simulate the upmap balancer mode so you can get a sense of
|
||||
what is needed to balance your PGs.
|
||||
|
||||
|
||||
Options
|
||||
@ -111,6 +119,10 @@ Options
|
||||
|
||||
mark osds up and in (but do not persist).
|
||||
|
||||
.. option:: --mark-out
|
||||
|
||||
mark an osd as out (but do not persist)
|
||||
|
||||
.. option:: --tree
|
||||
|
||||
Displays a hierarchical tree of the map.
|
||||
@ -119,6 +131,43 @@ Options
|
||||
|
||||
clears pg_temp and primary_temp variables.
|
||||
|
||||
.. option:: --health
|
||||
|
||||
dump health checks
|
||||
|
||||
.. option:: --with-default-pool
|
||||
|
||||
include default pool when creating map
|
||||
|
||||
.. option:: --upmap-cleanup <file>
|
||||
|
||||
clean up pg_upmap[_items] entries, writing commands to <file> [default: - for stdout]
|
||||
|
||||
.. option:: --upmap <file>
|
||||
|
||||
calculate pg upmap entries to balance pg layout, writing commands to <file> [default: - for stdout]
|
||||
|
||||
.. option:: --upmap-max <max-optimizations>
|
||||
|
||||
set max upmap entries to calculate [default: 10]
|
||||
|
||||
.. option:: --upmap-deviation <max-deviation>
|
||||
|
||||
max deviation from target [default: 5]
|
||||
|
||||
.. option:: --upmap-pool <poolname>
|
||||
|
||||
restrict upmap balancing to one pool; the option can be repeated for multiple pools
|
||||
|
||||
.. option:: --upmap-save
|
||||
|
||||
write modified OSDMap with upmap changes
|
||||
|
||||
.. option:: --upmap-active
|
||||
|
||||
Act like an active balancer, keep applying changes until balanced
|
||||
|
||||
|
||||
Example
|
||||
=======
|
||||
|
||||
@ -130,19 +179,19 @@ To view the result::
|
||||
|
||||
osdmaptool --print osdmap
|
||||
|
||||
To view the mappings of placement groups for pool 0::
|
||||
To view the mappings of placement groups for pool 1::
|
||||
|
||||
osdmaptool --test-map-pgs-dump rbd --pool 0
|
||||
osdmaptool osdmap --test-map-pgs-dump --pool 1
|
||||
|
||||
pool 0 pg_num 8
|
||||
0.0 [0,2,1] 0
|
||||
0.1 [2,0,1] 2
|
||||
0.2 [0,1,2] 0
|
||||
0.3 [2,0,1] 2
|
||||
0.4 [0,2,1] 0
|
||||
0.5 [0,2,1] 0
|
||||
0.6 [0,1,2] 0
|
||||
0.7 [1,0,2] 1
|
||||
1.0 [0,2,1] 0
|
||||
1.1 [2,0,1] 2
|
||||
1.2 [0,1,2] 0
|
||||
1.3 [2,0,1] 2
|
||||
1.4 [0,2,1] 0
|
||||
1.5 [0,2,1] 0
|
||||
1.6 [0,1,2] 0
|
||||
1.7 [1,0,2] 1
|
||||
#osd count first primary c wt wt
|
||||
osd.0 8 5 5 1 1
|
||||
osd.1 8 1 1 1 1
|
||||
@ -157,7 +206,7 @@ To view the mappings of placement groups for pool 0::
|
||||
size 3 8
|
||||
|
||||
In which,
|
||||
#. pool 0 has 8 placement groups. And two tables follow:
|
||||
#. pool 1 has 8 placement groups. And two tables follow:
|
||||
#. A table for placement groups. Each row presents a placement group. With columns of:
|
||||
|
||||
* placement group id,
|
||||
@ -201,6 +250,56 @@ placement group distribution, whose standard deviation is 1.41421::
|
||||
size 20
|
||||
size 364
|
||||
|
||||
To simulate the active balancer in upmap mode::
|
||||
|
||||
osdmaptool --upmap upmaps.out --upmap-active --upmap-deviation 6 --upmap-max 11 osdmap
|
||||
|
||||
osdmaptool: osdmap file 'osdmap'
|
||||
writing upmap command output to: upmaps.out
|
||||
checking for upmap cleanups
|
||||
upmap, max-count 11, max deviation 6
|
||||
pools movies photos metadata data
|
||||
prepared 11/11 changes
|
||||
Time elapsed 0.00310404 secs
|
||||
pools movies photos metadata data
|
||||
prepared 11/11 changes
|
||||
Time elapsed 0.00283402 secs
|
||||
pools data metadata movies photos
|
||||
prepared 11/11 changes
|
||||
Time elapsed 0.003122 secs
|
||||
pools photos metadata data movies
|
||||
prepared 11/11 changes
|
||||
Time elapsed 0.00324372 secs
|
||||
pools movies metadata data photos
|
||||
prepared 1/11 changes
|
||||
Time elapsed 0.00222609 secs
|
||||
pools data movies photos metadata
|
||||
prepared 0/11 changes
|
||||
Time elapsed 0.00209916 secs
|
||||
Unable to find further optimization, or distribution is already perfect
|
||||
osd.0 pgs 41
|
||||
osd.1 pgs 42
|
||||
osd.2 pgs 42
|
||||
osd.3 pgs 41
|
||||
osd.4 pgs 46
|
||||
osd.5 pgs 39
|
||||
osd.6 pgs 39
|
||||
osd.7 pgs 43
|
||||
osd.8 pgs 41
|
||||
osd.9 pgs 46
|
||||
osd.10 pgs 46
|
||||
osd.11 pgs 46
|
||||
osd.12 pgs 46
|
||||
osd.13 pgs 41
|
||||
osd.14 pgs 40
|
||||
osd.15 pgs 40
|
||||
osd.16 pgs 39
|
||||
osd.17 pgs 46
|
||||
osd.18 pgs 46
|
||||
osd.19 pgs 39
|
||||
osd.20 pgs 42
|
||||
Total time elapsed 0.0167765 secs, 5 rounds
|
||||
|
||||
|
||||
Availability
|
||||
============
|
||||
|
@ -23,14 +23,12 @@ use with::
|
||||
|
||||
ceph features
|
||||
|
||||
A word of caution
|
||||
Balancer module
|
||||
-----------------
|
||||
|
||||
This is a new feature and not very user friendly. At the time of this
|
||||
writing we are working on a new `balancer` module for ceph-mgr that
|
||||
will eventually do all of this automatically.
|
||||
The new `balancer` module for ceph-mgr will automatically balance
|
||||
the number of PGs per OSD. See ``Balancer``
|
||||
|
||||
Until then,
|
||||
|
||||
Offline optimization
|
||||
--------------------
|
||||
@ -43,7 +41,9 @@ Upmap entries are updated with an offline optimizer built into ``osdmaptool``.
|
||||
|
||||
#. Run the optimizer::
|
||||
|
||||
osdmaptool om --upmap out.txt [--upmap-pool <pool>] [--upmap-max <max-count>] [--upmap-deviation <max-deviation>]
|
||||
osdmaptool om --upmap out.txt [--upmap-pool <pool>]
|
||||
[--upmap-max <max-optimizations>] [--upmap-deviation <max-deviation>]
|
||||
[--upmap-active]
|
||||
|
||||
It is highly recommended that optimization be done for each pool
|
||||
individually, or for sets of similarly-utilized pools. You can
|
||||
@ -52,24 +52,34 @@ Upmap entries are updated with an offline optimizer built into ``osdmaptool``.
|
||||
kind of data (e.g., RBD image pools, yes; RGW index pool and RGW
|
||||
data pool, no).
|
||||
|
||||
The ``max-count`` value is the maximum number of upmap entries to
|
||||
identify in the run. The default is 100, but you may want to make
|
||||
this a smaller number so that the tool completes more quickly (but
|
||||
does less work). If it cannot find any additional changes to make
|
||||
it will stop early (i.e., when the pool distribution is perfect).
|
||||
The ``max-optimizations`` value is the maximum number of upmap entries to
|
||||
identify in the run. The default is `10` like the ceph-mgr balancer module,
|
||||
but you should use a larger number if you are doing offline optimization.
|
||||
If it cannot find any additional changes to make it will stop early
|
||||
(i.e., when the pool distribution is perfect).
|
||||
|
||||
The ``max-deviation`` value defaults to `.01` (i.e., 1%). If an OSD
|
||||
utilization varies from the average by less than this amount it
|
||||
will be considered perfect.
|
||||
The ``max-deviation`` value defaults to `5`. If an OSD PG count
|
||||
varies from the computed target number by less than or equal
|
||||
to this amount it will be considered perfect.
|
||||
|
||||
#. The proposed changes are written to the output file ``out.txt`` in
|
||||
the example above. These are normal ceph CLI commands that can be
|
||||
run to apply the changes to the cluster. This can be done with::
|
||||
The ``--upmap-active`` option simulates the behavior of the active
|
||||
balancer in upmap mode. It keeps cycling until the OSDs are balanced
|
||||
and reports how many rounds were run and how long each round took. The
|
||||
elapsed time for rounds indicates the CPU load ceph-mgr will be
|
||||
consuming when it tries to compute the next optimization plan.
|
||||
|
||||
#. Apply the changes::
|
||||
|
||||
source out.txt
|
||||
|
||||
The proposed changes are written to the output file ``out.txt`` in
|
||||
the example above. These are normal ceph CLI commands that can be
|
||||
run to apply the changes to the cluster.
|
||||
|
||||
|
||||
The above steps can be repeated as many times as necessary to achieve
|
||||
a perfect distribution of PGs for each set of pools.
|
||||
|
||||
You can see some (gory) details about what the tool is doing by
|
||||
passing ``--debug-osd 10`` to ``osdmaptool``.
|
||||
passing ``--debug-osd 10`` and even more with ``--debug-crush 10``
|
||||
to ``osdmaptool``.
|
||||
|
@ -67,9 +67,9 @@ function TEST_balancer() {
|
||||
ceph balancer pool add $TEST_POOL1 || return 1
|
||||
ceph balancer pool add $TEST_POOL2 || return 1
|
||||
ceph balancer pool ls || return 1
|
||||
eval POOL=$(ceph balancer pool ls | jq '.[0]')
|
||||
eval POOL=$(ceph balancer pool ls | jq 'sort | .[0]')
|
||||
test "$POOL" = "$TEST_POOL1" || return 1
|
||||
eval POOL=$(ceph balancer pool ls | jq '.[1]')
|
||||
eval POOL=$(ceph balancer pool ls | jq 'sort | .[1]')
|
||||
test "$POOL" = "$TEST_POOL2" || return 1
|
||||
ceph balancer pool rm $TEST_POOL1 || return 1
|
||||
ceph balancer pool rm $TEST_POOL2 || return 1
|
||||
@ -141,6 +141,7 @@ function TEST_balancer2() {
|
||||
done
|
||||
|
||||
ceph osd set-require-min-compat-client luminous
|
||||
ceph config set mgr mgr/balancer/upmap_max_deviation 1
|
||||
ceph balancer mode upmap || return 1
|
||||
ceph balancer on || return 1
|
||||
ceph config set mgr mgr/balancer/sleep_interval 5
|
||||
@ -195,17 +196,20 @@ function TEST_balancer2() {
|
||||
sleep 30
|
||||
ceph osd df
|
||||
|
||||
# FINAL_PER_OSD2 should distribute evenly
|
||||
# We should be within plus or minus 1 of FINAL_PER_OSD2
|
||||
# This is because here each pool is balanced independently
|
||||
MIN=$(expr $FINAL_PER_OSD2 - 1)
|
||||
MAX=$(expr $FINAL_PER_OSD2 + 1)
|
||||
PGS=$(ceph osd df --format=json-pretty | jq '.nodes[0].pgs')
|
||||
test $PGS -eq $FINAL_PER_OSD2 || return 1
|
||||
test $PGS -ge $MIN -a $PGS -le $MAX || return 1
|
||||
PGS=$(ceph osd df --format=json-pretty | jq '.nodes[1].pgs')
|
||||
test $PGS -eq $FINAL_PER_OSD2 || return 1
|
||||
test $PGS -ge $MIN -a $PGS -le $MAX || return 1
|
||||
PGS=$(ceph osd df --format=json-pretty | jq '.nodes[2].pgs')
|
||||
test $PGS -eq $FINAL_PER_OSD2 || return 1
|
||||
test $PGS -ge $MIN -a $PGS -le $MAX || return 1
|
||||
PGS=$(ceph osd df --format=json-pretty | jq '.nodes[3].pgs')
|
||||
test $PGS -eq $FINAL_PER_OSD2 || return 1
|
||||
test $PGS -ge $MIN -a $PGS -le $MAX || return 1
|
||||
PGS=$(ceph osd df --format=json-pretty | jq '.nodes[4].pgs')
|
||||
test $PGS -eq $FINAL_PER_OSD2 || return 1
|
||||
test $PGS -ge $MIN -a $PGS -le $MAX || return 1
|
||||
|
||||
teardown $dir || return 1
|
||||
}
|
||||
|
@ -313,7 +313,7 @@ class Module(MgrModule):
|
||||
{
|
||||
'name': 'upmap_max_deviation',
|
||||
'type': 'int',
|
||||
'default': 1,
|
||||
'default': 5,
|
||||
'min': 1,
|
||||
'desc': 'deviation below which no optimization is attempted',
|
||||
'long_desc': 'If the number of PGs are within this count then no optimization is attempted',
|
||||
@ -979,14 +979,17 @@ class Module(MgrModule):
|
||||
detail = 'No pools available'
|
||||
self.log.info(detail)
|
||||
return -errno.ENOENT, detail
|
||||
# shuffle pool list so they all get equal (in)attention
|
||||
random.shuffle(pools)
|
||||
self.log.info('pools %s' % pools)
|
||||
|
||||
adjusted_pools = []
|
||||
inc = plan.inc
|
||||
total_did = 0
|
||||
left = max_optimizations
|
||||
pools_with_pg_merge = [p['pool_name'] for p in osdmap_dump.get('pools', [])
|
||||
if p['pg_num'] > p['pg_num_target']]
|
||||
crush_rule_by_pool_name = dict((p['pool_name'], p['crush_rule']) for p in osdmap_dump.get('pools', []))
|
||||
pools_by_crush_rule = {} # group pools by crush_rule
|
||||
for pool in pools:
|
||||
if pool not in crush_rule_by_pool_name:
|
||||
self.log.info('pool %s does not exist' % pool)
|
||||
@ -994,36 +997,36 @@ class Module(MgrModule):
|
||||
if pool in pools_with_pg_merge:
|
||||
self.log.info('pool %s has pending PG(s) for merging, skipping for now' % pool)
|
||||
continue
|
||||
crush_rule = crush_rule_by_pool_name[pool]
|
||||
if crush_rule not in pools_by_crush_rule:
|
||||
pools_by_crush_rule[crush_rule] = []
|
||||
pools_by_crush_rule[crush_rule].append(pool)
|
||||
classified_pools = list(pools_by_crush_rule.values())
|
||||
adjusted_pools.append(pool)
|
||||
# shuffle so all pools get equal (in)attention
|
||||
random.shuffle(classified_pools)
|
||||
for it in classified_pools:
|
||||
pool_dump = osdmap_dump.get('pools', [])
|
||||
random.shuffle(adjusted_pools)
|
||||
pool_dump = osdmap_dump.get('pools', [])
|
||||
for pool in adjusted_pools:
|
||||
num_pg = 0
|
||||
for p in pool_dump:
|
||||
if p['pool_name'] in it:
|
||||
num_pg += p['pg_num']
|
||||
if p['pool_name'] == pool:
|
||||
num_pg = p['pg_num']
|
||||
pool_id = p['pool']
|
||||
break
|
||||
|
||||
# note that here we deliberately exclude any scrubbing pgs too
|
||||
# since scrubbing activities have significant impacts on performance
|
||||
pool_ids = list(p['pool'] for p in pool_dump if p['pool_name'] in it)
|
||||
num_pg_active_clean = 0
|
||||
for p in plan.pg_status.get('pgs_by_pool_state', []):
|
||||
if len(pool_ids) and p['pool_id'] not in pool_ids:
|
||||
pgs_pool_id = p['pool_id']
|
||||
if pgs_pool_id != pool_id:
|
||||
continue
|
||||
for s in p['pg_state_counts']:
|
||||
if s['state_name'] == 'active+clean':
|
||||
num_pg_active_clean += s['count']
|
||||
break
|
||||
available = max_optimizations - (num_pg - num_pg_active_clean)
|
||||
did = plan.osdmap.calc_pg_upmaps(inc, max_deviation, available, it)
|
||||
self.log.info('prepared %d changes for pool(s) %s' % (did, it))
|
||||
available = left - (num_pg - num_pg_active_clean)
|
||||
did = plan.osdmap.calc_pg_upmaps(inc, max_deviation, available, [pool])
|
||||
total_did += did
|
||||
self.log.info('prepared %d changes in total' % total_did)
|
||||
left -= did
|
||||
if left <= 0:
|
||||
break
|
||||
self.log.info('prepared %d/%d changes' % (total_did, max_optimizations))
|
||||
if total_did == 0:
|
||||
return -errno.EALREADY, 'Unable to find further optimization, ' \
|
||||
'or pool(s) pg_num is decreasing, ' \
|
||||
|
@ -25,9 +25,10 @@
|
||||
writing commands to <file> [default: - for stdout]
|
||||
--upmap-max <max-count> set max upmap entries to calculate [default: 10]
|
||||
--upmap-deviation <max-deviation>
|
||||
max deviation from target [default: 1]
|
||||
max deviation from target [default: 5]
|
||||
--upmap-pool <poolname> restrict upmap balancing to 1 or more pools
|
||||
--upmap-save write modified OSDMap with upmap changes
|
||||
--upmap-active Act like an active balancer, keep applying changes until balanced
|
||||
--dump <format> displays the map in plain text when <format> is 'plain', 'json' if specified format is not supported
|
||||
--tree displays a tree of the map
|
||||
--test-crush [--range-first <first> --range-last <last>] map pgs to acting osds
|
||||
|
@ -7,9 +7,9 @@
|
||||
marking OSD@147 as out
|
||||
writing upmap command output to: c
|
||||
checking for upmap cleanups
|
||||
upmap, max-count 11, max deviation 1
|
||||
prepared 11 changes for pools(s) rbd
|
||||
prepared 11 changes in total
|
||||
upmap, max-count 11, max deviation 5
|
||||
pools rbd
|
||||
prepared 11/11 changes
|
||||
$ cat c
|
||||
ceph osd pg-upmap-items 1.7 142 145
|
||||
ceph osd pg-upmap-items 1.8 219 223
|
||||
|
@ -6,9 +6,9 @@
|
||||
marking all OSDs up and in
|
||||
writing upmap command output to: c
|
||||
checking for upmap cleanups
|
||||
upmap, max-count 11, max deviation 1
|
||||
prepared 11 changes for pools(s) rbd
|
||||
prepared 11 changes in total
|
||||
upmap, max-count 11, max deviation 5
|
||||
pools rbd
|
||||
prepared 11/11 changes
|
||||
$ cat c
|
||||
ceph osd pg-upmap-items 1.7 142 147
|
||||
ceph osd pg-upmap-items 1.8 219 223
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "common/errno.h"
|
||||
#include "common/safe_io.h"
|
||||
#include "mon/health_check.h"
|
||||
#include <time.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "global/global_init.h"
|
||||
@ -53,9 +54,10 @@ void usage()
|
||||
cout << " writing commands to <file> [default: - for stdout]" << std::endl;
|
||||
cout << " --upmap-max <max-count> set max upmap entries to calculate [default: 10]" << std::endl;
|
||||
cout << " --upmap-deviation <max-deviation>" << std::endl;
|
||||
cout << " max deviation from target [default: 1]" << std::endl;
|
||||
cout << " max deviation from target [default: 5]" << std::endl;
|
||||
cout << " --upmap-pool <poolname> restrict upmap balancing to 1 or more pools" << std::endl;
|
||||
cout << " --upmap-save write modified OSDMap with upmap changes" << std::endl;
|
||||
cout << " --upmap-active Act like an active balancer, keep applying changes until balanced" << std::endl;
|
||||
cout << " --dump <format> displays the map in plain text when <format> is 'plain', 'json' if specified format is not supported" << std::endl;
|
||||
cout << " --tree displays a tree of the map" << std::endl;
|
||||
cout << " --test-crush [--range-first <first> --range-last <last>] map pgs to acting osds" << std::endl;
|
||||
@ -143,11 +145,11 @@ int main(int argc, const char **argv)
|
||||
bool health = false;
|
||||
std::string upmap_file = "-";
|
||||
int upmap_max = 10;
|
||||
int upmap_deviation = 1;
|
||||
int upmap_deviation = 5;
|
||||
bool upmap_active = false;
|
||||
std::set<std::string> upmap_pools;
|
||||
int64_t pg_num = -1;
|
||||
bool test_map_pgs_dump_all = false;
|
||||
bool debug = false;
|
||||
|
||||
std::string val;
|
||||
std::ostringstream err;
|
||||
@ -185,10 +187,10 @@ int main(int argc, const char **argv)
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
createsimple = true;
|
||||
} else if (ceph_argparse_flag(args, i, "--upmap-active", (char*)NULL)) {
|
||||
upmap_active = true;
|
||||
} else if (ceph_argparse_flag(args, i, "--health", (char*)NULL)) {
|
||||
health = true;
|
||||
} else if (ceph_argparse_flag(args, i, "--debug", (char*)NULL)) {
|
||||
debug = true;
|
||||
} else if (ceph_argparse_flag(args, i, "--with-default-pool", (char*)NULL)) {
|
||||
createpool = true;
|
||||
} else if (ceph_argparse_flag(args, i, "--create-from-conf", (char*)NULL)) {
|
||||
@ -382,9 +384,8 @@ int main(int argc, const char **argv)
|
||||
cout << "upmap, max-count " << upmap_max
|
||||
<< ", max deviation " << upmap_deviation
|
||||
<< std::endl;
|
||||
OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
|
||||
pending_inc.fsid = osdmap.get_fsid();
|
||||
vector<int64_t> pools;
|
||||
set<int64_t> upmap_pool_nums;
|
||||
for (auto& s : upmap_pools) {
|
||||
int64_t p = osdmap.lookup_pg_pool_name(s);
|
||||
if (p < 0) {
|
||||
@ -392,6 +393,7 @@ int main(int argc, const char **argv)
|
||||
exit(1);
|
||||
}
|
||||
pools.push_back(p);
|
||||
upmap_pool_nums.insert(p);
|
||||
}
|
||||
if (!pools.empty()) {
|
||||
cout << " limiting to pools " << upmap_pools << " (" << pools << ")"
|
||||
@ -406,68 +408,79 @@ int main(int argc, const char **argv)
|
||||
cout << "No pools available" << std::endl;
|
||||
goto skip_upmap;
|
||||
}
|
||||
if (debug) {
|
||||
int rounds = 0;
|
||||
struct timespec round_start;
|
||||
int r = clock_gettime(CLOCK_MONOTONIC, &round_start);
|
||||
assert(r == 0);
|
||||
do {
|
||||
std::random_device rd;
|
||||
std::shuffle(pools.begin(), pools.end(), std::mt19937{rd()});
|
||||
cout << "pools ";
|
||||
for (auto& i: pools)
|
||||
cout << osdmap.get_pool_name(i) << " ";
|
||||
cout << std::endl;
|
||||
}
|
||||
map< int, set<int64_t> > pools_by_rule;
|
||||
for (auto&i: pools) {
|
||||
const string& pool_name = osdmap.get_pool_name(i);
|
||||
const pg_pool_t *p = osdmap.get_pg_pool(i);
|
||||
const int rule = p->get_crush_rule();
|
||||
if (!osdmap.crush->rule_exists(rule)) {
|
||||
cout << " pool " << pool_name << " does not exist" << std::endl;
|
||||
continue;
|
||||
OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
|
||||
pending_inc.fsid = osdmap.get_fsid();
|
||||
int total_did = 0;
|
||||
int left = upmap_max;
|
||||
struct timespec begin, end;
|
||||
r = clock_gettime(CLOCK_MONOTONIC, &begin);
|
||||
assert(r == 0);
|
||||
for (auto& i: pools) {
|
||||
set<int64_t> one_pool;
|
||||
one_pool.insert(i);
|
||||
int did = osdmap.calc_pg_upmaps(
|
||||
g_ceph_context, upmap_deviation,
|
||||
left, one_pool,
|
||||
&pending_inc);
|
||||
total_did += did;
|
||||
left -= did;
|
||||
if (left <= 0)
|
||||
break;
|
||||
}
|
||||
if (p->get_pg_num() > p->get_pg_num_target()) {
|
||||
cout << "pool " << pool_name << " has pending PG(s) for merging, skipping for now" << std::endl;
|
||||
continue;
|
||||
r = clock_gettime(CLOCK_MONOTONIC, &end);
|
||||
assert(r == 0);
|
||||
cout << "prepared " << total_did << "/" << upmap_max << " changes" << std::endl;
|
||||
float elapsed_time = (end.tv_sec - begin.tv_sec) + 1.0e-9*(end.tv_nsec - begin.tv_nsec);
|
||||
if (upmap_active)
|
||||
cout << "Time elapsed " << elapsed_time << " secs" << std::endl;
|
||||
if (total_did > 0) {
|
||||
print_inc_upmaps(pending_inc, upmap_fd);
|
||||
if (upmap_save || upmap_active) {
|
||||
int r = osdmap.apply_incremental(pending_inc);
|
||||
ceph_assert(r == 0);
|
||||
if (upmap_save)
|
||||
modified = true;
|
||||
}
|
||||
} else {
|
||||
cout << "Unable to find further optimization, "
|
||||
<< "or distribution is already perfect"
|
||||
<< std::endl;
|
||||
if (upmap_active) {
|
||||
map<int,set<pg_t>> pgs_by_osd;
|
||||
for (auto& i : osdmap.get_pools()) {
|
||||
if (!upmap_pool_nums.empty() && !upmap_pool_nums.count(i.first))
|
||||
continue;
|
||||
for (unsigned ps = 0; ps < i.second.get_pg_num(); ++ps) {
|
||||
pg_t pg(ps, i.first);
|
||||
vector<int> up;
|
||||
osdmap.pg_to_up_acting_osds(pg, &up, nullptr, nullptr, nullptr);
|
||||
//ldout(cct, 20) << __func__ << " " << pg << " up " << up << dendl;
|
||||
for (auto osd : up) {
|
||||
if (osd != CRUSH_ITEM_NONE)
|
||||
pgs_by_osd[osd].insert(pg);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto& i : pgs_by_osd)
|
||||
cout << "osd." << i.first << " pgs " << i.second.size() << std::endl;
|
||||
float elapsed_time = (end.tv_sec - round_start.tv_sec) + 1.0e-9*(end.tv_nsec - round_start.tv_nsec);
|
||||
cout << "Total time elapsed " << elapsed_time << " secs, " << rounds << " rounds" << std::endl;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (debug) {
|
||||
cout << "pool " << i << " rule " << rule << std::endl;
|
||||
}
|
||||
pools_by_rule[rule].emplace(i);
|
||||
}
|
||||
vector<int> rules;
|
||||
for (auto& r: pools_by_rule)
|
||||
rules.push_back(r.first);
|
||||
std::random_device rd;
|
||||
std::shuffle(rules.begin(), rules.end(), std::mt19937{rd()});
|
||||
if (debug) {
|
||||
for (auto& r: rules)
|
||||
cout << "rule: " << r << " " << pools_by_rule[r] << std::endl;
|
||||
}
|
||||
int total_did = 0;
|
||||
int available = upmap_max;
|
||||
for (auto& r: rules) {
|
||||
// Assume all PGs are active+clean
|
||||
// available = upmap_max - (num_pg - num_pg_active_clean)
|
||||
int did = osdmap.calc_pg_upmaps(
|
||||
g_ceph_context, upmap_deviation,
|
||||
available, pools_by_rule[r],
|
||||
&pending_inc);
|
||||
cout << "prepared " << did << " changes for pools(s) ";
|
||||
for (auto i: pools_by_rule[r])
|
||||
cout << osdmap.get_pool_name(i) << " ";
|
||||
cout << std::endl;
|
||||
total_did += did;
|
||||
}
|
||||
cout << "prepared " << total_did << " changes in total" << std::endl;
|
||||
if (total_did > 0) {
|
||||
print_inc_upmaps(pending_inc, upmap_fd);
|
||||
if (upmap_save) {
|
||||
int r = osdmap.apply_incremental(pending_inc);
|
||||
ceph_assert(r == 0);
|
||||
modified = true;
|
||||
}
|
||||
} else {
|
||||
cout << "Unable to find further optimization, "
|
||||
<< "or pool(s) pg_num is decreasing, "
|
||||
<< "or distribution is already perfect"
|
||||
<< std::endl;
|
||||
}
|
||||
++rounds;
|
||||
} while(upmap_active);
|
||||
}
|
||||
skip_upmap:
|
||||
if (upmap_file != "-") {
|
||||
|
Loading…
Reference in New Issue
Block a user