# Patch summary: move the CRUSH location update done at OSD start-up out of
# the ceph-osd-prestart.sh shell script and into the OSD daemon.
#
# - src/ceph-osd-prestart.sh: removes the block that looked up
#   osd_crush_update_on_start, osd_crush_location_hook and
#   osd_crush_initial_weight and then ran "ceph osd crush create-or-move".
# - src/common/config_opts.h: declares osd_crush_update_on_start (bool,
#   default true) and osd_crush_initial_weight (double, default -1).
# - src/osd/OSD.cc: OSD::init() calls the new OSD::update_crush_location();
#   on a negative return it takes osd_lock and jumps to monout.
#   update_crush_location() returns 0 immediately when
#   osd_crush_update_on_start is false. Otherwise it formats a weight
#   (osd_crush_initial_weight when it is >= 0, else the store->statfs()
#   size f_blocks * f_bsize divided by 2^40, with a floor of 0.00001),
#   builds a JSON "osd crush create-or-move" command whose args are the
#   key=value pairs from cct->crush_location.get_location(), sends it with
#   monc->start_mon_command() and waits for the result. It returns 0 on
#   success or the negative error code on failure.
# - src/osd/OSD.h: declares the private method update_crush_location().
#
# Review notes on this revision of the patch:
# - A negative osd_crush_initial_weight now means "not set". The previous
#   truthiness test with a default of 0 could not tell an explicit weight of
#   0 from an unset option, although the removed shell script honoured an
#   explicit 0 through "${weight:-${defaultweight:-1}}".
# - The <string,string> / <string> template arguments on multimap and vector
#   are spelled out; without them multimap::iterator does not compile.
diff --git a/src/ceph-osd-prestart.sh b/src/ceph-osd-prestart.sh index c9a777d9661..c5b99d53c69 100644 --- a/src/ceph-osd-prestart.sh +++ b/src/ceph-osd-prestart.sh @@ -20,32 +20,6 @@ fi data="/var/lib/ceph/osd/${cluster:-ceph}-$id" journal="$data/journal" -update="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_update_on_start || :)" - -if [ "${update:-1}" = "1" -o "${update:-1}" = "true" ]; then - # update location in crush - hook="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_location_hook || :)" - if [ -z "$hook" ]; then - hook="/usr/bin/ceph-crush-location" - fi - location="$($hook --cluster ${cluster:-ceph} --id $id --type osd)" - weight="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_initial_weight || :)" - if [ -e $data/block ]; then - defaultweight=`blockdev --getsize64 $data/block | awk '{ d= $1/1099511627776 ; r = sprintf("%.4f", d); print r }'` - else - defaultweight=`df -P -k $data/ | tail -1 | awk '{ d= $2/1073741824 ; r = sprintf("%.4f", d); print r }'` - fi - ceph \ - --cluster="${cluster:-ceph}" \ - --name="osd.$id" \ - --keyring="$data/keyring" \ - osd crush create-or-move \ - -- \ - "$id" \ - "${weight:-${defaultweight:-1}}" \ - $location -fi - if [ -L "$journal" -a ! -e "$journal" ]; then udevadm settle --timeout=5 || : if [ -L "$journal" -a ! -e "$journal" ]; then diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 0ec74a5f8ac..a6227e9a56a 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -612,9 +612,9 @@ OPTION(osd_pg_op_threshold_ratio, OPT_U64, 2) // the expected maximu OPTION(osd_pg_bits, OPT_INT, 6) // bits per osd OPTION(osd_pgp_bits, OPT_INT, 6) // bits per osd OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host -// This parameter is not consumed by ceph C code but the upstart scripts. -// OPTION(osd_crush_initial_weight, OPT_DOUBLE, 0) // the initial weight is for newly added osds. 
OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it. +OPTION(osd_crush_update_on_start, OPT_BOOL, true) +OPTION(osd_crush_initial_weight, OPT_DOUBLE, -1) // if >= 0, the initial weight for newly added osds. OPTION(osd_pool_default_crush_rule, OPT_INT, -1) // deprecated for osd_pool_default_crush_replicated_ruleset OPTION(osd_pool_default_crush_replicated_ruleset, OPT_INT, CEPH_DEFAULT_CRUSH_REPLICATED_RULESET) OPTION(osd_pool_erasure_code_stripe_width, OPT_U32, OSD_POOL_ERASURE_CODE_STRIPE_WIDTH) // in bytes diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 7caef4ef946..bc12f783569 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2207,6 +2207,12 @@ int OSD::init() } } + r = update_crush_location(); + if (r < 0) { + osd_lock.Lock(); + goto monout; + } + osd_lock.Lock(); if (is_stopping()) return 0; @@ -2752,6 +2758,61 @@ int OSD::shutdown() return r; } +int OSD::update_crush_location() +{ + if (!g_conf->osd_crush_update_on_start) { + dout(10) << __func__ << " osd_crush_update_on_start = false" << dendl; + return 0; + } + + char weight[32]; + if (g_conf->osd_crush_initial_weight >= 0) { + snprintf(weight, sizeof(weight), "%.4lf", g_conf->osd_crush_initial_weight); + } else { + struct statfs st; + int r = store->statfs(&st); + if (r < 0) { + derr << "statfs: " << cpp_strerror(r) << dendl; + return r; + } + snprintf(weight, sizeof(weight), "%.4lf", + MAX((double).00001, + (double)(st.f_blocks * st.f_bsize) / + (double)(1ull << 40 /* TB */))); + } + + std::multimap<string,string> loc = cct->crush_location.get_location(); + dout(10) << __func__ << " crush location is " << loc << dendl; + + string cmd = + string("{\"prefix\": \"osd crush create-or-move\", ") + + string("\"id\": ") + stringify(whoami) + string(", ") + + string("\"weight\":") + weight + string(", ") + + string("\"args\": ["); + for (multimap<string,string>::iterator p = loc.begin(); p != loc.end(); ++p) { + if (p != loc.begin()) + cmd += ", "; + cmd += "\"" + p->first + 
"=" + p->second + "\""; + } + cmd += "]}"; + + dout(10) << __func__ << " cmd: " << cmd << dendl; + vector<string> vcmd{cmd}; + bufferlist inbl; + + C_SaferCond w; + string outs; + int r = monc->start_mon_command(vcmd, inbl, NULL, &outs, &w); + if (r == 0) + r = w.wait(); + if (r < 0) { + derr << __func__ << " fail: '" << outs << "': " << cpp_strerror(r) << dendl; + return r; + } + + return 0; +} + void OSD::write_superblock(ObjectStore::Transaction& t) { dout(10) << "write_superblock " << superblock << dendl; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index cdf2afd45dc..7c6fac7fca7 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -2404,6 +2404,8 @@ protected: } private: + int update_crush_location(); + static int write_meta(ObjectStore *store, uuid_d& cluster_fsid, uuid_d& osd_fsid, int whoami);