osd: update crush_location on startup from ceph-osd

Update the crush location from ceph-osd instead of relying on
kludgey bash in ceph-osd-prestart.sh.  Among other things, this
lets us get accurate statfs information from the ObjectStore
implementation instead of relying on 'df'.

Fixes: http://tracker.ceph.com/issues/15213
Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2016-04-22 11:07:05 -04:00
parent 4587a379a3
commit 573c349245
4 changed files with 65 additions and 28 deletions

View File

@ -20,32 +20,6 @@ fi
# Locations of this OSD's data directory and its journal inside it.
data="/var/lib/ceph/osd/${cluster:-ceph}-$id"
journal="$data/journal"
# Look up osd_crush_update_on_start for this OSD; the trailing "|| :" forces a
# successful exit status, so a failed lookup just leaves $update empty.
update="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_update_on_start || :)"
# An empty $update is treated as "1", so the crush update runs unless the
# option is explicitly set to something other than 1/true.
if [ "${update:-1}" = "1" -o "${update:-1}" = "true" ]; then
# update location in crush
# Use the configured location hook, falling back to the stock
# /usr/bin/ceph-crush-location when none is configured.
hook="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_location_hook || :)"
if [ -z "$hook" ]; then
hook="/usr/bin/ceph-crush-location"
fi
# The hook prints the crush location arguments for this OSD.
location="$($hook --cluster ${cluster:-ceph} --id $id --type osd)"
# Explicitly configured initial weight, if any (empty when the lookup fails).
weight="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_initial_weight || :)"
# Default weight is the device/filesystem size expressed in TiB with four
# decimals: bytes / 2^40 for a block device, 1K-blocks / 2^30 from df otherwise.
if [ -e $data/block ]; then
defaultweight=`blockdev --getsize64 $data/block | awk '{ d= $1/1099511627776 ; r = sprintf("%.4f", d); print r }'`
else
defaultweight=`df -P -k $data/ | tail -1 | awk '{ d= $2/1073741824 ; r = sprintf("%.4f", d); print r }'`
fi
# Weight precedence: configured weight, then the computed default, then 1.
# $location is left unquoted, so the hook output is word-split into separate
# arguments of the create-or-move command.
ceph \
--cluster="${cluster:-ceph}" \
--name="osd.$id" \
--keyring="$data/keyring" \
osd crush create-or-move \
-- \
"$id" \
"${weight:-${defaultweight:-1}}" \
$location
fi
if [ -L "$journal" -a ! -e "$journal" ]; then
udevadm settle --timeout=5 || :
if [ -L "$journal" -a ! -e "$journal" ]; then

View File

@ -612,9 +612,9 @@ OPTION(osd_pg_op_threshold_ratio, OPT_U64, 2) // the expected maximu
OPTION(osd_pg_bits, OPT_INT, 6) // bits per osd
OPTION(osd_pgp_bits, OPT_INT, 6) // bits per osd
OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host
// This parameter is not consumed by ceph C code but the upstart scripts.
// OPTION(osd_crush_initial_weight, OPT_DOUBLE, 0) // the initial weight is for newly added osds.
OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it.
OPTION(osd_crush_update_on_start, OPT_BOOL, true) // when false, OSD::update_crush_location() returns without sending anything to the monitors
OPTION(osd_crush_initial_weight, OPT_DOUBLE, 0) // the initial weight is for newly added osds; 0 makes OSD::update_crush_location() derive the weight from the store size (in TB)
OPTION(osd_pool_default_crush_rule, OPT_INT, -1) // deprecated for osd_pool_default_crush_replicated_ruleset
OPTION(osd_pool_default_crush_replicated_ruleset, OPT_INT, CEPH_DEFAULT_CRUSH_REPLICATED_RULESET)
OPTION(osd_pool_erasure_code_stripe_width, OPT_U32, OSD_POOL_ERASURE_CODE_STRIPE_WIDTH) // in bytes

View File

@ -2207,6 +2207,12 @@ int OSD::init()
}
}
r = update_crush_location();
if (r < 0) {
osd_lock.Lock();
goto monout;
}
osd_lock.Lock();
if (is_stopping())
return 0;
@ -2752,6 +2758,61 @@ int OSD::shutdown()
return r;
}
/**
 * Tell the monitors where this OSD lives in the CRUSH map and what it weighs.
 *
 * Does nothing (and returns 0) when osd_crush_update_on_start is false.
 * Otherwise it builds an "osd crush create-or-move" command for this OSD
 * (whoami) with:
 *   - weight: osd_crush_initial_weight when that option is non-zero, else the
 *     store's total size from statfs expressed in units of 2^40 bytes, with a
 *     floor of 0.00001; either way formatted with four decimals;
 *   - args:   the "key=value" pairs of the configured crush location.
 * The command is submitted through the MonClient and the function waits for
 * its completion.
 *
 * @return 0 on success or when the update is disabled; a negative error code
 *         if statfs on the store fails, the command cannot be started, or the
 *         command itself completes with an error.
 */
int OSD::update_crush_location()
{
  if (!g_conf->osd_crush_update_on_start) {
    dout(10) << __func__ << " osd_crush_update_on_start = false" << dendl;
    return 0;
  }

  char weight[32];
  if (g_conf->osd_crush_initial_weight) {
    // An explicitly configured weight always wins over the computed one.
    snprintf(weight, sizeof(weight), "%.4lf", g_conf->osd_crush_initial_weight);
  } else {
    struct statfs st;
    int r = store->statfs(&st);
    if (r < 0) {
      derr << "statfs: " << cpp_strerror(r) << dendl;
      return r;
    }
    // Convert each factor to double *before* multiplying: doing the product in
    // the integer types of struct statfs can wrap around on platforms where
    // those fields are only 32 bits wide, yielding a bogus (tiny) weight.
    const double total_bytes =
      static_cast<double>(st.f_blocks) * static_cast<double>(st.f_bsize);
    const double total_tb = total_bytes / static_cast<double>(1ull << 40 /* TB */);
    // Never report less than the floor value for a tiny store.
    snprintf(weight, sizeof(weight), "%.4lf", MAX((double).00001, total_tb));
  }

  std::multimap<string,string> loc = cct->crush_location.get_location();
  dout(10) << __func__ << " crush location is " << loc << dendl;

  // Assemble the JSON-formatted monitor command by hand.
  string cmd =
    string("{\"prefix\": \"osd crush create-or-move\", ") +
    string("\"id\": ") + stringify(whoami) + string(", ") +
    string("\"weight\":") + weight + string(", ") +
    string("\"args\": [");
  for (multimap<string,string>::iterator p = loc.begin(); p != loc.end(); ++p) {
    if (p != loc.begin())
      cmd += ", ";
    cmd += "\"" + p->first + "=" + p->second + "\"";
  }
  cmd += "]}";
  dout(10) << __func__ << " cmd: " << cmd << dendl;

  vector<string> vcmd{cmd};
  bufferlist inbl;
  C_SaferCond w;
  string outs;
  int r = monc->start_mon_command(vcmd, inbl, NULL, &outs, &w);
  if (r == 0)
    r = w.wait();  // only wait when the command was actually started
  if (r < 0) {
    derr << __func__ << " fail: '" << outs << "': " << cpp_strerror(r) << dendl;
    return r;
  }
  return 0;
}
void OSD::write_superblock(ObjectStore::Transaction& t)
{
dout(10) << "write_superblock " << superblock << dendl;

View File

@ -2404,6 +2404,8 @@ protected:
}
private:
// Sends an "osd crush create-or-move" command for this OSD (weight plus crush
// location) to the monitors. Returns 0 on success or when
// osd_crush_update_on_start is false, and a negative error code on failure.
int update_crush_location();
static int write_meta(ObjectStore *store,
uuid_d& cluster_fsid, uuid_d& osd_fsid, int whoami);