mirror of
https://github.com/ceph/ceph
synced 2025-01-12 06:00:46 +00:00
Merge pull request #8357 from liewegas/wip-osd-prestart
osd: update crush_location from ceph-osd on startup Reviewed-by: Kefu Chai <kchai@redhat.com>
This commit is contained in:
commit
a28b71e3c9
@ -10,3 +10,14 @@
|
||||
New monitors will now use rocksdb by default, but if that file is
|
||||
not present, existing monitors will use leveldb. The ``mon keyvaluedb`` option
|
||||
now only affects the backend chosen when a monitor is created.
|
||||
|
||||
* The 'osd crush initial weight' option allows you to specify a CRUSH
|
||||
weight for a newly added OSD. Previously a value of 0 (the default)
|
||||
meant that we should use the size of the OSD's store to weight the
|
||||
new OSD. Now, a value of 0 means it should have a weight of 0, and
|
||||
a negative value (the new default) means we should automatically
|
||||
weight the OSD based on its size. If your configuration file
|
||||
explicitly specifies a value of 0 for this option you will need to
|
||||
change it to a negative value (e.g., -1) to preserve the current
|
||||
behavior.
|
||||
|
||||
|
@ -184,7 +184,8 @@ set(crush_srcs
|
||||
crush/hash.c
|
||||
crush/CrushWrapper.cc
|
||||
crush/CrushCompiler.cc
|
||||
crush/CrushTester.cc)
|
||||
crush/CrushTester.cc
|
||||
crush/CrushLocation.cc)
|
||||
|
||||
add_library(crush STATIC ${crush_srcs})
|
||||
|
||||
|
@ -20,32 +20,6 @@ fi
|
||||
data="/var/lib/ceph/osd/${cluster:-ceph}-$id"
|
||||
journal="$data/journal"
|
||||
|
||||
update="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_update_on_start || :)"
|
||||
|
||||
if [ "${update:-1}" = "1" -o "${update:-1}" = "true" ]; then
|
||||
# update location in crush
|
||||
hook="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_location_hook || :)"
|
||||
if [ -z "$hook" ]; then
|
||||
hook="/usr/bin/ceph-crush-location"
|
||||
fi
|
||||
location="$($hook --cluster ${cluster:-ceph} --id $id --type osd)"
|
||||
weight="$(ceph-conf --cluster=${cluster:-ceph} --name=osd.$id --lookup osd_crush_initial_weight || :)"
|
||||
if [ -e $data/block ]; then
|
||||
defaultweight=`blockdev --getsize64 $data/block | awk '{ d= $1/1099511627776 ; r = sprintf("%.4f", d); print r }'`
|
||||
else
|
||||
defaultweight=`df -P -k $data/ | tail -1 | awk '{ d= $2/1073741824 ; r = sprintf("%.4f", d); print r }'`
|
||||
fi
|
||||
ceph \
|
||||
--cluster="${cluster:-ceph}" \
|
||||
--name="osd.$id" \
|
||||
--keyring="$data/keyring" \
|
||||
osd crush create-or-move \
|
||||
-- \
|
||||
"$id" \
|
||||
"${weight:-${defaultweight:-1}}" \
|
||||
$location
|
||||
fi
|
||||
|
||||
if [ -L "$journal" -a ! -e "$journal" ]; then
|
||||
udevadm settle --timeout=5 || :
|
||||
if [ -L "$journal" -a ! -e "$journal" ]; then
|
||||
|
@ -239,6 +239,7 @@ public:
|
||||
const char** get_tracked_conf_keys() const {
|
||||
static const char *KEYS[] = {
|
||||
"enable_experimental_unrecoverable_data_corrupting_features",
|
||||
"crush_location",
|
||||
NULL
|
||||
};
|
||||
return KEYS;
|
||||
@ -246,13 +247,20 @@ public:
|
||||
|
||||
void handle_conf_change(const md_config_t *conf,
|
||||
const std::set <std::string> &changed) {
|
||||
ceph_spin_lock(&cct->_feature_lock);
|
||||
get_str_set(conf->enable_experimental_unrecoverable_data_corrupting_features,
|
||||
cct->_experimental_features);
|
||||
ceph_spin_unlock(&cct->_feature_lock);
|
||||
if (!cct->_experimental_features.empty())
|
||||
lderr(cct) << "WARNING: the following dangerous and experimental features are enabled: "
|
||||
<< cct->_experimental_features << dendl;
|
||||
if (changed.count(
|
||||
"enable_experimental_unrecoverable_data_corrupting_features")) {
|
||||
ceph_spin_lock(&cct->_feature_lock);
|
||||
get_str_set(
|
||||
conf->enable_experimental_unrecoverable_data_corrupting_features,
|
||||
cct->_experimental_features);
|
||||
ceph_spin_unlock(&cct->_feature_lock);
|
||||
if (!cct->_experimental_features.empty())
|
||||
lderr(cct) << "WARNING: the following dangerous and experimental features are enabled: "
|
||||
<< cct->_experimental_features << dendl;
|
||||
}
|
||||
if (changed.count("crush_location")) {
|
||||
cct->crush_location.update_from_conf();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -459,6 +467,7 @@ CephContext::CephContext(uint32_t module_type_, int init_flags_)
|
||||
_crypto_aes(NULL),
|
||||
_plugin_registry(NULL),
|
||||
_lockdep_obs(NULL),
|
||||
crush_location(this),
|
||||
_cct_perf(NULL)
|
||||
{
|
||||
ceph_spin_init(&_service_thread_lock);
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "include/atomic.h"
|
||||
#include "common/cmdparse.h"
|
||||
#include "include/Spinlock.h"
|
||||
#include "crush/CrushLocation.h"
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
class AdminSocket;
|
||||
@ -247,6 +248,10 @@ private:
|
||||
|
||||
md_config_obs_t *_lockdep_obs;
|
||||
|
||||
public:
|
||||
CrushLocation crush_location;
|
||||
private:
|
||||
|
||||
enum {
|
||||
l_cct_first,
|
||||
l_cct_total_workers,
|
||||
|
@ -417,6 +417,8 @@ OPTION(client_use_faked_inos, OPT_BOOL, false)
|
||||
OPTION(client_mds_namespace, OPT_INT, -1)
|
||||
|
||||
OPTION(crush_location, OPT_STR, "") // whitespace-separated list of key=value pairs describing crush location
|
||||
OPTION(crush_location_hook, OPT_STR, "")
|
||||
OPTION(crush_location_hook_timeout, OPT_INT, 10)
|
||||
|
||||
OPTION(objecter_tick_interval, OPT_DOUBLE, 5.0)
|
||||
OPTION(objecter_timeout, OPT_DOUBLE, 10.0) // before we ask for a map
|
||||
@ -610,9 +612,9 @@ OPTION(osd_pg_op_threshold_ratio, OPT_U64, 2) // the expected maximu
|
||||
OPTION(osd_pg_bits, OPT_INT, 6) // bits per osd
|
||||
OPTION(osd_pgp_bits, OPT_INT, 6) // bits per osd
|
||||
OPTION(osd_crush_chooseleaf_type, OPT_INT, 1) // 1 = host
|
||||
// This parameter is not consumed by ceph C code but the upstart scripts.
|
||||
// OPTION(osd_crush_initial_weight, OPT_DOUBLE, 0) // the initial weight is for newly added osds.
|
||||
OPTION(osd_pool_use_gmt_hitset, OPT_BOOL, true) // try to use gmt for hitset archive names if all osds in cluster support it.
|
||||
OPTION(osd_crush_update_on_start, OPT_BOOL, true)
|
||||
OPTION(osd_crush_initial_weight, OPT_DOUBLE, -1) // if >=0, the initial weight is for newly added osds.
|
||||
OPTION(osd_pool_default_crush_rule, OPT_INT, -1) // deprecated for osd_pool_default_crush_replicated_ruleset
|
||||
OPTION(osd_pool_default_crush_replicated_ruleset, OPT_INT, CEPH_DEFAULT_CRUSH_REPLICATED_RULESET)
|
||||
OPTION(osd_pool_erasure_code_stripe_width, OPT_U32, OSD_POOL_ERASURE_CODE_STRIPE_WIDTH) // in bytes
|
||||
|
105
src/crush/CrushLocation.cc
Normal file
105
src/crush/CrushLocation.cc
Normal file
@ -0,0 +1,105 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
|
||||
#include "CrushLocation.h"
|
||||
#include "CrushWrapper.h"
|
||||
#include "common/config.h"
|
||||
#include "include/str_list.h"
|
||||
#include "common/debug.h"
|
||||
|
||||
#include <common/SubProcess.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
int CrushLocation::update_from_conf()
|
||||
{
|
||||
if (cct->_conf->crush_location.length())
|
||||
return _parse(cct->_conf->crush_location);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CrushLocation::_parse(const std::string& s)
|
||||
{
|
||||
std::multimap<std::string,std::string> new_crush_location;
|
||||
std::vector<std::string> lvec;
|
||||
get_str_vec(s, ";, \t", lvec);
|
||||
int r = CrushWrapper::parse_loc_multimap(lvec, &new_crush_location);
|
||||
if (r < 0) {
|
||||
lderr(cct) << "warning: crush_location '" << cct->_conf->crush_location
|
||||
<< "' does not parse, keeping original crush_location "
|
||||
<< loc << dendl;
|
||||
return -EINVAL;
|
||||
}
|
||||
std::lock_guard<std::mutex> l(lock);
|
||||
loc.swap(new_crush_location);
|
||||
lgeneric_dout(cct, 10) << "crush_location is " << loc << dendl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CrushLocation::update_from_hook()
|
||||
{
|
||||
if (cct->_conf->crush_location_hook.length() == 0)
|
||||
return 0;
|
||||
|
||||
SubProcessTimed hook(
|
||||
cct->_conf->crush_location_hook.c_str(),
|
||||
SubProcess::CLOSE, SubProcess::PIPE, SubProcess::PIPE,
|
||||
cct->_conf->crush_location_hook_timeout);
|
||||
hook.add_cmd_args(
|
||||
"--cluster", cct->_conf->cluster.c_str(),
|
||||
"--id", cct->_conf->name.get_id().c_str(),
|
||||
"--type", cct->_conf->name.get_type_str(),
|
||||
NULL);
|
||||
int ret = hook.spawn();
|
||||
if (ret != 0) {
|
||||
lderr(cct) << "error: failed run " << cct->_conf->crush_location_hook << ": "
|
||||
<< hook.err() << dendl;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bufferlist bl;
|
||||
ret = bl.read_fd(hook.get_stdout(), 100 * 1024);
|
||||
if (ret < 0) {
|
||||
lderr(cct) << "error: failed read stdout from "
|
||||
<< cct->_conf->crush_location_hook
|
||||
<< ": " << cpp_strerror(-ret) << dendl;
|
||||
bufferlist err;
|
||||
err.read_fd(hook.get_stderr(), 100 * 1024);
|
||||
lderr(cct) << "stderr:\n";
|
||||
err.hexdump(*_dout);
|
||||
*_dout << dendl;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (hook.join() != 0) {
|
||||
lderr(cct) << "error: failed to join: " << hook.err() << dendl;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
std::string out;
|
||||
bl.copy(0, bl.length(), out);
|
||||
out.erase(out.find_last_not_of(" \n\r\t")+1);
|
||||
return _parse(out);
|
||||
}
|
||||
|
||||
int CrushLocation::init_on_startup()
|
||||
{
|
||||
if (cct->_conf->crush_location.length()) {
|
||||
return update_from_conf();
|
||||
}
|
||||
if (cct->_conf->crush_location_hook.length()) {
|
||||
return update_from_hook();
|
||||
}
|
||||
|
||||
// start with a sane default
|
||||
char hostname[HOST_NAME_MAX + 1];
|
||||
int r = gethostname(hostname, sizeof(hostname)-1);
|
||||
if (r < 0)
|
||||
strcpy(hostname, "unknown_host");
|
||||
std::lock_guard<std::mutex> l(lock);
|
||||
loc.clear();
|
||||
loc.insert(make_pair<std::string,std::string>("host", hostname));
|
||||
loc.insert(make_pair<std::string,std::string>("root", "default"));
|
||||
lgeneric_dout(cct, 10) << "crush_location is (default) " << loc << dendl;
|
||||
return 0;
|
||||
}
|
35
src/crush/CrushLocation.h
Normal file
35
src/crush/CrushLocation.h
Normal file
@ -0,0 +1,35 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
|
||||
#ifndef CEPH_CRUSH_LOCATION_H
|
||||
#define CEPH_CRUSH_LOCATION_H
|
||||
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
class CephContext;
|
||||
|
||||
class CrushLocation {
|
||||
CephContext *cct;
|
||||
std::multimap<std::string,std::string> loc;
|
||||
std::mutex lock;
|
||||
|
||||
int _parse(const std::string& s);
|
||||
|
||||
public:
|
||||
CrushLocation(CephContext *c) : cct(c) {
|
||||
update_from_conf();
|
||||
}
|
||||
|
||||
int update_from_conf(); ///< refresh from config
|
||||
int update_from_hook(); ///< call hook, if present
|
||||
int init_on_startup();
|
||||
|
||||
std::multimap<std::string,std::string> get_location() {
|
||||
std::lock_guard<std::mutex> l(lock);
|
||||
return loc;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
@ -5,11 +5,13 @@ libcrush_la_SOURCES = \
|
||||
crush/hash.c \
|
||||
crush/CrushWrapper.cc \
|
||||
crush/CrushCompiler.cc \
|
||||
crush/CrushTester.cc
|
||||
crush/CrushTester.cc \
|
||||
crush/CrushLocation.cc
|
||||
noinst_LTLIBRARIES += libcrush.la
|
||||
|
||||
noinst_HEADERS += \
|
||||
crush/CrushCompiler.h \
|
||||
crush/CrushLocation.h \
|
||||
crush/CrushTester.h \
|
||||
crush/CrushTreeDumper.h \
|
||||
crush/CrushWrapper.h \
|
||||
|
@ -321,6 +321,8 @@ void global_init(std::vector < const char * > *alt_def_args,
|
||||
|
||||
if (code_env == CODE_ENVIRONMENT_DAEMON && !(flags & CINIT_FLAG_NO_DAEMON_ACTIONS))
|
||||
output_ceph_version();
|
||||
|
||||
g_ceph_context->crush_location.init_on_startup();
|
||||
}
|
||||
|
||||
void global_print_banner(void)
|
||||
|
@ -2207,6 +2207,12 @@ int OSD::init()
|
||||
}
|
||||
}
|
||||
|
||||
r = update_crush_location();
|
||||
if (r < 0) {
|
||||
osd_lock.Lock();
|
||||
goto monout;
|
||||
}
|
||||
|
||||
osd_lock.Lock();
|
||||
if (is_stopping())
|
||||
return 0;
|
||||
@ -2752,6 +2758,82 @@ int OSD::shutdown()
|
||||
return r;
|
||||
}
|
||||
|
||||
int OSD::update_crush_location()
|
||||
{
|
||||
if (!g_conf->osd_crush_update_on_start) {
|
||||
dout(10) << __func__ << " osd_crush_update_on_start = false" << dendl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
char weight[32];
|
||||
if (g_conf->osd_crush_initial_weight >= 0) {
|
||||
snprintf(weight, sizeof(weight), "%.4lf", g_conf->osd_crush_initial_weight);
|
||||
} else {
|
||||
struct statfs st;
|
||||
int r = store->statfs(&st);
|
||||
if (r < 0) {
|
||||
derr << "statfs: " << cpp_strerror(r) << dendl;
|
||||
return r;
|
||||
}
|
||||
snprintf(weight, sizeof(weight), "%.4lf",
|
||||
MAX((double).00001,
|
||||
(double)(st.f_blocks * st.f_bsize) /
|
||||
(double)(1ull << 40 /* TB */)));
|
||||
}
|
||||
|
||||
std::multimap<string,string> loc = cct->crush_location.get_location();
|
||||
dout(10) << __func__ << " crush location is " << loc << dendl;
|
||||
|
||||
string cmd =
|
||||
string("{\"prefix\": \"osd crush create-or-move\", ") +
|
||||
string("\"id\": ") + stringify(whoami) + string(", ") +
|
||||
string("\"weight\":") + weight + string(", ") +
|
||||
string("\"args\": [");
|
||||
for (multimap<string,string>::iterator p = loc.begin(); p != loc.end(); ++p) {
|
||||
if (p != loc.begin())
|
||||
cmd += ", ";
|
||||
cmd += "\"" + p->first + "=" + p->second + "\"";
|
||||
}
|
||||
cmd += "]}";
|
||||
|
||||
bool created = false;
|
||||
while (true) {
|
||||
dout(10) << __func__ << " cmd: " << cmd << dendl;
|
||||
vector<string> vcmd{cmd};
|
||||
bufferlist inbl;
|
||||
C_SaferCond w;
|
||||
string outs;
|
||||
int r = monc->start_mon_command(vcmd, inbl, NULL, &outs, &w);
|
||||
if (r == 0)
|
||||
r = w.wait();
|
||||
if (r < 0) {
|
||||
if (r == -ENOENT && !created) {
|
||||
string newcmd = "{\"prefix\": \"osd create\", \"id\": " + stringify(whoami)
|
||||
+ ", \"uuid\": \"" + stringify(superblock.osd_fsid) + "\"}";
|
||||
vector<string> vnewcmd{newcmd};
|
||||
bufferlist inbl;
|
||||
C_SaferCond w;
|
||||
string outs;
|
||||
int r = monc->start_mon_command(vnewcmd, inbl, NULL, &outs, &w);
|
||||
if (r == 0)
|
||||
r = w.wait();
|
||||
if (r < 0) {
|
||||
derr << __func__ << " fail: osd does not exist and created failed: "
|
||||
<< cpp_strerror(r) << dendl;
|
||||
return r;
|
||||
}
|
||||
created = true;
|
||||
continue;
|
||||
}
|
||||
derr << __func__ << " fail: '" << outs << "': " << cpp_strerror(r) << dendl;
|
||||
return r;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void OSD::write_superblock(ObjectStore::Transaction& t)
|
||||
{
|
||||
dout(10) << "write_superblock " << superblock << dendl;
|
||||
|
@ -2404,6 +2404,8 @@ protected:
|
||||
}
|
||||
|
||||
private:
|
||||
int update_crush_location();
|
||||
|
||||
static int write_meta(ObjectStore *store,
|
||||
uuid_d& cluster_fsid, uuid_d& osd_fsid, int whoami);
|
||||
|
||||
|
@ -184,16 +184,7 @@ void Objecter::handle_conf_change(const struct md_config_t *conf,
|
||||
void Objecter::update_crush_location()
|
||||
{
|
||||
unique_lock wl(rwlock);
|
||||
std::multimap<string,string> new_crush_location;
|
||||
vector<string> lvec;
|
||||
get_str_vec(cct->_conf->crush_location, ";, \t", lvec);
|
||||
int r = CrushWrapper::parse_loc_multimap(lvec, &new_crush_location);
|
||||
if (r < 0) {
|
||||
lderr(cct) << "warning: crush_location '" << cct->_conf->crush_location
|
||||
<< "' does not parse, leave origin crush_location untouched." << dendl;
|
||||
return;
|
||||
}
|
||||
crush_location = new_crush_location;
|
||||
crush_location = cct->crush_location.get_location();
|
||||
}
|
||||
|
||||
// messages ------------------------------
|
||||
|
Loading…
Reference in New Issue
Block a user