diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index fa11c21ad5a..b07b6a7ed24 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -178,6 +178,7 @@ OPTION(mon_force_standby_active, OPT_BOOL, true) // should mons force standby-re
 OPTION(mon_warn_on_old_mons, OPT_BOOL, true) // should mons set health to WARN if part of quorum is old?
 OPTION(mon_warn_on_legacy_crush_tunables, OPT_BOOL, true) // warn if crush tunables are not optimal
 OPTION(mon_warn_on_osd_down_out_interval_zero, OPT_BOOL, true) // warn if 'mon_osd_down_out_interval == 0'
+OPTION(mon_warn_on_cache_pools_without_hit_sets, OPT_BOOL, true) // warn if a pool's cache_mode requires a hit_set but hit_set_type is not set
 OPTION(mon_min_osdmap_epochs, OPT_INT, 500)
 OPTION(mon_max_pgmap_epochs, OPT_INT, 500)
 OPTION(mon_max_log_epochs, OPT_INT, 500)
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index a8c2c64c6b4..415ae19c473 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -2078,6 +2078,32 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
       }
     }
 
+    // hit_set-less cache_mode?
+    if (g_conf->mon_warn_on_cache_pools_without_hit_sets) {
+      int problem_cache_pools = 0;  // pools whose cache_mode needs a hit_set but have none
+      for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.pools.begin();
+           p != osdmap.pools.end();
+           ++p) {
+        const pg_pool_t& info = p->second;
+        if (info.cache_mode_requires_hit_set() &&
+            info.hit_set_params.get_type() == HitSet::TYPE_NONE) {
+          ++problem_cache_pools;
+          if (detail) {  // per-pool lines are emitted only when a detail list was supplied
+            ostringstream ss;
+            ss << "pool '" << osdmap.get_pool_name(p->first)
+               << "' with cache_mode " << info.get_cache_mode_name()
+               << " needs hit_set_type to be set but it is not";
+            detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+          }
+        }
+      }
+      if (problem_cache_pools) {  // one summary line covering all offending pools
+        ostringstream ss;
+        ss << problem_cache_pools << " cache pools are missing hit_sets";
+        summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+      }
+    }
+
     // Warn if 'mon_osd_down_out_interval' is set to zero.
     // Having this option set to zero on the leader acts much like the
     // 'noout' flag.  It's hard to figure out what's going wrong with clusters
diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 4dae5d3fef2..88d42c13c09 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -1348,9 +1348,10 @@ void ReplicatedPG::do_op(OpRequestRef& op)
         hit_set_start_stamp + pool.info.hit_set_period <= m->get_recv_stamp()) {
       hit_set_persist();
     }
+  }
 
-    if (agent_state)
-      agent_choose_mode();
+  if (agent_state) {
+    agent_choose_mode();
   }
 
   if ((m->get_flags() & CEPH_OSD_FLAG_IGNORE_CACHE) == 0 &&
@@ -11342,7 +11343,8 @@ bool ReplicatedPG::agent_maybe_evict(ObjectContextRef& obc)
     }
   }
 
-  if (agent_state->evict_mode != TierAgentState::EVICT_MODE_FULL) {
+  if (agent_state->evict_mode != TierAgentState::EVICT_MODE_FULL &&
+      hit_set) {  // the age/temperature check below is skipped when there is no hit_set
     // is this object old and/or cold enough?
     int atime = -1, temp = 0;
     agent_estimate_atime_temp(soid, &atime, NULL /*FIXME &temp*/);
@@ -11474,7 +11476,11 @@ void ReplicatedPG::agent_choose_mode(bool restart)
     num_dirty = 0;
   }
 
-  dout(10) << __func__ << ": "
+  dout(10) << __func__
+           << " flush_mode: "
+           << TierAgentState::get_flush_mode_name(agent_state->flush_mode)
+           << " evict_mode: "
+           << TierAgentState::get_evict_mode_name(agent_state->evict_mode)
            << " num_objects: " << info.stats.stats.sum.num_objects
            << " num_bytes: " << info.stats.stats.sum.num_bytes
            << " num_objects_dirty: " << info.stats.stats.sum.num_objects_dirty
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index a311e651885..abbfe2c2675 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -876,6 +876,24 @@ struct pg_pool_t {
   const char *get_cache_mode_name() const {
     return get_cache_mode_name(cache_mode);
   }
+  /// Whether this pool's cache_mode needs a hit_set: true for WRITEBACK and
+  /// READFORWARD, false for NONE, FORWARD and READONLY; other modes assert.
+  bool cache_mode_requires_hit_set() const {
+    switch (cache_mode) {
+    case CACHEMODE_NONE:
+    case CACHEMODE_FORWARD:
+    case CACHEMODE_READONLY:
+      return false;
+    case CACHEMODE_WRITEBACK:
+    case CACHEMODE_READFORWARD:
+      return true;
+    default:
+      assert(0 == "implement me");
+    }
+    // Only reachable if the assert above does not abort (e.g. it is compiled
+    // out); without this the function would flow off its end with no value.
+    return false;
+  }
 
   uint64_t flags;           ///< FLAG_*
   __u8 type;                ///< TYPE_*
diff --git a/src/vstart.sh b/src/vstart.sh
index 6ba2995ecfa..1cf4dbe56a6 100755
--- a/src/vstart.sh
+++ b/src/vstart.sh
@@ -381,6 +381,12 @@ $DAEMONOPTS
         osd class dir = .libs
         osd scrub load threshold = 5.0
         osd debug op order = true
+        filestore wbthrottle xfs ios start flusher = 10
+        filestore wbthrottle xfs ios hard limit = 20
+        filestore wbthrottle xfs inodes hard limit = 30
+        filestore wbthrottle btrfs ios start flusher = 10
+        filestore wbthrottle btrfs ios hard limit = 20
+        filestore wbthrottle btrfs inodes hard limit = 30
 $COSDDEBUG
 $COSDMEMSTORE
 $extra_conf