mirror of
https://github.com/ceph/ceph
synced 2025-01-01 00:22:25 +00:00
osd: trim pg logs based on a per-osd budget
Set the default budget based on the current defaults: 3000 log entries per PG, and a rule-of-thumb target of 100 PGs per OSD (i.e. 3000 * 100 entries per OSD). Set the per-PG trim target by dividing the overall budget by the number of PGs on the OSD. Increase only the max PG log length, so that if the OSD has fewer than 100 PGs, those PGs will get more entries. Reduce the minimum so that it is smaller than the max. Use the min/max config options to bracket what is allocated to a single PG. Signed-off-by: Sage Weil <sage@redhat.com>
This commit is contained in:
parent
8aa8b41fa5
commit
0db140c15c
@ -721,6 +721,7 @@ OPTION(osd_kill_backfill_at, OPT_INT)
|
||||
// Bounds how infrequently a new map epoch will be persisted for a pg
|
||||
OPTION(osd_pg_epoch_persisted_max_stale, OPT_U32) // make this < map_cache_size!
|
||||
|
||||
OPTION(osd_target_pg_log_entries_per_osd, OPT_U32)
|
||||
OPTION(osd_min_pg_log_entries, OPT_U32) // number of entries to keep in the pg log when trimming it
|
||||
OPTION(osd_max_pg_log_entries, OPT_U32) // max entries, say when degraded, before we trim
|
||||
OPTION(osd_pg_log_dups_tracked, OPT_U32) // how many versions back to track combined in both pglog's regular + dup logs
|
||||
|
@ -3302,15 +3302,21 @@ std::vector<Option> get_global_options() {
|
||||
.set_default(40)
|
||||
.set_description(""),
|
||||
|
||||
Option("osd_target_pg_log_entries_per_osd", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
|
||||
.set_default(3000 * 100)
|
||||
.set_description("target number of PG entries total on an OSD")
|
||||
.add_see_also("osd_max_pg_log_entries")
|
||||
.add_see_also("osd_min_pg_log_entries"),
|
||||
|
||||
Option("osd_min_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
|
||||
.set_default(3000)
|
||||
.set_default(250)
|
||||
.set_description("minimum number of entries to maintain in the PG log")
|
||||
.add_service("osd")
|
||||
.add_see_also("osd_max_pg_log_entries")
|
||||
.add_see_also("osd_pg_log_dups_tracked"),
|
||||
|
||||
Option("osd_max_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
|
||||
.set_default(3000)
|
||||
.set_default(10000)
|
||||
.set_description("maximum number of entries to maintain in the PG log when degraded before we trim")
|
||||
.add_service("osd")
|
||||
.add_see_also("osd_min_pg_log_entries")
|
||||
|
@ -194,6 +194,30 @@ void PG::recheck_readable()
|
||||
}
|
||||
}
|
||||
|
||||
unsigned PG::get_target_pg_log_entries() const
|
||||
{
|
||||
const unsigned num_pgs = shard_services.get_pg_num();
|
||||
const unsigned target =
|
||||
local_conf().get_val<uint64_t>("osd_target_pg_log_entries_per_osd");
|
||||
const unsigned min_pg_log_entries =
|
||||
local_conf().get_val<uint64_t>("osd_min_pg_log_entries");
|
||||
if (num_pgs > 0 && target > 0) {
|
||||
// target an even spread of our budgeted log entries across all
|
||||
// PGs. note that while we only get to control the entry count
|
||||
// for primary PGs, we'll normally be responsible for a mix of
|
||||
// primary and replica PGs (for the same pool(s) even), so this
|
||||
// will work out.
|
||||
const unsigned max_pg_log_entries =
|
||||
local_conf().get_val<uint64_t>("osd_max_pg_log_entries");
|
||||
return std::clamp(target / num_pgs,
|
||||
min_pg_log_entries,
|
||||
max_pg_log_entries);
|
||||
} else {
|
||||
// fall back to a per-pg value.
|
||||
return min_pg_log_entries;
|
||||
}
|
||||
}
|
||||
|
||||
void PG::on_activate(interval_set<snapid_t>)
|
||||
{
|
||||
projected_last_update = peering_state.get_info().last_update;
|
||||
|
@ -240,6 +240,8 @@ public:
|
||||
ceph::timespan delay) final;
|
||||
void recheck_readable() final;
|
||||
|
||||
unsigned get_target_pg_log_entries() const final;
|
||||
|
||||
// Final override of on_pool_change(); the body is intentionally empty, so
// calling it currently has no effect.
void on_pool_change() final {
|
||||
// Not needed yet
|
||||
}
|
||||
|
@ -9400,6 +9400,26 @@ bool OSDService::_recover_now(uint64_t *available_pushes)
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned OSDService::get_target_pg_log_entries() const
|
||||
{
|
||||
auto num_pgs = osd->get_num_pgs();
|
||||
auto target = cct->_conf->osd_target_pg_log_entries_per_osd;
|
||||
if (num_pgs > 0 && target > 0) {
|
||||
// target an even spread of our budgeted log entries across all
|
||||
// PGs. note that while we only get to control the entry count
|
||||
// for primary PGs, we'll normally be responsible for a mix of
|
||||
// primary and replica PGs (for the same pool(s) even), so this
|
||||
// will work out.
|
||||
return std::max<unsigned>(
|
||||
std::min<unsigned>(target / num_pgs,
|
||||
cct->_conf->osd_max_pg_log_entries),
|
||||
cct->_conf->osd_min_pg_log_entries);
|
||||
} else {
|
||||
// fall back to a per-pg value.
|
||||
return cct->_conf->osd_min_pg_log_entries;
|
||||
}
|
||||
}
|
||||
|
||||
void OSD::do_recovery(
|
||||
PG *pg, epoch_t queued, uint64_t reserved_pushes,
|
||||
ThreadPool::TPHandle &handle)
|
||||
|
@ -662,6 +662,9 @@ public:
|
||||
return awaiting.second.get() == pg;
|
||||
});
|
||||
}
|
||||
|
||||
unsigned get_target_pg_log_entries() const;
|
||||
|
||||
// delayed pg activation
|
||||
void queue_for_recovery(PG *pg) {
|
||||
std::lock_guard l(recovery_lock);
|
||||
|
@ -849,6 +849,11 @@ void PG::publish_stats_to_osd()
|
||||
}
|
||||
}
|
||||
|
||||
unsigned PG::get_target_pg_log_entries() const
|
||||
{
|
||||
return osd->get_target_pg_log_entries();
|
||||
}
|
||||
|
||||
void PG::clear_publish_stats()
|
||||
{
|
||||
dout(15) << "clear_stats" << dendl;
|
||||
|
@ -400,6 +400,7 @@ public:
|
||||
uint64_t get_snap_trimq_size() const override {
|
||||
return snap_trimq.size();
|
||||
}
|
||||
unsigned get_target_pg_log_entries() const override;
|
||||
|
||||
void clear_publish_stats() override;
|
||||
void clear_primary_state() override;
|
||||
|
@ -4050,7 +4050,7 @@ void PeeringState::calc_trim_to()
|
||||
PG_STATE_BACKFILLING |
|
||||
PG_STATE_BACKFILL_WAIT |
|
||||
PG_STATE_BACKFILL_TOOFULL)) {
|
||||
target = cct->_conf->osd_max_pg_log_entries;
|
||||
target = pl->get_target_pg_log_entries();
|
||||
}
|
||||
|
||||
eversion_t limit = std::min(
|
||||
@ -4092,7 +4092,7 @@ void PeeringState::calc_trim_to_aggressive()
|
||||
PG_STATE_BACKFILLING |
|
||||
PG_STATE_BACKFILL_WAIT |
|
||||
PG_STATE_BACKFILL_TOOFULL)) {
|
||||
target = cct->_conf->osd_max_pg_log_entries;
|
||||
target = pl->get_target_pg_log_entries();
|
||||
}
|
||||
// limit pg log trimming up to the can_rollback_to value
|
||||
eversion_t limit = std::min(
|
||||
|
@ -281,6 +281,8 @@ public:
|
||||
virtual void queue_check_readable(epoch_t lpr, ceph::timespan delay) = 0;
|
||||
virtual void recheck_readable() = 0;
|
||||
|
||||
virtual unsigned get_target_pg_log_entries() const = 0;
|
||||
|
||||
// ============ Flush state ==================
|
||||
/**
|
||||
* try_flush_or_schedule_async()
|
||||
|
Loading…
Reference in New Issue
Block a user