Merge pull request #54491 from jianwei1216/fix_osd_pg_stat_report_interval_max_cmain

fix: resolve inconsistent judgment of osd_pg_stat_report_interval_max

Reviewed-by: Samuel Just <sjust@redhat.com>
Reviewed-by: Matan Breizman <Matan.Brz@gmail.com>

commit e538587e18
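The inconsistency named in the PR title is visible in the removed lines further down: the single option osd_pg_stat_report_interval_max was interpreted in two different units. Excerpted from the removed code (see the PeeringState.cc hunks below):

    // In PeeringState::prepare_stats_for_publish() the option was used as seconds:
    utime_t cutoff = now;
    cutoff -= cct->_conf->osd_pg_stat_report_interval_max;

    // In PeeringState::Active::react(const AdvMap&) it was used as an epoch count:
    if (ps->info.stats.reported_epoch +
        ps->cct->_conf->osd_pg_stat_report_interval_max < advmap.osdmap->get_epoch()) {
      // ...
    }

The fix splits the option into osd_pg_stat_report_interval_max_seconds and osd_pg_stat_report_interval_max_epochs, one per unit.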
@@ -209,14 +209,16 @@ function TEST_scrub_snaps() {
     done

     ceph tell osd.* config set osd_shallow_scrub_chunk_max 25
     ceph tell osd.* config set osd_shallow_scrub_chunk_min 5
-    ceph tell osd.* config set osd_pg_stat_report_interval_max 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_seconds 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_epochs 1

     wait_for_clean || return 1

     ceph tell osd.* config get osd_shallow_scrub_chunk_max
     ceph tell osd.* config get osd_shallow_scrub_chunk_min
-    ceph tell osd.* config get osd_pg_stat_report_interval_max
+    ceph tell osd.* config get osd_pg_stat_report_interval_max_seconds
+    ceph tell osd.* config get osd_pg_stat_report_interval_max_epochs
     ceph tell osd.* config get osd_scrub_chunk_max
     ceph tell osd.* config get osd_scrub_chunk_min
@@ -772,7 +774,8 @@ function _scrub_snaps_multi() {
     ceph tell osd.* config set osd_shallow_scrub_chunk_max 3
     ceph tell osd.* config set osd_shallow_scrub_chunk_min 3
     ceph tell osd.* config set osd_scrub_chunk_min 3
-    ceph tell osd.* config set osd_pg_stat_report_interval_max 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_seconds 1
+    ceph tell osd.* config set osd_pg_stat_report_interval_max_epochs 1
     wait_for_clean || return 1

     local pgid="${poolid}.0"
@@ -1163,7 +1166,7 @@ fi
 function TEST_scrub_snaps_replica() {
     local dir=$1
     ORIG_ARGS=$CEPH_ARGS
-    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1"
+    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max_seconds=1 --osd_pg_stat_report_interval_max_epochs=1"
     _scrub_snaps_multi $dir replica
     err=$?
     CEPH_ARGS=$ORIG_ARGS
@@ -1173,7 +1176,7 @@ function TEST_scrub_snaps_replica() {
 function TEST_scrub_snaps_primary() {
     local dir=$1
     ORIG_ARGS=$CEPH_ARGS
-    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max=1"
+    CEPH_ARGS+=" --osd_scrub_chunk_min=3 --osd_scrub_chunk_max=20 --osd_shallow_scrub_chunk_min=3 --osd_shallow_scrub_chunk_max=3 --osd_pg_stat_report_interval_max_seconds=1 --osd_pg_stat_report_interval_max_epochs=1"
     _scrub_snaps_multi $dir primary
     err=$?
     CEPH_ARGS=$ORIG_ARGS
@@ -239,7 +239,8 @@ function standard_scrub_cluster() {
            --osd_scrub_interval_randomize_ratio=0 \
            --osd_scrub_backoff_ratio=0.0 \
            --osd_pool_default_pg_autoscale_mode=off \
-           --osd_pg_stat_report_interval_max=1 \
+           --osd_pg_stat_report_interval_max_seconds=1 \
+           --osd_pg_stat_report_interval_max_epochs=1 \
            $extra_pars"

    for osd in $(seq 0 $(expr $OSDS - 1))
@@ -2903,9 +2903,19 @@ options:
   default: 5_min
   with_legacy: true
 # report pg stats for any given pg at least this often
-- name: osd_pg_stat_report_interval_max
+- name: osd_pg_stat_report_interval_max_seconds
   type: int
   level: advanced
+  desc: The maximum interval (in seconds) after which a pg's reported_epoch is
+    updated; helps osdmap trimming when the osdmap does not change frequently.
   default: 500
   with_legacy: true
+- name: osd_pg_stat_report_interval_max_epochs
+  type: int
+  level: advanced
+  desc: The maximum number of epochs by which a pg's reported_epoch may lag
+    behind the current osdmap epoch before it must be updated; helps osdmap
+    trimming when the osdmap changes frequently.
+  default: 500
+  with_legacy: true
 # Max number of snap intervals to report to mgr in pg_stat_t
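For context, a minimal self-contained sketch (assumed names and simplified time handling, not Ceph's actual API) of how these two thresholds gate a pg-stats report, matching the logic the PR introduces in PeeringState::prepare_stats_for_publish() below:

    // Sketch only: simplified stand-ins for utime_t and conf access.
    #include <cstdint>

    using epoch_t = uint32_t;

    bool must_republish(double now_sec, double last_fresh_sec,
                        epoch_t reported_epoch, epoch_t osdmap_epoch,
                        int64_t max_seconds, int64_t max_epochs)
    {
      // time trigger: the last fresh report is older than max_seconds
      bool is_time_expired = (now_sec - static_cast<double>(max_seconds)) > last_fresh_sec;
      // epoch trigger: reported_epoch lags the osdmap by more than max_epochs
      bool is_epoch_behind = reported_epoch + max_epochs < osdmap_epoch;
      return is_time_expired || is_epoch_behind;
    }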
@@ -3892,8 +3892,6 @@ std::optional<pg_stat_t> PeeringState::prepare_stats_for_publish(

   pg_stat_t pre_publish = info.stats;
   pre_publish.stats.add(unstable_stats);
-  utime_t cutoff = now;
-  cutoff -= cct->_conf->osd_pg_stat_report_interval_max;

   // share (some of) our purged_snaps via the pg_stats. limit # of intervals
   // because we don't want to make the pg_stat_t structures too expensive.
@@ -3908,8 +3906,22 @@ std::optional<pg_stat_t> PeeringState::prepare_stats_for_publish(
   psdout(20) << "reporting purged_snaps "
	     << pre_publish.purged_snaps << dendl;

+  // when there is no change in the osdmap, update info.stats.reported_epoch
+  // at least every osd_pg_stat_report_interval_max_seconds seconds.
+  utime_t cutoff_time = now;
+  cutoff_time -= cct->_conf->osd_pg_stat_report_interval_max_seconds;
+  bool is_time_expired = cutoff_time > info.stats.last_fresh;
+
+  // 500 epochs is also the minimum number of osdmaps that the mon must retain.
+  // if info.stats.reported_epoch lags the current osdmap epoch by more than
+  // 500 epochs, the epoch reported for this pgid is considered too old and
+  // must be updated, to help the mon trim osdmaps.
+  epoch_t cutoff_epoch = info.stats.reported_epoch;
+  cutoff_epoch += cct->_conf->osd_pg_stat_report_interval_max_epochs;
+  bool is_epoch_behind = cutoff_epoch < get_osdmap_epoch();
+
   if (pg_stats_publish && pre_publish == *pg_stats_publish &&
-      info.stats.last_fresh > cutoff) {
+      (!is_epoch_behind && !is_time_expired)) {
     psdout(15) << "publish_stats_to_osd " << pg_stats_publish->reported_epoch
	       << ": no change since " << info.stats.last_fresh << dendl;
     return std::nullopt;
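Distilled, the new suppression rule reads: skip the report only if nothing changed AND neither limit was hit. A paraphrase of that control flow (simplified stand-in types, not the committed code):

    #include <optional>

    struct pg_stat_t { int dummy = 0; };  // stand-in for Ceph's pg_stat_t

    std::optional<pg_stat_t> maybe_publish(bool have_prev_publish, bool stats_changed,
                                           bool is_time_expired, bool is_epoch_behind,
                                           const pg_stat_t& pre_publish)
    {
      // suppress only when a previous publish exists, nothing changed, and
      // neither the seconds limit nor the epochs limit has been exceeded
      if (have_prev_publish && !stats_changed &&
          !is_epoch_behind && !is_time_expired) {
        return std::nullopt;
      }
      return pre_publish;  // republish, presumably refreshing reported_epoch downstream
    }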
@@ -5980,14 +5992,12 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
     return forward_event();
   }
   psdout(10) << "Active advmap" << dendl;
-  bool need_publish = false;

   pl->on_active_advmap(advmap.osdmap);
   if (ps->dirty_big_info) {
     // share updated purged_snaps to mgr/mon so that we (a) stop reporting
     // purged snaps and (b) perhaps share more snaps that we have purged
     // but didn't fit in pg_stat_t.
-    need_publish = true;
     ps->share_pg_info();
   }
@@ -6029,18 +6039,9 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
       ps->state_set(PG_STATE_UNDERSIZED);
     }
     // degraded changes will be detected by call from publish_stats_to_osd()
-    need_publish = true;
   }

-  // if we haven't reported our PG stats in a long time, do so now.
-  if (ps->info.stats.reported_epoch + ps->cct->_conf->osd_pg_stat_report_interval_max < advmap.osdmap->get_epoch()) {
-    psdout(20) << "reporting stats to osd after " << (advmap.osdmap->get_epoch() - ps->info.stats.reported_epoch)
-               << " epochs" << dendl;
-    need_publish = true;
-  }
-
-  if (need_publish)
-    pl->publish_stats_to_osd();
+  pl->publish_stats_to_osd();

   if (ps->check_prior_readable_down_osds(advmap.osdmap)) {
     pl->recheck_readable();
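With the rate limiting centralized in prepare_stats_for_publish(), the AdvMap handler can call publish_stats_to_osd() unconditionally: the call is cheap when nothing needs to go out, because prepare_stats_for_publish() returns std::nullopt. A hypothetical caller shape (the real PeeringListener interface differs):

    #include <optional>

    struct pg_stat_t { int dummy = 0; };                    // stand-in type
    std::optional<pg_stat_t> prepare_stats_for_publish();   // assumed wrapper

    void publish_stats_to_osd()
    {
      std::optional<pg_stat_t> stats = prepare_stats_for_publish();
      if (!stats) {
        return;  // suppressed: unchanged and within both limits
      }
      // ... forward *stats to the mgr ...
    }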