osd/osd_types: use appropriate cost value for PullOp

See included comments -- previous values did not account for object
size.  This causes problems for mclock, which is much more strict
in how it interprets costs.

Fixes: https://tracker.ceph.com/issues/58607
Signed-off-by: Samuel Just <sjust@redhat.com>
Signed-off-by: Sridhar Seshasayee <sseshasa@redhat.com>
Author: Sridhar Seshasayee <sseshasa@redhat.com>
Date:   2023-02-03 11:06:06 +05:30
Commit: 7dc3024355 (parent: ee26df6d56)
2 changed files with 20 additions and 2 deletions
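
For context, here is a minimal standalone sketch of the cost difference this patch makes for a single PullOp. The config values (osd_push_per_object_cost = 1000, osd_recovery_max_chunk = 8 MiB) and the example object (4 MiB total, 1 MiB already recovered) are illustrative assumptions, not values taken from the diff below.

// Illustrative sketch only -- not part of the commit.
#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  // Assumed config values, chosen only for illustration.
  const uint64_t osd_push_per_object_cost = 1000;      // flat per-object cost
  const uint64_t osd_recovery_max_chunk   = 8 << 20;   // 8 MiB recovery chunk

  // Hypothetical object being pulled: 4 MiB total, 1 MiB already recovered.
  const uint64_t object_size       = 4 << 20;
  const uint64_t data_recovered_to = 1 << 20;

  // Old cost: a fixed value per message, independent of the object's size.
  const uint64_t old_cost = osd_push_per_object_cost + osd_recovery_max_chunk;

  // New cost under mclock_scheduler: the estimated remaining data to
  // recover, clamped to [1, osd_recovery_max_chunk], so small objects are
  // no longer billed as if they were full-sized chunks.
  const uint64_t remaining = object_size - data_recovered_to;
  const uint64_t new_cost  =
    std::clamp<uint64_t>(remaining, 1, osd_recovery_max_chunk);

  std::cout << "old cost: " << old_cost << " bytes\n";  // 8389608
  std::cout << "new cost: " << new_cost << " bytes\n";  // 3145728 (3 MiB)
  return 0;
}

The point of the change is that the cost mclock sees stays roughly proportional to the data a PullOp will actually move, rather than a fixed full-chunk charge per message.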

src/osd/osd_types.cc

@@ -15,6 +15,7 @@
  *
  */
+#include <algorithm>
 #include <list>
 #include <map>
 #include <ostream>
@@ -6807,8 +6808,20 @@ ostream& operator<<(ostream& out, const PullOp &op)
 
 uint64_t PullOp::cost(CephContext *cct) const
 {
-  return cct->_conf->osd_push_per_object_cost +
-    cct->_conf->osd_recovery_max_chunk;
+  if (cct->_conf->osd_op_queue == "mclock_scheduler") {
+    return std::clamp<uint64_t>(
+      recovery_progress.estimate_remaining_data_to_recover(recovery_info),
+      1,
+      cct->_conf->osd_recovery_max_chunk);
+  } else {
+    /* We retain this legacy behavior for WeightedPriorityQueue. It seems to
+     * require very large costs for several messages in order to do any
+     * meaningful amount of throttling. This branch should be removed after
+     * Reef.
+     */
+    return cct->_conf->osd_push_per_object_cost +
+      cct->_conf->osd_recovery_max_chunk;
+  }
 }
 
 // -- PushOp --

src/osd/osd_types.h

@@ -6028,6 +6028,11 @@ struct ObjectRecoveryProgress {
       omap_complete;
   }
 
+  uint64_t estimate_remaining_data_to_recover(const ObjectRecoveryInfo& info) const {
+    // Overestimates in case of clones, but avoids traversing copy_subset
+    return info.size - data_recovered_to;
+  }
+
   static void generate_test_instances(std::list<ObjectRecoveryProgress*>& o);
   void encode(ceph::buffer::list &bl) const;
   void decode(ceph::buffer::list::const_iterator &bl);