mirror of
https://github.com/ceph/ceph
synced 2025-02-22 02:27:29 +00:00
Merge pull request #23629 from wangxiaoguang/fix_deep_scrub
os/bluestore: fix deep-scrub operation against disk silent errors Reviewed-by: xie xingguo <xie.xingguo@zte.com.cn> Reviewed-by: Sage Weil <sage@redhat.com> Reviewed-by: David Zafman <dzafman@redhat.com>
This commit is contained in:
commit
f8985aa579
@ -470,6 +470,7 @@ enum {
|
||||
CEPH_OSD_OP_FLAG_FADVISE_DONTNEED = 0x20,/* data will not be accessed in the near future */
|
||||
CEPH_OSD_OP_FLAG_FADVISE_NOCACHE = 0x40, /* data will be accessed only once by this client */
|
||||
CEPH_OSD_OP_FLAG_WITH_REFERENCE = 0x80, /* need reference counting */
|
||||
CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE = 0x100, /* bypass ObjectStore cache, mainly for deep-scrub */
|
||||
};
|
||||
|
||||
#define EOLDSNAPC 85 /* ORDERSNAP flag set; writer has old snapc*/
|
||||
|
@ -1306,7 +1306,8 @@ void BlueStore::BufferSpace::read(
|
||||
uint32_t offset,
|
||||
uint32_t length,
|
||||
BlueStore::ready_regions_t& res,
|
||||
interval_set<uint32_t>& res_intervals)
|
||||
interval_set<uint32_t>& res_intervals,
|
||||
int flags)
|
||||
{
|
||||
res.clear();
|
||||
res_intervals.clear();
|
||||
@ -1320,7 +1321,13 @@ void BlueStore::BufferSpace::read(
|
||||
++i) {
|
||||
Buffer *b = i->second.get();
|
||||
ceph_assert(b->end() > offset);
|
||||
if (b->is_writing() || b->is_clean()) {
|
||||
|
||||
bool val = false;
|
||||
if (flags & BYPASS_CLEAN_CACHE)
|
||||
val = b->is_writing();
|
||||
else
|
||||
val = b->is_writing() || b->is_clean();
|
||||
if (val) {
|
||||
if (b->offset < offset) {
|
||||
uint32_t skip = offset - b->offset;
|
||||
uint32_t l = min(length, b->length - skip);
|
||||
@ -7408,6 +7415,7 @@ int BlueStore::_do_read(
|
||||
{
|
||||
FUNCTRACE(cct);
|
||||
int r = 0;
|
||||
int read_cache_policy = 0; // do not bypass clean or dirty cache
|
||||
|
||||
dout(20) << __func__ << " 0x" << std::hex << offset << "~" << length
|
||||
<< " size 0x" << o->onode.size << " (" << std::dec
|
||||
@ -7442,6 +7450,13 @@ int BlueStore::_do_read(
|
||||
|
||||
ready_regions_t ready_regions;
|
||||
|
||||
// for deep-scrub, we only read dirty cache and bypass clean cache in
|
||||
// order to read underlying block device in case there are silent disk errors.
|
||||
if (op_flags & CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE) {
|
||||
dout(20) << __func__ << " will bypass cache and do direct read" << dendl;
|
||||
read_cache_policy = BufferSpace::BYPASS_CLEAN_CACHE;
|
||||
}
|
||||
|
||||
// build blob-wise list of stuff to read (that isn't cached)
|
||||
blobs2read_t blobs2read;
|
||||
unsigned left = length;
|
||||
@ -7467,7 +7482,8 @@ int BlueStore::_do_read(
|
||||
ready_regions_t cache_res;
|
||||
interval_set<uint32_t> cache_interval;
|
||||
bptr->shared_blob->bc.read(
|
||||
bptr->shared_blob->get_cache(), b_off, b_len, cache_res, cache_interval);
|
||||
bptr->shared_blob->get_cache(), b_off, b_len, cache_res, cache_interval,
|
||||
read_cache_policy);
|
||||
dout(20) << __func__ << " blob " << *bptr << std::hex
|
||||
<< " need 0x" << b_off << "~" << b_len
|
||||
<< " cache has 0x" << cache_interval
|
||||
|
@ -247,6 +247,10 @@ public:
|
||||
|
||||
/// map logical extent range (object) onto buffers
|
||||
struct BufferSpace {
|
||||
enum {
|
||||
BYPASS_CLEAN_CACHE = 0x1, // bypass clean cache
|
||||
};
|
||||
|
||||
typedef boost::intrusive::list<
|
||||
Buffer,
|
||||
boost::intrusive::member_hook<
|
||||
@ -346,7 +350,8 @@ public:
|
||||
|
||||
void read(Cache* cache, uint32_t offset, uint32_t length,
|
||||
BlueStore::ready_regions_t& res,
|
||||
interval_set<uint32_t>& res_intervals);
|
||||
interval_set<uint32_t>& res_intervals,
|
||||
int flags = 0);
|
||||
|
||||
void truncate(Cache* cache, uint32_t offset) {
|
||||
discard(cache, offset, (uint32_t)-1 - offset);
|
||||
|
@ -598,7 +598,8 @@ int ReplicatedBackend::be_deep_scrub(
|
||||
dout(10) << __func__ << " " << poid << " pos " << pos << dendl;
|
||||
int r;
|
||||
uint32_t fadvise_flags = CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
|
||||
CEPH_OSD_OP_FLAG_FADVISE_DONTNEED;
|
||||
CEPH_OSD_OP_FLAG_FADVISE_DONTNEED |
|
||||
CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE;
|
||||
|
||||
utime_t sleeptime;
|
||||
sleeptime.set_from_double(cct->_conf->osd_debug_deep_scrub_sleep);
|
||||
|
@ -100,6 +100,12 @@ const char * ceph_osd_op_flag_name(unsigned flag)
|
||||
case CEPH_OSD_OP_FLAG_FADVISE_NOCACHE:
|
||||
name = "fadvise_nocache";
|
||||
break;
|
||||
case CEPH_OSD_OP_FLAG_WITH_REFERENCE:
|
||||
name = "with_reference";
|
||||
break;
|
||||
case CEPH_OSD_OP_FLAG_BYPASS_CLEAN_CACHE:
|
||||
name = "bypass_clean_cache";
|
||||
break;
|
||||
default:
|
||||
name = "???";
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user