Merge pull request #11482 from ifed01/wip-bluestore-gc-fixes2

os/bluestore: GC infra refactor, more UTs and GC range calculation fixes

Reviewed-by: Sage Weil <sage@redhat.com>
This commit is contained in:
Sage Weil 2016-10-14 09:13:28 -05:00 committed by GitHub
commit 166d15430c
3 changed files with 186 additions and 92 deletions

View File

@ -2155,6 +2155,83 @@ BlueStore::BlobRef BlueStore::ExtentMap::split_blob(
return rb;
}
bool BlueStore::ExtentMap::do_write_check_depth(
uint64_t onode_size,
uint64_t start_offset,
uint64_t end_offset,
uint8_t *blob_depth,
uint64_t *gc_start_offset,
uint64_t *gc_end_offset)
{
uint8_t depth = 0;
bool head_overlap = false;
bool tail_overlap = false;
*gc_start_offset = start_offset;
*gc_end_offset = end_offset;
*blob_depth = 1;
auto hp = seek_lextent(start_offset);
if (hp != extent_map.end() &&
hp->logical_offset < start_offset &&
start_offset < hp->logical_offset + hp->length) {
depth = hp->blob_depth;
head_overlap = true;
}
auto tp = seek_lextent(end_offset);
if (tp != extent_map.end() &&
tp->logical_offset < end_offset &&
end_offset < tp->logical_offset + tp->length) {
tail_overlap = true;
if (depth < tp->blob_depth) {
depth = tp->blob_depth;
}
}
if (depth >= g_conf->bluestore_gc_max_blob_depth) {
if (head_overlap) {
auto hp_next = hp;
while (hp != extent_map.begin() && hp->blob_depth > 1) {
hp_next = hp;
--hp;
if (hp->logical_offset + hp->length != hp_next->logical_offset) {
hp = hp_next;
break;
}
}
*gc_start_offset = hp->logical_offset;
}
if (tail_overlap) {
auto tp_prev = tp;
while (tp->blob_depth > 1) {
tp_prev = tp;
tp++;
if (tp == extent_map.end() ||
(tp_prev->logical_offset + tp_prev->length) != tp->logical_offset) {
break;
}
}
*gc_end_offset = tp_prev->logical_offset + tp_prev->length;
}
}
if (*gc_end_offset > onode_size) {
*gc_end_offset = onode_size;
}
bool do_collect = true;
if (depth < g_conf->bluestore_gc_max_blob_depth) {
*blob_depth = 1 + depth;
do_collect = false;;
}
dout(20) << __func__ << " GC depth " << (int)*blob_depth
<< ", gc 0x" << std::hex << *gc_start_offset << "~"
<< (*gc_end_offset - *gc_start_offset)
<< (do_collect ? " collect" : "")
<< std::dec << dendl;
return do_collect;
}
// Onode
@ -2170,8 +2247,6 @@ void BlueStore::Onode::flush()
dout(20) << __func__ << " done" << dendl;
}
// =======================================================
// Collection
@ -2316,8 +2391,6 @@ void BlueStore::Collection::trim_cache()
g_conf->bluestore_buffer_cache_size / store->cache_shards.size());
}
// =======================================================
#undef dout_prefix
@ -7656,83 +7729,6 @@ void BlueStore::_wctx_finish(
}
}
bool BlueStore::_do_write_check_depth(
OnodeRef o,
uint64_t start_offset,
uint64_t end_offset,
uint8_t *blob_depth,
uint64_t *gc_start_offset,
uint64_t *gc_end_offset)
{
uint8_t depth = 0;
bool head_overlap = false;
bool tail_overlap = false;
*gc_start_offset = start_offset;
*gc_end_offset = end_offset;
*blob_depth = 1;
auto hp = o->extent_map.seek_lextent(start_offset);
if (hp != o->extent_map.extent_map.end() &&
hp->logical_offset < start_offset &&
start_offset < hp->logical_offset + hp->length) {
depth = hp->blob_depth;
head_overlap = true;
}
auto tp = o->extent_map.seek_lextent(end_offset);
if (tp != o->extent_map.extent_map.end() &&
tp->logical_offset < end_offset &&
end_offset < tp->logical_offset + tp->length) {
tail_overlap = true;
if (depth < tp->blob_depth) {
depth = tp->blob_depth;
}
}
if (depth >= g_conf->bluestore_gc_max_blob_depth) {
if (head_overlap) {
auto hp_next = hp;
while (hp != o->extent_map.extent_map.begin() && hp->blob_depth > 1) {
hp_next = hp;
--hp;
if (hp->logical_offset + hp->length != hp_next->logical_offset) {
hp = hp_next;
break;
}
}
*gc_start_offset = hp->logical_offset;
}
if (tail_overlap) {
auto tp_prev = tp;
while (tp->blob_depth > 1) {
tp_prev = tp;
tp++;
if (tp == o->extent_map.extent_map.end() ||
(tp_prev->logical_offset + tp_prev->length) != tp->logical_offset) {
tp = tp_prev;
break;
}
}
*gc_end_offset = tp->logical_offset + tp_prev->length;
}
}
if (*gc_end_offset > o->onode.size) {
*gc_end_offset = MAX(end_offset, o->onode.size);
}
dout(20) << __func__ << " depth " << (int)depth
<< ", gc 0x" << std::hex << *gc_start_offset << "~"
<< (*gc_end_offset - *gc_start_offset)
<< std::dec << dendl;
if (depth >= g_conf->bluestore_gc_max_blob_depth) {
return true;
} else {
*blob_depth = 1 + depth;
return false;
}
}
void BlueStore::_do_write_data(
TransContext *txc,
CollectionRef& c,
@ -7860,8 +7856,12 @@ int BlueStore::_do_write(
<< std::dec << dendl;
uint64_t gc_start_offset = offset, gc_end_offset = end;
if (_do_write_check_depth(o, offset, end, &wctx.blob_depth,
&gc_start_offset, &gc_end_offset)) {
bool do_collect =
o->extent_map.do_write_check_depth(o->onode.size,
offset, end, &wctx.blob_depth,
&gc_start_offset,
&gc_end_offset);
if (do_collect) {
// we need garbage collection of blobs.
if (offset > gc_start_offset) {
bufferlist head_bl;

View File

@ -636,7 +636,7 @@ public:
}
int s = seek_shard(offset);
assert(s >= 0);
if (s == shards.size() - 1) {
if (s == (int)shards.size() - 1) {
return false; // last shard
}
if (offset + length <= shards[s+1].offset) {
@ -690,6 +690,14 @@ public:
/// split a blob (and referring extents)
BlobRef split_blob(BlobRef lb, uint32_t blob_offset, uint32_t pos);
bool do_write_check_depth(
uint64_t onode_size,
uint64_t start_offset,
uint64_t end_offset,
uint8_t *blob_depth,
uint64_t *gc_start_offset,
uint64_t *gc_end_offset);
};
struct OnodeSpace;
@ -735,6 +743,7 @@ public:
};
typedef boost::intrusive_ptr<Onode> OnodeRef;
/// a cache (shard) of onodes and buffers
struct Cache {
PerfCounters *logger;
@ -1873,14 +1882,6 @@ private:
void _pad_zeros(bufferlist *bl, uint64_t *offset,
uint64_t chunk_size);
bool _do_write_check_depth(
OnodeRef o,
uint64_t start_offset,
uint64_t end_offset,
uint8_t *blob_depth,
uint64_t *gc_start_offset,
uint64_t *gc_end_offset);
int _do_write(TransContext *txc,
CollectionRef &c,
OnodeRef o,

View File

@ -986,6 +986,99 @@ TEST(ExtentMap, compress_extent_map)
ASSERT_EQ(6u, em.extent_map.size());
}
TEST(ExtentMap, GarbageCollectorTest)
{
BlueStore::LRUCache cache;
BlueStore::ExtentMap em(nullptr);
uint8_t blob_depth = 0;
uint64_t gc_start_offset = 0;
uint64_t gc_end_offset = 0;
bool b;
b = em.do_write_check_depth(0, //onode_size
0, //start_offset
100, //end_offset
&blob_depth,
&gc_start_offset,
&gc_end_offset);
ASSERT_TRUE(!b);
ASSERT_EQ(blob_depth, 1ul);
ASSERT_EQ(gc_start_offset, 0ul);
ASSERT_EQ(gc_end_offset, 0ul);
BlueStore::BlobRef b1(new BlueStore::Blob);
BlueStore::BlobRef b2(new BlueStore::Blob);
BlueStore::BlobRef b3(new BlueStore::Blob);
b1->shared_blob = new BlueStore::SharedBlob(-1, string(), &cache);
b2->shared_blob = new BlueStore::SharedBlob(-1, string(), &cache);
b3->shared_blob = new BlueStore::SharedBlob(1, string(), &cache);
em.extent_map.insert(*new BlueStore::Extent(0, 0, 100, 3, b1));
em.extent_map.insert(*new BlueStore::Extent(100, 0, 50, 3, b2));
em.extent_map.insert(*new BlueStore::Extent(150, 0, 150, 1, b3));
b = em.do_write_check_depth(300, //onode_size
10, //start_offset
80, //end_offset
&blob_depth,
&gc_start_offset,
&gc_end_offset);
ASSERT_TRUE(b);
ASSERT_EQ(blob_depth, 1ul);
ASSERT_EQ(gc_start_offset, 0ul);
ASSERT_EQ(gc_end_offset, 150ul);
b = em.do_write_check_depth(300, //onode_size
70, //start_offset
310, //end_offset
&blob_depth,
&gc_start_offset,
&gc_end_offset);
ASSERT_TRUE(b);
ASSERT_EQ(blob_depth, 1ul);
ASSERT_EQ(gc_start_offset, 0ul);
ASSERT_EQ(gc_end_offset, 300ul);
b = em.do_write_check_depth(300, //onode_size
70, //start_offset
290, //end_offset
&blob_depth,
&gc_start_offset,
&gc_end_offset);
ASSERT_TRUE(b);
ASSERT_EQ(blob_depth, 1ul);
ASSERT_EQ(gc_start_offset, 0ul);
ASSERT_EQ(gc_end_offset, 300ul);
b = em.do_write_check_depth(300, //onode_size
180, //start_offset
290, //end_offset
&blob_depth,
&gc_start_offset,
&gc_end_offset);
ASSERT_TRUE(!b);
ASSERT_EQ(blob_depth, 2ul);
em.extent_map.clear();
em.extent_map.insert(*new BlueStore::Extent(0x17c00, 0xc000, 0x400, 3, b1));
em.extent_map.insert(*new BlueStore::Extent(0x18000, 0, 0xf000, 3, b1));
em.extent_map.insert(*new BlueStore::Extent(0x27000, 0, 0x400, 3, b1));
em.extent_map.insert(*new BlueStore::Extent(0x27400, 0x400, 0x7c00, 2, b2));
em.extent_map.insert(*new BlueStore::Extent(0x2f000, 0, 0xe00, 1, b3));
b = em.do_write_check_depth(0x3f000, //onode_size
0x1ac00, //start_offset
0x1ac00 + 0x6600, //end_offset
&blob_depth,
&gc_start_offset,
&gc_end_offset);
ASSERT_TRUE(b);
ASSERT_EQ(blob_depth, 1ul);
ASSERT_EQ(gc_start_offset, 0x17c00ul);
ASSERT_EQ(gc_end_offset, 0x2f000ul);
}
int main(int argc, char **argv) {
vector<const char*> args;
argv_to_vec(argc, (const char **)argv, args);