os/bluestore: add main device expand capability.

The main device can now be expanded via ceph-bluestore-tool's bluefs-bdev-expand command.

Signed-off-by: Igor Fedotov <ifedotov@suse.com>
This commit is contained in:
Igor Fedotov 2018-11-23 14:39:20 +03:00
parent c2612cf9db
commit d07c10dfc0
7 changed files with 191 additions and 37 deletions

View File

@ -12,7 +12,7 @@ function run() {
export CEPH_ARGS
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
CEPH_ARGS+="--mon-host=$CEPH_MON "
CEPH_ARGS+="--bluestore_block_size=4294967296 "
CEPH_ARGS+="--bluestore_block_size=2147483648 "
CEPH_ARGS+="--bluestore_block_db_create=true "
CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
@ -66,8 +66,26 @@ function TEST_bluestore() {
while kill $osd_pid3; do sleep 1 ; done
ceph osd down 3
# expand slow devices
ceph-bluestore-tool --path $dir/0 fsck || return 1
ceph-bluestore-tool --path $dir/1 fsck || return 1
ceph-bluestore-tool --path $dir/2 fsck || return 1
ceph-bluestore-tool --path $dir/3 fsck || return 1
truncate $dir/0/block -s 4294967296 # 4GB
ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1
truncate $dir/1/block -s 4311744512 # 4GB + 16MB
ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1
truncate $dir/2/block -s 4295099392 # 4GB + 129KB
ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1
truncate $dir/3/block -s 4293918720 # 4GB - 1MB
ceph-bluestore-tool --path $dir/3 bluefs-bdev-expand || return 1
# slow, DB, WAL -> slow, DB
ceph-bluestore-tool --path $dir/0 fsck || return 1
ceph-bluestore-tool --path $dir/1 fsck || return 1
ceph-bluestore-tool --path $dir/2 fsck || return 1
ceph-bluestore-tool --path $dir/3 fsck || return 1
ceph-bluestore-tool --path $dir/0 bluefs-bdev-sizes

View File

@ -106,6 +106,52 @@ int BitmapFreelistManager::create(uint64_t new_size, uint64_t granularity,
return 0;
}
/**
 * Grow the freelist so that it describes a device of new_size bytes.
 *
 * Steps, all queued on txn (the caller submits it):
 *   1. If the old size did not end on a key boundary, flip the bits between
 *      the old size and the end of its last key back to "unallocated".
 *   2. Adopt the new size, aligned down to bytes_per_block.
 *   3. If the new size does not end on a key boundary, flip the bits between
 *      the new size and the end of its last key to "allocated".
 *   4. Persist the new "blocks" and "size" values under meta_prefix.
 *
 * @param new_size  new device size in bytes; must be greater than the
 *                  current size
 * @param txn       transaction that receives the bitmap and metadata updates
 * @return 0 (always)
 */
int BitmapFreelistManager::expand(uint64_t new_size, KeyValueDB::Transaction txn)
{
  ceph_assert(new_size > size);
  ceph_assert(isp2(bytes_per_block));

  uint64_t blocks0 = size / bytes_per_block;
  if (blocks0 / blocks_per_key * blocks_per_key != blocks0) {
    // Round the *unrounded* old block count up to the next key boundary.
    // The `blocks` member must not be used here: it is stored already
    // rounded up (see the rounding of `blocks` further down), so rounding it
    // again would move the boundary one whole key too far and the _xor
    // below would flip bits that were never marked.
    blocks0 = (blocks0 / blocks_per_key + 1) * blocks_per_key;
    dout(10) << __func__ << " rounding blocks up from 0x" << std::hex << size
	     << " to 0x" << (blocks0 * bytes_per_block)
	     << " (0x" << blocks0 << " blocks)" << std::dec << dendl;
    // reset past-eof blocks to unallocated
    _xor(size, blocks0 * bytes_per_block - size, txn);
  }

  size = p2align(new_size, bytes_per_block);
  blocks = size / bytes_per_block;

  if (blocks / blocks_per_key * blocks_per_key != blocks) {
    blocks = (blocks / blocks_per_key + 1) * blocks_per_key;
    dout(10) << __func__ << " rounding blocks up from 0x" << std::hex << size
	     << " to 0x" << (blocks * bytes_per_block)
	     << " (0x" << blocks << " blocks)" << std::dec << dendl;
    // set past-eof blocks as allocated
    _xor(size, blocks * bytes_per_block - size, txn);
  }

  dout(10) << __func__
	   << " size 0x" << std::hex << size
	   << " bytes_per_block 0x" << bytes_per_block
	   << " blocks 0x" << blocks
	   << " blocks_per_key 0x" << blocks_per_key
	   << std::dec << dendl;

  // persist the updated geometry
  {
    bufferlist bl;
    encode(blocks, bl);
    txn->set(meta_prefix, "blocks", bl);
  }
  {
    bufferlist bl;
    encode(size, bl);
    txn->set(meta_prefix, "size", bl);
  }
  return 0;
}
int BitmapFreelistManager::init()
{
dout(1) << __func__ << dendl;

View File

@ -55,6 +55,10 @@ public:
int create(uint64_t size, uint64_t granularity,
KeyValueDB::Transaction txn) override;
int expand(uint64_t new_size,
KeyValueDB::Transaction txn) override;
int init() override;
void shutdown() override;
@ -70,6 +74,9 @@ public:
uint64_t offset, uint64_t length,
KeyValueDB::Transaction txn) override;
/// Returns the current value of the `size` member (bytes).
uint64_t get_size() const override { return size; }
/// Returns how many whole `bytes_per_block`-sized units fit in `size`.
uint64_t get_alloc_units() const override { return size / bytes_per_block; }

View File

@ -6098,6 +6098,113 @@ shutdown:
return r;
}
/**
 * Map a BlueFS device id to the path of its backing file under `path`.
 *
 *   BDEV_WAL  -> <path>/block.wal
 *   BDEV_DB   -> <path>/block when DB is the shared device, else <path>/block.db
 *   BDEV_SLOW -> <path>/block
 *
 * @param id  BlueFS device id
 * @return the device path, or an empty string for an id that is out of
 *         range or not one of the ids above
 */
string BlueStore::get_device_path(unsigned id)
{
  if (id >= BlueFS::MAX_BDEV) {
    return string();
  }
  if (id == BlueFS::BDEV_WAL) {
    return path + "/block.wal";
  }
  if (id == BlueFS::BDEV_DB) {
    // DB lives on the main device when it is the shared one
    const bool db_is_shared = (id == bluefs_shared_bdev);
    return path + (db_is_shared ? "/block" : "/block.db");
  }
  if (id == BlueFS::BDEV_SLOW) {
    return path + "/block";
  }
  return string();
}
/**
 * Expand BlueFS's dedicated devices and the main device to the current size
 * of their underlying block devices, updating each device's size label.
 *
 * Dedicated WAL/DB devices are handled best-effort: a label failure is
 * logged and the next device is tried. For the main device the freelist is
 * grown first and committed synchronously; a failure to commit or to update
 * the main device label is logged and returned to the caller.
 *
 * The store is mounted on entry (asserting success) and unmounted on exit.
 *
 * @param out  stream receiving progress messages
 * @return 0 on success, or a negative error code if the main device
 *         expansion could not be committed or its label could not be updated
 */
int BlueStore::expand_devices(ostream& out)
{
  int r = _mount(false);
  ceph_assert(r == 0);
  bluefs->dump_block_extents(out);
  out << "Expanding..." << std::endl;
  for (auto devid : { BlueFS::BDEV_WAL, BlueFS::BDEV_DB}) {
    if (devid == bluefs_shared_bdev ) {
      continue;
    }
    interval_set<uint64_t> before;
    bluefs->get_block_extents(devid, &before);
    if (before.empty()) {
      // BlueFS owns no extents on this device (presumably the device is not
      // configured); there is nothing to expand, so skip it rather than abort
      continue;
    }
    uint64_t end = before.range_end();
    uint64_t size = bluefs->get_block_device_size(devid);
    if (end < size) {
      out << devid
	  <<" : expanding " << " from 0x" << std::hex
	  << end << " to 0x" << size << std::dec << std::endl;
      bluefs->add_block_extent(devid, end, size-end);
      string p = get_device_path(devid);
      // get_device_path() reports "unknown" with an empty string;
      // c_str() itself is never null, so test the string
      if (p.empty()) {
	derr << devid
	     <<": can't find device path " << dendl;
	continue;
      }
      const char* dev_path = p.c_str();
      bluestore_bdev_label_t label;
      int lr = _read_bdev_label(cct, dev_path, &label);
      if (lr < 0) {
	derr << "unable to read label for " << dev_path << ": "
	     << cpp_strerror(lr) << dendl;
	continue;
      }
      label.size = size;
      lr = _write_bdev_label(cct, dev_path, label);
      if (lr < 0) {
	derr << "unable to write label for " << dev_path << ": "
	     << cpp_strerror(lr) << dendl;
	continue;
      }
      out << devid
	  <<" : size label updated to " << size
	  << std::endl;
    }
  }
  uint64_t size0 = fm->get_size();
  uint64_t size = bdev->get_size();
  if (size0 < size) {
    out << bluefs_shared_bdev
	<<" : expanding " << " from 0x" << std::hex
	<< size0 << " to 0x" << size << std::dec << std::endl;
    KeyValueDB::Transaction txn = db->get_transaction();
    // reuse the function-level r so that failures below reach the caller
    r = fm->expand(size, txn);
    ceph_assert(r == 0);
    r = db->submit_transaction_sync(txn);
    if (r < 0) {
      // the freelist was not persisted; leave the label untouched so it
      // keeps matching the on-disk freelist
      derr << "unable to commit freelist expansion, error " << r << dendl;
    } else {
      // always reference to slow device here
      string p = get_device_path(BlueFS::BDEV_SLOW);
      ceph_assert(!p.empty());
      const char* dev_path = p.c_str();
      bluestore_bdev_label_t label;
      r = _read_bdev_label(cct, dev_path, &label);
      if (r < 0) {
	derr << "unable to read label for " << dev_path << ": "
	     << cpp_strerror(r) << dendl;
      } else {
	label.size = size;
	r = _write_bdev_label(cct, dev_path, label);
	if (r < 0) {
	  derr << "unable to write label for " << dev_path << ": "
	       << cpp_strerror(r) << dendl;
	} else {
	  out << bluefs_shared_bdev
	      <<" : size label updated to " << size
	      << std::endl;
	}
      }
    }
  }
  umount();
  return r;
}
void BlueStore::set_cache_shards(unsigned num)
{
dout(10) << __func__ << " " << num << dendl;

View File

@ -2398,6 +2398,8 @@ public:
int migrate_to_new_bluefs_device(const set<int>& devs_source,
int id,
const string& path);
int expand_devices(ostream& out);
string get_device_path(unsigned id);
public:
int statfs(struct store_statfs_t *buf) override;

View File

@ -27,6 +27,9 @@ public:
virtual int create(uint64_t size, uint64_t granularity,
KeyValueDB::Transaction txn) = 0;
virtual int expand(uint64_t new_size,
KeyValueDB::Transaction txn) = 0;
virtual int init() = 0;
virtual void shutdown() = 0;
@ -42,6 +45,7 @@ public:
uint64_t offset, uint64_t length,
KeyValueDB::Transaction txn) = 0;
virtual uint64_t get_size() const = 0;
virtual uint64_t get_alloc_units() const = 0;
virtual uint64_t get_alloc_size() const = 0;

View File

@ -527,43 +527,13 @@ int main(int argc, char **argv)
delete fs;
}
else if (action == "bluefs-bdev-expand") {
BlueFS *fs = open_bluefs(cct.get(), path, devs);
cout << "start:" << std::endl;
fs->dump_block_extents(cout);
for (int devid : { BlueFS::BDEV_WAL, BlueFS::BDEV_DB }) {
interval_set<uint64_t> before;
fs->get_block_extents(devid, &before);
if (before.empty()) continue;
uint64_t end = before.range_end();
uint64_t size = fs->get_block_device_size(devid);
if (end < size) {
cout << "expanding dev " << devid << " from 0x" << std::hex
<< end << " to 0x" << size << std::dec << std::endl;
fs->add_block_extent(devid, end, size-end);
const char* path = find_device_path(devid, cct.get(), devs);
if (path == nullptr) {
cerr << "Can't find device path for dev " << devid << std::endl;
continue;
}
bluestore_bdev_label_t label;
int r = BlueStore::_read_bdev_label(cct.get(), path, &label);
if (r < 0) {
cerr << "unable to read label for " << path << ": "
<< cpp_strerror(r) << std::endl;
continue;
}
label.size = size;
r = BlueStore::_write_bdev_label(cct.get(), path, label);
if (r < 0) {
cerr << "unable to write label for " << path << ": "
<< cpp_strerror(r) << std::endl;
continue;
}
cout << "dev " << devid << " size label updated to "
<< size << std::endl;
}
BlueStore bluestore(cct.get(), path);
auto r = bluestore.expand_devices(cout);
if (r <0) {
cerr << "failed to expand bluestore devices: "
<< cpp_strerror(r) << std::endl;
exit(EXIT_FAILURE);
}
delete fs;
}
else if (action == "bluefs-export") {
BlueFS *fs = open_bluefs(cct.get(), path, devs);