mirror of
https://github.com/ceph/ceph
synced 2025-03-25 11:48:05 +00:00
os/bluestore: add main device expand capability.
One can do that via ceph-bluestore-tool's bluefs-bdev-expand command.
Signed-off-by: Igor Fedotov <ifedotov@suse.com>
This commit is contained in:
parent
c2612cf9db
commit
d07c10dfc0
@ -12,7 +12,7 @@ function run() {
|
||||
export CEPH_ARGS
|
||||
CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
|
||||
CEPH_ARGS+="--mon-host=$CEPH_MON "
|
||||
CEPH_ARGS+="--bluestore_block_size=4294967296 "
|
||||
CEPH_ARGS+="--bluestore_block_size=2147483648 "
|
||||
CEPH_ARGS+="--bluestore_block_db_create=true "
|
||||
CEPH_ARGS+="--bluestore_block_db_size=1073741824 "
|
||||
CEPH_ARGS+="--bluestore_block_wal_size=536870912 "
|
||||
@ -66,8 +66,26 @@ function TEST_bluestore() {
|
||||
while kill $osd_pid3; do sleep 1 ; done
|
||||
ceph osd down 3
|
||||
|
||||
# expand slow devices
|
||||
ceph-bluestore-tool --path $dir/0 fsck || return 1
|
||||
ceph-bluestore-tool --path $dir/1 fsck || return 1
|
||||
ceph-bluestore-tool --path $dir/2 fsck || return 1
|
||||
ceph-bluestore-tool --path $dir/3 fsck || return 1
|
||||
|
||||
truncate $dir/0/block -s 4294967296 # 4GB
|
||||
ceph-bluestore-tool --path $dir/0 bluefs-bdev-expand || return 1
|
||||
truncate $dir/1/block -s 4311744512 # 4GB + 16MB
|
||||
ceph-bluestore-tool --path $dir/1 bluefs-bdev-expand || return 1
|
||||
truncate $dir/2/block -s 4295099392 # 4GB + 129KB
|
||||
ceph-bluestore-tool --path $dir/2 bluefs-bdev-expand || return 1
|
||||
truncate $dir/3/block -s 4293918720 # 4GB - 1MB
|
||||
ceph-bluestore-tool --path $dir/3 bluefs-bdev-expand || return 1
|
||||
|
||||
# slow, DB, WAL -> slow, DB
|
||||
ceph-bluestore-tool --path $dir/0 fsck || return 1
|
||||
ceph-bluestore-tool --path $dir/1 fsck || return 1
|
||||
ceph-bluestore-tool --path $dir/2 fsck || return 1
|
||||
ceph-bluestore-tool --path $dir/3 fsck || return 1
|
||||
|
||||
ceph-bluestore-tool --path $dir/0 bluefs-bdev-sizes
|
||||
|
||||
|
@ -106,6 +106,52 @@ int BitmapFreelistManager::create(uint64_t new_size, uint64_t granularity,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int BitmapFreelistManager::expand(uint64_t new_size, KeyValueDB::Transaction txn)
|
||||
{
|
||||
assert(new_size > size);
|
||||
ceph_assert(isp2(bytes_per_block));
|
||||
|
||||
uint64_t blocks0 = size / bytes_per_block;
|
||||
if (blocks0 / blocks_per_key * blocks_per_key != blocks0) {
|
||||
blocks0 = (blocks / blocks_per_key + 1) * blocks_per_key;
|
||||
dout(10) << __func__ << " rounding blocks up from 0x" << std::hex << size
|
||||
<< " to 0x" << (blocks0 * bytes_per_block)
|
||||
<< " (0x" << blocks0 << " blocks)" << std::dec << dendl;
|
||||
// reset past-eof blocks to unallocated
|
||||
_xor(size, blocks0 * bytes_per_block - size, txn);
|
||||
}
|
||||
|
||||
size = p2align(new_size, bytes_per_block);
|
||||
blocks = size / bytes_per_block;
|
||||
|
||||
if (blocks / blocks_per_key * blocks_per_key != blocks) {
|
||||
blocks = (blocks / blocks_per_key + 1) * blocks_per_key;
|
||||
dout(10) << __func__ << " rounding blocks up from 0x" << std::hex << size
|
||||
<< " to 0x" << (blocks * bytes_per_block)
|
||||
<< " (0x" << blocks << " blocks)" << std::dec << dendl;
|
||||
// set past-eof blocks as allocated
|
||||
_xor(size, blocks * bytes_per_block - size, txn);
|
||||
}
|
||||
|
||||
dout(10) << __func__
|
||||
<< " size 0x" << std::hex << size
|
||||
<< " bytes_per_block 0x" << bytes_per_block
|
||||
<< " blocks 0x" << blocks
|
||||
<< " blocks_per_key 0x" << blocks_per_key
|
||||
<< std::dec << dendl;
|
||||
{
|
||||
bufferlist bl;
|
||||
encode(blocks, bl);
|
||||
txn->set(meta_prefix, "blocks", bl);
|
||||
}
|
||||
{
|
||||
bufferlist bl;
|
||||
encode(size, bl);
|
||||
txn->set(meta_prefix, "size", bl);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int BitmapFreelistManager::init()
|
||||
{
|
||||
dout(1) << __func__ << dendl;
|
||||
|
@ -55,6 +55,10 @@ public:
|
||||
int create(uint64_t size, uint64_t granularity,
|
||||
KeyValueDB::Transaction txn) override;
|
||||
|
||||
// Grow the freelist to cover new_size (must exceed the current size);
// bitmap and metadata updates are staged in txn.  Returns 0.
int expand(uint64_t new_size,
|
||||
KeyValueDB::Transaction txn) override;
|
||||
|
||||
|
||||
int init() override;
|
||||
void shutdown() override;
|
||||
|
||||
@ -70,6 +74,9 @@ public:
|
||||
uint64_t offset, uint64_t length,
|
||||
KeyValueDB::Transaction txn) override;
|
||||
|
||||
inline uint64_t get_size() const override {
|
||||
return size;
|
||||
}
|
||||
inline uint64_t get_alloc_units() const override {
|
||||
return size / bytes_per_block;
|
||||
}
|
||||
|
@ -6098,6 +6098,113 @@ shutdown:
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
 * Translate a BlueFS device id into the path of the corresponding block
 * file/link below `path`.
 *
 * @param id  BlueFS device id (BDEV_WAL, BDEV_DB or BDEV_SLOW)
 * @return the device path, or an empty string for an id that is out of
 *         range or not one of the ids above
 */
string BlueStore::get_device_path(unsigned id)
{
  if (id >= BlueFS::MAX_BDEV) {
    return string();
  }
  if (id == BlueFS::BDEV_WAL) {
    return path + "/block.wal";
  }
  if (id == BlueFS::BDEV_DB) {
    // When DB is the shared device it lives on the main block device.
    return path + (id == bluefs_shared_bdev ? "/block" : "/block.db");
  }
  if (id == BlueFS::BDEV_SLOW) {
    return path + "/block";
  }
  return string();
}
|
||||
|
||||
/**
 * Make the store use the full current size of its block devices.
 *
 * The store is mounted, then:
 *  - for the WAL and DB devices (unless the id is the shared BlueFS device)
 *    the space between the end of the extents known to BlueFS and the
 *    device size is handed to BlueFS and the size in the device label is
 *    rewritten;
 *  - if the main device is larger than the size tracked by the freelist
 *    manager, the freelist is expanded and the main device label rewritten.
 * Finally the store is unmounted.
 *
 * Failures while processing one device do not stop the remaining devices
 * from being processed, but they are reported through the return value.
 *
 * @param out  stream that receives human-readable progress messages
 * @return 0 on success; the mount error if the store cannot be mounted;
 *         otherwise the first negative error code hit while expanding
 */
int BlueStore::expand_devices(ostream& out)
{
  int r = _mount(false);
  if (r < 0) {
    // Let the caller report the failure instead of aborting the process.
    derr << __func__ << " unable to mount: " << cpp_strerror(r) << dendl;
    return r;
  }

  // First error seen; later steps are still attempted on a best-effort basis.
  int first_err = 0;
  auto note_error = [&first_err](int err) {
    if (err < 0 && first_err == 0) {
      first_err = err;
    }
  };

  // Rewrite the label of the device at dev_path so that it records new_size.
  // Logs and returns the error if the label cannot be read or written.
  auto update_label_size = [this](const char* dev_path, uint64_t new_size) -> int {
    bluestore_bdev_label_t label;
    int lr = _read_bdev_label(cct, dev_path, &label);
    if (lr < 0) {
      derr << "unable to read label for " << dev_path << ": "
           << cpp_strerror(lr) << dendl;
      return lr;
    }
    label.size = new_size;
    lr = _write_bdev_label(cct, dev_path, label);
    if (lr < 0) {
      derr << "unable to write label for " << dev_path << ": "
           << cpp_strerror(lr) << dendl;
      return lr;
    }
    return 0;
  };

  bluefs->dump_block_extents(out);
  out << "Expanding..." << std::endl;
  for (auto devid : { BlueFS::BDEV_WAL, BlueFS::BDEV_DB}) {
    if (devid == bluefs_shared_bdev ) {
      // the shared device is handled through the freelist below
      continue;
    }
    interval_set<uint64_t> before;
    bluefs->get_block_extents(devid, &before);
    if (before.empty()) {
      // BlueFS has no extents on this device (presumably the device is not
      // configured for this store); skip it rather than abort.
      continue;
    }
    const uint64_t end = before.range_end();
    const uint64_t size = bluefs->get_block_device_size(devid);
    if (end >= size) {
      continue;
    }
    out << devid
        <<" : expanding " << " from 0x" << std::hex
        << end << " to 0x" << size << std::dec << std::endl;
    bluefs->add_block_extent(devid, end, size-end);

    const string p = get_device_path(devid);
    // c_str() never yields a null pointer, so test the string itself.
    if (p.empty()) {
      derr << devid
           <<": can't find device path " << dendl;
      continue;
    }
    const int lr = update_label_size(p.c_str(), size);
    if (lr < 0) {
      note_error(lr);
      continue;
    }
    out << devid
        <<" : size label updated to " << size
        << std::endl;
  }

  const uint64_t size0 = fm->get_size();
  const uint64_t size = bdev->get_size();
  if (size0 < size) {
    out << bluefs_shared_bdev
        <<" : expanding " << " from 0x" << std::hex
        << size0 << " to 0x" << size << std::dec << std::endl;
    KeyValueDB::Transaction txn = db->get_transaction();
    r = fm->expand(size, txn);
    ceph_assert(r == 0);
    r = db->submit_transaction_sync(txn);
    if (r < 0) {
      // Do not advertise the new size in the label if the freelist update
      // did not reach the DB.
      derr << __func__ << " unable to commit freelist expansion: "
           << cpp_strerror(r) << dendl;
      note_error(r);
    } else {
      // always reference to slow device here
      const string p = get_device_path(BlueFS::BDEV_SLOW);
      ceph_assert(!p.empty());
      r = update_label_size(p.c_str(), size);
      if (r < 0) {
        note_error(r);
      } else {
        out << bluefs_shared_bdev
            <<" : size label updated to " << size
            << std::endl;
      }
    }
  }
  umount();
  return first_err;
}
|
||||
|
||||
void BlueStore::set_cache_shards(unsigned num)
|
||||
{
|
||||
dout(10) << __func__ << " " << num << dendl;
|
||||
|
@ -2398,6 +2398,8 @@ public:
|
||||
int migrate_to_new_bluefs_device(const set<int>& devs_source,
|
||||
int id,
|
||||
const string& path);
|
||||
// Mounts the store, grows BlueFS extents / the freelist up to the current
// device sizes, updates the device labels and unmounts again.  Progress is
// written to `out`.
int expand_devices(ostream& out);
|
||||
// Returns the block file path below `path` for a BlueFS device id
// (BDEV_WAL, BDEV_DB, BDEV_SLOW); empty string for any other id.
string get_device_path(unsigned id);
|
||||
|
||||
public:
|
||||
int statfs(struct store_statfs_t *buf) override;
|
||||
|
@ -27,6 +27,9 @@ public:
|
||||
virtual int create(uint64_t size, uint64_t granularity,
|
||||
KeyValueDB::Transaction txn) = 0;
|
||||
|
||||
// Extend the freelist to new_size, staging the required updates in txn.
// The BitmapFreelistManager implementation requires new_size to exceed the
// current size and returns 0.
virtual int expand(uint64_t new_size,
|
||||
KeyValueDB::Transaction txn) = 0;
|
||||
|
||||
virtual int init() = 0;
|
||||
virtual void shutdown() = 0;
|
||||
|
||||
@ -42,6 +45,7 @@ public:
|
||||
uint64_t offset, uint64_t length,
|
||||
KeyValueDB::Transaction txn) = 0;
|
||||
|
||||
// Size currently covered by the freelist (BitmapFreelistManager returns its
// `size` member).
virtual uint64_t get_size() const = 0;
|
||||
virtual uint64_t get_alloc_units() const = 0;
|
||||
virtual uint64_t get_alloc_size() const = 0;
|
||||
|
||||
|
@ -527,43 +527,13 @@ int main(int argc, char **argv)
|
||||
delete fs;
|
||||
}
|
||||
else if (action == "bluefs-bdev-expand") {
|
||||
BlueFS *fs = open_bluefs(cct.get(), path, devs);
|
||||
cout << "start:" << std::endl;
|
||||
fs->dump_block_extents(cout);
|
||||
for (int devid : { BlueFS::BDEV_WAL, BlueFS::BDEV_DB }) {
|
||||
interval_set<uint64_t> before;
|
||||
fs->get_block_extents(devid, &before);
|
||||
if (before.empty()) continue;
|
||||
uint64_t end = before.range_end();
|
||||
uint64_t size = fs->get_block_device_size(devid);
|
||||
if (end < size) {
|
||||
cout << "expanding dev " << devid << " from 0x" << std::hex
|
||||
<< end << " to 0x" << size << std::dec << std::endl;
|
||||
fs->add_block_extent(devid, end, size-end);
|
||||
const char* path = find_device_path(devid, cct.get(), devs);
|
||||
if (path == nullptr) {
|
||||
cerr << "Can't find device path for dev " << devid << std::endl;
|
||||
continue;
|
||||
}
|
||||
bluestore_bdev_label_t label;
|
||||
int r = BlueStore::_read_bdev_label(cct.get(), path, &label);
|
||||
if (r < 0) {
|
||||
cerr << "unable to read label for " << path << ": "
|
||||
<< cpp_strerror(r) << std::endl;
|
||||
continue;
|
||||
}
|
||||
label.size = size;
|
||||
r = BlueStore::_write_bdev_label(cct.get(), path, label);
|
||||
if (r < 0) {
|
||||
cerr << "unable to write label for " << path << ": "
|
||||
<< cpp_strerror(r) << std::endl;
|
||||
continue;
|
||||
}
|
||||
cout << "dev " << devid << " size label updated to "
|
||||
<< size << std::endl;
|
||||
}
|
||||
BlueStore bluestore(cct.get(), path);
|
||||
auto r = bluestore.expand_devices(cout);
|
||||
if (r <0) {
|
||||
cerr << "failed to expand bluestore devices: "
|
||||
<< cpp_strerror(r) << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
delete fs;
|
||||
}
|
||||
else if (action == "bluefs-export") {
|
||||
BlueFS *fs = open_bluefs(cct.get(), path, devs);
|
||||
|
Loading…
Reference in New Issue
Block a user