mirror of
https://github.com/ceph/ceph
synced 2025-04-01 00:26:47 +00:00
Merge PR #27871 into master
* refs/pull/27871/head:
    ceph_test_objectstore: add very_large_write test
    os/bluestore: fix aio pwritev lost data problem

Reviewed-by: Igor Fedotov <ifedotov@suse.com>
This commit is contained in:
commit
6ad73b2d0f
@ -845,9 +845,6 @@ int KernelDevice::aio_write(
|
||||
|
||||
#ifdef HAVE_LIBAIO
|
||||
if (aio && dio && !buffered) {
|
||||
ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
|
||||
++ioc->num_pending;
|
||||
aio_t& aio = ioc->pending_aios.back();
|
||||
if (cct->_conf->bdev_inject_crash &&
|
||||
rand() % cct->_conf->bdev_inject_crash == 0) {
|
||||
derr << __func__ << " bdev_inject_crash: dropping io 0x" << std::hex
|
||||
@ -855,16 +852,48 @@ int KernelDevice::aio_write(
|
||||
<< dendl;
|
||||
// generate a real io so that aio_wait behaves properly, but make it
|
||||
// a read instead of write, and toss the result.
|
||||
ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
|
||||
++ioc->num_pending;
|
||||
auto& aio = ioc->pending_aios.back();
|
||||
aio.pread(off, len);
|
||||
++injecting_crash;
|
||||
} else {
|
||||
bl.prepare_iov(&aio.iov);
|
||||
dout(30) << aio << dendl;
|
||||
aio.bl.claim_append(bl);
|
||||
aio.pwritev(off, len);
|
||||
if (bl.length() <= RW_IO_MAX) {
|
||||
// fast path (non-huge write)
|
||||
ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
|
||||
++ioc->num_pending;
|
||||
auto& aio = ioc->pending_aios.back();
|
||||
bl.prepare_iov(&aio.iov);
|
||||
aio.bl.claim_append(bl);
|
||||
aio.pwritev(off, len);
|
||||
dout(30) << aio << dendl;
|
||||
dout(5) << __func__ << " 0x" << std::hex << off << "~" << len
|
||||
<< std::dec << " aio " << &aio << dendl;
|
||||
} else {
|
||||
// write in RW_IO_MAX-sized chunks
|
||||
uint64_t prev_len = 0;
|
||||
while (prev_len < bl.length()) {
|
||||
bufferlist tmp;
|
||||
if (prev_len + RW_IO_MAX < bl.length()) {
|
||||
tmp.substr_of(bl, prev_len, RW_IO_MAX);
|
||||
} else {
|
||||
tmp.substr_of(bl, prev_len, bl.length() - prev_len);
|
||||
}
|
||||
auto len = tmp.length();
|
||||
ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
|
||||
++ioc->num_pending;
|
||||
auto& aio = ioc->pending_aios.back();
|
||||
tmp.prepare_iov(&aio.iov);
|
||||
aio.bl.claim_append(tmp);
|
||||
aio.pwritev(off + prev_len, len);
|
||||
dout(30) << aio << dendl;
|
||||
dout(5) << __func__ << " 0x" << std::hex << off + prev_len
|
||||
<< "~" << len
|
||||
<< std::dec << " aio " << &aio << " (piece)" << dendl;
|
||||
prev_len += len;
|
||||
}
|
||||
}
|
||||
}
|
||||
dout(5) << __func__ << " 0x" << std::hex << off << "~" << len
|
||||
<< std::dec << " aio " << &aio << dendl;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
|
@ -25,6 +25,11 @@
|
||||
#include "ceph_aio.h"
|
||||
#include "BlockDevice.h"
|
||||
|
||||
// Upper bound on the byte count submitted in a single aio write.
// KernelDevice::aio_write compares the bufferlist length against this value
// and splits anything larger into RW_IO_MAX-sized pieces.
// NOTE(review): 0x7FFFF000 appears to mirror the Linux per-call read/write
// transfer limit documented in write(2) — confirm before changing.
// Defined only if the platform/build has not already provided a value.
#ifndef RW_IO_MAX
|
||||
#define RW_IO_MAX 0x7FFFF000
|
||||
#endif
|
||||
|
||||
|
||||
class KernelDevice : public BlockDevice {
|
||||
std::vector<int> fd_directs, fd_buffereds;
|
||||
bool enable_wrt = true;
|
||||
|
@ -135,6 +135,58 @@ TEST(BlueFS, small_appends) {
|
||||
rm_temp_bdev(fn);
|
||||
}
|
||||
|
||||
// Round-trip test for a very large BlueFS file: appends roughly 3 GiB to a
// single file with bluefs_buffered_io disabled, fsyncs it, then reads the file
// back one chunk at a time and checks every chunk against the written pattern.
// NOTE(review): presumably sized to push a single flush past RW_IO_MAX so the
// chunked aio write path is exercised — confirm against KernelDevice::aio_write.
TEST(BlueFS, very_large_write) {
  // we'll write a ~3G file, so allocate more than that for the whole fs
  uint64_t size = 1048576 * 1024 * 8ull;
  string fn = get_temp_bdev(size);
  BlueFS fs(g_ceph_context);

  // Switch buffered io off for the duration of the test; the previous value is
  // remembered here and written back after umount.
  bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
  g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");

  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
  // Hand BlueFS everything except the first 1 MiB of the device.
  fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
  uuid_d fsid;
  ASSERT_EQ(0, fs.mkfs(fsid));
  ASSERT_EQ(0, fs.mount());

  char buf[1048571]; // this is biggish, but intentionally not evenly aligned
  for (unsigned i = 0; i < sizeof(buf); ++i) {
    buf[i] = i;  // repeating byte pattern (value wraps on conversion to char)
  }

  // Number of whole buf-sized chunks that fit in 3 GiB; the writer and the
  // reader must agree on this, so compute it once.
  const uint64_t num_chunks = 3*1024*1048576ull / sizeof(buf);

  {
    BlueFS::FileWriter *h;
    ASSERT_EQ(0, fs.mkdir("dir"));
    ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
    for (uint64_t i = 0; i < num_chunks; ++i) {
      h->append(buf, sizeof(buf));
    }
    // Everything is appended before the single fsync, so the flush is handed
    // the whole file at once.
    fs.fsync(h);
    fs.close_writer(h);
  }
  {
    BlueFS::FileReader *h;
    ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
    bufferlist bl;
    BlueFS::FileReaderBuffer readbuf(10485760);
    for (uint64_t i = 0; i < num_chunks; ++i) {
      bl.clear();
      fs.read(h, &readbuf, i * sizeof(buf), sizeof(buf), &bl, NULL);
      // A short read must fail the test here: the memcmp below always compares
      // sizeof(buf) bytes and would otherwise run past the returned data.
      ASSERT_EQ(sizeof(buf), static_cast<size_t>(bl.length()));
      int r = memcmp(buf, bl.c_str(), sizeof(buf));
      if (r) {
	cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
	     << std::endl;
      }
      ASSERT_EQ(0, r);
    }
    delete h;
  }
  fs.umount();

  // Restore the buffered io setting captured at the top of the test.
  g_ceph_context->_conf.set_val("bluefs_buffered_io",
				stringify(static_cast<int>(old)));

  rm_temp_bdev(fn);
}
|
||||
|
||||
#define ALLOC_SIZE 4096
|
||||
|
||||
void write_data(BlueFS &fs, uint64_t rationed_bytes)
|
||||
|
Loading…
Reference in New Issue
Block a user