Merge PR #27871 into master

* refs/pull/27871/head:
	ceph_test_objectstore: add very_large_write test
	os/bluestore: fix aio pwritev lost data problem.

Reviewed-by: Igor Fedotov <ifedotov@suse.com>
This commit is contained in:
Sage Weil 2019-05-02 08:22:04 -05:00
commit 6ad73b2d0f
3 changed files with 95 additions and 9 deletions

View File

@ -845,9 +845,6 @@ int KernelDevice::aio_write(
#ifdef HAVE_LIBAIO
if (aio && dio && !buffered) {
ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
++ioc->num_pending;
aio_t& aio = ioc->pending_aios.back();
if (cct->_conf->bdev_inject_crash &&
rand() % cct->_conf->bdev_inject_crash == 0) {
derr << __func__ << " bdev_inject_crash: dropping io 0x" << std::hex
@ -855,16 +852,48 @@ int KernelDevice::aio_write(
<< dendl;
// generate a real io so that aio_wait behaves properly, but make it
// a read instead of write, and toss the result.
ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
++ioc->num_pending;
auto& aio = ioc->pending_aios.back();
aio.pread(off, len);
++injecting_crash;
} else {
bl.prepare_iov(&aio.iov);
dout(30) << aio << dendl;
aio.bl.claim_append(bl);
aio.pwritev(off, len);
if (bl.length() <= RW_IO_MAX) {
// fast path (non-huge write)
ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
++ioc->num_pending;
auto& aio = ioc->pending_aios.back();
bl.prepare_iov(&aio.iov);
aio.bl.claim_append(bl);
aio.pwritev(off, len);
dout(30) << aio << dendl;
dout(5) << __func__ << " 0x" << std::hex << off << "~" << len
<< std::dec << " aio " << &aio << dendl;
} else {
// write in RW_IO_MAX-sized chunks
uint64_t prev_len = 0;
while (prev_len < bl.length()) {
bufferlist tmp;
if (prev_len + RW_IO_MAX < bl.length()) {
tmp.substr_of(bl, prev_len, RW_IO_MAX);
} else {
tmp.substr_of(bl, prev_len, bl.length() - prev_len);
}
auto len = tmp.length();
ioc->pending_aios.push_back(aio_t(ioc, choose_fd(false, write_hint)));
++ioc->num_pending;
auto& aio = ioc->pending_aios.back();
tmp.prepare_iov(&aio.iov);
aio.bl.claim_append(tmp);
aio.pwritev(off + prev_len, len);
dout(30) << aio << dendl;
dout(5) << __func__ << " 0x" << std::hex << off + prev_len
<< "~" << len
<< std::dec << " aio " << &aio << " (piece)" << dendl;
prev_len += len;
}
}
}
dout(5) << __func__ << " 0x" << std::hex << off << "~" << len
<< std::dec << " aio " << &aio << dendl;
} else
#endif
{

View File

@ -25,6 +25,11 @@
#include "ceph_aio.h"
#include "BlockDevice.h"
// Largest number of bytes submitted as a single aio write. The aio path of
// KernelDevice::aio_write compares the buffer length against this value and
// splits anything larger into pieces of at most RW_IO_MAX bytes each.
// The #ifndef guard lets a definition supplied earlier take precedence.
// NOTE(review): 0x7FFFF000 presumably mirrors the Linux kernel's per-call
// read/write transfer limit (INT_MAX rounded down to a page boundary) --
// confirm against the kernel documentation.
#ifndef RW_IO_MAX
#define RW_IO_MAX 0x7FFFF000
#endif
class KernelDevice : public BlockDevice {
std::vector<int> fd_directs, fd_buffereds;
bool enable_wrt = true;

View File

@ -135,6 +135,58 @@ TEST(BlueFS, small_appends) {
rm_temp_bdev(fn);
}
// Writes a ~3G file to BlueFS with bluefs_buffered_io disabled, followed by a
// single fsync, then reads the file back chunk by chunk and verifies that
// every chunk matches the pattern that was appended.
TEST(BlueFS, very_large_write) {
  // we'll write a ~3G file, so allocate more than that for the whole fs
  uint64_t size = 1048576 * 1024 * 8ull;
  string fn = get_temp_bdev(size);
  BlueFS fs(g_ceph_context);

  // Remember the current setting so it can be restored before returning.
  bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
  g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");

  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
  fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
  uuid_d fsid;
  ASSERT_EQ(0, fs.mkfs(fsid));
  ASSERT_EQ(0, fs.mount());

  char buf[1048571]; // this is biggish, but intentionally not evenly aligned
  for (unsigned i = 0; i < sizeof(buf); ++i) {
    // only the low bits of the index are kept, giving a repeating pattern
    buf[i] = static_cast<char>(i);
  }

  // Number of buf-sized pieces appended and later read back; shared by both
  // loops so that the writer and the verifier cannot drift apart.
  const uint64_t num_chunks = 3*1024*1048576ull / sizeof(buf);

  {
    BlueFS::FileWriter *h;
    ASSERT_EQ(0, fs.mkdir("dir"));
    ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
    for (unsigned i = 0; i < num_chunks; ++i) {
      h->append(buf, sizeof(buf));
    }
    fs.fsync(h);
    fs.close_writer(h);
  }
  {
    BlueFS::FileReader *h;
    ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
    bufferlist bl;
    BlueFS::FileReaderBuffer readbuf(10485760);
    for (unsigned i = 0; i < num_chunks; ++i) {
      bl.clear();
      fs.read(h, &readbuf, i * sizeof(buf), sizeof(buf), &bl, nullptr);
      // A short read must fail the test here; otherwise the memcmp below
      // would read past the end of the data held by bl.
      ASSERT_EQ(sizeof(buf), static_cast<size_t>(bl.length()));
      int r = memcmp(buf, bl.c_str(), sizeof(buf));
      if (r) {
        cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
             << std::endl;
      }
      ASSERT_EQ(0, r);
    }
    delete h;
  }
  fs.umount();

  g_ceph_context->_conf.set_val("bluefs_buffered_io",
                                stringify(static_cast<int>(old)));

  rm_temp_bdev(fn);
}
#define ALLOC_SIZE 4096
void write_data(BlueFS &fs, uint64_t rationed_bytes)