mirror of
https://github.com/ceph/ceph
synced 2025-01-02 00:52:22 +00:00
NVMEDevice: use nvme zero command instead of writing zero
Signed-off-by: Haomai Wang <haomai@xsky.com>
This commit is contained in:
parent
398e331ea9
commit
525927044b
@ -63,9 +63,11 @@ rte_mempool *task_pool = nullptr;
|
||||
enum {
|
||||
l_bluestore_nvmedevice_first = 632430,
|
||||
l_bluestore_nvmedevice_aio_write_lat,
|
||||
l_bluestore_nvmedevice_aio_zero_lat,
|
||||
l_bluestore_nvmedevice_read_lat,
|
||||
l_bluestore_nvmedevice_flush_lat,
|
||||
l_bluestore_nvmedevice_aio_write_queue_lat,
|
||||
l_bluestore_nvmedevice_aio_zero_queue_lat,
|
||||
l_bluestore_nvmedevice_read_queue_lat,
|
||||
l_bluestore_nvmedevice_flush_queue_lat,
|
||||
l_bluestore_nvmedevice_queue_ops,
|
||||
@ -147,11 +149,13 @@ class SharedDriverData {
|
||||
PerfCountersBuilder b(g_ceph_context, string("NVMEDevice-AIOThread-"+stringify(this)),
|
||||
l_bluestore_nvmedevice_first, l_bluestore_nvmedevice_last);
|
||||
b.add_time_avg(l_bluestore_nvmedevice_aio_write_lat, "aio_write_lat", "Average write completing latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_aio_zero_lat, "aio_zero_lat", "Average zero completing latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_read_lat, "read_lat", "Average read completing latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_flush_lat, "flush_lat", "Average flush completing latency");
|
||||
b.add_u64(l_bluestore_nvmedevice_queue_ops, "queue_ops", "Operations in nvme queue");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_polling_lat, "polling_lat", "Average polling latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_aio_write_queue_lat, "aio_write_queue_lat", "Average queue write request latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_aio_zero_queue_lat, "aio_zero_queue_lat", "Average queue zero request latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_read_queue_lat, "read_queue_lat", "Average queue read request latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_flush_queue_lat, "flush_queue_lat", "Average queue flush request latency");
|
||||
logger = b.create_perf_counters();
|
||||
@ -279,6 +283,24 @@ void SharedDriverData::_aio_thread()
|
||||
logger->tinc(l_bluestore_nvmedevice_aio_write_queue_lat, lat);
|
||||
break;
|
||||
}
|
||||
case IOCommand::ZERO_COMMAND:
|
||||
{
|
||||
lba_off = t->offset / block_size;
|
||||
lba_count = t->len / block_size;
|
||||
dout(20) << __func__ << " zero command issued " << lba_off << "~" << lba_count << dendl;
|
||||
r = nvme_ns_cmd_write_zeroes(ns, lba_off, lba_count, io_complete, t, 0);
|
||||
if (r < 0) {
|
||||
t->ctx->nvme_task_first = t->ctx->nvme_task_last = nullptr;
|
||||
rte_free(t->buf);
|
||||
rte_mempool_put(task_pool, t);
|
||||
derr << __func__ << " failed to do zero command" << dendl;
|
||||
assert(0);
|
||||
}
|
||||
lat = ceph_clock_now(g_ceph_context);
|
||||
lat -= t->start;
|
||||
logger->tinc(l_bluestore_nvmedevice_aio_zero_queue_lat, lat);
|
||||
break;
|
||||
}
|
||||
case IOCommand::READ_COMMAND:
|
||||
{
|
||||
dout(20) << __func__ << " read command issueed " << lba_off << "~" << lba_count << dendl;
|
||||
@ -523,11 +545,15 @@ void io_complete(void *t, const struct nvme_completion *completion)
|
||||
int left = driver->inflight_ops.dec();
|
||||
utime_t lat = ceph_clock_now(g_ceph_context);
|
||||
lat -= task->start;
|
||||
if (task->command == IOCommand::WRITE_COMMAND) {
|
||||
driver->logger->tinc(l_bluestore_nvmedevice_aio_write_lat, lat);
|
||||
if (task->command == IOCommand::WRITE_COMMAND ||
|
||||
task->command == IOCommand::ZERO_COMMAND) {
|
||||
if (task->command == IOCommand::WRITE_COMMAND)
|
||||
driver->logger->tinc(l_bluestore_nvmedevice_aio_write_lat, lat);
|
||||
else
|
||||
driver->logger->tinc(l_bluestore_nvmedevice_aio_zero_lat, lat);
|
||||
assert(!nvme_completion_is_error(completion));
|
||||
dout(20) << __func__ << " write op successfully, left " << left << dendl;
|
||||
// buffer write won't have ctx, and we will free request later, see `flush`
|
||||
dout(20) << __func__ << " write/zero op successfully, left " << left << dendl;
|
||||
// buffer write/zero won't have ctx, and we will free request later, see `flush`
|
||||
if (ctx) {
|
||||
// check waiting count before doing callback (which may
|
||||
// destroy this ioc).
|
||||
@ -581,8 +607,6 @@ NVMEDevice::NVMEDevice(aio_callback_t cb, void *cbpriv)
|
||||
aio_callback(cb),
|
||||
aio_callback_priv(cbpriv)
|
||||
{
|
||||
zeros = buffer::create_page_aligned(1048576);
|
||||
zeros.zero();
|
||||
}
|
||||
|
||||
|
||||
@ -779,16 +803,32 @@ int NVMEDevice::aio_zero(
|
||||
assert(off < size);
|
||||
assert(off + len <= size);
|
||||
|
||||
bufferlist bl;
|
||||
while (len > 0) {
|
||||
bufferlist t;
|
||||
t.append(zeros, 0, MIN(zeros.length(), len));
|
||||
len -= t.length();
|
||||
bl.claim_append(t);
|
||||
Task *t;
|
||||
int r = rte_mempool_get(task_pool, (void **)&t);
|
||||
if (r < 0) {
|
||||
derr << __func__ << " failed to get task from mempool: " << r << dendl;
|
||||
return r;
|
||||
}
|
||||
// note: this works with aio only because the actual buffer is
|
||||
// this->zeros, which is page-aligned and never freed.
|
||||
return aio_write(off, bl, ioc, false);
|
||||
t->start = ceph_clock_now(g_ceph_context);
|
||||
|
||||
t->command = IOCommand::ZERO_COMMAND;
|
||||
t->offset = off;
|
||||
t->len = len;
|
||||
t->device = this;
|
||||
t->return_code = 0;
|
||||
t->next = nullptr;
|
||||
|
||||
t->ctx = ioc;
|
||||
Task *first = static_cast<Task*>(ioc->nvme_task_first);
|
||||
Task *last = static_cast<Task*>(ioc->nvme_task_last);
|
||||
if (last)
|
||||
last->next = t;
|
||||
if (!first)
|
||||
ioc->nvme_task_first = t;
|
||||
ioc->nvme_task_last = t;
|
||||
ioc->num_pending.inc();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int NVMEDevice::read(uint64_t off, uint64_t len, bufferlist *pbl,
|
||||
|
@ -35,6 +35,7 @@
|
||||
enum class IOCommand {
|
||||
READ_COMMAND,
|
||||
WRITE_COMMAND,
|
||||
ZERO_COMMAND,
|
||||
FLUSH_COMMAND
|
||||
};
|
||||
|
||||
@ -67,7 +68,6 @@ class NVMEDevice : public BlockDevice {
|
||||
uint64_t block_size;
|
||||
|
||||
bool aio_stop;
|
||||
bufferptr zeros;
|
||||
|
||||
struct BufferedExtents {
|
||||
struct Extent {
|
||||
|
Loading…
Reference in New Issue
Block a user