Merge pull request #34432 from optimistyzy/47_upstream

os/bluestore/NVMEDevice.cc: use slist for tracking mempool

Reviewed-by: Kefu Chai <kchai@redhat.com>
commit 36ddb9ed3b
Kefu Chai, 2020-04-20 14:51:33 +08:00 (committed by GitHub)

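This merge replaces the per-queue std::vector<void*> buffer mempool with a boost::intrusive::slist: each raw SPDK DMA buffer doubles as a list node through a slist_base_hook placed at the start of the buffer, so the free list needs no side allocations, size() stays O(1) thanks to constant_time_size<true>, and teardown collapses to a single clear_and_dispose(spdk_dma_free). Below is a minimal, self-contained sketch of the same technique, not the Ceph code itself: plain malloc/free stand in for the SPDK DMA allocator, and free_bufs is a hypothetical name. The allocation/release round-trip from alloc_buf_from_pool() and release_segs() is sketched again after the diff.

// Sketch: track externally allocated raw buffers with an intrusive slist.
#include <cassert>
#include <cstdlib>
#include <boost/intrusive/slist.hpp>

namespace bi = boost::intrusive;

// The hook sits at the front of each buffer, so the buffer's own memory
// serves as the list node and no per-element bookkeeping is allocated.
struct data_cache_buf
  : public bi::slist_base_hook<bi::link_mode<bi::normal_link>> {};

int main()
{
  // constant_time_size<true> keeps size() O(1); the allocation path
  // relies on it to reject requests larger than the free pool.
  bi::slist<data_cache_buf, bi::constant_time_size<true>> free_bufs;

  constexpr int buf_count = 16;
  constexpr std::size_t buf_size = 8192;
  for (int i = 0; i < buf_count; i++) {
    void *b = std::malloc(buf_size);   // stand-in for the SPDK DMA allocator
    assert(b);
    // Mirroring the diff: the raw allocation is reinterpreted as a node.
    free_bufs.push_front(*reinterpret_cast<data_cache_buf *>(b));
  }

  // "Allocate": pop a buffer off the front; its address is the node address.
  assert(!free_bufs.empty());
  void *seg = &free_bufs.front();
  free_bufs.pop_front();

  // "Release": link the same memory back into the free list.
  free_bufs.push_front(*reinterpret_cast<data_cache_buf *>(seg));

  // Teardown: unlink every node and hand its raw pointer to the disposer,
  // the same pattern as clear_and_dispose(spdk_dma_free) in the diff.
  free_bufs.clear_and_dispose([](data_cache_buf *buf) { std::free(buf); });
  return 0;
}

The diff selects bi::normal_link, the cheapest link mode (no linked-state tracking or safety assertions), which fits buffers that are reinterpreted raw allocations rather than constructed C++ objects.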

@@ -28,6 +28,7 @@
#include <functional>
#include <map>
#include <thread>
#include <boost/intrusive/slist.hpp>
#include <spdk/nvme.h>
@@ -54,20 +55,6 @@ static constexpr uint32_t data_buffer_size = 8192;
static constexpr uint16_t inline_segment_num = 32;
enum {
l_bluestore_nvmedevice_first = 632430,
l_bluestore_nvmedevice_write_lat,
l_bluestore_nvmedevice_read_lat,
l_bluestore_nvmedevice_flush_lat,
l_bluestore_nvmedevice_write_queue_lat,
l_bluestore_nvmedevice_read_queue_lat,
l_bluestore_nvmedevice_flush_queue_lat,
l_bluestore_nvmedevice_queue_ops,
l_bluestore_nvmedevice_polling_lat,
l_bluestore_nvmedevice_buffer_alloc_failed,
l_bluestore_nvmedevice_last
};
static void io_complete(void *t, const struct spdk_nvme_cpl *completion);
struct IORequest {
@@ -78,6 +65,10 @@ struct IORequest {
void **extra_segs = nullptr;
};
namespace bi = boost::intrusive;
struct data_cache_buf : public bi::slist_base_hook<bi::link_mode<bi::normal_link>>
{};
struct Task;
class SharedDriverData {
@@ -143,8 +134,7 @@ class SharedDriverQueueData {
public:
uint32_t current_queue_depth = 0;
std::atomic_ulong completed_op_seq, queue_op_seq;
std::vector<void*> data_buf_mempool;
PerfCounters *logger = nullptr;
bi::slist<data_cache_buf, bi::constant_time_size<true>> data_buf_list;
void _aio_handle(Task *t, IOContext *ioc);
SharedDriverQueueData(NVMEDevice *bdev, SharedDriverData *driver)
@@ -169,45 +159,21 @@ class SharedDriverQueueData {
derr << __func__ << " failed to create memory pool for nvme data buffer" << dendl;
ceph_assert(b);
}
data_buf_mempool.push_back(b);
data_buf_list.push_front(*reinterpret_cast<data_cache_buf *>(b));
}
PerfCountersBuilder b(g_ceph_context, string("NVMEDevice-AIOThread-"+stringify(this)),
l_bluestore_nvmedevice_first, l_bluestore_nvmedevice_last);
b.add_time_avg(l_bluestore_nvmedevice_write_lat, "write_lat", "Average write completing latency");
b.add_time_avg(l_bluestore_nvmedevice_read_lat, "read_lat", "Average read completing latency");
b.add_time_avg(l_bluestore_nvmedevice_flush_lat, "flush_lat", "Average flush completing latency");
b.add_u64(l_bluestore_nvmedevice_queue_ops, "queue_ops", "Operations in nvme queue");
b.add_time_avg(l_bluestore_nvmedevice_polling_lat, "polling_lat", "Average polling latency");
b.add_time_avg(l_bluestore_nvmedevice_write_queue_lat, "write_queue_lat", "Average queue write request latency");
b.add_time_avg(l_bluestore_nvmedevice_read_queue_lat, "read_queue_lat", "Average queue read request latency");
b.add_time_avg(l_bluestore_nvmedevice_flush_queue_lat, "flush_queue_lat", "Average queue flush request latency");
b.add_u64_counter(l_bluestore_nvmedevice_buffer_alloc_failed, "buffer_alloc_failed", "Alloc data buffer failed count");
logger = b.create_perf_counters();
g_ceph_context->get_perfcounters_collection()->add(logger);
bdev->queue_number++;
if (bdev->queue_number.load() == 1)
reap_io = true;
}
~SharedDriverQueueData() {
g_ceph_context->get_perfcounters_collection()->remove(logger);
if (qpair) {
spdk_nvme_ctrlr_free_io_qpair(qpair);
bdev->queue_number--;
}
// free all spdk dma memory;
if (!data_buf_mempool.empty()) {
for (uint16_t i = 0; i < data_buffer_default_num; i++) {
void *b = data_buf_mempool[i];
ceph_assert(b);
spdk_dma_free(b);
}
data_buf_mempool.clear();
}
delete logger;
data_buf_list.clear_and_dispose(spdk_dma_free);
}
};
@@ -246,12 +212,16 @@ struct Task {
}
void release_segs(SharedDriverQueueData *queue_data) {
if (io_request.extra_segs) {
for (uint16_t i = 0; i < io_request.nseg; i++)
queue_data->data_buf_mempool.push_back(io_request.extra_segs[i]);
for (uint16_t i = 0; i < io_request.nseg; i++) {
auto buf = reinterpret_cast<data_cache_buf *>(io_request.extra_segs[i]);
queue_data->data_buf_list.push_front(*buf);
}
delete io_request.extra_segs;
} else if (io_request.nseg) {
for (uint16_t i = 0; i < io_request.nseg; i++)
queue_data->data_buf_mempool.push_back(io_request.inline_segs[i]);
for (uint16_t i = 0; i < io_request.nseg; i++) {
auto buf = reinterpret_cast<data_cache_buf *>(io_request.inline_segs[i]);
queue_data->data_buf_list.push_front(*buf);
}
}
ctx->total_nseg -= io_request.nseg;
io_request.nseg = 0;
@@ -334,7 +304,7 @@ int SharedDriverQueueData::alloc_buf_from_pool(Task *t, bool write)
if (t->len % data_buffer_size)
++count;
void **segs;
if (count > data_buf_mempool.size())
if (count > data_buf_list.size())
return -ENOMEM;
if (count <= inline_segment_num) {
segs = t->io_request.inline_segs;
@@ -343,8 +313,10 @@ int SharedDriverQueueData::alloc_buf_from_pool(Task *t, bool write)
segs = t->io_request.extra_segs;
}
for (uint16_t i = 0; i < count; i++) {
segs[i] = data_buf_mempool.back();
data_buf_mempool.pop_back();
ceph_assert(!data_buf_list.empty());
segs[i] = &data_buf_list.front();
ceph_assert(segs[i] != nullptr);
data_buf_list.pop_front();
}
t->io_request.nseg = count;
t->ctx->total_nseg += count;
@@ -371,8 +343,6 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
uint32_t max_io_completion = (uint32_t)g_conf().get_val<uint64_t>("bluestore_spdk_max_io_completion");
uint64_t io_sleep_in_us = g_conf().get_val<uint64_t>("bluestore_spdk_io_sleep");
ceph::coarse_real_clock::time_point cur, start
= ceph::coarse_real_clock::now();
while (ioc->num_running) {
again:
dout(40) << __func__ << " polling" << dendl;
@@ -400,7 +370,6 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
dout(20) << __func__ << " write command issued " << lba_off << "~" << lba_count << dendl;
r = alloc_buf_from_pool(t, true);
if (r < 0) {
logger->inc(l_bluestore_nvmedevice_buffer_alloc_failed);
goto again;
}
@@ -414,9 +383,6 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
delete t;
ceph_abort();
}
cur = ceph::coarse_real_clock::now();
auto dur = std::chrono::duration_cast<std::chrono::nanoseconds>(cur - t->start);
logger->tinc(l_bluestore_nvmedevice_write_queue_lat, dur);
break;
}
case IOCommand::READ_COMMAND:
@@ -424,7 +390,6 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
dout(20) << __func__ << " read command issued " << lba_off << "~" << lba_count << dendl;
r = alloc_buf_from_pool(t, false);
if (r < 0) {
logger->inc(l_bluestore_nvmedevice_buffer_alloc_failed);
goto again;
}
@@ -436,10 +401,6 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
t->release_segs(this);
delete t;
ceph_abort();
} else {
cur = ceph::coarse_real_clock::now();
auto dur = std::chrono::duration_cast<std::chrono::nanoseconds>(cur - t->start);
logger->tinc(l_bluestore_nvmedevice_read_queue_lat, dur);
}
break;
}
@@ -452,20 +413,12 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
t->release_segs(this);
delete t;
ceph_abort();
} else {
cur = ceph::coarse_real_clock::now();
auto dur = std::chrono::duration_cast<std::chrono::nanoseconds>(cur - t->start);
logger->tinc(l_bluestore_nvmedevice_flush_queue_lat, dur);
}
break;
}
}
current_queue_depth++;
}
cur = ceph::coarse_real_clock::now();
auto dur = std::chrono::duration_cast<std::chrono::nanoseconds>(cur - start);
logger->tinc(l_bluestore_nvmedevice_polling_lat, dur);
start = ceph::coarse_real_clock::now();
}
if (reap_io)
@@ -678,10 +631,7 @@ void io_complete(void *t, const struct spdk_nvme_cpl *completion)
ceph_assert(queue != NULL);
ceph_assert(ctx != NULL);
--queue->current_queue_depth;
auto dur = std::chrono::duration_cast<std::chrono::nanoseconds>(
ceph::coarse_real_clock::now() - task->start);
if (task->command == IOCommand::WRITE_COMMAND) {
queue->logger->tinc(l_bluestore_nvmedevice_write_lat, dur);
ceph_assert(!spdk_nvme_cpl_is_error(completion));
dout(20) << __func__ << " write/zero op successfully, left "
<< queue->queue_op_seq - queue->completed_op_seq << dendl;
@@ -697,7 +647,6 @@ void io_complete(void *t, const struct spdk_nvme_cpl *completion)
task->release_segs(queue);
delete task;
} else if (task->command == IOCommand::READ_COMMAND) {
queue->logger->tinc(l_bluestore_nvmedevice_read_lat, dur);
ceph_assert(!spdk_nvme_cpl_is_error(completion));
dout(20) << __func__ << " read op successfully" << dendl;
task->fill_cb();
@@ -725,7 +674,6 @@ void io_complete(void *t, const struct spdk_nvme_cpl *completion)
} else {
ceph_assert(task->command == IOCommand::FLUSH_COMMAND);
ceph_assert(!spdk_nvme_cpl_is_error(completion));
queue->logger->tinc(l_bluestore_nvmedevice_flush_lat, dur);
dout(20) << __func__ << " flush op successfully" << dendl;
task->return_code = 0;
}
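
For reference, a hedged sketch of the allocation and release paths the diff implements, with simplified, hypothetical signatures: Pool stands in for SharedDriverQueueData, and error handling is reduced to the -ENOMEM check seen in alloc_buf_from_pool().

#include <cerrno>
#include <cstddef>
#include <vector>
#include <boost/intrusive/slist.hpp>

namespace bi = boost::intrusive;

struct data_cache_buf
  : public bi::slist_base_hook<bi::link_mode<bi::normal_link>> {};

struct Pool {   // stand-in for SharedDriverQueueData
  bi::slist<data_cache_buf, bi::constant_time_size<true>> free_list;

  // Like alloc_buf_from_pool(): fail fast when the request cannot be
  // satisfied, then pop one node per segment off the front of the list.
  int alloc(std::vector<void*> &segs, std::size_t count) {
    if (count > free_list.size())      // O(1) thanks to constant_time_size
      return -ENOMEM;
    for (std::size_t i = 0; i < count; i++) {
      segs.push_back(&free_list.front());
      free_list.pop_front();
    }
    return 0;
  }

  // Like release_segs(): reinterpret each raw segment pointer as a node
  // and link it back onto the free list.
  void release(std::vector<void*> &segs) {
    for (void *s : segs)
      free_list.push_front(*reinterpret_cast<data_cache_buf *>(s));
    segs.clear();
  }
};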