mirror of
https://github.com/ceph/ceph
synced 2025-01-01 08:32:24 +00:00
Merge pull request #34432 from optimistyzy/47_upstream
os/bluestore/NVMEDevice.cc: use slist for tracking mempool Reviewed-by: Kefu Chai <kchai@redhat.com>
This commit is contained in:
commit
36ddb9ed3b
@ -28,6 +28,7 @@
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <thread>
|
||||
#include <boost/intrusive/slist.hpp>
|
||||
|
||||
#include <spdk/nvme.h>
|
||||
|
||||
@ -54,20 +55,6 @@ static constexpr uint32_t data_buffer_size = 8192;
|
||||
|
||||
static constexpr uint16_t inline_segment_num = 32;
|
||||
|
||||
enum {
|
||||
l_bluestore_nvmedevice_first = 632430,
|
||||
l_bluestore_nvmedevice_write_lat,
|
||||
l_bluestore_nvmedevice_read_lat,
|
||||
l_bluestore_nvmedevice_flush_lat,
|
||||
l_bluestore_nvmedevice_write_queue_lat,
|
||||
l_bluestore_nvmedevice_read_queue_lat,
|
||||
l_bluestore_nvmedevice_flush_queue_lat,
|
||||
l_bluestore_nvmedevice_queue_ops,
|
||||
l_bluestore_nvmedevice_polling_lat,
|
||||
l_bluestore_nvmedevice_buffer_alloc_failed,
|
||||
l_bluestore_nvmedevice_last
|
||||
};
|
||||
|
||||
static void io_complete(void *t, const struct spdk_nvme_cpl *completion);
|
||||
|
||||
struct IORequest {
|
||||
@ -78,6 +65,10 @@ struct IORequest {
|
||||
void **extra_segs = nullptr;
|
||||
};
|
||||
|
||||
namespace bi = boost::intrusive;
|
||||
struct data_cache_buf : public bi::slist_base_hook<bi::link_mode<bi::normal_link>>
|
||||
{};
|
||||
|
||||
struct Task;
|
||||
|
||||
class SharedDriverData {
|
||||
@ -143,8 +134,7 @@ class SharedDriverQueueData {
|
||||
public:
|
||||
uint32_t current_queue_depth = 0;
|
||||
std::atomic_ulong completed_op_seq, queue_op_seq;
|
||||
std::vector<void*> data_buf_mempool;
|
||||
PerfCounters *logger = nullptr;
|
||||
bi::slist<data_cache_buf, bi::constant_time_size<true>> data_buf_list;
|
||||
void _aio_handle(Task *t, IOContext *ioc);
|
||||
|
||||
SharedDriverQueueData(NVMEDevice *bdev, SharedDriverData *driver)
|
||||
@ -169,45 +159,21 @@ class SharedDriverQueueData {
|
||||
derr << __func__ << " failed to create memory pool for nvme data buffer" << dendl;
|
||||
ceph_assert(b);
|
||||
}
|
||||
data_buf_mempool.push_back(b);
|
||||
data_buf_list.push_front(*reinterpret_cast<data_cache_buf *>(b));
|
||||
}
|
||||
|
||||
PerfCountersBuilder b(g_ceph_context, string("NVMEDevice-AIOThread-"+stringify(this)),
|
||||
l_bluestore_nvmedevice_first, l_bluestore_nvmedevice_last);
|
||||
b.add_time_avg(l_bluestore_nvmedevice_write_lat, "write_lat", "Average write completing latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_read_lat, "read_lat", "Average read completing latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_flush_lat, "flush_lat", "Average flush completing latency");
|
||||
b.add_u64(l_bluestore_nvmedevice_queue_ops, "queue_ops", "Operations in nvme queue");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_polling_lat, "polling_lat", "Average polling latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_write_queue_lat, "write_queue_lat", "Average queue write request latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_read_queue_lat, "read_queue_lat", "Average queue read request latency");
|
||||
b.add_time_avg(l_bluestore_nvmedevice_flush_queue_lat, "flush_queue_lat", "Average queue flush request latency");
|
||||
b.add_u64_counter(l_bluestore_nvmedevice_buffer_alloc_failed, "buffer_alloc_failed", "Alloc data buffer failed count");
|
||||
logger = b.create_perf_counters();
|
||||
g_ceph_context->get_perfcounters_collection()->add(logger);
|
||||
bdev->queue_number++;
|
||||
if (bdev->queue_number.load() == 1)
|
||||
reap_io = true;
|
||||
}
|
||||
|
||||
~SharedDriverQueueData() {
|
||||
g_ceph_context->get_perfcounters_collection()->remove(logger);
|
||||
if (qpair) {
|
||||
spdk_nvme_ctrlr_free_io_qpair(qpair);
|
||||
bdev->queue_number--;
|
||||
}
|
||||
|
||||
// free all spdk dma memory;
|
||||
if (!data_buf_mempool.empty()) {
|
||||
for (uint16_t i = 0; i < data_buffer_default_num; i++) {
|
||||
void *b = data_buf_mempool[i];
|
||||
ceph_assert(b);
|
||||
spdk_dma_free(b);
|
||||
}
|
||||
data_buf_mempool.clear();
|
||||
}
|
||||
|
||||
delete logger;
|
||||
data_buf_list.clear_and_dispose(spdk_dma_free);
|
||||
}
|
||||
};
|
||||
|
||||
@ -246,12 +212,16 @@ struct Task {
|
||||
}
|
||||
void release_segs(SharedDriverQueueData *queue_data) {
|
||||
if (io_request.extra_segs) {
|
||||
for (uint16_t i = 0; i < io_request.nseg; i++)
|
||||
queue_data->data_buf_mempool.push_back(io_request.extra_segs[i]);
|
||||
for (uint16_t i = 0; i < io_request.nseg; i++) {
|
||||
auto buf = reinterpret_cast<data_cache_buf *>(io_request.extra_segs[i]);
|
||||
queue_data->data_buf_list.push_front(*buf);
|
||||
}
|
||||
delete io_request.extra_segs;
|
||||
} else if (io_request.nseg) {
|
||||
for (uint16_t i = 0; i < io_request.nseg; i++)
|
||||
queue_data->data_buf_mempool.push_back(io_request.inline_segs[i]);
|
||||
for (uint16_t i = 0; i < io_request.nseg; i++) {
|
||||
auto buf = reinterpret_cast<data_cache_buf *>(io_request.inline_segs[i]);
|
||||
queue_data->data_buf_list.push_front(*buf);
|
||||
}
|
||||
}
|
||||
ctx->total_nseg -= io_request.nseg;
|
||||
io_request.nseg = 0;
|
||||
@ -334,7 +304,7 @@ int SharedDriverQueueData::alloc_buf_from_pool(Task *t, bool write)
|
||||
if (t->len % data_buffer_size)
|
||||
++count;
|
||||
void **segs;
|
||||
if (count > data_buf_mempool.size())
|
||||
if (count > data_buf_list.size())
|
||||
return -ENOMEM;
|
||||
if (count <= inline_segment_num) {
|
||||
segs = t->io_request.inline_segs;
|
||||
@ -343,8 +313,10 @@ int SharedDriverQueueData::alloc_buf_from_pool(Task *t, bool write)
|
||||
segs = t->io_request.extra_segs;
|
||||
}
|
||||
for (uint16_t i = 0; i < count; i++) {
|
||||
segs[i] = data_buf_mempool.back();
|
||||
data_buf_mempool.pop_back();
|
||||
ceph_assert(!data_buf_list.empty());
|
||||
segs[i] = &data_buf_list.front();
|
||||
ceph_assert(segs[i] != nullptr);
|
||||
data_buf_list.pop_front();
|
||||
}
|
||||
t->io_request.nseg = count;
|
||||
t->ctx->total_nseg += count;
|
||||
@ -371,8 +343,6 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
|
||||
uint32_t max_io_completion = (uint32_t)g_conf().get_val<uint64_t>("bluestore_spdk_max_io_completion");
|
||||
uint64_t io_sleep_in_us = g_conf().get_val<uint64_t>("bluestore_spdk_io_sleep");
|
||||
|
||||
ceph::coarse_real_clock::time_point cur, start
|
||||
= ceph::coarse_real_clock::now();
|
||||
while (ioc->num_running) {
|
||||
again:
|
||||
dout(40) << __func__ << " polling" << dendl;
|
||||
@ -400,7 +370,6 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
|
||||
dout(20) << __func__ << " write command issued " << lba_off << "~" << lba_count << dendl;
|
||||
r = alloc_buf_from_pool(t, true);
|
||||
if (r < 0) {
|
||||
logger->inc(l_bluestore_nvmedevice_buffer_alloc_failed);
|
||||
goto again;
|
||||
}
|
||||
|
||||
@ -414,9 +383,6 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
|
||||
delete t;
|
||||
ceph_abort();
|
||||
}
|
||||
cur = ceph::coarse_real_clock::now();
|
||||
auto dur = std::chrono::duration_cast<std::chrono::nanoseconds>(cur - t->start);
|
||||
logger->tinc(l_bluestore_nvmedevice_write_queue_lat, dur);
|
||||
break;
|
||||
}
|
||||
case IOCommand::READ_COMMAND:
|
||||
@ -424,7 +390,6 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
|
||||
dout(20) << __func__ << " read command issued " << lba_off << "~" << lba_count << dendl;
|
||||
r = alloc_buf_from_pool(t, false);
|
||||
if (r < 0) {
|
||||
logger->inc(l_bluestore_nvmedevice_buffer_alloc_failed);
|
||||
goto again;
|
||||
}
|
||||
|
||||
@ -436,10 +401,6 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
|
||||
t->release_segs(this);
|
||||
delete t;
|
||||
ceph_abort();
|
||||
} else {
|
||||
cur = ceph::coarse_real_clock::now();
|
||||
auto dur = std::chrono::duration_cast<std::chrono::nanoseconds>(cur - t->start);
|
||||
logger->tinc(l_bluestore_nvmedevice_read_queue_lat, dur);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -452,20 +413,12 @@ void SharedDriverQueueData::_aio_handle(Task *t, IOContext *ioc)
|
||||
t->release_segs(this);
|
||||
delete t;
|
||||
ceph_abort();
|
||||
} else {
|
||||
cur = ceph::coarse_real_clock::now();
|
||||
auto dur = std::chrono::duration_cast<std::chrono::nanoseconds>(cur - t->start);
|
||||
logger->tinc(l_bluestore_nvmedevice_flush_queue_lat, dur);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
current_queue_depth++;
|
||||
}
|
||||
cur = ceph::coarse_real_clock::now();
|
||||
auto dur = std::chrono::duration_cast<std::chrono::nanoseconds>(cur - start);
|
||||
logger->tinc(l_bluestore_nvmedevice_polling_lat, dur);
|
||||
start = ceph::coarse_real_clock::now();
|
||||
}
|
||||
|
||||
if (reap_io)
|
||||
@ -678,10 +631,7 @@ void io_complete(void *t, const struct spdk_nvme_cpl *completion)
|
||||
ceph_assert(queue != NULL);
|
||||
ceph_assert(ctx != NULL);
|
||||
--queue->current_queue_depth;
|
||||
auto dur = std::chrono::duration_cast<std::chrono::nanoseconds>(
|
||||
ceph::coarse_real_clock::now() - task->start);
|
||||
if (task->command == IOCommand::WRITE_COMMAND) {
|
||||
queue->logger->tinc(l_bluestore_nvmedevice_write_lat, dur);
|
||||
ceph_assert(!spdk_nvme_cpl_is_error(completion));
|
||||
dout(20) << __func__ << " write/zero op successfully, left "
|
||||
<< queue->queue_op_seq - queue->completed_op_seq << dendl;
|
||||
@ -697,7 +647,6 @@ void io_complete(void *t, const struct spdk_nvme_cpl *completion)
|
||||
task->release_segs(queue);
|
||||
delete task;
|
||||
} else if (task->command == IOCommand::READ_COMMAND) {
|
||||
queue->logger->tinc(l_bluestore_nvmedevice_read_lat, dur);
|
||||
ceph_assert(!spdk_nvme_cpl_is_error(completion));
|
||||
dout(20) << __func__ << " read op successfully" << dendl;
|
||||
task->fill_cb();
|
||||
@ -725,7 +674,6 @@ void io_complete(void *t, const struct spdk_nvme_cpl *completion)
|
||||
} else {
|
||||
ceph_assert(task->command == IOCommand::FLUSH_COMMAND);
|
||||
ceph_assert(!spdk_nvme_cpl_is_error(completion));
|
||||
queue->logger->tinc(l_bluestore_nvmedevice_flush_lat, dur);
|
||||
dout(20) << __func__ << " flush op successfully" << dendl;
|
||||
task->return_code = 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user