// Patch overview. This is a unified diff whose whitespace has been collapsed, so each hunk sits on
// very long lines; the text below the comment preamble is unchanged.
//
// Files touched: src/librbd/image/CreateRequest.{cc,h}, src/librbd/image/OpenRequest.{cc,h},
// src/test/librbd/test_librbd.cc.
//
// CreateRequest:
//  - validate_data_pool() gains an `int64_t *data_pool_id` out-parameter. It still returns 0 early
//    (without writing the out-parameter) when RBD_FEATURE_DATA_POOL is not set in `features`, and
//    on the success path stores data_io_ctx.get_id() into *data_pool_id before returning 0.
//  - The constructor condition that was disabled with `/* TODO */ false &&` is enabled:
//    RBD_FEATURE_DATA_POOL is set when m_data_pool is non-empty and differs from
//    ioctx.get_pool_name(); otherwise the feature bit is cleared.
//  - send() passes &m_data_pool_id (new member, default -1) to validate_data_pool().
//  - create_image() asserts `m_data_pool.empty() || m_data_pool_id != -1`, builds the object
//    prefix as RBD_DATA_PREFIX [+ stringify(m_ioctx.get_id()) + "."] + m_image_id (the bracketed
//    part only when m_data_pool_id != -1), and hands m_data_pool_id to cls_client::create_image
//    in place of the former hard-coded -1.
//  - NOTE(review): if m_data_pool is non-empty but equal to ioctx.get_pool_name(), the feature bit
//    is cleared, validate_data_pool() returns early and m_data_pool_id stays -1, so the new assert
//    in create_image() would fire unless m_data_pool is cleared elsewhere -- the visible else
//    branch only clears the feature bit. Confirm against the full constructor.
//  - NOTE(review): the prefix embeds m_ioctx.get_id(), not m_data_pool_id -- presumably
//    intentional; confirm.
//
// OpenRequest:
//  - handle_v2_get_stripe_unit_count() now chains to the new send_v2_get_data_pool() instead of
//    calling init_layout() itself.
//  - send_v2_get_data_pool() issues cls_client::get_data_pool_start() against header_oid via
//    md_ctx.aio_operate(), reading into m_out_bl.
//  - handle_v2_get_data_pool() decodes the pool id with get_data_pool_finish() on success, maps
//    -ENOEXEC to 0 (leaving data_pool_id at -1), and on any remaining error logs and calls
//    send_close_image(). When data_pool_id != -1 it creates data_ctx with
//    rados.ioctx_create2(data_pool_id, ...), closing the image on failure. It then calls
//    init_layout() and send_v2_apply_metadata().
//  - The header's state diagram and method declarations are updated for V2_GET_DATA_POOL.
//
// Tests:
//  - TestDataPoolIO creates a format-v2 image with RBD_IMAGE_OPTION_DATA_POOL pointing at a pool
//    from create_pool(true), checks rbd_get_data_pool_id(image) != -1, then runs sync/aio
//    write, read and discard checks, end-of-image boundary checks and validate_object_map.
//  - NOTE(review): a failing ASSERT_* after rbd_open returns before rbd_close /
//    rados_ioctx_destroy run; confirm whether that leak on failure is acceptable here.
diff --git a/src/librbd/image/CreateRequest.cc b/src/librbd/image/CreateRequest.cc index df0fe28f079..09aa17871a8 100644 --- a/src/librbd/image/CreateRequest.cc +++ b/src/librbd/image/CreateRequest.cc @@ -72,7 +72,7 @@ int validate_striping(CephContext *cct, uint8_t order, uint64_t stripe_unit, } int validate_data_pool(CephContext *cct, IoCtx &io_ctx, uint64_t features, - const std::string &data_pool) { + const std::string &data_pool, int64_t *data_pool_id) { if ((features & RBD_FEATURE_DATA_POOL) == 0) { return 0; } @@ -84,6 +84,8 @@ int validate_data_pool(CephContext *cct, IoCtx &io_ctx, uint64_t features, lderr(cct) << "data pool " << data_pool << " does not exist" << dendl; return -ENOENT; } + + *data_pool_id = data_io_ctx.get_id(); return 0; } @@ -185,7 +187,7 @@ CreateRequest::CreateRequest(IoCtx &ioctx, const std::string &image_name, m_force_non_primary = !non_primary_global_image_id.empty(); - if (/* TODO */ false && !m_data_pool.empty() && m_data_pool != ioctx.get_pool_name()) { + if (!m_data_pool.empty() && m_data_pool != ioctx.get_pool_name()) { m_features |= RBD_FEATURE_DATA_POOL; } else { m_features &= ~RBD_FEATURE_DATA_POOL; @@ -242,7 +244,8 @@ void CreateRequest::send() { return; } - r = validate_data_pool(m_cct, m_ioctx, m_features, m_data_pool); + r = validate_data_pool(m_cct, m_ioctx, m_features, m_data_pool, + &m_data_pool_id); if (r < 0) { complete(r); return; @@ -384,14 +387,19 @@ Context *CreateRequest::handle_add_image_to_directory(int *result) { template void CreateRequest::create_image() { ldout(m_cct, 20) << this << " " << __func__ << dendl; + assert(m_data_pool.empty() || m_data_pool_id != -1); ostringstream oss; - oss << RBD_DATA_PREFIX << m_image_id; + oss << RBD_DATA_PREFIX; + if (m_data_pool_id != -1) { + oss << stringify(m_ioctx.get_id()) << "."; + } + oss << m_image_id; librados::ObjectWriteOperation op; op.create(true); cls_client::create_image(&op, m_size, m_order, m_features, oss.str(), - /* TODO */-1); + m_data_pool_id); using 
klass = CreateRequest; librados::AioCompletion *comp = diff --git a/src/librbd/image/CreateRequest.h b/src/librbd/image/CreateRequest.h index 90941129104..aa30615c143 100644 --- a/src/librbd/image/CreateRequest.h +++ b/src/librbd/image/CreateRequest.h @@ -109,6 +109,7 @@ private: uint8_t m_journal_splay_width = 0; std::string m_journal_pool; std::string m_data_pool; + int64_t m_data_pool_id = -1; const std::string m_non_primary_global_image_id; const std::string m_primary_mirror_uuid; diff --git a/src/librbd/image/OpenRequest.cc b/src/librbd/image/OpenRequest.cc index 36d740da76f..4fa036eae15 100644 --- a/src/librbd/image/OpenRequest.cc +++ b/src/librbd/image/OpenRequest.cc @@ -273,8 +273,59 @@ Context *OpenRequest::handle_v2_get_stripe_unit_count(int *result) { return nullptr; } - m_image_ctx->init_layout(); + send_v2_get_data_pool(); + return nullptr; +} +template +void OpenRequest::send_v2_get_data_pool() { + CephContext *cct = m_image_ctx->cct; + ldout(cct, 10) << this << " " << __func__ << dendl; + + librados::ObjectReadOperation op; + cls_client::get_data_pool_start(&op); + + using klass = OpenRequest; + librados::AioCompletion *comp = create_rados_ack_callback< + klass, &klass::handle_v2_get_data_pool>(this); + m_out_bl.clear(); + m_image_ctx->md_ctx.aio_operate(m_image_ctx->header_oid, comp, &op, + &m_out_bl); + comp->release(); +} + +template +Context *OpenRequest::handle_v2_get_data_pool(int *result) { + CephContext *cct = m_image_ctx->cct; + ldout(cct, 10) << this << " " << __func__ << ": r=" << *result << dendl; + + int64_t data_pool_id = -1; + if (*result == 0) { + bufferlist::iterator it = m_out_bl.begin(); + *result = cls_client::get_data_pool_finish(&it, &data_pool_id); + } else if (*result == -ENOEXEC) { + *result = 0; + } + + if (*result < 0) { + lderr(cct) << "failed to read data pool: " << cpp_strerror(*result) + << dendl; + send_close_image(*result); + return nullptr; + } + + if (data_pool_id != -1) { + librados::Rados 
rados(m_image_ctx->md_ctx); + *result = rados.ioctx_create2(data_pool_id, m_image_ctx->data_ctx); + if (*result < 0) { + lderr(cct) << "failed to initialize data pool IO context: " + << cpp_strerror(*result) << dendl; + send_close_image(*result); + return nullptr; + } + } + + m_image_ctx->init_layout(); send_v2_apply_metadata(); return nullptr; } diff --git a/src/librbd/image/OpenRequest.h b/src/librbd/image/OpenRequest.h index 627285b40ff..e71093c9016 100644 --- a/src/librbd/image/OpenRequest.h +++ b/src/librbd/image/OpenRequest.h @@ -47,7 +47,10 @@ private: * v | * V2_GET_STRIPE_UNIT_COUNT | * | | - * v v + * v | + * V2_GET_DATA_POOL | + * | | + * v | * /---> V2_APPLY_METADATA -------------> REGISTER_WATCH (skip if * | | | read-only) * \---------/ v @@ -94,6 +97,9 @@ private: void send_v2_get_stripe_unit_count(); Context *handle_v2_get_stripe_unit_count(int *result); + void send_v2_get_data_pool(); + Context *handle_v2_get_data_pool(int *result); + void send_v2_apply_metadata(); Context *handle_v2_apply_metadata(int *result); diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index 9edcb839fb0..35a1815b98e 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -1420,6 +1420,106 @@ TEST_F(TestLibRBD, TestIOWithIOHint) rados_ioctx_destroy(ioctx); } +TEST_F(TestLibRBD, TestDataPoolIO) +{ + REQUIRE_FORMAT_V2(); + + rados_ioctx_t ioctx; + rados_ioctx_create(_cluster, m_pool_name.c_str(), &ioctx); + + std::string data_pool_name = create_pool(true); + + CephContext* cct = reinterpret_cast(_rados.cct()); + bool skip_discard = cct->_conf->rbd_skip_partial_discard; + + rbd_image_t image; + std::string name = get_temp_image_name(); + uint64_t size = 2 << 20; + + bool old_format; + uint64_t features; + ASSERT_EQ(0, get_features(&old_format, &features)); + ASSERT_FALSE(old_format); + + rbd_image_options_t image_options; + rbd_image_options_create(&image_options); + BOOST_SCOPE_EXIT( (&image_options) ) { + 
rbd_image_options_destroy(image_options); + } BOOST_SCOPE_EXIT_END; + + ASSERT_EQ(0, rbd_image_options_set_uint64(image_options, + RBD_IMAGE_OPTION_FEATURES, + features)); + ASSERT_EQ(0, rbd_image_options_set_string(image_options, + RBD_IMAGE_OPTION_DATA_POOL, + data_pool_name.c_str())); + + ASSERT_EQ(0, rbd_create4(ioctx, name.c_str(), size, image_options)); + ASSERT_EQ(0, rbd_open(ioctx, name.c_str(), &image, NULL)); + ASSERT_NE(-1, rbd_get_data_pool_id(image)); + + char test_data[TEST_IO_SIZE + 1]; + char zero_data[TEST_IO_SIZE + 1]; + int i; + + for (i = 0; i < TEST_IO_SIZE; ++i) { + test_data[i] = (char) (rand() % (126 - 33) + 33); + } + test_data[TEST_IO_SIZE] = '\0'; + memset(zero_data, 0, sizeof(zero_data)); + + for (i = 0; i < 5; ++i) + ASSERT_PASSED(write_test_data, image, test_data, TEST_IO_SIZE * i, TEST_IO_SIZE, 0); + + for (i = 5; i < 10; ++i) + ASSERT_PASSED(aio_write_test_data, image, test_data, TEST_IO_SIZE * i, TEST_IO_SIZE, 0); + + for (i = 0; i < 5; ++i) + ASSERT_PASSED(read_test_data, image, test_data, TEST_IO_SIZE * i, TEST_IO_SIZE, 0); + + for (i = 5; i < 10; ++i) + ASSERT_PASSED(aio_read_test_data, image, test_data, TEST_IO_SIZE * i, TEST_IO_SIZE, 0); + + // discard 2nd, 4th sections. + ASSERT_PASSED(discard_test_data, image, TEST_IO_SIZE, TEST_IO_SIZE); + ASSERT_PASSED(aio_discard_test_data, image, TEST_IO_SIZE*3, TEST_IO_SIZE); + + ASSERT_PASSED(read_test_data, image, test_data, 0, TEST_IO_SIZE, 0); + ASSERT_PASSED(read_test_data, image, skip_discard ? test_data : zero_data, + TEST_IO_SIZE, TEST_IO_SIZE, 0); + ASSERT_PASSED(read_test_data, image, test_data, TEST_IO_SIZE*2, TEST_IO_SIZE, 0); + ASSERT_PASSED(read_test_data, image, skip_discard ? 
test_data : zero_data, + TEST_IO_SIZE*3, TEST_IO_SIZE, 0); + ASSERT_PASSED(read_test_data, image, test_data, TEST_IO_SIZE*4, TEST_IO_SIZE, 0); + + rbd_image_info_t info; + rbd_completion_t comp; + ASSERT_EQ(0, rbd_stat(image, &info, sizeof(info))); + // can't read or write starting past end + ASSERT_EQ(-EINVAL, rbd_write(image, info.size, 1, test_data)); + ASSERT_EQ(-EINVAL, rbd_read(image, info.size, 1, test_data)); + // reading through end returns amount up to end + ASSERT_EQ(10, rbd_read(image, info.size - 10, 100, test_data)); + // writing through end returns amount up to end + ASSERT_EQ(10, rbd_write(image, info.size - 10, 100, test_data)); + + rbd_aio_create_completion(NULL, (rbd_callback_t) simple_read_cb, &comp); + ASSERT_EQ(0, rbd_aio_write(image, info.size, 1, test_data, comp)); + ASSERT_EQ(0, rbd_aio_wait_for_complete(comp)); + ASSERT_EQ(-EINVAL, rbd_aio_get_return_value(comp)); + rbd_aio_release(comp); + + rbd_aio_create_completion(NULL, (rbd_callback_t) simple_read_cb, &comp); + ASSERT_EQ(0, rbd_aio_read(image, info.size, 1, test_data, comp)); + ASSERT_EQ(0, rbd_aio_wait_for_complete(comp)); + ASSERT_EQ(-EINVAL, rbd_aio_get_return_value(comp)); + rbd_aio_release(comp); + + ASSERT_PASSED(validate_object_map, image); + ASSERT_EQ(0, rbd_close(image)); + + rados_ioctx_destroy(ioctx); +} TEST_F(TestLibRBD, TestEmptyDiscard) { rados_ioctx_t ioctx;