Merge branch 'next'

Conflicts:
	doc/rbd/rbd-cloudstack.rst
This commit is contained in:
Sage Weil 2013-04-17 11:05:00 -07:00
commit 593507ea70
15 changed files with 149 additions and 69 deletions

View File

@@ -118,4 +118,4 @@ Limitations
.. _Add Primary Storage (4.0.0): http://cloudstack.apache.org/docs/en-US/Apache_CloudStack/4.0.0-incubating/html/Admin_Guide/primary-storage-add.html
.. _Add Primary Storage (4.0.1): http://cloudstack.apache.org/docs/en-US/Apache_CloudStack/4.0.1-incubating/html/Admin_Guide/primary-storage-add.html
.. _Create a New Disk Offering (4.0.0): http://cloudstack.apache.org/docs/en-US/Apache_CloudStack/4.0.0-incubating/html/Admin_Guide/compute-disk-service-offerings.html#creating-disk-offerings
.. _Create a New Disk Offering (4.0.1): http://cloudstack.apache.org/docs/en-US/Apache_CloudStack/4.0.1-incubating/html/Admin_Guide/compute-disk-service-offerings.html#creating-disk-offerings
.. _Create a New Disk Offering (4.0.1): http://cloudstack.apache.org/docs/en-US/Apache_CloudStack/4.0.1-incubating/html/Admin_Guide/compute-disk-service-offerings.html#creating-disk-offerings

View File

@@ -8,7 +8,7 @@
# This will only work with particular qemu versions, like 1.0. Later
# versions of qemu includ qemu-iotests directly in the qemu
# repository.
git clone git://repo.or.cz/qemu-iotests.git
git clone git://ceph.com/git/qemu-iotests.git
cd qemu-iotests
mkdir bin

View File

@@ -1948,9 +1948,9 @@ void Client::send_reconnect(MetaSession *session)
in->exporting_mseq = 0;
if (!in->is_any_caps()) {
ldout(cct, 10) << " removing last cap, closing snaprealm" << dendl;
in->snaprealm_item.remove_myself();
put_snap_realm(in->snaprealm);
in->snaprealm = 0;
in->snaprealm_item.remove_myself();
}
}
}
@@ -3257,8 +3257,8 @@ void Client::handle_snap(MClientSnap *m)
// queue for snap writeback
queue_cap_snap(in, in->snaprealm->get_snap_context().seq);
put_snap_realm(in->snaprealm);
in->snaprealm_item.remove_myself();
put_snap_realm(in->snaprealm);
to_move.push_back(in);
}
}

View File

@@ -11,12 +11,12 @@ class ObjecterWriteback : public WritebackHandler {
ObjecterWriteback(Objecter *o) : m_objecter(o) {}
virtual ~ObjecterWriteback() {}
virtual tid_t read(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
Context *onfinish) {
return m_objecter->read_trunc(oid, oloc, off, len, snapid, pbl, 0,
trunc_size, trunc_seq, onfinish);
virtual void read(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
Context *onfinish) {
m_objecter->read_trunc(oid, oloc, off, len, snapid, pbl, 0,
trunc_size, trunc_seq, onfinish);
}
virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid) {

View File

@@ -48,7 +48,6 @@ namespace librbd {
C_Request(CephContext *cct, Context *c, Mutex *l)
: m_cct(cct), m_ctx(c), m_lock(l) {}
virtual ~C_Request() {}
void set_req(AioRequest *req);
virtual void finish(int r) {
ldout(m_cct, 20) << "aio_cb completing " << dendl;
{
@@ -63,16 +62,39 @@ namespace librbd {
Mutex *m_lock;
};
class C_OrderedWrite : public Context {
public:
C_OrderedWrite(CephContext *cct, LibrbdWriteback::write_result_d *result,
LibrbdWriteback *wb)
: m_cct(cct), m_result(result), m_wb_handler(wb) {}
virtual ~C_OrderedWrite() {}
virtual void finish(int r) {
ldout(m_cct, 20) << "C_OrderedWrite completing " << m_result << dendl;
{
Mutex::Locker l(m_wb_handler->m_lock);
assert(!m_result->done);
m_result->done = true;
m_result->ret = r;
m_wb_handler->complete_writes(m_result->oid);
}
ldout(m_cct, 20) << "C_OrderedWrite finished " << m_result << dendl;
}
private:
CephContext *m_cct;
LibrbdWriteback::write_result_d *m_result;
LibrbdWriteback *m_wb_handler;
};
LibrbdWriteback::LibrbdWriteback(ImageCtx *ictx, Mutex& lock)
: m_tid(0), m_lock(lock), m_ictx(ictx)
{
}
tid_t LibrbdWriteback::read(const object_t& oid,
const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size,
__u32 trunc_seq, Context *onfinish)
void LibrbdWriteback::read(const object_t& oid,
const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size,
__u32 trunc_seq, Context *onfinish)
{
// on completion, take the mutex and then call onfinish.
Context *req = new C_Request(m_ictx->cct, onfinish, &m_lock);
@@ -82,7 +104,6 @@ namespace librbd {
len, off);
rados_completion->release();
assert(r >= 0);
return ++m_tid;
}
bool LibrbdWriteback::may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid)
@@ -132,8 +153,10 @@ namespace librbd {
object_no, 0, m_ictx->layout.fl_object_size,
objectx);
uint64_t object_overlap = m_ictx->prune_parent_extents(objectx, overlap);
C_Request *req_comp = new C_Request(m_ictx->cct, oncommit, &m_lock);
write_result_d *result = new write_result_d(oid.name, oncommit);
m_writes[oid.name].push(result);
ldout(m_ictx->cct, 20) << "write will wait for result " << result << dendl;
C_OrderedWrite *req_comp = new C_OrderedWrite(m_ictx->cct, result, this);
AioWrite *req = new AioWrite(m_ictx, oid.name,
object_no, off, objectx, object_overlap,
bl, snapc, snap_id,
@@ -141,4 +164,32 @@ namespace librbd {
req->send();
return ++m_tid;
}
void LibrbdWriteback::complete_writes(const std::string& oid)
{
assert(m_lock.is_locked());
std::queue<write_result_d*>& results = m_writes[oid];
ldout(m_ictx->cct, 20) << "complete_writes() oid " << oid << dendl;
std::list<write_result_d*> finished;
while (!results.empty()) {
write_result_d *result = results.front();
if (!result->done)
break;
finished.push_back(result);
results.pop();
}
if (results.empty())
m_writes.erase(oid);
for (std::list<write_result_d*>::iterator it = finished.begin();
it != finished.end(); ++it) {
write_result_d *result = *it;
ldout(m_ictx->cct, 20) << "complete_writes() completing " << result
<< dendl;
result->oncommit->complete(result->ret);
delete result;
}
}
}

View File

@@ -3,6 +3,8 @@
#ifndef CEPH_LIBRBD_LIBRBDWRITEBACKHANDLER_H
#define CEPH_LIBRBD_LIBRBDWRITEBACKHANDLER_H
#include <queue>
#include "include/Context.h"
#include "include/types.h"
#include "include/rados/librados.hpp"
@@ -21,10 +23,10 @@ namespace librbd {
virtual ~LibrbdWriteback() {}
// Note that oloc, trunc_size, and trunc_seq are ignored
virtual tid_t read(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
Context *onfinish);
virtual void read(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
Context *onfinish);
// Determine whether a read to this extent could be affected by a write-triggered copy-on-write
virtual bool may_copy_on_write(const object_t& oid, uint64_t read_off, uint64_t read_len, snapid_t snapid);
@@ -35,10 +37,26 @@ namespace librbd {
const bufferlist &bl, utime_t mtime, uint64_t trunc_size,
__u32 trunc_seq, Context *oncommit);
struct write_result_d {
bool done;
int ret;
std::string oid;
Context *oncommit;
write_result_d(const std::string& oid, Context *oncommit) :
done(false), ret(0), oid(oid), oncommit(oncommit) {}
private:
write_result_d(const write_result_d& rhs);
const write_result_d& operator=(const write_result_d& rhs);
};
private:
int m_tid;
void complete_writes(const std::string& oid);
tid_t m_tid;
Mutex& m_lock;
librbd::ImageCtx *m_ictx;
hash_map<std::string, std::queue<write_result_d*> > m_writes;
friend class C_OrderedWrite;
};
}

View File

@@ -2291,6 +2291,9 @@ reprotect_and_return_err:
ldout(ictx->cct, 20) << "diff_iterate " << ictx << " off = " << off
<< " len = " << len << dendl;
// ensure previous writes are visible to listsnaps
_flush(ictx);
int r = ictx_check(ictx);
if (r < 0)
return r;

View File

@@ -1529,6 +1529,17 @@ void FileJournal::committed_thru(uint64_t seq)
dout(5) << "committed_thru " << seq << " (last_committed_seq " << last_committed_seq << ")" << dendl;
last_committed_seq = seq;
// completions!
{
Mutex::Locker locker(finisher_lock);
queue_completions_thru(seq);
if (plug_journal_completions && seq >= header.start_seq) {
dout(10) << " removing completion plug, queuing completions thru journaled_seq " << journaled_seq << dendl;
plug_journal_completions = false;
queue_completions_thru(journaled_seq);
}
}
// adjust start pointer
while (!journalq.empty() && journalq.front().first <= seq) {
journalq.pop_front();
@@ -1543,17 +1554,6 @@ void FileJournal::committed_thru(uint64_t seq)
must_write_header = true;
print_header();
{
Mutex::Locker locker(finisher_lock);
// completions!
queue_completions_thru(seq);
if (plug_journal_completions && seq >= header.start_seq) {
dout(10) << " removing completion plug, queuing completions thru journaled_seq " << journaled_seq << dendl;
plug_journal_completions = false;
queue_completions_thru(journaled_seq);
}
}
// committed but unjournaled items
while (!writeq_empty() && peek_write().seq <= seq) {
dout(15) << " dropping committed but unwritten seq " << peek_write().seq

View File

@@ -1495,6 +1495,20 @@ bool ObjectCacher::flush(Object *ob, loff_t offset, loff_t length)
return clean;
}
bool ObjectCacher::_flush_set_finish(C_GatherBuilder *gather, Context *onfinish)
{
assert(lock.is_locked());
if (gather->has_subs()) {
gather->set_finisher(onfinish);
gather->activate();
return false;
}
ldout(cct, 10) << "flush_set has no dirty|tx bhs" << dendl;
onfinish->complete(0);
return true;
}
// flush. non-blocking, takes callback.
// returns true if already flushed
bool ObjectCacher::flush_set(ObjectSet *oset, Context *onfinish)
@@ -1526,15 +1540,7 @@ bool ObjectCacher::flush_set(ObjectSet *oset, Context *onfinish)
}
}
if (gather.has_subs()) {
gather.set_finisher(onfinish);
gather.activate();
return false;
} else {
ldout(cct, 10) << "flush_set " << oset << " has no dirty|tx bhs" << dendl;
onfinish->complete(0);
return true;
}
return _flush_set_finish(&gather, onfinish);
}
// flush. non-blocking, takes callback.
@@ -1549,7 +1555,8 @@ bool ObjectCacher::flush_set(ObjectSet *oset, vector<ObjectExtent>& exv, Context
return true;
}
ldout(cct, 10) << "flush_set " << oset << " on " << exv.size() << " ObjectExtents" << dendl;
ldout(cct, 10) << "flush_set " << oset << " on " << exv.size()
<< " ObjectExtents" << dendl;
// we'll need to wait for all objects to flush!
C_GatherBuilder gather(cct);
@@ -1573,15 +1580,7 @@ bool ObjectCacher::flush_set(ObjectSet *oset, vector<ObjectExtent>& exv, Context
}
}
if (gather.has_subs()) {
gather.set_finisher(onfinish);
gather.activate();
return false;
} else {
ldout(cct, 10) << "flush_set " << oset << " has no dirty|tx bhs" << dendl;
onfinish->complete(0);
return true;
}
return _flush_set_finish(&gather, onfinish);
}
void ObjectCacher::purge_set(ObjectSet *oset)

View File

@@ -573,6 +573,7 @@ private:
int _wait_for_write(OSDWrite *wr, uint64_t len, ObjectSet *oset, Mutex& lock,
Context *onfreespace);
void maybe_wait_for_writeback(uint64_t len);
bool _flush_set_finish(C_GatherBuilder *gather, Context *onfinish);
public:
bool set_is_cached(ObjectSet *oset);

View File

@@ -12,10 +12,10 @@ class WritebackHandler {
WritebackHandler() {}
virtual ~WritebackHandler() {}
virtual tid_t read(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
Context *onfinish) = 0;
virtual void read(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
Context *onfinish) = 0;
/**
* check if a given extent read result may change due to a write
*

View File

@@ -1561,6 +1561,10 @@ TEST(LibRBD, DiffIterate)
ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
int seed = getpid();
cout << "seed " << seed << std::endl;
srand(seed);
{
librbd::RBD rbd;
librbd::Image image;
@@ -1627,6 +1631,10 @@ TEST(LibRBD, DiffIterateDiscard)
ASSERT_EQ("", create_one_pool_pp(pool_name, rados));
ASSERT_EQ(0, rados.ioctx_create(pool_name.c_str(), ioctx));
int seed = getpid();
cout << "seed " << seed << std::endl;
srand(seed);
{
librbd::RBD rbd;
librbd::Image image;
@@ -1645,6 +1653,7 @@ TEST(LibRBD, DiffIterateDiscard)
ASSERT_EQ(0u, extents.size());
char data[256];
memset(data, 1, sizeof(data));
bl.append(data, 256);
ASSERT_EQ(256, image.write(0, 256, bl));
ASSERT_EQ(0, image.diff_iterate(NULL, 0, size,

View File

@@ -58,15 +58,14 @@ FakeWriteback::~FakeWriteback()
delete m_finisher;
}
tid_t FakeWriteback::read(const object_t& oid,
const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size,
__u32 trunc_seq, Context *onfinish)
void FakeWriteback::read(const object_t& oid,
const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size,
__u32 trunc_seq, Context *onfinish)
{
C_Delay *wrapper = new C_Delay(m_cct, onfinish, m_lock, off, pbl, m_delay_ns);
m_finisher->queue(wrapper, len);
return m_tid.inc();
}
tid_t FakeWriteback::write(const object_t& oid,

View File

@@ -17,10 +17,10 @@ public:
FakeWriteback(CephContext *cct, Mutex *lock, uint64_t delay_ns);
virtual ~FakeWriteback();
virtual tid_t read(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
Context *onfinish);
virtual void read(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, snapid_t snapid,
bufferlist *pbl, uint64_t trunc_size, __u32 trunc_seq,
Context *onfinish);
virtual tid_t write(const object_t& oid, const object_locator_t& oloc,
uint64_t off, uint64_t len, const SnapContext& snapc,

View File

@ -17,5 +17,5 @@ ACTION=="add" SUBSYSTEM=="block", \
ENV{DEVTYPE}=="partition", \
ENV{ID_PART_ENTRY_TYPE}=="4fbd7e29-9d25-41b8-afd0-5ec00ceff05d", \
RUN+="/sbin/cryptsetup --key-file /etc/ceph/dmcrypt-keys/$env{ID_PART_ENTRY_UUID} --key-size 256 create $env{ID_PART_ENTRY_UUID} /dev/$name", \
RUN+="bash -c 'while [ ! -e /dev/mapper/$env{ID_PART_ENTRY_UUID} ];do sleep 1; done'", \
RUN+="/bin/bash -c 'while [ ! -e /dev/mapper/$env{ID_PART_ENTRY_UUID} ];do sleep 1; done'", \
RUN+="/usr/sbin/ceph-disk-activate --mount /dev/mapper/$env{ID_PART_ENTRY_UUID}"