couple fixes. prelim stuff for snapshots/cloning.

git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@851 29311d96-e01e-0410-9327-a35deaab8ce9
This commit is contained in:
sage 2006-09-13 20:58:47 +00:00
parent 1693db33cc
commit c8e6cf0c86
7 changed files with 119 additions and 84 deletions

View File

@ -237,6 +237,50 @@ int Allocator::release_limbo()
/*
 * take one more reference on an allocated extent.
 *
 * alloc_tab is keyed by extent start and stores (length, refcount).
 * an extent with no entry yet is inserted with a refcount of 1; an
 * existing entry must have the same length and gets its count bumped.
 * always returns 0.
 */
int Allocator::_alloc_inc(Extent& ex)
{
  typedef Table<block_t,pair<block_t,int> > alloc_table_t;

  alloc_table_t::Cursor cursor(fs->alloc_tab);
  const bool present =
    fs->alloc_tab->find(ex.start, cursor) == alloc_table_t::Cursor::MATCH;

  if (!present) {
    // first reference: create the entry, @1
    fs->alloc_tab->insert(ex.start, pair<block_t,int>(ex.length, 1));
    dout(10) << "_alloc_inc " << ex << " 0 -> 1" << endl;
    return 0;
  }

  // existing entry must describe exactly this extent
  assert(cursor.current().value.first == ex.length);

  // ask the cursor for a writable value, then bump the count
  pair<block_t,int>& entry = cursor.dirty_current_value();
  const int before = entry.second;
  entry.second = before + 1;
  dout(10) << "_alloc_inc " << ex << " "
           << before << " -> " << entry.second
           << endl;
  return 0;
}
/*
 * drop one reference on an allocated extent.
 *
 * the extent must already have an alloc_tab entry of the same length
 * (asserted).  dropping the last reference removes the entry; otherwise
 * the stored count is decremented in place.  always returns 0.
 */
int Allocator::_alloc_dec(Extent& ex)
{
  typedef Table<block_t,pair<block_t,int> > alloc_table_t;

  alloc_table_t::Cursor cursor(fs->alloc_tab);
  if (fs->alloc_tab->find(ex.start, cursor) != alloc_table_t::Cursor::MATCH) {
    // decrementing an extent we never counted is a caller bug
    assert(0);
    return 0;
  }

  // existing entry must describe exactly this extent
  assert(cursor.current().value.first == ex.length);

  if (cursor.current().value.second == 1) {
    // last reference: drop the entry entirely
    dout(10) << "_alloc_dec " << ex << " 1 -> 0" << endl;
    fs->alloc_tab->remove( cursor.current().key );
    return 0;
  }

  // still referenced elsewhere: just lower the count
  pair<block_t,int>& entry = cursor.dirty_current_value();
  const int before = entry.second;
  entry.second = before - 1;
  dout(10) << "_alloc_dec " << ex << " "
           << before << " -> " << entry.second
           << endl;
  return 0;
}
/*
* release extent into freelist
* WARNING: *ONLY* use this if you _know_ there are no adjacent free extents

View File

@ -56,6 +56,9 @@ protected:
int _release_loner(Extent& ex); // release loner extent
int _release_merge(Extent& ex); // release any extent (searches for adjacent)
int _alloc_inc(Extent& ex);
int _alloc_dec(Extent& ex);
public:
Allocator(Ebofs *f) : fs(f), last_pos(0) {}

View File

@ -24,88 +24,6 @@
#define dout(x) if (x <= g_conf.debug_ebofs) cout << "ebofs.bh."
/*
void BufferHead::finish_partials()
{
dout(10) << "finish_partials on " << *this << endl;
block_t cur_block = 0;
// submit partial writes
for (map<block_t, PartialWrite>::iterator p = partial_write.begin();
p != partial_write.end();
p++) {
dout(10) << "finish_partials submitting queued write to " << p->second.block << endl;
// copy raw buffer; this may be a past write
bufferlist bl;
bl.push_back( oc->bc->bufferpool.alloc(EBOFS_BLOCK_SIZE) );
bl.copy_in(0, EBOFS_BLOCK_SIZE, data);
apply_partial( bl, p->second.partial );
if (tx_ioh && tx_block == p->first) {
assert(is_tx());
oc->bc->bh_cancel_write(this);
}
vector<Extent> exv;
oc->on->map_extents(object_loc.start, 1, exv);
assert(exv.size() == 1);
if (exv[0].start == p->first) {
// current block! make like a bh_write.
assert(cur_block == 0);
cur_block = p->first;
dout(10) << "finish_partials same block, doing a bh_write on " << p->first << " on " << *this << endl;
} else {
// past epoch. just write.
dout(10) << "finish_partials different block, writing to " << p->first << " on " << *this << endl;
oc->bc->dev.write( p->second.block, 1, bl,
new C_OC_PartialTxFinish( oc->bc, p->second.epoch ),
"finish_partials");
//oc->get(); // don't need OC for completion func!
}
}
partial_write.clear();
apply_partial();
if (cur_block) {
// same as epoch_modified, so do a normal bh_write.
// assert: this should match the current onode's block
oc->bc->mark_dirty(this);
if (tx_ioh)
oc->bc->bh_cancel_write(this);
oc->bc->bh_write(oc->on, this, cur_block);
oc->bc->dec_unflushed(epoch_modified); // undo the queued partial inc. (bh_write just inced it)
} else
oc->bc->mark_clean(this);
}
void BufferHead::cancel_partials()
{
dout(10) << "cancel_partials on " << *this << endl;
for (map<block_t, PartialWrite>::iterator p = partial_write.begin();
p != partial_write.end();
p++) {
oc->bc->dec_unflushed( p->second.epoch );
}
}
void BufferHead::queue_partial_write(block_t b)
{
if (oc->bc->partial_write[bh->start()].count(b)) {
// overwrite previous partial write
// note that it better be same epoch if it's the same block!!
assert( bc.partial_write[bh->start()].[b].epoch == epoch_modified );
} else {
oc->bc->inc_unflushed( epoch_modified );
}
oc->bc->partial_write[bh->start()].[ b ].partial = partial;
oc->bc->partial_write[bh->start()].[ b ].epoch = epoch_modified;
}
*/
@ -826,7 +744,7 @@ void BufferCache::rx_finish(ObjectCache *oc,
if (sp->first >= start+length) break;
assert(sp->first >= start);
block_t pstart;
block_t pstart = sp->first;
map<block_t, PartialWrite> writes;
writes.swap( sp->second );

View File

@ -85,6 +85,7 @@ int Ebofs::mount()
for (int i=0; i<EBOFS_NUM_FREE_BUCKETS; i++)
free_tab[i] = new Table<block_t, block_t>( nodepool, sb->free_tab[i] );
limbo_tab = new Table<block_t, block_t>( nodepool, sb->limbo_tab );
alloc_tab = new Table<block_t, pair<block_t,int> >( nodepool, sb->alloc_tab );
collection_tab = new Table<coll_t, Extent>( nodepool, sb->collection_tab );
co_tab = new Table<idpair_t, bool>( nodepool, sb->co_tab );
@ -148,6 +149,7 @@ int Ebofs::mkfs()
for (int i=0; i<EBOFS_NUM_FREE_BUCKETS; i++)
free_tab[i] = new Table<block_t,block_t>( nodepool, empty );
limbo_tab = new Table<block_t,block_t>( nodepool, empty );
alloc_tab = new Table<block_t,pair<block_t,int> >( nodepool, empty );
co_tab = new Table<idpair_t, bool>( nodepool, empty );
@ -193,6 +195,7 @@ void Ebofs::close_tables()
for (int i=0; i<EBOFS_NUM_FREE_BUCKETS; i++)
delete free_tab[i];
delete limbo_tab;
delete alloc_tab;
delete collection_tab;
delete co_tab;
@ -276,6 +279,10 @@ void Ebofs::prepare_super(version_t epoch, bufferptr& bp)
sb.limbo_tab.root = limbo_tab->get_root();
sb.limbo_tab.depth = limbo_tab->get_depth();
sb.alloc_tab.num_keys = alloc_tab->get_num_keys();
sb.alloc_tab.root = alloc_tab->get_root();
sb.alloc_tab.depth = alloc_tab->get_depth();
sb.collection_tab.num_keys = collection_tab->get_num_keys();
sb.collection_tab.root = collection_tab->get_root();
sb.collection_tab.depth = collection_tab->get_depth();
@ -2188,6 +2195,57 @@ int Ebofs::truncate(object_t oid, off_t size, Context *onsafe)
/*
 * clone object 'from' into a new object 'to', under ebofs_lock.
 *
 * returns whatever _clone() returns.  if onsafe is given, it is queued
 * on commit_waiters for the current super_epoch when the clone
 * succeeded (r >= 0), and deleted when it failed.
 */
int Ebofs::clone(object_t from, object_t to, Context *onsafe)
{
  ebofs_lock.Lock();

  const int r = _clone(from, to);

  // hand the completion to the commit waiters, or discard it on failure
  if (onsafe) {
    if (r < 0)
      delete onsafe;
    else
      commit_waiters[super_epoch].push_back(onsafe);
  }

  ebofs_lock.Unlock();
  return r;
}
/*
 * clone object 'from' into a new object 'to'.
 *
 * copies the size, block count, attributes, collection memberships and
 * extent map of the source onode into a freshly created target onode.
 *
 * returns 0 on success, -ENOENT if 'from' does not exist, -EEXIST if
 * 'to' already exists.
 *
 * every onode obtained here is released with put_onode() on every exit
 * path, so no references are leaked.
 */
int Ebofs::_clone(object_t from, object_t to)
{
  // source must exist
  Onode *fon = get_onode(from);
  if (!fon) return -ENOENT;

  // target must not exist yet
  Onode *ton = get_onode(to);
  if (ton) {
    put_onode(fon);
    put_onode(ton);   // get_onode() succeeded, so this ref must be dropped too
    return -EEXIST;
  }
  ton = new_onode(to);
  assert(ton);

  // copy easy bits
  ton->object_size = fon->object_size;
  ton->object_blocks = fon->object_blocks;
  ton->attr = fon->attr;

  // collections
  for (set<coll_t>::iterator p = fon->collections.begin();
       p != fon->collections.end();
       ++p)
    _collection_add(*p, to);

  // extents
  ton->extent_map = fon->extent_map;
  //FIXME inc ref count

  // release our references.
  // NOTE(review): assumes new_onode() returns a referenced onode, the
  // same way get_onode() does -- confirm against new_onode().
  put_onode(ton);
  put_onode(fon);
  return 0;
}
bool Ebofs::exists(object_t oid)
{
ebofs_lock.Lock();

View File

@ -104,6 +104,7 @@ class Ebofs : public ObjectStore {
Table<object_t, Extent> *object_tab;
Table<block_t,block_t> *free_tab[EBOFS_NUM_FREE_BUCKETS];
Table<block_t,block_t> *limbo_tab;
Table<block_t,pair<block_t,int> > *alloc_tab;
// collections
Table<coll_t, Extent> *collection_tab;
@ -249,6 +250,10 @@ class Ebofs : public ObjectStore {
int remove(object_t oid, Context *onsafe=0);
bool write_will_block();
int rename(object_t from, object_t to);
int clone(object_t from, object_t to, Context *onsafe);
// object attr
int setattr(object_t oid, const char *name, const void *value, size_t size, Context *onsafe=0);
int setattrs(object_t oid, map<string,bufferptr>& attrset, Context *onsafe=0);
@ -293,6 +298,7 @@ private:
int _truncate(object_t oid, off_t size);
int _truncate_front(object_t oid, off_t size);
int _remove(object_t oid);
int _clone(object_t from, object_t to);
int _setattr(object_t oid, const char *name, const void *value, size_t size);
int _setattrs(object_t oid, map<string,bufferptr>& attrset);
int _rmattr(object_t oid, const char *name);

View File

@ -186,10 +186,15 @@ class Table {
public:
LeafItem& current() {
const LeafItem& current() {
assert(open[level].is_leaf());
return open[level].leaf_item(pos[level]);
}
// Writable access to the value of the leaf item under the cursor.
// Asserts that the cursor is positioned on a leaf, calls dirty(), and
// then returns a reference to the item's value so the caller can
// modify it in place.
// NOTE(review): dirty() presumably marks (or re-homes) the open nodes
// as modified, which is why the item is looked up only after the call --
// confirm against dirty().
V& dirty_current_value() {
assert(open[level].is_leaf());
dirty();
return open[level].leaf_item(pos[level]).value;
}
// ** read-only bits **
int move_left() {

View File

@ -166,6 +166,7 @@ struct ebofs_super {
// tables
struct ebofs_table free_tab[EBOFS_NUM_FREE_BUCKETS];
struct ebofs_table limbo_tab;
struct ebofs_table alloc_tab;
struct ebofs_table object_tab; // object directory
struct ebofs_table collection_tab; // collection directory
struct ebofs_table co_tab;