ceph/branches/marnberg/quota/ebofs/Onode.h
marnberg 183857fe7e Created a quota branch
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1248 29311d96-e01e-0410-9327-a35deaab8ce9
2007-03-15 23:23:52 +00:00

391 lines
9.9 KiB
C++

// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#ifndef __EBOFS_ONODE_H
#define __EBOFS_ONODE_H
#include "include/lru.h"
#include "types.h"
#include "BufferCache.h"
#include "include/interval_set.h"
/*
* object node (like an inode)
*
* holds object metadata, including
* size
* allocation (extent list)
* attributes
*
*/
class Onode : public LRUObject {
private:
  int ref;   // reference count; see get()/put()

public:
  object_t object_id;
  version_t version;  // incremented on each modify.

  // data
  bool     readonly;
  Extent   onode_loc;      // start/length are exposed via get_onode_id()/get_onode_len()
  off_t    object_size;
  unsigned object_blocks;  // total number of blocks described by extent_map

  // onode
  set<coll_t>            collections;
  map<string, bufferptr> attr;
  //vector<Extent>       extents;
  map<block_t, Extent>   extent_map;   // block offset within the object -> extent
  interval_set<block_t>  uncommitted;

  ObjectCache *oc;   // created lazily by get_oc(); 0 when absent

  bool dirty;
  bool dangling;  // not in onode_map
  bool deleted;   // deleted

  list<Context*> commit_waiters;

public:
  /*
   * construct an empty, clean onode for the given object id.
   * onode_loc.length is zeroed; onode_loc.start is left untouched here.
   */
  Onode(object_t oid) : ref(0), object_id(oid), version(0),
			readonly(false),
			object_size(0), object_blocks(0), oc(0),
			dirty(false), dangling(false), deleted(false) {
    onode_loc.length = 0;
  }

  /*
   * destroys the attached ObjectCache, if any.
   * NOTE(review): no copy constructor/assignment is declared in this class,
   * so copying an Onode that has an oc would lead to a double delete.
   */
  ~Onode() {
    if (oc) delete oc;
  }

  block_t get_onode_id() { return onode_loc.start; }
  int get_onode_len() { return onode_loc.length; }

  int get_ref_count() { return ref; }

  /*
   * take a reference.  the first reference calls lru_pin().
   */
  void get() {
    if (ref == 0) lru_pin();
    ref++;
    //cout << "ebofs.onode.get " << hex << object_id << dec << " " << ref << endl;
  }

  /*
   * drop a reference.  dropping the last reference calls lru_unpin().
   */
  void put() {
    ref--;
    if (ref == 0) lru_unpin();
    //cout << "ebofs.onode.put " << hex << object_id << dec << " " << ref << endl;
  }

  /*
   * flag the onode dirty.  a dirty onode holds one reference on itself,
   * taken on the clean->dirty transition only.
   */
  void mark_dirty() {
    if (!dirty) {
      dirty = true;
      get();
    }
  }

  /*
   * clear the dirty flag and release the reference taken by mark_dirty().
   * no-op if the onode is already clean.
   */
  void mark_clean() {
    if (dirty) {
      dirty = false;
      put();
    }
  }

  bool is_dirty() { return dirty; }
  bool is_deleted() { return deleted; }
  bool is_dangling() { return dangling; }

  bool have_oc() {
    return oc != 0;
  }

  /*
   * return the ObjectCache for this onode, creating it on first use.
   * creation takes a reference on the new cache and one on this onode;
   * both are released again by close_oc().
   */
  ObjectCache *get_oc(BufferCache *bc) {
    if (!oc) {
      oc = new ObjectCache(object_id, this, bc);
      oc->get();
      get();
    }
    return oc;
  }

  /*
   * detach the ObjectCache, if there is one.  the cache must be empty.
   * the cache is deleted when its put() returns 0; either way the
   * pointer is cleared and the onode reference from get_oc() is dropped.
   */
  void close_oc() {
    if (oc) {
      //cout << "close_oc on " << object_id << endl;
      assert(oc->is_empty());
      if (oc->put() == 0){
	//cout << "************************* hosing oc" << endl;
	delete oc;
      }
      oc = 0;
      put();
    }
  }


  // allocation

  /*
   * debugging aid, compiled but disabled by the if (0).
   * when enabled it checks that extent_map keys are contiguous starting
   * at 0, that no block start+j appears in two extents, and that the
   * lengths sum to object_blocks.
   */
  void verify_extents() {
    if (0) {  // do crazy stupid sanity checking
      block_t count = 0;
      set<block_t> s;
      cout << "verifying" << endl;

      for (map<block_t,Extent>::iterator p = extent_map.begin();
	   p != extent_map.end();
	   p++) {
	cout << " " << p->first << ": " << p->second << endl;
	assert(count == p->first);
	count += p->second.length;
	for (unsigned j=0;j<p->second.length;j++) {
	  assert(s.count(p->second.start+j) == 0);
	  s.insert(p->second.start+j);
	}
      }

      assert(s.size() == count);
      assert(count == object_blocks);
    }
  }

  /* set_extent(offset, ex)
   * map object blocks [offset, offset+ex.length) to ex.
   *
   * offset must be <= object_blocks (no holes).  writing at the end
   * appends, merging with the last extent when ex starts exactly where
   * that extent ends.  otherwise any existing mappings that overlap the
   * range are trimmed, split, or erased before ex is inserted, and
   * object_blocks grows if the new range extends past the old end.
   */
  void set_extent(block_t offset, Extent ex) {
    //cout << "set_extent " << offset << " -> " << ex << " ... " << object_blocks << endl;
    assert(offset <= object_blocks);
    verify_extents();

    // at the end?
    if (offset == object_blocks) {
      //cout << " appending " << ex << endl;
      if (!extent_map.empty() && extent_map.rbegin()->second.end() == ex.start) {
	//cout << "appending " << ex << " to " << extent_map.rbegin()->second << endl;
	extent_map.rbegin()->second.length += ex.length;
      } else
	extent_map[object_blocks] = ex;
      object_blocks += ex.length;
      return;
    }

    // removing any extent bits we overwrite
    if (!extent_map.empty()) {
      // preceding extent?
      map<block_t,Extent>::iterator p = extent_map.lower_bound(offset);
      if (p != extent_map.begin()) {
	p--;
	if (p->first + p->second.length > offset) {
	  //cout << " preceding was " << p->second << endl;
	  if (p->first + p->second.length > offset+ex.length) {
	    // cutting chunk out of middle, add last bit
	    Extent &n = extent_map[offset+ex.length] = p->second;
	    n.start += offset+ex.length - p->first;
	    n.length -= offset+ex.length - p->first;
	    //cout << " tail frag is " << n << endl;
	  }
	  p->second.length = offset - p->first;  // cut tail off preceding extent
	  //cout << " preceding now " << p->second << endl;
	}
	p++;
      }

      // overlapping extents
      while (p != extent_map.end() &&
	     p->first < offset + ex.length) {
	// remember the successor before p is erased
	map<block_t,Extent>::iterator next = p;
	next++;

	// completely subsumed?
	if (p->first + p->second.length <= offset+ex.length) {
	  //cout << " erasing " << p->second << endl;
	  extent_map.erase(p);
	  p = next;
	  continue;
	}

	// spans new extent, cut off head
	Extent &n = extent_map[ offset+ex.length ] = p->second;
	//cout << " cut head off " << p->second;
	n.start += offset+ex.length - p->first;
	n.length -= offset+ex.length - p->first;
	extent_map.erase(p);
	//cout << ", now " << n << endl;
	break;
      }
    }

    extent_map[ offset ] = ex;

    // extend object?
    if (offset + ex.length > object_blocks)
      object_blocks = offset+ex.length;

    verify_extents();
  }


  /* map_extents(start, len, ls)
   * map the given block range into extents on disk.
   *
   * appends to ls, in order, the (sub)extents backing object blocks
   * [start, start+len).  stops early if the extent map runs out.
   * always returns 0.
   */
  int map_extents(block_t start, block_t len, vector<Extent>& ls) {
    //cout << "map_extents " << start << " " << len << endl;
    verify_extents();

    //assert(start+len <= object_blocks);

    map<block_t,Extent>::iterator p = extent_map.lower_bound(start);
    // no extent begins exactly at start: the range may begin inside the
    // previous extent, so step back and take its tail.
    if (p != extent_map.begin() &&
	(p == extent_map.end() || (p->first > start && p->first))) {
      p--;
      if (p->second.length > start - p->first) {
	Extent ex;
	ex.start = p->second.start + (start - p->first);
	ex.length = MIN(len, p->second.length - (start - p->first));
	ls.push_back(ex);

	//cout << " got (tail of?) " << p->second << " : " << ex << endl;

	start += ex.length;
	len -= ex.length;
      }
      p++;
    }

    while (len > 0 &&
	   p != extent_map.end()) {
      assert(p->first == start);
      Extent ex = p->second;
      ex.length = MIN(len, ex.length);
      ls.push_back(ex);
      //cout << " got (head of?) " << p->second << " : " << ex << endl;
      start += ex.length;
      len -= ex.length;
      p++;
    }

    return 0;
  }

  /* truncate_extents(len, extra)
   * shrink the object to its first len blocks.
   *
   * every extent (or extent tail) at or beyond block len is removed from
   * extent_map and appended to extra.  object_blocks is set to len.
   * always returns 0.
   */
  int truncate_extents(block_t len, vector<Extent>& extra) {
    verify_extents();

    map<block_t,Extent>::iterator p = extent_map.lower_bound(len);
    // no extent begins exactly at len: the cut may fall inside the
    // previous extent, in which case its tail is split off.
    if (p != extent_map.begin() &&
	(p == extent_map.end() || (p->first > len && p->first))) {
      p--;
      if (p->second.length > len - p->first) {
	Extent ex;
	ex.start = p->second.start + (len - p->first);
	ex.length = p->second.length - (len - p->first);
	extra.push_back(ex);

	p->second.length = len - p->first;
	assert(p->second.length > 0);

	//cout << " got (tail of?) " << p->second << " : " << ex << endl;
      }
      p++;
    }

    while (p != extent_map.end()) {
      assert(p->first >= len);
      extra.push_back(p->second);
      // remember the successor before p is erased
      map<block_t,Extent>::iterator n = p;
      n++;
      extent_map.erase(p);
      p = n;
    }

    object_blocks = len;
    verify_extents();
    return 0;
  }

  /* truncate_front_extents(len, extra)
   * drop the first len blocks of the object.
   *
   * the extents (or extent head) backing those blocks are appended to
   * extra.  the surviving extents are re-keyed so that the map again
   * starts at block 0, and object_blocks is reduced by the number of
   * blocks actually removed.  if len exceeds what is mapped, everything
   * is removed and the loop stops at the empty map.
   * always returns 0.
   */
  int truncate_front_extents(block_t len, vector<Extent>& extra) {
    verify_extents();

    block_t removed = 0;   // blocks actually taken off the front

    while (len > 0 && !extent_map.empty()) {
      Extent& ex = extent_map.begin()->second;  // look, this is a reference!
      if (ex.length > len) {
	// partial first extent
	Extent frontbit( ex.start, len );
	extra.push_back(frontbit);
	ex.length -= len;
	ex.start += len;
	removed += len;
	break;
      }

      // pull off entire first extent.
      assert(ex.length <= len);
      len -= ex.length;
      removed += ex.length;
      extra.push_back(ex);
      extent_map.erase(extent_map.begin());
    }

    if (removed > 0) {
      // shift the remaining keys down by the amount removed.  a partially
      // trimmed first extent still carries its old key (< removed), so it
      // is clamped to 0.
      map<block_t,Extent> shifted;
      for (map<block_t,Extent>::iterator p = extent_map.begin();
	   p != extent_map.end();
	   ++p) {
	block_t newoff = (p->first > removed) ? (p->first - removed) : 0;
	shifted[newoff] = p->second;
      }
      extent_map.swap(shifted);

      object_blocks -= removed;
    }

    verify_extents();
    return 0;
  }



  /* map_alloc_regions(start, len, map)
   * map range into regions that need to be (re)allocated on disk
   * because they overlap "safe" (or unallocated) parts of the object
   */
  /*
  void map_alloc_regions(block_t start, block_t len,
			 interval_set<block_t>& alloc) {
    interval_set<block_t> already_uncom;

    alloc.insert(start, len);   // start with whole range
    already_uncom.intersection_of(alloc, uncommitted);
    alloc.subtract(already_uncom);   // take out the bits that aren't yet committed
  }
  */



  // pack/unpack

  // bytes for the collection set: one coll_t per entry.
  int get_collection_bytes() {
    return sizeof(coll_t) * collections.size();
  }

  // bytes for the attributes: per entry, the name plus one extra byte,
  // and the value plus sizeof(int).
  int get_attr_bytes() {
    int s = 0;
    for (map<string, bufferptr>::iterator i = attr.begin();
	 i != attr.end();
	 i++) {
      s += i->first.length() + 1;
      s += i->second.length() + sizeof(int);
    }
    return s;
  }

  // bytes for the extent map: one Extent per entry.
  int get_extent_bytes() {
    return sizeof(Extent) * extent_map.size();
  }

};
/*
 * print a one-line summary of an onode:
 *   onode(<hex object id> len=<object_size> ref=<refcount>[ dirty][ dangling][ deleted] uncom=<uncommitted>)
 * the stream is switched back to decimal after the object id.
 */
inline ostream& operator<<(ostream& out, Onode& on)
{
  out << "onode(" << hex << on.object_id << dec
      << " len=" << on.object_size
      << " ref=" << on.get_ref_count();

  // state flags, each printed only when set
  if (on.is_dirty())
    out << " dirty";
  if (on.is_dangling())
    out << " dangling";
  if (on.is_deleted())
    out << " deleted";

  //  out << " " << &on;
  out << " uncom=" << on.uncommitted << ")";
  return out;
}
#endif