mirror of
https://github.com/ceph/ceph
synced 2025-03-06 08:20:12 +00:00
merged trunk changes r1107:1121 into branches/riccardo/monitor1
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1122 29311d96-e01e-0410-9327-a35deaab8ce9
This commit is contained in:
parent
432611ef46
commit
3968ca40f0
@ -29,6 +29,11 @@ endif
|
||||
CC = g++
|
||||
LIBS = -lpthread
|
||||
|
||||
ifeq ($(want_bdb),yes)
|
||||
CFLAGS += -DUSE_OSBDB
|
||||
OSBDB_LIBS = -ldb_cxx
|
||||
endif
|
||||
|
||||
#for normal mpich2 machines
|
||||
MPICC = mpicxx
|
||||
MPICFLAGS = ${CFLAGS}
|
||||
@ -99,7 +104,14 @@ CLIENT_OBJS= \
|
||||
client/SyntheticClient.o\
|
||||
client/Trace.o
|
||||
|
||||
TARGETS = cmon cosd cmds cfuse csyn newsyn fakesyn
|
||||
ifeq ($(want_bdb),yes)
|
||||
OSBDB_OBJS = \
|
||||
osbdb/OSBDB.o
|
||||
|
||||
OSBDB_OBJ = osbdb.o
|
||||
endif
|
||||
|
||||
TARGETS = cmon cosd cmds cfuse csyn newsyn fakesyn mkmonmap
|
||||
|
||||
SRCS=*.cc */*.cc *.h */*.h */*/*.h
|
||||
|
||||
@ -117,8 +129,8 @@ mkmonmap: mkmonmap.cc common.o
|
||||
cmon: cmon.cc mon.o msg/SimpleMessenger.o common.o
|
||||
${CC} ${CFLAGS} ${LIBS} $^ -o $@
|
||||
|
||||
cosd: cosd.cc osd.o ebofs.o msg/SimpleMessenger.o common.o
|
||||
${CC} ${CFLAGS} ${LIBS} $^ -o $@
|
||||
cosd: cosd.cc osd.o ebofs.o ${OSBDB_OBJ} msg/SimpleMessenger.o common.o
|
||||
${CC} ${CFLAGS} ${LIBS} ${OSBDB_LIBS} $^ -o $@
|
||||
|
||||
cmds: cmds.cc mds.o osdc.o msg/SimpleMessenger.o common.o
|
||||
${CC} ${CFLAGS} ${LIBS} $^ -o $@
|
||||
@ -136,19 +148,19 @@ gprof-helper.so: test/gprof-helper.c
|
||||
|
||||
|
||||
# fake*
|
||||
fakefuse: fakefuse.cc mon.o mds.o client.o osd.o osdc.o ebofs.o client/fuse.o msg/FakeMessenger.o common.o
|
||||
${CC} -pg ${CFLAGS} ${LIBS} -lfuse $^ -o $@
|
||||
fakefuse: fakefuse.cc mon.o mds.o client.o osd.o osdc.o ebofs.o ${OSBDB_OBJ} client/fuse.o msg/FakeMessenger.o common.o
|
||||
${CC} -pg ${CFLAGS} ${LIBS} ${OSBDB_LIBS} -lfuse $^ -o $@
|
||||
|
||||
fakesyn: fakesyn.cc mon.o mds.o client.o osd.o ebofs.o osdc.o msg/FakeMessenger.o common.o
|
||||
${CC} -pg ${CFLAGS} ${LIBS} $^ -o $@
|
||||
fakesyn: fakesyn.cc mon.o mds.o client.o osd.o ebofs.o ${OSBDB_OBJ} osdc.o msg/FakeMessenger.o common.o
|
||||
${CC} -pg ${CFLAGS} ${LIBS} ${OSBDB_LIBS} $^ -o $@
|
||||
|
||||
|
||||
# mpi startup
|
||||
newsyn: newsyn.cc mon.o mds.o client.o osd.o ebofs.o osdc.o msg/SimpleMessenger.o common.o
|
||||
${MPICC} -pg ${MPICFLAGS} ${MPILIBS} $^ -o $@
|
||||
newsyn: newsyn.cc mon.o mds.o client.o osd.o ebofs.o ${OSBDB_OBJ} osdc.o msg/SimpleMessenger.o common.o
|
||||
${MPICC} -pg ${MPICFLAGS} ${MPILIBS} ${OSBDB_LIBS} $^ -o $@
|
||||
|
||||
newsyn.nopg: newsyn.cc mon.o mds.o client.o osd.o ebofs.o osdc.o msg/SimpleMessenger.o common.o
|
||||
${MPICC} ${MPICFLAGS} ${MPILIBS} $^ -o $@
|
||||
newsyn.nopg: newsyn.cc mon.o mds.o client.o osd.o ebofs.o ${OSBDB_OBJ} osdc.o msg/SimpleMessenger.o common.o
|
||||
${MPICC} ${MPICFLAGS} ${MPILIBS} ${OSBDB_LIBS} $^ -o $@
|
||||
|
||||
|
||||
# ebofs
|
||||
@ -184,6 +196,11 @@ mdtest: bench/mdtest/mdtest.o
|
||||
mdtest.ceph: bench/mdtest/mdtest.o libceph.o
|
||||
${MPICC} ${MPICFLAGS} ${MPILIBS} $^ -o $@
|
||||
|
||||
# OSD test
|
||||
|
||||
testos: test/testos.o ebofs.o osbdb.o common.o
|
||||
${CC} ${CFLAGS} ${LIBS} ${OSBDB_LIBS} -o $@ $^
|
||||
|
||||
#
|
||||
|
||||
%.so: %.cc
|
||||
@ -213,6 +230,9 @@ mds.o: ${MDS_OBJS}
|
||||
mon.o: ${MON_OBJS}
|
||||
${LDINC} $@ $^
|
||||
|
||||
osbdb.o: ${OSBDB_OBJS}
|
||||
${LDINC} $@ $^
|
||||
|
||||
%.o: %.cc
|
||||
${CC} ${CFLAGS} -c $< -o $@
|
||||
|
||||
|
@ -949,6 +949,7 @@ void Client::release_caps(Inode *in,
|
||||
dout(5) << "releasing caps on ino " << in->inode.ino << dec
|
||||
<< " had " << cap_string(in->file_caps())
|
||||
<< " retaining " << cap_string(retain)
|
||||
<< " want " << cap_string(in->file_caps_wanted())
|
||||
<< endl;
|
||||
|
||||
for (map<int,InodeCap>::iterator it = in->caps.begin();
|
||||
@ -2119,13 +2120,15 @@ int Client::open(const char *relpath, int flags)
|
||||
// Release the capabilities on `in` that are no longer needed.
//
// Clean cached data is dropped first when no reader has the inode open.
// The set of caps to retain is then derived from the remaining open
// handles and the state of the file cache:
//   - write caps while the inode is open for write or has dirty data;
//   - read caps while the inode is open for read or has cached data.
// Everything else is handed to release_caps().
void Client::close_release(Inode *in)
{
  dout(10) << "close_release on " << in->ino() << endl;
  dout(10) << " wr " << in->num_open_wr << " rd " << in->num_open_rd
           << " dirty " << in->fc.is_dirty() << " cached " << in->fc.is_cached() << endl;

  if (!in->num_open_rd)
    in->fc.release_clean();

  int retain = 0;
  if (in->num_open_wr || in->fc.is_dirty())
    retain |= CAP_FILE_WR | CAP_FILE_WRBUFFER | CAP_FILE_WREXTEND;
  // Readers retain read caps only; the superseded line that OR'ed in
  // CAP_FILE_WR | CAP_FILE_WRBUFFER for readers has been dropped.
  if (in->num_open_rd || in->fc.is_cached())
    retain |= CAP_FILE_RD | CAP_FILE_RDCACHE;

  release_caps(in, retain);      // release caps now.
}
|
||||
|
@ -76,7 +76,7 @@ void FileCache::check_caps()
|
||||
// check callbacks
|
||||
map<int, list<Context*> >::iterator p = caps_callbacks.begin();
|
||||
while (p != caps_callbacks.end()) {
|
||||
if (used == 0 || (~(p->first) & used)) {
|
||||
if (used == 0 || (~(p->first) & used) == 0) {
|
||||
// implemented.
|
||||
dout(10) << "used is " << cap_string(used)
|
||||
<< ", caps " << cap_string(p->first) << " implemented, doing callback(s)" << endl;
|
||||
|
@ -299,6 +299,17 @@ md_config_t g_conf = {
|
||||
fakeclient_op_truncate: false,
|
||||
fakeclient_op_fsync: false,
|
||||
fakeclient_op_close: 200
|
||||
|
||||
#ifdef USE_OSBDB
|
||||
,
|
||||
bdbstore: false,
|
||||
debug_bdbstore: 1,
|
||||
bdbstore_btree: false,
|
||||
bdbstore_ffactor: 0,
|
||||
bdbstore_nelem: 0,
|
||||
bdbstore_pagesize: 0,
|
||||
bdbstore_cachesize: 0
|
||||
#endif // USE_OSBDB
|
||||
};
|
||||
|
||||
|
||||
@ -772,6 +783,28 @@ void parse_config_options(std::vector<char*>& args)
|
||||
g_conf.mds_log = false;
|
||||
}
|
||||
|
||||
#ifdef USE_OSBDB
|
||||
else if (strcmp(args[i], "--bdbstore") == 0) {
|
||||
g_conf.bdbstore = true;
|
||||
g_conf.ebofs = 0;
|
||||
}
|
||||
else if (strcmp(args[i], "--bdbstore-btree") == 0) {
|
||||
g_conf.bdbstore_btree = true;
|
||||
}
|
||||
else if (strcmp(args[i], "--bdbstore-hash-ffactor") == 0) {
|
||||
g_conf.bdbstore_ffactor = atoi(args[++i]);
|
||||
}
|
||||
else if (strcmp(args[i], "--bdbstore-hash-nelem") == 0) {
|
||||
g_conf.bdbstore_nelem = atoi(args[++i]);
|
||||
}
|
||||
else if (strcmp(args[i], "--bdbstore-hash-pagesize") == 0) {
|
||||
g_conf.bdbstore_pagesize = atoi(args[++i]);
|
||||
}
|
||||
else if (strcmp(args[i], "--bdbstore-cachesize") == 0) {
|
||||
g_conf.bdbstore_cachesize = atoi(args[++i]);
|
||||
}
|
||||
#endif // USE_OSBDB
|
||||
|
||||
else {
|
||||
nargs.push_back(args[i]);
|
||||
}
|
||||
|
@ -289,6 +289,15 @@ struct md_config_t {
|
||||
int fakeclient_op_fsync;
|
||||
int fakeclient_op_close;
|
||||
|
||||
#ifdef USE_OSBDB
|
||||
bool bdbstore;
|
||||
int debug_bdbstore;
|
||||
bool bdbstore_btree;
|
||||
int bdbstore_ffactor;
|
||||
int bdbstore_nelem;
|
||||
int bdbstore_pagesize;
|
||||
int bdbstore_cachesize;
|
||||
#endif // USE_OSBDB
|
||||
};
|
||||
|
||||
extern md_config_t g_conf;
|
||||
|
@ -16,7 +16,13 @@
|
||||
#include "Ebofs.h"
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#ifndef DARWIN
|
||||
#include <sys/vfs.h>
|
||||
#else
|
||||
#include <sys/param.h>
|
||||
#include <sys/mount.h>
|
||||
#endif // DARWIN
|
||||
|
||||
// *******************
|
||||
|
||||
@ -1278,7 +1284,9 @@ int Ebofs::statfs(struct statfs *buf)
|
||||
buf->f_files = nodepool.num_total(); /* total file nodes in file system */
|
||||
buf->f_ffree = nodepool.num_free(); /* free file nodes in fs */
|
||||
//buf->f_fsid = 0; /* file system id */
|
||||
#ifndef DARWIN
|
||||
buf->f_namelen = 8; /* maximum length of filenames */
|
||||
#endif // DARWIN
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
1395
branches/riccardo/monitor2/osbdb/OSBDB.cc
Normal file
1395
branches/riccardo/monitor2/osbdb/OSBDB.cc
Normal file
File diff suppressed because it is too large
Load Diff
507
branches/riccardo/monitor2/osbdb/OSBDB.h
Normal file
507
branches/riccardo/monitor2/osbdb/OSBDB.h
Normal file
@ -0,0 +1,507 @@
|
||||
/* OSBDB.h -- ObjectStore on Berkeley DB. -*- c++ -*-
|
||||
Copyright (C) 2007 Casey Marshall <csm@soe.ucsc.edu>
|
||||
|
||||
Ceph - scalable distributed file system
|
||||
|
||||
This is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License version 2.1, as published by the Free Software
|
||||
Foundation. See file COPYING. */
|
||||
|
||||
|
||||
#include <db_cxx.h>
|
||||
#include "osd/ObjectStore.h"
|
||||
|
||||
// Redefine this to use a different BDB access type. DB_BTREE is
|
||||
// probably the only other one that makes sense.
|
||||
#ifndef OSBDB_DB_TYPE
|
||||
#define OSBDB_DB_TYPE DB_HASH
|
||||
#endif // OSBDB_DB_TYPE
|
||||
|
||||
/*
|
||||
* Maximum length of an attribute name.
|
||||
*/
|
||||
#define OSBDB_MAX_ATTR_LEN 256
|
||||
|
||||
#define OSBDB_THIS_VERSION 1
|
||||
|
||||
#define OSBDB_SUPERBLOCK_KEY ((void *) "s")
|
||||
|
||||
/*
 * The "superblock" of the BDB object store.  One of these is stored in
 * the DB to hold version and other information.  Nothing special is
 * recorded here, only the version number the database was written with.
 *
 * In principle this structure is variable-length, depending on the
 * software version that wrote the superblock.
 */
struct stored_superblock
{
  uint32_t version;  // version number the database was written with
};
|
||||
|
||||
inline ostream& operator<<(ostream& out, const stored_superblock sb)
|
||||
{
|
||||
out << "osbdb.super(" << sb.version << ")" << endl;
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * An object identifier.  Defined here so there is a plain-old-data
 * object to work with; mkoid() fills it from an object_t.
 */
struct oid_t // POD
{
  char id[16];  // raw identifier bytes
};
|
||||
|
||||
// Fill `id` with the first sizeof(oid_t) raw bytes of `oid`.
// NOTE(review): presumably object_t is at least sizeof(oid_t) bytes
// wide -- confirm against its definition.
inline void mkoid (oid_t& id, object_t& oid)
{
  // XXX byte order?
  const void *raw = &oid;
  memcpy (id.id, raw, sizeof (oid_t));
}
|
||||
|
||||
inline ostream& operator<<(ostream& out, const oid_t id)
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
out.fill('0');
|
||||
out << setw(2) << hex << (id.id[i] & 0xFF);
|
||||
if ((i & 3) == 3)
|
||||
out << ':';
|
||||
}
|
||||
out.unsetf(ios::right);
|
||||
out << dec;
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* An "inode" key. We map a 'stored_object' struct to this key for
|
||||
* every object.
|
||||
*/
|
||||
struct object_inode_key // POD
|
||||
{
|
||||
oid_t oid;
|
||||
char tag;
|
||||
};
|
||||
|
||||
/**
|
||||
* "Constructor" for an object_inode_key.
|
||||
*/
|
||||
inline object_inode_key new_object_inode_key (object_t& oid)
|
||||
{
|
||||
object_inode_key key;
|
||||
memset(&key, 0, sizeof (object_inode_key));
|
||||
mkoid (key.oid, oid);
|
||||
key.tag = 'i';
|
||||
return key;
|
||||
}
|
||||
|
||||
/*
|
||||
* We use this, instead of sizeof(), to try and guarantee that we
|
||||
* don't include the structure padding, if any.
|
||||
*
|
||||
* This *should* return 17: sizeof (oid_t) == 16; sizeof (char) == 1.
|
||||
*/
|
||||
inline size_t sizeof_object_inode_key()
|
||||
{
|
||||
return offsetof(object_inode_key, tag) + sizeof (char);
|
||||
}
|
||||
|
||||
// Frank Poole: Unfortunately, that sounds a little
|
||||
// like famous last words.
|
||||
// -- 2001: A Space Odyssey
|
||||
|
||||
inline ostream& operator<<(ostream& out, const object_inode_key o)
|
||||
{
|
||||
out << o.tag << "/" << o.oid;
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * A stored object.  This is essentially the "inode" of the object,
 * holding things like the object's length.  The object data itself is
 * stored as-is, mapped by the 128-bit object ID.
 */
struct stored_object
{
  uint32_t length;  // the object's length
};
|
||||
|
||||
inline ostream& operator<<(ostream& out, const stored_object s)
|
||||
{
|
||||
out << "inode(l:" << s.length << ")";
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Key referencing the list of attribute names for an object. This is
|
||||
* simply the object's ID, with an additional character 'a' appended.
|
||||
*/
|
||||
struct attrs_id // POD
|
||||
{
|
||||
oid_t oid;
|
||||
char tag;
|
||||
};
|
||||
|
||||
/*
|
||||
* "Construtor" for attrs_id.
|
||||
*/
|
||||
inline struct attrs_id new_attrs_id (object_t& oid)
|
||||
{
|
||||
attrs_id aid;
|
||||
memset (&aid, 0, sizeof (attrs_id));
|
||||
mkoid(aid.oid, oid);
|
||||
aid.tag = 'a';
|
||||
return aid;
|
||||
}
|
||||
|
||||
/*
|
||||
* See explanation for sizeof_object_inode_id.
|
||||
*/
|
||||
inline size_t sizeof_attrs_id()
|
||||
{
|
||||
return offsetof(struct attrs_id, tag) + sizeof (char);
|
||||
}
|
||||
|
||||
inline ostream& operator<<(ostream& out, const attrs_id id)
|
||||
{
|
||||
out << id.tag << "/" << id.oid;
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Encapsulation of a single attribute name.
|
||||
*/
|
||||
struct attr_name // POD
|
||||
{
|
||||
char name[OSBDB_MAX_ATTR_LEN];
|
||||
};
|
||||
|
||||
inline ostream& operator<<(ostream& out, const attr_name n)
|
||||
{
|
||||
out << n.name;
|
||||
return out;
|
||||
}
|
||||
|
||||
inline bool operator<(const attr_name n1, const attr_name n2)
|
||||
{
|
||||
return (strncmp (n1.name, n2.name, OSBDB_MAX_ATTR_LEN) < 0);
|
||||
}
|
||||
|
||||
inline bool operator>(const attr_name n1, const attr_name n2)
|
||||
{
|
||||
return (strncmp (n1.name, n2.name, OSBDB_MAX_ATTR_LEN) > 0);
|
||||
}
|
||||
|
||||
inline bool operator==(const attr_name n1, const attr_name n2)
|
||||
{
|
||||
std::cerr << n1.name << " == " << n2.name << "?" << endl;
|
||||
return (strncmp (n1.name, n2.name, OSBDB_MAX_ATTR_LEN) == 0);
|
||||
}
|
||||
|
||||
inline bool operator!=(const attr_name n1, const attr_name n2)
|
||||
{
|
||||
return !(n1 == n2);
|
||||
}
|
||||
|
||||
inline bool operator>=(const attr_name n1, const attr_name n2)
|
||||
{
|
||||
return !(n1 < n2);
|
||||
}
|
||||
|
||||
inline bool operator<=(const attr_name n1, const attr_name n2)
|
||||
{
|
||||
return !(n1 > n2);
|
||||
}
|
||||
|
||||
/*
|
||||
* A list of an object or collection's attribute names.
|
||||
*/
|
||||
struct stored_attrs
|
||||
{
|
||||
uint32_t count;
|
||||
attr_name names[0]; // actually variable-length
|
||||
};
|
||||
|
||||
inline ostream& operator<<(ostream& out, const stored_attrs *sa)
|
||||
{
|
||||
out << sa->count << " [ ";
|
||||
for (unsigned i = 0; i < sa->count; i++)
|
||||
out << sa->names[i] << (i == sa->count - 1 ? " " : ", ");
|
||||
out << "]";
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* An object attribute key. An object attribute is mapped simply by
|
||||
* the object ID appended with the attribute name. Attribute names
|
||||
* may not be empty, and must be less than 256 characters, in this
|
||||
* implementation.
|
||||
*/
|
||||
struct attr_id // POD
|
||||
{
|
||||
oid_t oid;
|
||||
attr_name name;
|
||||
};
|
||||
|
||||
// "Constructor" for an attr_id: zeroes the whole structure, sets the
// object ID, and copies at most OSBDB_MAX_ATTR_LEN characters of `name`
// with strncpy() (so a name of that length or longer is stored without
// a terminating NUL).
inline attr_id new_attr_id (object_t& oid, const char *name)
{
  attr_id result;
  memset(&result, 0, sizeof (result));
  strncpy (result.name.name, name, OSBDB_MAX_ATTR_LEN);
  mkoid (result.oid, oid);
  return result;
}
|
||||
|
||||
inline ostream& operator<<(ostream &out, const attr_id id)
|
||||
{
|
||||
out << id.oid << ":" << id.name;
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* A key for a collection attributes list.
|
||||
*/
|
||||
struct coll_attrs_id // POD
|
||||
{
|
||||
coll_t cid;
|
||||
char tag;
|
||||
};
|
||||
|
||||
// "Constructor" for a coll_attrs_id: zeroes the whole structure (padding
// included), then sets the collection ID and the 'C' tag.
inline coll_attrs_id new_coll_attrs_id (coll_t cid)
{
  coll_attrs_id result;
  memset(&result, 0, sizeof (result));
  result.tag = 'C';
  result.cid = cid;
  return result;
}
|
||||
|
||||
inline size_t sizeof_coll_attrs_id()
|
||||
{
|
||||
return offsetof(coll_attrs_id, tag) + sizeof (char);
|
||||
}
|
||||
|
||||
inline ostream& operator<<(ostream& out, coll_attrs_id id)
|
||||
{
|
||||
out << id.tag << "/" << id.cid;
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* A collection attribute key. Similar to
|
||||
*/
|
||||
struct coll_attr_id // POD
|
||||
{
|
||||
coll_t cid;
|
||||
attr_name name;
|
||||
};
|
||||
|
||||
// "Constructor" for a coll_attr_id: zeroes the whole structure, sets the
// collection ID, and copies at most OSBDB_MAX_ATTR_LEN characters of
// `name` with strncpy() (so a name of that length or longer is stored
// without a terminating NUL).
inline coll_attr_id new_coll_attr_id (coll_t cid, const char *name)
{
  coll_attr_id result;
  memset(&result, 0, sizeof (result));
  strncpy (result.name.name, name, OSBDB_MAX_ATTR_LEN);
  result.cid = cid;
  return result;
}
|
||||
|
||||
inline ostream& operator<<(ostream& out, coll_attr_id id)
|
||||
{
|
||||
out << id.cid << ":" << id.name;
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the key we store the master collections list under.
|
||||
*/
|
||||
#define COLLECTIONS_KEY ((void *) "c")
|
||||
|
||||
/*
|
||||
* The master list of collections. There should be one of these per
|
||||
* OSD. The sole reason for this structure is to have the ability
|
||||
* to enumerate all collections stored on this OSD.
|
||||
*/
|
||||
struct stored_colls
|
||||
{
|
||||
// The number of collections.
|
||||
uint32_t count;
|
||||
|
||||
// The collection identifiers. This is a sorted list of coll_t
|
||||
// values.
|
||||
coll_t colls[0]; // actually variable-length
|
||||
};
|
||||
|
||||
inline ostream& operator<<(ostream& out, stored_colls *c)
|
||||
{
|
||||
out << c->count << " [ ";
|
||||
for (unsigned i = 0; i < c->count; i++)
|
||||
{
|
||||
out << hex << c->colls[i];
|
||||
if (i < c->count - 1)
|
||||
out << ", ";
|
||||
}
|
||||
out << " ]" << dec;
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* A stored collection (a bag of object IDs). These are referenced by
|
||||
* the bare collection identifier type, a coll_t (thus, a 32-bit
|
||||
* integer). Internally this is stored as a sorted list of object IDs.
|
||||
*
|
||||
* Note, this structure places all collection items in a single
|
||||
* record; this may be a memory burden for large collections.
|
||||
*/
|
||||
struct stored_coll
|
||||
{
|
||||
// The size of this collection.
|
||||
uint32_t count;
|
||||
|
||||
// The object IDs in this collection. This is a sorted list of all
|
||||
// object ID's in this collection.
|
||||
object_t objects[0]; // actually variable-length
|
||||
};
|
||||
|
||||
inline ostream& operator<<(ostream& out, stored_coll *c)
|
||||
{
|
||||
out << c->count << " [ ";
|
||||
for (unsigned i = 0; i < c->count; i++)
|
||||
{
|
||||
out << c->objects[i];
|
||||
if (i < c->count - 1)
|
||||
out << ", ";
|
||||
}
|
||||
out << " ]";
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
|
||||
* The object store interface for Berkeley DB.
|
||||
*/
|
||||
class OSBDB : public ObjectStore
|
||||
{
|
||||
private:
|
||||
DbEnv *env;
|
||||
Db *db;
|
||||
string device;
|
||||
bool mounted;
|
||||
bool opened;
|
||||
|
||||
public:
|
||||
|
||||
OSBDB(const char *dev)
|
||||
: env(0), db (0), device (dev), mounted(false), opened(false)
|
||||
{
|
||||
/*env = new DbEnv (DB_CXX_NO_EXCEPTIONS);
|
||||
env->set_error_stream (&std::cerr);
|
||||
// WTF? You can't open an env if you set this flag here, but BDB
|
||||
// says you also can't set it after you open the env.
|
||||
//env->set_flags (DB_LOG_INMEMORY, 1);
|
||||
char *p = strrchr (dev, '/');
|
||||
int env_flags = (DB_CREATE | DB_THREAD | DB_INIT_LOCK
|
||||
| DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOG);
|
||||
if (p != NULL)
|
||||
{
|
||||
*p = '\0';
|
||||
if (env->open (dev, env_flags, 0) != 0)
|
||||
{
|
||||
std::cerr << "failed to open environment: "
|
||||
<< dev << std::endl;
|
||||
::abort();
|
||||
}
|
||||
*p = '/';
|
||||
dev = p+1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (env->open (NULL, env_flags, 0) != 0)
|
||||
{
|
||||
std::cerr << "failed to open environment: ." << std::endl;
|
||||
::abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Double WTF: if you remove the DB_LOG_INMEMORY bit, db->open
|
||||
// fails, inexplicably, with EINVAL!*/
|
||||
// env->set_flags (DB_DIRECT_DB | /*DB_AUTO_COMMIT |*/ DB_LOG_INMEMORY, 1);
|
||||
}
|
||||
|
||||
~OSBDB()
|
||||
{
|
||||
if (mounted)
|
||||
{
|
||||
umount();
|
||||
}
|
||||
if (env != NULL)
|
||||
{
|
||||
env->close (0);
|
||||
delete env;
|
||||
}
|
||||
}
|
||||
|
||||
int mount();
|
||||
int umount();
|
||||
int mkfs();
|
||||
|
||||
int statfs(struct statfs *buf);
|
||||
|
||||
int pick_object_revision_lt(object_t& oid);
|
||||
|
||||
bool exists(object_t oid);
|
||||
int stat(object_t oid, struct stat *st);
|
||||
|
||||
int remove(object_t oid, Context *onsafe=0);
|
||||
|
||||
int truncate(object_t oid, off_t size, Context *onsafe=0);
|
||||
|
||||
int read(object_t oid, off_t offset, size_t len,
|
||||
bufferlist& bl);
|
||||
int write(object_t oid, off_t offset, size_t len,
|
||||
bufferlist& bl, Context *onsafe);
|
||||
|
||||
int setattr(object_t oid, const char *name,
|
||||
const void *value, size_t size, Context *onsafe=0);
|
||||
int setattrs(object_t oid, map<string,bufferptr>& aset,
|
||||
Context *onsafe=0);
|
||||
int getattr(object_t oid, const char *name,
|
||||
void *value, size_t size);
|
||||
int getattrs(object_t oid, map<string,bufferptr>& aset);
|
||||
int rmattr(object_t oid, const char *name,
|
||||
Context *onsafe=0);
|
||||
int listattr(object_t oid, char *attrs, size_t size);
|
||||
|
||||
int clone(object_t oid, object_t noid);
|
||||
|
||||
// Collections.
|
||||
|
||||
int list_collections(list<coll_t>& ls);
|
||||
int create_collection(coll_t c, Context *onsafe=0);
|
||||
int destroy_collection(coll_t c, Context *onsafe=0);
|
||||
bool collection_exists(coll_t c);
|
||||
int collection_stat(coll_t c, struct stat *st);
|
||||
int collection_add(coll_t c, object_t o, Context *onsafe=0);
|
||||
int collection_remove(coll_t c, object_t o, Context *onsafe=0);
|
||||
int collection_list(coll_t c, list<object_t>& o);
|
||||
|
||||
int collection_setattr(coll_t cid, const char *name,
|
||||
const void *value, size_t size,
|
||||
Context *onsafe=0);
|
||||
int collection_rmattr(coll_t cid, const char *name,
|
||||
Context *onsafe=0);
|
||||
int collection_getattr(coll_t cid, const char *name,
|
||||
void *value, size_t size);
|
||||
int collection_listattr(coll_t cid, char *attrs, size_t size);
|
||||
|
||||
void sync(Context *onsync);
|
||||
void sync();
|
||||
|
||||
private:
|
||||
int opendb (DBTYPE type=DB_UNKNOWN, int flags=0);
|
||||
|
||||
int _setattr(object_t oid, const char *name, const void *value,
|
||||
size_t size, Context *onsync);
|
||||
int _getattr(object_t oid, const char *name, void *value, size_t size);
|
||||
};
|
@ -26,6 +26,10 @@
|
||||
|
||||
#include "ebofs/Ebofs.h"
|
||||
|
||||
#ifdef USE_OSBDB
|
||||
#include "osbdb/OSBDB.h"
|
||||
#endif // USE_OSBDB
|
||||
|
||||
#include "Ager.h"
|
||||
|
||||
|
||||
@ -157,6 +161,11 @@ OSD::OSD(int id, Messenger *m, MonMap *mm, char *dev) : timer(osd_lock)
|
||||
store = new OBFSStore(whoami, NULL, dev_path);
|
||||
}
|
||||
#endif
|
||||
#ifdef USE_OSBDB
|
||||
else if (g_conf.bdbstore) {
|
||||
store = new OSBDB(dev_path);
|
||||
}
|
||||
#endif // USE_OSBDB
|
||||
else {
|
||||
store = new FakeStore(osd_base_path, whoami);
|
||||
}
|
||||
|
308
branches/riccardo/monitor2/test/testos.cc
Normal file
308
branches/riccardo/monitor2/test/testos.cc
Normal file
@ -0,0 +1,308 @@
|
||||
/* testos.cc -- simple ObjectStore test harness.
|
||||
Copyright (C) 2007 Casey Marshall <csm@soe.ucsc.edu>
|
||||
|
||||
Ceph - scalable distributed file system
|
||||
|
||||
This is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License version 2.1, as published by the Free Software
|
||||
Foundation. See file COPYING. */
|
||||
|
||||
|
||||
#include "osd/ObjectStore.h"
|
||||
#include "ebofs/Ebofs.h"
|
||||
#include "osbdb/OSBDB.h"
|
||||
#include "include/buffer.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <cerrno>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/mount.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Convert a timeval to a total number of microseconds.
static inline unsigned long long
to_usec (struct timeval &time)
{
  const unsigned long long whole_seconds = (unsigned long long) time.tv_sec;
  const unsigned long long micros = (unsigned long long) time.tv_usec;
  return whole_seconds * 1000000 + micros;
}
|
||||
|
||||
// Convert a timeval to a total number of milliseconds; the sub-millisecond
// part of tv_usec is truncated.
static inline unsigned long long
to_msec (struct timeval &time)
{
  const unsigned long long whole_seconds = (unsigned long long) time.tv_sec;
  const unsigned long long micros = (unsigned long long) time.tv_usec;
  return whole_seconds * 1000 + micros / 1000;
}
|
||||
|
||||
int main (int argc, char **argv)
|
||||
{
|
||||
char *osd_name = "ebofs";
|
||||
unsigned object_size = 1024;
|
||||
unsigned object_count = 1024;
|
||||
unsigned write_iter = 64;
|
||||
unsigned random_seed = ::time(NULL);
|
||||
char *device = "/tmp/testos";
|
||||
char *mountcmd = "mount /tmp/testos";
|
||||
char *umountcmd = "umount /tmp/testos";
|
||||
|
||||
bool inhibit_remount = (getenv("TESTOS_INHIBIT_REMOUNT") != NULL);
|
||||
|
||||
if (argc > 1
|
||||
&& (strcmp (argv[1], "-h") == 0
|
||||
|| strcmp (argv[1], "-help") == 0
|
||||
|| strcmp (argv[1], "--help") == 0
|
||||
|| argc > 6))
|
||||
{
|
||||
cout << "usage: " << argv[0] << " [store [object-size [object-count [iterations [seed]]]]]" << endl;
|
||||
cout << endl;
|
||||
cout << "Where the arguments are:" << endl << endl;
|
||||
cout << " store -- store type; default \"ebofs\"" << endl;
|
||||
cout << " object-size -- size of objects; default 1024" << endl;
|
||||
cout << " object-count -- number of objects to write; default 1024"
|
||||
<< endl;
|
||||
cout << " iterations -- write the objects that many times; default 5"
|
||||
<< endl;
|
||||
cout << " seed -- random seed; default current time" << endl;
|
||||
exit (0);
|
||||
}
|
||||
|
||||
if (argc > 1)
|
||||
osd_name = argv[1];
|
||||
if (argc > 2)
|
||||
object_size = (unsigned) atol (argv[2]);
|
||||
if (argc > 3)
|
||||
object_count = (unsigned) atol (argv[3]);
|
||||
if (argc > 4)
|
||||
write_iter = (unsigned) atol (argv[4]);
|
||||
if (argc > 5)
|
||||
random_seed = (unsigned) atol (argv[5]);
|
||||
|
||||
// algin object size to 'long'
|
||||
object_size = ((object_size + (sizeof (long) - 1)) / sizeof (long)) * sizeof (long);
|
||||
|
||||
char *osd_file = new char[32];
|
||||
strcpy (osd_file, "/tmp/testos/testos.XXXXXX");
|
||||
mktemp (osd_file);
|
||||
|
||||
if (!inhibit_remount)
|
||||
{
|
||||
if (system (mountcmd) != 0)
|
||||
{
|
||||
cerr << "mount failed" << endl;
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
|
||||
ObjectStore *os = NULL;
|
||||
if (strcasecmp (osd_name, "ebofs") == 0)
|
||||
{
|
||||
FILE *f = fopen (osd_file, "w");
|
||||
if (f == NULL)
|
||||
{
|
||||
cerr << "failed to open " << osd_file << ": " << strerror (errno)
|
||||
<< endl;
|
||||
exit (1);
|
||||
}
|
||||
// 1G file.
|
||||
fseek (f, 1024 * 1024 * 1024, SEEK_SET);
|
||||
fputc ('\0', f);
|
||||
fclose (f);
|
||||
// 20K cache
|
||||
g_conf.ebofs_bc_size = 5; // times 4K
|
||||
os = new Ebofs (osd_file);
|
||||
}
|
||||
else if (strcasecmp (osd_name, "osbdb") == 0)
|
||||
{
|
||||
char *e = getenv ("OSBDB_FFACTOR");
|
||||
if (e != NULL)
|
||||
g_conf.bdbstore_ffactor = atol(e);
|
||||
e = getenv ("OSBDB_NELEM");
|
||||
if (e != NULL)
|
||||
g_conf.bdbstore_nelem = atol(e);
|
||||
e = getenv ("OSBDB_PAGESIZE");
|
||||
if (e != NULL)
|
||||
g_conf.bdbstore_pagesize = atol(e);
|
||||
g_conf.debug_bdbstore = 1;
|
||||
// 20K cache
|
||||
g_conf.bdbstore_cachesize = 20 * 1024;
|
||||
os = new OSBDB (osd_file);
|
||||
}
|
||||
else if (strcasecmp (osd_name, "osbdb-btree") == 0)
|
||||
{
|
||||
g_conf.bdbstore_btree = true;
|
||||
// 20K cache
|
||||
g_conf.bdbstore_cachesize = 20 * 1024;
|
||||
os = new OSBDB (osd_file);
|
||||
}
|
||||
else
|
||||
{
|
||||
cerr << "I don't know about object store \"" << osd_name << "\""
|
||||
<< endl;
|
||||
exit (1);
|
||||
}
|
||||
|
||||
cout << "Writing " << object_count << " objects of size "
|
||||
<< object_size << " to " << osd_name << endl;
|
||||
|
||||
char *val = (char *) malloc (object_size);
|
||||
char *val2 = (char *) malloc (object_size);
|
||||
auto_ptr<char> valptr (val);
|
||||
auto_ptr<char> valptr2(val2);
|
||||
if (getenv ("TESTOS_UNALIGNED") != NULL)
|
||||
{
|
||||
val = val + 1;
|
||||
val2 = val2 + 1;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < object_size; i++)
|
||||
{
|
||||
val[i] = (char) i;
|
||||
val2[i] = (char) i;
|
||||
}
|
||||
object_t *oids = new object_t[object_count];
|
||||
|
||||
utime_t writes[write_iter];
|
||||
utime_t total_write;
|
||||
utime_t reads[write_iter];
|
||||
utime_t total_read;
|
||||
for (unsigned i = 0; i < write_iter; i++)
|
||||
{
|
||||
cerr << "Iteration " << i << endl;
|
||||
|
||||
int ret = os->mkfs();
|
||||
if (ret != 0)
|
||||
{
|
||||
cerr << "mkfs(" << osd_file << "): " << strerror (-ret) << endl;
|
||||
exit (1);
|
||||
}
|
||||
ret = os->mount();
|
||||
if (ret != 0)
|
||||
{
|
||||
cerr << "mount(): " << strerror (-ret) << endl;
|
||||
exit (1);
|
||||
}
|
||||
|
||||
srandom (random_seed + i);
|
||||
|
||||
for (unsigned j = 0; j < object_count; j++)
|
||||
{
|
||||
oids[j].ino = (uint64_t) random() << 32 | random();
|
||||
oids[j].bno = random();
|
||||
}
|
||||
|
||||
utime_t begin = g_clock.now();
|
||||
for (unsigned o = 0; o < object_count; o++)
|
||||
{
|
||||
bufferptr bp (val, object_size);
|
||||
bufferlist bl;
|
||||
bl.push_back (bp);
|
||||
int ret;
|
||||
if ((ret = os->write (oids[o], 0L, object_size, bl, NULL)) < 0)
|
||||
cerr << "write " << oids[o] << " failed: "
|
||||
<< strerror (-ret) << endl;
|
||||
}
|
||||
utime_t end = g_clock.now() - begin;
|
||||
|
||||
cerr << "Write finished in " << end << endl;
|
||||
total_write += end;
|
||||
writes[i] = end;
|
||||
|
||||
os->sync();
|
||||
os->umount();
|
||||
sync();
|
||||
|
||||
if (!inhibit_remount)
|
||||
{
|
||||
if (system (umountcmd) != 0)
|
||||
{
|
||||
cerr << "umount failed" << endl;
|
||||
exit (1);
|
||||
}
|
||||
|
||||
if (system (mountcmd) != 0)
|
||||
{
|
||||
cerr << "mount(2) failed" << endl;
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
|
||||
os->mount();
|
||||
|
||||
begin = g_clock.now();
|
||||
for (unsigned o = 0; o < object_count; o++)
|
||||
{
|
||||
bufferptr bp (val2, object_size);
|
||||
bufferlist bl;
|
||||
bl.push_back (bp);
|
||||
|
||||
if (os->read (oids[o], 0L, object_size, bl) < 0)
|
||||
{
|
||||
cerr << "object " << oids[o] << " not found!" << endl;
|
||||
}
|
||||
}
|
||||
end = g_clock.now() - begin;
|
||||
|
||||
cerr << "Read finished in " << end << endl;
|
||||
total_read += end;
|
||||
reads[i] = end;
|
||||
|
||||
os->umount();
|
||||
sync();
|
||||
|
||||
if (!inhibit_remount)
|
||||
{
|
||||
if (system (umountcmd) != 0)
|
||||
{
|
||||
cerr << "umount(2) failed" << endl;
|
||||
exit (1);
|
||||
}
|
||||
|
||||
if (system (mountcmd) != 0)
|
||||
{
|
||||
cerr << "mount(3) failed" << endl;
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cerr << "Finished in " << (total_write + total_read) << endl;
|
||||
|
||||
double write_mean = (double) total_write / write_iter;
|
||||
double write_sd = 0.0;
|
||||
for (unsigned i = 0; i < write_iter; i++)
|
||||
{
|
||||
double x = (double) writes[i] - write_mean;
|
||||
write_sd += x * x;
|
||||
}
|
||||
write_sd = sqrt (write_sd / write_iter);
|
||||
|
||||
double read_mean = (double) total_read / write_iter;
|
||||
double read_sd = 0.0;
|
||||
for (unsigned i = 0; i < write_iter; i++)
|
||||
{
|
||||
double x = (double) reads[i] - read_mean;
|
||||
write_sd += x * x;
|
||||
}
|
||||
read_sd = sqrt (read_sd / write_iter);
|
||||
|
||||
cout << "TESTOS: write " << osd_name << ":" << object_size << ":"
|
||||
<< object_count << ":" << write_iter << ":" << random_seed
|
||||
<< " -- " << write_mean << " " << write_sd << endl;
|
||||
|
||||
cout << "TESTOS: read " << osd_name << ":" << object_size << ":"
|
||||
<< object_count << ":" << write_iter << ":" << random_seed
|
||||
<< " -- " << read_mean << " " << read_sd << endl;
|
||||
|
||||
unlink (osd_file);
|
||||
if (!inhibit_remount)
|
||||
{
|
||||
if (system (umountcmd) != 0)
|
||||
{
|
||||
cerr << "umount(3) failed" << endl;
|
||||
exit (1);
|
||||
}
|
||||
}
|
||||
exit (0);
|
||||
}
|
Loading…
Reference in New Issue
Block a user