mirror of
https://github.com/ceph/ceph
synced 2025-02-19 00:47:49 +00:00
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1543 29311d96-e01e-0410-9327-a35deaab8ce9
900 lines
25 KiB
C++
900 lines
25 KiB
C++
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
|
// vim: ts=8 sw=2 smarttab
|
|
/*
|
|
* Ceph - scalable distributed file system
|
|
*
|
|
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
|
|
*
|
|
* This is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License version 2.1, as published by the Free Software
|
|
* Foundation. See file COPYING.
|
|
*
|
|
*/
|
|
|
|
|
|
#ifndef __EBOFS_TABLE_H
|
|
#define __EBOFS_TABLE_H
|
|
|
|
#include "types.h"
|
|
#include "nodes.h"
|
|
|
|
/** table **/
|
|
|
|
#define dbtout if (25 <= g_conf.debug_ebofs) cout << "ebofs.table(" << this << ")."
|
|
|
|
|
|
template<class K, class V>
|
|
class Table {
|
|
private:
|
|
NodePool &pool;
|
|
|
|
nodeid_t root;
|
|
int nkeys;
|
|
int depth;
|
|
|
|
public:
|
|
Table(NodePool &p,
|
|
struct ebofs_table& bts) :
|
|
pool(p),
|
|
root(bts.root), nkeys(bts.num_keys), depth(bts.depth) {
|
|
dbtout << "cons" << endl;
|
|
}
|
|
|
|
nodeid_t get_root() { return root; }
|
|
int get_num_keys() { return nkeys; }
|
|
int get_depth() { return depth; }
|
|
|
|
|
|
/*
|
|
*/
|
|
class _IndexItem { // i just need a struct size for below
|
|
K k;
|
|
nodeid_t n;
|
|
};
|
|
class IndexItem {
|
|
public:
|
|
K key;
|
|
nodeid_t node;
|
|
static const int MAX = Node::ITEM_LEN / (sizeof(_IndexItem));
|
|
static const int MIN = MAX/2;
|
|
};
|
|
class _LeafItem { // i just need a struct size for below
|
|
K k;
|
|
V v;
|
|
};
|
|
class LeafItem {
|
|
public:
|
|
K key;
|
|
V value;
|
|
static const int MAX = Node::ITEM_LEN / (sizeof(_LeafItem));
|
|
static const int MIN = MAX/2;
|
|
};
|
|
|
|
class Nodeptr {
|
|
public:
|
|
Node *node;
|
|
|
|
Nodeptr() : node(0) {}
|
|
Nodeptr(Node *n) : node(n) {}
|
|
Nodeptr& operator=(Node *n) {
|
|
node = n;
|
|
return *this;
|
|
}
|
|
|
|
LeafItem& leaf_item(int i) { return (( LeafItem*)(node->item_ptr()))[i]; }
|
|
IndexItem& index_item(int i) { return ((IndexItem*)(node->item_ptr()))[i]; }
|
|
K key(int i) {
|
|
if (node->is_index())
|
|
return index_item(i).key;
|
|
else
|
|
return leaf_item(i).key;
|
|
}
|
|
|
|
bool is_leaf() { return node->is_leaf(); }
|
|
bool is_index() { return node->is_index(); }
|
|
void set_type(int t) { node->set_type(t); }
|
|
|
|
int max_items() const {
|
|
if (node->is_leaf())
|
|
return LeafItem::MAX;
|
|
else
|
|
return IndexItem::MAX;
|
|
}
|
|
int min_items() const { return max_items() / 2; }
|
|
|
|
nodeid_t get_id() { return node->get_id(); }
|
|
|
|
int size() { return node->size(); }
|
|
void set_size(int s) { node->set_size(s); }
|
|
|
|
void remove_at_pos(int p) {
|
|
if (node->is_index()) {
|
|
for (int i=p; i<size()-1; i++)
|
|
index_item(i) = index_item(i+1);
|
|
} else {
|
|
for (int i=p; i<size()-1; i++)
|
|
leaf_item(i) = leaf_item(i+1);
|
|
}
|
|
set_size(size() - 1);
|
|
}
|
|
void insert_at_leaf_pos(int p, K key, V value) {
|
|
assert(is_leaf());
|
|
for (int i=size(); i>p; i--)
|
|
leaf_item(i) = leaf_item(i-1);
|
|
leaf_item(p).key = key;
|
|
leaf_item(p).value = value;
|
|
set_size(size() + 1);
|
|
}
|
|
void insert_at_index_pos(int p, K key, nodeid_t node) {
|
|
assert(is_index());
|
|
for (int i=size(); i>p; i--)
|
|
index_item(i) = index_item(i-1);
|
|
index_item(p).key = key;
|
|
index_item(p).node = node;
|
|
set_size(size() + 1);
|
|
}
|
|
|
|
void append_item(LeafItem& i) {
|
|
leaf_item(size()) = i;
|
|
set_size(size() + 1);
|
|
}
|
|
void append_item(IndexItem& i) {
|
|
index_item(size()) = i;
|
|
set_size(size() + 1);
|
|
}
|
|
|
|
void split(Nodeptr& right) {
|
|
if (node->is_index()) {
|
|
for (int i=min_items(); i<size(); i++)
|
|
right.append_item( index_item(i) );
|
|
} else {
|
|
for (int i=min_items(); i<size(); i++)
|
|
right.append_item( leaf_item(i) );
|
|
}
|
|
set_size(min_items());
|
|
}
|
|
|
|
void merge(Nodeptr& right) {
|
|
if (node->is_index())
|
|
for (int i=0; i<right.size(); i++)
|
|
append_item( right.index_item(i) );
|
|
else
|
|
for (int i=0; i<right.size(); i++)
|
|
append_item( right.leaf_item(i) );
|
|
right.set_size(0);
|
|
}
|
|
|
|
};
|
|
|
|
/*
|
|
*/
|
|
class Cursor {
|
|
protected:
|
|
public:
|
|
static const int MATCH = 1; // on key
|
|
static const int INSERT = 0; // before key
|
|
static const int OOB = -1; // at end
|
|
|
|
Table *table;
|
|
vector<Nodeptr> open; // open nodes
|
|
vector<int> pos; // position within the node
|
|
//Nodeptr open[20];
|
|
//int pos[20];
|
|
int level;
|
|
|
|
Cursor(Table *t) : table(t), open(t->depth), pos(t->depth), level(0) {}
|
|
|
|
public:
|
|
|
|
const LeafItem& current() {
|
|
assert(open[level].is_leaf());
|
|
return open[level].leaf_item(pos[level]);
|
|
}
|
|
V& dirty_current_value() {
|
|
assert(open[level].is_leaf());
|
|
dirty();
|
|
return open[level].leaf_item(pos[level]).value;
|
|
}
|
|
|
|
// ** read-only bits **
|
|
int move_left() {
|
|
if (table->depth == 0) return OOB;
|
|
|
|
// work up around branch
|
|
int l;
|
|
for (l = level; l >= 0; l--)
|
|
if (pos[l] > 0) break;
|
|
if (l < 0)
|
|
return OOB; // we are the first item in the btree
|
|
|
|
// move left one
|
|
pos[l]--;
|
|
|
|
// work back down right side
|
|
for (; l<level; l++) {
|
|
open[l+1] = table->pool.get_node( open[l].index_item(pos[l]).node );
|
|
pos[l+1] = open[l+1].size() - 1;
|
|
}
|
|
return 1;
|
|
}
|
|
int move_right() {
|
|
if (table->depth == 0) return OOB;
|
|
|
|
// work up branch
|
|
int l;
|
|
for (l=level; l>=0; l--)
|
|
if (pos[l] < open[l].size() - 1) break;
|
|
if (l < 0) {
|
|
/* we are at last item in btree. */
|
|
if (pos[level] < open[level].size()) {
|
|
pos[level]++; /* move into add position! */
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/* move right one */
|
|
assert( pos[l] < open[l].size() );
|
|
pos[l]++;
|
|
|
|
/* work back down */
|
|
for (; l<level; l++) {
|
|
open[l+1] = table->pool.get_node( open[l].index_item(pos[l]).node );
|
|
pos[l+1] = 0; // furthest left
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
// ** modifications **
|
|
void dirty() {
|
|
for (int l=level; l>=0; l--) {
|
|
if (open[l].node->is_dirty()) break; // already dirty! (and thus parents are too)
|
|
|
|
table->pool.dirty_node(open[l].node);
|
|
if (l > 0)
|
|
open[l-1].index_item( pos[l-1] ).node = open[l].get_id();
|
|
else
|
|
table->root = open[0].get_id();
|
|
}
|
|
}
|
|
private:
|
|
void repair_parents() {
|
|
// did i make a change at the start of a node?
|
|
if (pos[level] == 0) {
|
|
K key = open[level].key(0); // new key parents should have
|
|
for (int j=level-1; j>=0; j--) {
|
|
if (open[j].index_item(pos[j]).key == key)
|
|
break; /* it's the same key, we can stop fixing */
|
|
open[j].index_item(pos[j]).key = key;
|
|
if (pos[j] > 0) break; /* last in position 0.. */
|
|
}
|
|
}
|
|
}
|
|
|
|
public:
|
|
void remove() {
|
|
dirty();
|
|
|
|
// remove from node
|
|
open[level].remove_at_pos( pos[level] );
|
|
repair_parents();
|
|
|
|
// was it a key?
|
|
if (level == table->depth-1)
|
|
table->nkeys--;
|
|
}
|
|
|
|
void insert(K key, V value) {
|
|
dirty();
|
|
|
|
// insert
|
|
open[level].insert_at_leaf_pos(pos[level], key, value);
|
|
repair_parents();
|
|
|
|
// was it a key?
|
|
if (level == table->depth-1)
|
|
table->nkeys++;
|
|
}
|
|
|
|
int rotate_left() {
|
|
if (level == 0) return -1; // i am root
|
|
if (pos[level-1] == 0) return -1; // nothing to left
|
|
|
|
Nodeptr here = open[level];
|
|
Nodeptr parent = open[level-1];
|
|
Nodeptr left = table->pool.get_node( parent.index_item(pos[level-1] - 1).node );
|
|
if (left.size() == left.max_items()) return -1; // it's full
|
|
|
|
// make both dirty
|
|
dirty();
|
|
if (!left.node->is_dirty()) {
|
|
table->pool.dirty_node(left.node);
|
|
parent.index_item(pos[level-1]-1).node = left.get_id();
|
|
}
|
|
|
|
dbtout << "rotating item " << here.key(0) << " left from " << here.get_id() << " to " << left.get_id() << endl;
|
|
|
|
/* add */
|
|
if (here.node->is_leaf())
|
|
left.append_item(here.leaf_item(0));
|
|
else
|
|
left.append_item(here.index_item(0));
|
|
|
|
/* remove */
|
|
here.remove_at_pos(0);
|
|
|
|
/* fix parent index for me */
|
|
parent.index_item( pos[level-1] ).key = here.key(0);
|
|
// we never have to update past immediate parent, since we're not at pos 0
|
|
|
|
/* adjust cursor */
|
|
if (pos[level] > 0)
|
|
pos[level]--;
|
|
//else
|
|
//assert(1); /* if we were positioned here, we're equal */
|
|
/* if it was 0, then the shifted item == our key, and we can stay here safely. */
|
|
return 0;
|
|
}
|
|
int rotate_right() {
|
|
if (level == 0) return -1; // i am root
|
|
if (pos[level-1] + 1 >= open[level-1].size()) return -1; // nothing to right
|
|
|
|
Nodeptr here = open[level];
|
|
Nodeptr parent = open[level-1];
|
|
Nodeptr right = table->pool.get_node( parent.index_item( pos[level-1] + 1 ).node );
|
|
if (right.size() == right.max_items()) return -1; // it's full
|
|
|
|
// make both dirty
|
|
dirty();
|
|
if (!right.node->is_dirty()) {
|
|
table->pool.dirty_node(right.node);
|
|
parent.index_item( pos[level-1]+1 ).node = right.get_id();
|
|
}
|
|
|
|
if (pos[level] == here.size()) {
|
|
/* let's just move the cursor over! */
|
|
//if (sizeof(K) == 8)
|
|
dbtout << "shifting cursor right from " << here.get_id() << " to less-full node " << right.get_id() << endl;
|
|
open[level] = right;
|
|
pos[level] = 0;
|
|
pos[level-1]++;
|
|
return 0;
|
|
}
|
|
|
|
//if (sizeof(K) == 8)
|
|
dbtout << "rotating item " << hex << here.key(here.size()-1) << dec << " right from "
|
|
<< here.get_id() << " to " << right.get_id() << endl;
|
|
|
|
/* add */
|
|
if (here.is_index())
|
|
right.insert_at_index_pos(0,
|
|
here.index_item( here.size()-1 ).key,
|
|
here.index_item( here.size()-1 ).node);
|
|
else
|
|
right.insert_at_leaf_pos(0,
|
|
here.leaf_item( here.size()-1 ).key,
|
|
here.leaf_item( here.size()-1 ).value);
|
|
|
|
/* remove */
|
|
here.set_size(here.size() - 1);
|
|
|
|
/* fix parent index for right */
|
|
parent.index_item( pos[level-1] + 1 ).key = right.key(0);
|
|
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
|
|
public:
|
|
bool almost_full() {
|
|
if (2*(depth+1) > pool.num_free()) // worst case, plus some.
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
int find(K key, Cursor& cursor) {
|
|
dbtout << "find " << key << endl;
|
|
|
|
if (depth == 0)
|
|
return Cursor::OOB;
|
|
|
|
// init
|
|
cursor.level = 0;
|
|
|
|
// start at root
|
|
Nodeptr curnode( pool.get_node(root) );
|
|
cursor.open[0] = curnode;
|
|
|
|
if (curnode.size() == 0) return -1; // empty!
|
|
|
|
// find leaf
|
|
for (cursor.level = 0; cursor.level < depth-1; cursor.level++) {
|
|
/* if key=5, we want 2 3 [4] 6 7, or 3 4 [5] 5 6 (err to the left) */
|
|
int left = 0; /* i >= left */
|
|
int right = curnode.size()-1; /* i < right */
|
|
while (left < right) {
|
|
int i = left + (right - left) / 2;
|
|
if (curnode.index_item(i).key < key) {
|
|
left = i + 1;
|
|
} else if (i && curnode.index_item(i-1).key >= key) {
|
|
right = i;
|
|
} else {
|
|
left = right = i;
|
|
break;
|
|
}
|
|
}
|
|
int i = left;
|
|
if (i && curnode.index_item(i).key > key) i--;
|
|
|
|
#ifdef EBOFS_DEBUG_BTREE
|
|
int j;
|
|
for (j=0; j<curnode.size()-1; j++) {
|
|
if (curnode.index_item(j).key == key) break; /* perfect */
|
|
if (curnode.index_item(j+1).key > key) break;
|
|
}
|
|
if (i != j) {
|
|
dbtout << "btree binary search failed" << endl;
|
|
i = j;
|
|
}
|
|
#endif
|
|
|
|
cursor.pos[cursor.level] = i;
|
|
|
|
/* get child node */
|
|
curnode = pool.get_node( cursor.open[cursor.level].index_item(i).node );
|
|
cursor.open[cursor.level+1] = curnode;
|
|
}
|
|
|
|
/* search leaf */
|
|
/* if key=5, we want 2 3 4 [6] 7, or 3 4 [5] 5 6 (err to the right) */
|
|
int left = 0; /* i >= left */
|
|
int right = curnode.size(); /* i < right */
|
|
while (left < right) {
|
|
int i = left + (right - left) / 2;
|
|
if (curnode.leaf_item(i).key < key) {
|
|
left = i + 1;
|
|
} else if (i && curnode.leaf_item(i-1).key >= key) {
|
|
right = i;
|
|
} else {
|
|
left = right = i;
|
|
break;
|
|
}
|
|
}
|
|
int i = left;
|
|
|
|
#ifdef EBOFS_DEBUG_BTREE
|
|
int j;
|
|
for (j=0; j<curnode.size(); j++) {
|
|
if (curnode.leaf_item(j).key >= key) break;
|
|
}
|
|
if (i != j) {
|
|
dbtout << "btree binary search failed" << endl;
|
|
i = j;
|
|
}
|
|
#endif
|
|
|
|
cursor.pos[cursor.level] = i; /* first key in this node, or key insertion point */
|
|
|
|
if (curnode.size() >= i+1) {
|
|
if (curnode.leaf_item(i).key == key) {
|
|
return Cursor::MATCH; /* it's the actual key */
|
|
} else {
|
|
return Cursor::INSERT; /* it's an insertion point */
|
|
}
|
|
}
|
|
return Cursor::OOB; /* it's the end of the btree (also a valid insertion point) */
|
|
}
|
|
|
|
int lookup(K key) {
|
|
dbtout << "lookup" << endl;
|
|
Cursor cursor(this);
|
|
if (find(key, cursor) == Cursor::MATCH)
|
|
return 0;
|
|
return -1;
|
|
}
|
|
|
|
int lookup(K key, V& value) {
|
|
dbtout << "lookup" << endl;
|
|
Cursor cursor(this);
|
|
if (find(key, cursor) == Cursor::MATCH) {
|
|
value = cursor.current().value;
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
int insert(K key, V value) {
|
|
dbtout << "insert " << key << " -> " << value << endl;
|
|
if (almost_full()) return -1;
|
|
|
|
// empty?
|
|
if (nkeys == 0) {
|
|
if (root == -1) {
|
|
// create a root node (leaf!)
|
|
assert(depth == 0);
|
|
Nodeptr newroot( pool.new_node(Node::TYPE_LEAF) );
|
|
root = newroot.get_id();
|
|
depth++;
|
|
}
|
|
assert(depth == 1);
|
|
assert(root >= 0);
|
|
}
|
|
|
|
// start at/near key
|
|
Cursor cursor(this);
|
|
find(key, cursor);
|
|
|
|
// insert loop
|
|
nodeid_t nodevalue = 0;
|
|
while (1) {
|
|
|
|
/* room in this node? */
|
|
if (cursor.open[cursor.level].size() < cursor.open[cursor.level].max_items()) {
|
|
if (cursor.open[cursor.level].is_leaf())
|
|
cursor.insert( key, value ); // will dirty, etc.
|
|
else {
|
|
// indices are already dirty
|
|
cursor.open[cursor.level].insert_at_index_pos(cursor.pos[cursor.level], key, nodevalue);
|
|
}
|
|
verify("insert 1");
|
|
return 0;
|
|
}
|
|
|
|
/* this node is full. */
|
|
assert( cursor.open[cursor.level].size() == cursor.open[cursor.level].max_items() );
|
|
|
|
/* can we rotate? */
|
|
if (false) // NO! there's a bug in here somewhere, don't to it.
|
|
if (cursor.level > 0) {
|
|
if ((cursor.pos[cursor.level-1] > 0
|
|
&& cursor.rotate_left() >= 0) ||
|
|
(cursor.pos[cursor.level-1] + 1 < cursor.open[cursor.level-1].size()
|
|
&& cursor.rotate_right() >= 0)) {
|
|
|
|
if (cursor.open[cursor.level].is_leaf())
|
|
cursor.insert( key, value ); // will dirty, etc.
|
|
else {
|
|
// indices are already dirty
|
|
cursor.open[cursor.level].insert_at_index_pos(cursor.pos[cursor.level], key, nodevalue);
|
|
}
|
|
verify("insert 2");
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/** split node **/
|
|
|
|
if (cursor.level == depth-1) {
|
|
dbtout << "splitting leaf " << cursor.open[cursor.level].get_id() << endl;
|
|
} else {
|
|
dbtout << "splitting index " << cursor.open[cursor.level].get_id() << endl;
|
|
}
|
|
|
|
cursor.dirty();
|
|
|
|
// split
|
|
Nodeptr leftnode = cursor.open[cursor.level];
|
|
Nodeptr newnode( pool.new_node(leftnode.node->get_type()) );
|
|
leftnode.split( newnode );
|
|
|
|
/* insert our item */
|
|
if (cursor.pos[cursor.level] > leftnode.size()) {
|
|
// not with cursor, since this node isn't added yet!
|
|
if (newnode.is_leaf()) {
|
|
newnode.insert_at_leaf_pos( cursor.pos[cursor.level] - leftnode.size(),
|
|
key, value );
|
|
nkeys++;
|
|
} else {
|
|
newnode.insert_at_index_pos( cursor.pos[cursor.level] - leftnode.size(),
|
|
key, nodevalue );
|
|
}
|
|
} else {
|
|
// with cursor (if leaf)
|
|
if (leftnode.is_leaf())
|
|
cursor.insert( key, value );
|
|
else
|
|
leftnode.insert_at_index_pos( cursor.pos[cursor.level],
|
|
key, nodevalue );
|
|
}
|
|
|
|
/* are we at the root? */
|
|
if (cursor.level == 0) {
|
|
/* split root. */
|
|
dbtout << "that split was the root " << root << endl;
|
|
Nodeptr newroot( pool.new_node(Node::TYPE_INDEX) );
|
|
|
|
/* new root node */
|
|
newroot.set_size(2);
|
|
newroot.index_item(0).key = leftnode.key(0);
|
|
newroot.index_item(0).node = root;
|
|
newroot.index_item(1).key = newnode.key(0);
|
|
newroot.index_item(1).node = newnode.get_id();
|
|
|
|
/* heighten tree */
|
|
depth++;
|
|
root = newroot.get_id();
|
|
verify("insert 3");
|
|
return 0;
|
|
}
|
|
|
|
/* now insert newindex in level-1 */
|
|
nodevalue = newnode.get_id();
|
|
key = newnode.key(0);
|
|
cursor.level--;
|
|
cursor.pos[cursor.level]++; // ...to the right of leftnode!
|
|
}
|
|
}
|
|
|
|
|
|
int remove(K key) {
|
|
dbtout << "remove " << key << endl;
|
|
|
|
if (almost_full()) {
|
|
cout << "table almost full, failing" << endl;
|
|
assert(0);
|
|
return -1;
|
|
}
|
|
|
|
Cursor cursor(this);
|
|
if (find(key, cursor) <= 0) {
|
|
cerr << "remove " << key << " 0x" << hex << key << dec << " .. dne" << endl;
|
|
g_conf.debug_ebofs = 33;
|
|
g_conf.ebofs_verify = true;
|
|
verify("remove dne");
|
|
assert(0);
|
|
return -1; // key dne
|
|
}
|
|
|
|
|
|
while (1) {
|
|
cursor.remove();
|
|
|
|
// balance + adjust
|
|
|
|
if (cursor.level == 0) {
|
|
// useless root index?
|
|
if (cursor.open[0].size() == 1 &&
|
|
depth > 1) {
|
|
depth--;
|
|
root = cursor.open[0].index_item(0).node;
|
|
pool.release( cursor.open[0].node );
|
|
}
|
|
|
|
// note: root can be small, but not empty
|
|
else if (nkeys == 0) {
|
|
assert(cursor.open[cursor.level].size() == 0);
|
|
assert(depth == 1);
|
|
root = -1;
|
|
depth = 0;
|
|
if (cursor.open[0].node)
|
|
pool.release(cursor.open[0].node);
|
|
}
|
|
verify("remove 1");
|
|
return 0;
|
|
}
|
|
|
|
if (cursor.open[cursor.level].size() > cursor.open[cursor.level].min_items()) {
|
|
verify("remove 2");
|
|
return 0;
|
|
}
|
|
|
|
// borrow from siblings?
|
|
Nodeptr left;
|
|
Nodeptr right;
|
|
|
|
// left?
|
|
if (cursor.pos[cursor.level-1] > 0) {
|
|
int left_loc = cursor.open[cursor.level-1].index_item( cursor.pos[cursor.level-1] - 1).node;
|
|
left = pool.get_node( left_loc );
|
|
|
|
if (left.size() > left.min_items()) {
|
|
/* move cursor left, shift right */
|
|
cursor.pos[cursor.level] = 0;
|
|
cursor.open[cursor.level] = left;
|
|
cursor.pos[cursor.level-1]--;
|
|
cursor.rotate_right();
|
|
verify("remove 3");
|
|
return 0;
|
|
}
|
|
|
|
/* combine to left */
|
|
right = cursor.open[cursor.level];
|
|
}
|
|
else {
|
|
assert(cursor.pos[cursor.level-1] < cursor.open[cursor.level-1].size() - 1);
|
|
int right_loc = cursor.open[cursor.level-1].index_item( cursor.pos[cursor.level-1] + 1 ).node;
|
|
right = pool.get_node( right_loc );
|
|
|
|
if (right.size() > right.min_items()) {
|
|
/* move cursor right, shift an item left */
|
|
cursor.pos[cursor.level] = 1;
|
|
cursor.open[cursor.level] = right;
|
|
cursor.pos[cursor.level-1]++;
|
|
cursor.rotate_left();
|
|
verify("remove 4");
|
|
return 0;
|
|
}
|
|
|
|
/* combine to left */
|
|
left = cursor.open[cursor.level];
|
|
cursor.pos[cursor.level-1]++; /* move cursor to (soon-to-be-empty) right side item */
|
|
}
|
|
|
|
// note: cursor now points to _right_ node.
|
|
|
|
/* combine (towards left)
|
|
* (this makes it so our next delete will be in the index
|
|
* interior, which is less scary.)
|
|
*/
|
|
dbtout << "combining nodes " << left.get_id() << " and " << right.get_id() << endl;
|
|
|
|
left.merge(right);
|
|
|
|
// dirty left + right
|
|
cursor.dirty(); // right
|
|
if (!left.node->is_dirty()) {
|
|
pool.dirty_node(left.node);
|
|
cursor.open[cursor.level-1].index_item( cursor.pos[cursor.level-1]-1 ).node = left.get_id();
|
|
}
|
|
|
|
pool.release(right.node);
|
|
|
|
cursor.level--; // now point to the link to the obsolete (right-side) sib */
|
|
}
|
|
|
|
}
|
|
|
|
void clear(Cursor& cursor, int node_loc, int level) {
|
|
dbtout << "clear" << endl;
|
|
|
|
Nodeptr node = pool.get_node( node_loc );
|
|
cursor.open[level] = node;
|
|
|
|
// hose children?
|
|
if (level < depth-1) {
|
|
for (int i=0; i<node.size(); i++) {
|
|
// index
|
|
cursor.pos[level] = i;
|
|
nodeid_t child = cursor.open[level].index_item(i).node;
|
|
clear( cursor, child, level+1 );
|
|
}
|
|
}
|
|
|
|
// hose myself
|
|
pool.release( node.node );
|
|
}
|
|
|
|
void clear() {
|
|
Cursor cursor(this);
|
|
if (root == -1 && depth == 0) return; // already empty!
|
|
clear(cursor, root, 0);
|
|
root = -1;
|
|
depth = 0;
|
|
nkeys = 0;
|
|
}
|
|
|
|
int verify_sub(Cursor& cursor, int node_loc, int level, int& count, K& last, const char *on) {
|
|
int err = 0;
|
|
|
|
Nodeptr node = pool.get_node( node_loc );
|
|
cursor.open[level] = node;
|
|
|
|
// identify max, min, and validate key range
|
|
K min = node.key(0);
|
|
last = min;
|
|
K max = min;
|
|
for (int i=0; i<node.size(); i++) {
|
|
if (i && node.key(i) <= last) {
|
|
dbtout << ":: key " << i << " " << hex << node.key(i) << dec << " in node " << node_loc
|
|
<< " is out of order, last is " << hex << last << dec << endl;
|
|
err++;
|
|
}
|
|
if (node.key(i) > max)
|
|
max = node.key(i);
|
|
|
|
if (level < depth-1) {
|
|
// index
|
|
cursor.pos[level] = i;
|
|
err += verify_sub( cursor, cursor.open[level].index_item(i).node, level+1, count, last, on );
|
|
} else {
|
|
// leaf
|
|
count++;
|
|
last = node.key(i);
|
|
}
|
|
}
|
|
|
|
if (level) {
|
|
// verify that parent's keys are appropriate
|
|
if (min != cursor.open[level-1].index_item(cursor.pos[level-1]).key) {
|
|
dbtout << ":: key in index node " << cursor.open[level-1].get_id()
|
|
<< " != min in child " << node_loc
|
|
<< "(key is " << hex << cursor.open[level-1].index_item(cursor.pos[level-1]).key
|
|
<< ", min is " << min << ")" << dec << endl;
|
|
err++;
|
|
}
|
|
if (cursor.pos[level-1] < cursor.open[level-1].size()-1) {
|
|
if (max > cursor.open[level-1].index_item(1+cursor.pos[level-1]).key) {
|
|
dbtout << ":: next key in index node " << cursor.open[level-1].get_id()
|
|
<< " < max in child " << node_loc
|
|
<< "(key is " << hex << cursor.open[level-1].index_item(1+cursor.pos[level-1]).key
|
|
<< ", max is " << max << ")" << dec << endl;
|
|
err++;
|
|
}
|
|
}
|
|
}
|
|
|
|
//return err;
|
|
|
|
// print it
|
|
char s[1000];
|
|
strcpy(s," ");
|
|
s[level+1] = 0;
|
|
if (1) {
|
|
if (root == node_loc) {
|
|
dbtout << s << "root " << node_loc << ": "
|
|
<< node.size() << " / " << node.max_items() << " keys, " << hex << min << "-" << max << dec << endl;
|
|
} else if (level == depth-1) {
|
|
dbtout << s << "leaf " << node_loc << ": "
|
|
<< node.size() << " / " << node.max_items() << " keys, " << hex << min << "-" << max << dec << endl;
|
|
} else {
|
|
dbtout << s << "indx " << node_loc << ": "
|
|
<< node.size() << " / " << node.max_items() << " keys, " << hex << min << "-" << max << dec << endl;
|
|
}
|
|
|
|
if (0) {
|
|
for (int i=0; i<node.size(); i++) {
|
|
if (level < depth-1) { // index
|
|
dbtout << s << " " << hex << node.key(i) << " [" << node.index_item(i).node << "]" << dec << endl;
|
|
} else { // leaf
|
|
dbtout << s << " " << hex << node.key(i) << " -> " << node.leaf_item(i).value << dec << endl;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
void verify(const char *on) {
|
|
if (!g_conf.ebofs_verify)
|
|
return;
|
|
|
|
if (root == -1 && depth == 0) {
|
|
return; // empty!
|
|
}
|
|
|
|
int count = 0;
|
|
Cursor cursor(this);
|
|
K last;
|
|
|
|
int before = g_conf.debug_ebofs;
|
|
g_conf.debug_ebofs = 0;
|
|
|
|
int err = verify_sub(cursor, root, 0, count, last, on);
|
|
if (count != nkeys) {
|
|
cerr << "** count " << count << " != nkeys " << nkeys << endl;
|
|
err++;
|
|
}
|
|
|
|
g_conf.debug_ebofs = before;
|
|
|
|
// ok?
|
|
if (err) {
|
|
cerr << "verify failure, called by '" << on << "'" << endl;
|
|
g_conf.debug_ebofs = 30;
|
|
// do it again, so we definitely get the dump.
|
|
int count = 0;
|
|
Cursor cursor(this);
|
|
K last;
|
|
verify_sub(cursor, root, 0, count, last, on);
|
|
assert(err == 0);
|
|
}
|
|
}
|
|
|
|
};
|
|
|
|
|
|
#endif
|