mirror of
https://github.com/ceph/ceph
synced 2025-01-03 01:22:53 +00:00
crush: comments, cleanup
This commit is contained in:
parent
44c9462b2d
commit
e077a45d86
@ -1,4 +1,4 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
|
||||
#ifndef __CRUSH_WRAPPER_H
|
||||
@ -66,12 +66,12 @@ private:
|
||||
for (std::map<int, string>::iterator p = f.begin(); p != f.end(); p++)
|
||||
r[p->second] = p->first;
|
||||
}
|
||||
|
||||
|
||||
public:
|
||||
CrushWrapper() : crush(0), have_rmaps(false) {}
|
||||
~CrushWrapper() {
|
||||
if (crush) crush_destroy(crush);
|
||||
}
|
||||
}
|
||||
|
||||
/* building */
|
||||
void create() {
|
||||
@ -109,7 +109,7 @@ public:
|
||||
if (name_rmap.count(name))
|
||||
return name_rmap[name];
|
||||
return 0; /* hrm */
|
||||
}
|
||||
}
|
||||
const char *get_item_name(int t) {
|
||||
if (name_map.count(t))
|
||||
return name_map[t].c_str();
|
||||
@ -129,7 +129,7 @@ public:
|
||||
if (rule_name_rmap.count(name))
|
||||
return rule_name_rmap[name];
|
||||
return 0; /* hrm */
|
||||
}
|
||||
}
|
||||
const char *get_rule_name(int t) {
|
||||
if (rule_name_map.count(t))
|
||||
return rule_name_map[t].c_str();
|
||||
@ -152,13 +152,13 @@ public:
|
||||
if (d >= crush->max_devices) return -1;
|
||||
return crush->device_offload[d];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*** rules ***/
|
||||
private:
|
||||
crush_rule *get_rule(unsigned ruleno) {
|
||||
if (!crush) return (crush_rule *)(-ENOENT);
|
||||
if (ruleno >= crush->max_rules)
|
||||
if (ruleno >= crush->max_rules)
|
||||
return 0;
|
||||
return crush->rules[ruleno];
|
||||
}
|
||||
@ -255,15 +255,15 @@ public:
|
||||
int set_rule_step_emit(unsigned ruleno, unsigned step) {
|
||||
return set_rule_step(ruleno, step, CRUSH_RULE_EMIT, 0, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/** buckets **/
|
||||
private:
|
||||
crush_bucket *get_bucket(int id) {
|
||||
if (!crush) return (crush_bucket *)(-ENOENT);
|
||||
int pos = -1 - id;
|
||||
if ((unsigned)pos >= crush->max_buckets) return 0;
|
||||
if (pos >= crush->max_buckets) return 0;
|
||||
return crush->buckets[pos];
|
||||
}
|
||||
|
||||
@ -318,7 +318,7 @@ public:
|
||||
crush_bucket *b = crush_make_bucket(alg, type, size, items, weights);
|
||||
return crush_add_bucket(crush, bucketno, b);
|
||||
}
|
||||
|
||||
|
||||
void finalize() {
|
||||
assert(crush);
|
||||
crush_finalize(crush);
|
||||
@ -342,7 +342,7 @@ public:
|
||||
}
|
||||
void do_rule(int rule, int x, vector<int>& out, int maxout, int forcefeed) {
|
||||
int rawout[maxout];
|
||||
|
||||
|
||||
int numrep = crush_do_rule(crush, rule, x, rawout, maxout, forcefeed);
|
||||
|
||||
out.resize(numrep);
|
||||
@ -356,13 +356,13 @@ public:
|
||||
for (map<int,double>::iterator p = weights.begin(); p != weights.end(); p++)
|
||||
if (p->second > max)
|
||||
max = p->second;
|
||||
|
||||
|
||||
for (map<int,double>::iterator p = weights.begin(); p != weights.end(); p++) {
|
||||
unsigned w = 0x10000 - (unsigned)(p->second / max * 0x10000);
|
||||
set_offload(p->first, w);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int read_from_file(const char *fn) {
|
||||
@ -390,7 +390,7 @@ public:
|
||||
::encode(crush->device_offload[i], bl);
|
||||
|
||||
// buckets
|
||||
for (unsigned i=0; i<crush->max_buckets; i++) {
|
||||
for (int i=0; i<crush->max_buckets; i++) {
|
||||
__u32 alg = 0;
|
||||
if (crush->buckets[i]) alg = crush->buckets[i]->alg;
|
||||
::encode(alg, bl);
|
||||
@ -403,7 +403,7 @@ public:
|
||||
::encode(crush->buckets[i]->size, bl);
|
||||
for (unsigned j=0; j<crush->buckets[i]->size; j++)
|
||||
::encode(crush->buckets[i]->items[j], bl);
|
||||
|
||||
|
||||
switch (crush->buckets[i]->alg) {
|
||||
case CRUSH_BUCKET_UNIFORM:
|
||||
for (unsigned j=0; j<crush->buckets[i]->size; j++)
|
||||
@ -419,7 +419,7 @@ public:
|
||||
break;
|
||||
|
||||
case CRUSH_BUCKET_TREE:
|
||||
for (unsigned j=0; j<crush->buckets[i]->size; j++)
|
||||
for (unsigned j=0; j<crush->buckets[i]->size; j++)
|
||||
::encode(((crush_bucket_tree*)crush->buckets[i])->node_weights[j], bl);
|
||||
break;
|
||||
|
||||
@ -459,10 +459,10 @@ public:
|
||||
crush->device_offload = (__u32*)malloc(sizeof(crush->device_offload[0])*crush->max_devices);
|
||||
for (int i=0; i < crush->max_devices; i++)
|
||||
::decode(crush->device_offload[i], blp);
|
||||
|
||||
|
||||
// buckets
|
||||
crush->buckets = (crush_bucket**)malloc(sizeof(crush_bucket*)*crush->max_buckets);
|
||||
for (unsigned i=0; i<crush->max_buckets; i++) {
|
||||
for (int i=0; i<crush->max_buckets; i++) {
|
||||
__u32 alg;
|
||||
::decode(alg, blp);
|
||||
if (!alg) {
|
||||
@ -489,7 +489,7 @@ public:
|
||||
}
|
||||
crush->buckets[i] = (crush_bucket*)malloc(size);
|
||||
memset(crush->buckets[i], 0, size);
|
||||
|
||||
|
||||
::decode(crush->buckets[i]->id, blp);
|
||||
::decode(crush->buckets[i]->type, blp);
|
||||
::decode(crush->buckets[i]->alg, blp);
|
||||
@ -502,7 +502,7 @@ public:
|
||||
|
||||
switch (crush->buckets[i]->alg) {
|
||||
case CRUSH_BUCKET_UNIFORM:
|
||||
((crush_bucket_uniform*)crush->buckets[i])->primes =
|
||||
((crush_bucket_uniform*)crush->buckets[i])->primes =
|
||||
(__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
|
||||
for (unsigned j=0; j<crush->buckets[i]->size; j++)
|
||||
::decode(((crush_bucket_uniform*)crush->buckets[i])->primes[j], blp);
|
||||
@ -510,9 +510,9 @@ public:
|
||||
break;
|
||||
|
||||
case CRUSH_BUCKET_LIST:
|
||||
((crush_bucket_list*)crush->buckets[i])->item_weights =
|
||||
((crush_bucket_list*)crush->buckets[i])->item_weights =
|
||||
(__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
|
||||
((crush_bucket_list*)crush->buckets[i])->sum_weights =
|
||||
((crush_bucket_list*)crush->buckets[i])->sum_weights =
|
||||
(__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
|
||||
|
||||
for (unsigned j=0; j<crush->buckets[i]->size; j++) {
|
||||
@ -522,16 +522,16 @@ public:
|
||||
break;
|
||||
|
||||
case CRUSH_BUCKET_TREE:
|
||||
((crush_bucket_tree*)crush->buckets[i])->node_weights =
|
||||
((crush_bucket_tree*)crush->buckets[i])->node_weights =
|
||||
(__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
|
||||
for (unsigned j=0; j<crush->buckets[i]->size; j++)
|
||||
for (unsigned j=0; j<crush->buckets[i]->size; j++)
|
||||
::decode(((crush_bucket_tree*)crush->buckets[i])->node_weights[j], blp);
|
||||
break;
|
||||
|
||||
case CRUSH_BUCKET_STRAW:
|
||||
((crush_bucket_straw*)crush->buckets[i])->straws =
|
||||
((crush_bucket_straw*)crush->buckets[i])->straws =
|
||||
(__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
|
||||
((crush_bucket_straw*)crush->buckets[i])->item_weights =
|
||||
((crush_bucket_straw*)crush->buckets[i])->item_weights =
|
||||
(__u32*)malloc(crush->buckets[i]->size * sizeof(__u32));
|
||||
for (unsigned j=0; j<crush->buckets[i]->size; j++) {
|
||||
::decode(((crush_bucket_straw*)crush->buckets[i])->item_weights[j], blp);
|
||||
|
@ -2,5 +2,5 @@
|
||||
#include "crush.h"
|
||||
#include "hash.h"
|
||||
|
||||
int
|
||||
int
|
||||
|
||||
|
@ -24,21 +24,21 @@ struct crush_map *crush_create()
|
||||
void crush_finalize(struct crush_map *map)
|
||||
{
|
||||
int b, i;
|
||||
|
||||
|
||||
/* calc max_devices */
|
||||
for (b=0; b<map->max_buckets; b++) {
|
||||
if (map->buckets[b] == 0) continue;
|
||||
for (i=0; i<map->buckets[b]->size; i++)
|
||||
for (i=0; i<map->buckets[b]->size; i++)
|
||||
if (map->buckets[b]->items[i] >= map->max_devices)
|
||||
map->max_devices = map->buckets[b]->items[i] + 1;
|
||||
}
|
||||
|
||||
|
||||
/* allocate arrays */
|
||||
map->device_parents = malloc(sizeof(map->device_parents[0]) * map->max_devices);
|
||||
memset(map->device_parents, 0, sizeof(map->device_parents[0]) * map->max_devices);
|
||||
map->bucket_parents = malloc(sizeof(map->bucket_parents[0]) * map->max_buckets);
|
||||
memset(map->bucket_parents, 0, sizeof(map->bucket_parents[0]) * map->max_buckets);
|
||||
|
||||
|
||||
/* build parent maps */
|
||||
crush_calc_parents(map);
|
||||
|
||||
@ -70,7 +70,7 @@ int crush_add_rule(struct crush_map *map, struct crush_rule *rule, int ruleno)
|
||||
map->rules = realloc(map->rules, map->max_rules * sizeof(map->rules[0]));
|
||||
memset(map->rules + oldsize, 0, (map->max_rules-oldsize) * sizeof(map->rules[0]));
|
||||
}
|
||||
|
||||
|
||||
/* add it */
|
||||
map->rules[ruleno] = rule;
|
||||
return ruleno;
|
||||
@ -105,7 +105,7 @@ int crush_get_next_bucket_id(struct crush_map *map)
|
||||
{
|
||||
int pos;
|
||||
for (pos=0; pos < map->max_buckets; pos++)
|
||||
if (map->buckets[pos] == 0)
|
||||
if (map->buckets[pos] == 0)
|
||||
break;
|
||||
return -1 - pos;
|
||||
}
|
||||
@ -119,7 +119,7 @@ int crush_add_bucket(struct crush_map *map,
|
||||
int pos;
|
||||
|
||||
/* find a bucket id */
|
||||
if (id == 0)
|
||||
if (id == 0)
|
||||
id = crush_get_next_bucket_id(map);
|
||||
pos = -1 - id;
|
||||
|
||||
@ -153,20 +153,20 @@ crush_make_uniform_bucket(int type, int size,
|
||||
{
|
||||
int i, j, x;
|
||||
struct crush_bucket_uniform *bucket;
|
||||
|
||||
|
||||
bucket = malloc(sizeof(*bucket));
|
||||
memset(bucket, 0, sizeof(*bucket));
|
||||
bucket->h.alg = CRUSH_BUCKET_UNIFORM;
|
||||
bucket->h.type = type;
|
||||
bucket->h.size = size;
|
||||
bucket->h.weight = size * item_weight;
|
||||
|
||||
|
||||
bucket->item_weight = item_weight;
|
||||
|
||||
|
||||
bucket->h.items = malloc(sizeof(__u32)*size);
|
||||
for (i=0; i<size; i++)
|
||||
bucket->h.items[i] = items[i];
|
||||
|
||||
|
||||
/* generate some primes */
|
||||
bucket->primes = malloc(sizeof(__u32)*size);
|
||||
|
||||
@ -177,12 +177,12 @@ crush_make_uniform_bucket(int type, int size,
|
||||
x = size + 1;
|
||||
x += crush_hash32(size) % (3*size); /* make it big */
|
||||
x |= 1; /* and odd */
|
||||
|
||||
|
||||
i=0;
|
||||
while (i < size) {
|
||||
for (j=2; j*j <= x; j++)
|
||||
for (j=2; j*j <= x; j++)
|
||||
if (x % j == 0) break;
|
||||
if (j*j > x)
|
||||
if (j*j > x)
|
||||
bucket->primes[i++] = x;
|
||||
x += 2;
|
||||
}
|
||||
@ -207,7 +207,7 @@ crush_make_list_bucket(int type, int size,
|
||||
bucket->h.alg = CRUSH_BUCKET_LIST;
|
||||
bucket->h.type = type;
|
||||
bucket->h.size = size;
|
||||
|
||||
|
||||
bucket->h.items = malloc(sizeof(__u32)*size);
|
||||
bucket->item_weights = malloc(sizeof(__u32)*size);
|
||||
bucket->sum_weights = malloc(sizeof(__u32)*size);
|
||||
@ -215,7 +215,7 @@ crush_make_list_bucket(int type, int size,
|
||||
/*
|
||||
* caller will place new items at end. so, we reverse things,
|
||||
* since we put new items at the beginning.
|
||||
*/
|
||||
*/
|
||||
for (i=0; i<size; i++) {
|
||||
int pos = size - i - 1;
|
||||
bucket->h.items[pos] = items[i];
|
||||
@ -225,7 +225,7 @@ crush_make_list_bucket(int type, int size,
|
||||
/*printf("%d item %d weight %d sum %d\n",
|
||||
i, items[i], weights[i], bucket->sum_weights[i]);*/
|
||||
}
|
||||
|
||||
|
||||
bucket->h.weight = w;
|
||||
|
||||
return bucket;
|
||||
@ -237,15 +237,15 @@ crush_make_list_bucket(int type, int size,
|
||||
static int height(int n) {
|
||||
int h = 0;
|
||||
while ((n & 1) == 0) {
|
||||
h++;
|
||||
h++;
|
||||
n = n >> 1;
|
||||
}
|
||||
return h;
|
||||
}
|
||||
static int on_right(int n, int h) {
|
||||
return n & (1 << (h+1));
|
||||
static int on_right(int n, int h) {
|
||||
return n & (1 << (h+1));
|
||||
}
|
||||
static int parent(int n)
|
||||
static int parent(int n)
|
||||
{
|
||||
int h = height(n);
|
||||
if (on_right(n, h))
|
||||
@ -263,7 +263,7 @@ crush_make_tree_bucket(int type, int size,
|
||||
int depth;
|
||||
int node;
|
||||
int t, i, j;
|
||||
|
||||
|
||||
bucket = malloc(sizeof(*bucket));
|
||||
memset(bucket, 0, sizeof(*bucket));
|
||||
bucket->h.alg = CRUSH_BUCKET_TREE;
|
||||
@ -283,7 +283,7 @@ crush_make_tree_bucket(int type, int size,
|
||||
|
||||
memset(bucket->h.items, 0, sizeof(__u32)*bucket->h.size);
|
||||
memset(bucket->node_weights, 0, sizeof(__u32)*bucket->h.size);
|
||||
|
||||
|
||||
for (i=0; i<size; i++) {
|
||||
node = ((i+1) << 1)-1;
|
||||
bucket->h.items[node] = items[i];
|
||||
@ -304,7 +304,7 @@ crush_make_tree_bucket(int type, int size,
|
||||
/* straw bucket */
|
||||
|
||||
struct crush_bucket_straw *
|
||||
crush_make_straw_bucket(int type,
|
||||
crush_make_straw_bucket(int type,
|
||||
int size,
|
||||
int *items,
|
||||
int *weights)
|
||||
@ -312,27 +312,27 @@ crush_make_straw_bucket(int type,
|
||||
struct crush_bucket_straw *bucket;
|
||||
int *reverse;
|
||||
int i, j, k;
|
||||
|
||||
|
||||
double straw, wbelow, lastw, wnext, pbelow;
|
||||
int numleft;
|
||||
|
||||
|
||||
bucket = malloc(sizeof(*bucket));
|
||||
memset(bucket, 0, sizeof(*bucket));
|
||||
bucket->h.alg = CRUSH_BUCKET_STRAW;
|
||||
bucket->h.type = type;
|
||||
bucket->h.size = size;
|
||||
|
||||
|
||||
bucket->h.items = malloc(sizeof(__u32)*size);
|
||||
bucket->item_weights = malloc(sizeof(__u32)*size);
|
||||
bucket->straws = malloc(sizeof(__u32)*size);
|
||||
|
||||
|
||||
bucket->h.weight = 0;
|
||||
for (i=0; i<size; i++) {
|
||||
bucket->h.items[i] = items[i];
|
||||
bucket->h.weight += weights[i];
|
||||
bucket->item_weights[i] = weights[i];
|
||||
}
|
||||
|
||||
|
||||
/* reverse sort by weight (simple insertion sort) */
|
||||
reverse = malloc(sizeof(int) * size);
|
||||
reverse[0] = 0;
|
||||
@ -349,28 +349,28 @@ crush_make_straw_bucket(int type,
|
||||
if (j == i)
|
||||
reverse[i] = i;
|
||||
}
|
||||
|
||||
|
||||
numleft = size;
|
||||
straw = 1.0;
|
||||
wbelow = 0;
|
||||
lastw = 0;
|
||||
|
||||
|
||||
i=0;
|
||||
while (i < size) {
|
||||
/* set this item's straw */
|
||||
bucket->straws[reverse[i]] = straw * 0x10000;
|
||||
/*printf("item %d at %d weight %d straw %d (%lf)\n",
|
||||
/*printf("item %d at %d weight %d straw %d (%lf)\n",
|
||||
items[reverse[i]],
|
||||
reverse[i], weights[reverse[i]], bucket->straws[reverse[i]], straw);*/
|
||||
i++;
|
||||
if (i == size) break;
|
||||
|
||||
|
||||
/* same weight as previous? */
|
||||
if (weights[reverse[i]] == weights[reverse[i-1]]) {
|
||||
/*printf("same as previous\n");*/
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
/* adjust straw for next guy */
|
||||
wbelow += ((double)weights[reverse[i-1]] - lastw) * numleft;
|
||||
for (j=i; j<size; j++)
|
||||
@ -381,14 +381,14 @@ crush_make_straw_bucket(int type,
|
||||
wnext = numleft * (weights[reverse[i]] - weights[reverse[i-1]]);
|
||||
pbelow = wbelow / (wbelow + wnext);
|
||||
/*printf("wbelow %lf wnext %lf pbelow %lf\n", wbelow, wnext, pbelow);*/
|
||||
|
||||
|
||||
straw *= pow((double)1.0 / pbelow, (double)1.0 / (double)numleft);
|
||||
|
||||
|
||||
lastw = weights[reverse[i-1]];
|
||||
}
|
||||
|
||||
|
||||
free(reverse);
|
||||
|
||||
|
||||
return bucket;
|
||||
}
|
||||
|
||||
@ -408,15 +408,15 @@ crush_make_bucket(int alg, int type, int size,
|
||||
else
|
||||
item_weight = 0;
|
||||
return (struct crush_bucket *)crush_make_uniform_bucket(type, size, items, item_weight);
|
||||
|
||||
|
||||
case CRUSH_BUCKET_LIST:
|
||||
return (struct crush_bucket *)crush_make_list_bucket(type, size, items, weights);
|
||||
|
||||
case CRUSH_BUCKET_TREE:
|
||||
return (struct crush_bucket *)crush_make_tree_bucket(type, size, items, weights);
|
||||
|
||||
|
||||
case CRUSH_BUCKET_STRAW:
|
||||
return (struct crush_bucket *)crush_make_straw_bucket(type, size, items, weights);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -5,40 +5,52 @@
|
||||
#else
|
||||
# include <stdlib.h>
|
||||
# include <assert.h>
|
||||
# define kfree(x) free(x)
|
||||
# define kfree(x) do { if (x) free(x); } while (0)
|
||||
# define BUG_ON(x) assert(!(x))
|
||||
#endif
|
||||
|
||||
#include "crush.h"
|
||||
|
||||
int crush_get_bucket_item_weight(struct crush_bucket *b, int pos)
|
||||
/**
|
||||
* crush_get_bucket_item_weight - Get weight of an item in given bucket
|
||||
* @b: bucket pointer
|
||||
* @p: item index in bucket
|
||||
*/
|
||||
int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
|
||||
{
|
||||
if (pos >= b->size)
|
||||
if (p >= b->size)
|
||||
return 0;
|
||||
switch (b->alg) {
|
||||
|
||||
switch (b->alg) {
|
||||
case CRUSH_BUCKET_UNIFORM:
|
||||
return ((struct crush_bucket_uniform*)b)->item_weight;
|
||||
case CRUSH_BUCKET_LIST:
|
||||
return ((struct crush_bucket_list*)b)->item_weights[pos];
|
||||
case CRUSH_BUCKET_TREE:
|
||||
if (pos & 1)
|
||||
return ((struct crush_bucket_tree*)b)->node_weights[pos];
|
||||
return ((struct crush_bucket_list*)b)->item_weights[p];
|
||||
case CRUSH_BUCKET_TREE:
|
||||
if (p & 1)
|
||||
return ((struct crush_bucket_tree*)b)->node_weights[p];
|
||||
return 0;
|
||||
case CRUSH_BUCKET_STRAW:
|
||||
return ((struct crush_bucket_straw*)b)->item_weights[pos];
|
||||
return ((struct crush_bucket_straw*)b)->item_weights[p];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* crush_calc_parents - Calculate parent vectors for the given crush map.
|
||||
* @map: crush_map pointer
|
||||
*/
|
||||
void crush_calc_parents(struct crush_map *map)
|
||||
{
|
||||
int i, b, c;
|
||||
for (b=0; b<map->max_buckets; b++) {
|
||||
if (map->buckets[b] == NULL) continue;
|
||||
for (i=0; i<map->buckets[b]->size; i++) {
|
||||
|
||||
for (b = 0; b < map->max_buckets; b++) {
|
||||
if (map->buckets[b] == NULL)
|
||||
continue;
|
||||
for (i = 0; i < map->buckets[b]->size; i++) {
|
||||
c = map->buckets[b]->items[i];
|
||||
BUG_ON(c >= map->max_devices);
|
||||
BUG_ON(c >= map->max_devices ||
|
||||
c < -map->max_buckets);
|
||||
if (c >= 0)
|
||||
map->device_parents[c] = map->buckets[b]->id;
|
||||
else
|
||||
@ -76,51 +88,52 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
|
||||
kfree(b);
|
||||
}
|
||||
|
||||
void crush_destroy_bucket(struct crush_bucket *b)
|
||||
{
|
||||
switch (b->alg) {
|
||||
case CRUSH_BUCKET_UNIFORM:
|
||||
crush_destroy_bucket_uniform((struct crush_bucket_uniform *)b);
|
||||
break;
|
||||
case CRUSH_BUCKET_LIST:
|
||||
crush_destroy_bucket_list((struct crush_bucket_list *)b);
|
||||
break;
|
||||
case CRUSH_BUCKET_TREE:
|
||||
crush_destroy_bucket_tree((struct crush_bucket_tree *)b);
|
||||
break;
|
||||
case CRUSH_BUCKET_STRAW:
|
||||
crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* deallocate
|
||||
/**
|
||||
* crush_destroy - Destroy a crush_map
|
||||
* @map: crush_map pointer
|
||||
*/
|
||||
void crush_destroy(struct crush_map *map)
|
||||
{
|
||||
int b;
|
||||
|
||||
|
||||
/* buckets */
|
||||
if (map->buckets) {
|
||||
for (b=0; b<map->max_buckets; b++) {
|
||||
if (map->buckets[b] == NULL) continue;
|
||||
switch (map->buckets[b]->alg) {
|
||||
case CRUSH_BUCKET_UNIFORM:
|
||||
crush_destroy_bucket_uniform((struct crush_bucket_uniform*)map->buckets[b]);
|
||||
break;
|
||||
case CRUSH_BUCKET_LIST:
|
||||
crush_destroy_bucket_list((struct crush_bucket_list*)map->buckets[b]);
|
||||
break;
|
||||
case CRUSH_BUCKET_TREE:
|
||||
crush_destroy_bucket_tree((struct crush_bucket_tree*)map->buckets[b]);
|
||||
break;
|
||||
case CRUSH_BUCKET_STRAW:
|
||||
crush_destroy_bucket_straw((struct crush_bucket_straw*)map->buckets[b]);
|
||||
break;
|
||||
}
|
||||
for (b = 0; b < map->max_buckets; b++) {
|
||||
if (map->buckets[b] == NULL)
|
||||
continue;
|
||||
crush_destroy_bucket(map->buckets[b]);
|
||||
}
|
||||
kfree(map->buckets);
|
||||
}
|
||||
|
||||
|
||||
/* rules */
|
||||
if (map->rules) {
|
||||
for (b=0; b<map->max_rules; b++) {
|
||||
if (map->rules[b] == NULL) continue;
|
||||
for (b = 0; b < map->max_rules; b++)
|
||||
kfree(map->rules[b]);
|
||||
}
|
||||
kfree(map->rules);
|
||||
}
|
||||
|
||||
if (map->bucket_parents)
|
||||
kfree(map->bucket_parents);
|
||||
if (map->device_parents)
|
||||
kfree(map->device_parents);
|
||||
if (map->device_offload)
|
||||
kfree(map->device_offload);
|
||||
|
||||
kfree(map->bucket_parents);
|
||||
kfree(map->device_parents);
|
||||
kfree(map->device_offload);
|
||||
kfree(map);
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,34 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/*** RULES ***/
|
||||
/*
|
||||
* CRUSH is a pseudo-random data distribution algorithm that
|
||||
* efficiently distributes input values (typically, data objects)
|
||||
* across a heterogeneous, structured storage cluster.
|
||||
*
|
||||
* The algorithm was originally described in detail in this paper
|
||||
* (although the algorithm has evolved somewhat since then):
|
||||
*
|
||||
* http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf
|
||||
*/
|
||||
|
||||
|
||||
#define CRUSH_MAX_DEPTH 10
|
||||
#define CRUSH_MAX_SET 10
|
||||
|
||||
|
||||
/*
|
||||
* CRUSH uses user-defined "rules" to describe how inputs should be
|
||||
* mapped to devices. A rule consists of a sequence of steps to perform
|
||||
* to generate the set of output devices.
|
||||
*/
|
||||
struct crush_rule_step {
|
||||
__u32 op;
|
||||
__s32 arg1;
|
||||
__s32 arg2;
|
||||
};
|
||||
|
||||
/* step op codes */
|
||||
enum {
|
||||
CRUSH_RULE_NOOP = 0,
|
||||
CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */
|
||||
@ -15,22 +42,18 @@ enum {
|
||||
CRUSH_RULE_CHOOSE_LEAF_INDEP = 7,
|
||||
};
|
||||
|
||||
#define CRUSH_MAX_DEPTH 10
|
||||
#define CRUSH_MAX_SET 10
|
||||
|
||||
/*
|
||||
* for specifying choose numrep relative to the max
|
||||
* parameter passed to do_rule
|
||||
* for specifying choose num (arg1) relative to the max parameter
|
||||
* passed to do_rule
|
||||
*/
|
||||
#define CRUSH_CHOOSE_N 0
|
||||
#define CRUSH_CHOOSE_N_MINUS(x) (-(x))
|
||||
|
||||
struct crush_rule_step {
|
||||
__u32 op;
|
||||
__s32 arg1;
|
||||
__s32 arg2;
|
||||
};
|
||||
|
||||
/*
|
||||
* The rule mask is used to describe what the rule is intended for.
|
||||
* Given a storage pool and size of output set, we search through the
|
||||
* rule list for a matching rule_mask.
|
||||
*/
|
||||
struct crush_rule_mask {
|
||||
__u8 pool;
|
||||
__u8 type;
|
||||
@ -49,9 +72,20 @@ struct crush_rule {
|
||||
|
||||
|
||||
|
||||
/*** BUCKETS ***/
|
||||
|
||||
/* bucket algorithms */
|
||||
/*
|
||||
* A bucket is a named container of other items (either devices or
|
||||
* other buckets). Items within a bucket are chosen using one of a
|
||||
* few different algorithms. The table summarizes how the speed of
|
||||
* each option measures up against mapping stability when items are
|
||||
* added or removed.
|
||||
*
|
||||
* Bucket Alg Speed Additions Removals
|
||||
* ------------------------------------------------
|
||||
* uniform O(1) poor poor
|
||||
* list O(n) optimal poor
|
||||
* tree O(log n) good good
|
||||
* straw O(n) optimal optimal
|
||||
*/
|
||||
enum {
|
||||
CRUSH_BUCKET_UNIFORM = 1,
|
||||
CRUSH_BUCKET_LIST = 2,
|
||||
@ -70,7 +104,7 @@ static inline const char *crush_bucket_alg_name(int alg) {
|
||||
|
||||
struct crush_bucket {
|
||||
__s32 id; /* this'll be negative */
|
||||
__u16 type; /* non-zero; 0 is reserved for devices */
|
||||
__u16 type; /* non-zero; type=0 is reserved for devices */
|
||||
__u16 alg; /* one of CRUSH_BUCKET_* */
|
||||
__u32 weight; /* 16-bit fixed point */
|
||||
__u32 size; /* num items */
|
||||
@ -80,58 +114,69 @@ struct crush_bucket {
|
||||
struct crush_bucket_uniform {
|
||||
struct crush_bucket h;
|
||||
__u32 *primes;
|
||||
__u32 item_weight; /* 16-bit fixed point */
|
||||
__u32 item_weight; /* 16-bit fixed point; all items equally weighted */
|
||||
};
|
||||
|
||||
struct crush_bucket_list {
|
||||
struct crush_bucket h;
|
||||
__u32 *item_weights; /* 16-bit fixed point */
|
||||
__u32 *sum_weights; /* 16-bit fixed point. element i is sum of weights 0..i, inclusive */
|
||||
__u32 *sum_weights; /* 16-bit fixed point. element i is sum
|
||||
of weights 0..i, inclusive */
|
||||
};
|
||||
|
||||
struct crush_bucket_tree {
|
||||
struct crush_bucket h; /* note: h.size is tree size, not number of actual items */
|
||||
struct crush_bucket h; /* note: h.size is _tree_ size, not number of
|
||||
actual items */
|
||||
__u32 *node_weights;
|
||||
};
|
||||
|
||||
struct crush_bucket_straw {
|
||||
struct crush_bucket h;
|
||||
__u32 *item_weights;
|
||||
__u32 *straws; /* 16-bit fixed point */
|
||||
__u32 *item_weights; /* 16-bit fixed point */
|
||||
__u32 *straws; /* 16-bit fixed point */
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*** CRUSH ***/
|
||||
|
||||
/*
|
||||
* CRUSH map includes all buckets, rules, etc.
|
||||
*/
|
||||
struct crush_map {
|
||||
struct crush_bucket **buckets;
|
||||
struct crush_rule **rules;
|
||||
|
||||
/* parent pointers */
|
||||
|
||||
/*
|
||||
* Parent pointers to identify the parent bucket of a device or
|
||||
* bucket in the hierarchy. If an item appears more than
|
||||
* once, this is the _last_ time it appeared (where buckets
|
||||
* are processed in bucket id order, from -1 on down to
|
||||
* -max_buckets).
|
||||
*/
|
||||
__u32 *bucket_parents;
|
||||
__u32 *device_parents;
|
||||
|
||||
/* offload
|
||||
* size max_devices, values 0...0xffff
|
||||
|
||||
/*
|
||||
* device offload.
|
||||
* size max_devices, values 0..0x10000
|
||||
* 0 == normal
|
||||
* 0x10000 == 100% offload (i.e. failed)
|
||||
*/
|
||||
__u32 *device_offload;
|
||||
|
||||
__u32 max_buckets;
|
||||
__u32 *device_offload;
|
||||
|
||||
__s32 max_buckets;
|
||||
__u32 max_rules;
|
||||
__s32 max_devices;
|
||||
};
|
||||
|
||||
|
||||
/* common */
|
||||
/* crush.c */
|
||||
extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
|
||||
extern void crush_calc_parents(struct crush_map *m);
|
||||
extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *);
|
||||
extern void crush_destroy_bucket_list(struct crush_bucket_list *);
|
||||
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *);
|
||||
extern void crush_destroy_bucket_straw(struct crush_bucket_straw *);
|
||||
extern void crush_calc_parents(struct crush_map *map);
|
||||
extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
|
||||
extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
|
||||
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
|
||||
extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
|
||||
extern void crush_destroy_bucket(struct crush_bucket *b);
|
||||
extern void crush_destroy(struct crush_map *map);
|
||||
|
||||
#endif
|
||||
|
@ -1,4 +1,4 @@
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
||||
// vim: ts=8 sw=2 smarttab
|
||||
/*
|
||||
* Ceph - scalable distributed file system
|
||||
@ -7,9 +7,9 @@
|
||||
*
|
||||
* This is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License version 2.1, as published by the Free Software
|
||||
* License version 2.1, as published by the Free Software
|
||||
* Foundation. See file COPYING.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __CRUSH_GRAMMAR
|
||||
@ -70,7 +70,7 @@ struct crush_grammar : public grammar<crush_grammar>
|
||||
rule<ScannerT, parser_context<>, parser_tag<_crushrule> > crushrule;
|
||||
|
||||
rule<ScannerT, parser_context<>, parser_tag<_crushmap> > crushmap;
|
||||
|
||||
|
||||
definition(crush_grammar const& /*self*/)
|
||||
{
|
||||
// base types
|
||||
@ -86,21 +86,21 @@ struct crush_grammar : public grammar<crush_grammar>
|
||||
>> !( ( str_p("offload") >> real_p ) |
|
||||
( str_p("load") >> real_p ) |
|
||||
str_p("down"));
|
||||
|
||||
|
||||
// bucket types
|
||||
bucket_type = str_p("type") >> posint >> name;
|
||||
|
||||
// buckets
|
||||
bucket_id = str_p("id") >> negint;
|
||||
bucket_alg = str_p("alg") >> ( str_p("uniform") |
|
||||
str_p("list") |
|
||||
str_p("tree") |
|
||||
bucket_alg = str_p("alg") >> ( str_p("uniform") |
|
||||
str_p("list") |
|
||||
str_p("tree") |
|
||||
str_p("straw") );
|
||||
bucket_item = str_p("item") >> name
|
||||
>> !( str_p("weight") >> real_p )
|
||||
>> !( str_p("pos") >> posint );
|
||||
bucket = name >> name >> '{' >> !bucket_id >> bucket_alg >> *bucket_item >> '}';
|
||||
|
||||
|
||||
// rules
|
||||
step_take = str_p("take") >> name;
|
||||
step_choose = str_p("choose")
|
||||
@ -112,11 +112,11 @@ struct crush_grammar : public grammar<crush_grammar>
|
||||
>> integer
|
||||
>> str_p("type") >> name;
|
||||
step_emit = str_p("emit");
|
||||
step = str_p("step") >> ( step_take |
|
||||
step_choose |
|
||||
step_chooseleaf |
|
||||
step = str_p("step") >> ( step_take |
|
||||
step_choose |
|
||||
step_chooseleaf |
|
||||
step_emit );
|
||||
crushrule = str_p("rule") >> !name >> '{'
|
||||
crushrule = str_p("rule") >> !name >> '{'
|
||||
>> str_p("pool") >> posint
|
||||
>> str_p("type") >> ( str_p("replicated") | str_p("raid4") )
|
||||
>> str_p("min_size") >> posint
|
||||
@ -127,8 +127,8 @@ struct crush_grammar : public grammar<crush_grammar>
|
||||
// the whole crush map
|
||||
crushmap = *(device | bucket_type) >> *bucket >> *crushrule;
|
||||
}
|
||||
|
||||
rule<ScannerT, parser_context<>, parser_tag<_crushmap> > const&
|
||||
|
||||
rule<ScannerT, parser_context<>, parser_tag<_crushmap> > const&
|
||||
start() const { return crushmap; }
|
||||
};
|
||||
};
|
||||
|
@ -5,15 +5,15 @@
|
||||
// http://burtleburtle.net/bob/hash/evahash.html
|
||||
// a, b = random bits, c = input and output
|
||||
#define hashmix(a,b,c) \
|
||||
a=a-b; a=a-c; a=a^(c>>13); \
|
||||
b=b-c; b=b-a; b=b^(a<<8); \
|
||||
c=c-a; c=c-b; c=c^(b>>13); \
|
||||
a=a-b; a=a-c; a=a^(c>>12); \
|
||||
b=b-c; b=b-a; b=b^(a<<16); \
|
||||
c=c-a; c=c-b; c=c^(b>>5); \
|
||||
a=a-b; a=a-c; a=a^(c>>3); \
|
||||
b=b-c; b=b-a; b=b^(a<<10); \
|
||||
c=c-a; c=c-b; c=c^(b>>15);
|
||||
a=a-b; a=a-c; a=a^(c>>13); \
|
||||
b=b-c; b=b-a; b=b^(a<<8); \
|
||||
c=c-a; c=c-b; c=c^(b>>13); \
|
||||
a=a-b; a=a-c; a=a^(c>>12); \
|
||||
b=b-c; b=b-a; b=b^(a<<16); \
|
||||
c=c-a; c=c-b; c=c^(b>>5); \
|
||||
a=a-b; a=a-c; a=a^(c>>3); \
|
||||
b=b-c; b=b-a; b=b^(a<<10); \
|
||||
c=c-a; c=c-b; c=c^(b>>15);
|
||||
|
||||
#define crush_hash_seed 1315423911
|
||||
|
||||
|
@ -15,10 +15,17 @@
|
||||
#include "hash.h"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* crush_find_rule - find a crush_rule id for a given pool, type, and size.
|
||||
* @map: the crush_map
|
||||
* @pool: the storage pool id (user defined)
|
||||
* @type: storage pool type (user defined)
|
||||
* @size: output set size
|
||||
*/
|
||||
int crush_find_rule(struct crush_map *map, int pool, int type, int size)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < map->max_rules; i++) {
|
||||
if (map->rules[i] &&
|
||||
map->rules[i]->mask.pool == pool &&
|
||||
@ -31,34 +38,37 @@ int crush_find_rule(struct crush_map *map, int pool, int type, int size)
|
||||
}
|
||||
|
||||
|
||||
/** bucket choose methods **/
|
||||
/*
|
||||
* bucket choose methods
|
||||
*
|
||||
* For each bucket algorithm, we have a "choose" method that, given a
|
||||
* crush input @x and replica position (usually, position in output set) @r,
|
||||
* will produce an item in the bucket.
|
||||
*/
|
||||
|
||||
/* uniform */
|
||||
|
||||
static int
|
||||
crush_bucket_uniform_choose(struct crush_bucket_uniform *bucket, int x, int r)
|
||||
static int bucket_uniform_choose(struct crush_bucket_uniform *bucket,
|
||||
int x, int r)
|
||||
{
|
||||
unsigned o, p, s;
|
||||
o = crush_hash32_2(x, bucket->h.id) & 0xffff;
|
||||
p = bucket->primes[crush_hash32_2(bucket->h.id, x) % bucket->h.size];
|
||||
s = (x + o + (r+1)*p) % bucket->h.size;
|
||||
unsigned o = crush_hash32_2(x, bucket->h.id) & 0xffff;
|
||||
unsigned p = bucket->primes[crush_hash32_2(bucket->h.id, x) %
|
||||
bucket->h.size];
|
||||
unsigned s = (x + o + (r+1)*p) % bucket->h.size;
|
||||
/*printf("%d %d %d %d\n", x, o, r, p);*/
|
||||
return bucket->h.items[s];
|
||||
}
|
||||
|
||||
|
||||
/* list */
|
||||
|
||||
static int
|
||||
crush_bucket_list_choose(struct crush_bucket_list *bucket, int x, int r)
|
||||
static int bucket_list_choose(struct crush_bucket_list *bucket,
|
||||
int x, int r)
|
||||
{
|
||||
int i;
|
||||
__u64 w;
|
||||
|
||||
for (i=0; i<bucket->h.size; i++) {
|
||||
w = crush_hash32_4(x, bucket->h.items[i], r, bucket->h.id);
|
||||
|
||||
for (i = 0; i < bucket->h.size; i++) {
|
||||
__u64 w = crush_hash32_4(x, bucket->h.items[i], r,
|
||||
bucket->h.id);
|
||||
w &= 0xffff;
|
||||
/*printf("%d x %d item %d weight %d sum_weight %d r %lld",
|
||||
/*printf("%d x %d item %d weight %d sum_weight %d r %lld",
|
||||
i, x, bucket->h.items[i], bucket->item_weights[i], bucket->sum_weights[i], w);*/
|
||||
w *= bucket->sum_weights[i];
|
||||
w = w >> 16;
|
||||
@ -66,36 +76,38 @@ crush_bucket_list_choose(struct crush_bucket_list *bucket, int x, int r)
|
||||
if (w < bucket->item_weights[i])
|
||||
return bucket->h.items[i];
|
||||
}
|
||||
|
||||
|
||||
BUG_ON(1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* tree */
|
||||
|
||||
static int height(int n) {
|
||||
int h = 0;
|
||||
while ((n & 1) == 0) {
|
||||
h++;
|
||||
h++;
|
||||
n = n >> 1;
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
static int left(int x) {
|
||||
int h = height(x);
|
||||
return x - (1 << (h-1));
|
||||
}
|
||||
|
||||
static int right(int x) {
|
||||
int h = height(x);
|
||||
return x + (1 << (h-1));
|
||||
}
|
||||
|
||||
static int terminal(int x) {
|
||||
return x & 1;
|
||||
}
|
||||
|
||||
static int
|
||||
crush_bucket_tree_choose(struct crush_bucket_tree *bucket, int x, int r)
|
||||
static int bucket_tree_choose(struct crush_bucket_tree *bucket,
|
||||
int x, int r)
|
||||
{
|
||||
int n, l;
|
||||
__u32 w;
|
||||
@ -109,8 +121,8 @@ crush_bucket_tree_choose(struct crush_bucket_tree *bucket, int x, int r)
|
||||
w = bucket->node_weights[n];
|
||||
t = (__u64)crush_hash32_4(x, n, r, bucket->h.id) * (__u64)w;
|
||||
t = t >> 32;
|
||||
|
||||
/* left or right? */
|
||||
|
||||
/* descend to the left or right? */
|
||||
l = left(n);
|
||||
if (t < bucket->node_weights[l])
|
||||
n = l;
|
||||
@ -124,15 +136,15 @@ crush_bucket_tree_choose(struct crush_bucket_tree *bucket, int x, int r)
|
||||
|
||||
/* straw */
|
||||
|
||||
static int
|
||||
crush_bucket_straw_choose(struct crush_bucket_straw *bucket, int x, int r)
|
||||
static int bucket_straw_choose(struct crush_bucket_straw *bucket,
|
||||
int x, int r)
|
||||
{
|
||||
int i;
|
||||
int high = 0;
|
||||
__u64 high_draw = 0;
|
||||
__u64 draw;
|
||||
|
||||
for (i=0; i<bucket->h.size; i++) {
|
||||
|
||||
for (i = 0; i < bucket->h.size; i++) {
|
||||
draw = crush_hash32_3(x, bucket->h.items[i], r);
|
||||
draw &= 0xffff;
|
||||
draw *= bucket->straws[i];
|
||||
@ -141,34 +153,62 @@ crush_bucket_straw_choose(struct crush_bucket_straw *bucket, int x, int r)
|
||||
high_draw = draw;
|
||||
}
|
||||
}
|
||||
|
||||
return bucket->h.items[high];
|
||||
}
|
||||
|
||||
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
|
||||
{
|
||||
switch (in->alg) {
|
||||
case CRUSH_BUCKET_UNIFORM:
|
||||
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
|
||||
x, r);
|
||||
case CRUSH_BUCKET_LIST:
|
||||
return bucket_list_choose((struct crush_bucket_list *)in, x, r);
|
||||
case CRUSH_BUCKET_TREE:
|
||||
return bucket_tree_choose((struct crush_bucket_tree *)in, x, r);
|
||||
case CRUSH_BUCKET_STRAW:
|
||||
return bucket_straw_choose((struct crush_bucket_straw *)in,
|
||||
x, r);
|
||||
default:
|
||||
BUG_ON(1);
|
||||
return in->items[0];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** crush proper **/
|
||||
|
||||
|
||||
/*
|
||||
* true if device is marked "out" (failed, fully offloaded)
|
||||
* of the cluster
|
||||
*/
|
||||
static int is_out(struct crush_map *map, int item, int x)
|
||||
{
|
||||
if (map->device_offload[item]) {
|
||||
if (map->device_offload[item] >= 0x10000)
|
||||
if (map->device_offload[item] >= 0x10000)
|
||||
return 1;
|
||||
else if ((crush_hash32_2(x, item) & 0xffff) < map->device_offload[item])
|
||||
else if ((crush_hash32_2(x, item) & 0xffff) <
|
||||
map->device_offload[item])
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* choose numrep distinct items of given type
|
||||
/**
|
||||
* crush_choose - choose numrep distinct items of given type
|
||||
* @map: the crush_map
|
||||
* @bucket: the bucket we are choosing an item from
|
||||
* @x: crush input value
|
||||
* @numrep: the number of items to choose
|
||||
* @type: the type of item to choose
|
||||
* @out: pointer to output vector
|
||||
* @outpos: our position in that vector
|
||||
* @firstn: true if choosing "first n" items, false if choosing "indep"
|
||||
* @recurse_to_leaf: true if we want one device under each item of given type
|
||||
* @out2: second output vector for leaf items (if @recurse_to_leaf)
|
||||
*/
|
||||
static int crush_choose(struct crush_map *map,
|
||||
struct crush_bucket *bucket,
|
||||
int x, int numrep, int type,
|
||||
int *out, int outpos,
|
||||
int *out, int outpos,
|
||||
int firstn, int recurse_to_leaf,
|
||||
int *out2)
|
||||
{
|
||||
@ -181,7 +221,7 @@ static int crush_choose(struct crush_map *map,
|
||||
int item;
|
||||
int itemtype;
|
||||
int collide, reject;
|
||||
|
||||
|
||||
for (rep = outpos; rep < numrep; rep++) {
|
||||
/* keep trying until we get a non-out, non-colliding item */
|
||||
ftotal = 0;
|
||||
@ -189,7 +229,7 @@ static int crush_choose(struct crush_map *map,
|
||||
do {
|
||||
retry_descent = 0;
|
||||
in = bucket; /* initial bucket */
|
||||
|
||||
|
||||
/* choose through intervening buckets */
|
||||
flocal = 0;
|
||||
do {
|
||||
@ -197,65 +237,55 @@ static int crush_choose(struct crush_map *map,
|
||||
r = rep;
|
||||
if (in->alg == CRUSH_BUCKET_UNIFORM) {
|
||||
/* be careful */
|
||||
if (firstn || numrep >= in->size)
|
||||
r += ftotal; /* r' = r + f_total */
|
||||
if (firstn || numrep >= in->size)
|
||||
/* r' = r + f_total */
|
||||
r += ftotal;
|
||||
else if (in->size % numrep == 0)
|
||||
r += (numrep+1) * flocal; /* r'=r+(n+1)*f_local */
|
||||
/* r'=r+(n+1)*f_local */
|
||||
r += (numrep+1) * flocal;
|
||||
else
|
||||
r += numrep * flocal; /* r' = r + n*f_local */
|
||||
/* r' = r + n*f_local */
|
||||
r += numrep * flocal;
|
||||
} else {
|
||||
if (firstn)
|
||||
r += ftotal; /* r' = r + f_total */
|
||||
else
|
||||
r += numrep * flocal; /* r' = r + n*f_local */
|
||||
if (firstn)
|
||||
/* r' = r + f_total */
|
||||
r += ftotal;
|
||||
else
|
||||
/* r' = r + n*f_local */
|
||||
r += numrep * flocal;
|
||||
}
|
||||
|
||||
/* bucket choose */
|
||||
switch (in->alg) {
|
||||
case CRUSH_BUCKET_UNIFORM:
|
||||
item = crush_bucket_uniform_choose((struct crush_bucket_uniform*)in, x, r);
|
||||
break;
|
||||
case CRUSH_BUCKET_LIST:
|
||||
item = crush_bucket_list_choose((struct crush_bucket_list*)in, x, r);
|
||||
break;
|
||||
case CRUSH_BUCKET_TREE:
|
||||
item = crush_bucket_tree_choose((struct crush_bucket_tree*)in, x, r);
|
||||
break;
|
||||
case CRUSH_BUCKET_STRAW:
|
||||
item = crush_bucket_straw_choose((struct crush_bucket_straw*)in, x, r);
|
||||
break;
|
||||
default:
|
||||
BUG_ON(1);
|
||||
item = in->items[0];
|
||||
}
|
||||
item = crush_bucket_choose(in, x, r);
|
||||
BUG_ON(item >= map->max_devices);
|
||||
|
||||
|
||||
/* desired type? */
|
||||
if (item < 0)
|
||||
if (item < 0)
|
||||
itemtype = map->buckets[-1-item]->type;
|
||||
else
|
||||
else
|
||||
itemtype = 0;
|
||||
|
||||
|
||||
/* keep going? */
|
||||
if (itemtype != type) {
|
||||
BUG_ON(item >= 0 || (-1-item) >= map->max_buckets);
|
||||
BUG_ON(item >= 0 ||
|
||||
(-1-item) >= map->max_buckets);
|
||||
in = map->buckets[-1-item];
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
/* collision? */
|
||||
collide = 0;
|
||||
for (i=0; i<outpos; i++) {
|
||||
for (i = 0; i < outpos; i++) {
|
||||
if (out[i] == item) {
|
||||
collide = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* out? */
|
||||
if (itemtype == 0)
|
||||
if (itemtype == 0)
|
||||
reject = is_out(map, item, x);
|
||||
else
|
||||
else
|
||||
reject = 0;
|
||||
|
||||
if (recurse_to_leaf &&
|
||||
@ -264,34 +294,46 @@ static int crush_choose(struct crush_map *map,
|
||||
out2+outpos, 0,
|
||||
firstn, 0, NULL))
|
||||
reject = 1;
|
||||
|
||||
|
||||
if (reject || collide) {
|
||||
ftotal++;
|
||||
flocal++;
|
||||
|
||||
if (collide && flocal < 3)
|
||||
retry_bucket = 1; /* retry locally a few times */
|
||||
|
||||
if (collide && flocal < 3)
|
||||
/* retry locally a few times */
|
||||
retry_bucket = 1;
|
||||
else if (ftotal < 10)
|
||||
retry_descent = 1; /* then retry descent */
|
||||
/* then retry descent */
|
||||
retry_descent = 1;
|
||||
else
|
||||
skip_rep = 1; /* else give up */
|
||||
/* else give up */
|
||||
skip_rep = 1;
|
||||
}
|
||||
} while (retry_bucket);
|
||||
} while (retry_descent);
|
||||
|
||||
|
||||
if (skip_rep) continue;
|
||||
|
||||
out[outpos] = item;
|
||||
outpos++;
|
||||
}
|
||||
|
||||
|
||||
return outpos;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* crush_do_rule - calculate a mapping with the given input and rule
|
||||
* @map: the crush_map
|
||||
* @ruleno: the rule id
|
||||
* @x: hash input
|
||||
* @result: pointer to result vector
|
||||
* @result_max: maximum result size
|
||||
* @force: force initial replica choice; -1 for none
|
||||
*/
|
||||
int crush_do_rule(struct crush_map *map,
|
||||
int ruleno, int x, int *result, int result_max,
|
||||
int force) /* -1 for none */
|
||||
int force)
|
||||
{
|
||||
int result_len;
|
||||
int force_context[CRUSH_MAX_DEPTH];
|
||||
@ -309,7 +351,7 @@ int crush_do_rule(struct crush_map *map,
|
||||
int step;
|
||||
int i,j;
|
||||
int numrep;
|
||||
|
||||
|
||||
BUG_ON(ruleno >= map->max_rules);
|
||||
rule = map->rules[ruleno];
|
||||
result_len = 0;
|
||||
@ -340,7 +382,7 @@ int crush_do_rule(struct crush_map *map,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (step = 0; step < rule->len; step++) {
|
||||
switch (rule->steps[step].op) {
|
||||
case CRUSH_RULE_TAKE:
|
||||
@ -351,23 +393,23 @@ int crush_do_rule(struct crush_map *map,
|
||||
}
|
||||
wsize = 1;
|
||||
break;
|
||||
|
||||
|
||||
case CRUSH_RULE_CHOOSE_FIRSTN:
|
||||
case CRUSH_RULE_CHOOSE_INDEP:
|
||||
case CRUSH_RULE_CHOOSE_LEAF_FIRSTN:
|
||||
case CRUSH_RULE_CHOOSE_LEAF_INDEP:
|
||||
BUG_ON(wsize == 0);
|
||||
|
||||
|
||||
/* reset output */
|
||||
osize = 0;
|
||||
|
||||
|
||||
recurse_to_leaf = rule->steps[step].op >=
|
||||
CRUSH_RULE_CHOOSE_LEAF_FIRSTN;
|
||||
for (i = 0; i < wsize; i++) {
|
||||
/*
|
||||
* see CRUSH_N, CRUSH_N_MINUS macros.
|
||||
* basically, numrep <= 0 means relative to
|
||||
* the provided result_max
|
||||
* the provided result_max
|
||||
*/
|
||||
numrep = rule->steps[step].arg1;
|
||||
if (numrep <= 0) {
|
||||
@ -391,8 +433,11 @@ int crush_do_rule(struct crush_map *map,
|
||||
}
|
||||
osize += crush_choose(map,
|
||||
map->buckets[-1-w[i]],
|
||||
x, numrep, rule->steps[step].arg2,
|
||||
o+osize, j, rule->steps[step].op == CRUSH_RULE_CHOOSE_FIRSTN,
|
||||
x, numrep,
|
||||
rule->steps[step].arg2,
|
||||
o+osize, j,
|
||||
rule->steps[step].op ==
|
||||
CRUSH_RULE_CHOOSE_FIRSTN,
|
||||
recurse_to_leaf, c+osize);
|
||||
}
|
||||
|
||||
@ -405,9 +450,9 @@ int crush_do_rule(struct crush_map *map,
|
||||
o = w;
|
||||
w = tmp;
|
||||
wsize = osize;
|
||||
break;
|
||||
|
||||
|
||||
break;
|
||||
|
||||
|
||||
case CRUSH_RULE_EMIT:
|
||||
for (i=0; i<wsize && result_len < result_max; i++) {
|
||||
result[result_len] = w[i];
|
||||
@ -415,12 +460,12 @@ int crush_do_rule(struct crush_map *map,
|
||||
}
|
||||
wsize = 0;
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
BUG_ON(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return result_len;
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,10 @@
|
||||
|
||||
#include "crush.h"
|
||||
|
||||
/*
|
||||
* CRUSH functions for finding rules and then mapping an input to an
|
||||
* output set.
|
||||
*/
|
||||
extern int crush_find_rule(struct crush_map *map, int pool, int type, int size);
|
||||
extern int crush_do_rule(struct crush_map *map,
|
||||
int ruleno,
|
||||
|
@ -19,12 +19,12 @@ int main()
|
||||
int root;
|
||||
int ruleno;
|
||||
int r[10];
|
||||
|
||||
|
||||
int uw[10] = { 1000, 1000, 500, 1000, 2000, 1000, 1000, 3000, 1000, 500 };
|
||||
|
||||
struct crush_bucket *b;
|
||||
struct crush_rule *rule;
|
||||
|
||||
|
||||
struct crush_map *map = crush_create();
|
||||
|
||||
d = 0;
|
||||
@ -59,7 +59,7 @@ int main()
|
||||
}
|
||||
|
||||
for (i=0; i<100; i += 10)
|
||||
printf("%2d : %d\n", i, o[i]);
|
||||
printf("%2d : %d\n", i, o[i]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user