ceph/branches/riccardo/monitor2/osd/Ager.cc
riccardo80 07ac5d3e74 creating branch for distributed monitor
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1068 29311d96-e01e-0410-9327-a35deaab8ce9
2007-02-01 05:43:23 +00:00

332 lines
7.9 KiB
C++

#include "include/types.h"
#include "Ager.h"
#include "ObjectStore.h"
#include "config.h"
#include "common/Clock.h"
// ick
#include "ebofs/Ebofs.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#ifdef DARWIN
#include <sys/param.h>
#include <sys/mount.h>
#endif // DARWIN
// Reproducible random source for the aging workload.
//
// Each call reseeds the C library generator with the next value of a private
// counter (0, 1, 2, ...) and returns the first rand() draw for that seed.  The
// result therefore depends only on how many times myrand() has been called,
// not on any earlier srand()/rand() use.  Note that it does overwrite the
// global rand() state as a side effect.
int myrand()
{
  static int next_seed = 0;

  srand(next_seed);
  next_seed++;
  return rand();
}
// Hand out an object id for a new object.
//
// Ids queued on age_free_oids are reused first (oldest first); only when that
// queue is empty is a brand new id taken from age_cur_oid, whose bno is then
// advanced by one for the next caller.
object_t Ager::age_get_oid() {
  if (age_free_oids.empty()) {
    // nothing to recycle: return the current id and step the counter
    object_t fresh = age_cur_oid;
    ++age_cur_oid.bno;
    return fresh;
  }

  object_t recycled = age_free_oids.front();
  age_free_oids.pop_front();
  return recycled;
}
// Choose the size of the next object to create.
//
// A value is sampled from file_size_distn and multiplied by 1024 (presumably
// KB -> bytes; confirm against the distribution's units).  The result is that
// value's half plus a myrand()-driven 0..99 two-hundredths of it, plus one, so
// it is always at least 1 and stays just below the scaled sample.
ssize_t Ager::age_pick_size() {
  const ssize_t cap = file_size_distn.sample() * 1024;
  const int steps = myrand() % 100;          // 0..99
  const ssize_t jitter = steps * cap / 200;  // up to 99/200 of cap
  return cap/2 + jitter + 1;
}
// Gate read by the extra-verification trigger in Ager::age_fill().  In the
// code visible here nothing sets it to true: the only assignment is the
// commented-out "if (c == 7) start_debug = true;" in Ager::age().
bool start_debug = false;
__uint64_t Ager::age_fill(float pc, utime_t until) {
int max = 1024*1024;
bufferptr bp(max);
bp.zero();
bufferlist bl;
bl.push_back(bp);
__uint64_t wrote = 0;
while (1) {
if (g_clock.now() > until) break;
struct statfs st;
store->statfs(&st);
float free = 1.0 - ((float)(st.f_bfree) / (float)st.f_blocks);
float avail = 1.0 - ((float)(st.f_bavail) / (float)st.f_blocks); // to write to
//float a = (float)(st.f_bfree) / (float)st.f_blocks;
//dout(10) << "age_fill at " << a << " / " << pc << " .. " << st.f_blocks << " " << st.f_bavail << endl;
if (free >= pc) {
dout(2) << "age_fill at " << free << " / " << avail << " / " << " / " << pc << " stopping" << endl;
break;
}
// make sure we can write to it..
if (avail > .98 ||
avail - free > .02)
store->sync();
object_t oid = age_get_oid();
int b = myrand() % 10;
age_objects[b].push_back(oid);
ssize_t s = age_pick_size();
wrote += (s + 4095) / 4096;
dout(2) << "age_fill at " << free << " / " << avail << " / " << pc << " creating " << hex << oid << dec << " sz " << s << endl;
if (false && !g_conf.ebofs_verify && start_debug && wrote > 1000000ULL) {
/*
1005700
?
1005000
1005700
1005710
1005725ULL
1005750ULL
1005800
1006000
// 99 1000500 ? 1000750 1006000
*/
g_conf.debug_ebofs = 30;
g_conf.ebofs_verify = true;
}
off_t off = 0;
while (s) {
ssize_t t = MIN(s, max);
bufferlist sbl;
sbl.substr_of(bl, 0, t);
store->write(oid, off, t, sbl, false);
off += t;
s -= t;
}
oid.bno++;
}
return wrote*4; // KB
}
// Remove previously created objects until the store's usage drops to `pc`.
//
// Every pass re-reads statfs from the store and stops once the used fraction
// (1 - f_bfree/f_blocks) is <= pc.  Otherwise a bucket of age_objects is
// chosen with myrand(); its oldest id is removed from the store and queued on
// age_free_oids for reuse.  Every 20th pass, and any pass whose bucket is
// empty, removes nothing (the syncs that used to happen there are disabled).
//
// If no bucket holds any object while usage is still above `pc`, the loop
// stops instead of spinning forever: nothing it does could lower usage any
// further.
//
// On return g_conf.ebofs_verify is always reset to false.
void Ager::age_empty(float pc) {
  int nper = 20;   // passes between the (now disabled) sync points
  int n = nper;

  while (1) {
    struct statfs st;
    store->statfs(&st);
    // Both values are "fraction in use": one by free blocks, one by blocks
    // available to write to.
    float used    = 1.0 - ((float)(st.f_bfree) / (float)st.f_blocks);
    float unavail = 1.0 - ((float)(st.f_bavail) / (float)st.f_blocks);
    dout(2) << "age_empty at " << used << " / " << unavail << " / " << pc << endl;
    if (used <= pc) {
      dout(2) << "age_empty at " << used << " / " << unavail << " / " << pc << " stopping" << endl;
      break;
    }

    int b = myrand() % 10;
    n--;
    if (n == 0 || age_objects[b].empty()) {
      if (age_objects[b].empty()) {
        // Only scan when the chosen bucket is empty, so the random sequence
        // of a run that still has objects is exactly what it was before.
        bool any_left = false;
        for (unsigned i = 0; i < age_objects.size(); i++) {
          if (!age_objects[i].empty()) {
            any_left = true;
            break;
          }
        }
        if (!any_left) {
          dout(2) << "age_empty at " << used << " / " << unavail << " / " << pc
                  << " nothing left to remove, stopping" << endl;
          break;
        }
      }
      dout(2) << "age_empty sync" << endl;
      n = nper;
      continue;
    }

    object_t oid = age_objects[b].front();
    age_objects[b].pop_front();
    dout(2) << "age_empty at " << used << " / " << unavail << " / " << pc << " removing " << hex << oid << dec << endl;
    store->remove(oid);
    age_free_oids.push_back(oid);
  }

  g_conf.ebofs_verify = false;
}
// Print one fragmentation report to stdout as tab-separated text.
//
// Output is a legend line followed by one data line: `written`, the summary
// fields of `st`, then (count, sum) pairs from extent_dist / extent_dist_sum
// for indices 1..30.  The pairs stop early as soon as the counts printed so
// far add up to exactly st.num_extent.
void pfrag(__uint64_t written, ObjectStore::FragmentationStat &st)
{
  // legend
  cout << "#gb wr\ttotal\tn x\tavg x\tavg per\tavg j\tfree\tn fr\tavg fr\tnum<2\tsum<2\tnum<4\tsum<4\t..."
       << endl;

  // summary columns
  cout << written;
  cout << "\t" << st.total;
  cout << "\t" << st.num_extent;
  cout << "\t" << st.avg_extent;
  cout << "\t" << st.avg_extent_per_object;
  cout << "\t" << st.avg_extent_jump;
  cout << "\t" << st.total_free;
  cout << "\t" << st.num_free_extent;
  cout << "\t" << st.avg_free_extent;

  // extent size histogram
  int unaccounted = st.num_extent;
  __uint64_t slot = 1;
  while (slot <= 30) {
    cout << "\t" << st.extent_dist[slot]
         << "\t" << st.extent_dist_sum[slot];
    unaccounted -= st.extent_dist[slot];
    if (unaccounted == 0)
      break;
    slot += 1;
  }
  cout << endl;
}
void Ager::age(int time,
float high_water, // fill to this %
float low_water, // then empty to this %
int count, // this many times
float final_water, // and end here ( <= low_water)
int fake_size_mb) {
store->_fake_writes(true);
srand(0);
utime_t start = g_clock.now();
utime_t until = start;
until.sec_ref() += time;
int elapsed = 0;
int freelist_inc = 60;
utime_t nextfl = start;
nextfl.sec_ref() += freelist_inc;
while (age_objects.size() < 10) age_objects.push_back( list<object_t>() );
if (fake_size_mb) {
int fake_bl = fake_size_mb * 256;
struct statfs st;
store->statfs(&st);
float f = (float)fake_bl / (float)st.f_blocks;
high_water = (float)high_water * f;
low_water = (float)low_water * f;
final_water = (float)final_water * f;
dout(2) << "fake " << fake_bl << " / " << st.f_blocks << " is " << f << ", high " << high_water << " low " << low_water << " final " << final_water << endl;
}
// init size distn (once)
if (!did_distn) {
did_distn = true;
age_cur_oid = object_t(0,1);
file_size_distn.add(1, 19.0758125+0.65434375);
file_size_distn.add(512, 35.6566);
file_size_distn.add(1024, 27.7271875);
file_size_distn.add(2*1024, 16.63503125);
//file_size_distn.add(4*1024, 106.82384375);
//file_size_distn.add(8*1024, 81.493375);
//file_size_distn.add(16*1024, 14.13553125);
//file_size_distn.add(32*1024, 2.176);
//file_size_distn.add(256*1024, 0.655938);
//file_size_distn.add(512*1024, 0.1480625);
//file_size_distn.add(1*1024*1024, 0.020125); // actually 2, but 32bit
file_size_distn.normalize();
}
// clear
for (int i=0; i<10; i++)
age_objects[i].clear();
ObjectStore::FragmentationStat st;
__uint64_t wrote = 0;
for (int c=1; c<=count; c++) {
if (g_clock.now() > until) break;
//if (c == 7) start_debug = true;
dout(1) << "#age " << c << "/" << count << " filling to " << high_water << endl;
__uint64_t w = age_fill(high_water, until);
//dout(1) << "age wrote " << w << endl;
wrote += w;
//store->sync();
//store->_get_frag_stat(st);
//pfrag(st);
if (c == count) {
dout(1) << "#age final empty to " << final_water << endl;
age_empty(final_water);
} else {
dout(1) << "#age " << c << "/" << count << " emptying to " << low_water << endl;
age_empty(low_water);
}
//store->sync();
//store->sync();
// show frag state
store->_get_frag_stat(st);
pfrag(wrote / (1024ULL*1024ULL) , // GB
st);
// dump freelist?
if (g_clock.now() > nextfl) {
elapsed += freelist_inc;
save_freelist(elapsed);
nextfl.sec_ref() += freelist_inc;
}
}
// dump the freelist
save_freelist(0);
exit(0); // hack
// ok!
store->_fake_writes(false);
store->sync();
store->sync();
dout(1) << "age finished" << endl;
}
// Read the file "ebofs.freelist" from the current directory and import its
// contents into the store as the freelist, then sync the store twice.
//
// Failures (file missing, cannot be opened, or not fully readable) are
// treated as fatal via assert, as the stat() failure already was.
//
// NOTE(review): save_freelist() writes "ebofs.freelist.<n>", not
// "ebofs.freelist"; presumably a snapshot is renamed by hand before loading.
void Ager::load_freelist()
{
  dout(1) << "load_freelist" << endl;

  struct stat st;
  int r = ::stat("ebofs.freelist", &st);
  assert(r == 0);

  // one buffer large enough for the whole file
  bufferptr bp(st.st_size);
  bufferlist bl;
  bl.push_back(bp);

  int fd = ::open("ebofs.freelist", O_RDONLY);
  assert(fd >= 0);

  // read() may return fewer bytes than requested; keep going until the whole
  // file is in, or until an error / premature end of file
  char *dst = bl.c_str();
  off_t got = 0;
  while (got < st.st_size) {
    ssize_t n = ::read(fd, dst + got, st.st_size - got);
    if (n <= 0)
      break;
    got += n;
  }
  ::close(fd);
  assert(got == st.st_size);

  ((Ebofs*)store)->_import_freelist(bl);
  store->sync();
  store->sync();
}
// Export the store's freelist and write it to "ebofs.freelist.<el>" in the
// current directory, replacing any existing file of that name.
//
// el: suffix for the file name (age() passes the elapsed seconds, or 0 for
//     the final dump).
//
// The file ends up with mode 0644.  If the file cannot be opened or fully
// written, the problem is logged and the function returns without raising.
void Ager::save_freelist(int el)
{
  dout(1) << "save_freelist " << el << endl;

  char s[100];
  snprintf(s, sizeof(s), "ebofs.freelist.%d", el);

  bufferlist bl;
  ((Ebofs*)store)->_export_freelist(bl);

  ::unlink(s);
  // O_CREAT requires the mode argument; without it the new file's permission
  // bits are indeterminate.  O_TRUNC covers the case where unlink() failed.
  int fd = ::open(s, O_CREAT|O_WRONLY|O_TRUNC, 0644);
  if (fd < 0) {
    dout(1) << "save_freelist unable to open " << s << endl;
    return;
  }
  ::fchmod(fd, 0644);   // exact mode regardless of umask

  // write() may accept fewer bytes than requested; loop until all are out
  const char *src = bl.c_str();
  size_t left = bl.length();
  while (left > 0) {
    ssize_t n = ::write(fd, src, left);
    if (n <= 0) {
      dout(1) << "save_freelist write to " << s << " failed with " << left << " bytes left" << endl;
      break;
    }
    src += n;
    left -= n;
  }
  ::close(fd);
}