mirror of
https://github.com/ceph/ceph
synced 2024-12-23 03:44:23 +00:00
9213a23f14
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1138 29311d96-e01e-0410-9327-a35deaab8ce9
589 lines
14 KiB
C++
589 lines
14 KiB
C++
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
|
/*
|
|
* Ceph - scalable distributed file system
|
|
*
|
|
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
|
|
*
|
|
* This is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License version 2.1, as published by the Free Software
|
|
* Foundation. See file COPYING.
|
|
*
|
|
*/
|
|
|
|
|
|
#ifndef __CLIENT_H
|
|
#define __CLIENT_H
|
|
|
|
|
|
#include "mds/MDSMap.h"
|
|
#include "osd/OSDMap.h"
|
|
#include "mon/MonMap.h"
|
|
|
|
#include "msg/Message.h"
|
|
#include "msg/Dispatcher.h"
|
|
#include "msg/Messenger.h"
|
|
#include "msg/SerialMessenger.h"
|
|
|
|
#include "messages/MClientRequest.h"
|
|
#include "messages/MClientReply.h"
|
|
|
|
//#include "msgthread.h"
|
|
|
|
#include "include/types.h"
|
|
#include "include/lru.h"
|
|
#include "include/filepath.h"
|
|
#include "include/interval_set.h"
|
|
|
|
#include "common/Mutex.h"
|
|
|
|
#include "FileCache.h"
|
|
|
|
// stl
|
|
#include <set>
|
|
#include <map>
|
|
using namespace std;
|
|
|
|
#include <ext/hash_map>
|
|
using namespace __gnu_cxx;
|
|
|
|
#define O_LAZY 01000000
|
|
|
|
|
|
class Filer;
|
|
class Objecter;
|
|
class ObjectCacher;
|
|
|
|
extern class LogType client_logtype;
|
|
extern class Logger *client_logger;
|
|
|
|
|
|
|
|
// ============================================
|
|
// types for my local metadata cache
|
|
/* basic structure:
|
|
|
|
- Dentries live in an LRU loop. they get expired based on last access.
|
|
see include/lru.h. items can be bumped to "mid" or "top" of list, etc.
|
|
- Inode has ref count for each Fh, Dir, or Dentry that points to it.
|
|
- when Inode ref goes to 0, it's expired.
|
|
- when Dir is empty, it's removed (and its Inode ref--)
|
|
|
|
*/
|
|
|
|
typedef int fh_t;
|
|
|
|
class Dir;
|
|
class Inode;
|
|
|
|
class Dentry : public LRUObject {
|
|
public:
|
|
string name; // sort of lame
|
|
//const char *name;
|
|
Dir *dir;
|
|
Inode *inode;
|
|
int ref; // 1 if there's a dir beneath me.
|
|
|
|
void get() { assert(ref == 0); ref++; lru_pin(); }
|
|
void put() { assert(ref == 1); ref--; lru_unpin(); }
|
|
|
|
Dentry() : dir(0), inode(0), ref(0) { }
|
|
|
|
/*Dentry() : name(0), dir(0), inode(0), ref(0) { }
|
|
Dentry(string& n) : name(0), dir(0), inode(0), ref(0) {
|
|
name = new char[n.length()+1];
|
|
strcpy((char*)name, n.c_str());
|
|
}
|
|
~Dentry() {
|
|
delete[] name;
|
|
}*/
|
|
};
|
|
|
|
class Dir {
|
|
public:
|
|
Inode *parent_inode; // my inode
|
|
//hash_map<const char*, Dentry*, hash<const char*>, eqstr> dentries;
|
|
hash_map<string, Dentry*> dentries;
|
|
|
|
Dir(Inode* in) { parent_inode = in; }
|
|
|
|
bool is_empty() { return dentries.empty(); }
|
|
};
|
|
|
|
|
|
// Capability record; Inode keeps one of these per mds.
class InodeCap {
 public:
  int  caps;   // capability bits (OR'ed together by Inode::file_caps)
  long seq;    // presumably the cap sequence number -- TODO confirm

  // Starts with no caps and seq 0.
  InodeCap()
    : caps(0),
      seq(0)
  { }
};
|
|
|
|
|
|
class Inode {
|
|
public:
|
|
inode_t inode; // the actual inode
|
|
time_t valid_until;
|
|
|
|
// about the dir (if this is one!)
|
|
int dir_auth;
|
|
set<int> dir_contacts;
|
|
bool dir_hashed, dir_replicated;
|
|
|
|
// per-mds caps
|
|
map<int,InodeCap> caps; // mds -> InodeCap
|
|
map<int,InodeCap> stale_caps; // mds -> cap .. stale
|
|
|
|
time_t file_wr_mtime; // [writers] time of last write
|
|
off_t file_wr_size; // [writers] largest offset we've written to
|
|
int num_open_rd, num_open_wr, num_open_lazy; // num readers, writers
|
|
|
|
int ref; // ref count. 1 for each dentry, fh that links to me.
|
|
Dir *dir; // if i'm a dir.
|
|
Dentry *dn; // if i'm linked to a dentry.
|
|
string *symlink; // symlink content, if it's a symlink
|
|
|
|
// for caching i/o mode
|
|
FileCache fc;
|
|
|
|
// for sync i/o mode
|
|
int sync_reads; // sync reads in progress
|
|
int sync_writes; // sync writes in progress
|
|
|
|
list<Cond*> waitfor_write;
|
|
list<Cond*> waitfor_read;
|
|
list<Cond*> waitfor_lazy;
|
|
list<Context*> waitfor_no_read, waitfor_no_write;
|
|
|
|
void get() {
|
|
ref++;
|
|
//cout << "inode.get on " << hex << inode.ino << dec << " now " << ref << endl;
|
|
}
|
|
void put() {
|
|
ref--; assert(ref >= 0);
|
|
//cout << "inode.put on " << hex << inode.ino << dec << " now " << ref << endl;
|
|
}
|
|
|
|
Inode(inode_t _inode, ObjectCacher *_oc) :
|
|
inode(_inode),
|
|
valid_until(0),
|
|
dir_auth(-1), dir_hashed(false), dir_replicated(false),
|
|
file_wr_mtime(0), file_wr_size(0),
|
|
num_open_rd(0), num_open_wr(0), num_open_lazy(0),
|
|
ref(0), dir(0), dn(0), symlink(0),
|
|
fc(_oc, _inode),
|
|
sync_reads(0), sync_writes(0)
|
|
{ }
|
|
~Inode() {
|
|
if (symlink) { delete symlink; symlink = 0; }
|
|
}
|
|
|
|
inodeno_t ino() { return inode.ino; }
|
|
|
|
bool is_dir() {
|
|
return (inode.mode & INODE_TYPE_MASK) == INODE_MODE_DIR;
|
|
}
|
|
|
|
int file_caps() {
|
|
int c = 0;
|
|
for (map<int,InodeCap>::iterator it = caps.begin();
|
|
it != caps.end();
|
|
it++)
|
|
c |= it->second.caps;
|
|
for (map<int,InodeCap>::iterator it = stale_caps.begin();
|
|
it != stale_caps.end();
|
|
it++)
|
|
c |= it->second.caps;
|
|
return c;
|
|
}
|
|
|
|
int file_caps_wanted() {
|
|
int w = 0;
|
|
if (num_open_rd) w |= CAP_FILE_RD|CAP_FILE_RDCACHE;
|
|
if (num_open_wr) w |= CAP_FILE_WR|CAP_FILE_WRBUFFER;
|
|
if (num_open_lazy) w |= CAP_FILE_LAZYIO;
|
|
return w;
|
|
}
|
|
|
|
int authority(MDSMap *mdsmap) {
|
|
//cout << "authority on " << inode.ino << " .. dir_auth is " << dir_auth<< endl;
|
|
// parent?
|
|
if (dn && dn->dir && dn->dir->parent_inode) {
|
|
// parent hashed?
|
|
if (dn->dir->parent_inode->dir_hashed) {
|
|
// hashed
|
|
assert(0);
|
|
// fixme
|
|
//return mdcluster->hash_dentry( dn->dir->parent_inode->ino(),
|
|
//dn->name );
|
|
}
|
|
|
|
if (dir_auth >= 0)
|
|
return dir_auth;
|
|
else
|
|
return dn->dir->parent_inode->authority(mdsmap);
|
|
}
|
|
|
|
if (dir_auth >= 0)
|
|
return dir_auth;
|
|
|
|
assert(0); // !!!
|
|
return 0;
|
|
}
|
|
int dentry_authority(const char *dn,
|
|
MDSMap *mdsmap) {
|
|
assert(0);
|
|
return 0;
|
|
//return ->hash_dentry( ino(),
|
|
//dn );
|
|
}
|
|
int pick_replica(MDSMap *mdsmap) {
|
|
// replicas?
|
|
if (ino() > 1ULL && dir_contacts.size()) {
|
|
//cout << "dir_contacts if " << dir_contacts << endl;
|
|
set<int>::iterator it = dir_contacts.begin();
|
|
if (dir_contacts.size() == 1)
|
|
return *it;
|
|
else {
|
|
int r = rand() % dir_contacts.size();
|
|
while (r--) it++;
|
|
return *it;
|
|
}
|
|
}
|
|
|
|
if (dir_replicated || ino() == 1) {
|
|
//cout << "num_mds is " << mdcluster->get_num_mds() << endl;
|
|
return rand() % mdsmap->get_num_mds(); // huh.. pick a random mds!
|
|
}
|
|
else
|
|
return authority(mdsmap);
|
|
}
|
|
|
|
|
|
// open Dir for an inode. if it's not open, allocated it (and pin dentry in memory).
|
|
Dir *open_dir() {
|
|
if (!dir) {
|
|
if (dn) dn->get(); // pin dentry
|
|
get();
|
|
dir = new Dir(this);
|
|
}
|
|
return dir;
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// file handle for any open file state
|
|
|
|
struct Fh {
|
|
Inode *inode;
|
|
off_t pos;
|
|
int mds; // have to talk to mds we opened with (for now)
|
|
int mode; // the mode i opened the file with
|
|
|
|
bool is_lazy() { return mode & O_LAZY; }
|
|
|
|
Fh() : inode(0), pos(0), mds(0), mode(0) {}
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// ========================================================
|
|
// client interface
|
|
|
|
class Client : public Dispatcher {
|
|
public:
|
|
|
|
/* getdir result */
|
|
struct DirResult {
|
|
string path;
|
|
map<string,inode_t> contents;
|
|
map<string,inode_t>::iterator p;
|
|
int off;
|
|
int size;
|
|
struct dirent_plus dp;
|
|
struct dirent_lite dl;
|
|
DirResult() : p(contents.end()), off(-1), size(0) {}
|
|
};
|
|
|
|
|
|
protected:
|
|
Messenger *messenger;
|
|
int whoami;
|
|
MonMap *monmap;
|
|
|
|
// mds fake RPC
|
|
tid_t last_tid;
|
|
map<tid_t, Cond*> mds_rpc_cond;
|
|
map<tid_t, class MClientReply*> mds_rpc_reply;
|
|
map<tid_t, Cond*> mds_rpc_dispatch_cond;
|
|
|
|
// cluster descriptors
|
|
MDSMap *mdsmap;
|
|
OSDMap *osdmap;
|
|
|
|
bool mounted;
|
|
bool unmounting;
|
|
Cond mount_cond;
|
|
|
|
int unsafe_sync_write;
|
|
public:
|
|
entity_name_t get_myname() { return messenger->get_myname(); }
|
|
void hack_sync_write_safe();
|
|
|
|
protected:
|
|
Filer *filer;
|
|
ObjectCacher *objectcacher;
|
|
Objecter *objecter; // (non-blocking) osd interface
|
|
|
|
// cache
|
|
hash_map<inodeno_t, Inode*> inode_map;
|
|
Inode* root;
|
|
LRU lru; // lru list of Dentry's in our local metadata cache.
|
|
|
|
// cap weirdness
|
|
map<inodeno_t, map<int, class MClientFileCaps*> > cap_reap_queue; // ino -> mds -> msg .. set of (would-be) stale caps to reap
|
|
|
|
|
|
// file handles, etc.
|
|
string cwd;
|
|
interval_set<fh_t> free_fh_set; // unused fh's
|
|
hash_map<fh_t, Fh*> fh_map;
|
|
|
|
fh_t get_fh() {
|
|
fh_t fh = free_fh_set.start();
|
|
free_fh_set.erase(fh, 1);
|
|
return fh;
|
|
}
|
|
void put_fh(fh_t fh) {
|
|
free_fh_set.insert(fh, 1);
|
|
}
|
|
|
|
void mkabspath(const char *rel, string& abs) {
|
|
if (rel[0] == '/') {
|
|
abs = rel;
|
|
} else {
|
|
abs = cwd;
|
|
abs += "/";
|
|
abs += rel;
|
|
}
|
|
}
|
|
|
|
|
|
// global client lock
|
|
// - protects Client and buffer cache both!
|
|
Mutex client_lock;
|
|
|
|
|
|
// -- metadata cache stuff
|
|
|
|
// decrease inode ref. delete if dangling.
|
|
void put_inode(Inode *in) {
|
|
in->put();
|
|
if (in->ref == 0) {
|
|
inode_map.erase(in->inode.ino);
|
|
if (in == root) root = 0;
|
|
delete in;
|
|
}
|
|
}
|
|
|
|
void close_dir(Dir *dir) {
|
|
assert(dir->is_empty());
|
|
|
|
Inode *in = dir->parent_inode;
|
|
if (in->dn) in->dn->put(); // unpin dentry
|
|
|
|
delete in->dir;
|
|
in->dir = 0;
|
|
put_inode(in);
|
|
}
|
|
|
|
int get_cache_size() { return lru.lru_get_size(); }
|
|
void set_cache_size(int m) { lru.lru_set_max(m); }
|
|
|
|
Dentry* link(Dir *dir, const string& name, Inode *in) {
|
|
Dentry *dn = new Dentry;
|
|
dn->name = name;
|
|
|
|
// link to dir
|
|
dn->dir = dir;
|
|
dir->dentries[dn->name] = dn;
|
|
|
|
// link to inode
|
|
dn->inode = in;
|
|
in->dn = dn;
|
|
in->get();
|
|
|
|
lru.lru_insert_mid(dn); // mid or top?
|
|
return dn;
|
|
}
|
|
|
|
void unlink(Dentry *dn) {
|
|
Inode *in = dn->inode;
|
|
|
|
// unlink from inode
|
|
dn->inode = 0;
|
|
in->dn = 0;
|
|
put_inode(in);
|
|
|
|
// unlink from dir
|
|
dn->dir->dentries.erase(dn->name);
|
|
if (dn->dir->is_empty())
|
|
close_dir(dn->dir);
|
|
dn->dir = 0;
|
|
|
|
// delete den
|
|
lru.lru_remove(dn);
|
|
delete dn;
|
|
}
|
|
|
|
Dentry *relink(Dentry *dn, Dir *dir, const string& name) {
|
|
// first link new dn to dir
|
|
/*
|
|
char *oldname = (char*)dn->name;
|
|
dn->name = new char[name.length()+1];
|
|
strcpy((char*)dn->name, name.c_str());
|
|
dir->dentries[dn->name] = dn;
|
|
*/
|
|
dir->dentries[name] = dn;
|
|
|
|
// unlink from old dir
|
|
dn->dir->dentries.erase(dn->name);
|
|
//delete[] oldname;
|
|
if (dn->dir->is_empty())
|
|
close_dir(dn->dir);
|
|
|
|
// fix up dn
|
|
dn->name = name;
|
|
dn->dir = dir;
|
|
|
|
return dn;
|
|
}
|
|
|
|
// move dentry to top of lru
|
|
void touch_dn(Dentry *dn) { lru.lru_touch(dn); }
|
|
|
|
// trim cache.
|
|
void trim_cache();
|
|
void dump_inode(Inode *in, set<Inode*>& did);
|
|
void dump_cache(); // debug
|
|
|
|
// find dentry based on filepath
|
|
Dentry *lookup(filepath& path);
|
|
|
|
// make blocking mds request
|
|
MClientReply *make_request(MClientRequest *req, bool auth_best=false, int use_auth=-1);
|
|
MClientReply* sendrecv(MClientRequest *req, int mds);
|
|
void handle_client_reply(MClientReply *reply);
|
|
|
|
void fill_stat(inode_t& inode, struct stat *st);
|
|
void fill_statlite(inode_t& inode, struct statlite *st);
|
|
|
|
|
|
// friends
|
|
friend class SyntheticClient;
|
|
|
|
public:
|
|
Client(Messenger *m, MonMap *mm);
|
|
~Client();
|
|
void tear_down_cache();
|
|
|
|
int get_nodeid() { return whoami; }
|
|
|
|
void init();
|
|
void shutdown();
|
|
|
|
// messaging
|
|
void dispatch(Message *m);
|
|
|
|
void handle_mount_ack(class MClientMountAck*);
|
|
void handle_unmount_ack(Message*);
|
|
void handle_mds_map(class MMDSMap *m);
|
|
|
|
// file caps
|
|
void handle_file_caps(class MClientFileCaps *m);
|
|
void implemented_caps(class MClientFileCaps *m, Inode *in);
|
|
void release_caps(Inode *in, int retain=0);
|
|
void update_caps_wanted(Inode *in);
|
|
|
|
void close_release(Inode *in);
|
|
void close_safe(Inode *in);
|
|
|
|
// metadata cache
|
|
Inode* insert_inode(Dir *dir, InodeStat *in_info, const string& dn);
|
|
void update_inode_dist(Inode *in, InodeStat *st);
|
|
Inode* insert_trace(MClientReply *reply);
|
|
|
|
// ----------------------
|
|
// fs ops.
|
|
int mount();
|
|
int unmount();
|
|
|
|
// these shoud (more or less) mirror the actual system calls.
|
|
int statfs(const char *path, struct statvfs *stbuf);
|
|
|
|
// crap
|
|
int chdir(const char *s);
|
|
|
|
// namespace ops
|
|
int getdir(const char *path, list<string>& contents);
|
|
int getdir(const char *path, map<string,inode_t>& contents);
|
|
|
|
DIR *opendir(const char *name);
|
|
int closedir(DIR *dir);
|
|
struct dirent *readdir(DIR *dir);
|
|
void rewinddir(DIR *dir);
|
|
off_t telldir(DIR *dir);
|
|
void seekdir(DIR *dir, off_t offset);
|
|
|
|
struct dirent_plus *readdirplus(DIR *dirp);
|
|
int readdirplus_r(DIR *dirp, struct dirent_plus *entry, struct dirent_plus **result);
|
|
struct dirent_lite *readdirlite(DIR *dirp);
|
|
int readdirlite_r(DIR *dirp, struct dirent_lite *entry, struct dirent_lite **result);
|
|
|
|
|
|
int link(const char *existing, const char *newname);
|
|
int unlink(const char *path);
|
|
int rename(const char *from, const char *to);
|
|
|
|
// dirs
|
|
int mkdir(const char *path, mode_t mode);
|
|
int rmdir(const char *path);
|
|
|
|
// symlinks
|
|
int readlink(const char *path, char *buf, off_t size);
|
|
int symlink(const char *existing, const char *newname);
|
|
|
|
// inode stuff
|
|
int _lstat(const char *path, int mask, Inode **in);
|
|
int lstat(const char *path, struct stat *stbuf);
|
|
int lstatlite(const char *path, struct statlite *buf);
|
|
|
|
int chmod(const char *path, mode_t mode);
|
|
int chown(const char *path, uid_t uid, gid_t gid);
|
|
int utime(const char *path, struct utimbuf *buf);
|
|
|
|
// file ops
|
|
int mknod(const char *path, mode_t mode);
|
|
int open(const char *path, int mode);
|
|
int close(fh_t fh);
|
|
int read(fh_t fh, char *buf, off_t size, off_t offset=-1);
|
|
int write(fh_t fh, const char *buf, off_t size, off_t offset=-1);
|
|
int truncate(const char *file, off_t size);
|
|
//int truncate(fh_t fh, long long size);
|
|
int fsync(fh_t fh, bool syncdataonly);
|
|
|
|
// hpc lazyio
|
|
int lazyio_propogate(int fd, off_t offset, size_t count);
|
|
int lazyio_synchronize(int fd, off_t offset, size_t count);
|
|
|
|
int describe_layout(char *fn, list<ObjectExtent>& result);
|
|
|
|
void ms_handle_failure(Message*, const entity_inst_t& inst);
|
|
};
|
|
|
|
#endif
|