mirror of
https://github.com/ceph/ceph
synced 2025-01-27 13:34:31 +00:00
07ac5d3e74
git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1068 29311d96-e01e-0410-9327-a35deaab8ce9
496 lines
11 KiB
C++
496 lines
11 KiB
C++
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
|
|
/*
|
|
* Ceph - scalable distributed file system
|
|
*
|
|
* Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
|
|
*
|
|
* This is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License version 2.1, as published by the Free Software
|
|
* Foundation. See file COPYING.
|
|
*
|
|
*/
|
|
|
|
|
|
|
|
#include "CInode.h"
|
|
#include "CDir.h"
|
|
#include "CDentry.h"
|
|
|
|
#include "MDS.h"
|
|
#include "MDCache.h"
|
|
#include "AnchorTable.h"
|
|
|
|
#include "common/Clock.h"
|
|
|
|
#include <string>
|
|
|
|
#include "config.h"
|
|
// Per-inode debug logging: `dout(level) << ... << endl;` prints to cout, prefixed
// with the current time, the MDS node id and this inode's number, when `level`
// is <= g_conf.debug or <= g_conf.debug_mds.
//  - The level argument is parenthesized so any expression can be passed
//    (it may be evaluated twice).
//  - The "if (!cond) {} else stream" shape keeps a following `else` in the
//    caller's code from binding to the macro's internal `if`.
// Must be expanded inside a CInode member (it reads `mdcache` and `inode`).
#undef dout
#define dout(x) if (!((x) <= g_conf.debug || (x) <= g_conf.debug_mds)) {} else cout << g_clock.now() << " mds" << mdcache->mds->get_nodeid() << ".cache.inode(" << inode.ino << ") "
|
|
|
|
|
|
// Global array of counters with CINODE_NUM_PINS slots.
// NOTE(review): nothing in this file writes to it; presumably indexed by
// CINODE_PIN_* and maintained by get()/put() in the header -- confirm.
int cinode_pins[CINODE_NUM_PINS]; // counts
|
|
|
|
|
|
// Debug pretty-printer for an inode.  Emits a single bracketed record:
//   [inode <ino> <path>[/] auth[+rank.nonce...] | rep@<auth>.<nonce>
//    [symlink] v<version> hard=<lock> file=<lock> [| pins...] [caps={...}] <addr>]
// Returns `out` so the call can be chained.
ostream& operator<<(ostream& out, CInode& in)
{
  string path;
  in.make_path(path);
  out << "[inode " << in.inode.ino << " " << path << (in.is_dir() ? "/ ":" ");

  if (in.is_auth()) {
    out << "auth";
    if (in.is_cached_by_anyone()) {
      // list every entry of the cached-by set together with its nonce
      //out << "+" << in.get_cached_by();
      for (set<int>::iterator it = in.cached_by_begin();
           it != in.cached_by_end();
           ++it) {
        out << "+" << *it << "." << in.get_cached_by_nonce(*it);
      }
    }
  } else {
    out << "rep@" << in.authority();
    //if (in.get_replica_nonce() > 1)
    out << "." << in.get_replica_nonce();
    assert(in.get_replica_nonce() >= 0);
  }

  if (in.is_symlink()) out << " symlink";

  out << " v" << in.get_version();

  out << " hard=" << in.hardlock;
  out << " file=" << in.filelock;

  if (in.is_pinned()) {
    out << " |";
    // known pin ids are printed by name, anything else numerically
    for (set<int>::iterator it = in.get_ref_set().begin();
         it != in.get_ref_set().end();
         ++it) {
      if (*it < CINODE_NUM_PINS)
        out << " " << cinode_pin_names[*it];
      else
        out << " " << *it;
    }
  }

  // hack: list the ids of the clients that currently hold capabilities
  if (!in.get_client_caps().empty()) {
    out << " caps={";
    for (map<int,Capability>::iterator it = in.get_client_caps().begin();
         it != in.get_client_caps().end();
         ++it) {
      if (it != in.get_client_caps().begin()) out << ",";
      out << it->first;
    }
    out << "}";
  }

  // address of the CInode object itself, to tell instances apart in logs
  out << " " << &in;
  out << "]";
  return out;
}
|
|
|
|
|
|
// ====== CInode =======
|
|
// Build an inode that belongs to cache `c`.  It starts unlinked (no parent
// dentry, no open dir), with all counters zeroed and no state bits set;
// when `auth` is true the AUTH state bit is set afterwards.
CInode::CInode(MDCache *c, bool auth) : LRUObject() {
  mdcache = c;

  // not attached to anything yet; the CDir is opened separately
  parent = NULL;
  dir = NULL;

  // reference and pin counters
  ref = 0;
  auth_pins = 0;
  nested_auth_pins = 0;
  num_request_pins = 0;

  // versions
  committed_version = 0;
  committing_version = 0;

  // state bits: cleared first, then AUTH applied on top
  state = 0;
  if (auth)
    state_set(CINODE_STATE_AUTH);
}
|
|
|
|
// Destroys the inode together with its CDir, if one was opened.
CInode::~CInode() {
  delete dir;   // deleting a null pointer is a no-op
  dir = 0;
}
|
|
|
|
// Directory holding our parent dentry, or NULL when there is no parent.
CDir *CInode::get_parent_dir()
{
  if (!parent)
    return NULL;
  return parent->dir;
}
|
|
// Inode of the directory holding our parent dentry, or NULL when there is
// no parent.
CInode *CInode::get_parent_inode()
{
  if (!parent)
    return NULL;
  return parent->dir->inode;
}
|
|
|
|
bool CInode::dir_is_auth() {
|
|
if (dir)
|
|
return dir->is_auth();
|
|
else
|
|
return is_auth();
|
|
}
|
|
|
|
// Return this directory inode's CDir, creating it on first use.
// Asserts that the inode is a directory; when a new CDir has to be created
// it additionally asserts that we are not frozen_dir and that we are auth.
CDir *CInode::get_or_open_dir(MDS *mds)
{
  assert(is_dir());

  if (!dir) {
    // can't open a dir if we're frozen_dir, bc of hashing stuff.
    assert(!is_frozen_dir());

    // only auth can open dir alone.
    assert(is_auth());

    CDir *opened = set_dir(new CDir(this, mds, true));
    opened->dir_auth = -1;
  }

  return dir;
}
|
|
|
|
// Attach `newdir` as this inode's CDir and return it.
// Asserts that no CDir is attached yet.
CDir *CInode::set_dir(CDir *newdir)
{
  assert(dir == 0);
  return dir = newdir;
}
|
|
|
|
void CInode::set_auth(bool a)
|
|
{
|
|
if (!is_dangling() && !is_root() &&
|
|
is_auth() != a) {
|
|
/*
|
|
CDir *dir = get_parent_dir();
|
|
if (is_auth() && !a)
|
|
dir->nauthitems--;
|
|
else
|
|
dir->nauthitems++;
|
|
*/
|
|
}
|
|
|
|
if (a) state_set(CINODE_STATE_AUTH);
|
|
else state_clear(CINODE_STATE_AUTH);
|
|
}
|
|
|
|
|
|
|
|
// Write this inode's path into `s`.
//  - linked inode: delegated to the parent dentry's make_path()
//  - root:         empty string
//  - otherwise:    the literal "(dangling)"
void CInode::make_path(string& s)
{
  if (parent) {
    parent->make_path(s);
    return;
  }

  s = is_root() ? "" : "(dangling)";
}
|
|
|
|
// Append one heap-allocated Anchor per path component to `trace`, ordered
// from the topmost ancestor down to this inode.
//  - linked inode:   ancestors are traced first (recursively), then an
//                    Anchor (ino, parent dir ino, dentry name) is added
//  - dangling inode: one Anchor whose dir ino is
//                    MDS_INO_INODEFILE_OFFSET + dangling_auth and whose
//                    name is empty
//  - otherwise:      nothing is added; asserts that this is the root
// The Anchor objects are allocated with `new` and not freed here.
void CInode::make_anchor_trace(vector<Anchor*>& trace)
{
  if (!parent) {
    if (!state_test(CINODE_STATE_DANGLING)) {
      assert(is_root());
      return;
    }

    dout(7) << "make_anchor_trace dangling " << ino() << " on mds " << dangling_auth << endl;
    string ref_dn;
    trace.push_back( new Anchor(ino(),
                                MDS_INO_INODEFILE_OFFSET+dangling_auth,
                                ref_dn) );
    return;
  }

  // ancestors go first
  CInode *diri = parent->dir->inode;
  diri->make_anchor_trace(trace);

  dout(7) << "make_anchor_trace adding " << ino() << " dirino " << diri->ino() << " dn " << parent->name << endl;
  trace.push_back( new Anchor(ino(),
                              diri->ino(),
                              parent->name) );
}
|
|
|
|
|
|
|
|
|
|
void CInode::mark_dirty() {
|
|
|
|
dout(10) << "mark_dirty " << *this << endl;
|
|
|
|
if (!parent) {
|
|
dout(10) << " dangling, not marking dirty!" << endl;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
NOTE: I may already be dirty, but this fn _still_ needs to be called so that
|
|
the directory is (perhaps newly) dirtied, and so that parent_dir_version is
|
|
updated below.
|
|
*/
|
|
|
|
// only auth can get dirty. "dirty" async data in replicas is relative to (say) filelock state, not dirty flag.
|
|
assert(is_auth());
|
|
|
|
// touch my private version
|
|
inode.version++;
|
|
if (!(state & CINODE_STATE_DIRTY)) {
|
|
state |= CINODE_STATE_DIRTY;
|
|
get(CINODE_PIN_DIRTY);
|
|
}
|
|
|
|
// relative to parent dir:
|
|
if (parent) {
|
|
// dir is now dirty (if it wasn't already)
|
|
parent->dir->mark_dirty();
|
|
|
|
// i now live in that (potentially newly dirty) version
|
|
parent_dir_version = parent->dir->get_version();
|
|
}
|
|
}
|
|
|
|
void CInode::mark_clean()
|
|
{
|
|
dout(10) << " mark_clean " << *this << endl;
|
|
if (state & CINODE_STATE_DIRTY) {
|
|
state &= ~CINODE_STATE_DIRTY;
|
|
put(CINODE_PIN_DIRTY);
|
|
}
|
|
}
|
|
|
|
// state
|
|
|
|
|
|
|
|
|
|
|
|
// new state encoders
|
|
|
|
// Append the raw in-memory bytes of inode.size, inode.mtime and inode.atime
// (in that order) to `bl`.  decode_file_state() reads the same layout.
void CInode::encode_file_state(bufferlist& bl)
{
  bl.append(reinterpret_cast<char*>(&inode.size),  sizeof(inode.size));
  bl.append(reinterpret_cast<char*>(&inode.mtime), sizeof(inode.mtime));
  bl.append(reinterpret_cast<char*>(&inode.atime), sizeof(inode.atime));   // ??
}
|
|
|
|
// Read inode.size, inode.mtime and inode.atime (in that order) as raw bytes
// from `r`, starting at offset `off`.  `off` is advanced past each field.
void CInode::decode_file_state(bufferlist& r, int& off)
{
  // size
  r.copy(off, sizeof(inode.size), reinterpret_cast<char*>(&inode.size));
  off += sizeof(inode.size);

  // mtime
  r.copy(off, sizeof(inode.mtime), reinterpret_cast<char*>(&inode.mtime));
  off += sizeof(inode.mtime);

  // atime
  r.copy(off, sizeof(inode.atime), reinterpret_cast<char*>(&inode.atime));
  off += sizeof(inode.atime);
}
|
|
|
|
/* not used currently
|
|
void CInode::decode_merge_file_state(crope& r, int& off)
|
|
{
|
|
__uint64_t size;
|
|
r.copy(off, sizeof(size), (char*)&size);
|
|
off += sizeof(size);
|
|
if (size > inode.size) inode.size = size;
|
|
|
|
time_t t;
|
|
r.copy(off, sizeof(t), (char*)&t);
|
|
off += sizeof(t);
|
|
if (t > inode.mtime) inode.mtime = t;
|
|
|
|
r.copy(off, sizeof(t), (char*)&t);
|
|
off += sizeof(t);
|
|
if (t > inode.atime) inode.atime = t;
|
|
}
|
|
*/
|
|
|
|
// Append the raw in-memory bytes of inode.mode, inode.uid, inode.gid and
// inode.ctime (in that order) to `r`.  decode_hard_state() reads the same
// layout.
void CInode::encode_hard_state(bufferlist& r)
{
  r.append(reinterpret_cast<char*>(&inode.mode),  sizeof(inode.mode));
  r.append(reinterpret_cast<char*>(&inode.uid),   sizeof(inode.uid));
  r.append(reinterpret_cast<char*>(&inode.gid),   sizeof(inode.gid));
  r.append(reinterpret_cast<char*>(&inode.ctime), sizeof(inode.ctime));
}
|
|
|
|
// Read inode.mode, inode.uid, inode.gid and inode.ctime (in that order) as
// raw bytes from `r`, starting at offset `off`.  `off` is advanced past
// each field.
void CInode::decode_hard_state(bufferlist& r, int& off)
{
  // mode
  r.copy(off, sizeof(inode.mode), reinterpret_cast<char*>(&inode.mode));
  off += sizeof(inode.mode);

  // uid
  r.copy(off, sizeof(inode.uid), reinterpret_cast<char*>(&inode.uid));
  off += sizeof(inode.uid);

  // gid
  r.copy(off, sizeof(inode.gid), reinterpret_cast<char*>(&inode.gid));
  off += sizeof(inode.gid);

  // ctime
  r.copy(off, sizeof(inode.ctime), reinterpret_cast<char*>(&inode.ctime));
  off += sizeof(inode.ctime);
}
|
|
|
|
|
|
// old state encoders
|
|
|
|
/*
|
|
void CInode::encode_basic_state(bufferlist& r)
|
|
{
|
|
// inode
|
|
r.append((char*)&inode, sizeof(inode));
|
|
::_encode(cached_by, r);
|
|
::_encode(cached_by_nonce, r);
|
|
}
|
|
|
|
void CInode::decode_basic_state(bufferlist& r, int& off)
|
|
{
|
|
// inode
|
|
r.copy(0,sizeof(inode_t), (char*)&inode);
|
|
off += sizeof(inode_t);
|
|
|
|
bool empty = cached_by.empty();
|
|
::_decode(cached_by, r, off);
|
|
::_decode(cached_by_nonce, r, off);
|
|
if (!empty)
|
|
get(CINODE_PIN_CACHED);
|
|
}
|
|
*/
|
|
|
|
|
|
// waiting
|
|
|
|
bool CInode::is_frozen()
|
|
{
|
|
if (parent && parent->dir->is_frozen())
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
bool CInode::is_frozen_dir()
|
|
{
|
|
if (parent && parent->dir->is_frozen_dir())
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
bool CInode::is_freezing()
|
|
{
|
|
if (parent && parent->dir->is_freezing())
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
bool CInode::waiting_for(int tag)
|
|
{
|
|
return waiting.count(tag) > 0;
|
|
}
|
|
|
|
void CInode::add_waiter(int tag, Context *c) {
|
|
// waiting on hierarchy?
|
|
if (tag & CDIR_WAIT_ATFREEZEROOT && (is_freezing() || is_frozen())) {
|
|
parent->dir->add_waiter(tag, c);
|
|
return;
|
|
}
|
|
|
|
// this inode.
|
|
if (waiting.size() == 0)
|
|
get(CINODE_PIN_WAITER);
|
|
waiting.insert(pair<int,Context*>(tag,c));
|
|
dout(10) << "add_waiter " << tag << " " << c << " on " << *this << endl;
|
|
|
|
}
|
|
|
|
void CInode::take_waiting(int mask, list<Context*>& ls)
|
|
{
|
|
if (waiting.empty()) return;
|
|
|
|
multimap<int,Context*>::iterator it = waiting.begin();
|
|
while (it != waiting.end()) {
|
|
if (it->first & mask) {
|
|
ls.push_back(it->second);
|
|
dout(10) << "take_waiting mask " << mask << " took " << it->second << " tag " << it->first << " on " << *this << endl;
|
|
|
|
waiting.erase(it++);
|
|
} else {
|
|
dout(10) << "take_waiting mask " << mask << " SKIPPING " << it->second << " tag " << it->first << " on " << *this << endl;
|
|
it++;
|
|
}
|
|
}
|
|
|
|
if (waiting.empty())
|
|
put(CINODE_PIN_WAITER);
|
|
}
|
|
|
|
void CInode::finish_waiting(int mask, int result)
|
|
{
|
|
dout(11) << "finish_waiting mask " << mask << " result " << result << " on " << *this << endl;
|
|
|
|
list<Context*> finished;
|
|
take_waiting(mask, finished);
|
|
finish_contexts(finished, result);
|
|
}
|
|
|
|
|
|
// auth_pins
|
|
bool CInode::can_auth_pin() {
|
|
if (parent)
|
|
return parent->dir->can_auth_pin();
|
|
return true;
|
|
}
|
|
|
|
void CInode::auth_pin() {
|
|
if (auth_pins == 0)
|
|
get(CINODE_PIN_AUTHPIN);
|
|
auth_pins++;
|
|
|
|
dout(7) << "auth_pin on " << *this << " count now " << auth_pins << " + " << nested_auth_pins << endl;
|
|
|
|
if (parent)
|
|
parent->dir->adjust_nested_auth_pins( 1 );
|
|
}
|
|
|
|
void CInode::auth_unpin() {
|
|
auth_pins--;
|
|
if (auth_pins == 0)
|
|
put(CINODE_PIN_AUTHPIN);
|
|
|
|
dout(7) << "auth_unpin on " << *this << " count now " << auth_pins << " + " << nested_auth_pins << endl;
|
|
|
|
assert(auth_pins >= 0);
|
|
|
|
if (parent)
|
|
parent->dir->adjust_nested_auth_pins( -1 );
|
|
}
|
|
|
|
|
|
|
|
// authority
|
|
|
|
int CInode::authority() {
|
|
if (is_dangling())
|
|
return dangling_auth; // explicit
|
|
if (is_root())
|
|
return 0; // i am root
|
|
assert(parent);
|
|
return parent->dir->dentry_authority( parent->name );
|
|
}
|
|
|
|
|
|
// Record `rep` in the cached-by set and return a newly allocated
// CInodeDiscover carrying the nonce handed out for it.  Asserts that we are
// auth.  If nobody was caching the inode before, replicate_relax_locks() is
// called first.  The caller receives the raw pointer.
CInodeDiscover* CInode::replicate_to( int rep )
{
  assert(is_auth());

  // relax locks?
  const bool first_replica = !is_cached_by_anyone();
  if (first_replica)
    replicate_relax_locks();

  // return the thinger
  const int nonce = cached_by_add( rep );
  return new CInodeDiscover( this, nonce );
}
|
|
|
|
|
|
// debug crap -----------------------------
|
|
|
|
void CInode::dump(int dep)
|
|
{
|
|
string ind(dep, '\t');
|
|
//cout << ind << "[inode " << this << "]" << endl;
|
|
|
|
if (dir)
|
|
dir->dump(dep);
|
|
}
|
|
|