Merge PR #27866 into master

* refs/pull/27866/head:
	mds: fix deadlock when xlocking policylock
	mds: handle link request with zero depth filepath2
	mds: enable lock cache for openc/unlink requests
	mds: include linkage type in dentry lease
	mds: cleanup Server::set_trace_dist()
	mds: define lease mask bits
	mds: delegate lock cache to client
	mds: suppress frozen inode when locks of dir operation are cached.
	mds: invalidate lock caches when freezing dirfrag/subtree
	mds: invalidate lock caches if they hold conflicting locks
	mds: initial code for lock cache
	mds: adjust locking for subtree migration
	mds: add 'path_locked' flag to MDCache::find_ino_peers()
	mds: change MDCache::discover_path()'s wants_xlocked semantic
	mds: introduce Server::rdlock_two_paths_xlock_destdn()
	mds: make Server::rdlock_path_xlock_dentry take locks
	mds: make Server::rdlock_path_pin_ref() take dentry rdlocks
	mds: take snaplock and policylock during path traverse.
	mds: let Locker::acquire_locks()'s caller choose locking order

Reviewed-by: Patrick Donnelly <pdonnell@redhat.com>
Patrick Donnelly 2019-12-20 18:16:11 -08:00
commit 3ebdbd4f55
25 changed files with 2090 additions and 1212 deletions

src/client/Client.cc

@ -1004,7 +1004,7 @@ void Client::update_dentry_lease(Dentry *dn, LeaseStat *dlease, utime_t from, Me
ceph_assert(dn);
if (dlease->mask & CEPH_LOCK_DN) {
if (dlease->mask & CEPH_LEASE_VALID) {
if (dttl > dn->lease_ttl) {
ldout(cct, 10) << "got dentry lease on " << dn->name
<< " dur " << dlease->duration_ms << "ms ttl " << dttl << dendl;
@ -2991,7 +2991,7 @@ void Client::handle_lease(const MConstRef<MClientLease>& m)
}
in = inode_map[vino];
if (m->get_mask() & CEPH_LOCK_DN) {
if (m->get_mask() & CEPH_LEASE_VALID) {
if (!in->dir || in->dir->dentries.count(m->dname) == 0) {
ldout(cct, 10) << " don't have dir|dentry " << m->get_ino() << "/" << m->dname << dendl;
goto revoke;
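
The two hunks above switch the client from testing CEPH_LOCK_DN alone to also testing the new CEPH_LEASE_VALID bit. A minimal sketch of that check, using only the constants defined in ceph_fs.h below (the helper name is illustrative, not part of this change):

#define CEPH_LOCK_DN            (1 << 0)
#define CEPH_LEASE_VALID        (1 | 2)   /* old and new bit values */
#define CEPH_LEASE_PRIMARY_LINK 4         /* primary linkage */

/* hypothetical helper: a dentry lease is usable only if the mask
 * carries both the dentry lock type bit and a validity bit */
static inline bool dentry_lease_usable(unsigned mask)
{
  return (mask & CEPH_LOCK_DN) && (mask & CEPH_LEASE_VALID);
}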

src/include/ceph_fs.h

@ -318,19 +318,21 @@ extern const char *ceph_mds_state_name(int s);
* - they also define the lock ordering by the MDS
* - a few of these are internal to the mds
*/
#define CEPH_LOCK_DVERSION 1
#define CEPH_LOCK_DN 2
#define CEPH_LOCK_IVERSION 16 /* mds internal */
#define CEPH_LOCK_ISNAP 32
#define CEPH_LOCK_IFILE 64
#define CEPH_LOCK_IAUTH 128
#define CEPH_LOCK_ILINK 256
#define CEPH_LOCK_IDFT 512 /* dir frag tree */
#define CEPH_LOCK_INEST 1024 /* mds internal */
#define CEPH_LOCK_IXATTR 2048
#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */
#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */
#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */
#define CEPH_LOCK_DN (1 << 0)
#define CEPH_LOCK_DVERSION (1 << 1)
#define CEPH_LOCK_ISNAP (1 << 4) /* snapshot lock. MDS internal */
#define CEPH_LOCK_IPOLICY (1 << 5) /* policy lock on dirs. MDS internal */
#define CEPH_LOCK_IFILE (1 << 6)
#define CEPH_LOCK_INEST (1 << 7) /* mds internal */
#define CEPH_LOCK_IDFT (1 << 8) /* dir frag tree */
#define CEPH_LOCK_IAUTH (1 << 9)
#define CEPH_LOCK_ILINK (1 << 10)
#define CEPH_LOCK_IXATTR (1 << 11)
#define CEPH_LOCK_IFLOCK (1 << 12) /* advisory file locks */
#define CEPH_LOCK_IVERSION (1 << 13) /* mds internal */
#define CEPH_LOCK_IFIRST CEPH_LOCK_ISNAP
/* client_session ops */
enum {
@ -691,6 +693,9 @@ struct ceph_mds_reply_lease {
__le32 seq;
} __attribute__ ((packed));
#define CEPH_LEASE_VALID (1 | 2) /* old and new bit values */
#define CEPH_LEASE_PRIMARY_LINK 4 /* primary linkage */
struct ceph_mds_reply_dirfrag {
__le32 frag; /* fragment */
__le32 auth; /* auth mds, if this is a delegation point */
@ -761,7 +766,7 @@ int ceph_flags_to_mode(int flags);
#define CEPH_CAP_LINK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SLINK)
#define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SXATTR)
#define CEPH_CAP_XATTR_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR)
#define CEPH_CAP_FILE(x) (x << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE(x) ((x) << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFILE)
#define CEPH_CAP_FILE_CACHE (CEPH_CAP_GCACHE << CEPH_CAP_SFILE)
@ -816,6 +821,13 @@ int ceph_flags_to_mode(int flags);
#define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \
CEPH_LOCK_IXATTR)
/* cap masks async dir operations */
#define CEPH_CAP_DIR_CREATE CEPH_CAP_FILE_CACHE
#define CEPH_CAP_DIR_UNLINK CEPH_CAP_FILE_RD
#define CEPH_CAP_ANY_DIR_OPS (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | \
CEPH_CAP_FILE_WREXTEND | CEPH_CAP_FILE_LAZYIO)
int ceph_caps_for_mode(int mode);
enum {
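
The new CEPH_CAP_DIR_* masks above reuse file cap bits on directories to gate async directory operations. A sketch of how issued caps might be tested against them (helper names are hypothetical):

/* hypothetical helpers built on the masks above */
static inline bool can_async_dir_create(int issued)
{
  return (issued & CEPH_CAP_DIR_CREATE) == CEPH_CAP_DIR_CREATE;
}
static inline bool can_async_dir_unlink(int issued)
{
  return (issued & CEPH_CAP_DIR_UNLINK) == CEPH_CAP_DIR_UNLINK;
}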

src/mds/CDir.cc

@ -195,6 +195,8 @@ CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) :
dirty_rstat_inodes(member_offset(CInode, dirty_rstat_item)),
dirty_dentries(member_offset(CDentry, item_dir_dirty)),
item_dirty(this), item_new(this),
lock_caches_with_auth_pins(member_offset(MDLockCache::DirItem, item_dir)),
freezing_inodes(member_offset(CInode, item_freezing_inode)),
dir_rep(REP_NONE),
pop_me(mdcache->decayrate),
pop_nested(mdcache->decayrate),
@ -590,6 +592,11 @@ void CDir::link_inode_work( CDentry *dn, CInode *in)
if (in->auth_pins)
dn->adjust_nested_auth_pins(in->auth_pins, NULL);
if (in->is_freezing_inode())
freezing_inodes.push_back(&in->item_freezing_inode);
else if (in->is_frozen_inode() || in->is_frozen_auth_pin())
num_frozen_inodes++;
// verify open snaprealm parent
if (in->snaprealm)
in->snaprealm->adjust_parent();
@ -671,6 +678,11 @@ void CDir::unlink_inode_work(CDentry *dn)
if (in->auth_pins)
dn->adjust_nested_auth_pins(-in->auth_pins, nullptr);
if (in->is_freezing_inode())
in->item_freezing_inode.remove_myself();
else if (in->is_frozen_inode() || in->is_frozen_auth_pin())
num_frozen_inodes--;
// detach inode
in->remove_primary_parent(dn);
if (in->is_dir())
@ -2868,14 +2880,18 @@ bool CDir::freeze_tree()
// gets decreased. Subtree becomes 'frozen' when the counter reaches zero.
freeze_tree_state = std::make_shared<freeze_tree_state_t>(this);
freeze_tree_state->auth_pins += get_auth_pins() + get_dir_auth_pins();
if (!lock_caches_with_auth_pins.empty())
cache->mds->locker->invalidate_lock_caches(this);
_walk_tree([this](CDir *dir) {
if (dir->freeze_tree_state)
return false;
dir->freeze_tree_state = freeze_tree_state;
freeze_tree_state->auth_pins += dir->get_auth_pins() + dir->get_dir_auth_pins();
if (!dir->lock_caches_with_auth_pins.empty())
cache->mds->locker->invalidate_lock_caches(dir);
return true;
}
}
);
if (is_freezeable(true)) {
@ -3118,6 +3134,8 @@ bool CDir::freeze_dir()
return true;
} else {
state_set(STATE_FREEZINGDIR);
if (!lock_caches_with_auth_pins.empty())
cache->mds->locker->invalidate_lock_caches(this);
dout(10) << "freeze_dir + wait " << *this << dendl;
return false;
}
@ -3164,6 +3182,19 @@ void CDir::unfreeze_dir()
}
}
void CDir::enable_frozen_inode()
{
ceph_assert(frozen_inode_suppressed > 0);
if (--frozen_inode_suppressed == 0) {
for (auto p = freezing_inodes.begin(); !p.end(); ) {
CInode *in = *p;
++p;
ceph_assert(in->is_freezing_inode());
in->maybe_finish_freeze_inode();
}
}
}
/**
* Slightly less complete than operator<<, because this is intended
* for identifying a directory and its state rather than for dumping

src/mds/CDir.h

@ -578,6 +578,18 @@ public:
return true;
}
bool is_any_freezing_or_frozen_inode() const {
return num_frozen_inodes || !freezing_inodes.empty();
}
bool is_auth_pinned_by_lock_cache() const {
return frozen_inode_suppressed;
}
void disable_frozen_inode() {
ceph_assert(num_frozen_inodes == 0);
frozen_inode_suppressed++;
}
void enable_frozen_inode();
ostream& print_db_line_prefix(ostream& out) override;
void print(ostream& out) override;
void dump(Formatter *f, int flags = DUMP_DEFAULT) const;
@ -599,6 +611,9 @@ public:
elist<CDentry*> dirty_dentries;
elist<CDir*>::item item_dirty, item_new;
// lock caches that auth-pin me
elist<MDLockCache::DirItem*> lock_caches_with_auth_pins;
// all dirfrags within freezing/frozen tree reference the 'state'
std::shared_ptr<freeze_tree_state_t> freeze_tree_state;
@ -680,6 +695,11 @@ protected:
static int num_frozen_trees;
static int num_freezing_trees;
// freezing/frozen inodes in this dirfrag
int num_frozen_inodes = 0;
int frozen_inode_suppressed = 0;
elist<CInode*> freezing_inodes;
int dir_auth_pins = 0;
// cache control (defined for authority; hints for replicas)
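
disable_frozen_inode()/enable_frozen_inode() above implement a suppression counter: while frozen_inode_suppressed is non-zero, inodes inside the dirfrag cannot complete freezing, and enable_frozen_inode() re-checks every queued freezer once the count returns to zero. A paraphrased sketch of the pairing a lock cache would perform (function names here are illustrative, not from the patch):

// illustrative sketch, not verbatim from the patch
void attach_lock_cache_to_dir(CDir *dir)
{
  // a lock cache may only attach while nothing is freezing/frozen
  ceph_assert(!dir->is_any_freezing_or_frozen_inode());
  dir->disable_frozen_inode();   // bumps frozen_inode_suppressed
}

void detach_lock_cache_from_dir(CDir *dir)
{
  // when the counter drops to zero, queued freezing inodes are
  // re-checked via CInode::maybe_finish_freeze_inode()
  dir->enable_frozen_inode();
}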

src/mds/CInode.cc

@ -1588,6 +1588,7 @@ void CInode::decode_store(bufferlist::const_iterator& bl)
SimpleLock* CInode::get_lock(int type)
{
switch (type) {
case CEPH_LOCK_IVERSION: return &versionlock;
case CEPH_LOCK_IFILE: return &filelock;
case CEPH_LOCK_IAUTH: return &authlock;
case CEPH_LOCK_ILINK: return &linklock;
@ -2681,25 +2682,64 @@ void CInode::take_waiting(uint64_t mask, MDSContext::vec& ls)
MDSCacheObject::take_waiting(mask, ls);
}
void CInode::maybe_finish_freeze_inode()
{
CDir *dir = get_parent_dir();
if (auth_pins > auth_pin_freeze_allowance || dir->frozen_inode_suppressed)
return;
dout(10) << "maybe_finish_freeze_inode - frozen" << dendl;
ceph_assert(auth_pins == auth_pin_freeze_allowance);
get(PIN_FROZEN);
put(PIN_FREEZING);
state_clear(STATE_FREEZING);
state_set(STATE_FROZEN);
item_freezing_inode.remove_myself();
dir->num_frozen_inodes++;
finish_waiting(WAIT_FROZEN);
}
bool CInode::freeze_inode(int auth_pin_allowance)
{
CDir *dir = get_parent_dir();
ceph_assert(dir);
ceph_assert(auth_pin_allowance > 0); // otherwise we need to adjust parent's nested_auth_pins
ceph_assert(auth_pins >= auth_pin_allowance);
if (auth_pins > auth_pin_allowance) {
dout(10) << "freeze_inode - waiting for auth_pins to drop to " << auth_pin_allowance << dendl;
auth_pin_freeze_allowance = auth_pin_allowance;
get(PIN_FREEZING);
state_set(STATE_FREEZING);
return false;
if (auth_pins == auth_pin_allowance && !dir->frozen_inode_suppressed) {
dout(10) << "freeze_inode - frozen" << dendl;
if (!state_test(STATE_FROZEN)) {
get(PIN_FROZEN);
state_set(STATE_FROZEN);
dir->num_frozen_inodes++;
}
return true;
}
dout(10) << "freeze_inode - frozen" << dendl;
ceph_assert(auth_pins == auth_pin_allowance);
if (!state_test(STATE_FROZEN)) {
get(PIN_FROZEN);
state_set(STATE_FROZEN);
dout(10) << "freeze_inode - waiting for auth_pins to drop to " << auth_pin_allowance << dendl;
auth_pin_freeze_allowance = auth_pin_allowance;
dir->freezing_inodes.push_back(&item_freezing_inode);
get(PIN_FREEZING);
state_set(STATE_FREEZING);
if (!dir->lock_caches_with_auth_pins.empty())
mdcache->mds->locker->invalidate_lock_caches(dir);
const static int lock_types[] = {
CEPH_LOCK_IVERSION, CEPH_LOCK_IFILE, CEPH_LOCK_IAUTH, CEPH_LOCK_ILINK, CEPH_LOCK_IDFT,
CEPH_LOCK_IXATTR, CEPH_LOCK_ISNAP, CEPH_LOCK_INEST, CEPH_LOCK_IFLOCK, CEPH_LOCK_IPOLICY, 0
};
for (int i = 0; lock_types[i]; ++i) {
auto lock = get_lock(lock_types[i]);
if (lock->is_cached())
mdcache->mds->locker->invalidate_lock_caches(lock);
}
return true;
// invalidate_lock_caches() may decrease dir->frozen_inode_suppressed
// and finish freezing the inode
return state_test(STATE_FROZEN);
}
void CInode::unfreeze_inode(MDSContext::vec& finished)
@ -2708,9 +2748,11 @@ void CInode::unfreeze_inode(MDSContext::vec& finished)
if (state_test(STATE_FREEZING)) {
state_clear(STATE_FREEZING);
put(PIN_FREEZING);
item_freezing_inode.remove_myself();
} else if (state_test(STATE_FROZEN)) {
state_clear(STATE_FROZEN);
put(PIN_FROZEN);
get_parent_dir()->num_frozen_inodes--;
} else
ceph_abort();
take_waiting(WAIT_UNFREEZE, finished);
@ -2727,12 +2769,14 @@ void CInode::freeze_auth_pin()
{
ceph_assert(state_test(CInode::STATE_FROZEN));
state_set(CInode::STATE_FROZENAUTHPIN);
get_parent_dir()->num_frozen_inodes++;
}
void CInode::unfreeze_auth_pin()
{
ceph_assert(state_test(CInode::STATE_FROZENAUTHPIN));
state_clear(CInode::STATE_FROZENAUTHPIN);
get_parent_dir()->num_frozen_inodes--;
if (!state_test(STATE_FREEZING|STATE_FROZEN)) {
MDSContext::vec finished;
take_waiting(WAIT_UNFREEZE, finished);
@ -2809,15 +2853,8 @@ void CInode::auth_unpin(void *by)
if (parent)
parent->adjust_nested_auth_pins(-1, by);
if (is_freezing_inode() &&
auth_pins == auth_pin_freeze_allowance) {
dout(10) << "auth_unpin freezing!" << dendl;
get(PIN_FROZEN);
put(PIN_FREEZING);
state_clear(STATE_FREEZING);
state_set(STATE_FROZEN);
finish_waiting(WAIT_FROZEN);
}
if (is_freezing_inode())
maybe_finish_freeze_inode();
}
// authority
@ -3407,7 +3444,11 @@ int CInode::get_caps_allowed_for_client(Session *session, Capability *cap,
allowed = get_caps_allowed_by_type(CAP_ANY);
}
if (!is_dir()) {
if (is_dir()) {
allowed &= ~CEPH_CAP_ANY_DIR_OPS;
if (cap && (allowed & CEPH_CAP_FILE_EXCL))
allowed |= cap->get_lock_cache_allowed();
} else {
if (file_i->inline_data.version == CEPH_INLINE_NONE &&
file_i->layout.pool_ns.empty()) {
// noop

src/mds/CInode.h

@ -197,7 +197,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
friend class MDCache;
friend class StrayManager;
friend class CDir;
friend class CInodeExport;
friend ostream& operator<<(ostream&, const CInode&);
class scrub_stamp_info_t {
public:
@ -513,7 +513,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
bool has_snap_data(snapid_t s);
void purge_stale_snap_data(const std::set<snapid_t>& snaps);
bool has_dirfrags() { return !dirfrags.empty(); }
size_t get_num_dirfrags() const { return dirfrags.size(); }
CDir* get_dirfrag(frag_t fg) {
auto pi = dirfrags.find(fg);
if (pi != dirfrags.end()) {
@ -893,6 +893,9 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
parent = projected_parent.front();
projected_parent.pop_front();
}
bool is_parent_projected() const {
return !projected_parent.empty();
}
void maybe_export_pin(bool update=false);
void set_export_pin(mds_rank_t rank);
@ -954,8 +957,6 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
elist<CInode*>::item& item_recover_queue = item_dirty_dirfrag_dir;
elist<CInode*>::item& item_recover_queue_front = item_dirty_dirfrag_nest;
int auth_pin_freeze_allowance = 0;
inode_load_vec_t pop;
elist<CInode*>::item item_pop_lru;
@ -1066,6 +1067,11 @@ protected:
// -- waiting --
mempool::mds_co::compact_map<frag_t, MDSContext::vec > waiting_on_dir;
// -- freezing inode --
int auth_pin_freeze_allowance = 0;
elist<CInode*>::item item_freezing_inode;
void maybe_finish_freeze_inode();
private:
friend class ValidationContinuation;

src/mds/Capability.cc

@ -148,6 +148,7 @@ void Capability::revoke_info::generate_test_instances(std::list<Capability::revo
Capability::Capability(CInode *i, Session *s, uint64_t id) :
item_session_caps(this), item_snaprealm_caps(this),
item_revoking_caps(this), item_client_revoking_caps(this),
lock_caches(member_offset(MDLockCache, item_cap_lock_cache)),
inode(i), session(s), cap_id(id)
{
if (session) {

src/mds/Capability.h

@ -20,6 +20,7 @@
#include "include/counter.h"
#include "include/mempool.h"
#include "include/xlist.h"
#include "include/elist.h"
#include "common/config.h"
@ -62,6 +63,7 @@
class CInode;
class Session;
class MDLockCache;
namespace ceph {
class Formatter;
@ -180,8 +182,8 @@ public:
inc_last_seq();
return last_sent;
}
void confirm_receipt(ceph_seq_t seq, unsigned caps) {
bool was_revoking = (_issued & ~_pending);
int confirm_receipt(ceph_seq_t seq, unsigned caps) {
int was_revoking = (_issued & ~_pending);
if (seq == last_sent) {
_revokes.clear();
_issued = caps;
@ -206,7 +208,7 @@ public:
item_client_revoking_caps.remove_myself();
maybe_clear_notable();
}
//check_rdcaps_list();
return was_revoking & ~_issued; // return revoked
}
// we may get a release racing with revocations, which means our revokes will be ignored
// by the client. clean them out of our _revokes history so we don't wait on them.
@ -339,9 +341,10 @@ public:
set_wanted(wanted() | otherwanted);
}
void revoke() {
int revoke() {
if (revoking())
confirm_receipt(last_sent, pending());
return confirm_receipt(last_sent, pending());
return 0;
}
// serializers
@ -361,6 +364,11 @@ public:
xlist<Capability*>::item item_revoking_caps;
xlist<Capability*>::item item_client_revoking_caps;
elist<MDLockCache*> lock_caches;
int get_lock_cache_allowed() const { return lock_cache_allowed; }
void set_lock_cache_allowed(int c) { lock_cache_allowed |= c; }
void clear_lock_cache_allowed(int c) { lock_cache_allowed &= ~c; }
private:
void calc_issued() {
_issued = _pending;
@ -398,6 +406,8 @@ private:
int suppress = 0;
unsigned state = 0;
int lock_cache_allowed = 0;
};
WRITE_CLASS_ENCODER(Capability::Export)
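
confirm_receipt() and revoke() now return the cap bits that were actually revoked, computed as was_revoking & ~_issued, so Locker::eval_lock_caches() can drop lock caches that depended on those caps. The bit arithmetic in isolation:

// standalone illustration of the return value above
int issued       = 0x5;                    // _issued before confirmation
int pending      = 0x1;                    // _pending
int was_revoking = issued & ~pending;      // 0x4: bits being revoked
int new_issued   = pending;                // client confirmed at last_sent
int revoked      = was_revoking & ~new_issued;  // 0x4, fed to eval_lock_caches()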

src/mds/Locker.cc (file diff suppressed because it is too large)

src/mds/Locker.h

@ -51,14 +51,13 @@ public:
void nudge_log(SimpleLock *lock);
void include_snap_rdlocks(CInode *in, MutationImpl::LockOpVec& lov);
void include_snap_rdlocks_wlayout(CInode *in, MutationImpl::LockOpVec& lov,
file_layout_t **layout);
bool acquire_locks(MDRequestRef& mdr,
MutationImpl::LockOpVec& lov,
CInode *auth_pin_freeze=NULL,
bool auth_pin_nonblock=false);
bool auth_pin_nonblocking=false);
bool try_rdlock_snap_layout(CInode *in, MDRequestRef& mdr,
int n=0, bool want_layout=false);
void notify_freeze_waiter(MDSCacheObject *o);
void cancel_locking(MutationImpl *mut, std::set<CInode*> *pneed_issue);
@ -68,6 +67,15 @@ public:
void drop_rdlocks_for_early_reply(MutationImpl *mut);
void drop_locks_for_fragment_unfreeze(MutationImpl *mut);
int get_cap_bit_for_lock_cache(int op);
void create_lock_cache(MDRequestRef& mdr, CInode *diri, file_layout_t *dir_layout=nullptr);
bool find_and_attach_lock_cache(MDRequestRef& mdr, CInode *diri);
void invalidate_lock_caches(CDir *dir);
void invalidate_lock_caches(SimpleLock *lock);
void invalidate_lock_cache(MDLockCache *lock_cache);
void eval_lock_caches(Capability *cap);
void put_lock_cache(MDLockCache* lock_cache);
void eval_gather(SimpleLock *lock, bool first=false, bool *need_issue=0, MDSContext::vec *pfinishers=0);
void eval(SimpleLock *lock, bool *need_issue);
void eval_any(SimpleLock *lock, bool *need_issue, MDSContext::vec *pfinishers=0, bool first=false) {
@ -86,11 +94,11 @@ public:
void try_eval(SimpleLock *lock, bool *pneed_issue);
bool _rdlock_kick(SimpleLock *lock, bool as_anon);
bool rdlock_try(SimpleLock *lock, client_t client, MDSContext *c);
bool rdlock_try(SimpleLock *lock, client_t client);
bool rdlock_start(SimpleLock *lock, MDRequestRef& mut, bool as_anon=false);
void rdlock_finish(const MutationImpl::lock_iterator& it, MutationImpl *mut, bool *pneed_issue);
bool can_rdlock_set(MutationImpl::LockOpVec& lov);
void rdlock_take_set(MutationImpl::LockOpVec& lov, MutationRef& mut);
bool rdlock_try_set(MutationImpl::LockOpVec& lov, MDRequestRef& mdr);
bool rdlock_try_set(MutationImpl::LockOpVec& lov, MutationRef& mut);
void wrlock_force(SimpleLock *lock, MutationRef& mut);
bool wrlock_try(SimpleLock *lock, MutationRef& mut);
@ -106,6 +114,7 @@ public:
void xlock_export(const MutationImpl::lock_iterator& it, MutationImpl *mut);
void xlock_import(SimpleLock *lock);
void xlock_downgrade(SimpleLock *lock, MutationImpl *mut);
void try_simple_eval(SimpleLock *lock);
bool simple_rdlock_try(SimpleLock *lock, MDSContext *con);
@ -176,7 +185,7 @@ public:
// -- client leases --
void handle_client_lease(const cref_t<MClientLease> &m);
void issue_client_lease(CDentry *dn, client_t client, bufferlist &bl, utime_t now, Session *session);
void issue_client_lease(CDentry *dn, MDRequestRef &mdr, int mask, utime_t now, bufferlist &bl);
void revoke_client_leases(SimpleLock *lock);
static void encode_lease(bufferlist& bl, const session_info_t& info, const LeaseStat& ls);
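
The new lock cache entry points above fit together roughly as follows on the openc/unlink path: try to attach an existing cache before acquiring locks, and consider creating one once they are held. This is a paraphrased sketch of the flow, not the exact Server.cc code:

// paraphrased sketch of a caller of the lock cache API
if (!locker->find_and_attach_lock_cache(mdr, diri)) {
  // no reusable cache for this client/opcode; take locks normally
  if (!locker->acquire_locks(mdr, lov))
    return;                         // will be re-driven when locks are ready
  // optionally delegate the held locks to the client's cap
  locker->create_lock_cache(mdr, diri);
}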

src/mds/MDCache.cc

@ -4766,13 +4766,13 @@ void MDCache::handle_cache_rejoin_strong(const cref_t<MMDSCacheRejoin> &strong)
if (!mdr->is_xlocked(&dn->versionlock)) {
ceph_assert(dn->versionlock.can_xlock_local());
dn->versionlock.get_xlock(mdr, mdr->get_client());
mdr->locks.emplace(&dn->versionlock, MutationImpl::LockOp::XLOCK);
mdr->emplace_lock(&dn->versionlock, MutationImpl::LockOp::XLOCK);
}
if (dn->lock.is_stable())
dn->auth_pin(&dn->lock);
dn->lock.set_state(LOCK_XLOCK);
dn->lock.get_xlock(mdr, mdr->get_client());
mdr->locks.emplace(&dn->lock, MutationImpl::LockOp::XLOCK);
mdr->emplace_lock(&dn->lock, MutationImpl::LockOp::XLOCK);
}
}
@ -4864,7 +4864,7 @@ void MDCache::handle_cache_rejoin_strong(const cref_t<MMDSCacheRejoin> &strong)
if (!mdr->is_xlocked(&in->versionlock)) {
ceph_assert(in->versionlock.can_xlock_local());
in->versionlock.get_xlock(mdr, mdr->get_client());
mdr->locks.emplace(&in->versionlock, MutationImpl::LockOp::XLOCK);
mdr->emplace_lock(&in->versionlock, MutationImpl::LockOp::XLOCK);
}
if (lock->is_stable())
in->auth_pin(lock);
@ -4872,7 +4872,7 @@ void MDCache::handle_cache_rejoin_strong(const cref_t<MMDSCacheRejoin> &strong)
if (lock == &in->filelock)
in->loner_cap = -1;
lock->get_xlock(mdr, mdr->get_client());
mdr->locks.emplace(lock, MutationImpl::LockOp::XLOCK);
mdr->emplace_lock(lock, MutationImpl::LockOp::XLOCK);
}
}
}
@ -4890,7 +4890,7 @@ void MDCache::handle_cache_rejoin_strong(const cref_t<MMDSCacheRejoin> &strong)
if (lock == &in->filelock)
in->loner_cap = -1;
lock->get_wrlock(true);
mdr->locks.emplace(lock, MutationImpl::LockOp::WRLOCK);
mdr->emplace_lock(lock, MutationImpl::LockOp::WRLOCK);
}
}
}
@ -5966,7 +5966,7 @@ void MDCache::opened_undef_inode(CInode *in) {
if (in->is_dir()) {
// FIXME: re-hash dentries if necessary
ceph_assert(in->inode.dir_layout.dl_dir_hash == g_conf()->mds_default_dir_hash);
if (in->has_dirfrags() && !in->dirfragtree.is_leaf(frag_t())) {
if (in->get_num_dirfrags() && !in->dirfragtree.is_leaf(frag_t())) {
CDir *dir = in->get_dirfrag(frag_t());
ceph_assert(dir);
rejoin_undef_dirfrags.erase(dir);
@ -6909,7 +6909,7 @@ bool MDCache::trim_inode(CDentry *dn, CInode *in, CDir *con, expiremap& expirema
// This is because unconnected replicas are problematic for
// subtree migration.
//
if (!in->is_auth() && !mds->locker->rdlock_try(&in->dirfragtreelock, -1, nullptr)) {
if (!in->is_auth() && !mds->locker->rdlock_try(&in->dirfragtreelock, -1)) {
return true;
}
@ -8063,9 +8063,13 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
{
bool discover = (flags & MDS_TRAVERSE_DISCOVER);
bool forward = !discover;
bool last_xlocked = (flags & MDS_TRAVERSE_LAST_XLOCKED);
bool path_locked = (flags & MDS_TRAVERSE_PATH_LOCKED);
bool want_dentry = (flags & MDS_TRAVERSE_WANT_DENTRY);
bool want_auth = (flags & MDS_TRAVERSE_WANT_AUTH);
bool rdlock_snap = (flags & (MDS_TRAVERSE_RDLOCK_SNAP | MDS_TRAVERSE_RDLOCK_SNAP2));
bool rdlock_path = (flags & MDS_TRAVERSE_RDLOCK_PATH);
bool xlock_dentry = (flags & MDS_TRAVERSE_XLOCK_DENTRY);
bool rdlock_authlock = (flags & MDS_TRAVERSE_RDLOCK_AUTHLOCK);
if (forward)
ceph_assert(mdr); // forward requires a request
@ -8080,14 +8084,20 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
dout(7) << "traverse: opening base ino " << path.get_ino() << " snap " << snapid << dendl;
CInode *cur = get_inode(path.get_ino());
if (cur == NULL) {
if (MDS_INO_IS_MDSDIR(path.get_ino()))
if (!cur) {
if (MDS_INO_IS_MDSDIR(path.get_ino())) {
open_foreign_mdsdir(path.get_ino(), cf.build());
else {
//ceph_abort(); // hrm.. broken
return -ESTALE;
return 1;
}
return 1;
if (MDS_INO_IS_STRAY(path.get_ino())) {
mds_rank_t rank = MDS_INO_STRAY_OWNER(path.get_ino());
unsigned idx = MDS_INO_STRAY_INDEX(path.get_ino());
filepath path(strays[idx]->get_parent_dn()->get_name(),
MDS_INO_MDSDIR(rank));
MDRequestRef null_ref;
return path_traverse(null_ref, cf, path, MDS_TRAVERSE_DISCOVER, nullptr);
}
return -ESTALE;
}
if (cur->state_test(CInode::STATE_PURGING))
return -ESTALE;
@ -8098,14 +8108,31 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
return 1;
}
if (flags & MDS_TRAVERSE_CHECK_LOCKCACHE)
mds->locker->find_and_attach_lock_cache(mdr, cur);
if (mdr && mdr->lock_cache) {
if (flags & MDS_TRAVERSE_WANT_DIRLAYOUT)
mdr->dir_layout = mdr->lock_cache->get_dir_layout();
} else if (rdlock_snap) {
int n = (flags & MDS_TRAVERSE_RDLOCK_SNAP2) ? 1 : 0;
if ((n == 0 && !(mdr->locking_state & MutationImpl::SNAP_LOCKED)) ||
(n == 1 && !(mdr->locking_state & MutationImpl::SNAP2_LOCKED))) {
bool want_layout = (flags & MDS_TRAVERSE_WANT_DIRLAYOUT);
if (!mds->locker->try_rdlock_snap_layout(cur, mdr, n, want_layout))
return 1;
}
}
// start trace
if (pdnvec)
pdnvec->clear();
if (pin)
*pin = cur;
unsigned depth = 0;
while (depth < path.depth()) {
MutationImpl::LockOpVec lov;
for (unsigned depth = 0; depth < path.depth(); ) {
dout(12) << "traverse: path seg depth " << depth << " '" << path[depth]
<< "' snapid " << snapid << dendl;
@ -8132,18 +8159,8 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
snapid = realm->resolve_snapname(path[depth], cur->ino());
dout(10) << "traverse: snap " << path[depth] << " -> " << snapid << dendl;
if (!snapid) {
CInode *t = cur;
while (t) {
// if snaplock isn't readable, it's possible that other mds is creating
// snapshot, but snap update message hasn't been received.
if (!t->snaplock.can_read(client)) {
dout(10) << " non-readable snaplock on " << *t << dendl;
t->snaplock.add_waiter(SimpleLock::WAIT_RD, cf.build());
return 1;
}
CDentry *pdn = t->get_projected_parent_dn();
t = pdn ? pdn->get_dir()->get_inode() : NULL;
}
if (pdnvec)
pdnvec->clear(); // do not confuse likes of rdlock_path_pin_ref();
return -ENOENT;
}
mdr->snapid = snapid;
@ -8167,7 +8184,7 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
// discover?
dout(10) << "traverse: need dirfrag " << fg << ", doing discover from " << *cur << dendl;
discover_path(cur, snapid, path.postfixpath(depth), cf.build(),
last_xlocked);
path_locked);
if (mds->logger) mds->logger->inc(l_mds_traverse_discover);
return 1;
}
@ -8215,9 +8232,32 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
// dentry
CDentry *dn = curdir->lookup(path[depth], snapid);
if (dn) {
if (!dn->lock.can_read(client) &&
!(last_xlocked && depth == path.depth() - 1) &&
!(dn->lock.is_xlocked() && dn->lock.get_xlock_by() == mdr)) {
if (dn->state_test(CDentry::STATE_PURGING))
return -ENOENT;
if (rdlock_path) {
lov.clear();
if (xlock_dentry && depth == path.depth() - 1) {
if (depth > 0 || !mdr->lock_cache) {
lov.add_wrlock(&cur->filelock);
lov.add_wrlock(&cur->nestlock);
if (rdlock_authlock)
lov.add_rdlock(&cur->authlock);
}
lov.add_xlock(&dn->lock);
} else {
// force client to flush async dir operation if necessary
if (cur->filelock.is_cached())
lov.add_wrlock(&cur->filelock);
lov.add_rdlock(&dn->lock);
}
if (!mds->locker->acquire_locks(mdr, lov)) {
dout(10) << "traverse: failed to rdlock " << dn->lock << " " << *dn << dendl;
return 1;
}
} else if (!path_locked &&
!dn->lock.can_read(client) &&
!(dn->lock.is_xlocked() && dn->lock.get_xlock_by() == mdr)) {
dout(10) << "traverse: non-readable dentry at " << *dn << dendl;
dn->lock.add_waiter(SimpleLock::WAIT_RD, cf.build());
if (mds->logger)
@ -8262,7 +8302,7 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
return -EIO;
}
open_remote_dentry(dn, true, cf.build(),
(last_xlocked && depth == path.depth() - 1));
(path_locked && depth == path.depth() - 1));
if (mds->logger) mds->logger->inc(l_mds_traverse_remote_ino);
return 1;
}
@ -8275,6 +8315,15 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
return 1;
}
if (rdlock_snap && !(want_dentry && depth == path.depth() - 1)) {
lov.clear();
lov.add_rdlock(&cur->snaplock);
if (!mds->locker->acquire_locks(mdr, lov)) {
dout(10) << "traverse: failed to rdlock " << cur->snaplock << " " << *cur << dendl;
return 1;
}
}
// add to trace, continue.
touch_inode(cur);
if (pin)
@ -8309,6 +8358,28 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
// create a null dentry
dn = curdir->add_null_dentry(path[depth]);
dout(20) << " added null " << *dn << dendl;
if (rdlock_path) {
lov.clear();
if (xlock_dentry) {
if (depth > 0 || !mdr->lock_cache) {
lov.add_wrlock(&cur->filelock);
lov.add_wrlock(&cur->nestlock);
if (rdlock_authlock)
lov.add_rdlock(&cur->authlock);
}
lov.add_xlock(&dn->lock);
} else {
// force client to flush async dir operation if necessary
if (cur->filelock.is_cached())
lov.add_wrlock(&cur->filelock);
lov.add_rdlock(&dn->lock);
}
if (!mds->locker->acquire_locks(mdr, lov)) {
dout(10) << "traverse: failed to rdlock " << dn->lock << " " << *dn << dendl;
return 1;
}
}
}
if (dn) {
pdnvec->push_back(dn);
@ -8352,7 +8423,7 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
if ((discover)) {
dout(7) << "traverse: discover from " << path[depth] << " from " << *curdir << dendl;
discover_path(curdir, snapid, path.postfixpath(depth), cf.build(),
last_xlocked);
path_locked);
if (mds->logger) mds->logger->inc(l_mds_traverse_discover);
return 1;
}
@ -8397,6 +8468,15 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf,
dout(10) << "path_traverse finish on snapid " << snapid << dendl;
if (mdr)
ceph_assert(mdr->snapid == snapid);
if (flags & MDS_TRAVERSE_RDLOCK_SNAP)
mdr->locking_state |= MutationImpl::SNAP_LOCKED;
else if (flags & MDS_TRAVERSE_RDLOCK_SNAP2)
mdr->locking_state |= MutationImpl::SNAP2_LOCKED;
if (rdlock_path)
mdr->locking_state |= MutationImpl::PATH_LOCKED;
return 0;
}
@ -9088,7 +9168,8 @@ void MDCache::open_ino(inodeno_t ino, int64_t pool, MDSContext* fin,
- traverse path
*/
void MDCache::find_ino_peers(inodeno_t ino, MDSContext *c, mds_rank_t hint)
void MDCache::find_ino_peers(inodeno_t ino, MDSContext *c,
mds_rank_t hint, bool path_locked)
{
dout(5) << "find_ino_peers " << ino << " hint " << hint << dendl;
CInode *in = get_inode(ino);
@ -9103,6 +9184,7 @@ void MDCache::find_ino_peers(inodeno_t ino, MDSContext *c, mds_rank_t hint)
fip.ino = ino;
fip.tid = tid;
fip.fin = c;
fip.path_locked = path_locked;
fip.hint = hint;
_do_find_ino_peer(fip);
}
@ -9164,7 +9246,7 @@ void MDCache::handle_find_ino(const cref_t<MMDSFindIno> &m)
void MDCache::handle_find_ino_reply(const cref_t<MMDSFindInoReply> &m)
{
map<ceph_tid_t, find_ino_peer_info_t>::iterator p = find_ino_peer.find(m->tid);
auto p = find_ino_peer.find(m->tid);
if (p != find_ino_peer.end()) {
dout(10) << "handle_find_ino_reply " << *m << dendl;
find_ino_peer_info_t& fip = p->second;
@ -9187,7 +9269,10 @@ void MDCache::handle_find_ino_reply(const cref_t<MMDSFindInoReply> &m)
vector<CDentry*> trace;
CF_MDS_RetryMessageFactory cf(mds, m);
MDRequestRef null_ref;
int r = path_traverse(null_ref, cf, m->path, MDS_TRAVERSE_DISCOVER, &trace);
int flags = MDS_TRAVERSE_DISCOVER;
if (fip.path_locked)
flags |= MDS_TRAVERSE_PATH_LOCKED;
int r = path_traverse(null_ref, cf, m->path, flags, &trace);
if (r > 0)
return;
dout(0) << "handle_find_ino_reply failed with " << r << " on " << m->path
@ -9560,7 +9645,7 @@ void MDCache::request_kill(MDRequestRef& mdr)
// rollback slave requests is tricky. just let the request proceed.
if (mdr->has_more() &&
(!mdr->more()->witnessed.empty() || !mdr->more()->waiting_on_slave.empty())) {
if (!mdr->done_locking) {
if (!(mdr->locking_state & MutationImpl::ALL_LOCKED)) {
ceph_assert(mdr->more()->witnessed.empty());
mdr->aborted = true;
dout(10) << "request_kill " << *mdr << " -- waiting for slave reply, delaying" << dendl;
@ -9857,7 +9942,7 @@ void MDCache::fetch_backtrace(inodeno_t ino, int64_t pool, bufferlist& bl, Conte
void MDCache::_send_discover(discover_info_t& d)
{
auto dis = make_message<MDiscover>(d.ino, d.frag, d.snap, d.want_path,
d.want_base_dir, d.want_xlocked);
d.want_base_dir, d.path_locked);
dis->set_tid(d.tid);
mds->send_message_mds(dis, d.mds);
}
@ -9917,14 +10002,14 @@ void MDCache::discover_path(CInode *base,
snapid_t snap,
filepath want_path,
MDSContext *onfinish,
bool want_xlocked,
bool path_locked,
mds_rank_t from)
{
if (from < 0)
from = base->authority().first;
dout(7) << "discover_path " << base->ino() << " " << want_path << " snap " << snap << " from mds." << from
<< (want_xlocked ? " want_xlocked":"")
<< (path_locked ? " path_locked":"")
<< dendl;
if (base->is_ambiguous_auth()) {
@ -9941,7 +10026,7 @@ void MDCache::discover_path(CInode *base,
}
frag_t fg = base->pick_dirfrag(want_path[0]);
if ((want_xlocked && want_path.depth() == 1) ||
if ((path_locked && want_path.depth() == 1) ||
!base->is_waiting_for_dir(fg) || !onfinish) {
discover_info_t& d = _create_discover(from);
d.ino = base->ino();
@ -9950,7 +10035,7 @@ void MDCache::discover_path(CInode *base,
d.snap = snap;
d.want_path = want_path;
d.want_base_dir = true;
d.want_xlocked = want_xlocked;
d.path_locked = path_locked;
_send_discover(d);
}
@ -9974,12 +10059,12 @@ void MDCache::discover_path(CDir *base,
snapid_t snap,
filepath want_path,
MDSContext *onfinish,
bool want_xlocked)
bool path_locked)
{
mds_rank_t from = base->authority().first;
dout(7) << "discover_path " << base->dirfrag() << " " << want_path << " snap " << snap << " from mds." << from
<< (want_xlocked ? " want_xlocked":"")
<< (path_locked ? " path_locked":"")
<< dendl;
if (base->is_ambiguous_auth()) {
@ -9995,7 +10080,7 @@ void MDCache::discover_path(CDir *base,
return;
}
if ((want_xlocked && want_path.depth() == 1) ||
if ((path_locked && want_path.depth() == 1) ||
!base->is_waiting_for_dentry(want_path[0].c_str(), snap) || !onfinish) {
discover_info_t& d = _create_discover(from);
d.ino = base->ino();
@ -10004,7 +10089,7 @@ void MDCache::discover_path(CDir *base,
d.snap = snap;
d.want_path = want_path;
d.want_base_dir = false;
d.want_xlocked = want_xlocked;
d.path_locked = path_locked;
_send_discover(d);
}
@ -10266,11 +10351,10 @@ void MDCache::handle_discover(const cref_t<MDiscover> &dis)
// xlocked dentry?
// ...always block on non-tail items (they are unrelated)
// ...allow xlocked tail discovery _only_ if explicitly requested
bool tailitem = (dis->get_want().depth() == 0) || (i == dis->get_want().depth() - 1);
if (dn->lock.is_xlocked()) {
// is this the last (tail) item in the discover traversal?
if (tailitem && dis->wants_xlocked()) {
dout(7) << "handle_discover allowing discovery of xlocked tail " << *dn << dendl;
if (dis->is_path_locked()) {
dout(7) << "handle_discover allowing discovery of xlocked " << *dn << dendl;
} else if (reply->is_empty()) {
dout(7) << "handle_discover blocking on xlocked " << *dn << dendl;
dn->lock.add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryMessage(mds, dis));
@ -10282,8 +10366,9 @@ void MDCache::handle_discover(const cref_t<MDiscover> &dis)
}
// frozen inode?
bool tailitem = (dis->get_want().depth() == 0) || (i == dis->get_want().depth() - 1);
if (dnl->is_primary() && dnl->get_inode()->is_frozen_inode()) {
if (tailitem && dis->wants_xlocked()) {
if (tailitem && dis->is_path_locked()) {
dout(7) << "handle_discover allowing discovery of frozen tail " << *dnl->get_inode() << dendl;
} else if (reply->is_empty()) {
dout(7) << *dnl->get_inode() << " is frozen, empty reply, waiting" << dendl;
@ -10448,7 +10533,7 @@ void MDCache::handle_discover_reply(const cref_t<MDiscoverReply> &m)
m->get_wanted_snapid(), finished);
} else {
filepath relpath(m->get_error_dentry(), 0);
discover_path(dir, m->get_wanted_snapid(), relpath, 0, m->get_wanted_xlocked());
discover_path(dir, m->get_wanted_snapid(), relpath, 0, m->is_path_locked());
}
} else
dout(7) << " doing nothing, have dir but nobody is waiting on dentry "
@ -11610,15 +11695,20 @@ void MDCache::dispatch_fragment_dir(MDRequestRef& mdr)
dout(10) << "dispatch_fragment_dir " << basedirfrag << " bits " << info.bits
<< " on " << *diri << dendl;
if (mdr->more()->slave_error)
mdr->aborted = true;
if (!mdr->aborted) {
MutationImpl::LockOpVec lov;
lov.add_wrlock(&diri->dirfragtreelock);
// prevent a racing gather on any other scatterlocks too
lov.lock_scatter_gather(&diri->nestlock);
lov.lock_scatter_gather(&diri->filelock);
if (!mds->locker->acquire_locks(mdr, lov, NULL, true))
if (!mds->locker->acquire_locks(mdr, lov, NULL, true)) {
if (!mdr->aborted)
return;
}
}
if (mdr->aborted) {
@ -12566,18 +12656,13 @@ void MDCache::enqueue_scrub(
void MDCache::enqueue_scrub_work(MDRequestRef& mdr)
{
MutationImpl::LockOpVec lov;
CInode *in = mds->server->rdlock_path_pin_ref(mdr, 0, lov, true);
CInode *in = mds->server->rdlock_path_pin_ref(mdr, true);
if (NULL == in)
return;
// TODO: Remove this restriction
ceph_assert(in->is_auth());
bool locked = mds->locker->acquire_locks(mdr, lov);
if (!locked)
return;
C_MDS_EnqueueScrub *cs = static_cast<C_MDS_EnqueueScrub*>(mdr->internal_op_finish);
ScrubHeaderRef header = cs->header;
@ -12898,10 +12983,7 @@ void MDCache::upgrade_inode_snaprealm_work(MDRequestRef& mdr)
}
MutationImpl::LockOpVec lov;
mds->locker->include_snap_rdlocks(in, lov);
lov.erase_rdlock(&in->snaplock);
lov.add_xlock(&in->snaplock);
if (!mds->locker->acquire_locks(mdr, lov))
return;
@ -12953,15 +13035,11 @@ public:
void MDCache::flush_dentry_work(MDRequestRef& mdr)
{
MutationImpl::LockOpVec lov;
CInode *in = mds->server->rdlock_path_pin_ref(mdr, 0, lov, true);
if (NULL == in)
CInode *in = mds->server->rdlock_path_pin_ref(mdr, true);
if (!in)
return;
// TODO: Is this necessary? Fix it if so
ceph_assert(in->is_auth());
bool locked = mds->locker->acquire_locks(mdr, lov);
if (!locked)
return;
in->flush(new C_FinishIOMDR(mds, mdr));
}

src/mds/MDCache.h

@ -112,9 +112,16 @@ enum {
// flags for path_traverse();
static const int MDS_TRAVERSE_DISCOVER = (1 << 0);
static const int MDS_TRAVERSE_LAST_XLOCKED = (1 << 1);
static const int MDS_TRAVERSE_PATH_LOCKED = (1 << 1);
static const int MDS_TRAVERSE_WANT_DENTRY = (1 << 2);
static const int MDS_TRAVERSE_WANT_AUTH = (1 << 3);
static const int MDS_TRAVERSE_RDLOCK_SNAP = (1 << 4);
static const int MDS_TRAVERSE_RDLOCK_SNAP2 = (1 << 5);
static const int MDS_TRAVERSE_WANT_DIRLAYOUT = (1 << 6);
static const int MDS_TRAVERSE_RDLOCK_PATH = (1 << 7);
static const int MDS_TRAVERSE_XLOCK_DENTRY = (1 << 8);
static const int MDS_TRAVERSE_RDLOCK_AUTHLOCK = (1 << 9);
static const int MDS_TRAVERSE_CHECK_LOCKCACHE = (1 << 10);
// flags for predirty_journal_parents()
@ -149,7 +156,7 @@ class MDCache {
filepath want_path;
CInode *basei = nullptr;
bool want_base_dir = false;
bool want_xlocked = false;
bool path_locked = false;
};
// [reconnect/rejoin caps]
@ -167,6 +174,7 @@ class MDCache {
inodeno_t ino;
ceph_tid_t tid = 0;
MDSContext *fin = nullptr;
bool path_locked = false;
mds_rank_t hint = MDS_RANK_NONE;
mds_rank_t checking = MDS_RANK_NONE;
set<mds_rank_t> checked;
@ -263,9 +271,9 @@ class MDCache {
void discover_dir_frag(CInode *base, frag_t approx_fg, MDSContext *onfinish,
mds_rank_t from=MDS_RANK_NONE);
void discover_path(CInode *base, snapid_t snap, filepath want_path, MDSContext *onfinish,
bool want_xlocked=false, mds_rank_t from=MDS_RANK_NONE);
bool path_locked=false, mds_rank_t from=MDS_RANK_NONE);
void discover_path(CDir *base, snapid_t snap, filepath want_path, MDSContext *onfinish,
bool want_xlocked=false);
bool path_locked=false);
void kick_discovers(mds_rank_t who); // after a failure.
// adjust subtree auth specification
@ -768,7 +776,7 @@ class MDCache {
* MDS_TRAVERSE_DISCOVER: Instead of forwarding request, path_traverse()
* attempts to look up the path from a different MDS (and bring them into
* its cache as replicas).
* MDS_TRAVERSE_LAST_XLOCKED: path_traverse() will proceed when the xlocked tail
* MDS_TRAVERSE_PATH_LOCKED: path_traverse() will proceed when an xlocked
dentry is encountered.
* MDS_TRAVERSE_WANT_DENTRY: Caller wants tail dentry. Add a null dentry if
the tail dentry does not exist. Return 0 even if the tail dentry is null.
@ -812,7 +820,8 @@ class MDCache {
void open_ino(inodeno_t ino, int64_t pool, MDSContext *fin,
bool want_replica=true, bool want_xlocked=false);
void find_ino_peers(inodeno_t ino, MDSContext *c, mds_rank_t hint=MDS_RANK_NONE);
void find_ino_peers(inodeno_t ino, MDSContext *c,
mds_rank_t hint=MDS_RANK_NONE, bool path_locked=false);
void _do_find_ino_peer(find_ino_peer_info_t& fip);
void handle_find_ino(const cref_t<MMDSFindIno> &m);
void handle_find_ino_reply(const cref_t<MMDSFindInoReply> &m);
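
With the new MDS_TRAVERSE_* flags, callers can delegate path locking to path_traverse() itself. A hypothetical call site for a create-style operation, modeled on what the flag documentation above describes (the exact composition lives in Server::rdlock_path_xlock_dentry()):

// hypothetical composition of the flags above
int flags = MDS_TRAVERSE_RDLOCK_SNAP |     // rdlock snaplock along the way
            MDS_TRAVERSE_RDLOCK_PATH |     // rdlock each dentry on the path
            MDS_TRAVERSE_XLOCK_DENTRY |    // xlock the tail dentry
            MDS_TRAVERSE_WANT_DENTRY |     // add a null dentry if it is missing
            MDS_TRAVERSE_CHECK_LOCKCACHE;  // try to attach a lock cache first
int r = path_traverse(mdr, cf, refpath, flags, &mdr->dn[0]);
if (r > 0)
  return;  // waiting: path_traverse() re-drives the request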

src/mds/Migrator.cc

@ -737,37 +737,42 @@ public:
};
void Migrator::get_export_lock_set(CDir *dir, MutationImpl::LockOpVec& lov)
bool Migrator::export_try_grab_locks(CDir *dir, MutationRef& mut)
{
// path
vector<CDentry*> trace;
cache->make_trace(trace, dir->inode);
CInode *diri = dir->get_inode();
if (!diri->filelock.can_wrlock(diri->get_loner()) ||
!diri->nestlock.can_wrlock(diri->get_loner()))
return false;
MutationImpl::LockOpVec lov;
set<CDir*> wouldbe_bounds;
set<CInode*> bound_inodes;
cache->get_wouldbe_subtree_bounds(dir, wouldbe_bounds);
for (auto& bound : wouldbe_bounds)
bound_inodes.insert(bound->get_inode());
for (auto& in : bound_inodes)
lov.add_rdlock(&in->dirfragtreelock);
lov.reserve(trace.size() + wouldbe_bounds.size() + 8);
lov.add_rdlock(&diri->dirfragtreelock);
for (auto& dn : trace)
lov.add_rdlock(&dn->lock);
CInode* in = diri;
while (true) {
lov.add_rdlock(&in->snaplock);
CDentry* pdn = in->get_projected_parent_dn();
if (!pdn)
break;
in = pdn->get_dir()->get_inode();
}
// prevent scatter gather race
lov.add_rdlock(&dir->get_inode()->dirfragtreelock);
if (!mds->locker->rdlock_try_set(lov, mut))
return false;
// bound dftlocks:
// NOTE: We need to take an rdlock on bounding dirfrags during
// migration for a rather irritating reason: when we export the
// bound inode, we need to send scatterlock state for the dirfrags
// as well, so that the new auth also gets the correct info. If we
// race with a refragment, this info is useless, as we can't
// redivvy it up. And it's needed for the scatterlocks to work
// properly: when the auth is in a sync/lock state it keeps each
// dirfrag's portion in the local (auth OR replica) dirfrag.
for (auto& dir : wouldbe_bounds)
lov.add_rdlock(&dir->get_inode()->dirfragtreelock);
mds->locker->wrlock_force(&diri->filelock, mut);
mds->locker->wrlock_force(&diri->nestlock, mut);
// above code may add duplicated locks
lov.sort_and_merge();
return true;
}
@ -1038,8 +1043,13 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count)
}
ceph_assert(it->second.state == EXPORT_LOCKING);
mds_rank_t dest = it->second.peer;
if (mdr->more()->slave_error || dir->is_frozen() || dir->is_freezing()) {
dout(7) << "wouldblock|freezing|frozen, canceling export" << dendl;
export_try_cancel(dir);
return;
}
mds_rank_t dest = it->second.peer;
if (!mds->is_export_target(dest)) {
dout(7) << "dest is not yet an export target" << dendl;
if (count > 3) {
@ -1061,33 +1071,55 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count)
return;
}
if (mdr->aborted || dir->is_frozen() || dir->is_freezing()) {
dout(7) << "wouldblock|freezing|frozen, canceling export" << dendl;
export_try_cancel(dir);
return;
}
// locks?
MutationImpl::LockOpVec lov;
get_export_lock_set(dir, lov);
// If the auth MDS of the subtree root inode is neither the exporter MDS
// nor the importer MDS, and it gathers the subtree root's fragstat/neststat
// while the subtree is exporting, it's possible that the exporter MDS
// and the importer MDS both are auth MDS of the subtree root or both
// are not auth MDS of the subtree root at the time they receive the
// lock messages. So the auth MDS of the subtree root inode may get no
// or duplicated fragstat/neststat for the subtree root dirfrag.
lov.lock_scatter_gather(&dir->get_inode()->filelock);
lov.lock_scatter_gather(&dir->get_inode()->nestlock);
if (dir->get_inode()->is_auth()) {
dir->get_inode()->filelock.set_scatter_wanted();
dir->get_inode()->nestlock.set_scatter_wanted();
}
if (!(mdr->locking_state & MutationImpl::ALL_LOCKED)) {
MutationImpl::LockOpVec lov;
// If the auth MDS of the subtree root inode is neither the exporter MDS
// nor the importer MDS, and it gathers the subtree root's fragstat/neststat
// while the subtree is exporting, it's possible that the exporter MDS
// and the importer MDS both are auth MDS of the subtree root or both
// are not auth MDS of the subtree root at the time they receive the
// lock messages. So the auth MDS of the subtree root inode may get no
// or duplicated fragstat/neststat for the subtree root dirfrag.
lov.lock_scatter_gather(&dir->get_inode()->filelock);
lov.lock_scatter_gather(&dir->get_inode()->nestlock);
if (dir->get_inode()->is_auth()) {
dir->get_inode()->filelock.set_scatter_wanted();
dir->get_inode()->nestlock.set_scatter_wanted();
}
lov.add_rdlock(&dir->get_inode()->dirfragtreelock);
if (!mds->locker->acquire_locks(mdr, lov, NULL, true)) {
if (mdr->aborted)
export_try_cancel(dir);
return;
if (!mds->locker->acquire_locks(mdr, lov, nullptr, true)) {
if (mdr->aborted)
export_try_cancel(dir);
return;
}
lov.clear();
// bound dftlocks:
// NOTE: We need to take an rdlock on bounding dirfrags during
// migration for a rather irritating reason: when we export the
// bound inode, we need to send scatterlock state for the dirfrags
// as well, so that the new auth also gets the correct info. If we
// race with a refragment, this info is useless, as we can't
// redivvy it up. And it's needed for the scatterlocks to work
// properly: when the auth is in a sync/lock state it keeps each
// dirfrag's portion in the local (auth OR replica) dirfrag.
set<CDir*> wouldbe_bounds;
set<CInode*> bound_inodes;
cache->get_wouldbe_subtree_bounds(dir, wouldbe_bounds);
for (auto& bound : wouldbe_bounds)
bound_inodes.insert(bound->get_inode());
for (auto& in : bound_inodes)
lov.add_rdlock(&in->dirfragtreelock);
if (!mds->locker->rdlock_try_set(lov, mdr))
return;
if (!mds->locker->try_rdlock_snap_layout(dir->get_inode(), mdr))
return;
mdr->locking_state |= MutationImpl::ALL_LOCKED;
}
ceph_assert(g_conf()->mds_kill_export_at != 1);
@ -1315,28 +1347,20 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid)
ceph_assert(it->second.state == EXPORT_FREEZING);
ceph_assert(dir->is_frozen_tree_root());
CInode *diri = dir->get_inode();
it->second.mut = new MutationImpl();
// ok, try to grab all my locks.
MutationImpl::LockOpVec lov;
get_export_lock_set(dir, lov);
CInode *diri = dir->get_inode();
if ((diri->is_auth() && diri->is_frozen()) ||
!mds->locker->can_rdlock_set(lov) ||
// for pinning scatter gather. loner has a higher chance to get wrlock
!diri->filelock.can_wrlock(diri->get_loner()) ||
!diri->nestlock.can_wrlock(diri->get_loner())) {
!export_try_grab_locks(dir, it->second.mut)) {
dout(7) << "export_dir couldn't acquire all needed locks, failing. "
<< *dir << dendl;
export_try_cancel(dir);
return;
}
it->second.mut = new MutationImpl();
if (diri->is_auth())
it->second.mut->auth_pin(diri);
mds->locker->rdlock_take_set(lov, it->second.mut);
mds->locker->wrlock_force(&diri->filelock, it->second.mut);
mds->locker->wrlock_force(&diri->nestlock, it->second.mut);
cache->show_subtrees();
@ -2302,7 +2326,9 @@ void Migrator::handle_export_discover(const cref_t<MExportDirDiscover> &m, bool
filepath fpath(m->get_path());
vector<CDentry*> trace;
MDRequestRef null_ref;
int r = cache->path_traverse(null_ref, cf, fpath, MDS_TRAVERSE_DISCOVER, &trace);
int r = cache->path_traverse(null_ref, cf, fpath,
MDS_TRAVERSE_DISCOVER | MDS_TRAVERSE_PATH_LOCKED,
&trace);
if (r > 0) return;
if (r < 0) {
dout(7) << "handle_export_discover failed to discover or not dir " << m->get_path() << ", NAK" << dendl;
@ -3159,6 +3185,8 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp,
map<CInode*, map<client_t,Capability::Export> >& peer_exports,
list<ScatterLock*>& updated_scatterlocks)
{
CInode *in;
bool added = false;
DECODE_START(1, blp);
dout(15) << __func__ << " on " << *dn << dendl;
@ -3167,8 +3195,7 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp,
decode(ino, blp);
decode(last, blp);
bool added = false;
CInode *in = cache->get_inode(ino, last);
in = cache->get_inode(ino, last);
if (!in) {
in = new CInode(mds->mdcache, true, 1, last);
added = true;
@ -3180,6 +3207,8 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp,
// caps
decode_import_inode_caps(in, true, blp, peer_exports);
DECODE_FINISH(blp);
// link before state -- or not! -sage
if (dn->get_linkage()->get_inode() != in) {
ceph_assert(!dn->get_linkage()->get_inode());
@ -3222,7 +3251,9 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp,
in->snaplock.get_state() != LOCK_SYNC)
mds->locker->try_eval(&in->snaplock, NULL);
DECODE_FINISH(blp);
if (in->policylock.is_stable() &&
in->policylock.get_state() != LOCK_SYNC)
mds->locker->try_eval(&in->policylock, NULL);
}
void Migrator::decode_import_inode_caps(CInode *in, bool auth_cap,

src/mds/Migrator.h

@ -199,7 +199,7 @@ public:
void maybe_split_export(CDir* dir, uint64_t max_size, bool null_okay,
vector<pair<CDir*, size_t> >& results);
void get_export_lock_set(CDir *dir, MutationImpl::LockOpVec& lov);
bool export_try_grab_locks(CDir *dir, MutationRef& mut);
void get_export_client_set(CDir *dir, std::set<client_t> &client_set);
void get_export_client_set(CInode *in, std::set<client_t> &client_set);

src/mds/Mutation.cc

@ -82,6 +82,24 @@ void MutationImpl::finish_locking(SimpleLock *lock)
locking_target_mds = -1;
}
bool MutationImpl::is_rdlocked(SimpleLock *lock) const {
auto it = locks.find(lock);
if (it != locks.end() && it->is_rdlock())
return true;
if (lock_cache)
return static_cast<const MutationImpl*>(lock_cache)->is_rdlocked(lock);
return false;
}
bool MutationImpl::is_wrlocked(SimpleLock *lock) const {
auto it = locks.find(lock);
if (it != locks.end() && it->is_wrlock())
return true;
if (lock_cache)
return static_cast<const MutationImpl*>(lock_cache)->is_wrlocked(lock);
return false;
}
void MutationImpl::LockOpVec::erase_rdlock(SimpleLock* lock)
{
for (int i = size() - 1; i >= 0; --i) {
@ -94,7 +112,21 @@ void MutationImpl::LockOpVec::erase_rdlock(SimpleLock* lock)
}
void MutationImpl::LockOpVec::sort_and_merge()
{
std::sort(begin(), end());
// sort locks on the same object
auto cmp = [](const LockOp &l, const LockOp &r) {
ceph_assert(l.lock->get_parent() == r.lock->get_parent());
return l.lock->type->type < r.lock->type->type;
};
for (auto i = begin(), j = i; ; ++i) {
if (i == end()) {
std::sort(j, i, cmp);
break;
}
if (j->lock->get_parent() != i->lock->get_parent()) {
std::sort(j, i, cmp);
j = i;
}
}
// merge ops on the same lock
for (auto i = end() - 1; i > begin(); ) {
auto j = i;
@ -118,7 +150,7 @@ void MutationImpl::LockOpVec::sort_and_merge()
if (j->is_xlock()) {
// xlock overwrites other types
ceph_assert(!j->is_remote_wrlock());
j->flags = MutationImpl::LockOp::XLOCK;
j->flags = LockOp::XLOCK;
}
erase(j + 1, i + 1);
i = j - 1;
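
Since LockOps now sort by lock pointer inside the set, sort_and_merge() no longer globally sorts the vector: it assumes ops are already grouped by parent object and orders each same-parent run by lock type. A standalone illustration of that run-by-run sort:

// standalone illustration of the grouped sort above
#include <algorithm>
#include <vector>

struct Op { const void *parent; int type; };

void sort_runs(std::vector<Op>& v)
{
  auto cmp = [](const Op& l, const Op& r) { return l.type < r.type; };
  for (auto i = v.begin(), j = i; ; ++i) {
    if (i == v.end()) { std::sort(j, i, cmp); break; }            // last run
    if (j->parent != i->parent) { std::sort(j, i, cmp); j = i; }  // run boundary
  }
}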
@ -399,6 +431,19 @@ bool MDRequestImpl::is_batch_op()
client_request->get_filepath().depth() == 0);
}
int MDRequestImpl::compare_paths()
{
if (dir_root[0] < dir_root[1])
return -1;
if (dir_root[0] > dir_root[1])
return 1;
if (dir_depth[0] < dir_depth[1])
return -1;
if (dir_depth[0] > dir_depth[1])
return 1;
return 0;
}
cref_t<MClientRequest> MDRequestImpl::release_client_request()
{
msg_lock.lock();
@ -508,3 +553,53 @@ void MDRequestImpl::_dump_op_descriptor_unlocked(ostream& stream) const
stream << "rejoin:" << reqid;
}
}
void MDLockCache::attach_locks()
{
ceph_assert(!items_lock);
items_lock.reset(new LockItem[locks.size()]);
int i = 0;
for (auto& p : locks) {
items_lock[i].parent = this;
p.lock->add_cache(items_lock[i]);
++i;
}
}
void MDLockCache::attach_dirfrags(std::vector<CDir*>&& dfv)
{
std::sort(dfv.begin(), dfv.end());
auto last = std::unique(dfv.begin(), dfv.end());
dfv.erase(last, dfv.end());
auth_pinned_dirfrags = std::move(dfv);
ceph_assert(!items_dir);
items_dir.reset(new DirItem[auth_pinned_dirfrags.size()]);
int i = 0;
for (auto dir : auth_pinned_dirfrags) {
items_dir[i].parent = this;
dir->lock_caches_with_auth_pins.push_back(&items_dir[i].item_dir);
++i;
}
}
void MDLockCache::detach_all()
{
ceph_assert(items_lock);
int i = 0;
for (auto& p : locks) {
auto& item = items_lock[i];
p.lock->remove_cache(item);
++i;
}
items_lock.reset();
ceph_assert(items_dir);
i = 0;
for (auto dir : auth_pinned_dirfrags) {
(void)dir;
items_dir[i].item_dir.remove_myself();
++i;
}
items_dir.reset();
}
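
compare_paths() above gives a total order over the two request paths (by tree root, then by depth), which rdlock_two_paths_xlock_destdn() can use to lock both paths in a deterministic order and avoid deadlock between concurrent link/rename requests. A paraphrased use:

// paraphrased sketch: traverse/lock whichever path compares lower first,
// so concurrent two-path operations agree on one global order
if (mdr->compare_paths() <= 0) {
  // lock path 1, then path 2
} else {
  // lock path 2, then path 1
}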

src/mds/Mutation.h

@ -31,13 +31,13 @@
#include "messages/MClientReply.h"
class LogSegment;
class Capability;
class CInode;
class CDir;
class CDentry;
class Session;
class ScatterLock;
struct sr_t;
struct MDLockCache;
struct MutationImpl : public TrackedOp {
metareqid_t reqid;
@ -98,19 +98,8 @@ public:
wrlock_target = MDS_RANK_NONE;
}
bool is_state_pin() const { return !!(flags & STATE_PIN); }
bool operator<(const LockOp& r) const {
if ((lock->type->type <= CEPH_LOCK_DN) && (r.lock->type->type > CEPH_LOCK_DN))
return true;
if ((lock->type->type > CEPH_LOCK_DN) == (r.lock->type->type > CEPH_LOCK_DN)) {
auto lp = lock->get_parent();
auto rp = r.lock->get_parent();
// then sort by object
if (lp == rp)
return (lock->type->type < r.lock->type->type);
return lp->is_lt(rp);
}
return false;
return lock < r.lock;
}
};
@ -119,11 +108,17 @@ public:
emplace_back(lock, LockOp::RDLOCK);
}
void erase_rdlock(SimpleLock *lock);
void add_xlock(SimpleLock *lock) {
emplace_back(lock, LockOp::XLOCK);
void add_xlock(SimpleLock *lock, int idx=-1) {
if (idx >= 0)
emplace(cbegin() + idx, lock, LockOp::XLOCK);
else
emplace_back(lock, LockOp::XLOCK);
}
void add_wrlock(SimpleLock *lock) {
emplace_back(lock, LockOp::WRLOCK);
void add_wrlock(SimpleLock *lock, int idx=-1) {
if (idx >= 0)
emplace(cbegin() + idx, lock, LockOp::WRLOCK);
else
emplace_back(lock, LockOp::WRLOCK);
}
void add_remote_wrlock(SimpleLock *lock, mds_rank_t rank) {
ceph_assert(rank != MDS_RANK_NONE);
@ -138,27 +133,37 @@ public:
reserve(32);
}
};
typedef set<LockOp> lock_set;
typedef lock_set::iterator lock_iterator;
using lock_set = set<LockOp>;
using lock_iterator = lock_set::iterator;
lock_set locks; // full ordering
bool is_rdlocked(SimpleLock *lock) const {
auto it = locks.find(lock);
return it != locks.end() && it->is_rdlock();
MDLockCache* lock_cache = nullptr;
bool lock_cache_disabled = false;
void disable_lock_cache() {
lock_cache_disabled = true;
}
lock_iterator emplace_lock(SimpleLock *l, unsigned f=0, mds_rank_t t=MDS_RANK_NONE) {
last_locked = l;
return locks.emplace(l, f, t).first;
}
bool is_rdlocked(SimpleLock *lock) const;
bool is_wrlocked(SimpleLock *lock) const;
bool is_xlocked(SimpleLock *lock) const {
auto it = locks.find(lock);
return it != locks.end() && it->is_xlock();
}
bool is_wrlocked(SimpleLock *lock) const {
auto it = locks.find(lock);
return it != locks.end() && it->is_wrlock();
}
bool is_remote_wrlocked(SimpleLock *lock) const {
auto it = locks.find(lock);
return it != locks.end() && it->is_remote_wrlock();
}
bool is_last_locked(SimpleLock *lock) const {
return lock == last_locked;
}
SimpleLock *last_locked = nullptr;
// lock we are currently trying to acquire. if we give up for some reason,
// be sure to eval() this.
SimpleLock *locking = nullptr;
@ -166,7 +171,14 @@ public:
// if this flag is set, do not attempt to acquire further locks.
// (useful for wrlock, which may be a moving auth target)
bool done_locking = false;
enum {
SNAP_LOCKED = 1,
SNAP2_LOCKED = 2,
PATH_LOCKED = 4,
ALL_LOCKED = 8,
};
int locking_state = 0;
bool committing = false;
bool aborted = false;
bool killed = false;
@ -187,7 +199,8 @@ public:
reqid(ri), attempt(att),
slave_to_mds(slave_to) { }
~MutationImpl() override {
ceph_assert(locking == NULL);
ceph_assert(!locking);
ceph_assert(!lock_cache);
ceph_assert(num_pins == 0);
ceph_assert(num_auth_pins == 0);
}
@ -217,8 +230,8 @@ public:
}
// pin items in cache
void pin(MDSCacheObject *o);
void unpin(MDSCacheObject *o);
void pin(MDSCacheObject *object);
void unpin(MDSCacheObject *object);
void set_stickydirs(CInode *in);
void put_stickydirs();
void drop_pins();
@ -274,10 +287,15 @@ struct MDRequestImpl : public MutationImpl {
// -- i am a client (master) request
cref_t<MClientRequest> client_request; // client request (if any)
// tree and depth info of path1 and path2
inodeno_t dir_root[2] = {0, 0};
int dir_depth[2] = {-1, -1};
file_layout_t dir_layout;
// store up to two sets of dn vectors, inode pointers, for request path1 and path2.
vector<CDentry*> dn[2];
CDentry *straydn;
CInode *in[2];
CDentry *straydn;
snapid_t snapid;
CInode *tracei;
@ -429,6 +447,7 @@ struct MDRequestImpl : public MutationImpl {
void set_filepath2(const filepath& fp);
bool is_queued_for_replay() const;
bool is_batch_op();
int compare_paths();
void print(ostream &out) const override;
void dump(Formatter *f) const override;
@ -470,5 +489,50 @@ struct MDSlaveUpdate {
}
};
struct MDLockCacheItem {
MDLockCache *parent = nullptr;
elist<MDLockCacheItem*>::item item_lock;
};
struct MDLockCache : public MutationImpl {
CInode *diri;
Capability *client_cap;
int opcode;
file_layout_t dir_layout;
elist<MDLockCache*>::item item_cap_lock_cache;
using LockItem = MDLockCacheItem;
// link myself to locked locks
std::unique_ptr<LockItem[]> items_lock;
struct DirItem {
MDLockCache *parent = nullptr;
elist<DirItem*>::item item_dir;
};
// link myself to auth-pinned dirfrags
std::unique_ptr<DirItem[]> items_dir;
std::vector<CDir*> auth_pinned_dirfrags;
int ref = 1;
bool invalidating = false;
MDLockCache(Capability *cap, int op) :
MutationImpl(), diri(cap->get_inode()), client_cap(cap), opcode(op) {
client_cap->lock_caches.push_back(&item_cap_lock_cache);
}
CInode *get_dir_inode() { return diri; }
void set_dir_layout(file_layout_t& layout) {
dir_layout = layout;
}
const file_layout_t& get_dir_layout() const {
return dir_layout;
}
void attach_locks();
void attach_dirfrags(std::vector<CDir*>&& dfv);
void detach_all();
};
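// A simplified, self-contained model of the linkage above, with std::list
// standing in for ceph's intrusive elist: the cache owns one item per covered
// lock, each lock keeps a list of those items, and a conflicting locker can
// walk the per-lock list back to the owning caches. Names are illustrative.
#include <iostream>
#include <list>
#include <string>
#include <vector>

struct LockCache;

struct LockCacheItem {            // plays the role of MDLockCacheItem
  LockCache* parent = nullptr;
};

struct Lock {                     // plays the role of SimpleLock
  std::string name;
  std::list<LockCacheItem*> caches;
};

struct LockCache {                // plays the role of MDLockCache
  std::vector<LockCacheItem> items;   // like items_lock above
  void attach(const std::vector<Lock*>& locks) {
    items.resize(locks.size());       // size fixed before taking pointers
    for (size_t i = 0; i < locks.size(); ++i) {
      items[i].parent = this;
      locks[i]->caches.push_back(&items[i]);
    }
  }
};

int main() {
  Lock ifile{"ifile"}, ixattr{"ixattr"};
  LockCache cache;
  cache.attach({&ifile, &ixattr});
  // Invalidation can now find every cache pinned on a given lock:
  for (auto* item : ifile.caches)
    std::cout << ifile.name << " is held by cache " << item->parent << '\n';
  return 0;
}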
#endif

File diff suppressed because it is too large

@ -193,9 +193,7 @@ public:
void perf_gather_op_latency(const cref_t<MClientRequest> &req, utime_t lat);
void early_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn);
void respond_to_request(MDRequestRef& mdr, int r = 0);
void set_trace_dist(Session *session, const ref_t<MClientReply> &reply, CInode *in, CDentry *dn,
snapid_t snapid,
int num_dentries_wanted,
void set_trace_dist(const ref_t<MClientReply> &reply, CInode *in, CDentry *dn,
MDRequestRef& mdr);
@ -215,14 +213,12 @@ public:
void journal_allocated_inos(MDRequestRef& mdr, EMetaBlob *blob);
void apply_allocated_inos(MDRequestRef& mdr, Session *session);
CInode* rdlock_path_pin_ref(MDRequestRef& mdr, int n, MutationImpl::LockOpVec& lov,
bool want_auth, bool no_want_auth=false,
file_layout_t **layout=nullptr,
bool no_lookup=false);
CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, int n,
MutationImpl::LockOpVec& lov,
bool okexist, bool alwaysxlock,
file_layout_t **layout=nullptr);
CInode* rdlock_path_pin_ref(MDRequestRef& mdr, bool want_auth,
bool no_want_auth=false);
CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, bool create,
bool okexist=false, bool want_layout=false);
std::pair<CDentry*, CDentry*>
rdlock_two_paths_xlock_destdn(MDRequestRef& mdr, bool xlock_srcdn);
CDir* try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequestRef& mdr);
@ -237,6 +233,9 @@ public:
void handle_client_file_setlock(MDRequestRef& mdr);
void handle_client_file_readlock(MDRequestRef& mdr);
bool xlock_policylock(MDRequestRef& mdr, CInode *in,
bool want_layout=false, bool xlock_snaplock=false);
CInode* try_get_auth_inode(MDRequestRef& mdr, inodeno_t ino);
void handle_client_setattr(MDRequestRef& mdr);
void handle_client_setlayout(MDRequestRef& mdr);
void handle_client_setdirlayout(MDRequestRef& mdr);
@ -249,12 +248,8 @@ public:
string name,
string value,
file_layout_t *layout);
void handle_set_vxattr(MDRequestRef& mdr, CInode *cur,
file_layout_t *dir_layout,
MutationImpl::LockOpVec& lov);
void handle_remove_vxattr(MDRequestRef& mdr, CInode *cur,
file_layout_t *dir_layout,
MutationImpl::LockOpVec& lov);
void handle_set_vxattr(MDRequestRef& mdr, CInode *cur);
void handle_remove_vxattr(MDRequestRef& mdr, CInode *cur);
void handle_client_setxattr(MDRequestRef& mdr);
void handle_client_removexattr(MDRequestRef& mdr);


@ -41,3 +41,32 @@ void SimpleLock::dump(Formatter *f) const {
}
f->close_section();
}
SimpleLock::unstable_bits_t::unstable_bits_t() :
lock_caches(member_offset(MDLockCache::LockItem, item_lock)) {}
void SimpleLock::add_cache(MDLockCacheItem& item)
{
more()->lock_caches.push_back(&item.item_lock);
state_flags |= CACHED;
}
void SimpleLock::remove_cache(MDLockCacheItem& item) {
auto& lock_caches = more()->lock_caches;
item.item_lock.remove_myself();
if (lock_caches.empty()) {
state_flags &= ~CACHED;
try_clear_more();
}
}
MDLockCache* SimpleLock::get_first_cache()
{
if (have_more()) {
auto& lock_caches = more()->lock_caches;
if (!lock_caches.empty()) {
return lock_caches.front()->parent;
}
}
return nullptr;
}
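// (The CACHED bit above is a summary flag: add_cache() sets it whenever an
// item is linked, and remove_cache() clears it only once the per-lock list is
// empty, so callers can test a single flag instead of touching the list.)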


@ -28,6 +28,9 @@
struct MutationImpl;
typedef boost::intrusive_ptr<MutationImpl> MutationRef;
struct MDLockCache;
struct MDLockCacheItem;
extern "C" {
#include "locks.h"
}
@ -142,10 +145,9 @@ public:
case CEPH_LOCK_INEST: return "inest";
case CEPH_LOCK_IXATTR: return "ixattr";
case CEPH_LOCK_ISNAP: return "isnap";
case CEPH_LOCK_INO: return "ino";
case CEPH_LOCK_IFLOCK: return "iflock";
case CEPH_LOCK_IPOLICY: return "ipolicy";
default: ceph_abort(); return std::string_view();
default: return "unknown";
}
}
@ -189,6 +191,7 @@ protected:
enum {
LEASED = 1 << 0,
NEED_RECOVER = 1 << 1,
CACHED = 1 << 2,
};
private:
@ -204,6 +207,8 @@ private:
client_t xlock_by_client = -1;
client_t excl_client = -1;
elist<MDLockCacheItem*> lock_caches;
bool empty() {
return
gather_set.empty() &&
@ -211,10 +216,10 @@ private:
num_xlock == 0 &&
xlock_by.get() == NULL &&
xlock_by_client == -1 &&
excl_client == -1;
excl_client == -1 &&
lock_caches.empty();
}
unstable_bits_t() {}
unstable_bits_t();
};
mutable std::unique_ptr<unstable_bits_t> _unstable;
@ -316,8 +321,13 @@ public:
bool is_waiter_for(uint64_t mask) const {
return parent->is_waiter_for(mask << get_wait_shift());
}
bool is_cached() const {
return state_flags & CACHED;
}
void add_cache(MDLockCacheItem& item);
void remove_cache(MDLockCacheItem& item);
MDLockCache* get_first_cache();
// state
int get_state() const { return state; }


@ -649,15 +649,13 @@ void StrayManager::_eval_stray_remote(CDentry *stray_dn, CDentry *remote_dn)
void StrayManager::reintegrate_stray(CDentry *straydn, CDentry *rdn)
{
dout(10) << __func__ << " " << *straydn << " into " << *rdn << dendl;
dout(10) << __func__ << " " << *straydn << " to " << *rdn << dendl;
logger->inc(l_mdc_strays_reintegrated);
// rename it to another mds.
filepath src;
straydn->make_path(src);
filepath dst;
rdn->make_path(dst);
// rename it to remote linkage.
filepath src(straydn->get_name(), straydn->get_dir()->ino());
filepath dst(rdn->get_name(), rdn->get_dir()->ino());
auto req = make_message<MClientRequest>(CEPH_MDS_OP_RENAME);
req->set_filepath(dst);
@ -669,24 +667,16 @@ void StrayManager::reintegrate_stray(CDentry *straydn, CDentry *rdn)
void StrayManager::migrate_stray(CDentry *dn, mds_rank_t to)
{
CInode *in = dn->get_projected_linkage()->get_inode();
ceph_assert(in);
CInode *diri = dn->dir->get_inode();
ceph_assert(diri->is_stray());
dout(10) << "migrate_stray from mds." << MDS_INO_STRAY_OWNER(diri->inode.ino)
<< " to mds." << to
<< " " << *dn << " " << *in << dendl;
dout(10) << __func__ << " " << *dn << " to mds." << to << dendl;
logger->inc(l_mdc_strays_migrated);
// rename it to another mds.
filepath src;
dn->make_path(src);
ceph_assert(src.depth() == 2);
inodeno_t dirino = dn->get_dir()->ino();
ceph_assert(MDS_INO_IS_STRAY(dirino));
filepath dst(MDS_INO_MDSDIR(to));
dst.push_dentry(src[0]);
dst.push_dentry(src[1]);
filepath src(dn->get_name(), dirino);
filepath dst(dn->get_name(), MDS_INO_STRAY(to, MDS_INO_STRAY_INDEX(dirino)));
auto req = make_message<MClientRequest>(CEPH_MDS_OP_RENAME);
req->set_filepath(dst);
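// A self-contained model of the stray-inode arithmetic used above (the
// constants are stand-ins, not ceph's real values): every rank owns a fixed
// number of stray directories, so a stray dentry is re-targeted to rank 'to'
// by keeping its stray index and swapping the rank component.
#include <cassert>
#include <cstdint>

constexpr uint64_t STRAY_OFFSET = 0x600;  // illustrative base offset
constexpr uint64_t NUM_STRAY    = 10;     // stray dirs per rank

constexpr uint64_t stray_ino(uint64_t rank, uint64_t idx) {
  return STRAY_OFFSET + rank * NUM_STRAY + idx;   // like MDS_INO_STRAY(x, i)
}
constexpr uint64_t stray_index(uint64_t ino) {
  return (ino - STRAY_OFFSET) % NUM_STRAY;        // like MDS_INO_STRAY_INDEX
}

int main() {
  uint64_t src_dir = stray_ino(0, 3);                      // stray3 on mds.0
  uint64_t dst_dir = stray_ino(2, stray_index(src_dir));   // same slot, mds.2
  assert(stray_index(dst_dir) == 3);
  return 0;
}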


@ -531,7 +531,7 @@ void EMetaBlob::fullbit::update_inode(MDSRank *mds, CInode *in)
<< dirfragtree << " on " << *in << dendl;
in->dirfragtree = dirfragtree;
in->force_dirfrags();
if (in->has_dirfrags() && in->authority() == CDIR_AUTH_UNDEF) {
if (in->get_num_dirfrags() && in->authority() == CDIR_AUTH_UNDEF) {
auto&& ls = in->get_nested_dirfrags();
for (const auto& dir : ls) {
if (dir->get_num_any() == 0 &&


@ -34,7 +34,7 @@ private:
filepath want; // ... [/]need/this/stuff
bool want_base_dir = true;
bool want_xlocked = false;
bool path_locked = false;
public:
inodeno_t get_base_ino() const { return base_ino; }
@ -45,7 +45,7 @@ private:
const std::string& get_dentry(int n) const { return want[n]; }
bool wants_base_dir() const { return want_base_dir; }
bool wants_xlocked() const { return want_xlocked; }
bool is_path_locked() const { return path_locked; }
void set_base_dir_frag(frag_t f) { base_dir_frag = f; }
@ -56,14 +56,14 @@ protected:
snapid_t s,
filepath& want_path_,
bool want_base_dir_ = true,
bool discover_xlocks_ = false) :
bool path_locked_ = false) :
SafeMessage{MSG_MDS_DISCOVER},
base_ino(base_ino_),
base_dir_frag(base_frag_),
snapid(s),
want(want_path_),
want_base_dir(want_base_dir_),
want_xlocked(discover_xlocks_) { }
path_locked(path_locked_) { }
~MDiscover() override {}
public:
@ -80,7 +80,7 @@ public:
decode(snapid, p);
decode(want, p);
decode(want_base_dir, p);
decode(want_xlocked, p);
decode(path_locked, p);
}
void encode_payload(uint64_t features) override {
using ceph::encode;
@ -89,7 +89,7 @@ public:
encode(snapid, payload);
encode(want, payload);
encode(want_base_dir, payload);
encode(want_xlocked, payload);
encode(path_locked, payload);
}
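// (Wire-format note: path_locked is encoded and decoded in the same position
// the old want_xlocked bool occupied, so the message layout is unchanged and
// only the flag's meaning differs.)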
private:
template<class T, typename... Args>


@ -72,7 +72,7 @@ private:
inodeno_t base_ino;
frag_t base_dir_frag;
bool wanted_base_dir = false;
bool wanted_xlocked = false;
bool path_locked = false;
snapid_t wanted_snapid;
// and the response
@ -93,7 +93,7 @@ private:
inodeno_t get_base_ino() const { return base_ino; }
frag_t get_base_dir_frag() const { return base_dir_frag; }
bool get_wanted_base_dir() const { return wanted_base_dir; }
bool get_wanted_xlocked() const { return wanted_xlocked; }
bool is_path_locked() const { return path_locked; }
snapid_t get_wanted_snapid() const { return wanted_snapid; }
bool is_flag_error_dn() const { return flag_error_dn; }
@ -116,7 +116,7 @@ protected:
base_ino(dis.get_base_ino()),
base_dir_frag(dis.get_base_dir_frag()),
wanted_base_dir(dis.wants_base_dir()),
wanted_xlocked(dis.wants_xlocked()),
path_locked(dis.is_path_locked()),
wanted_snapid(dis.get_snapid()),
flag_error_dn(false),
flag_error_dir(false),
@ -131,7 +131,7 @@ protected:
base_ino(df.ino),
base_dir_frag(df.frag),
wanted_base_dir(false),
wanted_xlocked(false),
path_locked(false),
wanted_snapid(CEPH_NOSNAP),
flag_error_dn(false),
flag_error_dir(false),
@ -179,7 +179,7 @@ public:
decode(base_ino, p);
decode(base_dir_frag, p);
decode(wanted_base_dir, p);
decode(wanted_xlocked, p);
decode(path_locked, p);
decode(wanted_snapid, p);
decode(flag_error_dn, p);
decode(flag_error_dir, p);
@ -195,7 +195,7 @@ public:
encode(base_ino, payload);
encode(base_dir_frag, payload);
encode(wanted_base_dir, payload);
encode(wanted_xlocked, payload);
encode(path_locked, payload);
encode(wanted_snapid, payload);
encode(flag_error_dn, payload);
encode(flag_error_dir, payload);


@ -98,12 +98,14 @@ public:
__s16 op;
mutable __u16 flags; /* XXX HACK for mark_interrupted */
static constexpr unsigned FLAG_NONBLOCK = 1<<0;
static constexpr unsigned FLAG_WOULDBLOCK = 1<<1;
static constexpr unsigned FLAG_NOTJOURNALED = 1<<2;
static constexpr unsigned FLAG_EROFS = 1<<3;
static constexpr unsigned FLAG_ABORT = 1<<4;
static constexpr unsigned FLAG_INTERRUPTED = 1<<5;
static constexpr unsigned FLAG_NONBLOCKING = 1<<0;
static constexpr unsigned FLAG_WOULDBLOCK = 1<<1;
static constexpr unsigned FLAG_NOTJOURNALED = 1<<2;
static constexpr unsigned FLAG_EROFS = 1<<3;
static constexpr unsigned FLAG_ABORT = 1<<4;
static constexpr unsigned FLAG_INTERRUPTED = 1<<5;
static constexpr unsigned FLAG_NOTIFYBLOCKING = 1<<6;
static constexpr unsigned FLAG_REQBLOCKED = 1<<7;
// for locking
__u16 lock_type; // lock object type
@ -140,8 +142,8 @@ public:
const vector<MDSCacheObjectInfo>& get_authpins() const { return authpins; }
vector<MDSCacheObjectInfo>& get_authpins() { return authpins; }
void mark_nonblock() { flags |= FLAG_NONBLOCK; }
bool is_nonblock() const { return (flags & FLAG_NONBLOCK); }
void mark_nonblocking() { flags |= FLAG_NONBLOCKING; }
bool is_nonblocking() const { return (flags & FLAG_NONBLOCKING); }
void mark_error_wouldblock() { flags |= FLAG_WOULDBLOCK; }
bool is_error_wouldblock() const { return (flags & FLAG_WOULDBLOCK); }
void mark_not_journaled() { flags |= FLAG_NOTJOURNALED; }
@ -152,6 +154,11 @@ public:
void mark_abort() { flags |= FLAG_ABORT; }
bool is_interrupted() const { return (flags & FLAG_INTERRUPTED); }
void mark_interrupted() const { flags |= FLAG_INTERRUPTED; }
bool should_notify_blocking() const { return (flags & FLAG_NOTIFYBLOCKING); }
void mark_notify_blocking() { flags |= FLAG_NOTIFYBLOCKING; }
void clear_notify_blocking() const { flags &= ~FLAG_NOTIFYBLOCKING; }
bool is_req_blocked() const { return (flags & FLAG_REQBLOCKED); }
void mark_req_blocked() { flags |= FLAG_REQBLOCKED; }
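// (Reading of the two new flags, inferred from their names and the helpers
// above rather than stated in this header: FLAG_NOTIFYBLOCKING asks the slave
// side to report back if the lock request blocks, and FLAG_REQBLOCKED records
// that such a report arrived.)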
void set_lock_type(int t) { lock_type = t; }
const bufferlist& get_lock_data() const { return inode_export; }