Merge PR #36472 into master

* refs/pull/36472/head:
	qa/workunits/fs: add test for subvolume
	mds: don't move inode with nlink > 1 to global snaprealm if it's in subvolume
	mds: disallow hardlink across subvolume
	mds: disallow across subvolume rename
	mds: disallow creating snapshot on descendent directory of subvolume
	mds: add vxattr that marks/clears subvolume flag

Reviewed-by: Patrick Donnelly <pdonnell@redhat.com>
This commit is contained in:
Patrick Donnelly 2020-08-21 16:12:32 -07:00
commit 3228b19bda
No known key found for this signature in database
GPG Key ID: 3A2A7E25BEA8AADB
7 changed files with 198 additions and 37 deletions

View File

@ -0,0 +1,63 @@
#!/bin/sh -x
# Run the given command line and invert its outcome: return 0 when the
# command fails and 1 when it succeeds, so that under `set -e` the script
# aborts if an operation that must be rejected is accepted.
expect_failure() {
  if ! "$@"; then
    return 0
  fi
  return 1
}
# Abort on the first unexpected result; operations that must be rejected are
# wrapped in expect_failure.
set -e

# "group" is a plain directory; subvolume roots are created beneath it.
mkdir group
mkdir group/subvol1
setfattr -n ceph.dir.subvolume -v 1 group/subvol1

# rename subvolume (the subvolume root itself stays inside "group")
mv group/subvol1 group/subvol2

# move file out of the subvolume
# os.rename() is used instead of mv(1) because mv falls back to copy+unlink
# when rename fails with EXDEV, which would hide the rejection.
touch group/subvol2/file1
expect_failure python3 -c "import os; os.rename('group/subvol2/file1', 'group/file1')"

# move file into the subvolume
touch group/file2
expect_failure python3 -c "import os; os.rename('group/file2', 'group/subvol2/file2')"

# create hardlink within subvolume
ln group/subvol2/file1 group/subvol2/file1_

# create hardlink out of subvolume
expect_failure ln group/subvol2/file1 group/file1_

# create hardlink into subvolume
# The subvolume was renamed to subvol2 above; linking into the stale
# "subvol1" path would fail with ENOENT no matter what and test nothing.
expect_failure ln group/file2 group/subvol2/file2_

# create snapshot at subvolume root
mkdir group/subvol2/.snap/s1

# create snapshot at descendent dir of subvolume
mkdir group/subvol2/dir
expect_failure mkdir group/subvol2/dir/.snap/s2

mkdir group/subvol3
setfattr -n ceph.dir.subvolume -v 1 group/subvol3

# move file across subvolumes
expect_failure python3 -c "import os; os.rename('group/subvol2/file1', 'group/subvol3/file1')"

# create hardlink across subvolumes
expect_failure ln group/subvol2/file1 group/subvol3/file1

# create subvolume inside existing subvolume
expect_failure setfattr -n ceph.dir.subvolume -v 1 group/subvol2/dir

# clear subvolume flag; snapshots on descendent dirs are allowed again
setfattr -n ceph.dir.subvolume -v 0 group/subvol2
mkdir group/subvol2/dir/.snap/s2

# parent subvolume overrides child subvolume: "dir" can be flagged while its
# parent is not a subvolume, but once the parent is flagged again "dir" is
# treated as a descendent directory of subvol2
setfattr -n ceph.dir.subvolume -v 1 group/subvol2/dir
setfattr -n ceph.dir.subvolume -v 1 group/subvol2
expect_failure mkdir group/subvol2/dir/.snap/s3

# remove the snapshots first, then the whole tree
rmdir group/subvol2/.snap/s1
rmdir group/subvol2/dir/.snap/s2
rm -rf group

echo OK

View File

@ -605,18 +605,20 @@ void CInode::record_snaprealm_past_parent(sr_t *new_snap, SnapRealm *newparent)
}
}
void CInode::record_snaprealm_parent_dentry(sr_t *new_snap, SnapRealm *newparent,
void CInode::record_snaprealm_parent_dentry(sr_t *new_snap, SnapRealm *oldparent,
CDentry *dn, bool primary_dn)
{
ceph_assert(new_snap->is_parent_global());
SnapRealm *oldparent = dn->get_dir()->inode->find_snaprealm();
if (!oldparent)
oldparent = dn->get_dir()->inode->find_snaprealm();
auto& snaps = oldparent->get_snaps();
if (!primary_dn) {
auto p = snaps.lower_bound(dn->first);
if (p != snaps.end())
new_snap->past_parent_snaps.insert(p, snaps.end());
} else if (newparent != oldparent) {
} else {
// 'last_destroyed' is used as 'current_parent_since'
auto p = snaps.lower_bound(new_snap->last_destroyed);
if (p != snaps.end())

View File

@ -4699,15 +4699,17 @@ void Server::handle_client_readdir(MDRequestRef& mdr)
*/
class C_MDS_inode_update_finish : public ServerLogContext {
CInode *in;
bool truncating_smaller, changed_ranges, new_realm;
bool truncating_smaller, changed_ranges, adjust_realm;
public:
C_MDS_inode_update_finish(Server *s, MDRequestRef& r, CInode *i,
bool sm=false, bool cr=false, bool nr=false) :
bool sm=false, bool cr=false, bool ar=false) :
ServerLogContext(s, r), in(i),
truncating_smaller(sm), changed_ranges(cr), new_realm(nr) { }
truncating_smaller(sm), changed_ranges(cr), adjust_realm(ar) { }
void finish(int r) override {
ceph_assert(r == 0);
int snap_op = (in->snaprealm ? CEPH_SNAP_OP_UPDATE : CEPH_SNAP_OP_SPLIT);
// apply
mdr->apply();
@ -4719,10 +4721,9 @@ public:
mds->mdcache->truncate_inode(in, mdr->ls);
}
if (new_realm) {
int op = CEPH_SNAP_OP_SPLIT;
mds->mdcache->send_snap_update(in, 0, op);
mds->mdcache->do_realm_invalidate_and_update_notify(in, op);
if (adjust_realm) {
mds->mdcache->send_snap_update(in, 0, snap_op);
mds->mdcache->do_realm_invalidate_and_update_notify(in, snap_op);
}
get_mds()->balancer->hit_inode(in, META_POP_IWR);
@ -5508,7 +5509,7 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur)
return;
}
bool new_realm = false;
bool adjust_realm = false;
if (name.compare(0, 15, "ceph.dir.layout") == 0) {
if (!cur->is_dir()) {
respond_to_request(mdr, -EINVAL);
@ -5575,26 +5576,72 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur)
}
if (quota.is_enable() && !cur->get_projected_srnode())
new_realm = true;
adjust_realm = true;
if (!xlock_policylock(mdr, cur, false, new_realm))
if (!xlock_policylock(mdr, cur, false, adjust_realm))
return;
auto pi = cur->project_inode(mdr, false, new_realm);
if (cur->get_projected_inode()->quota == quota) {
respond_to_request(mdr, 0);
return;
}
auto pi = cur->project_inode(mdr, false, adjust_realm);
pi.inode->quota = quota;
if (new_realm) {
SnapRealm *realm = cur->find_snaprealm();
auto seq = realm->get_newest_seq();
auto &newsnap = *pi.snapnode;
newsnap.created = seq;
newsnap.seq = seq;
}
if (adjust_realm)
pi.snapnode->created = pi.snapnode->seq = cur->find_snaprealm()->get_newest_seq();
mdr->no_early_reply = true;
pip = pi.inode.get();
client_t exclude_ct = mdr->get_client();
mdcache->broadcast_quota_to_client(cur, exclude_ct, true);
} else if (name == "ceph.dir.subvolume"sv) {
if (!cur->is_dir()) {
respond_to_request(mdr, -EINVAL);
return;
}
bool val;
try {
val = boost::lexical_cast<bool>(value);
} catch (boost::bad_lexical_cast const&) {
dout(10) << "bad vxattr value, unable to parse bool for " << name << dendl;
respond_to_request(mdr, -EINVAL);
return;
}
if (!xlock_policylock(mdr, cur, false, true))
return;
SnapRealm *realm = cur->find_snaprealm();
if (val) {
inodeno_t subvol_ino = realm->get_subvolume_ino();
// can't create subvolume inside another subvolume
if (subvol_ino && subvol_ino != cur->ino()) {
respond_to_request(mdr, -EINVAL);
return;
}
}
const auto srnode = cur->get_projected_srnode();
if (val == (srnode && srnode->is_subvolume())) {
respond_to_request(mdr, 0);
return;
}
auto pi = cur->project_inode(mdr, false, true);
if (!srnode)
pi.snapnode->created = pi.snapnode->seq = realm->get_newest_seq();
if (val)
pi.snapnode->mark_subvolume();
else
pi.snapnode->clear_subvolume();
mdr->no_early_reply = true;
pip = pi.inode.get();
adjust_realm = true;
} else if (name == "ceph.dir.pin"sv) {
if (!cur->is_dir() || cur->is_root()) {
respond_to_request(mdr, -EINVAL);
@ -5690,7 +5737,7 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur)
mdcache->journal_dirty_inode(mdr.get(), &le->metablob, cur);
journal_and_reply(mdr, cur, 0, le, new C_MDS_inode_update_finish(this, mdr, cur,
false, false, new_realm));
false, false, adjust_realm));
return;
}
@ -6289,12 +6336,22 @@ void Server::handle_client_link(MDRequestRef& mdr)
return;
}
CInode* target_pin = targeti->get_projected_parent_dir()->inode;
SnapRealm *target_realm = target_pin->find_snaprealm();
if (target_pin != dir->inode &&
target_realm->get_subvolume_ino() !=
dir->inode->find_snaprealm()->get_subvolume_ino()) {
dout(7) << "target is in different subvolume, failing..." << dendl;
respond_to_request(mdr, -EXDEV);
return;
}
// go!
ceph_assert(g_conf()->mds_kill_link_at != 1);
// local or remote?
if (targeti->is_auth())
_link_local(mdr, destdn, targeti);
_link_local(mdr, destdn, targeti, target_realm);
else
_link_remote(mdr, true, destdn, targeti);
mds->balancer->maybe_fragment(dir, false);
@ -6319,7 +6376,7 @@ public:
};
void Server::_link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti)
void Server::_link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti, SnapRealm *target_realm)
{
dout(10) << "_link_local " << *dn << " to " << *targeti << dendl;
@ -6339,10 +6396,10 @@ void Server::_link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti)
pi.inode->version = tipv;
bool adjust_realm = false;
if (!targeti->is_projected_snaprealm_global()) {
if (!target_realm->get_subvolume_ino() && !targeti->is_projected_snaprealm_global()) {
sr_t *newsnap = targeti->project_snaprealm();
targeti->mark_snaprealm_global(newsnap);
targeti->record_snaprealm_parent_dentry(newsnap, NULL, targeti->get_projected_parent_dn(), true);
targeti->record_snaprealm_parent_dentry(newsnap, target_realm, targeti->get_projected_parent_dn(), true);
adjust_realm = true;
}
@ -6594,10 +6651,13 @@ void Server::handle_peer_link_prep(MDRequestRef& mdr)
if (mdr->peer_request->get_op() == MMDSPeerRequest::OP_LINKPREP) {
inc = true;
pi.inode->nlink++;
if (!targeti->is_projected_snaprealm_global()) {
CDentry *target_pdn = targeti->get_projected_parent_dn();
SnapRealm *target_realm = target_pdn->get_dir()->inode->find_snaprealm();
if (!target_realm->get_subvolume_ino() && !targeti->is_projected_snaprealm_global()) {
sr_t *newsnap = targeti->project_snaprealm();
targeti->mark_snaprealm_global(newsnap);
targeti->record_snaprealm_parent_dentry(newsnap, NULL, targeti->get_projected_parent_dn(), true);
targeti->record_snaprealm_parent_dentry(newsnap, target_realm, target_pdn, true);
adjust_realm = true;
realm_projected = true;
}
@ -6977,7 +7037,7 @@ void Server::handle_client_unlink(MDRequestRef& mdr)
if (!mdr->more()->desti_srnode) {
if (in->is_projected_snaprealm_global()) {
sr_t *new_srnode = in->prepare_new_srnode(0);
in->record_snaprealm_parent_dentry(new_srnode, NULL, dn, dnl->is_primary());
in->record_snaprealm_parent_dentry(new_srnode, nullptr, dn, dnl->is_primary());
// dropping the last linkage or dropping the last remote linkage,
// detach the inode from global snaprealm
auto nlink = in->get_projected_inode()->nlink;
@ -7831,6 +7891,21 @@ void Server::handle_client_rename(MDRequestRef& mdr)
}
*/
SnapRealm *dest_realm = nullptr;
SnapRealm *src_realm = nullptr;
if (!linkmerge) {
dest_realm = destdir->inode->find_snaprealm();
if (srcdir->inode == destdir->inode)
src_realm = dest_realm;
else
src_realm = srcdir->inode->find_snaprealm();
if (src_realm != dest_realm &&
src_realm->get_subvolume_ino() != dest_realm->get_subvolume_ino()) {
respond_to_request(mdr, -EXDEV);
return;
}
}
ceph_assert(g_conf()->mds_kill_rename_at != 1);
// -- open all srcdn inode frags, if any --
@ -7861,7 +7936,7 @@ void Server::handle_client_rename(MDRequestRef& mdr)
srci->get_projected_inode()->nlink == 1 &&
srci->is_projected_snaprealm_global()) {
sr_t *new_srnode = srci->prepare_new_srnode(0);
srci->record_snaprealm_parent_dentry(new_srnode, NULL, destdn, false);
srci->record_snaprealm_parent_dentry(new_srnode, nullptr, destdn, false);
srci->clear_snaprealm_global(new_srnode);
mdr->more()->srci_srnode = new_srnode;
@ -7870,7 +7945,7 @@ void Server::handle_client_rename(MDRequestRef& mdr)
if (oldin && !mdr->more()->desti_srnode) {
if (oldin->is_projected_snaprealm_global()) {
sr_t *new_srnode = oldin->prepare_new_srnode(0);
oldin->record_snaprealm_parent_dentry(new_srnode, NULL, destdn, destdnl->is_primary());
oldin->record_snaprealm_parent_dentry(new_srnode, dest_realm, destdn, destdnl->is_primary());
// dropping the last linkage or dropping the last remote linkage,
// detach the inode from global snaprealm
auto nlink = oldin->get_projected_inode()->nlink;
@ -7880,7 +7955,6 @@ void Server::handle_client_rename(MDRequestRef& mdr)
oldin->clear_snaprealm_global(new_srnode);
mdr->more()->desti_srnode = new_srnode;
} else if (destdnl->is_primary()) {
SnapRealm *dest_realm = destdir->inode->find_snaprealm();
snapid_t follows = dest_realm->get_newest_seq();
if (oldin->snaprealm || follows + 1 > oldin->get_oldest_snap()) {
sr_t *new_srnode = oldin->prepare_new_srnode(follows);
@ -7890,13 +7964,11 @@ void Server::handle_client_rename(MDRequestRef& mdr)
}
}
if (!mdr->more()->srci_srnode) {
SnapRealm *dest_realm = destdir->inode->find_snaprealm();
if (srci->is_projected_snaprealm_global()) {
sr_t *new_srnode = srci->prepare_new_srnode(0);
srci->record_snaprealm_parent_dentry(new_srnode, dest_realm, srcdn, srcdnl->is_primary());
srci->record_snaprealm_parent_dentry(new_srnode, src_realm, srcdn, srcdnl->is_primary());
mdr->more()->srci_srnode = new_srnode;
} else if (srcdnl->is_primary()) {
SnapRealm *src_realm = srcdir->inode->find_snaprealm();
snapid_t follows = src_realm->get_newest_seq();
if (src_realm != dest_realm &&
(srci->snaprealm || follows + 1 > srci->get_oldest_snap())) {
@ -9890,6 +9962,12 @@ void Server::handle_client_mksnap(MDRequestRef& mdr)
if (!check_access(mdr, diri, MAY_WRITE|MAY_SNAPSHOT))
return;
if (inodeno_t subvol_ino = diri->find_snaprealm()->get_subvolume_ino();
(subvol_ino && subvol_ino != diri->ino())) {
respond_to_request(mdr, -EPERM);
return;
}
// check if we can create any more snapshots
// we don't allow any more if we are already at or beyond the limit
if (diri->snaprealm &&

View File

@ -234,7 +234,7 @@ public:
// link
void handle_client_link(MDRequestRef& mdr);
void _link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti);
void _link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti, SnapRealm *target_realm);
void _link_local_finish(MDRequestRef& mdr, CDentry *dn, CInode *targeti,
version_t, version_t, bool);

View File

@ -332,6 +332,13 @@ void SnapRealm::check_cache() const
cached_seq = seq;
cached_last_created = last_created;
cached_last_destroyed = last_destroyed;
cached_subvolume_ino = 0;
if (parent)
cached_subvolume_ino = parent->get_subvolume_ino();
if (!cached_subvolume_ino && srnode.is_subvolume())
cached_subvolume_ino = inode->ino();
build_snap_set();
build_snap_trace();

View File

@ -100,6 +100,11 @@ public:
return (p != s.end() && *p <= last);
}
// Returns the cached subvolume inode number for this realm.
// check_cache() is invoked first, before the cached value is read.
inodeno_t get_subvolume_ino() {
  check_cache();
  const inodeno_t subvol_ino = cached_subvolume_ino;
  return subvol_ino;
}
void adjust_parent();
void split_at(SnapRealm *child);
@ -154,6 +159,7 @@ private:
mutable std::set<snapid_t> cached_snaps;
mutable SnapContext cached_snap_context;
mutable ceph::buffer::list cached_snap_trace;
mutable inodeno_t cached_subvolume_ino = 0;
};
std::ostream& operator<<(std::ostream& out, const SnapRealm &realm);

View File

@ -74,6 +74,10 @@ struct sr_t {
void clear_parent_global() { flags &= ~PARENT_GLOBAL; }
bool is_parent_global() const { return flags & PARENT_GLOBAL; }
// Set the SUBVOLUME bit in flags.
void mark_subvolume() {
  flags = flags | SUBVOLUME;
}
// Clear the SUBVOLUME bit in flags, leaving every other bit untouched.
void clear_subvolume() {
  flags = flags & ~SUBVOLUME;
}
// True when the SUBVOLUME bit is set in flags.
bool is_subvolume() const {
  return (flags & SUBVOLUME) != 0;
}
void encode(ceph::buffer::list &bl) const;
void decode(ceph::buffer::list::const_iterator &bl);
void dump(ceph::Formatter *f) const;
@ -90,7 +94,8 @@ struct sr_t {
__u32 flags = 0;
enum {
PARENT_GLOBAL = 1 << 0,
PARENT_GLOBAL = 1 << 0,
SUBVOLUME = 1 << 1,
};
};
WRITE_CLASS_ENCODER(sr_t)