From 26837134f941c976c3af1cd2ea9719c43c7af6c1 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Tue, 8 Dec 2020 18:33:10 -0800
Subject: [PATCH 1/3] mon,mds: mark multifs stable

Fixes: https://tracker.ceph.com/issues/22477
Signed-off-by: Patrick Donnelly
---
 src/mds/FSMap.h       |  4 ++--
 src/mon/FSCommands.cc | 10 ----------
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/src/mds/FSMap.h b/src/mds/FSMap.h
index dc9fac4a50c..8fa7e5de676 100644
--- a/src/mds/FSMap.h
+++ b/src/mds/FSMap.h
@@ -576,8 +576,8 @@ protected:
   uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1;
   fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE;
   CompatSet compat;
-  bool enable_multiple = false;
-  bool ever_enabled_multiple = false; // < the cluster had multiple MDSes enabled once
+  bool enable_multiple = true;
+  bool ever_enabled_multiple = true; // < the cluster had multiple FSes enabled once
 
   std::map filesystems;
 
diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc
index 0f2bb7e75ee..058148a95fa 100644
--- a/src/mon/FSCommands.cc
+++ b/src/mon/FSCommands.cc
@@ -47,13 +47,6 @@ using ceph::make_message;
 using ceph::mono_clock;
 using ceph::mono_time;
 
-static const string EXPERIMENTAL_WARNING("Warning! This feature is experimental."
-"It may cause problems up to and including data loss."
-"Consult the documentation at ceph.com, and if unsure, do not proceed."
-"Add --yes-i-really-mean-it if you are certain.");
-
-
-
 class FlagSetHandler : public FileSystemCommandHandler
 {
   public:
@@ -86,9 +79,6 @@ class FlagSetHandler : public FileSystemCommandHandler
         return r;
       }
 
-      if (!sure) {
-        ss << EXPERIMENTAL_WARNING;
-      }
       fsmap.set_enable_multiple(flag_bool);
       return 0;
     } else {

From 4bd9ef09652692267b3069fb3337a7da89872717 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Tue, 8 Dec 2020 18:38:15 -0800
Subject: [PATCH 2/3] qa: update tests concerning setting of multifs

Signed-off-by: Patrick Donnelly
---
 qa/tasks/cephfs/cephfs_test_case.py  | 1 +
 qa/tasks/cephfs/filesystem.py        | 4 ----
 qa/tasks/cephfs/test_failover.py     | 2 ++
 qa/tasks/cephfs/test_multifs_auth.py | 3 ++-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py
index 3905c029349..3c5a23d3ce7 100644
--- a/qa/tasks/cephfs/cephfs_test_case.py
+++ b/qa/tasks/cephfs/cephfs_test_case.py
@@ -175,6 +175,7 @@ class CephFSTestCase(CephTestCase):
         if self.REQUIRE_RECOVERY_FILESYSTEM:
             if not self.REQUIRE_FILESYSTEM:
                 self.skipTest("Recovery filesystem requires a primary filesystem as well")
+            # After Octopus is EOL, we can remove this setting:
             self.fs.mon_manager.raw_cluster_cmd('fs', 'flag', 'set',
                 'enable_multiple', 'true',
                 '--yes-i-really-mean-it')
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index 9705dbe3cf7..e3e2f5b6e6b 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -1506,10 +1506,6 @@ class Filesystem(MDSCluster):
     def is_full(self):
         return self.is_pool_full(self.get_data_pool_name())
 
-    def enable_multifs(self):
-        self.mon_manager.raw_cluster_cmd('fs', 'flag', 'set',
-            'enable_multiple', 'true', '--yes-i-really-mean-it')
-
     def authorize(self, client_id, caps=('/', 'rw')):
         """
         Run "ceph fs authorize" and run "ceph auth get" to get and returnt the
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
index 3712e5c10f6..42e2e1417c4 100644
--- a/qa/tasks/cephfs/test_failover.py
+++ b/qa/tasks/cephfs/test_failover.py
@@ -103,6 +103,7 @@ class TestClusterAffinity(CephFSTestCase):
         """
         That a vanilla standby is preferred over others with mds_join_fs set to another fs.
         """
+        # After Octopus is EOL, we can remove this setting:
         self.fs.set_allow_multifs()
         fs2 = self.mds_cluster.newfs(name="cephfs2")
         status, target = self._verify_init()
@@ -128,6 +129,7 @@ class TestClusterAffinity(CephFSTestCase):
         standbys = [info['name'] for info in status.get_standbys()]
         for mds in standbys:
             self.config_set('mds.'+mds, 'mds_join_fs', 'cephfs2')
+        # After Octopus is EOL, we can remove this setting:
         self.fs.set_allow_multifs()
         fs2 = self.mds_cluster.newfs(name="cephfs2")
         for mds in standbys:
diff --git a/qa/tasks/cephfs/test_multifs_auth.py b/qa/tasks/cephfs/test_multifs_auth.py
index 2f21cf654d2..b247dd8f51b 100644
--- a/qa/tasks/cephfs/test_multifs_auth.py
+++ b/qa/tasks/cephfs/test_multifs_auth.py
@@ -29,7 +29,8 @@ class TestMultiFS(CapsHelper):
         self.run_cluster_cmd(f'auth rm {self.client_name}')
 
         self.fs1 = self.fs
-        self.fs1.enable_multifs()
+        # After Octopus is EOL, we can remove this setting:
+        self.fs1.set_allow_multifs()
         self.fs2 = self.mds_cluster.newfs(name='cephfs2', create=True)
 
         # we'll reassign caps to client.1 so that it can operate with cephfs2

From 0376ce721a1854a66b4ac6547cc3ac2b655dfa9f Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Tue, 8 Dec 2020 18:57:44 -0800
Subject: [PATCH 3/3] doc: add documentation for new multifs feature

Signed-off-by: Patrick Donnelly
---
 PendingReleaseNotes                  |  5 ++
 doc/cephfs/client-auth.rst           |  2 +
 doc/cephfs/experimental-features.rst | 68 ----------------------------
 doc/cephfs/index.rst                 |  3 +-
 doc/cephfs/multifs.rst               | 54 ++++++++++++++++++++++
 5 files changed, 63 insertions(+), 69 deletions(-)
 create mode 100644 doc/cephfs/multifs.rst

diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index 3db77906e1f..af1c10f1843 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -26,6 +26,11 @@
   ``osd_scrub_end_week_day`` are 0 - 6. The use of 7 is now illegal.
   Specifying ``0`` for both values causes every day of the week to be allowed.
 
+* Support for multiple file systems in a single Ceph cluster is now stable.
+  New Ceph clusters enable support for multiple file systems by default.
+  Existing clusters must still set the ``enable_multiple`` flag. Please see
+  the CephFS documentation for more information.
+
 * A new health check, DAEMON_OLD_VERSION, will warn if different versions of Ceph are running
   on daemons. It will generate a health error if multiple versions are detected.
   This condition must exist for over mon_warn_older_version_delay (set to 1 week by default) in order for the
diff --git a/doc/cephfs/client-auth.rst b/doc/cephfs/client-auth.rst
index 3e8a791d3f5..fd0faa83963 100644
--- a/doc/cephfs/client-auth.rst
+++ b/doc/cephfs/client-auth.rst
@@ -149,6 +149,8 @@ prefix length in CIDR notation (e.g., ``10.3.0.0/16``). If present, the use of
 this capability is restricted to clients connecting from this network.
 
+.. _fs-authorize-multifs:
+
 File system Information Restriction
 ===================================
 
diff --git a/doc/cephfs/experimental-features.rst b/doc/cephfs/experimental-features.rst
index 74aa6917d2e..ba60d12c79a 100644
--- a/doc/cephfs/experimental-features.rst
+++ b/doc/cephfs/experimental-features.rst
@@ -35,76 +35,8 @@ protect the mechanisms for balancing load (migration, replication,
 fragmentation) but stub out the balancing policies using Lua. For details, see
 :doc:`/cephfs/mantle`.
 
-Snapshots
----------
-Like multiple active MDSes, CephFS is designed from the ground up to support
-snapshotting of arbitrary directories. There are no known bugs at the time of
-writing, but there is insufficient testing to provide stability guarantees and
-every expansion of testing has generally revealed new issues. If you do enable
-snapshots and experience failure, manual intervention will be needed.
-
-Snapshots are known not to work properly with multiple file systems (below) in
-some cases. Specifically, if you share a pool for multiple FSes and delete
-a snapshot in one FS, expect to lose snapshotted file data in any other FS using
-snapshots. See the :doc:`/dev/cephfs-snapshots` page for more information.
-
-For somewhat obscure implementation reasons, the kernel client only supports up
-to 400 snapshots (http://tracker.ceph.com/issues/21420).
-
-Snapshotting was blocked off with the ``allow_new_snaps`` flag prior to Mimic.
-
-Multiple File Systems within a Ceph Cluster
--------------------------------------------
-Code was merged prior to the Jewel release which enables administrators
-to create multiple independent CephFS file systems within a single Ceph cluster.
-These independent file systems have their own set of active MDSes, cluster maps,
-and data. But the feature required extensive changes to data structures which
-are not yet fully qualified, and has security implications which are not all
-apparent nor resolved.
-
-There are no known bugs, but any failures which do result from having multiple
-active file systems in your cluster will require manual intervention and, so
-far, will not have been experienced by anybody else -- knowledgeable help will
-be extremely limited. You also probably do not have the security or isolation
-guarantees you want or think you have upon doing so.
-
-Note that snapshots and multiple file systems are *not* tested in combination
-and may not work together; see above.
-
-Multiple file systems were available starting in the Jewel release candidates
-but must be turned on via the ``enable_multiple`` flag until declared stable.
-
 LazyIO
 ------
 LazyIO relaxes POSIX semantics. Buffered reads/writes are allowed even when a
 file is opened by multiple applications on multiple clients. Applications are
 responsible for managing cache coherency themselves.
-
-Previously experimental features
-================================
-
-Directory Fragmentation
------------------------
-
-Directory fragmentation was considered experimental prior to the *Luminous*
-(12.2.x). It is now enabled by default on new file systems. To enable
-directory fragmentation on file systems created with older versions of Ceph,
-set the ``allow_dirfrags`` flag on the file system::
-
-    ceph fs set allow_dirfrags 1
-
-Multiple active metadata servers
---------------------------------
-
-Prior to the *Luminous* (12.2.x) release, running multiple active metadata
-servers within a single file system was considered experimental. Creating
-multiple active metadata servers is now permitted by default on new
-file systems.
-
-File Systems created with older versions of Ceph still require explicitly
-enabling multiple active metadata servers as follows::
-
-    ceph fs set allow_multimds 1
-
-Note that the default size of the active mds cluster (``max_mds``) is
-still set to 1 initially.
diff --git a/doc/cephfs/index.rst b/doc/cephfs/index.rst
index 7784b1f2ee8..6064321b7ce 100644
--- a/doc/cephfs/index.rst
+++ b/doc/cephfs/index.rst
@@ -77,7 +77,8 @@ Administration
     Create a CephFS file system
     Administrative commands
-    Provision/Add/Remove MDS(s)
+    Creating Multiple File Systems <multifs>
+    Provision/Add/Remove MDS(s)
     MDS failover and standby configuration
     MDS Cache Configuration
     MDS Configuration Settings
diff --git a/doc/cephfs/multifs.rst b/doc/cephfs/multifs.rst
new file mode 100644
index 00000000000..2dcba7ae006
--- /dev/null
+++ b/doc/cephfs/multifs.rst
@@ -0,0 +1,54 @@
+.. _cephfs-multifs:
+
+Multiple Ceph File Systems
+==========================
+
+
+Beginning with the Pacific release, multiple file system support is stable
+and ready to use. This functionality allows configuring separate file systems
+with full data separation on separate pools.
+
+Existing clusters must set a flag to enable multiple file systems::
+
+    ceph fs flag set enable_multiple true
+
+New Ceph clusters set this flag automatically.
+
+
+Creating a new Ceph File System
+-------------------------------
+
+The new ``volumes`` plugin interface (see: :doc:`/cephfs/fs-volumes`) automates
+most of the work of configuring a new file system. The "volume" concept is
+simply a new file system. This can be done via::
+
+    ceph fs volume create <fs_name>
+
+Ceph will create the new pools and automate the deployment of new MDS daemons
+to support the new file system. The deployment technology used, e.g. cephadm,
+will also configure the MDS affinity (see: :ref:`mds-join-fs`) of new MDS
+daemons to operate the new file system.
+
+
+Securing access
+---------------
+
+The ``fs authorize`` command allows configuring the client's access to a
+particular file system. See also :ref:`fs-authorize-multifs`. The client will
+only have visibility of authorized file systems and the Monitors/MDS will
+reject access to clients without authorization.
+
+
+Other Notes
+-----------
+
+Multiple file systems do not share pools. This is particularly important for
+snapshots but also because no measures are in place to prevent duplicate
+inodes. The Ceph commands prevent this dangerous configuration.
+
+Each file system has its own set of MDS ranks. Consequently, each new file
+system requires more MDS daemons to operate and increases operational costs.
+This can be useful for increasing metadata throughput by application or user
+base but also adds cost to the creation of a file system. Generally, a single
+file system with subtree pinning is a better choice for isolating load between
+applications.
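For reference, the workflow described by the new doc/cephfs/multifs.rst can be
sketched as a short CLI session. This is an illustrative sequence only, not part
of the patch series: the file system name ``cephfs2`` and the client name
``client.bar`` are placeholders, and the first command is needed only on
clusters created before this change, since new clusters now set
``enable_multiple`` by default::

    # Only needed on existing (pre-Pacific) clusters; new clusters enable it by default.
    ceph fs flag set enable_multiple true

    # Create a second, independent file system; the volumes plugin creates its
    # pools and deploys the additional MDS daemons.
    ceph fs volume create cephfs2

    # Confirm both file systems exist.
    ceph fs ls

    # Grant a client read/write access to the root of the new file system only;
    # the client then sees just the file systems it is authorized for.
    ceph fs authorize cephfs2 client.bar / rw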