From b9edab80f048fee09b82cdd4ec58fa37bd937ded Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 15 Jul 2022 17:13:37 +0800 Subject: [PATCH 1/4] client: move a client's option to mds-client.yaml mds_max_retries_on_remount_failure option is used by Client.cc only. Fixes: https://tracker.ceph.com/issues/56532 Signed-off-by: Xiubo Li --- PendingReleaseNotes | 4 ++++ src/client/Client.cc | 2 +- src/common/options/mds-client.yaml.in | 8 ++++++++ src/common/options/mds.yaml.in | 8 -------- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index b6a18ff9a99..97e47148abb 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -100,6 +100,10 @@ potentially different from that of the parent image. The efficient copy-on-write semantics intrinsic to unformatted (regular) cloned images are retained. +* CEPHFS: Rename the `mds_max_retries_on_remount_failure` option to + `client_max_retries_on_remount_failure` and move it from mds.yaml.in to + mds-client.yaml.in because this option was only used by MDS client from its + birth. 
>=17.2.1 diff --git a/src/client/Client.cc b/src/client/Client.cc index 4c180ef754d..428fe83526a 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -4564,7 +4564,7 @@ void Client::remove_session_caps(MetaSession *s, int err) std::pair Client::_do_remount(bool retry_on_error) { - uint64_t max_retries = cct->_conf.get_val("mds_max_retries_on_remount_failure"); + uint64_t max_retries = cct->_conf.get_val("client_max_retries_on_remount_failure"); bool abort_on_failure = false; errno = 0; diff --git a/src/common/options/mds-client.yaml.in b/src/common/options/mds-client.yaml.in index 7f96fa30d24..4e599d4cfa8 100644 --- a/src/common/options/mds-client.yaml.in +++ b/src/common/options/mds-client.yaml.in @@ -453,6 +453,14 @@ options: services: - mds_client with_legacy: true +- name: client_max_retries_on_remount_failure + type: uint + level: advanced + desc: number of consecutive failed remount attempts for invalidating kernel dcache + after which client would abort. + default: 5 + services: + - mds_client - name: client_die_on_failed_remount type: bool level: dev diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in index f22d867cfea..8d54b851a51 100644 --- a/src/common/options/mds.yaml.in +++ b/src/common/options/mds.yaml.in @@ -1343,14 +1343,6 @@ options: default: 0 services: - mds -- name: mds_max_retries_on_remount_failure - type: uint - level: advanced - desc: number of consecutive failed remount attempts for invalidating kernel dcache - after which client would abort. - default: 5 - services: - - mds - name: mds_dump_cache_threshold_formatter type: size level: dev From f6a9bd6a69362527502eba4906e2605f3474007f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 15 Jul 2022 17:41:43 +0800 Subject: [PATCH 2/4] ceph_fuse: retry the test_dentry_handling if fails For some unknown reason the remount will fail, we need to retry it for the test_dentry_handling. 
Fixes: https://tracker.ceph.com/issues/56532 Signed-off-by: Xiubo Li --- src/ceph_fuse.cc | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/ceph_fuse.cc b/src/ceph_fuse.cc index 115856f381c..aae41269403 100644 --- a/src/ceph_fuse.cc +++ b/src/ceph_fuse.cc @@ -202,10 +202,21 @@ int main(int argc, const char **argv, const char *envp[]) { ceph_assert(ver != 0); bool client_try_dentry_invalidate = g_conf().get_val( "client_try_dentry_invalidate"); - bool can_invalidate_dentries = + bool can_invalidate_dentries = client_try_dentry_invalidate && ver < KERNEL_VERSION(3, 18, 0); - auto test_result = client->test_dentry_handling(can_invalidate_dentries); - int tr = test_result.first; + uint64_t max_retries = g_conf().get_val( + "client_max_retries_on_remount_failure"); + std::pair test_result; + uint64_t i = 0; + int tr = 0; + do { + test_result = client->test_dentry_handling(can_invalidate_dentries); + tr = test_result.first; + if (tr) { + sleep(1); + } + } while (++i < max_retries && tr); + bool abort_on_failure = test_result.second; bool client_die_on_failed_dentry_invalidate = g_conf().get_val( "client_die_on_failed_dentry_invalidate"); From f9c78fe72f3d7c02e927f71e0fbd841605c42708 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 19 Oct 2022 16:44:04 +0800 Subject: [PATCH 3/4] ceph_fuse: make it to force invalidating dentries when kernel >=3.18 The remount will fail randomly for unknown reasons. And in certain circumstances we can reproduce this very easily, which blocks our testing.
Make it possible to force the old method of invalidating the dcache when the "client_try_dentry_invalidate" option is enabled, even if the kernel version is >= 3.18.0. Fixes: https://tracker.ceph.com/issues/56532 Signed-off-by: Xiubo Li --- src/ceph_fuse.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/ceph_fuse.cc b/src/ceph_fuse.cc index aae41269403..3fa5346b463 100644 --- a/src/ceph_fuse.cc +++ b/src/ceph_fuse.cc @@ -198,12 +198,8 @@ int main(int argc, const char **argv, const char *envp[]) { ~RemountTest() override {} void *entry() override { #if defined(__linux__) - int ver = get_linux_version(); - ceph_assert(ver != 0); - bool client_try_dentry_invalidate = g_conf().get_val( - "client_try_dentry_invalidate"); - bool can_invalidate_dentries = - client_try_dentry_invalidate && ver < KERNEL_VERSION(3, 18, 0); + bool can_invalidate_dentries = g_conf().get_val( + "client_try_dentry_invalidate"); uint64_t max_retries = g_conf().get_val( "client_max_retries_on_remount_failure"); std::pair test_result; From b9b25a54414b60f5da841f3d16b2e267b670ec5b Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 20 Oct 2022 12:16:55 +0800 Subject: [PATCH 4/4] client: switch to old method to invalidate dcache if euid != 0 Force the use of the old and slow method to invalidate the dcache if the euid is non-root; otherwise the remount may fail with return code 1 or 32. Fixes: https://tracker.ceph.com/issues/56532 Signed-off-by: Xiubo Li --- src/client/Client.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/client/Client.cc b/src/client/Client.cc index 428fe83526a..a7211d5af7e 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -11705,6 +11705,17 @@ std::pair Client::test_dentry_handling(bool can_invalidate) can_invalidate_dentries = can_invalidate; + /* + * Force to use the old and slow method to invalidate the dcache + * if the euid is non-root, or the remount may fail with return + * code 1 or 32.
+ */ + uid_t euid = geteuid(); + ldout(cct, 10) << "euid: " << euid << dendl; + if (euid != 0) { + can_invalidate_dentries = true; + } + if (can_invalidate_dentries) { ceph_assert(dentry_invalidate_cb); ldout(cct, 1) << "using dentry_invalidate_cb" << dendl;