mgr/volumes: Fix subvolume snapshot clone failure

Problem:
The subvolume snapshot clone fails if the quota on the source
has been exceeded. Since the quota is not strictly enforced at
byte granularity, this is possible.

Cause:
The quota on the clone is set prior to copying the data
from the source. Hence the quota usually gets enforced before
the entire data has been copied from the source, resulting in
the clone failure.

Solution:
Enforce quota on the clone after the data is copied.

Fixes: https://tracker.ceph.com/issues/53848
Signed-off-by: Kotresh HR <khiremat@redhat.com>
This commit is contained in:
Kotresh HR 2022-01-12 15:01:53 +05:30
parent 29ad638773
commit 18b85c53af
3 changed files with 30 additions and 0 deletions

View File

@ -4,6 +4,7 @@ import time
import errno import errno
import logging import logging
from contextlib import contextmanager from contextlib import contextmanager
from typing import Dict, Union
import cephfs import cephfs
from mgr_util import lock_timeout_log from mgr_util import lock_timeout_log
@ -185,12 +186,27 @@ def bulk_copy(fs_handle, source_path, dst_path, should_cancel):
if should_cancel(): if should_cancel():
raise VolumeException(-errno.EINTR, "clone operation interrupted") raise VolumeException(-errno.EINTR, "clone operation interrupted")
def set_quota_on_clone(fs_handle, clone_volumes_pair):
    """Copy the byte quota of the source snapshot onto the clone.

    Reads the ``ceph.quota.max_bytes`` extended attribute from the source
    snapshot's data path and, when one is present, applies it to the clone
    through the clone subvolume's ``set_attrs``.

    :param fs_handle: filesystem handle providing ``getxattr(path, name)``
    :param clone_volumes_pair: indexable triple; element 0 is the clone
        subvolume (``path``, ``set_attrs``), element 1 is the source
        subvolume (``snapshot_data_path``) and element 2 is the value
        passed to ``snapshot_data_path`` (presumably the snapshot name --
        confirm against ``open_clone_subvolume_pair``)
    :return: None
    """
    snapshot_path = clone_volumes_pair[1].snapshot_data_path(clone_volumes_pair[2])
    clone_path = clone_volumes_pair[0].path

    try:
        raw_quota = fs_handle.getxattr(snapshot_path, 'ceph.quota.max_bytes')
        max_bytes = int(raw_quota.decode('utf-8'))
    except cephfs.NoData:
        # The source carries no quota xattr, so the clone is left without one.
        return
    else:
        quota_attrs = {"quota": max_bytes}  # type: Dict[str, Union[int, str, None]]
        clone_volumes_pair[0].set_attrs(clone_path, quota_attrs)
def do_clone(fs_client, volspec, volname, groupname, subvolname, should_cancel):
    """Copy a source snapshot into its clone subvolume, then set the quota.

    Opens the volume and the clone/source subvolume pair, bulk-copies the
    snapshot data into the clone's path, and only afterwards applies the
    source quota to the clone so the copy itself is not limited by it.

    :param should_cancel: callable forwarded to ``bulk_copy``
    """
    with open_volume_lockless(fs_client, volname) as fs_handle, \
            open_clone_subvolume_pair(fs_client, fs_handle, volspec, volname,
                                      groupname, subvolname) as clone_volumes:
        snapshot_path = clone_volumes[1].snapshot_data_path(clone_volumes[2])
        clone_path = clone_volumes[0].path
        bulk_copy(fs_handle, snapshot_path, clone_path, should_cancel)
        # Quota goes on last: the data copy must finish before it is enforced.
        set_quota_on_clone(fs_handle, clone_volumes)
def log_clone_failure(volname, groupname, subvolname, ve): def log_clone_failure(volname, groupname, subvolname, ve):
if ve.errno == -errno.EINTR: if ve.errno == -errno.EINTR:

View File

@ -152,6 +152,13 @@ class SubvolumeV1(SubvolumeBase, SubvolumeTemplate):
# attributes of subvolume's content though, are synced during the cloning process. # attributes of subvolume's content though, are synced during the cloning process.
attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname)) attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname))
# The source of the clone may have exceeded its quota limit as
# CephFS quotas are imprecise. Cloning such a source may fail if
# the quota on the destination is set before starting the clone
# copy. So always set the quota on destination after cloning is
# successful.
attrs["quota"] = None
# override snapshot pool setting, if one is provided for the clone # override snapshot pool setting, if one is provided for the clone
if pool is not None: if pool is not None:
attrs["data_pool"] = pool attrs["data_pool"] = pool

View File

@ -219,6 +219,13 @@ class SubvolumeV2(SubvolumeV1):
# attributes of subvolume's content though, are synced during the cloning process. # attributes of subvolume's content though, are synced during the cloning process.
attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname)) attrs = source_subvolume.get_attrs(source_subvolume.snapshot_data_path(snapname))
# The source of the clone may have exceeded its quota limit as
# CephFS quotas are imprecise. Cloning such a source may fail if
# the quota on the destination is set before starting the clone
# copy. So always set the quota on destination after cloning is
# successful.
attrs["quota"] = None
# override snapshot pool setting, if one is provided for the clone # override snapshot pool setting, if one is provided for the clone
if pool is not None: if pool is not None:
attrs["data_pool"] = pool attrs["data_pool"] = pool