mirror of
https://github.com/ceph/ceph
synced 2024-12-29 23:12:27 +00:00
mgr/rbd_support: avoid wedging the task queue if pool is removed
mgr/rbd_support: the rados.ObjectNotFound exception handler was referencing the ioctx variable, which is assigned only if the pool exists and the rados.open_ioctx() call succeeds. This led to a fatal error -- "mgr[rbd_support] Failed to locate pool mypool", "mgr[rbd_support] execute_task: [errno 2] error opening pool 'b'mypool''", "mgr[rbd_support] Fatal runtime error: local variable 'ioctx' referenced before assignment" -- and wedged the task queue: no other commands were processed until the ceph-mgr daemon was restarted. Fixes: https://tracker.ceph.com/issues/52932 Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
6f0d368793
commit
5a425927ed
@ -1459,6 +1459,55 @@ test_mirror_pool_peer_bootstrap_create() {
|
||||
ceph osd pool rm rbd1 rbd1 --yes-i-really-really-mean-it
|
||||
}
|
||||
|
||||
# Regression test for https://tracker.ceph.com/issues/52932: queued rbd tasks
# whose pool is deleted out from under them must not wedge the task queue.
test_tasks_removed_pool() {
    echo "testing removing pool under running tasks..."
    remove_images

    # Secondary pool that will be deleted while flatten tasks reference it.
    ceph osd pool create rbd2 8
    rbd pool init rbd2

    # A clone in the default pool; used later to prove the queue still works.
    rbd create $RBD_CREATE_ARGS --size 1G foo
    rbd snap create foo@snap
    rbd snap protect foo@snap
    rbd clone foo@snap bar

    # Populate rbd2 with a written-out parent and several clones so that
    # flattening them takes a while.
    rbd create $RBD_CREATE_ARGS --size 1G rbd2/dummy
    rbd bench --io-type write --io-pattern seq --io-size 1M --io-total 1G rbd2/dummy
    rbd snap create rbd2/dummy@snap
    rbd snap protect rbd2/dummy@snap
    for idx in $(seq 1 5); do
        rbd clone rbd2/dummy@snap rbd2/dummy$idx
    done

    # queue flattens on a few dummy images and remove that pool
    [ "$(ceph rbd task list)" = "[]" ]
    for idx in $(seq 1 5); do
        ceph rbd task add flatten rbd2/dummy$idx
    done
    ceph osd pool delete rbd2 rbd2 --yes-i-really-really-mean-it
    [ "$(ceph rbd task list)" != "[]" ]

    # queue flatten on another image and check that it completes
    rbd info bar | grep 'parent: '
    expect_fail rbd snap unprotect foo@snap
    ceph rbd task add flatten bar
    for idx in $(seq 1 12); do
        rbd info bar | grep 'parent: ' || break
        sleep 10
    done
    rbd info bar | expect_fail grep 'parent: '
    rbd snap unprotect foo@snap

    # check that flattens disrupted by pool removal are cleaned up
    for idx in $(seq 1 12); do
        [ "$(ceph rbd task list)" = "[]" ] && break
        sleep 10
    done
    [ "$(ceph rbd task list)" = "[]" ]

    remove_images
}
|
||||
|
||||
test_pool_image_args
|
||||
test_rename
|
||||
test_ls
|
||||
@ -1483,5 +1532,6 @@ test_trash_purge_schedule
|
||||
test_mirror_snapshot_schedule
|
||||
test_perf_image_iostat
|
||||
test_mirror_pool_peer_bootstrap_create
|
||||
test_tasks_removed_pool
|
||||
|
||||
echo OK
|
||||
|
@ -348,17 +348,18 @@ class TaskHandler:
|
||||
return task_json
|
||||
|
||||
def remove_task(self,
|
||||
ioctx: rados.Ioctx,
|
||||
ioctx: Optional[rados.Ioctx],
|
||||
task: Task,
|
||||
remove_in_memory: bool = True) -> None:
|
||||
self.log.info("remove_task: task={}".format(str(task)))
|
||||
omap_keys = (task.sequence_key, )
|
||||
try:
|
||||
with rados.WriteOpCtx() as write_op:
|
||||
ioctx.remove_omap_keys(write_op, omap_keys)
|
||||
ioctx.operate_write_op(write_op, RBD_TASK_OID)
|
||||
except rados.ObjectNotFound:
|
||||
pass
|
||||
if ioctx:
|
||||
try:
|
||||
with rados.WriteOpCtx() as write_op:
|
||||
omap_keys = (task.sequence_key, )
|
||||
ioctx.remove_omap_keys(write_op, omap_keys)
|
||||
ioctx.operate_write_op(write_op, RBD_TASK_OID)
|
||||
except rados.ObjectNotFound:
|
||||
pass
|
||||
|
||||
if remove_in_memory:
|
||||
try:
|
||||
@ -422,9 +423,9 @@ class TaskHandler:
|
||||
task.retry_message = "{}".format(e)
|
||||
self.update_progress(task, 0)
|
||||
else:
|
||||
# pool DNE -- remove the task
|
||||
# pool DNE -- remove in-memory task
|
||||
self.complete_progress(task)
|
||||
self.remove_task(ioctx, task)
|
||||
self.remove_task(None, task)
|
||||
|
||||
except (rados.Error, rbd.Error) as e:
|
||||
self.log.error("execute_task: {}".format(e))
|
||||
|
Loading…
Reference in New Issue
Block a user