mgr/rbd_support: avoid wedging the task queue if pool is removed

rados.ObjectNotFound exception handler was referencing ioctx variable
which is assigned only if the pool exists and rados.open_ioctx() call
succeeds.  This led to a fatal error

  mgr[rbd_support] Failed to locate pool mypool
  mgr[rbd_support] execute_task: [errno 2] error opening pool 'b'mypool''
  mgr[rbd_support] Fatal runtime error: local variable 'ioctx' referenced before assignment

and wedged the task queue.  No other commands were processed until
ceph-mgr daemon restart.

Fixes: https://tracker.ceph.com/issues/52932
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
Ilya Dryomov 2022-11-23 18:10:03 +01:00
parent 6f0d368793
commit 5a425927ed
2 changed files with 61 additions and 10 deletions

View File

@ -1459,6 +1459,55 @@ test_mirror_pool_peer_bootstrap_create() {
ceph osd pool rm rbd1 rbd1 --yes-i-really-really-mean-it
}
# Regression test for https://tracker.ceph.com/issues/52932: deleting a pool
# while flatten tasks queued against it are still pending must not wedge the
# rbd_support task queue (previously an UnboundLocalError on 'ioctx' in the
# mgr killed task processing until ceph-mgr restart).
test_tasks_removed_pool() {
echo "testing removing pool under running tasks..."
remove_images
# rbd2 is the pool that will be deleted out from under the queued tasks
ceph osd pool create rbd2 8
rbd pool init rbd2
# foo@snap -> bar clone lives in the surviving default pool; it is used
# later to check that the task queue still processes new tasks
rbd create $RBD_CREATE_ARGS --size 1G foo
rbd snap create foo@snap
rbd snap protect foo@snap
rbd clone foo@snap bar
# fill dummy with 1G of data — presumably so that flattening its clones is
# slow enough to still be queued/running when the pool is deleted (confirm)
rbd create $RBD_CREATE_ARGS --size 1G rbd2/dummy
rbd bench --io-type write --io-pattern seq --io-size 1M --io-total 1G rbd2/dummy
rbd snap create rbd2/dummy@snap
rbd snap protect rbd2/dummy@snap
for i in {1..5}; do
rbd clone rbd2/dummy@snap rbd2/dummy$i
done
# queue flattens on a few dummy images and remove that pool
test "$(ceph rbd task list)" = "[]"
for i in {1..5}; do
ceph rbd task add flatten rbd2/dummy$i
done
ceph osd pool delete rbd2 rbd2 --yes-i-really-really-mean-it
# tasks for the now-removed pool are still present in the queue
test "$(ceph rbd task list)" != "[]"
# queue flatten on another image and check that it completes
rbd info bar | grep 'parent: '
expect_fail rbd snap unprotect foo@snap
ceph rbd task add flatten bar
# poll for up to ~2 minutes for the flatten of bar to finish
for i in {1..12}; do
rbd info bar | grep 'parent: ' || break
sleep 10
done
# bar must no longer have a parent, i.e. the flatten went through
rbd info bar | expect_fail grep 'parent: '
rbd snap unprotect foo@snap
# check that flattens disrupted by pool removal are cleaned up
for i in {1..12}; do
test "$(ceph rbd task list)" = "[]" && break
sleep 10
done
test "$(ceph rbd task list)" = "[]"
remove_images
}
test_pool_image_args
test_rename
test_ls
@ -1483,5 +1532,6 @@ test_trash_purge_schedule
test_mirror_snapshot_schedule
test_perf_image_iostat
test_mirror_pool_peer_bootstrap_create
test_tasks_removed_pool
echo OK

View File

@ -348,17 +348,18 @@ class TaskHandler:
return task_json
def remove_task(self,
ioctx: rados.Ioctx,
ioctx: Optional[rados.Ioctx],
task: Task,
remove_in_memory: bool = True) -> None:
self.log.info("remove_task: task={}".format(str(task)))
omap_keys = (task.sequence_key, )
try:
with rados.WriteOpCtx() as write_op:
ioctx.remove_omap_keys(write_op, omap_keys)
ioctx.operate_write_op(write_op, RBD_TASK_OID)
except rados.ObjectNotFound:
pass
if ioctx:
try:
with rados.WriteOpCtx() as write_op:
omap_keys = (task.sequence_key, )
ioctx.remove_omap_keys(write_op, omap_keys)
ioctx.operate_write_op(write_op, RBD_TASK_OID)
except rados.ObjectNotFound:
pass
if remove_in_memory:
try:
@ -422,9 +423,9 @@ class TaskHandler:
task.retry_message = "{}".format(e)
self.update_progress(task, 0)
else:
# pool DNE -- remove the task
# pool DNE -- remove in-memory task
self.complete_progress(task)
self.remove_task(ioctx, task)
self.remove_task(None, task)
except (rados.Error, rbd.Error) as e:
self.log.error("execute_task: {}".format(e))