mirror of
https://github.com/ceph/ceph
synced 2025-01-02 00:52:22 +00:00
mgr/rbd_support: avoid wedging the task queue if pool is removed
The rados.ObjectNotFound exception handler was referencing the ioctx variable, which is assigned only if the pool exists and the rados.open_ioctx() call succeeds. This led to a fatal error

    mgr[rbd_support] Failed to locate pool mypool
    mgr[rbd_support] execute_task: [errno 2] error opening pool 'b'mypool''
    mgr[rbd_support] Fatal runtime error: local variable 'ioctx' referenced before assignment

and wedged the task queue. No other commands were processed until the ceph-mgr daemon was restarted.

Fixes: https://tracker.ceph.com/issues/52932
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
6f0d368793
commit
5a425927ed
@ -1459,6 +1459,55 @@ test_mirror_pool_peer_bootstrap_create() {
|
|||||||
ceph osd pool rm rbd1 rbd1 --yes-i-really-really-mean-it
|
ceph osd pool rm rbd1 rbd1 --yes-i-really-really-mean-it
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test_tasks_removed_pool() {
|
||||||
|
echo "testing removing pool under running tasks..."
|
||||||
|
remove_images
|
||||||
|
|
||||||
|
ceph osd pool create rbd2 8
|
||||||
|
rbd pool init rbd2
|
||||||
|
|
||||||
|
rbd create $RBD_CREATE_ARGS --size 1G foo
|
||||||
|
rbd snap create foo@snap
|
||||||
|
rbd snap protect foo@snap
|
||||||
|
rbd clone foo@snap bar
|
||||||
|
|
||||||
|
rbd create $RBD_CREATE_ARGS --size 1G rbd2/dummy
|
||||||
|
rbd bench --io-type write --io-pattern seq --io-size 1M --io-total 1G rbd2/dummy
|
||||||
|
rbd snap create rbd2/dummy@snap
|
||||||
|
rbd snap protect rbd2/dummy@snap
|
||||||
|
for i in {1..5}; do
|
||||||
|
rbd clone rbd2/dummy@snap rbd2/dummy$i
|
||||||
|
done
|
||||||
|
|
||||||
|
# queue flattens on a few dummy images and remove that pool
|
||||||
|
test "$(ceph rbd task list)" = "[]"
|
||||||
|
for i in {1..5}; do
|
||||||
|
ceph rbd task add flatten rbd2/dummy$i
|
||||||
|
done
|
||||||
|
ceph osd pool delete rbd2 rbd2 --yes-i-really-really-mean-it
|
||||||
|
test "$(ceph rbd task list)" != "[]"
|
||||||
|
|
||||||
|
# queue flatten on another image and check that it completes
|
||||||
|
rbd info bar | grep 'parent: '
|
||||||
|
expect_fail rbd snap unprotect foo@snap
|
||||||
|
ceph rbd task add flatten bar
|
||||||
|
for i in {1..12}; do
|
||||||
|
rbd info bar | grep 'parent: ' || break
|
||||||
|
sleep 10
|
||||||
|
done
|
||||||
|
rbd info bar | expect_fail grep 'parent: '
|
||||||
|
rbd snap unprotect foo@snap
|
||||||
|
|
||||||
|
# check that flattens disrupted by pool removal are cleaned up
|
||||||
|
for i in {1..12}; do
|
||||||
|
test "$(ceph rbd task list)" = "[]" && break
|
||||||
|
sleep 10
|
||||||
|
done
|
||||||
|
test "$(ceph rbd task list)" = "[]"
|
||||||
|
|
||||||
|
remove_images
|
||||||
|
}
|
||||||
|
|
||||||
test_pool_image_args
|
test_pool_image_args
|
||||||
test_rename
|
test_rename
|
||||||
test_ls
|
test_ls
|
||||||
@ -1483,5 +1532,6 @@ test_trash_purge_schedule
|
|||||||
test_mirror_snapshot_schedule
|
test_mirror_snapshot_schedule
|
||||||
test_perf_image_iostat
|
test_perf_image_iostat
|
||||||
test_mirror_pool_peer_bootstrap_create
|
test_mirror_pool_peer_bootstrap_create
|
||||||
|
test_tasks_removed_pool
|
||||||
|
|
||||||
echo OK
|
echo OK
|
||||||
|
@ -348,17 +348,18 @@ class TaskHandler:
|
|||||||
return task_json
|
return task_json
|
||||||
|
|
||||||
def remove_task(self,
|
def remove_task(self,
|
||||||
ioctx: rados.Ioctx,
|
ioctx: Optional[rados.Ioctx],
|
||||||
task: Task,
|
task: Task,
|
||||||
remove_in_memory: bool = True) -> None:
|
remove_in_memory: bool = True) -> None:
|
||||||
self.log.info("remove_task: task={}".format(str(task)))
|
self.log.info("remove_task: task={}".format(str(task)))
|
||||||
omap_keys = (task.sequence_key, )
|
if ioctx:
|
||||||
try:
|
try:
|
||||||
with rados.WriteOpCtx() as write_op:
|
with rados.WriteOpCtx() as write_op:
|
||||||
ioctx.remove_omap_keys(write_op, omap_keys)
|
omap_keys = (task.sequence_key, )
|
||||||
ioctx.operate_write_op(write_op, RBD_TASK_OID)
|
ioctx.remove_omap_keys(write_op, omap_keys)
|
||||||
except rados.ObjectNotFound:
|
ioctx.operate_write_op(write_op, RBD_TASK_OID)
|
||||||
pass
|
except rados.ObjectNotFound:
|
||||||
|
pass
|
||||||
|
|
||||||
if remove_in_memory:
|
if remove_in_memory:
|
||||||
try:
|
try:
|
||||||
@ -422,9 +423,9 @@ class TaskHandler:
|
|||||||
task.retry_message = "{}".format(e)
|
task.retry_message = "{}".format(e)
|
||||||
self.update_progress(task, 0)
|
self.update_progress(task, 0)
|
||||||
else:
|
else:
|
||||||
# pool DNE -- remove the task
|
# pool DNE -- remove in-memory task
|
||||||
self.complete_progress(task)
|
self.complete_progress(task)
|
||||||
self.remove_task(ioctx, task)
|
self.remove_task(None, task)
|
||||||
|
|
||||||
except (rados.Error, rbd.Error) as e:
|
except (rados.Error, rbd.Error) as e:
|
||||||
self.log.error("execute_task: {}".format(e))
|
self.log.error("execute_task: {}".format(e))
|
||||||
|
Loading…
Reference in New Issue
Block a user