From fa8a8fbb4b57db45007d1ff967d73a6d33cea4e9 Mon Sep 17 00:00:00 2001 From: Jacob Anders Date: Wed, 13 Aug 2025 10:50:36 +1000 Subject: [PATCH] Fix servicing abort to respect abortable flag Currently, Ironic codebase allows aborting servicing state regardless of whether a servicing step has abortable flag set or not. This patch fixes this by adding handling of service wait states to abort code paths and adding the missing state machine transition. Generated-By: Claude Code Sonnet 3.5 Change-Id: Ie07490bdb9c6461bd6ac7a6315773dcfb13592f9 Signed-off-by: Jacob Anders --- ironic/common/states.py | 3 + ironic/conductor/manager.py | 37 ++++++++++ ironic/tests/unit/conductor/test_manager.py | 68 +++++++++++++++++++ ...rtable-flag-handling-d8e7f9a2c4b5e7f1.yaml | 9 +++ 4 files changed, 117 insertions(+) create mode 100644 releasenotes/notes/fix-servicing-abort-abortable-flag-handling-d8e7f9a2c4b5e7f1.yaml diff --git a/ironic/common/states.py b/ironic/common/states.py index 7f5aff509f..fbbbb334c0 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -655,6 +655,9 @@ machine.add_transition(SERVICEWAIT, SERVICING, 'resume') # A node in service wait can failed machine.add_transition(SERVICEWAIT, SERVICEFAIL, 'fail') +# A node in service hold can failed +machine.add_transition(SERVICEHOLD, SERVICEFAIL, 'fail') + # A node in service wait can be aborted machine.add_transition(SERVICEWAIT, SERVICEFAIL, 'abort') diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index 8def4675a9..a44eb69237 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -1441,6 +1441,8 @@ class ConductorManager(base_manager.BaseConductorManager): states.INSPECTWAIT, states.CLEANHOLD, states.DEPLOYHOLD, + states.SERVICEWAIT, + states.SERVICEHOLD, states.SERVICEFAIL)): self._do_abort(task) return @@ -1509,6 +1511,41 @@ class ConductorManager(base_manager.BaseConductorManager): last_error=cleaning.get_last_error(node)) return + if node.provision_state in (states.SERVICEWAIT, states.SERVICEHOLD): + # Check if the service step is abortable; if so abort it. + # Otherwise, indicate in that service step, that servicing + # should be aborted after that step is done. + if (node.service_step and not + node.service_step.get('abortable')): + LOG.info('The current service step "%(service_step)s" for ' + 'node %(node)s is not abortable. Adding a ' + 'flag to abort the servicing after the service ' + 'step is completed.', + {'service_step': node.service_step['step'], + 'node': node.uuid}) + service_step = node.service_step + if not service_step.get('abort_after'): + service_step['abort_after'] = True + node.service_step = service_step + node.save() + return + + LOG.debug('Aborting the servicing operation during service step ' + '"%(step)s" for node %(node)s in provision state ' + '"%(prov)s".', + {'node': node.uuid, + 'prov': node.provision_state, + 'step': node.service_step.get('step')}) + # First transition to SERVICEFAIL + task.process_event('fail') + # Then abort from SERVICEFAIL to ACTIVE + task.process_event( + 'abort', + callback=self._spawn_worker, + call_args=(servicing.do_node_service_abort, task), + err_handler=utils.provisioning_error_handler) + return + if node.provision_state == states.RESCUEWAIT: utils.remove_node_rescue_password(node, save=True) task.process_event( diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index ca9f6654cc..2d2f8e456b 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -2788,6 +2788,74 @@ class DoProvisioningActionTestCase(mgr_utils.ServiceSetUpMixin, self.assertEqual(states.CLEANWAIT, node.provision_state) self.assertEqual(states.AVAILABLE, node.target_provision_state) + def test_do_provision_action_abort_service_step_not_abortable_wait(self): + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.SERVICEWAIT, + target_provision_state=states.ACTIVE, + service_step={'step': 'foo', 'abortable': False}) + + self._start_service() + self.service.do_provisioning_action(self.context, node.uuid, 'abort') + node.refresh() + # Assert the current service step was marked to be aborted later + self.assertIn('abort_after', node.service_step) + self.assertTrue(node.service_step['abort_after']) + # Make sure things stays as it was before + self.assertEqual(states.SERVICEWAIT, node.provision_state) + self.assertEqual(states.ACTIVE, node.target_provision_state) + + def test_do_provision_action_abort_service_step_not_abortable_hold(self): + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.SERVICEHOLD, + target_provision_state=states.ACTIVE, + service_step={'step': 'foo', 'abortable': False}) + + self._start_service() + self.service.do_provisioning_action(self.context, node.uuid, 'abort') + node.refresh() + # Assert the current service step was marked to be aborted later + self.assertIn('abort_after', node.service_step) + self.assertTrue(node.service_step['abort_after']) + # Make sure things stays as it was before + self.assertEqual(states.SERVICEHOLD, node.provision_state) + self.assertEqual(states.ACTIVE, node.target_provision_state) + + @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker', + autospec=True) + def test_do_provision_action_abort_service_step_abortable_wait( + self, mock_spawn): + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.SERVICEWAIT, + target_provision_state=states.ACTIVE, + service_step={'step': 'foo', 'abortable': True}) + + self._start_service() + self.service.do_provisioning_action(self.context, node.uuid, 'abort') + node.refresh() + # Assert the abort was initiated immediately + mock_spawn.assert_called_with( + self.service, servicing.do_node_service_abort, mock.ANY) + + @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker', + autospec=True) + def test_do_provision_action_abort_service_step_abortable_hold( + self, mock_spawn): + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.SERVICEHOLD, + target_provision_state=states.ACTIVE, + service_step={'step': 'foo', 'abortable': True}) + + self._start_service() + self.service.do_provisioning_action(self.context, node.uuid, 'abort') + node.refresh() + # Assert the abort was initiated immediately + mock_spawn.assert_called_with( + self.service, servicing.do_node_service_abort, mock.ANY) + @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker', autospec=True) def _do_provision_action_abort_from_cleanhold(self, mock_spawn, diff --git a/releasenotes/notes/fix-servicing-abort-abortable-flag-handling-d8e7f9a2c4b5e7f1.yaml b/releasenotes/notes/fix-servicing-abort-abortable-flag-handling-d8e7f9a2c4b5e7f1.yaml new file mode 100644 index 0000000000..1bf316b5ee --- /dev/null +++ b/releasenotes/notes/fix-servicing-abort-abortable-flag-handling-d8e7f9a2c4b5e7f1.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixes servicing abort handling to respect the `abortable` flag of service steps. + Previously, servicing steps could be aborted regardless of their `abortable` flag + setting. Now, if a service step has `abortable` set to `False`, the abort will + be deferred until the current step completes, similar to how cleaning steps work. + This ensures service steps that cannot be safely interrupted are allowed to complete + before the abort takes effect. \ No newline at end of file