Fix servicing abort to respect abortable flag

Currently, Ironic codebase allows aborting servicing state regardless
of whether a servicing step has abortable flag set or not. This patch
fixes this by adding handling of service wait states to abort code paths
and adding the missing state machine transition.

Generated-By: Claude Code Sonnet 3.5
Change-Id: Ie07490bdb9c6461bd6ac7a6315773dcfb13592f9
Signed-off-by: Jacob Anders <janders@redhat.com>
This commit is contained in:
Jacob Anders
2025-08-13 10:50:36 +10:00
parent 1eda80766c
commit fa8a8fbb4b
4 changed files with 117 additions and 0 deletions

View File

@@ -655,6 +655,9 @@ machine.add_transition(SERVICEWAIT, SERVICING, 'resume')
# A node in service wait can failed
machine.add_transition(SERVICEWAIT, SERVICEFAIL, 'fail')
# A node in service hold can failed
machine.add_transition(SERVICEHOLD, SERVICEFAIL, 'fail')
# A node in service wait can be aborted
machine.add_transition(SERVICEWAIT, SERVICEFAIL, 'abort')

View File

@@ -1441,6 +1441,8 @@ class ConductorManager(base_manager.BaseConductorManager):
states.INSPECTWAIT,
states.CLEANHOLD,
states.DEPLOYHOLD,
states.SERVICEWAIT,
states.SERVICEHOLD,
states.SERVICEFAIL)):
self._do_abort(task)
return
@@ -1509,6 +1511,41 @@ class ConductorManager(base_manager.BaseConductorManager):
last_error=cleaning.get_last_error(node))
return
if node.provision_state in (states.SERVICEWAIT, states.SERVICEHOLD):
# Check if the service step is abortable; if so abort it.
# Otherwise, indicate in that service step, that servicing
# should be aborted after that step is done.
if (node.service_step and not
node.service_step.get('abortable')):
LOG.info('The current service step "%(service_step)s" for '
'node %(node)s is not abortable. Adding a '
'flag to abort the servicing after the service '
'step is completed.',
{'service_step': node.service_step['step'],
'node': node.uuid})
service_step = node.service_step
if not service_step.get('abort_after'):
service_step['abort_after'] = True
node.service_step = service_step
node.save()
return
LOG.debug('Aborting the servicing operation during service step '
'"%(step)s" for node %(node)s in provision state '
'"%(prov)s".',
{'node': node.uuid,
'prov': node.provision_state,
'step': node.service_step.get('step')})
# First transition to SERVICEFAIL
task.process_event('fail')
# Then abort from SERVICEFAIL to ACTIVE
task.process_event(
'abort',
callback=self._spawn_worker,
call_args=(servicing.do_node_service_abort, task),
err_handler=utils.provisioning_error_handler)
return
if node.provision_state == states.RESCUEWAIT:
utils.remove_node_rescue_password(node, save=True)
task.process_event(

View File

@@ -2788,6 +2788,74 @@ class DoProvisioningActionTestCase(mgr_utils.ServiceSetUpMixin,
self.assertEqual(states.CLEANWAIT, node.provision_state)
self.assertEqual(states.AVAILABLE, node.target_provision_state)
def test_do_provision_action_abort_service_step_not_abortable_wait(self):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.SERVICEWAIT,
target_provision_state=states.ACTIVE,
service_step={'step': 'foo', 'abortable': False})
self._start_service()
self.service.do_provisioning_action(self.context, node.uuid, 'abort')
node.refresh()
# Assert the current service step was marked to be aborted later
self.assertIn('abort_after', node.service_step)
self.assertTrue(node.service_step['abort_after'])
# Make sure things stays as it was before
self.assertEqual(states.SERVICEWAIT, node.provision_state)
self.assertEqual(states.ACTIVE, node.target_provision_state)
def test_do_provision_action_abort_service_step_not_abortable_hold(self):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.SERVICEHOLD,
target_provision_state=states.ACTIVE,
service_step={'step': 'foo', 'abortable': False})
self._start_service()
self.service.do_provisioning_action(self.context, node.uuid, 'abort')
node.refresh()
# Assert the current service step was marked to be aborted later
self.assertIn('abort_after', node.service_step)
self.assertTrue(node.service_step['abort_after'])
# Make sure things stays as it was before
self.assertEqual(states.SERVICEHOLD, node.provision_state)
self.assertEqual(states.ACTIVE, node.target_provision_state)
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker',
autospec=True)
def test_do_provision_action_abort_service_step_abortable_wait(
self, mock_spawn):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.SERVICEWAIT,
target_provision_state=states.ACTIVE,
service_step={'step': 'foo', 'abortable': True})
self._start_service()
self.service.do_provisioning_action(self.context, node.uuid, 'abort')
node.refresh()
# Assert the abort was initiated immediately
mock_spawn.assert_called_with(
self.service, servicing.do_node_service_abort, mock.ANY)
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker',
autospec=True)
def test_do_provision_action_abort_service_step_abortable_hold(
self, mock_spawn):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.SERVICEHOLD,
target_provision_state=states.ACTIVE,
service_step={'step': 'foo', 'abortable': True})
self._start_service()
self.service.do_provisioning_action(self.context, node.uuid, 'abort')
node.refresh()
# Assert the abort was initiated immediately
mock_spawn.assert_called_with(
self.service, servicing.do_node_service_abort, mock.ANY)
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker',
autospec=True)
def _do_provision_action_abort_from_cleanhold(self, mock_spawn,

View File

@@ -0,0 +1,9 @@
---
fixes:
- |
Fixes servicing abort handling to respect the `abortable` flag of service steps.
Previously, servicing steps could be aborted regardless of their `abortable` flag
setting. Now, if a service step has `abortable` set to `False`, the abort will
be deferred until the current step completes, similar to how cleaning steps work.
This ensures service steps that cannot be safely interrupted are allowed to complete
before the abort takes effect.