diff --git a/ironic/common/states.py b/ironic/common/states.py index f5a620f992..a7cb822adf 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -667,6 +667,12 @@ machine.add_transition(SERVICEFAIL, SERVICING, 'service') # A node in service fail can be rescued machine.add_transition(SERVICEFAIL, RESCUING, 'rescue') +# A node in service fail can enter wait state +machine.add_transition(SERVICEFAIL, SERVICEWAIT, 'wait') + +# A node in service fail can be held +machine.add_transition(SERVICEFAIL, SERVICEHOLD, 'hold') + # A node in service fail may be deleted. machine.add_transition(SERVICEFAIL, DELETING, 'delete') diff --git a/ironic/tests/unit/conductor/test_servicing.py b/ironic/tests/unit/conductor/test_servicing.py index b46394df9b..9057f0e173 100644 --- a/ironic/tests/unit/conductor/test_servicing.py +++ b/ironic/tests/unit/conductor/test_servicing.py @@ -949,6 +949,32 @@ class DoNodeServiceTestCase(db_base.DbTestCase): # Start is the continuation from a heartbeat. self._test_do_next_service_step_handles_hold(states.SERVICEWAIT) + def test_do_next_service_step_handles_hold_from_failed(self): + # Test that hold step from SERVICEFAIL transitions to SERVICEHOLD + self._test_do_next_service_step_handles_hold(states.SERVICEFAIL) + + def test_do_next_service_step_handles_wait_from_failed(self): + # Test that wait step from SERVICEFAIL transitions to SERVICEWAIT + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.SERVICEFAIL, + driver_internal_info={ + 'service_steps': [ + { + 'step': 'wait', + 'priority': 10, + 'interface': 'power' + } + ], + 'service_step_index': None}, + service_step=None) + + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + servicing.do_next_service_step(task, 0) + node.refresh() + self.assertEqual(states.SERVICEWAIT, node.provision_state) + @mock.patch.object(servicing, 'do_next_service_step', autospec=True) def _continue_node_service(self, 
mock_next_step, skip=True): # test that skipping current step mechanism works diff --git a/releasenotes/notes/fix-service-failed-wait-hold-transitions-c83ef2b376ae04fe.yaml b/releasenotes/notes/fix-service-failed-wait-hold-transitions-c83ef2b376ae04fe.yaml new file mode 100644 index 0000000000..1d3d6029ed --- /dev/null +++ b/releasenotes/notes/fix-service-failed-wait-hold-transitions-c83ef2b376ae04fe.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixes a bug where reserved service steps (wait, hold) executed on nodes in + "service failed" state would incorrectly transition the node directly to + "active" state, bypassing the expected intermediate wait/hold states. + Now nodes in "service failed" state properly transition to "service wait" + or "service hold" when wait/hold steps are executed, maintaining the + expected state machine flow.