From f1943cead57d00a468af72a376d3d4e46fd40eb9 Mon Sep 17 00:00:00 2001 From: Afonne-CID Date: Wed, 13 Aug 2025 23:19:09 +0100 Subject: [PATCH] Fix service failed state transitions for wait/hold Add missing state machine transitions from SERVICEFAIL to SERVICEWAIT and SERVICEHOLD for reserved wait/hold steps. This fixes the edge-case where nodes in service failed state would incorrectly transition directly to active state when wait/hold steps were executed, bypassing expected intermediate states. Closes-Bug: #2119990 Change-Id: I0a55ad45138c4d033570014bf45956dacaf11e72 Signed-off-by: Afonne-CID --- ironic/common/states.py | 6 +++++ ironic/tests/unit/conductor/test_servicing.py | 26 +++++++++++++++++++ ...ait-hold-transitions-c83ef2b376ae04fe.yaml | 9 +++++++ 3 files changed, 41 insertions(+) create mode 100644 releasenotes/notes/fix-service-failed-wait-hold-transitions-c83ef2b376ae04fe.yaml diff --git a/ironic/common/states.py b/ironic/common/states.py index f5a620f992..a7cb822adf 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -667,6 +667,12 @@ machine.add_transition(SERVICEFAIL, SERVICING, 'service') # A node in service fail can be rescued machine.add_transition(SERVICEFAIL, RESCUING, 'rescue') +# A node in service fail can enter wait state +machine.add_transition(SERVICEFAIL, SERVICEWAIT, 'wait') + +# A node in service fail can be held +machine.add_transition(SERVICEFAIL, SERVICEHOLD, 'hold') + # A node in service fail may be deleted. machine.add_transition(SERVICEFAIL, DELETING, 'delete') diff --git a/ironic/tests/unit/conductor/test_servicing.py b/ironic/tests/unit/conductor/test_servicing.py index b46394df9b..9057f0e173 100644 --- a/ironic/tests/unit/conductor/test_servicing.py +++ b/ironic/tests/unit/conductor/test_servicing.py @@ -949,6 +949,32 @@ class DoNodeServiceTestCase(db_base.DbTestCase): # Start is the continuation from a heartbeat. self._test_do_next_service_step_handles_hold(states.SERVICEWAIT) + def test_do_next_service_step_handles_hold_from_failed(self): + # Test that hold step from SERVICEFAIL transitions to SERVICEHOLD + self._test_do_next_service_step_handles_hold(states.SERVICEFAIL) + + def test_do_next_service_step_handles_wait_from_failed(self): + # Test that wait step from SERVICEFAIL transitions to SERVICEWAIT + node = obj_utils.create_test_node( + self.context, driver='fake-hardware', + provision_state=states.SERVICEFAIL, + driver_internal_info={ + 'service_steps': [ + { + 'step': 'wait', + 'priority': 10, + 'interface': 'power' + } + ], + 'service_step_index': None}, + service_step=None) + + with task_manager.acquire( + self.context, node.uuid, shared=False) as task: + servicing.do_next_service_step(task, 0) + node.refresh() + self.assertEqual(states.SERVICEWAIT, node.provision_state) + @mock.patch.object(servicing, 'do_next_service_step', autospec=True) def _continue_node_service(self, mock_next_step, skip=True): # test that skipping current step mechanism works diff --git a/releasenotes/notes/fix-service-failed-wait-hold-transitions-c83ef2b376ae04fe.yaml b/releasenotes/notes/fix-service-failed-wait-hold-transitions-c83ef2b376ae04fe.yaml new file mode 100644 index 0000000000..1d3d6029ed --- /dev/null +++ b/releasenotes/notes/fix-service-failed-wait-hold-transitions-c83ef2b376ae04fe.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixes bug where reserved service steps (wait, hold) executed on nodes in + "service failed" state would incorrectly transition the node directly to + "active" state, bypassing the expected intermediate wait/hold states. + Now nodes in "service failed" state properly transition to "service wait" + or "service hold" when wait/hold steps are executed, maintaining the + expected state machine flow.