Fix service failed state transitions for wait/hold

Add missing state machine transitions from SERVICEFAIL to SERVICEWAIT
and SERVICEHOLD for reserved wait/hold steps.

This fixes the edge case where nodes in the service failed state would
incorrectly transition directly to the active state when wait/hold steps
were executed, bypassing the expected intermediate states.

Closes-Bug: #2119990
Change-Id: I0a55ad45138c4d033570014bf45956dacaf11e72
Signed-off-by: Afonne-CID <afonnepaulc@gmail.com>
This commit is contained in:
Afonne-CID
2025-08-13 23:19:09 +01:00
parent 90346e5985
commit f1943cead5
3 changed files with 41 additions and 0 deletions

View File

@@ -667,6 +667,12 @@ machine.add_transition(SERVICEFAIL, SERVICING, 'service')
# A node in service fail can be rescued: the 'rescue' event moves it
# from SERVICEFAIL to RESCUING.
machine.add_transition(SERVICEFAIL, RESCUING, 'rescue')
# A node in service fail can enter the service wait state: the 'wait'
# event (raised for the reserved wait step) moves it to SERVICEWAIT.
machine.add_transition(SERVICEFAIL, SERVICEWAIT, 'wait')
# A node in service fail can be held: the 'hold' event (raised for the
# reserved hold step) moves it to SERVICEHOLD.
machine.add_transition(SERVICEFAIL, SERVICEHOLD, 'hold')
# A node in service fail may be deleted: the 'delete' event moves it
# to DELETING.
machine.add_transition(SERVICEFAIL, DELETING, 'delete')

View File

@@ -949,6 +949,32 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
# Start is the continuation from a heartbeat.
self._test_do_next_service_step_handles_hold(states.SERVICEWAIT)
def test_do_next_service_step_handles_hold_from_failed(self):
    # A hold step started from SERVICEFAIL must end in SERVICEHOLD; the
    # shared helper performs the setup and the assertions.
    start_state = states.SERVICEFAIL
    self._test_do_next_service_step_handles_hold(start_state)
def test_do_next_service_step_handles_wait_from_failed(self):
    # A wait step started from SERVICEFAIL must end in SERVICEWAIT.

    # Single reserved 'wait' step, with no step started yet.
    wait_step = {
        'step': 'wait',
        'priority': 10,
        'interface': 'power',
    }
    internal_info = {
        'service_steps': [wait_step],
        'service_step_index': None,
    }
    node = obj_utils.create_test_node(
        self.context,
        driver='fake-hardware',
        provision_state=states.SERVICEFAIL,
        driver_internal_info=internal_info,
        service_step=None)

    # Run the first (index 0) service step under an exclusive lock.
    with task_manager.acquire(
            self.context, node.uuid, shared=False) as task:
        servicing.do_next_service_step(task, 0)

    # Reload the node so the assertion sees the stored provision state.
    node.refresh()
    self.assertEqual(states.SERVICEWAIT, node.provision_state)
@mock.patch.object(servicing, 'do_next_service_step', autospec=True)
def _continue_node_service(self, mock_next_step, skip=True):
# test that skipping current step mechanism works

View File

@@ -0,0 +1,9 @@
---
fixes:
- |
Fixes a bug where reserved service steps (wait, hold) executed on nodes in
the "service failed" state would incorrectly transition the node directly to
the "active" state, bypassing the expected intermediate wait/hold states.
Nodes in the "service failed" state now transition to "service wait" or
"service hold" when a wait or hold step is executed, preserving the
expected state machine flow.