Force wait for alarms on workers during sw-deploy

Workers now support Ceph OSDs.  VIM does not currently have a way to
check whether an individual worker has an OSD present.  Because of this,
all workers require the full wait-for-alarms step.

In the future, this will be improved with per-worker OSD detection and
expanded to all strategies.

TEST PLAN
PASS: Standard sw-deploy-strategy, minor release
  * Verify new wait for alarms steps
  * No regressions

Partial-Bug: https://bugs.launchpad.net/starlingx/+bug/2106642
Change-Id: I2ac3687701c07671013bd27edfdc1d98e80a026d
Signed-off-by: Joshua Kraitberg <joshua.kraitberg@windriver.com>
This commit is contained in:
Joshua Kraitberg
2025-04-09 10:30:36 -04:00
parent 6286b3122a
commit 40fe7735ee
4 changed files with 34 additions and 26 deletions

View File

@@ -95,8 +95,8 @@ def validate_phase(phase, expected_results):
(key, stage_number, stages_key, (key, stage_number, stages_key,
step_number, step_key, step_number, step_key,
apply_step[step_key], step[step_key], apply_step[step_key], step[step_key],
json.dumps(apply_step, indent=2), json.dumps(step, indent=2),
json.dumps(step, indent=2)) json.dumps(apply_step, indent=2))
step_number += 1 step_number += 1
else: else:
assert apply_stage[stages_key] == stage[stages_key], \ assert apply_stage[stages_key] == stage[stages_key], \

View File

@@ -1770,7 +1770,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'system-stabilize', 'timeout': 15}, {'name': 'system-stabilize', 'timeout': 15},
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['compute-0', 'compute-1', 'compute-2']}, 'entity_names': ['compute-0', 'compute-1', 'compute-2']},
{'name': 'wait-alarms-clear', 'timeout': 600}, {'name': 'wait-alarms-clear', 'timeout': 2400},
] ]
}, },
{ {
@@ -1891,7 +1891,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'system-stabilize', 'timeout': 15}, {'name': 'system-stabilize', 'timeout': 15},
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': ['compute-0', 'compute-1', 'compute-2']}, 'entity_names': ['compute-0', 'compute-1', 'compute-2']},
{'name': 'wait-alarms-clear', 'timeout': 600}, {'name': 'wait-alarms-clear', 'timeout': 2400},
] ]
}, },
{ {

View File

@@ -288,7 +288,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-2', 'compute-3']), _unlock_hosts_stage_as_dict(['compute-2', 'compute-3']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -306,7 +306,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-0']), _unlock_hosts_stage_as_dict(['compute-0']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -324,7 +324,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-1']), _unlock_hosts_stage_as_dict(['compute-1']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
} }
] ]
@@ -401,7 +401,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-1', 'compute-5']), _unlock_hosts_stage_as_dict(['compute-1', 'compute-5']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -422,7 +422,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
_unlock_hosts_stage_as_dict( _unlock_hosts_stage_as_dict(
['compute-0', 'compute-2', 'compute-3']), ['compute-0', 'compute-2', 'compute-3']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -443,7 +443,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
_unlock_hosts_stage_as_dict( _unlock_hosts_stage_as_dict(
['compute-4', 'compute-6', 'compute-7']), ['compute-4', 'compute-6', 'compute-7']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -463,7 +463,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
_unlock_hosts_stage_as_dict( _unlock_hosts_stage_as_dict(
['compute-8', 'compute-9']), ['compute-8', 'compute-9']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
} }
] ]
@@ -581,7 +581,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-1']), _unlock_hosts_stage_as_dict(['compute-1']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600}, 'timeout': 2400},
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -603,7 +603,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
_unlock_hosts_stage_as_dict( _unlock_hosts_stage_as_dict(
['compute-0', 'compute-2', 'compute-3']), ['compute-0', 'compute-2', 'compute-3']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -622,7 +622,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-4']), _unlock_hosts_stage_as_dict(['compute-4']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
} }
] ]
@@ -717,7 +717,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(stage_hosts[0]), _unlock_hosts_stage_as_dict(stage_hosts[0]),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
}, },
] ]
@@ -741,7 +741,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(stage_hosts[x]), _unlock_hosts_stage_as_dict(stage_hosts[x]),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
} }
) )
@@ -808,7 +808,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-2']), _unlock_hosts_stage_as_dict(['compute-2']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600}, 'timeout': 2400},
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -825,7 +825,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-3']), _unlock_hosts_stage_as_dict(['compute-3']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600}, 'timeout': 2400},
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -843,7 +843,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-0']), _unlock_hosts_stage_as_dict(['compute-0']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600}, 'timeout': 2400},
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -861,7 +861,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-1']), _unlock_hosts_stage_as_dict(['compute-1']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600}, 'timeout': 2400},
] ]
}, },
] ]
@@ -917,7 +917,9 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
{'name': 'system-stabilize', 'timeout': 15}, {'name': 'system-stabilize', 'timeout': 15},
{'name': 'unlock-hosts', {'name': 'unlock-hosts',
'entity_names': [f'compute-{i}']}, 'entity_names': [f'compute-{i}']},
{'name': 'system-stabilize', 'timeout': 60}, {'name': 'wait-alarms-clear',
'ignore_alarms': IGNORE_ALARMS_LIST,
'timeout': 2400}
] ]
} }
for i in range(4) for i in range(4)
@@ -1533,7 +1535,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-1']), _unlock_hosts_stage_as_dict(['compute-1']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600}, 'timeout': 2400},
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -1551,7 +1553,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-0']), _unlock_hosts_stage_as_dict(['compute-0']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600}, 'timeout': 2400},
] ]
}, },
{'name': 'sw-upgrade-complete', {'name': 'sw-upgrade-complete',
@@ -1666,7 +1668,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-1']), _unlock_hosts_stage_as_dict(['compute-1']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
}, },
{'name': 'sw-upgrade-worker-hosts', {'name': 'sw-upgrade-worker-hosts',
@@ -1683,7 +1685,7 @@ class TestSwUpgradeStrategy(sw_update_testcase.SwUpdateStrategyTestCase):
'timeout': 15}, 'timeout': 15},
_unlock_hosts_stage_as_dict(['compute-0']), _unlock_hosts_stage_as_dict(['compute-0']),
{'name': 'wait-alarms-clear', {'name': 'wait-alarms-clear',
'timeout': 600} 'timeout': 2400}
] ]
}, },
{'name': 'sw-upgrade-complete', {'name': 'sw-upgrade-complete',

View File

@@ -1345,7 +1345,13 @@ class UpdateWorkerHostsMixin(object):
# alarms to clear. Note: not all controller nodes will have # alarms to clear. Note: not all controller nodes will have
# OSDs configured, but the alarms should clear quickly in # OSDs configured, but the alarms should clear quickly in
# that case so this will not delay the update strategy. # that case so this will not delay the update strategy.
if any([HOST_PERSONALITY.CONTROLLER in host.personality if isinstance(self, SwUpgradeStrategy):
# TODO(jkraitbe): Workers can now support OSDs but VIM lacks a way to check.
stage.add_step(strategy.WaitAlarmsClearStep(
timeout_in_secs=WAIT_ALARM_TIMEOUT,
ignore_alarms=self._ignore_alarms,
ignore_alarms_conditional=self._ignore_alarms_conditional))
elif any([HOST_PERSONALITY.CONTROLLER in host.personality
for host in hosts_to_lock + hosts_to_reboot]): for host in hosts_to_lock + hosts_to_reboot]):
# Multiple personality nodes that need to wait for OSDs to sync: # Multiple personality nodes that need to wait for OSDs to sync:
stage.add_step(strategy.WaitAlarmsClearStep( stage.add_step(strategy.WaitAlarmsClearStep(