From a786974621d4737543eb2713ac0060ad133d6a30 Mon Sep 17 00:00:00 2001 From: Jay Faulkner Date: Wed, 12 Feb 2025 11:21:24 -0800 Subject: [PATCH] Make _by_arch ramdisk uses & validation consistent The various *_by_arch configurations were being applied inconsistently across boot drivers. This unifies the logic across the drivers and ensures consistent setting of kernel/ramdisk. An inconsistent setting of kernel/ramdisk is any case where we can't get *both* the kernel and ramdisk to use from the same configuration type. For instance, a node-level override in driver_info that specifies a deploy_kernel but no deploy_ramdisk. These represent a misconfiguration at a base level, since kernels and ramdisks must be tightly coupled. This adds validation at startup such that if any X_ramdisk_by_arch and X_kernel_by_arch configurations exist that are not mirrored -- e.g. a kernel exists for aarch64 but no ramdisk, Ironic will print a warning log on start. We've also unified behavior when inconsistencies occur at runtime: - If CONF.conductor.error_on_ramdisk_config_inconsistency is True, an inconsistency will cause an exception to be immediately raised, failing whatever operation was needing to boot a ramdisk to immediately fail. - If CONF.conductor.error_on_ramdisk_config_inconsistency is False, Ironic will fall back to a less specific configuration -- for instance, if driver_info[deploy_ramdisk] is set but not driver_info[deploy_kernel], Ironic would fall back to the next-less-specific option, the deploy_*_by_arch config options. If those are inconsistent, we'd fail back to deploy_kernel/deploy_ramdisk -- the global default. Previous behavior varied by driver, but in the worst cases would combine a deploy_kernel from one level (e.g. driver_info) and deploy_ramdisk from another (e.g. global default) or vice versa. This behavior is considered a bug as kernels are matched up with ramdisks and generally are not interchangable. We expect at a future Ironic release to enable strict validation of ramdisk/kernel consistency. Closes-bug: 2097798 Change-Id: I429a651894be4b31a6faa5dfac0f58dd75ce8f79 --- ironic/command/conductor.py | 44 ++++++ ironic/common/pxe_utils.py | 15 +- ironic/conf/conductor.py | 14 +- ironic/drivers/utils.py | 101 ++++++++++---- ironic/tests/unit/command/test_conductor.py | 110 +++++++++++++++ ironic/tests/unit/common/test_pxe_utils.py | 3 + .../unit/drivers/modules/redfish/test_boot.py | 129 +++++++++++------- ironic/tests/unit/drivers/test_utils.py | 121 ++++++++++++++++ ...tent-ramdisk-configs-6a37ef69bf73cd2e.yaml | 38 ++++++ 9 files changed, 487 insertions(+), 88 deletions(-) create mode 100644 releasenotes/notes/consistent-ramdisk-configs-6a37ef69bf73cd2e.yaml diff --git a/ironic/command/conductor.py b/ironic/command/conductor.py index 46cf7bc7cc..776a575b68 100644 --- a/ironic/command/conductor.py +++ b/ironic/command/conductor.py @@ -77,10 +77,54 @@ def warn_about_max_wait_parameters(conf): 'please re-evaluate your configuration.', error_with) +def warn_about_inconsistent_kernel_ramdisk(conf): + """Check consistency of configurations around kernels and ramdisks + + This method logs a warning if any of the paired + CONF.conductor.*_ramdisk_by_arch and CONF.conductor.*_kernel_by_arch + config dictionaries are inconsistent -- such as having a kernel configured + for aarch64 without having a ramdisk configured. + + :param conf: an ironic.conf.CONF oslo.config object + :returns: None + """ + config_pairs = [ + ('deploy_kernel_by_arch', 'deploy_ramdisk_by_arch', 'provisioning'), + ('rescue_kernel_by_arch', 'rescue_ramdisk_by_arch', 'rescue'), + ] + + for kernel_opt, ramdisk_opt, operation in config_pairs: + kernel_dict = getattr(conf.conductor, kernel_opt) + ramdisk_dict = getattr(conf.conductor, ramdisk_opt) + + kernel_arches = set(kernel_dict.keys()) + ramdisk_arches = set(ramdisk_dict.keys()) + + kernel_only = kernel_arches - ramdisk_arches + ramdisk_only = ramdisk_arches - kernel_arches + + if kernel_only: + LOG.warning('The [conductor]%s configuration has entries for ' + 'architectures %s that are missing from ' + '[conductor]%s. This may result in failed %s ' + 'operations for these architectures.', + kernel_opt, ', '.join(sorted(kernel_only)), + ramdisk_opt, operation) + + if ramdisk_only: + LOG.warning('The [conductor]%s configuration has entries for ' + 'architectures %s that are missing from ' + '[conductor]%s. This may result in failed %s ' + 'operations for these architectures.', + ramdisk_opt, ', '.join(sorted(ramdisk_only)), + kernel_opt, operation) + + def issue_startup_warnings(conf): warn_about_unsafe_shred_parameters(conf) warn_about_sqlite() warn_about_max_wait_parameters(conf) + warn_about_inconsistent_kernel_ramdisk(conf) def main(): diff --git a/ironic/common/pxe_utils.py b/ironic/common/pxe_utils.py index b6c3a11d1b..d1abeac576 100644 --- a/ironic/common/pxe_utils.py +++ b/ironic/common/pxe_utils.py @@ -666,20 +666,11 @@ def parse_driver_info(node, mode='deploy'): # options. Skipping. return {} - info = node.driver_info - - params_to_check = KERNEL_RAMDISK_LABELS[mode] - - d_info = {k: info.get(k) for k in params_to_check} - if not any(d_info.values()): - # NOTE(dtantsur): avoid situation when e.g. deploy_kernel comes from - # driver_info but deploy_ramdisk comes from configuration, since it's - # a sign of a potential operator's mistake. - d_info = {k: getattr(CONF.conductor, k) for k in params_to_check} + info = driver_utils.get_agent_kernel_ramdisk(node, mode=mode) error_msg = _("Cannot validate PXE bootloader. Some parameters were" " missing in node's driver_info and configuration") - deploy_utils.check_for_missing_params(d_info, error_msg) - return d_info + deploy_utils.check_for_missing_params(info, error_msg) + return info def get_instance_image_info(task, ipxe_enabled=False): diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py index 338b5eb27e..178fce1250 100644 --- a/ironic/conf/conductor.py +++ b/ironic/conf/conductor.py @@ -632,7 +632,19 @@ opts = [ mutable=True, help=_('Option to disable consideration of supplied ' 'network_data.json link MTU values as basis to ' - 'regenerate the supplied metadata.')) + 'regenerate the supplied metadata.')), + cfg.BoolOpt('error_on_ramdisk_config_inconsistency', + default=False, + mutable=True, + help=_('Option to determine if Ironic should fail to boot ' + 'ramdisk in situations where configuration is ' + 'ambiguous.e.g. if node[driver_info] contains an ' + 'override for deploy_ramdisk but not deploy_kernel ' + 'when ambiguous. When set to True, Ironic will raise ' + 'and fail the provisioning action that required a ' + 'ramdisk and kernel. When set to False, Ironic will ' + 'fallback to the next valid, consistent configured ' + 'ramdisk and kernel for the node.')) ] diff --git a/ironic/drivers/utils.py b/ironic/drivers/utils.py index 5e63d6dbe8..34bebe8bf8 100644 --- a/ironic/drivers/utils.py +++ b/ironic/drivers/utils.py @@ -475,39 +475,90 @@ def get_field(node, name, deprecated_prefix=None, use_conf=False, return getattr(CONF.conductor, name) -def get_agent_kernel_ramdisk(node, mode='deploy', deprecated_prefix=None): - """Get the agent kernel/ramdisk as a dictionary.""" +def _handle_inconsistent_ramdisk_config(msg): + LOG.warning(msg) + if CONF.conductor.error_on_ramdisk_config_inconsistency: + raise exception.MissingParameterValue(msg) + + +def _get_kernel_ramdisk_from_node(node, mode='deploy', deprecated_prefix=None): kernel_name = f'{mode}_kernel' ramdisk_name = f'{mode}_ramdisk' kernel, ramdisk = ( get_field(node, kernel_name, deprecated_prefix), get_field(node, ramdisk_name, deprecated_prefix), ) - # NOTE(dtantsur): avoid situation when e.g. deploy_kernel comes - # from driver_info but deploy_ramdisk comes from configuration, - # since it's a sign of a potential operator's mistake. if not kernel or not ramdisk: - # NOTE(kubajj): If kernel and/or ramdisk are specified by architecture, - # prioritise them, otherwise use the default. - kernel_dict_param_name = f'{mode}_kernel_by_arch' - ramdisk_dict_param_name = f'{mode}_ramdisk_by_arch' - kernel_dict = getattr(CONF.conductor, kernel_dict_param_name) - ramdisk_dict = getattr(CONF.conductor, ramdisk_dict_param_name) - cpu_arch = node.properties.get('cpu_arch') - kernel = kernel_dict.get(cpu_arch) if cpu_arch else None - ramdisk = ramdisk_dict.get(cpu_arch) if cpu_arch else None - if not kernel or not ramdisk: - kernel = getattr(CONF.conductor, kernel_name) - ramdisk = getattr(CONF.conductor, ramdisk_name) - return { - kernel_name: kernel, - ramdisk_name: ramdisk, - } + if ramdisk or kernel: + solo = 'unknown' + missing = 'unknown' + if kernel: + solo = kernel_name + missing = ramdisk_name + if ramdisk: + solo = ramdisk_name + missing = kernel_name + msg = _("Node %(node)s driver_info has an override for %(solo)s, " + "but not for %(missing)s. This configuration is " + "ambiguous. Node %(node)s cannot boot a ramdisk.") % { + 'node': node.uuid, 'solo': solo, 'missing': missing} + _handle_inconsistent_ramdisk_config(msg) + return None + + return {kernel_name: kernel, ramdisk_name: ramdisk} + + +def _get_kernel_ramdisk_by_arch(node, mode='deploy', deprecated_prefix=None): + kernel_name = f'{mode}_kernel' + ramdisk_name = f'{mode}_ramdisk' + kernel_dict_param_name = f'{mode}_kernel_by_arch' + ramdisk_dict_param_name = f'{mode}_ramdisk_by_arch' + kernel_dict = getattr(CONF.conductor, kernel_dict_param_name) + ramdisk_dict = getattr(CONF.conductor, ramdisk_dict_param_name) + cpu_arch = node.properties.get('cpu_arch') + kernel = kernel_dict.get(cpu_arch) if cpu_arch else None + ramdisk = ramdisk_dict.get(cpu_arch) if cpu_arch else None + if not kernel or not ramdisk: + solo = 'unknown' + missing = 'unknown' + if ramdisk or kernel: + if kernel: + solo = kernel_dict_param_name + missing = ramdisk_dict_param_name + if ramdisk: + solo = ramdisk_dict_param_name + missing = kernel_dict_param_name + msg = _("CONF.conductor.%(solo)s has a value for %(cpu_arch)s " + "servers, but doesn't have one in " + "CONF.conductor.%(missing)s. This configuration is " + "ambiguous. Node %(node)s cannot boot a ramdisk.") % { + 'solo': solo, 'cpu_arch': cpu_arch, + 'missing': missing, 'node': node.uuid} + _handle_inconsistent_ramdisk_config(msg) + return None else: - return { - kernel_name: kernel, - ramdisk_name: ramdisk - } + return {kernel_name: kernel, ramdisk_name: ramdisk} + + +def get_agent_kernel_ramdisk(node, mode='deploy', deprecated_prefix=None): + """Get the agent kernel/ramdisk as a dictionary. + + Get the agent kernel/ramdisk for a given $mode. This code enforces + """ + from_node = _get_kernel_ramdisk_from_node(node, mode, deprecated_prefix) + if from_node: + return from_node + + from_by_arch = _get_kernel_ramdisk_by_arch(node, mode, deprecated_prefix) + if from_by_arch: + return from_by_arch + + kernel_name = f'{mode}_kernel' + ramdisk_name = f'{mode}_ramdisk' + return { + kernel_name: getattr(CONF.conductor, kernel_name), + ramdisk_name: getattr(CONF.conductor, ramdisk_name), + } def get_agent_iso(node, mode='deploy', deprecated_prefix=None): diff --git a/ironic/tests/unit/command/test_conductor.py b/ironic/tests/unit/command/test_conductor.py index bb1c6488f8..094fd897eb 100644 --- a/ironic/tests/unit/command/test_conductor.py +++ b/ironic/tests/unit/command/test_conductor.py @@ -51,3 +51,113 @@ class ConductorStartTestCase(db_base.DbTestCase): 'deploy') conductor.warn_about_unsafe_shred_parameters(cfg.CONF) self.assertTrue(log_mock.warning.called) + + @mock.patch.object(conductor, 'LOG', autospec=True) + def test_warn_about_inconsistent_kernel_ramdisk_no_config(self, log_mock): + # Test when all config dicts are empty (default state) + conductor.warn_about_inconsistent_kernel_ramdisk(cfg.CONF) + self.assertFalse(log_mock.warning.called) + + @mock.patch.object(conductor, 'LOG', autospec=True) + def test_warn_about_inconsistent_kernel_ramdisk_consistent(self, log_mock): + # Test when kernel and ramdisk configs have matching architectures + cfg.CONF.set_override('deploy_kernel_by_arch', + {'x86_64': 'kernel1', 'aarch64': 'kernel2'}, + 'conductor') + cfg.CONF.set_override('deploy_ramdisk_by_arch', + {'x86_64': 'ramdisk1', 'aarch64': 'ramdisk2'}, + 'conductor') + cfg.CONF.set_override('rescue_kernel_by_arch', + {'x86_64': 'rkernel1'}, + 'conductor') + cfg.CONF.set_override('rescue_ramdisk_by_arch', + {'x86_64': 'rramdisk1'}, + 'conductor') + conductor.warn_about_inconsistent_kernel_ramdisk(cfg.CONF) + self.assertFalse(log_mock.warning.called) + + @mock.patch.object(conductor, 'LOG', autospec=True) + def test_warn_about_inconsistent_kernel_ramdisk_kernel_only(self, + log_mock): + # Test when kernel has architectures that ramdisk doesn't + cfg.CONF.set_override('deploy_kernel_by_arch', + {'x86_64': 'kernel1', 'aarch64': 'kernel2'}, + 'conductor') + cfg.CONF.set_override('deploy_ramdisk_by_arch', + {'x86_64': 'ramdisk1'}, + 'conductor') + conductor.warn_about_inconsistent_kernel_ramdisk(cfg.CONF) + self.assertEqual(1, log_mock.warning.call_count) + warning_call = log_mock.warning.call_args[0] + # Check the warning message format and arguments + self.assertIn('[conductor]%s', warning_call[0]) + self.assertEqual('deploy_kernel_by_arch', warning_call[1]) + self.assertEqual('aarch64', warning_call[2]) + self.assertEqual('deploy_ramdisk_by_arch', warning_call[3]) + self.assertEqual('provisioning', warning_call[4]) + + @mock.patch.object(conductor, 'LOG', autospec=True) + def test_warn_about_inconsistent_kernel_ramdisk_ramdisk_only(self, + log_mock): + # Test when ramdisk has architectures that kernel doesn't + cfg.CONF.set_override('rescue_kernel_by_arch', + {'x86_64': 'kernel1'}, + 'conductor') + cfg.CONF.set_override('rescue_ramdisk_by_arch', + {'x86_64': 'ramdisk1', 'ppc64le': 'ramdisk2'}, + 'conductor') + conductor.warn_about_inconsistent_kernel_ramdisk(cfg.CONF) + self.assertEqual(1, log_mock.warning.call_count) + warning_call = log_mock.warning.call_args[0] + # Check the warning message format and arguments + self.assertIn('[conductor]%s', warning_call[0]) + self.assertEqual('rescue_ramdisk_by_arch', warning_call[1]) + self.assertEqual('ppc64le', warning_call[2]) + self.assertEqual('rescue_kernel_by_arch', warning_call[3]) + self.assertEqual('rescue', warning_call[4]) + + @mock.patch.object(conductor, 'LOG', autospec=True) + def test_warn_about_inconsistent_kernel_ramdisk_both_bad(self, + log_mock): + # Test when both kernel and ramdisk have mismatched architectures + cfg.CONF.set_override('deploy_kernel_by_arch', + {'x86_64': 'kernel1', 'aarch64': 'kernel2'}, + 'conductor') + cfg.CONF.set_override('deploy_ramdisk_by_arch', + {'x86_64': 'ramdisk1', 'ppc64le': 'ramdisk2'}, + 'conductor') + conductor.warn_about_inconsistent_kernel_ramdisk(cfg.CONF) + self.assertEqual(2, log_mock.warning.call_count) + # Check that both warnings were issued with correct parameters + warning_args = [call[0] for call in log_mock.warning.call_args_list] + # First warning about kernel_only (aarch64) + self.assertEqual('deploy_kernel_by_arch', warning_args[0][1]) + self.assertEqual('aarch64', warning_args[0][2]) + # Second warning about ramdisk_only (ppc64le) + self.assertEqual('deploy_ramdisk_by_arch', warning_args[1][1]) + self.assertEqual('ppc64le', warning_args[1][2]) + + @mock.patch.object(conductor, 'LOG', autospec=True) + def test_warn_about_inconsistent_kernel_ramdisk_multiple_configs(self, + log_mock): + # Test multiple config pairs with inconsistencies + cfg.CONF.set_override('deploy_kernel_by_arch', + {'x86_64': 'kernel1', 'aarch64': 'kernel2'}, + 'conductor') + cfg.CONF.set_override('deploy_ramdisk_by_arch', + {'x86_64': 'ramdisk1'}, + 'conductor') + cfg.CONF.set_override('rescue_kernel_by_arch', + {'ppc64le': 'rkernel1'}, + 'conductor') + cfg.CONF.set_override('rescue_ramdisk_by_arch', + {'ppc64le': 'rramdisk1', 's390x': 'rramdisk2'}, + 'conductor') + conductor.warn_about_inconsistent_kernel_ramdisk(cfg.CONF) + self.assertEqual(2, log_mock.warning.call_count) + # Verify both deploy and rescue warnings were issued + warning_args = [call[0] for call in log_mock.warning.call_args_list] + # Check operation names in warnings + operations = [args[4] for args in warning_args] + self.assertIn('provisioning', operations) + self.assertIn('rescue', operations) diff --git a/ironic/tests/unit/common/test_pxe_utils.py b/ironic/tests/unit/common/test_pxe_utils.py index cb4866a6fc..b1a78322ac 100644 --- a/ironic/tests/unit/common/test_pxe_utils.py +++ b/ironic/tests/unit/common/test_pxe_utils.py @@ -1363,18 +1363,21 @@ class PXEInterfacesTestCase(db_base.DbTestCase): def test_parse_driver_info_mixed_source_deploy(self): self.config(deploy_kernel='file:///image', deploy_ramdisk='file:///image', + error_on_ramdisk_config_inconsistency=True, group='conductor') self._test_parse_driver_info_missing_ramdisk() def test_parse_driver_info_mixed_source_deploy_by_arch(self): self.config(deploy_kernel_by_arch={'x86_64': 'file:///image'}, deploy_ramdisk_by_arch={'x86_64': 'file:///image'}, + error_on_ramdisk_config_inconsistency=True, group='conductor') self._test_parse_driver_info_missing_ramdisk() def test_parse_driver_info_mixed_source_rescue(self): self.config(rescue_kernel='file:///image', rescue_ramdisk='file:///image', + error_on_ramdisk_config_inconsistency=True, group='conductor') self._test_parse_driver_info_missing_ramdisk(mode='rescue') diff --git a/ironic/tests/unit/drivers/modules/redfish/test_boot.py b/ironic/tests/unit/drivers/modules/redfish/test_boot.py index bb1330cf88..cef06aa13f 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_boot.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_boot.py @@ -268,36 +268,40 @@ class RedfishVirtualMediaBootTestCase(db_base.DbTestCase): self._test_parse_driver_info_choose_by_arch(mode='rescue') def _test_parse_driver_info_choose_by_hierarchy(self, mode='deploy', - ramdisk_missing=False): + set_dinfo=False): with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: - if mode == 'rescue': - task.node.provision_state = states.RESCUING - ramdisk = 'glance://def_%s_ramdisk_uuid' % mode kernel = 'glance://def_%s_kernel_uuid' % mode + dinfo_ramdisk = 'glance://di_%s_ramdisk_uuid' % mode + dinfo_kernel = 'glance://di_%s_kernel_uuid' % mode ramdisk_by_arch = 'glance://%s_ramdisk_by_arch_uuid' % mode kernel_by_arch = 'glance://%s_kernel_by_arch_uuid' % mode - config = { - '%s_kernel_by_arch' % mode: { - 'x86_64': kernel_by_arch}, - '%s_ramdisk' % mode: ramdisk, - '%s_kernel' % mode: kernel - } - if not ramdisk_missing: - config['%s_ramdisk_by_arch' % mode] = { - 'x86_64': ramdisk_by_arch} + if mode == 'rescue': + task.node.provision_state = states.RESCUING + + if set_dinfo: expected = { - '%s_ramdisk' % mode: ramdisk_by_arch, - '%s_kernel' % mode: kernel_by_arch + '%s_kernel' % mode: dinfo_kernel, + '%s_ramdisk' % mode: dinfo_ramdisk } + task.node.driver_info.update(expected) else: expected = { '%s_ramdisk' % mode: ramdisk, '%s_kernel' % mode: kernel } + config = { + '%s_kernel_by_arch' % mode: { + 'x86_64': kernel_by_arch}, + '%s_ramdisk_by_arch' % mode: { + 'x86_64': ramdisk_by_arch}, + '%s_kernel' % mode: kernel, + '%s_ramdisk' % mode: ramdisk, + } + self.config(group='conductor', **config) image_info = redfish_boot._parse_driver_info(task.node) @@ -306,17 +310,41 @@ class RedfishVirtualMediaBootTestCase(db_base.DbTestCase): self.assertEqual(value, image_info[key]) def test_parse_driver_info_choose_by_hierarchy_deploy(self): - self._test_parse_driver_info_choose_by_hierarchy() + """Test to ensure driver_info will override by_arch and defaults.""" + self._test_parse_driver_info_choose_by_hierarchy(set_dinfo=True) def test_parse_driver_info_choose_by_hierarchy_rescue(self): - self._test_parse_driver_info_choose_by_hierarchy(mode='rescue') + """Test to ensure driver_info will override in rescue.""" + self._test_parse_driver_info_choose_by_hierarchy(mode='rescue', + set_dinfo=True) - def test_parse_driver_info_choose_by_hierarchy_missing_param_deploy(self): - self._test_parse_driver_info_choose_by_hierarchy(ramdisk_missing=True) + def test_parse_driver_info_mismatched_override(self): + """This test validates partial rd/kernel overrides are ignored""" + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: - def test_parse_driver_info_choose_by_hierarchy_missing_param_rescue(self): - self._test_parse_driver_info_choose_by_hierarchy( - mode='rescue', ramdisk_missing=True) + task.node.driver_info.update( + {'deploy_kernel': 'glance://ignoreme'} + ) + + expected = { + 'deploy_kernel': 'glance://defaultkernel', + 'deploy_ramdisk': 'glance://defaultramdisk' + } + + config = { + 'deploy_ramdisk_by_arch': { + 'x86_64': 'glance://ignoremetoo'}, + 'deploy_kernel': 'glance://defaultkernel', + 'deploy_ramdisk': 'glance://defaultramdisk' + } + + self.config(group='conductor', **config) + + image_info = redfish_boot._parse_driver_info(task.node) + + for key, value in expected.items(): + self.assertEqual(value, image_info[key]) def test_parse_deploy_info(self): with task_manager.acquire(self.context, self.node.uuid, @@ -1952,35 +1980,42 @@ class RedfishHTTPBootTestCase(db_base.DbTestCase): self._test_parse_driver_info_choose_by_arch(mode='rescue') def _test_parse_driver_info_choose_by_hierarchy(self, mode='deploy', - ramdisk_missing=False): + set_dinfo=False): with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: + kname = '%s_kernel' % mode + rname = '%s_ramdisk' % mode + ramdisk = 'glance://def_%s_ramdisk_uuid' % mode + kernel = 'glance://def_%s_kernel_uuid' % mode + dinfo_ramdisk = 'glance://di_%s_ramdisk_uuid' % mode + dinfo_kernel = 'glance://di_%s_kernel_uuid' % mode + ramdisk_by_arch = 'glance://%s_ramdisk_by_arch_uuid' % mode + kernel_by_arch = 'glance://%s_kernel_by_arch_uuid' % mode + if mode == 'rescue': task.node.provision_state = states.RESCUING - ramdisk = 'glance://def_%s_ramdisk_uuid' % mode - kernel = 'glance://def_%s_kernel_uuid' % mode - ramdisk_by_arch = 'glance://%s_ramdisk_by_arch_uuid' % mode - kernel_by_arch = 'glance://%s_kernel_by_arch_uuid' % mode + if set_dinfo: + expected = { + kname: dinfo_kernel, + rname: dinfo_ramdisk + } + task.node.driver_info.update(expected) + else: + expected = { + rname: ramdisk, + kname: kernel + } config = { '%s_kernel_by_arch' % mode: { 'x86_64': kernel_by_arch}, - '%s_ramdisk' % mode: ramdisk, - '%s_kernel' % mode: kernel + '%s_ramdisk_by_arch' % mode: { + 'x86_64': ramdisk_by_arch + }, + rname: ramdisk, + kname: kernel } - if not ramdisk_missing: - config['%s_ramdisk_by_arch' % mode] = { - 'x86_64': ramdisk_by_arch} - expected = { - '%s_ramdisk' % mode: ramdisk_by_arch, - '%s_kernel' % mode: kernel_by_arch - } - else: - expected = { - '%s_ramdisk' % mode: ramdisk, - '%s_kernel' % mode: kernel - } self.config(group='conductor', **config) @@ -1990,17 +2025,11 @@ class RedfishHTTPBootTestCase(db_base.DbTestCase): self.assertEqual(value, image_info[key]) def test_parse_driver_info_choose_by_hierarchy_deploy(self): - self._test_parse_driver_info_choose_by_hierarchy() + self._test_parse_driver_info_choose_by_hierarchy(set_dinfo=True) def test_parse_driver_info_choose_by_hierarchy_rescue(self): - self._test_parse_driver_info_choose_by_hierarchy(mode='rescue') - - def test_parse_driver_info_choose_by_hierarchy_missing_param_deploy(self): - self._test_parse_driver_info_choose_by_hierarchy(ramdisk_missing=True) - - def test_parse_driver_info_choose_by_hierarchy_missing_param_rescue(self): - self._test_parse_driver_info_choose_by_hierarchy( - mode='rescue', ramdisk_missing=True) + self._test_parse_driver_info_choose_by_hierarchy(mode='rescue', + set_dinfo=True) def test_parse_deploy_info(self): with task_manager.acquire(self.context, self.node.uuid, diff --git a/ironic/tests/unit/drivers/test_utils.py b/ironic/tests/unit/drivers/test_utils.py index 9b68221c16..2d7ac48f22 100644 --- a/ironic/tests/unit/drivers/test_utils.py +++ b/ironic/tests/unit/drivers/test_utils.py @@ -394,6 +394,127 @@ class UtilsRamdiskLogsTestCase(tests_base.TestCase): mock_logs_name.assert_called_once_with(self.node, label=None) +class GetAgentKernelRamdiskTestCase(db_base.DbTestCase): + + def setUp(self): + super(GetAgentKernelRamdiskTestCase, self).setUp() + self.node = obj_utils.create_test_node(self.context) + + def test_get_agent_kernel_ramdisk_from_node_driver_info(self): + """Test getting kernel/ramdisk from node driver_info.""" + self.node.driver_info = { + 'deploy_kernel': 'http://example.com/kernel', + 'deploy_ramdisk': 'http://example.com/ramdisk' + } + result = driver_utils.get_agent_kernel_ramdisk(self.node) + expected = { + 'deploy_kernel': 'http://example.com/kernel', + 'deploy_ramdisk': 'http://example.com/ramdisk' + } + self.assertEqual(expected, result) + + def test_get_agent_kernel_ramdisk_from_by_arch(self): + """Test kernel/ramdisk from by_arch config when no driver_info.""" + self.config(deploy_kernel_by_arch={'x86_64': 'kernel_by_arch'}, + group='conductor') + self.config(deploy_ramdisk_by_arch={'x86_64': 'ramdisk_by_arch'}, + group='conductor') + self.node.properties = {'cpu_arch': 'x86_64'} + result = driver_utils.get_agent_kernel_ramdisk(self.node) + expected = { + 'deploy_kernel': 'kernel_by_arch', + 'deploy_ramdisk': 'ramdisk_by_arch' + } + self.assertEqual(expected, result) + + def test_get_agent_kernel_ramdisk_from_global_config(self): + """Test getting kernel/ramdisk from global config.""" + self.config(deploy_kernel='global_kernel', group='conductor') + self.config(deploy_ramdisk='global_ramdisk', group='conductor') + result = driver_utils.get_agent_kernel_ramdisk(self.node) + expected = { + 'deploy_kernel': 'global_kernel', + 'deploy_ramdisk': 'global_ramdisk' + } + self.assertEqual(expected, result) + + def test_get_agent_kernel_ramdisk_inconsistent_node_raises_exception(self): + """Test that inconsistent node config raises exception enabled.""" + self.config(error_on_ramdisk_config_inconsistency=True, + group='conductor') + self.node.driver_info = {'deploy_kernel': 'kernel_only'} + + self.assertRaises(exception.MissingParameterValue, + driver_utils.get_agent_kernel_ramdisk, self.node) + + def test_get_agent_kernel_ramdisk_inconsistent_node_logs_warning(self): + """Test that inconsistent node config logs warning when disabled.""" + self.config(error_on_ramdisk_config_inconsistency=False, + group='conductor') + self.config(deploy_kernel='global_kernel', group='conductor') + self.config(deploy_ramdisk='global_ramdisk', group='conductor') + self.node.driver_info = {'deploy_kernel': 'kernel_only'} + + with mock.patch.object(driver_utils.LOG, 'warning', + autospec=True) as mock_log: + result = driver_utils.get_agent_kernel_ramdisk(self.node) + # fall back to global config since node config is inconsistent + expected = { + 'deploy_kernel': 'global_kernel', + 'deploy_ramdisk': 'global_ramdisk' + } + self.assertEqual(expected, result) + mock_log.assert_called_once() + + def test_get_agent_kernel_ramdisk_inconsistent_by_arch_raises(self): + """Test that inconsistent by_arch config raises when enabled.""" + self.config(error_on_ramdisk_config_inconsistency=True, + group='conductor') + self.config(deploy_kernel_by_arch={'x86_64': 'kernel_by_arch'}, + group='conductor') + # Missing ramdisk_by_arch for x86_64 + self.node.properties = {'cpu_arch': 'x86_64'} + + self.assertRaises(exception.MissingParameterValue, + driver_utils.get_agent_kernel_ramdisk, self.node) + + def test_get_agent_kernel_ramdisk_inconsistent_by_arch_logs_warning(self): + """Test that inconsistent by_arch config logs warning when disabled.""" + self.config(error_on_ramdisk_config_inconsistency=False, + group='conductor') + self.config(deploy_kernel='fallback_kernel', group='conductor') + self.config(deploy_ramdisk='fallback_ramdisk', group='conductor') + self.config(deploy_kernel_by_arch={'x86_64': 'kernel_by_arch'}, + group='conductor') + # Missing ramdisk_by_arch for x86_64 + self.node.properties = {'cpu_arch': 'x86_64'} + + with mock.patch.object(driver_utils.LOG, 'warning', + autospec=True) as mock_log: + result = driver_utils.get_agent_kernel_ramdisk(self.node) + # Should fall back to global config + expected = { + 'deploy_kernel': 'fallback_kernel', + 'deploy_ramdisk': 'fallback_ramdisk' + } + self.assertEqual(expected, result) + mock_log.assert_called_once() + + def test_get_agent_kernel_ramdisk_rescue_mode(self): + """Test getting rescue kernel/ramdisk works correctly.""" + self.node.driver_info = { + 'rescue_kernel': 'http://example.com/rescue_kernel', + 'rescue_ramdisk': 'http://example.com/rescue_ramdisk' + } + result = driver_utils.get_agent_kernel_ramdisk(self.node, + mode='rescue') + expected = { + 'rescue_kernel': 'http://example.com/rescue_kernel', + 'rescue_ramdisk': 'http://example.com/rescue_ramdisk' + } + self.assertEqual(expected, result) + + class MixinVendorInterfaceTestCase(db_base.DbTestCase): def setUp(self): diff --git a/releasenotes/notes/consistent-ramdisk-configs-6a37ef69bf73cd2e.yaml b/releasenotes/notes/consistent-ramdisk-configs-6a37ef69bf73cd2e.yaml new file mode 100644 index 0000000000..3bc376c39f --- /dev/null +++ b/releasenotes/notes/consistent-ramdisk-configs-6a37ef69bf73cd2e.yaml @@ -0,0 +1,38 @@ +--- +features: + - | + Added a new configuration option + ``[conductor]error_on_ramdisk_config_inconsistency`` to control how Ironic + handles inconsistent kernel and ramdisk configurations. + + When ``error_on_ramdisk_config_inconsistency`` is set to ``True``, Ironic + will raise a ``MissingParameterValue`` exception when it encounters + inconsistent kernel/ramdisk configurations, such as: + + * A node's ``driver_info`` containing only ``deploy_kernel`` but missing + ``deploy_ramdisk`` (or vice versa) + * The ``[conductor]deploy_kernel_by_arch`` configuration having entries for + architectures that are missing from ``[conductor]deploy_ramdisk_by_arch`` + (or vice versa) + * Similar inconsistencies with rescue kernel/ramdisk configurations + + When set to ``False`` (the default), Ironic will log warning messages about + these inconsistencies but continue operation by falling back to global + configuration values when possible. + + In a future Ironic release, the default of this value will be changed to + True to enforce strict validation. + +upgrade: + - | + The new ``[conductor]error_on_ramdisk_config_inconsistency`` configuration + option defaults to ``False`` to maintain backward compatibility. Existing + deployments with inconsistent kernel/ramdisk configurations will continue + to work as before, with warning messages logged to help identify potential + configuration issues. + + Operators who want strict validation of kernel/ramdisk configurations can + set this option to ``True``, but should first review their configurations + to ensure consistency across all node ``driver_info`` entries and + architecture-specific configuration options. Ironic expects to enable + strict validation of these values in a future release. \ No newline at end of file