ci: Expand the multinode job -- remove tinyipa usage

This change spreads multinode jobs across multiple "compute" nodes
with an increased amount of memory, which increases the overall test
resources available and limits controller-node hot spotting during
deployment operations.

This effectively changes multinode jobs from a single compute node
plus a single controller node to two compute nodes plus a single
controller node. The number of virtual machines hosted on the
controller node is also reduced.

This is done to eliminate the use of tinyipa in favor of a more
realistic CentOS-based IPA ramdisk, and it also removes the fallback
logic that switched to tinyipa on resource-limited nodes.

Change-Id: Ib52f7039072901ce72ac96e660d35a10cca59737
Signed-off-by: Julia Kreger <juliaashleykreger@gmail.com>
3 changed files with 147 additions and 61 deletions
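
For orientation, here is a minimal standalone sketch (not part of the diff)
of the host-role detection this change introduces: the playbook change below
writes the Zuul inventory hostname to /etc/devstack-host, and the devstack
plugin maps that name to a subnode index and address offset. Variable names
are taken from the change itself; the echoed mapping is illustrative only.

#!/bin/bash
# Controller default: no subnode id; offset 2 is still the first subnode
# address on the provision network.
SUBNODE_FINAL="2"
SUBNODE_ID=""
if [ -f /etc/devstack-host ]; then
    if grep -qi compute0 /etc/devstack-host; then
        SUBNODE_FINAL="2"   # -> 10.0.5.2 (or fc01::2 for the IPv6 job)
        SUBNODE_ID="1"
    elif grep -qi compute1 /etc/devstack-host; then
        SUBNODE_FINAL="3"   # -> 10.0.5.3 (or fc01::3)
        SUBNODE_ID="2"
    fi
fi
echo "SUBNODE_ID=${SUBNODE_ID:-none} SUBNODE_FINAL=${SUBNODE_FINAL}"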

@@ -128,6 +128,7 @@ fi
# IRONIC_SHARD_1_NAME + IRONIC_SHARDS will also configure n-cpu
IRONIC_SHARDS=${IRONIC_SHARDS:-0}
IRONIC_SHARD_1_NAME=${IRONIC_SHARD_1_NAME:-ds_shard_1}
IRONIC_NODE_SHARD_NAME=${IRONIC_NODE_SHARD_NAME:-ds_shard_1}
# The file is composed of multiple lines, each line includes fields
# separated by white space, in the format:
@@ -313,19 +314,6 @@ if [[ ! "$IRONIC_RAMDISK_TYPE" =~ $IRONIC_SUPPORTED_RAMDISK_TYPES_RE ]]; then
die $LINENO "Unrecognized IRONIC_RAMDISK_TYPE: $IRONIC_RAMDISK_TYPE. Expected 'tinyipa' or 'dib'"
fi
# NOTE(TheJulia): If we ever run any arm64, we will need to consider doing
# the same. Nested virt is not a thing there.
# Prevent a case that will likely result in a failure.
# NOTE(hjensas): tinyipa does not support dhcpv6-stateful
if [[ $IRONIC_HW_ARCH != "aarch64" ]] && [[ $IRONIC_IPV6_ADDRESS_MODE != "dhcpv6-stateful" ]]; then
if [[ "$hostdomain" =~ "rax" ]] || [[ "$hostdomain" =~ "iweb" ]] || ! $(grep -q -E 'vmx|svm' /proc/cpuinfo) ; then
if [[ "$IRONIC_RAMDISK_TYPE" == "dib" ]]; then
echo "** WARNING ** - DIB based IPA images have been defined, however we are running devstack on an environment which does not support nested VMs. Due to virtualization constraints, we are automatically falling back to TinyIPA to ensure CI job passage."
IRONIC_RAMDISK_TYPE="tinyipa"
fi
fi
fi
# Which deploy driver to use - valid choices right now
# are ``ipmi``, ``snmp`` and ``redfish``.
#
@@ -522,6 +510,25 @@ IRONIC_PROVISION_PROVIDER_NETWORK_TYPE=${IRONIC_PROVISION_PROVIDER_NETWORK_TYPE:
# This is only used if IRONIC_PROVISION_NETWORK_NAME has been set.
IRONIC_PROVISION_SEGMENTATION_ID=${IRONIC_PROVISION_SEGMENTATION_ID:-}
# NOTE(TheJulia): Some silly logic to help make multinode work because conductors
# refer to themselves and in this case we're running multiple conductors.
SUBNODE_FINAL="2"
# For nodes, there is a relative order, so when we have a subnode, we use
# this value to try and track which would be appropriate.
SUBNODE_ID=""
if [ -f /etc/devstack-host ]; then
# NOTE(TheJulia): this needs to match the Zuul Nodeset configuration
# of hostnames, and should then be mapped out with as many nodes
# required.
if $(grep -qi compute0 /etc/devstack-host); then
SUBNODE_FINAL="2"
SUBNODE_ID="1"
elif $(grep -qi compute1 /etc/devstack-host); then
SUBNODE_FINAL="3"
SUBNODE_ID="2"
fi
fi
if [[ "$IRONIC_IP_VERSION" != '6' ]]; then
# NOTE(TheJulia): Lets not try and support mixed mode since the conductor
# can't support mixed mode operation. We are either IPv4 OR IPv6.
@@ -535,7 +542,7 @@ if [[ "$IRONIC_IP_VERSION" != '6' ]]; then
# IRONIC_PROVISION_SUBNET_GATEWAY - is configured on primary node.
# Ironic provision subnet gateway.
IRONIC_PROVISION_SUBNET_GATEWAY=${IRONIC_PROVISION_SUBNET_GATEWAY:-'10.0.5.1'}
IRONIC_PROVISION_SUBNET_SUBNODE_IP=${IRONIC_PROVISION_SUBNET_SUBNODE_IP:-'10.0.5.2'}
IRONIC_PROVISION_SUBNET_SUBNODE_IP=${IRONIC_PROVISION_SUBNET_SUBNODE_IP:-'10.0.5.'$SUBNODE_FINAL}
# Ironic provision subnet prefix
# Example: IRONIC_PROVISION_SUBNET_PREFIX=10.0.5.0/24
@@ -546,7 +553,7 @@ else
# HOST_IPV6 cannot be used for v6 testing.
IRONIC_HOST_IPV6='fc00::1'
IRONIC_PROVISION_SUBNET_GATEWAY=${IRONIC_PROVISION_SUBNET_GATEWAY:-'fc01::1'}
IRONIC_PROVISION_SUBNET_SUBNODE_IP=${IRONIC_PROVISION_SUBNET_SUBNODE_IP:-'fc01::2'}
IRONIC_PROVISION_SUBNET_SUBNODE_IP=${IRONIC_PROVISION_SUBNET_SUBNODE_IP:-'fc01::'$SUBNODE_FINAL}
IRONIC_PROVISION_SUBNET_PREFIX=${IRONIC_PROVISION_SUBNET_PREFIX:-'fc01::/64'}
IRONIC_TFTPSERVER_IP=$IRONIC_HOST_IPV6
fi
@@ -3290,7 +3297,11 @@ function enroll_nodes {
local switch_id
if [[ "${IRONIC_NETWORK_SIMULATOR:-ovs}" == "ovs" ]]; then
switch_id=$(echo $hardware_info |awk '{print $4}')
switch_info=$(echo $hardware_info |awk '{print $5}')
if [[ "$SUBNODE_ID" == "" ]]; then
switch_info=$(echo $hardware_info |awk '{print $5}')
else
switch_info="sub$SUBNODE_ID$(echo $hardware_info |awk '{print $5}')"
fi
else
switch_id="00:00:00:00:00:00"
switch_info=${IRONIC_NETWORK_SIMULATOR:-brbm}
@@ -3404,7 +3415,7 @@ function enroll_nodes {
fi
if [[ $IRONIC_SHARDS == "1" ]]; then
openstack --os-cloud devstack-system-admin baremetal node set $node_id --shard $IRONIC_SHARD_1_NAME
openstack --os-cloud devstack-system-admin baremetal node set $node_id --shard $IRONIC_NODE_SHARD_NAME
fi
# In case we using portgroups, we should API version that support them.
@@ -3510,12 +3521,28 @@ function enroll_nodes {
# that was created will fail the service_check in the end of the deployment
_clean_ncpu_failure
start_nova_compute
else
# NOTE(vsaienko) we enrolling IRONIC_VM_COUNT on each node. So on subnode
# we expect to have 2 x total_cpus
total_nodes=$(( total_nodes * 2 ))
fi
wait_for_nova_resources $total_nodes
if [[ "$IRONIC_SHARD_1_NAME" != "$IRONIC_NODE_SHARD_NAME" ]]; then
# If we're running in a disjointed shard configuration, we may see
# only one node, the other compute services are the ones in use,
# which means our total local count is invalid and can't be used.
# So, we just make sure we can see *one* node successfully.
if [[ "$HOST_TOPOLOGY_ROLE" == "subnode" ]]; then
# If we're on a subnode, just wait for at least one node.
# If we're on the controller, we won't see any nodes.
wait_for_nova_resources $total_nodes
fi
# TODO(TheJulia): We should check the primary/controller node
# to ensure that nova-compute doesn't see the node configured
# for it if we are so configured. We would do that as the else
# of the above logic, which *should* never see the ironic baremetal
# node entry by default.
else
# In this case, we're looking to match all the nodes on this host,
# itself. Meaning the tests focus on the use of this node and we
# need to ensure we have all the nodes.
wait_for_nova_resources $total_nodes
fi
fi
}
@@ -4182,9 +4209,6 @@ function ironic_configure_tempest {
if [[ -n "$TEMPEST_BAREMETAL_MAX_MICROVERSION" ]]; then
iniset $TEMPEST_CONFIG baremetal max_microversion $TEMPEST_BAREMETAL_MAX_MICROVERSION
fi
if [[ -n "$IRONIC_VM_COUNT" ]]; then
iniset $TEMPEST_CONFIG baremetal available_nodes $IRONIC_VM_COUNT
fi
if [[ -n "$IRONIC_PING_TIMEOUT" ]]; then
iniset $TEMPEST_CONFIG validation ping_timeout $IRONIC_PING_TIMEOUT
fi
@@ -4346,7 +4370,7 @@ function ironic_configure_tempest {
function get_ironic_node_prefix {
local node_prefix="node"
if [[ "$HOST_TOPOLOGY_ROLE" == "subnode" ]]; then
node_prefix="$HOST_TOPOLOGY_ROLE"
node_prefix="$(cat /etc/devstack-host|sed s/compute/c/)node"
fi
echo $node_prefix
}
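
For illustration (not part of the diff), the distinct VM name prefixes the
updated get_ironic_node_prefix yields now that /etc/devstack-host carries
each host's inventory hostname; the controller does not take the subnode
branch and keeps the plain "node" prefix:

for host in compute0 compute1; do
    # Mirrors the sed substitution in get_ironic_node_prefix above.
    echo "${host} -> $(echo ${host} | sed s/compute/c/)node"
done
# Output: compute0 -> c0node
#         compute1 -> c1node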

@@ -10,6 +10,7 @@
sudo cp ~root/.ssh/id_rsa.pub ~root/.ssh/id_rsa ~stack/.ssh
sudo chmod 700 ~stack/.ssh
sudo chown -R stack ~stack
echo {{inventory_hostname}} | sudo tee /etc/devstack-host
executable: /bin/bash
roles:
- multi-node-bridge
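
A quick way to confirm the marker left by the added playbook task on any job
node (hostnames come from the nodeset added below):

# The file simply holds the node's Zuul inventory hostname,
# e.g. "controller", "compute0" or "compute1".
cat /etc/devstack-host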

@@ -13,6 +13,42 @@
nodes:
- controller
# NOTE(TheJulia): Based upon openstack-two-node-noble, but modeled
# for Ironic's multinode jobs.
- nodeset:
name: ironic-three-node-noble
nodes:
- name: controller
label: ubuntu-noble
- name: compute0
label: ubuntu-noble
- name: compute1
label: ubuntu-noble
groups:
# Node where tests are executed and test results collected
- name: tempest
nodes:
- controller
# Nodes running the compute service
- name: compute
nodes:
- controller
- compute0
- compute1
# Nodes that are not the controller
- name: subnode
nodes:
- compute0
- compute1
# Switch node for multinode networking setup
- name: switch
nodes:
- controller
# Peer nodes for multinode networking setup
- name: peers
nodes:
- compute0
- compute1
- job:
name: ironic-base
@@ -684,17 +720,18 @@
# tests can also exercised as part of CI.
- job:
name: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode
description: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode
name: ironic-tempest-ipa-wholedisk-direct-multinode
description: ironic-tempest-ipa-wholedisk-direct-multinode
parent: tempest-multinode-full-base
nodeset: openstack-two-node-noble
nodeset: ironic-three-node-noble
pre-run: playbooks/ci-workarounds/pre.yaml
post-run: playbooks/ci-workarounds/get_extra_logging.yaml
required-projects:
- opendev.org/openstack/ironic
- opendev.org/openstack/ironic-python-agent
- opendev.org/openstack/ironic-python-agent-builder
- opendev.org/openstack/ironic-tempest-plugin
- opendev.org/openstack/virtualbmc
- opendev.org/openstack/sushy-tools
- opendev.org/openstack/networking-generic-switch
irrelevant-files:
- ^.*\.rst$
@@ -728,27 +765,32 @@
HOST_TOPOLOGY_ROLE: primary
INSTALL_TEMPEST: False # Don't install a tempest package globally
IRONIC_AUTOMATED_CLEAN_ENABLED: False
HOST_TOPOLOGY_SUBNODES: "{{ hostvars['compute1']['nodepool']['public_ipv4'] }}"
# NOTE(TheJulia): We *MUST* list all of the hosts in this list,
# this drives the configuration of switch references on the controller
# node. Furthermore, this job will fail if there is not a public IPv4
# address available for the SSH access to manage port configurations.
HOST_TOPOLOGY_SUBNODES: "{{ hostvars['compute0']['nodepool']['public_ipv4'] }} {{ hostvars['compute1']['nodepool']['public_ipv4'] }}"
IRONIC_BAREMETAL_BASIC_OPS: True
IRONIC_BUILD_DEPLOY_RAMDISK: False
IRONIC_CALLBACK_TIMEOUT: 600
IRONIC_DEPLOY_DRIVER: ipmi
IRONIC_DEPLOY_DRIVER: redfish
IRONIC_ENABLED_BOOT_INTERFACES: "redfish-virtual-media"
IRONIC_ENABLED_HARDWARE_TYPES: redfish
IRONIC_ENABLED_MANAGEMENT_INTERFACES: redfish
IRONIC_REDFISH_EMULATOR_FEATURE_SET: vmedia
IRONIC_ENABLED_NETWORK_INTERFACES: flat,neutron
IRONIC_INSPECTOR_BUILD_RAMDISK: False
IRONIC_NETWORK_INTERFACE: neutron
IRONIC_PROVISION_NETWORK_NAME: ironic-provision
IRONIC_PROVISION_SUBNET_GATEWAY: 10.0.5.1
IRONIC_PROVISION_SUBNET_PREFIX: 10.0.5.0/24
IRONIC_RAMDISK_TYPE: tinyipa
IRONIC_TEMPEST_BUILD_TIMEOUT: 600
IRONIC_TEMPEST_WHOLE_DISK_IMAGE: True
IRONIC_USE_LINK_LOCAL: True
IRONIC_VM_COUNT: 3
IRONIC_VM_COUNT: 1
IRONIC_VM_EPHEMERAL_DISK: 0
IRONIC_VM_LOG_DIR: '{{ devstack_base_dir }}/ironic-bm-logs'
IRONIC_VM_SPECS_RAM: 1024
IRONIC_VM_SPECS_DISK: 4
IRONIC_VM_SPECS_CPU: 1
IRONIC_VM_SPECS_DISK: 10
OVS_BRIDGE_MAPPINGS: 'mynetwork:brbm,public:br-infra'
OVS_PHYSICAL_BRIDGE: brbm
PHYSICAL_NETWORK: mynetwork
@@ -809,8 +851,23 @@
ovn-northd: False
q-ovn-metadata-agent: False
rabbit: True
tls-proxy: False
group-vars:
# Turns out, devstack looks for the subnode group name.
subnode:
devstack_services:
atop: True
c-api: False
c-bak: False
c-sch: False
c-vol: False
cinder: False
q-agt: True
ovn-controller: False
ovn-northd: False
q-ovn-metadata-agent: False
n-cpu: True
tls-proxy: False
devstack_localrc:
ENABLE_TENANT_TUNNELS: False
ENABLE_TENANT_VLANS: True
@@ -819,61 +876,65 @@
HOST_TOPOLOGY_ROLE: subnode
IRONIC_AUTOMATED_CLEAN_ENABLED: False
IRONIC_BAREMETAL_BASIC_OPS: True
IRONIC_DEPLOY_DRIVER: ipmi
IRONIC_DEPLOY_DRIVER: redfish
IRONIC_ENABLED_BOOT_INTERFACES: "redfish-virtual-media"
IRONIC_ENABLED_HARDWARE_TYPES: redfish
IRONIC_ENABLED_MANAGEMENT_INTERFACES: redfish
IRONIC_REDFISH_EMULATOR_FEATURE_SET: vmedia
IRONIC_ENABLED_NETWORK_INTERFACES: flat,neutron
IRONIC_NETWORK_INTERFACE: neutron
IRONIC_PROVISION_NETWORK_NAME: ironic-provision
IRONIC_RAMDISK_TYPE: tinyipa
IRONIC_USE_LINK_LOCAL: True
IRONIC_VM_COUNT: 3
IRONIC_VM_EPHEMERAL_DISK: 0
IRONIC_VM_LOG_DIR: '{{ devstack_base_dir }}/ironic-bm-logs'
IRONIC_VM_NETWORK_BRIDGE: sub1brbm
IRONIC_VM_SPECS_RAM: 1024
IRONIC_VM_SPECS_DISK: 4
IRONIC_VM_SPECS_CPU: 1
OVS_BRIDGE_MAPPINGS: 'mynetwork:sub1brbm,public:br-infra'
OVS_PHYSICAL_BRIDGE: sub1brbm
PHYSICAL_NETWORK: mynetwork
Q_AGENT: openvswitch
Q_ML2_TENANT_NETWORK_TYPE: vlan
VIRT_DRIVER: ironic
PUBLIC_BRIDGE: br-infra
LIBVIRT_STORAGE_POOL_PATH: /opt/libvirt/images
devstack_services:
atop: True
c-api: False
c-bak: False
c-sch: False
c-vol: False
cinder: False
OVS_BRIDGE_MAPPINGS: 'mynetwork:brbm,public:br-infra'
OVS_PHYSICAL_BRIDGE: brbm
- job:
# NOTE(TheJulia): Compatibility job definition, can be removed.
name: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode
description: ironic-tempest-ipa-wholedisk-direct-multinode
parent: ironic-tempest-ipa-wholedisk-direct-multinode
- job:
# NOTE(TheJulia): Compatibility job definition, can be removed.
name: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode-shard
description: ironic-tempest-ipa-wholedisk-direct-multinode-shard
parent: ironic-tempest-ipa-wholedisk-direct-multinode-shard
q-agt: True
ovn-controller: False
ovn-northd: False
q-ovn-metadata-agent: False
n-cpu: True
- job:
# NOTE(JayF) This job sets up two nova-computes with two different shards
# TODO(JayF) Add a post-run validation to ensure the two n-cpus did not
# see each others' nodes
name: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode-shard
description: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode-shard
name: ironic-tempest-ipa-wholedisk-direct-multinode-shard
description: ironic-tempest-ipa-wholedisk-direct-multinode-shard
with automated cleaning enabled.
parent: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode
nodeset: openstack-two-node-noble
parent: ironic-tempest-ipa-wholedisk-direct-multinode
nodeset: ironic-three-node-noble
vars:
tempest_test_regex: "BaremetalBasicOps"
devstack_localrc:
IRONIC_SHARDS: 1
IRONIC_SHARD_1_NAME: "main-node"
IRONIC_AUTOMATED_CLEAN_ENABLED: True
# Let the local services ignore the controller node,
# but focus on managing the nodes on the subnode, since
# we have greater resources on the other nodes.
IRONIC_NODE_SHARD_NAME: "subnode"
group-vars:
subnode:
devstack_localrc:
IRONIC_SHARDS: 1
IRONIC_SHARD_1_NAME: "subnode"
IRONIC_NODE_SHARD_NAME: "main-node"
- job:
name: ironic-tox-unit-with-driver-libs
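
To make the disjoint shard wiring of the new *-shard job concrete, a hedged
sketch of the resulting node tagging: the command form is the one used in
enroll_nodes above, <node-uuid> is a placeholder, and the cross-assignment
follows from the controller and subnode values of IRONIC_SHARD_1_NAME (which
configures nova-compute) and IRONIC_NODE_SHARD_NAME (which tags enrolled nodes).

# On the controller (IRONIC_NODE_SHARD_NAME="subnode"), locally enrolled nodes
# are tagged for the shard that the subnode nova-computes manage:
openstack --os-cloud devstack-system-admin baremetal node set <node-uuid> --shard subnode
# On each compute subnode (IRONIC_NODE_SHARD_NAME="main-node") the values are
# flipped, so no nova-compute manages the baremetal VMs hosted on its own host:
openstack --os-cloud devstack-system-admin baremetal node set <node-uuid> --shard main-node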