From 72c208f7653a1edfe44dec8518a87f3af505bc35 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Fri, 16 May 2025 09:34:41 -0700 Subject: [PATCH] ci: Expand the multinode job -- remove tinyipa usage This change moves multinode jobs to be leveraged across multiple "compute" nodes with an increased amount of memory, which increases the overall test resources available and limits controller node hot spotting for deployment operations. This effectively changes multinode jobs from being a single compute node with a single controller node, to two compute nodes and a single controller node. The controller node's hosted virtual machine count is also dialed back. This was done to eliminate usage of tinyipa in favor of a more realistic CentOS-based IPA ramdisk, and also removes fallback logic to use tinyipa on more limited resource nodes. Change-Id: Ib52f7039072901ce72ac96e660d35a10cca59737 Signed-off-by: Julia Kreger --- devstack/lib/ironic | 76 +++++++++++------ playbooks/ci-workarounds/pre.yaml | 1 + zuul.d/ironic-jobs.yaml | 131 ++++++++++++++++++++++-------- 3 files changed, 147 insertions(+), 61 deletions(-) diff --git a/devstack/lib/ironic b/devstack/lib/ironic index 050e9e6b41..9c8d0110ad 100644 --- a/devstack/lib/ironic +++ b/devstack/lib/ironic @@ -128,6 +128,7 @@ fi # IRONIC_SHARD_1_NAME + IRONIC_SHARDS will also configure n-cpu IRONIC_SHARDS=${IRONIC_SHARDS:-0} IRONIC_SHARD_1_NAME=${IRONIC_SHARD_1_NAME:-ds_shard_1} +IRONIC_NODE_SHARD_NAME=${IRONIC_NODE_SHARD_NAME:-ds_shard_1} # The file is composed of multiple lines, each line includes fields # separated by white space, in the format: @@ -313,19 +314,6 @@ if [[ ! "$IRONIC_RAMDISK_TYPE" =~ $IRONIC_SUPPORTED_RAMDISK_TYPES_RE ]]; then die $LINENO "Unrecognized IRONIC_RAMDISK_TYPE: $IRONIC_RAMDISK_TYPE. Expected 'tinyipa' or 'dib'" fi -# NOTE(TheJulia): If we ever run any arm64, we will need to consider doing -# the same. Nested virt is not a thing there. -# Prevent a case that will likely result in a failure. 
-# NOTE(hjensas): tinyipa does not support dhcpv6-stateful -if [[ $IRONIC_HW_ARCH != "aarch64" ]] && [[ $IRONIC_IPV6_ADDRESS_MODE != "dhcpv6-stateful" ]]; then - if [[ "$hostdomain" =~ "rax" ]] || [[ "$hostdomain" =~ "iweb" ]] || ! $(grep -q -E 'vmx|svm' /proc/cpuinfo) ; then - if [[ "$IRONIC_RAMDISK_TYPE" == "dib" ]]; then - echo "** WARNING ** - DIB based IPA images have been defined, however we are running devstack on an environment which does not support nested VMs. Due to virtualization constraints, we are automatically falling back to TinyIPA to ensure CI job passage." - IRONIC_RAMDISK_TYPE="tinyipa" - fi - fi -fi - # Which deploy driver to use - valid choices right now # are ``ipmi``, ``snmp`` and ``redfish``. # @@ -522,6 +510,25 @@ IRONIC_PROVISION_PROVIDER_NETWORK_TYPE=${IRONIC_PROVISION_PROVIDER_NETWORK_TYPE: # This is only used if IRONIC_PROVISION_NETWORK_NAME has been set. IRONIC_PROVISION_SEGMENTATION_ID=${IRONIC_PROVISION_SEGMENTATION_ID:-} +# NOTE(TheJulia): Some silly logic to help make multinode work because conductors +# refer to themselves and in this case we're running multiple conductors. +SUBNODE_FINAL="2" +# For nodes, there is a relative order, so when we have a subnode, we use +# this value to try and track which would be appropriate. +SUBNODE_ID="" +if [ -f /etc/devstack-host ]; then + # NOTE(TheJulia): this needs to match the Zuul Nodeset configuration + # of hostnames, and should then be mapped out with as many nodes + # required. + if $(grep -qi compute0 /etc/devstack-host); then + SUBNODE_FINAL="2" + SUBNODE_ID="1" + elif $(grep -qi compute1 /etc/devstack-host); then + SUBNODE_FINAL="3" + SUBNODE_ID="2" + fi +fi + if [[ "$IRONIC_IP_VERSION" != '6' ]]; then # NOTE(TheJulia): Lets not try and support mixed mode since the conductor # can't support mixed mode operation. We are either IPv4 OR IPv6. @@ -535,7 +542,7 @@ if [[ "$IRONIC_IP_VERSION" != '6' ]]; then # IRONIC_PROVISION_SUBNET_GATEWAY - is configured on primary node. 
# Ironic provision subnet gateway. IRONIC_PROVISION_SUBNET_GATEWAY=${IRONIC_PROVISION_SUBNET_GATEWAY:-'10.0.5.1'} - IRONIC_PROVISION_SUBNET_SUBNODE_IP=${IRONIC_PROVISION_SUBNET_SUBNODE_IP:-'10.0.5.2'} + IRONIC_PROVISION_SUBNET_SUBNODE_IP=${IRONIC_PROVISION_SUBNET_SUBNODE_IP:-'10.0.5.'$SUBNODE_FINAL} # Ironic provision subnet prefix # Example: IRONIC_PROVISION_SUBNET_PREFIX=10.0.5.0/24 @@ -546,7 +553,7 @@ else # HOST_IPV6 cannot be used for v6 testing. IRONIC_HOST_IPV6='fc00::1' IRONIC_PROVISION_SUBNET_GATEWAY=${IRONIC_PROVISION_SUBNET_GATEWAY:-'fc01::1'} - IRONIC_PROVISION_SUBNET_SUBNODE_IP=${IRONIC_PROVISION_SUBNET_SUBNODE_IP:-'fc01::2'} + IRONIC_PROVISION_SUBNET_SUBNODE_IP=${IRONIC_PROVISION_SUBNET_SUBNODE_IP:-'fc01::'$SUBNODE_FINAL} IRONIC_PROVISION_SUBNET_PREFIX=${IRONIC_PROVISION_SUBNET_PREFIX:-'fc01::/64'} IRONIC_TFTPSERVER_IP=$IRONIC_HOST_IPV6 fi @@ -3290,7 +3297,11 @@ function enroll_nodes { local switch_id if [[ "${IRONIC_NETWORK_SIMULATOR:-ovs}" == "ovs" ]]; then switch_id=$(echo $hardware_info |awk '{print $4}') - switch_info=$(echo $hardware_info |awk '{print $5}') + if [[ "$SUBNODE_ID" == "" ]]; then + switch_info=$(echo $hardware_info |awk '{print $5}') + else + switch_info="sub$SUBNODE_ID$(echo $hardware_info |awk '{print $5}')" + fi else switch_id="00:00:00:00:00:00" switch_info=${IRONIC_NETWORK_SIMULATOR:-brbm} @@ -3404,7 +3415,7 @@ function enroll_nodes { fi if [[ $IRONIC_SHARDS == "1" ]]; then - openstack --os-cloud devstack-system-admin baremetal node set $node_id --shard $IRONIC_SHARD_1_NAME + openstack --os-cloud devstack-system-admin baremetal node set $node_id --shard $IRONIC_NODE_SHARD_NAME fi # In case we using portgroups, we should API version that support them. @@ -3510,12 +3521,28 @@ function enroll_nodes { # that was created will fail the service_check in the end of the deployment _clean_ncpu_failure start_nova_compute - else - # NOTE(vsaienko) we enrolling IRONIC_VM_COUNT on each node. 
So on subnode - # we expect to have 2 x total_cpus - total_nodes=$(( total_nodes * 2 )) fi - wait_for_nova_resources $total_nodes + if [[ "$IRONIC_SHARD_1_NAME" != "$IRONIC_NODE_SHARD_NAME" ]]; then + # If we're running in a disjointed shard configuration, we may see + # only one node, the other compute services are the ones in use, + # which means our total local count is invalid and can't be used. + # So, we just make sure we can see *one* node successfully. + if [[ "$HOST_TOPOLOGY_ROLE" == "subnode" ]]; then + # If we're on a subnode, just wait for at least one node. + # If we're on the controller, we won't see any nodes. + wait_for_nova_resources $total_nodes + fi + # TODO(TheJulia): We should check the primary/controller node + # to ensure that nova-compute doesn't see the node configured + # for it if we are so configured. We would do that as the else + # of the above logic, which *should* never see the ironic baremetal + # node entry by default. + else + # In this case, we're looking to match all the nodes on this host, + # itself. Meaning the tests focus on the use of this node and we + # need to ensure we have all the nodes. 
+ wait_for_nova_resources $total_nodes + fi fi } @@ -4182,9 +4209,6 @@ function ironic_configure_tempest { if [[ -n "$TEMPEST_BAREMETAL_MAX_MICROVERSION" ]]; then iniset $TEMPEST_CONFIG baremetal max_microversion $TEMPEST_BAREMETAL_MAX_MICROVERSION fi - if [[ -n "$IRONIC_VM_COUNT" ]]; then - iniset $TEMPEST_CONFIG baremetal available_nodes $IRONIC_VM_COUNT - fi if [[ -n "$IRONIC_PING_TIMEOUT" ]]; then iniset $TEMPEST_CONFIG validation ping_timeout $IRONIC_PING_TIMEOUT fi @@ -4346,7 +4370,7 @@ function ironic_configure_tempest { function get_ironic_node_prefix { local node_prefix="node" if [[ "$HOST_TOPOLOGY_ROLE" == "subnode" ]]; then - node_prefix="$HOST_TOPOLOGY_ROLE" + node_prefix="$(cat /etc/devstack-host|sed s/compute/c/)node" fi echo $node_prefix } diff --git a/playbooks/ci-workarounds/pre.yaml b/playbooks/ci-workarounds/pre.yaml index 27f9cad1fa..bc60ed9618 100644 --- a/playbooks/ci-workarounds/pre.yaml +++ b/playbooks/ci-workarounds/pre.yaml @@ -10,6 +10,7 @@ sudo cp ~root/.ssh/id_rsa.pub ~root/.ssh/id_rsa ~stack/.ssh sudo chmod 700 ~stack/.ssh sudo chown -R stack ~stack + echo {{inventory_hostname}} | sudo tee /etc/devstack-host executable: /bin/bash roles: - multi-node-bridge diff --git a/zuul.d/ironic-jobs.yaml b/zuul.d/ironic-jobs.yaml index ce1bd055df..d6e03fa544 100644 --- a/zuul.d/ironic-jobs.yaml +++ b/zuul.d/ironic-jobs.yaml @@ -13,6 +13,42 @@ nodes: - controller +# NOTE(TheJulia): Based upon openstack-two-node-noble, but modeled +# for Ironic's multinode jobs. 
+- nodeset: + name: ironic-three-node-noble + nodes: + - name: controller + label: ubuntu-noble + - name: compute0 + label: ubuntu-noble + - name: compute1 + label: ubuntu-noble + groups: + # Node where tests are executed and test results collected + - name: tempest + nodes: + - controller + # Nodes running the compute service + - name: compute + nodes: + - controller + - compute0 + - compute1 + # Nodes that are not the controller + - name: subnode + nodes: + - compute0 + - compute1 + # Switch node for multinode networking setup + - name: switch + nodes: + - controller + # Peer nodes for multinode networking setup + - name: peers + nodes: + - compute0 + - compute1 - job: name: ironic-base @@ -684,17 +720,18 @@ # tests can also exercised as part of CI. - job: - name: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode - description: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode + name: ironic-tempest-ipa-wholedisk-direct-multinode + description: ironic-tempest-ipa-wholedisk-direct-multinode parent: tempest-multinode-full-base - nodeset: openstack-two-node-noble + nodeset: ironic-three-node-noble pre-run: playbooks/ci-workarounds/pre.yaml + post-run: playbooks/ci-workarounds/get_extra_logging.yaml required-projects: - opendev.org/openstack/ironic - opendev.org/openstack/ironic-python-agent - opendev.org/openstack/ironic-python-agent-builder - opendev.org/openstack/ironic-tempest-plugin - - opendev.org/openstack/virtualbmc + - opendev.org/openstack/sushy-tools - opendev.org/openstack/networking-generic-switch irrelevant-files: - ^.*\.rst$ @@ -728,27 +765,32 @@ HOST_TOPOLOGY_ROLE: primary INSTALL_TEMPEST: False # Don't install a tempest package globally IRONIC_AUTOMATED_CLEAN_ENABLED: False - HOST_TOPOLOGY_SUBNODES: "{{ hostvars['compute1']['nodepool']['public_ipv4'] }}" + # NOTE(TheJulia): We *MUST* list all of the hosts in this list, + # this drives the configuration of switch references on the controller + # node. 
Furthermore, this job will fail if there is not a public IPv4 + # address available for the SSH access to manage port configurations. + HOST_TOPOLOGY_SUBNODES: "{{ hostvars['compute0']['nodepool']['public_ipv4'] }} {{ hostvars['compute1']['nodepool']['public_ipv4'] }}" IRONIC_BAREMETAL_BASIC_OPS: True IRONIC_BUILD_DEPLOY_RAMDISK: False IRONIC_CALLBACK_TIMEOUT: 600 - IRONIC_DEPLOY_DRIVER: ipmi + IRONIC_DEPLOY_DRIVER: redfish + IRONIC_ENABLED_BOOT_INTERFACES: "redfish-virtual-media" + IRONIC_ENABLED_HARDWARE_TYPES: redfish + IRONIC_ENABLED_MANAGEMENT_INTERFACES: redfish + IRONIC_REDFISH_EMULATOR_FEATURE_SET: vmedia IRONIC_ENABLED_NETWORK_INTERFACES: flat,neutron IRONIC_INSPECTOR_BUILD_RAMDISK: False IRONIC_NETWORK_INTERFACE: neutron IRONIC_PROVISION_NETWORK_NAME: ironic-provision IRONIC_PROVISION_SUBNET_GATEWAY: 10.0.5.1 IRONIC_PROVISION_SUBNET_PREFIX: 10.0.5.0/24 - IRONIC_RAMDISK_TYPE: tinyipa IRONIC_TEMPEST_BUILD_TIMEOUT: 600 IRONIC_TEMPEST_WHOLE_DISK_IMAGE: True IRONIC_USE_LINK_LOCAL: True - IRONIC_VM_COUNT: 3 + IRONIC_VM_COUNT: 1 IRONIC_VM_EPHEMERAL_DISK: 0 IRONIC_VM_LOG_DIR: '{{ devstack_base_dir }}/ironic-bm-logs' - IRONIC_VM_SPECS_RAM: 1024 - IRONIC_VM_SPECS_DISK: 4 - IRONIC_VM_SPECS_CPU: 1 + IRONIC_VM_SPECS_DISK: 10 OVS_BRIDGE_MAPPINGS: 'mynetwork:brbm,public:br-infra' OVS_PHYSICAL_BRIDGE: brbm PHYSICAL_NETWORK: mynetwork @@ -809,8 +851,23 @@ ovn-northd: False q-ovn-metadata-agent: False rabbit: True + tls-proxy: False group-vars: + # Turns out, devstack looks for the subnode group name. 
subnode: + devstack_services: + atop: True + c-api: False + c-bak: False + c-sch: False + c-vol: False + cinder: False + q-agt: True + ovn-controller: False + ovn-northd: False + q-ovn-metadata-agent: False + n-cpu: True + tls-proxy: False devstack_localrc: ENABLE_TENANT_TUNNELS: False ENABLE_TENANT_VLANS: True @@ -819,61 +876,65 @@ HOST_TOPOLOGY_ROLE: subnode IRONIC_AUTOMATED_CLEAN_ENABLED: False IRONIC_BAREMETAL_BASIC_OPS: True - IRONIC_DEPLOY_DRIVER: ipmi + IRONIC_DEPLOY_DRIVER: redfish + IRONIC_ENABLED_BOOT_INTERFACES: "redfish-virtual-media" + IRONIC_ENABLED_HARDWARE_TYPES: redfish + IRONIC_ENABLED_MANAGEMENT_INTERFACES: redfish + IRONIC_REDFISH_EMULATOR_FEATURE_SET: vmedia IRONIC_ENABLED_NETWORK_INTERFACES: flat,neutron IRONIC_NETWORK_INTERFACE: neutron IRONIC_PROVISION_NETWORK_NAME: ironic-provision - IRONIC_RAMDISK_TYPE: tinyipa IRONIC_USE_LINK_LOCAL: True IRONIC_VM_COUNT: 3 IRONIC_VM_EPHEMERAL_DISK: 0 IRONIC_VM_LOG_DIR: '{{ devstack_base_dir }}/ironic-bm-logs' - IRONIC_VM_NETWORK_BRIDGE: sub1brbm - IRONIC_VM_SPECS_RAM: 1024 - IRONIC_VM_SPECS_DISK: 4 - IRONIC_VM_SPECS_CPU: 1 - OVS_BRIDGE_MAPPINGS: 'mynetwork:sub1brbm,public:br-infra' - OVS_PHYSICAL_BRIDGE: sub1brbm PHYSICAL_NETWORK: mynetwork Q_AGENT: openvswitch Q_ML2_TENANT_NETWORK_TYPE: vlan VIRT_DRIVER: ironic PUBLIC_BRIDGE: br-infra LIBVIRT_STORAGE_POOL_PATH: /opt/libvirt/images - devstack_services: - atop: True - c-api: False - c-bak: False - c-sch: False - c-vol: False - cinder: False + OVS_BRIDGE_MAPPINGS: 'mynetwork:brbm,public:br-infra' + OVS_PHYSICAL_BRIDGE: brbm + +- job: + # NOTE(TheJulia): Compatibility job definition, can be removed. + name: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode + description: ironic-tempest-ipa-wholedisk-direct-multinode + parent: ironic-tempest-ipa-wholedisk-direct-multinode + +- job: + # NOTE(TheJulia): Compatibility job definition, can be removed. 
+ name: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode-shard + description: ironic-tempest-ipa-wholedisk-direct-multinode-shard + parent: ironic-tempest-ipa-wholedisk-direct-multinode-shard - q-agt: True - ovn-controller: False - ovn-northd: False - q-ovn-metadata-agent: False - n-cpu: True - job: # NOTE(JayF) This job sets up two nova-computes with two different shards # TODO(JayF) Add a post-run validation to ensure the two n-cpus did not # see each others' nodes - name: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode-shard - description: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode-shard + name: ironic-tempest-ipa-wholedisk-direct-multinode-shard + description: ironic-tempest-ipa-wholedisk-direct-multinode-shard with automated cleaning enabled. - parent: ironic-tempest-ipa-wholedisk-direct-tinyipa-multinode - nodeset: openstack-two-node-noble + parent: ironic-tempest-ipa-wholedisk-direct-multinode + nodeset: ironic-three-node-noble vars: tempest_test_regex: "BaremetalBasicOps" devstack_localrc: IRONIC_SHARDS: 1 IRONIC_SHARD_1_NAME: "main-node" IRONIC_AUTOMATED_CLEAN_ENABLED: True + # Let the local services ignore the controller node, + # but focus on managing the nodes on the subnode, since + # we have greater resources on the other nodes. + IRONIC_NODE_SHARD_NAME: "subnode" group-vars: subnode: devstack_localrc: IRONIC_SHARDS: 1 IRONIC_SHARD_1_NAME: "subnode" + IRONIC_NODE_SHARD_NAME: "main-node" - job: name: ironic-tox-unit-with-driver-libs