diff --git a/ironic/common/neutron.py b/ironic/common/neutron.py index cb32d92cc6..93e4716f4f 100644 --- a/ironic/common/neutron.py +++ b/ironic/common/neutron.py @@ -407,8 +407,27 @@ def add_ports_to_network(task, network_uuid, security_groups=None): update_port_attrs) if CONF.neutron.dhcpv6_stateful_address_count > 1: _add_ip_addresses_for_ipv6_stateful(task.context, port, client) - if is_smart_nic: - wait_for_port_status(client, port.id, 'ACTIVE') + + binding_fail_fatal = False + is_neutron_iface = node.network_interface == 'neutron' + + if is_neutron_iface: + binding_fail_fatal = getattr( + CONF.neutron, 'fail_on_port_binding_failure', False) + + default_failure_behavior = is_smart_nic or binding_fail_fatal + + fail_on_binding_failure = node.driver_info.get( + 'fail_on_binding_failure', default_failure_behavior) + + # NOTE(cid): Only check port status if it's a smart NIC or if we're + # configured to fail on binding failures. This avoids unnecessary + # failures when using network interfaces where binding may fail but + # we want to continue anyway (like in the case of flat networks). + if fail_on_binding_failure: + wait_for_port_status( + client, port.id, 'ACTIVE', + fail_on_binding_failure=fail_on_binding_failure) except openstack_exc.OpenStackCloudException as e: failures.append(ironic_port.uuid) LOG.warning("Could not create neutron port for node's " @@ -1061,12 +1080,15 @@ def wait_for_host_agent(client, host_id, target_state='up'): stop=tenacity.stop_after_attempt(CONF.agent.neutron_agent_max_attempts), wait=tenacity.wait_fixed(CONF.agent.neutron_agent_status_retry_interval), reraise=True) -def wait_for_port_status(client, port_id, status): +def wait_for_port_status(client, port_id, status, + fail_on_binding_failure=None): """Wait for port status to be the desired status :param client: A Neutron client object. :param port_id: Neutron port_id :param status: Port's target status, can be ACTIVE, DOWN ... etc. + :param fail_on_binding_failure: Whether to raise exception if port + binding fails. :returns: boolean indicates that the port status matches the required value passed by param status. :raises: InvalidParameterValue if the port does not exist. @@ -1080,6 +1102,14 @@ def wait_for_port_status(client, port_id, status): {'port_id': port_id, 'status': port.status}) if port.status == status: return True + + # fail early on port binding failure + if port.get('binding:vif_type') == 'binding_failed': + msg = "Binding failed for neutron port %s" % port_id + if fail_on_binding_failure: + LOG.error(msg) + raise openstack_exc.OpenStackCloudException(msg) + LOG.warning(msg) raise exception.NetworkError( 'Port %(port_id)s failed to reach status %(status)s' % { 'port_id': port_id, 'status': status}) diff --git a/ironic/conf/neutron.py b/ironic/conf/neutron.py index 4d9faeb05c..c36ceee4a4 100644 --- a/ironic/conf/neutron.py +++ b/ironic/conf/neutron.py @@ -26,6 +26,10 @@ opts = [ mutable=True, help=_('Delay value to wait for Neutron agents to setup ' 'sufficient DHCP configuration for port.')), + cfg.BoolOpt('fail_on_port_binding_failure', + default=True, + help=_('Whether to fail or continue deployment if neutron ' + 'port binding fails.')), cfg.IntOpt('retries', default=3, mutable=True, diff --git a/ironic/drivers/modules/network/common.py b/ironic/drivers/modules/network/common.py index 018a0ef024..d2370a0d29 100644 --- a/ironic/drivers/modules/network/common.py +++ b/ironic/drivers/modules/network/common.py @@ -294,8 +294,27 @@ def plug_port_to_tenant_network(task, port_like_obj, client=None): try: neutron.update_neutron_port(task.context, vif_id, port_attrs) - if is_smart_nic: - neutron.wait_for_port_status(client, vif_id, 'ACTIVE') + + binding_fail_fatal = False + is_neutron_iface = node.network_interface == 'neutron' + + if is_neutron_iface: + binding_fail_fatal = getattr( + CONF.neutron, 'fail_on_port_binding_failure', False) + + default_failure_behavior = is_smart_nic or binding_fail_fatal + + fail_on_binding_failure = node.driver_info.get( + 'fail_on_binding_failure', default_failure_behavior) + + # NOTE(cid): Only check port status if it's a smart NIC or if we're + # configured to fail on binding failures. This avoids unnecessary + # failures when using network interfaces where binding may fail but + # we want to continue anyway (like in the case of flat networks). + if fail_on_binding_failure: + neutron.wait_for_port_status( + client, vif_id, 'ACTIVE', + fail_on_binding_failure=fail_on_binding_failure) except openstack_exc.OpenStackCloudException as e: msg = (_('Could not add public network VIF %(vif)s ' 'to node %(node)s, possible network issue. %(exc)s') % diff --git a/ironic/tests/unit/common/test_neutron.py b/ironic/tests/unit/common/test_neutron.py index 93cebe5c57..623d2b783f 100644 --- a/ironic/tests/unit/common/test_neutron.py +++ b/ironic/tests/unit/common/test_neutron.py @@ -1026,7 +1026,8 @@ class TestNeutronNetworkActions(db_base.DbTestCase): wait_agent_mock.assert_called_once_with( self.client_mock, 'hostname') wait_port_mock.assert_called_once_with( - self.client_mock, self.neutron_port.id, 'ACTIVE') + self.client_mock, self.neutron_port.id, 'ACTIVE', + fail_on_binding_failure=True) @mock.patch.object(neutron, 'is_smartnic_port', autospec=True) @mock.patch.object(neutron, 'wait_for_host_agent', autospec=True) diff --git a/ironic/tests/unit/drivers/modules/network/test_common.py b/ironic/tests/unit/drivers/modules/network/test_common.py index 7bd0b897fe..286ac35d38 100644 --- a/ironic/tests/unit/drivers/modules/network/test_common.py +++ b/ironic/tests/unit/drivers/modules/network/test_common.py @@ -428,8 +428,11 @@ class TestCommonFunctions(db_base.DbTestCase): task, self.vif_id, {'physnet2'}) @mock.patch.object(neutron_common, 'update_neutron_port', autospec=True) + @mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True) @mock.patch.object(neutron_common, 'get_client', autospec=True) - def test_plug_port_to_tenant_network_client(self, mock_gc, mock_update): + def test_plug_port_to_tenant_network_client(self, mock_gc, + wait_mock_status, + mock_update): self.port.internal_info = {common.TENANT_VIF_KEY: self.vif_id} self.port.save() with task_manager.acquire(self.context, self.node.id) as task: @@ -439,8 +442,11 @@ class TestCommonFunctions(db_base.DbTestCase): self.assertTrue(mock_update.called) @mock.patch.object(neutron_common, 'update_neutron_port', autospec=True) + @mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True) @mock.patch.object(neutron_common, 'get_client', autospec=True) - def test_plug_port_to_tenant_network_no_client(self, mock_gc, mock_update): + def test_plug_port_to_tenant_network_no_client(self, mock_gc, + wait_mock_status, + mock_update): self.port.internal_info = {common.TENANT_VIF_KEY: self.vif_id} self.port.save() with task_manager.acquire(self.context, self.node.id) as task: @@ -480,7 +486,7 @@ class TestCommonFunctions(db_base.DbTestCase): wait_agent_mock.assert_called_once_with( nclient, 'hostname') wait_port_mock.assert_called_once_with( - nclient, self.vif_id, 'ACTIVE') + nclient, self.vif_id, 'ACTIVE', fail_on_binding_failure=True) self.assertTrue(mock_update.called) diff --git a/ironic/tests/unit/drivers/modules/network/test_neutron.py b/ironic/tests/unit/drivers/modules/network/test_neutron.py index b7a352d63d..9e9bb350ca 100644 --- a/ironic/tests/unit/drivers/modules/network/test_neutron.py +++ b/ironic/tests/unit/drivers/modules/network/test_neutron.py @@ -561,10 +561,12 @@ class NeutronInterfaceTestCase(db_base.DbTestCase): @mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True) @mock.patch.object(neutron_common, 'update_neutron_port', autospec=True) + @mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True) @mock.patch.object(neutron_common, 'get_client', autospec=True) @mock.patch.object(neutron, 'LOG', autospec=True) def test_configure_tenant_networks_multiple_ports_one_vif_id( - self, log_mock, client_mock, update_mock, wait_agent_mock): + self, log_mock, client_mock, wait_mock_status, update_mock, + wait_agent_mock): expected_attrs = { 'binding:vnic_type': 'baremetal', 'binding:host_id': self.node.uuid, @@ -583,8 +585,10 @@ class NeutronInterfaceTestCase(db_base.DbTestCase): @mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True) @mock.patch.object(neutron_common, 'update_neutron_port', autospec=True) + @mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True) @mock.patch.object(neutron_common, 'get_client', autospec=True) def test_configure_tenant_networks_update_fail(self, client_mock, + wait_mock_status, update_mock, wait_agent_mock): update_mock.side_effect = openstack_exc.OpenStackCloudException( @@ -595,11 +599,26 @@ class NeutronInterfaceTestCase(db_base.DbTestCase): self.interface.configure_tenant_networks, task) client_mock.assert_called_once_with(context=task.context) + @mock.patch.object(neutron_common, '_get_port_by_uuid', autospec=True) + @mock.patch.object(neutron_common, 'get_client', autospec=True) + def test_configure_tenant_networks_update_binding_fail(self, client_mock, + mock_get_port): + self.config(fail_on_port_binding_failure=True, group='neutron') + port = mock.MagicMock() + port.get.return_value = 'binding_failed' + mock_get_port.return_value = port + + with task_manager.acquire(self.context, self.node.id) as task: + self.assertRaisesRegex( + exception.NetworkError, 'Binding failed', + self.interface.configure_tenant_networks, task) + @mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True) @mock.patch.object(neutron_common, 'update_neutron_port', autospec=True) + @mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True) @mock.patch.object(neutron_common, 'get_client', autospec=True) - def _test_configure_tenant_networks(self, client_mock, update_mock, - wait_agent_mock, + def _test_configure_tenant_networks(self, client_mock, wait_mock_status, + update_mock, wait_agent_mock, is_client_id=False): # NOTE(TheJulia): Until we have a replacement for infiniband client-id # storage, extra has to stay put. On a plus side, this would be @@ -669,12 +688,13 @@ class NeutronInterfaceTestCase(db_base.DbTestCase): @mock.patch.object(neutron_common, 'get_neutron_port_data', autospec=True) @mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True) @mock.patch.object(neutron_common, 'update_neutron_port', autospec=True) + @mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True) @mock.patch.object(neutron_common, 'get_client', autospec=True) @mock.patch.object(neutron_common, 'get_local_group_information', autospec=True) def test_configure_tenant_networks_with_portgroups( - self, glgi_mock, client_mock, update_mock, wait_agent_mock, - port_data_mock): + self, glgi_mock, client_mock, wait_mock_status, update_mock, + wait_agent_mock, port_data_mock): pg = utils.create_test_portgroup( self.context, node_id=self.node.id, address='ff:54:00:cf:2d:32', internal_info={'tenant_vif_port_id': uuidutils.generate_uuid()}) @@ -730,12 +750,13 @@ class NeutronInterfaceTestCase(db_base.DbTestCase): @mock.patch.object(neutron_common, 'get_neutron_port_data', autospec=True) @mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True) @mock.patch.object(neutron_common, 'update_neutron_port', autospec=True) + @mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True) @mock.patch.object(neutron_common, 'get_client', autospec=True) @mock.patch.object(neutron_common, 'get_local_group_information', autospec=True) def test_configure_tenant_networks_with_portgroups_no_address( - self, glgi_mock, client_mock, update_mock, wait_agent_mock, - port_data_mock): + self, glgi_mock, client_mock, wait_mock_status, update_mock, + wait_agent_mock, port_data_mock): pg = utils.create_test_portgroup( self.context, node_id=self.node.id, address=None, internal_info={'tenant_vif_port_id': uuidutils.generate_uuid()}) diff --git a/releasenotes/notes/configure-fail-early-on-port-binding-failure-e4c9749a84bd947f.yaml b/releasenotes/notes/configure-fail-early-on-port-binding-failure-e4c9749a84bd947f.yaml new file mode 100644 index 0000000000..b40de54263 --- /dev/null +++ b/releasenotes/notes/configure-fail-early-on-port-binding-failure-e4c9749a84bd947f.yaml @@ -0,0 +1,10 @@ +--- +features: + - | + Adds a new boolean configuration option ``[neutron]fail_on_port_binding_failure`` + and corresponding node ``driver_info`` setting ``fail_on_binding_failure`` + to control whether deployment should fail or continue if Neutron port + binding fails. With a default of ``true``, if your network is not + configured properly, this will likely cause deployment failures. + To maintain the previous behavior, explicitly set this option to False in + your configuration.