From 3af2ecc13fa9334de8418accaed4fffefefb41da Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Tue, 19 Apr 2022 18:36:50 +0200 Subject: [PATCH] Allow claiming PCI PF if child VF is unavailable As If9ab424cc7375a1f0d41b03f01c4a823216b3eb8 stated there is a way for the pci_device table to become inconsistent. Parent PF can be in 'available' state while children VFs are still in 'unavailable' state. In this situation the PF is schedulable but the PCI claim will fail when try to mark the dependent VFs unavailable. This patch changes the PCI claim logic to allow claiming the parent PF in the inconsistent situation as we assume that it is safe to do so. This claim also fixed the inconsistency so that when the parent PF is freed the children VFs become available again. Closes-Bug: #1969496 Change-Id: I575ce06bcc913add7db0849f85728371da2032fc --- nova/objects/pci_device.py | 38 +++++++- nova/pci/stats.py | 6 +- nova/tests/unit/pci/test_manager.py | 144 ++++++++++++++++++++++++---- 3 files changed, 162 insertions(+), 26 deletions(-) diff --git a/nova/objects/pci_device.py b/nova/objects/pci_device.py index b675641a06db..b0d5b75826b4 100644 --- a/nova/objects/pci_device.py +++ b/nova/objects/pci_device.py @@ -346,10 +346,40 @@ class PciDevice(base.NovaPersistentObject, base.NovaObject): # Update PF status to CLAIMED if all of it dependants are free # and set their status to UNCLAIMABLE vfs_list = self.child_devices - if not all([vf.is_available() for vf in vfs_list]): - raise exception.PciDeviceVFInvalidStatus( - compute_node_id=self.compute_node_id, - address=self.address) + non_free_dependants = [ + vf for vf in vfs_list if not vf.is_available()] + if non_free_dependants: + # NOTE(gibi): There should not be any dependent devices that + # are UNCLAIMABLE or UNAVAILABLE as the parent is AVAILABLE, + # but we got reports in bug 1969496 that this inconsistency + # can happen. So check if the only non-free devices are in + # state UNCLAIMABLE or UNAVAILABLE then we log a warning but + # allow to claim the parent. + actual_statuses = { + child.status for child in non_free_dependants} + allowed_non_free_statues = { + fields.PciDeviceStatus.UNCLAIMABLE, + fields.PciDeviceStatus.UNAVAILABLE, + } + if actual_statuses - allowed_non_free_statues == set(): + LOG.warning( + "Some child device of parent %s is in an inconsistent " + "state. If you can reproduce this warning then please " + "report a bug at " + "https://bugs.launchpad.net/nova/+filebug with " + "reproduction steps. Inconsistent children with " + "state: %s", + self.address, + ",".join( + "%s - %s" % (child.address, child.status) + for child in non_free_dependants + ), + ) + + else: + raise exception.PciDeviceVFInvalidStatus( + compute_node_id=self.compute_node_id, + address=self.address) self._bulk_update_status(vfs_list, fields.PciDeviceStatus.UNCLAIMABLE) diff --git a/nova/pci/stats.py b/nova/pci/stats.py index c8dda84d4bfb..6a53c43c787f 100644 --- a/nova/pci/stats.py +++ b/nova/pci/stats.py @@ -279,8 +279,12 @@ class PciDeviceStats(object): if pci_dev.dev_type == fields.PciDeviceType.SRIOV_PF: vfs_list = pci_dev.child_devices if vfs_list: + free_devs = self.get_free_devs() for vf in vfs_list: - self.remove_device(vf) + # NOTE(gibi): do not try to remove a device that are + # already removed + if vf in free_devs: + self.remove_device(vf) elif pci_dev.dev_type in ( fields.PciDeviceType.SRIOV_VF, fields.PciDeviceType.VDPA, diff --git a/nova/tests/unit/pci/test_manager.py b/nova/tests/unit/pci/test_manager.py index a924d71ef400..6682b3e3b216 100644 --- a/nova/tests/unit/pci/test_manager.py +++ b/nova/tests/unit/pci/test_manager.py @@ -507,32 +507,134 @@ class PciDevTrackerTestCase(test.NoDBTestCase): ) # now try to claim and allocate the PF. It should work as it is # available - # This is bug 1969496 as the claim fails with exception - ex = self.assertRaises( - exception.PciDevicePoolEmpty, - self.tracker.claim_instance, - mock.sentinel.context, - pci_requests_obj, - None - ) - self.assertIn( - 'Attempt to consume PCI device 1:0000:00:02.1 from empty pool', - str(ex) - ) + self.tracker.claim_instance( + mock.sentinel.context, pci_requests_obj, None) + self.tracker.allocate_instance({'uuid': uuidsentinel.instance1}) + pf_dev = self._get_device_by_address(pf['address']) - self.assertEqual('available', pf_dev.status) + self.assertEqual('allocated', pf_dev.status) vf_dev = self._get_device_by_address(vf['address']) self.assertEqual('unavailable', vf_dev.status) - # This should work when the bug is fixed - # self.tracker.claim_instance( - # mock.sentinel.context, pci_requests_obj, None) - # self.tracker.allocate_instance({'uuid': uuidsentinel.instance1}) + self.assertIn( + 'Some child device of parent 0000:00:01.1 is in an inconsistent ' + 'state. If you can reproduce this warning then please report a ' + 'bug at https://bugs.launchpad.net/nova/+filebug with ' + 'reproduction steps. Inconsistent children with state: ' + '0000:00:02.1 - unavailable', + self.stdlog.logger.output + ) - # pf_dev = self._get_device_by_address(pf['address']) - # self.assertEqual('allocated', pf_dev.status) - # vf_dev = self._get_device_by_address(vf['address']) - # self.assertEqual('unavailable', vf_dev.status) + # Ensure that the claim actually fixes the inconsistency so when the + # parent if freed the children become available too. + self.tracker.free_instance( + mock.sentinel.context, {'uuid': uuidsentinel.instance1}) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf_dev = self._get_device_by_address(vf['address']) + self.assertEqual('available', vf_dev.status) + + def test_claim_available_pf_while_children_vfs_are_in_mixed_state(self): + # We start with a PF parent and two VF children. The PF is available + # and one of the VF is unavailable while the other is available. + pf = copy.deepcopy(fake_db_dev_3) + vf1 = copy.deepcopy(fake_db_dev_4) + vf1['status'] = fields.PciDeviceStatus.UNAVAILABLE + vf2 = copy.deepcopy(fake_db_dev_5) + vf2['status'] = fields.PciDeviceStatus.AVAILABLE + self._create_tracker([pf, vf1, vf2]) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf1_dev = self._get_device_by_address(vf1['address']) + self.assertEqual('unavailable', vf1_dev.status) + vf2_dev = self._get_device_by_address(vf2['address']) + self.assertEqual('available', vf2_dev.status) + + pci_requests_obj = self._create_pci_requests_object( + [ + { + 'count': 1, + 'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}] + } + ], + instance_uuid=uuidsentinel.instance1, + ) + # now try to claim and allocate the PF. It should work as it is + # available + self.tracker.claim_instance( + mock.sentinel.context, pci_requests_obj, None) + self.tracker.allocate_instance({'uuid': uuidsentinel.instance1}) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('allocated', pf_dev.status) + vf1_dev = self._get_device_by_address(vf1['address']) + self.assertEqual('unavailable', vf1_dev.status) + vf2_dev = self._get_device_by_address(vf2['address']) + self.assertEqual('unavailable', vf2_dev.status) + + self.assertIn( + 'Some child device of parent 0000:00:01.1 is in an inconsistent ' + 'state. If you can reproduce this warning then please report a ' + 'bug at https://bugs.launchpad.net/nova/+filebug with ' + 'reproduction steps. Inconsistent children with state: ' + '0000:00:02.1 - unavailable', + self.stdlog.logger.output + ) + + # Ensure that the claim actually fixes the inconsistency so when the + # parent if freed the children become available too. + self.tracker.free_instance( + mock.sentinel.context, {'uuid': uuidsentinel.instance1}) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf1_dev = self._get_device_by_address(vf1['address']) + self.assertEqual('available', vf1_dev.status) + vf2_dev = self._get_device_by_address(vf2['address']) + self.assertEqual('available', vf2_dev.status) + + def test_claim_available_pf_while_a_child_is_used(self): + pf = copy.deepcopy(fake_db_dev_3) + vf1 = copy.deepcopy(fake_db_dev_4) + vf1['status'] = fields.PciDeviceStatus.UNAVAILABLE + vf2 = copy.deepcopy(fake_db_dev_5) + vf2['status'] = fields.PciDeviceStatus.CLAIMED + self._create_tracker([pf, vf1, vf2]) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf1_dev = self._get_device_by_address(vf1['address']) + self.assertEqual('unavailable', vf1_dev.status) + vf2_dev = self._get_device_by_address(vf2['address']) + self.assertEqual('claimed', vf2_dev.status) + + pci_requests_obj = self._create_pci_requests_object( + [ + { + 'count': 1, + 'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}] + } + ], + instance_uuid=uuidsentinel.instance1, + ) + # now try to claim and allocate the PF. The claim should fail as on of + # the child is used. + self.assertRaises( + exception.PciDeviceVFInvalidStatus, + self.tracker.claim_instance, + mock.sentinel.context, + pci_requests_obj, + None, + ) + + pf_dev = self._get_device_by_address(pf['address']) + self.assertEqual('available', pf_dev.status) + vf1_dev = self._get_device_by_address(vf1['address']) + self.assertEqual('unavailable', vf1_dev.status) + vf2_dev = self._get_device_by_address(vf2['address']) + self.assertEqual('claimed', vf2_dev.status) def test_update_pci_for_instance_active(self): pci_requests_obj = self._create_pci_requests_object(fake_pci_requests)