Merge "Allow claiming PCI PF if child VF is unavailable"
This commit is contained in:
@@ -346,10 +346,40 @@ class PciDevice(base.NovaPersistentObject, base.NovaObject):
|
|||||||
# Update PF status to CLAIMED if all of it dependants are free
|
# Update PF status to CLAIMED if all of it dependants are free
|
||||||
# and set their status to UNCLAIMABLE
|
# and set their status to UNCLAIMABLE
|
||||||
vfs_list = self.child_devices
|
vfs_list = self.child_devices
|
||||||
if not all([vf.is_available() for vf in vfs_list]):
|
non_free_dependants = [
|
||||||
raise exception.PciDeviceVFInvalidStatus(
|
vf for vf in vfs_list if not vf.is_available()]
|
||||||
compute_node_id=self.compute_node_id,
|
if non_free_dependants:
|
||||||
address=self.address)
|
# NOTE(gibi): There should not be any dependent devices that
|
||||||
|
# are UNCLAIMABLE or UNAVAILABLE as the parent is AVAILABLE,
|
||||||
|
# but we got reports in bug 1969496 that this inconsistency
|
||||||
|
# can happen. So check if the only non-free devices are in
|
||||||
|
# state UNCLAIMABLE or UNAVAILABLE then we log a warning but
|
||||||
|
# allow to claim the parent.
|
||||||
|
actual_statuses = {
|
||||||
|
child.status for child in non_free_dependants}
|
||||||
|
allowed_non_free_statues = {
|
||||||
|
fields.PciDeviceStatus.UNCLAIMABLE,
|
||||||
|
fields.PciDeviceStatus.UNAVAILABLE,
|
||||||
|
}
|
||||||
|
if actual_statuses - allowed_non_free_statues == set():
|
||||||
|
LOG.warning(
|
||||||
|
"Some child device of parent %s is in an inconsistent "
|
||||||
|
"state. If you can reproduce this warning then please "
|
||||||
|
"report a bug at "
|
||||||
|
"https://bugs.launchpad.net/nova/+filebug with "
|
||||||
|
"reproduction steps. Inconsistent children with "
|
||||||
|
"state: %s",
|
||||||
|
self.address,
|
||||||
|
",".join(
|
||||||
|
"%s - %s" % (child.address, child.status)
|
||||||
|
for child in non_free_dependants
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise exception.PciDeviceVFInvalidStatus(
|
||||||
|
compute_node_id=self.compute_node_id,
|
||||||
|
address=self.address)
|
||||||
self._bulk_update_status(vfs_list,
|
self._bulk_update_status(vfs_list,
|
||||||
fields.PciDeviceStatus.UNCLAIMABLE)
|
fields.PciDeviceStatus.UNCLAIMABLE)
|
||||||
|
|
||||||
|
@@ -279,8 +279,12 @@ class PciDeviceStats(object):
|
|||||||
if pci_dev.dev_type == fields.PciDeviceType.SRIOV_PF:
|
if pci_dev.dev_type == fields.PciDeviceType.SRIOV_PF:
|
||||||
vfs_list = pci_dev.child_devices
|
vfs_list = pci_dev.child_devices
|
||||||
if vfs_list:
|
if vfs_list:
|
||||||
|
free_devs = self.get_free_devs()
|
||||||
for vf in vfs_list:
|
for vf in vfs_list:
|
||||||
self.remove_device(vf)
|
# NOTE(gibi): do not try to remove a device that are
|
||||||
|
# already removed
|
||||||
|
if vf in free_devs:
|
||||||
|
self.remove_device(vf)
|
||||||
elif pci_dev.dev_type in (
|
elif pci_dev.dev_type in (
|
||||||
fields.PciDeviceType.SRIOV_VF,
|
fields.PciDeviceType.SRIOV_VF,
|
||||||
fields.PciDeviceType.VDPA,
|
fields.PciDeviceType.VDPA,
|
||||||
|
@@ -507,32 +507,134 @@ class PciDevTrackerTestCase(test.NoDBTestCase):
|
|||||||
)
|
)
|
||||||
# now try to claim and allocate the PF. It should work as it is
|
# now try to claim and allocate the PF. It should work as it is
|
||||||
# available
|
# available
|
||||||
# This is bug 1969496 as the claim fails with exception
|
self.tracker.claim_instance(
|
||||||
ex = self.assertRaises(
|
mock.sentinel.context, pci_requests_obj, None)
|
||||||
exception.PciDevicePoolEmpty,
|
self.tracker.allocate_instance({'uuid': uuidsentinel.instance1})
|
||||||
self.tracker.claim_instance,
|
|
||||||
mock.sentinel.context,
|
|
||||||
pci_requests_obj,
|
|
||||||
None
|
|
||||||
)
|
|
||||||
self.assertIn(
|
|
||||||
'Attempt to consume PCI device 1:0000:00:02.1 from empty pool',
|
|
||||||
str(ex)
|
|
||||||
)
|
|
||||||
pf_dev = self._get_device_by_address(pf['address'])
|
pf_dev = self._get_device_by_address(pf['address'])
|
||||||
self.assertEqual('available', pf_dev.status)
|
self.assertEqual('allocated', pf_dev.status)
|
||||||
vf_dev = self._get_device_by_address(vf['address'])
|
vf_dev = self._get_device_by_address(vf['address'])
|
||||||
self.assertEqual('unavailable', vf_dev.status)
|
self.assertEqual('unavailable', vf_dev.status)
|
||||||
|
|
||||||
# This should work when the bug is fixed
|
self.assertIn(
|
||||||
# self.tracker.claim_instance(
|
'Some child device of parent 0000:00:01.1 is in an inconsistent '
|
||||||
# mock.sentinel.context, pci_requests_obj, None)
|
'state. If you can reproduce this warning then please report a '
|
||||||
# self.tracker.allocate_instance({'uuid': uuidsentinel.instance1})
|
'bug at https://bugs.launchpad.net/nova/+filebug with '
|
||||||
|
'reproduction steps. Inconsistent children with state: '
|
||||||
|
'0000:00:02.1 - unavailable',
|
||||||
|
self.stdlog.logger.output
|
||||||
|
)
|
||||||
|
|
||||||
# pf_dev = self._get_device_by_address(pf['address'])
|
# Ensure that the claim actually fixes the inconsistency so when the
|
||||||
# self.assertEqual('allocated', pf_dev.status)
|
# parent if freed the children become available too.
|
||||||
# vf_dev = self._get_device_by_address(vf['address'])
|
self.tracker.free_instance(
|
||||||
# self.assertEqual('unavailable', vf_dev.status)
|
mock.sentinel.context, {'uuid': uuidsentinel.instance1})
|
||||||
|
|
||||||
|
pf_dev = self._get_device_by_address(pf['address'])
|
||||||
|
self.assertEqual('available', pf_dev.status)
|
||||||
|
vf_dev = self._get_device_by_address(vf['address'])
|
||||||
|
self.assertEqual('available', vf_dev.status)
|
||||||
|
|
||||||
|
def test_claim_available_pf_while_children_vfs_are_in_mixed_state(self):
|
||||||
|
# We start with a PF parent and two VF children. The PF is available
|
||||||
|
# and one of the VF is unavailable while the other is available.
|
||||||
|
pf = copy.deepcopy(fake_db_dev_3)
|
||||||
|
vf1 = copy.deepcopy(fake_db_dev_4)
|
||||||
|
vf1['status'] = fields.PciDeviceStatus.UNAVAILABLE
|
||||||
|
vf2 = copy.deepcopy(fake_db_dev_5)
|
||||||
|
vf2['status'] = fields.PciDeviceStatus.AVAILABLE
|
||||||
|
self._create_tracker([pf, vf1, vf2])
|
||||||
|
|
||||||
|
pf_dev = self._get_device_by_address(pf['address'])
|
||||||
|
self.assertEqual('available', pf_dev.status)
|
||||||
|
vf1_dev = self._get_device_by_address(vf1['address'])
|
||||||
|
self.assertEqual('unavailable', vf1_dev.status)
|
||||||
|
vf2_dev = self._get_device_by_address(vf2['address'])
|
||||||
|
self.assertEqual('available', vf2_dev.status)
|
||||||
|
|
||||||
|
pci_requests_obj = self._create_pci_requests_object(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
'count': 1,
|
||||||
|
'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
instance_uuid=uuidsentinel.instance1,
|
||||||
|
)
|
||||||
|
# now try to claim and allocate the PF. It should work as it is
|
||||||
|
# available
|
||||||
|
self.tracker.claim_instance(
|
||||||
|
mock.sentinel.context, pci_requests_obj, None)
|
||||||
|
self.tracker.allocate_instance({'uuid': uuidsentinel.instance1})
|
||||||
|
|
||||||
|
pf_dev = self._get_device_by_address(pf['address'])
|
||||||
|
self.assertEqual('allocated', pf_dev.status)
|
||||||
|
vf1_dev = self._get_device_by_address(vf1['address'])
|
||||||
|
self.assertEqual('unavailable', vf1_dev.status)
|
||||||
|
vf2_dev = self._get_device_by_address(vf2['address'])
|
||||||
|
self.assertEqual('unavailable', vf2_dev.status)
|
||||||
|
|
||||||
|
self.assertIn(
|
||||||
|
'Some child device of parent 0000:00:01.1 is in an inconsistent '
|
||||||
|
'state. If you can reproduce this warning then please report a '
|
||||||
|
'bug at https://bugs.launchpad.net/nova/+filebug with '
|
||||||
|
'reproduction steps. Inconsistent children with state: '
|
||||||
|
'0000:00:02.1 - unavailable',
|
||||||
|
self.stdlog.logger.output
|
||||||
|
)
|
||||||
|
|
||||||
|
# Ensure that the claim actually fixes the inconsistency so when the
|
||||||
|
# parent if freed the children become available too.
|
||||||
|
self.tracker.free_instance(
|
||||||
|
mock.sentinel.context, {'uuid': uuidsentinel.instance1})
|
||||||
|
|
||||||
|
pf_dev = self._get_device_by_address(pf['address'])
|
||||||
|
self.assertEqual('available', pf_dev.status)
|
||||||
|
vf1_dev = self._get_device_by_address(vf1['address'])
|
||||||
|
self.assertEqual('available', vf1_dev.status)
|
||||||
|
vf2_dev = self._get_device_by_address(vf2['address'])
|
||||||
|
self.assertEqual('available', vf2_dev.status)
|
||||||
|
|
||||||
|
def test_claim_available_pf_while_a_child_is_used(self):
|
||||||
|
pf = copy.deepcopy(fake_db_dev_3)
|
||||||
|
vf1 = copy.deepcopy(fake_db_dev_4)
|
||||||
|
vf1['status'] = fields.PciDeviceStatus.UNAVAILABLE
|
||||||
|
vf2 = copy.deepcopy(fake_db_dev_5)
|
||||||
|
vf2['status'] = fields.PciDeviceStatus.CLAIMED
|
||||||
|
self._create_tracker([pf, vf1, vf2])
|
||||||
|
|
||||||
|
pf_dev = self._get_device_by_address(pf['address'])
|
||||||
|
self.assertEqual('available', pf_dev.status)
|
||||||
|
vf1_dev = self._get_device_by_address(vf1['address'])
|
||||||
|
self.assertEqual('unavailable', vf1_dev.status)
|
||||||
|
vf2_dev = self._get_device_by_address(vf2['address'])
|
||||||
|
self.assertEqual('claimed', vf2_dev.status)
|
||||||
|
|
||||||
|
pci_requests_obj = self._create_pci_requests_object(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
'count': 1,
|
||||||
|
'spec': [{'dev_type': fields.PciDeviceType.SRIOV_PF}]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
instance_uuid=uuidsentinel.instance1,
|
||||||
|
)
|
||||||
|
# now try to claim and allocate the PF. The claim should fail as on of
|
||||||
|
# the child is used.
|
||||||
|
self.assertRaises(
|
||||||
|
exception.PciDeviceVFInvalidStatus,
|
||||||
|
self.tracker.claim_instance,
|
||||||
|
mock.sentinel.context,
|
||||||
|
pci_requests_obj,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
|
||||||
|
pf_dev = self._get_device_by_address(pf['address'])
|
||||||
|
self.assertEqual('available', pf_dev.status)
|
||||||
|
vf1_dev = self._get_device_by_address(vf1['address'])
|
||||||
|
self.assertEqual('unavailable', vf1_dev.status)
|
||||||
|
vf2_dev = self._get_device_by_address(vf2['address'])
|
||||||
|
self.assertEqual('claimed', vf2_dev.status)
|
||||||
|
|
||||||
def test_update_pci_for_instance_active(self):
|
def test_update_pci_for_instance_active(self):
|
||||||
pci_requests_obj = self._create_pci_requests_object(fake_pci_requests)
|
pci_requests_obj = self._create_pci_requests_object(fake_pci_requests)
|
||||||
|
Reference in New Issue
Block a user