diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index 365e932868e8..6e2991680ed3 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -18008,9 +18008,6 @@ class HostStateTestCase(test.NoDBTestCase):
         def _get_vcpu_used(self):
             return 0
 
-        def _get_vgpu_total(self):
-            return 0
-
         def _get_cpu_info(self):
             return HostStateTestCase.cpu_info
 
@@ -18139,7 +18136,7 @@ class TestUpdateProviderTree(test.NoDBTestCase):
 
     @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_cpu_traits',
                 new=mock.Mock(return_value=cpu_traits))
-    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vgpu_total')
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories')
     @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info',
                 return_value={'total': disk_gb})
     @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total',
@@ -18147,8 +18144,10 @@
     @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_total',
                 return_value=vcpus)
     def _test_update_provider_tree(self, mock_vcpu, mock_mem, mock_disk,
-                                   mock_vgpus, total_vgpus=0):
-        mock_vgpus.return_value = total_vgpus
+                                   mock_gpu_invs, gpu_invs=None):
+        if gpu_invs:
+            self.flags(enabled_vgpu_types=['nvidia-11'], group='devices')
+        mock_gpu_invs.return_value = gpu_invs
         self.driver.update_provider_tree(self.pt,
                                          self.cn_rp['name'])
 
@@ -18160,18 +18159,58 @@
                          self.pt.data(self.cn_rp['uuid']).traits)
 
     def test_update_provider_tree_with_vgpus(self):
-        self._test_update_provider_tree(total_vgpus=8)
+        pci_devices = ['pci_0000_06_00_0', 'pci_0000_07_00_0']
+        gpu_inventory_dicts = {
+            pci_devices[0]: {'total': 16,
+                             'max_unit': 16,
+                             'min_unit': 1,
+                             'step_size': 1,
+                             'reserved': 0,
+                             'allocation_ratio': 1.0,
+                             },
+            pci_devices[1]: {'total': 8,
+                             'max_unit': 8,
+                             'min_unit': 1,
+                             'step_size': 1,
+                             'reserved': 0,
+                             'allocation_ratio': 1.0,
+                             },
+        }
+        self._test_update_provider_tree(gpu_invs=gpu_inventory_dicts)
         inventory = self._get_inventory()
-        # Add VGPU in the expected inventory
-        inventory[orc.VGPU] = {'step_size': 1,
-                               'min_unit': 1,
-                               'max_unit': 8,
-                               'total': 8}
+        # root compute node provider inventory is unchanged
         self.assertEqual(inventory,
                          (self.pt.data(self.cn_rp['uuid'])).inventory)
+        # We should have two new pGPU child providers in the tree under the
+        # compute node root provider.
+        compute_node_tree_uuids = self.pt.get_provider_uuids(
+            self.cn_rp['name'])
+        self.assertEqual(3, len(compute_node_tree_uuids))
+        # Create a default GPU inventory with no total and max_unit amounts yet
+        default_gpu_inventory = {
+            orc.VGPU: {
+                'step_size': 1, 'min_unit': 1, 'reserved': 0,
+                'allocation_ratio': 1.0
+            }
+        }
+        # The pGPU child providers should be any item in the list but the first
+        # which is the root provider UUID
+        for rp_uuid in compute_node_tree_uuids[1:]:
+            pgpu_provider_data = self.pt.data(rp_uuid)
+            # Identify which PCI device is related to this Resource Provider
+            pci_device = (pci_devices[0]
+                          if pci_devices[0] in pgpu_provider_data.name
+                          else pci_devices[1])
+            self.assertEqual('%s_%s' % (self.cn_rp['name'], pci_device),
+                             pgpu_provider_data.name)
+            pgpu_inventory = default_gpu_inventory.copy()
+            inventory_dict = gpu_inventory_dicts[pci_device]
+            pgpu_inventory[orc.VGPU][
+                'total'] = inventory_dict['total']
+            pgpu_inventory[orc.VGPU][
+                'max_unit'] = inventory_dict['max_unit']
+            self.assertEqual(pgpu_inventory, pgpu_provider_data.inventory)
 
-    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vgpu_total',
-                return_value=0)
     @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info',
                 return_value={'total': disk_gb})
     @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total',
@@ -18181,7 +18220,7 @@ class TestUpdateProviderTree(test.NoDBTestCase):
     # TODO(efried): Bug #1784020
     @unittest.expectedFailure
     def test_update_provider_tree_for_shared_disk_gb_resource(
-            self, mock_vcpu, mock_mem, mock_disk, mock_vgpus):
+            self, mock_vcpu, mock_mem, mock_disk):
         """Test to check DISK_GB is reported from shared resource provider.
         """
 
@@ -18229,6 +18268,207 @@ class TestUpdateProviderTree(test.NoDBTestCase):
         self.assertEqual(set(['HW_CPU_X86_AVX512F', 'HW_CPU_X86_BMI']),
                          self.pt.data(self.cn_rp['uuid']).traits)
 
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_cpu_traits',
+                new=mock.Mock(return_value=cpu_traits))
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
+                '_get_mediated_device_information')
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver.'
+                '_get_all_assigned_mediated_devices')
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories')
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info',
+                return_value={'total': disk_gb})
+    @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total',
+                return_value=memory_mb)
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_total',
+                return_value=vcpus)
+    def test_update_provider_tree_for_vgpu_reshape(
+            self, mock_vcpu, mock_mem, mock_disk, mock_gpus, mock_get_devs,
+            mock_get_mdev_info):
+        """Tests the VGPU reshape scenario."""
+        self.flags(enabled_vgpu_types=['nvidia-11'], group='devices')
+        # Let's assume we have two PCI devices each having 4 vGPUs for this
+        # type
+        pci_devices = ['pci_0000_06_00_0', 'pci_0000_07_00_0']
+        gpu_inventory_dicts = {
+            pci_devices[0]: {'total': 4,
+                             'max_unit': 4,
+                             'min_unit': 1,
+                             'step_size': 1,
+                             'reserved': 0,
+                             'allocation_ratio': 1.0,
+                             },
+            pci_devices[1]: {'total': 4,
+                             'max_unit': 4,
+                             'min_unit': 1,
+                             'step_size': 1,
+                             'reserved': 0,
+                             'allocation_ratio': 1.0,
+                             },
+        }
+        mock_gpus.return_value = gpu_inventory_dicts
+        # Fake the fact that we have one vGPU allocated to one instance and
+        # this vGPU is on the first PCI device
+        mock_get_devs.return_value = {uuids.mdev1: uuids.consumer1}
+        mock_get_mdev_info.side_effect = [
+            {"dev_id": "mdev_fake",
+             "uuid": uuids.mdev1,
+             "parent": pci_devices[0],
+             "type": "nvidia-11",
+             "iommu_group": 12
+             }]
+        # First create a provider tree with VGPU inventory on the root node
+        # provider. Since we have 2 devices with 4 vGPUs each, the total is 8
+        # as we were flattening all resources in one single inventory before
+        inventory = self._get_inventory()
+        vgpu_inventory = {
+            orc.VGPU: {
+                'step_size': 1, 'min_unit': 1, 'max_unit': 8, 'total': 8
+            }
+        }
+        inventory.update(vgpu_inventory)
+        self.pt.update_inventory(self.cn_rp['uuid'], inventory)
+        # Call update_provider_tree which will raise ReshapeNeeded because
+        # there is VGPU inventory on the root node provider.
+        self.assertRaises(exception.ReshapeNeeded,
+                          self.driver.update_provider_tree,
+                          self.pt, self.cn_rp['name'])
+        # Now make up some fake allocations to pass back to the upt method
+        # for the reshape.
+        allocations = {
+            uuids.consumer1: {
+                'allocations': {
+                    # This consumer has ram and vgpu allocations on the root
+                    # node provider and should be changed.
+                    self.cn_rp['uuid']: {
+                        'resources': {
+                            orc.MEMORY_MB: 512,
+                            orc.VGPU: 1
+                        }
+                    }
+                }
+            },
+            uuids.consumer2: {
+                'allocations': {
+                    # This consumer has ram and vcpu allocations on the root
+                    # node provider and should not be changed.
+                    self.cn_rp['uuid']: {
+                        'resources': {
+                            orc.MEMORY_MB: 256,
+                            orc.VCPU: 2
+                        }
+                    }
+                }
+            }
+        }
+        original_allocations = copy.deepcopy(allocations)
+        # Initiate the reshape.
+        self.driver.update_provider_tree(
+            self.pt, self.cn_rp['name'], allocations=allocations)
+        # We should have two new VGPU child providers in the tree under the
+        # compute node root provider.
+        compute_node_tree_uuids = self.pt.get_provider_uuids(
+            self.cn_rp['name'])
+        self.assertEqual(3, len(compute_node_tree_uuids))
+        rp_per_pci_device = {}
+        # The VGPU child providers should be the 2nd and 3rd UUIDs in that list
+        for rp_uuid in compute_node_tree_uuids[1:]:
+            # The VGPU inventory should be on the VGPU child provider
+            pgpu_provider_data = self.pt.data(rp_uuid)
+            # We want to map the PCI device with the RP UUID
+            if pci_devices[0] in pgpu_provider_data.name:
+                rp_per_pci_device[pci_devices[0]] = rp_uuid
+            elif pci_devices[1] in pgpu_provider_data.name:
+                rp_per_pci_device[pci_devices[1]] = rp_uuid
+        # Make sure we have two child resource providers
+        self.assertEqual(2, len(rp_per_pci_device))
+
+        # The compute node root provider should not have VGPU inventory.
+        del inventory[orc.VGPU]
+        self.assertEqual(inventory, self.pt.data(self.cn_rp['uuid']).inventory)
+        # consumer1 should now have allocations against two providers,
+        # MEMORY_MB on the root compute node provider and VGPU on the child
+        # provider.
+        consumer1_allocs = allocations[uuids.consumer1]['allocations']
+        self.assertEqual(2, len(consumer1_allocs))
+        self.assertEqual({orc.MEMORY_MB: 512},
+                         consumer1_allocs[self.cn_rp['uuid']]['resources'])
+        # Make sure the VGPU allocation moved to the corresponding child RP
+        self.assertEqual(
+            {orc.VGPU: 1},
+            consumer1_allocs[rp_per_pci_device[pci_devices[0]]]['resources'])
+        # The allocations on consumer2 should be unchanged.
+        self.assertEqual(original_allocations[uuids.consumer2],
+                         allocations[uuids.consumer2])
+
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_cpu_traits',
+                new=mock.Mock(return_value=cpu_traits))
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_gpu_inventories')
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_local_gb_info',
+                return_value={'total': disk_gb})
+    @mock.patch('nova.virt.libvirt.host.Host.get_memory_mb_total',
+                return_value=memory_mb)
+    @mock.patch('nova.virt.libvirt.driver.LibvirtDriver._get_vcpu_total',
+                return_value=vcpus)
+    def test_update_provider_tree_for_vgpu_reshape_fails(
+            self, mock_vcpu, mock_mem, mock_disk, mock_gpus):
+        """Tests the VGPU reshape failure scenario where VGPU allocations
+        are not on the root compute node provider as expected.
+        """
+        self.flags(enabled_vgpu_types=['nvidia-11'], group='devices')
+        # Let's assume we have two PCI devices each having 4 vGPUs for this
+        # type
+        pci_devices = ['pci_0000_06_00_0', 'pci_0000_07_00_0']
+        gpu_inventory_dicts = {
+            pci_devices[0]: {'total': 4,
+                             'max_unit': 4,
+                             'min_unit': 1,
+                             'step_size': 1,
+                             'reserved': 0,
+                             'allocation_ratio': 1.0,
+                             },
+            pci_devices[1]: {'total': 4,
+                             'max_unit': 4,
+                             'min_unit': 1,
+                             'step_size': 1,
+                             'reserved': 0,
+                             'allocation_ratio': 1.0,
+                             },
+        }
+        mock_gpus.return_value = gpu_inventory_dicts
+        # First create a provider tree with VGPU inventory on the root node
+        # provider.
+        inventory = self._get_inventory()
+        vgpu_inventory = {
+            orc.VGPU: {
+                'step_size': 1, 'min_unit': 1, 'max_unit': 8, 'total': 8
+            }
+        }
+        inventory.update(vgpu_inventory)
+        self.pt.update_inventory(self.cn_rp['uuid'], inventory)
+        # Now make up some fake allocations to pass back to the upt method
+        # for the reshape.
+        allocations = {
+            uuids.consumer1: {
+                'allocations': {
+                    # This consumer has invalid VGPU allocations on a non-root
+                    # compute node provider.
+                    uuids.other_rp: {
+                        'resources': {
+                            orc.MEMORY_MB: 512,
+                            orc.VGPU: 1
+                        }
+                    }
+                }
+            }
+        }
+        # Initiate the reshape.
+        ex = self.assertRaises(exception.ReshapeFailed,
+                               self.driver.update_provider_tree,
+                               self.pt, self.cn_rp['name'],
+                               allocations=allocations)
+        self.assertIn('Unexpected VGPU resource allocation on provider %s'
+                      % uuids.other_rp, six.text_type(ex))
+
 
 class TraitsComparisonMixin(object):
 
@@ -20418,37 +20658,62 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
                 '._get_mediated_devices')
     @mock.patch('nova.virt.libvirt.driver.LibvirtDriver'
                 '._get_mdev_capable_devices')
-    def test_get_vgpu_total(self, get_mdev_devs, get_mdevs):
-        get_mdev_devs.return_value = [
-            {'dev_id': 'pci_0000_84_00_0',
-             'vendor_id': 0x10de,
-             'types': {'nvidia-11': {'availableInstances': 14,
+    def test_get_gpu_inventories(self, get_mdev_capable_devs,
+                                 get_mediated_devices):
+        get_mdev_capable_devs.return_value = [
+            {"dev_id": "pci_0000_06_00_0",
+             "vendor_id": 0x10de,
+             "types": {'nvidia-11': {'availableInstances': 15,
                                      'name': 'GRID M60-0B',
                                      'deviceAPI': 'vfio-pci'},
-                       }}]
-        get_mdevs.return_value = [
-            {'dev_id': 'mdev_4b20d080_1b54_4048_85b3_a6a62d165c01',
-             'uuid': "4b20d080-1b54-4048-85b3-a6a62d165c01",
-             'parent': 'pci_0000_84_00_0',
-             'type': 'nvidia-11',
-             'iommuGroup': 1
-             },
-            {'dev_id': 'mdev_4b20d080_1b54_4048_85b3_a6a62d165c02',
-             'uuid': "4b20d080-1b54-4048-85b3-a6a62d165c02",
-             'parent': 'pci_0000_84_00_0',
-             'type': 'nvidia-11',
-             'iommuGroup': 1
-             },
+                       }
+             },
+            {"dev_id": "pci_0000_07_00_0",
+             "vendor_id": 0x0000,
+             "types": {'nvidia-11': {'availableInstances': 7,
+                                     'name': 'GRID M60-0B',
+                                     'deviceAPI': 'vfio-pci'},
+                       }
+             },
         ]
+        get_mediated_devices.return_value = [{'dev_id': 'mdev_some_uuid1',
+                                              'uuid': uuids.mdev1,
+                                              'parent': "pci_0000_06_00_0",
+                                              'type': 'nvidia-11',
+                                              'iommu_group': 1},
+                                             {'dev_id': 'mdev_some_uuid2',
+                                              'uuid': uuids.mdev2,
+                                              'parent': "pci_0000_07_00_0",
+                                              'type': 'nvidia-11',
+                                              'iommu_group': 1}]
+        drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
-        # By default, no specific types are supported
-        self.assertEqual(0, self.drvr._get_vgpu_total())
+        # If the operator doesn't provide GPU types
+        self.assertEqual({}, drvr._get_gpu_inventories())
 
-        # Now, ask for only one
+        # Now, set a specific GPU type
         self.flags(enabled_vgpu_types=['nvidia-11'], group='devices')
-        # We have 14 available for nvidia-11. We also have 2 mdevs of the type.
-        # So, as a total, we have 14+2, hence 16.
-        self.assertEqual(16, self.drvr._get_vgpu_total())
+        expected = {
+            # the first GPU also has one mdev allocated against it
+            'pci_0000_06_00_0': {'total': 15 + 1,
+                                 'max_unit': 15 + 1,
+                                 'min_unit': 1,
+                                 'step_size': 1,
+                                 'reserved': 0,
+                                 'allocation_ratio': 1.0,
+                                 },
+            # the second GPU also has another mdev
+            'pci_0000_07_00_0': {'total': 7 + 1,
+                                 'max_unit': 7 + 1,
+                                 'min_unit': 1,
+                                 'step_size': 1,
+                                 'reserved': 0,
+                                 'allocation_ratio': 1.0,
+                                 },
+        }
+        self.assertEqual(expected, drvr._get_gpu_inventories())
+        get_mdev_capable_devs.assert_called_once_with(types=['nvidia-11'])
+        get_mediated_devices.assert_called_once_with(types=['nvidia-11'])
 
     @mock.patch.object(host.Host, 'device_lookup_by_name')
     @mock.patch.object(host.Host, 'list_mdev_capable_devices')
diff --git a/nova/virt/driver.py b/nova/virt/driver.py
index 51a2bfece67f..ac85cfe3125c 100644
--- a/nova/virt/driver.py
+++ b/nova/virt/driver.py
@@ -985,6 +985,8 @@ class ComputeDriver(object):
         :raises ReshapeNeeded: If allocations is None and any inventory needs
             to be moved from one provider to another and/or to a different
             resource class.
+        :raises: ReshapeFailed if the requested tree reshape fails for
+            whatever reason.
         """
         raise NotImplementedError()
 
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index 7ee7f680dbba..07dc77e15984 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -5818,26 +5818,91 @@ class LibvirtDriver(driver.ComputeDriver):
         requested_types = CONF.devices.enabled_vgpu_types[:1]
         return requested_types
 
-    def _get_vgpu_total(self):
-        """Returns the number of total available vGPUs for any GPU type that is
-        enabled with the enabled_vgpu_types CONF option.
+    def _count_mediated_devices(self, enabled_vgpu_types):
+        """Counts the sysfs objects (handles) that represent a mediated device
+        and are filtered by $enabled_vgpu_types.
+
+        Those handles can be in use by a libvirt guest or not.
+
+        :param enabled_vgpu_types: list of enabled VGPU types on this host
+        :returns: dict, keyed by parent GPU libvirt PCI device ID, of number of
+            mdev device handles for that GPU
         """
-        requested_types = self._get_supported_vgpu_types()
+
+        counts_per_parent = collections.defaultdict(int)
+        mediated_devices = self._get_mediated_devices(types=enabled_vgpu_types)
+        for mdev in mediated_devices:
+            counts_per_parent[mdev['parent']] += 1
+        return counts_per_parent
+
+    def _count_mdev_capable_devices(self, enabled_vgpu_types):
+        """Counts the mdev-capable devices on this host filtered by
+        $enabled_vgpu_types.
+
+        :param enabled_vgpu_types: list of enabled VGPU types on this host
+        :returns: dict, keyed by device name, to an integer count of available
+            instances of each type per device
+        """
+        mdev_capable_devices = self._get_mdev_capable_devices(
+            types=enabled_vgpu_types)
+        counts_per_dev = collections.defaultdict(int)
+        for dev in mdev_capable_devices:
+            # dev_id is the libvirt name for the PCI device,
+            # eg. pci_0000_84_00_0 which matches a PCI address of 0000:84:00.0
+            dev_name = dev['dev_id']
+            for _type in dev['types']:
+                available = dev['types'][_type]['availableInstances']
+                # TODO(sbauza): Once we support multiple types, check which
+                # PCI devices are set for this type
+                # NOTE(sbauza): Even if we support multiple types, Nova will
+                # only use one per physical GPU.
+                counts_per_dev[dev_name] += available
+        return counts_per_dev
+
+    def _get_gpu_inventories(self):
+        """Returns the inventories for each physical GPU for a specific type
+        supported by the enabled_vgpu_types CONF option.
+
+        :returns: dict, keyed by libvirt PCI name, of dicts like:
+            {'pci_0000_84_00_0':
+                {'total': $TOTAL,
+                 'min_unit': 1,
+                 'max_unit': $TOTAL,
+                 'step_size': 1,
+                 'reserved': 0,
+                 'allocation_ratio': 1.0,
+                }
+            }
+        """
+
         # Bail out early if operator doesn't care about providing vGPUs
-        if not requested_types:
-            return 0
+        enabled_vgpu_types = self._get_supported_vgpu_types()
+        if not enabled_vgpu_types:
+            return {}
+        inventories = {}
+        count_per_parent = self._count_mediated_devices(enabled_vgpu_types)
+        for dev_name, count in count_per_parent.items():
+            inventories[dev_name] = {'total': count}
         # Filter how many available mdevs we can create for all the supported
         # types.
-        mdev_capable_devices = self._get_mdev_capable_devices(requested_types)
-        vgpus = 0
-        for dev in mdev_capable_devices:
-            for _type in dev['types']:
-                vgpus += dev['types'][_type]['availableInstances']
-        # Count the already created (but possibly not assigned to a guest)
-        # mdevs for all the supported types
-        mediated_devices = self._get_mediated_devices(requested_types)
-        vgpus += len(mediated_devices)
-        return vgpus
+        count_per_dev = self._count_mdev_capable_devices(enabled_vgpu_types)
+        # Combine the counts into the dict that we return to the caller.
+        for dev_name, count in count_per_dev.items():
+            inv_per_parent = inventories.setdefault(
+                dev_name, {'total': 0})
+            inv_per_parent['total'] += count
+            inv_per_parent.update({
+                'min_unit': 1,
+                'step_size': 1,
+                'reserved': 0,
+                # NOTE(sbauza): There is no sense in having a ratio other
+                # than 1.0 since we can't overallocate vGPU resources
+                'allocation_ratio': 1.0,
+                # FIXME(sbauza): Some vendors could support only one
+                'max_unit': inv_per_parent['total'],
+            })
+
+        return inventories
 
     def _get_instance_capabilities(self):
         """Get hypervisor instance capabilities
@@ -6106,6 +6171,8 @@ class LibvirtDriver(driver.ComputeDriver):
         :returns: A dictionary of keys being mediated device UUIDs and their
             respective values the instance UUID of the guest using it.
+            Returns an empty dict if an instance is provided but not
+            found in the hypervisor.
         """
         allocated_mdevs = {}
         if instance:
@@ -6542,23 +6609,13 @@ class LibvirtDriver(driver.ComputeDriver):
         :raises ReshapeNeeded: If allocations is None and any inventory needs
             to be moved from one provider to another and/or to a different
             resource class.
+        :raises: ReshapeFailed if the requested tree reshape fails for
+            whatever reason.
         """
         disk_gb = int(self._get_local_gb_info()['total'])
         memory_mb = int(self._host.get_memory_mb_total())
         vcpus = self._get_vcpu_total()
 
-        # NOTE(sbauza): For the moment, the libvirt driver only supports
-        # providing the total number of virtual GPUs for a single GPU type. If
-        # you have multiple physical GPUs, each of them providing multiple GPU
-        # types, libvirt will return the total sum of virtual GPUs
-        # corresponding to the single type passed in enabled_vgpu_types
-        # configuration option. Eg. if you have 2 pGPUs supporting 'nvidia-35',
-        # each of them having 16 available instances, the total here will be
-        # 32.
-        # If one of the 2 pGPUs doesn't support 'nvidia-35', it won't be used.
-        # TODO(sbauza): Use traits to make a better world.
-        vgpus = self._get_vgpu_total()
-
         # NOTE(yikun): If the inv record does not exists, the allocation_ratio
         # will use the CONF.xxx_allocation_ratio value if xxx_allocation_ratio
         # is set, and fallback to use the initial_xxx_allocation_ratio
@@ -6600,14 +6657,17 @@ class LibvirtDriver(driver.ComputeDriver):
             'reserved': self._get_reserved_host_disk_gb_from_config(),
         }
 
-        if vgpus > 0:
-            # Only provide VGPU resource classes if the driver supports it.
-            result[orc.VGPU] = {
-                'total': vgpus,
-                'min_unit': 1,
-                'max_unit': vgpus,
-                'step_size': 1,
-            }
+        # NOTE(sbauza): For the moment, the libvirt driver only supports
+        # providing the total number of virtual GPUs for a single GPU type. If
+        # you have multiple physical GPUs, each of them providing multiple GPU
+        # types, only one type will be used for each of the physical GPUs.
+        # If one of the pGPUs doesn't support this type, it won't be used.
+        # TODO(sbauza): Use traits to make a better world.
+        inventories_dict = self._get_gpu_inventories()
+        if inventories_dict:
+            self._update_provider_tree_for_vgpu(
+                inventories_dict, provider_tree, nodename,
+                allocations=allocations)
 
         provider_tree.update_inventory(nodename, result)
 
@@ -6625,6 +6685,351 @@ class LibvirtDriver(driver.ComputeDriver):
         # so that spawn() or other methods can access it thru a getter
         self.provider_tree = copy.deepcopy(provider_tree)
 
+    @staticmethod
+    def _is_reshape_needed_vgpu_on_root(provider_tree, nodename):
+        """Determine if root RP has VGPU inventories.
+
+        Check to see if the root compute node provider in the tree for
+        this host already has VGPU inventory because if it does, we either
+        need to signal for a reshape (if _update_provider_tree_for_vgpu()
+        has no allocations) or move the allocations within the ProviderTree if
+        passed.
+
+        :param provider_tree: The ProviderTree object for this host.
+        :param nodename: The ComputeNode.hypervisor_hostname, also known as
+            the name of the root node provider in the tree for this host.
+        :returns: boolean, whether we have VGPU root inventory.
+        """
+        root_node = provider_tree.data(nodename)
+        return orc.VGPU in root_node.inventory
+
+    @staticmethod
+    def _ensure_pgpu_providers(inventories_dict, provider_tree, nodename):
+        """Ensures GPU inventory providers exist in the tree for $nodename.
+
+        GPU providers are named $nodename_$gpu-device-id, e.g.
+        ``somehost.foo.bar.com_pci_0000_84_00_0``.
+
+        :param inventories_dict: Dictionary of inventories for VGPU class
+            directly provided by _get_gpu_inventories() and which looks like:
+                {'pci_0000_84_00_0':
+                    {'total': $TOTAL,
+                     'min_unit': 1,
+                     'max_unit': $MAX_UNIT,  # defaults to $TOTAL
+                     'step_size': 1,
+                     'reserved': 0,
+                     'allocation_ratio': 1.0,
+                    }
+                }
+        :param provider_tree: The ProviderTree to update.
+        :param nodename: The ComputeNode.hypervisor_hostname, also known as
+            the name of the root node provider in the tree for this host.
+        :returns: dict, keyed by GPU device ID, to ProviderData object
+            representing that resource provider in the tree
+        """
+        # Create the VGPU child providers if they do not already exist.
+        # TODO(mriedem): For the moment, _get_supported_vgpu_types() only
+        # returns one single type but that will be changed once we support
+        # multiple types.
+        # Note that we can't support multiple vgpu types until a reshape has
+        # been performed on the vgpu resources provided by the root provider,
+        # if any.
+
+        # Dict of PGPU RPs keyed by their libvirt PCI name
+        pgpu_rps = {}
+        for pgpu_dev_id, inventory in inventories_dict.items():
+            # For each physical GPU, we make sure to have a child provider
+            pgpu_rp_name = '%s_%s' % (nodename, pgpu_dev_id)
+            if not provider_tree.exists(pgpu_rp_name):
+                # This is the first time creating the child provider so add
+                # it to the tree under the root node provider.
+                provider_tree.new_child(pgpu_rp_name, nodename)
+            # We want to idempotently return the resource providers with VGPUs
+            pgpu_rp = provider_tree.data(pgpu_rp_name)
+            pgpu_rps[pgpu_dev_id] = pgpu_rp
+
+            # The VGPU inventory goes on a child provider of the given root
+            # node, identified by $nodename.
+            pgpu_inventory = {orc.VGPU: inventory}
+            provider_tree.update_inventory(pgpu_rp_name, pgpu_inventory)
+        return pgpu_rps
+
+    @staticmethod
+    def _assert_is_root_provider(
+            rp_uuid, root_node, consumer_uuid, alloc_data):
+        """Asserts during a reshape that rp_uuid is for the root node provider.
+
+        When reshaping, inventory and allocations should be on the root node
+        provider and then moved to child providers.
+
+        :param rp_uuid: UUID of the provider that holds inventory/allocations.
+        :param root_node: ProviderData object representing the root node in a
+            provider tree.
+        :param consumer_uuid: UUID of the consumer (instance) holding resource
+            allocations against the given rp_uuid provider.
+        :param alloc_data: dict of allocation data for the consumer.
+        :raises: ReshapeFailed if rp_uuid is not the root node indicating a
+            reshape was needed but the inventory/allocation structure is not
+            expected.
+        """
+        if rp_uuid != root_node.uuid:
+            # Something is wrong - VGPU inventory should
+            # only be on the root node provider if we are
+            # reshaping the tree.
+            msg = (_('Unexpected VGPU resource allocation '
+                     'on provider %(rp_uuid)s for consumer '
+                     '%(consumer_uuid)s: %(alloc_data)s. '
+                     'Expected VGPU allocation to be on root '
+                     'compute node provider %(root_uuid)s.')
+                   % {'rp_uuid': rp_uuid,
+                      'consumer_uuid': consumer_uuid,
+                      'alloc_data': alloc_data,
+                      'root_uuid': root_node.uuid})
+            raise exception.ReshapeFailed(error=msg)
+
+    def _get_assigned_mdevs_for_reshape(
+            self, instance_uuid, rp_uuid, alloc_data):
+        """Gets the mediated devices assigned to the instance during a reshape.
+
+        :param instance_uuid: UUID of the instance consuming VGPU resources
+            on this host.
+        :param rp_uuid: UUID of the resource provider with VGPU inventory being
+            consumed by the instance.
+        :param alloc_data: dict of allocation data for the instance consumer.
+        :return: list of mediated device UUIDs assigned to the instance
+        :raises: ReshapeFailed if the instance is not found in the hypervisor
+            or no mediated devices were found to be assigned to the instance
+            indicating VGPU allocations are out of sync with the hypervisor
+        """
+        # FIXME(sbauza): We don't really need an Instance
+        # object, but given some libvirt.host logs need
+        # to have an instance name, just provide a fake one
+        Instance = collections.namedtuple('Instance', ['uuid', 'name'])
+        instance = Instance(uuid=instance_uuid, name=instance_uuid)
+        mdevs = self._get_all_assigned_mediated_devices(instance)
+        # _get_all_assigned_mediated_devices returns {} if the instance is
+        # not found in the hypervisor
+        if not mdevs:
+            # If we found a VGPU allocation against a consumer
+            # which is not an instance, the only left case for
+            # Nova would be a migration but we don't support
+            # this at the moment.
+            msg = (_('Unexpected VGPU resource allocation on provider '
+                     '%(rp_uuid)s for consumer %(consumer_uuid)s: '
+                     '%(alloc_data)s. The allocation is made against a '
+                     'non-existing instance or there are no devices assigned.')
+                   % {'rp_uuid': rp_uuid, 'consumer_uuid': instance_uuid,
+                      'alloc_data': alloc_data})
+            raise exception.ReshapeFailed(error=msg)
+        return mdevs
+
+    def _count_vgpus_per_pgpu(self, mdev_uuids):
+        """Count the number of VGPUs per physical GPU mediated device.
+
+        :param mdev_uuids: List of physical GPU mediated device UUIDs.
+        :return: dict, keyed by PGPU device ID, to count of VGPUs on that
+            device
+        """
+        vgpu_count_per_pgpu = collections.defaultdict(int)
+        for mdev_uuid in mdev_uuids:
+            # libvirt name is like mdev_00ead764_fdc0_46b6_8db9_2963f5c815b4
+            dev_name = "mdev_" + mdev_uuid.replace('-', '_')
+            # Count how many vGPUs are in use for this instance
+            dev_info = self._get_mediated_device_information(dev_name)
+            pgpu_dev_id = dev_info['parent']
+            vgpu_count_per_pgpu[pgpu_dev_id] += 1
+        return vgpu_count_per_pgpu
+
+    @staticmethod
+    def _check_vgpu_allocations_match_real_use(
+            vgpu_count_per_pgpu, expected_usage, rp_uuid, consumer_uuid,
+            alloc_data):
+        """Checks that the number of GPU devices assigned to the consumer
+        matches what is expected from the allocations in the placement service
+        and logs a warning if there is a mismatch.
+
+        :param vgpu_count_per_pgpu: dict, keyed by PGPU device ID, to count of
+            VGPUs on that device where each device is assigned to the consumer
+            (guest instance on this hypervisor)
+        :param expected_usage: The expected usage from placement for the
+            given resource provider and consumer
+        :param rp_uuid: UUID of the resource provider with VGPU inventory being
+            consumed by the instance
+        :param consumer_uuid: UUID of the consumer (instance) holding resource
+            allocations against the given rp_uuid provider
+        :param alloc_data: dict of allocation data for the instance consumer
+        """
+        actual_usage = sum(vgpu_count_per_pgpu.values())
+        if actual_usage != expected_usage:
+            # Don't make it blocking, just make sure you actually correctly
+            # allocate the existing resources
+            LOG.warning(
+                'Unexpected VGPU resource allocation on provider %(rp_uuid)s '
+                'for consumer %(consumer_uuid)s: %(alloc_data)s. Allocations '
+                '(%(expected_usage)s) differ from actual use '
+                '(%(actual_usage)s).',
+                {'rp_uuid': rp_uuid, 'consumer_uuid': consumer_uuid,
+                 'alloc_data': alloc_data, 'expected_usage': expected_usage,
+                 'actual_usage': actual_usage})
+
+    def _reshape_vgpu_allocations(
+            self, rp_uuid, root_node, consumer_uuid, alloc_data, resources,
+            pgpu_rps):
+        """Update existing VGPU allocations by moving them from the root node
+        provider to the child provider for the given VGPU provider.
+
+        :param rp_uuid: UUID of the VGPU resource provider with allocations
+            from consumer_uuid (should be the root node provider before
+            reshaping occurs)
+        :param root_node: ProviderData object for the root compute node
+            resource provider in the provider tree
+        :param consumer_uuid: UUID of the consumer (instance) with VGPU
+            allocations against the resource provider represented by rp_uuid
+        :param alloc_data: dict of allocation information for consumer_uuid
+        :param resources: dict, keyed by resource class, of resources allocated
+            to consumer_uuid from rp_uuid
+        :param pgpu_rps: dict, keyed by GPU device ID, to ProviderData object
+            representing that resource provider in the tree
+        :raises: ReshapeFailed if the reshape fails for whatever reason
+        """
+        # We've found VGPU allocations on a provider. It should be the root
+        # node provider.
+        self._assert_is_root_provider(
+            rp_uuid, root_node, consumer_uuid, alloc_data)
+
+        # Find which physical GPU corresponds to this allocation.
+        mdev_uuids = self._get_assigned_mdevs_for_reshape(
+            consumer_uuid, rp_uuid, alloc_data)
+
+        vgpu_count_per_pgpu = self._count_vgpus_per_pgpu(mdev_uuids)
+
+        # We need to make sure we found all the mediated devices that
+        # correspond to an allocation.
+        self._check_vgpu_allocations_match_real_use(
+            vgpu_count_per_pgpu, resources[orc.VGPU],
+            rp_uuid, consumer_uuid, alloc_data)
+
+        # Add the VGPU allocation for each VGPU provider.
+        allocs = alloc_data['allocations']
+        for pgpu_dev_id, pgpu_rp in pgpu_rps.items():
+            vgpu_count = vgpu_count_per_pgpu[pgpu_dev_id]
+            if vgpu_count:
+                allocs[pgpu_rp.uuid] = {
+                    'resources': {
+                        orc.VGPU: vgpu_count
+                    }
+                }
+        # And remove the VGPU allocation from the root node provider.
+        del resources[orc.VGPU]
+
+    def _reshape_gpu_resources(
+            self, allocations, root_node, pgpu_rps):
+        """Reshapes the provider tree moving VGPU inventory from root to child
+
+        :param allocations:
+            Dict of allocation data of the form:
+              { $CONSUMER_UUID: {
+                    # The shape of each "allocations" dict below is identical
+                    # to the return from GET /allocations/{consumer_uuid}
+                    "allocations": {
+                        $RP_UUID: {
+                            "generation": $RP_GEN,
+                            "resources": {
+                                $RESOURCE_CLASS: $AMOUNT,
+                                ...
+                            },
+                        },
+                        ...
+                    },
+                    "project_id": $PROJ_ID,
+                    "user_id": $USER_ID,
+                    "consumer_generation": $CONSUMER_GEN,
+                },
+                ...
+              }
+        :param root_node: The root node in the provider tree
+        :param pgpu_rps: dict, keyed by GPU device ID, to ProviderData object
+            representing that resource provider in the tree
+        """
+        LOG.info('Reshaping tree; moving VGPU allocations from root '
+                 'provider %s to child providers %s.', root_node.uuid,
+                 pgpu_rps.values())
+        # For each consumer in the allocations dict, look for VGPU
+        # allocations and move them to the VGPU provider.
+        for consumer_uuid, alloc_data in allocations.items():
+            # Copy and iterate over the current set of providers to avoid
+            # modifying keys while iterating.
+            allocs = alloc_data['allocations']
+            for rp_uuid in list(allocs):
+                resources = allocs[rp_uuid]['resources']
+                if orc.VGPU in resources:
+                    self._reshape_vgpu_allocations(
+                        rp_uuid, root_node, consumer_uuid, alloc_data,
+                        resources, pgpu_rps)
+
+    def _update_provider_tree_for_vgpu(self, inventories_dict, provider_tree,
+                                       nodename, allocations=None):
+        """Updates the provider tree for VGPU inventory.
+
+        Before Stein, VGPU inventory and allocations were on the root compute
+        node provider in the tree. Starting in Stein, the VGPU inventory is
+        on a child provider in the tree. As a result, this method will
+        "reshape" the tree if necessary on first start of this compute service
+        in Stein.
+
+        :param inventories_dict: Dictionary of inventories for VGPU class
+            directly provided by _get_gpu_inventories() and which looks like:
+                {'pci_0000_84_00_0':
+                    {'total': $TOTAL,
+                     'min_unit': 1,
+                     'max_unit': $MAX_UNIT,  # defaults to $TOTAL
+                     'step_size': 1,
+                     'reserved': 0,
+                     'allocation_ratio': 1.0,
+                    }
+                }
+        :param provider_tree: The ProviderTree to update.
+        :param nodename: The ComputeNode.hypervisor_hostname, also known as
+            the name of the root node provider in the tree for this host.
+        :param allocations: If not None, indicates a reshape was requested and
+            should be performed.
+        :raises: nova.exception.ReshapeNeeded if ``allocations`` is None and
+            the method determines a reshape of the tree is needed, i.e. VGPU
+            inventory and allocations must be migrated from the root node
+            provider to a child provider of VGPU resources in the tree.
+        :raises: nova.exception.ReshapeFailed if the requested tree reshape
+            fails for whatever reason.
+ """ + # Check to see if the root compute node provider in the tree for + # this host already has VGPU inventory because if it does, and + # we're not currently reshaping (allocations is None), we need + # to indicate that a reshape is needed to move the VGPU inventory + # onto a child provider in the tree. + + # Ensure GPU providers are in the ProviderTree for the given inventory. + pgpu_rps = self._ensure_pgpu_providers( + inventories_dict, provider_tree, nodename) + + if self._is_reshape_needed_vgpu_on_root(provider_tree, nodename): + if allocations is None: + # We have old VGPU inventory on root RP, but we haven't yet + # allocations. That means we need to ask for a reshape. + LOG.info('Requesting provider tree reshape in order to move ' + 'VGPU inventory from the root compute node provider ' + '%s to a child provider.', nodename) + raise exception.ReshapeNeeded() + # We have allocations, that means we already asked for a reshape + # and the Placement API returned us them. We now need to move + # those from the root RP to the needed children RPs. + root_node = provider_tree.data(nodename) + # Reshape VGPU provider inventory and allocations, moving them + # from the root node provider to the child providers. + self._reshape_gpu_resources(allocations, root_node, pgpu_rps) + # Only delete the root inventory once the reshape is done + if orc.VGPU in root_node.inventory: + del root_node.inventory[orc.VGPU] + provider_tree.update_inventory(nodename, root_node.inventory) + def get_available_resource(self, nodename): """Retrieve resource information. diff --git a/releasenotes/notes/libvirt-stein-vgpu-reshape-a1fa23b8ad8aa966.yaml b/releasenotes/notes/libvirt-stein-vgpu-reshape-a1fa23b8ad8aa966.yaml new file mode 100644 index 000000000000..e0cce313c68a --- /dev/null +++ b/releasenotes/notes/libvirt-stein-vgpu-reshape-a1fa23b8ad8aa966.yaml @@ -0,0 +1,12 @@ +--- +upgrade: + - | + The libvirt compute driver will "reshape" VGPU inventories and allocations + on start of the ``nova-compute`` service. This will result in moving + VGPU inventory from the root compute node resource provider to a nested + (child) resource provider in the tree and move any associated VGPU + allocations with it. This will be a one-time operation on startup in Stein. + There is no end-user visible impact for this; it is for internal resource + tracking purposes. See the `spec`__ for more details. + + .. __: https://specs.openstack.org/openstack/nova-specs/specs/stein/approved/reshape-provider-tree.html