Refactoring

Change-Id: I05da2090422ef7caced06e9a93cc358386bf1fe2
This commit is contained in:
Aurelien Lourot
2022-02-18 12:04:54 +01:00
parent fbdc0e4615
commit 7779d861c3
7 changed files with 435 additions and 193 deletions

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
# Copyright 2021 Canonical Ltd
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,25 +18,19 @@
import logging
import json
from charmhelpers.core.hookenv import cached
from charmhelpers.core.host import file_hash
from charmhelpers.fetch import (
apt_cache,
apt_install,
)
import ops_openstack.plugins.classes
from ops.main import main
from ops.model import (
ActiveStatus,
BlockedStatus,
ModelError,
)
from pylspci.parsers import SimpleParser
from ruamel.yaml import YAML
from charm_utils import (
check_status,
install_nvidia_software_if_needed,
is_nvidia_software_to_be_installed,
set_relation_data,
)
class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm):
@@ -65,7 +59,8 @@ class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm):
"""config-changed hook."""
# NOTE(lourot): We want to re-install the software here if a new
# version has just been provided as a charm resource.
self._install_nvidia_software_if_needed()
install_nvidia_software_if_needed(self._stored, self.config,
self.framework.model.resources)
for relation in self.framework.model.relations.get('nova-vgpu'):
self._set_principal_unit_relation_data(relation.data[self.unit])
@@ -78,7 +73,8 @@ class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm):
# the `install` hook because we want to be able to install software
# after a reboot if NVIDIA hardware has then been added for the
# first time.
self._install_nvidia_software_if_needed()
install_nvidia_software_if_needed(self._stored, self.config,
self.framework.model.resources)
# NOTE(lourot): this is used by OSBaseCharm.update_status():
self._stored.is_started = True
@@ -94,33 +90,24 @@ class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm):
# unit, then no service should be expected to run by
# OSBaseCharm.update_status(). Otherwise the services from the
# RESTART_MAP are expected to run.
if not self._is_nvidia_software_to_be_installed():
if not is_nvidia_software_to_be_installed(self.config):
return []
return super().services()
def _check_status(self):
"""Determine the unit status to be set.
:rtype: StatusBase
:rtype: ops.model.StatusBase
"""
unit_status_msg = ('no ' if not self._has_nvidia_gpu_hardware()
else '') + 'NVIDIA GPU found; '
installed_versions = self._installed_nvidia_software_versions()
if len(installed_versions) > 0:
unit_status_msg += 'installed NVIDIA software: '
unit_status_msg += ', '.join(installed_versions)
else:
unit_status_msg += 'no NVIDIA software installed'
if self._is_nvidia_software_to_be_installed() and len(
installed_versions) == 0:
return BlockedStatus(unit_status_msg)
return ActiveStatus('Unit is ready: ' + unit_status_msg)
return check_status(self.config)
def _set_principal_unit_relation_data(self, principal_unit_relation_data):
"""Pass configuration to a principal unit."""
"""Pass configuration to a principal unit.
:param principal_unit_relation_data: Relation data bag to principal
unit.
:type principal_unit_relation_data: ops.model.RelationData
"""
vgpu_device_mappings_str = self.config.get('vgpu-device-mappings')
if vgpu_device_mappings_str is not None:
vgpu_device_mappings = YAML().load(vgpu_device_mappings_str)
@@ -140,117 +127,13 @@ class NovaComputeNvidiaVgpuCharm(ops_openstack.core.OSBaseCharm):
}
}
})
_set_relation_data(
set_relation_data(
principal_unit_relation_data, 'subordinate_configuration',
nova_conf)
logging.debug(
'relation data to principal unit set to '
'subordinate_configuration={}'.format(nova_conf))
def _install_nvidia_software_if_needed(self):
"""Install the NVIDIA software on this unit if relevant."""
if self._is_nvidia_software_to_be_installed():
nvidia_software_path, nvidia_software_hash = (
self._path_and_hash_nvidia_resource())
if nvidia_software_path is None:
# No software has been provided as charm resource. We can't
# install anything. OSBaseCharm.update_status() will be
# executed later and put the unit in blocked state.
return
last_installed_hash = self._stored.last_installed_resource_hash
if nvidia_software_hash == last_installed_hash:
logging.info(
'NVIDIA vGPU software with hash {} already installed, '
'skipping'.format(nvidia_software_hash))
return
logging.info(
'Installing NVIDIA vGPU software with hash {}'.format(
nvidia_software_hash))
apt_install([nvidia_software_path], fatal=True)
self._stored.last_installed_resource_hash = nvidia_software_hash
@cached
def _is_nvidia_software_to_be_installed(self):
"""Determine whether the NVIDIA vGPU software is to be installed.
:returns: True if the software is to be installed and set up on the
unit.
:rtype: bool
"""
return (self._has_nvidia_gpu_hardware() or
self.config.get('force-install-nvidia-vgpu'))
def _path_and_hash_nvidia_resource(self):
"""Get path to and hash of software provided as charm resource.
:returns: Pair of path and hash. (None, None) if no charm resource has
been provided.
:rtype: Tuple[PosixPath, str]
"""
try:
nvidia_vgpu_software_path = (
self.framework.model.resources.fetch('nvidia-vgpu-software'))
except ModelError:
return None, None
return nvidia_vgpu_software_path, file_hash(nvidia_vgpu_software_path)
def _installed_nvidia_software_versions(self):
"""Get a list of installed NVIDIA vGPU software versions.
:returns: List of versions
:rtype: List[str]
"""
return [package['version'] for package in
apt_cache().dpkg_list(['nvidia-vgpu-ubuntu-*']).values()]
@classmethod
@cached
def _has_nvidia_gpu_hardware(cls):
"""Search for NVIDIA GPU hardware.
:returns: True if some NVIDIA GPU hardware is found on the current
unit.
:rtype: bool
"""
return cls._has_nvidia_gpu_hardware_notcached()
@staticmethod
def _has_nvidia_gpu_hardware_notcached():
nvidia_gpu_hardware_found = False
for device in SimpleParser().run():
device_class = device.cls.name
device_vendor = device.vendor.name
try:
device_subsystem_vendor = device.subsystem_vendor.name
except AttributeError:
device_subsystem_vendor = ''
if '3D' in device_class and ('NVIDIA' in device_vendor or
'NVIDIA' in device_subsystem_vendor):
logging.debug('NVIDIA GPU found: {}'.format(device))
# NOTE(lourot): we could `break` out here but it's interesting
# for debugging purposes to print them all.
nvidia_gpu_hardware_found = True
if not nvidia_gpu_hardware_found:
logging.debug('No NVIDIA GPU found.')
return nvidia_gpu_hardware_found
def _set_relation_data(relation_data, key, value):
"""Mockable setter.
Workaround for https://github.com/canonical/operator/issues/703
Used in unit test
TestNovaComputeNvidiaVgpuCharm.test_nova_vgpu_relation_joined
"""
relation_data[key] = value
if __name__ == '__main__':
main(NovaComputeNvidiaVgpuCharm)

136
src/charm_utils.py Normal file
View File

@@ -0,0 +1,136 @@
#!/usr/bin/env python3
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from charmhelpers.core.hookenv import cached
from charmhelpers.core.host import file_hash
from charmhelpers.fetch import apt_install
from ops.model import (
ActiveStatus,
BlockedStatus,
ModelError,
)
import nvidia_utils
@cached
def is_nvidia_software_to_be_installed(charm_config):
    """Tell whether this unit should get the NVIDIA vGPU software.

    Front-end decorated with charmhelpers' `cached`; the actual decision is
    delegated to `is_nvidia_software_to_be_installed_notcached()`.

    :param charm_config: Juju application configuration object.
    :type charm_config: ops.model.ConfigData
    :returns: Truthy if the software is to be installed and set up on the
        unit.
    :rtype: bool
    """
    to_be_installed = is_nvidia_software_to_be_installed_notcached(
        charm_config)
    return to_be_installed
def is_nvidia_software_to_be_installed_notcached(charm_config):
    """Determine whether the NVIDIA vGPU software is to be installed.

    Non-cached variant of `is_nvidia_software_to_be_installed()`.

    :param charm_config: Juju application configuration object.
    :type charm_config: ops.model.ConfigData
    :returns: True if NVIDIA GPU hardware is found on the unit or if the
        `force-install-nvidia-vgpu` option is set.
    :rtype: bool
    """
    # NOTE: `charm_config.get()` yields None when the option is unset, so the
    # result is coerced in order to always hand a real bool to the caller.
    return bool(nvidia_utils.has_nvidia_gpu_hardware() or
                charm_config.get('force-install-nvidia-vgpu'))
def install_nvidia_software_if_needed(stored, config, resources):
    """Install the NVIDIA vGPU software package when the unit requires it.

    Nothing is installed if the software is not wanted on this unit, if no
    package has been attached as charm resource, or if the attached package
    has the same hash as the one recorded at the last installation.

    :param stored: Unit's stored state.
    :type stored: ops.framework.StoredState
    :param config: Juju application config.
    :type config: ops.model.ConfigData
    :param resources: Juju application resources.
    :type resources: ops.model.Resources
    """
    if not is_nvidia_software_to_be_installed(config):
        return

    package_path, package_hash = _path_and_hash_nvidia_resource(resources)
    if package_path is None:
        # No software has been provided as charm resource. We can't
        # install anything. OSBaseCharm.update_status() will be
        # executed later and put the unit in blocked state.
        return

    if package_hash == stored.last_installed_resource_hash:
        logging.info(
            'NVIDIA vGPU software with hash {} already installed, '
            'skipping'.format(package_hash))
        return

    logging.info(
        'Installing NVIDIA vGPU software with hash {}'.format(package_hash))
    apt_install([package_path], fatal=True)
    # Remember what has just been installed so that the same resource is not
    # installed again on the next hook.
    stored.last_installed_resource_hash = package_hash
def check_status(config):
    """Work out the status the unit should report.

    The status message states whether NVIDIA GPU hardware has been found and
    which NVIDIA software versions are installed. The unit is blocked when
    the software is expected on the unit but none is installed.

    :param config: Juju application config.
    :type config: ops.model.ConfigData
    :returns: Blocked or active status carrying the message described above.
    :rtype: ops.model.StatusBase
    """
    hardware_prefix = '' if nvidia_utils.has_nvidia_gpu_hardware() else 'no '
    message = hardware_prefix + 'NVIDIA GPU found; '

    versions = nvidia_utils.installed_nvidia_software_versions()
    if versions:
        message += 'installed NVIDIA software: ' + ', '.join(versions)
    else:
        message += 'no NVIDIA software installed'

    if is_nvidia_software_to_be_installed(config) and not versions:
        return BlockedStatus(message)

    return ActiveStatus('Unit is ready: ' + message)
def set_relation_data(relation_data, key, value):
    """Store `value` under `key` in a relation data bag.

    Exists as a standalone function so that it can be mocked, as a workaround
    for https://github.com/canonical/operator/issues/703

    Used in unit test
    TestNovaComputeNvidiaVgpuCharm.test_nova_vgpu_relation_joined

    :param relation_data: Relation data bag.
    :type relation_data: ops.model.RelationData
    :param key: Key to be set in the data bag.
    :param value: Value to be stored under `key`.
    """
    relation_data[key] = value
def _path_and_hash_nvidia_resource(resources):
    """Locate the `nvidia-vgpu-software` charm resource and hash it.

    :param resources: Juju application resources.
    :type resources: ops.model.Resources
    :returns: Pair of path and hash. (None, None) if fetching the charm
        resource raised a ModelError, i.e. no resource has been provided.
    :rtype: Tuple[Optional[PosixPath], Optional[str]]
    """
    try:
        software_path = resources.fetch('nvidia-vgpu-software')
    except ModelError:
        return None, None
    else:
        return software_path, file_hash(software_path)

69
src/nvidia_utils.py Normal file
View File

@@ -0,0 +1,69 @@
#!/usr/bin/env python3
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from charmhelpers.core.hookenv import cached
from charmhelpers.fetch import (
apt_cache,
)
from pylspci.parsers import SimpleParser
def installed_nvidia_software_versions():
    """List the versions of the NVIDIA vGPU packages known to dpkg.

    Packages are looked up with the `nvidia-vgpu-ubuntu-*` name pattern.

    :returns: List of versions
    :rtype: List[str]
    """
    matching_packages = apt_cache().dpkg_list(['nvidia-vgpu-ubuntu-*'])
    return [details['version'] for details in matching_packages.values()]
@cached
def has_nvidia_gpu_hardware():
    """Tell whether the current unit has NVIDIA GPU hardware.

    Front-end decorated with charmhelpers' `cached`; the PCI scan itself is
    done by `_has_nvidia_gpu_hardware_notcached()`.

    :returns: True if some NVIDIA GPU hardware is found on the current
        unit.
    :rtype: bool
    """
    hardware_found = _has_nvidia_gpu_hardware_notcached()
    return hardware_found
def _has_nvidia_gpu_hardware_notcached():
    """Scan the PCI devices reported by lspci for NVIDIA GPU hardware.

    A device counts as an NVIDIA GPU when its class name contains `3D` and
    either its vendor name or its subsystem vendor name contains `NVIDIA`.

    :returns: True if at least one such device is found.
    :rtype: bool
    """
    found = False
    for pci_device in SimpleParser().run():
        class_name = pci_device.cls.name
        vendor_name = pci_device.vendor.name
        try:
            subsystem_vendor_name = pci_device.subsystem_vendor.name
        except AttributeError:
            # The subsystem vendor name can't be read on this device; fall
            # back to a string that can't match.
            subsystem_vendor_name = ''

        if '3D' not in class_name:
            continue
        if ('NVIDIA' not in vendor_name and
                'NVIDIA' not in subsystem_vendor_name):
            continue

        logging.debug('NVIDIA GPU found: {}'.format(pci_device))
        # NOTE(lourot): we could `break` out here but it's interesting
        # for debugging purposes to print them all.
        found = True

    if not found:
        logging.debug('No NVIDIA GPU found.')

    return found

View File

@@ -1,4 +1,4 @@
# Copyright 2021 Canonical Ltd
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.

View File

@@ -1,4 +1,4 @@
# Copyright 2016 Canonical Ltd
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,14 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import unittest
from mock import ANY, MagicMock, patch
from mock import ANY, patch
from ops.model import ActiveStatus
from ops.testing import Harness
import src.charm
sys.path.append('src') # noqa
import charm
class CharmTestCase(unittest.TestCase):
@@ -41,59 +44,21 @@ class CharmTestCase(unittest.TestCase):
setattr(self, method, self.patch(method))
class MockLspciProperty:
def __init__(self, name):
self.name = name
class MockLspciDevice:
def __init__(self, cls_name, vendor_name):
self.cls = MockLspciProperty(cls_name)
self.vendor = MockLspciProperty(vendor_name)
class TestNovaComputeNvidiaVgpuCharm(CharmTestCase):
_PATCHES = [
'SimpleParser',
'_set_relation_data',
]
_PCI_DEVICES_LIST_WITHOUT_GPU = [
# This is an NVIDIA device, but not a GPU card:
MockLspciDevice(cls_name='VGA compatible controller',
vendor_name='NVIDIA Corporation'),
]
_PCI_DEVICES_LIST_WITH_NVIDIA_GPU = [
# This is an NVIDIA device, but not a GPU card:
MockLspciDevice(cls_name='VGA compatible controller',
vendor_name='NVIDIA Corporation'),
# This is an NVIDIA GPU card:
MockLspciDevice(cls_name='3D controller',
vendor_name='NVIDIA Corporation'),
'check_status',
'install_nvidia_software_if_needed',
'is_nvidia_software_to_be_installed',
'set_relation_data',
]
def setUp(self):
super().setUp(src.charm, self._PATCHES)
self.harness = Harness(src.charm.NovaComputeNvidiaVgpuCharm)
super().setUp(charm, self._PATCHES)
self.harness = Harness(charm.NovaComputeNvidiaVgpuCharm)
self.addCleanup(self.harness.cleanup)
self.harness.begin()
def test_has_nvidia_gpu_hardware_with_hw(self):
self.SimpleParser.return_value = MagicMock()
self.SimpleParser.return_value.run.return_value = (
self._PCI_DEVICES_LIST_WITH_NVIDIA_GPU)
self.assertTrue(
self.harness.charm._has_nvidia_gpu_hardware_notcached())
def test_has_nvidia_gpu_hardware_without_hw(self):
self.SimpleParser.return_value = MagicMock()
self.SimpleParser.return_value.run.return_value = (
self._PCI_DEVICES_LIST_WITHOUT_GPU)
self.assertFalse(
self.harness.charm._has_nvidia_gpu_hardware_notcached())
def test_init(self):
self.assertEqual(
self.harness.framework.model.app.name,
@@ -102,12 +67,12 @@ class TestNovaComputeNvidiaVgpuCharm(CharmTestCase):
self.assertIsNone(
self.harness.charm._stored.last_installed_resource_hash)
def test_start(self):
self.harness.charm.on.start.emit()
self.assertTrue(isinstance(
self.harness.model.unit.status, ActiveStatus))
def test_nova_vgpu_relation_joined(self):
# NOTE(lourot): these functions get called by the update-status hook,
# which is irrelevant for this test:
self.check_status.return_value = ActiveStatus('Unit is ready')
self.is_nvidia_software_to_be_installed.return_value = False
self.harness.set_leader(True)
self.harness.update_config({
"vgpu-device-mappings": "{'vgpu_type1': ['device_address1']}"
@@ -120,5 +85,5 @@ class TestNovaComputeNvidiaVgpuCharm(CharmTestCase):
# NOTE(lourot): We mock set_relation_data() instead of using
# self.harness.get_relation_data() as a workaround for
# https://github.com/canonical/operator/issues/703
self._set_relation_data.assert_called_once_with(
self.set_relation_data.assert_called_once_with(
ANY, 'subordinate_configuration', ANY)

View File

@@ -0,0 +1,126 @@
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import unittest
from mock import MagicMock, patch
sys.path.append('src') # noqa
from ops.model import (
ActiveStatus,
BlockedStatus,
)
import charm_utils
class TestCharmUtils(unittest.TestCase):
    """Unit tests for the functions of the `charm_utils` module."""

    @patch('nvidia_utils.has_nvidia_gpu_hardware')
    def test_is_nvidia_software_to_be_installed(self,
                                                has_nvidia_gpu_hardware_mock):
        """Hardware presence or the force option requests installation."""
        has_nvidia_gpu_hardware_mock.return_value = True
        self.assertTrue(
            charm_utils.is_nvidia_software_to_be_installed_notcached({
                'force-install-nvidia-vgpu': False}))

        has_nvidia_gpu_hardware_mock.return_value = False
        self.assertTrue(
            charm_utils.is_nvidia_software_to_be_installed_notcached({
                'force-install-nvidia-vgpu': True}))
        self.assertFalse(
            charm_utils.is_nvidia_software_to_be_installed_notcached({
                'force-install-nvidia-vgpu': False}))
        # The option may also be missing altogether from the config:
        self.assertFalse(
            charm_utils.is_nvidia_software_to_be_installed_notcached({}))

    # NOTE: `patch` decorators are applied bottom-up, so the mock created by
    # the decorator closest to the function is the first mock argument.
    @patch('charm_utils.apt_install')
    @patch('charm_utils._path_and_hash_nvidia_resource')
    @patch('charm_utils.is_nvidia_software_to_be_installed')
    def test_install_nvidia_software_if_needed(
            self, is_software_to_be_installed_mock, path_and_hash_mock,
            apt_install_mock):
        """A resource is installed only if its hash is a new one."""
        is_software_to_be_installed_mock.return_value = True
        unit_stored_state = MagicMock()
        unit_stored_state.last_installed_resource_hash = 'hash-1'

        # If a software package with the exact same hash has already been
        # installed, no new installation should be performed:
        path_and_hash_mock.return_value = (
            'path-to-software',
            'hash-1',
        )
        charm_utils.install_nvidia_software_if_needed(unit_stored_state, None,
                                                      None)
        self.assertFalse(apt_install_mock.called)
        self.assertEqual(unit_stored_state.last_installed_resource_hash,
                         'hash-1')

        # If there is now a new software package with a different hash,
        # installation should be performed:
        path_and_hash_mock.return_value = (
            'path-to-software',
            'hash-2',
        )
        charm_utils.install_nvidia_software_if_needed(unit_stored_state, None,
                                                      None)
        apt_install_mock.assert_called_once_with(['path-to-software'],
                                                 fatal=True)
        # The hash of the freshly installed package must have been recorded:
        self.assertEqual(unit_stored_state.last_installed_resource_hash,
                         'hash-2')

    @patch('charm_utils.apt_install')
    @patch('charm_utils._path_and_hash_nvidia_resource')
    @patch('charm_utils.is_nvidia_software_to_be_installed')
    def test_install_nvidia_software_if_needed_skipped(
            self, is_software_to_be_installed_mock, path_and_hash_mock,
            apt_install_mock):
        """Nothing is installed if not wanted or if no resource exists."""
        unit_stored_state = MagicMock()
        unit_stored_state.last_installed_resource_hash = None

        # If the software isn't wanted on the unit, the charm resource
        # shouldn't even be looked at:
        is_software_to_be_installed_mock.return_value = False
        charm_utils.install_nvidia_software_if_needed(unit_stored_state, None,
                                                      None)
        self.assertFalse(path_and_hash_mock.called)
        self.assertFalse(apt_install_mock.called)

        # If the software is wanted but no charm resource has been provided,
        # nothing can be installed:
        is_software_to_be_installed_mock.return_value = True
        path_and_hash_mock.return_value = (None, None)
        charm_utils.install_nvidia_software_if_needed(unit_stored_state, None,
                                                      None)
        self.assertFalse(apt_install_mock.called)
        self.assertIsNone(unit_stored_state.last_installed_resource_hash)

    @patch('charm_utils.is_nvidia_software_to_be_installed')
    @patch('nvidia_utils.installed_nvidia_software_versions')
    @patch('nvidia_utils.has_nvidia_gpu_hardware')
    def test_check_status(self, has_hw_mock, installed_sw_mock,
                          is_sw_to_be_installed_mock):
        """The status reflects found hardware and installed software."""
        has_hw_mock.return_value = True
        installed_sw_mock.return_value = ['42', '43']
        is_sw_to_be_installed_mock.return_value = True
        self.assertEqual(
            charm_utils.check_status(None),
            ActiveStatus(
                'Unit is ready: '
                'NVIDIA GPU found; installed NVIDIA software: 42, 43'))

        has_hw_mock.return_value = False
        installed_sw_mock.return_value = ['42', '43']
        is_sw_to_be_installed_mock.return_value = True
        self.assertEqual(
            charm_utils.check_status(None),
            ActiveStatus(
                'Unit is ready: '
                'no NVIDIA GPU found; installed NVIDIA software: 42, 43'))

        has_hw_mock.return_value = True
        installed_sw_mock.return_value = []
        is_sw_to_be_installed_mock.return_value = True
        self.assertEqual(
            charm_utils.check_status(None),
            BlockedStatus(
                'NVIDIA GPU found; no NVIDIA software installed'))

        has_hw_mock.return_value = True
        installed_sw_mock.return_value = []
        is_sw_to_be_installed_mock.return_value = False
        self.assertEqual(
            charm_utils.check_status(None),
            ActiveStatus(
                'Unit is ready: '
                'NVIDIA GPU found; no NVIDIA software installed'))

    @patch('charm_utils.file_hash')
    def test_path_and_hash_nvidia_resource(self, file_hash_mock):
        """Path and hash of a provided charm resource are returned."""
        file_hash_mock.return_value = 'nvidia-software-hash'
        resources = MagicMock()
        resources.fetch.return_value = 'nvidia-software-path'

        self.assertEqual(charm_utils._path_and_hash_nvidia_resource(resources),
                         ('nvidia-software-path', 'nvidia-software-hash'))
        resources.fetch.assert_called_once_with('nvidia-vgpu-software')

    @patch('charm_utils.file_hash')
    def test_path_and_hash_nvidia_resource_not_provided(self,
                                                        file_hash_mock):
        """(None, None) is returned if fetching the resource fails."""
        resources = MagicMock()
        # `ModelError` is reached through `charm_utils`, which imports it.
        resources.fetch.side_effect = charm_utils.ModelError()

        self.assertEqual(charm_utils._path_and_hash_nvidia_resource(resources),
                         (None, None))
        self.assertFalse(file_hash_mock.called)

View File

@@ -0,0 +1,63 @@
# Copyright 2022 Canonical Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import unittest
from mock import patch
sys.path.append('src') # noqa
import nvidia_utils
class MockLspciProperty:
    """Test double for a property of an lspci device, exposing a name."""

    def __init__(self, name):
        """Remember the name of the property.

        :param name: Value to expose as `name` attribute.
        :type name: str
        """
        self.name = name
class MockLspciDevice:
    """Test double for an lspci device with a class and a vendor.

    No `subsystem_vendor` attribute is set on the instances.
    """

    def __init__(self, cls_name, vendor_name):
        """Build the `cls` and `vendor` properties of the device.

        :param cls_name: Name of the device class.
        :type cls_name: str
        :param vendor_name: Name of the device vendor.
        :type vendor_name: str
        """
        self.cls = MockLspciProperty(name=cls_name)
        self.vendor = MockLspciProperty(name=vendor_name)
class TestNvidiaUtils(unittest.TestCase):
    """Unit tests for the functions of the `nvidia_utils` module."""

    _PCI_DEVICES_LIST_WITHOUT_GPU = [
        # This is an NVIDIA device, but not a GPU card:
        MockLspciDevice(cls_name='VGA compatible controller',
                        vendor_name='NVIDIA Corporation'),
    ]

    _PCI_DEVICES_LIST_WITH_NVIDIA_GPU = [
        # This is an NVIDIA device, but not a GPU card:
        MockLspciDevice(cls_name='VGA compatible controller',
                        vendor_name='NVIDIA Corporation'),
        # This is an NVIDIA GPU card:
        MockLspciDevice(cls_name='3D controller',
                        vendor_name='NVIDIA Corporation'),
    ]

    @patch('nvidia_utils.SimpleParser')
    def test_has_nvidia_gpu_hardware_with_hw(self, lspci_parser_mock):
        """A 3D controller from NVIDIA is detected as GPU hardware."""
        lspci_parser_mock.return_value.run.return_value = (
            self._PCI_DEVICES_LIST_WITH_NVIDIA_GPU)
        self.assertTrue(nvidia_utils._has_nvidia_gpu_hardware_notcached())

    @patch('nvidia_utils.SimpleParser')
    def test_has_nvidia_gpu_hardware_with_subsystem_vendor(
            self, lspci_parser_mock):
        """A 3D controller with an NVIDIA subsystem vendor is detected."""
        # The vendor itself isn't NVIDIA, only the subsystem vendor is:
        device = MockLspciDevice(cls_name='3D controller',
                                 vendor_name='Some Other Vendor')
        device.subsystem_vendor = MockLspciProperty('NVIDIA Corporation')
        lspci_parser_mock.return_value.run.return_value = [device]
        self.assertTrue(nvidia_utils._has_nvidia_gpu_hardware_notcached())

    @patch('nvidia_utils.SimpleParser')
    def test_has_nvidia_gpu_hardware_without_hw(self, lspci_parser_mock):
        """An NVIDIA device which isn't a 3D controller isn't detected."""
        lspci_parser_mock.return_value.run.return_value = (
            self._PCI_DEVICES_LIST_WITHOUT_GPU)
        self.assertFalse(nvidia_utils._has_nvidia_gpu_hardware_notcached())