Refactor unit status output

The unit status output for this charm was verbose compared to
others; refactor it to display minimal information when active
and pertinent information when in a blocked state.

Note that the NVIDIA driver version should really be in the
application version; this will require further refactoring
to expose this information to the operator framework.

Change-Id: I26663ca8da99083a409e085ea239e43245ef6265
This commit is contained in:
James Page
2022-05-12 09:30:40 +01:00
parent c699c7d135
commit 37e0fe5982
6 changed files with 47 additions and 48 deletions

1
.gitignore vendored
View File

@@ -6,5 +6,4 @@ interfaces
.stestr .stestr
*__pycache__* *__pycache__*
*.pyc *.pyc
/nova-compute-nvidia-vgpu.charm
*.charm *.charm

View File

@@ -58,7 +58,8 @@ def is_nvidia_software_to_be_installed(charm_config):
def is_nvidia_software_to_be_installed_notcached(charm_config): def is_nvidia_software_to_be_installed_notcached(charm_config):
return (nvidia_utils.has_nvidia_gpu_hardware() or nvidia_gpu_hardware, _ = nvidia_utils.has_nvidia_gpu_hardware()
return (nvidia_gpu_hardware or
charm_config.get('force-install-nvidia-vgpu')) charm_config.get('force-install-nvidia-vgpu'))
@@ -110,34 +111,24 @@ def check_status(config, services):
:type services: List[str] :type services: List[str]
:rtype: ops.model.StatusBase :rtype: ops.model.StatusBase
""" """
unit_status_msg = ('no ' if not nvidia_utils.has_nvidia_gpu_hardware()
else '') + 'NVIDIA GPU found; '
installed_versions = nvidia_utils.installed_nvidia_software_versions() installed_versions = nvidia_utils.installed_nvidia_software_versions()
software_is_installed = len(installed_versions) > 0 software_is_installed = len(installed_versions) > 0
if (is_nvidia_software_to_be_installed(config) and
not software_is_installed):
return BlockedStatus("NVIDIA GPU detected, drivers not installed")
_, services_not_running_msg = ows_check_services_running(services, _, services_not_running_msg = ows_check_services_running(services,
ports=[]) ports=[])
software_is_running = services_not_running_msg is None software_is_running = services_not_running_msg is None
if software_is_installed: if software_is_installed and not software_is_running:
unit_status_msg += 'installed NVIDIA software: ' return BlockedStatus("manual reboot required")
unit_status_msg += ', '.join(installed_versions)
if not software_is_running:
# NOTE(lourot): the exact list of services not running that should
# be will be displayed in the principal's blocked status message
# already, so no need to repeat it here on the subordinate.
unit_status_msg += '; reboot required?'
else:
unit_status_msg += 'no NVIDIA software installed'
if ((is_nvidia_software_to_be_installed(config) and nvidia_gpu_hardware, num_gpus = nvidia_utils.has_nvidia_gpu_hardware()
not software_is_installed) or unit_status_msg = "{} GPU".format(num_gpus)
(software_is_installed and
not software_is_running)):
return BlockedStatus(unit_status_msg)
return ActiveStatus('Unit is ready: ' + unit_status_msg) return ActiveStatus('Unit is ready ({})'.format(unit_status_msg))
def set_principal_unit_relation_data(relation_data_to_be_set, config, def set_principal_unit_relation_data(relation_data_to_be_set, config,

View File

@@ -102,15 +102,15 @@ def list_vgpu_types():
def has_nvidia_gpu_hardware(): def has_nvidia_gpu_hardware():
"""Search for NVIDIA GPU hardware. """Search for NVIDIA GPU hardware.
:returns: True if some NVIDIA GPU hardware is found on the current :returns: a tuple of (bool, int) indicating if NVIDIA GPU hardware
unit. is found and how many GPUs were detected.
:rtype: bool :rtype: (bool, int)
""" """
return _has_nvidia_gpu_hardware_notcached() return _has_nvidia_gpu_hardware_notcached()
def _has_nvidia_gpu_hardware_notcached(): def _has_nvidia_gpu_hardware_notcached():
nvidia_gpu_hardware_found = False num_nvidia_devices = 0
for device in SimpleParser().run(): for device in SimpleParser().run():
device_class = device.cls.name device_class = device.cls.name
device_vendor = device.vendor.name device_vendor = device.vendor.name
@@ -124,12 +124,12 @@ def _has_nvidia_gpu_hardware_notcached():
logging.debug('NVIDIA GPU found: {}'.format(device)) logging.debug('NVIDIA GPU found: {}'.format(device))
# NOTE(lourot): we could `break` out here but it's interesting # NOTE(lourot): we could `break` out here but it's interesting
# for debugging purposes to print them all. # for debugging purposes to print them all.
nvidia_gpu_hardware_found = True num_nvidia_devices += 1
if not nvidia_gpu_hardware_found: if num_nvidia_devices == 0:
logging.debug('No NVIDIA GPU found.') logging.debug('No NVIDIA GPU found.')
return nvidia_gpu_hardware_found return num_nvidia_devices > 0, num_nvidia_devices
def _installed_nvidia_software_packages(): def _installed_nvidia_software_packages():

View File

@@ -1,7 +1,6 @@
# This file is managed centrally. If you find the need to modify this as a # This file is managed centrally. If you find the need to modify this as a
# one-off, please don't. Instead, consult #openstack-charms and ask about # one-off, please don't. Instead, consult #openstack-charms and ask about
# requirements management in charms via bot-control. Thank you. # requirements management in charms via bot-control. Thank you.
charm-tools>=2.4.4
coverage>=3.6 coverage>=3.6
mock>=1.2 mock>=1.2
flake8>=4.0.1; python_version >= '3.6' flake8>=4.0.1; python_version >= '3.6'

View File

@@ -32,12 +32,12 @@ class TestCharmUtils(unittest.TestCase):
@patch('nvidia_utils.has_nvidia_gpu_hardware') @patch('nvidia_utils.has_nvidia_gpu_hardware')
def test_is_nvidia_software_to_be_installed(self, def test_is_nvidia_software_to_be_installed(self,
has_nvidia_gpu_hardware_mock): has_nvidia_gpu_hardware_mock):
has_nvidia_gpu_hardware_mock.return_value = True has_nvidia_gpu_hardware_mock.return_value = True, 1
self.assertTrue( self.assertTrue(
charm_utils.is_nvidia_software_to_be_installed_notcached({ charm_utils.is_nvidia_software_to_be_installed_notcached({
'force-install-nvidia-vgpu': False})) 'force-install-nvidia-vgpu': False}))
has_nvidia_gpu_hardware_mock.return_value = False has_nvidia_gpu_hardware_mock.return_value = False, 0
self.assertTrue( self.assertTrue(
charm_utils.is_nvidia_software_to_be_installed_notcached({ charm_utils.is_nvidia_software_to_be_installed_notcached({
'force-install-nvidia-vgpu': True})) 'force-install-nvidia-vgpu': True}))
@@ -88,55 +88,59 @@ class TestCharmUtils(unittest.TestCase):
def test_check_status( def test_check_status(
self, has_hw_mock, installed_sw_mock, is_sw_to_be_installed_mock, self, has_hw_mock, installed_sw_mock, is_sw_to_be_installed_mock,
check_services_running_mock): check_services_running_mock):
has_hw_mock.return_value = True has_hw_mock.return_value = True, 1
installed_sw_mock.return_value = ['42', '43'] installed_sw_mock.return_value = ['42', '43']
is_sw_to_be_installed_mock.return_value = True is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (None, None) check_services_running_mock.return_value = (None, None)
self.assertEqual( self.assertEqual(
charm_utils.check_status(None, None), charm_utils.check_status(None, None),
ActiveStatus( ActiveStatus(
'Unit is ready: ' 'Unit is ready (1 GPU)'
'NVIDIA GPU found; installed NVIDIA software: 42, 43')) )
)
has_hw_mock.return_value = False has_hw_mock.return_value = False, 0
installed_sw_mock.return_value = ['42', '43'] installed_sw_mock.return_value = ['42', '43']
is_sw_to_be_installed_mock.return_value = True is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (None, None) check_services_running_mock.return_value = (None, None)
self.assertEqual( self.assertEqual(
charm_utils.check_status(None, None), charm_utils.check_status(None, None),
ActiveStatus( ActiveStatus(
'Unit is ready: ' 'Unit is ready (0 GPU)'
'no NVIDIA GPU found; installed NVIDIA software: 42, 43')) )
)
has_hw_mock.return_value = True has_hw_mock.return_value = True, 1
installed_sw_mock.return_value = [] installed_sw_mock.return_value = []
is_sw_to_be_installed_mock.return_value = True is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = (None, None) check_services_running_mock.return_value = (None, None)
self.assertEqual( self.assertEqual(
charm_utils.check_status(None, None), charm_utils.check_status(None, None),
BlockedStatus( BlockedStatus(
'NVIDIA GPU found; no NVIDIA software installed')) 'NVIDIA GPU detected, drivers not installed'
)
)
has_hw_mock.return_value = True has_hw_mock.return_value = True, 2
installed_sw_mock.return_value = [] installed_sw_mock.return_value = []
is_sw_to_be_installed_mock.return_value = False is_sw_to_be_installed_mock.return_value = False
check_services_running_mock.return_value = (None, None) check_services_running_mock.return_value = (None, None)
self.assertEqual( self.assertEqual(
charm_utils.check_status(None, None), charm_utils.check_status(None, None),
ActiveStatus( ActiveStatus(
'Unit is ready: ' 'Unit is ready (2 GPU)'
'NVIDIA GPU found; no NVIDIA software installed')) )
)
has_hw_mock.return_value = True has_hw_mock.return_value = True, 1
installed_sw_mock.return_value = ['42', '43'] installed_sw_mock.return_value = ['42', '43']
is_sw_to_be_installed_mock.return_value = True is_sw_to_be_installed_mock.return_value = True
check_services_running_mock.return_value = ( check_services_running_mock.return_value = (
None, 'Services not running that should be: nvidia-vgpu-mgr') None, 'Services not running that should be: nvidia-vgpu-mgr')
self.assertEqual( self.assertEqual(
charm_utils.check_status(None, None), charm_utils.check_status(None, None),
BlockedStatus( BlockedStatus('manual reboot required')
'NVIDIA GPU found; installed NVIDIA software: 42, 43; ' )
'reboot required?'))
@patch('nvidia_utils._installed_nvidia_software_packages') @patch('nvidia_utils._installed_nvidia_software_packages')
@patch('charm_utils.get_os_codename_package') @patch('charm_utils.get_os_codename_package')

View File

@@ -54,13 +54,19 @@ class TestNvidiaUtils(unittest.TestCase):
def test_has_nvidia_gpu_hardware_with_hw(self, lspci_parser_mock): def test_has_nvidia_gpu_hardware_with_hw(self, lspci_parser_mock):
lspci_parser_mock.return_value.run.return_value = ( lspci_parser_mock.return_value.run.return_value = (
self._PCI_DEVICES_LIST_WITH_NVIDIA_GPU) self._PCI_DEVICES_LIST_WITH_NVIDIA_GPU)
self.assertTrue(nvidia_utils._has_nvidia_gpu_hardware_notcached()) self.assertEqual(
nvidia_utils._has_nvidia_gpu_hardware_notcached(),
(True, 1)
)
@patch('nvidia_utils.SimpleParser') @patch('nvidia_utils.SimpleParser')
def test_has_nvidia_gpu_hardware_without_hw(self, lspci_parser_mock): def test_has_nvidia_gpu_hardware_without_hw(self, lspci_parser_mock):
lspci_parser_mock.return_value.run.return_value = ( lspci_parser_mock.return_value.run.return_value = (
self._PCI_DEVICES_LIST_WITHOUT_GPU) self._PCI_DEVICES_LIST_WITHOUT_GPU)
self.assertFalse(nvidia_utils._has_nvidia_gpu_hardware_notcached()) self.assertEqual(
nvidia_utils._has_nvidia_gpu_hardware_notcached(),
(False, 0)
)
@patch('nvidia_utils.Path') @patch('nvidia_utils.Path')
@patch('nvidia_utils.os.listdir') @patch('nvidia_utils.os.listdir')