Proper error handling by _ensure_resource_provider

Previously, if _ensure_resource_provider encountered any error from the
placement REST API, it would (sometimes log a message and) return None.

Furthermore, a name conflict while creating the provider was treated the
same as a UUID conflict, which would actually result in None being
returned.

With this change set, the error paths that previously returned None now
raise one of the new ResourceProviderRetrievalFailed or
ResourceProviderCreationFailed exceptions; and the name conflict path is
detected and treated as an error condition.

Note: This change set only touches the SchedulerReportClient side of
these error conditions - it makes no attempt to add error handling to
its callers.  Case in point, the API samples tests needed fixing because
they were previously running into the name conflict error condition, but
not noticing.  As currently implemented, the new exceptions will
percolate up to ComputeManager.update_available_resource_for_node like
any others coming from SchedulerReportClient, where they will be logged
and ignored.

Change-Id: I0c4ca6a81f213277fe7219cb905a805712f81e36
Closes-Bug: #1735430
This commit is contained in:
Eric Fried
2017-11-30 11:39:11 -06:00
parent 0792d7ad5b
commit 112cd9cd1f
10 changed files with 80 additions and 45 deletions

View File

@@ -22,7 +22,7 @@
"host_ip": "1.1.1.1", "host_ip": "1.1.1.1",
"free_disk_gb": 1028, "free_disk_gb": 1028,
"free_ram_mb": 7680, "free_ram_mb": 7680,
"hypervisor_hostname": "fake-mini", "hypervisor_hostname": "host1",
"hypervisor_type": "fake", "hypervisor_type": "fake",
"hypervisor_version": 1000, "hypervisor_version": 1000,
"id": 2, "id": 2,

View File

@@ -1,7 +1,7 @@
{ {
"hypervisors": [ "hypervisors": [
{ {
"hypervisor_hostname": "fake-mini", "hypervisor_hostname": "host1",
"id": 2, "id": 2,
"state": "up", "state": "up",
"status": "enabled" "status": "enabled"

View File

@@ -22,7 +22,7 @@
"host_ip": "1.1.1.1", "host_ip": "1.1.1.1",
"free_disk_gb": 1028, "free_disk_gb": 1028,
"free_ram_mb": 7680, "free_ram_mb": 7680,
"hypervisor_hostname": "fake-mini", "hypervisor_hostname": "host2",
"hypervisor_type": "fake", "hypervisor_type": "fake",
"hypervisor_version": 1000, "hypervisor_version": 1000,
"id": "1bb62a04-c576-402c-8147-9e89757a09e3", "id": "1bb62a04-c576-402c-8147-9e89757a09e3",

View File

@@ -2074,6 +2074,14 @@ class ResourceProviderInUse(NovaException):
msg_fmt = _("Resource provider has allocations.") msg_fmt = _("Resource provider has allocations.")
class ResourceProviderRetrievalFailed(NovaException):
msg_fmt = _("Failed to get resource provider with UUID %(uuid)s")
class ResourceProviderCreationFailed(NovaException):
msg_fmt = _("Failed to create resource provider %(name)s")
class InventoryWithResourceClassNotFound(NotFound): class InventoryWithResourceClassNotFound(NotFound):
msg_fmt = _("No inventory of class %(resource_class)s found.") msg_fmt = _("No inventory of class %(resource_class)s found.")

View File

@@ -395,10 +395,10 @@ class SchedulerReportClient(object):
"""Queries the placement API for a resource provider record with the """Queries the placement API for a resource provider record with the
supplied UUID. supplied UUID.
Returns a dict of resource provider information if found or None if no
such resource provider could be found.
:param uuid: UUID identifier for the resource provider to look up :param uuid: UUID identifier for the resource provider to look up
:return: A dict of resource provider information if found or None if no
such resource provider could be found.
:raise: ResourceProviderRetrievalFailed on error.
""" """
resp = self.get("/resource_providers/%s" % uuid) resp = self.get("/resource_providers/%s" % uuid)
if resp.status_code == 200: if resp.status_code == 200:
@@ -418,16 +418,18 @@ class SchedulerReportClient(object):
'placement_req_id': placement_req_id, 'placement_req_id': placement_req_id,
} }
LOG.error(msg, args) LOG.error(msg, args)
raise exception.ResourceProviderRetrievalFailed(uuid=uuid)
@safe_connect @safe_connect
def _create_resource_provider(self, uuid, name): def _create_resource_provider(self, uuid, name):
"""Calls the placement API to create a new resource provider record. """Calls the placement API to create a new resource provider record.
Returns a dict of resource provider information object representing
the newly-created resource provider.
:param uuid: UUID of the new resource provider :param uuid: UUID of the new resource provider
:param name: Name of the resource provider :param name: Name of the resource provider
:return: A dict of resource provider information object representing
the newly-created resource provider.
:raise: ResourceProviderCreationFailed or
ResourceProviderRetrievalFailed on error.
""" """
url = "/resource_providers" url = "/resource_providers"
payload = { payload = {
@@ -451,7 +453,10 @@ class SchedulerReportClient(object):
name=name, name=name,
generation=0, generation=0,
) )
elif resp.status_code == 409:
# TODO(efried): Push error codes from placement, and use 'em.
name_conflict = 'Conflicting resource provider name:'
if resp.status_code == 409 and name_conflict not in resp.text:
# Another thread concurrently created a resource provider with the # Another thread concurrently created a resource provider with the
# same UUID. Log a warning and then just return the resource # same UUID. Log a warning and then just return the resource
# provider object from _get_resource_provider() # provider object from _get_resource_provider()
@@ -464,18 +469,19 @@ class SchedulerReportClient(object):
} }
LOG.info(msg, args) LOG.info(msg, args)
return self._get_resource_provider(uuid) return self._get_resource_provider(uuid)
else:
msg = ("[%(placement_req_id)s] Failed to create resource provider " # A provider with the same *name* already exists, or some other error.
"record in placement API for UUID %(uuid)s. Got " msg = ("[%(placement_req_id)s] Failed to create resource provider "
"%(status_code)d: %(err_text)s.") "record in placement API for UUID %(uuid)s. Got "
args = { "%(status_code)d: %(err_text)s.")
'uuid': uuid, args = {
'status_code': resp.status_code, 'uuid': uuid,
'err_text': resp.text, 'status_code': resp.status_code,
'placement_req_id': placement_req_id, 'err_text': resp.text,
} 'placement_req_id': placement_req_id,
LOG.error(msg, args) }
return None LOG.error(msg, args)
raise exception.ResourceProviderCreationFailed(name=name)
def _ensure_resource_provider(self, uuid, name=None): def _ensure_resource_provider(self, uuid, name=None):
"""Ensures that the placement API has a record of a resource provider """Ensures that the placement API has a record of a resource provider
@@ -486,9 +492,9 @@ class SchedulerReportClient(object):
If found or created, an object representing the provider (and potential If found or created, an object representing the provider (and potential
child providers) is returned from this method. If the resource provider child providers) is returned from this method. If the resource provider
for the supplied uuid was not found and the resource provider record for the supplied uuid was not found and the resource provider record
could not be created in the placement API, we return None. could not be created in the placement API, an exception is raised.
If this method returns a non-None value, callers are assured both that If this method returns successfully, callers are assured both that
the placement API contains a record of the provider and the local tree the placement API contains a record of the provider and the local tree
of resource provider information contains a record of the provider. of resource provider information contains a record of the provider.
@@ -508,10 +514,7 @@ class SchedulerReportClient(object):
# the placement API. # the placement API.
rp = self._get_resource_provider(uuid) rp = self._get_resource_provider(uuid)
if rp is None: if rp is None:
name = name or uuid rp = self._create_resource_provider(uuid, name or uuid)
rp = self._create_resource_provider(uuid, name)
if rp is None:
return None
# If there had been no resource provider record, force refreshing # If there had been no resource provider record, force refreshing
# the aggregate map. # the aggregate map.

View File

@@ -22,7 +22,7 @@
"host_ip": "%(ip)s", "host_ip": "%(ip)s",
"free_disk_gb": 1028, "free_disk_gb": 1028,
"free_ram_mb": 7680, "free_ram_mb": 7680,
"hypervisor_hostname": "fake-mini", "hypervisor_hostname": "host1",
"hypervisor_type": "fake", "hypervisor_type": "fake",
"hypervisor_version": 1000, "hypervisor_version": 1000,
"id": %(hypervisor_id)s, "id": %(hypervisor_id)s,

View File

@@ -1,7 +1,7 @@
{ {
"hypervisors": [ "hypervisors": [
{ {
"hypervisor_hostname": "fake-mini", "hypervisor_hostname": "host1",
"id": 2, "id": 2,
"state": "up", "state": "up",
"status": "enabled" "status": "enabled"

View File

@@ -22,7 +22,7 @@
"host_ip": "%(ip)s", "host_ip": "%(ip)s",
"free_disk_gb": 1028, "free_disk_gb": 1028,
"free_ram_mb": 7680, "free_ram_mb": 7680,
"hypervisor_hostname": "fake-mini", "hypervisor_hostname": "host2",
"hypervisor_type": "fake", "hypervisor_type": "fake",
"hypervisor_version": 1000, "hypervisor_version": 1000,
"id": "%(hypervisor_id)s", "id": "%(hypervisor_id)s",

View File

@@ -18,6 +18,7 @@ import mock
from nova.cells import utils as cells_utils from nova.cells import utils as cells_utils
from nova import objects from nova import objects
from nova.tests.functional.api_sample_tests import api_sample_base from nova.tests.functional.api_sample_tests import api_sample_base
from nova.virt import fake
class HypervisorsSampleJsonTests(api_sample_base.ApiSampleTestBaseV21): class HypervisorsSampleJsonTests(api_sample_base.ApiSampleTestBaseV21):
@@ -155,7 +156,10 @@ class HypervisorsSampleJson233Tests(api_sample_base.ApiSampleTestBaseV21):
self.api.microversion = self.microversion self.api.microversion = self.microversion
# Start a new compute service to fake a record with hypervisor id=2 # Start a new compute service to fake a record with hypervisor id=2
# for pagination test. # for pagination test.
self.start_service('compute', host='host1') host = 'host1'
fake.set_nodes([host])
self.addCleanup(fake.restore_nodes)
self.start_service('compute', host=host)
def test_hypervisors_list(self): def test_hypervisors_list(self):
response = self._do_get('os-hypervisors?limit=1&marker=1') response = self._do_get('os-hypervisors?limit=1&marker=1')
@@ -200,7 +204,10 @@ class HypervisorsSampleJson253Tests(HypervisorsSampleJson228Tests):
def test_hypervisors_detail(self): def test_hypervisors_detail(self):
# Start another compute service to get a 2nd compute for paging tests. # Start another compute service to get a 2nd compute for paging tests.
service_2 = self.start_service('compute', host='host2').service_ref host = 'host2'
fake.set_nodes([host])
self.addCleanup(fake.restore_nodes)
service_2 = self.start_service('compute', host=host).service_ref
compute_node_2 = service_2.compute_node compute_node_2 = service_2.compute_node
marker = self.compute_node_1.uuid marker = self.compute_node_1.uuid
subs = { subs = {

View File

@@ -1112,16 +1112,19 @@ class TestProviderOperations(SchedulerReportClientTestCase):
'_get_provider_aggregates') '_get_provider_aggregates')
@mock.patch('nova.scheduler.client.report.SchedulerReportClient.' @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'
'_get_resource_provider') '_get_resource_provider')
def test_ensure_resource_provider_create_none(self, get_rp_mock, def test_ensure_resource_provider_create_fail(self, get_rp_mock,
get_agg_mock, create_rp_mock): get_agg_mock, create_rp_mock):
# No resource provider exists in the client's cache, and # No resource provider exists in the client's cache, and
# _create_provider returns None, indicating there was an error with the # _create_provider raises, indicating there was an error with the
# create call. Ensure we don't populate the resource provider cache # create call. Ensure we don't populate the resource provider cache
# with a None value. # with a None value.
get_rp_mock.return_value = None get_rp_mock.return_value = None
create_rp_mock.return_value = None create_rp_mock.side_effect = exception.ResourceProviderCreationFailed(
name=uuids.compute_node)
self.client._ensure_resource_provider(uuids.compute_node) self.assertRaises(
exception.ResourceProviderCreationFailed,
self.client._ensure_resource_provider, uuids.compute_node)
get_rp_mock.assert_called_once_with(uuids.compute_node) get_rp_mock.assert_called_once_with(uuids.compute_node)
create_rp_mock.assert_called_once_with(uuids.compute_node, create_rp_mock.assert_called_once_with(uuids.compute_node,
@@ -1262,7 +1265,9 @@ class TestProviderOperations(SchedulerReportClientTestCase):
'openstack-request-id': uuids.request_id} 'openstack-request-id': uuids.request_id}
uuid = uuids.compute_node uuid = uuids.compute_node
result = self.client._get_resource_provider(uuid) self.assertRaises(
exception.ResourceProviderRetrievalFailed,
self.client._get_resource_provider, uuid)
expected_url = '/resource_providers/' + uuid expected_url = '/resource_providers/' + uuid
self.ks_adap_mock.get.assert_called_once_with( self.ks_adap_mock.get.assert_called_once_with(
@@ -1271,7 +1276,6 @@ class TestProviderOperations(SchedulerReportClientTestCase):
# that includes the placement request id and return None # that includes the placement request id and return None
# from _get_resource_provider() # from _get_resource_provider()
self.assertTrue(logging_mock.called) self.assertTrue(logging_mock.called)
self.assertIsNone(result)
self.assertEqual(uuids.request_id, self.assertEqual(uuids.request_id,
logging_mock.call_args[0][1]['placement_req_id']) logging_mock.call_args[0][1]['placement_req_id'])
@@ -1313,10 +1317,10 @@ class TestProviderOperations(SchedulerReportClientTestCase):
# record. # record.
uuid = uuids.compute_node uuid = uuids.compute_node
name = 'computehost' name = 'computehost'
resp_mock = mock.Mock(status_code=409) self.ks_adap_mock.post.return_value = mock.Mock(
self.ks_adap_mock.post.return_value = resp_mock status_code=409,
self.ks_adap_mock.post.return_value.headers = { headers={'openstack-request-id': uuids.request_id},
'openstack-request-id': uuids.request_id} text='not a name conflict')
get_rp_mock.return_value = mock.sentinel.get_rp get_rp_mock.return_value = mock.sentinel.get_rp
@@ -1336,6 +1340,18 @@ class TestProviderOperations(SchedulerReportClientTestCase):
self.assertEqual(uuids.request_id, self.assertEqual(uuids.request_id,
logging_mock.call_args[0][1]['placement_req_id']) logging_mock.call_args[0][1]['placement_req_id'])
def test_create_resource_provider_name_conflict(self):
# When the API call to create the resource provider fails 409 with a
# name conflict, we raise an exception.
self.ks_adap_mock.post.return_value = mock.Mock(
status_code=409,
text='<stuff>Conflicting resource provider name: foo already '
'exists.</stuff>')
self.assertRaises(
exception.ResourceProviderCreationFailed,
self.client._create_resource_provider, uuids.compute_node, 'foo')
@mock.patch.object(report.LOG, 'error') @mock.patch.object(report.LOG, 'error')
def test_create_resource_provider_error(self, logging_mock): def test_create_resource_provider_error(self, logging_mock):
# Ensure _create_resource_provider() sets the error flag when trying to # Ensure _create_resource_provider() sets the error flag when trying to
@@ -1348,7 +1364,9 @@ class TestProviderOperations(SchedulerReportClientTestCase):
self.ks_adap_mock.post.return_value.headers = { self.ks_adap_mock.post.return_value.headers = {
'x-openstack-request-id': uuids.request_id} 'x-openstack-request-id': uuids.request_id}
result = self.client._create_resource_provider(uuid, name) self.assertRaises(
exception.ResourceProviderCreationFailed,
self.client._create_resource_provider, uuid, name)
expected_payload = { expected_payload = {
'uuid': uuid, 'uuid': uuid,
@@ -1364,7 +1382,6 @@ class TestProviderOperations(SchedulerReportClientTestCase):
self.assertTrue(logging_mock.called) self.assertTrue(logging_mock.called)
self.assertEqual(uuids.request_id, self.assertEqual(uuids.request_id,
logging_mock.call_args[0][1]['placement_req_id']) logging_mock.call_args[0][1]['placement_req_id'])
self.assertFalse(result)
class TestAggregates(SchedulerReportClientTestCase): class TestAggregates(SchedulerReportClientTestCase):