diff --git a/nova/image/glance.py b/nova/image/glance.py index 9797bbd6700d..7f6f5c6ed3ae 100644 --- a/nova/image/glance.py +++ b/nova/image/glance.py @@ -69,7 +69,8 @@ glance_opts = [ deprecated_name='glance_api_insecure'), cfg.IntOpt('num_retries', default=0, - help='Number of retries when downloading an image from glance', + help='Number of retries when uploading / downloading an image ' + 'to / from glance.', deprecated_group='DEFAULT', deprecated_name='glance_num_retries'), cfg.ListOpt('allowed_direct_url_schemes', diff --git a/nova/tests/virt/xenapi/client/test_session.py b/nova/tests/virt/xenapi/client/test_session.py index f78c611e8cba..02361db42f41 100644 --- a/nova/tests/virt/xenapi/client/test_session.py +++ b/nova/tests/virt/xenapi/client/test_session.py @@ -112,14 +112,16 @@ class CallPluginTestCase(stubs.XenAPITestBaseNoDB): fn = 'download_vhd' num_retries = 1 callback = None + retry_cb = mock.Mock() with mock.patch.object(self.session, 'call_plugin_serialized', autospec=True) as call_plugin_serialized: call_plugin_serialized.side_effect = exc self.assertRaises(exception.PluginRetriesExceeded, self.session.call_plugin_serialized_with_retry, plugin, fn, - num_retries, callback) + num_retries, callback, retry_cb) call_plugin_serialized.assert_called_with(plugin, fn) self.assertEqual(2, call_plugin_serialized.call_count) + self.assertEqual(2, retry_cb.call_count) def test_serialized_with_retry_socket_error_reraised(self): exc = socket.error @@ -128,10 +130,29 @@ class CallPluginTestCase(stubs.XenAPITestBaseNoDB): fn = 'download_vhd' num_retries = 1 callback = None + retry_cb = mock.Mock() with mock.patch.object(self.session, 'call_plugin_serialized', autospec=True) as call_plugin_serialized: call_plugin_serialized.side_effect = exc self.assertRaises(socket.error, self.session.call_plugin_serialized_with_retry, plugin, fn, - num_retries, callback) + num_retries, callback, retry_cb) call_plugin_serialized.assert_called_once_with(plugin, fn) + self.assertEqual(0, retry_cb.call_count) + + def test_serialized_with_retry_socket_reset_reraised(self): + exc = socket.error + exc.errno = errno.ECONNRESET + plugin = 'glance' + fn = 'download_vhd' + num_retries = 1 + callback = None + retry_cb = mock.Mock() + with mock.patch.object(self.session, 'call_plugin_serialized', + autospec=True) as call_plugin_serialized: + call_plugin_serialized.side_effect = exc + self.assertRaises(exception.PluginRetriesExceeded, + self.session.call_plugin_serialized_with_retry, plugin, fn, + num_retries, callback, retry_cb) + call_plugin_serialized.assert_called_with(plugin, fn) + self.assertEqual(2, call_plugin_serialized.call_count) diff --git a/nova/tests/virt/xenapi/image/test_glance.py b/nova/tests/virt/xenapi/image/test_glance.py index 908ee84b8083..b2150980f75e 100644 --- a/nova/tests/virt/xenapi/image/test_glance.py +++ b/nova/tests/virt/xenapi/image/test_glance.py @@ -17,7 +17,9 @@ import random import time import mock +from mox3 import mox +from nova.compute import utils as compute_utils from nova import context from nova import exception from nova.openstack.common import log as logging @@ -93,11 +95,11 @@ class TestGlanceStore(stubs.XenAPITestBaseNoDB): @mock.patch.object(vm_utils, '_make_uuid_stack', return_value=['uuid1']) @mock.patch.object(random, 'shuffle') @mock.patch.object(time, 'sleep') + @mock.patch.object(compute_utils, 'add_instance_fault_from_exc') @mock.patch.object(logging.getLogger('nova.virt.xenapi.client.session'), 'debug') - def test_download_image_retry(self, mock_log_debug, - mock_sleep, mock_shuffle, - mock_make_uuid_stack): + def test_download_image_retry(self, mock_log_debug, mock_fault, mock_sleep, + mock_shuffle, mock_make_uuid_stack): params = self._get_download_params() self.flags(num_retries=2, group='glance') @@ -132,6 +134,8 @@ class TestGlanceStore(stubs.XenAPITestBaseNoDB): mock_call_plugin_serialized.assert_has_calls(calls) mock_log_debug.assert_has_calls(log_calls, any_order=True) + self.assertEqual(1, mock_fault.call_count) + def _get_upload_params(self, auto_disk_config=True, expected_os_type='default'): params = self._get_params() @@ -186,16 +190,29 @@ class TestGlanceStore(stubs.XenAPITestBaseNoDB): self.mox.StubOutWithMock(self.session, 'call_plugin_serialized') self.mox.StubOutWithMock(time, 'sleep') + self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc') error_details = ["", "", "RetryableError", ""] error = self.session.XenAPI.Failure(details=error_details) self.session.call_plugin_serialized('glance', 'upload_vhd', **params).AndRaise(error) + compute_utils.add_instance_fault_from_exc(self.context, self.instance, + error, (fake.Failure, + error, + mox.IgnoreArg())) time.sleep(0.5) self.session.call_plugin_serialized('glance', 'upload_vhd', **params).AndRaise(error) + compute_utils.add_instance_fault_from_exc(self.context, self.instance, + error, (fake.Failure, + error, + mox.IgnoreArg())) time.sleep(1) self.session.call_plugin_serialized('glance', 'upload_vhd', **params).AndRaise(error) + compute_utils.add_instance_fault_from_exc(self.context, self.instance, + error, (fake.Failure, + error, + mox.IgnoreArg())) self.mox.ReplayAll() self.assertRaises(exception.CouldNotUploadImage, @@ -210,16 +227,25 @@ class TestGlanceStore(stubs.XenAPITestBaseNoDB): self.mox.StubOutWithMock(self.session, 'call_plugin_serialized') self.mox.StubOutWithMock(time, 'sleep') + self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc') error_details = ["", "task signaled", "", ""] error = self.session.XenAPI.Failure(details=error_details) self.session.call_plugin_serialized('glance', 'upload_vhd', **params).AndRaise(error) + compute_utils.add_instance_fault_from_exc(self.context, self.instance, + error, (fake.Failure, + error, + mox.IgnoreArg())) time.sleep(0.5) # Note(johngarbutt) XenServer 6.1 and later has this error error_details = ["", "signal: SIGTERM", "", ""] error = self.session.XenAPI.Failure(details=error_details) self.session.call_plugin_serialized('glance', 'upload_vhd', **params).AndRaise(error) + compute_utils.add_instance_fault_from_exc(self.context, self.instance, + error, (fake.Failure, + error, + mox.IgnoreArg())) time.sleep(1) self.session.call_plugin_serialized('glance', 'upload_vhd', **params) diff --git a/nova/tests/virt/xenapi/test_vm_utils.py b/nova/tests/virt/xenapi/test_vm_utils.py index f65891fbecc2..099857a2e0a2 100644 --- a/nova/tests/virt/xenapi/test_vm_utils.py +++ b/nova/tests/virt/xenapi/test_vm_utils.py @@ -276,7 +276,7 @@ class FetchVhdImageTestCase(VMUtilsTestBase): self.mox.StubOutWithMock( self.session, 'call_plugin_serialized_with_retry') func = self.session.call_plugin_serialized_with_retry( - 'glance', 'download_vhd', 0, mox.IgnoreArg(), + 'glance', 'download_vhd', 0, mox.IgnoreArg(), mox.IgnoreArg(), extra_headers={'X-Service-Catalog': '[]', 'X-Auth-Token': 'auth_token', 'X-Roles': '', diff --git a/nova/virt/xenapi/client/session.py b/nova/virt/xenapi/client/session.py index e1de6e23f06a..46f8de921535 100644 --- a/nova/virt/xenapi/client/session.py +++ b/nova/virt/xenapi/client/session.py @@ -212,7 +212,8 @@ class XenAPISession(object): return pickle.loads(rv) def call_plugin_serialized_with_retry(self, plugin, fn, num_retries, - callback, *args, **kwargs): + callback, retry_cb=None, *args, + **kwargs): """Allows a plugin to raise RetryableError so we can try again.""" attempts = num_retries + 1 sleep_time = 0.5 @@ -237,6 +238,8 @@ class XenAPISession(object): if self._is_retryable_exception(exc, fn): LOG.warn(_('%(plugin)s.%(fn)s failed. Retrying call.') % {'plugin': plugin, 'fn': fn}) + if retry_cb: + retry_cb(exc=exc) else: raise except socket.error as exc: @@ -244,6 +247,8 @@ class XenAPISession(object): LOG.warn(_('Lost connection to XenAPI during call to ' '%(plugin)s.%(fn)s. Retrying call.') % {'plugin': plugin, 'fn': fn}) + if retry_cb: + retry_cb(exc=exc) else: raise diff --git a/nova/virt/xenapi/image/glance.py b/nova/virt/xenapi/image/glance.py index 474cc8a432ab..7eb69c39cd13 100644 --- a/nova/virt/xenapi/image/glance.py +++ b/nova/virt/xenapi/image/glance.py @@ -13,19 +13,25 @@ # License for the specific language governing permissions and limitations # under the License. +import functools +import sys + from oslo.config import cfg +from nova.compute import utils as compute_utils from nova import exception from nova.image import glance +from nova.openstack.common import log as logging from nova import utils from nova.virt.xenapi import vm_utils CONF = cfg.CONF CONF.import_opt('num_retries', 'nova.image.glance', group='glance') +LOG = logging.getLogger(__name__) class GlanceStore(object): - def _call_glance_plugin(self, session, fn, params): + def _call_glance_plugin(self, context, instance, session, fn, params): glance_api_servers = glance.get_api_servers() def pick_glance(kwargs): @@ -35,8 +41,17 @@ class GlanceStore(object): kwargs['glance_use_ssl'] = g_use_ssl return g_host + def retry_cb(context, instance, exc=None): + if exc: + exc_info = sys.exc_info() + LOG.debug(exc.message, exc_info=exc_info) + compute_utils.add_instance_fault_from_exc( + context, instance, exc, exc_info) + + cb = functools.partial(retry_cb, context, instance) + return session.call_plugin_serialized_with_retry( - 'glance', fn, CONF.glance.num_retries, pick_glance, **params) + 'glance', fn, CONF.glance.num_retries, pick_glance, cb, **params) def _make_params(self, context, session, image_id): return {'image_id': image_id, @@ -48,7 +63,8 @@ class GlanceStore(object): params['uuid_stack'] = vm_utils._make_uuid_stack() try: - vdis = self._call_glance_plugin(session, 'download_vhd', params) + vdis = self._call_glance_plugin(context, instance, session, + 'download_vhd', params) except exception.PluginRetriesExceeded: raise exception.CouldNotFetchImage(image_id=image_id) @@ -72,6 +88,7 @@ class GlanceStore(object): props["auto_disk_config"] = "disabled" try: - self._call_glance_plugin(session, 'upload_vhd', params) + self._call_glance_plugin(context, instance, session, + 'upload_vhd', params) except exception.PluginRetriesExceeded: raise exception.CouldNotUploadImage(image_id=image_id) diff --git a/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance b/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance index 1d63b0413798..f3658c21547f 100755 --- a/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance +++ b/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance @@ -25,9 +25,8 @@ import md5 import socket import urllib2 -import utils - import pluginlib_nova +import utils pluginlib_nova.configure_logging('glance') @@ -205,7 +204,7 @@ def _upload_tarball(staging_path, image_id, glance_host, glance_port, None, staging_path, callback=send_chunked_transfer_encoded, compression_level=compression_level) finally: - send_chunked_transfer_encoded('') # Chunked-Transfer terminator + send_chunked_transfer_encoded('') # Chunked-Transfer terminator bytes_written = callback_data['bytes_written'] logging.info("Wrote %d bytes to %s" % (bytes_written, url)) @@ -218,22 +217,54 @@ def _upload_tarball(staging_path, image_id, glance_host, glance_port, "Response Status: %i, Response body: %s" % (url, resp.status, resp.read())) - if resp.status in (httplib.UNAUTHORIZED, - httplib.REQUEST_ENTITY_TOO_LARGE, - httplib.PRECONDITION_FAILED, - httplib.CONFLICT, - httplib.FORBIDDEN, - httplib.INTERNAL_SERVER_ERROR): - # No point in retrying for these conditions - raise PluginError("Got Error response [%i] while uploading " - "image [%s] " - "to glance host [%s:%s]" + # Note(Jesse): This branch sorts errors into those that are permanent, + # those that are ephemeral, and those that are unexpected. + if resp.status in (httplib.BAD_REQUEST, # 400 + httplib.UNAUTHORIZED, # 401 + httplib.PAYMENT_REQUIRED, # 402 + httplib.FORBIDDEN, # 403 + httplib.NOT_FOUND, # 404 + httplib.METHOD_NOT_ALLOWED, # 405 + httplib.NOT_ACCEPTABLE, # 406 + httplib.PROXY_AUTHENTICATION_REQUIRED, # 407 + httplib.CONFLICT, # 409 + httplib.GONE, # 410 + httplib.LENGTH_REQUIRED, # 411 + httplib.PRECONDITION_FAILED, # 412 + httplib.REQUEST_ENTITY_TOO_LARGE, # 413 + httplib.REQUEST_URI_TOO_LONG, # 414 + httplib.UNSUPPORTED_MEDIA_TYPE, # 415 + httplib.REQUESTED_RANGE_NOT_SATISFIABLE, # 416 + httplib.EXPECTATION_FAILED, # 417 + httplib.UNPROCESSABLE_ENTITY, # 422 + httplib.LOCKED, # 423 + httplib.FAILED_DEPENDENCY, # 424 + httplib.UPGRADE_REQUIRED, # 426 + httplib.NOT_IMPLEMENTED, # 501 + httplib.HTTP_VERSION_NOT_SUPPORTED, # 505 + httplib.NOT_EXTENDED, # 510 + ): + raise PluginError("Got Permanent Error response [%i] while " + "uploading image [%s] to glance host [%s:%s]" % (resp.status, image_id, glance_host, glance_port)) + elif resp.status in (httplib.REQUEST_TIMEOUT, # 408 + httplib.INTERNAL_SERVER_ERROR, # 500 + httplib.BAD_GATEWAY, # 502 + httplib.SERVICE_UNAVAILABLE, # 503 + httplib.GATEWAY_TIMEOUT, # 504 + httplib.INSUFFICIENT_STORAGE, # 507 + ): + raise RetryableError("Got Ephemeral Error response [%i] while " + "uploading image [%s] to glance host [%s:%s]" + % (resp.status, image_id, + glance_host, glance_port)) else: - raise RetryableError("Unexpected response [%i] while uploading " - "image [%s] " - "to glance host [%s:%s]" + # Note(Jesse): Assume unexpected errors are retryable. If you are + # seeing this error message, the error should probably be added + # to either the ephemeral or permanent error list. + raise RetryableError("Got Unexpected Error response [%i] while " + "uploading image [%s] to glance host [%s:%s]" % (resp.status, image_id, glance_host, glance_port)) finally: