diff --git a/nova/api/openstack/wsgi_app.py b/nova/api/openstack/wsgi_app.py index 6a2b72a6111f..d6f1030f835b 100644 --- a/nova/api/openstack/wsgi_app.py +++ b/nova/api/openstack/wsgi_app.py @@ -15,6 +15,7 @@ import os import sys from oslo_config import cfg +from oslo_db import exception as odbe from oslo_log import log as logging from oslo_reports import guru_meditation_report as gmr from oslo_reports import opts as gmr_opts @@ -116,6 +117,7 @@ def init_global_data(conf_files, service_name): logging.DEBUG) +@utils.latch_error_on_raise(retryable=(odbe.DBConnectionError,)) def init_application(name): conf_files = _get_config_files() diff --git a/nova/test.py b/nova/test.py index 0ddd928904c2..e084f5c14f7e 100644 --- a/nova/test.py +++ b/nova/test.py @@ -304,6 +304,7 @@ class TestCase(base.BaseTestCase): # make sure that the wsgi app is fully initialized for all testcase # instead of only once initialized for test worker wsgi_app.init_global_data.reset() + wsgi_app.init_application.reset() # Reset the placement client singleton report.PLACEMENTCLIENT = None diff --git a/nova/tests/unit/api/openstack/test_wsgi_app.py b/nova/tests/unit/api/openstack/test_wsgi_app.py index 0eb7011c116f..7fa1eacc6236 100644 --- a/nova/tests/unit/api/openstack/test_wsgi_app.py +++ b/nova/tests/unit/api/openstack/test_wsgi_app.py @@ -82,6 +82,8 @@ document_root = /tmp # raised during it. self.assertRaises(test.TestingException, wsgi_app.init_application, 'nova-api') + # reset the latch_error_on_raise decorator + wsgi_app.init_application.reset() # Now run init_application a second time, it should succeed since no # exception is being raised (the init of global data should not be # re-attempted). 
@@ -89,6 +91,26 @@ document_root = /tmp self.assertIn('Global data already initialized, not re-initializing.', self.stdlog.logger.output) + @mock.patch( + 'sys.argv', new=mock.MagicMock(return_value=mock.sentinel.argv)) + @mock.patch('nova.api.openstack.wsgi_app._get_config_files') + def test_init_application_called_unrecoverable(self, mock_get_files): + """Test that init_application can tolerate being called more than once + in a single python interpreter instance and raises the same exception + forever if it's unrecoverable. + """ + error = ValueError("unrecoverable config error") + excepted_type = type(error) + mock_get_files.side_effect = [ + error, test.TestingException, test.TestingException] + for i in range(3): + e = self.assertRaises( + excepted_type, wsgi_app.init_application, 'nova-api') + self.assertIs(e, error) + # since the exception is latched on the first raise mock_get_files + # should not be called again on each iteration + mock_get_files.assert_called_once() + @mock.patch('nova.objects.Service.get_by_host_and_binary') @mock.patch('nova.utils.raise_if_old_compute') def test_setup_service_version_workaround(self, mock_check_old, mock_get): diff --git a/nova/tests/unit/test_utils.py b/nova/tests/unit/test_utils.py index 1ee8ba936589..b34136b0522b 100644 --- a/nova/tests/unit/test_utils.py +++ b/nova/tests/unit/test_utils.py @@ -1398,3 +1398,58 @@ class RunOnceTests(test.NoDBTestCase): self.assertRaises(ValueError, f.reset) self.assertFalse(f.called) mock_clean.assert_called_once_with() + + +class LatchErrorOnRaiseTests(test.NoDBTestCase): + + error = test.TestingException() + unrecoverable = ValueError('some error') + + @utils.latch_error_on_raise(retryable=(test.TestingException,)) + def dummy_test_func(self, error=None): + if error: + raise error + return True + + def setUp(self): + super().setUp() + self.dummy_test_func.reset() + + @mock.patch.object(utils.LOG, 'exception') + def test_wrapped_success(self, fake_logger): + 
self.assertTrue(self.dummy_test_func()) + fake_logger.assert_not_called() + self.assertIsNone(self.dummy_test_func.error) + + @mock.patch.object(utils.LOG, 'exception') + def test_wrapped_raises_recoverable(self, fake_logger): + expected = LatchErrorOnRaiseTests.error + e = self.assertRaises( + type(expected), self.dummy_test_func, error=expected) + self.assertIs(expected, e) + # we just let recoverable exceptions flow through the decorator + # without catching them so the logger should not be called by the + # decorator + fake_logger.assert_not_called() + self.assertIsNone(self.dummy_test_func.error) + self.assertTrue(self.dummy_test_func()) + + @mock.patch.object(utils.LOG, 'exception') + def test_wrapped_raises_unrecoverable(self, fake_logger): + expected = LatchErrorOnRaiseTests.unrecoverable + e = self.assertRaises( + type(expected), self.dummy_test_func, error=expected) + self.assertIs(expected, e) + fake_logger.assert_called_once_with(expected) + self.assertIsNotNone(self.dummy_test_func.error) + self.assertIs(self.dummy_test_func.error, expected) + + @mock.patch.object(utils.LOG, 'exception', new=mock.MagicMock()) + def test_wrapped_raises_forever(self): + expected = LatchErrorOnRaiseTests.unrecoverable + first = self.assertRaises( + type(expected), self.dummy_test_func, error=expected) + self.assertIs(expected, first) + second = self.assertRaises( + type(expected), self.dummy_test_func, error=expected) + self.assertIs(first, second) diff --git a/nova/utils.py b/nova/utils.py index b6b378533111..a3b2353c5e02 100644 --- a/nova/utils.py +++ b/nova/utils.py @@ -1194,3 +1194,42 @@ def run_once(message, logger, cleanup=None): wrapper.reset = functools.partial(reset, wrapper) return wrapper return outer_wrapper + + +class _SentinelException(Exception): + """This type exists to act as a placeholder and will never be raised""" + + +def latch_error_on_raise(retryable=(_SentinelException,)): + """This is a utility decorator to ensure if a function ever raises + it 
will always raise the same exception going forward. + + The only exception we know is safe to ignore is an oslo db connection + error as the db may be temporarily unavailable and we should allow + mod_wsgi to retry + """ + + def outer_wrapper(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if wrapper.error: + raise wrapper.error + try: + return func(*args, **kwargs) + except retryable: + # reraise any retryable exception to allow them to be handled + # by the caller. + raise + except Exception as e: + wrapper.error = e + LOG.exception(e) + raise + + wrapper.error = None + + def reset(wrapper): + wrapper.error = None + + wrapper.reset = functools.partial(reset, wrapper) + return wrapper + return outer_wrapper diff --git a/releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml b/releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml new file mode 100644 index 000000000000..3c364b8b99a4 --- /dev/null +++ b/releasenotes/notes/latch-error-on-raise-cf2da71a12b5f55f.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + The nova (metadata)api wsgi application will now detect fatal errors + (configuration, et al) on startup and lock into a permanent error state + until fixed and restarted. This solves a problem with some wsgi runtimes + ignoring initialization errors and continuing to send requests to the + half-initialized service. See https://bugs.launchpad.net/nova/+bug/2103811 + for more details.