Cells: Handle delete with BuildRequest

When an instance delete is issued the instance could be in a few
different states which require different handling.

If the instance is in a cell and booted or booting then the delete can
proceed normally.

If there is a BuildRequest object for the instance then it is still
being built and has not reached a cell yet. In this situation there may
or may not also be an instance record depending on how much future work
has merged. The BuildRequest should be destroyed, any InstanceMapping
that exists should be cleaned up, and if there is an Instance it should
be deleted normally.

The added functional test does not check for a change in behavior. It
ensures that the behavior of delete does not change as the delete
process is updated. Note that the only delete path that bails early is
not yet reachable. The test keeps things honest as that changes.

Change-Id: I27d89143e8804aebdd414791f03b622abdaafe90
Partially-implements: bp add-buildrequest-obj
Partially-implements: bp cells-scheduling-interaction
This commit is contained in:
Andrew Laski
2016-06-03 14:01:51 -04:00
parent b284af8937
commit 32076267f6
7 changed files with 442 additions and 25 deletions

View File

@@ -50,6 +50,7 @@ from nova.compute import vm_states
from nova import conductor
import nova.conf
from nova.consoleauth import rpcapi as consoleauth_rpcapi
from nova import context as nova_context
from nova import crypto
from nova.db import base
from nova import exception
@@ -1532,12 +1533,145 @@ class API(base.Base):
auto_disk_config,
image_ref)
def _lookup_instance(self, context, uuid):
'''Helper method for pulling an instance object from a database.
During the transition to cellsv2 there is some complexity around
retrieving an instance from the database which this method hides. If
there is an instance mapping then query the cell for the instance, if
no mapping exists then query the configured nova database.
Once we are past the point that all deployments can be assumed to be
migrated to cellsv2 this method can go away.
'''
inst_map = None
try:
inst_map = objects.InstanceMapping.get_by_instance_uuid(
context, uuid)
except exception.InstanceMappingNotFound:
# TODO(alaski): This exception block can be removed once we're
# guaranteed everyone is using cellsv2.
pass
if inst_map is None or inst_map.cell_mapping is None:
# If inst_map is None then the deployment has not migrated to
# cellsv2 yet.
# If inst_map.cell_mapping is None then the instance is not in a
# cell yet. Until instance creation moves to the conductor the
# instance can be found in the configured database, so attempt
# to look it up.
try:
instance = objects.Instance.get_by_uuid(context, uuid)
except exception.InstanceNotFound:
# If we get here then the conductor is in charge of writing the
# instance to the database and hasn't done that yet. It's up to
# the caller of this method to determine what to do with that
# information.
return
else:
with nova_context.target_cell(context, inst_map.cell_mapping):
try:
instance = objects.Instance.get_by_uuid(context,
uuid)
except exception.InstanceNotFound:
# Since the cell_mapping exists we know the instance is in
# the cell, however InstanceNotFound means it's already
# deleted.
return
return instance
def _delete_while_booting(self, context, instance):
"""Handle deletion if the instance has not reached a cell yet
Deletion before an instance reaches a cell needs to be handled
differently. What we're attempting to do is delete the BuildRequest
before the api level conductor does. If we succeed here then the boot
request stops before reaching a cell. If not then the instance will
need to be looked up in a cell db and the normal delete path taken.
"""
# Before service version 15 deletion of the BuildRequest has no effect
# and will be cleaned up as part of the boot process.
service_version = objects.Service.get_minimum_version(
context, 'nova-api')
if service_version < 15:
return False
deleted = self._attempt_delete_of_buildrequest(context, instance)
if deleted:
# If we've reached this block the successful deletion of the
# buildrequest indicates that the build process should be halted by
# the conductor.
# Since conductor has halted the build process no cleanup of the
# instance is necessary, but quotas must still be decremented.
project_id, user_id = quotas_obj.ids_from_instance(
context, instance)
# This is confusing but actually decrements quota.
quotas = self._create_reservations(context,
instance,
instance.task_state,
project_id, user_id)
try:
quotas.commit()
# NOTE(alaski): Though the conductor halts the build process it
# does not currently delete the instance record. This is
# because in the near future the instance record will not be
# created if the buildrequest has been deleted here. For now we
# ensure the instance has been set to deleted at this point.
# Yes this directly contradicts the comment earlier in this
# method, but this is a temporary measure.
# Look up the instance because the current instance object was
# stashed on the buildrequest and therefore not complete enough
# to run .destroy().
instance = self._lookup_instance(context, instance.uuid)
if instance is not None:
# If instance is None it has already been deleted.
instance.destroy()
except exception.InstanceNotFound:
quotas.rollback()
return True
return False
def _attempt_delete_of_buildrequest(self, context, instance):
# If there is a BuildRequest then the instance may not have been
# written to a cell db yet. Delete the BuildRequest here, which
# will indicate that the Instance build should not proceed.
try:
build_req = objects.BuildRequest.get_by_instance_uuid(
context, instance.uuid)
build_req.destroy()
except exception.BuildRequestNotFound:
# This means that conductor has deleted the BuildRequest so the
# instance is now in a cell and the delete needs to proceed
# normally.
return False
return True
def _delete(self, context, instance, delete_type, cb, **instance_attrs):
if instance.disable_terminate:
LOG.info(_LI('instance termination disabled'),
instance=instance)
return
# If there is an instance.host the instance has been scheduled and
# sent to a cell/compute which means it was pulled from the cell db.
# Normal delete should be attempted.
if not instance.host:
if self._delete_while_booting(context, instance):
return
# If instance.host was not set it's possible that the Instance
# object here was pulled from a BuildRequest object and is not
# fully populated. Notably it will be missing an 'id' field which
# will prevent instance.destroy from functioning properly. A lookup
# is attempted which will either return a full Instance or None if
# not found. If not found then it's acceptable to skip the rest of
# the delete processing.
instance = self._lookup_instance(context, instance.uuid)
if not instance:
# Instance is already deleted.
return
bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
context, instance.uuid)

View File

@@ -221,6 +221,20 @@ class ComputeCellsAPI(compute_api.API):
context, instance.uuid)
# NOTE(danms): If we try to delete an instance with no cell,
# there isn't anything to salvage, so we can hard-delete here.
if self._delete_while_booting(context, instance):
return
# If instance.cell_name was not set it's possible that the Instance
# object here was pulled from a BuildRequest object and is not
# fully populated. Notably it will be missing an 'id' field which
# will prevent instance.destroy from functioning properly. A
# lookup is attempted which will either return a full Instance or
# None if not found. If not found then it's acceptable to skip the
# rest of the delete processing.
instance = self._lookup_instance(context, instance.uuid)
if instance is None:
# Instance has been deleted out from under us
return
try:
super(ComputeCellsAPI, self)._local_delete(context, instance,
bdms, method_name,

View File

@@ -513,7 +513,9 @@ class ComputeTaskManager(base.Base):
self._destroy_build_request(context, instance)
except exception.BuildRequestNotFound:
# This indicates an instance delete has been requested in the
# API. Stop the build and cleanup the instance_mapping.
# API. Stop the build, cleanup the instance_mapping and
# potentially the block_device_mappings
# TODO(alaski): Handle block_device_mapping cleanup
if inst_mapping:
inst_mapping.destroy()
return

View File

@@ -0,0 +1,78 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.unit.image import fake as fake_image
from nova.tests.unit import policy_fixture
class ServersPreSchedulingTestCase(test.TestCase):
"""Tests for the servers API with unscheduled instances.
With cellsv2 an instance is not written to an instance table in the cell
database until it has been scheduled to a cell. This means we need to be
careful to ensure the instance can still be represented before that point.
NOTE(alaski): The above is the desired future state, this test class is
here to confirm that the behavior does not change as the transition is
made.
This test class starts the wsgi stack for the nova api service, and uses
an in memory database for persistence. It does not allow requests to get
past scheduling.
"""
api_major_version = 'v2.1'
def setUp(self):
super(ServersPreSchedulingTestCase, self).setUp()
fake_image.stub_out_image_service(self)
self.useFixture(policy_fixture.RealPolicyFixture())
self.useFixture(nova_fixtures.NoopConductorFixture())
api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
api_version='v2.1'))
self.api = api_fixture.api
self.api.microversion = 'latest'
def test_delete_instance_from_buildrequest(self):
self.useFixture(nova_fixtures.AllServicesCurrent())
image_ref = fake_image.get_valid_image_id()
body = {
'server': {
'name': 'foo',
'imageRef': image_ref,
'flavorRef': '1',
}
}
create_resp = self.api.api_post('servers', body)
self.api.api_delete('servers/%s' % create_resp.body['server']['id'])
get_resp = self.api.api_get('servers/%s' %
create_resp.body['server']['id'],
check_response_status=False)
self.assertEqual(404, get_resp.status)
def test_delete_instance_from_buildrequest_old_service(self):
image_ref = fake_image.get_valid_image_id()
body = {
'server': {
'name': 'foo',
'imageRef': image_ref,
'flavorRef': '1',
}
}
create_resp = self.api.api_post('servers', body)
self.api.api_delete('servers/%s' % create_resp.body['server']['id'])
get_resp = self.api.api_get('servers/%s' %
create_resp.body['server']['id'],
check_response_status=False)
self.assertEqual(404, get_resp.status)

View File

@@ -7492,6 +7492,11 @@ class ComputeAPITestCase(BaseTestCase):
self.fake_show = fake_show
def fake_lookup(self, context, instance):
return instance
self.stub_out('nova.compute.api.API._lookup_instance', fake_lookup)
# Mock out build_instances and rebuild_instance since nothing in these
# tests should need those to actually run. We do this to avoid
# possible races with other tests that actually test those methods
@@ -8962,7 +8967,9 @@ class ComputeAPITestCase(BaseTestCase):
self.compute_api.add_fixed_ip(self.context, instance, '1')
self.compute_api.remove_fixed_ip(self.context,
instance, '192.168.1.1')
self.compute_api.delete(self.context, instance)
with mock.patch.object(self.compute_api, '_lookup_instance',
return_value=instance):
self.compute_api.delete(self.context, instance)
def test_attach_volume_invalid(self):
instance = fake_instance.fake_instance_obj(None, **{

View File

@@ -45,6 +45,7 @@ from nova.objects import fields as fields_obj
from nova.objects import quotas as quotas_obj
from nova import quota
from nova import test
from nova.tests import fixtures
from nova.tests.unit import fake_block_device
from nova.tests.unit import fake_instance
from nova.tests.unit import fake_volume
@@ -1167,6 +1168,7 @@ class _ComputeAPIUnitTestMixIn(object):
self.mox.StubOutWithMock(db, 'constraint')
self.mox.StubOutWithMock(db, 'instance_destroy')
self.mox.StubOutWithMock(self.compute_api, '_create_reservations')
self.mox.StubOutWithMock(self.compute_api, '_lookup_instance')
self.mox.StubOutWithMock(compute_utils,
'notify_about_instance_usage')
if self.cell_type == 'api':
@@ -1175,6 +1177,8 @@ class _ComputeAPIUnitTestMixIn(object):
rpcapi = self.compute_api.compute_rpcapi
self.mox.StubOutWithMock(rpcapi, 'terminate_instance')
self.compute_api._lookup_instance(self.context,
inst.uuid).AndReturn(inst)
objects.BlockDeviceMappingList.get_by_instance_uuid(
self.context, inst.uuid).AndReturn(
objects.BlockDeviceMappingList())
@@ -1321,6 +1325,149 @@ class _ComputeAPIUnitTestMixIn(object):
self.assertRaises(test.TestingException,
self.compute_api.soft_delete, self.context, inst)
@mock.patch.object(objects.BuildRequest, 'get_by_instance_uuid')
def test_attempt_delete_of_buildrequest_success(self, mock_get_by_inst):
build_req_mock = mock.MagicMock()
mock_get_by_inst.return_value = build_req_mock
inst = self._create_instance_obj()
self.assertTrue(
self.compute_api._attempt_delete_of_buildrequest(self.context,
inst))
self.assertTrue(build_req_mock.destroy.called)
@mock.patch.object(objects.BuildRequest, 'get_by_instance_uuid')
def test_attempt_delete_of_buildrequest_not_found(self, mock_get_by_inst):
mock_get_by_inst.side_effect = exception.BuildRequestNotFound(
uuid='fake')
inst = self._create_instance_obj()
self.assertFalse(
self.compute_api._attempt_delete_of_buildrequest(self.context,
inst))
def test_attempt_delete_of_buildrequest_already_deleted(self):
inst = self._create_instance_obj()
build_req_mock = mock.MagicMock()
build_req_mock.destroy.side_effect = exception.BuildRequestNotFound(
uuid='fake')
with mock.patch.object(objects.BuildRequest, 'get_by_instance_uuid',
return_value=build_req_mock):
self.assertFalse(
self.compute_api._attempt_delete_of_buildrequest(self.context,
inst))
self.assertTrue(build_req_mock.destroy.called)
def test_delete_while_booting_low_service_version(self):
inst = self._create_instance_obj()
with mock.patch.object(self.compute_api,
'_attempt_delete_of_buildrequest') as mock_attempt_delete:
self.assertFalse(
self.compute_api._delete_while_booting(self.context, inst))
self.assertFalse(mock_attempt_delete.called)
def test_delete_while_booting_buildreq_not_deleted(self):
self.useFixture(fixtures.AllServicesCurrent())
inst = self._create_instance_obj()
with mock.patch.object(self.compute_api,
'_attempt_delete_of_buildrequest',
return_value=False):
self.assertFalse(
self.compute_api._delete_while_booting(self.context, inst))
def test_delete_while_booting_buildreq_deleted_instance_none(self):
self.useFixture(fixtures.AllServicesCurrent())
inst = self._create_instance_obj()
quota_mock = mock.MagicMock()
@mock.patch.object(self.compute_api, '_attempt_delete_of_buildrequest',
return_value=True)
@mock.patch.object(self.compute_api, '_lookup_instance',
return_value=None)
@mock.patch.object(self.compute_api, '_create_reservations',
return_value=quota_mock)
def test(mock_create_res, mock_lookup, mock_attempt):
self.assertTrue(
self.compute_api._delete_while_booting(self.context,
inst))
self.assertTrue(quota_mock.commit.called)
test()
def test_delete_while_booting_buildreq_deleted_instance_not_found(self):
self.useFixture(fixtures.AllServicesCurrent())
inst = self._create_instance_obj()
quota_mock = mock.MagicMock()
@mock.patch.object(self.compute_api, '_attempt_delete_of_buildrequest',
return_value=True)
@mock.patch.object(self.compute_api, '_lookup_instance',
side_effect=exception.InstanceNotFound(
instance_id='fake'))
@mock.patch.object(self.compute_api, '_create_reservations',
return_value=quota_mock)
def test(mock_create_res, mock_lookup, mock_attempt):
self.assertTrue(
self.compute_api._delete_while_booting(self.context,
inst))
self.assertTrue(quota_mock.commit.called)
self.assertTrue(quota_mock.rollback.called)
test()
@mock.patch.object(context, 'target_cell')
@mock.patch.object(objects.InstanceMapping, 'get_by_instance_uuid',
side_effect=exception.InstanceMappingNotFound(
uuid='fake'))
def test_lookup_instance_mapping_none(self, mock_map_get,
mock_target_cell):
instance = self._create_instance_obj()
with mock.patch.object(objects.Instance, 'get_by_uuid',
return_value=instance) as mock_inst_get:
ret_instance = self.compute_api._lookup_instance(self.context,
instance.uuid)
self.assertEqual(instance, ret_instance)
mock_inst_get.assert_called_once_with(self.context, instance.uuid)
self.assertFalse(mock_target_cell.called)
@mock.patch.object(context, 'target_cell')
@mock.patch.object(objects.InstanceMapping, 'get_by_instance_uuid',
return_value=objects.InstanceMapping(cell_mapping=None))
def test_lookup_instance_cell_mapping_none(self, mock_map_get,
mock_target_cell):
instance = self._create_instance_obj()
with mock.patch.object(objects.Instance, 'get_by_uuid',
return_value=instance) as mock_inst_get:
ret_instance = self.compute_api._lookup_instance(self.context,
instance.uuid)
self.assertEqual(instance, ret_instance)
mock_inst_get.assert_called_once_with(self.context, instance.uuid)
self.assertFalse(mock_target_cell.called)
@mock.patch.object(context, 'target_cell')
def test_lookup_instance_cell_mapping(self, mock_target_cell):
instance = self._create_instance_obj()
inst_map = objects.InstanceMapping(
cell_mapping=objects.CellMapping(database_connection='',
transport_url='none'))
@mock.patch.object(objects.InstanceMapping, 'get_by_instance_uuid',
return_value=inst_map)
@mock.patch.object(objects.Instance, 'get_by_uuid',
return_value=instance)
def test(mock_inst_get, mock_map_get):
ret_instance = self.compute_api._lookup_instance(self.context,
instance.uuid)
self.assertEqual(instance, ret_instance)
mock_inst_get.assert_called_once_with(self.context, instance.uuid)
mock_target_cell.assert_called_once_with(self.context,
inst_map.cell_mapping)
test()
def _test_confirm_resize(self, mig_ref_passed=False):
params = dict(vm_state=vm_states.RESIZED)
fake_inst = self._create_instance_obj(params=params)

View File

@@ -143,15 +143,35 @@ class CellsComputeAPITestCase(test_compute.ComputeAPITestCase):
def test_error_evacuate(self):
self.skipTest("Test is incompatible with cells.")
@mock.patch.object(compute_api.API, '_local_delete')
@mock.patch.object(compute_api.API, '_lookup_instance',
return_value=None)
def test_delete_instance_no_cell_instance_disappear(self, mock_lookup,
mock_local_delete):
inst = self._create_fake_instance_obj()
@mock.patch.object(self.compute_api.cells_rpcapi,
'instance_delete_everywhere')
def test(mock_inst_del):
self.compute_api.delete(self.context, inst)
mock_lookup.assert_called_once_with(self.context, inst.uuid)
mock_inst_del.assert_called_once_with(self.context, inst, 'hard')
self.assertFalse(mock_local_delete.called)
test()
def _test_delete_instance_no_cell(self, method_name):
cells_rpcapi = self.compute_api.cells_rpcapi
self.mox.StubOutWithMock(cells_rpcapi,
'instance_delete_everywhere')
self.mox.StubOutWithMock(compute_api.API, '_local_delete')
self.mox.StubOutWithMock(compute_api.API, '_lookup_instance')
inst = self._create_fake_instance_obj()
delete_type = method_name == 'soft_delete' and 'soft' or 'hard'
cells_rpcapi.instance_delete_everywhere(self.context,
inst, delete_type)
compute_api.API._lookup_instance(self.context,
inst.uuid).AndReturn(inst)
compute_api.API._local_delete(self.context, inst,
mox.IsA(objects.BlockDeviceMappingList),
method_name, mox.IgnoreArg())
@@ -161,37 +181,45 @@ class CellsComputeAPITestCase(test_compute.ComputeAPITestCase):
getattr(self.compute_api, method_name)(self.context, inst)
def test_delete_instance_no_cell_constraint_failure_does_not_loop(self):
with mock.patch.object(self.compute_api.cells_rpcapi,
'instance_delete_everywhere'):
inst = self._create_fake_instance_obj()
inst.cell_name = None
inst = self._create_fake_instance_obj()
inst.cell_name = None
inst.destroy = mock.MagicMock()
inst.destroy.side_effect = exception.ObjectActionError(action='',
reason='')
inst.refresh = mock.MagicMock()
inst.destroy = mock.MagicMock()
inst.destroy.side_effect = exception.ObjectActionError(action='',
reason='')
inst.refresh = mock.MagicMock()
@mock.patch.object(self.compute_api.cells_rpcapi,
'instance_delete_everywhere')
@mock.patch.object(compute_api.API, '_lookup_instance',
return_value=inst)
def _test(_mock_lookup_inst, _mock_delete_everywhere):
self.assertRaises(exception.ObjectActionError,
self.compute_api.delete, self.context, inst)
inst.destroy.assert_called_once_with()
_test()
def test_delete_instance_no_cell_constraint_failure_corrects_itself(self):
def add_cell_name(context, instance, delete_type):
instance.cell_name = 'fake_cell_name'
inst = self._create_fake_instance_obj()
inst.cell_name = None
inst.destroy = mock.MagicMock()
inst.destroy.side_effect = exception.ObjectActionError(action='',
reason='')
inst.refresh = mock.MagicMock()
@mock.patch.object(compute_api.API, 'delete')
@mock.patch.object(self.compute_api.cells_rpcapi,
'instance_delete_everywhere', side_effect=add_cell_name)
def _test(mock_delete_everywhere, mock_compute_delete):
inst = self._create_fake_instance_obj()
inst.cell_name = None
inst.destroy = mock.MagicMock()
inst.destroy.side_effect = exception.ObjectActionError(action='',
reason='')
inst.refresh = mock.MagicMock()
@mock.patch.object(compute_api.API, '_lookup_instance',
return_value=inst)
def _test(_mock_lookup_inst, mock_delete_everywhere,
mock_compute_delete):
self.compute_api.delete(self.context, inst)
inst.destroy.assert_called_once_with()
@@ -204,8 +232,9 @@ class CellsComputeAPITestCase(test_compute.ComputeAPITestCase):
# it will raise ObjectActionError if the instance has already
# been deleted by a instance_destroy_at_top, and instance.refresh()
# will raise InstanceNotFound
instance = objects.Instance(uuid=uuids.destroy_instance,
cell_name=None)
instance = objects.Instance(context=self.context,
uuid=uuids.destroy_instance,
cell_name=None, host=None)
actionerror = exception.ObjectActionError(action='destroy', reason='')
notfound = exception.InstanceNotFound(instance_id=instance.uuid)
@@ -215,8 +244,10 @@ class CellsComputeAPITestCase(test_compute.ComputeAPITestCase):
@mock.patch.object(compute_api.API, '_local_delete',
side_effect=actionerror)
@mock.patch.object(instance, 'refresh', side_effect=notfound)
def _test(mock_refresh, mock_local_delete, mock_delete_everywhere,
mock_compute_delete):
@mock.patch.object(compute_api.API, '_lookup_instance',
return_value=instance)
def _test(_mock_lookup_instance, mock_refresh, mock_local_delete,
mock_delete_everywhere, mock_compute_delete):
self.compute_api.delete(self.context, instance)
mock_delete_everywhere.assert_called_once_with(self.context,
instance, 'hard')
@@ -232,15 +263,19 @@ class CellsComputeAPITestCase(test_compute.ComputeAPITestCase):
# lookup before instance.destroy() is reached, if the instance has
# already been deleted by a instance_destroy_at_top,
# InstanceNotFound will be raised
instance = objects.Instance(uuid=uuids.delete_instance, cell_name=None)
instance = objects.Instance(context=self.context,
uuid=uuids.delete_instance, cell_name=None,
host=None)
notfound = exception.InstanceNotFound(instance_id=instance.uuid)
@mock.patch.object(compute_api.API, 'delete')
@mock.patch.object(self.compute_api.cells_rpcapi,
'instance_delete_everywhere')
@mock.patch.object(compute_api.API, '_lookup_instance',
return_value=instance)
@mock.patch.object(compute_api.API, '_local_delete',
side_effect=notfound)
def _test(mock_local_delete, mock_delete_everywhere,
def _test(mock_local_delete, _mock_lookup, mock_delete_everywhere,
mock_compute_delete):
self.compute_api.delete(self.context, instance)
mock_delete_everywhere.assert_called_once_with(self.context,