Delete orphaned instance files from compute nodes

While resizing/revert-resizing instance, if instance gets deleted
in between, then instance files remains either on the source or
destination compute node.

To address this issue, added a new periodic task
'_cleanup_incomplete_migrations' which takes care of deleting
instance files from source/destination compute nodes and then
mark migration record as failed so that it doesn't appear again
in the next periodic task run.

SecurityImpact

Closes-Bug: 1392527
Change-Id: I9866d8e32e99b9f907921f4b226edf7b62bd83a7
This commit is contained in:
Rajesh Tailor
2015-03-04 05:05:19 -08:00
committed by Tristan Cacqueray
parent bd3b5a62b4
commit 18d6b5cc79
2 changed files with 129 additions and 4 deletions

View File

@@ -285,15 +285,21 @@ def errors_out_migration(function):
def decorated_function(self, context, *args, **kwargs):
try:
return function(self, context, *args, **kwargs)
except Exception:
except Exception as ex:
with excutils.save_and_reraise_exception():
wrapped_func = utils.get_wrapped_function(function)
keyed_args = safe_utils.getcallargs(wrapped_func, context,
*args, **kwargs)
migration = keyed_args['migration']
status = migration.status
if status not in ['migrating', 'post-migrating']:
return
# NOTE(rajesht): If InstanceNotFound error is thrown from
# decorated function, migration status should be set to
# 'error', without checking current migration status.
if not isinstance(ex, exception.InstanceNotFound):
status = migration.status
if status not in ['migrating', 'post-migrating']:
return
migration.status = 'error'
try:
with migration.obj_as_admin():
@@ -3383,6 +3389,7 @@ class ComputeManager(manager.Manager):
@wrap_exception()
@reverts_task_state
@wrap_instance_event
@errors_out_migration
@wrap_instance_fault
def revert_resize(self, context, instance, migration, reservations):
"""Destroys the new instance on the destination machine.
@@ -3440,6 +3447,7 @@ class ComputeManager(manager.Manager):
@wrap_exception()
@reverts_task_state
@wrap_instance_event
@errors_out_migration
@wrap_instance_fault
def finish_revert_resize(self, context, instance, reservations, migration):
"""Finishes the second half of reverting a resize.
@@ -6422,6 +6430,51 @@ class ComputeManager(manager.Manager):
with utils.temporary_mutation(context, read_deleted='yes'):
instance.save()
@periodic_task.periodic_task(spacing=CONF.instance_delete_interval)
def _cleanup_incomplete_migrations(self, context):
"""Delete instance files on failed resize/revert-resize operation
During resize/revert-resize operation, if that instance gets deleted
in-between then instance files might remain either on source or
destination compute node because of race condition.
"""
LOG.debug('Cleaning up deleted instances with incomplete migration ')
migration_filters = {'host': CONF.host,
'status': 'error'}
migrations = objects.MigrationList.get_by_filters(context,
migration_filters)
if not migrations:
return
inst_uuid_from_migrations = set([migration.instance_uuid for migration
in migrations])
inst_filters = {'deleted': True, 'soft_deleted': False,
'uuid': inst_uuid_from_migrations}
attrs = ['info_cache', 'security_groups', 'system_metadata']
with utils.temporary_mutation(context, read_deleted='yes'):
instances = objects.InstanceList.get_by_filters(
context, inst_filters, expected_attrs=attrs, use_slave=True)
for instance in instances:
if instance.host != CONF.host:
for migration in migrations:
if instance.uuid == migration.instance_uuid:
# Delete instance files if not cleanup properly either
# from the source or destination compute nodes when
# the instance is deleted during resizing.
self.driver.delete_instance_files(instance)
try:
migration.status = 'failed'
with migration.obj_as_admin():
migration.save()
except exception.MigrationNotFound:
LOG.warning(_LW("Migration %s is not found."),
migration.id, context=context,
instance=instance)
break
@messaging.expected_exceptions(exception.InstanceQuiesceNotSupported,
exception.QemuGuestAgentNotEnabled,
exception.NovaException,

View File

@@ -1425,6 +1425,78 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
self.assertFalse(c.cleaned)
self.assertEqual('1', c.system_metadata['clean_attempts'])
@mock.patch.object(objects.Migration, 'obj_as_admin')
@mock.patch.object(objects.Migration, 'save')
@mock.patch.object(objects.MigrationList, 'get_by_filters')
@mock.patch.object(objects.InstanceList, 'get_by_filters')
def _test_cleanup_incomplete_migrations(self, inst_host,
mock_inst_get_by_filters,
mock_migration_get_by_filters,
mock_save, mock_obj_as_admin):
def fake_inst(context, uuid, host):
inst = objects.Instance(context)
inst.uuid = uuid
inst.host = host
return inst
def fake_migration(uuid, status, inst_uuid, src_host, dest_host):
migration = objects.Migration()
migration.uuid = uuid
migration.status = status
migration.instance_uuid = inst_uuid
migration.source_compute = src_host
migration.dest_compute = dest_host
return migration
fake_instances = [fake_inst(self.context, '111', inst_host),
fake_inst(self.context, '222', inst_host)]
fake_migrations = [fake_migration('123', 'error', '111',
'fake-host', 'fake-mini'),
fake_migration('456', 'error', '222',
'fake-host', 'fake-mini')]
mock_migration_get_by_filters.return_value = fake_migrations
mock_inst_get_by_filters.return_value = fake_instances
with mock.patch.object(self.compute.driver, 'delete_instance_files'):
self.compute._cleanup_incomplete_migrations(self.context)
# Ensure that migration status is set to 'failed' after instance
# files deletion for those instances whose instance.host is not
# same as compute host where periodic task is running.
for inst in fake_instances:
if inst.host != CONF.host:
for mig in fake_migrations:
if inst.uuid == mig.instance_uuid:
self.assertEqual('failed', mig.status)
def test_cleanup_incomplete_migrations_dest_node(self):
"""Test to ensure instance files are deleted from destination node.
If instance gets deleted during resizing/revert-resizing operation,
in that case instance files gets deleted from instance.host (source
host here), but there is possibility that instance files could be
present on destination node.
This test ensures that `_cleanup_incomplete_migration` periodic
task deletes orphaned instance files from destination compute node.
"""
self.flags(host='fake-mini')
self._test_cleanup_incomplete_migrations('fake-host')
def test_cleanup_incomplete_migrations_source_node(self):
"""Test to ensure instance files are deleted from source node.
If instance gets deleted during resizing/revert-resizing operation,
in that case instance files gets deleted from instance.host (dest
host here), but there is possibility that instance files could be
present on source node.
This test ensures that `_cleanup_incomplete_migration` periodic
task deletes orphaned instance files from source compute node.
"""
self.flags(host='fake-host')
self._test_cleanup_incomplete_migrations('fake-mini')
def test_attach_interface_failure(self):
# Test that the fault methods are invoked when an attach fails
db_instance = fake_instance.fake_db_instance()