Compute restart causes period of network 'blackout'
Fixes bug 1034401. When a compute service is restarted, each instance running on the host has its iptables rules built and applied sequentially during the host init stage. The impact of this, especially on a host running many instances, can be observed as a period where some instances are not accessible because the existing iptables rules have been torn down and not yet re-applied. The work-around presented here is a configurable/flagged deferred mode that prevents the application of the iptables rules until all instances on the host have been initialised; then the rules for all instances are applied at once, preventing a 'blackout' period. Change-Id: I0da90d07e54225fb63f3884897fb00a6027cd537
This commit is contained in:
@@ -275,43 +275,55 @@ class ComputeManager(manager.SchedulerDependentManager):
|
||||
self.driver.init_host(host=self.host)
|
||||
context = nova.context.get_admin_context()
|
||||
instances = self.db.instance_get_all_by_host(context, self.host)
|
||||
for count, instance in enumerate(instances):
|
||||
db_state = instance['power_state']
|
||||
drv_state = self._get_power_state(context, instance)
|
||||
|
||||
expect_running = (db_state == power_state.RUNNING and
|
||||
drv_state != db_state)
|
||||
if FLAGS.defer_iptables_apply:
|
||||
self.driver.filter_defer_apply_on()
|
||||
|
||||
LOG.debug(_('Current state is %(drv_state)s, state in DB is '
|
||||
'%(db_state)s.'), locals(), instance=instance)
|
||||
try:
|
||||
for count, instance in enumerate(instances):
|
||||
db_state = instance['power_state']
|
||||
drv_state = self._get_power_state(context, instance)
|
||||
|
||||
net_info = compute_utils.get_nw_info_for_instance(instance)
|
||||
expect_running = (db_state == power_state.RUNNING and
|
||||
drv_state != db_state)
|
||||
|
||||
# We're calling plug_vifs to ensure bridge and iptables
|
||||
# filters are present, calling it once is enough.
|
||||
if count == 0:
|
||||
legacy_net_info = self._legacy_nw_info(net_info)
|
||||
self.driver.plug_vifs(instance, legacy_net_info)
|
||||
LOG.debug(_('Current state is %(drv_state)s, state in DB is '
|
||||
'%(db_state)s.'), locals(), instance=instance)
|
||||
|
||||
if ((expect_running and FLAGS.resume_guests_state_on_host_boot) or
|
||||
FLAGS.start_guests_on_host_boot):
|
||||
LOG.info(_('Rebooting instance after nova-compute restart.'),
|
||||
locals(), instance=instance)
|
||||
try:
|
||||
self.driver.resume_state_on_host_boot(context, instance,
|
||||
self._legacy_nw_info(net_info))
|
||||
except NotImplementedError:
|
||||
LOG.warning(_('Hypervisor driver does not support '
|
||||
'resume guests'), instance=instance)
|
||||
net_info = compute_utils.get_nw_info_for_instance(instance)
|
||||
|
||||
elif drv_state == power_state.RUNNING:
|
||||
# VMWareAPI drivers will raise an exception
|
||||
try:
|
||||
self.driver.ensure_filtering_rules_for_instance(instance,
|
||||
self._legacy_nw_info(net_info))
|
||||
except NotImplementedError:
|
||||
LOG.warning(_('Hypervisor driver does not support '
|
||||
'firewall rules'), instance=instance)
|
||||
# We're calling plug_vifs to ensure bridge and iptables
|
||||
# filters are present, calling it once is enough.
|
||||
if count == 0:
|
||||
legacy_net_info = self._legacy_nw_info(net_info)
|
||||
self.driver.plug_vifs(instance, legacy_net_info)
|
||||
|
||||
if ((expect_running and FLAGS.resume_guests_state_on_host_boot)
|
||||
or FLAGS.start_guests_on_host_boot):
|
||||
LOG.info(
|
||||
_('Rebooting instance after nova-compute restart.'),
|
||||
locals(), instance=instance)
|
||||
try:
|
||||
self.driver.resume_state_on_host_boot(context,
|
||||
instance,
|
||||
self._legacy_nw_info(net_info))
|
||||
except NotImplementedError:
|
||||
LOG.warning(_('Hypervisor driver does not support '
|
||||
'resume guests'), instance=instance)
|
||||
|
||||
elif drv_state == power_state.RUNNING:
|
||||
# VMWareAPI drivers will raise an exception
|
||||
try:
|
||||
self.driver.ensure_filtering_rules_for_instance(
|
||||
instance,
|
||||
self._legacy_nw_info(net_info))
|
||||
except NotImplementedError:
|
||||
LOG.warning(_('Hypervisor driver does not support '
|
||||
'firewall rules'), instance=instance)
|
||||
|
||||
finally:
|
||||
if FLAGS.defer_iptables_apply:
|
||||
self.driver.filter_defer_apply_off()
|
||||
|
||||
def _get_power_state(self, context, instance):
|
||||
"""Retrieve the power state for the given instance."""
|
||||
|
@@ -428,6 +428,11 @@ global_opts = [
|
||||
'min_disk'],
|
||||
help='These are image properties which a snapshot should not'
|
||||
' inherit from an instance'),
|
||||
cfg.BoolOpt('defer_iptables_apply',
|
||||
default=False,
|
||||
help='Whether to batch up the application of IPTables rules'
|
||||
' during a host restart and apply all at the end of the'
|
||||
' init phase'),
|
||||
]
|
||||
|
||||
FLAGS.register_opts(global_opts)
|
||||
|
@@ -263,6 +263,8 @@ class IptablesManager(object):
|
||||
'nat': IptablesTable()}
|
||||
self.ipv6 = {'filter': IptablesTable()}
|
||||
|
||||
self.iptables_apply_deferred = False
|
||||
|
||||
# Add a nova-filter-top chain. It's intended to be shared
|
||||
# among the various nova components. It sits at the very top
|
||||
# of FORWARD and OUTPUT.
|
||||
@@ -312,8 +314,21 @@ class IptablesManager(object):
|
||||
self.ipv4['nat'].add_chain('float-snat')
|
||||
self.ipv4['nat'].add_rule('snat', '-j $float-snat')
|
||||
|
||||
@utils.synchronized('iptables', external=True)
def defer_apply_on(self):
    """Stop applying rules until defer_apply_off() is called.

    While the flag is set, apply() becomes a no-op; the accumulated
    in-memory rules are flushed in one batch when deferral ends.
    """
    self.iptables_apply_deferred = True
|
||||
|
||||
def defer_apply_off(self):
    """End deferral and immediately flush the pending rule set.

    Clears the deferred flag first so that the state is consistent
    before the (synchronized) _apply() call runs.
    """
    self.iptables_apply_deferred = False
    self._apply()
|
||||
|
||||
def apply(self):
    """Apply the in-memory rules now, unless application is deferred.

    When iptables_apply_deferred is set this is a no-op; the rules
    are written out later by defer_apply_off().
    """
    if not self.iptables_apply_deferred:
        self._apply()
|
||||
|
||||
@utils.synchronized('iptables', external=True)
|
||||
def _apply(self):
|
||||
"""Apply the current in-memory set of iptables rules.
|
||||
|
||||
This will blow away any rules left over from previous runs of the
|
||||
|
@@ -508,3 +508,28 @@ class LinuxNetworkTestCase(test.TestCase):
|
||||
'2001:db8::/64', 'dev', 'eth0'),
|
||||
]
|
||||
self._test_initialize_gateway(existing, expected)
|
||||
|
||||
def test_apply_ran(self):
    # With deferral disabled, apply() must delegate to _apply()
    # exactly once and return None.
    mgr = linux_net.IptablesManager()
    mgr.iptables_apply_deferred = False
    self.mox.StubOutWithMock(mgr, '_apply')
    mgr._apply()
    self.mox.ReplayAll()
    self.assertEqual(mgr.apply(), None)
|
||||
|
||||
def test_apply_not_run(self):
    # With deferral enabled, apply() must NOT call _apply().
    # No expectation is recorded on the stub, so mox verification
    # (presumably run in the base-class tearDown — confirm) fails
    # if _apply() is ever invoked.
    mgr = linux_net.IptablesManager()
    mgr.iptables_apply_deferred = True
    self.mox.StubOutWithMock(mgr, '_apply')
    self.mox.ReplayAll()
    mgr.apply()
|
||||
|
||||
def test_deferred_unset_apply_ran(self):
    # defer_apply_off() must clear the deferred flag and flush the
    # pending rules through _apply().
    mgr = linux_net.IptablesManager()
    mgr.iptables_apply_deferred = True
    self.mox.StubOutWithMock(mgr, '_apply')
    mgr._apply()
    self.mox.ReplayAll()
    mgr.defer_apply_off()
    self.assertFalse(mgr.iptables_apply_deferred)
|
||||
|
@@ -488,6 +488,14 @@ class ComputeDriver(object):
|
||||
# TODO(Vek): Need to pass context in for access to auth_token
|
||||
raise NotImplementedError()
|
||||
|
||||
def filter_defer_apply_on(self):
    """Defer application of IPTables rules.

    Base-class default is a no-op; drivers that batch firewall rule
    application (e.g. via an iptables manager) override this.
    """
    pass
|
||||
|
||||
def filter_defer_apply_off(self):
    """Turn off deferral of IPTables rules and apply the rules now.

    Base-class default is a no-op; drivers that batch firewall rule
    application override this to flush their pending rules.
    """
    pass
|
||||
|
||||
def unfilter_instance(self, instance, network_info):
|
||||
"""Stop filtering instance"""
|
||||
# TODO(Vek): Need to pass context in for access to auth_token
|
||||
|
@@ -47,6 +47,14 @@ class FirewallDriver(object):
|
||||
At this point, the instance isn't running yet."""
|
||||
raise NotImplementedError()
|
||||
|
||||
def filter_defer_apply_on(self):
    """Defer application of IPTables rules.

    No-op in the base firewall driver; iptables-backed drivers
    override this to batch rule application.
    """
    pass
|
||||
|
||||
def filter_defer_apply_off(self):
    """Turn off deferral of IPTables rules and apply the rules now.

    No-op in the base firewall driver; iptables-backed drivers
    override this to flush rules accumulated while deferred.
    """
    pass
|
||||
|
||||
def unfilter_instance(self, instance, network_info):
|
||||
"""Stop filtering instance"""
|
||||
raise NotImplementedError()
|
||||
@@ -128,6 +136,12 @@ class IptablesFirewallDriver(FirewallDriver):
|
||||
"""No-op. Everything is done in prepare_instance_filter."""
|
||||
pass
|
||||
|
||||
def filter_defer_apply_on(self):
    """Begin deferring rule application to the IptablesManager."""
    self.iptables.defer_apply_on()
|
||||
|
||||
def filter_defer_apply_off(self):
    """Stop deferring and let the IptablesManager flush pending rules."""
    self.iptables.defer_apply_off()
|
||||
|
||||
def unfilter_instance(self, instance, network_info):
|
||||
# make sure this is legacy nw_info
|
||||
network_info = self._handle_network_info_model(network_info)
|
||||
|
@@ -2435,6 +2435,12 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
raise exception.NovaException(msg % instance_ref["name"])
|
||||
time.sleep(1)
|
||||
|
||||
def filter_defer_apply_on(self):
    """Forward rule-application deferral to the firewall driver."""
    self.firewall_driver.filter_defer_apply_on()
|
||||
|
||||
def filter_defer_apply_off(self):
    """Forward end-of-deferral (and rule flush) to the firewall driver."""
    self.firewall_driver.filter_defer_apply_off()
|
||||
|
||||
def live_migration(self, ctxt, instance_ref, dest,
|
||||
post_method, recover_method, block_migration=False):
|
||||
"""Spawning live_migration operation for distributing high-load.
|
||||
|
Reference in New Issue
Block a user