Merge "Add a workaround config toggle to refuse ceph image upload"
This commit is contained in:
@@ -246,6 +246,30 @@ candidates if necessary. This has a slight performance impact and is not
|
||||
necessary on new or upgraded deployments where the new configuration has been
|
||||
set on all hosts. By setting this option, the second lookup is disabled and the
|
||||
scheduler will only request ``PCPU``-based allocations.
|
||||
"""),
|
||||
cfg.BoolOpt(
|
||||
'never_download_image_if_on_rbd',
|
||||
default=False,
|
||||
help="""
|
||||
When booting from an image on a ceph-backed compute node, if the image does not
|
||||
already reside on the ceph cluster (as would be the case if glance is
|
||||
also using the same cluster), nova will download the image from glance and
|
||||
upload it to ceph itself. If using multiple ceph clusters, this may cause nova
|
||||
to unintentionally duplicate the image in a non-COW-able way in the local
|
||||
ceph deployment, wasting space.
|
||||
|
||||
For more information, refer to the bug report:
|
||||
|
||||
https://bugs.launchpad.net/nova/+bug/1858877
|
||||
|
||||
Enabling this option will cause nova to *refuse* to boot an instance if it
|
||||
would require downloading the image from glance and uploading it to ceph
|
||||
itself.
|
||||
|
||||
Related options:
|
||||
|
||||
* ``compute_driver`` (libvirt)
|
||||
* ``[libvirt]/images_type`` (rbd)
|
||||
"""),
|
||||
]
|
||||
|
||||
|
@@ -21454,6 +21454,52 @@ class LibvirtDriverTestCase(test.NoDBTestCase, TraitsComparisonMixin):
|
||||
None)
|
||||
self.assertFalse(mock_inject.called)
|
||||
|
||||
@mock.patch('nova.virt.libvirt.utils.fetch_image')
@mock.patch('nova.virt.libvirt.storage.rbd_utils.RBDDriver')
@mock.patch.object(imagebackend, 'IMAGE_API')
def test_create_fetch_image_ceph_workaround(self, mock_image, mock_rbd,
                                            mock_fetch):
    """Exercise [workarounds]/never_download_image_if_on_rbd.

    The same root-disk creation is run twice with images_type=rbd and an
    image that cannot be cloned:

    * with the workaround at its default (disabled) the driver is
      expected to fall back to fetching the image;
    * with the workaround enabled the driver is expected to raise
      ImageUnacceptable and never attempt the fetch.
    """
    # mock_rbd stands in for the RBDDriver *class*; behaviour of the
    # driver object is configured on mock_rbd.return_value, which is what
    # a call to the patched class hands back.
    rbd_driver = mock_rbd.return_value
    # Make sure that rbd clone will fail as un-clone-able
    rbd_driver.is_cloneable.return_value = False
    # Make sure the rbd code thinks the image does not already exist
    rbd_driver.exists.return_value = False
    # Make sure the rbd code says the image is small
    rbd_driver.size.return_value = 128 * units.Mi
    # Make sure IMAGE_API.get() returns a raw image
    mock_image.get.return_value = {'locations': [], 'disk_format': 'raw'}

    instance = self._create_instance()
    disk_images = {'image_id': 'foo'}
    self.flags(images_type='rbd', group='libvirt')
    drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)

    def do_create():
        # Reset the fetch mock and run our driver method so we can
        # check for called-ness after each attempt
        mock_fetch.reset_mock()
        drvr._create_and_inject_local_root(self.context,
                                           instance,
                                           False,
                                           '',
                                           disk_images,
                                           get_injection_info(),
                                           None)

    # Do an image create with rbd
    do_create()
    # Make sure it tried fetch, which implies that it tried and
    # failed to clone.
    mock_fetch.assert_called()

    # Enable the workaround
    self.flags(never_download_image_if_on_rbd=True,
               group='workarounds')
    # Ensure that we raise the original ImageUnacceptable from the
    # failed clone...
    self.assertRaises(exception.ImageUnacceptable, do_create)
    # ...and ensure that we did _not_ try to fetch
    mock_fetch.assert_not_called()
|
||||
|
||||
@mock.patch('nova.virt.netutils.get_injected_network_template')
|
||||
@mock.patch('nova.virt.disk.api.inject_data')
|
||||
@mock.patch.object(libvirt_driver.LibvirtDriver, "_conn")
|
||||
|
@@ -3867,9 +3867,24 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
backend.create_snap(libvirt_utils.RESIZE_SNAPSHOT_NAME)
|
||||
if backend.SUPPORTS_CLONE:
|
||||
def clone_fallback_to_fetch(*args, **kwargs):
    """Clone the image in the backend, fetching it if the clone fails.

    First tries ``backend.clone()`` for ``disk_images['image_id']``. If
    that raises ImageUnacceptable, the image is fetched via
    ``libvirt_utils.fetch_image(*args, **kwargs)`` instead, unless
    ``[libvirt]/images_type`` is ``rbd`` and
    ``[workarounds]/never_download_image_if_on_rbd`` is set, in which
    case a warning is logged and the ImageUnacceptable is re-raised.
    """
    # Decide up front whether a download is permitted at all; both
    # options are read before the clone is attempted.
    using_rbd = CONF.libvirt.images_type == 'rbd'
    download_forbidden = (
        using_rbd and CONF.workarounds.never_download_image_if_on_rbd)

    try:
        backend.clone(context, disk_images['image_id'])
    except exception.ImageUnacceptable:
        if download_forbidden:
            # Propagate the error from the failed ceph clone as-is.
            # The compute manager expects ImageUnacceptable as a
            # possible result of spawn(), from which this is called.
            refusal_msg = (
                'Image %s is not on my ceph and '
                '[workarounds]/never_download_image_if_on_rbd=True; '
                'refusing to fetch and upload.')
            with excutils.save_and_reraise_exception():
                LOG.warning(refusal_msg, disk_images['image_id'])
        # Only reached when downloading is allowed.
        libvirt_utils.fetch_image(*args, **kwargs)
|
||||
fetch_func = clone_fallback_to_fetch
|
||||
else:
|
||||
|
@@ -0,0 +1,19 @@
|
||||
---
|
||||
other:
|
||||
- |
|
||||
Nova now has a config option called
|
||||
``[workarounds]/never_download_image_if_on_rbd`` which helps to
|
||||
avoid pathological storage behavior with multiple ceph clusters.
|
||||
Currently, Nova does *not* support multiple ceph clusters
|
||||
properly, but Glance can be configured with them. If an instance
|
||||
is booted from an image residing in a ceph cluster other than the
|
||||
one Nova knows about, it will silently download it from Glance and
|
||||
re-upload the image to the local ceph privately for that
|
||||
instance. Unlike the behavior you expect when configuring Nova and
|
||||
Glance for ceph, Nova will continue to do this over and over for
|
||||
the same image when subsequent instances are booted, consuming a
|
||||
large amount of storage unexpectedly. The new workaround option
|
||||
will cause Nova to refuse to do this download/upload behavior and
|
||||
instead fail the instance boot. It is simply a stop-gap effort to
|
||||
prevent unsupported deployments with multiple ceph clusters from
|
||||
silently consuming large amounts of disk space.
|
Reference in New Issue
Block a user