From ed7b6dbc952e49ca69de9a94a01398b106aece4b Mon Sep 17 00:00:00 2001
From: Qiaowei Ren
Date: Thu, 14 Jul 2016 09:57:35 +0800
Subject: [PATCH] add memory bandwidth meter

Some Intel processor families (e.g. the Intel Xeon processor E5 v3
family) introduced MBM (Memory Bandwidth Monitoring) to measure
bandwidth from one level of the cache hierarchy to the next by
applications running on the platform. It supports both 'local
bandwidth' and 'total bandwidth' monitoring for the socket. Local
bandwidth measures the amount of data sent through the memory
controller on the socket and total b/w measures the total system
bandwidth.

This patch introduces two new meters to get memory bandwidth
statistics based on Intel CMT feature.

Change-Id: Iab9b326695b4ac5b5841b930ebad633d4e3a01e6
---
 ceilometer/compute/pollsters/memory.py        | 103 ++++++++++++++++++
 ceilometer/compute/virt/inspector.py          |  18 +++
 ceilometer/compute/virt/libvirt/inspector.py  |  26 +++++
 .../unit/compute/pollsters/test_memory.py     |  59 ++++++++++
 .../compute/virt/libvirt/test_inspector.py    |  28 +++++
 ...mory-bandwidth-meter-f86cf01178573671.yaml |   5 +
 setup.cfg                                     |   2 +
 7 files changed, 241 insertions(+)
 create mode 100644 releasenotes/notes/memory-bandwidth-meter-f86cf01178573671.yaml

diff --git a/ceilometer/compute/pollsters/memory.py b/ceilometer/compute/pollsters/memory.py
index 8b7417e5df..21e39ecf7b 100644
--- a/ceilometer/compute/pollsters/memory.py
+++ b/ceilometer/compute/pollsters/memory.py
@@ -13,6 +13,9 @@
 # License for the specific language governing permissions and limitations
 # under the License.
 
+import abc
+import collections
+
 from oslo_log import log
 
 import ceilometer
@@ -26,6 +29,10 @@ from ceilometer import sample
 LOG = log.getLogger(__name__)
 
 
+MemoryBandwidthData = collections.namedtuple('MemoryBandwidthData',
+                                             ['total', 'local'])
+
+
 class MemoryUsagePollster(pollsters.BaseComputePollster):
 
     def get_samples(self, manager, cache, resources):
@@ -117,3 +124,99 @@ class MemoryResidentPollster(pollsters.BaseComputePollster):
                 LOG.exception(_LE('Could not get Resident Memory Usage for '
                                   '%(id)s: %(e)s'), {'id': instance.id,
                                                      'e': err})
+
+
+class _MemoryBandwidthPollster(pollsters.BaseComputePollster):
+    """Base pollster for the per-instance memory bandwidth meters.
+
+    Subclasses implement _get_samples() to turn the cached
+    MemoryBandwidthData of an instance into samples.
+    """
+
+    CACHE_KEY_MEMORY_BANDWIDTH = 'memory-bandwidth'
+
+    def _populate_cache(self, inspector, cache, instance):
+        """Return the bandwidth data of instance, inspecting it only once.
+
+        The result is stored in cache under CACHE_KEY_MEMORY_BANDWIDTH, so
+        pollsters sharing the same cache dict reuse a single inspection.
+        """
+        i_cache = cache.setdefault(self.CACHE_KEY_MEMORY_BANDWIDTH, {})
+        if instance.id not in i_cache:
+            memory_bandwidth = inspector.inspect_memory_bandwidth(
+                instance, self._inspection_duration)
+            i_cache[instance.id] = MemoryBandwidthData(
+                memory_bandwidth.total,
+                memory_bandwidth.local,
+            )
+        return i_cache[instance.id]
+
+    @abc.abstractmethod
+    def _get_samples(self, instance, c_data):
+        """Return one or more Samples."""
+
+    def _get_sample_total_and_local(self, instance, _name, _unit,
+                                    c_data, _element):
+        """Return a list with the gauge Sample for c_data's _element field."""
+        return [util.make_sample_from_instance(
+            instance,
+            name=_name,
+            type=sample.TYPE_GAUGE,
+            unit=_unit,
+            volume=getattr(c_data, _element),
+        )]
+
+    def get_samples(self, manager, cache, resources):
+        """Yield the samples of each instance in resources.
+
+        An instance whose inspection fails is logged and skipped, except
+        for NoDataException which raises PollsterPermanentError(resources).
+        """
+        self._inspection_duration = self._record_poll_time()
+        for instance in resources:
+            try:
+                c_data = self._populate_cache(
+                    self.inspector,
+                    cache,
+                    instance,
+                )
+                for s in self._get_samples(instance, c_data):
+                    yield s
+            except virt_inspector.InstanceNotFoundException as err:
+                # Instance was deleted while getting samples. Ignore it.
+                LOG.debug('Exception while getting samples %s', err)
+            except virt_inspector.InstanceShutOffException as e:
+                LOG.debug('Instance %(instance_id)s was shut off while '
+                          'getting samples of %(pollster)s: %(exc)s',
+                          {'instance_id': instance.id,
+                           'pollster': self.__class__.__name__, 'exc': e})
+            except virt_inspector.NoDataException as e:
+                LOG.warning(_LW('Cannot inspect data of %(pollster)s for '
+                                '%(instance_id)s, non-fatal reason: %(exc)s'),
+                            {'pollster': self.__class__.__name__,
+                             'instance_id': instance.id, 'exc': e})
+                raise plugin_base.PollsterPermanentError(resources)
+            except ceilometer.NotImplementedError:
+                # Selected inspector does not implement this pollster.
+                LOG.debug('Obtaining memory bandwidth is not implemented'
+                          ' for %s', self.inspector.__class__.__name__)
+            except Exception as err:
+                LOG.exception(_LE('Could not get memory bandwidth for '
+                                  '%(id)s: %(e)s'), {'id': instance.id,
+                                                     'e': err})
+
+
+class MemoryBandwidthTotalPollster(_MemoryBandwidthPollster):
+    """Pollster for the memory.bandwidth.total meter."""
+
+    def _get_samples(self, instance, c_data):
+        return self._get_sample_total_and_local(
+            instance, 'memory.bandwidth.total', 'B/s', c_data, 'total')
+
+
+class MemoryBandwidthLocalPollster(_MemoryBandwidthPollster):
+    """Pollster for the memory.bandwidth.local meter."""
+
+    def _get_samples(self, instance, c_data):
+        return self._get_sample_total_and_local(
+            instance, 'memory.bandwidth.local', 'B/s', c_data, 'local')
diff --git a/ceilometer/compute/virt/inspector.py b/ceilometer/compute/virt/inspector.py
index 8635a30c99..7278338f1a 100644
--- a/ceilometer/compute/virt/inspector.py
+++ b/ceilometer/compute/virt/inspector.py
@@ -80,6 +80,14 @@ MemoryResidentStats = collections.namedtuple('MemoryResidentStats',
                                              ['resident'])
 
 
+# Named tuple representing memory bandwidth statistics.
+#
+# total: total system bandwidth from one level of cache
+# local: bandwidth of memory traffic for a memory controller
+#
+MemoryBandwidthStats = collections.namedtuple('MemoryBandwidthStats',
+                                              ['total', 'local'])
+
 # Named tuple representing vNICs.
 #
 # name: the name of the vNIC
@@ -286,6 +294,16 @@ class Inspector(object):
         """
         raise ceilometer.NotImplementedError
 
+    def inspect_memory_bandwidth(self, instance, duration=None):
+        """Inspect the memory bandwidth statistics for an instance.
+
+        :param instance: the target instance
+        :param duration: the last 'n' seconds, over which the value should be
+               inspected
+        :return: a MemoryBandwidthStats named tuple
+        """
+        raise ceilometer.NotImplementedError
+
     def inspect_disk_rates(self, instance, duration=None):
         """Inspect the disk statistics as rates for an instance.
 
diff --git a/ceilometer/compute/virt/libvirt/inspector.py b/ceilometer/compute/virt/libvirt/inspector.py
index 96330cda0d..ee059eeca6 100644
--- a/ceilometer/compute/virt/libvirt/inspector.py
+++ b/ceilometer/compute/virt/libvirt/inspector.py
@@ -255,3 +255,29 @@ class LibvirtInspector(virt_inspector.Inspector):
         domain = self._get_domain_not_shut_off_or_raise(instance)
         memory = domain.memoryStats()['rss'] / units.Ki
         return virt_inspector.MemoryResidentStats(resident=memory)
+
+    def inspect_memory_bandwidth(self, instance, duration=None):
+        domain = self._get_domain_not_shut_off_or_raise(instance)
+
+        try:
+            stats = self.connection.domainListGetStats(
+                [domain], libvirt.VIR_DOMAIN_STATS_PERF)
+            perf = stats[0][1]
+            return virt_inspector.MemoryBandwidthStats(total=perf["perf.mbmt"],
+                                                       local=perf["perf.mbml"])
+        # KeyError: the stats were returned without the perf.mbmt/perf.mbml
+        # entries, which is reported as missing data like the other failures.
+        except (AttributeError, KeyError) as e:
+            msg = _('Perf is not supported by current version of libvirt, and '
+                    'failed to inspect memory bandwidth of %(instance_uuid)s, '
+                    'can not get info from libvirt: %(error)s') % {
+                'instance_uuid': instance.id, 'error': e}
+            raise virt_inspector.NoDataException(msg)
+        # domainListGetStats might launch an exception if the method or
+        # mbmt/mbml perf event is not supported by the underlying hypervisor
+        # being used by libvirt.
+        except libvirt.libvirtError as e:
+            msg = _('Failed to inspect memory bandwidth of %(instance_uuid)s, '
+                    'can not get info from libvirt: %(error)s') % {
+                'instance_uuid': instance.id, 'error': e}
+            raise virt_inspector.NoDataException(msg)
diff --git a/ceilometer/tests/unit/compute/pollsters/test_memory.py b/ceilometer/tests/unit/compute/pollsters/test_memory.py
index dc84dd434c..3922d3538d 100644
--- a/ceilometer/tests/unit/compute/pollsters/test_memory.py
+++ b/ceilometer/tests/unit/compute/pollsters/test_memory.py
@@ -132,3 +132,62 @@ class TestResidentMemoryPollster(base.TestPollsterBase):
         _verify_resident_memory_metering(1, 2.0, 0)
         _verify_resident_memory_metering(0, 0, 1)
         _verify_resident_memory_metering(0, 0, 0)
+
+
+class TestMemoryBandwidthPollster(base.TestPollsterBase):
+
+    def setUp(self):
+        super(TestMemoryBandwidthPollster, self).setUp()
+
+    @mock.patch('ceilometer.pipeline.setup_pipeline', mock.MagicMock())
+    def test_get_samples(self):
+        next_value = iter((
+            virt_inspector.MemoryBandwidthStats(total=1892352, local=1802240),
+            virt_inspector.MemoryBandwidthStats(total=1081344, local=90112),
+        ))
+
+        def inspect_memory_bandwidth(instance, duration):
+            return next(next_value)
+
+        self.inspector.inspect_memory_bandwidth = mock.Mock(
+            side_effect=inspect_memory_bandwidth)
+        mgr = manager.AgentManager()
+
+        def _check_memory_bandwidth_total(expected_usage):
+            pollster = memory.MemoryBandwidthTotalPollster()
+
+            samples = list(pollster.get_samples(mgr, {}, [self.instance]))
+            self.assertEqual(1, len(samples))
+            self.assertEqual(set(['memory.bandwidth.total']),
+                             set([s.name for s in samples]))
+            self.assertEqual(expected_usage, samples[0].volume)
+
+        def _check_memory_bandwidth_local(expected_usage):
+            pollster = memory.MemoryBandwidthLocalPollster()
+
+            samples = list(pollster.get_samples(mgr, {}, [self.instance]))
+            self.assertEqual(1, len(samples))
+            self.assertEqual(set(['memory.bandwidth.local']),
+                             set([s.name for s in samples]))
+            self.assertEqual(expected_usage, samples[0].volume)
+
+        _check_memory_bandwidth_total(1892352)
+        _check_memory_bandwidth_local(90112)
+
+    @mock.patch('ceilometer.pipeline.setup_pipeline', mock.MagicMock())
+    def test_get_samples_with_empty_stats(self):
+
+        def inspect_memory_bandwidth(instance, duration):
+            raise virt_inspector.NoDataException()
+
+        self.inspector.inspect_memory_bandwidth = mock.Mock(
+            side_effect=inspect_memory_bandwidth)
+
+        mgr = manager.AgentManager()
+        pollster = memory.MemoryBandwidthTotalPollster()
+
+        def all_samples():
+            return list(pollster.get_samples(mgr, {}, [self.instance]))
+
+        self.assertRaises(plugin_base.PollsterPermanentError,
+                          all_samples)
diff --git a/ceilometer/tests/unit/compute/virt/libvirt/test_inspector.py b/ceilometer/tests/unit/compute/virt/libvirt/test_inspector.py
index 64b329b7af..fa527ac76f 100644
--- a/ceilometer/tests/unit/compute/virt/libvirt/test_inspector.py
+++ b/ceilometer/tests/unit/compute/virt/libvirt/test_inspector.py
@@ -372,6 +372,34 @@ class TestLibvirtInspection(base.BaseTestCase):
                                       self.inspector.inspect_memory_usage,
                                       self.instance)
 
+    def test_inspect_memory_bandwidth(self):
+        fake_stats = [({}, {'perf.mbmt': 1892352, 'perf.mbml': 1802240})]
+        connection = self.inspector.connection
+        with mock.patch.object(connection, 'lookupByUUIDString',
+                               return_value=self.domain):
+            with mock.patch.object(self.domain, 'info',
+                                   return_value=(0, 0, 51200,
+                                                 2, 999999)):
+                with mock.patch.object(connection, 'domainListGetStats',
+                                       return_value=fake_stats):
+                    mb = self.inspector.inspect_memory_bandwidth(self.instance)
+                    self.assertEqual(1892352, mb.total)
+                    self.assertEqual(1802240, mb.local)
+
+    def test_inspect_memory_bandwidth_without_perf_events(self):
+        connection = self.inspector.connection
+        with mock.patch.object(connection, 'lookupByUUIDString',
+                               return_value=self.domain):
+            with mock.patch.object(self.domain, 'info',
+                                   return_value=(0, 0, 51200,
+                                                 2, 999999)):
+                with mock.patch.object(connection, 'domainListGetStats',
+                                       return_value=[({}, {})]):
+                    self.assertRaises(
+                        virt_inspector.NoDataException,
+                        self.inspector.inspect_memory_bandwidth,
+                        self.instance)
+
 
 class TestLibvirtInspectionWithError(base.BaseTestCase):
 
diff --git a/releasenotes/notes/memory-bandwidth-meter-f86cf01178573671.yaml b/releasenotes/notes/memory-bandwidth-meter-f86cf01178573671.yaml
new file mode 100644
index 0000000000..edbd53a439
--- /dev/null
+++ b/releasenotes/notes/memory-bandwidth-meter-f86cf01178573671.yaml
@@ -0,0 +1,5 @@
+---
+features:
+  - Add two new meters, including memory.bandwidth.total and
+    memory.bandwidth.local, to get memory bandwidth statistics
+    based on Intel CMT feature.
diff --git a/setup.cfg b/setup.cfg
index c409bf32d3..480969edc4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -112,6 +112,8 @@ ceilometer.poll.compute =
     instance = ceilometer.compute.pollsters.instance:InstancePollster
     memory.usage = ceilometer.compute.pollsters.memory:MemoryUsagePollster
     memory.resident = ceilometer.compute.pollsters.memory:MemoryResidentPollster
+    memory.bandwidth.total = ceilometer.compute.pollsters.memory:MemoryBandwidthTotalPollster
+    memory.bandwidth.local = ceilometer.compute.pollsters.memory:MemoryBandwidthLocalPollster
     disk.capacity = ceilometer.compute.pollsters.disk:CapacityPollster
     disk.allocation = ceilometer.compute.pollsters.disk:AllocationPollster
     disk.usage = ceilometer.compute.pollsters.disk:PhysicalPollster