Fix removing stale metrics from Prometheus exporter
The Prometheus exporter tries to remove stale metrics every polling cycle so that VMs that have been removed from the node do not leave metrics behind. This works fine when there are multiple VMs on the node, but if the last VM is removed from the node, its metrics survive in the exporter forever. This is because Ceilometer does not run the pollsters when there are no VMs available to collect metrics from, and the current code runs the cleanup inside the pollster code path. This fix moves the existing cleanup code so that it runs even if the pollster does not execute because no VMs are available, which removes stale metrics in every case. Change-Id: I8394c71a78f9b0004514fbb624ac7436d3c60e61 Signed-off-by: jlarriba <jlarriba@redhat.com>
This commit is contained in:
@@ -312,6 +312,9 @@ class PollingTask:
|
||||
polling_resources.append(x)
|
||||
poll_history[pollster.name] = history
|
||||
|
||||
if self.manager.conf.polling.enable_prometheus_exporter:
|
||||
prom_exporter.purge_stale_metrics(pollster.name)
|
||||
|
||||
# If no resources, skip for this pollster
|
||||
if not polling_resources:
|
||||
p_context = 'new' if history else ''
|
||||
|
@@ -28,22 +28,12 @@ def export(prom_iface, prom_port, tls_cert=None, tls_key=None):
|
||||
|
||||
|
||||
def collect_metrics(samples):
|
||||
metric_cleared = False
|
||||
|
||||
for sample in samples:
|
||||
name = "ceilometer_" + sample['counter_name'].replace('.', '_')
|
||||
labels = _gen_labels(sample)
|
||||
|
||||
metric = CEILOMETER_REGISTRY._names_to_collectors.get(name, None)
|
||||
|
||||
# NOTE: Unregister the metric at the first iteration to purge stale
|
||||
# samples
|
||||
if not metric_cleared:
|
||||
if metric:
|
||||
CEILOMETER_REGISTRY.unregister(metric)
|
||||
metric = None
|
||||
metric_cleared = True
|
||||
|
||||
if metric is None:
|
||||
metric = prom.Gauge(name=name, documentation="",
|
||||
labelnames=labels['keys'],
|
||||
@@ -51,6 +41,18 @@ def collect_metrics(samples):
|
||||
metric.labels(*labels['values']).set(sample['counter_volume'])
|
||||
|
||||
|
||||
def purge_stale_metrics(pollster):
    """Unregister the Prometheus collector that belongs to a pollster.

    The collector name is derived from the pollster name by prefixing it
    with ``ceilometer_`` and replacing every dot with an underscore. When
    no collector is registered under that name, nothing is done.

    :param pollster: name of the pollster whose metric should be purged
    """
    metric_name = "ceilometer_" + pollster.replace('.', '_')
    # Look the collector up in the registry's name -> collector mapping.
    metric = CEILOMETER_REGISTRY._names_to_collectors.get(metric_name)
    if metric is not None:
        CEILOMETER_REGISTRY.unregister(metric)
|
||||
|
||||
|
||||
def _gen_labels(sample):
|
||||
labels = dict(keys=[], values=[])
|
||||
cNameShards = sample['counter_name'].split(".")
|
||||
|
@@ -406,3 +406,33 @@ class TestPromExporter(base.BaseTestCase):
|
||||
'cirros2', 'server_group123']
|
||||
label3 = prom_exporter._gen_labels(self.test_image_size[0])
|
||||
self.assertDictEqual(label3, slabels3)
|
||||
|
||||
@mock.patch.object(prom_exporter.CEILOMETER_REGISTRY, 'unregister')
def test_purge_stale_metrics_existing_metric(self, mock_unregister):
    """A collector registered under the derived name is unregistered."""
    mock_metric = mock.MagicMock()
    collectors = {'ceilometer_test_metric': mock_metric}

    # Patch the mapping rather than assigning to it, so the registry's
    # original mapping is restored once the block exits and does not
    # leak into other tests.
    with mock.patch.object(prom_exporter.CEILOMETER_REGISTRY,
                           '_names_to_collectors', collectors):
        prom_exporter.purge_stale_metrics('test.metric')

    mock_unregister.assert_called_once_with(mock_metric)
|
||||
|
||||
@mock.patch.object(prom_exporter.CEILOMETER_REGISTRY, 'unregister')
def test_purge_stale_metrics_no_existing_metric(self, mock_unregister):
    """Purging a name with no registered collector unregisters nothing."""
    # Patch the mapping rather than assigning to it, so the registry's
    # original mapping is restored once the block exits and does not
    # leak into other tests.
    with mock.patch.object(prom_exporter.CEILOMETER_REGISTRY,
                           '_names_to_collectors', {}):
        prom_exporter.purge_stale_metrics('nonexistent.metric')

    mock_unregister.assert_not_called()
|
||||
|
||||
@mock.patch.object(prom_exporter.CEILOMETER_REGISTRY, 'unregister')
def test_purge_stale_metrics_name_transformation(self, mock_unregister):
    """Dots in the pollster name become underscores in the metric name."""
    mock_metric = mock.MagicMock()
    collectors = {
        'ceilometer_cpu_util': mock_metric,
        # Decoy under the untransformed name: it must be left alone,
        # which the single-call assertion below verifies.
        'ceilometer_cpu.util': mock.MagicMock(),
    }

    # Patch the mapping rather than assigning to it, so the registry's
    # original mapping is restored once the block exits and does not
    # leak into other tests.
    with mock.patch.object(prom_exporter.CEILOMETER_REGISTRY,
                           '_names_to_collectors', collectors):
        prom_exporter.purge_stale_metrics('cpu.util')

    mock_unregister.assert_called_once_with(mock_metric)
|
||||
|
Reference in New Issue
Block a user