From afa6f98a01031ae5e16f6cf67f82cccfa90aa286 Mon Sep 17 00:00:00 2001 From: Corentin ARNOULD Date: Tue, 4 Jun 2024 22:17:49 +0200 Subject: [PATCH] Allow Multiple Ratings for the same Metric on Prometheus This commit moves a part of the gnocchi collector to the base collector, so that all collectors can benefit from multirating per metric. It solves the issue with Prometheus having one "*_info" metric containing the scope and multiple other metric containing the values we want to rate. Also splitting prometheus' fetch_all, to gain in readability and allow us to render the promql query from tests. Change-Id: I4f0892a80f37ce052f573e7622534aa728e7cff1 --- cloudkitty/collector/__init__.py | 34 +++++- cloudkitty/collector/gnocchi.py | 45 ++----- cloudkitty/collector/prometheus.py | 44 +++++-- .../tests/collectors/test_validation.py | 110 ++++++++++++++++++ doc/source/admin/configuration/collector.rst | 62 ++++++++-- 5 files changed, 229 insertions(+), 66 deletions(-) diff --git a/cloudkitty/collector/__init__.py b/cloudkitty/collector/__init__.py index c62b7285..38069a94 100644 --- a/cloudkitty/collector/__init__.py +++ b/cloudkitty/collector/__init__.py @@ -64,7 +64,16 @@ COLLECTORS_NAMESPACE = 'cloudkitty.collector.backends' def MetricDict(value): if isinstance(value, dict) and len(value.keys()) > 0: return value - raise Invalid("Not a dict with at least one key") + if isinstance(value, list) and len(value) > 0: + for v in value: + if not (isinstance(v, dict) and len(v.keys()) > 0): + raise Invalid("Not a dict with at least one key or a " + "list with at least one dict with at " + "least one key. Provided value: %s" % value) + return value + raise Invalid("Not a dict with at least one key or a " + "list with at least one dict with at " + "least one key. Provided value: %s" % value) CONF_BASE_SCHEMA = {Required('metrics'): MetricDict} @@ -189,9 +198,26 @@ class BaseCollector(object, metaclass=abc.ABCMeta): output = {} for metric_name, metric in conf['metrics'].items(): - output[metric_name] = metric_schema(metric) - if scope_key not in output[metric_name]['groupby']: - output[metric_name]['groupby'].append(scope_key) + if not isinstance(metric, list): + metric = [metric] + for m in metric: + met = metric_schema(m) + names = [metric_name] + alt_name = met.get('alt_name') + if alt_name is not None: + names.append(alt_name) + + new_metric_name = "@#".join(names) + if output.get(new_metric_name) is not None: + raise InvalidConfiguration( + "Metric {} already exists, you should change the" + "alt_name for metric: {}" + .format(new_metric_name, metric)) + + output[new_metric_name] = met + + if scope_key not in output[new_metric_name]['groupby']: + output[new_metric_name]['groupby'].append(scope_key) return output diff --git a/cloudkitty/collector/gnocchi.py b/cloudkitty/collector/gnocchi.py index 2dfd771f..813b47c4 100644 --- a/cloudkitty/collector/gnocchi.py +++ b/cloudkitty/collector/gnocchi.py @@ -25,7 +25,6 @@ from oslo_config import cfg from oslo_log import log as logging from voluptuous import All from voluptuous import In -from voluptuous import Invalid from voluptuous import Length from voluptuous import Range from voluptuous import Required @@ -40,24 +39,6 @@ LOG = logging.getLogger(__name__) COLLECTOR_GNOCCHI_OPTS = 'collector_gnocchi' - -def GnocchiMetricDict(value): - if isinstance(value, dict) and len(value.keys()) > 0: - return value - if isinstance(value, list) and len(value) > 0: - for v in value: - if not (isinstance(v, dict) and len(v.keys()) > 0): - raise Invalid("Not a dict with at least one key or a " - "list with at least one dict with at " - "least one key. Provided value: %s" % value) - return value - raise Invalid("Not a dict with at least one key or a " - "list with at least one dict with at " - "least one key. Provided value: %s" % value) - - -GNOCCHI_CONF_SCHEMA = {Required('metrics'): GnocchiMetricDict} - collector_gnocchi_opts = [ cfg.StrOpt( 'gnocchi_auth_type', @@ -202,30 +183,18 @@ class GnocchiCollector(collector.BaseCollector): """Check metrics configuration """ - conf = Schema(GNOCCHI_CONF_SCHEMA)(conf) - conf = copy.deepcopy(conf) - scope_key = CONF.collect.scope_key + conf = collector.BaseCollector.check_configuration(conf) + metric_schema = Schema(collector.METRIC_BASE_SCHEMA).extend( GNOCCHI_EXTRA_SCHEMA) output = {} - for metric_name, metric in conf['metrics'].items(): - if not isinstance(metric, list): - metric = [metric] - for m in metric: - met = metric_schema(m) - m.update(met) - if scope_key not in m['groupby']: - m['groupby'].append(scope_key) - if met['extra_args']['resource_key'] not in m['groupby']: - m['groupby'].append(met['extra_args']['resource_key']) + for metric_name, metric in conf.items(): + met = metric_schema(metric) + if met['extra_args']['resource_key'] not in met['groupby']: + met['groupby'].append(met['extra_args']['resource_key']) - names = [metric_name] - alt_name = met.get('alt_name') - if alt_name is not None: - names.append(alt_name) - new_metric_name = "@#".join(names) - output[new_metric_name] = m + output[metric_name] = met return output diff --git a/cloudkitty/collector/prometheus.py b/cloudkitty/collector/prometheus.py index fe94d47a..1139af98 100644 --- a/cloudkitty/collector/prometheus.py +++ b/cloudkitty/collector/prometheus.py @@ -153,24 +153,24 @@ class PrometheusCollector(collector.BaseCollector): return metadata, groupby, qty - def fetch_all(self, metric_name, start, end, scope_id, q_filter=None): - """Returns metrics to be valorized.""" - scope_key = CONF.collect.scope_key - method = self.conf[metric_name]['extra_args']['aggregation_method'] - query_function = self.conf[metric_name]['extra_args'].get( + @staticmethod + def build_query(conf, metric_name, start, end, scope_key, scope_id, + groupby, metadata): + """Builds the query for the metrics to be valorized.""" + + method = conf[metric_name]['extra_args']['aggregation_method'] + query_function = conf[metric_name]['extra_args'].get( 'query_function') - range_function = self.conf[metric_name]['extra_args'].get( + range_function = conf[metric_name]['extra_args'].get( 'range_function') - groupby = self.conf[metric_name].get('groupby', []) - metadata = self.conf[metric_name].get('metadata', []) - query_prefix = self.conf[metric_name]['extra_args']['query_prefix'] - query_suffix = self.conf[metric_name]['extra_args']['query_suffix'] + query_prefix = conf[metric_name]['extra_args']['query_prefix'] + query_suffix = conf[metric_name]['extra_args']['query_suffix'] + query_metric = metric_name.split('@#')[0] period = tzutils.diff_seconds(end, start) - time = end # The metric with the period query = '{0}{{{1}="{2}"}}[{3}s]'.format( - metric_name, + query_metric, scope_key, scope_id, period @@ -212,6 +212,26 @@ class PrometheusCollector(collector.BaseCollector): if query_suffix: query = "{0} {1}".format(query, query_suffix) + return query + + def fetch_all(self, metric_name, start, end, scope_id, q_filter=None): + """Returns metrics to be valorized.""" + time = end + metadata = self.conf[metric_name].get('metadata', []) + groupby = self.conf[metric_name].get('groupby', []) + scope_key = CONF.collect.scope_key + + query = self.build_query( + self.conf, + metric_name, + start, + end, + scope_key, + scope_id, + groupby, + metadata + ) + try: res = self._conn.get_instant( query, diff --git a/cloudkitty/tests/collectors/test_validation.py b/cloudkitty/tests/collectors/test_validation.py index 5813d4c4..c2c29ea2 100644 --- a/cloudkitty/tests/collectors/test_validation.py +++ b/cloudkitty/tests/collectors/test_validation.py @@ -19,6 +19,8 @@ from voluptuous import error as verror from cloudkitty import collector from cloudkitty import tests +from datetime import datetime +from datetime import timedelta class MetricConfigValidationTest(tests.TestCase): @@ -44,6 +46,46 @@ class MetricConfigValidationTest(tests.TestCase): } } + list_data = { + 'metrics': { + 'metric_one': [ + { + 'groupby': ['one'], + 'metadata': ['two'], + 'alt_name': 'metric_u', + 'unit': 'u', + }, + { + 'groupby': ['three'], + 'metadata': ['four'], + 'alt_name': 'metric_v', + 'unit': 'v', + } + ] + } + } + + list_output = { + 'metric_one@#metric_u': { + 'groupby': ['one'], + 'metadata': ['two'], + 'unit': 'u', + 'alt_name': 'metric_u', + 'factor': 1, + 'offset': 0, + 'mutate': 'NONE', + }, + 'metric_one@#metric_v': { + 'groupby': ['three'], + 'metadata': ['four'], + 'unit': 'v', + 'alt_name': 'metric_v', + 'factor': 1, + 'offset': 0, + 'mutate': 'NONE', + }, + } + def test_base_minimal_config(self): data = copy.deepcopy(self.base_data) expected_output = copy.deepcopy(self.base_output) @@ -149,6 +191,48 @@ class MetricConfigValidationTest(tests.TestCase): expected_output, ) + def test_prometheus_query_builder(self): + data = copy.deepcopy(self.base_data) + data['metrics']['metric_one']['extra_args'] = { + 'aggregation_method': 'max', + 'query_function': 'abs', + 'query_prefix': 'custom_prefix', + 'query_suffix': 'custom_suffix', + 'range_function': 'delta', + } + + prometheus = collector.prometheus.PrometheusCollector + + conf = prometheus.check_configuration(data) + metric_name = list(conf.keys())[0] + start = datetime.now() + end = start + timedelta(seconds=60) + scope_key = "random_key" + scope_id = "random_value" + groupby = conf[metric_name].get('groupby', []) + metadata = conf[metric_name].get('metadata', []) + + query = prometheus.build_query( + conf, + metric_name, + start, + end, + scope_key, + scope_id, + groupby, + metadata + ) + + expected_output = ( + 'custom_prefix max(abs(delta(metric_one{random_key="random_value"}' + '[60s]))) by (one, project_id, two) custom_suffix' + ) + + self.assertEqual( + query, + expected_output, + ) + def test_check_duplicates(self): data = copy.deepcopy(self.base_data) for metric_name, metric in data['metrics'].items(): @@ -179,3 +263,29 @@ class MetricConfigValidationTest(tests.TestCase): self.assertRaises( collector.InvalidConfiguration, collector.validate_map_mutator, metric_name, metric) + + def test_base_minimal_config_list(self): + data = copy.deepcopy(self.list_data) + expected_output = copy.deepcopy(self.list_output) + + for _, metric in expected_output.items(): + metric['groupby'].append('project_id') + + self.assertEqual( + collector.BaseCollector.check_configuration(data), + expected_output, + ) + + # submetric with same alt_name should fail + # Because they would overlap in the dict + def test_check_duplicates_list(self): + data = copy.deepcopy(self.list_data) + data['metrics']['metric_one'].append({ + 'groupby': ['five'], + 'metadata': ['six'], + 'alt_name': 'metric_v', + 'unit': 'w', + }) + self.assertRaises( + collector.InvalidConfiguration, + collector.BaseCollector.check_configuration, data) diff --git a/doc/source/admin/configuration/collector.rst b/doc/source/admin/configuration/collector.rst index d827a578..6eee2e13 100644 --- a/doc/source/admin/configuration/collector.rst +++ b/doc/source/admin/configuration/collector.rst @@ -293,21 +293,14 @@ summary GET API. - ram metadata: [] -Collector-specific configuration --------------------------------- +Multiple ratings in one metric +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Some collectors require extra options. These must be specified through the -``extra_args`` option. Some options have defaults, other must be systematically -specified. The extra args for each collector are detailed below. - -Gnocchi -~~~~~~~ - -Besides the common configuration, the Gnocchi collector also accepts a list of +Besides the common configuration, the collectors also accept a list of rating types definitions for each metric. Using a list of rating types definitions allows operators to rate different aspects of the same resource -type collected through the same metric in Gnocchi, otherwise operators would -need to create multiple metrics in Gnocchi to create multiple rating types in +type collected through the same metric, otherwise operators would need to +create multiple metrics in the collector to create multiple rating types in CloudKitty. .. code-block:: yaml @@ -329,6 +322,15 @@ CloudKitty. metadata: - os_license +Collector-specific configuration +-------------------------------- + +Some collectors require extra options. These must be specified through the +``extra_args`` option. Some options have defaults, other must be systematically +specified. The extra args for each collector are detailed below. + +Gnocchi +~~~~~~~ .. note:: In order to retrieve metrics from Gnocchi, Cloudkitty uses the dynamic aggregates endpoint. It builds an operation of the following @@ -419,4 +421,40 @@ Prometheus ``delta``, ``deriv``, ``idelta``, ``irange``, ``irate``, ``rate``. For more information on these functions, you can check `this page`_ +Here is one example of a rating with PromQL `vector matching`_: + +.. code-block:: yaml + + metrics: + libvirt_domain_openstack_info: + - alt_name: cpu + unit: vCPU + groupby: + - project_id + metadata: + - instance_name + - domain + extra_args: + query_suffix: "* on (domain) group_left() libvirt_domain_vcpu_maximum" + + +And the resulting PromQL: + +.. code-block:: promql + + max( + max_over_time( + libvirt_domain_openstack_info{project_id=""} + [1h] + ) + ) by ( + project_id, + instance_name, + domain + ) * on (domain) + group_left() + libvirt_domain_vcpu_maximum + + .. _this page: https://prometheus.io/docs/prometheus/latest/querying/basics/ +.. _vector matching: https://prometheus.io/docs/prometheus/latest/querying/operators/#vector-matching