From afa6f98a01031ae5e16f6cf67f82cccfa90aa286 Mon Sep 17 00:00:00 2001
From: Corentin ARNOULD <corentin@keepcalm.dev>
Date: Tue, 4 Jun 2024 22:17:49 +0200
Subject: [PATCH] Allow Multiple Ratings for the same Metric on Prometheus

This commit moves a part of the gnocchi collector to the base collector,
so that all collectors can benefit from multirating per metric.

It solves the issue with Prometheus having one "*_info" metric containing
the scope and multiple other metric containing the values we want to rate.

Also splitting prometheus' fetch_all, to gain in readability and allow
us to render the promql query from tests.

Change-Id: I4f0892a80f37ce052f573e7622534aa728e7cff1
---
 cloudkitty/collector/__init__.py              |  34 +++++-
 cloudkitty/collector/gnocchi.py               |  45 ++-----
 cloudkitty/collector/prometheus.py            |  44 +++++--
 .../tests/collectors/test_validation.py       | 110 ++++++++++++++++++
 doc/source/admin/configuration/collector.rst  |  62 ++++++++--
 5 files changed, 229 insertions(+), 66 deletions(-)

diff --git a/cloudkitty/collector/__init__.py b/cloudkitty/collector/__init__.py
index c62b7285..38069a94 100644
--- a/cloudkitty/collector/__init__.py
+++ b/cloudkitty/collector/__init__.py
@@ -64,7 +64,16 @@ COLLECTORS_NAMESPACE = 'cloudkitty.collector.backends'
 def MetricDict(value):
     if isinstance(value, dict) and len(value.keys()) > 0:
         return value
-    raise Invalid("Not a dict with at least one key")
+    if isinstance(value, list) and len(value) > 0:
+        for v in value:
+            if not (isinstance(v, dict) and len(v.keys()) > 0):
+                raise Invalid("Not a dict with at least one key or a "
+                              "list with at least one dict with at "
+                              "least one key. Provided value: %s" % value)
+        return value
+    raise Invalid("Not a dict with at least one key or a "
+                  "list with at least one dict with at "
+                  "least one key. Provided value: %s" % value)
 
 
 CONF_BASE_SCHEMA = {Required('metrics'): MetricDict}
@@ -189,9 +198,26 @@ class BaseCollector(object, metaclass=abc.ABCMeta):
 
         output = {}
         for metric_name, metric in conf['metrics'].items():
-            output[metric_name] = metric_schema(metric)
-            if scope_key not in output[metric_name]['groupby']:
-                output[metric_name]['groupby'].append(scope_key)
+            if not isinstance(metric, list):
+                metric = [metric]
+            for m in metric:
+                met = metric_schema(m)
+                names = [metric_name]
+                alt_name = met.get('alt_name')
+                if alt_name is not None:
+                    names.append(alt_name)
+
+                new_metric_name = "@#".join(names)
+                if output.get(new_metric_name) is not None:
+                    raise InvalidConfiguration(
+                        "Metric {} already exists, you should change the"
+                        "alt_name for metric: {}"
+                        .format(new_metric_name, metric))
+
+                output[new_metric_name] = met
+
+                if scope_key not in output[new_metric_name]['groupby']:
+                    output[new_metric_name]['groupby'].append(scope_key)
 
         return output
 
diff --git a/cloudkitty/collector/gnocchi.py b/cloudkitty/collector/gnocchi.py
index 2dfd771f..813b47c4 100644
--- a/cloudkitty/collector/gnocchi.py
+++ b/cloudkitty/collector/gnocchi.py
@@ -25,7 +25,6 @@ from oslo_config import cfg
 from oslo_log import log as logging
 from voluptuous import All
 from voluptuous import In
-from voluptuous import Invalid
 from voluptuous import Length
 from voluptuous import Range
 from voluptuous import Required
@@ -40,24 +39,6 @@ LOG = logging.getLogger(__name__)
 
 COLLECTOR_GNOCCHI_OPTS = 'collector_gnocchi'
 
-
-def GnocchiMetricDict(value):
-    if isinstance(value, dict) and len(value.keys()) > 0:
-        return value
-    if isinstance(value, list) and len(value) > 0:
-        for v in value:
-            if not (isinstance(v, dict) and len(v.keys()) > 0):
-                raise Invalid("Not a dict with at least one key or a "
-                              "list with at least one dict with at "
-                              "least one key. Provided value: %s" % value)
-        return value
-    raise Invalid("Not a dict with at least one key or a "
-                  "list with at least one dict with at "
-                  "least one key. Provided value: %s" % value)
-
-
-GNOCCHI_CONF_SCHEMA = {Required('metrics'): GnocchiMetricDict}
-
 collector_gnocchi_opts = [
     cfg.StrOpt(
         'gnocchi_auth_type',
@@ -202,30 +183,18 @@ class GnocchiCollector(collector.BaseCollector):
         """Check metrics configuration
 
         """
-        conf = Schema(GNOCCHI_CONF_SCHEMA)(conf)
-        conf = copy.deepcopy(conf)
-        scope_key = CONF.collect.scope_key
+        conf = collector.BaseCollector.check_configuration(conf)
+
         metric_schema = Schema(collector.METRIC_BASE_SCHEMA).extend(
             GNOCCHI_EXTRA_SCHEMA)
 
         output = {}
-        for metric_name, metric in conf['metrics'].items():
-            if not isinstance(metric, list):
-                metric = [metric]
-            for m in metric:
-                met = metric_schema(m)
-                m.update(met)
-                if scope_key not in m['groupby']:
-                    m['groupby'].append(scope_key)
-                if met['extra_args']['resource_key'] not in m['groupby']:
-                    m['groupby'].append(met['extra_args']['resource_key'])
+        for metric_name, metric in conf.items():
+            met = metric_schema(metric)
+            if met['extra_args']['resource_key'] not in met['groupby']:
+                met['groupby'].append(met['extra_args']['resource_key'])
 
-                names = [metric_name]
-                alt_name = met.get('alt_name')
-                if alt_name is not None:
-                    names.append(alt_name)
-                new_metric_name = "@#".join(names)
-                output[new_metric_name] = m
+            output[metric_name] = met
 
         return output
 
diff --git a/cloudkitty/collector/prometheus.py b/cloudkitty/collector/prometheus.py
index fe94d47a..1139af98 100644
--- a/cloudkitty/collector/prometheus.py
+++ b/cloudkitty/collector/prometheus.py
@@ -153,24 +153,24 @@ class PrometheusCollector(collector.BaseCollector):
 
         return metadata, groupby, qty
 
-    def fetch_all(self, metric_name, start, end, scope_id, q_filter=None):
-        """Returns metrics to be valorized."""
-        scope_key = CONF.collect.scope_key
-        method = self.conf[metric_name]['extra_args']['aggregation_method']
-        query_function = self.conf[metric_name]['extra_args'].get(
+    @staticmethod
+    def build_query(conf, metric_name, start, end, scope_key, scope_id,
+                    groupby, metadata):
+        """Builds the query for the metrics to be valorized."""
+
+        method = conf[metric_name]['extra_args']['aggregation_method']
+        query_function = conf[metric_name]['extra_args'].get(
             'query_function')
-        range_function = self.conf[metric_name]['extra_args'].get(
+        range_function = conf[metric_name]['extra_args'].get(
             'range_function')
-        groupby = self.conf[metric_name].get('groupby', [])
-        metadata = self.conf[metric_name].get('metadata', [])
-        query_prefix = self.conf[metric_name]['extra_args']['query_prefix']
-        query_suffix = self.conf[metric_name]['extra_args']['query_suffix']
+        query_prefix = conf[metric_name]['extra_args']['query_prefix']
+        query_suffix = conf[metric_name]['extra_args']['query_suffix']
+        query_metric = metric_name.split('@#')[0]
         period = tzutils.diff_seconds(end, start)
-        time = end
 
         # The metric with the period
         query = '{0}{{{1}="{2}"}}[{3}s]'.format(
-            metric_name,
+            query_metric,
             scope_key,
             scope_id,
             period
@@ -212,6 +212,26 @@ class PrometheusCollector(collector.BaseCollector):
         if query_suffix:
             query = "{0} {1}".format(query, query_suffix)
 
+        return query
+
+    def fetch_all(self, metric_name, start, end, scope_id, q_filter=None):
+        """Returns metrics to be valorized."""
+        time = end
+        metadata = self.conf[metric_name].get('metadata', [])
+        groupby = self.conf[metric_name].get('groupby', [])
+        scope_key = CONF.collect.scope_key
+
+        query = self.build_query(
+            self.conf,
+            metric_name,
+            start,
+            end,
+            scope_key,
+            scope_id,
+            groupby,
+            metadata
+        )
+
         try:
             res = self._conn.get_instant(
                 query,
diff --git a/cloudkitty/tests/collectors/test_validation.py b/cloudkitty/tests/collectors/test_validation.py
index 5813d4c4..c2c29ea2 100644
--- a/cloudkitty/tests/collectors/test_validation.py
+++ b/cloudkitty/tests/collectors/test_validation.py
@@ -19,6 +19,8 @@ from voluptuous import error as verror
 
 from cloudkitty import collector
 from cloudkitty import tests
+from datetime import datetime
+from datetime import timedelta
 
 
 class MetricConfigValidationTest(tests.TestCase):
@@ -44,6 +46,46 @@ class MetricConfigValidationTest(tests.TestCase):
         }
     }
 
+    list_data = {
+        'metrics': {
+            'metric_one': [
+                {
+                    'groupby': ['one'],
+                    'metadata': ['two'],
+                    'alt_name': 'metric_u',
+                    'unit': 'u',
+                },
+                {
+                    'groupby': ['three'],
+                    'metadata': ['four'],
+                    'alt_name': 'metric_v',
+                    'unit': 'v',
+                }
+            ]
+        }
+    }
+
+    list_output = {
+        'metric_one@#metric_u': {
+            'groupby': ['one'],
+            'metadata': ['two'],
+            'unit': 'u',
+            'alt_name': 'metric_u',
+            'factor': 1,
+            'offset': 0,
+            'mutate': 'NONE',
+        },
+        'metric_one@#metric_v': {
+            'groupby': ['three'],
+            'metadata': ['four'],
+            'unit': 'v',
+            'alt_name': 'metric_v',
+            'factor': 1,
+            'offset': 0,
+            'mutate': 'NONE',
+        },
+    }
+
     def test_base_minimal_config(self):
         data = copy.deepcopy(self.base_data)
         expected_output = copy.deepcopy(self.base_output)
@@ -149,6 +191,48 @@ class MetricConfigValidationTest(tests.TestCase):
             expected_output,
         )
 
+    def test_prometheus_query_builder(self):
+        data = copy.deepcopy(self.base_data)
+        data['metrics']['metric_one']['extra_args'] = {
+            'aggregation_method': 'max',
+            'query_function': 'abs',
+            'query_prefix': 'custom_prefix',
+            'query_suffix': 'custom_suffix',
+            'range_function': 'delta',
+        }
+
+        prometheus = collector.prometheus.PrometheusCollector
+
+        conf = prometheus.check_configuration(data)
+        metric_name = list(conf.keys())[0]
+        start = datetime.now()
+        end = start + timedelta(seconds=60)
+        scope_key = "random_key"
+        scope_id = "random_value"
+        groupby = conf[metric_name].get('groupby', [])
+        metadata = conf[metric_name].get('metadata', [])
+
+        query = prometheus.build_query(
+            conf,
+            metric_name,
+            start,
+            end,
+            scope_key,
+            scope_id,
+            groupby,
+            metadata
+        )
+
+        expected_output = (
+            'custom_prefix max(abs(delta(metric_one{random_key="random_value"}'
+            '[60s]))) by (one, project_id, two) custom_suffix'
+        )
+
+        self.assertEqual(
+            query,
+            expected_output,
+        )
+
     def test_check_duplicates(self):
         data = copy.deepcopy(self.base_data)
         for metric_name, metric in data['metrics'].items():
@@ -179,3 +263,29 @@ class MetricConfigValidationTest(tests.TestCase):
             self.assertRaises(
                 collector.InvalidConfiguration,
                 collector.validate_map_mutator, metric_name, metric)
+
+    def test_base_minimal_config_list(self):
+        data = copy.deepcopy(self.list_data)
+        expected_output = copy.deepcopy(self.list_output)
+
+        for _, metric in expected_output.items():
+            metric['groupby'].append('project_id')
+
+        self.assertEqual(
+            collector.BaseCollector.check_configuration(data),
+            expected_output,
+        )
+
+    # submetric with same alt_name should fail
+    # Because they would overlap in the dict
+    def test_check_duplicates_list(self):
+        data = copy.deepcopy(self.list_data)
+        data['metrics']['metric_one'].append({
+            'groupby': ['five'],
+            'metadata': ['six'],
+            'alt_name': 'metric_v',
+            'unit': 'w',
+        })
+        self.assertRaises(
+            collector.InvalidConfiguration,
+            collector.BaseCollector.check_configuration, data)
diff --git a/doc/source/admin/configuration/collector.rst b/doc/source/admin/configuration/collector.rst
index d827a578..6eee2e13 100644
--- a/doc/source/admin/configuration/collector.rst
+++ b/doc/source/admin/configuration/collector.rst
@@ -293,21 +293,14 @@ summary GET API.
          - ram
        metadata: []
 
-Collector-specific configuration
---------------------------------
+Multiple ratings in one metric
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Some collectors require extra options. These must be specified through the
-``extra_args`` option. Some options have defaults, other must be systematically
-specified. The extra args for each collector are detailed below.
-
-Gnocchi
-~~~~~~~
-
-Besides the common configuration, the Gnocchi collector also accepts a list of
+Besides the common configuration, the collectors also accept a list of
 rating types definitions for each metric. Using a list of rating types
 definitions allows operators to rate different aspects of the same resource
-type collected through the same metric in Gnocchi, otherwise operators would
-need to create multiple metrics in Gnocchi to create multiple rating types in
+type collected through the same metric, otherwise operators would need to
+create multiple metrics in the collector to create multiple rating types in
 CloudKitty.
 
 .. code-block:: yaml
@@ -329,6 +322,15 @@ CloudKitty.
          metadata:
            - os_license
 
+Collector-specific configuration
+--------------------------------
+
+Some collectors require extra options. These must be specified through the
+``extra_args`` option. Some options have defaults, other must be systematically
+specified. The extra args for each collector are detailed below.
+
+Gnocchi
+~~~~~~~
 
 .. note:: In order to retrieve metrics from Gnocchi, Cloudkitty uses the
           dynamic aggregates endpoint. It builds an operation of the following
@@ -419,4 +421,40 @@ Prometheus
   ``delta``, ``deriv``, ``idelta``, ``irange``, ``irate``, ``rate``. For more
   information on these functions, you can check `this page`_
 
+Here is one example of a rating with PromQL `vector matching`_:
+
+.. code-block:: yaml
+
+  metrics:
+    libvirt_domain_openstack_info:
+    - alt_name: cpu
+      unit: vCPU
+      groupby:
+      - project_id
+      metadata:
+      - instance_name
+      - domain
+      extra_args:
+        query_suffix: "* on (domain) group_left() libvirt_domain_vcpu_maximum"
+
+
+And the resulting PromQL:
+
+.. code-block:: promql
+
+  max(
+     max_over_time(
+        libvirt_domain_openstack_info{project_id="<PROJECT_ID>"}
+        [1h]
+     )
+  ) by (
+     project_id,
+     instance_name,
+     domain
+  ) * on (domain)
+  group_left()
+  libvirt_domain_vcpu_maximum
+
+
 .. _this page: https://prometheus.io/docs/prometheus/latest/querying/basics/
+.. _vector matching: https://prometheus.io/docs/prometheus/latest/querying/operators/#vector-matching