Merge "Added monitor (e.g. CPU) to monitor and collect data"

This commit is contained in:
Jenkins
2013-11-26 11:53:33 +00:00
committed by Gerrit Code Review
13 changed files with 590 additions and 0 deletions

View File

@@ -0,0 +1,110 @@
# Copyright 2013 Intel Corporation.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# @author: Shane Wang, Intel Corporation.
"""
Resource monitor API specification.
ResourceMonitorBase provides the definition of minimum set of methods
that needs to be implemented by Resource Monitor.
"""
import types
import six
from nova import loadables
from nova.openstack.common import timeutils
class ResourceMonitorMeta(type):
    """Metaclass that builds a metric-name to getter map for each class.

    Every plain function defined directly in the class body whose name
    starts with ``_get_`` (with at least one character after the prefix)
    is registered in ``cls.metric_map``. The metric name is the rest of the
    function name with underscores replaced by dots, so
    ``_get_cpu_user_time`` is registered as ``cpu.user.time``.

    Only the class's own ``__dict__`` is scanned: getters inherited from a
    base class are not copied into the subclass's map.
    """

    def __init__(cls, names, bases, dict_):
        """Finish class creation and populate ``cls.metric_map``.

        :param names: name of the class being created
        :param bases: tuple of base classes
        :param dict_: namespace of the class body
        """
        super(ResourceMonitorMeta, cls).__init__(names, bases, dict_)
        prefix = '_get_'
        prefix_len = len(prefix)
        cls.metric_map = {}
        # items() works on both Python 2 and Python 3, unlike iteritems().
        for name, value in cls.__dict__.items():
            if (len(name) > prefix_len
                    and name.startswith(prefix)
                    and isinstance(value, types.FunctionType)):
                metric_name = name[prefix_len:].replace('_', '.')
                cls.metric_map[metric_name] = value
@six.add_metaclass(ResourceMonitorMeta)
class ResourceMonitorBase(object):
    """Base class for resource monitors.

    Subclasses define ``_get_<metric>`` methods returning a
    ``(value, timestamp)`` pair; the metaclass collects them into
    ``metric_map``, which the methods below iterate over.
    """

    def __init__(self, parent):
        """Remember the owning object.

        :param parent: object stored as ``self.compute_manager``
        """
        self.compute_manager = parent
        # Subclasses set this to describe where the values come from.
        self.source = None

    def get_metric_names(self):
        """Get available metric names.

        Get available metric names, which are represented by a set of keys
        that can be used to check conflicts and duplications.

        :returns: a list of keys representing metric names
        """
        # list() keeps the Python 2 return type (a list) on Python 3 too.
        return list(self.metric_map)

    def get_metrics(self, **kwargs):
        """Get metrics.

        Get metrics, which are represented by a list of dictionaries
        [{'name': metric name,
          'value': metric value,
          'timestamp': the time when the value is retrieved,
          'source': what the value is got by}, ...]

        :param kwargs: extra arguments that might be present; they are
                       forwarded to every metric getter
        :returns: a list to tell the current metrics
        """
        data = []
        # items() works on both Python 2 and Python 3, unlike iteritems().
        for name, func in self.metric_map.items():
            # metric_map holds plain functions, so pass self explicitly.
            ret = func(self, **kwargs)
            data.append(self._populate(name, ret[0], ret[1]))
        return data

    def _populate(self, metric_name, metric_value, timestamp=None):
        """Build the metric dictionary for one metric.

        :param metric_name: name stored under 'name'
        :param metric_value: value stored under 'value'
        :param timestamp: time the value was retrieved; the current UTC
                          time is used when it is not given
        :returns: dict with 'name', 'value', 'timestamp' and 'source' keys
        """
        result = {}
        result['name'] = metric_name
        result['value'] = metric_value
        result['timestamp'] = timestamp or timeutils.utcnow()
        result['source'] = self.source
        return result
class ResourceMonitorHandler(loadables.BaseLoader):
    """Loader for resource monitor classes.

    The underlying loadables.BaseLoader is initialised with
    ResourceMonitorBase as its loadable class type.
    """

    def __init__(self):
        loadable_type = ResourceMonitorBase
        super(ResourceMonitorHandler, self).__init__(loadable_type)
def all_monitors():
    """Return a list of monitor classes found in this directory.

    This function is used as the default for available monitors and
    should return a list of all monitor classes available.
    """
    handler = ResourceMonitorHandler()
    return handler.get_all_classes()

View File

@@ -0,0 +1,65 @@
# Copyright 2013 Intel Corporation.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# @author: Shane Wang, Intel Corporation.
"""
CPU monitor to retrieve CPU information
"""
from nova.compute import monitors
class _CPUMonitorBase(monitors.ResourceMonitorBase):
    """Base CPU monitor declaring the CPU metric getters.

    Every getter returns a ``(value, timestamp)`` pair. The defaults here
    return ``(None, None)``; concrete monitors override them.
    """

    def _get_cpu_frequency(self, **kwargs):
        """Return the current CPU frequency and its timestamp."""
        return (None, None)

    def _get_cpu_user_time(self, **kwargs):
        """Return the CPU user mode time and its timestamp."""
        return (None, None)

    def _get_cpu_kernel_time(self, **kwargs):
        """Return the CPU kernel time and its timestamp."""
        return (None, None)

    def _get_cpu_idle_time(self, **kwargs):
        """Return the CPU idle time and its timestamp."""
        return (None, None)

    def _get_cpu_iowait_time(self, **kwargs):
        """Return the CPU I/O wait time and its timestamp."""
        return (None, None)

    def _get_cpu_user_percent(self, **kwargs):
        """Return the CPU user mode percentage and its timestamp."""
        return (None, None)

    def _get_cpu_kernel_percent(self, **kwargs):
        """Return the CPU kernel percentage and its timestamp."""
        return (None, None)

    def _get_cpu_idle_percent(self, **kwargs):
        """Return the CPU idle percentage and its timestamp."""
        return (None, None)

    def _get_cpu_iowait_percent(self, **kwargs):
        """Return the CPU I/O wait percentage and its timestamp."""
        return (None, None)

    def _get_cpu_percent(self, **kwargs):
        """Return the generic CPU utilization and its timestamp."""
        return (None, None)

View File

@@ -0,0 +1,19 @@
# Copyright 2013 Intel Corporation.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# @author: Shane Wang, Intel Corporation.
from nova.compute.monitors.virt import cpu_monitor
# Re-export the driver-based CPU monitor under this module's namespace.
ComputeDriverCPUMonitor = cpu_monitor.ComputeDriverCPUMonitor

View File

@@ -0,0 +1,171 @@
# Copyright 2013 Intel Corporation.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# @author: Shane Wang, Intel Corporation.
"""
CPU monitor based on compute driver to retrieve CPU information
"""
from oslo.config import cfg
from nova.compute.monitors import cpu_monitor as monitor
from nova import exception
from nova.openstack.common.gettextutils import _
from nova.openstack.common import log as logging
from nova.openstack.common import timeutils
CONF = cfg.CONF
# The 'compute_driver' option is declared in nova.virt.driver; import it so
# that CONF.compute_driver can be read from this module.
CONF.import_opt('compute_driver', 'nova.virt.driver')
LOG = logging.getLogger(__name__)
class ComputeDriverCPUMonitor(monitor._CPUMonitorBase):
    """CPU monitor based on compute driver.

    The class inherits from the base class for resource monitors,
    and implements the essential methods to get metric names and their real
    values for CPU utilization.

    The compute manager could load the monitors to retrieve the metrics
    of the devices on compute nodes and know their resource information
    periodically.
    """

    def __init__(self, parent):
        """Bind the monitor to the compute manager's driver.

        :param parent: compute manager; its ``driver`` attribute is used to
                       read the host CPU statistics
        """
        super(ComputeDriverCPUMonitor, self).__init__(parent)
        self.source = CONF.compute_driver
        self.driver = self.compute_manager.driver
        # Raw counters of the previous successful reading, used for deltas.
        self._cpu_stats = {}
        # Cached metrics keyed by metric name, plus a "timestamp" entry.
        self._data = {}

    def add_timestamp(f):
        """Decorator to indicate that a method needs to add a timestamp.

        The decorator (w/o any argument) is used in this way in this class
        only. When a function returning a value is decorated by the
        decorator, a timestamp is added to the returned value. That is, a
        tuple (value, timestamp) is returned.

        The timestamp is not the time when the function is called but the
        time when the value the function returns was retrieved. The value is
        retrieved by the internal method _update_cpustat(), which refuses to
        hit the driver too frequently, so the value may come from the cache
        filled by an earlier call. The timestamp is saved for utilization
        aware scheduling in the future.

        The decorator is mainly used in this class. If users hope to define
        how the timestamp is got by themselves, they should not use this
        decorator in their own classes.
        """
        def wrapper(self, **kwargs):
            self._update_cpustat()
            return f(self, **kwargs), self._data.get("timestamp")
        return wrapper

    @add_timestamp
    def _get_cpu_frequency(self, **kwargs):
        return self._data.get("cpu.frequency")

    @add_timestamp
    def _get_cpu_user_time(self, **kwargs):
        return self._data.get("cpu.user.time")

    @add_timestamp
    def _get_cpu_kernel_time(self, **kwargs):
        return self._data.get("cpu.kernel.time")

    @add_timestamp
    def _get_cpu_idle_time(self, **kwargs):
        return self._data.get("cpu.idle.time")

    @add_timestamp
    def _get_cpu_iowait_time(self, **kwargs):
        return self._data.get("cpu.iowait.time")

    @add_timestamp
    def _get_cpu_user_percent(self, **kwargs):
        return self._data.get("cpu.user.percent")

    @add_timestamp
    def _get_cpu_kernel_percent(self, **kwargs):
        return self._data.get("cpu.kernel.percent")

    @add_timestamp
    def _get_cpu_idle_percent(self, **kwargs):
        return self._data.get("cpu.idle.percent")

    @add_timestamp
    def _get_cpu_iowait_percent(self, **kwargs):
        return self._data.get("cpu.iowait.percent")

    @add_timestamp
    def _get_cpu_percent(self, **kwargs):
        return self._data.get("cpu.percent")

    def _update_cpustat(self, **kwargs):
        """Refresh the cached CPU metrics from the compute driver.

        The driver is not queried again when the cache was filled no more
        than about a second ago. The cache and the previous-reading
        counters are only replaced after a complete, successful refresh, so
        a failed refresh never leaves a half-filled cache behind.

        :param kwargs: accepted for interface compatibility; unused
        :raises: exception.ResourceMonitorError if the driver does not
                 provide all the needed CPU statistics
        """
        # Don't allow to call this function so frequently (<= 1 sec)
        now = timeutils.utcnow()
        last = self._data.get("timestamp")
        if last is not None:
            delta = now - last
            # timedelta.seconds alone ignores whole days, so fold them in.
            # A negative value (clock stepped back) forces a refresh.
            elapsed = delta.days * 86400 + delta.seconds
            if 0 <= elapsed <= 1:
                return

        data = {"timestamp": now}

        # Extract node's CPU statistics.
        try:
            # Work on a copy so the driver's own dict is never modified.
            stats = dict(self.driver.get_host_cpu_stats())
            data["cpu.user.time"] = stats["user"]
            data["cpu.kernel.time"] = stats["kernel"]
            data["cpu.idle.time"] = stats["idle"]
            data["cpu.iowait.time"] = stats["iowait"]
            data["cpu.frequency"] = stats["frequency"]
        except (NotImplementedError, TypeError, KeyError) as ex:
            LOG.exception(_("Not all properties needed are implemented "
                            "in the compute driver: %s"), ex)
            raise exception.ResourceMonitorError(
                monitor=self.__class__.__name__)

        # The compute driver API returns the absolute values for CPU times.
        # We compute the utilization percentages for each specific CPU time
        # after calculating the delta between the current reading and the
        # previous reading.
        prev = self._cpu_stats
        stats["total"] = (stats["user"] + stats["kernel"]
                          + stats["idle"] + stats["iowait"])
        cputime = float(stats["total"] - prev.get("total", 0))

        def _ratio(current, previous):
            # Two identical readings mean no CPU time elapsed; report 0
            # instead of dividing by zero.
            if cputime == 0:
                return 0.0
            return (current - previous) / cputime

        for key in ("user", "kernel", "idle", "iowait"):
            data["cpu.%s.percent" % key] = _ratio(stats[key],
                                                  prev.get(key, 0))

        # Compute the current system-wide CPU utilization as a percentage.
        used = stats["user"] + stats["kernel"] + stats["iowait"]
        prev_used = (prev.get("user", 0)
                     + prev.get("kernel", 0)
                     + prev.get("iowait", 0))
        data["cpu.percent"] = _ratio(used, prev_used)

        # Commit only after everything above succeeded.
        self._data = data
        self._cpu_stats = stats

View File

@@ -1399,6 +1399,10 @@ class ImageDownloadModuleConfigurationError(ImageDownloadModuleError):
msg_fmt = _("The module %(module)s is misconfigured: %(reason)s.")
class ResourceMonitorError(NovaException):
    """Resource monitor failure; ``monitor`` names the failing monitor."""

    msg_fmt = _("Error when creating resource monitor: %(monitor)s")
class PciDeviceWrongAddressFormat(NovaException):
    """Malformed PCI address; ``address`` carries the offending value."""

    msg_fmt = _("The PCI address %(address)s has an incorrect format.")

View File

View File

@@ -0,0 +1,88 @@
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2013 Intel Corporation
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Tests for Compute Driver CPU resource monitor."""
import fixtures
from nova.compute import manager
from nova.compute.monitors import virt
from nova import test
class FakeLibvirt(object):
    """Minimal stand-in for a libvirt connection used by the CPU monitor."""

    def getCPUStats(self, cpuNum, flag):
        """Return fixed CPU time counters for CPU numbers below 2.

        :param cpuNum: CPU number being queried
        :param flag: unused
        :raises: Exception for any CPU number of 2 or more
        """
        # Plain integer literals: the "L" suffix is a syntax error on
        # Python 3, and Python 2 promotes to long automatically.
        if cpuNum < 2:
            return {'kernel': 5664160000000,
                    'idle': 1592705190000000,
                    'user': 26728850000000,
                    'iowait': 6121490000000}
        else:
            raise Exception("invalid argument: Invalid cpu number")

    def getInfo(self):
        """Return fake node info; index 3 (800) is read as the frequency."""
        return [0, 0, 0, 800, 0, 0, 0, 0]
class ComputeDriverCPUMonitorTestCase(test.TestCase):
    """Exercise ComputeDriverCPUMonitor against a fake libvirt connection."""

    def setUp(self):
        super(ComputeDriverCPUMonitorTestCase, self).setUp()
        self.flags(compute_driver='nova.virt.libvirt.LibvirtDriver')
        # Replace the driver's connection so no real libvirt is needed.
        self.useFixture(fixtures.MonkeyPatch(
            'nova.virt.libvirt.driver.LibvirtDriver._conn',
            FakeLibvirt()))
        cm = manager.ComputeManager()
        self.monitor = virt.ComputeDriverCPUMonitor(cm)

    def test_get_metric_names(self):
        names = self.monitor.get_metric_names()
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(10, len(names))
        self.assertIn("cpu.frequency", names)
        self.assertIn("cpu.user.time", names)
        self.assertIn("cpu.kernel.time", names)
        self.assertIn("cpu.idle.time", names)
        self.assertIn("cpu.iowait.time", names)
        self.assertIn("cpu.user.percent", names)
        self.assertIn("cpu.kernel.percent", names)
        self.assertIn("cpu.idle.percent", names)
        self.assertIn("cpu.iowait.percent", names)
        self.assertIn("cpu.percent", names)

    def test_get_metrics(self):
        metrics_raw = self.monitor.get_metrics()
        names = self.monitor.get_metric_names()
        metrics = {}
        for metric in metrics_raw:
            self.assertIn(metric['name'], names)
            metrics[metric['name']] = metric['value']

        # Plain integer literals compare equal to longs on Python 2 and
        # are the only valid spelling on Python 3.
        self.assertEqual(metrics["cpu.frequency"], 800)
        self.assertEqual(metrics["cpu.user.time"], 26728850000000)
        self.assertEqual(metrics["cpu.kernel.time"], 5664160000000)
        self.assertEqual(metrics["cpu.idle.time"], 1592705190000000)
        self.assertEqual(metrics["cpu.iowait.time"], 6121490000000)
        # Every utilization value is a fraction in the range [0, 1].
        for name in ("cpu.user.percent", "cpu.kernel.percent",
                     "cpu.idle.percent", "cpu.iowait.percent",
                     "cpu.percent"):
            self.assertTrue(0 <= metrics[name] <= 1)

View File

@@ -0,0 +1,73 @@
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2013 Intel Corporation
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Tests for resource monitors."""
from nova.compute import monitors
from nova import test
class FakeResourceMonitor(monitors.ResourceMonitorBase):
    """Stub monitor reporting two fixed, string-valued metrics."""

    def get_metric_names(self):
        """Return the names of the two fake metrics."""
        return ["foo.metric1", "foo.metric2"]

    def get_metrics(self):
        """Return both fake metrics in the standard metric dict format."""
        samples = (('foo.metric1', '1000'), ('foo.metric2', '99.999'))
        return [self._populate(name, value) for name, value in samples]
class ResourceMonitorBaseTestCase(test.TestCase):
    """Check the ResourceMonitorBase contract through a fake monitor."""

    def setUp(self):
        super(ResourceMonitorBaseTestCase, self).setUp()
        self.monitor = FakeResourceMonitor(None)

    def test_get_metric_names(self):
        names = self.monitor.get_metric_names()
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(2, len(names))
        self.assertIn("foo.metric1", names)
        self.assertIn("foo.metric2", names)

    def test_get_metrics(self):
        metrics_raw = self.monitor.get_metrics()
        names = self.monitor.get_metric_names()
        metrics = {}
        for metric in metrics_raw:
            self.assertIn(metric['name'], names)
            metrics[metric['name']] = metric['value']

        self.assertEqual(metrics["foo.metric1"], '1000')
        self.assertEqual(metrics["foo.metric2"], '99.999')
class ResourceMonitorsTestCase(test.TestCase):
    """Test case for monitor class discovery."""

    def setUp(self):
        super(ResourceMonitorsTestCase, self).setUp()
        handler = monitors.ResourceMonitorHandler()
        found = handler.get_matching_classes(
            ['nova.compute.monitors.all_monitors'])
        # Index the discovered classes by class name for easy lookup.
        self.class_map = dict((klass.__name__, klass) for klass in found)

    def test_all_monitors(self):
        # Double check at least a couple of known monitors exist
        self.assertIn('ComputeDriverCPUMonitor', self.class_map)

View File

@@ -111,6 +111,8 @@ VIR_CRED_EXTERNAL = 9
VIR_MIGRATE_PEER2PEER = 2
VIR_MIGRATE_UNDEFINE_SOURCE = 16
# CPU-number argument for getCPUStats().
# NOTE(review): presumably requests statistics aggregated over all host
# CPUs, as in libvirt - confirm.
VIR_NODE_CPU_STATS_ALL_CPUS = -1
# libvirtError enums
# (Intentionally different from what's in libvirt. We do this to check,
# that consumers of the library are using the symbolic names rather than
@@ -885,6 +887,15 @@ class Connection(object):
return VIR_CPU_COMPARE_IDENTICAL
def getCPUStats(self, cpuNum, flag):
    """Return fixed CPU time counters for CPU numbers below 2.

    :param cpuNum: CPU number being queried
    :param flag: unused
    :raises: libvirtError for any CPU number of 2 or more
    """
    # Plain integer literals: the "L" suffix is a syntax error on
    # Python 3, and Python 2 promotes to long automatically.
    if cpuNum < 2:
        return {'kernel': 5664160000000,
                'idle': 1592705190000000,
                'user': 26728850000000,
                'iowait': 6121490000000}
    else:
        raise libvirtError("invalid argument: Invalid cpu number")
def nwfilterLookupByName(self, name):
try:
return self._nwfilters[name]

View File

@@ -580,6 +580,19 @@ class _VirtDriverTestCase(_FakeDriverBackendTestCase):
'myhostname')
self._check_available_resouce_fields(available_resource)
@catch_notimplementederror
def _check_host_cpu_status_fields(self, host_cpu_status):
    """Assert that every expected CPU stat key is present.

    :param host_cpu_status: mapping returned by get_host_cpu_stats()
    """
    for key in ('kernel', 'idle', 'user', 'iowait', 'frequency'):
        self.assertIn(key, host_cpu_status)
@catch_notimplementederror
def test_get_host_cpu_stats(self):
    """The driver's CPU stats must contain all the expected fields."""
    self._check_host_cpu_status_fields(
        self.connection.get_host_cpu_stats())
@catch_notimplementederror
def test_set_host_enabled(self):
self.connection.set_host_enabled('a useless argument?', True)

View File

@@ -802,6 +802,25 @@ class ComputeDriver(object):
"""
raise NotImplementedError()
def get_host_cpu_stats(self):
    """Get the currently known host CPU stats.

    :returns: a dict containing the CPU stat info, e.g.:
              {'kernel': kern,
               'idle': idle,
               'user': user,
               'iowait': wait,
               'frequency': freq},
              where kern and user are the cumulative CPU time
              (nanoseconds) spent by kernel and user processes
              respectively, idle is the cumulative idle CPU time
              (nanoseconds) and wait is the cumulative I/O wait CPU
              time (nanoseconds), all counted since the host booted;
              freq is the current CPU frequency (MHz). All values are
              long integers.
    :raises: NotImplementedError unless a driver overrides this method
    """
    raise NotImplementedError()
def block_stats(self, instance_name, disk_id):
"""
Return performance counters associated with the given disk_id on the

View File

@@ -291,6 +291,14 @@ class FakeDriver(driver.ComputeDriver):
volusage = []
return volusage
def get_host_cpu_stats(self):
    """Return fixed fake host CPU stats.

    :returns: dict with 'kernel', 'idle', 'user' and 'iowait' counters
              plus a 'frequency' of 800
    """
    # Plain integer literals: the "L" suffix is a syntax error on
    # Python 3, and Python 2 promotes to long automatically.
    stats = {'kernel': 5664160000000,
             'idle': 1592705190000000,
             'user': 26728850000000,
             'iowait': 6121490000000}
    stats['frequency'] = 800
    return stats
def block_stats(self, instance_name, disk_id):
return [0L, 0L, 0L, 0L, None]

View File

@@ -4550,6 +4550,15 @@ class LibvirtDriver(driver.ComputeDriver):
"""
return self.host_state.get_host_stats(refresh=refresh)
def get_host_cpu_stats(self):
    """Return the current CPU state of the host.

    :returns: the dict returned by the connection's getCPUStats() for all
              CPUs, extended with a 'frequency' entry
    """
    # Node-wide CPU statistics, summed over every CPU.
    cpu_stats = self._conn.getCPUStats(
        libvirt.VIR_NODE_CPU_STATS_ALL_CPUS, 0)

    # getInfo() returns various information about the host node;
    # element No. 3 is the expected CPU frequency.
    node_info = self._conn.getInfo()
    cpu_stats["frequency"] = node_info[3]
    return cpu_stats
def get_host_uptime(self, host):
"""Returns the result of calling "uptime"."""
#NOTE(dprince): host seems to be ignored for this call and in