From 7d44c38c90483d7177ede9eaca0223ed98feea19 Mon Sep 17 00:00:00 2001 From: Salman Rana Date: Tue, 11 Feb 2025 03:06:31 -0500 Subject: [PATCH] Introduce dccertmon service This commit introduces dccertmon, a new managed service for DC certificate auditing and management. Currently, platform cert management, DC cert management, and subcloud cert auditing are coupled into a single platform service (certmon). To meet the requirements of DC scalability and portability, DC specific functionality must be decoupled. These changes lay the groundwork for the new service, by: - Creating the necessary service files. - Introducing configs for the service. - Declaring high level methods (Skeleton - lifecycle and manager) DC-specific functionality will be migrated to this dccertmon service and optimized in subsequent changes. Non-DC cert management will continue to be handled by certmon. Overall, this commit introduces: - The OCF file necessary for high availability management of the dccertmon service by SM. - Package configurations to build the service (Package: distributedcloud-dccertmon). - Lifecycle manager for a running DC cert monitor service. - Skeleton/base service application logic - CertificateMonitorManager. - RPC notification handlers for subcloud online/managed. - Configuration for the log folders and log rotation. The logs will be available in /var/log/dccertmon/dccertmon.log. These changes are part of a set of commits to introduce the dccertmon service: [1] https://review.opendev.org/c/starlingx/ha/+/941205 [2] https://review.opendev.org/c/starlingx/stx-puppet/+/941208 Test Plan: - PASS: Build dccertmon package - PASS: Install and bootstrap system with custom ISO containing the newly created dccertmon package - PASS: Verify that the dccertmon.service is loaded - PASS: Verify dccertmon is being properly logged to the correct folder. 
- PASS: Check logged messages and verify execution of - Cert Watcher thread - Task Executor (Audit thread) - Periodic tasks running at expected intervals - PASS: Configure and provision the service using SM and verify it has correctly started and can be restarted with 'sm-restart'. - PASS: Tox checks running on dccertmon Note: This commit has been tested alongside the related changes and their respective test plans. [1][2] Story: 2011311 Task: 51663 Change-Id: Ic23d8d13e4b292cf0508d23eaae99b8e07f36d31 Signed-off-by: Salman Rana --- debian_iso_image.inc | 1 + distributedcloud/.testr.conf | 1 + distributedcloud/dccertmon/__init__.py | 10 + distributedcloud/dccertmon/cmd/__init__.py | 0 distributedcloud/dccertmon/cmd/cert_mon.py | 54 +++ distributedcloud/dccertmon/common/__init__.py | 0 .../common/certificate_monitor_manager.py | 95 ++++++ distributedcloud/dccertmon/common/config.py | 132 +++++++ distributedcloud/dccertmon/common/service.py | 83 +++++ distributedcloud/dccertmon/common/utils.py | 26 ++ distributedcloud/dccertmon/common/watcher.py | 23 ++ .../dccertmon/config-generator.conf | 9 + distributedcloud/dcmanager/common/consts.py | 3 +- distributedcloud/dcmanager/rpc/client.py | 9 +- distributedcloud/debian/deb_folder/control | 8 + .../distributedcloud-dccertmon.dirs | 1 + .../distributedcloud-dccertmon.install | 5 + .../distributedcloud-dccommon.tmpfiles | 1 + distributedcloud/debian/deb_folder/rules | 11 +- .../etc/dccertmon/README-dccertmon.conf.txt | 4 + distributedcloud/etc/dccertmon/policy.json | 5 + distributedcloud/files/dccertmon.conf | 1 + .../files/distcloud-logrotate.conf | 15 + distributedcloud/files/distcloud-syslog.conf | 4 + distributedcloud/ocf/dccertmon | 323 ++++++++++++++++++ distributedcloud/setup.cfg | 3 + distributedcloud/tox.ini | 2 +- 27 files changed, 820 insertions(+), 9 deletions(-) create mode 100644 distributedcloud/dccertmon/__init__.py create mode 100644 distributedcloud/dccertmon/cmd/__init__.py create mode 100644 
distributedcloud/dccertmon/cmd/cert_mon.py create mode 100644 distributedcloud/dccertmon/common/__init__.py create mode 100644 distributedcloud/dccertmon/common/certificate_monitor_manager.py create mode 100644 distributedcloud/dccertmon/common/config.py create mode 100644 distributedcloud/dccertmon/common/service.py create mode 100644 distributedcloud/dccertmon/common/utils.py create mode 100644 distributedcloud/dccertmon/common/watcher.py create mode 100644 distributedcloud/dccertmon/config-generator.conf create mode 100644 distributedcloud/debian/deb_folder/distributedcloud-dccertmon.dirs create mode 100644 distributedcloud/debian/deb_folder/distributedcloud-dccertmon.install create mode 100644 distributedcloud/etc/dccertmon/README-dccertmon.conf.txt create mode 100755 distributedcloud/etc/dccertmon/policy.json create mode 100644 distributedcloud/files/dccertmon.conf create mode 100644 distributedcloud/ocf/dccertmon diff --git a/debian_iso_image.inc b/debian_iso_image.inc index 0eb98922f..8b186e353 100644 --- a/debian_iso_image.inc +++ b/debian_iso_image.inc @@ -6,5 +6,6 @@ distributedcloud-dcdbsync distributedcloud-dcmanager distributedcloud-dcorch distributedcloud-dcagent +distributedcloud-dccertmon python3-redfish diff --git a/distributedcloud/.testr.conf b/distributedcloud/.testr.conf index 98e6ba521..e48ec65ab 100644 --- a/distributedcloud/.testr.conf +++ b/distributedcloud/.testr.conf @@ -10,6 +10,7 @@ test_command=OS_STDOUT_CAPTURE=${OS_STDOUT_CAPTURE:-1} ${PYTHON} -m subunit.run discover -s dcmanager $LISTOPT $IDOPTION ${PYTHON} -m subunit.run discover -s dcorch $LISTOPT $IDOPTION ${PYTHON} -m subunit.run discover -s dcagent $LISTOPT $IDOPTION + ${PYTHON} -m subunit.run discover -s dccertmon $LISTOPT $IDOPTION test_id_option=--load-list $IDFILE test_list_option=--list test_run_concurrency=echo 5 diff --git a/distributedcloud/dccertmon/__init__.py b/distributedcloud/dccertmon/__init__.py new file mode 100644 index 000000000..1f6b411cf --- /dev/null +++ 
b/distributedcloud/dccertmon/__init__.py @@ -0,0 +1,10 @@ +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +import pbr.version + + +__version__ = pbr.version.VersionInfo("distributedcloud").version_string() diff --git a/distributedcloud/dccertmon/cmd/__init__.py b/distributedcloud/dccertmon/cmd/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/distributedcloud/dccertmon/cmd/cert_mon.py b/distributedcloud/dccertmon/cmd/cert_mon.py new file mode 100644 index 000000000..2ca3dd3b9 --- /dev/null +++ b/distributedcloud/dccertmon/cmd/cert_mon.py @@ -0,0 +1,54 @@ +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +""" +DC Certificate Monitor Service +""" +import eventlet + +eventlet.monkey_patch() + +# pylint: disable=wrong-import-position +from oslo_config import cfg # noqa: E402 +from oslo_i18n import _lazy # noqa: E402 +from oslo_log import log as logging # noqa: E402 +from oslo_service import service # noqa: E402 + +from dccertmon.common import config # noqa: E402 +from dcmanager.common import messaging # noqa: E402 + +# pylint: enable=wrong-import-position + +_lazy.enable_lazy() + +LOG = logging.getLogger("dccertmon") +CONF = cfg.CONF + + +def main(): + config.generate_config() + logging.register_options(CONF) + CONF(project="dccertmon") + config.register_config_opts() + + logging.set_defaults() + logging.setup(CONF, "dccertmon") + messaging.setup() + + from dccertmon.common import service as dc_cert_mon + + srv = dc_cert_mon.CertificateMonitorService() + launcher = service.launch(cfg.CONF, srv) + + LOG.info("Starting...") + LOG.debug("Configuration:") + cfg.CONF.log_opt_values(LOG, logging.DEBUG) + + launcher.wait() + + +if __name__ == "__main__": + main() diff --git a/distributedcloud/dccertmon/common/__init__.py b/distributedcloud/dccertmon/common/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/distributedcloud/dccertmon/common/certificate_monitor_manager.py b/distributedcloud/dccertmon/common/certificate_monitor_manager.py new file mode 100644 index 000000000..35e7718a8 --- /dev/null +++ b/distributedcloud/dccertmon/common/certificate_monitor_manager.py @@ -0,0 +1,95 @@ +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +import time + +import eventlet +import greenlet +from oslo_config import cfg +from oslo_log import log +from oslo_service import periodic_task + +from dccertmon.common import watcher + + +LOG = log.getLogger(__name__) +CONF = cfg.CONF + + +class CertificateMonitorManager(periodic_task.PeriodicTasks): + def __init__(self): + super(CertificateMonitorManager, self).__init__(CONF) + self.mon_thread = None + self.worker_thread = None + + def on_start(self): + LOG.info("Service Start - prepare for initial audit") + + def start_task_executor(self): + self.worker_thread = eventlet.greenthread.spawn(self.worker_task_loop) + self.on_start() + + def start_cert_watcher(self): + dc_monitor = None + while True: + try: + dc_monitor = watcher.DC_CertWatcher() + dc_monitor.initialize() + except Exception as e: + LOG.exception(e) + time.sleep(5) + else: + break + + # spawn monitor thread + self.mon_thread = eventlet.greenthread.spawn(self.monitor_cert_loop, dc_monitor) + + def stop_cert_watcher(self): + if self.mon_thread: + self.mon_thread.kill() + self.mon_thread.wait() + self.mon_thread = None + + def stop_task_executor(self): + if self.worker_thread: + self.worker_thread.kill() + self.worker_thread.wait() + self.worker_thread = None + + def worker_task_loop(self): + while True: + try: + self.run_periodic_tasks(context=None) + # TODO(srana): Reset sleep after proper implementation + time.sleep(60) + except greenlet.GreenletExit: + break + except Exception as e: + LOG.exception(e) + + def monitor_cert_loop(self, monitor): + while True: + # never exit until exit signal received + try: + 
monitor.start_watch(on_success=None, on_error=None) + except greenlet.GreenletExit: + break + except Exception: + # It shouldn't fall to here, but log and restart if it did + LOG.exception("Unexpected exception from start_watch") + time.sleep(1) + + @periodic_task.periodic_task(spacing=CONF.dccertmon.audit_interval) + def audit_sc_cert_start(self, context): + LOG.info("periodic_task: audit_sc_cert_start") + + @periodic_task.periodic_task(spacing=5) + def audit_sc_cert_task(self, context): + LOG.info("periodic_task: audit_sc_cert_task") + + @periodic_task.periodic_task(spacing=CONF.dccertmon.retry_interval) + def retry_monitor_task(self, context): + LOG.info("periodic_task: retry_monitor_task") diff --git a/distributedcloud/dccertmon/common/config.py b/distributedcloud/dccertmon/common/config.py new file mode 100644 index 000000000..808bf01ef --- /dev/null +++ b/distributedcloud/dccertmon/common/config.py @@ -0,0 +1,132 @@ +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +import configparser +import os + +import keyring +from oslo_config import cfg + +from dccommon import consts as dccommon_consts +from dcmanager.common import utils + +CONF = cfg.CONF + +logging_default_format_string = ( + "%(process)d %(levelname)s %(name)s [-] %(instance)s%(message)s" +) + +config_values = { + "keystone_authtoken": { + "auth_url": "http://controller.internal:5000", + "auth_uri": "http://controller.internal:5000", + "auth_type": "password", + "project_name": "services", + "username": "sysinv", + "password": "None", + "user_domain_name": "Default", + "project_domain_name": "Default", + "interface": "internal", + "region_name": "None", + }, + "DEFAULT": { + "syslog_log_facility": "local4", + "use_syslog": "True", + "debug": "False", + "logging_default_format_string": logging_default_format_string, + "logging_debug_format_suffix": "%(pathname)s:%(lineno)d", + "auth_strategy": "keystone", + "transport_url": "None", + }, + "dccertmon": { + 
"retry_interval": "600", + "max_retry": "14", + "audit_interval": "86400", + "startup_audit_all": "False", + "network_retry_interval": "180", + "network_max_retry": "30", + "audit_batch_size": "40", + "audit_greenpool_size": "20", + "certificate_timeout_secs": "5", + }, + "endpoint_cache": { + "auth_plugin": "password", + "username": "dcmanager", + "password": "None", + "project_name": "services", + "user_domain_name": "Default", + "project_domain_name": "Default", + "http_connect_timeout": "15", + "auth_uri": "http://controller.internal:5000/v3", + }, +} + +common_opts = [cfg.StrOpt("host", default="localhost", help="hostname of the machine")] + +dc_cert_mon_opts = [ + cfg.IntOpt( + "audit_interval", + default=86400, # 24 hours + help="Interval to run certificate audit", + ), + cfg.IntOpt( + "retry_interval", + default=10 * 60, # retry every 10 minutes + help="Interval to reattempt accessing external system if failure occurred", + ), +] + + +def register_config_opts(): + CONF.register_opts(common_opts) + CONF.register_opts(dc_cert_mon_opts, "dccertmon") + + +def override_config_values(): + rabbit_auth_password = keyring.get_password("amqp", "rabbit") + + config_values["keystone_authtoken"]["region_name"] = utils.get_region_name( + "http://controller.internal:6385" + ) + config_values["endpoint_cache"]["password"] = keyring.get_password( + "dcmanager", dccommon_consts.SERVICES_USER_NAME + ) + config_values["keystone_authtoken"]["password"] = keyring.get_password( + "sysinv", dccommon_consts.SERVICES_USER_NAME + ) + config_values["DEFAULT"][ + "transport_url" + ] = f"rabbit://guest:{rabbit_auth_password}@controller.internal:5672" + + +def create_conf_file(): + output_dir = "/etc/dccertmon" + output_file = os.path.join(output_dir, "dccertmon.conf") + + os.makedirs(output_dir, exist_ok=True) + + config = configparser.RawConfigParser() + + # Populate the config parser with values + for section, options in config_values.items(): + config[section] = options + + with 
open(output_file, "w") as f: + config.write(f) + + os.chmod(output_file, 0o600) + + +def generate_config(): + # Set dynamic values (e.g., passwords, urls, etc) + override_config_values() + # Create service conf file + create_conf_file() + + +def list_opts(): + yield "dccertmon", dc_cert_mon_opts + yield None, common_opts diff --git a/distributedcloud/dccertmon/common/service.py b/distributedcloud/dccertmon/common/service.py new file mode 100644 index 000000000..13b2f2803 --- /dev/null +++ b/distributedcloud/dccertmon/common/service.py @@ -0,0 +1,83 @@ +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +from oslo_config import cfg +from oslo_log import log as logging +import oslo_messaging +from oslo_service import service + +from dccertmon.common.certificate_monitor_manager import CertificateMonitorManager +from dccertmon.common import utils +from dcmanager.common import consts +from dcmanager.common import messaging as rpc_messaging + +CONF = cfg.CONF +LOG = logging.getLogger(__name__) + + +class CertificateMonitorService(service.Service): + """Lifecycle manager for a running DC cert monitor service.""" + + def __init__(self): + super(CertificateMonitorService, self).__init__() + self.rpc_api_version = consts.RPC_API_VERSION + self.topic = consts.TOPIC_DC_NOTIFICATION + # TODO(srana): Refactor DC role usage due to deprecation. 
+ self.dc_role = utils.DC_ROLE_UNDETECTED + self.manager = CertificateMonitorManager() + self._rpc_server = None + self.target = None + + def start(self): + LOG.info("Starting %s", self.__class__.__name__) + super(CertificateMonitorService, self).start() + self.dc_role = self._get_dc_role() + + self.manager.start_cert_watcher() + self.manager.start_task_executor() + + if self.dc_role == utils.DC_ROLE_SYSTEMCONTROLLER: + self.target = oslo_messaging.Target( + version=self.rpc_api_version, server=CONF.host, topic=self.topic + ) + self._rpc_server = rpc_messaging.get_rpc_server(self.target, self) + self._rpc_server.start() + + def stop(self): + LOG.info("Stopping %s", self.__class__.__name__) + + if self.dc_role == utils.DC_ROLE_SYSTEMCONTROLLER: + self._stop_rpc_server() + + self.manager.stop_cert_watcher() + self.manager.stop_task_executor() + super(CertificateMonitorService, self).stop() + + def _stop_rpc_server(self): + if self._rpc_server: + try: + self._rpc_server.stop() + self._rpc_server.wait() + LOG.info("Engine service stopped successfully") + except Exception as ex: + LOG.error("Failed to stop engine service: %s" % ex) + LOG.exception(ex) + + def _get_dc_role(self): + # TODO(srana): Update after migrating from certmon + return utils.DC_ROLE_SYSTEMCONTROLLER + + def subcloud_online(self, context, subcloud_name=None): + """TODO(srana): Trigger a subcloud online audit""" + LOG.info("%s is online." % subcloud_name) + + def subcloud_managed(self, context, subcloud_name=None): + """TODO(srana): Trigger a subcloud audit""" + LOG.info("%s is managed." 
% subcloud_name) + + def subcloud_sysinv_endpoint_update(self, ctxt, subcloud_name, endpoint): + """TODO(srana): Update sysinv endpoint of dc token cache""" + LOG.info("Update subcloud: %s sysinv endpoint" % subcloud_name) diff --git a/distributedcloud/dccertmon/common/utils.py b/distributedcloud/dccertmon/common/utils.py new file mode 100644 index 000000000..d9482ea85 --- /dev/null +++ b/distributedcloud/dccertmon/common/utils.py @@ -0,0 +1,26 @@ +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# TODO(srana): Refactor DC role usage due to deprecation. +DC_ROLE_UNDETECTED = "unknown" +DC_ROLE_SUBCLOUD = "subcloud" +DC_ROLE_SYSTEMCONTROLLER = "systemcontroller" + +DC_ROLE_TIMEOUT_SECONDS = 180 +DC_ROLE_DELAY_SECONDS = 5 + +INVALID_SUBCLOUD_AUDIT_DEPLOY_STATES = [ + # Secondary subclouds should not be audited as they are expected + # to be managed by a peer system controller (geo-redundancy feat.) + "create-complete", + "create-failed", + "pre-rehome", + "rehome-failed", + "rehome-pending", + "rehoming", + "secondary", + "secondary-failed", +] diff --git a/distributedcloud/dccertmon/common/watcher.py b/distributedcloud/dccertmon/common/watcher.py new file mode 100644 index 000000000..06df15a9d --- /dev/null +++ b/distributedcloud/dccertmon/common/watcher.py @@ -0,0 +1,23 @@ +# +# Copyright (c) 2025 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# + +import time + +from oslo_log import log + +LOG = log.getLogger(__name__) + + +class DC_CertWatcher(object): + def __init__(self): + pass + + def initialize(self): + LOG.info("initialize DC_CertWatcher") + + def start_watch(self, on_success, on_error): + LOG.info("DC_CertWatcher start_watch") + time.sleep(60) diff --git a/distributedcloud/dccertmon/config-generator.conf b/distributedcloud/dccertmon/config-generator.conf new file mode 100644 index 000000000..1d296a5c3 --- /dev/null +++ b/distributedcloud/dccertmon/config-generator.conf @@ -0,0 +1,9 @@ +[DEFAULT] +output_file = etc/dccertmon/dccertmon.conf.sample +wrap_width = 79 +namespace = dccertmon.cmd.cert_mon.config +namespace = oslo.messaging +namespace = oslo.middleware +namespace = oslo.log +namespace = oslo.service.service +namespace = oslo.service.periodic_task diff --git a/distributedcloud/dcmanager/common/consts.py b/distributedcloud/dcmanager/common/consts.py index 765166ecd..486465d9d 100644 --- a/distributedcloud/dcmanager/common/consts.py +++ b/distributedcloud/dcmanager/common/consts.py @@ -1,5 +1,5 @@ # Copyright (c) 2016 Ericsson AB. -# Copyright (c) 2017-2024 Wind River Systems, Inc. +# Copyright (c) 2017-2025 Wind River Systems, Inc. # All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -23,6 +23,7 @@ TOPIC_DC_MANAGER_STATE = "dcmanager-state" TOPIC_DC_MANAGER_AUDIT = "dcmanager-audit" TOPIC_DC_MANAGER_AUDIT_WORKER = "dcmanager-audit-worker" TOPIC_DC_MANAGER_ORCHESTRATOR = "dcmanager-orchestrator" +TOPIC_DC_NOTIFICATION = "DCMANAGER-NOTIFICATION" CERTS_VAULT_DIR = "/opt/dc-vault/certs" PATCH_VAULT_DIR = "/opt/dc-vault/patches" diff --git a/distributedcloud/dcmanager/rpc/client.py b/distributedcloud/dcmanager/rpc/client.py index 8edf21bf1..5e6ba5264 100644 --- a/distributedcloud/dcmanager/rpc/client.py +++ b/distributedcloud/dcmanager/rpc/client.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2024 Wind River Systems, Inc. +# Copyright (c) 2017-2025 Wind River Systems, Inc. # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -468,12 +468,9 @@ class DCManagerNotifications(RPCClient): 1.0 - Initial version """ - DCMANAGER_RPC_API_VERSION = "1.0" - TOPIC_DC_NOTIFICIATION = "DCMANAGER-NOTIFICATION" - def __init__(self, timeout=None): super(DCManagerNotifications, self).__init__( - timeout, self.TOPIC_DC_NOTIFICIATION, self.DCMANAGER_RPC_API_VERSION + timeout, consts.TOPIC_DC_NOTIFICATION, consts.RPC_API_VERSION ) def subcloud_online(self, ctxt, subcloud_name): @@ -495,5 +492,5 @@ class DCManagerNotifications(RPCClient): endpoint=endpoint, ), fanout=True, - version=self.DCMANAGER_RPC_API_VERSION, + version=consts.RPC_API_VERSION, ) diff --git a/distributedcloud/debian/deb_folder/control b/distributedcloud/debian/deb_folder/control index c3de09324..bc9aee026 100644 --- a/distributedcloud/debian/deb_folder/control +++ b/distributedcloud/debian/deb_folder/control @@ -83,6 +83,14 @@ Depends: Description: Distributed Cloud DCAgent Distributed Cloud DCAgent +Package: distributedcloud-dccertmon +Architecture: any +Depends: + ${misc:Depends}, + ${python3:Depends} +Description: Distributed Cloud Certificate Monitor + Distributed Cloud 
Certificate Monitor + Package: distributedcloud-wheels Architecture: any Depends: diff --git a/distributedcloud/debian/deb_folder/distributedcloud-dccertmon.dirs b/distributedcloud/debian/deb_folder/distributedcloud-dccertmon.dirs new file mode 100644 index 000000000..d67e304e9 --- /dev/null +++ b/distributedcloud/debian/deb_folder/distributedcloud-dccertmon.dirs @@ -0,0 +1 @@ +var/log/dccertmon diff --git a/distributedcloud/debian/deb_folder/distributedcloud-dccertmon.install b/distributedcloud/debian/deb_folder/distributedcloud-dccertmon.install new file mode 100644 index 000000000..d5baa3b9d --- /dev/null +++ b/distributedcloud/debian/deb_folder/distributedcloud-dccertmon.install @@ -0,0 +1,5 @@ +etc/dccertmon/dccertmon.conf +usr/bin/dccertmon +usr/lib/ocf/resource.d/openstack/dccertmon* +usr/lib/python3/dist-packages/dccertmon/* +usr/lib/tmpfiles.d/dccertmon.conf diff --git a/distributedcloud/debian/deb_folder/distributedcloud-dccommon.tmpfiles b/distributedcloud/debian/deb_folder/distributedcloud-dccommon.tmpfiles index e1eb43e5c..a94e89bb4 100644 --- a/distributedcloud/debian/deb_folder/distributedcloud-dccommon.tmpfiles +++ b/distributedcloud/debian/deb_folder/distributedcloud-dccommon.tmpfiles @@ -1,3 +1,4 @@ +d /var/log/dccertmon 0755 root root - - d /var/log/dcagent 0755 root root - - d /var/log/dcdbsync 0755 root root - - d /var/log/dcmanager 0755 root root - - diff --git a/distributedcloud/debian/deb_folder/rules b/distributedcloud/debian/deb_folder/rules index ae9e274c2..6e13e013c 100755 --- a/distributedcloud/debian/deb_folder/rules +++ b/distributedcloud/debian/deb_folder/rules @@ -43,6 +43,7 @@ override_dh_install: install -p -D -m 644 files/dcorch.conf $(TMP_DIR)/dcorch.conf install -p -D -m 644 files/dcmanager.conf $(TMP_DIR)/dcmanager.conf install -p -D -m 644 files/dcagent.conf $(TMP_DIR)/dcagent.conf + install -p -D -m 644 files/dccertmon.conf $(TMP_DIR)/dccertmon.conf # install systemd unit files for optional second instance install -p -D -m 
644 files/dcdbsync-openstack-api.service $(SYSTEMD_DIR)/dcdbsync-openstack-api.service @@ -67,6 +68,7 @@ override_dh_install: PYTHONPATH=. oslo-config-generator --config-file=./dcorch/config-generator.conf PYTHONPATH=. oslo-config-generator --config-file=./dcdbsync/config-generator.conf PYTHONPATH=. oslo-config-generator --config-file=./dcagent/config-generator.conf + PYTHONPATH=. oslo-config-generator --config-file=./dccertmon/config-generator.conf # install default config files oslo-config-generator \ @@ -89,6 +91,11 @@ override_dh_install: --output-file ./dcagent/dcagent.conf.sample install -p -D -m 640 ./dcagent/dcagent.conf.sample $(SYS_CONF_DIR)/dcagent/dcagent.conf + oslo-config-generator \ + --config-file ./dccertmon/config-generator.conf \ + --output-file ./dccertmon/dccertmon.conf.sample + install -p -D -m 640 ./dccertmon/dccertmon.conf.sample $(SYS_CONF_DIR)/dccertmon/dccertmon.conf + # install rvmc_install.py script install -d $(ROOT)/usr/local/bin/ install -p -D -m 700 scripts/rvmc_install.py $(ROOT)/usr/local/bin @@ -98,6 +105,7 @@ override_dh_install: rm -rf $(ROOT)/usr/lib/python3/dist-packages/dcmanager/tests rm -rf $(ROOT)/usr/lib/python3/dist-packages/dcorch/tests rm -rf $(ROOT)/usr/lib/python3/dist-packages/dcagent/tests + rm -rf $(ROOT)/usr/lib/python3/dist-packages/dccertmon/tests dh_install @@ -109,7 +117,8 @@ override_dh_fixperms: --exclude etc/dcdbsync/dcdbsync.conf \ --exclude etc/dcmanager/dcmanager.conf \ --exclude etc/dcorch/dcorch.conf \ - --exclude etc/dcagent/dcagent.conf + --exclude etc/dcagent/dcagent.conf \ + --exclude etc/dccertmon/dccertmon.conf execute_after_dh_fixperms: # forcing 600 for /var/opt/dc/ansible diff --git a/distributedcloud/etc/dccertmon/README-dccertmon.conf.txt b/distributedcloud/etc/dccertmon/README-dccertmon.conf.txt new file mode 100644 index 000000000..9e62d759a --- /dev/null +++ b/distributedcloud/etc/dccertmon/README-dccertmon.conf.txt @@ -0,0 +1,4 @@ +To generate the sample dccertmon.conf file, run the 
following +command from the top level of the dccertmon directory: + +tox -egenconfig diff --git a/distributedcloud/etc/dccertmon/policy.json b/distributedcloud/etc/dccertmon/policy.json new file mode 100755 index 000000000..a102e75cf --- /dev/null +++ b/distributedcloud/etc/dccertmon/policy.json @@ -0,0 +1,5 @@ +{ + "context_is_admin": "role:admin", + "admin_or_owner": "is_admin:True or project_id:%(project_id)s", + "default": "rule:admin_or_owner" +} diff --git a/distributedcloud/files/dccertmon.conf b/distributedcloud/files/dccertmon.conf new file mode 100644 index 000000000..364211ac0 --- /dev/null +++ b/distributedcloud/files/dccertmon.conf @@ -0,0 +1 @@ +d /var/run/dccertmon 0755 root root - diff --git a/distributedcloud/files/distcloud-logrotate.conf b/distributedcloud/files/distcloud-logrotate.conf index 910e1f2a8..24a9dc865 100644 --- a/distributedcloud/files/distcloud-logrotate.conf +++ b/distributedcloud/files/distcloud-logrotate.conf @@ -91,6 +91,21 @@ endscript } +/var/log/dccertmon/*.log +{ + nodateext + size 20M + start 1 + rotate 20 + missingok + notifempty + compress + sharedscripts + postrotate + systemctl reload syslog-ng > /dev/null 2>&1 || true + endscript +} + /var/log/dcmanager/ansible/*.log { nodateext diff --git a/distributedcloud/files/distcloud-syslog.conf b/distributedcloud/files/distcloud-syslog.conf index d1b9fe823..6a710a0b2 100644 --- a/distributedcloud/files/distcloud-syslog.conf +++ b/distributedcloud/files/distcloud-syslog.conf @@ -7,6 +7,7 @@ destination d_dcmanager_orch { file("/var/log/dcmanager/orchestrator.log" templa destination d_dcorch { file("/var/log/dcorch/dcorch.log" template(t_preformatted)); }; destination d_dcdbsync { file("/var/log/dcdbsync/dcdbsync.log" template(t_preformatted)); }; destination d_dcagent { file("/var/log/dcagent/dcagent.log" template(t_preformatted)); }; +destination d_dccertmon { file("/var/log/dccertmon/dccertmon.log" template(t_preformatted)); }; # Distributed Cloud Log Filters filter 
f_dcmanagermanager { facility(local4) and program(dcmanager-manager); }; @@ -22,6 +23,8 @@ filter f_dcdbsyncapi { facility(local4) and program(dcdbsync-api); }; filter f_dcagentapi { facility(local4) and program(dcagent-api); }; +filter f_dccertmon { facility(local4) and program(dccertmon); }; + # Distributed Cloud Log Path log {source(s_src); filter(f_dcmanagermanager); destination(d_dcmanager); }; log {source(s_src); filter(f_dcmanageraudit); destination(d_dcmanager_audit); }; @@ -32,4 +35,5 @@ log {source(s_src); filter(f_dcorchengine); destination(d_dcorch); }; log {source(s_src); filter(f_dcorchapiproxy); destination(d_dcorch); }; log {source(s_src); filter(f_dcdbsyncapi); destination(d_dcdbsync); }; log {source(s_src); filter(f_dcagentapi); destination(d_dcagent); }; +log {source(s_src); filter(f_dccertmon); destination(d_dccertmon); }; diff --git a/distributedcloud/ocf/dccertmon b/distributedcloud/ocf/dccertmon new file mode 100644 index 000000000..77ee98bd3 --- /dev/null +++ b/distributedcloud/ocf/dccertmon @@ -0,0 +1,323 @@ +#!/bin/sh +# OpenStack DC Certificate Monitor Service (dccertmon) +# +# Description: Manages a DC Certificate Monitor Service +# (dccertmon) process as an HA resource +# +# Copyright (c) 2025 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# +# +# See usage() function below for more details ... +# +# OCF instance parameters: +# OCF_RESKEY_binary +# OCF_RESKEY_config +# OCF_RESKEY_user +# OCF_RESKEY_pid +# OCF_RESKEY_additional_parameters +####################################################################### +# Initialization: + +: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} +. 
${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + +####################################################################### + +# Fill in some defaults if no values are specified + +OCF_RESKEY_binary_default="dccertmon" +OCF_RESKEY_config_default="/etc/dccertmon/dccertmon.conf" +OCF_RESKEY_user_default="root" +OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid" + +: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}} +: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}} +: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}} +: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}} + +####################################################################### + +usage() { + cat < + + +1.0 + + +Resource agent for the DC Certificate Monitor Service (dccertmon) + +Manages the DC Certificate Monitor +Service (dccertmon) + + + + +Location of the DC Certificate Monitor binary (dccertmon) + +DC Certificate Monitor binary (dccertmon) + + + + + +Location of the DC Certificate Monitor (dccertmon) configuration file + +DC Certificate Monitor (dccertmon registry) config file + + + + + +User running DC Certificate Monitor (dccertmon) + +DC Certificate Monitor (dccertmon) user + + + + + +The pid file to use for this DC Certificate Monitor (dccertmon) instance + +DC Certificate Monitor (dccertmon) pid file + + + + + +Additional parameters to pass on to the DC Certificate Monitor (dccertmon) + +Additional parameters for dccertmon + + + + + + + + + + + + + + +END +} + +####################################################################### +# Functions invoked by resource manager actions + +dccertmon_validate() { + local rc + + check_binary $OCF_RESKEY_binary + check_binary curl + check_binary tr + check_binary grep + check_binary cut + check_binary head + + # A config file on shared storage that is not available + # during probes is OK. + if [ ! -f $OCF_RESKEY_config ]; then + if ! 
ocf_is_probe; then + ocf_log err "Config $OCF_RESKEY_config doesn't exist" + return $OCF_ERR_INSTALLED + fi + ocf_log warn "Config $OCF_RESKEY_config not available during a probe" + fi + + getent passwd $OCF_RESKEY_user >/dev/null 2>&1 + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "User $OCF_RESKEY_user doesn't exist" + return $OCF_ERR_INSTALLED + fi + + true +} + +dccertmon_status() { + local pid + local rc + + if [ ! -f $OCF_RESKEY_pid ]; then + ocf_log info "DC Certificate Monitor (dccertmon) is not running" + return $OCF_NOT_RUNNING + else + pid=`cat $OCF_RESKEY_pid` + fi + + ocf_run -warn kill -s 0 $pid + rc=$? + if [ $rc -eq 0 ]; then + return $OCF_SUCCESS + else + ocf_log info "Old PID file found, but DC Certificate Monitor (dccertmon) is not running" + rm -f $OCF_RESKEY_pid + return $OCF_NOT_RUNNING + fi +} + +dccertmon_monitor() { + local rc + + dccertmon_status + rc=$? + + # If status returned anything but success, return that immediately + if [ $rc -ne $OCF_SUCCESS ]; then + return $rc + fi + + ocf_log debug "DC Certificate Monitor (dccertmon) monitor succeeded" + return $OCF_SUCCESS +} + +dccertmon_start() { + local rc + + dccertmon_status + rc=$? + if [ $rc -eq $OCF_SUCCESS ]; then + ocf_log info "DC Certificate Monitor (dccertmon) already running" + return $OCF_SUCCESS + fi + + # Change the working dir to /, to be sure it's accessible + cd / + + # run the actual dccertmon daemon. Don't use ocf_run as we're sending the tool's output + # straight to /dev/null anyway and using ocf_run would break stdout-redirection here. + su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \ + $OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid + + # Spin waiting for the server to come up. + # Let the CRM/LRM time us out if required + while true; do + dccertmon_monitor + rc=$? 
+ [ $rc -eq $OCF_SUCCESS ] && break + if [ $rc -ne $OCF_NOT_RUNNING ]; then + ocf_log err "DC Certificate Monitor (dccertmon) start failed" + exit $OCF_ERR_GENERIC + fi + sleep 1 + done + + ocf_log info "DC Certificate Monitor (dccertmon) started" + return $OCF_SUCCESS +} + +dccertmon_confirm_stop() { + local my_bin + local my_processes + + my_binary=`which ${OCF_RESKEY_binary}` + my_processes=`pgrep -l -f "^(python|/usr/bin/python|/usr/bin/python3) ${my_binary}([^\w-]|$)"` + + if [ -n "${my_processes}" ] + then + ocf_log info "About to SIGKILL the following: ${my_processes}" + pkill -KILL -f "^(python|/usr/bin/python|/usr/bin/python3) ${my_binary}([^\w-]|$)" + fi +} + +dccertmon_stop() { + local rc + local pid + + dccertmon_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + ocf_log info "DC Certificate Monitor (dccertmon) already stopped" + dccertmon_confirm_stop + return $OCF_SUCCESS + fi + + # Try SIGTERM + pid=`cat $OCF_RESKEY_pid` + ocf_run kill -s TERM $pid + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "DC Certificate Monitor (dccertmon) couldn't be stopped" + dccertmon_confirm_stop + exit $OCF_ERR_GENERIC + fi + + # stop waiting + shutdown_timeout=15 + if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then + shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5)) + fi + count=0 + while [ $count -lt $shutdown_timeout ]; do + dccertmon_status + rc=$? + if [ $rc -eq $OCF_NOT_RUNNING ]; then + break + fi + count=`expr $count + 1` + sleep 1 + ocf_log debug "DC Certificate Monitor (dccertmon) still hasn't stopped yet. Waiting ..." + done + + dccertmon_status + rc=$? + if [ $rc -ne $OCF_NOT_RUNNING ]; then + # SIGTERM didn't help either, try SIGKILL + ocf_log info "DC Certificate Monitor (dccertmon) failed to stop after ${shutdown_timeout}s \ + using SIGTERM. Trying SIGKILL ..." 
+ ocf_run kill -s KILL $pid + fi + dccertmon_confirm_stop + + ocf_log info "DC Certificate Monitor (dccertmon) stopped" + + rm -f $OCF_RESKEY_pid + + return $OCF_SUCCESS +} + +####################################################################### + +case "$1" in + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; +esac + +# Anything except meta-data and help must pass validation +dccertmon_validate || exit $? + +# What kind of method was invoked? +case "$1" in + start) dccertmon_start;; + stop) dccertmon_stop;; + status) dccertmon_status;; + monitor) dccertmon_monitor;; + validate-all) ;; + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; +esac + diff --git a/distributedcloud/setup.cfg b/distributedcloud/setup.cfg index e6b609db8..8ad72bb80 100644 --- a/distributedcloud/setup.cfg +++ b/distributedcloud/setup.cfg @@ -26,6 +26,7 @@ packages = dcorch dcdbsync dcagent + dccertmon [entry_points] console_scripts = @@ -43,8 +44,10 @@ console_scripts = dcorch-api-proxy = dcorch.cmd.api_proxy:main dcdbsync-api = dcdbsync.cmd.api:main dcagent-api = dcagent.cmd.audit:main + dccertmon = dccertmon.cmd.cert_mon:main oslo.config.opts = + dccertmon.cmd.cert_mon.config = dccertmon.common.config:list_opts dcagent.common.config = dcagent.common.config:list_opts dcagent.common.api.api_config = dcagent.api.api_config:list_opts dcorch.common.config = dcorch.common.config:list_opts diff --git a/distributedcloud/tox.ini b/distributedcloud/tox.ini index 192971307..3deb66af5 100644 --- a/distributedcloud/tox.ini +++ b/distributedcloud/tox.ini @@ -91,7 +91,7 @@ setenv = {[testenv]setenv} PYTHONPATH = {toxinidir} commands = - pylint {posargs} dccommon dcdbsync dcmanager dcorch dcagent --rcfile=./.pylintrc + pylint {posargs} dccommon dcdbsync dcmanager dcorch dcagent dccertmon --rcfile=./.pylintrc [testenv:black] # This environment checks and displays the recommended changes by Black for formatting