Add threading backend implementation using cotyledon and standard threads

Introduces a new threading backend for oslo.service that provides a
complete alternative to the eventlet-based backend.

This backend includes:
	• A ThreadGroup and Thread implementation using the Python
	  threading module.
	• A reimplementation of all looping call utilities (
	  FixedIntervalLoopingCall, DynamicLoopingCall, etc.) using
	  threading.Event and futurist.ThreadPoolExecutor.
	• A service management layer based on cotyledon, including
	  Service,ServiceWrapper, Launcher, and ProcessLauncher.
	• A new ThreadingBackend class that exposes all components
	  needed for integration into oslo.service.

This change provides a robust and eventlet-free alternative backend while maintaining API compatibility.

Change-Id: Ia7518401ccdcc28afbffd4ffe5c3b772f3617001
This commit is contained in:
Daniel Bengtsson
2025-03-27 16:21:36 +01:00
parent d3f51fcd79
commit 5de514f1f8
23 changed files with 1395 additions and 201 deletions

View File

@@ -12,12 +12,14 @@
# implied. # implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import annotations from __future__ import annotations
import enum import enum
import importlib import importlib
import logging import logging
from typing import Any from typing import Any
from typing import Callable
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from . import exceptions from . import exceptions
@@ -35,31 +37,61 @@ class BackendType(enum.Enum):
DEFAULT_BACKEND_TYPE = BackendType.EVENTLET DEFAULT_BACKEND_TYPE = BackendType.EVENTLET
_cached_backend_type: BackendType | None = None # backend type _cached_backend_type: BackendType | None = None
_cached_backend: BaseBackend | None = None # current backend _cached_backend: BaseBackend | None = None
_cached_components: dict[str, Any] | None = None # backend components _cached_components: dict[str, Any] | None = None
# optional override hook
_backend_hook: Callable[[], BackendType] | None = None
def register_backend_default_hook(hook: Callable[[], BackendType]) -> None:
"""Register a hook that decides the default backend type.
This is used when no init_backend() call has been made, and
get_backend() is called. The hook will be invoked to determine
the backend type to use *only if* no backend has been selected yet.
Example:
def my_hook():
return BackendType.THREADING
register_backend_default_hook(my_hook)
:param hook: A callable that returns a BackendType
"""
global _backend_hook
_backend_hook = hook
def _reset_backend() -> None: def _reset_backend() -> None:
"""used by test functions to reset the selected backend""" """Used by test functions to reset the selected backend."""
global _cached_backend, _cached_components
global _cached_backend_type, _backend_hook
global _cached_backend, _cached_components, _cached_backend_type
_cached_backend_type = _cached_backend = _cached_components = None _cached_backend_type = _cached_backend = _cached_components = None
_backend_hook = None
def init_backend(type_: BackendType) -> None: def init_backend(type_: BackendType) -> None:
"""establish which backend will be used when get_backend() is called""" """Establish which backend will be used when get_backend() is called."""
global _cached_backend, _cached_components, _cached_backend_type global _cached_backend, _cached_components, _cached_backend_type
if _cached_backend_type is not None: if _cached_backend_type is not None:
raise exceptions.BackendAlreadySelected( if _cached_backend_type != type_:
f"The {_cached_backend_type.value!r} backend is already set up" raise exceptions.BackendAlreadySelected(
) f"Backend already set to {_cached_backend_type.value!r},"
f" cannot reinitialize with {type_.value!r}"
)
return # already initialized with same value; no-op
backend_name = type_.value backend_name = type_.value
LOG.info(f"Loading backend: {backend_name}") LOG.info(f"Loading backend: {backend_name}")
try: try:
module_name = f"oslo_service.backend.{backend_name}" module_name = f"oslo_service.backend.{backend_name}"
module = importlib.import_module(module_name) module = importlib.import_module(module_name)
@@ -81,17 +113,33 @@ def init_backend(type_: BackendType) -> None:
def get_backend() -> BaseBackend: def get_backend() -> BaseBackend:
"""Load backend dynamically based on the default constant.""" """Load backend dynamically based on the default constant or hook."""
global _cached_backend global _cached_backend
if _cached_backend is None: if _cached_backend is None:
init_backend(DEFAULT_BACKEND_TYPE) type_ = DEFAULT_BACKEND_TYPE
if _backend_hook is not None:
try:
type_ = _backend_hook()
LOG.info(f"Backend hook selected: {type_.value}")
except Exception:
LOG.exception(
"Backend hook raised an exception."
" Falling back to default.")
init_backend(type_)
assert _cached_backend is not None # nosec B101 : this is for typing assert _cached_backend is not None # nosec B101 : this is for typing
return _cached_backend return _cached_backend
def get_backend_type() -> BackendType | None:
"""Return the type of the current backend, or None if not set."""
return _cached_backend_type
def get_component(name: str) -> Any: def get_component(name: str) -> Any:
"""Retrieve a specific component from the backend.""" """Retrieve a specific component from the backend."""
global _cached_components global _cached_components
@@ -103,4 +151,5 @@ def get_component(name: str) -> Any:
if name not in _cached_components: if name not in _cached_components:
raise KeyError(f"Component {name!r} not found in backend.") raise KeyError(f"Component {name!r} not found in backend.")
return _cached_components[name] return _cached_components[name]

View File

@@ -15,17 +15,16 @@
from __future__ import annotations from __future__ import annotations
from abc import ABC import abc
from abc import abstractmethod
from typing import Any from typing import Any
from oslo_service.backend.exceptions import BackendComponentNotAvailable from oslo_service.backend.exceptions import BackendComponentNotAvailable
class BaseBackend(ABC): class BaseBackend(abc.ABC):
"""Base class for all backend implementations.""" """Base class for all backend implementations."""
@abstractmethod @abc.abstractmethod
def get_service_components(self) -> dict[str, Any]: def get_service_components(self) -> dict[str, Any]:
"""Return the backend components. """Return the backend components.
@@ -42,6 +41,29 @@ class BaseBackend(ABC):
pass pass
class ServiceBase(metaclass=abc.ABCMeta):
"""Base class for all services."""
@abc.abstractmethod
def start(self):
"""Start service."""
@abc.abstractmethod
def stop(self):
"""Stop service."""
@abc.abstractmethod
def wait(self):
"""Wait for service to complete."""
@abc.abstractmethod
def reset(self):
"""Reset service.
Called in case a service running in daemon mode receives SIGHUP.
"""
class ComponentRegistry: class ComponentRegistry:
"""A registry to manage access to backend components. """A registry to manage access to backend components.
@@ -49,12 +71,13 @@ class ComponentRegistry:
raises an explicit error, improving clarity and debugging. It acts raises an explicit error, improving clarity and debugging. It acts
as a centralized registry for backend components. as a centralized registry for backend components.
""" """
def __init__(self, components): def __init__(self, components):
"""Initialize the registry with a dictionary of components. """Initialize the registry with a dictionary of components.
:param components: A dictionary containing backend components, where :param components: A dictionary containing backend components,
the keys are component names and the values are where the keys are component names and the values are the
the respective implementations. respective implementations.
""" """
self._components = components self._components = components
@@ -62,8 +85,8 @@ class ComponentRegistry:
"""Retrieve a component by its key from the registry. """Retrieve a component by its key from the registry.
:param key: The name of the component to retrieve. :param key: The name of the component to retrieve.
:raises NotImplementedError: If the component is :raises NotImplementedError: If the component is not registered
not registered or available. or available.
:return: The requested component instance. :return: The requested component instance.
""" """
if key not in self._components or self._components[key] is None: if key not in self._components or self._components[key] is None:
@@ -75,7 +98,7 @@ class ComponentRegistry:
"""Check if a component is registered and available. """Check if a component is registered and available.
:param key: The name of the component to check. :param key: The name of the component to check.
:return: True if the component is registered :return: True if the component is registered and available,
and available, False otherwise. False otherwise.
""" """
return key in self._components and self._components[key] is not None return key in self._components and self._components[key] is not None

View File

View File

@@ -0,0 +1,16 @@
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Restart method options
_LAUNCHER_RESTART_METHODS = ['reload', 'mutate']

View File

@@ -0,0 +1,33 @@
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import errno
import io
import os
import signal
import sys
def is_daemon():
try:
return os.getpgrp() != os.tcgetpgrp(sys.stdout.fileno())
except io.UnsupportedOperation:
return True
except OSError as err:
return err.errno == errno.ENOTTY or False
def is_sighup_and_daemon(signo, signal_handler):
return (signal_handler.is_signal_supported('SIGHUP') and
signo == signal.SIGHUP and is_daemon())

View File

@@ -0,0 +1,38 @@
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import signal
def get_signal_mappings(ignore=('SIG_DFL', 'SIG_IGN')):
signals_by_name = {
name: getattr(signal, name)
for name in dir(signal)
if name.startswith('SIG') and name not in ignore
}
signals_to_name = {v: k for k, v in signals_by_name.items()}
return signals_by_name, signals_to_name
class SignalExit(SystemExit):
"""Raised to indicate a signal-based exit.
This exception is commonly raised when the process receives a termination
signal (e.g., SIGTERM, SIGINT). The signal number is stored in `signo`.
"""
def __init__(self, signo, exccode=1):
super().__init__(exccode)
self.signo = signo

View File

@@ -0,0 +1,26 @@
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_concurrency import lockutils
class Singleton(type):
_instances = {}
_semaphores = lockutils.Semaphores()
def __call__(cls, *args, **kwargs):
with lockutils.lock('singleton_lock', semaphores=cls._semaphores):
if cls not in cls._instances:
cls._instances[cls] = super().__call__(*args, **kwargs)
return cls._instances[cls]

View File

@@ -0,0 +1,23 @@
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_service._i18n import _
from oslo_service.backend.base import ServiceBase
def check_service_base(service):
if not isinstance(service, ServiceBase):
raise TypeError(
_("Service %(service)s must be an instance of %(base)s!")
% {'service': service, 'base': ServiceBase})

View File

@@ -15,6 +15,9 @@
from oslo_service.backend.base import BaseBackend from oslo_service.backend.base import BaseBackend
from oslo_service.backend.common import daemon_utils
from oslo_service.backend.common import signal_utils
from oslo_service.backend.common import singleton
from oslo_service.backend.eventlet import loopingcall from oslo_service.backend.eventlet import loopingcall
from oslo_service.backend.eventlet import service from oslo_service.backend.eventlet import service
from oslo_service.backend.eventlet import threadgroup from oslo_service.backend.eventlet import threadgroup
@@ -37,8 +40,8 @@ class EventletBackend(BaseBackend):
"Services": service.Services, "Services": service.Services,
"ServiceWrapper": service.ServiceWrapper, "ServiceWrapper": service.ServiceWrapper,
"SignalHandler": service.SignalHandler, "SignalHandler": service.SignalHandler,
"SignalExit": service.SignalExit, "SignalExit": signal_utils.SignalExit,
"Singleton": service.Singleton, "Singleton": singleton.Singleton,
# Looping call-related classes # Looping call-related classes
"LoopingCallBase": loopingcall.LoopingCallBase, "LoopingCallBase": loopingcall.LoopingCallBase,
@@ -57,6 +60,6 @@ class EventletBackend(BaseBackend):
# Functions # Functions
"launch": service.launch, "launch": service.launch,
"_is_daemon": service._is_daemon, "_is_daemon": daemon_utils.is_daemon,
"_is_sighup_and_daemon": service._is_sighup_and_daemon, "_is_sighup_and_daemon": daemon_utils.is_sighup_and_daemon,
} }

View File

@@ -37,13 +37,13 @@ LOG = logging.getLogger(__name__)
class LoopingCallDone(Exception): class LoopingCallDone(Exception):
"""Exception to break out and stop a LoopingCallBase. """Exception to break out and stop a LoopingCallBase.
The poll-function passed to LoopingCallBase can raise this exception to The poll-function passed to LoopingCallBase can raise this exception
break out of the loop normally. This is somewhat analogous to to break out of the loop normally. This is somewhat analogous to
StopIteration. StopIteration.
An optional return-value can be included as the argument to the exception; An optional return-value can be included as the argument to the
this return-value will be returned by LoopingCallBase.wait() exception; this return-value will be returned by
LoopingCallBase.wait()
""" """
def __init__(self, retvalue=True): def __init__(self, retvalue=True):
@@ -109,16 +109,17 @@ class LoopingCallBase:
self._abort.wait(timeout) self._abort.wait(timeout)
def _start(self, idle_for, initial_delay=None, stop_on_exception=True): def _start(self, idle_for, initial_delay=None, stop_on_exception=True):
"""Start the looping """Start the looping.
:param idle_for: Callable that takes two positional arguments, returns :param idle_for: Callable that takes two positional arguments,
how long to idle for. The first positional argument is returns how long to idle for. The first positional argument
the last result from the function being looped and the is the last result from the function being looped and the
second positional argument is the time it took to second positional argument is the time it took to calculate
calculate that result. that result.
:param initial_delay: How long to delay before starting the looping. :param initial_delay: How long to delay before starting the
Value is in seconds. looping. Value is in seconds.
:param stop_on_exception: Whether to stop if an exception occurs. :param stop_on_exception: Whether to stop if an exception
occurs.
:returns: eventlet event instance :returns: eventlet event instance
""" """
if self._thread is not None: if self._thread is not None:
@@ -359,33 +360,33 @@ class BackOffLoopingCall(LoopingCallBase):
class RetryDecorator: class RetryDecorator:
"""Decorator for retrying a function upon suggested exceptions. """Decorator for retrying a function upon suggested exceptions.
The decorated function is retried for the given number of times, and the The decorated function is retried for the given number of times, and
sleep time between the retries is incremented until max sleep time is the sleep time between the retries is incremented until max sleep
reached. If the max retry count is set to -1, then the decorated function time is reached. If the max retry count is set to -1, then the
is invoked indefinitely until an exception is thrown, and the caught decorated function is invoked indefinitely until an exception is
exception is not in the list of suggested exceptions. thrown, and the caught exception is not in the list of suggested
exceptions.
""" """
def __init__(self, max_retry_count=-1, inc_sleep_time=10, def __init__(self, max_retry_count=-1, inc_sleep_time=10,
max_sleep_time=60, exceptions=()): max_sleep_time=60, exceptions=()):
"""Configure the retry object using the input params. """Configure the retry object using the input params.
:param max_retry_count: maximum number of times the given function must :param max_retry_count: maximum number of times the given
be retried when one of the input 'exceptions' function must be retried when one of the input 'exceptions'
is caught. When set to -1, it will be retried is caught. When set to -1, it will be retried indefinitely
indefinitely until an exception is thrown until an exception is thrown and the caught exception is not
and the caught exception is not in param in param exceptions.
exceptions. :param inc_sleep_time: incremental time in seconds for sleep
:param inc_sleep_time: incremental time in seconds for sleep time time between retries
between retries :param max_sleep_time: max sleep time in seconds beyond which
:param max_sleep_time: max sleep time in seconds beyond which the sleep the sleep time will not be incremented using param
time will not be incremented using param inc_sleep_time. On reaching this threshold, max_sleep_time
inc_sleep_time. On reaching this threshold, will be used as the sleep time.
max_sleep_time will be used as the sleep time. :param exceptions: suggested exceptions for which the function
:param exceptions: suggested exceptions for which the function must be must be retried, if no exceptions are provided (the default)
retried, if no exceptions are provided (the default) then all exceptions will be reraised, and no retrying will
then all exceptions will be reraised, and no be triggered.
retrying will be triggered.
""" """
self._max_retry_count = max_retry_count self._max_retry_count = max_retry_count
self._inc_sleep_time = inc_sleep_time self._inc_sleep_time = inc_sleep_time

View File

@@ -14,17 +14,13 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
"""Generic Node base class for all workers that run on hosts.""" """Generic Node base class for all workers that run on hosts."""
import abc
import collections import collections
import errno import errno
import functools import functools
import gc import gc
import inspect import inspect
import io
import logging import logging
import os import os
import random import random
@@ -36,9 +32,21 @@ import eventlet
from eventlet import event from eventlet import event
from eventlet import tpool from eventlet import tpool
from oslo_concurrency import lockutils
from oslo_service._i18n import _ from oslo_service._i18n import _
from oslo_service import _options from oslo_service import _options
from oslo_service.backend.base import ServiceBase
from oslo_service.backend.common.constants import \
_LAUNCHER_RESTART_METHODS
from oslo_service.backend.common.daemon_utils import \
is_sighup_and_daemon as _is_sighup_and_daemon
from oslo_service.backend.common.signal_utils import \
get_signal_mappings
from oslo_service.backend.common.signal_utils import \
SignalExit
from oslo_service.backend.common.singleton import \
Singleton
from oslo_service.backend.common.validation_utils import \
check_service_base as _check_service_base
from oslo_service.backend.eventlet import threadgroup from oslo_service.backend.eventlet import threadgroup
from oslo_service import eventlet_backdoor from oslo_service import eventlet_backdoor
from oslo_service import systemd from oslo_service import systemd
@@ -46,78 +54,6 @@ from oslo_service import systemd
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
_LAUNCHER_RESTART_METHODS = ['reload', 'mutate']
def _is_daemon():
# The process group for a foreground process will match the
# process group of the controlling terminal. If those values do
# not match, or ioctl() fails on the stdout file handle, we assume
# the process is running in the background as a daemon.
# http://www.gnu.org/software/bash/manual/bashref.html#Job-Control-Basics
try:
is_daemon = os.getpgrp() != os.tcgetpgrp(sys.stdout.fileno())
except io.UnsupportedOperation:
# Could not get the fileno for stdout, so we must be a daemon.
is_daemon = True
except OSError as err:
if err.errno == errno.ENOTTY:
# Assume we are a daemon because there is no terminal.
is_daemon = True
else:
raise
return is_daemon
def _is_sighup_and_daemon(signo):
if not (SignalHandler().is_signal_supported('SIGHUP') and
signo == signal.SIGHUP):
# Avoid checking if we are a daemon, because the signal isn't
# SIGHUP.
return False
return _is_daemon()
def _check_service_base(service):
if not isinstance(service, ServiceBase):
raise TypeError(_("Service %(service)s must an instance of %(base)s!")
% {'service': service, 'base': ServiceBase})
class ServiceBase(metaclass=abc.ABCMeta):
"""Base class for all services."""
@abc.abstractmethod
def start(self):
"""Start service."""
@abc.abstractmethod
def stop(self):
"""Stop service."""
@abc.abstractmethod
def wait(self):
"""Wait for service to complete."""
@abc.abstractmethod
def reset(self):
"""Reset service.
Called in case service running in daemon mode receives SIGHUP.
"""
class Singleton(type):
_instances = {}
_semaphores = lockutils.Semaphores()
def __call__(cls, *args, **kwargs):
with lockutils.lock('singleton_lock', semaphores=cls._semaphores):
if cls not in cls._instances:
cls._instances[cls] = super().__call__(
*args, **kwargs)
return cls._instances[cls]
class SignalHandler(metaclass=Singleton): class SignalHandler(metaclass=Singleton):
@@ -128,14 +64,7 @@ class SignalHandler(metaclass=Singleton):
# Map all signal names to signal integer values and create a # Map all signal names to signal integer values and create a
# reverse mapping (for easier + quick lookup). # reverse mapping (for easier + quick lookup).
self._ignore_signals = ('SIG_DFL', 'SIG_IGN') self._signals_by_name, self.signals_to_name = get_signal_mappings()
self._signals_by_name = {name: getattr(signal, name)
for name in dir(signal)
if name.startswith("SIG") and
name not in self._ignore_signals}
self.signals_to_name = {
sigval: name
for (name, sigval) in self._signals_by_name.items()}
self._signal_handlers = collections.defaultdict(list) self._signal_handlers = collections.defaultdict(list)
self.clear() self.clear()
@@ -192,8 +121,8 @@ class SignalHandler(metaclass=Singleton):
def __setup_signal_interruption(self): def __setup_signal_interruption(self):
"""Set up to do the Right Thing with signals during poll() and sleep(). """Set up to do the Right Thing with signals during poll() and sleep().
Deal with the changes introduced in PEP 475 that prevent a signal from Deal with the changes introduced in PEP 475 that prevent a
interrupting eventlet's call to poll() or sleep(). signal from interrupting eventlet's call to poll() or sleep().
""" """
select_module = eventlet.patcher.original('select') select_module = eventlet.patcher.original('select')
self.__force_interrupt_on_signal = hasattr(select_module, 'poll') self.__force_interrupt_on_signal = hasattr(select_module, 'poll')
@@ -245,10 +174,9 @@ class Launcher:
"""Initialize the service launcher. """Initialize the service launcher.
:param restart_method: If 'reload', calls reload_config_files on :param restart_method: If 'reload', calls reload_config_files on
SIGHUP. If 'mutate', calls mutate_config_files on SIGHUP. Other SIGHUP. If 'mutate', calls mutate_config_files on SIGHUP.
values produce a ValueError. Other values produce a ValueError.
:returns: None :returns: None
""" """
self.conf = conf self.conf = conf
conf.register_opts(_options.service_opts) conf.register_opts(_options.service_opts)
@@ -266,7 +194,6 @@ class Launcher:
ProcessLauncher.launch_service. It must be None, 1 or ProcessLauncher.launch_service. It must be None, 1 or
omitted. omitted.
:returns: None :returns: None
""" """
if workers is not None and workers != 1: if workers is not None and workers != 1:
raise ValueError(_("Launcher asked to start multiple workers")) raise ValueError(_("Launcher asked to start multiple workers"))
@@ -278,7 +205,6 @@ class Launcher:
"""Stop all services which are currently running. """Stop all services which are currently running.
:returns: None :returns: None
""" """
self.services.stop() self.services.stop()
@@ -286,7 +212,6 @@ class Launcher:
"""Wait until all services have been stopped, and then return. """Wait until all services have been stopped, and then return.
:returns: None :returns: None
""" """
self.services.wait() self.services.wait()
@@ -294,7 +219,7 @@ class Launcher:
"""Reload config files and restart service. """Reload config files and restart service.
:returns: The return value from reload_config_files or :returns: The return value from reload_config_files or
mutate_config_files, according to the restart_method. mutate_config_files, according to the restart_method.
""" """
if self.restart_method == 'reload': if self.restart_method == 'reload':
self.conf.reload_config_files() self.conf.reload_config_files()
@@ -303,14 +228,9 @@ class Launcher:
self.services.restart() self.services.restart()
class SignalExit(SystemExit):
def __init__(self, signo, exccode=1):
super().__init__(exccode)
self.signo = signo
class ServiceLauncher(Launcher): class ServiceLauncher(Launcher):
"""Runs one or more service in a parent process.""" """Runs one or more service in a parent process."""
def __init__(self, conf, restart_method='reload'): def __init__(self, conf, restart_method='reload'):
"""Constructor. """Constructor.
@@ -381,7 +301,7 @@ class ServiceLauncher(Launcher):
while True: while True:
self.handle_signal() self.handle_signal()
status, signo = self._wait_for_exit_or_signal() status, signo = self._wait_for_exit_or_signal()
if not _is_sighup_and_daemon(signo): if not _is_sighup_and_daemon(signo, SignalHandler()):
break break
self.restart() self.restart()
@@ -405,10 +325,10 @@ class ProcessLauncher:
:param conf: an instance of ConfigOpts :param conf: an instance of ConfigOpts
:param wait_interval: The interval to sleep for between checks :param wait_interval: The interval to sleep for between checks
of child process exit. of child process exit.
:param restart_method: If 'reload', calls reload_config_files on :param restart_method: If 'reload', calls reload_config_files on
SIGHUP. If 'mutate', calls mutate_config_files on SIGHUP. Other SIGHUP. If 'mutate', calls mutate_config_files on SIGHUP.
values produce a ValueError. Other values produce a ValueError.
""" """
self.conf = conf self.conf = conf
conf.register_opts(_options.service_opts) conf.register_opts(_options.service_opts)
@@ -563,7 +483,7 @@ class ProcessLauncher:
self._child_process_handle_signal() self._child_process_handle_signal()
status, signo = self._child_wait_for_exit_or_signal( status, signo = self._child_wait_for_exit_or_signal(
self.launcher) self.launcher)
if not _is_sighup_and_daemon(signo): if not _is_sighup_and_daemon(signo, SignalHandler()):
self.launcher.wait() self.launcher.wait()
break break
self.launcher.restart() self.launcher.restart()
@@ -580,10 +500,10 @@ class ProcessLauncher:
def launch_service(self, service, workers=1): def launch_service(self, service, workers=1):
"""Launch a service with a given number of workers. """Launch a service with a given number of workers.
:param service: a service to launch, must be an instance of :param service: a service to launch, must be an instance of
:class:`oslo_service.service.ServiceBase` :class:`oslo_service.service.ServiceBase`
:param workers: a number of processes in which a service :param workers: a number of processes in which a service
will be running will be running
""" """
_check_service_base(service) _check_service_base(service)
wrap = ServiceWrapper(service, workers) wrap = ServiceWrapper(service, workers)
@@ -657,7 +577,7 @@ class ProcessLauncher:
signame = self.signal_handler.signals_to_name[self.sigcaught] signame = self.signal_handler.signals_to_name[self.sigcaught]
LOG.info('Caught %s, stopping children', signame) LOG.info('Caught %s, stopping children', signame)
if not _is_sighup_and_daemon(self.sigcaught): if not _is_sighup_and_daemon(self.sigcaught, SignalHandler()):
break break
child_signal = signal.SIGTERM child_signal = signal.SIGTERM
@@ -725,8 +645,8 @@ class Service(ServiceBase):
def stop(self, graceful=False): def stop(self, graceful=False):
"""Stop a service. """Stop a service.
:param graceful: indicates whether to wait for all threads to finish :param graceful: indicates whether to wait for all threads to
or terminate them instantly finish or terminate them instantly
""" """
self.tg.stop(graceful) self.tg.stop(graceful)
@@ -798,7 +718,6 @@ class Services:
:param service: service to run :param service: service to run
:param done: event to wait on until a shutdown is triggered :param done: event to wait on until a shutdown is triggered
:returns: None :returns: None
""" """
try: try:
service.start() service.start()

View File

@@ -39,10 +39,11 @@ def _on_thread_done(_greenthread, group, thread):
class Thread: class Thread:
"""Wrapper around a greenthread. """Wrapper around a greenthread.
Holds a reference to the :class:`ThreadGroup`. The Thread will notify Holds a reference to the :class:`ThreadGroup`. The Thread will notify
the :class:`ThreadGroup` when it has done so it can be removed from the :class:`ThreadGroup` when it has done so it can be removed from
the threads list. the threads list.
""" """
def __init__(self, thread, group, link=True): def __init__(self, thread, group, link=True):
self.thread = thread self.thread = thread
if link: if link:
@@ -91,8 +92,8 @@ class ThreadGroup:
def __init__(self, thread_pool_size=10): def __init__(self, thread_pool_size=10):
"""Create a ThreadGroup with a pool of greenthreads. """Create a ThreadGroup with a pool of greenthreads.
:param thread_pool_size: the maximum number of threads allowed to run :param thread_pool_size: the maximum number of threads allowed
concurrently. to run concurrently.
""" """
self.pool = greenpool.GreenPool(thread_pool_size) self.pool = greenpool.GreenPool(thread_pool_size)
self.threads = [] self.threads = []
@@ -244,8 +245,8 @@ class ThreadGroup:
def thread_done(self, thread): def thread_done(self, thread):
"""Remove a completed thread from the group. """Remove a completed thread from the group.
This method is automatically called on completion of a thread in the This method is automatically called on completion of a thread in
group, and should not be called explicitly. the group, and should not be called explicitly.
""" """
self.threads.remove(thread) self.threads.remove(thread)

View File

@@ -15,7 +15,7 @@
class BackendAlreadySelected(Exception): class BackendAlreadySelected(Exception):
"""raised when init_backend() is called more than once""" """Raised when init_backend() is called more than once."""
pass pass

View File

@@ -0,0 +1,60 @@
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_service.backend.base import BaseBackend
from oslo_service.backend.common import daemon_utils
from oslo_service.backend.common import signal_utils
from oslo_service.backend.common import singleton
from oslo_service.backend.threading import loopingcall
from oslo_service.backend.threading import service
from oslo_service.backend.threading import threadgroup
class ThreadingBackend(BaseBackend):
"""Backend implementation using Python threading and Cotyledon."""
@staticmethod
def get_service_components():
"""Return the components provided by the Threading backend."""
return {
# Service-related classes
"ServiceBase": service.Service,
"ServiceLauncher": service.ProcessLauncher,
"Launcher": service.ProcessLauncher,
"ProcessLauncher": service.ProcessLauncher,
"Service": service.Service,
"Services": service.Services,
"ServiceWrapper": service.ServiceWrapper,
"SignalExit": signal_utils.SignalExit,
"SignalHandler": service.SignalHandler,
"Singleton": singleton.Singleton,
# Looping call-related classes
"LoopingCallBase": loopingcall.LoopingCallBase,
"LoopingCallDone": loopingcall.LoopingCallDone,
"LoopingCallTimeOut": loopingcall.LoopingCallTimeOut,
"FixedIntervalLoopingCall": loopingcall.FixedIntervalLoopingCall,
"FixedIntervalWithTimeoutLoopingCall":
loopingcall.FixedIntervalWithTimeoutLoopingCall,
"DynamicLoopingCall": loopingcall.DynamicLoopingCall,
"BackOffLoopingCall": loopingcall.BackOffLoopingCall,
"RetryDecorator": loopingcall.RetryDecorator,
# Thread group-related classes
"ThreadGroup": threadgroup.ThreadGroup,
"Thread": threadgroup.Thread,
# Functions
"_is_daemon": daemon_utils.is_daemon,
"_is_sighup_and_daemon": daemon_utils.is_sighup_and_daemon,
"launch": service.launch,
}

View File

@@ -0,0 +1,403 @@
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import random
import sys
import threading
import time
import futurist
from oslo_log import log as logging
from oslo_utils import excutils
from oslo_utils import reflection
from oslo_utils import timeutils
from oslo_service._i18n import _
LOG = logging.getLogger(__name__)
class LoopingCallDone(Exception):
"""Exception to break out and stop a LoopingCallBase.
The function passed to a looping call may raise this exception to
break out of the loop normally. An optional return value may be
provided; this value will be returned by LoopingCallBase.wait().
"""
def __init__(self, retvalue=True):
""":param retvalue: Value that LoopingCallBase.wait() should return."""
self.retvalue = retvalue
class LoopingCallTimeOut(Exception):
"""Exception raised when a LoopingCall times out."""
pass
class FutureEvent:
"""A simple event object that can carry a result or an exception."""
def __init__(self):
self._event = threading.Event()
self._result = None
self._exc_info = None
def send(self, result):
self._result = result
self._event.set()
def send_exception(self, exc_type, exc_value, tb):
self._exc_info = (exc_type, exc_value, tb)
self._event.set()
def wait(self, timeout=None):
flag = self._event.wait(timeout)
if not flag:
raise RuntimeError("Timed out waiting for event")
if self._exc_info:
exc_type, exc_value, tb = self._exc_info
raise exc_value.with_traceback(tb)
return self._result
def _safe_wrapper(f, kind, func_name):
"""Wrapper that calls the wrapped function and logs errors as needed."""
def func(*args, **kwargs):
try:
return f(*args, **kwargs)
except LoopingCallDone:
raise # Let the outer handler process this
except Exception:
LOG.error('%(kind)s %(func_name)r failed',
{'kind': kind, 'func_name': func_name},
exc_info=True)
return 0
return func
class LoopingCallBase:
_KIND = _("Unknown looping call")
_RUN_ONLY_ONE_MESSAGE = _(
"A looping call can only run one function at a time")
def __init__(self, f=None, *args, **kwargs):
self.args = args
self.kwargs = kwargs
self.f = f
self._future = None
self.done = None
self._abort = threading.Event() # When set, the loop stops
@property
def _running(self):
return not self._abort.is_set()
def stop(self):
if self._running:
self._abort.set()
def wait(self):
"""Wait for the looping call to complete and return its result."""
return self.done.wait()
def _on_done(self, future):
self._future = None
def _sleep(self, timeout):
# Instead of eventlet.sleep, we wait on the abort event for timeout
# seconds.
self._abort.wait(timeout)
def _start(self, idle_for, initial_delay=None, stop_on_exception=True):
"""Start the looping call.
:param idle_for: Callable taking two arguments (last result,
elapsed time) and returning how long to idle.
:param initial_delay: Delay (in seconds) before starting the
loop.
:param stop_on_exception: Whether to stop on exception.
:returns: A FutureEvent instance.
"""
if self._future is not None:
raise RuntimeError(self._RUN_ONLY_ONE_MESSAGE)
self.done = FutureEvent()
self._abort.clear()
def _run_loop():
kind = self._KIND
func_name = reflection.get_callable_name(self.f)
func = self.f if stop_on_exception else _safe_wrapper(self.f, kind,
func_name)
if initial_delay:
self._sleep(initial_delay)
try:
watch = timeutils.StopWatch()
while self._running:
watch.restart()
result = func(*self.args, **self.kwargs)
watch.stop()
if not self._running:
break
idle = idle_for(result, watch.elapsed())
LOG.debug(
'%(kind)s %(func_name)r sleeping for %(idle).02f'
' seconds',
{'func_name': func_name, 'idle': idle, 'kind': kind})
self._sleep(idle)
except LoopingCallDone as e:
self.done.send(e.retvalue)
except Exception:
exc_info = sys.exc_info()
try:
LOG.error('%(kind)s %(func_name)r failed',
{'kind': kind, 'func_name': func_name},
exc_info=exc_info)
self.done.send_exception(*exc_info)
finally:
del exc_info
return
else:
self.done.send(True)
# Use futurist's ThreadPoolExecutor to run the loop in a background
# thread.
executor = futurist.ThreadPoolExecutor(max_workers=1)
self._future = executor.submit(_run_loop)
self._future.add_done_callback(self._on_done)
return self.done
# NOTE: _elapsed() is a thin wrapper for StopWatch.elapsed()
def _elapsed(self, watch):
return watch.elapsed()
class FixedIntervalLoopingCall(LoopingCallBase):
"""A fixed interval looping call."""
_RUN_ONLY_ONE_MESSAGE = _(
"A fixed interval looping call can only run one function at a time")
_KIND = _('Fixed interval looping call')
def start(self, interval, initial_delay=None, stop_on_exception=True):
def _idle_for(result, elapsed):
delay = round(elapsed - interval, 2)
if delay > 0:
func_name = reflection.get_callable_name(self.f)
LOG.warning(
'Function %(func_name)r run outlasted interval by'
' %(delay).2f sec',
{'func_name': func_name, 'delay': delay})
return -delay if delay < 0 else 0
return self._start(_idle_for, initial_delay=initial_delay,
stop_on_exception=stop_on_exception)
class FixedIntervalWithTimeoutLoopingCall(LoopingCallBase):
"""A fixed interval looping call with timeout checking."""
_RUN_ONLY_ONE_MESSAGE = _(
"A fixed interval looping call with timeout checking"
" can only run one function at a time")
_KIND = _('Fixed interval looping call with timeout checking.')
def start(self, interval, initial_delay=None, stop_on_exception=True,
timeout=0):
start_time = time.time()
def _idle_for(result, elapsed):
delay = round(elapsed - interval, 2)
if delay > 0:
func_name = reflection.get_callable_name(self.f)
LOG.warning(
'Function %(func_name)r run outlasted interval by'
' %(delay).2f sec',
{'func_name': func_name, 'delay': delay})
elapsed_time = time.time() - start_time
if timeout > 0 and elapsed_time > timeout:
raise LoopingCallTimeOut(
_('Looping call timed out after %.02f seconds')
% elapsed_time)
return -delay if delay < 0 else 0
return self._start(_idle_for, initial_delay=initial_delay,
stop_on_exception=stop_on_exception)
class DynamicLoopingCall(LoopingCallBase):
"""A looping call which sleeps until the next known event.
The function called should return how long to sleep before being
called again.
"""
_RUN_ONLY_ONE_MESSAGE = _(
"A dynamic interval looping call can only run one function at a time")
_TASK_MISSING_SLEEP_VALUE_MESSAGE = _(
"A dynamic interval looping call should supply either an interval or"
" periodic_interval_max"
)
_KIND = _('Dynamic interval looping call')
def start(self, initial_delay=None, periodic_interval_max=None,
stop_on_exception=True):
def _idle_for(suggested_delay, elapsed):
delay = suggested_delay
if delay is None:
if periodic_interval_max is not None:
delay = periodic_interval_max
else:
raise RuntimeError(self._TASK_MISSING_SLEEP_VALUE_MESSAGE)
else:
if periodic_interval_max is not None:
delay = min(delay, periodic_interval_max)
return delay
return self._start(_idle_for, initial_delay=initial_delay,
stop_on_exception=stop_on_exception)
class BackOffLoopingCall(LoopingCallBase):
"""Run a method in a loop with backoff on error.
The provided function should return True (indicating success, which resets
the backoff interval), False (indicating an error, triggering a backoff),
or raise LoopingCallDone(retvalue=...) to quit the loop.
"""
_RNG = random.SystemRandom()
_KIND = _('Dynamic backoff interval looping call')
_RUN_ONLY_ONE_MESSAGE = _(
"A dynamic backoff interval looping call can only run one function at"
" a time")
def __init__(self, f=None, *args, **kwargs):
super().__init__(f=f, *args, **kwargs)
self._error_time = 0
self._interval = 1
def start(self, initial_delay=None, starting_interval=1, timeout=300,
max_interval=300, jitter=0.75, min_interval=0.001):
if self._future is not None:
raise RuntimeError(self._RUN_ONLY_ONE_MESSAGE)
# Reset state.
self._error_time = 0
self._interval = starting_interval
def _idle_for(success, _elapsed):
random_jitter = abs(self._RNG.gauss(jitter, 0.1))
if success:
# Reset error state on success.
self._interval = starting_interval
self._error_time = 0
return self._interval * random_jitter
else:
# Back off on error with jitter.
idle = max(self._interval * 2 * random_jitter, min_interval)
idle = min(idle, max_interval)
self._interval = max(self._interval * 2 * jitter, min_interval)
if timeout > 0 and self._error_time + idle > timeout:
raise LoopingCallTimeOut(
_('Looping call timed out after %.02f seconds') % (
self._error_time + idle))
self._error_time += idle
return idle
return self._start(_idle_for, initial_delay=initial_delay)
class RetryDecorator:
"""Decorator for retrying a function upon suggested exceptions.
The decorated function is retried for the given number of times,
with an incrementally increasing sleep time between retries. A max
sleep time may be set. If max_retry_count is -1, the function is
retried indefinitely until an exception is raised that is not in the
suggested exceptions.
"""
def __init__(self, max_retry_count=-1, inc_sleep_time=10,
max_sleep_time=60, exceptions=()):
"""Document parameters for retry behavior.
:param max_retry_count: Maximum number of retries for exceptions in
'exceptions'. -1 means retry indefinitely.
:param inc_sleep_time: Incremental sleep time (seconds) between
retries.
:param max_sleep_time: Maximum sleep time (seconds).
:param exceptions: A tuple of exception types to catch for retries.
"""
self._max_retry_count = max_retry_count
self._inc_sleep_time = inc_sleep_time
self._max_sleep_time = max_sleep_time
self._exceptions = exceptions
self._retry_count = 0
self._sleep_time = 0
def __call__(self, f):
func_name = reflection.get_callable_name(f)
def _func(*args, **kwargs):
try:
if self._retry_count:
LOG.debug(
"Invoking %(func_name)s; retry count is"
" %(retry_count)d.",
{'func_name': func_name,
'retry_count': self._retry_count})
result = f(*args, **kwargs)
except self._exceptions:
with excutils.save_and_reraise_exception() as ctxt:
LOG.debug(
"Exception in %(func_name)s occurred which is in the"
" retry list.",
{'func_name': func_name})
if (self._max_retry_count != -1 and
self._retry_count >= self._max_retry_count):
LOG.debug(
"Cannot retry %(func_name)s because retry count"
" (%(retry_count)d) reached max"
" (%(max_retry_count)d).",
{'retry_count': self._retry_count,
'max_retry_count': self._max_retry_count,
'func_name': func_name})
else:
ctxt.reraise = False
self._retry_count += 1
self._sleep_time += self._inc_sleep_time
return self._sleep_time
raise LoopingCallDone(result)
@functools.wraps(f)
def func(*args, **kwargs):
loop = DynamicLoopingCall(_func, *args, **kwargs)
evt = loop.start(periodic_interval_max=self._max_sleep_time)
LOG.debug("Waiting for function %s to return.", func_name)
return evt.wait()
return func

View File

@@ -0,0 +1,260 @@
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import logging
import signal
import sys
import threading
import traceback
import cotyledon
from cotyledon import oslo_config_glue
from oslo_service._i18n import _
from oslo_service.backend.base import ServiceBase
from oslo_service.backend.common.constants import _LAUNCHER_RESTART_METHODS
from oslo_service.backend.common.signal_utils import get_signal_mappings
from oslo_service.backend.common.singleton import Singleton
from oslo_service.backend.common.validation_utils import \
check_service_base as _check_service_base
from oslo_service.backend.threading import threadgroup
LOG = logging.getLogger(__name__)
class SignalHandler(metaclass=Singleton):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._signals_by_name, self.signals_to_name = get_signal_mappings()
self._signal_handlers = collections.defaultdict(list)
self.clear()
def clear(self):
for sig in list(self._signal_handlers.keys()):
signal.signal(sig, signal.SIG_DFL)
self._signal_handlers.clear()
def add_handlers(self, signals, handler):
for sig in signals:
self.add_handler(sig, handler)
def add_handler(self, sig, handler):
if not self.is_signal_supported(sig):
return
signo = self._signals_by_name[sig]
self._signal_handlers[signo].append(handler)
signal.signal(signo, self._handle_signal)
def _handle_signal(self, signo, frame):
threading.Thread(target=self._handle_signal_cb,
args=(signo, frame)).start()
def _handle_signal_cb(self, signo, frame):
for handler in reversed(self._signal_handlers[signo]):
handler(signo, frame)
def is_signal_supported(self, sig_name):
return sig_name in self._signals_by_name
class ServiceWrapper(cotyledon.Service):
def __init__(self, worker_id, service_instance, **kwargs):
super().__init__(worker_id)
if not isinstance(service_instance, ServiceBase):
raise TypeError("Service must be an instance of ServiceBase")
self.service_instance = service_instance
def run(self):
try:
self.service_instance.start()
self.service_instance.wait()
except Exception:
traceback.print_exc()
sys.exit(1)
def terminate(self):
self.service_instance.stop()
class Launcher:
def __init__(self):
self._launcher = None
def launch_service(self, service, workers=None):
_check_service_base(service)
if workers not in (None, 1):
raise NotImplementedError("Multiple workers is not supported.")
self._launcher = service
service.start()
return service
def wait(self):
if self._launcher:
self._launcher.wait()
def stop(self):
if self._launcher:
self._launcher.stop()
def restart(self):
if self._launcher:
self._launcher.stop()
self._launcher.start()
class ServiceLauncher:
def __init__(self, conf, restart_method='reload'):
self.conf = conf
self.restart_method = restart_method
self.backdoor_port = None
self._manager = cotyledon.ServiceManager()
oslo_config_glue.setup(self._manager, conf)
def launch_service(self, service_instance, workers=1):
_check_service_base(service_instance)
service_instance.backdoor_port = self.backdoor_port
if not isinstance(workers, int) or workers < 1:
raise ValueError("Number of workers must be >= 1")
self._manager.add(ServiceWrapper, workers, args=(service_instance,))
def stop(self):
self._manager._terminate(None, None)
def wait(self):
try:
return self._manager.run()
except SystemExit as exc:
self.stop()
return exc.code
except BaseException:
self.stop()
LOG.exception("Unhandled exception")
return 2
class Service(ServiceBase):
def __init__(self, threads=1000):
super().__init__()
self.tg = threadgroup.ThreadGroup(threads)
self.backdoor_port = None
def reset(self):
pass
def start(self):
pass
def stop(self, graceful=False):
self.tg.stop(graceful)
def wait(self):
self.tg.waitall()
class Services:
def __init__(self, restart_method='reload'):
if restart_method not in _LAUNCHER_RESTART_METHODS:
raise ValueError(_("Invalid restart_method: %s") % restart_method)
self.restart_method = restart_method
self.services = []
self.tg = threadgroup.ThreadGroup()
self.done = threading.Event()
def add(self, service):
self.services.append(service)
self.tg.add_thread(self.run_service, service, self.done)
def stop(self):
for service in self.services:
service.stop()
if not self.done.is_set():
self.done.set()
self.tg.stop()
def wait(self):
for service in self.services:
service.wait()
self.tg.wait()
def restart(self):
if self.restart_method == 'reload':
self.stop()
self.done = threading.Event()
for restart_service in self.services:
restart_service.reset()
if self.restart_method == 'reload':
self.tg.add_thread(
self.run_service, restart_service, self.done)
@staticmethod
def run_service(service, done):
try:
service.start()
except Exception:
LOG.exception('Error starting service thread.')
raise SystemExit(1)
else:
done.wait()
class ProcessLauncher:
def __init__(self, conf, restart_method='reload', no_fork=False):
self.conf = conf
self.restart_method = restart_method
self.no_fork = no_fork
self._manager = None
self._service_instance = None
def launch_service(self, service, workers=1):
_check_service_base(service)
self._service_instance = service
if self.no_fork:
LOG.warning("no_fork=True: running service in main process")
service.start()
service.wait()
return
self._manager = cotyledon.ServiceManager()
oslo_config_glue.setup(self._manager, self.conf)
self._manager.add(ServiceWrapper, workers,
args=(self._service_instance,))
def wait(self):
if self.no_fork:
return 0
return self._manager.run()
def stop(self):
LOG.info("Stopping service")
if self._manager:
self._manager._terminate(None, None)
def launch(conf, service, workers=1, restart_method='reload', no_fork=False):
if workers is not None and not isinstance(workers, int):
raise TypeError("Type of workers should be int!")
if workers is not None and workers <= 0:
raise ValueError("Number of workers should be positive!")
if workers == 1 and not no_fork:
launcher = ServiceLauncher(conf, restart_method=restart_method)
else:
launcher = ProcessLauncher(
conf, restart_method=restart_method, no_fork=no_fork)
launcher.launch_service(service, workers=workers)
return launcher

View File

@@ -0,0 +1,233 @@
# Copyright (C) 2025 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import threading
import time
import warnings
from oslo_service.backend.threading import loopingcall
LOG = logging.getLogger(__name__)
# --- New: Add a Thread wrapper to mimic the eventlet interface ---
class Thread:
"""A simple wrapper around native threads.
This class mimics the eventlet Thread interface (stop, wait, link,
cancel) for compatibility with oslo.service consumers. The methods
`stop`, `link`, and `cancel` are implemented as no-ops in the threading
backend since native Python threads do not support these operations
natively.
"""
def __init__(self, thread, group=None, link=True):
self.thread = thread
self._ident = thread.ident
self.group = group
# Optionally, support for a link callback can be added here.
@property
def ident(self):
return self._ident
def stop(self):
# These methods are no-ops in the threading backend because native
# Python threads cannot be forcefully stopped or cancelled once
# started. They are kept here to preserve API compatibility with the
# eventlet backend, where these methods are implemented.
pass
def wait(self):
self.thread.join()
def link(self, func, *args, **kwargs):
# Optionally schedule a callback after thread completion.
pass
def cancel(self, *throw_args):
# Optionally implement cancellation if required.
pass
# --- End of new Thread wrapper ---
class ThreadGroup:
"""A group of threads and timers similar to eventlet's GreenPool."""
def __init__(self, max_threads=1000):
self.max_threads = max_threads
self.threads = []
self.timers = []
self._lock = threading.Lock()
def __getstate__(self):
# Exclude _lock from pickling.
state = self.__dict__.copy()
if '_lock' in state:
del state['_lock']
return state
def __setstate__(self, state):
self.__dict__.update(state)
# Recreate the lock after unpickling.
self._lock = threading.Lock()
def add_timer(self, delay, callback, *args, **kwargs):
if args or kwargs:
warnings.warn(
"Calling add_timer() with arguments is deprecated. Use "
"add_timer_args() instead.",
DeprecationWarning
)
new_args = list(args)
if new_args and new_args[0] == delay:
new_args = new_args[1:]
return self.add_timer_args(delay, callback, new_args, kwargs)
def add_timer_args(self, interval, callback, args=None, kwargs=None,
initial_delay=None, stop_on_exception=True):
args = args or []
kwargs = kwargs or {}
pulse = loopingcall.FixedIntervalLoopingCall(
callback, *args, **kwargs)
pulse.start(interval=interval,
initial_delay=initial_delay,
stop_on_exception=stop_on_exception)
self._set_attr(pulse, '_running', True)
pulse.args = tuple(args)
pulse.kw = kwargs
with self._lock:
self.timers.append(pulse)
return pulse
def add_dynamic_timer(self, callback, initial_delay, periodic_interval_max,
*args, **kwargs):
warnings.warn(
"Calling add_dynamic_timer() with arguments is deprecated. Use "
"add_dynamic_timer_args() instead.",
DeprecationWarning
)
return self.add_dynamic_timer_args(
callback, list(args), kwargs, initial_delay=initial_delay,
periodic_interval_max=periodic_interval_max)
def add_dynamic_timer_args(self, callback, args=None, kwargs=None,
initial_delay=None, periodic_interval_max=None,
stop_on_exception=True):
args = args or []
kwargs = kwargs or {}
timer = loopingcall.DynamicLoopingCall(callback, *args, **kwargs)
timer.start(initial_delay=initial_delay,
periodic_interval_max=periodic_interval_max,
stop_on_exception=stop_on_exception)
self._set_attr(timer, '_running', True)
timer.args = tuple(args)
timer.kw = kwargs
with self._lock:
self.timers.append(timer)
return timer
def add_thread(self, callback, *args, **kwargs):
with self._lock:
if len(self.threads) >= self.max_threads:
raise RuntimeError("Maximum number of threads reached")
t = threading.Thread(target=callback, args=args, kwargs=kwargs)
t.args = args
t.kw = kwargs
self.threads.append(t)
t.start()
return Thread(t, group=self)
def thread_done(self, thread):
with self._lock:
try:
self.threads.remove(
thread.thread if isinstance(thread, Thread) else thread)
except ValueError:
pass
def timer_done(self, timer):
with self._lock:
try:
self.timers.remove(timer)
except ValueError:
pass
def cancel(self, *throw_args, **kwargs):
pass
def stop_timers(self):
with self._lock:
for timer in self.timers:
timer.stop()
self.timers = []
def stop(self, graceful=False):
self.stop_timers()
if graceful:
self._wait_threads()
else:
self._stop_threads()
time.sleep(0.05)
def _stop_threads(self):
current = threading.current_thread()
with self._lock:
for t in self.threads:
if t is not current and hasattr(t, "abort"):
t.abort.set()
self.threads = [t for t in self.threads if t is current]
def _wait_threads(self):
current = threading.current_thread()
with self._lock:
for t in self.threads:
if t is not current:
try:
t.join()
except Exception:
LOG.exception('Error waiting on thread.')
self.threads = [t for t in self.threads if t is current]
def waitall(self):
with self._lock:
threads_copy = list(self.threads)
timers_copy = list(self.timers)
for t in threads_copy:
t.join()
for timer in timers_copy:
timer.wait()
def _set_attr(self, obj, attr, value):
try:
object.__setattr__(obj, attr, value)
except Exception:
if hasattr(obj, '__dict__'):
obj.__dict__[attr] = value

View File

@@ -35,34 +35,38 @@ class TestBackend(unittest.TestCase):
self.assertEqual(backend.__class__.__name__, "EventletBackend") self.assertEqual(backend.__class__.__name__, "EventletBackend")
def test_init_backend_explicit(self): def test_init_backend_explicit(self):
"""test that init_backend() can be called before get_backend()""" """Test that init_backend() can be called before get_backend()."""
init_backend(BackendType.EVENTLET) init_backend(BackendType.EVENTLET)
backend = get_backend() backend = get_backend()
self.assertEqual(backend.__class__.__name__, "EventletBackend") self.assertEqual(backend.__class__.__name__, "EventletBackend")
def test_dont_reinit_backend_from_default(self): def test_dont_reinit_backend_from_default(self):
"""test that init_backend() can't be called after get_backend()""" """Fail if init_backend() called after get_backend() with another."""
get_backend() get_backend()
with self.assertRaisesRegex( with self.assertRaisesRegex(
exceptions.BackendAlreadySelected, exceptions.BackendAlreadySelected,
"The 'eventlet' backend is already set up", "Backend already set to 'eventlet'",
): ):
init_backend(BackendType.EVENTLET) init_backend(BackendType.THREADING)
def test_dont_reinit_backend_explicit_init(self): def test_dont_reinit_backend_explicit_init(self):
"""test that init_backend() can't be called twice""" """Fail if init_backend() called twice with different backend."""
init_backend(BackendType.EVENTLET) init_backend(BackendType.EVENTLET)
with self.assertRaisesRegex( with self.assertRaisesRegex(
exceptions.BackendAlreadySelected, exceptions.BackendAlreadySelected,
"The 'eventlet' backend is already set up", "Backend already set to 'eventlet'",
): ):
init_backend(BackendType.THREADING)
def test_reinit_backend_same_type_is_noop(self):
"""init_backend() with same type is a no-op."""
init_backend(BackendType.EVENTLET)
try:
init_backend(BackendType.EVENTLET) init_backend(BackendType.EVENTLET)
except exceptions.BackendAlreadySelected:
self.fail(
"init_backend() should be a no-op if same type is passed"
)
def test_cached_backend(self): def test_cached_backend(self):
"""Test backend is cached after initial load.""" """Test backend is cached after initial load."""
@@ -82,13 +86,46 @@ class TestBackend(unittest.TestCase):
def test_backend_components(self): def test_backend_components(self):
"""Test that components are cached when init_backend is called.""" """Test that components are cached when init_backend is called."""
init_backend(BackendType.EVENTLET) init_backend(BackendType.EVENTLET)
backend = get_backend() backend = get_backend()
self.assertTrue( self.assertTrue(
{"ServiceBase", "ServiceLauncher"}.intersection( {"ServiceBase", "ServiceLauncher"}.intersection(
backend.get_service_components() backend.get_service_components()
) )
) )
def test_get_backend_type(self):
"""Ensure get_backend_type() returns the selected backend."""
self.assertIsNone(backend_module.get_backend_type())
init_backend(BackendType.THREADING)
self.assertEqual(
backend_module.get_backend_type(), BackendType.THREADING)
class TestBackendHook(unittest.TestCase):
def setUp(self):
super().setUp()
backend_module._reset_backend()
def test_hook_sets_default_backend_when_not_explicitly_initialized(self):
backend_module.register_backend_default_hook(
lambda: BackendType.THREADING)
result = backend_module.get_backend()
self.assertEqual(
backend_module._cached_backend_type, BackendType.THREADING)
self.assertIsNotNone(result)
self.assertIsNotNone(result.get_service_components())
def test_hook_is_ignored_if_backend_already_initialized(self):
backend_module.init_backend(BackendType.EVENTLET)
backend_module.register_backend_default_hook(
lambda: BackendType.THREADING)
self.assertEqual(
backend_module._cached_backend_type, BackendType.EVENTLET)
def test_second_init_backend_raises_exception_even_with_hook(self):
backend_module.init_backend(BackendType.THREADING)
backend_module.register_backend_default_hook(
lambda: BackendType.EVENTLET)
with self.assertRaises(exceptions.BackendAlreadySelected):
backend_module.init_backend(BackendType.EVENTLET)

View File

@@ -0,0 +1,64 @@
features:
- |
Added a new `threading` backend to `oslo.service`, using standard Python
threads instead of `eventlet`. This includes a full implementation of:
- `Service`, `Launcher`, `ServiceLauncher`, and `ProcessLauncher` using
`cotyledon`
- `LoopingCall` variants (`FixedIntervalLoopingCall`,
`DynamicLoopingCall`, etc.) using `futurist.ThreadPoolExecutor`
- A new `ThreadGroup` and `Thread` abstraction to mimic `eventlet`-like
behavior
- A native `SignalHandler` implementation using standard Python signals
This backend provides a standard-thread-compatible alternative that avoids
monkey-patching, making it suitable for environments where `eventlet` is
problematic or undesirable.
Additionally:
- `ProcessLauncher` now supports a `no_fork=True` mode, allowing services
to run in the main process without forking. This is useful when `fork()`
is unsafe — for example, in threaded environments or with Python 3.12+
where the default multiprocessing start method has changed to `spawn`.
- A new `register_backend_default_hook()` API has been added. It allows
users to define a fallback backend type in case `init_backend()` was not
called early enough. This is helpful in environments where import order
or initialization timing cannot be guaranteed.
Example:
```python
from oslo_service import backend
backend.register_backend_default_hook(lambda: backend.BackendType.THREADING)
```
This hook will only be used if `init_backend()` has not already been called.
upgrade:
- |
While Python 3.14 defaults to ``spawn`` as the multiprocessing start
method, `oslo.service` continues to rely on ``fork`` as the only supported
method for creating worker processes. Many parts of OpenStack depend on
objects that cannot be safely pickled (e.g. argparse parsers, thread locks,
lambdas in config defaults), which makes ``spawn`` currently impractical.
In the long term, process scaling should be handled externally (e.g. via
Kubernetes or systemd), rather than by the service itself.
issues:
- |
When using the `threading` backend with multiple workers
(i.e. `ProcessLauncher` with `fork()`), **starting threads before the fork
occurs can lead to corrupted state** due to how `os.fork()` behaves in
multi-threaded processes. This is a known limitation in Python.
See: https://gibizer.github.io/posts/Eventlet-Removal-The-First-Threading-Bug/#threads--osfork--confused-threadpools
To avoid this issue, you can:
- Ensure that no threads are started before `oslo.service` forks the workers.
- Use `ProcessLauncher(no_fork=True)` to disable forking entirely.
- Explicitly manage thread lifecycle — for example, stop all threads before
forking, as currently done in Nova.

View File

@@ -22,6 +22,11 @@ classifier =
Programming Language :: Python :: 3 :: Only Programming Language :: Python :: 3 :: Only
Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: CPython
[options.extras_require]
threading =
cotyledon>=2.0.0
futurist>=3.1.1
[files] [files]
packages = packages =
oslo_service oslo_service

View File

@@ -15,6 +15,4 @@
import setuptools import setuptools
setuptools.setup( setuptools.setup(setup_requires=['pbr>=2.0.0'], pbr=True)
setup_requires=['pbr>=2.0.0'],
pbr=True)

View File

@@ -4,3 +4,5 @@ requests>=2.14.2 # Apache-2.0
stestr>=2.0.0 # Apache-2.0 stestr>=2.0.0 # Apache-2.0
coverage>=4.0 # Apache-2.0 coverage>=4.0 # Apache-2.0
cotyledon>=2.0.0
futurist>=3.1.1