Issue a smart-reconfigure after (re-)creating zuul

Zuul no longer automatically performs a smart-reconfigure on startup
so we need to do that ourselves in case the tenant config has
changed.

There's a minor race window after the zuul CR spec changes where
the statefulset of the scheduler has not rolled out. We have to
wait for it to complete before calling smart-reconfigure, or
we risk running it on pods scheduled for deletion.

Also adding a fix from:
  https://review.opendev.org/c/zuul/zuul-operator/+/861279

This is needed to get exec in pods to work.

Change-Id: Ib35e85ed7666c2eb322971302f7f0d94a28bfa1f
Co-Authored-By: Jan Gutter <github@jangutter.com>
Co-Authored-By: Michal Nasiadka <mnasiadka@gmail.com>
Co-Authored-By: Michael Kelly <mkelly@arista.com>
This commit is contained in:
James E. Blair
2025-07-09 07:21:54 -07:00
parent 1c627c53c5
commit a9858a5b77
4 changed files with 30 additions and 3 deletions

View File

@@ -161,7 +161,7 @@
var: console_stream
- name: fail if console stream does not contains expected job output
when: "'Job console starting...' not in console_stream.stdout"
when: "'Job console starting' not in console_stream.stdout"
# It seems like wsdump.py doesn't always stay connected for the whole job duration
# when: "'Demo job is running' not in console_stream.stdout"
fail:

View File

@@ -174,6 +174,11 @@ def update_fn(name, namespace, logger, old, new, memo, **kwargs):
if spec_changed:
zuul.create_zuul()
if conf_changed:
if spec_changed:
zuul.wait_for_statefulset('zuul-scheduler')
zuul.smart_reconfigure()
memoize_secrets(memo, logger)

View File

@@ -19,7 +19,6 @@ import string
import kopf
import yaml
import jinja2
import kubernetes
from kubernetes.client import Configuration
from kubernetes.client.api import core_v1_api
from kubernetes.stream import stream
@@ -82,7 +81,6 @@ def update_secret(api, namespace, name, string_data):
def pod_exec(namespace, name, command):
kubernetes.config.load_kube_config()
try:
c = Configuration().get_default_copy()
except AttributeError:

View File

@@ -16,6 +16,7 @@ import kopf
import copy
import base64
import hashlib
import time
import jinja2
import pykube
@@ -410,6 +411,29 @@ class Zuul:
utils.apply_file(self.api, 'zuul.yaml', namespace=self.namespace, **kw)
self.create_nodepool()
def wait_for_statefulset(self, set_name, tries=6, delay=10):
    """Poll a StatefulSet until its rollout has completed.

    The StatefulSet is looked up in ``self.namespace`` by name and by
    the zuul ``app.kubernetes.io/*`` labels.  The rollout is treated
    as complete when all of the following hold:

    * the controller has observed the current spec
      (``status.observedGeneration`` has caught up with
      ``metadata.generation``);
    * ``replicas``, ``currentReplicas`` and ``readyReplicas`` in the
      status all equal ``spec.replicas``;
    * ``status.updateRevision`` equals ``status.currentRevision``.

    :param set_name: Name of the StatefulSet; also used as the value
        of the ``app.kubernetes.io/component`` label selector.
    :param tries: Maximum number of polls.
    :param delay: Seconds to sleep after each unsuccessful poll.
    :returns: None.  If the rollout does not finish within
        ``tries`` polls, an error is logged and the method returns
        normally; it does not raise.
    """
    self.log.info("Waiting for StatefulSet %s to finish rollout", set_name)
    selector = {
        'app.kubernetes.io/instance': self.name,
        'app.kubernetes.io/component': set_name,
        'app.kubernetes.io/name': 'zuul',
        'app.kubernetes.io/part-of': 'zuul',
    }
    for _ in range(tries):
        stateful_set = objects.StatefulSet.objects(self.api).filter(
            namespace=self.namespace,
            selector=selector).get(name=set_name)
        obj = stateful_set.obj
        # The status is filled in by the controller, so tolerate it
        # being absent instead of failing with a KeyError.
        status = obj.get('status') or {}
        wanted = obj['spec']['replicas']

        # Immediately after a spec change the status still describes
        # the previous rollout (all replicas ready, revisions equal).
        # Only trust it once the controller reports that it has seen
        # the current generation of the object.
        generation = obj.get('metadata', {}).get('generation')
        observed = status.get('observedGeneration')
        spec_observed = (
            generation is None or
            (observed is not None and observed >= generation))

        replicas_settled = (
            wanted == status.get('replicas') and
            wanted == status.get('currentReplicas') and
            wanted == status.get('readyReplicas'))
        revisions_match = (
            status.get('updateRevision') == status.get('currentRevision'))

        if spec_observed and replicas_settled and revisions_match:
            self.log.info("StatefulSet %s completed rollout", set_name)
            return
        time.sleep(delay)
    self.log.error("StatefulSet did not finish rollout after %d seconds",
                   tries * delay)
def smart_reconfigure(self):
self.log.info("Smart reconfigure")
try: