Issue a smart-reconfigure after (re-)creating zuul
Zuul no longer automatically performs a smart-reconfigure on startup so we need to do that ourselves in case the tenant config has changed. There's a minor race window after the zuul CR spec changes where the statefulset of the scheduler has not rolled out. We have to wait for it to complete before calling smart-reconfigure, or we risk running it on pods scheduled for deletion. Also adding a fix from: https://review.opendev.org/c/zuul/zuul-operator/+/861279 This is needed to get exec in pods to work. Change-Id: Ib35e85ed7666c2eb322971302f7f0d94a28bfa1f Co-Authored-By: Jan Gutter <github@jangutter.com> Co-Authored-By: Michal Nasiadka <mnasiadka@gmail.com> Co-Authored-By: Michael Kelly <mkelly@arista.com>
This commit is contained in:
@@ -161,7 +161,7 @@
|
||||
var: console_stream
|
||||
|
||||
- name: fail if console stream does not contain expected job output
|
||||
when: "'Job console starting...' not in console_stream.stdout"
|
||||
when: "'Job console starting' not in console_stream.stdout"
|
||||
# It seems like wsdump.py doesn't always stay connected for the whole job duration
|
||||
# when: "'Demo job is running' not in console_stream.stdout"
|
||||
fail:
|
||||
|
@@ -174,6 +174,11 @@ def update_fn(name, namespace, logger, old, new, memo, **kwargs):
|
||||
if spec_changed:
|
||||
zuul.create_zuul()
|
||||
|
||||
if conf_changed:
|
||||
if spec_changed:
|
||||
zuul.wait_for_statefulset('zuul-scheduler')
|
||||
zuul.smart_reconfigure()
|
||||
|
||||
memoize_secrets(memo, logger)
|
||||
|
||||
|
||||
|
@@ -19,7 +19,6 @@ import string
|
||||
import kopf
|
||||
import yaml
|
||||
import jinja2
|
||||
import kubernetes
|
||||
from kubernetes.client import Configuration
|
||||
from kubernetes.client.api import core_v1_api
|
||||
from kubernetes.stream import stream
|
||||
@@ -82,7 +81,6 @@ def update_secret(api, namespace, name, string_data):
|
||||
|
||||
|
||||
def pod_exec(namespace, name, command):
|
||||
kubernetes.config.load_kube_config()
|
||||
try:
|
||||
c = Configuration().get_default_copy()
|
||||
except AttributeError:
|
||||
|
@@ -16,6 +16,7 @@ import kopf
|
||||
import copy
|
||||
import base64
|
||||
import hashlib
|
||||
import time
|
||||
|
||||
import jinja2
|
||||
import pykube
|
||||
@@ -410,6 +411,29 @@ class Zuul:
|
||||
utils.apply_file(self.api, 'zuul.yaml', namespace=self.namespace, **kw)
|
||||
self.create_nodepool()
|
||||
|
||||
def wait_for_statefulset(self, set_name, tries=6, delay=10):
    """Block until the named StatefulSet has finished rolling out.

    Polls the cluster up to ``tries`` times, sleeping ``delay`` seconds
    between attempts.  The rollout is considered complete when every
    replica count reported in ``status`` (total, current, ready) equals
    ``spec.replicas`` and the update revision matches the current
    revision.  Logs an error (but does not raise) if the rollout never
    completes within ``tries * delay`` seconds.

    NOTE(review): assumes the StatefulSet object already exists in the
    namespace -- the ``.get()`` lookup will raise if it does not; confirm
    callers only invoke this after the resource has been applied.
    """
    self.log.info("Waiting for StatefulSet %s to finish rollout", set_name)
    # Standard zuul-operator labels identifying the target StatefulSet.
    labels = {
        'app.kubernetes.io/instance': self.name,
        'app.kubernetes.io/component': set_name,
        'app.kubernetes.io/name': 'zuul',
        'app.kubernetes.io/part-of': 'zuul',
    }
    for _attempt in range(tries):
        sts = objects.StatefulSet.objects(self.api).filter(
            namespace=self.namespace,
            selector=labels).get(name=set_name)
        desired = sts.obj['spec']['replicas']
        status = sts.obj['status']
        # All three replica counters must have caught up to the spec.
        counts_ok = all(
            status.get(field) == desired
            for field in ('replicas', 'currentReplicas', 'readyReplicas'))
        # ...and the controller must have converged on one revision.
        revisions_ok = (status.get('updateRevision') ==
                        status.get('currentRevision'))
        if counts_ok and revisions_ok:
            self.log.info("StatefulSet %s completed rollout", set_name)
            return
        time.sleep(delay)
    self.log.error("StatefulSet did not finish rollout after %d seconds",
                   tries * delay)
|
||||
|
||||
def smart_reconfigure(self):
|
||||
self.log.info("Smart reconfigure")
|
||||
try:
|
||||
|
Reference in New Issue
Block a user