diff --git a/openstack-helm-infra/debian/deb_folder/patches/0023-Update-libvirt-cgroup-controllers-initiation.patch b/openstack-helm-infra/debian/deb_folder/patches/0023-Update-libvirt-cgroup-controllers-initiation.patch new file mode 100644 index 00000000..47232f58 --- /dev/null +++ b/openstack-helm-infra/debian/deb_folder/patches/0023-Update-libvirt-cgroup-controllers-initiation.patch @@ -0,0 +1,162 @@ +From 809afdbc5bada6acbe0e16fcd650b0fed8d4824e Mon Sep 17 00:00:00 2001 +From: Daniel Caires +Date: Fri, 26 Sep 2025 07:07:05 -0300 +Subject: [PATCH] Update libvirt cgroup controllers initialization + +The libvirt cgroup initialization in the caracal version +uses a hard-coded list of controllers that is set +in the libvirt bash file. This patch updates the .sh +to its latest version [1], where it compares a list of +controllers set in the values file with the controllers +available in the host, and uses that list to initialize +the controllers in the libvirt process. This patch also +removes a hugepage workaround that existed in the bash file, as +it was removed from the upstream repo as well [2]. 
+ +Commit's SHA that added the change in this patch, on the +upstream repository: +[1] - https://opendev.org/openstack/openstack-helm/commit/3903f54d0c1701f86f92da9023b67b7b453c4760 +[2] - https://opendev.org/openstack/openstack-helm/commit/ea3c04a7d9e39d63402751353e00d21762d988e5 + +Signed-off-by: Daniel Caires +--- + libvirt/templates/bin/_libvirt.sh.tpl | 76 +++++---------------------- + libvirt/values.yaml | 14 +++++ + 2 files changed, 26 insertions(+), 64 deletions(-) + +diff --git a/libvirt/templates/bin/_libvirt.sh.tpl b/libvirt/templates/bin/_libvirt.sh.tpl +index d16cdca3..af1b4f5e 100644 +--- a/libvirt/templates/bin/_libvirt.sh.tpl ++++ b/libvirt/templates/bin/_libvirt.sh.tpl +@@ -24,13 +24,6 @@ if [ -f /tmp/vnc.crt ]; then + mv /tmp/vnc-ca.crt /etc/pki/libvirt-vnc/ca-cert.pem + fi + +-# TODO: We disable cgroup functionality for cgroup v2, we should fix this in the future +-if $(stat -fc %T /sys/fs/cgroup/ | grep -q cgroup2fs); then +- CGROUP_VERSION=v2 +-else +- CGROUP_VERSION=v1 +-fi +- + if [ -n "$(cat /proc/*/comm 2>/dev/null | grep -w libvirtd)" ]; then + set +x + for proc in $(ls /proc/*/comm 2>/dev/null); do +@@ -55,16 +48,14 @@ if [ "$(cat /etc/os-release | grep -w NAME= | grep -w CentOS)" ]; then + fi + fi + +-if [ $CGROUP_VERSION != "v2" ]; then +- #Setup Cgroups to use when breaking out of Kubernetes defined groups +- CGROUPS="" +- for CGROUP in cpu rdma hugetlb; do +- if [ -d /sys/fs/cgroup/${CGROUP} ]; then +- CGROUPS+="${CGROUP}," +- fi +- done +- cgcreate -g ${CGROUPS%,}:/osh-libvirt +-fi ++#Setup Cgroups to use when breaking out of Kubernetes defined groups ++CGROUPS="" ++for CGROUP in {{ .Values.conf.kubernetes.cgroup_controllers | include "helm-toolkit.utils.joinListWithSpace" }}; do ++ if [ -d /sys/fs/cgroup/${CGROUP} ] || grep -w $CGROUP /sys/fs/cgroup/cgroup.controllers; then ++ CGROUPS+="${CGROUP}," ++ fi ++done ++cgcreate -g ${CGROUPS%,}:/osh-libvirt + + # We assume that if hugepage count > 0, then hugepages should be exposed to 
libvirt/qemu + hp_count="$(cat /proc/meminfo | grep HugePages_Total | tr -cd '[:digit:]')" +@@ -86,50 +77,11 @@ if [ 0"$hp_count" -gt 0 ]; then + echo "ERROR: Hugepages configured in kernel, but libvirtd container cannot access /dev/hugepages" + exit 1 + fi +- +- if [ $CGROUP_VERSION != "v2" ]; then +- # Kubernetes 1.10.x introduced cgroup changes that caused the container's +- # hugepage byte limit quota to zero out. This workaround sets that pod limit +- # back to the total number of hugepage bytes available to the baremetal host. +- if [ -d /sys/fs/cgroup/hugetlb ]; then +- limits="$(ls /sys/fs/cgroup/hugetlb/{{ .Values.conf.kubernetes.cgroup }}/hugetlb.*.limit_in_bytes)" || \ +- (echo "ERROR: Failed to locate any hugetable limits. Did you set the correct cgroup in your values used for this chart?" +- exit 1) +- for limit in $limits; do +- target="/sys/fs/cgroup/hugetlb/$(dirname $(awk -F: '($2~/hugetlb/){print $3}' /proc/self/cgroup))/$(basename $limit)" +- # Ensure the write target for the hugepage limit for the pod exists +- if [ ! -f "$target" ]; then +- echo "ERROR: Could not find write target for hugepage limit: $target" +- fi +- +- # Write hugetable limit for pod +- echo "$(cat $limit)" > "$target" +- done +- fi +- +- # Determine OS default hugepage size to use for the hugepage write test +- default_hp_kb="$(cat /proc/meminfo | grep Hugepagesize | tr -cd '[:digit:]')" +- +- # Attempt to write to the hugepage mount to ensure it is operational, but only +- # if we have at least 1 free page. 
+- num_free_pages="$(cat /sys/kernel/mm/hugepages/hugepages-${default_hp_kb}kB/free_hugepages | tr -cd '[:digit:]')" +- echo "INFO: '$num_free_pages' free hugepages of size ${default_hp_kb}kB" +- if [ 0"$num_free_pages" -gt 0 ]; then +- (fallocate -o0 -l "$default_hp_kb" /dev/hugepages/foo && rm /dev/hugepages/foo) || \ +- (echo "ERROR: fallocate failed test at /dev/hugepages with size ${default_hp_kb}kB" +- rm /dev/hugepages/foo +- exit 1) +- fi +- fi + fi + + if [ -n "${LIBVIRT_CEPH_CINDER_SECRET_UUID}" ] || [ -n "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" ] ; then +- if [ $CGROUP_VERSION != "v2" ]; then +- #NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied. +- cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen & +- else +- systemd-run --scope --slice=system libvirtd --listen & +- fi ++ ++ cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen & + + tmpsecret=$(mktemp --suffix .xml) + if [ -n "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" ] ; then +@@ -205,9 +157,5 @@ EOF + + fi + +-if [ $CGROUP_VERSION != "v2" ]; then +- #NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied. 
+- cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen +-else +- systemd-run --scope --slice=system libvirtd --listen +-fi ++# NOTE(vsaienko): changing CGROUP is required as restart of the pod will cause domains restarts ++cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen +diff --git a/libvirt/values.yaml b/libvirt/values.yaml +index b3a4373b..7f41ae60 100644 +--- a/libvirt/values.yaml ++++ b/libvirt/values.yaml +@@ -125,6 +125,20 @@ conf: + group: "kvm" + kubernetes: + cgroup: "kubepods.slice" ++ # List of cgroup controller we want to use when breaking out of ++ # Kubernetes defined groups ++ cgroup_controllers: ++ - blkio ++ - cpu ++ - devices ++ - freezer ++ - hugetlb ++ - memory ++ - net_cls ++ - perf_event ++ - rdma ++ - misc ++ - pids + vencrypt: + # Issuer to use for the vencrypt certs. + issuer: +-- +2.34.1 + diff --git a/openstack-helm-infra/debian/deb_folder/patches/series b/openstack-helm-infra/debian/deb_folder/patches/series index 4c79b6b9..4666d896 100644 --- a/openstack-helm-infra/debian/deb_folder/patches/series +++ b/openstack-helm-infra/debian/deb_folder/patches/series @@ -20,3 +20,4 @@ 0020-Bring-necessary-upstream-commits.patch 0021-Add-custom-pod-annotations-to-libvirt.patch 0022-Update-ipFamilyPolicy-to-support-DualStack.patch +0023-Update-libvirt-cgroup-controllers-initiation.patch diff --git a/stx-openstack-helm-fluxcd/stx-openstack-helm-fluxcd/manifests/libvirt/libvirt-static-overrides.yaml b/stx-openstack-helm-fluxcd/stx-openstack-helm-fluxcd/manifests/libvirt/libvirt-static-overrides.yaml index bfff04fe..24629ef5 100644 --- a/stx-openstack-helm-fluxcd/stx-openstack-helm-fluxcd/manifests/libvirt/libvirt-static-overrides.yaml +++ b/stx-openstack-helm-fluxcd/stx-openstack-helm-fluxcd/manifests/libvirt/libvirt-static-overrides.yaml @@ -19,6 +19,18 @@ conf: enabled: true kubernetes: cgroup: "k8s-infra" + cgroup_controllers: + - blkio + - cpu + - devices + - freezer + - 
hugetlb + - memory + - net_cls + - perf_event + - rdma + - misc + - pids libvirt: listen_addr: "::" pod: