From 362597b0fb91ff817a2a58121462dddc85855204 Mon Sep 17 00:00:00 2001 From: Jim Gauld Date: Wed, 16 Oct 2024 16:04:12 -0400 Subject: [PATCH] Configure systemd CPUShares/Nice/IOScheduler for software and sw-patching This updates CPUShares, Nice, and IOScheduler for: - software: software-controller-daemon.service; several processes are 100% cpu hog and use significant disk IO (eg, sysinv-app, system CLI, ansible-playbooks) - software: software.service; several processes are 100% cpu hog, and/or use significant disk IO (eg, /usr/bin/software-agent, ostree) - sw-patch: sw-patch-agent.service; ostree pull, ostree admin processes are 100% cpu hog; these do significant disk read and write IO - sw-patch: sw-patch-controller-daemon.service; /usr/sbin/sw-patch-controller-daemon is 100% cpu hog, and does significant disk write IO This gives 1/8th reduced CPUShares, reduced Nice, and reduced IOScheduler priority since these services have substantial CPU and disk IO load, yet are not latency critical. This is part of an overall set of adjustments that are required for systemd cgroups CPUShares, CPUQuota, and AllowedCPUs for key system services. This will improve latency of Kubernetes critical components, and throttle less important services. 
Partial-Bug: 2084714 TEST PLAN: AIO-SX, AIO-DX, Standard, Storage, DC: - PASS: Fresh install - PASS: verify systemd parameters for updated software and sw-patch services - TODO: Platform USM Upgrade Example: systemctl show software-controller-daemon.service | \ grep -e CPUShares -e CPUQuota -e Nice -e IOScheduling systemctl show software.service | \ grep -e CPUShares -e CPUQuota -e Nice -e IOScheduling systemctl show sw-patch-agent.service | \ grep -e CPUShares -e CPUQuota -e Nice -e IOScheduling systemctl show sw-patch-controller-daemon.service | \ grep -e CPUShares -e CPUQuota -e Nice -e IOScheduling Change-Id: Iee104d0487f3bd7a02bd1d6c833b30f7e605ecd4 Signed-off-by: Jim Gauld --- .../service-files/software-controller-daemon.service | 12 +++++++++++- software/service-files/software.service | 11 +++++++++++ sw-patch/bin/sw-patch-agent.service | 11 +++++++++++ sw-patch/bin/sw-patch-controller-daemon.service | 11 +++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/software/service-files/software-controller-daemon.service b/software/service-files/software-controller-daemon.service index e55733f8..cdfc521e 100644 --- a/software/service-files/software-controller-daemon.service +++ b/software/service-files/software-controller-daemon.service @@ -13,6 +13,16 @@ PIDFile=/var/run/software-controller-daemon.pid # process recovery is handled by pmon Restart=no +# cgroup performance engineering +# - software-controller-daemon.service does not provide latency critical service +# - several processes are 100% cpu hog and use significant disk IO +# (eg, sysinv-app, system CLI, ansible-playbooks, etc) +# - set 1/8th default share +# - set lower IO priority (effective only with 'bfq' scheduler) +CPUShares=128 +Nice=19 +IOSchedulingClass=best-effort +IOSchedulingPriority=7 + [Install] WantedBy=multi-user.target - diff --git a/software/service-files/software.service b/software/service-files/software.service index 15893e90..f38802b0 100644 --- 
a/software/service-files/software.service +++ b/software/service-files/software.service @@ -11,5 +11,16 @@ RemainAfterExit=yes StandardOutput=journal+console StandardError=journal+console +# cgroup performance engineering +# - software.service does not provide latency critical service +# - several processes are 100% cpu hog, and/or use significant disk IO +# (eg, /usr/bin/software-agent, ostree, etc) +# - set 1/8th default share +# - set lower IO priority (effective only with 'bfq' scheduler) +CPUShares=128 +Nice=19 +IOSchedulingClass=best-effort +IOSchedulingPriority=7 + [Install] WantedBy=multi-user.target diff --git a/sw-patch/bin/sw-patch-agent.service b/sw-patch/bin/sw-patch-agent.service index 9ca3a253..de96e493 100644 --- a/sw-patch/bin/sw-patch-agent.service +++ b/sw-patch/bin/sw-patch-agent.service @@ -11,6 +11,17 @@ ExecStop=/etc/init.d/sw-patch-agent stop ExecReload=/etc/init.d/sw-patch-agent restart PIDFile=/var/run/sw-patch-agent.pid +# cgroup performance engineering +# - sw-patch-agent.service does not provide latency critical service +# - ostree pull, ostree admin processes are 100% cpu hog; +# these do significant disk read and write IO +# - set 1/8th default share +# - set lower IO priority (effective only with 'bfq' scheduler) +CPUShares=128 +Nice=19 +IOSchedulingClass=best-effort +IOSchedulingPriority=7 + [Install] WantedBy=multi-user.target diff --git a/sw-patch/bin/sw-patch-controller-daemon.service b/sw-patch/bin/sw-patch-controller-daemon.service index 7b11291a..c1a94217 100644 --- a/sw-patch/bin/sw-patch-controller-daemon.service +++ b/sw-patch/bin/sw-patch-controller-daemon.service @@ -11,6 +11,17 @@ ExecStop=/etc/init.d/sw-patch-controller-daemon stop ExecReload=/etc/init.d/sw-patch-controller-daemon restart PIDFile=/var/run/sw-patch-controller-daemon.pid +# cgroup performance engineering +# - sw-patch-controller-daemon.service does not provide latency critical service +# /usr/sbin/sw-patch-controller-daemon is 100% cpu hog, +# and does 
significant disk write IO +# - set 1/8th default share +# - set lower IO priority (effective only with 'bfq' scheduler) +CPUShares=128 +Nice=19 +IOSchedulingClass=best-effort +IOSchedulingPriority=7 + [Install] WantedBy=multi-user.target