diff --git a/software/debian/deb_folder/rules b/software/debian/deb_folder/rules index cfc5c3d5..1a31c29c 100755 --- a/software/debian/deb_folder/rules +++ b/software/debian/deb_folder/rules @@ -43,6 +43,8 @@ override_dh_install: ${ROOT}/etc/init.d/software-controller install -m 500 service-files/usm-initialize-init.sh \ ${ROOT}/etc/init.d/usm-initialize + install -m 500 service-files/lvm-snapshot-restore.sh \ + ${ROOT}/etc/init.d/lvm-snapshot-restore install -m 600 service-files/software.conf \ ${ROOT}/etc/software/software.conf install -m 644 service-files/policy.json \ diff --git a/software/service-files/lvm-snapshot-restore.service b/software/service-files/lvm-snapshot-restore.service new file mode 100644 index 00000000..2815f1f4 --- /dev/null +++ b/software/service-files/lvm-snapshot-restore.service @@ -0,0 +1,14 @@ +[Unit] +Description=Restore LVM Snapshots +DefaultDependencies=no +After=systemd-udev-settle.service local-fs.target var-log.mount +Before=software.service controllerconfig.service + +[Service] +Type=oneshot +ExecStart=/etc/init.d/lvm-snapshot-restore +TimeoutStartSec=300 +RemainAfterExit=yes + +[Install] +WantedBy=multi-user.target diff --git a/software/service-files/lvm-snapshot-restore.sh b/software/service-files/lvm-snapshot-restore.sh new file mode 100644 index 00000000..eff37645 --- /dev/null +++ b/software/service-files/lvm-snapshot-restore.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# +# Copyright (c) 2025 Wind River Systems, Inc. 
+# +# SPDX-License-Identifier: Apache-2.0 +# +### BEGIN INIT INFO +# Description: lvm-snapshot-restore +# +# Short-Description: Restore LVM Snapshots +# Provides: lvm-snapshot-restore +# Required-Start: +# Required-Stop: +# Default-Start: 3 5 +# Default-Stop: 3 5 +### END INIT INFO + +NAME=$(basename "$0") +LOG_FILE="/var/log/lvm-snapshot-restore.log" +RESTORE_SCRIPT="/usr/sbin/software-deploy/manage-lvm-snapshots" + +# Function to append timestamped messages to the log file +log() { + echo "$(date '+%FT%T.%3N'): $NAME: $*" >> "$LOG_FILE" +} + +# Detect if the system booted into the previous deployment +if ! grep -q "ostree=/ostree/2" /proc/cmdline; then + log "System is not booted from the rollback deployment." + exit 0 +fi +log "System is booted from rollback deployment." + +# Verify if deployed commit-id matches rollback ostree commit-id +source /etc/build.info +log "Rollback major release version is ${SW_VERSION}" +DEPLOYED_COMMIT_ID=$(ostree admin status | grep "^\*" | awk '{ sub(/\.[0-9]+/, "", $3); print $3 }') +ROLLBACK_COMMIT_ID=$(ostree --repo=/var/www/pages/feed/rel-${SW_VERSION}/ostree_repo rev-parse starlingx) +if [ -z "$DEPLOYED_COMMIT_ID" ] || [ "$DEPLOYED_COMMIT_ID" != "$ROLLBACK_COMMIT_ID" ]; then + log "Deployed ostree commit-id doesn't match ${SW_VERSION} ostree commit-id" + exit 0 +fi + +log "Checking LVM snapshots..." +${RESTORE_SCRIPT} --list +if [ $? -ne 0 ]; then + log "No LVM snapshots to restore." + exit 0 +fi + +log "Starting LVM snapshot restore..." +${RESTORE_SCRIPT} --restore + +if [ $? -eq 0 ]; then + log "All LVM snapshots restored successfully. Rebooting..." + reboot +else + log "Couldn't restore the LVM snapshots, lvdisplay output:" + log "$(lvdisplay)" + log "Check software.log for more details." 
+ exit 1 +fi + +exit 0 diff --git a/software/service-files/software-init.sh b/software/service-files/software-init.sh index 22968272..f8a151ca 100644 --- a/software/service-files/software-init.sh +++ b/software/service-files/software-init.sh @@ -39,29 +39,6 @@ function LOG_TO_FILE { echo "`date "+%FT%T.%3N"`: $NAME: $*" >> $logfile } -function check_for_rr_software_update { - if [ -f ${node_is_software_updated_rr_file} ]; then - if [ ! -f ${software_updated_during_init_file} ]; then - echo - echo "Node has had its software updated and requires an immediate reboot." - echo - LOG_TO_FILE "Node has had its software updated, with reboot-required flag set. Rebooting" - touch ${software_updated_during_init_file} - /sbin/reboot - else - echo - echo "Node has had its software updated during init a second consecutive time. Skipping reboot due to possible error" - echo - LOG_TO_FILE "Node has had its software updated during init a second consecutive time. Skipping reboot due to possible error" - touch ${software_install_failed_file} - rm -f ${software_updated_during_init_file} - exit 1 - fi - else - rm -f ${software_updated_during_init_file} - fi -} - function check_install_uuid { # Check whether our installed load matches the active controller CONTROLLER_UUID=`curl -sf http://controller:${http_port}/feed/rel-${SW_VERSION}/install_uuid` @@ -158,7 +135,6 @@ case "$1" in LOG_TO_FILE "***** Finished software operation *****" fi - check_for_rr_software_update ;; stop) # Nothing to do here diff --git a/software/software/lvm_snapshot.py b/software/software/lvm_snapshot.py index fbdd3b7a..051d3069 100644 --- a/software/software/lvm_snapshot.py +++ b/software/software/lvm_snapshot.py @@ -12,6 +12,7 @@ from datetime import datetime from datetime import timezone import json import logging +from packaging import version from pathlib import Path import shutil import subprocess @@ -170,6 +171,11 @@ class VarSnapshot(LVMSnapshot): deploy = content.get("deploy") for d in deploy: d["state"] 
= "host-rollback-done" + from_release = d["from_release"] + to_release = d["to_release"] + if version.Version(to_release) > version.Version(from_release): + d["from_release"] = to_release + d["to_release"] = from_release with open(software_json, "w") as fp: fp.write(json.dumps(content)) LOG.info("Deployment data updated") @@ -375,6 +381,7 @@ def main(): manager.delete_snapshots() elif args.list: snapshots = [snapshot.to_json() for snapshot in manager.list_snapshots()] + success = bool(snapshots) # True if snapshots exist, False otherwise print(json.dumps(snapshots, indent=4)) else: parser.print_usage() diff --git a/software/software/ostree_utils.py b/software/software/ostree_utils.py index c7d44dee..52b35c9b 100644 --- a/software/software/ostree_utils.py +++ b/software/software/ostree_utils.py @@ -498,19 +498,17 @@ def delete_older_deployments(): # Sample command and output that is parsed to get the list of # deployment IDs # - # Command: ostree admin status | grep debian + # Command: ostree admin status | grep -E 'debian [a-z0-9]+' # # Output: # - # * debian 3334dc80691a38c0ba6c519ec4b4b449f8420e98ac4d8bded3436ade56bb229d.2 - # debian 3334dc80691a38c0ba6c519ec4b4b449f8420e98ac4d8bded3436ade56bb229d.1 (rollback) - # debian 3334dc80691a38c0ba6c519ec4b4b449f8420e98ac4d8bded3436ade56bb229d.0 - - LOG.info("Inside delete_older_deployments of ostree_utils") - cmd = "ostree admin status | grep debian" + # * debian 9a4d8040800f8cf9191ca3401f8006f3df5760b33d78f931309b5bb5db062ab3.2 + # debian 9a4d8040800f8cf9191ca3401f8006f3df5760b33d78f931309b5bb5db062ab3.1 (rollback) + # debian 9a4d8040800f8cf9191ca3401f8006f3df5760b33d78f931309b5bb5db062ab3.0 + cmd = "ostree admin status | grep -E 'debian [a-z0-9]+'" try: - output = subprocess.run(cmd, shell=True, check=True, capture_output=True) + output = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as e: msg = "Failed to fetch ostree admin status." 
info_msg = "OSTree Admin Status Error: return code: %s , Output: %s" \ @@ -518,68 +516,34 @@ def delete_older_deployments(): LOG.info(info_msg) raise OSTreeCommandFail(msg) - # Store the output of the above command in a string - output_string = output.stdout.decode('utf-8') + # Find the active deployment (which usually is the first, but there are exceptions) + # and once found attempt to delete deployments after it in the list, except the rollback + delete_deployments = False + deployments_to_delete = [] + for index, deployment in enumerate(output.stdout.strip().split("\n")): + if delete_deployments and "rollback" not in deployment: + deployments_to_delete.append(index) + if "*" in deployment: + LOG.info("Active deployment %s: %s", index, deployment) + delete_deployments = True - # Parse the string to get the latest commit for the ostree - split_output_string = output_string.split() - deployment_id_list = [] - for index, deployment_id in enumerate(split_output_string): - if deployment_id == "debian": - deployment_id_list.append(split_output_string[index + 1]) - - # After a reboot, the deployment ID at the 0th index of the list - # is always the active deployment and the deployment ID at the - # 1st index of the list is always the fallback deployment. - # We want to delete all deployments except the two mentioned above. 
- # This means we will undeploy all deployments starting from the - # 2nd index of deployment_id_list - deploys_amount = len(deployment_id_list) - if deploys_amount <= 2: + if not deployments_to_delete: LOG.info("No older deployments to delete") - return + return True - for index in reversed(range(2, deploys_amount)): + for index in reversed(deployments_to_delete): try: cmd = "ostree admin undeploy %s" % index - output = subprocess.run(cmd, shell=True, check=True, capture_output=True) - info_log = "Deleted ostree deployment %s" % deployment_id_list[index] + output = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True) + info_log = "Deleted ostree deployment %s: %s" % (index, output.stdout) LOG.info(info_log) except subprocess.CalledProcessError as e: - msg = "Failed to undeploy ostree deployment %s." % deployment_id_list[index] + msg = "Failed to undeploy ostree deployment %s." % index info_msg = "OSTree Undeploy Error: return code: %s , Output: %s" \ - % (e.returncode, e.stderr.decode("utf-8")) + % (e.returncode, e.stderr) LOG.info(info_msg) raise OSTreeCommandFail(msg) - - -def undeploy_inactive_deployments(): - """ - Remove deployments other than the current deployment, - i.e. 
deployments from index 1 to len(deployments) - 1, - in the reverse order, from the oldest to the newest - """ - cmd = ["ostree", "admin", "status"] - try: - output = subprocess.run(cmd, text=True, check=True, capture_output=True) - except subprocess.CalledProcessError as e: - LOG.exception("Error getting ostree deployment list: %s" % e.stderr) - return False - - success = True - pattern = r"debian [a-z0-9]+.[0-9]+" - deployments = re.findall(pattern, output.stdout) - # skip the first (active) deployment - for index, deployment in reversed(list(enumerate(deployments[1:], 1))): - commit_id = deployment.replace("debian ", "").split(".")[0] - cmd = ["ostree", "admin", "undeploy", str(index)] - try: - subprocess.run(cmd, check=True) - LOG.info("Removed deployment %s, commit-id %s" % (index, commit_id)) - except subprocess.CalledProcessError as e: - LOG.exception("Error removing deployment %s, commit-id %s: %s" % (index, commit_id, e.stderr)) - success = False - return success + return True def checkout_latest_ostree_commit(patch_sw_version): diff --git a/software/software/software_agent.py b/software/software/software_agent.py index d2db7cf9..d18b3cd8 100644 --- a/software/software/software_agent.py +++ b/software/software/software_agent.py @@ -375,7 +375,7 @@ class SoftwareMessageDeployDeleteCleanupReq(messages.PatchMessage): success_remove_upgrade_flags = remove_major_release_deployment_flags() # undeploy the from-release ostree deployment to free sysroot disk space - success_ostree_undeploy_from_release = ostree_utils.undeploy_inactive_deployments() + success_ostree_undeploy_from_release = ostree_utils.delete_older_deployments() cleanup_results = [ (success_ostree_remote_cleanup, "cleaning temporary refs/remotes"),