Files
jenkins-pipelines/scripts/archive-misc.sh
Davlet Panech 044b6c050b archive-dir.sh: rewrite in Python
Old script was slow and didn't preserve hardlinks within the source set.

This script doesn't link files that are identical within the source set,
i.e., same checksum & attributes but different inode. It can only link
such files to similar files from older builds. This deficiency will be
addressed in a separate commit.

TESTS
===================
* Manually test various input directories, including:
  - a directory that contains each type of file (regular, devices,
    sockets, symlinks, etc)
  - old index files with spaces in file names
* Given a build with a dozen or so historical builds,
  copied the "aptly" directory and compared timing and destination
  directory size before/after this patch:
  - old script: time=4m13s size=56.0G
  - new script: time=14s   size=6.1G
* Run a Jenkins build that rebuilds one package, and doesn't
  clean/rebuild the ISO. Make sure "archive-misc" works as expected.

Change-Id: Ic8f8931c4143bc355db1ccbad56ed772c0f3081e
Signed-off-by: Davlet Panech <davlet.panech@windriver.com>
2025-07-29 15:39:38 -04:00

176 lines
5.9 KiB
Bash
Executable File

#!/bin/bash
#
# Copyright (c) 2022 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# Archive everything except:
# - symlinks that point to $BUILD_OUTPUT_HOME
# - localdisk/deploy/ which is archived separately by archive-iso.sh
# - large temp dirs left behind by ostree
#
# Abort the script as soon as any unguarded command fails.
set -e
# Absolute, symlink-resolved path of the directory containing this script.
THIS_DIR="$(readlink -f "$(dirname "$0")")"
# Helper libraries that live next to this script.
source "$THIS_DIR"/lib/job_utils.sh
source "$THIS_DIR"/lib/publish_utils.sh
# NOTE(review): load_build_env is not defined in this file; presumably it is
# provided by one of the libraries sourced above and sets up the build
# variables used below ($BUILD_HOME, $BUILD_OUTPUT_HOME, ...) -- confirm.
load_build_env
# Uncomment to pass --verbose to the safe_copy_dir call at the bottom of
# this script.
#VERBOSE_ARG="--verbose"
# Usage: print_regfile_name_if_exists PATH
#
# Print PATH followed by a newline if it names an existing regular file
# (or a symlink to one); print nothing otherwise.
#
# Returns 0 when PATH is not a regular file, so callers running under
# "set -e" are not aborted by a missing file.
print_regfile_name_if_exists() {
    if [[ -f "$1" ]] ; then
        # printf rather than echo: echo would treat names such as "-n" or
        # "-e" as options and print nothing useful.
        printf '%s\n' "$1"
    fi
}
# Usage: find_old_archive_dirs
#
# Print, one per line, the immediate subdirectories of $BUILD_OUTPUT_ROOT
# whose name starts with at least four digits, skipping any whose name
# matches $TIMESTAMP (which find interprets as a -name pattern).
find_old_archive_dirs() {
    local -a find_args=(
        "$BUILD_OUTPUT_ROOT"
        -mindepth 1
        -maxdepth 1
        -type d
        '!' -name "$TIMESTAMP"
        -regextype posix-extended
        -regex '.*/[0-9]{4,}[^/]*$'
    )
    find "${find_args[@]}"
}
# Usage: find_old_checksum_files__mirrors
#
# Print the checksum index files of older builds to consider when
# archiving "mirrors": for every directory reported by
# find_old_archive_dirs, its mirrors/$CHECKSUMS_FILENAME and
# aptly/$CHECKSUMS_FILENAME, whichever of them exist as regular files.
find_old_checksum_files__mirrors() {
    local archive_dir
    # IFS= and -r keep each path exactly as find printed it (no trimming
    # of leading/trailing whitespace, no backslash processing).
    find_old_archive_dirs | while IFS= read -r archive_dir ; do
        print_regfile_name_if_exists "$archive_dir/mirrors/$CHECKSUMS_FILENAME"
        print_regfile_name_if_exists "$archive_dir/aptly/$CHECKSUMS_FILENAME"
    done
    # NOTE(review): check_pipe_status is defined outside this file;
    # presumably it inspects the status of the pipeline above, so it is
    # kept as the very next command -- confirm.
    check_pipe_status
}
# Usage: find_old_checksum_files__aptly
#
# Print the old checksum index files to consider when archiving "aptly".
# This is the same list as for "mirrors", which already includes each old
# build's aptly/ index next to its mirrors/ index.
find_old_checksum_files__aptly() {
find_old_checksum_files__mirrors
}
# Usage: find_old_checksum_files__docker
#
# Print the checksum index files of older builds to consider when
# archiving "docker": for every directory reported by
# find_old_archive_dirs, its docker/$CHECKSUMS_FILENAME if that exists
# as a regular file.
find_old_checksum_files__docker() {
    local archive_dir
    # IFS= and -r keep each path exactly as find printed it (no trimming
    # of leading/trailing whitespace, no backslash processing).
    find_old_archive_dirs | while IFS= read -r archive_dir ; do
        print_regfile_name_if_exists "$archive_dir/docker/$CHECKSUMS_FILENAME"
    done
    # NOTE(review): check_pipe_status is defined outside this file;
    # presumably it inspects the status of the pipeline above, so it is
    # kept as the very next command -- confirm.
    check_pipe_status
}
# Usage: do_archive_dir DIR_ID [EXTRA_CHECKSUMS_FILE...]
#
# Archive $BUILD_HOME/DIR_ID to $BUILD_OUTPUT_HOME/DIR_ID using the method
# that $ARCHIVE_BIG_DIRS selects for DIR_ID. Does nothing if
# $BUILD_HOME/DIR_ID does not exist.
#
# DIR_ID is "mirrors", "docker" or "aptly" (a matching
# find_old_checksum_files__DIR_ID function must exist for the
# "checksum-hardlink" method).
#
# EXTRA_CHECKSUMS_FILE... are additional checksum index files to add to the
# list of old index files; only used by "checksum-hardlink", and only
# those that exist as regular files.
#
# ARCHIVE_BIG_DIRS contains a space-separated list of "method"
# or "dir:method" entries, eg:
#     "top-symlink aptly:checksum-hardlink"
# A bare "method" applies to every directory, "dir:method" only to "dir".
# Entries are processed left to right and the last applicable one wins.
# The default method is "checksum-hardlink".
#
# Methods:
#   top-symlink        make $BUILD_OUTPUT_HOME/DIR_ID a symlink to
#                      $BUILD_HOME/DIR_ID
#   checksum-copy      run helpers/archive-dir.py (via safe_docker_run) on
#                      the source and destination directories
#   checksum-hardlink  same, but also pass --checksum-hardlink and the
#                      list of old checksum index files
#
# Example:
#
#   # archive mirrors/
#   do_archive_dir "mirrors"
#
#   # archive aptly/ , but also consider files archived under "mirrors" by
#   # the previous line for hardlinking
#   do_archive_dir "aptly" "$BUILD_OUTPUT_HOME/mirrors/StxChecksums"
#
do_archive_dir() {
    local id="$1" ; shift || :
    local dir="$id"
    local spec spec_id
    local spec_method="checksum-hardlink"
    local tmp_dir

    notice "archiving $id"

    # Pick the method for this directory; $ARCHIVE_BIG_DIRS is split on
    # whitespace on purpose.
    for spec in $ARCHIVE_BIG_DIRS ; do
        if [[ "$spec" =~ : ]] ; then
            # "dir:method" -- only relevant if it names this directory
            spec_id="${spec%%:*}"
            if [[ "$spec_id" == "$id" ]] ; then
                spec_method="${spec#*:}"
            fi
            continue
        fi
        # bare "method" -- applies to every directory
        spec_method="$spec"
    done

    info "dir=$dir method=$spec_method"

    case "$spec_method" in
        top-symlink)
            if [[ -e "$BUILD_HOME/$dir" ]] ; then
                # A real directory at the destination has to go before it
                # can be replaced with a symlink.
                if [[ -d "$BUILD_OUTPUT_HOME/$dir" ]] ; then
                    # $DRY_RUN_ARG is deliberately unquoted: it may be empty
                    safe_rm $DRY_RUN_ARG "$BUILD_OUTPUT_HOME/$dir"
                fi
                maybe_run ln -sfn "$BUILD_HOME/$dir" "$BUILD_OUTPUT_HOME/$dir"
            fi
            ;;

        checksum-hardlink|checksum-copy)
            if [[ -e "$BUILD_HOME/$dir" ]] ; then
                if [[ -e "$BUILD_OUTPUT_HOME/$dir" ]] ; then
                    # Honor dry-run mode here as well, like the
                    # top-symlink branch does.
                    safe_rm $DRY_RUN_ARG "$BUILD_OUTPUT_HOME/$dir"
                fi

                # Scratch area under the build home; it also receives a
                # copy of the helper script that safe_docker_run executes.
                tmp_dir="$BUILD_HOME/tmp/archive-misc"
                rm -rf "$tmp_dir/$id"
                mkdir -p "$tmp_dir/$id"
                cp -a "$THIS_DIR/helpers/archive-dir.py" "$tmp_dir/"

                local archive_args=()
                if [[ "$spec_method" == "checksum-hardlink" ]] ; then
                    # Collect the index files of older builds, followed by
                    # any extra index files given by the caller.
                    local old_checksums_file_list="$tmp_dir/$id/old_checksums_file.list"
                    local find_func=find_old_checksum_files__$id
                    $find_func >"$old_checksums_file_list"
                    archive_args+=("--checksum-hardlink")
                    archive_args+=("--old-index-files-from=$old_checksums_file_list")
                    local extra_checksums_file
                    for extra_checksums_file in "$@" ; do
                        print_regfile_name_if_exists "$extra_checksums_file"
                    done >>"$old_checksums_file_list"
                fi

                # $SHELL_XTRACE is run as a command ("true"/"false");
                # default to "false" because an empty expansion would
                # otherwise count as success and enable tracing.
                if ${SHELL_XTRACE:-false} ; then
                    archive_args+=("--xtrace")
                fi

                #local egid
                #egid=$(id -g)
                #archive_args+=(--owner "$EUID" --group "$egid")

                local src_dir="$BUILD_HOME/$dir"
                local dst_dir="$BUILD_OUTPUT_HOME/$dir"
                maybe_run mkdir -p "$dst_dir"
                safe_docker_run $DRY_RUN_ARG --writeable-archive-root --rm "$COREUTILS_DOCKER_IMG" "$tmp_dir/archive-dir.py" \
                    "${archive_args[@]}" \
                    -j "${PARALLEL_CMD_JOBS:-1}" \
                    --output-checksums="$BUILD_OUTPUT_HOME/$dir/$CHECKSUMS_FILENAME" \
                    "$src_dir" \
                    "$dst_dir" \
                    "$tmp_dir/$id"
            fi
            ;;

        *)
            die "ARCHIVE_BIG_DIRS: invalid copy method \"$spec_method\": expecting \"top-symlink\", \"checksum-hardlink\" or \"checksum-copy\""
            ;;
    esac
}
# Make sure the archive destination exists before anything is copied into it
mkdir -p "$BUILD_OUTPUT_HOME"

# Plain copy of everything that is not handled specially
notice "archiving misc files"
exclude_args=(
    --exclude "/localdisk/designer/**"            # symlink inside
    --exclude "/aptly"                            # handled by do_archive_dir below
    --exclude "/mirrors"                          # handled by do_archive_dir below
    --exclude "/docker"                           # handled by do_archive_dir below
    --exclude "/workspace"                        # symlink
    --exclude "/repo"                             # symlink
    --exclude "/localdisk/workdir/**"             # ostree temp files
    --exclude "/localdisk/sub_workdir/workdir/**" # ostree temp files
    --exclude "/localdisk/deploy/**"              # archived by archive-iso.sh
    --exclude "/tmp/*"                            # some of the files here are quite large
)
# $DRY_RUN_ARG and $VERBOSE_ARG are left unquoted so that they vanish
# when empty or unset.
safe_copy_dir $DRY_RUN_ARG $VERBOSE_ARG \
    "${exclude_args[@]}" \
    "$BUILD_HOME/" "$BUILD_OUTPUT_HOME/"

# Big directories: linked or copied according to $ARCHIVE_BIG_DIRS.
# "aptly" additionally gets the checksum index just written for "mirrors".
do_archive_dir "mirrors"
do_archive_dir "aptly" "$BUILD_OUTPUT_HOME/mirrors/$CHECKSUMS_FILENAME"
do_archive_dir "docker"