archive-dir.sh: rewrite in Python

Old script was slow and didn't preserve hardlinks within the source set.

This script doesn't link files that are identical within the source set,
ie same checksum & attributes, but different inode. It can only link
such files to similar files from older builds. This deficiency will be
addressed in a separate commit.

TESTS
===================
* Manually test various input directories, including:
  - a directory that contains each type of file (regular, devices,
    sockets, symlinks, etc)
  - old index files with spaces in file names
* On a build host with a dozen or so historical builds, copied the
  "aptly" directory and compared timing and destination directory size
  before/after this patch:
  - old script: time=4m13s size=56.0G
  - new script: time=14s   size=6.1G
* Run a Jenkins build that rebuilds one package, and doesn't
  clean/rebuild the ISO. Make sure "archive-misc" works as expected.

Change-Id: Ic8f8931c4143bc355db1ccbad56ed772c0f3081e
Signed-off-by: Davlet Panech <davlet.panech@windriver.com>
Davlet Panech
2025-07-25 16:30:58 -04:00
parent 47f01f5952
commit 044b6c050b
5 changed files with 834 additions and 523 deletions


@@ -1,8 +1,8 @@
-FROM debian:11
+FROM debian:12
 RUN apt-get update -y && \
     apt-get upgrade -y && \
-    apt-get install -y bsdextrautils parallel && \
+    apt-get install -y bsdextrautils parallel python3 && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*


@@ -109,13 +109,14 @@ do_archive_dir() {
     tmp_dir="$BUILD_HOME/tmp/archive-misc"
     rm -rf "$tmp_dir/$id"
     mkdir -p "$tmp_dir/$id"
-    cp -a "$THIS_DIR/helpers/archive-dir.sh" "$tmp_dir/"
+    cp -a "$THIS_DIR/helpers/archive-dir.py" "$tmp_dir/"
     local archive_args=()
     if [[ "$spec_method" == "checksum-hardlink" ]] ; then
         local old_checksums_file_list="$tmp_dir/$id/old_checksums_file.list"
         local find_func=find_old_checksum_files__$id
         $find_func >"$old_checksums_file_list"
-        archive_args+=("--checksum-hardlink" "$old_checksums_file_list")
+        archive_args+=("--checksum-hardlink")
+        archive_args+=("--old-index-files-from=$old_checksums_file_list")
         local extra_checksums_file
         for extra_checksums_file in "$@" ; do
             print_regfile_name_if_exists "$extra_checksums_file"
@@ -132,10 +133,10 @@ do_archive_dir() {
     local src_dir="$BUILD_HOME/$dir"
     local dst_dir="$BUILD_OUTPUT_HOME/$dir"
     maybe_run mkdir -p "$dst_dir"
-    safe_docker_run $DRY_RUN_ARG --writeable-archive-root --rm "$COREUTILS_DOCKER_IMG" "$tmp_dir/archive-dir.sh" \
+    safe_docker_run $DRY_RUN_ARG --writeable-archive-root --rm "$COREUTILS_DOCKER_IMG" "$tmp_dir/archive-dir.py" \
         "${archive_args[@]}" \
         -j ${PARALLEL_CMD_JOBS:-1} \
-        --output-checksums "$BUILD_OUTPUT_HOME/$dir/$CHECKSUMS_FILENAME" \
+        --output-checksums="$BUILD_OUTPUT_HOME/$dir/$CHECKSUMS_FILENAME" \
         "$src_dir" \
         "$dst_dir" \
         "$tmp_dir/$id"

scripts/helpers/archive-dir.py (new executable file, 826 lines)

@@ -0,0 +1,826 @@
#!/usr/bin/env python3

import sys
assert sys.version_info >= (3, 9), "Python >= 3.9 is required"

HELP="""\
Usage: archive-dir.py [<OPTIONS>...] <SRC_DIR> <DST_DIR> <TMP_DIR>
                      [<OLD_INDEX_FILES>...]

Archive SRC_DIR in DST_DIR, using TMP_DIR for temporary files.
Create the index file, DST_DIR/StxChecksums.

With --checksum-hardlink, attempt to link identical files from older builds
instead of copying them.

  -v,--verbose     be verbose
  -j,--jobs=N      perform various operations in parallel (default: 1)
  --owner=OWNER    set destination files' owner; requires root privileges
  --group=GROUP    set destination files' group as specified; requires root
                   privileges, or the current user must be a member of GROUP
  --output-checksums=CK_FILE
                   save the index to CK_FILE instead of DST_DIR/StxChecksums
  --checksum-hardlink
                   Hardlink destination files if possible. You must provide
                   one or more index files (StxChecksums) generated by older
                   builds. We will use the files with matching properties &
                   checksums to create hard links in DST_DIR.
  --old-index-files-from=OLD_INDEX_LIST_FILE
                   Read additional index file names from OLD_INDEX_LIST_FILE
  --reflink        Create light-weight (COW) file copies if possible. This
                   only applies when copying (ie when no link candidates are
                   found)
  --skip-existing  Skip files that already exist at the destination. We still
                   need to calculate their checksums in order to create the
                   index, but we will skip the copy.
  --keep-temp-files
                   Normally we delete temporary files upon successful
                   completion; this option will keep them.
"""
# FIXME: this doesn't link files that are identical within the source set,
# ie same checksum & attributes, but different inode. It can only link
# such files to similar files from older builds.
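#
# A minimal sketch of one way to close that gap (illustrative only, not
# wired into this script): after calc_checksums(), remember the first
# destination path seen for each checksum and hard-link subsequent
# source-set duplicates to it, instead of consulting the old-build indexes:
#
#   def link_intra_source_duplicates(infos):    # hypothetical helper
#       first_seen = {}
#       for fi in infos:
#           if fi.checksum == '-':
#               continue
#           orig_rel_path = first_seen.setdefault(fi.checksum, fi.rel_path)
#           if orig_rel_path != fi.rel_path:
#               os.link(os.path.join(DST_DIR, orig_rel_path),
#                       os.path.join(DST_DIR, fi.rel_path))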
import argparse
from collections.abc import Iterable
from collections.abc import Callable
from dataclasses import dataclass
import grp
import hashlib
import itertools
from multiprocessing import Pool
import os
from pathlib import Path
import pwd
import re
import shutil
import shlex
import stat
import subprocess
from typing import TextIO

JOBS = 1
CHECKSUM_READ_SIZE = 4 * 1024 * 1024  # 4 MiB
COPY_REFLINK = False
OLD_INDEX_FILES = []
SKIP_EXISTING = False
SRC_DIR = None
DST_DIR = None
TMP_DIR = None
CHANGE_UID = None
CHANGE_GID = None
VERBOSITY = 0
CURRENT_GID_LIST = []
OUTPUT_INDEX_FILE = None
KEEP_TEMP_FILES = False

def log_error(msg:str)->None:
    print('ERROR: %s' % msg, file=sys.stderr)

def log_warn(msg:str)->None:
    print('WARNING: %s' % msg, file=sys.stderr)

def log_info(msg:str)->None:
    print('%s' % msg, file=sys.stderr)

def log_debug(msg:str)->None:
    if VERBOSITY > 0:
        print('%s' % msg, file=sys.stderr)

def log_shell_cmd(cmd:str)->None:
    if VERBOSITY > 0:
        print('%% %s' % cmd, file=sys.stderr)

# Apply func to items returned by an iterator in parallel.
# Returns an iterator with the results of func, in unpredictable
# order.
def map_p(func:Callable, it:Iterable)->Iterable:
    pool = Pool(JOBS)
    try:
        for x in pool.imap_unordered(func, it):
            yield x
        pool.close()
        pool.join()
    except:
        pool.terminate()
        pool.join()
        raise
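
# For example, calc_checksums() below uses map_p(add_one_checksum, ...) to
# hash files on JOBS worker processes; callers must not rely on the order
# in which results come back.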
# Remove a file if it exists. Raise an exception on directories.
def remove_file(filename:str)->None:
    try:
        os.unlink(filename)
    except FileNotFoundError:
        pass

# Sort a file, ie replace it with a sorted version
def sort_file_inplace(filename:str, tmp_filename:str)->None:
    cmd = [ 'sort', '--parallel=%d' % JOBS, '-o', tmp_filename, filename ]
    log_shell_cmd(shlex.join(cmd))
    subprocess.run(cmd, check=True)
    log_debug('rename(%s,%s)' % (tmp_filename, filename))
    os.unlink(filename)
    os.rename(tmp_filename, filename)

# Combine old index files into one and sort it by checksum
# Output saved to TMP_DIR/old_index.list
def combine_old_index_files():
    if OLD_INDEX_FILES:
        log_info('Combining old index files into one')
        # Use 'awk' to add StxChecksums' base directory to each relative filename
        # in it, for each input file; otherwise we won't be able to find the
        # referenced file later when we read these entries.
        #
        # Pipe awk's output to sort,
        #
        # ie: ( awk [...] StxChecksums_1 ; awk [...] StxChecksums_2 ; ... ) | sort [...]

        # Start the sort process, reading from STDIN
        combined_index_file = os.path.join(TMP_DIR, 'old_index.list')
        sort_cmd = [ 'sort', '--parallel=%s' % JOBS, '--output=%s' % combined_index_file ]
        log_shell_cmd(shlex.join(sort_cmd))
        sort_proc = subprocess.Popen(sort_cmd, stdin=subprocess.PIPE)
        # For each input file, execute AWK with its STDOUT set to sort's STDIN
        try:
            dst_dir_realpath = os.path.realpath(DST_DIR)
            awk_expr = '{ if (match($0, /^[[:space:]]*[^[:space:]]+[[:space:]]+/) >= 0) print substr($0, 1, RLENGTH) DIR substr($0, RLENGTH+1) }'
            for old_index_file in OLD_INDEX_FILES:
                try:
                    # Skip the StxChecksums file that we are (re-)generating now
                    base_dir = os.path.realpath(os.path.dirname(old_index_file))
                    if base_dir == dst_dir_realpath and os.path.basename(old_index_file) == 'StxChecksums':
                        log_warn('Ignoring output index file %s' % old_index_file)
                        continue
                    # Input file may get deleted by job cleanup scripts from underneath us.
                    # Open the file for reading and pass the open file descriptor to AWK.
                    with open(old_index_file) as old_index_fh:
                        os.set_inheritable(old_index_fh.fileno(), True)
                        log_debug('fd %d = %s' % (old_index_fh.fileno(), old_index_file))
                        awk_cmd = [ 'awk', '-v', 'DIR=%s/' % base_dir, awk_expr, '/dev/fd/%d' % old_index_fh.fileno() ]
                        log_shell_cmd(shlex.join(awk_cmd))
                        subprocess.run(awk_cmd, stdout=sort_proc.stdin, check=True, close_fds=False)
                except OSError as e:
                    # Ignore errors (typically ENOENT) -- fall back to copy elsewhere
                    log_warn('Failed to process %s: %s' % (old_index_file, str(e)))
                    continue
        finally:
            sort_proc.stdin.close()
            sort_proc.wait()
            if sort_proc.returncode != 0:
                raise subprocess.CalledProcessError(returncode=sort_proc.returncode, cmd=sort_cmd)
# Format a line of StxChecksums file
def format_index_line(rel_path:str, orig_path:str, checksum:str, st:os.stat_result)->str:
    return '%s %s %d %d %d %d %s' % (checksum, rel_path, st.st_size, st.st_mtime, st.st_dev, st.st_ino, orig_path)
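
# Field order: CHECKSUM REL_PATH SIZE MTIME DEV INO ORIG_PATH.
# Example line (hypothetical values; checksum abbreviated):
#   3b1f...9c0d pool/main/p/pkg.deb 40960 1721930000 2049 1048581 /output/pool/main/p/pkg.deb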
# File information for intermediate file lists
@dataclass
class FileInfo:
    dev:int
    ino:int
    uid:int
    gid:int
    mode:int
    size:int
    mtime:float
    checksum:str
    rel_path:str

# Create a FileInfo object from a stat record
def stat_to_file_info(st:os.stat_result, checksum:str, rel_path:str)->FileInfo:
    return FileInfo(st.st_dev, st.st_ino, st.st_uid, st.st_gid, st.st_mode, st.st_size, st.st_mtime, checksum, rel_path)

# Format a FileInfo record as a line of text
# DEV INO UID GID MODE SIZE MTIME CHECKSUM REL_PATH
def format_file_info(fi:FileInfo)->str:
    return '%d %d %d %d %d %d %f %s %s' % (fi.dev, fi.ino, fi.uid, fi.gid, fi.mode, fi.size, fi.mtime, fi.checksum, fi.rel_path)
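
# Example line (hypothetical values; the checksum field stays '-' until
# calc_checksums() fills it in):
#   2049 1048581 1000 1000 33188 40960 1721930000.123456 - pool/main/p/pkg.deb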
# Parse a line of text into a FileInfo object
# DEV INO UID GID MODE SIZE MTIME CHECKSUM REL_PATH
RE_FILE_INFO = re.compile(r'^(\d+) (\d+) (\d+) (\d+) (\d+) (\d+) (\S+) (-|[0-9a-f]+) (.+)$', re.ASCII)
def parse_file_info(line:str)->FileInfo:
    match = RE_FILE_INFO.match(line)
    if match:
        return FileInfo(
            int(match.group(1)),    # dev
            int(match.group(2)),    # ino
            int(match.group(3)),    # uid
            int(match.group(4)),    # gid
            int(match.group(5)),    # mode
            int(match.group(6)),    # size
            float(match.group(7)),  # mtime
            match.group(8),         # checksum
            match.group(9),         # rel_path
        )
    return None
# Read a list of FileInfo objects from a file
def read_file_info_lines(filename:str)->Iterable[FileInfo]:
    with open(filename) as fh:
        for line in fh:
            fi = parse_file_info(line.rstrip('\n'))
            if fi:
                yield fi

#
# Find hardlink candidates among the index (StxChecksums) files
# generated by older builds.
# Yields tuples (old_path, stat_result); yields nothing if there
# are no candidates.
#
RE_OLD_FILE_INFO_LIST = [
    # Faster, but won't match filenames with spaces in them
    re.compile(r'^([0-9a-f]+) (\S+) (\d+) (\d+) (\d+) (\d+) (.+)$', re.ASCII),
    # Slower (because of .+ in the middle)
    re.compile(r'^([0-9a-f]+) (.+) (\d+) (\d+) (\d+) (\d+) (.+)$', re.ASCII)
]
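
# 'look' performs a binary search for lines that begin with the given
# prefix, which is why combine_old_index_files() sorts old_index.list
# first; each checksum lookup is then O(log n) instead of a full scan.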
def find_old_files(checksum:str)->Iterable[tuple[str, os.stat_result]]:
    # If there are no index files => no combined index either
    if OLD_INDEX_FILES:
        cmd = [ 'look', '%s ' % checksum, os.path.join(TMP_DIR, 'old_index.list') ]
        log_shell_cmd(shlex.join(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, encoding='utf8')
        try:
            for line in p.stdout:
                line = line.rstrip('\n')
                re_match_found = False
                for regex in RE_OLD_FILE_INFO_LIST:
                    match = regex.match(line)
                    if match:
                        re_match_found = True
                        full_path = match.group(2)
                        size = int(match.group(3))
                        mtime = int(match.group(4))
                        try:
                            st = os.stat(full_path, follow_symlinks=False)
                            # NOTE: index files store time stamps as integers (ie truncated)
                            if st.st_size == size and int(st.st_mtime) == mtime:
                                yield (full_path, st)
                            else:
                                log_debug('ignoring old index entry because its metadata doesn\'t match reality [%s] size=%d:%d mtime=%d:%d' % (line, size, st.st_size, mtime, int(st.st_mtime)))
                        except FileNotFoundError:
                            log_debug('ignoring old index entry because the referenced file doesn\'t exist: %s' % full_path)
                        except OSError as e:
                            log_warn('ignoring old index entry: %s: %s' % (full_path, str(e)))
                        # the looser fallback regex would match this line too;
                        # don't process the same entry twice
                        break
                if not re_match_found:
                    log_warn('Failed to parse (old) index line [%s]' % line)
        finally:
            p.stdout.close()
            p.wait()
#
# Search SRC_DIR and save the FileInfo entries to 3 files:
#   dirs.list  -- directories
#   files.list -- non-directories with unique dev/ino
#   links.list -- duplicate dev/inos
#
# All files will have the checksum field set to "-"; we will calculate
# the checksums separately for files.list.
#
# Returns a tuple with total counts.
#
def find_files()->tuple[int,int,int]:
    log_info("searching for files")
    dirs_file = os.path.join(TMP_DIR, 'dirs.list')
    dirs_fh = None
    dirs_count = 0
    files_file = os.path.join(TMP_DIR, 'files.list')
    files_fh = None
    files_count = 0
    links_file = os.path.join(TMP_DIR, 'links.list')
    links_fh = None
    links_count = 0
    try:
        log_debug('creating %s' % dirs_file)
        dirs_fh = open(dirs_file, 'w')
        log_debug('creating %s' % files_file)
        files_fh = open(files_file, 'w')
        log_debug('creating %s' % links_file)
        links_fh = open(links_file, 'w')

        dev_map = {}
        def walk_error(err:Exception)->None:
            raise err

        dirs_count = 0
        files_count = 0
        links_count = 0
        log_debug(' %s/' % '.')
        st = os.stat(SRC_DIR, follow_symlinks=False)
        print('%s' % format_file_info(stat_to_file_info(st, '-', '.')), file=dirs_fh)
        dirs_count += 1
        for (dirpath, dirnames, filenames) in os.walk(SRC_DIR, onerror=walk_error):
            rel_dirpath = dirpath[len(SRC_DIR)+1:]
            extra_files = []
            # directories
            for dirname in dirnames:
                full_path = os.path.join(dirpath, dirname)
                st = os.stat(full_path, follow_symlinks=False)
                # os.walk() returns directory symlinks as "directories" here.
                # Treat them as any other non-directory file below.
                if stat.S_ISDIR(st.st_mode):
                    rel_path = os.path.join(rel_dirpath, dirname)
                    log_debug(' %s/' % rel_path)
                    print('%s' % format_file_info(stat_to_file_info(st, '-', rel_path)), file=dirs_fh)
                    dirs_count += 1
                else:
                    extra_files.append(dirname)
            # files
            for filename in itertools.chain.from_iterable([filenames, extra_files]):
                rel_path = os.path.join(rel_dirpath, filename)
                full_path = os.path.join(dirpath, filename)
                log_debug(' %s' % rel_path)
                st = os.stat(full_path, follow_symlinks=False)
                ino_map = dev_map.get(st.st_dev)
                if ino_map is None:
                    ino_map = {}
                    dev_map[st.st_dev] = ino_map
                if st.st_ino not in ino_map:
                    ino_map[st.st_ino] = None
                    fh = files_fh
                    files_count += 1
                else:
                    fh = links_fh
                    links_count += 1
                print('%s' % format_file_info(stat_to_file_info(st, '-', rel_path)), file=fh)
    finally:
        for fh in (links_fh, files_fh, dirs_fh):
            if fh is not None:
                fh.close()
    # Sort files.list because we need to look up duplicate devno/ino entries
    # there for creating links
    sort_file_inplace(files_file, '%s.tmp' % files_file)
    log_info('found dirs=%d files=%d links=%d' % (dirs_count, files_count, links_count))
    return (dirs_count, files_count, links_count)
# Get the SHA256 of a file
def get_sha256(path:str)->str:
    with open(path, "rb") as f:
        file_hash = hashlib.sha256()
        while chunk := f.read(CHECKSUM_READ_SIZE):
            file_hash.update(chunk)
        return file_hash.hexdigest()

# Calculate and add the checksum given a FileInfo and return
# the updated FileInfo. Make no changes for non-regfiles.
def add_one_checksum(fi:FileInfo)->FileInfo:
    if stat.S_ISREG(fi.mode):
        src_path = os.path.join(SRC_DIR, fi.rel_path)
        log_debug('sha256(%s)' % src_path)
        fi.checksum = get_sha256(src_path)
    return fi
#
# Add checksums and sort files.list
#
def calc_checksums(files_count:int)->None:
    log_info("calculating checksums, count=%d" % files_count)
    list_file = os.path.join(TMP_DIR, 'files.list')
    tmp_list_file = os.path.join(TMP_DIR, 'files.list.tmp')
    log_debug('creating sorted %s' % tmp_list_file)
    with open(tmp_list_file, 'w') as fh:
        fi_iter = read_file_info_lines(list_file)
        for fi in map_p(add_one_checksum, fi_iter):
            print(format_file_info(fi), file=fh)
    cmd = [ 'sort', '--parallel=%d' % JOBS, '-o', list_file, tmp_list_file ]
    log_shell_cmd(shlex.join(cmd))
    subprocess.run(cmd, check=True)
    os.unlink(tmp_list_file)
#
# Create directories at destination
#
def create_dirs(dirs_count:int)->None:
    log_info("creating directories, count=%d" % dirs_count)
    for fi in read_file_info_lines(os.path.join(TMP_DIR, 'dirs.list')):
        if fi.rel_path == '.':
            path = DST_DIR
        else:
            path = os.path.join(DST_DIR, fi.rel_path)
        dst_exists = False
        try:
            st = os.stat(path)
            if stat.S_ISDIR(st.st_mode):
                dst_exists = True
            else:
                remove_file(path)
        except FileNotFoundError:
            pass
        if not dst_exists:
            log_debug('mkdir(%s)' % path)
            os.mkdir(path)
        # If we are not root, set directory permissions to be
        # writable by owner, because we will be creating files
        # there. This will fail if the destination directory is not
        # already owned by us (to be expected).
        if os.geteuid() != 0:
            log_debug('chmod(%s, 0%o)' % (path, 0o700))
            # Don't set follow_symlinks because this function
            # is never called for symlinks
            os.chmod(path, 0o700)
# Copy a file and its attributes, but change UID/GID as specified
def do_copy(src_path:str, dst_path:str, new_uid:int, new_gid:int)->os.stat_result:
    #log_debug("copy(%s, %s)" % (src_path, dst_path))
    cmd = [ 'cp', '-a' ]
    if COPY_REFLINK:
        # A bare --reflink means --reflink=always, which fails on file systems
        # without COW support; "auto" matches the "if possible" behavior
        # promised in the help text
        cmd.append('--reflink=auto')
    cmd.append('--no-dereference')
    cmd.append('--')
    cmd.append(src_path)
    cmd.append(dst_path)
    log_shell_cmd(shlex.join(cmd))
    subprocess.run(cmd, check=True)
    # shutil doesn't support reflinks, see https://github.com/python/cpython/issues/81338
    #shutil.copy2(src_path, dst_path, follow_symlinks=False)
    st = os.stat(dst_path, follow_symlinks=False)
    if new_gid != st.st_gid or new_uid != st.st_uid:
        log_debug('chown(%s, %d, %d)' % (dst_path, new_uid, new_gid))
        os.chown(dst_path, new_uid, new_gid)
        st = os.stat(dst_path, follow_symlinks=False)
    return st
#
# Copy or link a regfile:
#   If there's an older file with the same checksum, link it
#   Otherwise copy it
#   If linking fails, also copy it
#
# Return tuple(REL_PATH, FULL_PATH, CHECKSUM, DST_STAT_RESULT, LINKED, COPIED, SKIPPED)
#
def copy_one_file(fi:FileInfo)->tuple:
    dst_path = os.path.join(DST_DIR, fi.rel_path)
    src_path = os.path.join(SRC_DIR, fi.rel_path)

    # Work out target file's UID/GID
    if CHANGE_GID is not None:
        new_gid = CHANGE_GID
    else:
        new_gid = fi.gid
    if os.geteuid() != 0 and new_gid not in CURRENT_GID_LIST:
        new_gid = os.getegid()
    if CHANGE_UID is not None:
        new_uid = CHANGE_UID
    else:
        new_uid = fi.uid
    if os.geteuid() != 0:
        new_uid = os.geteuid()

    # Skip existing files
    if SKIP_EXISTING:
        try:
            st = os.stat(dst_path, follow_symlinks=False)
            if st.st_uid == new_uid and \
               st.st_gid == new_gid and \
               st.st_size == fi.size and \
               st.st_mtime == fi.mtime and \
               st.st_mode == fi.mode:
                log_debug('skipping existing %s' % dst_path)
                # (REL_PATH, FULL_PATH, CHECKSUM, STAT_RES, LINKED, COPIED, SKIPPED)
                return (fi.rel_path, dst_path, fi.checksum, st, 0, 0, 1)
        except FileNotFoundError:
            pass

    # Delete destination file if it exists
    remove_file(dst_path)

    # Regular file: try to link it to a file from an older build
    if stat.S_ISREG(fi.mode) and fi.checksum != '-':
        # Look up an identical file among the older builds
        for (old_path, old_st) in find_old_files(fi.checksum):
            try:
                log_debug('found link candidate by checksum: %s' % old_path)
                # Only link old files whose attributes match the source file,
                # except mtime
                if old_st.st_uid == new_uid and \
                   old_st.st_gid == new_gid and \
                   old_st.st_size == fi.size and \
                   old_st.st_mode == fi.mode:
                    log_debug('link(%s,%s)' % (old_path, dst_path))
                    os.link(old_path, dst_path)
                    dst_stat = os.stat(dst_path, follow_symlinks=False)
                    # (REL_PATH, FULL_PATH, CHECKSUM, STAT_RES, LINKED, COPIED, SKIPPED)
                    return (fi.rel_path, old_path, fi.checksum, dst_stat, 1, 0, 0)
            except OSError as e:
                log_warn('link(%s,%s): %s' % (old_path, dst_path, str(e)))

    # Checksum not found, or link failed: copy
    dst_stat = do_copy(src_path, dst_path, new_uid, new_gid)
    # (REL_PATH, FULL_PATH, CHECKSUM, STAT_RES, LINKED, COPIED, SKIPPED)
    return (fi.rel_path, dst_path, fi.checksum, dst_stat, 0, 1, 0)
#
# Copy files to DST_DIR
#
# Returns tuple (total_linked, total_copied, total_skipped)
#
def copy_files(files_count:int)->tuple[int,int,int]:
    log_info("copying files, count=%d" % files_count)
    total_linked = 0
    total_copied = 0
    total_skipped = 0
    with open(os.path.join(TMP_DIR, 'files.index'), 'w') as fh:
        fi_iter = read_file_info_lines(os.path.join(TMP_DIR, 'files.list'))
        for (rel_path, full_path, checksum, st, linked, copied, skipped) in map_p(copy_one_file, fi_iter):
            total_linked += linked
            total_copied += copied
            total_skipped += skipped
            if stat.S_ISREG(st.st_mode):
                index_line = format_index_line(rel_path, full_path, checksum, st)
                print('%s' % index_line, file=fh)
    return (total_linked, total_copied, total_skipped)
#
# Re-create a hardlink at destination, ie create a file
# as a link to a previously copied file, because it was
# linked in SRC_DIR.
#
# Fall back to copy if link fails.
#
# Return tuple(REL_PATH, FULL_PATH, CHECKSUM, DST_STAT_RESULT, LINKED, COPIED, SKIPPED)
#
def copy_one_link(fi:FileInfo)->tuple:
    dst_path = os.path.join(DST_DIR, fi.rel_path)
    src_path = os.path.join(SRC_DIR, fi.rel_path)
    # Delete destination file if it exists
    remove_file(dst_path)
    # Try to link it to a file we previously installed in copy_files().
    # Find the previously-installed source file in files.list, by dev/ino;
    # files.list lines begin with "DEV INO " and the file is sorted, so
    # 'look' can binary-search on that prefix.
    try:
        cmd = [ 'look', '%d %d ' % (fi.dev, fi.ino), os.path.join(TMP_DIR, 'files.list') ]
        log_shell_cmd(shlex.join(cmd))
        cmd_res = subprocess.run(cmd, check=False, encoding='utf8', stdout=subprocess.PIPE).stdout
        old_fi = parse_file_info(cmd_res)
        if old_fi:
            orig_path = os.path.join(DST_DIR, old_fi.rel_path)
            log_debug('link(%s,%s)' % (orig_path, dst_path))
            os.link(orig_path, dst_path)
            st = os.stat(dst_path, follow_symlinks=False)
            # (REL_PATH, FULL_PATH, CHECKSUM, STAT_RES, LINKED, COPIED, SKIPPED)
            return (fi.rel_path, orig_path, old_fi.checksum, st, 1, 0, 0)
    except OSError as e:
        log_warn('failed to link %s: %s' % (dst_path, str(e)))
    # Fall back to copy
    return copy_one_file(fi)
#
# Re-create or copy hardlinks at destination
#
# Returns tuple (total_linked, total_copied, total_skipped)
#
def copy_links(links_count:int)->tuple[int,int,int]:
    log_info("copying links, count=%d" % links_count)
    total_linked = 0
    total_copied = 0
    total_skipped = 0
    with open(os.path.join(TMP_DIR, 'links.index'), 'w') as fh:
        fi_iter = read_file_info_lines(os.path.join(TMP_DIR, 'links.list'))
        for (rel_path, full_path, checksum, st, linked, copied, skipped) in map_p(copy_one_link, fi_iter):
            total_linked += linked
            total_copied += copied
            total_skipped += skipped
            if stat.S_ISREG(st.st_mode):
                index_line = format_index_line(rel_path, full_path, checksum, st)
                print('%s' % index_line, file=fh)
    return (total_linked, total_copied, total_skipped)
#
# Set directory permissions & ownership to how they were at the source
#
def adjust_one_dir_perms(fi:FileInfo)->FileInfo:
    path = os.path.join(DST_DIR, fi.rel_path)
    perms = stat.S_IMODE(fi.mode)
    log_debug("chmod(%s, 0%o)" % (path, perms))
    # Don't set follow_symlinks because this function
    # is never called for symlinks
    os.chmod(path, perms)
    # At this point the target directory exists and is owned
    # by the current UID:GID due to create_dirs().
    st = os.stat(path, follow_symlinks=False)
    if CHANGE_GID is not None:
        new_gid = CHANGE_GID
    else:
        new_gid = fi.gid
    if os.geteuid() != 0 and new_gid not in CURRENT_GID_LIST:
        new_gid = os.getegid()
    if CHANGE_UID is not None:
        new_uid = CHANGE_UID
    else:
        new_uid = fi.uid
    if os.geteuid() != 0:
        new_uid = os.geteuid()
    if new_uid != st.st_uid or new_gid != st.st_gid:
        log_debug("chown(%s, %d, %d)" % (path, new_uid, new_gid))
        os.chown(path, new_uid, new_gid, follow_symlinks=False)
    # Set both access time and modification time to the modification time
    # of the source directory
    log_debug("utime(%s, (%f, %f))" % (path, fi.mtime, fi.mtime))
    os.utime(path, (fi.mtime, fi.mtime))
    return fi
#
# Adjust directory permissions & ownership at destination
#
def adjust_dir_perms(dirs_count:int)->None:
    log_info("adjusting directory permissions, count=%d" % dirs_count)
    fi_iter = read_file_info_lines(os.path.join(TMP_DIR, 'dirs.list'))
    for fi in map_p(adjust_one_dir_perms, fi_iter):
        pass

# Save the "standard" index (StxChecksums) for regfiles and links
def save_index(files_count:int, links_count:int)->None:
    files_index_file = os.path.join(TMP_DIR, 'files.index')
    links_index_file = os.path.join(TMP_DIR, 'links.index')
    # Honor --output-checksums when given; default to DST_DIR/StxChecksums
    full_index_file = OUTPUT_INDEX_FILE or os.path.join(DST_DIR, 'StxChecksums')
    log_info('creating index, count=%d' % (files_count + links_count))
    sort_cmd = [ 'sort', '--parallel=%d' % JOBS, '--output=%s' % full_index_file, files_index_file, links_index_file ]
    log_shell_cmd(shlex.join(sort_cmd))
    subprocess.run(sort_cmd, check=True)

# Delete temp files
def cleanup():
    if not KEEP_TEMP_FILES:
        tmp_files = [
            'dirs.list',
            'files.index',
            'files.list',
            'links.index',
            'links.list',
            'old_index.list',
        ]
        for file in tmp_files:
            remove_file(os.path.join(TMP_DIR, file))
# process command line
def init()->None:
    def positive_integer(s:str)->int:
        v = int(s)
        if v < 1:
            raise ValueError()
        return v

    def user_id(s:str)->int:
        try:
            uid = int(s)
        except:
            try:
                uid = pwd.getpwnam(s).pw_uid
            except:
                raise ValueError()
        if uid < 0:
            raise ValueError
        return uid

    def group_id(s:str)->int:
        try:
            gid = int(s)
        except:
            try:
                gid = grp.getgrnam(s).gr_gid
            except:
                raise ValueError()
        if gid < 0:
            raise ValueError
        return gid

    p = argparse.ArgumentParser()
    p.add_argument('-j', '--jobs', type=positive_integer, default=1)
    p.add_argument('--owner', type=user_id)
    p.add_argument('--group', type=group_id)
    p.add_argument('--checksum-hardlink', action='store_true', default=False)
    p.add_argument('--old-index-files-from')
    p.add_argument('--output-checksums')
    p.add_argument('--skip-existing', action='store_true', default=False)
    p.add_argument('-v', '--verbose', action='count', default=0, dest='verbosity')
    p.add_argument('--reflink', action='store_true', default=False)
    p.add_argument('--keep-temp-files', action='store_true', default=False)
    p.add_argument('SRC_DIR')
    p.add_argument('DST_DIR')
    p.add_argument('TMP_DIR')
    p.add_argument('old_index_files', nargs='*')
    p.format_help = lambda: HELP
    args = p.parse_args()

    current_gid_list = [ os.getegid(), *os.getgroups() ]
    if args.owner is not None:
        if os.geteuid() != 0 and args.owner != os.geteuid():
            log_error('--owner can only be changed by root')
            sys.exit(1)
    if args.group is not None:
        if os.geteuid() != 0 and args.group not in current_gid_list:
            log_error('--group can only be changed by root; or it must be a group you are a member of')
            sys.exit(1)

    existing_old_index_files = []
    if args.checksum_hardlink:
        old_index_files = []
        old_index_files += args.old_index_files
        if args.old_index_files_from:
            with open(args.old_index_files_from) as fh:
                for filename in fh:
                    filename = filename.rstrip()
                    old_index_files.append(filename)
        # Ignore missing/non-readable files because they may disappear
        # while this script is running
        for filename in old_index_files:
            try:
                with open(filename) as ref_fh:
                    existing_old_index_files.append(filename)
            except OSError as x:
                log_warn('Ignoring index file %s: %s' % (filename, str(x)))
    elif args.old_index_files:
        log_warn('old index files are meaningless without --checksum-hardlink')

    global JOBS, CHANGE_UID, CHANGE_GID, CURRENT_GID_LIST
    global VERBOSITY, COPY_REFLINK, SRC_DIR, DST_DIR, TMP_DIR
    global OLD_INDEX_FILES, OUTPUT_INDEX_FILE
    global KEEP_TEMP_FILES, SKIP_EXISTING
    JOBS = args.jobs
    CHANGE_UID = args.owner
    CHANGE_GID = args.group
    CURRENT_GID_LIST = current_gid_list
    VERBOSITY = args.verbosity
    COPY_REFLINK = args.reflink
    SRC_DIR = str(Path(args.SRC_DIR).absolute())
    DST_DIR = str(Path(args.DST_DIR).absolute())
    TMP_DIR = str(Path(args.TMP_DIR).absolute())
    OLD_INDEX_FILES = existing_old_index_files
    OUTPUT_INDEX_FILE = args.output_checksums
    SKIP_EXISTING = args.skip_existing
    KEEP_TEMP_FILES = args.keep_temp_files
init()
log_debug('SRC_DIR=%s' % SRC_DIR)
log_debug('DST_DIR=%s' % DST_DIR)
log_debug('TMP_DIR=%s' % TMP_DIR)
log_debug('JOBS=%d' % JOBS)
if CHANGE_UID is not None:
    log_debug('CHANGE_UID=%d' % CHANGE_UID)
if CHANGE_GID is not None:
    log_debug('CHANGE_GID=%d' % CHANGE_GID)
log_debug('OLD_INDEX_FILES=%s' % OLD_INDEX_FILES)
log_debug('KEEP_TEMP_FILES=%d' % KEEP_TEMP_FILES)

if not os.path.isdir(TMP_DIR):
    os.mkdir(TMP_DIR)

(dirs_count, files_count, links_count) = find_files()
calc_checksums(files_count)
create_dirs(dirs_count)
combine_old_index_files()  # DST_DIR must already exist
(linked1, copied1, skipped1) = copy_files(files_count)
(linked2, copied2, skipped2) = copy_links(links_count)
adjust_dir_perms(dirs_count)
save_index(files_count, links_count)
cleanup()
log_info('%s linked=%d copied=%d skipped=%d' % (DST_DIR, linked1+linked2, copied1+copied2, skipped1+skipped2))


@@ -1,516 +0,0 @@
#!/bin/bash

PROGNAME="${BASH_SOURCE[0]##*/}"
SRC_DIR=
DST_DIR=
CHECKSUM_FILES_LIST_FILE=
DST_CHECKSUMS_FILE=
CHANGE_OWNER=
CHANGE_GROUP=
JOBS=1
XTRACE=0

usage() {
    echo -n "\
Usage: $0 [OPTIONS...] SRC_DIR DST_DIR TMP_DIR
Archive SRC_DIR in DST_DIR, using TMP_DIR for temporary files.

  -j,--jobs=N    calculate checksums in parallel (default: 1)
  --owner=OWNER  set copied files' owner as specified
  --group=GROUP  set copied files' group as specified
  --output-checksums=CK_FILE
                 save StxChecksums to this file; by default print it to
                 STDOUT
  --checksum-hardlink=CK_LIST_FILE
                 Hardlink destination files if possible. CK_LIST_FILE
                 must contain a list of existing StxChecksums file names
                 from previously-archived directories, one per line.
                 We will use the files with matching properties & checksums
                 to create hard links in DST_DIR.
  --xtrace       Enable debug output

If executed by root, we will preserve owners/groups of the copied files,
unless they are overridden on the command line.

If this script is called by non-root, it will create all files with the
calling user's effective user & group ownership.
"
    exit 0
}

cmdline_error() {
    if [[ "$#" -gt 0 ]] ; then
        echo "ERROR:" "$@" >&2
    fi
    echo "Type \`$0 --help' for more info" >&2
    exit 1
}
check_pipe_status() {
    local -a pipestatus=(${PIPESTATUS[*]})
    local -i i
    for ((i=0; i<${#pipestatus[*]}; ++i)) ; do
        [[ "${pipestatus[$i]}" -eq 0 ]] || return 1
    done
    return 0
}
# Process command line
temp=$(getopt -o h,j: --long help,jobs:,owner:,group:,output-checksums:,checksum-hardlink:,xtrace -n "$PROGNAME" -- "$@") || cmdline_error
eval set -- "$temp"
while [[ "$#" -gt 0 ]] ; do
    case "$1" in
        -h|--help)
            usage
            exit 0
            ;;
        -j|--jobs)
            JOBS="$2"
            if [[ ! "$JOBS" =~ ^[0-9]{1,2}$ || "$JOBS" -le 0 || "$JOBS" -ge 99 ]] ; then
                cmdline_error "$1 must be an integer in [1..98]"
            fi
            shift 2
            ;;
        --owner)
            CHANGE_OWNER="$2"
            shift 2
            ;;
        --group)
            CHANGE_GROUP="$2"
            shift 2
            ;;
        --checksum-hardlink)
            CHECKSUM_FILES_LIST_FILE="$2"
            shift 2
            ;;
        --output-checksums)
            DST_CHECKSUMS_FILE="$2"
            shift 2
            ;;
        --xtrace)
            XTRACE=1
            shift
            ;;
        --)
            shift
            break
            ;;
        *)
            cmdline_error
            ;;
    esac
done

[[ "$#" -ge 3 ]] || cmdline_error "not enough arguments"
[[ "$#" -le 3 ]] || cmdline_error "too many arguments"
SRC_DIR="$1"
DST_DIR="$2"
TMP_DIR="$3"

if [[ ! "$EGID" ]] ; then
    EGID="$(id -g)" || exit 1
fi

if [[ $XTRACE -eq 1 ]] ; then
    set -x
fi

# Make sure BSD look is installed
if ! look --help >/dev/null ; then
    echo "This script requires \"look\" to be installed" >&2
    exit 1
fi

# Check for GNU parallel
if parallel --help >/dev/null 2>&1 ; then
    GNU_PARALLEL_EXISTS=1
else
    GNU_PARALLEL_EXISTS=0
fi

set -e
set -e
#
# Combine checksum list files into one
#
if [[ "$CHECKSUM_FILES_LIST_FILE" ]] ; then
echo $'\n## Combining checksum lists into one' >&2
combined_checksums_file="$TMP_DIR/combined_checksums.list"
while read -r checksums_file ; do
# skip empty lines and comments
if echo "$checksums_file" | grep -E '^\s*(#.*)$' ; then
continue
fi
# skip missing files
[[ -f "$checksums_file" ]] || continue
# add file path to the second token (file name)
checksums_dir="$(dirname "$checksums_file")"
awk -v "DIR=$checksums_dir/" '{ if (match($0, /^[[:space:]]*[^[:space:]]+[[:space:]]+/) >= 0) print substr($0, 1, RLENGTH) DIR substr($0, RLENGTH+1) }' \
"$checksums_file"
done <"$CHECKSUM_FILES_LIST_FILE" | sort >"$combined_checksums_file"
check_pipe_status
fi
#
# Create source file lists
#

# Create a list file with each source file or dir + their stat properties
echo $'\n## Compiling file list: '"$SRC_DIR" >&2
full_list_file="$TMP_DIR/full.list"
( cd "$SRC_DIR" && find -printf 'type=%y owner=%U group=%G mode=%#m size=%s mtime=%T@ name=%p\n' ) \
    | sed 's#name=[.]/#name=#' \
    | sed 's#\(mtime=[0-9]\+\)[.][0-9]\+#\1#g' \
    >"${full_list_file}"
check_pipe_status

# Create another list file that contains only regular files
regfile_list_file="$TMP_DIR/regfile.list"
\grep '^type=f' "$full_list_file" | sort -k 7 >"$regfile_list_file" || exit 1

# Create a list file that contains only directories
# Sort by the last field "name=..."
dir_list_file="$TMP_DIR/dir.list"
\grep '^type=d' "$full_list_file" | sort -k 7 >"$dir_list_file" || exit 1

# Create a list file that contains all other entries (non-dirs & non-files)
other_list_file="$TMP_DIR/other.list"
\grep '^type=[^df]' "$full_list_file" | sort -k 7 >"$other_list_file" || exit 1
#
# Usage: process_lines MESSAGE INPUT_FILE FUNC ARGS...
#
# Call shell function FUNC in parallel, similar to xargs.
# We will read lines from INPUT_FILE, then pass some subset of lines
# to FUNC many times in parallel, until all lines have been processed.
# Input lines will be appended as additional arguments to FUNC calls.
#
# FUNC and any global vars it references must be exported before
# calling process_lines().
#
# MESSAGE will be printed to STDERR before starting
#
process_lines() {
    local message="$1" ; shift
    local input_file="$1" ; shift

    # how many input lines? bail out if 0
    local line_count
    line_count="$(cat "$input_file" | wc -l)" || exit 1
    [[ "$line_count" -gt 0 ]] || return 0

    # How many lines to process at a time. The more the better, but with too
    # many some child jobs may starve -- cap it at 256
    local lines_per_job
    if [[ "$JOBS" -gt 1 ]] ; then
        let lines_per_job="line_count / JOBS / 2"
        if [[ "$lines_per_job" -eq 0 ]] ; then
            lines_per_job=1
        elif [[ "$lines_per_job" -gt 256 ]] ; then
            lines_per_job=256
        fi
    else
        lines_per_job=256
    fi

    echo "** $message [JOBS=$JOBS lines_per_job=$lines_per_job]" >&2

    # Prefer GNU parallel because it can exit early
    local -a cmd
    if [[ $GNU_PARALLEL_EXISTS -eq 1 ]] ; then
        cmd=(parallel --halt now,fail=1 -q -r -d '\n' -n $lines_per_job -P $JOBS "$@")
    else
        cmd=(xargs -r -d '\n' -n $lines_per_job -P $JOBS $SHELL -c '"$@"' unused_arg "$@")
    fi
    if ! "${cmd[@]}" <"$input_file" ; then
        echo "ERROR: command failed (\"$message\")" >&2
        return 1
    fi
}
#
# create directories in sort order, ie create parents before
# children
#
echo $'\n## Creating directories: '"$DST_DIR" >&2
while read -r line ; do
    [[ -n "$line" ]] || continue
    name="${line#* name=}"
    [[ -n "$name" ]] || continue
    attr_line="${line% name=*}"
    mode="$(echo "$attr_line" | sed -n -r 's#.*mode=([0-9]+).*#\1#p')"
    install_args=()
    if [[ "$CHANGE_OWNER" ]] ; then
        install_args+=("--owner" "$CHANGE_OWNER")
    elif [[ $EUID -eq 0 ]] ; then
        owner="$(echo "$attr_line" | sed -n -r 's#.*owner=([0-9]+).*#\1#p')"
        install_args+=("--owner" "$owner")
    fi
    if [[ "$CHANGE_GROUP" ]] ; then
        install_args+=("--group" "$CHANGE_GROUP")
    elif [[ $EUID -eq 0 ]] ; then
        group="$(echo "$attr_line" | sed -n -r 's#.*group=([0-9]+).*#\1#p')"
        install_args+=("--group" "$group")
    fi
    echo " MKDIR $name" >&2
    if [[ -e "$DST_DIR/$name" && ! -d "$DST_DIR/$name" ]] ; then
        \rm "$DST_DIR/$name" || exit 1
    fi
    install -d "${install_args[@]}" "$DST_DIR/$name"
done <"$dir_list_file" || exit 1
#
# Copy or hardlink regular files
#
echo $'\n## Copying regular files: '"$SRC_DIR" >&2

# helper function to process regular files
# global vars used:
#   SRC_DIR
#   DST_DIR
#   CHANGE_OWNER
#   CHANGE_GROUP
#   EUID (always defined by bash)
#   EGID
#   TMP_DIR
#   XTRACE
#   combined_checksums_file
process_regfiles() {
    if [[ $XTRACE -eq 1 ]] ; then
        set -x
    fi
    # Temp file generated by this function. Its name must be unique to
    # prevent interference from other jobs with -j N.
    local matching_checksums_file
    matching_checksums_file="$TMP_DIR/matching_checksums-$$.list"
    local line attr_line
    for line in "$@" ; do
        # source file name relative to SRC_DIR
        local name
        name="${line#* name=}"
        [[ "$name" ]] || continue
        # all attributes leading up to name=
        attr_line="${line% name=*}"
        # source checksum
        local checksum
        #flock -s "$DST_DIR" echo " SHA256 $name" >&2
        checksum="$(sha256sum "$SRC_DIR/$name" | awk '{print $1}')"
        if [[ ! "$checksum" ]] ; then
            flock -s "$DST_DIR" echo "$SRC_DIR/$name: failed to calculate checksum" >&2
            return 1
        fi
        # source owner; or a user-provided override
        local -a install_args=()
        local owner
        if [[ "$CHANGE_OWNER" ]] ; then
            owner="$CHANGE_OWNER"
            install_args+=("--owner" "$owner")
        elif [[ $EUID -eq 0 ]] ; then
            owner="$(echo "$attr_line" | sed -n -r 's#.* owner=([0-9]+).*#\1#p')"
            install_args+=("--owner" "$owner")
        else
            owner=$EUID
        fi
        # source group; or a user-provided override
        local group
        if [[ "$CHANGE_GROUP" ]] ; then
            group="$CHANGE_GROUP"
            install_args+=("--group" "$group")
        elif [[ $EUID -eq 0 ]] ; then
            group="$(echo "$attr_line" | sed -n -r 's#.* group=([0-9]+).*#\1#p')"
            install_args+=("--group" "$group")
        else
            group=$EGID
        fi
        # source file's mode/permissions
        local mode
        mode="$(echo "$attr_line" | sed -n -r 's#.* mode=([^[:space:]]+).*#\1#p')"
        # Search for the checksum in an older StxChecksums file
        if [[ "$combined_checksums_file" ]] ; then
            if look "$checksum " "$combined_checksums_file" >"$matching_checksums_file" 2>/dev/null ; then
                (
                    # As we read previously-archived files' properties from StxChecksums,
                    # make sure they have not changed compared to the actual files on disk.
                    while read -r ref_checksum ref_name ref_size ref_mtime ref_dev ref_inode ref_path x_rest ; do
                        [[ -f "$ref_path" ]] || continue
                        # read on-disk file properties
                        local ref_stat
                        ref_stat=($(stat -c '%s %Y %u %g %#04a' "$ref_path" || true))
                        [[ "${#ref_stat[@]}" -eq 5 ]] || continue
                        # on-disk size does not match StxChecksums
                        local ref_ondisk_size
                        ref_ondisk_size="${ref_stat[0]}"
                        [[ "$ref_size" == "$ref_ondisk_size" ]] || continue
                        # on-disk mtime does not match StxChecksums
                        local ref_ondisk_mtime
                        ref_ondisk_mtime="${ref_stat[1]}"
                        [[ "${ref_mtime}" == "$ref_ondisk_mtime" ]] || continue
                        # on-disk owner does not match requested owner
                        local ref_ondisk_owner
                        ref_ondisk_owner="${ref_stat[2]}"
                        [[ "${owner}" == "$ref_ondisk_owner" ]] || continue
                        # on-disk group does not match requested group
                        local ref_ondisk_group
                        ref_ondisk_group="${ref_stat[3]}"
                        [[ "${group}" == "$ref_ondisk_group" ]] || continue
                        # on-disk mode does not match the mode of the source file
                        ref_ondisk_mode="${ref_stat[4]}"
                        [[ "${mode}" == "$ref_ondisk_mode" ]] || continue
                        # At this point size, mtime, mode, owner, group and checksum of the
                        # existing file match the file we are trying to copy.
                        # Use that file to create a hardlink.
                        flock -s "$DST_DIR" echo " LINK $name (from $ref_name)" >&2
                        if ln -f "$ref_name" "${DST_DIR}/$name" ; then
                            flock -s "$DST_DIR" echo "$checksum $name $ref_size $ref_mtime $ref_dev $ref_inode $DST_DIR/$name"
                            exit 0
                        fi
                    done <"$matching_checksums_file"
                    # checksum not found in older archives
                    exit 1
                ) && continue || true
            fi
        fi

        # No matching files found: really copy it
        if [[ -e "$DST_DIR/$name" ]] ; then
            \rm "$DST_DIR/$name" || exit 1
        fi
        # source file's size & mtime
        local size mtime
        size="$(echo "$attr_line" | sed -n -r 's#.* size=([^[:space:]]+).*#\1#p')"
        mtime="$(echo "$attr_line" | sed -n -r 's#.* mtime=([^[:space:]]+).*#\1#p')"
        # copy it to $DST_DIR
        flock -s "$DST_DIR" echo " COPY $name" >&2
        rm -f "$DST_DIR/$name" || exit 1
        install --preserve-timestamps "${install_args[@]}" --mode="$mode" -T "$SRC_DIR/$name" "$DST_DIR/$name" || exit 1
        # check destination file properties
        local dst_stat dst_size dst_dev dst_ino
        dst_stat=($(stat -c '%s %d %i' "$DST_DIR/$name")) || exit 1
        dst_size="${dst_stat[0]}"
        dst_dev="${dst_stat[1]}"
        dst_ino="${dst_stat[2]}"
        # file changed while copying
        if [[ "$dst_size" != "$size" ]] ; then
            flock -s "$DST_DIR" echo "ERROR: $SRC_DIR/$name changed while copying!" >&2
            exit 1
        fi
        # print out a line for StxChecksums using source file properties (preserved
        # during copying), but with destination file's dev & ino.
        flock -s "$DST_DIR" echo "$checksum $name $size $mtime $dst_dev $dst_ino $DST_DIR/$name"
    done
    rm -f "$matching_checksums_file"
}
# process files in parallel
(
    if [[ "$DST_CHECKSUMS_FILE" ]] ; then
        dst_checksums_fd=5
        exec 5<>"$DST_CHECKSUMS_FILE" || exit 1
    else
        dst_checksums_fd=1
    fi
    export SRC_DIR \
           DST_DIR \
           CHANGE_OWNER \
           CHANGE_GROUP \
           EGID \
           TMP_DIR \
           XTRACE \
           combined_checksums_file
    export -f process_regfiles
    message="processing regular files"
    process_lines "$message" "$regfile_list_file" process_regfiles | sort >&$dst_checksums_fd
    [[ "${PIPESTATUS[0]}" -eq 0 && "${PIPESTATUS[1]}" -eq 0 ]] || exit 1
) || exit 1
#
# copy special files
#
echo $'\n## Copying special files: '"$DST_DIR" >&2

# helper function for processing special files
# global vars used:
#   SRC_DIR
#   DST_DIR
#   CHANGE_OWNER
#   CHANGE_GROUP
#   XTRACE
process_other() {
    if [[ $XTRACE -eq 1 ]] ; then
        set -x
    fi
    local line attr_line
    for line in "$@" ; do
        local name
        name="${line#* name=}"
        [[ -n "$name" ]] || continue
        attr_line="${line% name=*}"
        local type
        type="$(echo "$attr_line" | sed 's#^type=\(.\) .*#\1#g')"
        [[ -n "$type" ]] || continue
        flock -s "$DST_DIR" echo " CREATE type=$type $name" >&2
        if [[ -e "$DST_DIR/$name" ]] ; then
            rm "$DST_DIR/$name" || exit 1
        fi
        cp -a --no-dereference "$SRC_DIR/$name" "$DST_DIR/$name" || exit 1
        if [[ "$CHANGE_OWNER" || "$CHANGE_GROUP" ]] ; then
            local chown_arg=
            if [[ "$CHANGE_OWNER" ]] ; then
                chown_arg="$CHANGE_OWNER"
            fi
            if [[ "$CHANGE_GROUP" ]] ; then
                chown_arg+=":$CHANGE_GROUP"
            fi
            chown --no-dereference "$chown_arg" "$DST_DIR/$name" || exit 1
        fi
    done
}

# process them in parallel
(
    export SRC_DIR \
           DST_DIR \
           CHANGE_OWNER \
           CHANGE_GROUP \
           XTRACE
    export -f process_other
    message="processing other files"
    process_lines "$message" "$other_list_file" process_other || exit 1
) || exit 1


@@ -35,7 +35,7 @@ export REPO_TRACE=0
 # docker images
 SAFE_RSYNC_DOCKER_IMG="servercontainers/rsync:3.1.3"
-COREUTILS_DOCKER_IMG="starlingx/jenkins-pipelines-coreutils:20230529"
+COREUTILS_DOCKER_IMG="starlingx/jenkins-pipelines-coreutils:20250709"
 notice() {
     ( set +x ; print_log -i --notice "$@" ; )