added support for clean archive splitting

This commit is contained in:
Michael Stumpf (ifhcluster) 2019-09-02 14:32:18 +02:00
parent 50d68cd2dd
commit f580a9378e
2 changed files with 220 additions and 80 deletions

View File

@ -1,6 +1,6 @@
#!/bin/bash
display_help() {
(>&2 echo "Usage: $(basename "$0") [-chqv] [-o outfile] indir iseq")
(>&2 echo "Usage: $(basename "$0") [-chqv] [-s size] [-o outfile] indir iseq")
(>&2 echo "UCF tar packer")
(>&2 echo)
(>&2 echo " indir path to input directory")
@ -9,6 +9,7 @@ display_help() {
(>&2 echo " -h, --help display this help message")
(>&2 echo " -o, --outfile output file (default: snapshot_XXXX.ucf.tar)")
(>&2 echo " -q, --quicksum same as --checksum, but compares only first bytes")
(>&2 echo " -s, --split split archives so that they are smaller than given size (in GiB)")
(>&2 echo " -v, --verbose verbose output")
}
exit_script() {
@ -23,9 +24,11 @@ if [ $# -eq 0 ]; then
exit -1
fi
fout=""
maxsize=""
verbose=0
checksum=0
quicksum=0
split=0
POSITIONAL=()
while [[ $# -gt 0 ]]
do
@ -53,6 +56,12 @@ case $key in
quicksum=1
shift # past argument
;;
-s|--split)
split=1
maxsize=$(echo "$2*1024*1024*1024" | bc -l | awk -F "." '{print $1}') # Convert GiB to bytes
shift # past argument
shift # past value
;;
*) # unknown option
POSITIONAL+=("$1") # save it in an array for later
shift # past argument
@ -128,47 +137,108 @@ fi
# Create a full file list for the archive
flist=(${fparam} ${fgrid} ${fproc} ${fpart} ${fuvwp[@]})
# Now tar them and remove sequence number from file names while doing so
flagtar=""
# Construct flags of tar command
# The flags are collected in a single space-separated string that is later
# expanded unquoted on the tar command line.
#   --format=ustar           write a POSIX ustar archive
#   --transform=flags=r;...  sed-style rename applied to archive member names;
#                            the expression deletes the first "_$seqnum" from each name
flagtar="--format=ustar --transform=flags=r;s|_$seqnum||"
if [ $verbose -eq 1 ]; then
flagtar="$flagtar --verbose"
fi
# With a blocking factor of 1 tar writes 512-byte records.
# NOTE(review): presumably chosen so that the archive size predicted by the
# splitter (512-byte blocks plus a 1024-byte trailer) matches what tar writes
# -- confirm.
if [ $split -eq 1 ]; then
flagtar="$flagtar --blocking-factor=1"
fi
# Initialize variables needed for splitting
if [ $split -eq 1 ]; then
sarchivenum=0 # Current archive number (suffix to file name)
scounter=0 # Counter of current file
snumfilecur=0 # Current number of files to be archived
snumfiletot=${#flist[@]} # Total number of files to be archived
scursize=0 # Current size of archive in bytes
sfilesize=0 # Size of current file in archive
spredictsize=0 # Predicted size of archive if current file is added
fout_sbase=$fout # Original outfile name (number is appended)
flist_sbase=("${flist[@]}") # Original list of files (to be split)
flag_splitdone=0 # Flag to indicate if all files have been packed
# flist is rebuilt for every archive inside the packing loop
unset flist
fi
# Start packing loop (only executed once, if archive is not to be split)
packloop=1
trap exit_script SIGINT SIGTERM
tar $flagtar --format ustar --transform="flags=r;s|_$seqnum||" --directory=${din} -cf ${fout} ${flist[@]}
tarexit=$?
# Set exit status according to tar
if [ $tarexit -ne 0 ]; then
(>&2 echo "tar failed with exit code $tarexit")
exit 254
fi
# Packing loop: every pass writes one tar archive from the files in flist and,
# if requested, verifies its contents against the originals in ${din}.
# Without splitting (split=0) the body runs exactly once with the full flist.
# With splitting (split=1) each pass picks the next run of files from
# flist_sbase whose estimated archive size stays below maxsize.
# Exit codes used inside the loop: 101 (single file does not fit into one
# archive), 254 (tar failed), 5 (checksum verification failed).
while [ $packloop -eq 1 ]; do
# Check if archive is to be split. If so, determine files to be packed in this iteration.
if [ $split -eq 1 ]; then
# Construct outfile name
# (base name plus ".<archive number>"; the number is incremented for the next pass)
fout="${fout_sbase}.${sarchivenum}"
((sarchivenum++))
# Cumulate size of archive and construct file list
snumfilecur=0
scursize=1024 # Initialize with size of trailing zero block
unset flist
# scounter is NOT reset here: it keeps pointing at the first file that has
# not yet been assigned to an archive.
while true; do
sfilesize=$(wc -c ${din}/${flist_sbase[${scounter}]} | awk '{print $1}') # raw size
sfilesize=$(((${sfilesize}+511)/512*512+512)) # adjust to 512 byte-blocks and add header
spredictsize=$((${scursize}+${sfilesize}))
# The file is accepted only if the predicted size stays strictly below maxsize
if [ ${spredictsize} -lt ${maxsize} ]; then
flist[${snumfilecur}]=${flist_sbase[${scounter}]}
((snumfilecur++))
((scounter++))
scursize=${spredictsize}
elif [ ${snumfilecur} -eq 0 ]; then
# Even an archive containing only this file would reach maxsize: give up
(>&2 echo "Error: file larger than maximum archive size: ${flist_sbase[${scounter}]}")
exit 101
else
# Archive is full; the current file stays at scounter for the next pass.
# Note: the "Splitter" messages are written to stdout, not stderr.
if [ $verbose -eq 1 ]; then
echo "Splitter: ${fout} (${snumfilecur} files, ${scursize} bytes)"
fi
break
fi
# All files have been assigned: this is the last archive
if [ ${scounter} -ge ${snumfiletot} ]; then
if [ $verbose -eq 1 ]; then
echo "Splitter: ${fout} (${snumfilecur} files, ${scursize} bytes)"
fi
flag_splitdone=1
break
fi
done
fi
# Create tar archive
# ($flagtar and the file list are expanded unquoted, so the shell word-splits
# them into separate arguments)
tar $flagtar --directory=${din} -cf ${fout} ${flist[@]}
tarexit=$?
if [ $tarexit -ne 0 ]; then
(>&2 echo "tar failed with exit code $tarexit")
exit 254
fi
# Compare checksums (CRC32), if flag is set
# flistx holds the file names with "_$seqnum" removed; these are the names
# used to extract the members from the archive below.
# NOTE(review): sed removes every occurrence (g flag), whereas the
# --transform expression in flagtar has no g flag -- confirm that
# "_$seqnum" occurs only once per file name.
flistx=($(echo ${flist[@]} | sed s/"_$seqnum"/""/g))
if [ $checksum -eq 1 ]; then
# Full verification: compare the first two fields of the cksum output
# for the original file and for the member streamed out of the archive.
for ii in "${!flistx[@]}"; do
if [ $verbose -eq 1 ]; then
(>&2 echo "Verifying checksum: ${flist[$ii]}")
fi
crcori=$(cksum ${din}/${flist[$ii]} | awk '{ print $1, $2 }')
crctar=$(tar --to-command='cksum -' -xf ${fout} ${flistx[$ii]} | awk '{ print $1, $2 }')
if [ "$crcori" != "$crctar" ]; then
(>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}")
exit 5
fi
done
elif [ $quicksum -eq 1 ]; then
# Quick verification: same comparison, but only over the leading part of
# each file as cut by "head -c 1M".
for ii in "${!flistx[@]}"; do
if [ $verbose -eq 1 ]; then
(>&2 echo "Verifying partial checksum: ${flist[$ii]}")
fi
crcori=$(head -c 1M ${din}/${flist[$ii]} | cksum -)
crctar=$(tar --to-command='head -c 1M' -xf ${fout} ${flistx[$ii]} | cksum -)
if [ "$crcori" != "$crctar" ]; then
(>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}")
exit 5
fi
done
fi
# Continue looping?
# Stop after the first pass when not splitting, or once the splitter has
# handed out the last file.
if ([ $split -eq 1 ] && [ $flag_splitdone -eq 1 ]) || [ $split -eq 0 ]; then
packloop=0
fi
done
# Compare checksums (CRC32), if flag is set
#din="./archive/" #for testing
flistx=($(echo ${flist[@]} | sed s/"_$seqnum"/""/g))
if [ $checksum -eq 1 ]; then
for ii in "${!flistx[@]}"; do
if [ $verbose -eq 1 ]; then
(>&2 echo "Verifying checksum: ${flist[$ii]}")
fi
crcori=$(cksum ${din}/${flist[$ii]} | awk '{ print $1, $2 }')
crctar=$(tar --to-command='cksum -' -xf ${fout} ${flistx[$ii]} | awk '{ print $1, $2 }')
if [ "$crcori" != "$crctar" ]; then
(>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}")
exit 5
fi
done
elif [ $quicksum -eq 1 ]; then
for ii in "${!flistx[@]}"; do
if [ $verbose -eq 1 ]; then
(>&2 echo "Verifying partial checksum: ${flist[$ii]}")
fi
crcori=$(head -c 1M ${din}/${flist[$ii]} | cksum -)
crctar=$(tar --to-command='head -c 1M' -xf ${fout} ${flistx[$ii]} | cksum -)
if [ "$crcori" != "$crctar" ]; then
(>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}")
exit 5
fi
done
fi
exit 0
exit 0

View File

@ -1,6 +1,6 @@
#!/bin/bash
display_help() {
(>&2 echo "Usage: $(basename "$0") [-chqv] [-o outfile] indir iseq")
(>&2 echo "Usage: $(basename "$0") [-chqv] [-s size] [-o outfile] indir iseq")
(>&2 echo "UCF tar scalar packer")
(>&2 echo)
(>&2 echo " indir path to input directory")
@ -9,6 +9,7 @@ display_help() {
(>&2 echo " -h, --help display this help message")
(>&2 echo " -o, --outfile output file (default: snapshot_XXXX.ucf.tar)")
(>&2 echo " -q, --quicksum same as --checksum, but compares only first bytes")
(>&2 echo " -s, --split split archives so that they are smaller than given size (in GiB)")
(>&2 echo " -v, --verbose verbose output")
}
exit_script() {
@ -23,9 +24,11 @@ if [ $# -eq 0 ]; then
exit -1
fi
fout=""
maxsize=""
verbose=0
checksum=0
quicksum=0
split=0
POSITIONAL=()
while [[ $# -gt 0 ]]
do
@ -53,6 +56,12 @@ case $key in
quicksum=1
shift # past argument
;;
-s|--split)
split=1
maxsize=$(echo "$2*1024*1024*1024" | bc -l | awk -F "." '{print $1}') # Convert GiB to bytes
shift # past argument
shift # past value
;;
*) # unknown option
POSITIONAL+=("$1") # save it in an array for later
shift # past argument
@ -129,47 +138,108 @@ fi
# Create a full file list for the archive
flist=(${fparam} ${fgrid} ${fproc} ${fpart} ${fscal[@]})
# Now tar them and remove sequence number from file names while doing so
flagtar=""
# Construct flags of tar command
# The flags are collected in a single space-separated string that is later
# expanded unquoted on the tar command line.
#   --format=ustar           write a POSIX ustar archive
#   --transform=flags=r;...  sed-style rename applied to archive member names;
#                            the expression deletes the first "_$seqnum" from each name
flagtar="--format=ustar --transform=flags=r;s|_$seqnum||"
if [ $verbose -eq 1 ]; then
flagtar="$flagtar --verbose"
fi
# With a blocking factor of 1 tar writes 512-byte records.
# NOTE(review): presumably chosen so that the archive size predicted by the
# splitter (512-byte blocks plus a 1024-byte trailer) matches what tar writes
# -- confirm.
if [ $split -eq 1 ]; then
flagtar="$flagtar --blocking-factor=1"
fi
# Initialize variables needed for splitting
if [ $split -eq 1 ]; then
sarchivenum=0 # Current archive number (suffix to file name)
scounter=0 # Counter of current file
snumfilecur=0 # Current number of files to be archived
snumfiletot=${#flist[@]} # Total number of files to be archived
scursize=0 # Current size of archive in bytes
sfilesize=0 # Size of current file in archive
spredictsize=0 # Predicted size of archive if current file is added
fout_sbase=$fout # Original outfile name (number is appended)
flist_sbase=("${flist[@]}") # Original list of files (to be split)
flag_splitdone=0 # Flag to indicate if all files have been packed
# flist is rebuilt for every archive inside the packing loop
unset flist
fi
# Start packing loop (only executed once, if archive is not to be split)
packloop=1
trap exit_script SIGINT SIGTERM
tar $flagtar --format ustar --transform="flags=r;s|_$seqnum||" --directory=${din} -cf ${fout} ${flist[@]}
tarexit=$?
# Set exit status according to tar
if [ $tarexit -ne 0 ]; then
(>&2 echo "tar failed with exit code $tarexit")
exit 254
fi
# Packing loop: every pass writes one tar archive from the files in flist and,
# if requested, verifies its contents against the originals in ${din}.
# Without splitting (split=0) the body runs exactly once with the full flist.
# With splitting (split=1) each pass picks the next run of files from
# flist_sbase whose estimated archive size stays below maxsize.
# Exit codes used inside the loop: 101 (single file does not fit into one
# archive), 254 (tar failed), 5 (checksum verification failed).
while [ $packloop -eq 1 ]; do
# Check if archive is to be split. If so, determine files to be packed in this iteration.
if [ $split -eq 1 ]; then
# Construct outfile name
# (base name plus ".<archive number>"; the number is incremented for the next pass)
fout="${fout_sbase}.${sarchivenum}"
((sarchivenum++))
# Cumulate size of archive and construct file list
snumfilecur=0
scursize=1024 # Initialize with size of trailing zero block
unset flist
# scounter is NOT reset here: it keeps pointing at the first file that has
# not yet been assigned to an archive.
while true; do
sfilesize=$(wc -c ${din}/${flist_sbase[${scounter}]} | awk '{print $1}') # raw size
sfilesize=$(((${sfilesize}+511)/512*512+512)) # adjust to 512 byte-blocks and add header
spredictsize=$((${scursize}+${sfilesize}))
# The file is accepted only if the predicted size stays strictly below maxsize
if [ ${spredictsize} -lt ${maxsize} ]; then
flist[${snumfilecur}]=${flist_sbase[${scounter}]}
((snumfilecur++))
((scounter++))
scursize=${spredictsize}
elif [ ${snumfilecur} -eq 0 ]; then
# Even an archive containing only this file would reach maxsize: give up
(>&2 echo "Error: file larger than maximum archive size: ${flist_sbase[${scounter}]}")
exit 101
else
# Archive is full; the current file stays at scounter for the next pass.
# Note: the "Splitter" messages are written to stdout, not stderr.
if [ $verbose -eq 1 ]; then
echo "Splitter: ${fout} (${snumfilecur} files, ${scursize} bytes)"
fi
break
fi
# All files have been assigned: this is the last archive
if [ ${scounter} -ge ${snumfiletot} ]; then
if [ $verbose -eq 1 ]; then
echo "Splitter: ${fout} (${snumfilecur} files, ${scursize} bytes)"
fi
flag_splitdone=1
break
fi
done
fi
# Create tar archive
# ($flagtar and the file list are expanded unquoted, so the shell word-splits
# them into separate arguments)
tar $flagtar --directory=${din} -cf ${fout} ${flist[@]}
tarexit=$?
if [ $tarexit -ne 0 ]; then
(>&2 echo "tar failed with exit code $tarexit")
exit 254
fi
# Compare checksums (CRC32), if flag is set
# flistx holds the file names with "_$seqnum" removed; these are the names
# used to extract the members from the archive below.
# NOTE(review): sed removes every occurrence (g flag), whereas the
# --transform expression in flagtar has no g flag -- confirm that
# "_$seqnum" occurs only once per file name.
flistx=($(echo ${flist[@]} | sed s/"_$seqnum"/""/g))
if [ $checksum -eq 1 ]; then
# Full verification: compare the first two fields of the cksum output
# for the original file and for the member streamed out of the archive.
for ii in "${!flistx[@]}"; do
if [ $verbose -eq 1 ]; then
(>&2 echo "Verifying checksum: ${flist[$ii]}")
fi
crcori=$(cksum ${din}/${flist[$ii]} | awk '{ print $1, $2 }')
crctar=$(tar --to-command='cksum -' -xf ${fout} ${flistx[$ii]} | awk '{ print $1, $2 }')
if [ "$crcori" != "$crctar" ]; then
(>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}")
exit 5
fi
done
elif [ $quicksum -eq 1 ]; then
# Quick verification: same comparison, but only over the leading part of
# each file as cut by "head -c 1M".
for ii in "${!flistx[@]}"; do
if [ $verbose -eq 1 ]; then
(>&2 echo "Verifying partial checksum: ${flist[$ii]}")
fi
crcori=$(head -c 1M ${din}/${flist[$ii]} | cksum -)
crctar=$(tar --to-command='head -c 1M' -xf ${fout} ${flistx[$ii]} | cksum -)
if [ "$crcori" != "$crctar" ]; then
(>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}")
exit 5
fi
done
fi
# Continue looping?
# Stop after the first pass when not splitting, or once the splitter has
# handed out the last file.
if ([ $split -eq 1 ] && [ $flag_splitdone -eq 1 ]) || [ $split -eq 0 ]; then
packloop=0
fi
done
# Compare checksums (CRC32), if flag is set
#din="./archive/" #for testing
flistx=($(echo ${flist[@]} | sed s/"_$seqnum"/""/g))
if [ $checksum -eq 1 ]; then
for ii in "${!flistx[@]}"; do
if [ $verbose -eq 1 ]; then
(>&2 echo "Verifying checksum: ${flist[$ii]}")
fi
crcori=$(cksum ${din}/${flist[$ii]} | awk '{ print $1, $2 }')
crctar=$(tar --to-command='cksum -' -xf ${fout} ${flistx[$ii]} | awk '{ print $1, $2 }')
if [ "$crcori" != "$crctar" ]; then
(>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}")
exit 5
fi
done
elif [ $quicksum -eq 1 ]; then
for ii in "${!flistx[@]}"; do
if [ $verbose -eq 1 ]; then
(>&2 echo "Verifying partial checksum: ${flist[$ii]}")
fi
crcori=$(head -c 1M ${din}/${flist[$ii]} | cksum -)
crctar=$(tar --to-command='head -c 1M' -xf ${fout} ${flistx[$ii]} | cksum -)
if [ "$crcori" != "$crctar" ]; then
(>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}")
exit 5
fi
done
fi
exit 0
exit 0