From f580a9378e7ed7f05d5d2c70201e180c022f03d7 Mon Sep 17 00:00:00 2001 From: "Michael Stumpf (ifhcluster)" Date: Mon, 2 Sep 2019 14:32:18 +0200 Subject: [PATCH] added support for clean archive splitting --- bash/ucftar_pack | 150 +++++++++++++++++++++++++++++----------- bash/ucftar_scalar_pack | 150 +++++++++++++++++++++++++++++----------- 2 files changed, 220 insertions(+), 80 deletions(-) diff --git a/bash/ucftar_pack b/bash/ucftar_pack index 3b358e0..b135131 100755 --- a/bash/ucftar_pack +++ b/bash/ucftar_pack @@ -1,6 +1,6 @@ #!/bin/bash display_help() { - (>&2 echo "Usage: $(basename "$0") [-chqv] [-o outfile] indir iseq") + (>&2 echo "Usage: $(basename "$0") [-chqv] [-s size] [-o outfile] indir iseq") (>&2 echo "UCF tar packer") (>&2 echo) (>&2 echo " indir path to input directory") @@ -9,6 +9,7 @@ display_help() { (>&2 echo " -h, --help display this help message") (>&2 echo " -o, --outfile output file (default: snapshot_XXXX.ucf.tar)") (>&2 echo " -q, --quicksum same as --checksum, but compares only first bytes") + (>&2 echo " -s, --split split archives so that they are smaller than given size (in GiB)") (>&2 echo " -v, --verbose verbose output") } exit_script() { @@ -23,9 +24,11 @@ if [ $# -eq 0 ]; then exit -1 fi fout="" +maxsize="" verbose=0 checksum=0 quicksum=0 +split=0 POSITIONAL=() while [[ $# -gt 0 ]] do @@ -53,6 +56,12 @@ case $key in quicksum=1 shift # past argument ;; + -s|--split) + split=1 + maxsize=$(echo "$2*1024*1024*1024" | bc -l | awk -F "." '{print $1}') # Convert GiB to bytes + shift # past argument + shift # past value + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -128,47 +137,108 @@ fi # Create a full file list for the archive flist=(${fparam} ${fgrid} ${fproc} ${fpart} ${fuvwp[@]}) -# Now tar them and remove seqence number from file names while doing so -flagtar="" +# Construct flags of tar command +flagtar="--format=ustar --transform=flags=r;s|_$seqnum||" if [ $verbose -eq 1 ]; then flagtar="$flagtar --verbose" fi +if [ $split -eq 1 ]; then + flagtar="$flagtar --blocking-factor=1" +fi + +# Initialize variables needed for splitting +if [ $split -eq 1 ]; then + sarchivenum=0 # Current archive number (suffix to file name) + scounter=0 # Counter of current file + snumfilecur=0 # Current number of files to be archived + snumfiletot=${#flist[@]} # Total number of files to be archived + scursize=0 # Current size of archive in bytes + sfilesize=0 # Size of current file in archive + spredictsize=0 # Predicted size of archive if current file is added + fout_sbase=$fout # Original outfile name (number is appended) + flist_sbase=("${flist[@]}") # Original list of files (to be split) + flag_splitdone=0 # Flag to indicate if all files have been packed + unset flist +fi + +# Start packing loop (only executed once, if archive is not to be split) +packloop=1 trap exit_script SIGINT SIGTERM -tar $flagtar --format ustar --transform="flags=r;s|_$seqnum||" --directory=${din} -cf ${fout} ${flist[@]} -tarexit=$? -# Set exit status accoring to tar -if [ $tarexit -ne 0 ]; then - (>&2 echo "tar failed with exit code $tarexit") - exit 254 -fi +while [ $packloop -eq 1 ]; do + # Check if archive is to be split. If so, determine files to be packed in this iteration. + if [ $split -eq 1 ]; then + # Construct outfile name + fout="${fout_sbase}.${sarchivenum}" + ((sarchivenum++)) + # Cumulate size of archive and construct file list + snumfilecur=0 + scursize=1024 # Initialize with size of trailing zero bloc + unset flist + while true; do + sfilesize=$(wc -c ${din}/${flist_sbase[${scounter}]} | awk '{print $1}') # raw size + sfilesize=$(((${sfilesize}+511)/512*512+512)) # adjust to 512 byte-blocks and add header + spredictsize=$((${scursize}+${sfilesize})) + if [ ${spredictsize} -lt ${maxsize} ]; then + flist[${snumfilecur}]=${flist_sbase[${scounter}]} + ((snumfilecur++)) + ((scounter++)) + scursize=${spredictsize} + elif [ ${snumfilecur} -eq 0 ]; then + (>&2 echo "Error: file larger than maximum archive size: ${flist_sbase[${scounter}]}") + exit 101 + else + if [ $verbose -eq 1 ]; then + echo "Splitter: ${fout} (${snumfilecur} files, ${scursize} bytes)" + fi + break + fi + if [ ${scounter} -ge ${snumfiletot} ]; then + if [ $verbose -eq 1 ]; then + echo "Splitter: ${fout} (${snumfilecur} files, ${scursize} bytes)" + fi + flag_splitdone=1 + break + fi + done + fi + # Create tar archive + tar $flagtar --directory=${din} -cf ${fout} ${flist[@]} + tarexit=$? + if [ $tarexit -ne 0 ]; then + (>&2 echo "tar failed with exit code $tarexit") + exit 254 + fi + # Compare checksums (CNC32), if flag is set + flistx=($(echo ${flist[@]} | sed s/"_$seqnum"/""/g)) + if [ $checksum -eq 1 ]; then + for ii in "${!flistx[@]}"; do + if [ $verbose -eq 1 ]; then + (>&2 echo "Verifying checksum: ${flist[$ii]}") + fi + crcori=$(cksum ${din}/${flist[$ii]} | awk '{ print $1, $2 }') + crctar=$(tar --to-command='cksum -' -xf ${fout} ${flistx[$ii]} | awk '{ print $1, $2 }') + if [ "$crcori" != "$crctar" ]; then + (>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}") + exit 5 + fi + done + elif [ $quicksum -eq 1 ]; then + for ii in "${!flistx[@]}"; do + if [ $verbose -eq 1 ]; then + (>&2 echo "Verifying partial checksum: ${flist[$ii]}") + fi + crcori=$(head -c 1M ${din}/${flist[$ii]} | cksum -) + crctar=$(tar --to-command='head -c 1M' -xf ${fout} ${flistx[$ii]} | cksum -) + if [ "$crcori" != "$crctar" ]; then + (>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}") + exit 5 + fi + done + fi + # Continue looping? + if ([ $split -eq 1 ] && [ $flag_splitdone -eq 1 ]) || [ $split -eq 0 ]; then + packloop=0 + fi +done -# Compare checksums (CNC32), if flag is set -#din="./archive/" #for testing -flistx=($(echo ${flist[@]} | sed s/"_$seqnum"/""/g)) -if [ $checksum -eq 1 ]; then - for ii in "${!flistx[@]}"; do - if [ $verbose -eq 1 ]; then - (>&2 echo "Verifying checksum: ${flist[$ii]}") - fi - crcori=$(cksum ${din}/${flist[$ii]} | awk '{ print $1, $2 }') - crctar=$(tar --to-command='cksum -' -xf ${fout} ${flistx[$ii]} | awk '{ print $1, $2 }') - if [ "$crcori" != "$crctar" ]; then - (>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}") - exit 5 - fi - done -elif [ $quicksum -eq 1 ]; then - for ii in "${!flistx[@]}"; do - if [ $verbose -eq 1 ]; then - (>&2 echo "Verifying partial checksum: ${flist[$ii]}") - fi - crcori=$(head -c 1M ${din}/${flist[$ii]} | cksum -) - crctar=$(tar --to-command='head -c 1M' -xf ${fout} ${flistx[$ii]} | cksum -) - if [ "$crcori" != "$crctar" ]; then - (>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}") - exit 5 - fi - done -fi - -exit 0 +exit 0 \ No newline at end of file diff --git a/bash/ucftar_scalar_pack b/bash/ucftar_scalar_pack index e2b0a00..b106eb8 100755 --- a/bash/ucftar_scalar_pack +++ b/bash/ucftar_scalar_pack @@ -1,6 +1,6 @@ #!/bin/bash display_help() { - (>&2 echo "Usage: $(basename "$0") [-chqv] [-o outfile] indir iseq") + (>&2 echo "Usage: $(basename "$0") [-chqv] [-s size] [-o outfile] indir iseq") (>&2 echo "UCF tar scalar packer") (>&2 echo) (>&2 echo " indir path to input directory") @@ -9,6 +9,7 @@ display_help() { (>&2 echo " -h, --help display this help message") (>&2 echo " -o, --outfile output file (default: snapshot_XXXX.ucf.tar)") (>&2 echo " -q, --quicksum same as --checksum, but compares only first bytes") + (>&2 echo " -s, --split split archives so that they are smaller than given size (in GiB)") (>&2 echo " -v, --verbose verbose output") } exit_script() { @@ -23,9 +24,11 @@ if [ $# -eq 0 ]; then exit -1 fi fout="" +maxsize="" verbose=0 checksum=0 quicksum=0 +split=0 POSITIONAL=() while [[ $# -gt 0 ]] do @@ -53,6 +56,12 @@ case $key in quicksum=1 shift # past argument ;; + -s|--split) + split=1 + maxsize=$(echo "$2*1024*1024*1024" | bc -l | awk -F "." '{print $1}') # Convert GiB to bytes + shift # past argument + shift # past value + ;; *) # unknown option POSITIONAL+=("$1") # save it in an array for later shift # past argument @@ -129,47 +138,108 @@ fi # Create a full file list for the archive flist=(${fparam} ${fgrid} ${fproc} ${fpart} ${fscal[@]}) -# Now tar them and remove seqence number from file names while doing so -flagtar="" +# Construct flags of tar command +flagtar="--format=ustar --transform=flags=r;s|_$seqnum||" if [ $verbose -eq 1 ]; then flagtar="$flagtar --verbose" fi +if [ $split -eq 1 ]; then + flagtar="$flagtar --blocking-factor=1" +fi + +# Initialize variables needed for splitting +if [ $split -eq 1 ]; then + sarchivenum=0 # Current archive number (suffix to file name) + scounter=0 # Counter of current file + snumfilecur=0 # Current number of files to be archived + snumfiletot=${#flist[@]} # Total number of files to be archived + scursize=0 # Current size of archive in bytes + sfilesize=0 # Size of current file in archive + spredictsize=0 # Predicted size of archive if current file is added + fout_sbase=$fout # Original outfile name (number is appended) + flist_sbase=("${flist[@]}") # Original list of files (to be split) + flag_splitdone=0 # Flag to indicate if all files have been packed + unset flist +fi + +# Start packing loop (only executed once, if archive is not to be split) +packloop=1 trap exit_script SIGINT SIGTERM -tar $flagtar --format ustar --transform="flags=r;s|_$seqnum||" --directory=${din} -cf ${fout} ${flist[@]} -tarexit=$? -# Set exit status accoring to tar -if [ $tarexit -ne 0 ]; then - (>&2 echo "tar failed with exit code $tarexit") - exit 254 -fi +while [ $packloop -eq 1 ]; do + # Check if archive is to be split. If so, determine files to be packed in this iteration. + if [ $split -eq 1 ]; then + # Construct outfile name + fout="${fout_sbase}.${sarchivenum}" + ((sarchivenum++)) + # Cumulate size of archive and construct file list + snumfilecur=0 + scursize=1024 # Initialize with size of trailing zero bloc + unset flist + while true; do + sfilesize=$(wc -c ${din}/${flist_sbase[${scounter}]} | awk '{print $1}') # raw size + sfilesize=$(((${sfilesize}+511)/512*512+512)) # adjust to 512 byte-blocks and add header + spredictsize=$((${scursize}+${sfilesize})) + if [ ${spredictsize} -lt ${maxsize} ]; then + flist[${snumfilecur}]=${flist_sbase[${scounter}]} + ((snumfilecur++)) + ((scounter++)) + scursize=${spredictsize} + elif [ ${snumfilecur} -eq 0 ]; then + (>&2 echo "Error: file larger than maximum archive size: ${flist_sbase[${scounter}]}") + exit 101 + else + if [ $verbose -eq 1 ]; then + echo "Splitter: ${fout} (${snumfilecur} files, ${scursize} bytes)" + fi + break + fi + if [ ${scounter} -ge ${snumfiletot} ]; then + if [ $verbose -eq 1 ]; then + echo "Splitter: ${fout} (${snumfilecur} files, ${scursize} bytes)" + fi + flag_splitdone=1 + break + fi + done + fi + # Create tar archive + tar $flagtar --directory=${din} -cf ${fout} ${flist[@]} + tarexit=$? + if [ $tarexit -ne 0 ]; then + (>&2 echo "tar failed with exit code $tarexit") + exit 254 + fi + # Compare checksums (CNC32), if flag is set + flistx=($(echo ${flist[@]} | sed s/"_$seqnum"/""/g)) + if [ $checksum -eq 1 ]; then + for ii in "${!flistx[@]}"; do + if [ $verbose -eq 1 ]; then + (>&2 echo "Verifying checksum: ${flist[$ii]}") + fi + crcori=$(cksum ${din}/${flist[$ii]} | awk '{ print $1, $2 }') + crctar=$(tar --to-command='cksum -' -xf ${fout} ${flistx[$ii]} | awk '{ print $1, $2 }') + if [ "$crcori" != "$crctar" ]; then + (>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}") + exit 5 + fi + done + elif [ $quicksum -eq 1 ]; then + for ii in "${!flistx[@]}"; do + if [ $verbose -eq 1 ]; then + (>&2 echo "Verifying partial checksum: ${flist[$ii]}") + fi + crcori=$(head -c 1M ${din}/${flist[$ii]} | cksum -) + crctar=$(tar --to-command='head -c 1M' -xf ${fout} ${flistx[$ii]} | cksum -) + if [ "$crcori" != "$crctar" ]; then + (>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}") + exit 5 + fi + done + fi + # Continue looping? + if ([ $split -eq 1 ] && [ $flag_splitdone -eq 1 ]) || [ $split -eq 0 ]; then + packloop=0 + fi +done -# Compare checksums (CNC32), if flag is set -#din="./archive/" #for testing -flistx=($(echo ${flist[@]} | sed s/"_$seqnum"/""/g)) -if [ $checksum -eq 1 ]; then - for ii in "${!flistx[@]}"; do - if [ $verbose -eq 1 ]; then - (>&2 echo "Verifying checksum: ${flist[$ii]}") - fi - crcori=$(cksum ${din}/${flist[$ii]} | awk '{ print $1, $2 }') - crctar=$(tar --to-command='cksum -' -xf ${fout} ${flistx[$ii]} | awk '{ print $1, $2 }') - if [ "$crcori" != "$crctar" ]; then - (>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}") - exit 5 - fi - done -elif [ $quicksum -eq 1 ]; then - for ii in "${!flistx[@]}"; do - if [ $verbose -eq 1 ]; then - (>&2 echo "Verifying partial checksum: ${flist[$ii]}") - fi - crcori=$(head -c 1M ${din}/${flist[$ii]} | cksum -) - crctar=$(tar --to-command='head -c 1M' -xf ${fout} ${flistx[$ii]} | cksum -) - if [ "$crcori" != "$crctar" ]; then - (>&2 echo "Verification failed: ${flist[$ii]} ${flistx[$ii]}") - exit 5 - fi - done -fi - -exit 0 +exit 0 \ No newline at end of file