bwdatools/bwdasync

145 lines
3.6 KiB
Bash
Executable File

#!/bin/bash
# Compare the contents of a local directory with the contents of the
# corresponding archive directory on HPSS and upload the files that are
# missing there.
# Requirements:
# - lftp must be available
# - ~/.netrc must be configured
# - ~/.bwda must be configured (bwda_acc, bwda_url, ldir_base, rdir_base)
# - directory structure needs to be the same on both ends
# Load the user configuration. The path is quoted so that a home directory
# containing whitespace still works. Settings that are already present in
# the environment are accepted when the file cannot be read.
bwda_config="${HOME}/.bwda"
if [[ -r "$bwda_config" ]]; then
  # shellcheck source=/dev/null
  source "$bwda_config"
fi
# Stop early when a setting is missing: bwda_acc and bwda_url form the SFTP
# URL, and an empty ldir_base would make every local directory look like it
# is located below the base directory. rdir_base only has to be defined; an
# empty value places the archive directly below "/".
for bwda_setting in bwda_acc bwda_url ldir_base rdir_base; do
  if [[ -z "${!bwda_setting+set}" ]] ||
    [[ "$bwda_setting" != rdir_base && -z "${!bwda_setting}" ]]; then
    >&2 echo "Missing setting '${bwda_setting}' (expected in ${bwda_config})."
    exit 1
  fi
done
unset bwda_setting
#######################################
# Print the command line help.
# Arguments: none (the program name is derived from $0)
# Outputs:   the usage text on stdout
#######################################
usage() {
  # ${0##*/} strips the directory part without word-splitting, so a script
  # path that contains whitespace no longer breaks the first line.
  local prog=${0##*/}
  printf '%s\n' \
    "Usage: ${prog} [opt] [dir]" \
    " dir path to local directory on LSDF" \
    " --include-size-mismatch include files which mismatch in size to sync list" \
    " -n | --dry-run print commands to stdout instead of executing them" \
    " -e | --extension ext only sync files with specified extension" \
    " -P | --parallel N transfer N files in parallel" \
    " -h | --help display this message"
}
#######################################
# Parse the command line.
# Globals written:
#   flag_sizemismatch - "true" when --include-size-mismatch was given
#   flag_dryrun       - "true" when -n/--dry-run was given
#   filter_ext        - argument of -e/--extension (empty: no filter)
#   nparallel         - argument of -P/--parallel (default 1)
#   POSITIONAL        - array of the remaining non-option arguments
# Arguments: the command line words
# Outputs:   error text (and the usage text) on stderr for invalid options
# Exits:     0 after -h/--help, 255 on an invalid command line
#######################################
bwda_parse_args() {
  flag_sizemismatch=false
  flag_dryrun=false
  filter_ext=
  nparallel=1
  POSITIONAL=()
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --include-size-mismatch)
        flag_sizemismatch=true
        shift
        ;;
      -n | --dry-run)
        flag_dryrun=true
        shift
        ;;
      -e | --extension)
        # Without this check a trailing "-e" was silently ignored.
        if [[ $# -lt 2 ]]; then
          >&2 echo "Option $1 requires an argument."
          usage >&2
          exit 255
        fi
        filter_ext="$2"
        shift 2
        ;;
      -P | --parallel)
        # The value ends up verbatim in the lftp batch ("set cmd:parallel N"),
        # so only a positive integer is accepted.
        if [[ $# -lt 2 || ! "$2" =~ ^[1-9][0-9]*$ ]]; then
          >&2 echo "Option $1 requires a positive integer argument."
          usage >&2
          exit 255
        fi
        nparallel="$2"
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      --)
        # Everything after "--" is positional, even if it starts with "-".
        shift
        POSITIONAL+=("$@")
        break
        ;;
      -?*)
        # Unknown options used to be mistaken for the directory argument.
        >&2 echo "Unknown option: $1"
        usage >&2
        exit 255
        ;;
      *)
        POSITIONAL+=("$1")
        shift
        ;;
    esac
  done
}

bwda_parse_args "$@"
set -- "${POSITIONAL[@]}" # restore positional parameters
#######################################
# Determine the local directory that is to be synchronised.
# Globals written:
#   ldir_target - canonical path of the directory as printed by realpath
# Arguments: at most one directory; the current directory when omitted
# Outputs:   usage text on stdout for surplus arguments, errors on stderr
# Exits:     255 (the status the former "exit -1" produced) on surplus
#            arguments or when the path is not an existing directory
#######################################
bwda_resolve_target() {
  if [[ $# -gt 1 ]]; then
    usage
    exit 255
  fi
  # The path is quoted and guarded with "--" so that whitespace or a
  # leading dash in the directory name reaches realpath unchanged.
  local requested=${1-$PWD}
  if ! ldir_target=$(realpath -- "$requested"); then
    >&2 echo "Cannot resolve directory: ${requested}"
    exit 255
  fi
  if [[ ! -d "$ldir_target" ]]; then
    >&2 echo "Not a directory: ${ldir_target}"
    exit 255
  fi
}

bwda_resolve_target "$@"
#######################################
# Translate a local directory into its counterpart below the remote base.
# Globals written:
#   dir_target  - path of the directory relative to the local base
#                 (no leading slash, empty for the base itself)
#   rdir_target - remote base joined with dir_target
# Arguments:
#   $1 - local directory
#   $2 - local base directory
#   $3 - remote base directory
# Returns: 0 on success, 1 when $1 is not located below $2 (globals are
#          left untouched in that case)
#######################################
bwda_map_remote_dir() {
  local target=$1
  local lbase=${2%/}
  local rbase=${3%/}
  # Compare whole path components: a plain prefix test would also accept
  # e.g. /lsdf/base2 for the base /lsdf/base.
  if [[ "$target" != "$lbase" && "$target" != "$lbase"/* ]]; then
    return 1
  fi
  dir_target=${target#"$lbase"}
  dir_target=${dir_target#/}
  rdir_target="${rbase}/${dir_target}"
}

# Get current path relative to base
if ! bwda_map_remote_dir "$ldir_target" "$ldir_base" "$rdir_base"; then
  >&2 echo "Target directory is not located on LSDF!"
  >&2 echo "Is the base directory setting correct?"
  >&2 echo "ldir_base: $ldir_base"
  exit 254 # the status the former "exit -2" produced
fi
# Receive the list of files on the SFTP server, omitting directories.
# Fills the parallel arrays rfilename (column 9 onwards of the "ls -l"
# output) and rfilesize (column 5).
#
# The remote path is passed to lftp as a double-quoted argument with
# backslash and double-quote characters escaped, so that whitespace in
# the path does not split it into several arguments.
bwda_rdir_quoted=${rdir_target//\\/\\\\}
bwda_rdir_quoted=${bwda_rdir_quoted//\"/\\\"}
# lftp's status is checked directly instead of being hidden behind a
# pipeline: a failed listing must not be mistaken for an empty archive,
# because every local file would then be scheduled for upload.
if ! bwda_raw_listing=$(lftp "sftp://${bwda_acc}@${bwda_url}" \
  -e "ls -l \"${bwda_rdir_quoted}\"; bye"); then
  >&2 echo "Could not list ${rdir_target} on ${bwda_url}."
  exit 1
fi
rfilelist=$(grep -v '^d' <<< "$bwda_raw_listing")
rfilename=()
rfilesize=()
# Lines with fewer than nine columns (e.g. a "total" line) are skipped for
# both arrays so that names and sizes stay aligned. Reading line by line
# avoids word-splitting and glob expansion of the file names.
while IFS=$'\t' read -r bwda_size bwda_name; do
  rfilesize+=("$bwda_size")
  rfilename+=("$bwda_name")
done < <(awk '
  NF >= 9 {
    name = $9
    for (i = 10; i <= NF; i++) name = name " " $i
    # For symbolic links keep only the link name, not " -> target".
    if ($1 ~ /^l/) sub(/ -> .*$/, "", name)
    print $5 "\t" name
  }' <<< "$rfilelist")
unset bwda_rdir_quoted bwda_raw_listing bwda_size bwda_name
#######################################
# Collect the regular files of a local directory together with their sizes.
# Directories are omitted and, as with a plain "ls -l", hidden entries are
# not listed. Symbolic links are followed, so a link to a regular file is
# reported with the size of the file it points to.
# Globals written:
#   lfilename - array of file names (without the directory part)
#   lfilesize - array of sizes in bytes, parallel to lfilename
# Arguments: $1 - directory to list (current directory when empty)
#######################################
bwda_list_local() {
  local dir=${1:-.}
  local entry size
  lfilename=()
  lfilesize=()
  # Globbing instead of parsing "ls -l" keeps names with whitespace intact.
  for entry in "$dir"/*; do
    # Also skips the literal pattern that remains when nothing matches.
    [[ -f "$entry" ]] || continue
    # GNU stat first, BSD stat as a fallback.
    size=$(stat -L -c %s -- "$entry" 2>/dev/null) ||
      size=$(stat -L -f %z -- "$entry" 2>/dev/null) ||
      continue
    lfilename+=("${entry##*/}")
    lfilesize+=("$size")
  done
}

# Get list of local files, also omit directories
bwda_list_local "$ldir_target"
# When an extension filter is set, drop every local entry whose name does
# not end with it. Entries are unset in place, so the indices of the
# remaining names and sizes stay paired.
if [[ -n "$filter_ext" ]]; then
  for i in "${!lfilename[@]}"; do
    # Names carrying the requested suffix are kept untouched.
    [[ "${lfilename[i]}" == *"$filter_ext" ]] && continue
    unset "lfilename[$i]"
    unset "lfilesize[$i]"
  done
fi
#######################################
# Remove from the local list every file that is already present on HPSS,
# so that only the files still to be uploaded remain.
# A file present on both sides with different sizes is reported; it stays
# in the list only when flag_sizemismatch is "true".
# Globals read:     rfilename, rfilesize, flag_sizemismatch
# Globals modified: lfilename, lfilesize (matching entries are unset)
# Outputs:          one "Filesize mismatch" line per differing file on stderr
#######################################
bwda_prune_synced() {
  local ir il
  for ir in "${!rfilename[@]}"; do
    for il in "${!lfilename[@]}"; do
      # The right-hand side is quoted on purpose: unquoted it is treated as
      # a glob pattern, so a local name such as "data[1].nc" would be
      # considered equal to the remote file "data1.nc".
      [[ "${rfilename[ir]}" == "${lfilename[il]}" ]] || continue
      if [[ "${rfilesize[ir]}" != "${lfilesize[il]}" ]]; then
        >&2 echo "Filesize mismatch: ${rfilename[ir]}, HPSS=${rfilesize[ir]}, local=${lfilesize[il]}"
        # Keep the file in the list so that it is transferred again.
        [[ "$flag_sizemismatch" == true ]] && break
      fi
      unset "lfilename[$il]"
      unset "lfilesize[$il]"
      break
    done
  done
}

# Check for local files which are not on HPSS.
bwda_prune_synced
#######################################
# Quote one argument for use in an lftp command line.
# Arguments made only of unproblematic characters are returned unchanged;
# anything else is wrapped in double quotes with backslash and double-quote
# characters escaped.
# Arguments: $1 - the argument
# Globals written: REPLY - the quoted argument
#######################################
bwda_lftp_quote() {
  local arg=$1
  local safe='^[A-Za-z0-9_./:@%+,=-]+$'
  if [[ ! "$arg" =~ $safe ]]; then
    arg=${arg//\\/\\\\}
    arg=${arg//\"/\\\"}
    arg="\"${arg}\""
  fi
  REPLY=$arg
}

#######################################
# Write the lftp batch script that uploads all remaining local files.
# Every value is passed to printf as data, never as the format string, so
# "%" or "\" in a path or file name is written out literally.
# Globals read: bwda_acc, bwda_url, nparallel, rdir_target, ldir_target,
#               lfilename
# Outputs:      the batch script on stdout
#######################################
bwda_build_batch() {
  local il
  printf 'open %s\n' "sftp://${bwda_acc}@${bwda_url}"
  printf 'set cmd:parallel %s\n' "$nparallel"
  bwda_lftp_quote "$rdir_target"
  printf 'cd %s\n' "$REPLY"
  bwda_lftp_quote "$ldir_target"
  printf 'lcd %s\n' "$REPLY"
  for il in "${!lfilename[@]}"; do
    bwda_lftp_quote "${lfilename[il]}"
    printf 'put %s\n' "$REPLY"
  done
  printf 'bye\n'
}

# Get number of files to sync
nfiles="${#lfilename[@]}"
if [[ $nfiles -eq 0 ]]; then
  >&2 echo "Nothing to be done."
else
  >&2 echo "Syncing $nfiles files."
  if [[ "$flag_dryrun" == true ]]; then
    bwda_build_batch
  else
    tmpfile=$(mktemp) || exit 1
    # Remove the batch file on every exit path, including an aborted run.
    trap 'rm -f -- "$tmpfile"' EXIT
    bwda_build_batch > "$tmpfile"
    # A bare "exit" hands lftp's failure status on to the caller.
    lftp -f "$tmpfile" || exit
  fi
fi