#!/bin/bash
#
# Compare the contents of a local directory with the contents of the
# corresponding archive directory on HPSS and upload the files that are
# missing there (optionally also those whose size differs).
#
# Requirements:
#   - lftp must be available
#   - ~/.netrc must be configured
#   - ~/.bwda must be configured (bwda_acc, bwda_url, ldir_base, rdir_base)
#   - directory structure needs to be the same on both ends
#
# Exit status:
#   0    success (including "nothing to do" and --help)
#   1    missing tool/configuration, unusable directory, failed listing
#   254  target directory is not below ldir_base
#   255  usage error
#   Otherwise the exit status of the lftp transfer run is passed through.

readonly CONFIG_FILE="$HOME/.bwda"

# Sourced at top level (as before) so the settings become plain globals.
# Whether the required settings are present is checked later in main, so
# that --help keeps working without a configuration.
if [[ -r "$CONFIG_FILE" ]]; then
  # shellcheck source=/dev/null
  source "$CONFIG_FILE"
fi

# Path of the temporary lftp batch file. Global (not local to main) so the
# EXIT trap can still see it when it fires.
tmpfile=

# Print the usage message to stdout.
usage() {
  echo "Usage: $(basename "$0") [opt] [dir]"
  echo " dir path to local directory on LSDF"
  echo " --include-size-mismatch include files which mismatch in size to sync list"
  echo " -n | --dry-run print commands to stdout instead of executing them"
  echo " -e | --extension ext only sync files with specified extension"
  echo " -P | --parallel N transfer N files in parallel"
  echo " -h | --help display this message"
}

# Print the usage message to stderr and exit with the usage-error status.
# 255 is what the historical `exit -1` evaluated to.
usage_error() {
  usage >&2
  exit 255
}

# Print each remaining argument as one line on stderr and exit.
# Arguments: $1 - exit status, $2.. - message lines
die() {
  local status=$1
  shift
  printf '%s\n' "$@" >&2
  exit "$status"
}

# EXIT trap: remove the temporary batch file if one was created.
cleanup() {
  if [[ -n "$tmpfile" ]]; then
    rm -f -- "$tmpfile"
  fi
}

# Convert `ls -l` style text on stdin into "<size><TAB><name>" lines.
# Directories and lines with fewer than nine fields (e.g. "total N") are
# dropped. Size and name are emitted together so they can never get out of
# step, and the name is everything after the eighth field so that names
# containing blanks survive. For symbolic links the " -> target" suffix is
# removed.
parse_listing() {
  awk '
    /^d/ || NF < 9 { next }
    {
      size = $5
      name = $0
      for (i = 1; i <= 8; i++) {
        sub(/^[ \t]*[^ \t]+/, "", name)
      }
      sub(/^[ \t]+/, "", name)
      if ($0 ~ /^l/) {
        sub(/ -> .*$/, "", name)
      }
      print size "\t" name
    }
  '
}

# Print $1 as a single lftp command word.
# Ordinary names are printed unchanged (so generated batch files look the
# same as before); anything else is wrapped in double quotes with embedded
# backslashes and double quotes escaped.
lftp_quote() {
  local word=$1
  local safe='^[A-Za-z0-9_./:@%+=,~-]+$'
  if [[ "$word" =~ $safe ]]; then
    printf '%s' "$word"
  else
    word=${word//\\/\\\\}
    word=${word//\"/\\\"}
    printf '"%s"' "$word"
  fi
}

# Parse the command line, work out which local files are missing on HPSS
# and either print the lftp batch script (--dry-run) or execute it.
main() {
  local flag_sizemismatch=false
  local flag_dryrun=false
  local filter_ext=
  local nparallel=1
  local -a positional=()

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --include-size-mismatch)
        flag_sizemismatch=true
        shift
        ;;
      -n|--dry-run)
        flag_dryrun=true
        shift
        ;;
      -e|--extension)
        # An option given without its value used to be accepted silently.
        if [[ $# -lt 2 ]]; then
          usage_error
        fi
        filter_ext=$2
        shift 2
        ;;
      -P|--parallel)
        if [[ $# -lt 2 ]]; then
          usage_error
        fi
        nparallel=$2
        shift 2
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        positional+=("$1")
        shift
        ;;
    esac
  done

  if [[ ! "$nparallel" =~ ^[1-9][0-9]*$ ]]; then
    printf '%s\n' "Invalid value for --parallel: $nparallel" >&2
    usage_error
  fi
  if [[ ${#positional[@]} -gt 1 ]]; then
    usage_error
  fi

  # Environment checks.
  local var
  for var in bwda_acc bwda_url ldir_base rdir_base; do
    if [[ -z "${!var}" ]]; then
      die 1 "Setting '$var' is missing; please check $CONFIG_FILE"
    fi
  done
  if ! command -v lftp >/dev/null; then
    die 1 "lftp is required but was not found in PATH."
  fi

  # Resolve the local target directory.
  local ldir_target
  if [[ ${#positional[@]} -eq 0 ]]; then
    ldir_target=$(realpath -- "$PWD") \
      || die 1 "Cannot resolve current directory."
  else
    ldir_target=$(realpath -- "${positional[0]}") \
      || die 1 "Cannot resolve directory: ${positional[0]}"
  fi
  if [[ ! -d "$ldir_target" ]]; then
    die 1 "Not a directory: $ldir_target"
  fi

  # Get current path relative to base.
  local dir_target
  if [[ "$ldir_target" == "${ldir_base}"* ]]; then
    dir_target=${ldir_target#"$ldir_base"}
  else
    # 254 is what the historical `exit -2` evaluated to.
    die 254 \
      "Target directory is not located on LSDF!" \
      "Is the base directory setting correct?" \
      "ldir_base: $ldir_base"
  fi
  local rdir_target="${rdir_base}/${dir_target}"

  local rdir_quoted ldir_quoted
  rdir_quoted=$(lftp_quote "$rdir_target")
  ldir_quoted=$(lftp_quote "$ldir_target")

  # Receive list of files on SFTP server. A failed listing must not be
  # mistaken for an empty archive, otherwise every local file would be
  # queued for upload.
  local rfilelist
  if ! rfilelist=$(lftp "sftp://${bwda_acc}@${bwda_url}" \
      -e "ls -l ${rdir_quoted}; bye"); then
    die 1 "Could not list remote directory: $rdir_target"
  fi

  # Get list of local files.
  local lfilelist
  if ! lfilelist=$(ls -l -- "$ldir_target"); then
    die 1 "Could not list local directory: $ldir_target"
  fi

  local size name
  local -a rfilename=() rfilesize=() lfilename=() lfilesize=()

  while IFS=$'\t' read -r size name; do
    rfilesize+=("$size")
    rfilename+=("$name")
  done < <(parse_listing <<<"$rfilelist")

  while IFS=$'\t' read -r size name; do
    # Drop local files not ending with the requested extension.
    if [[ -n "$filter_ext" && "$name" != *"$filter_ext" ]]; then
      continue
    fi
    lfilesize+=("$size")
    lfilename+=("$name")
  done < <(parse_listing <<<"$lfilelist")

  # Select local files which are not on HPSS. A file that exists remotely
  # with a different size is always reported, but only selected when
  # --include-size-mismatch was given.
  local il ir selected
  local -a to_sync=()
  for il in "${!lfilename[@]}"; do
    selected=true
    for ir in "${!rfilename[@]}"; do
      # Both sides quoted: literal comparison, not a glob match.
      if [[ "${rfilename[ir]}" != "${lfilename[il]}" ]]; then
        continue
      fi
      selected=false
      if [[ "${rfilesize[ir]}" != "${lfilesize[il]}" ]]; then
        >&2 echo "Filesize mismatch: ${rfilename[ir]}, HPSS=${rfilesize[ir]}, local=${lfilesize[il]}"
        if [[ "$flag_sizemismatch" == true ]]; then
          selected=true
        fi
      fi
      break
    done
    if [[ "$selected" == true ]]; then
      to_sync+=("${lfilename[il]}")
    fi
  done

  # Get number of files to sync.
  local nfiles=${#to_sync[@]}
  if [[ "$nfiles" -eq 0 ]]; then
    >&2 echo "Nothing to be done."
    return 0
  fi
  >&2 echo "Syncing $nfiles files."

  # Assemble the lftp batch script line by line. The lines are later
  # written with a fixed '%s\n' format so that '%' or '\' in a path or
  # file name cannot be interpreted by printf.
  local -a batch=(
    "open sftp://${bwda_acc}@${bwda_url}"
    "set cmd:parallel ${nparallel}"
    "cd ${rdir_quoted}"
    "lcd ${ldir_quoted}"
  )
  for name in "${to_sync[@]}"; do
    batch+=("put $(lftp_quote "$name")")
  done
  batch+=("bye")

  if [[ "$flag_dryrun" == true ]]; then
    printf '%s\n' "${batch[@]}"
    return 0
  fi

  tmpfile=$(mktemp) || die 1 "Could not create temporary batch file."
  printf '%s\n' "${batch[@]}" > "$tmpfile" \
    || die 1 "Could not write temporary batch file: $tmpfile"

  # Hand the transfer result back to the caller instead of discarding it.
  lftp -f "$tmpfile"
}

trap cleanup EXIT
main "$@"