commit 76cdcd130fcb2dd67510337c95344a4773a92f5c
Author: pe7321@kit.edu
Date:   Thu Feb 18 12:06:58 2021 +0100

    initial commit

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..609a6ca
--- /dev/null
+++ b/README.md
@@ -0,0 +1,76 @@
+# bwdatools
+Some tools to simplify file synchronization between LSDF@KIT
+and the bwDataArchive.
+
+## Getting started
+### Prerequisites
+The tool `lftp` is used to access bwDataArchive and needs to be available
+in your `$PATH`. The tool is installed on `os-login.lsdf.kit.edu` or can be
+obtained from [here](https://lftp.yar.ru/).
+
+In order to minimize password inputs, the bwda sftp server can be
+configured in `~/.netrc` in the following way.
+```
+~/.netrc
+---
+machine archive-sftp.lsdf.kit.edu
+login
+password
+```
+
+The tools included in this package require further machine-specific
+configurations to determine the path on bwda automatically. This is
+done using the file `~/.bwda`. Here is an example configuration for
+the machine `os-login.lsdf.kit.edu`.
+```
+~/.bwda
+---
+bwda_acc="xyz"
+bwda_url="archive-sftp.lsdf.kit.edu"
+ldir_base="/lsdf02/lsdf/kit/ifh/projects/" # no trailing slash!
+rdir_base="private" # no trailing slash!
+```
+By default, it is assumed that the directory tree (starting from `ldir_base` on LSDF
+and `rdir_base` on bwDataArchive) is the same.
+
+### Installing
+Clone this repository using `git clone https://git.mkray.de/mwtkrayer/bwdatools.git`
+and add the directory to your `$PATH`. Do not forget to set up `$HOME/.bwda`.
+
+## Usage
+### Interactive session
+```bash
+Usage: bwda [-h] [path]
+Opens an interactive lftp session on bwDataArchive.
+ path directory on LSDF, session will cd to the corresponding bwda directory
+ -h | --help display this message
+```
+
+### List directory content on bwda
+```bash
+Usage: bwdals [opt]
+List the contents of the current directory on bwda.
+Options are directly passed to lftp's ls command with the following exceptions:
+ -h | --help display this message
+```
+
+### Transfer files from bwda to LSDF (in parallel)
+```bash
+Usage: bwdaget files
+ files files to be downloaded
+ -c | --channel N use N channels per file
+ -n | --dry-run print commands to stdout instead of executing them
+ -P | --parallel N transfer N files in parallel
+ -h | --help display this message
+```
+
+### Synchronize files from LSDF to bwda
+```bash
+Usage: bwdasync [opt] [dir]
+ dir path to local directory on LSDF
+ --include-size-mismatch include files which mismatch in size to sync list
+ -n | --dry-run print commands to stdout instead of executing them
+ -e | --extension ext only sync files with specified extension
+ -P | --parallel N transfer N files in parallel
+ -h | --help display this message
+```
diff --git a/bwda b/bwda
new file mode 100755
index 0000000..0ffa886
--- /dev/null
+++ b/bwda
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Open an interactive lftp session on bwDataArchive. If a path on LSDF is
+# given, the session starts in the corresponding directory on bwda.
+# Requirements:
+# - lftp must be available
+# - ~/.bwda must be configured (bwda_acc, bwda_url, ldir_base, rdir_base)
+source "$HOME/.bwda"
+
+# Usage function
+usage(){
+  echo "Usage: $(basename "$0") [opt] [path]"
+  echo "Opens an interactive lftp session on bwDataArchive."
+  echo " path directory on LSDF, session will cd to the corresponding bwda directory"
+  echo " -h | --help display this message"
+}
+
+# Get command line arguments
+POSITIONAL=()
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      POSITIONAL+=("$1")
+      shift
+      ;;
+  esac
+done
+set -- "${POSITIONAL[@]}" # restore positional parameters
+
+if [[ $# -eq 0 ]]; then
+  lftp "sftp://${bwda_acc}@${bwda_url}"
+else
+  ldir_target="$(realpath -- "$1")"
+  # Get target path relative to base. A trailing slash on ldir_base is
+  # tolerated and the match is anchored at a path separator, so that a
+  # sibling such as "${ldir_base}2" is not mistaken for a subdirectory.
+  ldir_root="${ldir_base%/}"
+  if [[ "$ldir_target" == "$ldir_root" || "$ldir_target" == "$ldir_root"/* ]]; then
+    dir_target="${ldir_target#"$ldir_root"}"
+    dir_target="${dir_target#/}"
+  else
+    >&2 echo "Target directory is not located on LSDF!"
+    >&2 echo "Is the base directory setting correct?"
+    >&2 echo "ldir_base: $ldir_base"
+    exit 255
+  fi
+  rdir_target="${rdir_base}/${dir_target}"
+  lftp "sftp://${bwda_acc}@${bwda_url}" -e "cd $rdir_target"
+fi
diff --git a/bwdaget b/bwdaget
new file mode 100755
index 0000000..02b8634
--- /dev/null
+++ b/bwdaget
@@ -0,0 +1,108 @@
+#!/bin/bash
+# Download files from the bwda directory which corresponds to the current
+# working directory on LSDF.
+# Requirements:
+# - lftp must be available
+# - ~/.bwda must be configured (bwda_acc, bwda_url, ldir_base, rdir_base)
+source "$HOME/.bwda"
+
+# Usage function
+usage(){
+  echo "Usage: $(basename "$0") files"
+  echo " files files to be downloaded"
+  echo " -c | --channel N use N channels per file"
+  echo " -n | --dry-run print commands to stdout instead of executing them"
+  echo " -P | --parallel N transfer N files in parallel"
+  echo " -h | --help display this message"
+}
+
+# Abort if the option $1 is the last of the $2 remaining arguments, i.e. if
+# its value is missing.
+require_value(){
+  if [[ $2 -lt 2 ]]; then
+    >&2 echo "Option $1 requires an argument."
+    exit 255
+  fi
+}
+
+# Get command line arguments
+nparallel=1
+nchannel=1
+flag_dryrun=false
+
+POSITIONAL=()
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -c|--channel)
+      require_value "$1" "$#"
+      nchannel="$2"
+      shift 2
+      ;;
+    -n|--dry-run)
+      flag_dryrun=true
+      shift
+      ;;
+    -P|--parallel)
+      require_value "$1" "$#"
+      nparallel="$2"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      POSITIONAL+=("$1")
+      shift
+      ;;
+  esac
+done
+set -- "${POSITIONAL[@]}" # restore positional parameters
+
+# Check if we got enough command line arguments
+if [[ $# -eq 0 ]]; then
+  usage
+  exit 255
+fi
+
+# Get path to correct directory on bwda. A trailing slash on ldir_base is
+# tolerated and the match is anchored at a path separator, so that a
+# sibling such as "${ldir_base}2" is not mistaken for a subdirectory.
+ldir_target="$(realpath -- "$PWD")"
+ldir_root="${ldir_base%/}"
+if [[ "$ldir_target" == "$ldir_root" || "$ldir_target" == "$ldir_root"/* ]]; then
+  dir_target="${ldir_target#"$ldir_root"}"
+  dir_target="${dir_target#/}"
+else
+  >&2 echo "Current directory is not located on LSDF!"
+  >&2 echo "Is the base directory setting correct?"
+  >&2 echo "ldir_base: $ldir_base"
+  exit 254
+fi
+rdir_target="${rdir_base}/${dir_target}"
+
+# Construct the lftp batch, one command per array element. The commands are
+# printed with a fixed format string, so that a percent sign or a backslash
+# in a file name is passed on literally.
+cmd=(
+  "open sftp://${bwda_acc}@${bwda_url}"
+  "set cmd:parallel ${nparallel}"
+  "cd ${rdir_target}"
+  "lcd ${ldir_target}"
+)
+for file in "$@"; do
+  cmd+=("pget -n ${nchannel} ${file}")
+done
+cmd+=("bye")
+
+# Print or execute commands
+if [[ "$flag_dryrun" == true ]]; then
+  printf '%s\n' "${cmd[@]}"
+else
+  # The batch file is removed on any exit path; the exit status of the
+  # script is the one of lftp.
+  tmpfile="$(mktemp)" || exit 1
+  trap 'rm -f -- "$tmpfile"' EXIT
+  printf '%s\n' "${cmd[@]}" > "$tmpfile"
+  lftp -f "$tmpfile"
+fi
diff --git a/bwdals b/bwdals
new file mode 100755
index 0000000..9987a07
--- /dev/null
+++ b/bwdals
@@ -0,0 +1,62 @@
+#!/bin/bash
+# List the contents of the bwda directory which corresponds to the current
+# working directory on LSDF.
+# Requirements:
+# - lftp must be available
+# - ~/.bwda must be configured (bwda_acc, bwda_url, ldir_base, rdir_base)
+source "$HOME/.bwda"
+
+# Usage function
+usage(){
+  echo "Usage: $(basename "$0") [opt]"
+  echo "List the contents of the current directory on bwda."
+  echo "Options are directly passed to lftp's ls command with the following exceptions:"
+  echo " -h | --help display this message"
+}
+
+# Get command line arguments
+POSITIONAL=()
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      POSITIONAL+=("$1")
+      shift
+      ;;
+  esac
+done
+set -- "${POSITIONAL[@]}" # restore positional parameters
+
+# Get path to correct directory on bwda. A trailing slash on ldir_base is
+# tolerated and the match is anchored at a path separator, so that a
+# sibling such as "${ldir_base}2" is not mistaken for a subdirectory.
+ldir_target="$(realpath -- "$PWD")"
+ldir_root="${ldir_base%/}"
+if [[ "$ldir_target" == "$ldir_root" || "$ldir_target" == "$ldir_root"/* ]]; then
+  dir_target="${ldir_target#"$ldir_root"}"
+  dir_target="${dir_target#/}"
+else
+  >&2 echo "Current directory is not located on LSDF!"
+  >&2 echo "Is the base directory setting correct?"
+  >&2 echo "ldir_base: $ldir_base"
+  exit 254
+fi
+rdir_target="${rdir_base}/${dir_target}"
+
+# Construct the lftp batch, one command per array element
+cmd=(
+  "open sftp://${bwda_acc}@${bwda_url}"
+  "cd ${rdir_target}"
+  "ls $*"
+  "bye"
+)
+
+# Execute commands. The batch file is removed on any exit path; the exit
+# status of the script is the one of lftp.
+tmpfile="$(mktemp)" || exit 1
+trap 'rm -f -- "$tmpfile"' EXIT
+printf '%s\n' "${cmd[@]}" > "$tmpfile"
+lftp -f "$tmpfile"
diff --git a/bwdasync b/bwdasync
new file mode 100755
index 0000000..dc2aa4f
--- /dev/null
+++ b/bwdasync
@@ -0,0 +1,164 @@
+#!/bin/bash
+# Compare contents of a directory with the contents of the corresponding
+# archive on HPSS and upload the local files which are missing there.
+# Requirements:
+# - lftp must be available
+# - ~/.netrc must be configured
+# - ~/.bwda must be configured (bwda_acc, bwda_url, ldir_base, rdir_base)
+# - directory structure needs to be the same on both ends
+# Limitations:
+# - file names containing whitespace are skipped, because the remote
+#   listing is parsed by column
+source "$HOME/.bwda"
+
+# Usage function
+usage(){
+  echo "Usage: $(basename "$0") [opt] [dir]"
+  echo " dir path to local directory on LSDF"
+  echo " --include-size-mismatch include files which mismatch in size to sync list"
+  echo " -n | --dry-run print commands to stdout instead of executing them"
+  echo " -e | --extension ext only sync files with specified extension"
+  echo " -P | --parallel N transfer N files in parallel"
+  echo " -h | --help display this message"
+}
+
+# Abort if the option $1 is the last of the $2 remaining arguments, i.e. if
+# its value is missing.
+require_value(){
+  if [[ $2 -lt 2 ]]; then
+    >&2 echo "Option $1 requires an argument."
+    exit 255
+  fi
+}
+
+# Get command line arguments
+flag_sizemismatch=false
+flag_dryrun=false
+filter_ext=
+nparallel=1
+
+POSITIONAL=()
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --include-size-mismatch)
+      flag_sizemismatch=true
+      shift
+      ;;
+    -n|--dry-run)
+      flag_dryrun=true
+      shift
+      ;;
+    -e|--extension)
+      require_value "$1" "$#"
+      filter_ext="$2"
+      shift 2
+      ;;
+    -P|--parallel)
+      require_value "$1" "$#"
+      nparallel="$2"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      POSITIONAL+=("$1")
+      shift
+      ;;
+  esac
+done
+set -- "${POSITIONAL[@]}" # restore positional parameters
+
+if [[ $# -gt 1 ]]; then
+  usage
+  exit 255
+elif [[ $# -eq 0 ]]; then
+  ldir_target="$(realpath -- "$PWD")"
+else
+  ldir_target="$(realpath -- "$1")"
+fi
+
+# Get target path relative to base. A trailing slash on ldir_base is
+# tolerated and the match is anchored at a path separator, so that a
+# sibling such as "${ldir_base}2" is not mistaken for a subdirectory.
+ldir_root="${ldir_base%/}"
+if [[ "$ldir_target" == "$ldir_root" || "$ldir_target" == "$ldir_root"/* ]]; then
+  dir_target="${ldir_target#"$ldir_root"}"
+  dir_target="${dir_target#/}"
+else
+  >&2 echo "Target directory is not located on LSDF!"
+  >&2 echo "Is the base directory setting correct?"
+  >&2 echo "ldir_base: $ldir_base"
+  exit 254
+fi
+rdir_target="${rdir_base}/${dir_target}"
+
+# Receive list of files on SFTP server. Abort if the listing fails, because
+# an empty remote list would mark every local file for upload.
+if ! rfilelist="$(lftp "sftp://${bwda_acc}@${bwda_url}" -e "ls -l ${rdir_target}; bye")"; then
+  >&2 echo "Could not list ${rdir_target} on bwda."
+  exit 253
+fi
+
+# Map remote file names to their sizes, omit directories. Lines without a
+# name column (e.g. "total N") are ignored.
+declare -A rfilesize=()
+while read -r rname rsize; do
+  rfilesize["$rname"]="$rsize"
+done < <(awk '!/^d/ && NF >= 9 {print $9, $5}' <<< "$rfilelist")
+
+# Collect the local files which have to be synced: regular files (no
+# directories) which match the extension filter and which are either missing
+# on HPSS or, if requested, differ in size.
+lfilename=()
+for lpath in "$ldir_target"/*; do
+  [[ -f "$lpath" ]] || continue
+  lname="${lpath##*/}"
+  if [[ "$lname" == *[[:space:]]* ]]; then
+    >&2 echo "Skipping file name with whitespace: ${lname}"
+    continue
+  fi
+  if [[ -n "$filter_ext" && "$lname" != *"$filter_ext" ]]; then
+    continue
+  fi
+  if [[ -n "${rfilesize[$lname]+set}" ]]; then
+    lsize="$(stat -L -c %s -- "$lpath")"
+    if [[ "${rfilesize[$lname]}" == "$lsize" ]]; then
+      continue
+    fi
+    >&2 echo "Filesize mismatch: ${lname}, HPSS=${rfilesize[$lname]}, local=${lsize}"
+    [[ "$flag_sizemismatch" == true ]] || continue
+  fi
+  lfilename+=("$lname")
+done
+
+# Get number of files to sync
+nfiles="${#lfilename[@]}"
+
+# Write a lftp batch script for syncing
+if [[ $nfiles -eq 0 ]]; then
+  >&2 echo "Nothing to be done."
+else
+  >&2 echo "Syncing $nfiles files."
+  cmd=(
+    "open sftp://${bwda_acc}@${bwda_url}"
+    "set cmd:parallel ${nparallel}"
+    "cd ${rdir_target}"
+    "lcd ${ldir_target}"
+  )
+  for lname in "${lfilename[@]}"; do
+    cmd+=("put ${lname}")
+  done
+  cmd+=("bye")
+  if [[ "$flag_dryrun" == true ]]; then
+    printf '%s\n' "${cmd[@]}"
+  else
+    # The batch file is removed on any exit path; the exit status of the
+    # script is the one of lftp.
+    tmpfile="$(mktemp)" || exit 1
+    trap 'rm -f -- "$tmpfile"' EXIT
+    printf '%s\n' "${cmd[@]}" > "$tmpfile"
+    lftp -f "$tmpfile"
+  fi
+fi