initial commit

This commit is contained in:
pe7321@kit.edu 2021-02-18 12:06:58 +01:00
commit 76cdcd130f
5 changed files with 417 additions and 0 deletions

76
README.md Normal file
View File

@ -0,0 +1,76 @@
# bwdatools
Some tools to simplify file synchronization between LSDF@KIT
and the bwDataArchive.
## Getting started
### Prerequisites
The tool `lftp` is used to access bwDataArchive and needs to be available
in your `$PATH`. The tool is installed on `os-login.lsdf.kit.edu` or can be
obtained from [here](https://lftp.yar.ru/).
In order to minimize password inputs, the bwda sftp server can be
configured in `~/.netrc` in the following way.
```
~/.netrc
---
machine archive-sftp.lsdf.kit.edu
login <xyz>
password <password>
```
The tools included in this package require further machine-specific
configurations to determine the path on bwda automatically. This is
done using the file `~/.bwda`. Here is an example configuration for
the machine `os-login.lsdf.kit.edu`.
```
~/.bwda
---
bwda_acc="xyz"
bwda_url="archive-sftp.lsdf.kit.edu"
ldir_base="/lsdf02/lsdf/kit/ifh/projects/<your-name>" # no trailing slash!
rdir_base="private" # no trailing slash!
```
By default, it is assumed that the directory tree (starting from `ldir_base` on LSDF
and `rdir_base` on bwDataArchive) is the same.
### Installing
Clone this repository using `git clone https://git.mkray.de/mwtkrayer/bwdatools.git`
and add the directory to your `$PATH`. Do not forget to set up `$HOME/.bwda`.
## Usage
### Interactive session
```bash
Usage: bwda [-h] [path]
Opens an interactive lftp session on bwDataArchive.
path directory on LSDF, session will cd to the corresponding bwda directory
-h | --help display this message
```
### List directory content on bwda
```bash
Usage: bwdals [opt]
List the contents of the current directory on bwda.
Options are directly passed to lftp's ls command with the following exceptions:
-h | --help display this message
```
### Transfer files from bwda to LSDF (in parallel)
```bash
Usage: bwdaget files
files files to be downloaded
-c | --channel N use N channels per file
-n | --dry-run print commands to stdout instead of executing them
-P | --parallel N transfer N files in parallel
-h | --help display this message
```
### Synchronize files from LSDF to bwda
```bash
Usage: bwdasync [opt] [dir]
dir path to local directory on LSDF
--include-size-mismatch include files which mismatch in size to sync list
-n | --dry-run print commands to stdout instead of executing them
-e | --extension ext only sync files with specified extension
-P | --parallel N transfer N files in parallel
-h | --help display this message
```

51
bwda Executable file
View File

@ -0,0 +1,51 @@
#!/bin/bash
source $HOME/.bwda
#######################################
# Print the command-line help of bwda.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  local self
  # "$0" is quoted so that an installation path containing spaces is not
  # split into several operands for basename.
  self=$(basename -- "$0")
  printf '%s\n' \
    "Usage: ${self} [opt] [path]" \
    "Opens an interactive lftp session on bwDataArchive." \
    " path directory on LSDF, session will cd to the corresponding bwda directory" \
    " -h | --help display this message"
}
# Get command line arguments
#######################################
# Split the command line into options and positional arguments.
# Globals:   POSITIONAL (written) - every argument that is not an option
# Arguments: the script's command line
# Outputs:   help text on stdout when -h/--help is given
# Returns:   0; exits the script with status 0 after printing the help
#######################################
parse_args() {
  POSITIONAL=()
  while (( $# > 0 )); do
    case "$1" in
      -h | --help)
        usage
        exit 0
        ;;
      *)
        POSITIONAL+=("$1")
        ;;
    esac
    shift
  done
}
parse_args "$@"
set -- "${POSITIONAL[@]}" # restore positional parameters
# Quote one word for lftp's command parser: plain words pass through
# unchanged, anything else is wrapped in double quotes with embedded
# backslashes and double quotes escaped.
lftp_quote() {
  local word=$1
  if [[ -n "$word" && "$word" != *[!A-Za-z0-9_./+:@,=-]* ]]; then
    printf '%s' "$word"
  else
    word=${word//\\/\\\\}
    word=${word//\"/\\\"}
    printf '"%s"' "$word"
  fi
}

# Without a path, open the session in the login directory; otherwise
# start in the bwda directory that mirrors the given LSDF directory.
if (( $# == 0 )); then
  lftp "sftp://${bwda_acc}@${bwda_url}"
else
  # 255 is the status the former "exit -1" produced.
  ldir_target=$(realpath -- "$1") || exit 255
  # Get current path relative to base. Whole path components are compared:
  # a sibling such as "${ldir_base}-old" shares the prefix but is not
  # inside the base directory.
  if [[ "$ldir_target" == "$ldir_base" || "$ldir_target" == "$ldir_base"/* ]]; then
    dir_target=${ldir_target#"$ldir_base"}
    # Drop the leading slash so the remote path has no doubled separator.
    dir_target=${dir_target#/}
  else
    {
      echo "Target directory is not located on LSDF!"
      echo "Is the base directory setting correct?"
      echo "ldir_base: $ldir_base"
    } >&2
    exit 255
  fi
  rdir_target="${rdir_base}/${dir_target}"
  lftp "sftp://${bwda_acc}@${bwda_url}" -e "cd $(lftp_quote "$rdir_target")"
fi

89
bwdaget Executable file
View File

@ -0,0 +1,89 @@
#!/bin/bash
source $HOME/.bwda
#######################################
# Print the command-line help of bwdaget.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  local self
  # "$0" is quoted so that an installation path containing spaces is not
  # split into several operands for basename.
  self=$(basename -- "$0")
  printf '%s\n' \
    "Usage: ${self} files" \
    " files files to be downloaded" \
    " -c | --channel N use N channels per file" \
    " -n | --dry-run print commands to stdout instead of executing them" \
    " -P | --parallel N transfer N files in parallel" \
    " -h | --help display this message"
}
# Get command line arguments
#######################################
# Abort unless an option is followed by a positive integer.
# Arguments: $1 - option name, $2 - its value (may be missing)
# Outputs:   error message on stderr
# Returns:   0 if valid; otherwise exits the script with status 255
#######################################
require_count() {
  if (( $# < 2 )) || [[ ! "$2" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 )); then
    printf 'Option %s requires a positive integer argument.\n' "$1" >&2
    exit 255
  fi
}

#######################################
# Parse the command line of bwdaget.
# Globals:   nparallel, nchannel, flag_dryrun, POSITIONAL (all written)
# Arguments: the script's command line
# Outputs:   help text on stdout when -h/--help is given
# Returns:   0; exits with 0 after the help, with 255 on a bad option value
#######################################
parse_args() {
  nparallel=1
  nchannel=1
  flag_dryrun=false
  POSITIONAL=()
  while (( $# > 0 )); do
    case "$1" in
      -c | --channel)
        require_count "$@"
        nchannel=$2
        shift 2
        ;;
      -n | --dry-run)
        flag_dryrun=true
        shift
        ;;
      -P | --parallel)
        require_count "$@"
        nparallel=$2
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        POSITIONAL+=("$1")
        shift
        ;;
    esac
  done
}
parse_args "$@"
set -- "${POSITIONAL[@]}" # restore positional parameters
# Check if we got enough command line arguments: at least one file name
# is required, otherwise the help is shown on stderr.
if (( $# == 0 )); then
  usage >&2
  # 255 is the status the former "exit -1" produced.
  exit 255
fi
#######################################
# Map a local LSDF directory to the matching directory on bwDataArchive.
# Globals:   ldir_base, rdir_base (read)
# Arguments: $1 - canonical absolute local path
# Outputs:   remote directory on stdout
# Returns:   0 on success, 1 if $1 is neither ldir_base nor below it
#######################################
remote_dir_for() {
  local ldir=$1 rel
  # Compare whole path components: a sibling such as "${ldir_base}-old"
  # shares the prefix but is not inside the base directory.
  if [[ "$ldir" != "$ldir_base" && "$ldir" != "$ldir_base"/* ]]; then
    return 1
  fi
  rel=${ldir#"$ldir_base"}
  # Drop the leading slash so the result has no doubled separator.
  printf '%s\n' "${rdir_base}/${rel#/}"
}

# Get path to correct directory on bwda
# 254 is the status the former "exit -2" produced.
ldir_target=$(realpath -- "$PWD") || exit 254
if ! rdir_target=$(remote_dir_for "$ldir_target"); then
  {
    echo "Current directory is not located on LSDF!"
    echo "Is the base directory setting correct?"
    echo "ldir_base: $ldir_base"
  } >&2
  exit 254
fi
# Quote one word for lftp's command parser: plain words pass through
# unchanged, anything else is wrapped in double quotes with embedded
# backslashes and double quotes escaped.
lftp_quote() {
  local word=$1
  if [[ -n "$word" && "$word" != *[!A-Za-z0-9_./+:@,=-]* ]]; then
    printf '%s' "$word"
  else
    word=${word//\\/\\\\}
    word=${word//\"/\\\"}
    printf '"%s"' "$word"
  fi
}

# Construct command: one lftp command per array element.
batch=(
  "open sftp://${bwda_acc}@${bwda_url}"
  "set cmd:parallel ${nparallel}"
  "cd $(lftp_quote "$rdir_target")"
  "lcd $(lftp_quote "$ldir_target")"
)
# "$@" keeps every file name intact, including names with spaces.
for file in "$@"; do
  batch+=("pget -n ${nchannel} $(lftp_quote "$file")")
done
batch+=("bye")

# Print or execute commands. The batch is written with a fixed '%s\n'
# format so that '%' or '\' in file names is never treated as a directive.
if [[ "$flag_dryrun" == true ]]; then
  printf '%s\n' "${batch[@]}"
else
  tmpfile=$(mktemp) || exit 1
  # Remove the batch file on every exit path, including interruption.
  trap 'rm -f -- "$tmpfile"' EXIT
  printf '%s\n' "${batch[@]}" > "$tmpfile"
  # Last command: the script now exits with lftp's status.
  lftp -f "$tmpfile"
fi

57
bwdals Executable file
View File

@ -0,0 +1,57 @@
#!/bin/bash
source $HOME/.bwda
#######################################
# Print the command-line help of bwdals.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  local self
  # "$0" is quoted so that an installation path containing spaces is not
  # split into several operands for basename.
  self=$(basename -- "$0")
  printf '%s\n' \
    "Usage: ${self} [opt]" \
    "List the contents of the current directory on bwda." \
    "Options are directly passed to lftp's ls command with the following exceptions:" \
    " -h | --help display this message"
}
# Get command line arguments
#######################################
# Separate -h/--help from the options that are forwarded to lftp's ls.
# Globals:   POSITIONAL (written) - every argument other than -h/--help
# Arguments: the script's command line
# Outputs:   help text on stdout when -h/--help is given
# Returns:   0; exits the script with status 0 after printing the help
#######################################
parse_args() {
  POSITIONAL=()
  while (( $# > 0 )); do
    case "$1" in
      -h | --help)
        usage
        exit 0
        ;;
      *)
        POSITIONAL+=("$1")
        ;;
    esac
    shift
  done
}
parse_args "$@"
set -- "${POSITIONAL[@]}" # restore positional parameters
#######################################
# Map a local LSDF directory to the matching directory on bwDataArchive.
# Globals:   ldir_base, rdir_base (read)
# Arguments: $1 - canonical absolute local path
# Outputs:   remote directory on stdout
# Returns:   0 on success, 1 if $1 is neither ldir_base nor below it
#######################################
remote_dir_for() {
  local ldir=$1 rel
  # Compare whole path components: a sibling such as "${ldir_base}-old"
  # shares the prefix but is not inside the base directory.
  if [[ "$ldir" != "$ldir_base" && "$ldir" != "$ldir_base"/* ]]; then
    return 1
  fi
  rel=${ldir#"$ldir_base"}
  # Drop the leading slash so the result has no doubled separator.
  printf '%s\n' "${rdir_base}/${rel#/}"
}

# Get path to correct directory on bwda
# 254 is the status the former "exit -2" produced.
ldir_target=$(realpath -- "$PWD") || exit 254
if ! rdir_target=$(remote_dir_for "$ldir_target"); then
  {
    echo "Current directory is not located on LSDF!"
    echo "Is the base directory setting correct?"
    echo "ldir_base: $ldir_base"
  } >&2
  exit 254
fi
# Quote one word for lftp's command parser: plain words pass through
# unchanged, anything else is wrapped in double quotes with embedded
# backslashes and double quotes escaped.
lftp_quote() {
  local word=$1
  if [[ -n "$word" && "$word" != *[!A-Za-z0-9_./+:@,=-]* ]]; then
    printf '%s' "$word"
  else
    word=${word//\\/\\\\}
    word=${word//\"/\\\"}
    printf '"%s"' "$word"
  fi
}

# Construct command: one lftp command per array element.
batch=(
  "open sftp://${bwda_acc}@${bwda_url}"
  "cd $(lftp_quote "$rdir_target")"
  # The user's options are handed to lftp's ls verbatim, joined by spaces.
  "ls ${POSITIONAL[*]}"
  "bye"
)

# Execute commands. The batch is written with a fixed '%s\n' format so
# that '%' or '\' in an option is never treated as a printf directive.
tmpfile=$(mktemp) || exit 1
# Remove the batch file on every exit path, including interruption.
trap 'rm -f -- "$tmpfile"' EXIT
printf '%s\n' "${batch[@]}" > "$tmpfile"
# Last command: the script now exits with lftp's status.
lftp -f "$tmpfile"

144
bwdasync Executable file
View File

@ -0,0 +1,144 @@
#!/bin/bash
# Compare contents of a directory with the contents of the corresponding
# archive on HPSS.
# Requirements:
# - lftp must be available
# - ~/.netrc must be configured
# - ~/.bwda must be configured (bwda_acc, bwda_url, ldir_base, rdir_base)
# - directory structure needs to be the same on both ends
source $HOME/.bwda
#######################################
# Print the command-line help of bwdasync.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  local self
  # "$0" is quoted so that an installation path containing spaces is not
  # split into several operands for basename.
  self=$(basename -- "$0")
  printf '%s\n' \
    "Usage: ${self} [opt] [dir]" \
    " dir path to local directory on LSDF" \
    " --include-size-mismatch include files which mismatch in size to sync list" \
    " -n | --dry-run print commands to stdout instead of executing them" \
    " -e | --extension ext only sync files with specified extension" \
    " -P | --parallel N transfer N files in parallel" \
    " -h | --help display this message"
}
# Get command line arguments
#######################################
# Parse the command line of bwdasync.
# Globals:   flag_sizemismatch, flag_dryrun, filter_ext, nparallel,
#            POSITIONAL (all written)
# Arguments: the script's command line
# Outputs:   help text on stdout for -h/--help, errors on stderr
# Returns:   0; exits with 0 after the help, with 255 on a bad option value
#######################################
parse_args() {
  flag_sizemismatch=false
  flag_dryrun=false
  filter_ext=
  nparallel=1
  POSITIONAL=()
  while (( $# > 0 )); do
    case "$1" in
      --include-size-mismatch)
        flag_sizemismatch=true
        shift
        ;;
      -n | --dry-run)
        flag_dryrun=true
        shift
        ;;
      -e | --extension)
        # Without this check a trailing "-e" silently disabled the filter.
        if (( $# < 2 )); then
          printf 'Option %s requires an argument.\n' "$1" >&2
          exit 255
        fi
        filter_ext=$2
        shift 2
        ;;
      -P | --parallel)
        if (( $# < 2 )) || [[ ! "$2" =~ ^[0-9]+$ ]] || (( 10#$2 < 1 )); then
          printf 'Option %s requires a positive integer argument.\n' "$1" >&2
          exit 255
        fi
        nparallel=$2
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        POSITIONAL+=("$1")
        shift
        ;;
    esac
  done
}
parse_args "$@"
set -- "${POSITIONAL[@]}" # restore positional parameters
#######################################
# Map a local LSDF directory to the matching directory on bwDataArchive.
# Globals:   ldir_base, rdir_base (read)
# Arguments: $1 - canonical absolute local path
# Outputs:   remote directory on stdout
# Returns:   0 on success, 1 if $1 is neither ldir_base nor below it
#######################################
remote_dir_for() {
  local ldir=$1 rel
  # Compare whole path components: a sibling such as "${ldir_base}-old"
  # shares the prefix but is not inside the base directory.
  if [[ "$ldir" != "$ldir_base" && "$ldir" != "$ldir_base"/* ]]; then
    return 1
  fi
  rel=${ldir#"$ldir_base"}
  # Drop the leading slash so the result has no doubled separator.
  printf '%s\n' "${rdir_base}/${rel#/}"
}

# At most one directory may be given; default to the current directory.
# 255 and 254 are the statuses the former "exit -1" / "exit -2" produced.
if (( $# > 1 )); then
  usage >&2
  exit 255
elif (( $# == 0 )); then
  ldir_target=$(realpath -- "$PWD") || exit 254
else
  ldir_target=$(realpath -- "$1") || exit 254
fi
# Get current path relative to base
if ! rdir_target=$(remote_dir_for "$ldir_target"); then
  {
    echo "Target directory is not located on LSDF!"
    echo "Is the base directory setting correct?"
    echo "ldir_base: $ldir_base"
  } >&2
  exit 254
fi
# Quote one word for lftp's command parser: plain words pass through
# unchanged, anything else is wrapped in double quotes with embedded
# backslashes and double quotes escaped.
lftp_quote() {
  local word=$1
  if [[ -n "$word" && "$word" != *[!A-Za-z0-9_./+:@,=-]* ]]; then
    printf '%s' "$word"
  else
    word=${word//\\/\\\\}
    word=${word//\"/\\\"}
    printf '"%s"' "$word"
  fi
}

# Receive list of files on SFTP server, omit directories.
# A failed listing must not be mistaken for an empty archive, because
# every local file would then be scheduled for upload again.
if ! rfilelist=$(lftp "sftp://${bwda_acc}@${bwda_url}" \
  -e "ls -l $(lftp_quote "$rdir_target"); bye"); then
  echo "Could not list ${rdir_target} on ${bwda_url}." >&2
  exit 253
fi
rfilename=()
rfilesize=()
# Name and size are read from the same line so both arrays stay aligned.
# The name is everything after the eighth field, which keeps names that
# contain spaces intact; for symlinks the " -> target" part is removed.
while IFS=$'\t' read -r entry_size entry_name; do
  rfilename+=("$entry_name")
  rfilesize+=("$entry_size")
done < <(
  printf '%s\n' "$rfilelist" | awk '
    /^d/ || NF < 9 { next }
    {
      size = $5
      name = $0
      for (i = 1; i <= 8; i++) sub(/^[ \t]*[^ \t]+[ \t]+/, "", name)
      if ($0 ~ /^l/) sub(/ -> .*$/, "", name)
      printf "%s\t%s\n", size, name
    }'
)
#######################################
# Read the non-directory entries of a local directory.
# Globals:   lfilename, lfilesize (written) - parallel arrays
# Arguments: $1 - directory to list
#######################################
read_local_listing() {
  local size name
  lfilename=()
  lfilesize=()
  # Name and size are read from the same line so both arrays stay aligned.
  # -n prints numeric owner/group ids, which never contain spaces, so the
  # file name always starts after the eighth field.
  while IFS=$'\t' read -r size name; do
    lfilename+=("$name")
    lfilesize+=("$size")
  done < <(
    ls -ln -- "$1" | awk '
      /^d/ || NF < 9 { next }
      {
        size = $5
        name = $0
        for (i = 1; i <= 8; i++) sub(/^[ \t]*[^ \t]+[ \t]+/, "", name)
        if ($0 ~ /^l/) sub(/ -> .*$/, "", name)
        printf "%s\t%s\n", size, name
      }'
  )
}

#######################################
# Drop local files whose name does not end with the given suffix.
# Globals:   lfilename, lfilesize (modified)
# Arguments: $1 - required suffix; empty keeps every file
#######################################
filter_local_by_extension() {
  local ext=$1 il
  [[ -n "$ext" ]] || return 0
  for il in "${!lfilename[@]}"; do
    if [[ "${lfilename[il]}" != *"$ext" ]]; then
      unset 'lfilename[il]' 'lfilesize[il]'
    fi
  done
}

# Get list of local files, also omit directories
read_local_listing "$ldir_target"
# Remove local files not ending with specified extension
filter_local_by_extension "$filter_ext"
#######################################
# Check for local files which are already on HPSS and take them off the
# local list, so that only files still to be synced remain.
# Globals:   rfilename, rfilesize (read) - remote names and sizes
#            lfilename, lfilesize (modified) - local names and sizes
#            flag_sizemismatch (read) - "true" keeps files whose size
#            differs from the archived copy on the list
# Outputs:   one "Filesize mismatch" line on stderr per differing file
#######################################
drop_archived_files() {
  local ir il
  for ir in "${!rfilename[@]}"; do
    for il in "${!lfilename[@]}"; do
      # Both sides are quoted: an unquoted right-hand side is a glob
      # pattern, so a local name like "data[1].txt" would match "data1.txt".
      [[ "${rfilename[ir]}" == "${lfilename[il]}" ]] || continue
      if [[ "${rfilesize[ir]}" != "${lfilesize[il]}" ]]; then
        echo "Filesize mismatch: ${rfilename[${ir}]}, HPSS=${rfilesize[${ir}]}, local=${lfilesize[${il}]}" >&2
        # On request the mismatching file stays on the list.
        if [[ "$flag_sizemismatch" == true ]]; then
          break
        fi
      fi
      unset 'lfilename[il]' 'lfilesize[il]'
      break
    done
  done
}
drop_archived_files
# Quote one word for lftp's command parser: plain words pass through
# unchanged, anything else is wrapped in double quotes with embedded
# backslashes and double quotes escaped.
lftp_quote() {
  local word=$1
  if [[ -n "$word" && "$word" != *[!A-Za-z0-9_./+:@,=-]* ]]; then
    printf '%s' "$word"
  else
    word=${word//\\/\\\\}
    word=${word//\"/\\\"}
    printf '"%s"' "$word"
  fi
}

#######################################
# Write the lftp batch script that uploads every file left on the list.
# Globals:   bwda_acc, bwda_url, nparallel, rdir_target, ldir_target,
#            lfilename (all read)
# Outputs:   one lftp command per line on stdout
#######################################
build_sync_batch() {
  local name
  # A fixed '%s\n' format keeps '%' and '\' in names from being
  # interpreted as printf directives.
  printf '%s\n' \
    "open sftp://${bwda_acc}@${bwda_url}" \
    "set cmd:parallel ${nparallel}" \
    "cd $(lftp_quote "$rdir_target")" \
    "lcd $(lftp_quote "$ldir_target")"
  for name in "${lfilename[@]}"; do
    printf 'put %s\n' "$(lftp_quote "$name")"
  done
  printf '%s\n' "bye"
}

# Get number of files to sync
nfiles=${#lfilename[@]}
if (( nfiles == 0 )); then
  echo "Nothing to be done." >&2
else
  echo "Syncing $nfiles files." >&2
  if [[ "$flag_dryrun" == true ]]; then
    build_sync_batch
  else
    tmpfile=$(mktemp) || exit 1
    # Remove the batch file on every exit path, including interruption.
    trap 'rm -f -- "$tmpfile"' EXIT
    build_sync_batch > "$tmpfile"
    # Last command: the script now exits with lftp's status.
    lftp -f "$tmpfile"
  fi
fi