initial commit
This commit is contained in:
commit
76cdcd130f
|
|
@ -0,0 +1,76 @@
|
|||
# bwdatools
|
||||
Some tools to simplify file synchronization between LSDF@KIT
|
||||
and the bwDataArchive.
|
||||
|
||||
## Getting started
|
||||
### Prerequisites
|
||||
The tool `lftp` is used to access bwDataArchive and needs to be available
|
||||
in your `$PATH`. The tool is installed on `os-login.lsdf.kit.edu` or can be
|
||||
obtained from [here](https://lftp.yar.ru/).
|
||||
|
||||
In order to minimize password inputs, the bwda sftp server can be
|
||||
configured in `~/.netrc` in the following way.
|
||||
```
|
||||
~/.netrc
|
||||
---
|
||||
machine archive-sftp.lsdf.kit.edu
|
||||
login <xyz>
|
||||
password <password>
|
||||
```
|
||||
|
||||
The tools included in this package require further machine-specific
|
||||
configurations to determine the path on bwda automatically. This is
|
||||
done using the file `~/.bwda`. Here is an example configuration for
|
||||
the machine `os-login.lsdf.kit.edu`.
|
||||
```
|
||||
~/.bwda
|
||||
---
|
||||
bwda_acc="xyz"
|
||||
bwda_url="archive-sftp.lsdf.kit.edu"
|
||||
ldir_base="/lsdf02/lsdf/kit/ifh/projects/<your-name>" # no trailing slash!
|
||||
rdir_base="private" # no trailing slash!
|
||||
```
|
||||
By default, it is assumed that the directory tree (starting from `ldir_base` on LSDF
|
||||
and `rdir_base` on bwDataArchive) is the same.
|
||||
|
||||
### Installing
|
||||
Clone this repository using `git clone https://git.mkray.de/mwtkrayer/bwdatools.git`
|
||||
and add the directory to your `$PATH`. Do not forget to set up `$HOME/.bwda`.
|
||||
|
||||
## Usage
|
||||
### Interactive session
|
||||
```bash
|
||||
Usage: bwda [-h] [path]
|
||||
Opens an interactive lftp session on bwDataArchive.
|
||||
path directory on LSDF, session will cd to the corresponding bwda directory
|
||||
-h | --help display this message
|
||||
```
|
||||
|
||||
### List directory content on bwda
|
||||
```bash
|
||||
Usage: bwdals [opt]
|
||||
List the contents of the current directory on bwda.
|
||||
Options are directly passed to lftp's ls command with the following exceptions:
|
||||
-h | --help display this message
|
||||
```
|
||||
|
||||
### Transfer files from bwda to LSDF (in parallel)
|
||||
```bash
|
||||
Usage: bwdaget files
|
||||
files files to be downloaded
|
||||
-c | --channel N use N channels per file
|
||||
-n | --dry-run print commands to stdout instead of executing them
|
||||
-P | --parallel N transfer N files in parallel
|
||||
-h | --help display this message
|
||||
```
|
||||
|
||||
### Synchronize files from LSDF to bwda
|
||||
```bash
|
||||
Usage: bwdasync [opt] [dir]
|
||||
dir path to local directory on LSDF
|
||||
--include-size-mismatch include files which mismatch in size to sync list
|
||||
-n | --dry-run print commands to stdout instead of executing them
|
||||
-e | --extension ext only sync files with specified extension
|
||||
-P | --parallel N transfer N files in parallel
|
||||
-h | --help display this message
|
||||
```
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
#!/bin/bash
|
||||
source $HOME/.bwda
|
||||
|
||||
# Usage function
# Print the command-line help of this tool to stdout.
# Reads $0 to display the name the script was invoked as.
usage() {
  local prog
  # "$0" is quoted so that an installation path containing whitespace is
  # passed to basename as one operand instead of being split into several.
  prog="$(basename -- "$0")"
  printf '%s\n' \
    "Usage: ${prog} [opt] [path]" \
    "Opens an interactive lftp session on bwDataArchive." \
    " path directory on LSDF, session will cd to the corresponding bwda directory" \
    " -h | --help display this message"
}
|
||||
|
||||
# Main part of the interactive-session tool.
#
# Without a path argument an lftp session is opened on the archive server.
# With a path argument the path is resolved with realpath, its location
# relative to ldir_base is appended to rdir_base, and the lftp session
# starts with a "cd" into that remote directory.
#
# Reads bwda_acc, bwda_url, ldir_base and rdir_base, which are expected to
# be defined by the sourced ~/.bwda file.
# Exit status: 0 after --help, 255 when the path is outside ldir_base,
# otherwise the status of lftp.

# lftp_quote STRING
# Print STRING wrapped in double quotes, with embedded backslashes and
# double quotes escaped, so that lftp's shell-like command parser reads it
# as a single argument even when it contains whitespace.
lftp_quote() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '"%s"' "$s"
}

# Get command line argument
POSITIONAL=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    *)
      POSITIONAL+=("$1")
      shift
      ;;
  esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters

# Abort early with a clear message instead of handing lftp an incomplete URL.
: "${bwda_acc:?bwda_acc is not set, check ~/.bwda}"
: "${bwda_url:?bwda_url is not set, check ~/.bwda}"

if [[ $# -eq 0 ]]; then
  lftp "sftp://${bwda_acc}@${bwda_url}"
else
  : "${ldir_base:?ldir_base is not set, check ~/.bwda}"
  ldir_target="$(realpath -- "$1")"

  # Get current path relative to base. Only the base directory itself or a
  # path below it is accepted; a plain prefix comparison would also accept
  # sibling directories such as "<base>-old".
  case "$ldir_target" in
    "$ldir_base")
      dir_target=""
      ;;
    "$ldir_base"/*)
      dir_target=${ldir_target#"$ldir_base"/}
      ;;
    *)
      {
        echo "Target directory is not located on LSDF!"
        echo "Is the base directory setting correct?"
        echo "ldir_base: $ldir_base"
      } >&2
      exit 255 # same status the former "exit -1" produced
      ;;
  esac
  rdir_target="${rdir_base}/${dir_target}"
  lftp "sftp://${bwda_acc}@${bwda_url}" -e "cd $(lftp_quote "$rdir_target")"
fi
|
||||
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
#!/bin/bash
|
||||
source $HOME/.bwda
|
||||
|
||||
# Usage function
# Print the command-line help of this tool to stdout.
# Reads $0 to display the name the script was invoked as.
usage() {
  local prog
  # "$0" is quoted so that an installation path containing whitespace is
  # passed to basename as one operand instead of being split into several.
  prog="$(basename -- "$0")"
  printf '%s\n' \
    "Usage: ${prog} files" \
    " files files to be downloaded" \
    " -c | --channel N use N channels per file" \
    " -n | --dry-run print commands to stdout instead of executing them" \
    " -P | --parallel N transfer N files in parallel" \
    " -h | --help display this message"
}
|
||||
|
||||
# Main part of the download tool.
#
# Resolves the current working directory relative to ldir_base, maps it to
# the matching directory below rdir_base and writes an lftp batch that
# fetches every file named on the command line with "pget". The batch is
# either printed (--dry-run) or executed through "lftp -f".
#
# Reads bwda_acc, bwda_url, ldir_base and rdir_base, which are expected to
# be defined by the sourced ~/.bwda file.
# Exit status: 0 after --help, 255 on a command-line error, 254 when the
# current directory is outside ldir_base, otherwise the status of lftp
# (or 0 for a dry run).

# Defaults for the command-line options
nparallel=1
nchannel=1
flag_dryrun=false

# lftp_quote STRING
# Print STRING wrapped in double quotes, with embedded backslashes and
# double quotes escaped, so that lftp's shell-like command parser reads it
# as a single argument even when it contains whitespace.
lftp_quote() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '"%s"' "$s"
}

# require_count OPTION VALUE
# Abort with the usage text unless VALUE is a positive integer.
require_count() {
  if [[ ! $2 =~ ^[1-9][0-9]*$ ]]; then
    echo "$1 requires a positive integer argument" >&2
    usage >&2
    exit 255
  fi
}

# Get command line argument
POSITIONAL=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    -c|--channel)
      require_count "$1" "${2-}"
      nchannel=$2
      shift 2
      ;;
    -n|--dry-run)
      flag_dryrun=true
      shift
      ;;
    -P|--parallel)
      require_count "$1" "${2-}"
      nparallel=$2
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      POSITIONAL+=("$1")
      shift
      ;;
  esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters

# Check if we got enough command line arguments
if [[ $# -eq 0 ]]; then
  usage
  exit 255 # same status the former "exit -1" produced
fi

# Abort early with a clear message if the configuration is incomplete.
: "${bwda_acc:?bwda_acc is not set, check ~/.bwda}"
: "${bwda_url:?bwda_url is not set, check ~/.bwda}"
: "${ldir_base:?ldir_base is not set, check ~/.bwda}"

# Get path to correct directory on bwda. Only the base directory itself or
# a path below it is accepted; a plain prefix comparison would also accept
# sibling directories such as "<base>-old".
ldir_target="$(realpath -- "$PWD")"
case "$ldir_target" in
  "$ldir_base")
    dir_target=""
    ;;
  "$ldir_base"/*)
    dir_target=${ldir_target#"$ldir_base"/}
    ;;
  *)
    {
      echo "Current directory is not located on LSDF!"
      echo "Is the base directory setting correct?"
      echo "ldir_base: $ldir_base"
    } >&2
    exit 254 # same status the former "exit -2" produced
    ;;
esac
rdir_target="${rdir_base}/${dir_target}"

# Construct command: one lftp command per array element. The lines are
# later written with a fixed printf format, so "%" or "\" inside a file
# name is never interpreted as a format directive.
cmd=(
  "open sftp://${bwda_acc}@${bwda_url}"
  "set cmd:parallel ${nparallel}"
  "cd $(lftp_quote "$rdir_target")"
  "lcd $(lftp_quote "$ldir_target")"
)
# "$@" keeps every file name intact; $* would re-split names on whitespace
# and expand glob characters against the local directory.
for file in "$@"; do
  cmd+=("pget -n ${nchannel} $(lftp_quote "$file")")
done
cmd+=("bye")

# Print or execute commands
if [[ "$flag_dryrun" == true ]]; then
  printf '%s\n' "${cmd[@]}"
else
  tmpfile="$(mktemp)" || exit 1
  # Remove the batch file on every exit path, including interruption.
  trap 'rm -f -- "$tmpfile"' EXIT
  printf '%s\n' "${cmd[@]}" > "$tmpfile"
  # Last command of the script: its status becomes the exit status.
  lftp -f "$tmpfile"
fi
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
#!/bin/bash
|
||||
source $HOME/.bwda
|
||||
|
||||
# Usage function
# Print the command-line help of this tool to stdout.
# Reads $0 to display the name the script was invoked as.
usage() {
  local prog
  # "$0" is quoted so that an installation path containing whitespace is
  # passed to basename as one operand instead of being split into several.
  prog="$(basename -- "$0")"
  printf '%s\n' \
    "Usage: ${prog} [opt]" \
    "List the contents of the current directory on bwda." \
    "Options are directly passed to lftp's ls command with the following exceptions:" \
    " -h | --help display this message"
}
|
||||
|
||||
# Main part of the remote listing tool.
#
# Resolves the current working directory relative to ldir_base, maps it to
# the matching directory below rdir_base and runs lftp's "ls" there. All
# command-line arguments except -h/--help are handed to that "ls" verbatim.
#
# Reads bwda_acc, bwda_url, ldir_base and rdir_base, which are expected to
# be defined by the sourced ~/.bwda file.
# Exit status: 0 after --help, 254 when the current directory is outside
# ldir_base, otherwise the status of lftp.

# lftp_quote STRING
# Print STRING wrapped in double quotes, with embedded backslashes and
# double quotes escaped, so that lftp's shell-like command parser reads it
# as a single argument even when it contains whitespace.
lftp_quote() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '"%s"' "$s"
}

# Get command line argument
POSITIONAL=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    *)
      POSITIONAL+=("$1")
      shift
      ;;
  esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters

# Abort early with a clear message if the configuration is incomplete.
: "${bwda_acc:?bwda_acc is not set, check ~/.bwda}"
: "${bwda_url:?bwda_url is not set, check ~/.bwda}"
: "${ldir_base:?ldir_base is not set, check ~/.bwda}"

# Get path to correct directory on bwda. Only the base directory itself or
# a path below it is accepted; a plain prefix comparison would also accept
# sibling directories such as "<base>-old".
ldir_target="$(realpath -- "$PWD")"
case "$ldir_target" in
  "$ldir_base")
    dir_target=""
    ;;
  "$ldir_base"/*)
    dir_target=${ldir_target#"$ldir_base"/}
    ;;
  *)
    {
      echo "Current directory is not located on LSDF!"
      echo "Is the base directory setting correct?"
      echo "ldir_base: $ldir_base"
    } >&2
    exit 254 # same status the former "exit -2" produced
    ;;
esac
rdir_target="${rdir_base}/${dir_target}"

# Construct command: one lftp command per array element. The options for
# "ls" are joined with single spaces and passed through unquoted on purpose.
cmd=(
  "open sftp://${bwda_acc}@${bwda_url}"
  "cd $(lftp_quote "$rdir_target")"
  "ls ${POSITIONAL[*]}"
  "bye"
)

# Execute commands
tmpfile="$(mktemp)" || exit 1
# Remove the batch file on every exit path, including interruption.
trap 'rm -f -- "$tmpfile"' EXIT
# A fixed format string keeps "%" and "\" in paths or options literal.
printf '%s\n' "${cmd[@]}" > "$tmpfile"
# Last command of the script: its status becomes the exit status.
lftp -f "$tmpfile"
|
||||
|
|
@ -0,0 +1,144 @@
|
|||
#!/bin/bash
|
||||
# Compare contents of a directory with the contents of the corresponding
|
||||
# archive on HPSS.
|
||||
# Requirements:
|
||||
# - lftp must be available
|
||||
# - ~/.netrc must be configured
|
||||
# - ~/.bwda must be configured (bwda_acc, bwda_url, ldir_base, rdir_base)
|
||||
# - directory structure needs to be the same on both ends
|
||||
source $HOME/.bwda
|
||||
|
||||
# Usage function
# Print the command-line help of this tool to stdout.
# Reads $0 to display the name the script was invoked as.
usage() {
  local prog
  # "$0" is quoted so that an installation path containing whitespace is
  # passed to basename as one operand instead of being split into several.
  prog="$(basename -- "$0")"
  printf '%s\n' \
    "Usage: ${prog} [opt] [dir]" \
    " dir path to local directory on LSDF" \
    " --include-size-mismatch include files which mismatch in size to sync list" \
    " -n | --dry-run print commands to stdout instead of executing them" \
    " -e | --extension ext only sync files with specified extension" \
    " -P | --parallel N transfer N files in parallel" \
    " -h | --help display this message"
}
|
||||
|
||||
# Main part of the synchronization tool.
#
# Compares the non-directory entries of a local directory with the listing
# of the matching remote directory (rdir_base plus the path relative to
# ldir_base) and writes an lftp batch that uploads, with "put", every local
# file that is missing remotely. Files present on both sides with different
# sizes are reported on stderr and only uploaded when
# --include-size-mismatch is given. The batch is either printed (--dry-run)
# or executed through "lftp -f".
#
# Reads bwda_acc, bwda_url, ldir_base and rdir_base, which are expected to
# be defined by the sourced ~/.bwda file.
# Requires GNU find (-printf) in addition to realpath and lftp.
# Exit status: 0 after --help or when nothing has to be done, 255 on a
# command-line error, 254 when the directory is outside ldir_base,
# otherwise the status of lftp (or 0 for a dry run).

# Defaults for the command-line options
flag_sizemismatch=false
flag_dryrun=false
filter_ext=
nparallel=1

# lftp_quote STRING
# Print STRING wrapped in double quotes, with embedded backslashes and
# double quotes escaped, so that lftp's shell-like command parser reads it
# as a single argument even when it contains whitespace.
lftp_quote() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '"%s"' "$s"
}

# Get command line argument
POSITIONAL=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    --include-size-mismatch)
      flag_sizemismatch=true
      shift
      ;;
    -n|--dry-run)
      flag_dryrun=true
      shift
      ;;
    -e|--extension)
      if [[ $# -lt 2 ]]; then
        echo "$1 requires an argument" >&2
        usage >&2
        exit 255
      fi
      filter_ext=$2
      shift 2
      ;;
    -P|--parallel)
      if [[ ! ${2-} =~ ^[1-9][0-9]*$ ]]; then
        echo "$1 requires a positive integer argument" >&2
        usage >&2
        exit 255
      fi
      nparallel=$2
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      POSITIONAL+=("$1")
      shift
      ;;
  esac
done
set -- "${POSITIONAL[@]}" # restore positional parameters

if [[ $# -gt 1 ]]; then
  usage
  exit 255 # same status the former "exit -1" produced
fi

# Abort early with a clear message if the configuration is incomplete.
: "${bwda_acc:?bwda_acc is not set, check ~/.bwda}"
: "${bwda_url:?bwda_url is not set, check ~/.bwda}"
: "${ldir_base:?ldir_base is not set, check ~/.bwda}"

# Use the directory given on the command line, or the current one.
ldir_target="$(realpath -- "${1-$PWD}")"

# Get current path relative to base. Only the base directory itself or a
# path below it is accepted; a plain prefix comparison would also accept
# sibling directories such as "<base>-old".
case "$ldir_target" in
  "$ldir_base")
    dir_target=""
    ;;
  "$ldir_base"/*)
    dir_target=${ldir_target#"$ldir_base"/}
    ;;
  *)
    {
      echo "Target directory is not located on LSDF!"
      echo "Is the base directory setting correct?"
      echo "ldir_base: $ldir_base"
    } >&2
    exit 254 # same status the former "exit -2" produced
    ;;
esac
rdir_target="${rdir_base}/${dir_target}"

# Receive list of files on SFTP server, omit directories.
# The listing is assumed to be in "ls -l" layout (size in column 5, name
# starting at column 9). awk emits "size<TAB>name" per entry; the name is
# everything after the first eight columns, so names containing spaces
# stay intact, and the " -> target" part of symbolic links is dropped.
# Lines with fewer than nine columns (e.g. a "total" line) are skipped.
rfilename=()
rfilesize=()
while IFS=$'\t' read -r size name; do
  rfilesize+=("$size")
  rfilename+=("$name")
done < <(
  lftp "sftp://${bwda_acc}@${bwda_url}" \
    -e "ls -l $(lftp_quote "$rdir_target"); bye" \
    | awk '
        /^d/ || NF < 9 { next }
        {
          size = $5
          name = $0
          for (i = 1; i <= 8; i++) sub(/^[ \t]*[^ \t]+[ \t]+/, "", name)
          if ($1 ~ /^l/) sub(/ -> .*$/, "", name)
          print size "\t" name
        }'
)

# Get list of local files, also omit directories and hidden entries.
# find reports "size<TAB>name" directly, so no "ls" output has to be
# parsed. Entries not ending with the requested extension are dropped
# right away.
lfilename=()
lfilesize=()
while IFS=$'\t' read -r size name; do
  if [[ -n "$filter_ext" && "$name" != *"$filter_ext" ]]; then
    continue
  fi
  lfilesize+=("$size")
  lfilename+=("$name")
done < <(
  find "$ldir_target" -mindepth 1 -maxdepth 1 ! -type d ! -name '.*' \
    -printf '%s\t%f\n' \
    | LC_ALL=C sort -t $'\t' -k 2
)

# Check for local files which are not on HPSS. A file is kept for upload
# when it has no remote counterpart, or when the sizes differ and
# --include-size-mismatch was requested. Both sides of the comparisons are
# quoted so that names are compared literally, not as glob patterns.
sync_list=()
for il in "${!lfilename[@]}"; do
  keep=true
  for ir in "${!rfilename[@]}"; do
    [[ "${rfilename[ir]}" == "${lfilename[il]}" ]] || continue
    if [[ "${rfilesize[ir]}" != "${lfilesize[il]}" ]]; then
      >&2 echo "Filesize mismatch: ${rfilename[ir]}, HPSS=${rfilesize[ir]}, local=${lfilesize[il]}"
      [[ "$flag_sizemismatch" == true ]] || keep=false
    else
      keep=false
    fi
    break
  done
  if [[ "$keep" == true ]]; then
    sync_list+=("${lfilename[il]}")
  fi
done

# Get number of files to sync
nfiles="${#sync_list[@]}"

# Write a lftp batch script for syncing
if [[ $nfiles -eq 0 ]]; then
  >&2 echo "Nothing to be done."
else
  >&2 echo "Syncing $nfiles files."
  # One lftp command per array element; the lines are written with a fixed
  # printf format so "%" or "\" inside a name is never interpreted.
  cmd=(
    "open sftp://${bwda_acc}@${bwda_url}"
    "set cmd:parallel ${nparallel}"
    "cd $(lftp_quote "$rdir_target")"
    "lcd $(lftp_quote "$ldir_target")"
  )
  for name in "${sync_list[@]}"; do
    cmd+=("put $(lftp_quote "$name")")
  done
  cmd+=("bye")

  if [[ "$flag_dryrun" == true ]]; then
    printf '%s\n' "${cmd[@]}"
  else
    tmpfile="$(mktemp)" || exit 1
    # Remove the batch file on every exit path, including interruption.
    trap 'rm -f -- "$tmpfile"' EXIT
    printf '%s\n' "${cmd[@]}" > "$tmpfile"
    # Last command of the script: its status becomes the exit status.
    lftp -f "$tmpfile"
  fi
fi
|
||||
Loading…
Reference in New Issue