Commit 689627c3 authored by Pape, David (FWCC) - 139658's avatar Pape, David (FWCC) - 139658
Browse files

Refactoring

parent 0bf01cd0
......@@ -24,21 +24,37 @@ function getSlurmVar {
}
}
# Stream a file to stdout as it grows.
# Arguments: $1 - path of the file to follow
# Outputs:   whatever `tail -f` prints for that file
# Note:      blocks until the tail process is terminated from outside.
function tailFile {
  local followed="$1"
  # Create the file up front so tail can start right away
  # (alternative considered earlier: poll until the file shows up).
  touch "$followed"
  tail -f "$followed"
}
# Print the PID of the first child process listed by `ps` for a parent PID.
# Arguments: $1 - parent process ID
# Outputs:   first column of the first non-header line of `ps --ppid`;
#            nothing when ps lists no child
function pidChildOf {
  local parentPid="$1"
  # Record 1 is the ps column header, record 2 is the first child.
  ps --ppid "$parentPid" | awk 'NR == 2 { print $1 }'
}
# Print the exit code Slurm recorded for a job.
# Arguments: $1 - Slurm job ID
# Outputs:   the part before the ':' of the first `ExitCode=<code>:<signal>`
#            token in `scontrol show job` output; nothing if no such token
#            is present
function slurmJobExitCode {
  # Find the ExitCode= token by name instead of by column position, so the
  # result does not depend on how many other key=value pairs share its line.
  scontrol show job="$1" |
    awk '!seen {
      for (i = 1; i <= NF; i++) {
        if ($i ~ /^ExitCode=/) {
          # Drop the 9-character "ExitCode=" prefix, keep the part before ":".
          split(substr($i, 10), parts, ":")
          print parts[1]
          seen = 1
          break
        }
      }
    }'
}
#
# ./this_script [my_args...] script stage_name
# \____/
# This has to be run!
#
# Make git available through the `module` command.
module load git
# This is needed since the runner's working directory is /tmp which is not shared between nodes.
# TODO: Find out why the runner uses /tmp instead of its working directory, anyway.
WORK_DIR="/home/pape58/runner-wd"
# Make git available through the `module` command; anything it writes to stderr is discarded.
module load git 2> /dev/null
# Second-to-last command-line argument (the "script" position in the usage line above).
script=${*: -2:1}
# Last command-line argument (the "stage_name" position in the usage line above).
runStage=${*: -1:1}
# This is needed since the runner's working directory is /tmp which is not shared between nodes.
workDir="/home/pape58/runner-wd"
# Disabled debug output: would print the selected stage on stderr.
#1>&2 echo "Run stage: $runStage"
# Only run the build script on the cluster.
if [[ "$runStage" == "build_script" ]]; then
# This will be the name of the batch script that is constructed from the script passed to this
# program. Later we move this to the old script name.
......@@ -47,8 +63,8 @@ if [[ "$runStage" == "build_script" ]]; then
{
# write shebang and SBATCH options to new script
echo "#!/usr/bin/env bash";
echo "#SBATCH --output=$workDir/slurm-%j.out";
echo "#SBATCH --error=$workDir/slurm-%j.err";
echo "#SBATCH --output=$WORK_DIR/slurm-%j.out";
echo "#SBATCH --error=$WORK_DIR/slurm-%j.err";
# iterate over SLURM options and get the desired setting
# only write to new script if variable not empty
......@@ -77,34 +93,29 @@ if [[ "$runStage" == "build_script" ]]; then
chmod +x "$script"
jobID=$(sbatch "$script" | awk '{ print $4 }')
outFile="$workDir/slurm-$jobID.out"
errFile="$workDir/slurm-$jobID.err"
outFile="$WORK_DIR/slurm-$jobID.out"
errFile="$WORK_DIR/slurm-$jobID.err"
# spawn subshells for live output of stdout and stderr
(until [[ -f "$outFile" ]]; do sleep 0.1; done && tail -f "$outFile") &
(tailFile "$outFile") &
pidTailOutParent="$!"
(until [[ -f "$errFile" ]]; do sleep 0.1; done && tail -f "$errFile") &
(tailFile "$errFile") &
pidTailErrParent="$!"
# wait for job to finish
until [[ $(squeue -j "$jobID" | wc -l) -le "1" ]]; do sleep 2; done
pidTailOut=$(ps --ppid "$pidTailOutParent" | tail -n 1 | awk '{ print $1 }')
pidTailErr=$(ps --ppid "$pidTailErrParent" | tail -n 1 | awk '{ print $1 }')
pidTailOut=$(pidChildOf "$pidTailOutParent")
pidTailErr=$(pidChildOf "$pidTailErrParent")
# send SIGPIPE to suppress output when killing
kill -13 "$pidTailOut" "$pidTailErr"
# print output and error
#cat "$outFile"
#1>&2 cat "$errFile"
# cleanup
rm "$outFile" "$errFile"
# get the job's exit code
exitCode=$(scontrol show job="$jobID" | grep "ExitCode" | awk '{ print $5 }' |
cut -d "=" -f 2 | cut -d ":" -f 1)
# get the job's exit code and pass it to GitLab CI on exit
exitCode=$(slurmJobExitCode "$jobID")
exit "$exitCode"
else
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment