#!/bin/bash

# **************************** LICENSE START ***********************************
#
# Copyright 2012 ECMWF and INPE. This software is distributed under the terms
# of the Apache License version 2.0. In applying this license, ECMWF does not
# waive the privileges and immunities granted to it by virtue of its status as
# an Intergovernmental Organization or submit itself to any jurisdiction.
#
# ***************************** LICENSE END ************************************


# ---------------------------------------------------------
# Script to run FLEXPART from within Metview
# ---------------------------------------------------------

#Shell options: a pipeline fails when any of its stages fails (pipefail) and
#each command is traced as it is executed (-x)
set -x -o pipefail

#Pid of this script: the starting point of the search for the executable's pid
MY_PID=$$

#Pid of the running executable. It stays empty until findExePid() finds it
EXE_PID=""

#Print an error message.
#All the arguments are joined with spaces, prefixed with text_ERR (which is
#expected to end with its own separator) and appended as one line to the
#temporary log (f_LOG). When no log is defined yet the message is written
#to stderr instead of being lost.
#The text is written literally: no globbing or word splitting is applied to it.
print_err()
{
    local msg="${text_ERR}$*"

    if [[ -n "${f_LOG}" ]] ; then
        printf '%s\n' "${msg}" >> "${f_LOG}"
    else
        printf '%s\n' "${msg}" >&2
    fi
}

#Find the executable's pid among the descendants of the specified pid and
#store it in EXE_PID. The process is identified by its command name (cmd_EXE).
#We call it recursively: each child is checked first, then its own children.
#   $1      - pid of the process whose descendants are searched
#   returns - 0 when the executable was found (EXE_PID is set), 1 otherwise
#             (EXE_PID is left untouched)
findExePid()
{
    #These have to be local: the function is recursive and the nested calls
    #must not overwrite the values of their callers
    local parentPid=$1
    local chPidLst chPid chCmd

    #Without a command name there is nothing to look for
    if [[ -z "${cmd_EXE}" ]] ; then
        return 1
    fi

    chPidLst=$(ps --ppid "${parentPid}" ho pid 2>/dev/null)

    #echo "child pids="$chPidLst
    for chPid in ${chPidLst} ; do

        chCmd=$(ps --pid "${chPid}" ho comm 2>/dev/null)

        #ps can report a truncated command name (15 characters on Linux), so
        #the truncated form of a longer executable name is accepted as well
        if [[ -n "${chCmd}" && ( "${chCmd}" = "${cmd_EXE}" || "${chCmd}" = "${cmd_EXE:0:15}" ) ]] ; then
            EXE_PID=${chPid}
            return 0
        fi

        #Stop the whole search as soon as a nested call finds the executable
        if findExePid "${chPid}" ; then
            return 0
        fi
    done

    return 1
}

#Kill the executable (with SIGKILL) using the pid stored in EXE_PID,
#then forget the pid. Nothing is killed when EXE_PID is empty.
killExe()
{
    if [[ -n "${EXE_PID}" ]] ; then
        #The values 0 and -1 are never passed to kill
        if (( EXE_PID != 0 && EXE_PID != -1 )) ; then
            kill -9 ${EXE_PID}
        fi
    fi

    EXE_PID=""
}

#Common handler for the exit and signal traps: stops the executable and moves
#the temporary log (f_LOG) to its final location (f_LOG_FINAL).
#   $1 - "SIGNAL" when called for a signal, anything else (e.g. "EXIT") otherwise
#   $2 - name of the signal (only used with "SIGNAL")
#When called for a signal a message is logged, outCode is set to 1 and the
#script exits with it. Otherwise the function simply returns.
cleanup()
{
    if [[ "$1" = "SIGNAL" ]] ; then
        #The log might not be defined yet: use stderr in that case
        if [[ -n "${f_LOG}" ]] ; then
            echo "Script $(basename -- "$0") stopped with SIGNAL $2" >> "${f_LOG}"
        else
            echo "Script $(basename -- "$0") stopped with SIGNAL $2" >&2
        fi
        outCode=1
    fi

    #We need to stop the flexpart exe because it
    #does not stop when we exit this script!
    killExe

    #Copy the tmp log to real log. There is nothing to move when we exit
    #before the log names are defined or before anything is written to the log
    if [[ -n "${f_LOG}" && -n "${f_LOG_FINAL}" && -e "${f_LOG}" ]] ; then
        mv -- "${f_LOG}" "${f_LOG_FINAL}"
    fi

    #Disable the traps so that the handler is not entered again while exiting
    trap "" EXIT ERR
    if [[ "$1" = "SIGNAL" ]] ; then
        exit $outCode
    fi
}


#Signals on which the script cleans up and stops
MY_SIGLIST="HUP INT QUIT TERM XCPU  XFSZ"
#           1   2   3    15   30/24 31/25

#cleanup() is called on every exit path ...
trap 'cleanup "EXIT"' EXIT

#... and for each signal of the list (the list is split on purpose)
for sig in ${MY_SIGLIST} ; do
    #The signal name has to be expanded right here, hence the double quotes.
    #With single quotes $sig would only be expanded when the trap runs: by
    #then it holds the last item of the list for every signal.
    trap "cleanup \"SIGNAL\" ${sig}" "${sig}"
done

#Define error text prefix (it ends with a space that separates it from the message)
text_ERR="Script $(basename -- "$0") FAILED! "

#-------------------------
# Get args
#-------------------------

#Exactly five arguments are required. The name of the log file comes from the
#arguments, so at this point there is no log to write to: the message goes to
#stderr.
if [[ $# -ne 5 ]] ; then
    echo "${text_ERR}Invalid number of arguments specified! ($# instead of 5)" >&2
    exit 1
fi

#Positional arguments:
#   $1 - working directory
#   $2 - final log file
#   $3 - FLEXPART executable  ("_UNDEF_": use the env variable MV_FLEXPART_EXE)
#   $4 - resources directory  ("_UNDEF_": use the env variable MV_FLEXPART_RESOURCES)
#   $5 - species directory    ("_UNDEF_": use the env variable MV_FLEXPART_SPECIES)
d_WORK=$1
f_LOG_FINAL=$2
f_EXE=$3
d_RESOURCE=$4
d_SPECIES=$5

#A temporary log. We need this because if we kill flexpart there is still something
#written to the log (even after calling exit) overwriting our error messages. So we
#use the temporary log and copy it to real log in cleanup().
f_LOG=${f_LOG_FINAL}.tmp

#get the exe
if [[ "${f_EXE}" = "_UNDEF_" ]] ; then
    exe_FLEXPART=${MV_FLEXPART_EXE}
else
    exe_FLEXPART=${f_EXE}
fi

#The name of the executable without its path: the running process is
#identified by this name. The path is quoted so that it can contain spaces;
#an undefined executable simply results in an empty name.
cmd_EXE=$(basename -- "${exe_FLEXPART}")

#get the resources
if [[ "${d_RESOURCE}" = "_UNDEF_" ]] ; then
    d_RESOURCE=${MV_FLEXPART_RESOURCES}
fi

#get the species
if [[ "${d_SPECIES}" = "_UNDEF_" ]] ; then
    d_SPECIES=${MV_FLEXPART_SPECIES}
fi

#-------------------------------
# Go to the work directory
#-------------------------------

if [[ ! -d "${d_WORK}" ]] ; then
   print_err "No working directory found: " "${d_WORK}"
   exit 1
fi

#The rest of the script works with relative paths, so we must not go on
#when the directory cannot be entered
if ! cd -- "${d_WORK}" ; then
   print_err "Cannot enter working directory: " "${d_WORK}"
   exit 1
fi

#-------------------
# Clean up
#-------------------

#Remove the logs (temporary and final) that already exist
rm -f -- "${f_LOG}"
rm -f -- "${f_LOG_FINAL}"

#-------------------------------
# Checks
#-------------------------------

#Check that a directory is defined, exists and is readable. On failure an
#error message is logged with print_err and the script exits with status 1.
#   $1 - what the directory holds (used in the messages)
#   $2 - name of the env variable that can define the directory
#   $3 - path of the directory
checkDir()
{
    local what=$1
    local envVar=$2
    local dir=$3

    if [[ -z "${dir}" ]] ; then
        print_err "No FLEXPART ${what} directory is defined. Please define it via env variable ${envVar}."
        exit 1
    fi

    if [[ ! -d "${dir}" ]] ; then
        print_err "No FLEXPART ${what} directory found: " "${dir}"
        exit 1
    fi

    if [[ ! -r "${dir}" ]] ; then
        print_err "FLEXPART ${what} directory cannot be read! Permission is missing. " "${dir}"
        exit 1
    fi
}

#Executable
if [[ -z "${exe_FLEXPART}" ]] ; then
   print_err "No FLEXPART executable is defined. Please define it via env variable MV_FLEXPART_EXE."
   exit 1
fi

if [[ ! -f "${exe_FLEXPART}" ]] ; then
   print_err "No FLEXPART executable found: " "${exe_FLEXPART}"
   exit 1
fi

if [[ ! -x "${exe_FLEXPART}" ]] ; then
   print_err "FLEXPART executable cannot be run! Permission is missing. " "${exe_FLEXPART}"
   exit 1
fi

#The command name must be defined and has to be a part of the executable's
#path. It is compared literally, not as a pattern.
if [[ -z "${cmd_EXE}" || "${exe_FLEXPART}" != *"${cmd_EXE}"* ]] ; then
    print_err "FLEXPART executable command is not defined. " "${cmd_EXE}"
    exit 1
fi

#Resources
checkDir "resources" "MV_FLEXPART_RESOURCES" "${d_RESOURCE}"

#Species
checkDir "species" "MV_FLEXPART_SPECIES" "${d_SPECIES}"

#Copy the resource files into the current directory. A failing copy does not
#stop the script.
for resFile in IGBP_int1.dat OH_7lev_agl.dat surfdata.t surfdepo.t ; do
    cp -- "${d_RESOURCE}/${resFile}" .
done

#Make the species directory available under the name SPECIES. With -n an
#already existing SPECIES link that points to a directory is replaced by the
#new link; without it the new link would be created inside that directory.
ln -sfn -- "${d_SPECIES}" SPECIES

#-------------------------------
#Run FLEXPART
#-------------------------------

#We need to unset it so that FLEXPART could pick up the right
#definition path that belongs to the ecCodes/grib_api version we used for
#its compilation
unset GRIB_DEFINITION_PATH

#WARNING!
# flexpart can enter an infinite loop dumping this message:
#   "ERROR: could not allocate outh"
# on and on. Therefore we need to parse its output and detect
# if it happens. We also need to check the size of the logfile.
errString="ERROR: could not allocate outh"

#When the logfile is larger than this we stop flexpart!
maxLogSize=4194304 #4MB

#With this solution we can check the flexpart output but will not know the
#return value

#The output of flexpart (stdout and stderr) is written to the temporary log by
#tee and is also read here line by line. Each line is echoed literally: leading
#spaces are kept and characters like '*' are not expanded by the shell. A last
#line without a closing newline is processed, too.
outCode=0
while IFS= read -r line || [[ -n "${line}" ]]
do
    printf '%s\n' "${line}"

    logSize=$(wc -c < "${f_LOG}")

    #Look for the pid of the executable until we find it. We need it to be
    #able to kill flexpart. The pid is printed (an empty line while it is
    #not known).
    if [[ -z "${EXE_PID}" ]] ; then
        findExePid "${MY_PID}"
        echo ${EXE_PID}
    fi

    #The log is only checked when it is larger than 1 KB
    if [[ ${logSize} -gt 1024 ]] ; then

        #The error text is searched for literally in the last lines of the log
        if [[ $(tail -n 10 "${f_LOG}" | grep -i -c -F -e "${errString}" 2>/dev/null) -ne 0 ]] ; then
            killExe
            print_err "FLEXPART was stopped because it seemed to enter an infinite loop!"
            exit 1
        fi

        if [[ ${logSize} -gt ${maxLogSize} ]] ; then
            killExe
            print_err "FLEXPART was stopped because maximum log file size (=${maxLogSize} B) reached!"
            exit 1
        fi
    fi

done < <("${exe_FLEXPART}" 2>&1 | tee "${f_LOG}")

#NOTE: this is the status of the loop above and not that of flexpart (see the
#remark before the loop)
outCode=$?

#-----------------------------------
#  Check log
#-----------------------------------

#Derive the final status from the contents of the temporary log (f_LOG) and
#store it in outCode. All the texts are searched for case-insensitively:
#   255 - the log contains WARNING
#   1   - the log contains ERROR, SIGSEGV or "segmentation fault"
#   1   - the log is empty while outCode is 0. In this case a message is
#         written to the log.
#In any other case outCode is left as it is. Nothing is done when the log
#does not exist.
#NOTE(review): WARNING is checked first, so a log with both a warning and an
#error results in 255. This is the original order - confirm it is intended.
checkLog()
{
    if [[ ! -f "${f_LOG}" ]] ; then
        return 0
    fi

    if grep -q -i -e WARNING -- "${f_LOG}" ; then
        outCode=255
    elif grep -q -i -e ERROR -e SIGSEGV -e "segmentation fault" -- "${f_LOG}" ; then
        outCode=1
    elif [[ ${outCode} -eq 0 && ! -s "${f_LOG}" ]] ; then
        outCode=1
        echo "FLEXPART seems to be crashed" > "${f_LOG}"
    fi

    return 0
}

checkLog

#Finish with the status stored in outCode. The EXIT trap set at the top of the
#script (cleanup) still runs at this point.
exit $outCode
