#!/bin/bash

# Run the rest of the script (and everything it launches) with LANG=C.
export LANG="C"

# Number of environment entries that mention both LOADEDMODULES and darshan;
# anything above zero means a darshan module is loaded.
darshan_module_loaded=$(env | grep LOADEDMODULES | grep -c darshan)
if (( darshan_module_loaded > 0 )); then
    echo "ERROR: Darshan is not compatible with TAU."
    echo "Please unload any darshan modules before running tau_exec."
    exit 1
fi

# Load the variable assignments printed by tau-config into this shell.
eval `tau-config`

scriptname=$(basename "$0")

# Interpreter recorded by tau-config (the value of its PYTHONINT= line).
python_interpreter=$(tau-config | grep "PYTHONINT" | sed 's/^PYTHONINT=//')

# `found` ends up "yes" when the version of $python_interpreter should be
# inspected below, and "no" otherwise.
if [ -x "$(command -v "$python_interpreter")" ] ; then
    # The configured interpreter exists. Ask *that* interpreter whether it is
    # a Python 3, not whatever `python3` happens to be on the PATH (which may
    # be a different installation, or missing altogether).
    # 2>&1: Python 2 prints its version on stderr.
    if [[ "$("$python_interpreter" -V 2>&1)" =~ "Python 3" ]] ; then
        found=yes
    else
        found=no
    fi
else
    # No usable interpreter from tau-config: fall back to `python`, then to
    # `python3`, whichever is on the PATH.
    python_interpreter=python
    found=yes
    if ! [ -x "$(command -v "$python_interpreter")" ]; then
        python_interpreter=python3
        if ! [ -x "$(command -v "$python_interpreter")" ]; then
            found=no
        fi
    fi
    # python3 named explicitly at configuration time wins over the fallback.
    if tau-config | grep -q 'PYTHONINT=python3' 2>/dev/null ; then
        python_interpreter=python3
    fi
fi

# For Python < 3.12, use the old python support (tau_python_wrapper);
# Python >= 3.12 uses the new support (pstubs).
python_module_name=tau_python_wrapper
if [ "$found" == "yes" ]; then
    # "--version" prints e.g. "Python 3.12.1"; keep the last field and split
    # it on the dots into (major minor patch).
    python_version=$("${python_interpreter}" --version 2>&1 | awk '{print $NF}')
    version_array=( ${python_version//./ } )
    if [ "${version_array[0]}" == 3 ] && [ "${version_array[1]}" -gt 11 ] ; then
        python_module_name=pstubs
    fi
fi

# The name this script was invoked under selects the wrapper mode and the
# tags used when no -T option is given.
python_wrapper=false
spark_wrapper=false
julia_wrapper=false
default_tags="MPI"
case "$scriptname" in
  tau_python)
    python_wrapper=true
    default_tags="python,MPI"
    ;;
  tau_julia)
    julia_wrapper=true
    default_tags="julia,MPI"
    ;;
esac

# Remember whether tau-config reports one of the architectures that get
# special treatment.
on_craycnl=false
if tau-config | grep -q 'TAUARCH=craycnl' 2>/dev/null ; then
  on_craycnl=true
fi

on_arm64l=false
if tau-config | grep -q 'TAUARCH=arm64_linux' 2>/dev/null ; then
  on_arm64l=true
fi



# Print a message only when verbose mode is on.
# Globals:   verbose (read) - the string "true" enables output
# Arguments: $1 - message; backslash escapes are interpreted (echo -e)
# Outputs:   the message on stdout when verbose is "true", nothing otherwise
echoIfVerbose () {
  # Quoted so that an unset, empty or multi-word $verbose simply means
  # "not verbose" instead of producing a `[` syntax error.
  if [ "$verbose" = "true" ] ; then
    echo -e "$1"
  fi
}

# Print the help text on stdout and terminate the script.
# Globals:   scriptname, default_tags, python_wrapper, python_module_name (read)
#            options (written) - output of `tau-config --list-options`
# The python-only options are listed only when running as the python wrapper.
usage()
{
  # Synopsis and the general option list.
  printf '%s\n' \
    "" \
    "Usage: $scriptname [options] [--] <exe> <exe options>" \
    "" \
    "Options:" \
    "        -v            Verbose mode" \
    "        -vv           Very Verbose mode (enables TAU_VERBOSE=1)" \
    "        -s            Show what will be done but don't actually do anything (dryrun)" \
    "        -qsub         Use qsub mode (BG/P only, see below)" \
    "        -io           Track I/O" \
    "        -memory       Track memory allocation/deallocation" \
    "        -memory_debug Enable memory debugger" \
    "        -cuda         Track GPU events via CUDA" \
    "        -cupti        Track GPU events via CUPTI (Also see env. variable TAU_CUPTI_API)" \
    "        -cupti_pc        Track GPU events via CUPTI and enables PC Sampling(Also see env. variable TAU_CUPTI_API)" \
    "        -opencl       Track GPU events via OpenCL" \
    "        -openacc      Track GPU events via OpenACC (currently PGI only)" \
    "        -rocm         Track ROCm events via rocprofiler " \
    "        -rocm_pc      Track ROCm PC Sampling events (needs -rocm)" \
    "        -ompt         Track OpenMP events via OMPT interface" \
    "        -oneapi       Track Intel OneAPI Level Zero events" \
    "        -power        Track power events via PAPI's perf RAPL interface" \
    "        -numa         Track remote DRAM, total DRAM events (needs papi with recent perf support for x86_64)" \
    "        -armci        Track ARMCI events via PARMCI" \
    "        -shmem        Track SHMEM events" \
    "        -pin          Use Intel PIN for instrumentation" \
    "        -ptts         Run with ParaTools ThreadSpotter" \
    "        -ptts-post    Skip application sampling and post-process existing PTTS sample files" \
    "        -ptts-num=<N>  Number of ranks used for ThreadSpotter" \
    "        -ptts-sample-flags=<flags> Flags to pass to PTTS sample_ts command. Overrides TAU_TS_SAMPLE_FLAGS env. var." \
    "        -ptts-restart Enable restart support within PTTS, allowing application to continue running and be reinstrumented after stop." \
    "        -ptts-start=<addr> Address at which to start a PTTS sampling region" \
    "        -ptts-stop=<addr>  Address at which to stop a PTTS sampling region" \
    "        -ptts-report-flags=<flags> Flags to pass to PTTS report_ts command. Overrides TAU_TS_REPORT_FLAGS env. var." \
    "        -ebs          Enable event-based sampling" \
    "        -ebs_period=<count> Sampling period (default 1000)" \
    "        -ebs_source=<counter> Counter (default itimer)" \
    "        -ebs_resolution=<file|function|line> Choose sampling granularity." \
    "        -ebs_unwind   Enable callstack unwinding for sampling" \
    "        -um           Enable Unified Memory events via CUPTI" \
    "        -sass=<level> Track GPU events via CUDA with Source Code Locator activity (kernel level or source level)" \
    "        -csv          Outputs sass profile in CSV" \
    "        -env          Track GPU environment activity (power utilization, SM, memory frequency, temperature)" \
    "        -syscall      Track SYSCALL"

  # The valid -T tags come straight from tau-config.
  options=$(tau-config --list-options)
  printf '%s\n' \
    "        -T <$options> : Specify TAU tags" \
    "        -loadlib=<file.so>   : Specify additional load library" \
    "        -disable_ld_preload  : Does not set LD_PRELOAD environment variable" \
    "        -XrunTAUsh-<options> : Specify TAU library directly" \
    "        -gdb          Run program in the gdb debugger"

  # Options that only exist for the python wrapper; the filter option
  # additionally needs the pstubs module.
  if [ "$python_wrapper" = "true" ] ; then
    printf '%s\n' \
      "        -tau-python-args=<\"arguments\"> Arguments to the python interpreter" \
      "        -tau-python-interpreter=<\"python\"> name of python interpreter (e.g, python, python3.6)"
    if [ "$python_module_name" = "pstubs" ] ; then
      printf '%s\n' \
        "        -tau-python-filter=<\"filename\"> name of selective measurement JSON file"
    fi
  fi

  # Remaining options, notes and examples.
  printf '%s\n' \
    "        -adios2       Write output to ADIOS2 archive" \
    "        -adios2_trace Write trace output to ADIOS2 archive" \
    "        -monitoring   Periodically read available OS/HW monitoring counters" \
    "        -pthread-stopper   Prevent pthread tracking during specified functions" \
    "        -sqlite3      Write profile data to SQLite3 database" \
    "        -skel         Write trace event data to skeleton files" \
    "        -disable-kokkos  Disable Kokkos profiling (removes fences that hurt performance)" \
    "        -ugni         Track Cray UGNI network library functions." \
    "        -chapel       Track Cray Chapel network atomics library functions." \
    "        -ittnotify    Track Intel ITTNotify tasks." \
    "        -gptl         Track GPTL timers." \
    "" \
    "Notes:" \
    "    Defaults if unspecified: -T $default_tags" \
    "    MPI is assumed unless SERIAL is specified" \
    "" \
    "Example:" \
    "    mpirun -np 2 $scriptname -io ./ring" \
    "Example - event-based sampling with samples taken every 1,000,000 FP instructions" \
    "    mpirun -np 8 $scriptname -ebs -ebs_period=1000000 -ebs_source=PAPI_FP_INS ./ring" \
    "Examples - GPU:" \
    "    $scriptname -T serial,cupti -cupti ./matmult (Preferred for CUDA 4.1 or later)" \
    "    $scriptname -T serial -cuda ./matmult (Preferred for CUDA 4.0 or earlier)" \
    "    $scriptname -T serial -opencl (OPENCL)" \
    "    $scriptname -T serial,cupti -cupti -um ./unified_memory (Unified Virtual Memory)" \
    "    $scriptname -T serial,cupti -cupti -sass=kernel -csv ./sass_source (Metrics Intensity)" \
    "    $scriptname -T serial,cupti -cupti -env ./matmult (GPU Environment)" \
    "" \
    "qsub mode (IBM BG/P only):" \
    "    Original:" \
    "      qsub -n 1 --mode smp -t 10 ./a.out" \
    "    With TAU:" \
    "      $scriptname -qsub -io -memory -- qsub -n 1 --mode smp -t 10 ./a.out" \
    "" \
    "Memory Debugging:" \
    "    -memory option:" \
    "      Tracks heap allocation/deallocation and memory leaks." \
    "    -memory_debug option:" \
    "      Detects memory leaks, checks for invalid alignment, and checks for" \
    "      array overflow.  This is exactly like setting TAU_TRACK_MEMORY_LEAKS=1" \
    "      and TAU_MEMDBG_PROTECT_ABOVE=1 and running with -memory" \
    ""
  exit
}


# Store this process's rank in the global `node`.
# Globals:   binding_options (read), node (written),
#            rank_env_vars and i (written as a side effect)
# With an MPI binding the rank is the value of the first non-empty variable in
# rank_env_vars. A SHMEM binding is rejected. The script exits with status 1
# when `node` is still empty afterwards.
set_node()
{
  if grep -iq mpi <<<"$binding_options" 2>/dev/null ; then
    # ThreadSpotter's sample_ts uses the same method to determine MPI rank,
    # see get_rank() in library/exe_path/path_substitute.cc
    rank_env_vars=(PMI_RANK OMPI_COMM_WORLD_RANK OMPI_MCA_ns_nds_vpid PMI_ID
                   SLURM_PROCID LAMRANK MPI_RANKID MP_CHILD MP_RANK
                   MPIRUN_RANK MPT_MPI_RANK ALPS_APP_PE)
    for i in "${rank_env_vars[@]}"; do
      # Indirect expansion: the value of the variable whose name is in $i.
      node=${!i}
      [ -n "$node" ] && break
    done
  elif grep -iq shmem <<<"$binding_options" 2>/dev/null ; then
    echo "ERROR: ptts support for SHMEM not implemented yet"
    exit 1
  fi

  if [ -z "$node" ] ; then
    echo "ERROR: Cannot determine node"
    exit 1
  fi
}

# Store the number of ranks in the global `nodecount`.
# Globals:   binding_options, ptts_num, on_craycnl (read),
#            nodecount (written), i (written as a side effect)
# With an MPI binding, -ptts-num wins; otherwise the first non-empty size
# variable of the launcher is used (on craycnl -ptts-num is mandatory).
# A SHMEM binding is rejected. The script exits with status 1 when
# `nodecount` is still empty afterwards.
set_nodecount()
{
  if grep -iq mpi <<<"$binding_options" 2>/dev/null ; then
    if [ -z "$ptts_num" ]; then
      if $on_craycnl ; then
        echo "ERROR: Please use -ptts-num=MPI_RANKS to indicate number of MPI ranks."
        exit 1
      fi
      for i in PMI_SIZE OMPI_COMM_WORLD_SIZE SLURM_NPROCS BC_MPI_TASKS_ALLOC; do
        # Indirect expansion: the value of the variable whose name is in $i.
        nodecount=${!i}
        [ -n "$nodecount" ] && break
      done
    else
      nodecount="$ptts_num"
    fi
  elif grep -iq shmem <<<"$binding_options" 2>/dev/null ; then
    echo "ERROR: ptts support for SHMEM not implemented yet"
    exit 1
  fi

  if [ -z "$nodecount" ] ; then
    echo "ERROR: Cannot determine node count (if using MPT then set MPI_SHEPHERD=true)"
    exit 1
  fi
}


# Generate the HTML entry pages of a PTTS report from the templates in
# $TAUROOT/etc.
# Globals:   pttsprefix, nodecount (read);
#            TAUROOT, etcdir, index_html, i (written)
# Writes $pttsprefix/index.html (a copy of ptts.html.index) and
# $pttsprefix/node_0/index.html (head + one body per node + foot).
write_ptts_index()
{
  eval $(tau-config | grep TAUROOT)
  etcdir="$TAUROOT/etc"

  cp "$etcdir/ptts.html.index" "$pttsprefix/index.html"

  index_html="$pttsprefix/node_0/index.html"
  {
    cat "$etcdir/ptts.html.head"
    # One copy of the body template per node, with %NODE% replaced by the
    # node number.
    i=0
    while [ "$i" -lt "$nodecount" ] ; do
      sed -e "s/%NODE%/$i/g" "$etcdir/ptts.html.body"
      i=$((i + 1))
    done
    cat "$etcdir/ptts.html.foot"
  } > "$index_html"
}

# Replace the files listed in `shared` inside $pttsprefix/$viewdir with
# symlinks to the copies in ../node_0.
# Globals:   pttsprefix, viewdir (read)
# Returns:   1 (without touching anything) when the view directory cannot be
#            entered; otherwise the status of changing back to the caller's
#            directory.
compress_ptts_report()
{
  local shared=(bl.gif br.gif ch.png cu.png dot.gif down.png f.png f1.png
                f2.png false.png fh.png filter.png fpc.png ft.png fu.png
                hover_popup.css lo.png manual_html minus.png nav.js perf.csv
                pfc.png pff.png pfh.png pfnt.png plus.png pt-small.png
                q-h.png q.png r.png rw-small.png sb.png source.css src.png
                stb.png summary.css tabber.js table.js tb.png tl.gif tr.gif
                u.png up.png wh.png wnt.png wu.png)
  local name

  # Never run the rm -rf below unless we really are inside the view
  # directory: after a failed cd it would delete same-named files from the
  # caller's current directory instead.
  if ! cd "$pttsprefix/$viewdir" ; then
    echo "ERROR: Cannot change to PTTS report directory $pttsprefix/$viewdir" >&2
    return 1
  fi

  rm -rf -- "${shared[@]}"
  for name in "${shared[@]}" ; do
    ln -s "../node_0/$name" .
  done

  # Back to where the caller was.
  cd "$OLDPWD"
}

# Brute-force barrier that uses the file system to synchronize all MPI ranks
# before post-processing PTTS sample files.
# Globals:   pttsprefix, node, nodecount (read); i (written)
# Each rank drops a marker file named after its rank into $pttsprefix, then
# waits until the markers 0 .. nodecount-1 all exist, polling every 10 s.
mpi_barrier()
{
  touch "$pttsprefix/$node"
  i=0
  while [ "$i" -lt "$nodecount" ] ; do
    sync
    if [ ! -f "$pttsprefix/$i" ] ; then
      # Rank $i has not arrived yet: wait and look again.
      sleep 10
      continue
    fi
    ((i++))
  done
}

# Without arguments there is nothing to run: show the help text.
if [ $# -eq 0 ] ; then
  usage
fi

# Unfortunately LD_PRELOAD items can be separated by spaces or colons.
# Change whitespace to colons (squeezing repeats) so $LD_PRELOAD can be
# reused below. echo's trailing newline is translated as well, so the result
# always ends in ':'.
# '[:space:]' has to be quoted: unquoted it is a glob pattern that the shell
# replaces with the name of a matching one-character file (s, p, a, c, e or
# ':') in the current directory, silently changing what tr translates.
LD_PRELOAD=$(echo "$LD_PRELOAD" | tr '[:space:]' ':' | tr -s ':')

# ---- Defaults for everything the option parser below can change ----

# Parser state and general behaviour.
dryrun="" verbose=false
processT=false processM=false
TauOptions="" TauOptionsExclude=""
binding_specified="" binding_options=""
extraloadlibs="" disable_preload=false
qsub_mode=false use_gdb=false use_pin=false scorep=false
binary_exe=""

# Measurement features that are off unless requested.
track_io=false track_memory=false memory_debug=false
track_ompt=false track_gomp=false track_power=false track_numa=false
track_armci=false track_shmem=false track_syscall=false
track_ugni=false track_chapel=false track_ittnotify=false track_gptl=false

# GPU related features.
track_cuda=false track_cupti=false track_cupti_pc=false unified_memory=false
track_sass=false sass_type="" csv_output=false track_env=false
track_opencl=false track_openacc=false
track_rocm=false track_rocm_pc=false
track_l0=false track_l0_stall_sampling=false

# Kokkos profiling is the one feature that is on by default.
track_kokkos=true

# Event-based sampling.
tau_use_ebs=false
tau_ebs_period="" tau_ebs_resolution="" tau_ebs_source=""
TAU_PAPI_DEFAULT_DOMAIN=PAPI_DOM_USER

# ParaTools ThreadSpotter. The flag strings start from the environment
# variables if they exist; the :- form keeps this safe under set -o nounset.
use_ptts=false ptts_post=false ptts_num=""
ptts_sample_flags="${TAU_TS_SAMPLE_FLAGS:-}"
ptts_report_flags="${TAU_TS_REPORT_FLAGS:-}"

# Python wrapper.
py_interp_args="" python_launch_module=false python_m_arg=""

# Output / plugin selection.
adios2=false adios2_trace=false sqlite3=false skel=false sos=false
monitoring=false pthread_stopper=false

# Record the given tau_exec argument(s) by appending them, space separated,
# to the exported TAU_EXEC_ARGS so they can be reused later.
# This is used for MPI_Comm_spawn support, in which TAU spawns the child
# through tau_exec, since the child doesn't inherit environment variables.
# Arguments: $@ - words to append
save_arg()
{
  TAU_EXEC_ARGS="${TAU_EXEC_ARGS} $*"
  export TAU_EXEC_ARGS
}

# Option parser. Walks the command line left to right; every recognised option
# sets its flag/variable and is shifted off the positional parameters, so that
# after the loop "$@" starts at the program to run. Most options are also
# recorded with save_arg. Parsing stops at "--", at -c / -m in the python
# wrapper, or at the first word that does not start with "-".
for arg in "$@" ; do
  # Thanks to Bernd Mohr for the following that handles quotes and spaces (see configure for explanation)
  # NOTE(review): modarg is not referenced again inside this loop.
  modarg=`echo "x$arg" | sed -e 's/^x//' -e 's/"/\\\"/g' -e s,\',%@%\',g -e 's/%@%/\\\/g' -e 's/ /\\\ /g'`

  if [ "$processT" = true ] ; then
    # Previous word was -T: this word is the tag list. Commas become spaces
    # and the tags are lower-cased; a "scorep" tag also sets the scorep flag.
    binding_options=`echo $binding_options $arg | sed -e 's/,/ /g' | tr '[A-Z]' '[a-z]'`
    processT="false"
    test_arg=`echo $arg | sed -e 's@scorep@@g'`
    if [ "x$test_arg" != "x$arg" ]; then
      scorep=true
    fi
    save_arg $arg
    shift
  elif [ "$processM" = true ] ; then
    # NOTE(review): the -m branch below sets processM and then breaks out of
    # the loop, so this branch is not reached from it within this loop.
    processM=false
    python_m_arg=$arg
    save_arg $arg
    shift
  else
    case $arg in
      -vv)
        verbose=true
        export TAU_VERBOSE=1
        save_arg $arg
        shift
        ;;
      -v|-d|-verbose|--verbose)
        verbose=true
        save_arg $arg
        shift
        ;;
      -h|-help|--help)
        # usage ends with exit, so the save_arg below is never executed.
        usage
        save_arg $arg
        ;;
      -io)
        track_io=true
        save_arg $arg
        shift
        ;;
      -numa)
        track_numa=true
        save_arg $arg
        shift
        ;;
      -memory)
        track_memory=true
        save_arg $arg
        shift
        ;;
      -memory_debug)
        memory_debug=true
        save_arg $arg
        shift
        ;;
      -cuda)
        track_cuda=true
        save_arg $arg
        shift
        ;;
      -ompt)
        track_ompt=true
        save_arg $arg
        shift
        ;;
      -l0|-L0|-oneapi|-level_zero|-levelzero)
        track_l0=true
        save_arg $arg
        shift
        ;;
      -l0_sampling)
        # Stall sampling implies Level Zero tracking.
        track_l0_stall_sampling=true
        track_l0=true
        save_arg $arg
        shift
        ;;
      # The options from -monitoring to -sos are not recorded with save_arg.
      -monitoring)
        monitoring=true
        shift
        ;;
      -sqlite3)
        sqlite3=true
        shift
        ;;
      -skel)
        skel=true
        shift
        ;;
      -disable-kokkos)
        track_kokkos=false
        shift
        ;;
      -pthread-stopper)
        pthread_stopper=true
        shift
        ;;
      -adios2)
        adios2=true
        shift
        ;;
      -adios2_trace)
        adios2_trace=true
        shift
        ;;
      -sos)
        sos=true
        shift
        ;;
      -power)
        track_power=true
        save_arg $arg
        shift
        ;;
      -cupti)
        track_cupti=true
        save_arg $arg
        shift
        ;;
      -cupti_pc)
        track_cupti_pc=true
        save_arg $arg
        shift
        ;;
      -um | -uvm)
        unified_memory=true
        save_arg $arg
        shift
        ;;
      -sass=*)
        track_sass=true
        myarg=`echo $arg | sed 's/-sass=//'`
        sass_type="$myarg"
        save_arg $arg
        shift
        ;;
      -csv)
        csv_output=true
        save_arg $arg
        shift
        ;;
      -env)
        track_env=true
        save_arg $arg
        shift
        ;;
      -opencl)
        track_opencl=true
        save_arg $arg
        shift
        ;;
      -openacc)
        track_openacc=true
        save_arg $arg
        shift
        ;;
      -rocm)
        track_rocm=true
        save_arg $arg
        shift
        ;;
      -rocm_pc)
        # PC sampling implies ROCm tracking.
        track_rocm_pc=true
	      track_rocm=true
        save_arg $arg
        shift
        ;;
      -gomp)
        track_gomp=true
        save_arg $arg
        shift
        ;;
      -armci)
        track_armci=true
        save_arg $arg
        shift
        ;;
      -shmem)
        track_shmem=true
        save_arg $arg
        shift
        ;;
      -pin)
        use_pin=true
        save_arg $arg
        shift
        ;;
      -ptts)
        use_ptts=true
        save_arg $arg
        shift
        ;;
      -ptts-post)
        use_ptts=true
        ptts_post=true
        save_arg $arg
        shift
        ;;
      -ptts-num=*)
        myarg=`echo $arg | sed 's/-ptts-num=//'`
        ptts_num="$myarg"
        save_arg $arg
        shift
        ;;
      -ptts-sample-flags=*)
        ptts_sample_flags="${arg/#-ptts-sample-flags=/}" # bash parameter expansion, bash 3 and up
        save_arg $arg
        shift
        ;;
      -ptts-restart)
        ptts_restart="true"
        save_arg $arg
        shift
        ;;
      -ptts-start=*)
        ptts_start="${arg/#-ptts-start=/}" # bash parameter expansion, bash 3 and up
        save_arg $arg
        shift
        ;;
      -ptts-stop=*)
        ptts_stop="${arg/#-ptts-stop=/}" # bash parameter expansion, bash 3 and up
        save_arg $arg
        shift
        ;;
      -ptts-report-flags=*)
        ptts_report_flags="${arg/#-ptts-report-flags=/}" # Also proper quoting must be used when multiple flags are passed
        save_arg $arg
        shift
        ;;
      -ebs)
        tau_use_ebs=true
        save_arg $arg
        shift
        ;;
      -ebs_unwind)
        tau_use_ebs_unwind=true
        save_arg $arg
        shift
        ;;
      # Each of the -ebs_*= options below also switches sampling on.
      -ebs_period=*)
        tau_use_ebs=true
        myarg=`echo $arg | sed 's/-ebs_period=//'`
        tau_ebs_period="$myarg"
        save_arg $arg
        shift
        ;;
      -ebs_source=*)
        tau_use_ebs=true
        myarg=`echo $arg | sed 's/-ebs_source=//'`
        tau_ebs_source="$myarg"
        save_arg $arg
        shift
        ;;
      -ebs_resolution=*)
        tau_use_ebs=true
        myarg=`echo $arg | sed 's/-ebs_resolution=//'`
        tau_ebs_resolution="$myarg"
        export TAU_EBS_RESOLUTION=$tau_ebs_resolution
        save_arg $arg
        shift
        ;;
      -qsub)
        qsub_mode=true
        save_arg $arg
        shift
        ;;
      -s)
        # Dry run: later commands written as "$dryrun <command>" are echoed
        # instead of executed.
        dryrun=echo
        save_arg $arg
        shift
        ;;
      -gdb)
        use_gdb=true
        save_arg $arg
        shift
        ;;
      -T)
        # The tag list is the next word; it is handled at the top of the loop.
        processT=true
        save_arg $arg
        shift
        ;;
      -tau:*)
        # Tags given inline as -tau:a,b (not lower-cased, unlike -T).
        binding_options="$binding_options `echo $arg | sed -e 's/-tau://' -e 's/,/ /g'`"
        save_arg $arg
        shift
        ;;
      -loadlib=*)
        myarg=`echo $arg | sed 's/-loadlib=//'`
        extraloadlibs="$extraloadlibs:$myarg"
        save_arg $arg
        shift
        ;;
      -disable_ld_preload)
        disable_preload=true
        save_arg $arg
        shift
        ;;
      # Both spellings select the library directory "shared-<options>".
      -XrunTAU-*)
        myarg=`echo $arg | sed 's/-XrunTAU-//'`
        binding_specified="shared-$myarg"
        save_arg $arg
        shift
        ;;
      -XrunTAUsh-*)
        myarg=`echo $arg | sed 's/-XrunTAUsh-//'`
        binding_specified="shared-$myarg"
        save_arg $arg
        shift
        ;;
      -spark-python)
        spark_wrapper=true
        save_arg $arg
        shift
        ;;
      # The -tau-python-* options are only accepted by the python wrapper.
      -tau-python-args=*)
        if [ $python_wrapper = "true" ] ; then
          myarg=`echo $arg | sed 's/-tau-python-args=//'`
          py_interp_args="$myarg"
          save_arg $arg
          shift
        else
          echo "Unknown option: $arg" >&2
          exit 1
        fi
        ;;
      -tau-python-interpreter=*)
        if [ $python_wrapper = "true" ] ; then
          myarg=`echo $arg | sed 's/-tau-python-interpreter=//'`
          python_interpreter="$myarg"
          save_arg $arg
          shift
        else
          echo "Unknown option: $arg" >&2
          exit 1
        fi
        ;;
      -tau-python-filter=*)
        # Additionally requires the pstubs python module.
        if [ $python_wrapper = "true" ] ; then
            if [ $python_module_name = "pstubs" ] ; then
                myarg=`echo $arg | sed 's/-tau-python-filter=//'`
                python_filter="$myarg"
                export PERFSTUBS_PYTHON_FILTER_FILENAME=$python_filter
                save_arg $arg
                shift
            else
                echo "Unknown option: $arg" >&2
                exit 1
            fi
        else
            echo "Unknown option: $arg" >&2
            exit 1
        fi
        ;;
      -ugni)
        track_ugni=true
        save_arg $arg
        shift
        ;;
      -gni)
        # Alias of -ugni.
        track_ugni=true
        save_arg $arg
        shift
        ;;
      -chapel)
        track_chapel=true
        save_arg $arg
        shift
        ;;
      -ittnotify)
        track_ittnotify=true
        save_arg $arg
        shift
        ;;
      -gptl)
        track_gptl=true
        save_arg $arg
        shift
        ;;
      -c)
        # Python wrapper only: stop parsing here. There is no shift, so -c
        # itself stays in "$@".
        if [ $python_wrapper = "true" ] ; then
          save_arg $arg
          break
        else
          echo "Unknown option: $arg" >&2
          exit 1
        fi
        ;;
      -m)
        # Python wrapper only: -m is shifted away and parsing stops, leaving
        # the word after -m at the front of "$@".
        if [ $python_wrapper = "true" ] ; then
          save_arg $arg
          processM=true
          python_launch_module=true
          shift
          break
        else
          echo "Unknown option: $arg" >&2
          exit 1
        fi
        ;;
      -syscall)
        track_syscall=true
        save_arg $arg
        shift
        ;;
      --)
        # Explicit end of options.
        save_arg $arg
        shift
        break
        ;;
      -*)
        echo "Unknown option: $arg" >&2
        exit 1
        # First non-option signifies end of options. This would be much easier with getopt()
        ;;
      *)
        # First non-option word: it stays in "$@" and parsing stops.
        break
        ;;
    esac
  fi
done
# Export the path this script was started from.
TAU_EXEC_PATH=${BASH_SOURCE[0]}
export TAU_EXEC_PATH

# Translate the PTTS region options into sample flags. A start or a stop
# address also switches restart support on.
if [ -n "$ptts_start" ]; then
    ptts_sample_flags+=" --start-at-address $ptts_start"
    ptts_restart=true
fi

if [ -n "$ptts_stop" ]; then
    ptts_sample_flags+=" --stop-at-address $ptts_stop"
    ptts_restart=true
fi

if [ -n "$ptts_restart" ]; then
    ptts_sample_flags+=" --sample-restart"
fi

# choose TAU library
# Rebuild binding_options word by word: every tag ends up preceded by exactly
# one space, whatever whitespace separated the tags before.
new_binding_options=""
for i in $binding_options ; do
  new_binding_options+=" $i"
done
binding_options="$new_binding_options"


# Work out theBinding, the TAU library directory to use. An explicit
# -XrunTAU*/-XrunTAUsh-* choice (or -pin) bypasses the tau-config lookup.
if [ -z "$binding_specified" ] && ! $use_pin ; then
  if [ -n "$binding_options" ]; then
    # Tags were given: add MPI by default unless serial or shmem was asked for.
    case "$binding_options" in
      *serial*|*shmem*) ;;
      *) binding_options="$binding_options mpi" ;;
    esac
    if [ "$python_wrapper" == "true" ] ; then
      binding_options="$binding_options python"
    fi
  else
    # No tags at all: fall back to the wrapper's default tags.
    binding_options=$(echo "$default_tags" | sed -e 's/,/ /g' | tr '[A-Z]' '[a-z]')
  fi

  if ! theBinding=$(tau-config --binding $binding_options) ; then
    if [ "$verbose" = "true" ]; then
      echo "Binding options: $binding_options didn't work; trying without MPI using serial instead."
    fi
    # Second attempt with the first "mpi" replaced by "serial".
    new_binding_options=$(echo $binding_options | sed -e 's/mpi/serial/')
    if theBinding=$(tau-config --binding $new_binding_options) ; then
      echo "Using $theBinding"
      binding_options=$new_binding_options
    else
      echo "Binding not found. Exiting..."
      exit 1
    fi
  fi
else
  theBinding=$binding_specified
fi

# In verbose mode show what is left of the command line: the program to run.
if [ "$verbose" = "true" ] ; then
  echo ""
  echo "Program to run : $*"
  echo ""
fi


# Shared libraries are .dylib on macOS (Darwin) and .so everywhere else.
case "$(uname -s)" in
  Darwin)
    apple=1
    TAU_SHLIBX=.dylib
    ;;
  *)
    apple=0
    TAU_SHLIBX=.so
    ;;
esac

# The python bindings live next to the chosen library directory, with
# "bindings" in place of "shared" in the directory name.
python_bindings=${theBinding/shared/bindings}
PYTHONPATH="$BASEDIR/lib/$python_bindings:${BASEDIR}/lib:$PYTHONPATH"
export PYTHONPATH

# Library search path for the run: TAU's directories first, followed by the
# caller's LD_LIBRARY_PATH when there is one.
TAUEX_LD_LIBRARY_PATH="$BASEDIR/lib/$theBinding:$BASEDIR/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"

# Preload list: libTAU first; everywhere except macOS the caller's LD_PRELOAD
# entries are appended.
TAUEX_LD_PRELOAD="$BASEDIR/lib/$theBinding/libTAU$TAU_SHLIBX"
if [ "$apple" != 1 ]; then
  TAUEX_LD_PRELOAD="$TAUEX_LD_PRELOAD:$LD_PRELOAD"
fi

# Append libTAU-<name>-plugin to TAU_PLUGINS and point TAU_PLUGINS_PATH at the
# library directory of the selected binding.
# Arguments: $1 - plugin name, e.g. "sqlite3"
tau_exec_add_plugin()
{
    export TAU_PLUGINS="$TAU_PLUGINS:libTAU-$1-plugin${TAU_SHLIBX}"
    export TAU_PLUGINS_PATH="$BASEDIR/lib/$theBinding"
}

# Used for plugins that write the data themselves: turn off TAU's own
# profile output.
tau_exec_disable_profile_output()
{
    export TAU_PROFILE=0
    unset TAU_PROFILE_FORMAT
}

# Make sure the monitoring plugin is first!  In the default configuration, it
# reads the counters on the "dump" event.
if [ "$monitoring" = "true" ]; then
    tau_exec_add_plugin monitoring
fi

if [ "$pthread_stopper" = "true" ]; then
    tau_exec_add_plugin pthread-stopper
fi

# Data goes to ADIOS2.
if [ "$adios2" = "true" ]; then
    tau_exec_add_plugin adios2
    tau_exec_disable_profile_output
fi

# Data goes to SQLite.
if [ "$sqlite3" = "true" ]; then
    tau_exec_add_plugin sqlite3
    tau_exec_disable_profile_output
fi

# Data goes to skeleton files; tracing is switched off as well.
if [ "$skel" = "true" ]; then
    tau_exec_add_plugin skel
    tau_exec_disable_profile_output
    export TAU_CURRENT_TIMER_EXIT_PARAMS=1
    export TAU_TRACE=0
fi

# Trace data goes to ADIOS2.
if [ "$adios2_trace" = "true" ]; then
    tau_exec_add_plugin adios2-trace
    tau_exec_disable_profile_output
    export TAU_THREAD_PER_GPU_STREAM=1
fi

if [ "$sos" = "true" ]; then
    tau_exec_add_plugin sos
fi

# Kokkos profiling: point all three Kokkos tool variables at libTAU.
if [ "$track_kokkos" = "true" ]; then
    export KOKKOS_PROFILE_LIBRARY="$BASEDIR/lib/$theBinding/libTAU$TAU_SHLIBX"
    export KOKKOS_TOOLS_LIBS="$KOKKOS_PROFILE_LIBRARY"
    export KOKKOS_PLUGINS="$KOKKOS_PROFILE_LIBRARY"
fi



# OMPT: register libTAU as the OpenMP tool library and enable the tool.
if [ "$track_ompt" = "true" ]; then
  export OMP_TOOL_LIBRARIES="$BASEDIR/lib/$theBinding/libTAU$TAU_SHLIBX"
  export OMP_TOOL=enabled
fi


# Make sure TAU_METRICS contains the TIME metric.
# TIME counts as present only as a whole entry: at the start or after a
# ':' / ',' separator, and followed by a separator or the end of the string.
# An empty TAU_METRICS is treated like an unset one, so the result is "TIME"
# rather than "TIME," with a dangling separator.
if [ -n "${TAU_METRICS:-}" ]; then
  if ! printf '%s\n' "$TAU_METRICS" | grep -Eq '(^|[:,])TIME($|[:,])'; then
    export TAU_METRICS="TIME,$TAU_METRICS"
  fi
else
  export TAU_METRICS=TIME
fi

# ---- Level Zero tracking ----------------------------------------------------
if [ "$track_l0" = "true" ]; then
  # Use TAUGPU_TIME in place of TIME unless TAU_METRICS is empty or already
  # mentions TAUGPU_TIME.  Without any TIME in the list, TAUGPU_TIME is
  # prepended.
  case "$TAU_METRICS" in
    '' | *TAUGPU_TIME*)
      ;;
    *TIME*)
      $dryrun export TAU_METRICS="${TAU_METRICS/TIME/TAUGPU_TIME}"
      ;;
    *)
      $dryrun export TAU_METRICS="TAUGPU_TIME,${TAU_METRICS}"
      ;;
  esac
  $dryrun export PTI_ENABLE=1
  $dryrun export ZE_ENABLE_TRACING_LAYER=1

  if [ "$track_l0_stall_sampling" = "true" ]; then
    if [ -n "${L0_METRICGROUP+x}" ]; then
      # Stall sampling is still experimental: it does not work on our machines
      # because stall (pc) sampling appears to have driver/GPU issues, even
      # with unitrace (Intel tools).  It will be added to the usage text once
      # it works.
      echo "L0_METRICGROUP must not be initialized to use stall sampling"
    else
      $dryrun export L0_METRICGROUP="EuStallSampling"
    fi
    $dryrun export ZE_ENABLE_STALL_SAMPLING=1
  fi

  # Metric collection is enabled whenever a metric group is set.
  if [ -n "${L0_METRICGROUP+x}" ]; then
    $dryrun export ZET_ENABLE_METRICS=1
  fi
fi

# ---- ROCm tracking ----------------------------------------------------------
if [ "$track_rocm" = "true" ]; then
  # Same TIME -> TAUGPU_TIME handling as for Level Zero above.
  case "$TAU_METRICS" in
    '' | *TAUGPU_TIME*)
      ;;
    *TIME*)
      $dryrun export TAU_METRICS="${TAU_METRICS/TIME/TAUGPU_TIME}"
      ;;
    *)
      $dryrun export TAU_METRICS="TAUGPU_TIME,${TAU_METRICS}"
      ;;
  esac
  if [ "$track_rocm_pc" = "true" ]; then
    # Current versions of rocprofiler-sdk need this environment variable.
    # A variable is needed to enable ROCm pc sampling inside TAU (its name
    # could be modified later).
    export ROCPROFILER_PC_SAMPLING_BETA_ENABLED=1
  fi
fi

#######################################
# Put a library at the front of TAUEX_LD_PRELOAD when it is readable.
# Globals:
#   TAUEX_LD_PRELOAD (read, written)
# Arguments:
#   $1 - path of the library to preload
#######################################
tau_exec_preload_if_readable() {
  # The path is quoted so install prefixes containing spaces keep working.
  if [ -r "$1" ]; then
    TAUEX_LD_PRELOAD="$1:$TAUEX_LD_PRELOAD"
  fi
}

# TBB bindings get TAU's pthread wrapper preloaded.
isTBB=$(echo "$theBinding" | grep tbb)
if [ -n "$isTBB" ]; then
  tau_exec_preload_if_readable "$BASEDIR/lib/$theBinding/libTAU-pthread$TAU_SHLIBX"
fi

# So do pthread bindings.  A binding that matches both "tbb" and "pthread"
# ends up with the wrapper listed twice, as before.
isPthread=$(echo "$theBinding" | grep pthread)
if [ -n "$isPthread" ]; then
  tau_exec_preload_if_readable "$BASEDIR/lib/$theBinding/libTAU-pthread$TAU_SHLIBX"
fi

# GOMP wrapper, only when track_gomp is enabled.
if [ "$track_gomp" = "true" ]; then
  tau_exec_preload_if_readable "$BASEDIR/lib/$theBinding/libTAU-gomp$TAU_SHLIBX"
fi

# ---- I/O tracking -----------------------------------------------------------
if [ "$track_io" = "true" ]; then
  # The I/O wrapper library goes to the front of the preload list.
  TAUEX_LD_PRELOAD="${BASEDIR}/lib/${theBinding}/libTAU-iowrap${TAU_SHLIBX}:${TAUEX_LD_PRELOAD}"
  # The dl auditor is used unless on_arm64l is non-empty.
  # NOTE(review): any non-empty on_arm64l value (even the string "false")
  # disables the auditor here - confirm how on_arm64l is initialised.
  if [ -z "$on_arm64l" ]; then
    TAUEX_LD_AUDITOR="${BASEDIR}/lib/${theBinding}/libTAU-dl-auditor${TAU_SHLIBX}"
  else
    unset TAUEX_LD_AUDITOR
  fi
  # LD_BIND_NOW=1 is deliberately NOT exported here; it is only needed by GDB
  # and the auditor.
fi

# ---- UGNI wrapper -----------------------------------------------------------
if [ "$track_ugni" = "true" ]; then
  TAUEX_LD_PRELOAD="${BASEDIR}/lib/${theBinding}/libTAU-gni-wrap${TAU_SHLIBX}:${TAUEX_LD_PRELOAD}"
fi

# ---- Chapel wrapper ---------------------------------------------------------
if [ "$track_chapel" = "true" ]; then
  TAUEX_LD_PRELOAD="${BASEDIR}/lib/${theBinding}/libTAU-chapel-wrap${TAU_SHLIBX}:${TAUEX_LD_PRELOAD}"
fi

# ---- GPTL wrapper -----------------------------------------------------------
if [ "$track_gptl" = "true" ]; then
  TAUEX_LD_PRELOAD="${BASEDIR}/lib/${theBinding}/libTAU-gptl-wrap${TAU_SHLIBX}:${TAUEX_LD_PRELOAD}"
fi

# ---- Intel ITT / JIT events -------------------------------------------------
if [ "$track_ittnotify" = "true" ]; then
  # The same TAU collector library serves applications linked to the Intel
  # ITT stub and to the Intel JIT Events stub; v1 JIT events are requested.
  INTEL_LIBITTNOTIFY64="${BASEDIR}/lib/${theBinding}/libTAU-ittnotify-collector${TAU_SHLIBX}"
  INTEL_JIT_PROFILER64="${BASEDIR}/lib/${theBinding}/libTAU-ittnotify-collector${TAU_SHLIBX}"
  INTEL_JIT_BACKWARD_COMPATIBILITY=1
  export INTEL_LIBITTNOTIFY64 INTEL_JIT_PROFILER64 INTEL_JIT_BACKWARD_COMPATIBILITY
fi

#######################################
# Select the NUMA-related PAPI metrics for the -numa option.
#
# The PAPI install directory is read from the PAPIDIR entry of the TAU stub
# makefile that belongs to the selected binding.  papi_event_chooser from that
# install is then asked whether both native events can be measured together.
# When they can, and the user did not choose metrics of their own, TAU_METRICS
# is replaced by TIME plus the two NUMA events.  In every other case a warning
# is printed and TAU_METRICS is left alone.
#
# Globals:
#   BASEDIR, theBinding, verbose, scriptname (read)
#   TAU_METRICS (read; written and exported on success)
#   options, mk, papibin, ret (written, as before)
# Outputs:
#   informational / warning text on stdout
#######################################
tau_exec_setup_numa_metrics() {
  local numa_metrics="TIME,PAPI_NATIVE_OFFCORE_RESPONSE_0:REMOTE_DRAM,PAPI_NATIVE_perf::PERF_COUNT_HW_CACHE_NODE:ACCESS"
  local numa_warning="WARNING: $scriptname: a PAPI configuration that supports perf events perf::PERF_COUNT_HW_CACHE_NODE:ACCESS and OFFCORE_RESPONSE_0:REMOTE_DRAM should be specified to use the -numa option"
  local papidir

  options=$(echo "$theBinding" | sed -e 's/shared-//g')
  mk="$BASEDIR/lib/Makefile.tau-$options"
  if [ ! -r "$mk" ]; then
    echo "$numa_warning"
    return
  fi
  if [ "$verbose" = "true" ]; then
    echo "Using TAU_MAKEFILE=$mk to extract PAPIDIR"
  fi

  papidir=$(grep "^PAPIDIR" "$mk" | sed -e 's/^PAPIDIR=//g' | head -n 1)
  papibin="$papidir/bin"
  # Check the extracted directory itself: "$papidir/bin" is never empty, so
  # testing papibin would accept a makefile without PAPIDIR and probe /bin.
  if [ -z "$papidir" ] || [ ! -x "$papibin/papi_event_chooser" ]; then
    echo "$numa_warning"
    return
  fi

  "$papibin/papi_event_chooser" NATIVE perf::PERF_COUNT_HW_CACHE_NODE:ACCESS OFFCORE_RESPONSE_0:REMOTE_DRAM > /dev/null 2>&1
  ret=$?

  # By this point the script has already made sure TAU_METRICS contains at
  # least TIME, so "empty" can no longer mean "the user chose nothing".  A
  # bare TIME is therefore treated as the default as well.
  if [ "$ret" = 0 ] && { [ -z "$TAU_METRICS" ] || [ "$TAU_METRICS" = "TIME" ]; }; then
    if [ "$verbose" = "true" ]; then
      echo "Setting TAU_METRICS=$numa_metrics  for $scriptname -numa"
    fi
    export TAU_METRICS="$numa_metrics"
  else
    echo "$numa_warning"
  fi
}

if [ "$track_numa" = "true" ]; then
  tau_exec_setup_numa_metrics
fi

# ---- Memory tracking / memory debugging -------------------------------------
if [ "$track_memory" = "true" ] || [ "$memory_debug" = "true" ]; then
  # The memory wrapper library goes to the front of the preload list.
  TAUEX_LD_PRELOAD="${BASEDIR}/lib/${theBinding}/libTAU-memorywrap${TAU_SHLIBX}:${TAUEX_LD_PRELOAD}"
  # The dl auditor is used unless on_arm64l is non-empty.
  # NOTE(review): any non-empty on_arm64l value (even the string "false")
  # disables the auditor here - confirm how on_arm64l is initialised.
  if [ -z "$on_arm64l" ]; then
    TAUEX_LD_AUDITOR="${BASEDIR}/lib/${theBinding}/libTAU-dl-auditor${TAU_SHLIBX}"
  else
    unset TAUEX_LD_AUDITOR
  fi
  # LD_BIND_NOW=1 is deliberately NOT exported here; it is only needed by GDB
  # and the auditor.

  # Turn on heap usage tracking and memory leak detection.
  TAU_TRACK_HEAP=1
  TAU_TRACK_MEMORY_LEAKS=1
  export TAU_TRACK_HEAP TAU_TRACK_MEMORY_LEAKS
fi
#######################################
# Succeed when both CUPTI support libraries (libTAU-CUpti and libTAU-CudaQP)
# exist in the library directory of the selected binding.
# Globals:
#   BASEDIR, theBinding, TAU_SHLIBX (read)
#######################################
tau_exec_have_cupti() {
  [ -f "$BASEDIR/lib/$theBinding/libTAU-CUpti$TAU_SHLIBX" ] &&
    [ -f "$BASEDIR/lib/$theBinding/libTAU-CudaQP$TAU_SHLIBX" ]
}

#######################################
# Switch TAU_METRICS from TIME to TAUGPU_TIME for GPU measurements.
# Nothing happens when TAU_METRICS is empty or already mentions TAUGPU_TIME.
# Otherwise the first "TIME" in the list is rewritten to "TAUGPU_TIME"; if the
# list contains no "TIME" at all, TAUGPU_TIME is added to it.
# Globals:
#   TAU_METRICS (read; written and exported when changed)
# Arguments:
#   $1 - "append" to add TAUGPU_TIME at the end of a list without TIME;
#        anything else (the default) adds it at the front.
#######################################
tau_exec_use_gpu_time() {
  case "$TAU_METRICS" in
    '' | *TAUGPU_TIME*)
      ;;
    *TIME*)
      export TAU_METRICS="${TAU_METRICS/TIME/TAUGPU_TIME}"
      ;;
    *)
      if [ "$1" = "append" ]; then
        export TAU_METRICS="$TAU_METRICS,TAUGPU_TIME"
      else
        export TAU_METRICS="TAUGPU_TIME,$TAU_METRICS"
      fi
      ;;
  esac
}

# ---- -cuda: CUDA runtime wrapper --------------------------------------------
if [ "$track_cuda" = "true" ]; then
  if tau_exec_have_cupti; then
    # Tell the user that the newer CUPTI support is available.
    echo "NOTE: CUPTI is available with your TAU configuration use '-cupti' instead of '-cuda' to get the latest available features."
  fi
  # Add the CUDA wrapper libraries to the front of the preload list.
  TAUEX_LD_PRELOAD="$BASEDIR/lib/$theBinding/libTAU-CUDArt$TAU_SHLIBX:$TAUEX_LD_PRELOAD"
  TAUEX_LD_PRELOAD="$BASEDIR/lib/$theBinding/libTAU-CudaQP$TAU_SHLIBX:$TAUEX_LD_PRELOAD"
  tau_exec_use_gpu_time
fi

# ---- -cupti: CUPTI based tracking -------------------------------------------
if [ "$track_cupti" = "true" ]; then
  if ! tau_exec_have_cupti; then
    # Tell the user to add cupti to the bindings list.  This is an error, so
    # the script must not report success.
    echo "ERROR: CUPTI library not found. Please ensure that 'cupti' is on the list of bindings specified with the '-T' option."
    exit 1
  fi

  # Add the wrapper libraries to the front of the preload list.
  TAUEX_LD_PRELOAD="$BASEDIR/lib/$theBinding/libTAU-CUact$TAU_SHLIBX:$TAUEX_LD_PRELOAD"
  TAUEX_LD_PRELOAD="$BASEDIR/lib/$theBinding/libTAU-CUpti$TAU_SHLIBX:$TAUEX_LD_PRELOAD"
  TAUEX_LD_PRELOAD="$BASEDIR/lib/$theBinding/libTAU-CudaQP$TAU_SHLIBX:$TAUEX_LD_PRELOAD"
  export NVTX_INJECTION64_PATH="$BASEDIR/lib/$theBinding/libTAU-CUpti$TAU_SHLIBX"
  tau_exec_use_gpu_time

  ## Unified Memory called along with -cupti
  if [ "$unified_memory" = "true" ]; then
    export TAU_TRACK_UNIFIED_MEMORY=1
  fi
  ## SASS called along with -cupti
  if [ "$track_sass" = "true" ]; then
    export TAU_TRACK_CUDA_SASS=1
    if [ "$csv_output" = "true" ]; then
      export TAU_OUTPUT_CUDA_CSV=1
    fi
    if [ -n "$sass_type" ]; then
      export TAU_SASS_TYPE="$sass_type"
    fi
    export TAU_CUDA_BINARY_EXE="$arg"
  fi
  if [ "$track_env" = "true" ]; then
    export TAU_TRACK_CUDA_ENV=1
  fi
  if [ "$python_wrapper" = "true" ]; then
    export TAU_CUPTI_API=both
  fi
fi

# ---- -cupti_pc: CUPTI PC sampling -------------------------------------------
if [ "$track_cupti_pc" = "true" ]; then
  if [ "$track_cupti" = "true" ]; then
    # Only a notice is printed; processing continues as before.
    echo "-cupti_pc is not compatible with -cupti, select only one."
  fi
  if ! tau_exec_have_cupti; then
    # Tell the user to add cupti to the bindings list.
    echo "ERROR: CUPTI library not found. Please ensure that 'cupti' is on the list of bindings specified with the '-T' option."
    exit 1
  fi
  TAUEX_LD_PRELOAD="$BASEDIR/lib/$theBinding/libTAU-CUpti$TAU_SHLIBX:$TAUEX_LD_PRELOAD"
  export TAU_CUPTI_PC=1
  tau_exec_use_gpu_time
fi

# ---- OpenCL wrapper ---------------------------------------------------------
if [ "$track_opencl" = "true" ]; then
  # Add the OpenCL wrapper libraries to the front of the preload list.
  TAUEX_LD_PRELOAD="$BASEDIR/lib/$theBinding/libTAU-OpenCL$TAU_SHLIBX:$TAUEX_LD_PRELOAD"
  TAUEX_LD_PRELOAD="$BASEDIR/lib/$theBinding/libTAU-OCLci$TAU_SHLIBX:$TAUEX_LD_PRELOAD"
  tau_exec_use_gpu_time append
fi

# ---- Level Zero without OpenCL ----------------------------------------------
if [ "$track_l0" = "true" ] && [ "$track_opencl" = "false" ]; then
  tau_exec_use_gpu_time append
fi
#######################################
# Print every line of the TAU stub makefile ($mk) that contains "<name>=",
# with that "<name>=" text removed.
# Globals:
#   mk (read) - path of the stub makefile
# Arguments:
#   $1 - makefile variable name, e.g. TAU_ROCPROFILER
#######################################
tau_exec_makefile_value() {
  grep "$1=" "$mk" | sed -e "s@$1=@@g"
}

# ---- OpenACC ----------------------------------------------------------------
if [ "$track_openacc" = "true" ]; then
  export PGI_ACC_PROFLIB="$BASEDIR/lib/$theBinding/libTAU${TAU_SHLIBX}"
  export ACC_PROFLIB="$BASEDIR/lib/$theBinding/libTAU${TAU_SHLIBX}"
fi

# ---- ROCm profiler selection ------------------------------------------------
# Which profiler interface is configured depends on the binding name:
#   *rocprofsdk*          -> rocprofiler-sdk
#   *rocmv6* + *rocprofv2* -> rocprofiler v2
#   *rocmv6*              -> rocprofiler v1 (ROCm 6 directory layout)
#   anything else         -> rocprofiler v1 (older directory layout)
# The install directories come from the stub makefile of the binding.
if [ "$track_rocm" = "true" ]; then
  options=$(echo "$theBinding" | sed -e 's/shared-//g')
  mk="$BASEDIR/lib/Makefile.tau-$options"

  isRocprofsdk=$(echo "$theBinding" | grep rocprofsdk)
  if [ -n "$isRocprofsdk" ]; then
    rocprofsdkdir=$(tau_exec_makefile_value TAUROCPROFSDK_DIR)
    if [ -r "$rocprofsdkdir/lib/librocprofiler-sdk.so" ]; then
      $dryrun export TAU_USE_ROCPROFILERSDK=1
    fi
  else
    rocprofdir=$(tau_exec_makefile_value TAU_ROCPROFILER)
    isRocmv6=$(echo "$theBinding" | grep rocmv6)
    if [ -n "$isRocmv6" ]; then
      isRocprofv2=$(echo "$theBinding" | grep rocprofv2)
      if [ -n "$isRocprofv2" ]; then
        if [ -r "$rocprofdir/lib/librocprofiler64v2.so" ]; then
          $dryrun export TAU_USE_ROCPROFV2=1
        fi
      else
        $dryrun export ROCP_HSA_INTERCEPT=2
        $dryrun export ROCP_TOOL_LIB="$BASEDIR/lib/$theBinding/libTAU$TAU_SHLIBX"
        if [ -r "$rocprofdir/lib/librocprofiler64.so" ]; then
          $dryrun export TAU_HSA_TOOLS_LIB="$rocprofdir/lib/librocprofiler64.so"
        fi
      fi
      if [ -r "$rocprofdir/lib/rocprofiler/metrics.xml" ]; then
        $dryrun export ROCP_METRICS="$rocprofdir/lib/rocprofiler/metrics.xml"
      fi
    else
      $dryrun export ROCP_HSA_INTERCEPT=2
      $dryrun export ROCP_TOOL_LIB="$BASEDIR/lib/$theBinding/libTAU$TAU_SHLIBX"
      if [ -r "$rocprofdir/lib/librocprofiler64.so" ]; then
        $dryrun export TAU_HSA_TOOLS_LIB="$rocprofdir/lib/librocprofiler64.so"
      fi
      if [ -r "$rocprofdir/lib/metrics.xml" ]; then
        $dryrun export ROCP_METRICS="$rocprofdir/lib/metrics.xml"
      fi
    fi
  fi
fi

# ---- roctracer bindings -----------------------------------------------------
# For roctracer bindings libTAU itself is used as TAU_HSA_TOOLS_LIB.
isRoctracer=$(echo "$theBinding" | grep roctracer)
if [ -n "$isRoctracer" ] && [ -r "$BASEDIR/lib/$theBinding/libTAU$TAU_SHLIBX" ]; then
  export TAU_HSA_TOOLS_LIB="$BASEDIR/lib/$theBinding/libTAU$TAU_SHLIBX"
fi

# ---- ARMCI wrapper ----------------------------------------------------------
if [ "$track_armci" = "true" ]; then
  TAUEX_LD_PRELOAD="${BASEDIR}/lib/${theBinding}/libTAU-armciwrap${TAU_SHLIBX}:${TAUEX_LD_PRELOAD}"
fi

# ---- SHMEM wrapper ----------------------------------------------------------
# This wrapper is taken from $BASEDIR/lib directly, not from the binding
# directory.
if [ "$track_shmem" = "true" ]; then
  TAUEX_LD_PRELOAD="${BASEDIR}/lib/libTAUsh_shmem_wrap${TAU_SHLIBX}:${TAUEX_LD_PRELOAD}"
fi

# ---- Event-based sampling ---------------------------------------------------
# Only TAU_SAMPLING=1 is set here; the capability to enable HPCToolkit is
# deferred until later.
if [ "$tau_use_ebs" = "true" ]; then
  export TAU_SAMPLING=1
  if [ -n "$tau_ebs_period" ]; then
    export TAU_EBS_PERIOD="$tau_ebs_period"
  fi
  if [ -n "$tau_ebs_source" ]; then
    export TAU_EBS_SOURCE="$tau_ebs_source"
  fi
  # Any non-empty tau_ebs_unwind value turns unwinding on.
  if [ -n "$tau_ebs_unwind" ]; then
    export TAU_EBS_UNWIND=1
  fi
fi

# ---- Power tracking ---------------------------------------------------------
if [ "$track_power" = "true" ]; then
  export TAU_TRACK_POWER=1
  # Default the interrupt interval to 1 unless one is already configured.
  if [ -z "$TAU_INTERRUPT_INTERVAL" ]; then
    export TAU_INTERRUPT_INTERVAL=1
  fi
fi
#######################################
# Append the OpenMP runtimes shipped in the binding directory (libiomp5 and
# libomp) to the end of TAUEX_LD_PRELOAD.  Only runtimes that are readable are
# added; in verbose mode each one added is reported by its own path.
# Globals:
#   BASEDIR, theBinding, TAU_SHLIBX, verbose (read)
#   TAUEX_LD_PRELOAD (read, written)
# Outputs:
#   one informational line per preloaded runtime when verbose is "true"
#######################################
tau_exec_preload_ompt_runtimes() {
  local runtime runtime_lib
  for runtime in libiomp5 libomp; do
    runtime_lib="$BASEDIR/lib/$theBinding/$runtime$TAU_SHLIBX"
    if [ -r "$runtime_lib" ]; then
      TAUEX_LD_PRELOAD="$TAUEX_LD_PRELOAD:$runtime_lib"
      if [ "$verbose" = "true" ]; then
        echo "Tracking with TAU's OMPT wrapper is enabled by preloading $runtime_lib"
      fi
    fi
  done
}

if [ "$track_ompt" = "true" ]; then
  tau_exec_preload_ompt_runtimes
fi

# ---- Score-P ----------------------------------------------------------------
# When libTAU of the selected binding is linked against Score-P's MPI event
# adapter, that adapter is put at the front of the extra libraries to preload.
if [ "$scorep" = true ]; then
  if [ "$verbose" = "true" ]; then
    echo "Score-P is specified"
  fi
  scoreplib=$(ldd $BASEDIR/lib/$theBinding/libTAU${TAU_SHLIBX} | grep scorep_adapter_mpi_event.so | awk ' { print $3;}')
  if [ -n "$scoreplib" ]; then
    if [ -f $scoreplib ] && [ "$verbose" = "true" ]; then
      echo "Preloading: $scoreplib"
    fi
    # Yields ":<scoreplib>" or ":<scoreplib>:<previous extraloadlibs>".
    extraloadlibs=":${scoreplib}${extraloadlibs:+:${extraloadlibs}}"
  fi
fi

# Libraries given with -loadlib=<foo.so> follow the wrappers collected so far.
TAUEX_LD_PRELOAD="${TAUEX_LD_PRELOAD}${extraloadlibs}"

# Collapse double colons and drop one trailing colon.
TAUEX_LD_PRELOAD="${TAUEX_LD_PRELOAD//::/:}"
TAUEX_LD_PRELOAD="${TAUEX_LD_PRELOAD%:}"


# Tool used to list shared-library dependencies on this platform.
case "$apple" in
  1) TAU_LDD='otool -L' ;;
  *) TAU_LDD=ldd ;;
esac


#######################################
# Resolve the program to launch and abort when it cannot be executed.
# A name that is not directly executable is looked up on PATH.  The check is
# skipped for the Julia and Python wrappers.
# Globals:
#   prog (written) - resolved program path, empty when nothing was found
#   julia_wrapper, python_wrapper, scriptname (read)
# Arguments:
#   $1 - program name or path as given on the command line
# Exits:
#   127 (the shell's "command not found" status) when the program is missing
#######################################
tau_exec_require_program() {
  prog="$1"
  if [ ! -x "$prog" ]; then
    # `type -P` is the bash builtin PATH search; no external `which` needed.
    prog=$(type -P -- "$prog" 2>/dev/null)
  fi

  if [ "$julia_wrapper" != "true" ] && [ "$python_wrapper" != "true" ] && [ ! -x "$prog" ]; then
    echo "$scriptname: $1: command not found"
    exit 127
  fi
}

if [ "$qsub_mode" = false ]; then
  tau_exec_require_program "$1"

  if [ "$track_syscall" = true ] && [ -r "$BASEDIR/lib/$theBinding/libTAU-ptrace-syscall$TAU_SHLIBX" ]; then
    if [ "${TAU_TRACE:-}" = 1 ]; then
      echo "-syscall option can't be used with TAU_TRACE=1. Disable tracing when using -syscall."
      # This is a usage error, so do not report success.
      exit 1
    fi
    TAUEX_LD_PRELOAD="$TAUEX_LD_PRELOAD:$BASEDIR/lib/$theBinding/libTAU-ptrace-syscall$TAU_SHLIBX"
  else
    # always use the basic preload library now
    TAUEX_LD_PRELOAD="$TAUEX_LD_PRELOAD:$BASEDIR/lib/$theBinding/libTAU-preload$TAU_SHLIBX"
  fi

fi

# When preloading is disabled, everything collected so far is discarded.
if [ "$disable_preload" = "true" ]; then
  TAUEX_LD_PRELOAD=""
fi

# ---- Verbose configuration summary ------------------------------------------
if [ "$verbose" = "true" ]; then
  # On Apple systems the loader variables are reported with a DY prefix
  # (DYLD_LIBRARY_PATH, ...).
  case "$apple" in
    1) DY="DY" ;;
    *) DY="" ;;
  esac
  printf '%s\n' "Matching bindings:"
  # binding_options is left unquoted on purpose so each option stays its own
  # argument.
  tau-config --list-matching $binding_options
  printf '%s\n' "" "Using:" "$theBinding" "" "Configuration:" ""
  printf '%s\n' "Setting ${DY}LD_LIBRARY_PATH to $TAUEX_LD_LIBRARY_PATH"
  printf '%s\n' "Setting ${DY}LD_PRELOAD to $TAUEX_LD_PRELOAD"
  printf '%s\n' "Setting ${DY}LD_AUDIT to $TAUEX_LD_AUDITOR"
  printf '%s\n' "Setting PYTHONPATH to $PYTHONPATH"
  printf '%s\n' ""
fi

#######################################
# Run the application under Intel PIN with libTAU loaded as the pin tool, then
# exit with pin's status.  This function never returns.
#
# When the selected binding does not mention "pin", the default pin binding
# (shared-pin / Makefile.tau-pin) is used.  PIN_ROOT is read from that
# binding's stub makefile.
# Globals:
#   dryrun, BASEDIR, TAU_SHLIBX (read)
#   theBinding (read, possibly rewritten to shared-pin)
#   mk_options, mk, PIN_ROOT (written)
# Arguments:
#   the application command line
# Exits:
#   1 when dryrun is requested or no PIN configuration exists,
#   otherwise the exit status of pin
#######################################
tau_exec_run_pin() {
  if [ -n "$dryrun" ]; then
    echo "ERROR: dryrun not implemented for pin"
    exit 1
  fi

  mk_options=$(echo "$theBinding" | sed -e 's/shared/tau/g')
  case "$mk_options" in
    *pin*)
      ;;
    *)
      mk_options=tau-pin
      theBinding=shared-pin
      ;;
  esac

  mk="$BASEDIR/lib/Makefile.$mk_options"
  if [ ! -r "$mk" ]; then
    echo "$0: PIN configuration not found. Please use ./configure -pin=download; make install;  and retry"
    # Stop here: without a PIN configuration the request cannot be honoured,
    # and theBinding may already point at a binding that does not exist.
    exit 1
  fi

  PIN_ROOT=$(grep "^PIN_ROOT=" "$mk" | sed -e 's@^PIN_ROOT=@@g')
  # "$@" is quoted so arguments containing spaces reach the application intact.
  "$PIN_ROOT/pin" -t "$BASEDIR/lib/$theBinding/libTAU${TAU_SHLIBX}" -- "$@"
  exit $?
}

if [ "$use_pin" = "true" ]; then
  tau_exec_run_pin "$@"
fi
# ParaTools ThreadSpotter preparation; this is done before LD_PRELOAD gets set.
# All three ThreadSpotter tools are looked up on PATH, then set_node and
# set_nodecount are called and the ptts output directory is created below
# PROFILEDIR (or below the current directory when PROFILEDIR is unset/empty).
if $use_ptts ; then
  which sample_ts report_ts view-static_ts >/dev/null 2>&1 || {
    echo "ERROR: ParaTools ThreadSpotter not found on PATH"
    exit 1
  }
  set_node
  set_nodecount
  pttsprefix="${PROFILEDIR:-.}/ptts"
  mkdir -p "$pttsprefix"
fi

# ---------------------------------------------------------------------------
# Launch phase.
#
# qsub mode: the TAU environment is packed into a single --env option and the
#            job is handed to qsub; nothing else is run locally.
# otherwise: the program is started under gdb/lldb, or directly as a plain
#            command, through the Python / Spark / Julia wrappers, or under
#            ParaTools ThreadSpotter, with the loader environment set up here.
#
# The script exits with the status of whatever was launched.
# ---------------------------------------------------------------------------
if [ "$qsub_mode" = true ] ; then

  # gather all TAU_* environment variabls, but skip TAU_OPTIONS since it often has spaces,
  # and the ACLF staff tells us that it's impossible to pass env vars with spaces through qsub
  tau_vars=`env | grep TAU_ | grep -v TAU_OPTIONS | tr '\n' ':'`
  cmd="$@"
  # don't use the current LD_LIBRARY_PATH or it will screw things up for the backend
  TAUEX_LD_LIBRARY_PATH=$BASEDIR/lib/$theBinding
  envs="LD_PRELOAD=$TAUEX_LD_PRELOAD:LD_LIBRARY_PATH=$TAUEX_LD_LIBRARY_PATH:$tau_vars"

  # Split the user's command line: the value following a "--env" option is
  # remembered in prevEnv (the last one wins), everything else is collected
  # in newCmd.
  prevEnv=""
  processEnv=false
  newCmd=""
  for arg in $cmd ; do
    # Thanks to Bernd Mohr for the following that handles quotes and spaces (see configure for explanation)
    # NOTE(review): modarg is computed but not used below - confirm whether
    # newCmd was meant to be built from it.
    modarg=`echo "x$arg" | sed -e 's/^x//' -e 's/"/\\\"/g' -e s,\',%@%\',g -e 's/%@%/\\\/g' -e 's/ /\\\ /g'`
    if [ "$processEnv" = true ] ; then
      prevEnv="$arg"
      processEnv=false
    else
      case $arg in
        --env)
          processEnv=true
          ;;
        *)
          newCmd="$newCmd $arg"
          ;;
      esac
    fi
  done

  # Merge the user's --env value into ours and tidy up the colons.
  envs="$envs:$prevEnv"
  envs=`echo $envs | sed -e "s/::/:/g" -e "s/:$//"`
  env_option="--env $envs"

  newCmd=`echo $newCmd | sed -e 's/^qsub //'`
  $dryrun qsub $env_option $newCmd
  # The job has been submitted; never fall through to the local launch code,
  # which would run the original command line a second time.
  exit $?

else

  if [ "$use_gdb" = "true" ]; then
    python_gdb_cmd=""
    if [ "$python_wrapper" = "true" ] ; then
      if [ "$python_launch_module" = "true" ] ; then
          python_gdb_cmd="$python_interpreter $py_interp_args -m ${python_module_name} -m $python_m_arg"
      else
          python_gdb_cmd="$dryrun $python_interpreter $py_interp_args -m ${python_module_name}"
      fi
    fi
    envcmd="set env"
    if [ "$apple" = 1 ]; then
        envcmd="_regexp-env"
    fi
    # The debugger receives its environment through a command file.
    echo "" > .gdb_commands
    if [ "$track_memory" = "true" ] || [ "$memory_debug" = "true" ] ; then
      echo "${envcmd} LD_BIND_NOW=1" >> .gdb_commands
    fi
    if [ "$track_io" = "true" ] ; then
      echo "${envcmd} LD_BIND_NOW=1" >> .gdb_commands
    fi
    if [ "$apple" = 1 ]; then
      echo "_regexp-env DYLD_LIBRARY_PATH=$TAUEX_LD_LIBRARY_PATH" >> .gdb_commands
      echo "_regexp-env DYLD_INSERT_LIBRARIES=$TAUEX_LD_PRELOAD" >> .gdb_commands
      echo "_regexp-env DYLD_FORCE_FLAT_NAMESPACE=1" >> .gdb_commands
      if [ "$python_wrapper" = "true" ] ; then
        echo "_regexp-env PYTHONPATH=$PYTHONPATH" >> .gdb_commands
      fi
      if [ $# -gt 1 ] ; then
        echo "settings set target.run-args ${*:2}" >> .gdb_commands
      fi
    else
      echo "set env LD_LIBRARY_PATH=$TAUEX_LD_LIBRARY_PATH" >> .gdb_commands
      echo "set env LD_AUDIT=$TAUEX_LD_AUDITOR" >> .gdb_commands
      echo "set env LD_PRELOAD=$TAUEX_LD_PRELOAD" >> .gdb_commands
      if [ "$memory_debug" = "true" ] ; then
        echo "set env TAU_TRACK_MEMORY_LEAKS=1" >> .gdb_commands
        echo "set env TAU_MEMDBG_PROTECT_ABOVE=1">> .gdb_commands
      fi
      if [ "$python_wrapper" = "true" ] ; then
        echo "set env PYTHONPATH=$PYTHONPATH" >> .gdb_commands
      fi
    fi
    # If Python is not used ${python_gdb_cmd} is the empty string
    # If Python is used, ${python_gdb_cmd} is the command to use the TAU python wrapper.
    if [ "$apple" = 1 ]; then
      lldb -s .gdb_commands -- ${python_gdb_cmd} "$@"
    else
      gdb -x .gdb_commands --args ${python_gdb_cmd} "$@"
    fi
    rm -f ./.gdb_commands
    exit 0
  fi

  # Remember the caller's loader environment (ThreadSpotter restores it
  # below), then install TAU's.
  if [ "$apple" = 1 ]; then
    ORIG_LIBRARY_PATH=$DYLD_LIBRARY_PATH
    ORIG_INSERT_LIBRARIES=$DYLD_INSERT_LIBRARIES
    ORIG_FORCE_FLAT_NAMESPACE=$DYLD_FORCE_FLAT_NAMESPACE
    $dryrun export DYLD_LIBRARY_PATH="$TAUEX_LD_LIBRARY_PATH"
    $dryrun export DYLD_INSERT_LIBRARIES="$TAUEX_LD_PRELOAD"
    $dryrun export DYLD_FORCE_FLAT_NAMESPACE=1
    $dryrun export HSA_TOOLS_LIB="$TAU_HSA_TOOLS_LIB"
  else
    ORIG_LIBRARY_PATH=$LD_LIBRARY_PATH
    ORIG_AUDIT_AUDIT=$LD_AUDIT
    ORIG_LD_PRELOAD=$LD_PRELOAD
    $dryrun export LD_LIBRARY_PATH="$TAUEX_LD_LIBRARY_PATH"
    $dryrun export LD_AUDIT="$TAUEX_LD_AUDITOR"
    $dryrun export HSA_TOOLS_LIB="$TAU_HSA_TOOLS_LIB"
    if [ "$track_rocm" = "true" ] ; then
      if [ -n "$(echo "$TAU_METRICS" | grep -v "TAUGPU_TIME")" ]; then
        if [ -n "$(echo "$TAU_METRICS" | grep "TIME")" ]; then
          # NOTE(review): this inner test used to read the misspelled
          # variable TAU_METICS and so never matched.  With the name corrected
          # it still cannot match, because the enclosing test only passes
          # when TAUGPU_TIME is absent.  Confirm the intended condition.
          if [ -n "$(echo "$TAU_METRICS" | grep "TAUGPU_TIME")" ]; then
            $dryrun export TAU_METRICS="$(echo "$TAU_METRICS" | sed -e 's/TIME/TAUGPU_TIME/')"
          fi
        else
          $dryrun export TAU_METRICS="TAUGPU_TIME,$TAU_METRICS"
        fi
      fi
    fi
    $dryrun export LD_PRELOAD="$TAUEX_LD_PRELOAD"
  fi

  if [ "$memory_debug" = "true" ] ; then
    $dryrun export TAU_TRACK_MEMORY_LEAKS=1
    $dryrun export TAU_MEMDBG_PROTECT_ABOVE=1
  fi

  if $spark_wrapper ; then
    $dryrun export PYTHONPATH=$PYTHONPATH
    $dryrun $python_interpreter $py_interp_args -m tau_pyspark_wrapper "$@"
    # Remember the status so it becomes the script's exit status.
    retval=$?
  elif $python_wrapper ; then
    $dryrun export PYTHONPATH=$PYTHONPATH
    if [ "$python_launch_module" = "true" ] ; then
        $dryrun $python_interpreter $py_interp_args -m ${python_module_name} -m $python_m_arg "$@"
    else
        $dryrun $python_interpreter $py_interp_args -m ${python_module_name} "$@"
    fi
    retval=$?
  elif $julia_wrapper ; then
    $dryrun export JULIA_LOAD_PATH="$BASEDIR/lib/$theBinding:$JULIA_LOAD_PATH"
    $dryrun export ENABLE_JITPROFILING=1
    $dryrun export INTEL_JIT_BACKWARD_COMPATIBILITY=1
    $dryrun export TAU_JULIA_LIB="$BASEDIR/lib/$theBinding/libTAU$TAU_SHLIBX"
    $dryrun julia "$@"
    # Remember the status so it becomes the script's exit status.
    retval=$?
  elif $use_ptts ; then
    # Can't dryrun because `$dryrun CMD >$logfile 2>&1` creates $logfile
    if [ -n "$dryrun" ] ; then
      echo "ERROR: dryrun not implemented for ptts"
      exit 1
    fi
    # Can't use $node because the "-g" flag to sample_ts causes some ranks to write
    # sample files on behalf of others.  "%U" uses the filesystem to get a (hopefully) unique number.
    samplefile="$pttsprefix/sample%U.smp"
    reportfile="$pttsprefix/report.${node}.tsr"
    reportlog="$pttsprefix/report_ts.${node}.log"
    # view-static_ts paths should be relative to $pttsprefix
    # since view-static_ts must be run with PWD=$pttsprefix
    viewdir="node_${node}"
    viewlog="view-static_ts.${node}.log"

    # Restore environment and start sampling the child process
    if [ "$apple" = 1 ]; then
      export DYLD_LIBRARY_PATH=$ORIG_LIBRARY_PATH
      export DYLD_INSERT_LIBRARIES=$ORIG_INSERT_LIBRARIES
      export DYLD_FORCE_FLAT_NAMESPACE=$ORIG_FORCE_FLAT_NAMESPACE
    else
      export LD_LIBRARY_PATH=$ORIG_LIBRARY_PATH
      export LD_AUDIT=$ORIG_AUDIT_AUDIT
      export LD_PRELOAD=$ORIG_LD_PRELOAD
    fi
    rm -f "$reportlog" "$pttsprefix/$viewlog" "$pttsprefix/$node"

    if ! $ptts_post ; then
      # The glob stays outside the quotes so old sample files really match.
      rm -rf "$pttsprefix"/sample*.smp "$reportfile" "$pttsprefix/$viewdir"
      if [ "$node" -eq 0 ] ; then
        echo "PTTS($node/$nodecount): Sampling \"$*\""
        samplingstarttime=`date +%s`
      fi
      # ptts_sample_flags is deliberately unquoted (it may hold several
      # flags); the application command line is passed through unchanged.
      sample_ts $ptts_sample_flags -o "$samplefile" -r "$@"
      retval=$?
      if [ "$node" -eq 0 ] ; then
        samplingendtime=`date +%s`
        echo "PTTS($node/$nodecount): sample_ts time: $((samplingendtime-samplingstarttime)) seconds"
      fi
    fi

    # Write the index first.  Even if some nodes fail later the index will help
    # navigate the rest of the report.

    # All sample files must be written before generating a report from any sample file.
    # Barrier before checking for sample files since sample.$node.smp may have been
    # written by another MPI rank. After the barrier each rank can "own" sample.$node.smp
    mpi_barrier
    samplefile="$pttsprefix/sample.${node}.smp"

    # Shared filesystems must be in sync before proceeding
    echo "PTTS($node/$nodecount): Synchronizing filesystems..."
    sleep 5 ; sync
    sleep 5 ; sync
    sleep 5 ; sync

    # sample_ts may return nonzero without an error so only abort if sample_ts didn't write *.smp.
    # Check after the barrier so we can post-process even if some ranks fail
    if [ ! -f "$samplefile" ] ; then
      echo "PTTS($node/$nodecount): Sample file \"$samplefile\" does not exist. sample_ts returned $retval."
      if [ $retval -eq 0 ] ; then
        exit 255
      else
        exit $retval
      fi
    fi

    # Post-process samples
    if [ "$node" -eq 0 ] ; then
      echo "PTTS($node/$nodecount): Postprocessing samples..."
      reportstarttime=`date +%s`
    fi
    report_ts $ptts_report_flags -i "$samplefile" -o "$reportfile" >"$reportlog" 2>&1
    retval=$?
    if [ "$node" -eq 0 ] ; then
      reportendtime=`date +%s`
      echo "PTTS($node/$nodecount): report_ts time: $((reportendtime-reportstarttime)) seconds"
    fi
    # Abort if report_ts returned nonzero or didn't write the report file
    if [ $retval -eq 0 ] ; then
      if [ ! -f "$reportfile" ] ; then
        echo "PTTS($node/$nodecount): report_ts returned 0 but report file \"$reportfile\" does not exist."
        exit 255
      fi
    else
      echo "PTTS($node/$nodecount): report_ts returned error code $retval."
      exit $retval
    fi

    # Generate report. view-static_ts must be run in $pttsprefix
    cd "$pttsprefix" || exit 1
    if [ "$node" -eq 0 ] ; then
      staticstarttime=`date +%s`
    fi
    view-static_ts  -i "$(basename "$reportfile")" -o "$viewdir" >"$viewlog" 2>&1
    retval=$?
    if [ "$node" -eq 0 ] ; then
      staticendtime=`date +%s`
      echo "PTTS($node/$nodecount): view-static_ts time: $((staticendtime-staticstarttime)) seconds"
    fi
    # Abort if view-static_ts returned nonzero or didn't write HTML files
    if [ $retval -eq 0 ] ; then
      if [ ! -f "$viewdir/front.html" ] ; then
        echo "PTTS($node/$nodecount): view-static_ts returned 0 but report file \"$viewdir/front.html\" does not exist."
        exit 255
      fi
    else
      echo "PTTS($node/$nodecount): view-static_ts returned error code $retval."
      exit $retval
    fi
    cd "$OLDPWD"

    # Clean up
    if [ "$node" -eq 0 ] ; then
      write_ptts_index
    else
      compress_ptts_report
    fi
    rm -f "$reportfile" "$pttsprefix/${viewdir}.html" "$pttsprefix/$node"

  else
    $dryrun "$@"
    retval=$?
  fi

  unset LD_PRELOAD
  exit $retval
fi

