#!/bin/bash
#
# Sets up a fresh run directory, generates the Pegasus catalogs, asks mDAG
# for a 2MASS J-band mosaic workflow of M17, plans it with pegasus-plan and
# submits it with pegasus-run.

# Abort as soon as any simple command fails.
set -o errexit

#######################################################################
#
#  Settings
#

# Passed to mDAG twice; presumably the mosaic width and height in degrees.
DEGREES=0.5

#######################################################################

# Directory the script was launched from: pegasusrc is copied from here and
# the planner output (plan.out) is written here later on.
TOP_DIR=$(pwd)

# Put the Montage binaries first on PATH.
export PATH="/ccg/software/montage/Montage_v3.3_mats/bin:$PATH"

# unique, timestamped directory for this run
RUN_ID=$(/bin/date +'%F_%H%M%S')
RUN_DIR="$TOP_DIR/work/$RUN_ID"

echo "Work directory: $RUN_DIR"

# Every path expansion is quoted so that a launch directory containing
# whitespace does not get split into several arguments.
mkdir -p "$RUN_DIR/inputs"

# The remaining steps of the script run from inside the run directory.
cd "$RUN_DIR"
cp "$TOP_DIR/pegasusrc" .

# create the transformation catalogue (tc)

#######################################
# Append one "tr <name>:3.3" entry for site CCG to a transformation catalog
# for every entry found in a directory of Montage executables.
# Arguments:
#   $1 - directory holding the executables (no trailing slash)
#   $2 - catalog file to append to
# Returns:
#   0 on success; 1 (with a message on stderr) when the directory is missing
#   or contains no entries, in which case the catalog file is not touched.
#######################################
create_transformation_catalog() {
    local bin_dir=$1
    local catalog=$2
    local binary_path binary
    local -a binaries=()

    if [ ! -d "$bin_dir" ]; then
        echo "ERROR: Montage bin directory not found: $bin_dir" >&2
        return 1
    fi

    # Glob instead of parsing `ls`, so names are never word-split.
    for binary_path in "$bin_dir"/*; do
        # An unmatched glob stays literal; skip it.
        if [ -e "$binary_path" ]; then
            binaries+=("${binary_path##*/}")
        fi
    done

    if [ "${#binaries[@]}" -eq 0 ]; then
        echo "ERROR: no executables found in $bin_dir" >&2
        return 1
    fi

    for binary in "${binaries[@]}"; do
        cat <<EOF
tr $binary:3.3 {
    site CCG {
        pfn "$bin_dir/$binary"
        arch "x86"
        os "linux"
        type "INSTALLED"
        profile pegasus "clusters.size" "20"
    }
}
EOF
    done >>"$catalog"
}

echo
echo "Creating the transformation catalog..."
# Bare call on purpose: the script runs under `set -e`, so a non-zero return
# stops the run here instead of failing later during planning.
create_transformation_catalog /ccg/software/montage/Montage_v3.3_mats/bin tc

# Site catalog (sites.xml), written into the current directory.  It declares
# three sites:
#   local   - scratch and output storage under $RUN_DIR, reached via file://
#   CCG     - Condor-style site with MONTAGE_HOME set in its environment
#   CCGData - gsiftp scratch space on obelix.isi.edu
# The heredoc delimiter is unquoted so that $RUN_DIR is expanded.
printf '\n'
printf '%s\n' "Creating the site catalog..."
cat <<EOF >sites.xml
<?xml version="1.0" encoding="UTF-8"?>
<sitecatalog xmlns="http://pegasus.isi.edu/schema/sitecatalog" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pegasus.isi.edu/schema/sitecatalog http://pegasus.isi.edu/schema/sc-3.0.xsd" version="3.0">
    <site handle="local" arch="x86" os="LINUX">
        <grid  type="gt2" contact="localhost/jobmanager-fork" scheduler="Fork" jobtype="auxillary"/>
        <head-fs>
            <scratch>
                <shared>
                    <file-server protocol="file" url="file://" mount-point="$RUN_DIR/scratch"/>
                    <internal-mount-point mount-point="$RUN_DIR/scratch"/>
                </shared>
            </scratch>
            <storage>
                <shared>
                    <file-server protocol="file" url="file://" mount-point="$RUN_DIR/outputs"/>
                    <internal-mount-point mount-point="$RUN_DIR/outputs"/>
                </shared>
            </storage>
        </head-fs>
        <replica-catalog  type="LRC" url="rlsn://dummyValue.url.edu" />
        <profile namespace="env" key="GLOBUS_LOCATION" >/ccg/software/globus/default</profile>
        <profile namespace="env" key="GLOBUS_TCP_PORT_RANGE" >40000,50000</profile>
    </site>
    <site handle="CCG" arch="x86" os="LINUX">
        <head-fs>
            <scratch />
            <storage />
        </head-fs>
        <replica-catalog type="LRC" url="rlsn://dummyValue.url.edu" />
        <profile namespace="pegasus" key="style">condor</profile>
        <profile namespace="condor" key="requirements">(TARGET.FileSystemDomain =!= &quot;&quot;)</profile>
        <profile namespace="condor" key="should_transfer_files">Yes</profile>
        <profile namespace="condor" key="when_to_transfer_output">ON_EXIT</profile>
        <profile namespace="env" key="MONTAGE_HOME" >/ccg/software/montage/Montage_v3.3_mats</profile>
    </site>
    <site handle="CCGData">
        <head-fs>
            <scratch>
                <shared>
                    <file-server protocol="gsiftp" url="gsiftp://obelix.isi.edu" mount-point="/data/scratch/ptesting"/>
                    <internal-mount-point mount-point="/data/scratch/ptesting"/>
                </shared>
            </scratch>
            <storage />
        </head-fs>
        <replica-catalog type="LRC" url="rlsn://dummyValue.url.edu" />
        <profile namespace="env" key="GLOBUS_LOCATION" >/ccg/software/globus/default</profile>
    </site>
</sitecatalog>
EOF

#######################################
# Build the replica catalog from the two lists read after the mDAG step.
#   * lines of the cache list matching the regex ".fits " are dropped; on the
#     remaining lines the first "ipac_cluster" is replaced by "local"
#   * every line of the URL list is appended, with the text from " http:"
#     through the last "ref=" replaced by " http://obelix.isi.edu/irsa-cache"
# Arguments:
#   $1 - cache list file
#   $2 - URL list file
#   $3 - replica catalog file to (over)write
# Returns:
#   non-zero if either input cannot be read.  A cache list whose lines are
#   all filtered out is NOT an error (a trailing `grep -v` would exit 1 in
#   that case and abort the script under `set -e`).
#######################################
build_replica_catalog() {
    local cache_list=$1
    local url_list=$2
    local catalog=$3

    # One awk pass filters and rewrites; unlike grep it exits 0 even when
    # no line is selected.
    awk '!/.fits / { sub(/ipac_cluster/, "local"); print }' "$cache_list" >"$catalog" || return
    sed 's/ http:.*ref=/ http:\/\/obelix.isi.edu\/irsa-cache/' "$url_list" >>"$catalog" || return
}

echo
echo "Running mDAG (finding input images, generating DAX, ...)..."
mDAG 2mass j M17 "$DEGREES" "$DEGREES" 0.0002777778 . "file://$RUN_DIR" "file://$RUN_DIR/inputs"

echo
echo "Adding input images to the replica catalog..."
# cache.list and url.list are presumably written by the mDAG call above.
image_count=$(wc -l <cache.list)
# $(( )) strips any padding some wc implementations put around the number.
echo "   $((image_count)) images found"
build_replica_catalog cache.list url.list rc

#######################################
# Run pegasus-plan, mirroring its output to the terminal and to a log file.
# Arguments:
#   $1 - file that receives a copy of the planner output
# Returns:
#   non-zero (with a message on stderr) if pegasus-plan is not on PATH or
#   exits non-zero.  The planner's own status is read from PIPESTATUS because
#   the pipeline status is that of tee.
#######################################
plan_workflow() {
    local plan_log=$1
    local plan_status

    if ! command -v pegasus-plan >/dev/null 2>&1; then
        echo "ERROR: pegasus-plan not found in PATH" >&2
        return 1
    fi

    pegasus-plan \
        --conf pegasusrc \
        --sites CCG \
        --staging-site CCGData \
        --dir work \
        --output-site local \
        --dax dag.xml \
        --cluster horizontal \
        | tee "$plan_log"
    plan_status=${PIPESTATUS[0]}

    if [ "$plan_status" -ne 0 ]; then
        echo "ERROR: pegasus-plan failed with exit code $plan_status" >&2
        return 1
    fi
}

#######################################
# Print what follows "pegasus-run" (and at least one space) on each matching
# line of saved planner output, with trailing whitespace removed.
# Arguments:
#   $1 - file holding the planner output
# Outputs:
#   the extracted text on stdout; nothing if no line matches
#######################################
extract_submit_dir() {
    sed -n -E '/pegasus-run[ ]+/ {
        s/.*pegasus-run[ ]+//
        s/[[:space:]]+$//
        p
    }' "$1"
}

#######################################
# Change into the submit directory, refusing empty or non-existent paths.
# An empty path would otherwise make `cd` go to $HOME.
# Arguments:
#   $1 - directory to enter
# Returns:
#   non-zero (with a message on stderr) if $1 is empty or not a directory
#######################################
enter_submit_dir() {
    local submit_dir=$1

    if [ -z "$submit_dir" ]; then
        echo "ERROR: could not find the pegasus-run line in the planner output" >&2
        return 1
    fi
    if [ ! -d "$submit_dir" ]; then
        echo "ERROR: submit directory does not exist: $submit_dir" >&2
        return 1
    fi
    cd "$submit_dir"
}

echo "Planning  the workflow..."
# Bare calls on purpose: the script runs under `set -e`, so a non-zero return
# from any of these steps aborts the run.
plan_workflow "$TOP_DIR/plan.out"

WORK_DIR=$(extract_submit_dir "$TOP_DIR/plan.out")
echo "work directory is $WORK_DIR"
enter_submit_dir "$WORK_DIR"

# Retrieve all the src URLs from the stage-in files and fail if any of them
# is not a file:// URL.
# errexit is switched off around the pipeline because "no line left" makes
# the final grep exit non-zero, and that is the outcome the script wants.
set +e
# Pipeline stages:
#   1. every line mentioning "src" plus the line following it, from stage_in*in
#   2. keep only lines matching the regex "[.*][://]"
#   3. strip text matching "stage_in.*in-" (presumably the file-name prefix
#      grep puts on context lines when several files are searched)
#   4. drop lines containing "file://"; surviving lines are printed
# EC is the status of stage 4: 0 only when at least one line survived.
# NOTE(review): "[.*][://]" is two bracket expressions (a literal "." or "*"
# followed by ":" or "/"), not ".*://" -- confirm this is the intended filter.
grep  -A 1 src stage_in*in | grep "[.*][://]" | sed s/stage_in.*in-// | grep -v "file://" 
EC=$?
set -e

if [ $EC -eq 0 ]; then
    # all non file URL's should be directly retrieved from the jobs
    echo "The stage in jobs have non file URL's" 
    exit 1
fi

# Hand the planned workflow in $WORK_DIR over to pegasus-run.
# $WORK_DIR is left unquoted as before, so stray whitespace captured from the
# planner output is still dropped by word splitting.
printf '%s\n' "Submitting the workflow for execution"
pegasus-run $WORK_DIR
