#!/usr/bin/env perl

use strict;
use Getopt::Long;
use List::Util qw[min max];
use File::Basename qw(dirname basename fileparse);
use File::Path;
use File::Copy;
use File::Spec;
use Switch;

# Workflow state shared by the processing and writer subs below; references
# to these hashes are handed to them from the main script.
#   %jobs  : job name  => hash of per-job attributes (file lists, event
#            times, delays, ...)
#   %map   : job name  => array of the names of that job's parents
#   %files : file name => hash of per-file attributes ('src', 'size')
my %jobs = ();
my %map = ();
my %files = ();

# Names longer than these limits are counted in $longFilenames and
# $longJobNames. A non-zero counter makes the writers use the wide
# (80 character) name column instead of the 40 character one.
my $defaultFilenameLength = 40;
my $defaultJobNameLength = 40;
my $longJobNames = 0;
my $longFilenames = 0;

#
# Print the command line help text on STDOUT and terminate the program.
# The optional argument is used as the exit status; a missing or false
# argument yields exit status 0.
#
sub usage(;$) {
    my ($ec) = @_;
    $ec = 0 unless $ec;

    # The script name is shown without its '.pl' suffix.
    my $basename = basename($0, '.pl');

    my $helpText = << "EOF";

Usage: $basename --dag=<dag name> --adax=<annotated dax>|--jobstate-log=<jobstate log> --output=<output directory> --help 

Mandatory arguments:
 -d|--dag dagname       name of the dag file to process
 -o|--output dir        write outputs in given directory

Complex arguments:
 -a|--adax              use annotations from dax
 -l|--jobstate-log log  jobstate log to process

Optional arguments:
 -c|--condor            pure condor run - no GRID_SUBMIT events
 -h|--help              print this help message and exit
 -i|--input dir         read inputs from given directory

EOF
    print $helpText;
    exit($ec);
}

#
# Read the transfer list "$job.in" of a transfer job and record its files.
#
# Arguments:
#   $jobsRef  - hash ref of jobs; the destination file names are stored as
#               an array ref under $jobsRef->{$job}->{$link}
#   $filesRef - hash ref of files; for every job whose name does not contain
#               "stage_out_" the source site of each file is stored under
#               $filesRef->{<file>}->{'src'}
#   $job      - job name, also the base name of the transfer list
#   $link     - key under which the file list is stored (e.g. 'in')
#
# The list consists of records of four lines:
#   comment naming the source site
#   source URL
#   comment naming the destination site
#   destination URL
#
# A missing or unreadable list is not an error; the job then gets an empty
# file list. An incomplete record at the end of the list is ignored.
#
sub processTransfers {
    my ($jobsRef, $filesRef, $job, $link) = @_;

    my @files = ();

    if (open(my $input, '<', "$job.in")) {
        while (defined(my $srcSite = <$input>)) {
            # Source URL and destination site are read only to keep the
            # four-line record structure in step; they are not used.
            my $srcURL = <$input>;
            my $dstSite = <$input>;
            my $dstURL = <$input>;

            # A truncated last record has no destination URL. Stop here
            # instead of recording an undefined file name.
            last unless defined($dstURL);

            chomp($srcSite);
            $srcSite =~ s/^#//;

            # Only the file name part of the destination URL is kept.
            chomp($dstURL);
            $dstURL =~ s/.*\///;

            push(@files, $dstURL);

            if (length($dstURL) > $defaultFilenameLength) {
                $longFilenames++;
            }

            # Save the source only for stage-in jobs
            if ($job !~ /stage_out_/) {
                $filesRef->{$dstURL}->{'src'} = $srcSite;
            }
        }
        close($input);
    }
    $jobsRef->{$job}->{$link} = \@files;
}

#
# Read the list of files "$job.$link.lof" (one file name per line) and
# record it for the job.
#
# Arguments:
#   $jobsRef  - hash ref of jobs; the file names are stored as an array ref
#               under $jobsRef->{$job}->{$link}
#   $filesRef - hash ref of files; every listed file gets an entry
#   $job      - job name, also the base name of the list file
#   $link     - 'in' or 'out'; part of the list file name and the key under
#               which the list is stored
#
# A missing or unreadable list is not an error; the job then gets an empty
# file list.
#
sub processFiles {
    my ($jobsRef, $filesRef, $job, $link) = @_;

    my @files = ();

    if (open(my $input, '<', "$job.$link.lof")) {
        while (defined(my $file = <$input>)) {
            chomp($file);
            push(@files, $file);

            if (length($file) > $defaultFilenameLength) {
                $longFilenames++;
            }

            # Create the entry only when the file is seen for the first
            # time, so attributes recorded earlier (such as the 'src' set
            # for a staged-in file) are not thrown away.
            if (!defined($filesRef->{$file})) {
                $filesRef->{$file} = {};
            }
        }
        close($input);
    }
    $jobsRef->{$job}->{$link} = \@files;
}

#
# Read the DAG file, register its jobs and their parent/child relations and
# write a reduced copy of the DAG to "$outputDir/dag".
#
# Arguments:
#   $jobsRef   - hash ref filled with one attribute hash per job
#   $mapRef    - hash ref filled with child name => array ref of parent names
#   $filesRef  - hash ref of files, handed on to processTransfers and
#                processFiles
#   $dagName   - path of the DAG file
#   $outputDir - directory in which the file "dag" is written
#
# Only JOB, SUBDAG and PARENT lines are evaluated and copied. After them,
# one "FILE <name> INPUT|OUTPUT <job>" line is written per file of each job.
#
# Dies when the DAG file cannot be read or the output cannot be written.
#
sub processDAGManOutput {
    my ($jobsRef, $mapRef, $filesRef, $dagName, $outputDir) = @_;

    open(my $dagIn, '<', $dagName) or die "$dagName not found\n";
    open(my $dagOut, '>', "$outputDir/dag") or die "Could not open $outputDir/dag\n";

    while (defined(my $line = <$dagIn>)) {
        chomp($line);
        my @tokens = split(' ', $line);

        # The keyword must be followed by white space, so that other
        # keywords starting with the same letters (JOBSTATE_LOG) are not
        # mistaken for a job definition.
        if ($line =~ /^JOB\s/ || $line =~ /^SUBDAG\s/) {
            # JOB <name> <submit file>  /  SUBDAG EXTERNAL <name> <dag file>
            my $isSubdag = ($tokens[0] eq 'SUBDAG');
            my $job = $isSubdag ? $tokens[2] : $tokens[1];

            $jobsRef->{$job} = {};

            if (!$isSubdag) {
                if ($job =~ /stage_in_/) {
                    # The outputs of a transfer job are not processed.
                    processTransfers($jobsRef, $filesRef, $job, 'in');
                } else {
                    processFiles($jobsRef, $filesRef, $job, 'in');
                    processFiles($jobsRef, $filesRef, $job, 'out');
                }
            }

            $mapRef->{$job} = [];

            if (length($job) > $defaultJobNameLength) {
                $longJobNames++;
            }

            print {$dagOut} $line, "\n";
        } elsif ($line =~ /^PARENT\s/) {
            # PARENT <p1> [<p2> ...] CHILD <c1> [<c2> ...]
            # Every child depends on every parent named on the line.
            shift(@tokens);
            my (@parents, @children);
            my $target = \@parents;
            foreach my $token (@tokens) {
                if ($token eq 'CHILD') {
                    $target = \@children;
                    next;
                }
                push(@{$target}, $token);
            }
            foreach my $child (@children) {
                push(@{$mapRef->{$child}}, @parents);
            }

            print {$dagOut} $line, "\n";
        }
    }

    # Jobs are written in sorted order to make the output reproducible.
    foreach my $job (sort keys %{$jobsRef}) {
        foreach my $file (@{$jobsRef->{$job}->{'in'}}) {
            print {$dagOut} "FILE $file INPUT $job\n";
        }
        foreach my $file (@{$jobsRef->{$job}->{'out'}}) {
            print {$dagOut} "FILE $file OUTPUT $job\n";
        }
    }
    close($dagOut) or die "Could not write $outputDir/dag\n";
    close($dagIn);
}

#
# Read file size information from the "*.out*" files in the current
# directory (exitpost only).
#
# Arguments:
#   $filesRef - hash ref of files; for every known file the size found is
#               stored under $filesRef->{<file>}->{'size'}
#
# A line holding a complete <file name="..."> ... </file> element selects
# the file; each following <statinfo .../> line that carries a size
# attribute sets the size of that file. Files that cannot be opened,
# unknown files and statinfo lines without a size are skipped.
#
sub processFileStats {
    my $filesRef = shift;

    foreach my $statFile (glob("*.out*")) {
        open(my $stats, '<', $statFile) or next;

        my $filename;
        while (defined(my $line = <$stats>)) {
            chomp($line);
            if ($line =~ /<file name=\"([^\"]*)\">.*<\/file>/) {
                $filename = $1;
            } elsif ($line =~ /<statinfo/) {
                # No file element seen yet, or a file nobody asked about.
                next unless defined($filename) && defined($filesRef->{$filename});

                # Store the size only when the attribute is really there;
                # otherwise the whole line would end up as the size.
                if ($line =~ /.* size=\"([0-9]*)\".*\/>/) {
                    $filesRef->{$filename}->{'size'} = $1;
                }
            }
        }
        close($stats);
    }
}

#
# Take job runtimes and file sizes from an annotated DAX.
#
# Arguments:
#   $jobsRef  - hash ref of known jobs; 'ksruntime' and 'site' are set
#   $filesRef - hash ref of known files; 'size' is set
#   $adaxName - path of the annotated DAX
#
# A <job> line yields the job name "<name>_<id>" and its runtime. The site
# is taken from the lines mentioning pegasus_site in "<job>.sub" in the
# current directory; it is the empty string when that file cannot be read.
# A <uses> line yields the size of a file.
#
# Jobs and files that were not registered before only cause a warning; no
# entry is created for them.
#
# Dies when the annotated DAX cannot be opened.
#
sub processAnnotations {
    my ($jobsRef, $filesRef, $adaxName) = @_;

    open(my $adax, '<', $adaxName) or die "$adaxName not found\n";
    while (defined(my $line = <$adax>)) {
        chomp($line);
        if ($line =~ /<job id="(ID[0-9]*)" .* name="([A-Za-z0-9_-]*)" .* runtime="([0-9\.]*)">/) {
            my ($id, $name, $runtime) = ($1, $2, $3);
            my $job = $name . '_' . $id;

            if (!defined($jobsRef->{$job})) {
                # Do not touch $jobsRef here: an entry created now would
                # show up in the outputs as a job without any DAG data.
                warn "$job not known\n";
                next;
            }
            $jobsRef->{$job}->{"ksruntime"} = $runtime;

            # Collect the pegasus_site line(s) of the submit file.
            my $siteInfo = '';
            if (open(my $submitFile, '<', "$job.sub")) {
                while (defined(my $submitLine = <$submitFile>)) {
                    if ($submitLine =~ /pegasus_site/) {
                        $siteInfo .= $submitLine;
                    }
                }
                close($submitFile);
            }
            chomp($siteInfo);
            # Reduce  +pegasus_site = "name"  to  name
            $siteInfo =~ s/\+pegasus_site\s+=\s+\"([^\"]*)\"/$1/;

            $jobsRef->{$job}->{"site"} = $siteInfo;
        } elsif ($line =~ /<uses file="([A-Za-z0-9_\-\.]*)" link="(input|output)" .* size="([0-9]*)"\/>/) {
            my ($filename, $size) = ($1, $3);
            if (defined($filesRef->{$filename})) {
                $filesRef->{$filename}->{"size"} = $size;
            } else {
                warn "$filename not known\n";
            }
        }
    }
    close($adax);
}

#
# Add $delta to the running total stored under $key of a job's attribute
# hash. The first delta initialises the total; a total that has been
# flagged as unknown ('-') is left untouched.
#
sub _addToCumulative {
    my ($jobInfo, $key, $delta) = @_;

    if (!defined($jobInfo->{$key})) {
        $jobInfo->{$key} = $delta;
    } elsif ($jobInfo->{$key} ne '-') {
        $jobInfo->{$key} += $delta;
    }
}

#
# Return the value of the first of @keys that is defined in the job's
# attribute hash, or undef when none of them is.
#
sub _firstDefinedTime {
    my ($jobInfo, @keys) = @_;

    foreach my $key (@keys) {
        return $jobInfo->{$key} if defined($jobInfo->{$key});
    }
    return;
}

#
# Cut the text between the first occurrence of $marker and the following
# $terminator out of $text. No check is made that either of them is
# present; callers validate the result.
#
sub _extractField {
    my ($text, $marker, $terminator) = @_;

    my $rest = substr($text, index($text, $marker) + length($marker));
    return substr($rest, 0, index($rest, $terminator));
}

#
# Update the cumulative_* totals of one job after $event has been stored in
# its attribute hash. When the event an interval starts with is missing, a
# warning is issued and the total is flagged as unknown ('-').
#
sub _updateCumulativeTimes {
    my ($job, $jobInfo, $event, $condorRun) = @_;

    return unless defined($event);

    if ($event eq 'JOB_TERMINATED') {
        # cumulative run time: EXECUTE (or else SUBMIT) until termination
        my $start = _firstDefinedTime($jobInfo, 'EXECUTE', 'SUBMIT');
        if (defined($start)) {
            _addToCumulative($jobInfo, 'cumulative_runtime', $jobInfo->{'JOB_TERMINATED'} - $start);
        } else {
            warn "Unable to calculate runtime for job '$job'.Absence or mismatch of events JOB_TERMINATED,EXECUTE.\n";
            $jobInfo->{'cumulative_runtime'} = '-';
        }
    } elsif ($event eq 'POST_SCRIPT_TERMINATED') {
        # cumulative post script time
        my $start = _firstDefinedTime($jobInfo, 'POST_SCRIPT_STARTED', 'JOB_TERMINATED');
        if (defined($start)) {
            _addToCumulative($jobInfo, 'cumulative_postscripttime', $jobInfo->{'POST_SCRIPT_TERMINATED'} - $start);
        } else {
            warn "Unable to calculate post script time for job '$job'.Absence or mismatch of events POST_SCRIPT_TERMINATED ,POST_SCRIPT_STARTED/JOB_TERMINATED.\n";
            $jobInfo->{'cumulative_postscripttime'} = '-';
        }
    } elsif ($event eq 'GRID_SUBMIT' || $event eq 'GLOBUS_SUBMIT') {
        # condor delay of a grid run: SUBMIT until the remote submission.
        # The GLOBUS_SUBMIT based value is kept under its own key.
        return if $condorRun;

        my $key = 'cumulative_condorDelay';
        $key = 'cumulative_condorDelay_globus' if $event eq 'GLOBUS_SUBMIT';

        if (defined($jobInfo->{'SUBMIT'})) {
            _addToCumulative($jobInfo, $key, $jobInfo->{$event} - $jobInfo->{'SUBMIT'});
        } else {
            warn "Calculate of condor delay may fail for job '$job'.Absence or mismatch of events  $event , SUBMIT.\n";
            $jobInfo->{$key} = '-';
        }
    } elsif ($event eq 'EXECUTE') {
        # resource delay: remote submission until EXECUTE
        my $remoteSubmit = _firstDefinedTime($jobInfo, 'GRID_SUBMIT', 'GLOBUS_SUBMIT');
        if (defined($remoteSubmit)) {
            _addToCumulative($jobInfo, 'cumulative_resourceDelay', $jobInfo->{'EXECUTE'} - $remoteSubmit);
        } else {
            warn "Unable to calculate resource delay for job '$job' .Absence or mismatch of events GRID_SUBMIT/ GLOBUS_SUBMIT , EXECUTE.\n";
            $jobInfo->{'cumulative_resourceDelay'} = '-';
        }

        # condor delay of a pure condor run: SUBMIT until EXECUTE
        if ($condorRun) {
            if (defined($jobInfo->{'SUBMIT'})) {
                _addToCumulative($jobInfo, 'cumulative_condorDelay', $jobInfo->{'EXECUTE'} - $jobInfo->{'SUBMIT'});
            } else {
                warn "Unable to calculate condor delay for job '$job'.Absence or mismatch of events EXECUTE , SUBMIT.\n";
                $jobInfo->{'cumulative_condorDelay'} = '-';
            }
        }
    }
}

#
# For a clustered job, take the seqexec times from its "$job.out*" files in
# the current directory: 'seqexec' from the last line of the output,
# 'cumulative_seqexec' from all lines starting with "[struct", and the
# matching delays relative to the kickstart times.
#
sub _collectSeqexecTimes {
    my ($job, $jobInfo) = @_;

    # NOTE(review): the job name is interpolated into shell commands here.
    my $tail = `tail -n 1  $job.out* 2>&1`;
    chomp($tail);
    if (index($tail, "[struct") == 0) {
        my $duration = _extractField($tail, "duration=", ",");
        if ($duration =~ /^[\.0-9]*$/) {
            $jobInfo->{'seqexec'} = $duration;
            $jobInfo->{'seqexec-delay'} = sprintf("%.2f", $duration - $jobInfo->{'ksruntime'});
        } else {
            # some problem with the parsing of .out files
            $jobInfo->{'seqexec'} = "-";
            $jobInfo->{'seqexec-delay'} = "-";
        }
    }

    my @clusterLines = `grep -h '.*struct.*duration=.*' $job.out* 2>&1`;
    foreach my $clusterLine (@clusterLines) {
        chomp($clusterLine);
        next unless index($clusterLine, "[struct") == 0;

        my $duration = _extractField($clusterLine, "duration=", ",");
        if ($duration =~ /^[\.0-9]*$/) {
            _addToCumulative($jobInfo, 'cumulative_seqexec', sprintf("%.2f", $duration));
        } else {
            # some problem with the parsing of .out files
            $jobInfo->{'cumulative_seqexec'} = "-";
        }
    }

    # The delay can only be computed from two known totals; an unknown
    # ('-') seqexec total must not take part in the subtraction.
    if (defined($jobInfo->{'cumulative_seqexec'})
        && $jobInfo->{'cumulative_seqexec'} ne '-'
        && defined($jobInfo->{'cumulative_ksruntime'})) {
        $jobInfo->{'cumulative_seqexec-delay'} = sprintf("%.2f", $jobInfo->{'cumulative_seqexec'} - $jobInfo->{'cumulative_ksruntime'});
    } else {
        $jobInfo->{'cumulative_seqexec-delay'} = "-";
    }
}

#
# Read jobstate.log.
# Note the epochs of the events of every known job (relative to the first
# DAGMAN_STARTED event) and the site the job was submitted to.
# Copy the log with "relative" timestamps into OUTPUTDIR/out.
# Afterwards derive runtime, kickstart time, seqexec times, post script
# time and the dagman, condor and resource delays of every job.
#
# If this is a pure condor (glidein) run, there will not be any GRID_SUBMIT
# events. In this case condorDelay = EXECUTE - SUBMIT, and condorQLength is
# noted at the time of an EXECUTE event instead of a GRID_SUBMIT event.
#
# Arguments:
#   $jobsRef     - hash ref of known jobs; receives all results
#   $mapRef      - hash ref child name => array ref of parent names
#   $jobstateLog - path of the jobstate log
#   $outputDir   - directory in which the file "out" is written
#   $condorRun   - true for a pure condor run
#
# The kickstart and seqexec times are read from "<job>.out*" files in the
# current directory by means of grep and tail.
#
# Dies when the log cannot be read or the output file cannot be written.
#
sub processJobstateLog {
    my ($jobsRef, $mapRef, $jobstateLog, $outputDir, $condorRun) = @_;

    open(my $log, '<', $jobstateLog) or die "$jobstateLog not found\n";
    open(my $out, '>', "$outputDir/out") or die "Cannot open $outputDir/out\n";

    my $condorQLength = 0;
    my $foundStart = 0;
    my $globalStartTime;
    while (defined(my $line = <$log>)) {
        if ($line =~ /INTERNAL/) {
            if ($line =~ /DAGMAN_([A-Z]*)/) {
                my $dagmanEvent = $1;
                my @tokens = split(' ', $line);

                # The first start of DAGMan is the origin of all times.
                if ($dagmanEvent eq "STARTED" && !$foundStart) {
                    $foundStart = 1;
                    $globalStartTime = $tokens[0];
                }
                # A non-zero exit code of DAGMan hints at a failed or
                # restarted workflow.
                if ($line =~ /DAGMAN_FINISHED ([0-9]+)/) {
                    if ($1 != 0) {
                        warn "Workflow execution failed/restarted.";
                    }
                }
                $tokens[0] -= $globalStartTime;
                print {$out} join(' ', @tokens), "\n";
            } else {
                print {$out} $line;
            }
            next;
        }

        chomp($line);
        my @tokens = split(' ', $line);
        my $job = $tokens[1];
        my $event = $tokens[2];
        if (defined $jobsRef->{$job}) {
            my $jobInfo = $jobsRef->{$job};
            $tokens[0] -= $globalStartTime;

            # Some events need additional processing.
            if (!defined($event)) {
                # nothing to do
            } elsif ($event eq 'SUBMIT') {
                $jobInfo->{'site'} = $tokens[4];
                if ($jobInfo->{'site'} ne "local") {
                    $condorQLength++;
                }
            } elsif ($event eq 'EXECUTE') {
                if (defined($jobInfo->{'EXECUTE'})) {
                    # For whatever reason, this job was executed twice.
                    # This run may not be good.
                    warn "$job was executed more than once.\n";
                } elsif ($condorRun && $jobInfo->{'site'} ne "local") {
                    $jobInfo->{'condorQLength'} = $condorQLength;
                    $condorQLength--;
                }
            } elsif ($event eq 'GRID_SUBMIT') {
                # There are no GRID_SUBMIT events in a pure condor run, and
                # a job with a GRID_SUBMIT event does not run on the local
                # site. Both conditions are spelled out for readability.
                if (!$condorRun && $jobInfo->{'site'} ne "local") {
                    $jobInfo->{'condorQLength'} = $condorQLength;
                    $condorQLength--;
                }
            }
            $jobInfo->{$event} = $tokens[0];

            _updateCumulativeTimes($job, $jobInfo, $event, $condorRun);
        }
        print {$out} join(' ', @tokens), "\n";
    }
    close($out) or die "Cannot write $outputDir/out\n";
    close($log);

    #
    # Calculate runtimes for each job
    #
    foreach my $job (keys %{$jobsRef}) {
        my $jobInfo = $jobsRef->{$job};

        if (defined $jobInfo->{'JOB_TERMINATED'}) {
            if (defined $jobInfo->{'EXECUTE'}) {
                $jobInfo->{'runtime'} = $jobInfo->{'JOB_TERMINATED'} - $jobInfo->{'EXECUTE'};
            } else {
                $jobInfo->{'runtime'} = $jobInfo->{'JOB_TERMINATED'} - $jobInfo->{'SUBMIT'};
            }
        } else {
            $jobInfo->{'runtime'} = 0;
        }

        # The kickstart time is the sum of the durations of all invocation
        # records in the kickstart output. It is zero when there is no
        # kickstart output for the job.
        $jobInfo->{'ksruntime'} = 0;
        my @grepLines = `grep -h '<invocation' $job.out* 2>&1`;
        foreach my $grepLine (@grepLines) {
            chomp($grepLine);
            my $duration = _extractField($grepLine, 'duration="', '"');
            if ($duration =~ /^[\.0-9]*$/) {
                $jobInfo->{'ksruntime'} += $duration;
                if (defined($jobInfo->{'cumulative_ksruntime'})) {
                    $jobInfo->{'cumulative_ksruntime'} += $duration;
                } else {
                    $jobInfo->{'cumulative_ksruntime'} = $duration;
                }
            }
        }

        # Seqexec times exist for clustered (merge_) jobs only. Jobs on the
        # local site other than register_ jobs never get them.
        my $localAuxiliary = ($jobInfo->{'site'} eq 'local' && $job !~ /register_/);
        if (!$localAuxiliary && $job =~ /merge_/) {
            _collectSeqexecTimes($job, $jobInfo);
        } else {
            $jobInfo->{'seqexec'} = "-";
            $jobInfo->{'seqexec-delay'} = "-";
            $jobInfo->{'cumulative_seqexec'} = "-";
            $jobInfo->{'cumulative_seqexec-delay'} = "-";
        }

        if (defined $jobInfo->{'POST_SCRIPT_TERMINATED'}) {
            # Without a start event the post script is taken to start when
            # the job terminates.
            if (!defined $jobInfo->{'POST_SCRIPT_STARTED'}) {
                $jobInfo->{'POST_SCRIPT_STARTED'} = $jobInfo->{'JOB_TERMINATED'};
            }
            $jobInfo->{'postscripttime'} = $jobInfo->{'POST_SCRIPT_TERMINATED'} - $jobInfo->{'POST_SCRIPT_STARTED'};
        } else {
            $jobInfo->{'postscripttime'} = 0;
        }
    }

    #
    # Calculate dagmanDelays and condor Delays
    #
    foreach my $child (keys %{$jobsRef}) {
        my $childInfo = $jobsRef->{$child};

        # The job can be submitted once its last parent (including the
        # parent's post script) has finished.
        my $maxParentEnd = 0;
        foreach my $parent (@{$mapRef->{$child}}) {
            # A parent without a job definition has no events. Looking it
            # up must not create a job entry for it.
            next unless defined($jobsRef->{$parent});

            my $parentEnd = $jobsRef->{$parent}->{'JOB_TERMINATED'};
            if (defined $jobsRef->{$parent}->{'POST_SCRIPT_TERMINATED'}) {
                $parentEnd = $jobsRef->{$parent}->{'POST_SCRIPT_TERMINATED'};
            }
            if ($parentEnd > $maxParentEnd) {
                $maxParentEnd = $parentEnd;
            }
        }

        my $dagmanDelay = 0;
        if (defined $childInfo->{'SUBMIT'}) {
            $dagmanDelay = $childInfo->{'SUBMIT'} - $maxParentEnd;
            # The dagman delay is not cumulative; the name only matches
            # the names of the other values.
            $childInfo->{'cumulative_dagmanDelay'} = $dagmanDelay;
        }

        my $condorDelay = 0;
        my $resourceDelay = 0;
        if (defined $childInfo->{'GRID_SUBMIT'}) {
            $condorDelay = $childInfo->{'GRID_SUBMIT'} - $childInfo->{'SUBMIT'};
            $resourceDelay = $childInfo->{'EXECUTE'} - $childInfo->{'GRID_SUBMIT'};
        } elsif ($condorRun && defined $childInfo->{'EXECUTE'}) {
            # In some cases (pegasus_concat) under glidein (at least),
            # there is no EXECUTE event; the delay then stays 0.
            $condorDelay = $childInfo->{'EXECUTE'} - $childInfo->{'SUBMIT'};
        }

        $childInfo->{'dagmanDelay'} = $dagmanDelay;
        $childInfo->{'condorDelay'} = $condorDelay;
        $childInfo->{'resourceDelay'} = $resourceDelay;

        #
        # If condorQLength is not known, set it to 0.
        #
        if (!defined($childInfo->{'condorQLength'})) {
            $childInfo->{'condorQLength'} = 0;
        }
    }
}

#
# Write the per-job summary table to OUTPUTDIR/jobs.
#
# Arguments:
#   $jobsRef   - hash ref of jobs; the default values filled in for the
#                table are stored back into the job attribute hashes
#   $outputDir - directory in which the file "jobs" is written
#
# Columns:
#   Job           - the name of the job
#   Site          - the site where the job ran
#   Kickstart     - the actual duration of the job in seconds on the remote
#                   compute node
#   Post          - the postscript time as reported by DAGMan
#   DAGMan        - the time between the completion of the last parent of a
#                   job and the submission of the job
#   CondorQTime   - the time between submission by DAGMan and the remote
#                   Grid submission; an estimate of the time spent in the
#                   condor queue on the submit node
#   Resource      - the time between the remote Grid submission and the
#                   start of the remote execution; an estimate of the time
#                   the job spent in the remote queue
#   Runtime       - the time spent on the resource as seen by Condor DAGMan
#   Seqexec       - the time taken for the completion of a clustered job
#   Seqexec-Delay - the difference between Seqexec and the sum of the
#                   kickstart times of the individual tasks
#
# A value that is not known is written as '-'. The job name column is 80
# instead of 40 characters wide when long job names were seen.
#
# Dies when the file cannot be opened or written.
#
sub writeJobs {
    my ($jobsRef, $outputDir) = @_;

    my $jobsFile = "$outputDir/jobs";
    open(my $out, '>', $jobsFile) or die "Could not open $jobsFile\n";

    my $nameWidth = $longJobNames ? 80 : 40;
    my $valueFormat = "%20s %8s %8s %8s %13s %8s %8s %7s %13s\n";
    # The leading '#' of the header takes one character of the name column.
    my $headerFormat = "#%-" . ($nameWidth - 1) . "s " . $valueFormat;
    my $rowFormat = "%-" . $nameWidth . "s " . $valueFormat;

    print {$out} sprintf($headerFormat, "Job", "Site", "Kickstart", "Post", "DAGMan", "CondorQTime", "Resource", "Runtime", "Seqexec", "Seqexec-Delay");

    my @valueKeys = (
        'cumulative_ksruntime',
        'cumulative_postscripttime',
        'cumulative_dagmanDelay',
        'cumulative_condorDelay',
        'cumulative_resourceDelay',
        'cumulative_runtime',
        'cumulative_seqexec',
        'cumulative_seqexec-delay',
    );

    foreach my $job (sort keys %{$jobsRef}) {
        my $jobInfo = $jobsRef->{$job};

        # Setting default values
        if (!defined($jobInfo->{"site"})) {
            $jobInfo->{"site"} = "local";
        }

        # Without a usable GRID_SUBMIT based condor delay fall back to the
        # GLOBUS_SUBMIT based one.
        if (!defined($jobInfo->{'cumulative_condorDelay'}) or $jobInfo->{'cumulative_condorDelay'} eq '-') {
            if (defined($jobInfo->{'cumulative_condorDelay_globus'})) {
                $jobInfo->{'cumulative_condorDelay'} = $jobInfo->{'cumulative_condorDelay_globus'};
            }
        }

        # Every column must hold a value, or columns separated by white
        # space would shift.
        foreach my $key (@valueKeys) {
            $jobInfo->{$key} = '-' unless defined($jobInfo->{$key});
        }

        if ($jobInfo->{'cumulative_ksruntime'} ne '-') {
            $jobInfo->{'cumulative_ksruntime'} = sprintf("%.2f", $jobInfo->{'cumulative_ksruntime'});
        }

        print {$out} sprintf($rowFormat, $job, $jobInfo->{'site'}, @{$jobInfo}{@valueKeys});
    }
    close($out) or die "Could not write $jobsFile\n";
}

#
# Write the table of files (name, size, source site) to OUTPUTDIR/files,
# sorted by file name. The name column is 80 instead of 40 characters wide
# when long file names were seen.
#
# Arguments:
#   $filesRef  - hash ref file name => attribute hash ('size', 'src')
#   $outputDir - directory in which the file "files" is written
#
sub writeFiles {
    my ($filesRef, $outputDir) = @_;

    my $filesFile = "$outputDir/files";
    open(FILES, ">$filesFile") || die "Could not open $filesFile\n";

    # Pick the pair of formats once; the width does not change while the
    # table is written.
    my $headerFormat = "#%-39s % 12s %20s\n";
    my $rowFormat = "%-40s % 12d %20s\n";
    if ($longFilenames) {
        $headerFormat = "#%-79s %12s %20s\n";
        $rowFormat = "%-80s % 12d %20s\n";
    }

    print FILES sprintf($headerFormat, "Filename", "Size", "Source");
    foreach my $fileName (sort keys %{$filesRef}) {
        my $attributes = $filesRef->{$fileName};
        print FILES sprintf($rowFormat, $fileName, $attributes->{'size'}, $attributes->{'src'});
    }
    close(FILES);
}

#
# Write the workflow with the measured times as an annotated DAX to
# OUTPUTDIR/dax.
#
# Arguments:
#   $jobsRef   - hash ref of jobs with their attributes and file lists
#   $mapRef    - hash ref child name => array ref of parent names
#   $filesRef  - hash ref file name => attribute hash
#   $outputDir - directory in which the file "dax" is written
#
# The output consists of the file elements, one job element per job with
# its input and output files, and one child element for every job that has
# parents. Jobs and files are written in sorted order.
#
# Dies when the file cannot be opened or written.
#
sub writeDAX {
    my ($jobsRef, $mapRef, $filesRef, $outputDir) = @_;

    my $daxFile = "$outputDir/dax";
    open(my $dax, '>', $daxFile) or die "Could not open $daxFile\n";

    my $prologue = << "HERE";
    <?xml version="1.0" encoding="UTF-8"?>
    <adag xmlns="http://www.griphyn.org/chimera/DAX"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
        xsi:schemaLocation="http://www.griphyn.org/chimera/DAX
        http://www.griphyn.org/chimera/dax-1.8.xsd"
        name="inspiral" index="0" count="1" version="1.8">

HERE
    print {$dax} $prologue;

    # Only files flagged as 'used' are listed.
    # NOTE(review): no code visible in this file sets that flag - confirm
    # whether this section is still wanted.
    foreach my $file (sort keys %{$filesRef}) {
        my $fileInfo = $filesRef->{$file};
        next unless defined $fileInfo->{'used'};

        print {$dax} "\t<filename file=\"$file\" size=\"$fileInfo->{'size'}\"";
        if (defined $fileInfo->{'src'}) {
            print {$dax} " src=\"$fileInfo->{'src'}\"";
        }
        print {$dax} "/>\n";
    }

    foreach my $job (sort keys %{$jobsRef}) {
        my $jobInfo = $jobsRef->{$job};

        # The runtime attribute holds the kickstart time; the run time seen
        # by DAGMan is written as runSeen.
        my $runtime = $jobInfo->{'runtime'};
        my $ksruntime = $jobInfo->{'ksruntime'};
        my $postscripttime = $jobInfo->{'postscripttime'};
        my $dagmanDelay = $jobInfo->{'dagmanDelay'};
        my $condorDelay = $jobInfo->{'condorDelay'};
        my $resourceDelay = $jobInfo->{'resourceDelay'};
        my $site = $jobInfo->{'site'};
        my $condorQLength = $jobInfo->{'condorQLength'};
        print {$dax} "\t<job id=\"$job\" site=\"$site\" runtime=\"$ksruntime\" postscripttime=\"$postscripttime\" dagmanDelay=\"$dagmanDelay\" condorDelay=\"$condorDelay\" resourceDelay=\"$resourceDelay\" runSeen=\"$runtime\" condorQLength=\"$condorQLength\">\n";

        foreach my $file (@{$jobInfo->{'in'}}) {
            print {$dax} "\t\t<uses file=\"$file\" link=\"input\"/>\n";
        }
        foreach my $file (@{$jobInfo->{'out'}}) {
            print {$dax} "\t\t<uses file=\"$file\" link=\"output\"/>\n";
        }
        print {$dax} "\t</job>\n";
    }

    foreach my $child (sort keys %{$jobsRef}) {
        # A job without an entry in the map has no parents. Dereferencing
        # the missing entry would abort the script under "use strict".
        my $parents = $mapRef->{$child};
        next unless defined($parents) && scalar(@{$parents}) > 0;

        print {$dax} "\t<child ref=\"$child\">\n";
        foreach my $parent (@{$parents}) {
            print {$dax} "\t\t<parent ref=\"$parent\"/>\n";
        }
        print {$dax} "\t</child>\n";
    }

    print {$dax} "</adag>\n";
    close($dax) or die "Could not write $daxFile\n";
}

#
# Main script: parse the command line, change into the directory holding
# the workflow files, process the DAG and the run information and write
# the outputs.
#
my ($adaxName, $condorRun, $dagName, $inputDir, $jobstateLog, $outputDir);
my $result = GetOptions(
    "adax|a=s" => \$adaxName,
    "condor|c" => sub { $condorRun++; },
    "dag|d=s" => \$dagName,
    # An option callback receives the option name and value; call usage()
    # with an explicit exit status instead of handing those on.
    "help|h" => sub { usage(0); },
    "input|i=s" => \$inputDir,
    "jobstate-log|l=s" => \$jobstateLog,
    "output|o=s" => \$outputDir,
);

# Unknown options and missing mandatory arguments are errors, so the help
# text is shown with a non-zero exit status.
if (!$result || !defined($dagName) || !defined($outputDir)) {
    usage(1);
}

die "only one of --adax or --jobstate-log can be specified\n" if (defined($adaxName) && defined($jobstateLog));

$outputDir = File::Spec->rel2abs($outputDir);
if (! -d $outputDir) {
    eval { mkpath($outputDir); };
    if ($@ || ! -d $outputDir) {
        die "Could not create $outputDir\n";
    }
}

# The working directory changes below, so every path given on the command
# line has to be made absolute first.
if (defined $jobstateLog) {
    $jobstateLog = File::Spec->rel2abs($jobstateLog);
}
if (defined $adaxName) {
    $adaxName = File::Spec->rel2abs($adaxName);
}
$dagName = File::Spec->rel2abs($dagName);

# The submit files, file lists and kickstart outputs are read relative to
# the current directory: the input directory if given, else the directory
# of the DAG file.
if (defined($inputDir)) {
    $inputDir = File::Spec->rel2abs($inputDir);
    chdir($inputDir) || die "Cannot chdir to $inputDir\n";
} else {
    my ($dagbase, $submit_dir, $type) = fileparse($dagName, qr{\..*});
    chdir($submit_dir) || die "Cannot chdir to $submit_dir\n";
}

processDAGManOutput(\%jobs, \%map, \%files, $dagName, $outputDir);

processFileStats(\%files);

if (defined $jobstateLog) {
    processJobstateLog(\%jobs, \%map, $jobstateLog, $outputDir, $condorRun);
} elsif (defined $adaxName) {
    processAnnotations(\%jobs, \%files, $adaxName);
}

#
# Write out various files.
#
writeJobs(\%jobs, $outputDir);
writeFiles(\%files, $outputDir);

#
# Write out dax in OUTPUTDIR/dax
#
writeDAX(\%jobs, \%map, \%files, $outputDir);

# Copy info.txt if it exists
if (-e "info.txt") {
    copy("info.txt", "$outputDir/info.txt") || warn "Could not copy info.txt to $outputDir\n";
}
