#! /usr/bin/perl

# This script constructs PhotoML descriptions from the EXIF data in
# JPEG images from digital cameras

# Copyright © 2005-2010 Brendt Wohlberg <photoml@wohlberg.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License at
# http://www.gnu.org/licenses/gpl-2.0.txt.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.

# Most recent modification: 18 April 2010

use strict;
use File::Basename;
use File::Temp qw(tmpnam);
use Getopt::Std;
use Date::Manip;
use Image::ExifTool;

# Set up path variables
my $pmlpath = dirname($0) . "/..";
my $xsl = "$pmlpath/xsl/misc/digital.xsl";
my $tmp = '/tmp';
if (-r '/etc/xml/catalog' and $ENV{'XML_CATALOG_FILES'} eq '') {
  $ENV{'XML_CATALOG_FILES'} = "/etc/xml/catalog";
}
$ENV{'XML_CATALOG_FILES'} = "$pmlpath/dtd/catalog.xml " .
                            $ENV{'XML_CATALOG_FILES'};
undef $ENV{'SGML_CATALOG_FILES'};

# Define list of extracted EXIF tags
my $exftgs0 = [
'ApertureValue',
'BitsPerSample',
'BitDepth',
'City',
'ColorGain',
'ColorSpace',
'Country',
'CreateDate',
'DateTimeOriginal',
'DigitalGEM',
'DigitalICE',
'DigitalROC',
'ExposureCompensation',
'ExposureMode',
'ExposureTime',
'FileSource',
'FilmType',
'Flash',
'FlashExposureComp',
'FNumber',
'FocalLength',
'FocalLengthIn35mmFormat',
'FocusDistance',
'GPSAltitude',
'GPSAltitudeRef',
'GPSLatitude',
'GPSLatitudeRef',
'GPSLongitude',
'GPSLongitudeRef',
'GPSMapDatum',
'ImageDepth',
'ImageHeight',
'ImageWidth',
'InteropIndex',
'ISO',
'LensID',
'Location',
'Make',
'MasterGain',
'MeteringMode',
'Model',
'ModifyDate',
'MultiSample',
'OwnerName',
'ShutterSpeedValue',
'Software',
'State',
'SubjectDistance'
	     ];
my $exftgs1 = [ @$exftgs0 ];
# Add tags exluded from list used for XMP processing
push @$exftgs1, ('FileType', 'MimeType');

# Ensure imageinfo, md5sum, dcraw, and xsltproc are available
die "pmldigital: error executing imageinfo\n"
    if (`which imageinfo 2>/dev/null` eq '');
die "pmldigital: error executing md5sum\n"
    if (`which md5sum 2>/dev/null` eq '');
die "pmldigital: error executing dcraw\n"
    if (`which dcraw 2>/dev/null` eq '');
die "pmldigital: error executing xstlproc\n"
    if (`which xsltproc 2>/dev/null` eq '');

# Parse command line switches
my $ustr = <<EOF;
usage: pmldigital [-h] [-b] [-i] [-f] [-d] [-r] [-x] ([-s] | [-g group-id])
                  [-t date-time-offset] infile [infile] ...
                 -h Display usage information
                 -b Include file basename only in output
                 -i Output intermediate XML representation of image data
                 -f Fast output excluding image and file md5 hashes
                 -d Construct digimage descriptions of scanned images
                 -r Skip JPEG images with same basename as a RAW image
                 -x Look for corresponding XMP files for each image
                 -s Set group and frame (and image) ids from standard filenames
                 -g Specify output group id
                 -t Specify offset for date/time stamps
EOF
my $options = {};
getopts('bifdrxsg:ht:', $options);
die "$ustr" if (defined $options->{'h'} or @ARGV == 0 or
               (defined $options->{'s'} and defined $options->{'g'}));
my $flags = {'base' => defined $options->{'b'},
             'hash' => !defined $options->{'f'},
	     'scan' => defined $options->{'d'},
	     'rskp' => defined $options->{'r'},
             'xmps' => defined $options->{'x'},
	     'stid' => defined $options->{'s'}};
my $gattr = '';
$gattr = " group-id=\"" . $options->{'g'} . "\"" if defined ($options->{'g'});
my $exifout = defined ($options->{'i'});
my $dtdl;
if (defined $options->{'t'}) {
  Date_Init("DateFormat=non-US");
  $dtdl = ParseDateDelta($options->{'t'});
  die "pmldigital: invalid date/time offset " . $options->{'t'} . "\n"
    if ($dtdl eq '');
}

my $rbase = {};
my ($file,$base,$path,$sffx);
foreach $file ( @ARGV ) {
  die "pmldigital: file $file does not exist\n" if (! -f $file);
  die "pmldigital: file $file is not readable\n" if (! -r $file);
  ($base,$path,$sffx) = fileparse($file, qr/\.[^.]*/);
  $sffx = lc($sffx);
  $rbase->{$base} = 1 if ($sffx eq '.nef' or $sffx eq '.crw');
}

# Set output to stdout or pipe to xsltproc depending on command line flags
my $of;
if ($exifout) {
  $of = \*STDOUT;
} else {
  open(OPH, "| xsltproc $xsl -") or die "pmldigital: pipe open failed\n";
  $of = \*OPH;
}

print $of "<?xml version=\"1.0\"?>\n<pmldigital$gattr>\n";

my ($exif, $tgls, $info, $tag, $grp, $elt, $gid, $fid, $iid,
    $satr, $hatr, $fhsh, $ihsh, $inam, $dupl);
foreach $file ( @ARGV ) {

  # Parse filename into components
  ($base,$path,$sffx) = fileparse($file, qr/\.[^.]*/);

  # Check for skip of JPEG with same basename as a raw file
  next if ($flags->{'rskp'} and lc($sffx) eq '.jpg' and
	   defined $rbase->{$base});
  # Check for skip of XMP files
  next if ($flags->{'xmps'} and lc($sffx) eq '.xmp');
  # Set tag duplication flag for NEF files
  $dupl = (lc($sffx) eq '.nef')?0:1;

  # Set group/frame/image id attribute strings
  $gid = ''; $fid = ''; $iid = '';
  if ($flags->{'stid'}) {
    if ($flags->{'scan'} and
	basename($file) =~ /^(\d{4}[rs]\d+)f(\d+)v(\d+)\./) {
      $gid = " group-id=\"$1\"";
      $fid = " frame-id=\"$2\"";
      $iid = " image-id=\"$3\"";
    } elsif (basename($file) =~ /^(\d{4}d\d+)f(\d+)v\d+\./) {
      $gid = " group-id=\"$1\"";
      $fid = " frame-id=\"$2\"";
    }
  }

  # Set scan identifier attribute
  my $satr = ($flags->{'scan'})?' scan="yes"':'';
  my $inam = ($flags->{'base'})?basename($file):$file;

  # Initialise tags hash
  my $tgs = {};

  # Check for and process XMP files
  if ($flags->{'xmps'}) {
    my $xmpf = $path . $base . ".xmp";
    if (-f $xmpf) {

       $exif = new Image::ExifTool;
       @$tgls = @$exftgs0;
       $info = $exif->ImageInfo($xmpf, $tgls, {PrintConv => 1,
					       CoordFormat => "%.8f",
					       Group1 => '-IFD1'});
       gettags($info,$dtdl,$tgs);
    }
  }

  # Construct ExifTool object for image file
  $exif = new Image::ExifTool;
  $path = $file;
  if ($path =~ /\.bz2$/) {
    $path = qq{bzip2 -dc "$path" |};
  }

  # Pass in a copy because the tag list is modified
  @$tgls = @$exftgs1;
  $info = $exif->ImageInfo($path, $tgls,
			   {PrintConv => 1,
			    DateFormat => "%Y-%m-%d %H:%M:%S",
			    CoordFormat => "%.8f",
			    Group1 => '-IFD1',
			    Duplicates => $dupl});

  # Close pipe and exit with error if non-image file encountered
  if (scalar keys %$info == 0) {
    print $of "</pmldigital>\n";
    close(OPH) if (!$exifout);
    die "pmldigital: $file is not an image file\n";
  }

  # Extract EXIF tags from image file
  gettags($info,$dtdl,$tgs);

  # Set hash attribute strings
  if ($flags->{'hash'}) {
    ($fhsh, $ihsh) = gethash($file,$info->{'MIMEType'});
    $hatr = " file-hash=\"$fhsh\" image-hash=\"$ihsh\"";
  } else {
    $hatr = "";
  }

  # Output collected tags in XML format
  print $of "  <image$satr name=\"$inam\"$gid$fid$iid$hatr>\n";
  my $val;
  foreach $elt (sort keys %$tgs) {
    $val = $tgs->{$elt};
    print $of "    <$elt>$val</$elt>\n";
  }
  print $of "  </image>\n";
}

print $of "</pmldigital>\n";

# Close output if it is a pipe (rather than stdout)
close(OPH) if (!$exifout);

exit 0;


# Extract and process EXIF tags in ExifTool ImageInfo output format
sub gettags {
  my $info = shift;
  my $dtdl = shift;
  my $tags = shift;

  my ($tag,$grp,$elt,$val,$date);
  foreach $tag (sort keys %$info) {
    $grp = $exif->GetGroup($tag);
    $elt = "$grp.".Image::ExifTool::GetTagName($tag);
    # Check for necessity of applying date offset
    if (defined $dtdl and $elt =~ /Date/) {
      $date = $info->{$tag};
      if ($date =~ /(\d{4}):(\d{2}):(\d{2})\s+(.*)/) {
	$date = "$1-$2-$3 $4";
      }
      $val = UnixDate(DateCalc($date, $dtdl),'%Y:%m:%d %H:%M:%S');
      print $of "    <!-- Original date: ".$info->{$tag}."-->\n";
    } else {
      $val = $info->{$tag};
    }
    # Apply necessary changes to tag value
    $val =~ s/\s+$//;     # Remove trailing spaces
    $val =~ s/\&/&amp;/g; # Convert & to &amp;

    # Insert tag into tag hash data structure
    $tags->{$elt} = $val if (!defined $tags->{$elt});
  }
}



# Get md5 hash data from specified file by using dcraw or imageinfo,
# and md5sum
sub gethash {
  my $path = shift;
  my $mtyp = shift;

  my $fhash = `md5sum $path | awk '{printf("%s",\$1)}'`;
  my $ipth;
  if ($mtyp eq 'image/x-raw') {
    $ipth = tmpnam();
    my $cmd = "dcraw -d -c $path > $ipth";
    system($cmd); # Should be more careful here
  } else {
    $ipth = $path;
  }
  my $ihash = `imageinfo --md5hash $ipth 2>/dev/null`;
  unlink $ipth if ($mtyp eq 'image/x-raw');
  return ($fhash, $ihash);
}
