#!/usr/bin/perl -w

## runbib-missing: reports ID values that runbib did not find
##
## usage: (perl) runbib-missing -b basename
## usage: (perl) runbib-missing -f basename.bib.xml < basename.id.xml
##
## Dependencies: perl 5.0.0 or later
##               Getopt::Std;
##
## markus@mhoenicka.de 2011-03-07

##   This program is free software; you can redistribute it and/or modify
##   it under the terms of the GNU General Public License as published by
##   the Free Software Foundation; either version 2 of the License, or
##   (at your option) any later version.
##   
##   This program is distributed in the hope that it will be useful,
##   but WITHOUT ANY WARRANTY; without even the implied warranty of
##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##   GNU General Public License for more details.
   
##   You should have received a copy of the GNU General Public License
##   along with this program; if not, write to the Free Software
##   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

# use this module to read command line options
use Getopt::Std;

# catch undeclared variables and similar mistakes at compile time
use strict;

# ID values collected from the ID input
my @idlist;
# previously handled ID, used to skip duplicates in the sorted list
my $lastid = "";
# file that is searched for the IDs
my $bibfile;
# file the IDs are read from; left undefined to read standard input instead
my $idfile;
# common filename stem supplied with -b
my $basename;

# this hash will receive the command line options
my %opts;

getopts('b:f:h', \%opts);

## Evaluate the options in a fixed order. Walking the hash with each()
## would visit them in an unpredictable order, so the result of
## combining -b and -f would vary from run to run.

# -h: print the usage summary and stop, regardless of other options
if (exists($opts{h})) {
    print("runbib-missing displays any ID values not found by runbib
Usage: runbib-missing -b basename
       runbib-missing -f basename.bib.xml < basename.id.xml\n");
    exit (0);
}

# -b: derive both file names from the common basename
if (defined($opts{b})) {
    $basename = $opts{b};
    $idfile = $basename . ".id.xml";
    $bibfile = $basename . ".bib.xml";
}

# -f: an explicitly named bibliography file always takes precedence
# over the name derived from -b
if (defined($opts{f})) {
    $bibfile = $opts{f};
}

# Without a bibliography file there is nothing to search. Both -b
# (which derives the name from the basename) and -f (which names the
# file directly) set $bibfile, so the message mentions both.
unless (defined($bibfile)) {
    die "error: you must use -b or -f to specify a bibliography file";
}

# Collect the contents of every <xref>...</xref> element of the ID
# input in @idlist.
#
# The non-greedy pattern combined with a global match in list context
# yields one entry per element, so lines carrying several <xref>
# elements contribute all of their IDs. Lines with an opening tag but
# no closing tag on the same line contribute nothing.
my $xref_re = qr{<xref>(.*?)</xref>};

if (defined($idfile)) {
    # three-argument open with a lexical handle (perl 5.6 or later):
    # the file name comes from the command line and must never be
    # interpreted as an open mode or a pipe
    open(my $idfh, '<', $idfile)
        or die "cannot access $idfile: $!";
    while (my $line = <$idfh>) {
        push(@idlist, $line =~ /$xref_re/g);
    }
    close($idfh);
}
else {
    # no ID file given: read the IDs from the files named on the
    # command line or, if there are none, from standard input
    while (my $line = <>) {
        push(@idlist, $line =~ /$xref_re/g);
    }
}

# Print every requested ID that does not occur in the bibliography
# file, one per line, in sorted order and without duplicates.
#
# The bibliography is read once and searched in-process. Running an
# external grep for each ID would pass the ID and the file name
# through the shell (word splitting, command injection), would treat
# the ID as a regular expression, and would start one process per ID.
# index() performs a plain substring search instead.
# NOTE(review): as with grep, an ID also counts as found if it is
# merely a substring of a longer ID in the bibliography file.
open(my $bibfh, '<', $bibfile)
    or die "cannot access $bibfile: $!";
my $bibdata = do { local $/; <$bibfh> };
close($bibfh);

# slurping may yield undef if nothing could be read
$bibdata = "" unless defined($bibdata);

# IDs that were already checked
my %seen;

foreach my $id (sort(@idlist)) {
    # an empty <xref/> carries no ID worth reporting
    next if $id eq "";
    # check each distinct ID only once
    next if $seen{$id}++;

    if (index($bibdata, $id) < 0) {
        print $id . "\n";
    }
}

exit (0);
