#!/usr/bin/perl

=head1 NAME

winibw2pica - convert WinIBW PICA+ download to valid PICA+

=cut

use strict;
use warnings;

our $VERSION = '0.1';

use PICA::Parser qw(parsedata);
use PICA::Field qw(parse_pp_tag);
use Getopt::Long;
use Pod::Usage;

# Command line options; each stays undefined unless given by the user.
my ($outfile, $pretty, $xml, $help, $man, $version);

GetOptions(
    "output:s" => \$outfile,
    "prettyxml" => \$pretty,
    "help|?" => \$help,
    "man" => \$man,
    "version" => \$version,
    "xml" => \$xml
) or pod2usage(2);
pod2usage(-verbose => 2) if $man;
print "winibw2pica version $VERSION\n" and pod2usage(1) if $version;
pod2usage(1) if $help or @ARGV == 0;

# "output:s" makes the option value optional, so a bare -output yields an
# empty string. Treat that like a missing option and write to STDOUT instead
# of handing an empty file name to the writer.
$outfile = "-" unless defined $outfile && length $outfile;

# Writer arguments: the output target first, then optional named settings.
my @p = ($outfile ne "-" ? $outfile : \*STDOUT);
$xml = 1 if $pretty;    # -prettyxml implies -xml
push @p, ('format' => 'XML') if $xml;

# NOTE(review): PICA::Writer is not loaded explicitly in this file;
# presumably one of the PICA modules above pulls it in - confirm.
my $writer = PICA::Writer->new( @p, pretty => $pretty );

# Shared reader state: the currently open input handle and a pushback
# buffer that holds at most one line peeked at by lookahead().
my $FILE;
my @buffer;

# Return the next input line, or undef at end of file.
# A line previously peeked at by lookahead() is returned first.
sub nextline {
    return shift @buffer if @buffer;
    return scalar readline($FILE);
}

# Return the upcoming input line without consuming it, or undef at end of
# file. The line is kept in @buffer until nextline() fetches it.
sub lookahead {
    unless (@buffer) {
        # Read exactly one line. As a direct argument of push, readline()
        # would be evaluated in list context and slurp the rest of the file.
        my $line = readline($FILE);
        push @buffer, $line if defined $line;
    }
    return $buffer[0];
}

# Convert every input file in turn; all records go to the one shared writer.
foreach my $infile (@ARGV) {
    # Never carry a peeked line over from a previous input file.
    @buffer = ();

    if ( $infile eq '-' ) {
        # Two-argument open treated "-" as standard input; keep that
        # convention explicitly.
        open $FILE, '<&', \*STDIN
            or die("Failed to open file $infile: $!");
    }
    else {
        # Three-argument open: the file name is taken literally and is never
        # interpreted as an open mode or a pipe command.
        open $FILE, '<', $infile
            or die("Failed to open file $infile: $!");
    }

    # parsedata() fetches its input through the winibw2pica callback and
    # passes the resulting records to $writer.
    parsedata( \&winibw2pica, Record => $writer );
    close $FILE;
}

$writer->end;

# Input callback for parsedata(): return the next cleaned-up line of the
# WinIBW download, or nothing at end of input.
#
#  * a line starting with "SET" is skipped
#  * physical lines that were wrapped are joined back into a single line
#  * a "[n ] text" line is replaced by a faked level 1 field "101@" with
#    the text in subfield d
#  * every \x83 byte is replaced by \x1F, the subfield marker also used in
#    the faked field below
sub winibw2pica {
    my $line = nextline();
    $line = nextline() if defined $line and $line =~ /^SET/;
    return unless defined $line;

    # Append the following lines as long as they look like continuations:
    # not blank, not a "[n ] ..." line and not starting with something that
    # parse_pp_tag() accepts as a tag.
    my $next = lookahead();
    while ( defined $next and not $next =~ /^\s*$/ and not
             $next =~ /^\[\d+\s*\].+$/ and not
            ( $next =~  /^(....(\/..)?) / && parse_pp_tag($1) )
          ) {
        chomp($line);
        $line .= nextline();
        $next = lookahead();
    }

    # Translate the markers only after joining, so that markers located in
    # continuation lines are converted as well.
    # NOTE(review): this is a plain byte substitution; it presumably expects
    # single-byte encoded input - confirm for UTF-8 downloads.
    $line =~ s/\x83/\x1F/g;

    # fake level 1
    if ( $line =~ /^\[\d+\s*\]\s*(.+)$/ ) {
        return "101\@ \x1Fd$1\xA0";
    }

    return $line;
}

=head1 SYNOPSIS

winibw2pica [options] inputfile(s)

=head1 OPTIONS

 -help          brief help message
 -man           full documentation with examples
 -output FILE   write PICA+ records to a given file (default: '-' for STDOUT)
 -prettyxml     pretty print PICA/XML
 -xml           output of records in PICA/XML
 -version       show version information

=head1 DESCRIPTION

This script can be used to convert "PICA+" download files from WinIBW software
to valid PICA+. You can download so called PICA+ from WinIBW with the WinIBW
command C<DOWNLOAD> (or just C<DOW>). The calling syntax is

  DOW <n1>[-<n2> ] P[<n3>]

Where C<n1> is the first and C<n2> is the last record number that you want to
download from the current result list (starting with 1) and C<n3> is either a
library number or C<A> for all libraries. If you omit C<n3> then only level 0
will be downloaded. Unfortunately the resulting download format is not valid
PICA+ but it includes some additional lines, linebreaks and other nasty stuff.
This script tries to clean the WinIBW output and returns PICA+ on success. 

If this script fails to convert some data, please first make sure to install
the latest version of L<PICA::Record>. If there is still an error afterwards,
send me a detailed bug report with the version number of this script, the
downloaded data, your WinIBW version number and a B<detailed list> of which
WinIBW commands you performed to produce the download.

=head1 AUTHOR

Jakob Voss C<< jakob.voss@gbv.de >>

=head1 LICENSE

This script is published as Public Domain. Feel free to reuse as you like!
