#!/usr/bin/perl -- # -*- Perl -*-
# build the character entity tables from the glyphtable map
# This script must be run in the 'build' directory

use XML::DOM;

# First command-line argument (optional): a true value enables the
# per-entity diagnostics that are printed to STDERR while the tables
# are generated.
$verbose = shift(@ARGV);

# Location of the entity catalogue, relative to the current directory.
$entities = "lib/entities.xml";

# Parse the catalogue and keep the list of its 'entity' elements in
# $entlist.  The progress message on STDERR is only terminated with a
# newline once the element list has been obtained.
my $parser = XML::DOM::Parser->new(NoExpand => 0);
print STDERR "Loading $entities...";
$doc     = $parser->parsefile($entities);
$root    = $doc->getDocumentElement();
$entlist = $root->getElementsByTagName('entity');
print STDERR "\n";

# Build two lookup tables, keyed by entity name, from the 'entity'
# elements in $entlist:
#
#   %hex  - value of the element's 'ucode' attribute
#   %desc - markup found between <desc> and </desc> of the element's
#           first 'desc' child (only set when such a child exists)
#
# Entities whose 'ucode' attribute is empty are skipped.  When a name
# occurs more than once, the first occurrence wins and a warning is
# issued for every later one.
%hex = ();
%desc = ();
for my $index (0 .. $entlist->getLength() - 1) {
    my $ent   = $entlist->item($index);
    my $ucode = $ent->getAttribute('ucode');
    my $name  = $ent->getAttribute('name');

    next if $ucode eq '';

    # This used to read `warn "...", next if ...`, which hands `next` to
    # warn as an argument: `next` ran first and the warning was never
    # printed.  Two separate statements make the warning visible.
    if (exists $hex{$name}) {
        warn "Ignoring dup: $name\n";
        next;
    }

    $hex{$name} = $ucode;

    # An entity without a 'desc' child gets no %desc entry instead of
    # aborting the script with a method call on an undefined value.
    my $desce = $ent->getElementsByTagName('desc')->item(0);
    next if !defined $desce;

    # Strip the enclosing tags; /s lets a description span several lines.
    my $text = $desce->toString();
    $text = $1 if $text =~ /<desc>(.*)<\/desc>/s;
    $desc{$name} = $text;
}

# Generate one DocBook table fragment, "$tbldir/<base>.gen", for every
# entity set "$dir/<base>.ent".
#
# Each line of an .ent file that looks like
#     <!ENTITY name ... --comment--
# becomes one table row holding the entity name, its code point (from
# %hex), a glyph image reference and a description.  The description is
# taken from %desc when the entity is listed there; otherwise it is the
# declaration's comment with '&' and '<' escaped.
#
# Glyph selection, in order of preference:
#   1. "U<code>.png" in $glyphdir, when the code point is known;
#   2. "<name>.png" in $glyphdir (three names are remapped, see below);
#   3. the "Unknown" placeholder.  The entity is then counted in
#      $missglyph when its code point is known and in $unkglyph when it
#      is not, and reported on STDERR if $verbose is set.
#
# Dies if the directory or any input/output file cannot be opened, or if
# closing a generated file reports an error.
$dir = "lib/isoents";
$tbldir = ".";
$glyphdir = "../../glyphs/100dpi";

$missglyph = 0;
$unkglyph = 0;

# Entity names whose glyph file carries a different base name.
# NOTE(review): presumably these avoid clashes with same-named lower-case
# entities on case-insensitive file systems -- confirm.
my %glyph_alias = (
    'vsubnE' => "vsubn-cap-E",
    'vsupnE' => "vsupn-cap-E",
    'xhArr'  => "xh-cap-Arr",
);

opendir(my $dh, $dir) or die "Cannot open directory $dir: $!\n";
while (defined(my $entry = readdir($dh))) {
    my $file = "$dir/$entry";

    next if ! -f $file;
    next if $entry !~ /^(.*)\.ent$/;

    my $base = $1;
    my $tblfile = "$tbldir/$base.gen";

    open(my $tbl, '>', $tblfile) or die "Cannot write $tblfile: $!\n";

    print {$tbl}
        "<informaltable pgwide='1'>\n",
        "<tgroup cols='4'>\n",
        "<?dbhtml table-summary=\"Unicode character entity table\"?>\n",
        "<colspec colwidth=\"15*\"/>\n",
        "<colspec colwidth=\"15*\" align='center'/>\n",
        "<colspec colwidth=\"10*\" align='center'/>\n",
        "<colspec colwidth=\"60*\"/>\n",
        "<thead>\n",
        "<row>\n",
        "  <entry align='center'>Entity <?lb?>Name</entry>\n",
        "  <entry align='center'>Unicode <?lb?>Code point</entry>\n",
        "  <entry align='center'>Sample <?lb?>Glyph</entry>\n",
        "  <entry align='left'>Description</entry>\n",
        "</row>\n",
        "</thead>\n",
        "<tbody>\n";

    open(my $in, '<', $file) or die "Cannot read $file: $!\n";
    while (my $line = <$in>) {
        # chomp, not chop: a final line without a newline must keep its
        # last character.
        chomp $line;
        next unless $line =~ /<!ENTITY\s+(\S+)\s+.*?--[=\/]?(.*)--/;
        my ($name, $description) = ($1, $2);

        # Keep only what follows an "X:" style prefix, then escape the
        # characters that are special in XML text.
        $description = $1 if $description =~ /[A-Z]:\s*(.*)/;
        $description =~ s/\&/\&amp;/g;
        $description =~ s/\</\&lt;/g;

        # The catalogue's description, when present, takes precedence.
        $description = $desc{$name} if exists($desc{$name});

        my $known = $hex{$name} ? 1 : 0;
        my $code  = $known ? $hex{$name} : "&nbsp;";
        my $glyph = "Blank";

        if ($known && -f "$glyphdir/U$code.png") {
            $glyph = "U$code";
        } else {
            $glyph = exists $glyph_alias{$name} ? $glyph_alias{$name} : "$name";

            if (! -f "$glyphdir/$glyph.png") {
                $glyph = "Unknown";

                # The "missing glyph" accounting used to sit inside the
                # branch above, behind a test that could never succeed,
                # so $missglyph always stayed 0.  Entities with a known
                # code point but no glyph are counted here instead.
                if ($known) {
                    print STDERR "$name\t$code\t$description\n" if $verbose;
                    $missglyph++;
                } else {
                    print STDERR "$name\t\t$description\n" if $verbose;
                    $unkglyph++;
                }

                # NOTE(review): the code point is blanked in the table
                # even when it is known, as before -- confirm intended.
                $code = "&nbsp;";
            }
        }

        print {$tbl}
            "<row>\n  <entry> $name</entry><entry align='center'>$code</entry>\n",
            "  <entry valign='bottom'>",
            "<mediaobject>\n<imageobject>\n",
            "<imagedata fileref=\"&$glyph.glyph;\"/>\n",
            "</imageobject>\n",
            "<textobject>\n",
            "<phrase>Unicode $code</phrase>\n",
            "</textobject>\n",
            "</mediaobject>",
            "</entry>\n  <entry>$description</entry>\n",
            "</row>\n";
    }
    close($in);

    print {$tbl}
        "</tbody>\n",
        "</tgroup>\n",
        "</informaltable>\n";

    # Buffered write errors only surface when the handle is closed.
    close($tbl) or die "Error closing $tblfile: $!\n";
}
closedir($dh);

print "$missglyph missing glyphs.\n";
print "$unkglyph unknown glyphs.\n";
