#!/usr/bin/perl
# Copyright (C) 2004 Kevin P. Scannell
# This is free software; see the file COPYING for copying conditions.  There is
# NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
#   Reads a plain text lexicon from standard input;
#   words with frequency 0 and 1 have been taken out and dealt with already.
#   Processes these words and writes 5 files of approximately the same size:
#   lexicon0.txt through lexicon4.txt ranging from highest to lowest freq.

use strict;

# Slurp the whole lexicon from standard input as raw bytes; each element of
# @everything is one input line with its line terminator still attached.
binmode STDIN, ":bytes";
my @everything = <STDIN>;

# Distribute the lines of @everything over consecutively numbered output
# files (lexicon0.txt, lexicon1.txt, ...), aiming at one fifth of the input
# per file.
#
# A new file is started only when BOTH of the following hold:
#   * the running line count has passed the current threshold ($stop), and
#   * the current line does not begin with the previous line's key, where the
#     key is the text up to and including the first space.
# Consecutive lines sharing a key are therefore always kept in the same file,
# which is why the files are only approximately equal in size.
my $chunk = @everything / 5;	# target lines per file; may be fractional
my $stop = $chunk;		# line count after which a switch is allowed
my $linenum = 0;
my $filenum = 0;
my $prev;			# previous input line, undefined before the first
open(my $out, ">:bytes", "lexicon$filenum.txt") or die "Could not open output file: $!\n";
foreach my $line (@everything) {
	$linenum++;
	# There is no previous line to compare against on the very first
	# line, so never switch files there (matters for tiny inputs).
	if ($linenum > $stop and defined $prev) {
		# Key of the previous line: everything after the first space
		# (including the newline) is collapsed to that single space.
		(my $key = $prev) =~ s/ .*/ /s;
		# Compare the prefix literally.  Interpolating the key into a
		# pattern would treat characters such as '.', '+' or '(' in a
		# lexicon entry as regex syntax, giving wrong matches or a
		# fatal compile error.
		if (substr($line, 0, length $key) ne $key) {
			# Buffered write errors only surface at close time.
			close($out) or die "Could not close output file: $!\n";
			$filenum++;
			open($out, ">:bytes", "lexicon$filenum.txt") or die "Could not open output file: $!\n";
			$stop += $chunk;
		}
	}
	print {$out} $line;
	$prev = $line;
}
close($out) or die "Could not close output file: $!\n";
exit 0;
