#!/usr/bin/env perl
use strict;
use 5.006;
use warnings;

# Name this script was invoked as; used only in the usage message below.
my $progname = $0;

# Exactly two command-line arguments are required.  A usage error is a
# diagnostic, so it is written to STDERR (keeping STDOUT clean for callers
# that capture it) and the script exits with a non-zero status.
if (scalar @ARGV != 2) {
  print STDERR "Usage: $progname <outfile> <modules description file>\n";
  exit 1;
}

# First argument: path of the file to generate.
my $outname = shift(@ARGV);
# Second argument: path of the modules description file to read.
my $descfile = shift(@ARGV);

# Lookup tables shared by the subroutines below.  All of them start empty.

# alias name => name of the algorithm the alias refers to.
my %aliases;
# Set of algorithm names (algorithm name => 1).
my %algorithms;
# algorithm name => reference to a set of encodings (encoding => 1).
my %algorithm_encs;

# Set of every encoding recorded for any algorithm (encoding => 1).
my %encs;

# addalgenc($alg, $enc)
#
# Record that algorithm $alg is available in encoding $enc:
#   - adds $enc to the per-algorithm set stored in %algorithm_encs, and
#   - adds $enc to the global set of encodings, %encs.
# Calling it again with the same pair is harmless.
sub addalgenc($$) {
  my ($algorithm, $encoding) = @_;

  # Autovivification creates the inner per-algorithm hash the first time an
  # algorithm is seen, so no explicit "already present?" branch is needed.
  $algorithm_encs{$algorithm}{$encoding} = 1;

  $encs{$encoding} = 1;
}

# readinput()
#
# Parse the modules description file named by $descfile and fill in the
# %algorithms, %aliases, %algorithm_encs and %encs tables.
#
# Each data line has three whitespace-separated fields:
#     <algorithm>  <enc>[,<enc>...]  <alias>[,<alias>...]
# Blank lines and lines whose first non-blank character is '#' are skipped.
#
# Dies if the description file cannot be opened.  A line with fewer than
# three fields still registers the algorithm name, but a warning is issued
# and no encodings or aliases are recorded for it.
sub readinput()
{
    # Three-argument open on a lexical handle: the file name is never
    # interpreted as a mode or a pipe, and a failure is reported instead of
    # silently producing empty output.
    open(my $desc_fh, '<', $descfile)
        or die "Can't open input file `$descfile': $!\n";

    while (my $line = <$desc_fh>)
    {
        next if $line =~ m/^\s*#/;
        next if $line =~ m/^\s*$/;

        # split ' ' ignores leading whitespace, so an indented data line does
        # not yield an empty first field (and hence an algorithm named '').
        my ($alg, $encstr, $aliasstr) = split(' ', $line);

        $algorithms{$alg} = 1;

        unless (defined $encstr && defined $aliasstr) {
            warn "$descfile:$.: expected `<algorithm> <encodings> <aliases>'; "
               . "no encodings or aliases recorded for `$alg'\n";
            next;
        }

        # The loops are deliberately nested: an alias is only recorded when
        # there is at least one encoding, and an encoding only when there is
        # at least one alias.
        foreach my $alias (split(/,/, $aliasstr)) {
            foreach my $enc (split(/,/, $encstr)) {
                $aliases{$alias} = $alg;
                addalgenc($alg, $enc);
            }
        }
    }

    close($desc_fh);
}

# printoutput()
#
# Write the generated file named by $outname.  The file contains a comment
# header followed by backslash-continued variable assignments:
#   - libstemmer_algorithms: every algorithm in %algorithms, sorted;
#   - <ENC>_algorithms for each of ISO_8859_1, ISO_8859_2 and KOI8_R: the
#     sorted algorithms that %algorithm_encs lists for that encoding.
#
# Dies if the output file cannot be opened, or if closing it reports an
# error (which is where buffered write failures surface).
sub printoutput()
{
    # Three-argument open keeps the mode separate from the file name.
    open(my $out, '>', $outname)
        or die "Can't open output file `$outname': $!\n";

    print {$out} <<EOS;
# $outname: Lists of stemming modules.
#
# This file is generated by mkalgorithms.pl from a list of module names.
# Do not edit manually.
EOS

    my @algorithms = sort keys(%algorithms);

    # Each entry goes on its own continuation line: "\" newline, indent, name.
    print {$out} "\nlibstemmer_algorithms =";
    foreach my $lang (@algorithms) {
        print {$out} "\\\n    ", $lang;
    }
    print {$out} "\n";

    for my $enc (qw(ISO_8859_1 ISO_8859_2 KOI8_R)) {
        print {$out} "\n${enc}_algorithms =";
        foreach my $lang (@algorithms) {
            # Look the algorithm up first so that merely testing for an
            # encoding does not autovivify an entry in %algorithm_encs.
            my $encs_for_lang = $algorithm_encs{$lang};
            print {$out} "\\\n    ", $lang
                if $encs_for_lang && exists $encs_for_lang->{$enc};
        }
        print {$out} "\n";
    }

    close($out)
        or die "Can't close output file `$outname': $!\n";
}

# Main flow.  Order matters: readinput() fills the algorithm and encoding
# tables that printoutput() then reads to write the generated file.
readinput();
printoutput();
