#!/usr/bin/perl
# Copyright 2012-2022, Alexander Shibakov
# This file is part of SPLinT
#
# SPLinT is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SPLinT is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SPLinT. If not, see .
use Getopt::Long;
use Pod::Usage;
use File::Basename;
use English;
( $my_name, $my_path, $my_suffix ) = fileparse( $PROGRAM_NAME );
$invocation_line = "\% ".$my_name." ".(join ' ', @ARGV)."\n";
my $man = 0;
my $help = 0;
my $fine_index = 0;
my $fine_headercs = "\\FI"; # index command sequence for the fine format
my $crude_headercs = "\\GI"; # index command sequence for the standard format
my $headercs = ""; # the default is the standard format
#Getopt::Long::Configure ("bundling"); # to allow -abc to set a, b, and c
GetOptions ("help|?" => \$help,
man => \$man,
"fine" => \$fine_index,
"cs=s" => \$headercs
) or pod2usage(2);
pod2usage(-exitval => 0, -verbose => 1) if $help;
pod2usage(-exitval => 0, -verbose => 2) if $man;
open FILE, "$ARGV[0]" or die "Cannot open input file $ARGV[0]\n";
open FILE_OUT, ">$ARGV[1]" or die "Cannot open input file $ARGV[1]\n";
if ( $headercs eq "" ) {
if ( $fine_index ) {
$headercs = $fine_headercs;
} else {
$headercs = $crude_headercs;
}
}
print FILE_OUT $invocation_line;
sub lex_order (\@\@) { # lexicographic ordering
my (@string1) = @{shift @_};
my (@string2) = @{shift @_};
my ($i);
$i = 0;
while ( $string1[$i] == $string2[$i] && $i <= $#string1 && $i <= $#string2 ) { $i++ }
if ( $i > $#string1 || $i > $#string2 ) {
return $#string1 <=> $#string2;
}
return ( $string1[$i] <=> $string2[$i] );
}
sub numerically { $b <=> $a; }
sub alphabetically { # lexicographic ordering based on the ASCII order defined in @main_order
my (@chars1) = map { $main_order{$_} } split //, $a;
my (@chars2) = map { $main_order{$_} } split //, $b;
return lex_order @chars1, @chars2;
}
sub lexicographically { # lexicographic ordering for numeric sequences separated by spaces
my (@chars1) = split / /, $a;
my (@chars2) = split / /, $b;
return lex_order @chars1, @chars2;
}
$alphabet = " /\#\$\%^&*<>[]{}()+-=_|\\,:;~`.?!\'\"\@0123456789AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
@main_set = split //, $alphabet;
map { $main_order{$_} = index $alphabet, $_ } @main_set; # inefficient ...
$ldelim[0] = "\\("; $rdelim[0] = ")";
$ldelim[1] = "\\["; $rdelim[1] = "]";
$ldelim[2] = ""; $rdelim[2] = "";
$ldelim[3] = "\\g"; $rdelim[3] = "g";
$ldelim[4] = "\\f"; $rdelim[4] = "f";
$ldelim[5] = "\\e"; $rdelim[5] = "e";
sub alpha_jump { # have we switched to the next letter?
my $a = substr shift @_, 0, 1;
my $b = substr shift @_, 0, 1;
$a =~ tr/a-z/A-Z/; $a =~ tr/a-zA-Z/0/cs;
return (ord $a) <=> (ord $b);
}
while () {
$input = $_;
if ( $input =~ /\\i\@\@\@e\s* \{([0-9]+)\} # section number
\{([0-9]+)\} # page number
\{((\{[^\{\}]*\})+)\} # host namespace, context, etc.
\{([^\{\}]+)\} # domain
\{([0-9]+)\} # rank
\{([^\{\}]*)\} # type1
\{([^\{\}]+)\s*\} # type2
\{((\{[^\{\}]+\})+|\\vend.*\\vend\s*)\} # key
\{((\{[^\{\}]+\})*)\} # visual key
(\%.*)*\n/x ) {
# ordinary index entry
($section, $pageno, $nspace, $junk0, $domain, $rank, $type1, $type2, $key, $junk1, $vkey) =
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11 );
$term = $key;
$key =~ s/\{([0-9]+)\}/pack "c1", $1/eg;
if ( $term =~ /\\vend/ ) {
# only process the key if it is not raw
} else {
$term = $key;
}
$vkey =~ s/\{([0-9]+)\}/pack "c1", $1/eg;
if ( $vkey ne "" ) {
if (exists $index{$domain}{$vkey}{type} && $index{$domain}{$vkey}{type} ne $type2) {
#warn "Differing output types for term <$term> with key <$vkey>.\n", "$index{$domain}{$vkey}{type} vs. $type2\n";
$key = "$vkey $key";
if (exists $index{$domain}{$key}{type} && $index{$domain}{$key}{type} ne $type2) {
warn "Differing output types for term <$term> with key <$vkey>.\n", "$index{$domain}{$vkey}{type} vs. $type2\n";
}
} else {
$key = $vkey;
}
}
# print "KEY: ", $key, " ", $vkey, "\n";
push @{$index{$domain}{$key}{refs}}, "$section $rank $pageno";
$index{$domain}{$key}{nspace} = $nspace;
$index{$domain}{$key}{type} = $type2;
$index{$domain}{$key}{term} = $term;
}
}
$i = 0;
$last_alpha = "0"; # the last index section
foreach $domain (sort keys %index ) {
if ( $i > 0) {
$last_alpha = "0";
print FILE_OUT "\\indexseparator{$domain}{$i}\n";
}
$i++;
foreach $key ( sort alphabetically keys %{$index{$domain}} ) {
if (exists $index{$domain}{$key}{refs}) {
%ref_hash = ();
map { @r = split / /, $_; exists $ref_hash{$r[0]}{$r[1]}{$r[2]} ?
$ref_hash{$r[0]}{$r[1]}{$r[2]}++ : ($ref_hash{$r[0]}{$r[1]}{$r[2]} = 0) } @{$index{$domain}{$key}{refs}};
my @fine_ref_list = ();
my @crude_ref_list = ();
foreach $key ( reverse sort numerically keys %ref_hash ) {
foreach $rkey ( reverse sort numerically keys %{$ref_hash{$key}} ) {
$ref_string = "$ldelim[$rkey]$key$rdelim[$rkey]";
push @crude_ref_list, $ref_string;
$ref_string = $ref_string."\{".(join ', ', (reverse sort numerically keys %{$ref_hash{$key}{$rkey}}))."\}";
push @fine_ref_list, $ref_string;
}
}
$ref_string = join ', ', @fine_ref_list;
$cref_string = join ', ', @crude_ref_list;
$term = $index{$domain}{$key}{term};
$term_printable = $term;
if ( $term =~ /\\vend/ ) {
$term =~ s/\\vend(.*)\\vend/$1/eg; # unwrap the entry
} else {
$term =~ s/(.)/"\{".(unpack "c1", $1)."\}"/eg;
}
if ( alpha_jump( $key, $last_alpha ) > 0 ) {
$last_alpha = substr $key, 0, 1;
$last_alpha =~ tr/a-z/A-Z/;
print FILE_OUT "\\indexsection{$last_alpha}\n",
}
print FILE_OUT $headercs."{$index{$domain}{$key}{nspace}}{$index{$domain}{$key}{type}}{$term}, ".
($fine_index ? $ref_string : $cref_string).".\% $term_printable, ($key)\n",
"\% sec nos. ".$ref_string."\n";
}
}
}
__END__
=head1 BINDX
bindx.pl - Postprocess an index (.gdx) in to produce a set of index entries in
the (.gdy)
=head1 SYNOPSIS
bindx.pl [options] input_file output_file
Options:
--help|-h|-? brief help message
--man|-m full documentation
--fine|-f add page references to each index entry
--cs= specify the index control sequence name
=head1 OPTIONS
=over 8
=item B<--help>
Print a brief help message and exit.
=item B<--man>
Print the manual page and exit.
=item B<--fine>
Create index entries in the form BIB{n1, n2, ...} where B and B
are the left and ring delimeters, I is the section number and the list of page
numbers appears inside the braced group.
=item B<--cs>
The name of the index control sequence. The default is B<\GI> for the standard
index format and B<\FI> for the 'fine' format.
=back
=head1 DESCRIPTION
B will read the given , and output an index
in the .
=cut