#! /usr/bin/perl
use Getopt::Long;
use HTML::Parser;
# Forward declarations for the subs defined after the main script body.
# Declaring the names up front makes them known to the compiler before
# their definitions, so they can be used like list operators (i.e. called
# without parentheses) anywhere below.
sub text_handler;
sub comment_handler;
sub default_handler;
sub start_handler;
sub end_handler;
sub body_start;
sub find_tag;
sub nospaces;
# ---------------------------------------------------------------------------
# Main script: parse the HTML file named on the command line, turn the
# contents of its <body> into one string with embedded control codes and
# print it to STDOUT, terminated by a NUL byte.
#
# Shared state filled in by the handler subs while parsing:
#   @t        - list of records; text records are [ "", $text ]
#   $helptype - value taken from a "help=<type>" HTML comment
#   %label    - names defined via <a name="...">
#   %ref      - names referenced via <a href="#...">
# ---------------------------------------------------------------------------

# Replacement for "&product;" / "@product@"; override with --product.
$opt_product = "openSUSE";

# Function-key renaming applied to 'install' help texts only.
%help_key_rename = (
  'F2' => 'F3',
  'F3' => 'F4',
  'F4' => 'F2',
);

GetOptions(
  'product=s' => \$opt_product
);

# The input file is the first remaining (non-option) argument.
my $file = shift;
die "usage: $0 [--product=NAME] FILE\n" unless defined $file;

$p = HTML::Parser->new(api_version => 3);
# $p->utf8_mode(1);
$p->unbroken_text(1);

# Ignore everything until <body> is seen; body_start then installs the
# real handlers.
$p->handler(default => "" );
$p->handler(start => \&body_start, "self,tagname,line");

# parse_file() returns undef and sets $! when the file cannot be opened.
# Without this check an unreadable file would silently yield empty output.
$p->parse_file($file) or die "$file: $!\n";

# print "help type = \"$helptype\"\n";

# Concatenate the text of all collected records.
$t .= $_->[1] for @t;

# Whitespace clean-up: collapse runs of spaces and drop spaces that touch
# a newline.
$t =~ s/ +/ /sg;
$t =~ s/\n /\n/sg;
$t =~ s/ \n/\n/sg;

# Drop whitespace that follows a \x14...\x10 span and whitespace that
# precedes a \x04 or the end of the text.
$t =~ s/(\x14.*?\x10)\s*/$1/sg;
$t =~ s/\s*(\x04|$)/$1/sg;

$t =~ s/(&product;|\@product\@)/$opt_product/g;

# FIXME: we need a better solution
$t =~ s/(\bF[234]\b)/$help_key_rename{$1}/sg if $helptype eq 'install';

print $t, "\x00";

# Some labels are referenced from outside the help text itself, depending
# on the help type; count them as referenced.
if($helptype eq 'boot') {
  $ref{help}++;
  $ref{keytable}++;
  $ref{profile}++;
}

if($helptype eq 'install') {
  $ref{bits}++;
}

# Cross-check labels against references. Keys are sorted so that the
# diagnostics come out in the same order on every run.
for (sort keys %label) {
  if(!$ref{$_}) {
    $err = 1;
    print STDERR "\"$_\" never referenced\n";
  }
}

for (sort keys %ref) {
  if(!$label{$_}) {
    $err = 1;
    print STDERR "\"$_\" never defined\n";
  }
}

# Inconsistencies are reported but do not change the exit status.
warn "*** inconsistencies detected ***\n" if $err;
# Text handler (argspec "text").
#
# Collapses every run of whitespace in the given text to a single space
# and appends the result to @t as a text record [ "", $text ].
sub text_handler
{
  my ($text) = @_;

  $text =~ s/\s+/ /g;

  push @t, [ "", $text ];
}
# Comment handler (argspec "text").
#
# Looks for "help=<type>" (lowercase letters only) in the comment text and,
# if found, stores <type> in the global $helptype. Comments without such a
# marker leave $helptype untouched.
sub comment_handler
{
  my ($comment) = @_;

  if($comment =~ /\bhelp=([a-z]+)/) {
    $helptype = $1;
  }
}
# Default handler (argspec "line") for events that have no handler of
# their own. Such events are not expected, so processing is aborted with
# the line number at which the event occurred.
sub default_handler
{
  my ($lineno) = @_;

  die "oops at line $lineno\n";
}
# Start-tag handler used until <body> is seen (argspec "self,tagname,line").
#
# Every tag other than "body" is ignored. On "body" the parser's text,
# comment, default, start and end handlers are replaced by the ones that
# do the actual conversion.
sub body_start
{
  my ($parser, $tag) = @_;

  if($tag eq "body") {
    # event, callback, argspec - installed in this order
    my @handlers = (
      [ text    => \&text_handler,    "text" ],
      [ comment => \&comment_handler, "text" ],
      [ default => \&default_handler, "line" ],
      [ start   => \&start_handler,   "self,tagname,attr,line" ],
      [ end     => \&end_handler,     "self,tagname,line" ],
    );

    for my $spec (@handlers) {
      $parser->handler(@$spec);
    }
  }
}
# Start-tag handler used inside <body> (argspec "self,tagname,attr,line").
#
# Arguments: parser object (unused), tag name, hash ref with the tag's
# attributes, line number.
#
#   hr, h3 - ignored
#   br     - appends a text record holding a newline
#   em     - appends the marker record [ "em" ]
#   a      - appends [ "a", undef, $name, $href ], with a leading "#"
#            stripped from the href
#
# Any other tag aborts with an "unsupported tag" error.
sub start_handler
{
  my (undef, $tag, $attr, $line) = @_;

  return if $tag eq "hr" || $tag eq "h3";

  if($tag eq "a") {
    # stays undef if the tag has no href attribute
    (my $target = $attr->{href}) =~ s/^#//;
    push @t, [ "a", undef, $attr->{name}, $target ];
  }
  elsif($tag eq "em") {
    push @t, [ "em" ];
  }
  elsif($tag eq "br") {
    push @t, [ "", "\n" ];
  }
  else {
    die "line $line: unsupported tag $tag\n";
  }
}
# End-tag handler used inside <body> (argspec "self,tagname,line").
#
# Arguments: parser object, tag name, line number.
#
#   h3   - ignored
#   body - all handlers are switched off again, so nothing after the body
#          is processed
#   em   - the marker record and the text record following it are replaced
#          by one text record "\x11<text>\x10"
#   a    - with a name: the name is registered in %label and the records
#          are replaced by "\x04\x12<name>\x14<text>\x10";
#          with an href: the target is counted in %ref and the records
#          are replaced by "\x12<target>\x13<text>\x10", where whitespace
#          in <text> has been turned into \x1f by nospaces()
#
# Dies if there is no matching opening tag, on any other tag, on names or
# targets longer than 32 characters, on a name that is defined twice, and
# on an <a> that has neither a name nor an href.
sub end_handler
{
  my ($parser, $tag, $line) = @_;

  return if $tag eq "h3";

  if($tag eq "body") {
    for my $event (qw(text comment default start end)) {
      $parser->handler($event => "");
    }
    return;
  }

  # find_tag itself dies if the last two records do not fit $tag.
  die "line $line: no matching opening tag for $tag found\n"
    unless find_tag($tag, $line);

  die "line $line: unexpected tag $tag\n" unless $tag eq "em" || $tag eq "a";

  # The two topmost records: the enclosed text, below it the opening tag.
  my $content = pop @t;
  my $opener = pop @t;

  if($tag eq "em") {
    push @t, [ "", "\x11$content->[1]\x10" ];
  }
  else {
    my ($name, $target) = @{$opener}[2, 3];

    if($name) {
      # name
      die "line $line: label $name too long (max. 32)\n" if length($name) > 32;
      die "line $line: label $name already exists\n" if $label{$name};
      $label{$name} = 1;
      push @t, [ "", "\x04\x12$name\x14$content->[1]\x10" ];
    }
    elsif($target) {
      # href
      die "line $line: label $target too long (max. 32)\n" if length($target) > 32;
      $ref{$target}++;
      push @t, [ "", "\x12$target\x13" . nospaces($content->[1]) . "\x10" ];
    }
    else {
      die "line $line: strange tag $tag\n";
    }
  }
}
# Check that the end of @t looks like "<tag>text", i.e. that the last
# record is a text record and the one before it is the opening record for
# the given tag.
#
# Arguments: tag name, line number (used in the error message only).
#
# Returns 1 on a match and undef if @t holds fewer than two records; dies
# in every other case.
sub find_tag
{
  my ($tag, $line) = @_;

  return undef if @t < 2;

  my ($opener, $content) = @t[-2, -1];

  die "line $line: nested tags not supported\n"
    unless $content->[0] eq "" && $opener->[0] eq $tag;

  return 1;
}
# Return a copy of the given string in which every whitespace character
# has been replaced by \x1f.
#
# \x1f looks like a space but is not a space. This is useful
# to prevent automatic line breaks.
sub nospaces
{
  my ($text) = @_;

  $text =~ s/\s/\x1f/g;

  return $text;
}