#! /usr/bin/perl use Getopt::Long; use HTML::Parser; sub text_handler; sub comment_handler; sub default_handler; sub start_handler; sub end_handler; sub body_start; sub find_tag; sub nospaces; $opt_product = "openSUSE"; %help_key_rename = ( 'F2' => 'F3', 'F3' => 'F4', 'F4' => 'F2', ); GetOptions( 'product=s' => \$opt_product ); $p = HTML::Parser->new(api_version => 3); # $p->utf8_mode(1); $p->unbroken_text(1); $p->handler(default => "" ); $p->handler(start => \&body_start, "self,tagname,line"); $p->parse_file(shift); # print "help type = \"$helptype\"\n"; $t .= $_->[1] for @t; $t =~ s/ +/ /sg; $t =~ s/\n /\n/sg; $t =~ s/ \n/\n/sg; $t =~ s/(\x14.*?\x10)\s*/$1/sg; $t =~ s/\s*(\x04|$)/$1/sg; $t =~ s/(&product;|\@product\@)/$opt_product/g; # FIXME: we need a better solution $t =~ s/(\bF[234]\b)/$help_key_rename{$1}/sg if $helptype eq 'install'; print $t, "\x00"; if($helptype eq 'boot') { $ref{help}++; $ref{keytable}++; $ref{profile}++; } if($helptype eq 'install') { $ref{bits}++; } for (keys %label) { if(!$ref{$_}) { $err = 1; print STDERR "\"$_\" never referenced\n" } } for (keys %ref) { if(!$label{$_}) { $err = 1; print STDERR "\"$_\" never defined\n" } } warn "*** inconsistencies detected ***\n" if $err; sub text_handler { local $_; $_ = shift; s/\s+/ /g; push @t, [ "", $_ ]; } sub comment_handler { $helptype = $1 if $_[0] =~ /\bhelp=([a-z]+)/; } sub default_handler { my $line = shift; die "oops at line $line\n"; } sub body_start { my ($self, $tag) = @_; if($tag eq "body") { $self->handler(text => \&text_handler, "text"); $self->handler(comment => \&comment_handler, "text" ); $self->handler(default => \&default_handler, "line" ); $self->handler(start => \&start_handler, "self,tagname,attr,line"); $self->handler(end => \&end_handler, "self,tagname,line"); } } sub start_handler { my ($self, $tag, $attr, $line) = @_; my $l; return if $tag eq "hr"; return if $tag eq "h3"; if($tag eq "br") { push @t, [ "", "\n" ]; } elsif($tag eq "em") { push @t, [ "em" ]; } elsif($tag eq "a") { $l = $attr->{href}; $l =~ s/^#//; push @t, [ "a", undef, $attr->{name}, $l ]; } else { die "line $line: unsupported tag $tag\n"; } } sub end_handler { my ($self, $tag, $line) = @_; my ($i, $j); return if $tag eq "h3"; if($tag eq "body") { $self->handler(text => ""); $self->handler(comment => "" ); $self->handler(default => "" ); $self->handler(start => ""); $self->handler(end => ""); return; } $i = find_tag $tag, $line; die "line $line: no matching opening tag for $tag found\n" unless $i; if($tag eq "em") { $i = pop @t; pop @t; push @t, [ "", "\x11$i->[1]\x10" ]; } elsif($tag eq "a") { $i = pop @t; $j = pop @t; if($j->[2]) { # name die "line $line: label $j->[2] too long (max. 32)\n" if length($j->[2]) > 32; die "line $line: label $j->[2] already exists\n" if $label{$j->[2]}; $label{$j->[2]} = 1; push @t, [ "", "\x04\x12$j->[2]\x14$i->[1]\x10" ]; } elsif($j->[3]) { # href die "line $line: label $j->[3] too long (max. 32)\n" if length($j->[3]) > 32; $ref{$j->[3]}++; push @t, [ "", "\x12$j->[3]\x13" . nospaces($i->[1]) . "\x10" ]; } else { die "line $line: strange tag $tag\n"; } } else { die "line $line: unexpected tag $tag\n"; } } sub find_tag { my $i = @t - 1; return undef if $i < 1; return 1 if $t[$i][0] eq "" && $t[$i-1][0] eq $_[0]; die "line $_[1]: nested tags not supported\n"; } # \x1f looks like a space but is not a space. This is useful # to prevent automatic line breaks. sub nospaces { local $_; $_ = shift; s/\s/\x1f/g; return $_; }