#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
#
# Input on STDIN; output to STDOUT.

use strict;
use warnings;

# Return a copy of LINE with terminal formatting removed.
sub _strip_controls {
    my ($line) = @_;
    $line =~ s/\x1b\[\d+m//g;    # Remove screen controls "ESC [ number m"
    $line =~ s/.\x8//g;          # Remove "char, backspace"
    return $line;
}

# Return the leading whitespace (possibly empty) of LINE.
sub _indent_of {
    my ($line) = @_;
    my ($indent) = $line =~ /^(\s*)/;
    return $indent;
}

# Filter nroff -man output read from the filehandle $in, writing the cleaned
# text to the filehandle $out. Returns nothing.
#
# The filter:
#   * strips escape sequences and backspace overstrikes from every line;
#   * keeps only the first "PCREname(n) ... PCREname(n)" header line and drops
#     the line that follows each header;
#   * removes page footers, recognised as a non-blank line with at least
#     three blank lines before it and three blank lines (or end of input)
#     after it;
#   * forces two blank lines before headings (lines starting in column one,
#     other than "Last updated" and "Copyright" lines);
#   * never emits trailing blank lines at the end of the input.
sub clean_text {
    my ($in, $out) = @_;

    my $blankcount  = 0;    # Blank lines seen but not yet output
    my $lastwascut  = 0;    # True immediately after a footer has been removed
    my $firstheader = 1;    # True until the first header line has been kept
    my $lastprinted;        # Most recent non-blank line written to $out

    while (defined(my $line = <$in>)) {
        $line = _strip_controls($line);

        # Handle header lines. Retain only the first one we encounter, but
        # remove the blank line that follows. Any others (e.g. at end of
        # document) and the following blank line are dropped.
        if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
            if ($firstheader) {
                $firstheader = 0;
                print {$out} $line;
                $lastprinted = $line;
                $lastwascut  = 0;
            }
            my $discarded = <$in>;    # Remove a blank that follows
            next;
        }

        # Count runs of empty lines; they are only output once we know what
        # follows them.
        if ($line =~ /^\s*$/) {
            $blankcount++;
            $lastwascut = 0;
            next;
        }

        # If a chunk of lines has been cut out (page footer) and the next line
        # has a different indentation, put back one blank line.
        if ($lastwascut && $blankcount < 1 && defined $lastprinted) {
            $blankcount++ if _indent_of($lastprinted) ne _indent_of($line);
        }

        # We get here only when we have a non-blank line in hand. If it was
        # preceded by 3 or more blank lines, read the next 3 lines and see if
        # they are blank. If so, remove all 7 lines, and remember that we have
        # just done a cut.
        if ($blankcount >= 3) {
            my @next;
            for my $i (0 .. 2) {
                my $ahead = <$in>;

                # Past end of input a missing line counts as blank.
                $next[$i] = defined $ahead ? _strip_controls($ahead) : "";
            }

            my $all_blank = !grep { $_ !~ /^\s*$/ } @next;

            # Cut out chunks of the form <3 blanks><non-blank><3 blanks>
            if ($all_blank) {
                $blankcount -= 3;
                $lastwascut = 1;
            }

            # Otherwise output the saved blanks, the current, and the next
            # three lines. Remember the last printed line.
            else {
                print {$out} "\n" x $blankcount;
                print {$out} $line;
                $lastprinted = $line;
                for my $ahead (@next) {
                    print {$out} $ahead;

                    # Only text lines are remembered: $lastprinted exists to
                    # compare indentation after a cut, and a blank line has
                    # no meaningful indentation.
                    $lastprinted = $ahead if $ahead =~ /\S/;
                }
                $lastwascut = 0;
                $blankcount = 0;
            }
        }

        # This non-blank line is not preceded by 3 or more blank lines. Output
        # any blanks there are, and the line. Remember it. Force two blank
        # lines before headings.
        else {
            $blankcount = 2
                if $line =~ /^\S/
                && $line !~ /^Last updated/
                && $line !~ /^Copyright/
                && defined $lastprinted;
            print {$out} "\n" x $blankcount;
            print {$out} $line;
            $lastprinted = $line;
            $lastwascut  = 0;
            $blankcount  = 0;
        }
    }

    return;
}

# Run as a filter when executed directly; do nothing when loaded by other code.
clean_text(\*STDIN, \*STDOUT) unless caller;

# End