diff options
author | Russ Allbery <rra@cpan.org> | 2021-07-18 17:31:12 -0700 |
---|---|---|
committer | Russ Allbery <rra@cpan.org> | 2021-07-18 17:32:07 -0700 |
commit | a774a276e2e387f8e6579829af2ea7de8bac0c59 (patch) | |
tree | 54a11f01e18913bcad0fa8466842b5454ce12eff /bin | |
parent | e6132fe93920aac8d9531c7f299f06a331ac642f (diff) |
Move spin to App::DocKnot::Spin
Convert the spin program to a module and do the absolute minimum
work required to make tests pass, mostly by skipping a bunch of
tests. This is not yet a well-behaved module and may not actually
work.
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/spin | 2149 |
1 files changed, 0 insertions, 2149 deletions
diff --git a/bin/spin b/bin/spin deleted file mode 100755 index 396ce7f..0000000 --- a/bin/spin +++ /dev/null @@ -1,2149 +0,0 @@ -#!/usr/bin/perl -w -$ID = q$Id$; -# -# spin -- Translate thread (an HTML macro language) into HTML. -# -# Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, -# 2010, 2011, 2013 Russ Allbery <rra@stanford.edu> -# -# This program is free software; you may redistribute it and/or modify it -# under the same terms as Perl itself. - -############################################################################## -# Modules and declarations -############################################################################## - -require 5.005; - -# The default list of files and/or directories to exclude from spinning. This -# can be added to with the -e option. Each of these should be a regular -# expression. -@EXCLUDES = (qr/^\.(?!\.\z)(?!htaccess\z)/, qr/^CVS\z/, qr/^Makefile\z/, - qr/^RCS\z/); - -# The URL to the software page for all of my web page generation software. -$URL = 'http://www.eyrie.org/~eagle/software/web/'; - -use strict; -use subs qw(expand parse parse_context); -use vars qw(%DEPEND $DOCID @EXCLUDES $FILE @FILES $FULLPATH $ID $OUTPUT - %OUTPUT $REPO @RSS %SITEDESCS %SITELINKS @SITEMAP $SOURCE $SPACE - @STATE $STYLES $URL %VERSIONS %commands %macros %strings); - -use Cwd qw(getcwd); -use FileHandle (); -use Getopt::Long qw(GetOptions); -use Image::Size qw(html_imgsize); -use File::Copy qw(copy); -use File::Find qw(find finddepth); -use File::Spec (); -use POSIX qw(mktime strftime); -use Text::Balanced qw(extract_bracketed); - -############################################################################## -# Output -############################################################################## - -# Sends something to the output file. Pull out any trailing space and stash -# it temporarily, and put any trailing space that we'd previously stashed into -# the output string after any close tags. 
This gets spacing working properly -# around boundaries. -sub output { - local $_ = join ('', @_); - if ($SPACE) { - my ($close, $body) = m%^(\s*(?:</(?!body)[^>]+>\s*)*)(.*)%s; - $close .= $SPACE; - $close =~ s/\n\s*\n\s*\n/\n\n/g; - $_ = $close . $body; - $SPACE = ''; - } - if (s/\n(\s+)\z/\n/) { $SPACE = $1 } - print OUT $_; -} - -############################################################################## -# Basic parsing -############################################################################## - -# Escapes &, <, and > characters found in a string. -sub escape { local $_ = shift; s/&/&/g; s/</</g; s/>/>/g; $_ } - -# Undo HTML entity escaping. -sub unescape { local $_ = shift; s/</</g; s/>/>/g; s/&/&/g; $_ } - -# Wrap something in paragraph markers, being careful to get newlines right. -# Special-case a paragraph consisting entirely of <span> by turning it into a -# <p> with the same class. -sub paragraph { - my $text = shift; - $text =~ s/^\n(\s*\n)*//; - $text =~ s/(\S[ \t]*)\z/$1\n/; - if ($text =~ m%^(\s*)<span(?!.*<span)([^>]*)>(.*)</span>(\s*)\z%s) { - my ($lead, $class, $text, $trail) = ($1, $2, $3, $4); - return "$lead<p$class>$text</p>$trail"; - } else { - $text =~ s/^/<p>\n/; - $text =~ s%(\n\s*)\z%\n</p>$1%; - return $text; - } -} - -# Opens or closes a border of a continued structure. Either takes the name of -# the state and its start and end tags, or takes no arguments to close all -# open states. -sub border { - my ($border, $start, $end) = @_; - my $output = ''; - if ($border) { - if ($STATE[-1] eq 'BLOCK' || $STATE[-1][0] ne $border) { - $output .= $start; - push (@STATE, [ $border, $end ]); - } - } else { - my $state; - while (defined ($state = pop @STATE)) { - last if $state eq 'BLOCK'; - $output .= $$state[1]; - } - push (@STATE, 'BLOCK'); - } - return $output; -} - -# Marks the beginning of major block structure. Within this structure, -# borders will only clear to the level of this structure. 
-sub border_start { - push (@STATE, 'BLOCK'); -} - -# Clears a major block structure. -sub border_clear { - my $output = border; - pop @STATE; - return $output; -} - -# Extract some number of arguments from the front of the given string. If the -# optional third argument is true, try to pull off a parenthesized formatting -# instruction first, returning it as the first result (or undef if it's not -# found). If the count is -1, pull off as many arguments as we can find. -sub extract { - my ($text, $count, $format) = @_; - my (@result, $code); - $text =~ s/\s*//; - if ($format && $text =~ /^\(/) { - ($result[0], $text) = extract_bracketed ($text, '()'); - $result[0] = substr ($result[0], 1, -1); - } else { - $result[0] = ''; - } - if ($count >= 0) { - for (1..$count) { - ($result[$_], $text) = extract_bracketed ($text, '[]'); - if ($result[$_]) { - $result[$_] = substr ($result[$_], 1, -1); - } else { - warn "$0:$FILE:$.: cannot find argument $_\n"; - $result[$_] = ''; - } - } - } else { - while ($text =~ /^\s*\[/) { - my $result; - ($result, $text) = extract_bracketed ($text, '[]'); - last unless $result; - $result = substr ($result, 1, -1); - push (@result, $result); - } - } - unless ($format) { shift @result } - (@result, $text); -} - -# Process a macro. Takes the number of arguments, the definition of the -# macro, a flag saying whether we're at a block level, and then the values of -# all the arguments. Only straight substitution commands are allowed here, of -# course. -sub macro { - my ($args, $definition, $block) = @_; - $definition =~ s/\\(\d+)/($1 > $args) ? "\\$1" : $_[$1 + 2]/ge; - return parse_context ($definition, $block); -} - -# Expand a given command into its representation. This function is mutually -# recursive with parse. Takes a third argument indicating whether this is a -# top-level element (if it is, and it doesn't generate its own container, it -# may have to be wrapped in <p>). 
Returns the result of expanding the -# command, a flag saying whether the command is block level, and the remaining -# text in the paragraph. -sub expand { - my ($command, $text, $block) = @_; - if ($command eq '==') { - my ($new, $args, $definition); - ($new, $args, $definition, $text) = extract ($text, 3); - if (defined $definition) { - $macros{$new} = [ $args, $definition ]; - return ('', 1, $text); - } - } elsif ($command eq '=') { - my ($variable, $value); - ($variable, $value, $text) = extract ($text, 2); - $strings{$variable} = parse ($value); - return ('', 1, $text); - } elsif ($command =~ s/^=//) { - if (exists $strings{$command}) { - return ($strings{$command}, 0, $text); - } else { - warn "$0:$FILE:$.: unknown string $command\n"; - return ('', 0, $text); - } - } elsif ($command eq '\\') { - return ('\\', 0, $text); - } elsif (ref $macros{$command}) { - my ($args, $definition) = @{ $macros{$command} }; - my @args; - if ($args != 0) { - @args = extract ($text, $args, 0); - $text = pop @args; - } - my $block = $block && ($text !~ /\S/); - return (macro ($args, $definition, $block, @args), $text); - } else { - if (!ref $commands{$command}) { - warn "$0:$FILE:$.: bad command $command\n"; - return ('', 1, $text); - } - my ($args, $handler) = @{ $commands{$command} }; - my ($blocktag, $result); - if ($args == 0) { - ($blocktag, $result) = &$handler (); - } else { - my @args = extract ($text, $args, 1); - $text = pop @args; - my $format = shift @args; - ($blocktag, $result) = &$handler ($format, @args); - } - return ($result, $blocktag, $text); - } -} - -# Given a text string, check it for escape sequences and expand them. This -# function is mutually recursive with expand. Takes one flag, saying whether -# we're at the block level. Returns the expanded text and a flag saying -# whether the result is suitable for block level. -sub parse_context { - my ($text, $block) = @_; - if (index ($text, '\\') == -1) { - my $output = $text; - $output = border . 
paragraph ($output) if $block; - return ($output, $block); - } - - # Chop off everything up to the first backslash and save it in output. - # Then grab the escape and figure out what to do with it. - # - # If we are at block level, we have to distinguish between plain text and - # inline commands, which have to be wrapped in paragraph tags, and - # block-level commands, which shouldn't be. We accumulate any output that - # has to be wrapped in a paragraph in $paragraph (and put the border - # before it in $border). Whenever we see a block-level command, we wrap - # anything currently in $paragraph in a paragraph, tack it on to the - # output, and then add on the results of the block command. $space holds - # leading space, which we want to add to the paragraph if we end up - # creating a paragraph. - # - # $nonblock is a flag indicating that we saw some construct that wasn't - # suitable for block level. - my $output = ''; - my ($border, $paragraph, $space) = ('', '', ''); - my $nonblock = 0; - while ($text ne '') { - unless ($text =~ s/^([^\\]+|\\([\w=]+|.))//) { - my $error = substr ($text, 0, 20); - $error =~ s/\n.*//s; - die "$0:$FILE:$.: unable to parse at '$error'\n"; - } - my $command; - if (index ($1, '\\') == -1) { - my $string = $1; - if ($block && $string =~ /^\s+$/ && $paragraph eq '') { - $space .= $string; - } elsif ($block && ($string =~ /\S/ || $paragraph ne '')) { - $border = border if $paragraph eq ''; - $paragraph .= $space . $string; - $space = ''; - } else { - $output .= $string; - $nonblock = 1; - } - } else { - $command = $2; - my ($result, $blocktag); - my $force = $block && $paragraph eq ''; - ($result, $blocktag, $text) = expand ($command, $text, $force); - if ($blocktag) { - if ($block && $paragraph ne '') { - $output .= $border . paragraph ($space . $paragraph); - $border = ''; - $paragraph = ''; - } else { - $output .= $space; - } - $output .= $result; - } elsif ($block) { - $border = border if $paragraph eq ''; - $paragraph .= $space . 
$result; - $nonblock = 1; - } else { - $output .= $result; - $nonblock = 1; - } - $space = ''; - } - if ($text =~ s/^\n(\s*)//) { - if ($paragraph ne '') { - $paragraph .= "\n$1"; - } else { - $output .= "\n" if $text || $nonblock; - $output .= $1; - } - } - } - - # Wrap any remaining output in paragraph tags and then return the output. - # If we were at block level, our output is always suitable for block - # level. Otherwise, it's suitable for block level only if all of our - # output was from block commands. - $output .= $border . paragraph ($space . $paragraph) - unless $paragraph eq ''; - return ($output, $block || !$nonblock); -} - -# A wrapper around parse_context for callers who don't care about the block -# level of the results. -sub parse { - my ($output) = parse_context (@_); - return $output; -} - -############################################################################## -# Data files -############################################################################## - -# Read the sitemap file for a site and flesh out the @SITEMAP array and -# %SITEDESCS and %SITELINKS hashes with information from that file. -# -# @SITEMAP is an array of anonymous arrays holding the complete site map. -# Each element represents a page. The element will contain three elements: -# the numeric indent level, the partial URL, and the description. %SITEDESCS -# holds a map of partial URLs to descriptions, and %SITELINKS map partial URLs -# to a list of other partial URLs (previous, next, and up). -# -# The format of the sitemap file is one line per web page, with indentation -# showing the tree structure, and with each line formatted as a partial URL, a -# colon, and a page description. If two pages at the same level aren't -# related, a line with three dashes should be put between them at the same -# indentation level. -sub read_sitemap { - my ($map) = @_; - - # @indents holds a stack of indentation levels. 
@parents is a matching - # stack of parent URLs for each level of indentation, and @prev is a - # matching stack of the previous page at each level of indentation. If - # $prev[0] is undef, there is no previous page at that level. - my @indents = (0); - my (@parents, @prev); - open (MAP, $map) or return; - local $_; - while (<MAP>) { - next if /^\s*\#/; - if (/^( *)---$/) { - my $indent = length ($1); - while ($indents[0] > $indent) { - shift @indents; - shift @prev; - shift @parents; - } - $prev[0] = undef; - next; - } - my ($indent, $url, $desc) = /^( *)([^\s:]+):\s+(.+)$/; - next unless defined $desc; - $indent = length ($indent); - if ($indent > $indents[0]) { - unshift (@parents, $prev[0]); - unshift (@indents, $indent); - unshift (@prev, undef); - } - while ($indents[0] > $indent) { - shift @indents; - shift @prev; - shift @parents; - } - $SITELINKS{$url} = [ $prev[0], undef, @parents ]; - $SITELINKS{$prev[0]}[1] = $url if defined $prev[0]; - $prev[0] = $url; - $SITEDESCS{$url} = $desc; - push (@SITEMAP, [ $indent, $url, $desc ]); - } - close MAP; -} - -# Given a date and time in ISO format, convert it to seconds since epoch. -sub time_to_seconds { - my ($date, $time) = @_; - my @datetime = reverse split (':', $time); - push (@datetime, reverse split ('-', $date)); - $datetime[4]--; - $datetime[5] -= 1900; - $datetime[6] = 0; - $datetime[7] = 0; - $datetime[8] = -1; - return mktime (@datetime); -} - -# Read in the .versions file for a site and flesh out the %VERSIONS hash. It -# contains a mapping of product name to an anonymous array of version number -# and date of the last update. It also fleshes out the %DEPEND hash, which -# holds a mapping of file names that use a particular version to the timestamp -# of the last change in that version. 
-sub read_versions { - my ($versions) = @_; - open (VERSIONS, $versions) or return; - local $_; - my $last; - while (<VERSIONS>) { - next if /^\s*$/; - next if /^\s*\#/; - my @files; - if (/^\s/) { - @files = split; - } else { - my ($product, $version, $date, $time); - ($product, $version, $date, $time, @files) = split; - my $timestamp; - if ($date) { - $time ||= '00:00:00'; - $timestamp = time_to_seconds ($date, $time); - } else { - $timestamp = 0; - } - $date = strftime ('%Y-%m-%d', gmtime $timestamp); - $VERSIONS{$product} = [ $version, $date ]; - $last = $timestamp; - } - for (@files) { - $DEPEND{$_} = $last if (!$DEPEND{$_} || $DEPEND{$_} < $last); - } - } - close VERSIONS; -} - -############################################################################## -# Page headers and footers -############################################################################## - -# Given the partial URL to the current page and the partial URL to another -# page, generate a relative URL between the two. -sub relative { - my ($start, $end) = @_; - my @start = split ('/', $start, -1); - my @end = split ('/', $end, -1); - while (@start && @end && $start[0] eq $end[0]) { - shift @start; - shift @end; - } - if (@start == 1 && @end == 1) { - return ($end[0] ? $end[0] : './'); - } else { - return ('../' x $#start) . join ('/', @end); - } -} - -# Given the name of the current file being processed, return the <link> tags -# for that file suitable for the <head> section. Uses the global %SITEDESCS -# and %SITELINKS variables. If the partial URL isn't found in those variables -# or we're at the top page, nothing is returned. -sub sitelinks { - my $file = shift; - $file = $File::Find::dir . '/' . $file; - $file =~ s%^\Q$SOURCE%%; - $file =~ s%/index\.html$%/%; - - my $output = ''; - if ($file ne '/' && $SITELINKS{$file}) { - my @links = @{ $SITELINKS{$file} }; - my @descs = map { defined ($_) ? 
$SITEDESCS{$_} : '' } @links; - @descs = map { s/\"/"/g; $_ } map { escape $_ } @descs; - @links = map { defined ($_) ? relative ($file, $_) : undef } @links; - - # Make the HTML for the footer. - my @types = ('previous', 'next', 'up'); - for my $i (0..2) { - next unless defined $links[$i]; - my $link = qq( <link rel="$types[$i]" href="$links[$i]"); - if ($descs[$i] ne '') { - if (length ($link) + length ($descs[$i]) + 12 > 79) { - $link .= "\n "; - } - $link .= qq( title="$descs[$i]" />\n); - } else { - $link .= " />\n"; - } - $output .= $link; - } - my $href = relative ($file, '/'); - $output .= qq( <link rel="top" href="$href" />\n); - } - return $output; -} - -# Given the name of the current file being processed, return the HTML for the -# navigation links for that file. Uses the global %SITEDESCS and %SITELINKS -# variables. If the partial URL isn't found in those variables or we're at -# the top page, nothing is returned. -sub placement { - my $file = shift; - $file = $File::Find::dir . '/' . $file; - $file =~ s%^\Q$SOURCE%%; - $file =~ s%/index\.html$%/%; - - my $output = ''; - if ($file ne '/' && $SITELINKS{$file}) { - my @links = @{ $SITELINKS{$file} }; - my @descs = map { defined ($_) ? $SITEDESCS{$_} : '' } @links; - @descs = map { escape $_ } @descs; - @links = map { defined ($_) ? relative ($file, $_) : undef } @links; - - # Build the table for the navigation bar. - $output = qq(<table class="navbar"><tr>\n); - $output .= qq( <td class="navleft">); - if (defined $links[0]) { - $output .= qq(< <a href="$links[0]">$descs[0]</a>); - } - $output .= qq(</td>\n); - if (defined $links[2]) { - $output .= qq( <td>\n); - my $first = 1; - for my $i (reverse (2 .. 
$#links)) { - next unless defined $links[$i]; - $output .= ' '; - if ($first) { - $first = 0; - } else { - $output .= qq(> ); - } - $output .= qq(<a href="$links[$i]">$descs[$i]</a>\n); - } - $output .= qq( </td>\n); - } - $output .= qq( <td class="navright">); - if (defined $links[1]) { - $output .= qq(<a href="$links[1]">$descs[1]</a> >); - } - $output .= qq(</td>\n); - $output .= qq(</tr></table>\n\n); - } - return $output; -} - -# Return the signature file for pages in this directory, if present. -sub sign { - my $output = ''; - if (open (SIG, '< .signature') || open (SIG, "< $SOURCE/.signature")) { - local $/ = "\n"; - my @signature = <SIG>; - chomp @signature; - close SIG; - $output .= join ("\n ", @signature); - $output .= " <br />\n "; - } - return $output; -} - -# Returns the page footer, which consists of the navigation links, the regular -# signature, and the last modified date. Takes as arguments the full path to -# the source file, the name of the destination file, the CVS Id of the source -# file if known, the template to use if the modification and current dates are -# the same, and the temlate to use if they're different. The templates will -# have the strings %MOD% and %NOW% replaced by the appropriate dates and %URL% -# with the URL to my HTML generation software.. -sub footer { - my ($source, $file, $id, @templates) = @_; - my $output = placement $file; - $output .= "<address>\n " . sign; - - # Figure out the modified dates. Use the RCS/CVS Id if available, - # otherwise use the Git repository if available. 
- my $modified; - if (defined $id) { - my $date = (split (' ', $id))[3]; - if ($date && $date =~ m%^(\d+)[-/](\d+)[-/](\d+)%) { - $modified = sprintf ("%d-%02d-%02d", $1, $2, $3); - } - } elsif (defined $REPO && $source =~ /^\Q$SOURCE/) { - $modified = $REPO->run ('log', '-1', '--format=%ct', $source); - if ($modified) { - $modified = strftime ('%Y-%m-%d', gmtime $modified); - } - } - if (!$modified) { - $modified = strftime ('%Y-%m-%d', gmtime ((stat $source)[9])); - } - my $now = strftime ('%Y-%m-%d', gmtime); - - # Determine which template to use and substitute in the appropriate times. - my $template = ($modified eq $now) ? $templates[0] : $templates[1]; - if ($template) { - for ($template) { - s/%MOD%/$modified/g; - s/%NOW%/$now/g; - s/%URL%/$URL/g; - } - $output .= "$template\n"; - } - $output .= "</address>\n"; - return $output; -} - -############################################################################## -# Supporting functions -############################################################################## - -# Given the format argument to a command, return the class or id attribute -# that should be used preceeded by a space, or an empty string if none should -# be used. -sub format_string { - my $format = shift; - if ($format) { - if ($format =~ s/^\#//) { - if ($format =~ /\s/) { - warn qq($0:$FILE:$.: Space in anchor "$format"\n); - } - return ' id="' . $format . '"'; - } else { - return ' class="' . $format . '"'; - } - } else { - return ''; - } -} - -# Splits a block of text apart at paired newlines so that it can be reparsed -# in paragraphs, but combines a paragraph with the next one if it has an -# unbalanced number of open brackets. Used by containiners like \block that -# can contain multiple paragraphs. 
-sub split_paragraphs { - my $text = shift; - $text =~ s/^\n(\s*\n)+/\n/; - my @paragraphs; - while ($text && $text =~ s/^(.*?(?:\n\n+|\s*\z))//s) { - my $paragraph = $1; - my $open = ($paragraph =~ tr/\[//); - my $close = ($paragraph =~ tr/\]//); - while ($text && $open > $close) { - $text =~ s/^(.*?(?:\n\n+|\s*\z))//s; - my $extra = $1; - $open += ($extra =~ tr/\[//); - $close += ($extra =~ tr/\]//); - $paragraph .= $extra; - } - push (@paragraphs, $paragraph); - } - return @paragraphs; -} - -# A simple block element. Takes the name of the tag, an initial string to be -# prepended verbatim, the format, and the text. Handles splitting the -# argument on paragraph boundaries and surrounding things properly with the -# tag. -sub block { - my ($tag, $border, $format, $text) = @_; - my $output; - border_start; - if ($format eq 'packed') { - $output = parse ($text, 0); - } else { - $output = join ('', map { parse ($_, 1) } split_paragraphs ($text)); - } - $output .= border_clear; - $output = $border . "<$tag" . format_string ($format) . '>' . $output; - $output =~ s%\s*\z%</$tag>%; - $output .= "\n" unless $format eq 'packed'; - return (1, $output); -} - -# A heading. Handles formats of #something specially by adding an <a name> -# tag inside the heading tag to make it a valid target for internal links even -# in old browsers. -sub heading { - my ($level, $format, $text) = @_; - my $output = border; - if ($format && $format =~ /^\#/) { - my $tag = $format; - $tag =~ s/^\#//; - $text = qq(<a name="$tag">$text</a>); - } - $output .= "<h$level" . format_string ($format) . '>'; - $output .= parse ($text); - $output =~ s/\n\z//; - $output .= "</h$level>\n"; - return (1, $output); -} - -# A simple inline element. Takes the name of the tag, the format, and the -# body and returns the appropriate list of block level and HTML. -sub inline { - my ($tag, $format, $text) = @_; - my $output = "<$tag" . format_string ($format) . '>'; - $output .= parse ($text) . 
"</$tag>"; - return (0, $output); -} - -# Enclose some text in another tag. The one special thing that we do is if -# the enclosed text is entirely enclosed in <span> or <div> tags, we pull the -# options of the <span> or <div> out and instead apply them to the parent tag. -# Takes the tag and the text to enclose. -sub enclose { - my ($tag, $text) = @_; - my $close = $tag; - $close =~ s/ .*//; - if ($text =~ m%^(\s*)<span(?!.*<span)([^>]*)>(.*)</span>(\s*)\z%s) { - my ($lead, $class, $text, $trail) = ($1, $2, $3, $4); - return "$lead<$tag$class>$text</$close>$trail"; - } elsif ($text =~ m%^(\s*)<div(?!.*<div)([^>]*)>(.*)</div>(\s*)\z%s) { - my ($lead, $class, $text, $trail) = ($1, $2, $3, $4); - return "$lead<$tag$class>$text</$close>$trail"; - } else { - return "<$tag>$text</$close>"; - } -} - -############################################################################## -# Commands -############################################################################## - -# Basic inline commands. -sub do_break { (0, '<br />') } -sub do_bold { inline ('b', @_) } -sub do_cite { inline ('cite', @_) } -sub do_class { inline ('span', @_) } -sub do_code { inline ('code', @_) } -sub do_emph { inline ('em', @_) } -sub do_italic { inline ('i', @_) } -sub do_rule { return (1, border . "<hr />\n") } -sub do_strike { inline ('strike', @_) } -sub do_strong { inline ('strong', @_) } -sub do_sub { inline ('sub', @_) } -sub do_sup { inline ('sup', @_) } -sub do_under { inline ('u', @_) } - -# Basic block commands. -sub do_div { block ('div', '', @_) } -sub do_block { block ('blockquote', '', @_) } -sub do_bullet { block ('li', border ('bullet', "<ul>\n", "</ul>\n\n"), @_) } -sub do_number { block ('li', border ('number', "<ol>\n", "</ol>\n\n"), @_) } - -# A description list entry, which takes the heading and the body as arguments. -sub do_desc { - my ($format, $heading, $text) = @_; - my $initial = border ('desc', "<dl>\n", "</dl>\n\n"); - $initial .= '<dt' . format_string ($format) . 
'>' . parse ($heading) - . "</dt>\n"; - return block ('dd', $initial, $format, $text); -} - -# An HTML entity. Check for and handle numeric entities properly, including -# special-casing [ and ] since the user may have needed to use \entity to -# express text that contains literal brackets. -sub do_entity { - my ($format, $char) = @_; - $char = parse ($char); - if ($char eq '91') { - return (0, '['); - } elsif ($char eq '93') { - return (0, ']'); - } elsif ($char =~ /^\d+$/) { - return (0, '&#' . $char . ';'); - } else { - return (0, '&' . $char . ';'); - } -} - -# Generates the page heading at the top of the document. Takes as arguments -# the page title and the page style. This is where the XHTML declarations -# come from. -sub do_heading { - my ($format, $title, $style) = @_; - $title = parse ($title); - $style = parse ($style); - my $file = $FILE; - $file =~ s/\.th$/.html/; - my $output = qq(<?xml version="1.0" encoding="utf-8"?>\n); - $output .= qq(<!DOCTYPE html\n); - $output .= qq( PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"\n); - $output .= qq( "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n); - $output .= qq(\n<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"); - $output .= qq( lang="en">\n); - $output .= qq(<head>\n <title>$title</title>\n); - $output .= qq( <meta http-equiv="Content-Type"); - $output .= qq( content="text/html; charset=utf-8" />\n); - if ($style) { - $style .= '.css'; - $style = $STYLES . $style if $STYLES; - $output .= qq( <link rel="stylesheet" href="$style"); - $output .= qq( type="text/css" />\n); - } - if (@RSS) { - for my $rss (@RSS) { - my ($url, $title) = @$rss; - $output .= qq( <link rel="alternate" type="application/rss+xml"); - $output .= qq( href="$url"\n); - $output .= qq( title="$title" />\n); - } - } - if ($FILE ne '-') { - $output .= sitelinks $file; - } - $output .= "</head>\n\n"; - my $version = (split (' ', $ID))[2]; - my $date = strftime ('%Y-%m-%d %T -0000', gmtime); - $output .= '<!-- Spun' . 
($FILE eq '-' ? '' : " from $FILE") - . " by spin $version on $date -->\n"; - $output .= "<!-- $DOCID -->\n" if $DOCID; - $output .= "\n<body>\n"; - if ($FILE ne '-') { - $output .= placement ($file); - } - return (1, $output); -} - -# Used to save the RCS Id for the document. Doesn't actually output anything -# (the identifier is later used in do_heading). -sub do_id { - my ($format, $id) = @_; - $DOCID = $id; - return (1, ''); -} - -# Include an image. The size is added to the HTML tag automatically. Takes -# the relative path to the image and the alt text. -sub do_image { - my ($format, $image, $text) = @_; - $image = parse ($image); - $text = parse ($text); - my $size = ''; - if (-f $image) { - $size = ' ' . lc html_imgsize ($image); - } - my $output = qq(<img src="$image" alt="$text"$size); - $output .= format_string ($format) . " />"; - return (1, $output); -} - -# Include a file. Note that this includes a file after the current paragraph, -# not immediately at the current point, which may be a bit surprising. -# Someday, I should fix that. -sub do_include { - my ($format, $file) = @_; - $file = parse ($file); - my $fh = FileHandle->new ("< $file") - or die "$0:$FILE:$.: cannot include $file: $!\n"; - unshift (@FILES, [$fh, $file]); - return (1, ''); -} - -# A link to a URL or partial URL. -sub do_link { - my ($format, $url, $text) = @_; - my $output = '<a href="' . parse ($url) . '"'; - $output .= format_string ($format) . '>' . parse ($text) . '</a>'; - return (0, $output); -} - -# Preformatted text, the same as the HTML tag. -sub do_pre { - my ($format, $text) = @_; - my $output = border; - $output .= '<pre' . format_string ($format) . '>' . parse ($text); - $output .= "</pre>\n"; - return (1, $output); -} - -# Used for the leading quotes that I have on many of my pages. Takes the -# quote, the author, and the citation; the citation may be empty. If the -# format is "broken", adds line breaks at the end of each line. 
-sub do_quote { - my ($format, $quote, $author, $cite) = @_; - my $output = border . '<blockquote class="quote">'; - border_start; - $quote = join ('', map { parse ($_, 1) } split_paragraphs ($quote)); - $quote .= border_clear; - if ($format && $format eq 'broken') { - $quote =~ s%(\S *)(\n\s*(?!</p>)\S)%$1<br />$2%g; - $quote =~ s%\n<br />%\n%g; - $quote =~ s%<p><br />%<p>%g; - } - $quote =~ s/\n+$//; - if ($format) { - my $class = format_string ($format); - $quote =~ s/<p>/<p$class>/g; - } - $output .= $quote; - if ($author) { - $author = parse ($author); - my $prefix = ''; - if ($format && ($format eq 'broken' || $format eq 'short')) { - $output .= qq(<p class="attribution">\n); - } else { - $output .= qq(<p class="long-attrib">\n); - $prefix = '— '; - } - if ($cite) { - $cite = parse ($cite); - $output .= " $prefix$author,\n $cite\n"; - } else { - $output .= " $prefix$author\n"; - } - $output .= "</p>"; - } else { - $output .= "\n"; - } - $output .= "</blockquote>\n"; - return (1, $output); -} - -# Given the name of a product, return the release date of the product. -sub do_release { - my ($format, $product) = @_; - $product = parse ($product); - if ($VERSIONS{$product}) { - my $date = $VERSIONS{$product}[1]; - $date =~ s/ .*//; - return (0, $date); - } else { - warn qq($0:$FILE:$.: No release date known for "$product"\n); - return (0, ''); - } -} - -# Used to save RSS feed information for the page. Doesn't output anything -# directly; the RSS feed information is used later in do_heading. -sub do_rss { - my ($format, $url, $title) = @_; - $url = parse ($url); - $title = parse ($title); - push (@RSS, [ $url, $title ]); - return (1, ''); -} - -# Used to end each page, this adds the navigation links and my standard -# address block. 
sub do_signature {
    my $output = border();

    # When filtering stdin there is no file to describe, so just close the
    # document without a footer.
    if ($FILE eq '-') {
        $output .= "</body>\n</html>\n";
        return (1, $output);
    }

    # The output file has the same name with .th replaced by .html.  The
    # source path is qualified with the File::Find directory if we're being
    # run from inside a tree traversal.
    my $file = $FILE;
    $file =~ s/\.th$/.html/;
    my $link = '<a href="%URL%">spun</a>';
    my $source = $FILE;
    if (defined $File::Find::dir) {
        $source = $File::Find::dir . '/' . $source;
    }
    $output .= footer ($source, $file, $DOCID,
                       "Last modified and\n $link %MOD%",
                       "Last $link\n %NOW% from thread modified %MOD%");
    $output .= "</body>\n</html>\n";
    return (1, $output);
}

# Insert the formatted size in bytes, kilobytes, or megabytes of some local
# file.  We could use Number::Format here, but what we're doing is simple
# enough and doesn't seem worth the trouble of another dependency.
#
# Returns (0, <size>) where <size> is the rounded size followed by an optional
# K, M, G, or T and then B, or (0, '') with a warning if the file name is
# empty or the file cannot be examined.
sub do_size {
    my ($format, $file) = @_;
    $file = parse ($file);

    # Test for the empty string explicitly so that a file named "0" is still
    # accepted.
    unless (defined ($file) && length ($file)) {
        warn "$0:$FILE:$.: empty file name in \\size\n";
        return (0, '');
    }
    my ($size) = (stat $file)[7];
    unless (defined $size) {
        warn "$0:$FILE:$.: cannot stat file $file: $!\n";
        return (0, '');
    }

    # Scale down by 1024 until the value fits the unit.  The comparison is
    # inclusive so that exactly 1024 bytes is shown as 1KB, not 1024B.
    my @suffixes = qw(K M G T);
    my $suffix = '';
    while ($size >= 1024 && @suffixes) {
        $size /= 1024;
        $suffix = shift @suffixes;
    }
    $size = sprintf ('%.0f', $size) . $suffix . 'B';
    return (0, $size);
}

# Generates a HTML version of the sitemap and outputs that.  Each entry in
# @SITEMAP is an anonymous array of indentation level, URL, and title.
# Entries at indentation level zero are skipped.
sub do_sitemap {
    unless (@SITEMAP) {
        warn qq($0:$FILE:$.: No sitemap file found\n);
        return (1, '');
    }
    my $output = border();

    # @indents is a stack of the indentation levels of the currently open
    # lists, with the innermost list first.
    my @indents = (0);
    for my $page (@SITEMAP) {
        my ($indent, $url, $title) = @$page;
        next if $indent == 0;
        $url =~ s,^/,,;
        if ($indent > $indents[0]) {
            $output .= (' ' x $indent) . "<ul>\n";
            unshift (@indents, $indent);
        } else {
            while ($indent < $indents[0]) {
                $output .= (' ' x $indents[0]) . "</ul>\n";
                shift @indents;
            }
        }
        $output .= ' ' x $indent;
        $output .= qq(<li><a href="$url">$title</a></li>\n);
    }

    # Close whatever lists are still open.
    for my $indent (@indents) {
        last if $indent <= 0;
        $output .= (' ' x $indent) . "</ul>\n";
    }
    return (1, $output);
}

# Start a table.  Takes any additional HTML attributes to set for the table
# (this is ugly, but <table> takes so many attributes for which there is no
# style sheet equivalent that it's unavoidable) and the body of the table
# (which should consist of \tablehead and \tablerow lines).
sub do_table {
    my ($format, $options, $body) = @_;
    my $tag = $options ? "table $options" : 'table';
    return block ($tag, '', $format, $body);
}

# A heading of a table.  Takes the contents of the cells in that heading.
sub do_tablehead {
    my ($format, @cells) = @_;
    my $output = ' <tr' . format_string ($format) . ">\n";
    for (@cells) {
        $output .= ' ' . enclose ('th', parse ($_) . border()) . "\n";
    }
    $output .= " </tr>\n";
    return (1, $output);
}

# A data line of a table.  Takes the contents of the cells in that row.
sub do_tablerow {
    my ($format, @cells) = @_;
    my $output = ' <tr' . format_string ($format) . ">\n";
    for (@cells) {
        $output .= ' ' . enclose ('td', parse ($_) . border()) . "\n";
    }
    $output .= " </tr>\n";
    return (1, $output);
}

# Output HTML text completely verbatim.
sub do_verbatim {
    my ($format, $text) = @_;
    $text = unescape ($text);
    return (1, $text);
}

# Given the name of a product, return the version number of that product.
sub do_version {
    my ($format, $product) = @_;
    $product = parse ($product);
    if ($VERSIONS{$product}) {
        return (0, $VERSIONS{$product}[0]);
    } else {
        warn qq($0:$FILE:$.: No version known for "$product"\n);
        return (0, '');
    }
}

# The table of available commands.  First column is the number of arguments,
# second column is the handler, and the third column is whether this is its
# own top-level element or whether it needs to be wrapped in <p> tags.  A
# count of -1 means pull off as many arguments as we can find.
%commands = (block     => [  1, \&do_block     ],
             bold      => [  1, \&do_bold      ],
             break     => [  0, \&do_break     ],
             bullet    => [  1, \&do_bullet    ],
             class     => [  1, \&do_class     ],
             cite      => [  1, \&do_cite      ],
             code      => [  1, \&do_code      ],
             desc      => [  2, \&do_desc      ],
             div       => [  1, \&do_div       ],
             emph      => [  1, \&do_emph      ],
             entity    => [  1, \&do_entity    ],
             heading   => [  2, \&do_heading   ],
             id        => [  1, \&do_id        ],
             image     => [  2, \&do_image     ],
             include   => [  1, \&do_include   ],
             italic    => [  1, \&do_italic    ],
             link      => [  2, \&do_link      ],
             number    => [  1, \&do_number    ],
             pre       => [  1, \&do_pre       ],
             quote     => [  3, \&do_quote     ],
             release   => [  1, \&do_release   ],
             rss       => [  2, \&do_rss       ],
             rule      => [  0, \&do_rule      ],
             signature => [  0, \&do_signature ],
             sitemap   => [  0, \&do_sitemap   ],
             size      => [  1, \&do_size      ],
             strike    => [  1, \&do_strike    ],
             strong    => [  1, \&do_strong    ],
             sub       => [  1, \&do_sub       ],
             sup       => [  1, \&do_sup       ],
             table     => [  2, \&do_table     ],
             tablehead => [ -1, \&do_tablehead ],
             tablerow  => [ -1, \&do_tablerow  ],
             under     => [  1, \&do_under     ],
             verbatim  => [  1, \&do_verbatim  ],
             version   => [  1, \&do_version   ]);

# Add handlers for all the headings.  Each handler is a closure over its own
# heading level, which avoids compiling code from a string.
for my $level (1..6) {
    $commands{"h$level"} = [ 1, sub { heading ($level, @_) } ];
}

##############################################################################
# Interface
##############################################################################

# This function is called, giving an input and an output file name, to spin
# HTML from thread.  Either name may be '-' to mean standard input or standard
# output respectively; this relies on the two-argument form of open, so don't
# convert these calls to the three-argument form.
sub spin {
    my ($thread, $output) = @_;
    open (OUT, "> $output") or die "$0: cannot create $output: $!\n";
    my $fh = FileHandle->new ("< $thread")
        or die "$0: cannot open $thread: $!\n";
    @FILES = ([$fh, $thread]);
    $SPACE = '';

    # Parse the thread file a paragraph at a time (but pick up macro contents
    # that are continued across paragraphs).  We maintain the stack of files
    # that we're parsing in @FILES, and do_include will unshift new file
    # handle and filename pairs onto that stack.  That means that the top of
    # the stack may change any time we call parse, so we have to grab our
    # current values again each time through the loop.
    local $/ = '';
    local $_;
    border_start();
    while (@FILES) {
        ($fh, $FILE) = @{ $FILES[0] };
        while (<$fh>) {
            # Only complain about CR characters on platforms where newline
            # doesn't itself contain one.
            if ("\n" !~ /\015/ && /\015/) {
                warn "$0:$FILE:$.: found CR characters; are your line endings"
                    . " correct?\n";
            }

            # If the brackets in this paragraph aren't balanced, keep
            # appending paragraphs until they are or we run out of input.
            my $open = tr/\[//;
            my $close = tr/\]//;
            while (!eof && $open > $close) {
                my $extra = <$fh>;
                $open += ($extra =~ tr/\[//);
                $close += ($extra =~ tr/\]//);
                $_ .= $extra;
            }
            my $result = parse (escape ($_), 1);
            $result =~ s/^(?:\s*\n)+//;
            output ($result) unless ($result =~ /^\s*$/);
            ($fh, $FILE) = @{ $FILES[0] };
        }
        close $fh;
        shift @FILES;
    }
    print OUT border_clear(), $SPACE;
    close OUT;

    # Reset the per-document state so that it doesn't leak into the next file.
    undef %macros;
    undef %strings;
    undef $DOCID;
    undef @RSS;
}

##############################################################################
# External converters
##############################################################################

# Given the command to run to generate the page, the file to save the output
# in, and an anonymous sub that takes three arguments, the first being the
# captured blurb, the second being the document ID if found, and the third
# being the base name of the output file, and prints out a last modified line,
# handle a call to an external converter.
#
# The command is run through the shell since the callers build pipelines.
# Dies if the command exits with a non-zero status or if the output file
# cannot be created.
sub run_converter {
    my ($command, $output, $footer) = @_;
    my @page = `$command`;
    if ($? != 0) {
        # Report only the first word of the command to keep the message short.
        $command =~ s/ .*//;
        die "$0: $command failed with exit status ", ($? >> 8), "\n";
    }
    open (OUT, "> $output") or die "$0: cannot create $output: $!\n";
    my $file = $output;
    $file =~ s%.*/%%;

    # Grab the first few lines of input, looking for a blurb and Id string.
    # Give up if we encounter <body> first.  Also look for a </head> tag and
    # add the navigation link tags before it, if applicable.  Add the
    # navigation bar right at the beginning of the body.
    my ($blurb, $docid);
    local $_;
    while (defined ($_ = shift @page)) {
        if (/<!--\s*(\$Id.*?)\s*-->/) {
            $docid = $1;
        }
        if (/<!--\s*((?:Generated|Converted).*?)\s*-->/) {
            $blurb = $1;
            $blurb =~ s/ \d\d:\d\d:\d\d -0000//;
            $blurb =~ s/ \(\d{4}-\d\d-\d\d\)//;
        }
        if (m%^</head>%) {
            print OUT sitelinks ($file);
        }
        print OUT $_;
        if (m%<body%i) {
            print OUT placement ($file);
            last;
        }
    }
    warn "$0: malformed HTML output from $command\n" unless @page;

    # Snarf input and write it to output until we see </body>, which is our
    # signal to start adding things.  We get very confused if </body> is on
    # the same line as <body>, so don't do that.
    print OUT $_ while (defined ($_ = shift @page) && !m%</body>%i);

    # Add the footer and finish with the output.  $_ holds the </body> line
    # if one was found and is undefined if we ran out of input first.
    print OUT $footer->($blurb, $docid, $file);
    print OUT $_, @page if defined;
    close OUT;
}

# A wrapper around the cl2xhtml script, used to handle .changelog pointers in
# a tree being spun.  Adds the navigation links and the signature to the
# cl2xhtml output.
sub cl2xhtml {
    my ($source, $output, $options, $style) = @_;
    $style = $STYLES . 'changelog.css' unless $style;
    my $command = "cl2xhtml $options -s $style $source";
    my $footer = sub {
        my ($blurb, $id, $file) = @_;
        $blurb =~ s%cl2xhtml%\n<a href="$URL">cl2xhtml</a>% if $blurb;
        footer ($source, $file, $id, $blurb, $blurb);
    };
    run_converter ($command, $output, $footer);
}

# A wrapper around the cvs2xhtml script, used to handle .log pointers in a
# tree being spun.  Adds the navigation links and the signature to the
# cvs2xhtml output.
sub cvs2xhtml {
    my ($source, $output, $options, $style) = @_;

    # Split the source into the directory to run cvs log in and the file name
    # to run it on.
    my $dir = $source;
    $dir =~ s%/+[^/]+$%%;
    my $name = $source;
    $name =~ s%^.*/%%;
    $options .= " -n $name" unless $options =~ /-n /;
    $style = $STYLES . 'cvs.css' unless $style;
    $options .= " -s $style";
    my $command = "(cd $dir && cvs log $name) | cvs2xhtml $options";
    my $footer = sub {
        my ($blurb, $id, $file) = @_;
        $blurb =~ s%cvs2xhtml%\n<a href="$URL">cvs2xhtml</a>% if $blurb;
        footer ($source, $file, $id, $blurb, $blurb);
    };
    run_converter ($command, $output, $footer);
}

# A wrapper around the faq2html script, used to handle .faq pointers in a tree
# being spun.  Adds the navigation links and the signature to the faq2html
# output.
sub faq2html {
    my ($source, $output, $options, $style) = @_;
    $style = $STYLES . 'faq.css' unless $style;
    my $command = "faq2html $options -s $style $source";
    my $footer = sub {
        my ($blurb, $id, $file) = @_;

        # The blurb is undefined if the converter output didn't contain one,
        # so guard the substitution the same way the other wrappers do.
        $blurb =~ s%faq2html%\n<a href="$URL">faq2html</a>% if $blurb;
        footer ($source, $file, $id, $blurb, $blurb);
    };
    run_converter ($command, $output, $footer);
}

# A wrapper around pod2thread and spin -f, used to handle .pod pointers in a
# tree being spun.  Adds the navigation links and the signature to the output.
sub pod2html {
    my ($source, $output, $options, $style) = @_;
    $options = '-n' unless $options;
    my $styles = ($STYLES ? " -s $STYLES" : '');
    $style = 'pod' unless $style;
    $options .= " -s $style";
    my $command = "pod2thread $options $source | $FULLPATH -f$styles";
    my $footer = sub {
        my ($blurb, $id, $file) = @_;
        my $link = '<a href="%URL%">spun</a>';
        footer ($source, $file, $id,
                "Last modified and\n $link %MOD%",
                "Last $link\n %NOW% from POD modified %MOD%");
    };
    run_converter ($command, $output, $footer);
}

##############################################################################
# Per-file operations
##############################################################################

# Given a pointer file, read the master file and any options from that file,
# returning them as a list with the newlines chomped off.  The list is the
# master file, the options (the empty string if none were given), and the
# style sheet (undef if none was given).
sub read_pointer {
    my $file = shift;
    open (POINTER, $file) or die "$0: cannot open $file: $!\n";
    my $master = <POINTER>;
    my $options = <POINTER>;
    my $style = <POINTER>;
    close POINTER;
    die "$0: no master file specified in $file\n" unless $master;
    chomp $master;
    chomp $options if defined $options;
    chomp $style if defined $style;
    $options ||= '';
    return ($master, $options, $style);
}

# This routine is called for every file in the source tree, and references the
# variables $SOURCE and $OUTPUT to find the roots of the source and output
# tree.  It decides what to do with each file, whether spinning it or copying
# it.  It's called from within File::Find and therefore uses the standard
# File::Find variables.
sub process_file {
    return if ($_ eq '.' || $_ eq '..');
    for my $regex (@EXCLUDES) {
        if (/$regex/) {
            $File::Find::prune = 1;
            return;
        }
    }
    my $input = $File::Find::name;
    my $output = $input;
    $output =~ s/^\Q$SOURCE/$OUTPUT/ or die "$0: $input out of tree?\n";
    my $shortout = $output;
    $shortout =~ s/^\Q$OUTPUT/.../;

    # Conversion rules for pointers.  The key is the extension, the first
    # value is the name of the command for the purposes of output, and the
    # second is the sub to run.
    my %rules = (changelog => [ 'cl2xhtml',   \&cl2xhtml  ],
                 faq       => [ 'faq2html',   \&faq2html  ],
                 log       => [ 'cvs2xhtml',  \&cvs2xhtml ],
                 rpod      => [ 'pod2thread', \&pod2html  ]);

    # Figure out what to do with the input.  Every output path we produce or
    # keep is recorded in %OUTPUT so that delete_files leaves it alone.
    if (-d $_) {
        $OUTPUT{$output} = 1;
        if (-e $output && !-d $output) {
            die "$0: cannot replace $output with a directory\n";
        } elsif (!-d $output) {
            print "Creating $shortout\n";
            mkdir ($output, 0755) or die "$0: mkdir $output failed: $!\n";
        }
        if (-f "$_/.rss") {
            system ('spin-rss', '-b', $_, "$_/.rss") == 0
                or die "$0: running spin-rss on $input/.rss failed\n";
        }
    } elsif (/\.th$/) {
        $output =~ s/\.th$/.html/;
        $OUTPUT{$output} = 1;
        $shortout =~ s/\.th$/.html/;
        my $relative = $input;
        $relative =~ s%^\Q$SOURCE/%%;
        my $time = $DEPEND{$relative} || 0;

        # Skip the file if the output is at least as new as both the source
        # and the time recorded in %DEPEND for this file.
        if (-e $output) {
            return if (-M $_ >= -M $output && (stat $output)[9] >= $time);
        }
        print "Spinning $shortout\n";
        spin ($_, $output);
    } else {
        my ($extension) = (/\.([^.]+)$/);
        if ($extension && $rules{$extension}) {
            my ($name, $sub) = @{ $rules{$extension} };
            $output =~ s/\.\Q$extension\E$/.html/;
            $OUTPUT{$output} = 1;
            $shortout =~ s/\.\Q$extension\E$/.html/;
            my ($file, $options, $style) = read_pointer ($input);

            # Skip the conversion if the output is at least as new as both
            # the master file and the pointer file.
            if (-e $output && -e $file) {
                return if (-M $file >= -M $output && -M $_ >= -M $output);
            }
            print "Running $name for $shortout\n";
            $sub->($file, $output, $options, $style);
        } else {
            $OUTPUT{$output} = 1;
            return unless (!-e $output || -M $_ < -M $output);
            print "Updating $shortout\n";
            copy ($_, $output)
                or die "$0: copy of $input to $output failed: $!\n";
        }
    }
}

# This routine is called for every file in the destination tree, if the user
# requested file deletion of files not generated from the source tree.  It
# checks each file to see if it is in the %OUTPUT hash that was generated
# during spin processing, and if not, removes it.  It's called from within
# File::Find and therefore uses the standard File::Find variables.
sub delete_files {
    return if ($_ eq '.' || $_ eq '..');
    my $file = $File::Find::name;
    my $shortfile = $file;
    $shortfile =~ s/^\Q$OUTPUT/.../;
    return if $OUTPUT{$file};
    print "Deleting $shortfile\n";
    if (-d $file) {
        rmdir $file or warn "$0: cannot remove directory $file: $!\n";
        $File::Find::prune = 1;
    } else {
        unlink $file or die "$0: unable to remove $file: $!\n";
    }
}

##############################################################################
# Main routine
##############################################################################

# Flush output immediately, and remember the full path to ourselves before
# trimming $0 for error messages.  The full path is needed for -h and for
# running ourselves as a filter from pod2html.
$| = 1;
$FULLPATH = $0;
$0 =~ s%.*/%%;

# Parse command-line options.
my ($delete, @excludes, $filter, $help, $overrides, $version);
$STYLES = '';
Getopt::Long::config ('bundling');
GetOptions ('d|delete'      => \$delete,
            'e|exclude=s'   => \@excludes,
            'f|filter'      => \$filter,
            'h|help'        => \$help,
            'o|overrides=s' => \$overrides,
            's|style-url=s' => \$STYLES,
            'v|version'     => \$version) or exit 1;
if ($help) {
    print "Feeding myself to perldoc, please wait....\n";
    exec ('perldoc', '-t', $FULLPATH);
} elsif ($version) {
    my $version = join (' ', (split (' ', $ID))[1..3]);
    $version =~ s/,v\b//;
    $version =~ s/(\S+)$/($1)/;
    $version =~ tr%/%-%;
    print $version, "\n";
    exit;
}
$STYLES =~ s%/*$%/% if $STYLES;
push (@EXCLUDES, map { qr/$_/ } @excludes);

# Load overrides from the specified file, if desired.
if ($overrides) {
    unless (do "$overrides") {
        if ($@) {
            die "$0: cannot load $overrides: $@\n";
        } else {
            die "$0: cannot load $overrides: $!\n";
        }
    }
}

# The arguments depend on whether -f is given.  If it is, just filter stdin to
# stdout; otherwise, take the input tree and the output tree on the command
# line and process the input into the output.
if ($filter) {
    if (@ARGV) { die "Usage: $0 -f\n" }
    spin ('-', '-');
} else {
    die "Usage: $0 <source> [<output>]\n" unless (@ARGV >= 1 && @ARGV <= 2);
    ($SOURCE, $OUTPUT) = @ARGV;
    $OUTPUT ||= '-';
    $OUTPUT =~ s%/+$%%;
    if (-f $SOURCE) {
        # Single-file mode.  Redirect stdin and stdout to the source and
        # output files and then run as a filter.  Both paths are converted to
        # absolute paths by changing to their directories.
        #
        # splitpath returns an empty directory for a bare file name, and
        # chdir to an empty string either fails or goes to the home directory
        # depending on the Perl version, so use the current directory instead.
        open (STDIN, $SOURCE) or die "$0: cannot open $SOURCE: $!\n";
        if ($OUTPUT ne '-') {
            my (undef, $dir, $file) = File::Spec->splitpath ($OUTPUT);
            $dir = File::Spec->curdir if $dir eq '';
            my $current = getcwd;
            chdir $dir or die "$0: cannot chdir to $dir: $!\n";
            $OUTPUT = File::Spec->catpath ('', getcwd, $file);
            chdir $current or die "$0: cannot chdir to $current: $!\n";
            open (STDOUT, "> $OUTPUT")
                or die "$0: cannot create $OUTPUT: $!\n";
        }
        my (undef, $dir, $file) = File::Spec->splitpath ($SOURCE);
        $dir = File::Spec->curdir if $dir eq '';
        my $current = getcwd;
        chdir $dir or die "$0: cannot chdir to $dir: $!\n";
        $SOURCE = File::Spec->catpath ('', getcwd, $file);
        spin ('-', '-');
    } else {
        # Tree mode.  Make both the source and output paths absolute, since
        # File::Find changes directories as it traverses the tree.
        die "$0: no output directory specified\n" if $OUTPUT eq '-';
        if ($SOURCE !~ m%^/%) {
            my $current = getcwd;
            chdir $SOURCE or die "$0: cannot chdir to $SOURCE: $!\n";
            $SOURCE = getcwd;
            chdir $current or die "$0: cannot chdir to $current: $!\n";
        }
        if ($OUTPUT !~ m%^/%) {
            unless (-d $OUTPUT) {
                print "Creating $OUTPUT\n";
                mkdir ($OUTPUT, 0755) or die "$0: cannot create $OUTPUT: $!\n";
            }
            chdir $OUTPUT or die "$0: cannot chdir to $OUTPUT: $!\n";
            $OUTPUT = getcwd;
        }
        read_sitemap ("$SOURCE/.sitemap");
        read_versions ("$SOURCE/.versions");

        # Git::Repository is optional, so ignore any failure to load it.
        if (-d "$SOURCE/.git") {
            eval {
                require Git::Repository;
                $REPO = Git::Repository->new (work_tree => $SOURCE);
            };
        }
        $File::Find::dont_use_nlink = 1;

        # Handle a top-level .rss file before the traversal so that anything
        # spin-rss generates is picked up by the traversal.
        if (-f "$SOURCE/.rss") {
            my $current = getcwd;
            chdir $SOURCE or die "$0: cannot chdir to $SOURCE: $!\n";
            system ('spin-rss', '.rss') == 0
                or die "$0: running spin-rss on $SOURCE/.rss failed\n";
            chdir $current or die "$0: cannot chdir to $current: $!\n";
        }
        find (\&process_file, $SOURCE);
        finddepth (\&delete_files, $OUTPUT) if $delete;
    }
}

-############################################################################## -# Documentation -############################################################################## - -=head1 NAME - -spin - Translate thread, an HTML macro language, into XHTML - -=head1 SYNOPSIS - -spin [B<-dhv>] [B<-e> I<pattern> ...] [B<-s> I<url>] [B<-o> I<overrides>] -I<source> [I<output>] - -spin [B<-s> I<url>] [B<-o> I<overrides>] B<-f> - -=head1 REQUIREMENTS - -Perl 5.005 or later and the Image::Size and Text::Balanced modules. Also -expects to find B<faq2html>, B<cvs2xhtml>, B<cl2xhtml>, and B<pod2thread> -to convert certain types of files. The Git::Repository module is required -to determine last change dates for thread source from Git history. - -=head1 DESCRIPTION - -B<spin> implements a fairly simple macro language that expands out into -XHTML, as well as serving as a tool to maintain a set of web pages, -updating a staging area with the latest versions, converting pages written -in the macro language (named "thread"), and running B<faq2html> where -directed. - -When invoked with the B<-f> option, B<spin> works in filter mode, reading -thread from stdin and writing the converted output to stdout. Some -features, such as appending a signature or navigation links, are disabled -in this mode. - -If I<source> is a regular file, I<output> should be the name of the file -into which to put the output, and B<spin> will process only that one file -(which is assumed to be thread). I<output> may be omitted to send the -output to standard output. The same features are disabled in this mode as -in filter mode. - -Otherwise, each file in the directory I<source> is examined recursively. -For each one, it is either copied verbatim into the same relative path -under I<output>, used as instructions to an external program (see the -details on converters below), or converted to HTML. 
The HTML output for -external programs or for converted pages is put under I<output> with the -same file name but with the extension changed to C<.html>. Missing -directories are created. If the B<-d> flag is given, files and -directories in the I<output> directory that do not correspond to files in -the I<source> directory will be deleted. - -Files that end in C<.th> are assumed to be in thread and are turned into -HTML. For the details of the thread language, see L<THREAD LANGUAGE> -below. - -Files that end in various other extensions are taken to be instructions to -run an external converter on a file. The first line of such a pointer -file should be the path to the source file, the second line any arguments -to the converter, and the third line the style sheet to use if not the -default. Which converter to run is based on the extension of the file as -follows: - - .changelog cl2xhtml - .faq faq2html - .log cvs log <file> | cvs2xhtml - .rpod pod2thread <file> | spin -f - -All other files not beginning with a period are copied as-is, except that -files or directories named F<CVS>, F<Makefile>, or F<RCS> are ignored. As -an exception, F<.htaccess> files are also copied over. - -B<spin> looks for a file named F<.sitemap> at the top of the I<source> -directory and reads it for navigation information to generate the -navigation links at the top and bottom of each page. The format of this -file is one line per web page, with indentation showing the tree -structure, and with each line formatted as a partial URL, a colon, and a -page description. If two pages at the same level aren't related, a line -with three dashes should be put between them at the same indentation -level. The partial URLs should start with / representing the top of the -hierarchy (the I<source> directory), but all generated links will be -relative. 
- -Here's an example of a simple F<.sitemap> file: - - /personal/: Personal Information - /personal/contact.html: Contact Information - --- - /personal/projects.html: Current Projects - /links/: Links - /links/lit.html: Other Literature - /links/music.html: Music - /links/sf.html: Science Fiction and Fantasy - -This defines two sub-pages of the top page, /personal/ and /links/. -/personal/ has two pages under it that are not part of the same set (and -therefore shouldn't have links to each other). /links/ has three pages -under it which are part of a set and should be linked between each other. - -If F<.sitemap> is present, this navigation information will also be put -into the <head> section of the resulting HTML file as <link> tags. Some -browsers will display this information as a navigation toolbar. - -B<spin> also looks for a file named F<.signature> in the same directory as -a thread file (and then at the top of the source tree if none is found in -the current directory) and copies its contents verbatim into an <address> -block at the end of the XHTML page (so the contents should be valid -XHTML). The contents will be surrounded by an <address> tag, and added to -the end of the supplied F<.signature> contents will be information about -when the page was last modified and generated. - -B<spin> looks for a file named F<.versions> at the top of the I<source> -directory and reads it for version information. If it is present, each -line should be of the form: - - <product> <version> <date> <time> <files> - -where <product> is the name of a product with a version number, <version> -is the version, <date> and <time> specify the time of the last release (in -ISO YYYY-MM-DD HH:MM:SS format and the local time zone), and <files> is -any number of paths relative to I<source>, separated by spaces, listing -source thread files that use \version or \release for <product>. 
If there -are more files than can be listed on one line, additional files can be -listed on the next and subsequent lines so long as they all begin with -whitespace (otherwise, they'll be taken to be other products). This -information is not only used for the \version and \release commands, but -also as dependency information. If the date of a release is newer than -the timestamp of the output from one of the files listed in <files>, that -file will be spun again even if it hasn't changed (to pick up the latest -version and release information). - -B<spin> looks for a file named F<.rss> in each directory it processes. If -one is found, B<spin> runs B<spin-rss> on that file, passing the B<-b> -option to point to the directory about to be processed. B<spin> does this -before processing the files in that directory, so B<spin-rss> can create -or update files that will then be processed by B<spin> as normal. - -If there is a directory named F<.git> at the top of the source tree, -B<spin> will assume that the source is a Git repository and will try to -use C<git log> to determine the last modification date of files. - -=head1 OPTIONS - -=over 4 - -=item B<-d>, B<--delete> - -After populating the I<output> tree with the results of converting or -copying all the files in the I<source> tree, delete all regular files in -the I<output> tree that do not have a corresponding file in the I<source> -tree. Directories will be mentioned in B<spin>'s output but will not be -deleted. - -=item B<-e> I<pattern>, B<--exclude>=I<pattern> - -Exclude files matching the given regular expression I<pattern> from being -converted. This flag may be used multiple times. - -=item B<-f>, B<--filter> - -Run B<spin> in filter mode rather than converting a whole tree of files. -Thread source is read from stdin and the XHTML output is written to -stdout. The signature and navigation links are disabled. 
- -=item B<-h>, B<--help> - -Print out this documentation (which is done simply by feeding the script -to C<perldoc -t>). - -=item B<-o> I<overrides>, B<--overrides>=I<overrides> - -Load the I<overrides> file using the Perl do command. This file should -contain Perl code that overrides or adds to the Perl code that's part of -B<spin>. It can be used to define new commands or change the behavior of -existing commands. - -=item B<-s> I<url>, B<--style-url>=I<url> - -The base URL for style sheets. All style sheets specified in \heading -commands will be considered to be relative to this URL and this URL will -be prepended to them (otherwise, they'll be referred to as if they're in -the same directory as the generated file). This will similarly be used as -the base URL to style sheets for the output of B<cl2xhtml>, B<cvs2xhtml>, -and B<faq2html>. - -=item B<-v>, B<--version> - -Print out the version of B<spin> and exit. - -=back - -=head1 THREAD LANGUAGE - -=head2 Basic Syntax - -A thread file is mostly plain ASCII text with a blank line between -paragraphs. There is no need to explicitly mark paragraphs; paragraph -boundaries will be inferred from the blank line between them and the -appropriate <p> tags will be added to the HTML output. There is no need -to escape any character except C<\> (which should be written as C<\\>) and -an unbalanced [ or ] (which should be written as C<\entity[91]> or -C<\entity[93]> respectively). Escaping [ or ] is not necessary if the -brackets are balanced within the paragraph, and therefore is only rarely -needed. - -Commands begin with C<\>. For example, the command to insert a line break -(corresponding to the <br> tag in HTML) is \break. If the command takes -arguments, they are enclosed in square brackets after the command. If -there are multiple arguments, they are each enclosed in square brackets -and follow each other. 
Any amount of whitespace (but nothing else) is -allowed between the command and the arguments, or between the arguments. -So, for example, all of the following are entirely equivalent: - - \link[index.html][Main page] - \link [index.html] [Main page] - - \link[index.html] - [Main page] - - \link - [index.html] - [Main page] - -(\link is a command that takes two arguments.) - -Commands can take multiple paragraphs of text as arguments in some cases -(for things like list items). Commands can be arbitrarily nested. - -Some commands take an additional optional argument which specifies the -class attribute for that HTML tag, for use with style sheets, or the id -attribute, for use with style sheets or as an anchor. That argument is -enclosed in parentheses and placed before any other arguments. If the -argument begins with C<#>, it will be taken to be an id. Otherwise, it -will be taken as a class. For example, a first-level heading is normally -written as: - - \h1[Heading] - -(with one argument). Either of the following will add a class attribute -of C<header> to that HTML container that can be referred to in style -sheets: - - \h1(header)[Heading] - \h1 (header) [Heading] - -and the following would add an id attribute of C<intro> to the heading so -that it could be referred to with the anchor C<#intro>: - - \h1(#intro)[Introduction] - -Note that the heading commands have special handling for id attributes; -see below for more details. - -=head2 Basic Format - -There are two commands that are required to occur in every document. The -first is \heading, which must occur before any regular page text. It -takes two arguments, the first of which is the page title (the title that -shows up in the window title bar for the browser and is the default text -for bookmarks, not anything that's displayed as part of the body of the -page) and the second of which is the style sheet to use. If there is no -style sheet for this page, the second argument should be empty ([]). 
-The second required command is \signature, which must be the last command -in the file. \signature will take care of appending the signature, -appending navigation links, closing any open blocks, and any other cleanup -that has to happen at the end of a generated HTML page. - -It is also highly recommended, if you are using Subversion, CVS, or RCS -for revision control, to put \id[$Z<>Id$] as the first command in each -file. In Subversion, you will also need to enable keyword expansion with -C<svn propset svn:keywords Id I<file>>. B<spin> will then take care of -putting the last modified date in the footer for you based on the Id -timestamp (which may be more accurate than the last modified time of the -thread file). If you are using Git, you don't need to include anything -special in the thread source; as long as the source directory is the -working tree of a Git repository, B<spin> will use Git to determine the -last modification date of the file. - -You can include other files with the \include command, although it has a -few restrictions. The \include command must appear either at the -beginning of the file or after a blank line and should be followed by a -blank line, and you should be careful not to include the same file -recursively. Thread files will not be automatically respun when included -files change, so you will need to touch the thread file to force it to be -respun. - -=head2 Block Commands - -Block commands are commands that should occur in a paragraph by -themselves, not contained in a paragraph with other text. They indicate -high-level structural elements of the page. Three of them were already -discussed above: - -=over 4 - -=item \heading[<title>][<style>] - -As described above, this sets the page title to <title> and the style -sheet to <style>. If the B<-s> option was given, that base URL will be -prepended to <style> to form the URL for the style sheet; otherwise, -<style> will be used verbatim as a URL. 
- -=item \id[$Z<>Id$] - -Tells B<spin> the Subversion, CVS, or RCS revision number and time. This -string is embedded verbatim in an HTML comment near the beginning of the -generated output as well as used for the last modified information added -by the \signature command. For this command to behave properly, it must -be given before \heading. - -=item \include[<file>] - -Include <file> after the current paragraph. If multiple files are -included in the same paragraph, they're included in reverse order, but -this behavior may change in later versions of B<spin>. It's strongly -recommended to always put the \include command in its own paragraph. -Don't put \heading or \signature into an included file; the results won't -be correct. - -=back - -Here are the rest of the block commands. Any argument of <text> can be -multiple paragraphs and contain other embedded block commands (so you can -nest a list inside another list, for example). - -=over 4 - -=item \block[<text>] - -Put text in an indented block, equivalent to <blockquote> in HTML. Used -primarily for quotations or things like license statements embedded in -regular text. - -=item \bullet[<text>] - -<text> is formatted as an item in a bullet list. This is like <li> inside -<ul> in HTML, but the surrounding list tags are inferred automatically and -handled correctly when multiple \bullet commands are used in a row. -Normally, <text> is treated like a paragraph. - -If used with a class attribute of C<packed>, such as with: - - \bullet(packed)[First item] - -then the <text> argument will not be treated as a paragraph and will not -be surrounded in <p> tags. No block commands should be used inside this -type of \bullet command. This variation will, on most browsers, not put -any additional whitespace around the line and will look better for -bulleted lists where each item is a single line. 
- -=item \desc[<heading>][<text>] - -An element in a description list, where each item has a tag <heading> and -an associated body text of <text>, like <dt> and <dd> in HTML. As with -\bullet, the <dl> tags are inferred automatically. - -=item \h1[<heading>] .. \h6[<heading>] - -Level one through level six headings, just like <h1> .. <h6> in HTML. If -given an id argument, such as: - - \h1(#anchor)[Heading] - -then not only will an id attribute be added to the <h1> container but the -text of the heading will also be enclosed in an <a name> container to -ensure that C<#anchor> can be used as an anchor in a link even in older -browsers that don't understand id attributes. This is special handling -that only works with \h1 through \h6, not with other commands. - -=item \number[<text>] - -<text> is formatted as an item in a numbered list, like <li> inside <ol> -in HTML. As with \bullet and \desc, the surrounding tags are inferred -automatically. As with \bullet, a class attribute of C<packed> will omit -the paragraph tags around <text> for better formatting with a list of -short items. See the description under \bullet for more information. - -=item \pre[<text>] - -Insert <text> preformatted, preserving spacing and line breaks. This uses -the HTML <pre> tag, and therefore is normally also shown in a fixed-width -font by the browser. - -When using \pre inside indented blocks or lists, it's worth bearing in -mind how browsers show indentation with \pre. Normally, the browser -indents text inside \pre relative to the enclosing block, so you should -only put as much whitespace before each line in \pre as those lines should -be indented relative to the enclosing text. However B<lynx>, -unfortunately, indents relative to the left margin, so it's difficult to -use indentation that looks correct in both B<lynx> and other browsers. - -=item \quote[<text>][<author>][<work>] - -Used for quotes at the top of a web page. 
The whole text will be enclosed -in a <blockquote> tag with class C<quote> for style sheets. <text> may be -multiple paragraphs, and then a final paragraph will be added (with class -C<attribution>) containing the author, a comma, and the <work> inside -<cite> tags. <work> can be omitted by passing an empty third argument. -If \quote is given a class argument of C<broken>, <text> will be treated -as a series of lines and a line break (C<< <br /> >>) will be added to the -end of each line. - -=item \rss[<url>][<title>] - -Indicates that this page has a corresponding RSS feed at the URL <url>. -The title of the RSS feed (particularly important if a page has more than -one feed) is given by <title>. The feed links are included in the page -header output by \heading, so this command must be given before \heading -to be effective. - -=item \rule - -A horizontal rule, <hr> in HTML. - -=item \sitemap - -Inserts an unordered list showing the structure of the whole site, -provided that a F<.sitemap> file was found at the root of the I<source> -directory and B<spin> wasn't run as a filter or on a single file. If -F<.sitemap> wasn't found or if B<spin> is running as a filter or on a -single file, inserts nothing. - -Be aware that B<spin> doesn't know whether a file contains a \sitemap -command and hence won't know to respin a file when the F<.sitemap> file -has changed. You will need to touch the source file to force it to be -respun. - -=item \table[<options>][<body>] - -Creates a table. The <options> text is added verbatim to the <table> tag -in the generated HTML, so it can be used to set various HTML attributes -like C<cellpadding> that aren't easily accessible in a portable fashion -from style sheets. <body> is the body of the table, which should -generally consist exclusively of \tablehead and \tablerow commands. 
- -The descriptions are somewhat hard to read, so here's a sample table: - - \table[rules="cols" borders="1"][ - \tablehead [Older Versions] [Webauth v3] - \tablerow [suauthSidentSrvtab] [WebAuthKeytab] - \tablerow [suauthFailAction] [WebAuthLoginURL] - \tablerow [suauthDebug] [WebAuthDebug] - \tablerow [suauthProxyHeader] [(use mod_headers)] - ] - -The table support is currently preliminary. I've not yet found a good way -of expressing tables, and it's possible that the syntax will change later. - -=item \tablehead[<cell>][<cell>] ... - -A heading row in a table. \tablehead takes any number of <cell> -arguments, wraps them all in a <tr> table row tag, and puts each cell -inside <th>. If a cell should have a certain class attribute, the easiest -way to do that is to use a \class command around the <cell> text, and the -class attribute will be "lifted" up to become an attribute of the -enclosing <th> tag. - -=item \tablerow[<cell>][<cell>] ... - -A regular row in a table. \tablerow takes any number of <cell> arguments, -wraps them all in a <tr> table row tag, and puts each cell inside <td>. -If a cell should have a certain class attribute, the easiest way to do -that is to use a \class command around the <cell> text, and the class -attribute will be "lifted" up to become an attribute of the enclosing <td> -tag. - -=back - -=head2 Inline Commands - -Inline commands can be used in the middle of a paragraph intermixed with -other text. Most of them are simple analogs to their HTML counterparts. 
-All of the following take a single argument (the enclosed text) and map to -simple HTML tags: - - \bold <b></b> (usually use \strong) - \cite <cite></cite> - \code <code></code> - \emph <em></em> - \italic <i></i> (usually use \emph) - \strike <strike></strike> (should use styles) - \strong <strong></strong> - \sub <sub></sub> - \sup <sup></sup> - \under <u></u> (should use styles) - -Here are the other inline commands: - -=over 4 - -=item \break - -A forced line break, <br> in HTML. - -=item \class[<text>] - -Does nothing except wrap <text> in an HTML <span> tag. The only purpose -of this command is to use it with a class argument that can be used in a -style sheet. For example, you might write: - - \class(red)[A style sheet can make this text red.] - -so that the style sheet can then refer to class C<red> and change its -color. - -=item \entity[<code>] - -An HTML entity with code <code>. Basically, becomes &<code>; in the -generated HTML, or &#<code>; if <code> is entirely numeric. About the -only time you'd need to use this is for non-ASCII characters (European -names, for example) or if you need a literal [ or ] that isn't balanced. - -=item \image[<url>][<text>] - -Insert an inline image. <text> is the alt text for the image (which will -be displayed on non-graphical browsers). Height and width tags are added -automatically assuming that <url> is a relative URL in the same tree of -files as the thread source. - -=item \link[<url>][<text>] - -Create a link to <url> with link text <text>. Basically <a href=""></a>. - -=item \release[<product>] - -Replaced with the date portion of the version information for <product>, -taken from the F<.versions> file at the top of the source tree. The date -will be returned in the UTC time zone, not the local time zone. - -=item \size[<file>] - -Replaced with the size of <file> in B, KB, MB, GB, or TB as is most -appropriate, without decimal places. The next largest unit is used if the -value is larger than 1024. 
1024 is used as the scaling factor, not 1000. - -=item \version[<product>] - -Replaced with the version number for <product>, taken from the -F<.versions> file at the top of the source tree. - -=back - -=head2 Defining New Macros - -One of the important things that thread supports over HTML is the ability -to define new macros on the fly. If there are particular constructs that -are frequently used on the page, you can define a macro at the top of that -page and then just use it repeatedly throughout the page. - -A string can be defined with the command: - - \=[<string>][<value>] - -where <string> is the name that will be used (can only be alphanumerics -plus underscore) and <value> is the value that string will expand into. -Any later occurrence of \=<string> in the file will be replaced with -<value>. For example: - - \=[HOME][http://www.stanford.edu/] - -will cause any later occurrences of \=HOME in the file to be replaced with -the text C<http://www.stanford.edu/>. This can be useful for things like -URLs for links, so that all the URLs can be collected at the top of the -page for easy updating. - -A new macro can be defined with the command: - - \==[<name>][<arguments>][<definition>] - -where <name> is the name of the macro (again consisting only of -alphanumerics or underscore), <arguments> is the number of arguments that -it takes, and <definition> is the definition of the macro. When the macro -is expanded, any occurrence of \1 in the definition is replaced with the -first argument, any occurrence of \2 with the second argument, and so -forth. - -For example: - - \==[bolddesc] [2] [\desc[\bold[\1]][\2]] - -defines a new macro \bolddesc that takes the same arguments as the regular -\desc command but always wraps the first argument, the heading, in -<b>. - -=head1 BUGS - -Currently, the style sheets for B<cl2xhtml>, B<cvs2xhtml>, B<faq2html>, -and B<pod2thread> are hard-coded into this program to fit my web pages. 
-This makes this program awkward for others to use, since the style sheet -has to be specified in every pointer file if they're using different -names. - -There is no way to configure how navigation links are added if the sitemap -support is used. - -\include needs some work to make it behave as expected without requiring -that each \include be in its own paragraph. It should be possible to -support \heading and \signature in included files without breaking the -navigation link support. - -\sitemap can only be used at the top of the web site or the links would be -wrong. It needs to do relative adjustment of the links. - -The sitemap support currently only adds previous, next, up, and top links -in the header of the generated web page. Most browsers that support this -functionality also support first and last links, and the information is -available in the sitemap file to generate those. They should also be -included. - -=head1 SEE ALSO - -cl2xhtml(1), cvs2xhtml(1), faq2html(1), pod2thread(1), spin-rss(1) - -The XHTML 1.0 standard at L<http://www.w3.org/TR/xhtml1/>. - -Current versions of this program are available from my web tools page at -L<http://www.eyrie.org/~eagle/software/web/>, as are copies of all of the -above-mentioned programs. - -=head1 AUTHOR - -Russ Allbery <rra@stanford.edu> - -=head1 COPYRIGHT AND LICENSE - -Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 -Russ Allbery <rra@stanford.edu>. - -This program is free software; you may redistribute it and/or modify it -under the same terms as Perl itself. - -=cut |