diff options
author | Russ Allbery <rra@cpan.org> | 2021-09-08 21:08:44 -0700 |
---|---|---|
committer | Russ Allbery <rra@cpan.org> | 2021-09-08 21:08:44 -0700 |
commit | cda2e4a52a464dc6360047cd3683e5d4ab21ba0a (patch) | |
tree | 0651187fc5cd2f6bb5951bf0823aaa60b336239c | |
parent | c1c9bc717cf3b4684bd2d0e8211f674b4ba0f49b (diff) |
Refactor the core spin parsing code
Start the grand refactor of App::DocKnot::Spin. Refactor the core
parsing code and move handling of macro definitions, string
definitions and \\ into command handlers. Tag commands with
whether they care about format arguments and remove a bunch of
useless arguments to commands that take arguments but not a format
string.
-rw-r--r-- | README | 1 | ||||
-rw-r--r-- | README.md | 1 | ||||
-rw-r--r-- | docs/docknot.yaml | 1 | ||||
-rw-r--r-- | lib/App/DocKnot/Spin.pm | 673 | ||||
-rw-r--r-- | lib/App/DocKnot/Spin/Sitemap.pm | 8 | ||||
-rw-r--r-- | t/data/generate/docknot/output/thread | 1 |
6 files changed, 434 insertions, 251 deletions
@@ -71,7 +71,6 @@ REQUIREMENTS * Perl6::Slurp * Pod::Thread 2.00 or later * Template (part of Template Toolkit) - * Text::Balanced * YAML::XS 0.81 or later BUILDING AND INSTALLATION @@ -77,7 +77,6 @@ The following additional Perl modules are required to use it: * Perl6::Slurp * Pod::Thread 2.00 or later * Template (part of Template Toolkit) -* Text::Balanced * YAML::XS 0.81 or later ## Building and Installation diff --git a/docs/docknot.yaml b/docs/docknot.yaml index f352ec0..1ce592d 100644 --- a/docs/docknot.yaml +++ b/docs/docknot.yaml @@ -127,7 +127,6 @@ requirements: | * Perl6::Slurp * Pod::Thread 2.00 or later * Template (part of Template Toolkit) - * Text::Balanced * YAML::XS 0.81 or later test: diff --git a/lib/App/DocKnot/Spin.pm b/lib/App/DocKnot/Spin.pm index 2c1594b..62b5d3f 100644 --- a/lib/App/DocKnot/Spin.pm +++ b/lib/App/DocKnot/Spin.pm @@ -30,7 +30,7 @@ use IPC::System::Simple qw(capture systemx); use File::Basename qw(fileparse); use File::Copy qw(copy); use File::Find qw(find finddepth); -use File::Spec (); +use File::Spec (); use Pod::Thread (); use POSIX qw(strftime); use Text::Balanced qw(extract_bracketed); @@ -47,52 +47,57 @@ my @EXCLUDES = ( # used to embed a link to the software that generated the page. my $URL = 'https://www.eyrie.org/~eagle/software/web/'; -# The table of available commands. First column is the number of arguments, -# second column is the handler, and the third column is whether this is its -# own top-level element or whether it needs to be wrapped in <p> tags. A -# count of -1 means pull off as many arguments as we can find. +# The table of available commands. The columns are: +# +# 1. Number of arguments or -1 to consume as many arguments as it can find. +# 2. Name of the method to call with the arguments and (if wanted) format. +# 3. Whether to look for a format in parens before the arguments. my %COMMANDS = ( - block => [ 1, '_cmd_block' ], - bold => [ 1, '_cmd_bold' ], - break => [ 0, '_cmd_break' ], - bullet => [ 1, '_cmd_bullet' ], - class => [ 1, '_cmd_class' ], - cite => [ 1, '_cmd_cite' ], - code => [ 1, '_cmd_code' ], - desc => [ 2, '_cmd_desc' ], - div => [ 1, '_cmd_div' ], - emph => [ 1, '_cmd_emph' ], - entity => [ 1, '_cmd_entity' ], - heading => [ 2, '_cmd_heading' ], - h1 => [ 1, '_cmd_h1' ], - h2 => [ 1, '_cmd_h2' ], - h3 => [ 1, '_cmd_h3' ], - h4 => [ 1, '_cmd_h4' ], - h5 => [ 1, '_cmd_h5' ], - h6 => [ 1, '_cmd_h6' ], - id => [ 1, '_cmd_id' ], - image => [ 2, '_cmd_image' ], - include => [ 1, '_cmd_include' ], - italic => [ 1, '_cmd_italic' ], - link => [ 2, '_cmd_link' ], - number => [ 1, '_cmd_number' ], - pre => [ 1, '_cmd_pre' ], - quote => [ 3, '_cmd_quote' ], - release => [ 1, '_cmd_release' ], - rss => [ 2, '_cmd_rss' ], - rule => [ 0, '_cmd_rule' ], - signature => [ 0, '_cmd_signature'], - sitemap => [ 0, '_cmd_sitemap' ], - size => [ 1, '_cmd_size' ], - strike => [ 1, '_cmd_strike' ], - strong => [ 1, '_cmd_strong' ], - sub => [ 1, '_cmd_sub' ], - sup => [ 1, '_cmd_sup' ], - table => [ 2, '_cmd_table' ], - tablehead => [-1, '_cmd_tablehead'], - tablerow => [-1, '_cmd_tablerow' ], - under => [ 1, '_cmd_under' ], - version => [ 1, '_cmd_version' ], + # name args method want_format + block => [ 1, '_cmd_block', 1], + bold => [ 1, '_cmd_bold', 1], + break => [ 0, '_cmd_break', 0], + bullet => [ 1, '_cmd_bullet', 1], + class => [ 1, '_cmd_class', 1], + cite => [ 1, '_cmd_cite', 1], + code => [ 1, '_cmd_code', 1], + desc => [ 2, '_cmd_desc', 1], + div => [ 1, '_cmd_div', 1], + emph => [ 1, '_cmd_emph', 1], + entity => [ 1, '_cmd_entity', 0], + heading => [ 2, '_cmd_heading', 0], + h1 => [ 1, '_cmd_h1', 1], + h2 => [ 1, '_cmd_h2', 1], + h3 => [ 1, '_cmd_h3', 1], + h4 => [ 1, '_cmd_h4', 1], + h5 => [ 1, '_cmd_h5', 1], + h6 => [ 1, '_cmd_h6', 1], + id => [ 1, '_cmd_id', 0], + image => [ 2, '_cmd_image', 1], + include => [ 1, '_cmd_include', 0], + italic => [ 1, '_cmd_italic', 1], + link => [ 2, '_cmd_link', 1], + number => [ 1, '_cmd_number', 1], + pre => [ 1, '_cmd_pre', 1], + quote => [ 3, '_cmd_quote', 1], + release => [ 1, '_cmd_release', 0], + rss => [ 2, '_cmd_rss', 0], + rule => [ 0, '_cmd_rule', 0], + signature => [ 0, '_cmd_signature', 0], + sitemap => [ 0, '_cmd_sitemap', 0], + size => [ 1, '_cmd_size', 0], + strike => [ 1, '_cmd_strike', 1], + strong => [ 1, '_cmd_strong', 1], + sub => [ 1, '_cmd_sub', 1], + sup => [ 1, '_cmd_sup', 1], + table => [ 2, '_cmd_table', 1], + tablehead => [-1, '_cmd_tablehead', 1], + tablerow => [-1, '_cmd_tablerow', 1], + under => [ 1, '_cmd_under', 1], + version => [ 1, '_cmd_version', 0], + '=' => [ 2, '_define_variable', 0], + '==' => [ 3, '_define_macro', 0], + '\\' => [ 0, '_literal', 0], ); ############################################################################## @@ -181,264 +186,397 @@ sub _warning { # Basic parsing ############################################################################## -# Escapes &, <, and > characters found in a string. +# Escapes &, <, and > characters for HTML output. +# +# $string - Input string +# +# Returns: Escaped string sub _escape { - my ($self, $string) = @_; - local $_ = $string; - s/&/&/g; - s/</</g; - s/>/>/g; - return $_; + my ($string) = @_; + $string =~ s{ & }{&}xmsg; + $string =~ s{ < }{<}xmsg; + $string =~ s{ > }{>}xmsg; + return $string; } # Wrap something in paragraph markers, being careful to get newlines right. # Special-case a paragraph consisting entirely of <span> by turning it into a # <p> with the same class. +# +# $text - Text to wrap +# +# Returns: Text wrapped in <p> tags sub _paragraph { my ($self, $text) = @_; - $text =~ s/^\n(\s*\n)*//; - $text =~ s/(\S[ \t]*)\z/$1\n/; - if ($text =~ m%^(\s*)<span(?!.*<span)([^>]*)>(.*)</span>(\s*)\z%s) { - my ($lead, $class, $text, $trail) = ($1, $2, $3, $4); - return "$lead<p$class>$text</p>$trail"; + + # Trim leading newline and whitespace and ensure the paragraph ends with a + # newline. + $text =~ s{ \A \n (\s*\n)* }{}xms; + $text =~ s{ ( \S [ \t]* ) \z }{$1\n}xms; + + # If the whole paragraph is wrapped in <span>, lift its attributes into + # the <p> tag. Otherwise, just add the <p> tags. + if ($text =~ m{ \A (\s*) <span ([^>]*) > (.*) </span> (\s*) \z }xms) { + my ($lead, $attrs, $text, $trail) = ($1, $2, $3, $4); + return "$lead<p$attrs>$text</p>$trail"; } else { - $text =~ s/^/<p>\n/; - $text =~ s%(\n\s*)\z%\n</p>$1%; + $text =~ s{ \A }{<p>\n}xms; + $text =~ s{ (\n\s*) \z }{\n</p>$1}xms; return $text; } } -# Opens or closes a border of a continued structure. Either takes the name of -# the state and its start and end tags, or takes no arguments to close all -# open states. -sub _border { +# Opens the border of a continued structure. +# +# spin, unlike HTML, does not require declaring structures like lists in +# advance of adding elements to them. You start a bullet list by simply +# having a bullet item, and a list is started if one is not already open. +# This is the method that does that: check whether the desired structure is +# already open and, if not, open it and add it to the state stack. +# +# $border - Name of the border state to open +# $start - The opening tag +# $end - The closing tag +# +# Returns: Output to write to start the structure +sub _border_start { my ($self, $border, $start, $end) = @_; - my $output = ''; - if ($border) { - my $state = $self->{state}[-1]; - if ($state eq 'BLOCK' || $state->[0] ne $border) { - $output .= $start; - push($self->{state}->@*, [$border, $end]); - } - } else { - while (defined(my $state = pop($self->{state}->@*))) { - last if $state eq 'BLOCK'; - $output .= $state->[1]; - } - push($self->{state}->@*, 'BLOCK'); + my $state = $self->{state}[-1]; + my $output = q{}; + + # If we're at the top-level block structure or inside a structure other + # than ours, open the structure and add it to the state stack. + if ($state eq 'BLOCK' || $state->[0] ne $border) { + $output .= $start; + push($self->{state}->@*, [$border, $end]); } + + return $output; +} + +# Closes the border of any currently-open continued structure. This is done, +# for example, when a new block structure is opened or a paragraph of regular +# text is seen at the same level as the structure elements. +# +# Returns: Output to write to close the structure. +sub _border_end { + my ($self) = @_; + my $output = q{}; + + # Find all open structures up to the first general block structure. We'll + # pop off the block structure so put it back when we're done. + while (defined(my $state = pop($self->{state}->@*))) { + last if $state eq 'BLOCK'; + $output .= $state->[1]; + } + push($self->{state}->@*, 'BLOCK'); + return $output; } # Marks the beginning of major block structure. Within this structure, # borders will only clear to the level of this structure. -sub _border_start { +sub _block_start { my ($self) = @_; push($self->{state}->@*, 'BLOCK'); } # Clears a major block structure. -sub _border_clear { +sub _block_end { my ($self) = @_; - my $output = $self->_border(); + my $output = $self->_border_end(); pop($self->{state}->@*); return $output; } -# Extract some number of arguments from the front of the given string. If the -# optional third argument is true, try to pull off a parenthesized formatting -# instruction first, returning it as the first result (or undef if it's not -# found). If the count is -1, pull off as many arguments as we can find. +# Extract some number of arguments from the front of the given string. +# +# $text - Text to parse arguments from +# $count - How many arguments to extract, or -1 for as many as possible +# $want_format - If true, check for a parenthesized formatting instruction +# first and extract it if present +# +# Returns: List of the following strings: +# $format - Format or empty string, omitted if !$want_format +# $text - The remaining unparsed text +# @args - $count arguments (undef if the argument wasn't found) sub _extract { - my ($self, $text, $count, $format) = @_; - my (@result, $code); - $text =~ s/\s*//; - if ($format && $text =~ /^\(/) { - ($result[0], $text) = extract_bracketed ($text, '()'); - $result[0] = substr ($result[0], 1, -1); - } else { - $result[0] = ''; + my ($self, $text, $count, $want_format) = @_; + my $format = q{}; + my @args; + + # Extract the format string if requested. + if ($want_format) { + $format = extract_bracketed($text, '()') // q{}; + if ($format) { + $format = substr($format, 1, -1); + } } + + # Extract the desired number of arguments, or all arguments present if + # $count was negative. if ($count >= 0) { - for (1..$count) { - ($result[$_], $text) = extract_bracketed ($text, '[]'); - if ($result[$_]) { - $result[$_] = substr ($result[$_], 1, -1); + for my $i (1 .. $count) { + my $arg = extract_bracketed($text, '[]'); + if (defined($arg)) { + $arg = substr($arg, 1, -1); } else { - $self->_warning("cannot find argument $_"); - $result[$_] = ''; + $self->_warning("cannot find argument $i: $@"); + $arg = q{}; } + push(@args, $arg); } } else { - while ($text =~ /^\s*\[/) { - my $result; - ($result, $text) = extract_bracketed ($text, '[]'); - last unless $result; - $result = substr ($result, 1, -1); - push (@result, $result); + while (defined(my $arg = extract_bracketed($text, '[]'))) { + push(@args, substr($arg, 1, -1)); } } - unless ($format) { shift @result } - (@result, $text); + + # Return the results. + return $want_format ? ($format, $text, @args) : ($text, @args); } -# Process a macro. Takes the number of arguments, the definition of the -# macro, a flag saying whether we're at a block level, and then the values of -# all the arguments. Only straight substitution commands are allowed here, of -# course. +# Expand a macro invocation. +# +# $definition - Definition of the macro +# $block - True if currently in block context +# @args - The arguments to the macro +# +# Returns: List with the macro expansion and the block context flag sub _macro { - my ($self, $args, $definition, $block, @args) = @_; - $definition =~ s/\\(\d+)/($1 > $args) ? "\\$1" : $args[$1 - 1]/ge; + my ($self, $definition, $block, @args) = @_; + + # The function that expands a macro substitution marker. If the number of + # the marker is higher than the number of arguments of the macro, leave it + # as-is. (We will have already warned about this when defining the + # macro.) + my $expand = sub { + my ($n) = @_; + return ($n > scalar(@args)) ? "\\$n" : $args[$n - 1]; + }; + + # Replace the substitution markers in the macro definition. + $definition =~ s{ \\(\d+) }{ $expand->($1) }xmsge; + + # Now parse the result as if it were input thread and return the results. return $self->_parse_context($definition, $block); } # Expand a given command into its representation. This function is mutually -# recursive with parse. Takes a third argument indicating whether this is a -# top-level element (if it is, and it doesn't generate its own container, it -# may have to be wrapped in <p>). Returns the result of expanding the -# command, a flag saying whether the command is block level, and the remaining -# text in the paragraph. +# recursive with _parse_context and _macro. +# +# $command - Name of the command +# $text - Input text following the command +# $block - True if currently in block context (if so, and if the command +# doesn't generate its own container, it will need to be wrapped +# in <p> +# +# Returns: List with the following elements: +# $output - Output from expanding the command +# $block - Whether the output is block context +# $text - Remaining unparsed text sub _expand { my ($self, $command, $text, $block) = @_; - if ($command eq '==') { - my ($new, $args, $definition); - ($new, $args, $definition, $text) = $self->_extract($text, 3); - if (defined $definition) { - $self->{macros}{$new} = [$args, $definition]; - return ('', 1, $text); - } - } elsif ($command eq '=') { - my ($variable, $value); - ($variable, $value, $text) = $self->_extract($text, 2); - $self->{strings}{$variable} = $self->_parse($value); - return ('', 1, $text); - } elsif ($command =~ s/^=//) { - if (exists($self->{strings}{$command})) { - return ($self->{strings}{$command}, 0, $text); + + # Special handling for expanding variables. These references look like + # \=NAME and expand to the value of the variable "NAME". + if ($command =~ m{ \A = \w }xms) { + my $variable = substr($command, 1); + if (exists($self->{variable}{$variable})) { + return ($self->{variable}{$variable}, 0, $text); } else { - $self->_warning("unknown string $command"); - return ('', 0, $text); + $self->_warning("unknown variable $variable"); + return (q{}, 0, $text); } - } elsif ($command eq '\\') { - return ('\\', 0, $text); - } elsif (exists($self->{macros}{$command})) { - my ($args, $definition) = $self->{macros}{$command}->@*; + } + + # Special handling for macros. Macros shadow commands of the same name. + if (exists($self->{macro}{$command})) { + my ($args, $definition) = $self->{macro}{$command}->@*; + + # Extract the macro arguments, if any were requested. my @args; if ($args != 0) { - @args = $self->_extract($text, $args, 0); - $text = pop @args; + ($text, @args) = $self->_extract($text, $args, 0); } - my $block = $block && ($text !~ /\S/); - return ($self->_macro($args, $definition, $block, @args), $text); + + # The macro runs in a block context if we're currently in block + # context and there is no remaining non-whitespace text. Otherwise, + # use an inline context. + $block &&= $text =~ m{ \A \s* \z }xms; + + # Return the macro expansion. + return ($self->_macro($definition, $block, @args), $text); + } + + # The normal command-handling case. Ensure it is a valid command. + if (!ref($COMMANDS{$command})) { + $self->_warning("unknown command or macro $command"); + return (q{}, 1, $text); + } + + # Dispatch the command to its handler. + my ($args, $handler, $want_format) = $COMMANDS{$command}->@*; + if ($want_format) { + my ($format, $text, @args) = $self->_extract($text, $args, 1); + my ($blocktag, $output) = $self->$handler($format, @args); + return ($output, $blocktag, $text); } else { - if (!ref($COMMANDS{$command})) { - $self->_warning("bad command $command"); - return ('', 1, $text); - } - my ($args, $handler) = $COMMANDS{$command}->@*; - my ($blocktag, $result); - if ($args == 0) { - ($blocktag, $result) = $self->$handler(); - } else { - my @args = $self->_extract($text, $args, 1); - $text = pop @args; - my $format = shift @args; - ($blocktag, $result) = $self->$handler($format, @args); - } - return ($result, $blocktag, $text); + my ($text, @args) = $self->_extract($text, $args); + my ($blocktag, $output) = $self->$handler(@args); + return ($output, $blocktag, $text); } } -# Given a text string, check it for escape sequences and expand them. This -# function is mutually recursive with expand. Takes one flag, saying whether -# we're at the block level. Returns the expanded text and a flag saying -# whether the result is suitable for block level. +# This is the heart of the input parser. Take a string of raw input, expand +# the commands in it, and format the results as HTML. This function is +# mutually recursive with _expand and _macro. +# +# $text - Input text to parse +# $block - True if the parse is done in a block context +# +# Returns: List of the following values: +# $output - HTML output corresponding to $text +# $block - Whether the result is suitable for block level sub _parse_context { my ($self, $text, $block) = @_; - if (index ($text, '\\') == -1) { + + # Check if there are any commands in the input. If not, we have a + # paragraph of regular text. + if (index($text, '\\') == -1) { my $output = $text; - $output = $self->_border() . $self->_paragraph($output) if $block; + + # If we are at block context, we need to make the text into a block + # element, which means wrapping it in <p> tags. Since that is a + # top-level block construct, also close any open block structure. + if ($block) { + $output = $self->_border_end() . $self->_paragraph($output); + } + + # Return the result. return ($output, $block); } - # Chop off everything up to the first backslash and save it in output. - # Then grab the escape and figure out what to do with it. + # The output seen so far. + my $output = q{}; + + # Output required to close any open block-level constructs that we saw + # prior to the text we're currently parsing. + my $border = q{}; + + # Output with inline context that needs to be wrapped in <p> tags. + my $paragraph = q{}; + + # Leading whitespace that should be added to a created paragraph. This is + # only non-empty if $paragraph is empty. + my $space = q{}; + + # Whether we saw a construct not suitable for block level. + my $nonblock = 0; + + # We have at least one command. Parse the text into sections of regular + # text and commands, expand the commands, and glue the results together as + # HTML. # # If we are at block level, we have to distinguish between plain text and # inline commands, which have to be wrapped in paragraph tags, and - # block-level commands, which shouldn't be. We accumulate any output that - # has to be wrapped in a paragraph in $paragraph (and put the border - # before it in $border). Whenever we see a block-level command, we wrap - # anything currently in $paragraph in a paragraph, tack it on to the - # output, and then add on the results of the block command. $space holds - # leading space, which we want to add to the paragraph if we end up - # creating a paragraph. - # - # $nonblock is a flag indicating that we saw some construct that wasn't - # suitable for block level. - my $output = ''; - my ($border, $paragraph, $space) = ('', '', ''); - my $nonblock = 0; - while ($text ne '') { - unless ($text =~ s/^([^\\]+|\\([\w=]+|.))//) { - my $error = substr ($text, 0, 20); - $error =~ s/\n.*//s; - $self->_fatal(qq(unable to parse at "$error")); + # block-level commands, which shouldn't be. + while ($text ne q{}) { + my ($string, $command); + + # Extract text before the next command, or a command name (but none of + # its arguments). I think it's impossible for this regex to fail to + # match as long as $text is non-empty, but do error handling just in + # case. + if ($text =~ s{ \A ( [^\\]+ | \\ ([\w=]+ | .) ) }{}xms) { + ($string, $command) = ($1, $2); + } else { + my $context = substr($text, 0, 20); + $context =~ s{ \n .* }{}xms; + $self->_fatal(qq(unable to parse near "$context")); } - my $command; - if (index ($1, '\\') == -1) { - my $string = $1; - if ($block && $string =~ /^\s+$/ && $paragraph eq '') { - $space .= $string; - } elsif ($block && ($string =~ /\S/ || $paragraph ne '')) { - $border = $self->_border() if $paragraph eq ''; - $paragraph .= $space . $string; - $space = ''; + + # If this is not a command, and we're not at the block level, just add + # it verbatim to the output. + # + # if we are at the block level, pull off any leading space. If there + # is still remaining text, add it plus any accumulated whitespace to a + # new paragraph. + if (index($string, '\\') == -1) { + if ($block) { + if ($string =~ s{ \A (\s+) }{}xms) { + $space .= $1; + } + if ($paragraph ne q{} || $string ne q{}) { + if ($paragraph eq q{}) { + $border = $self->_border_end(); + } + $paragraph .= $space . $string; + $space = q{}; + } } else { $output .= $string; $nonblock = 1; } - } else { - $command = $2; + } + + # Otherwise, we have a command. Expand that command, setting block + # context if we haven't seen any inline content so far. + else { my ($result, $blocktag); - my $force = $block && $paragraph eq ''; ($result, $blocktag, $text) - = $self->_expand($command, $text, $force); + = $self->_expand($command, $text, $block && $paragraph eq q{}); + + # If the result requires block context, output any pending + # paragraph and then the result. Otherwise, if we are already at + # block context, start a new paragraph. Otherwise, just append + # the result to our output. if ($blocktag) { - if ($block && $paragraph ne '') { - $output .= $border . $self->_paragraph($space . $paragraph); - $border = ''; - $paragraph = ''; + if ($block && $paragraph ne q{}) { + $output .= $border . $self->_paragraph($paragraph); + $border = q{}; + $paragraph = q{}; } else { $output .= $space; } $output .= $result; } elsif ($block) { - $border = $self->_border() if $paragraph eq ''; + if ($paragraph eq q{}) { + $border = $self->_border_end(); + } $paragraph .= $space . $result; $nonblock = 1; } else { $output .= $result; $nonblock = 1; } - $space = ''; + $space = q{}; } - if ($text =~ s/^\n(\s*)//) { - if ($paragraph ne '') { + + # If the next bit of unparsed text starts with a newline, extract it + # and any following whitespace now. Add it to our paragraph if we're + # accumulating one; otherwise, add it to the output, but only add the + # newline if we saw inline elements or there is remaining text. This + # suppresses some useless black lines. + if ($text =~ s{ \A \n (\s*) }{}xms) { + if ($paragraph ne q{}) { $paragraph .= "\n$1"; } else { - $output .= "\n" if $text || $nonblock; + if ($text ne q{} || $nonblock) { + $output .= "\n"; + } $output .= $1; } } } - # Wrap any remaining output in paragraph tags and then return the output. - # If we were at block level, our output is always suitable for block - # level. Otherwise, it's suitable for block level only if all of our - # output was from block commands. - $output .= $border . $self->_paragraph($space . $paragraph) - unless $paragraph eq ''; + # If there is any remaining paragraph text, wrap it in tags and append it + # to the output. If we were at block level, our output is always suitable + # for block level. Otherwise, it's suitable for block level only if all + # of our output was from block commands. + if ($paragraph ne q{}) { + $output .= $border . $self->_paragraph($paragraph); + } return ($output, $block || !$nonblock); } @@ -504,14 +642,14 @@ sub _split_paragraphs { sub _block { my ($self, $tag, $border, $format, $text) = @_; my $output = $border . "<$tag" . $self->_format_string($format) . '>'; - $self->_border_start(); + $self->_block_start(); if ($format eq 'packed') { $output .= $self->_parse($text, 0); } else { my @paragraphs = $self->_split_paragraphs($text); $output .= join('', map { $self->_parse($_, 1) } @paragraphs); } - $output .= $self->_border_clear(); + $output .= $self->_block_end(); $output =~ s%\s*\z%</$tag>%; $output .= "\n" unless $format eq 'packed'; return (1, $output); @@ -522,7 +660,7 @@ sub _block { # in old browsers. sub _heading { my ($self, $level, $format, $text) = @_; - my $output = $self->_border(); + my $output = $self->_border_end(); if ($format && $format =~ /^\#/) { my $tag = $format; $tag =~ s/^\#//; @@ -620,7 +758,53 @@ sub _footer { } ############################################################################## -# Commands +# Special commands +############################################################################## + +# Define a new macro. This is the command handler for \==. +# +# $name - Name of the macro +# $args - Number of arguments +# $definition - Definition of the macro +# +# Returns: Block context, empty output +sub _define_macro { + my ($self, $name, $args, $definition) = @_; + $args = $self->_parse($args); + + # Verify the argument count and definition. + if ($args !~ m{ \A \d+ \z }xms) { + $self->_warning("invalid macro argument count for $name"); + } + for my $arg ($definition =~ m{ \\(\d+) }xmsg) { + if ($1 > $args) { + my $msg = "invalid macro placeholder \\$1 (greater than $args)"; + $self->_warning($msg); + } + } + + # Define the macro. + $self->{macro}{$name} = [$self->_parse($args), $definition]; + return (1, q{}); +} + +# Define a new variable. This is the command handler for \=. +# +# $name - Name of the variable +# $value - Value of the variable +# +# Returns: Block context, empty output +sub _define_variable { + my ($self, $name, $value) = @_; + $self->{variable}{$name} = $self->_parse($value); + return (1, q{}); +} + +# Literal backslash. This is the command handler for \\. +sub _literal { return (0, q{\\}) } + +############################################################################## +# Regular commands ############################################################################## # Basic inline commands. @@ -646,7 +830,10 @@ sub _cmd_h5 { my ($self, @a) = @_; $self->_heading(5, @a); } sub _cmd_h6 { my ($self, @a) = @_; $self->_heading(6, @a); } # A horizontal rule. -sub _cmd_rule { my ($self) = @_; return (1, $self->_border() . "<hr />\n") } +sub _cmd_rule { + my ($self) = @_; + return (1, $self->_border_end() . "<hr />\n"); +} # Simple block commands. sub _cmd_div { @@ -661,12 +848,14 @@ sub _cmd_block { sub _cmd_bullet { my ($self, @args) = @_; - $self->_block('li', $self->_border('bullet', "<ul>\n", "</ul>\n\n"), @args); + my $border = $self->_border_start('bullet', "<ul>\n", "</ul>\n\n"); + $self->_block('li', $border, @args); } sub _cmd_number { my ($self, @args) = @_; - $self->_block('li', $self->_border('number', "<ol>\n", "</ol>\n\n"), @args); + my $border = $self->_border_start('number', "<ol>\n", "</ol>\n\n"); + $self->_block('li', $border, @args); } # A description list entry, which takes the heading and the body as arguments. @@ -674,7 +863,7 @@ sub _cmd_desc { my ($self, $format, $heading, $text) = @_; $heading = $self->_parse($heading); my $format_attr = $self->_format_string($format); - my $border = $self->_border('desc', "<dl>\n", "</dl>\n\n"); + my $border = $self->_border_start('desc', "<dl>\n", "</dl>\n\n"); my $initial = $border . "<dt$format_attr>" . $heading . "</dt>\n"; return $self->_block('dd', $initial, $format, $text); } @@ -683,7 +872,7 @@ sub _cmd_desc { # special-casing [ and ] since the user may have needed to use \entity to # express text that contains literal brackets. sub _cmd_entity { - my ($self, $format, $char) = @_; + my ($self, $char) = @_; $char = $self->_parse($char); if ($char eq '91') { return (0, '['); @@ -700,7 +889,7 @@ sub _cmd_entity { # the page title and the page style. This is where the XHTML declarations # come from. sub _cmd_heading { - my ($self, $format, $title, $style) = @_; + my ($self, $title, $style) = @_; $title = $self->_parse($title); $style = $self->_parse($style); @@ -756,7 +945,7 @@ sub _cmd_heading { # Used to save the RCS Id for the document. Doesn't actually output anything # (the identifier is later used in _cmd_heading). sub _cmd_id { - my ($self, $format, $id) = @_; + my ($self, $id) = @_; $self->{id} = $id; return (1, ''); } @@ -780,7 +969,7 @@ sub _cmd_image { # not immediately at the current point, which may be a bit surprising. # Someday, I should fix that. sub _cmd_include { - my ($self, $format, $file) = @_; + my ($self, $file) = @_; $file = $self->_parse($file); my $fh = FileHandle->new ("< $file") or $self->_fatal("cannot include $file: $!"); @@ -800,7 +989,7 @@ sub _cmd_link { # Preformatted text, the same as the HTML tag. sub _cmd_pre { my ($self, $format, $text) = @_; - my $output = $self->_border(); + my $output = $self->_border_end(); $output .= '<pre' . $self->_format_string($format) . '>'; $output .= $self->_parse($text); $output .= "</pre>\n"; @@ -812,11 +1001,11 @@ sub _cmd_pre { # format is "broken", adds line breaks at the end of each line. sub _cmd_quote { my ($self, $format, $quote, $author, $cite) = @_; - my $output = $self->_border() . '<blockquote class="quote">'; - $self->_border_start(); + my $output = $self->_border_end() . '<blockquote class="quote">'; + $self->_block_start(); my @paragraphs = $self->_split_paragraphs ($quote); $quote = join ('', map { $self->_parse($_, 1) } @paragraphs); - $quote .= $self->_border_clear(); + $quote .= $self->_block_end(); if ($format && $format eq 'broken') { $quote =~ s%(\S *)(\n\s*(?!</p>)\S)%$1<br />$2%g; $quote =~ s%\n<br />%\n%g; @@ -853,7 +1042,7 @@ sub _cmd_quote { # Given the name of a product, return the release date of the product. sub _cmd_release { - my ($self, undef, $package) = @_; + my ($self, $package) = @_; $package = $self->_parse($package); if (!$self->{versions}) { $self->_warning("no package release information available"); @@ -870,7 +1059,7 @@ sub _cmd_release { # Used to save RSS feed information for the page. Doesn't output anything # directly; the RSS feed information is used later in _cmd_heading. sub _cmd_rss { - my ($self, $format, $url, $title) = @_; + my ($self, $url, $title) = @_; $url = $self->_parse($url); $title = $self->_parse($title); push($self->{rss}->@*, [$url, $title]); @@ -881,7 +1070,7 @@ sub _cmd_rss { # address block. sub _cmd_signature { my ($self) = @_; - my $output = $self->_border(); + my $output = $self->_border_end(); if ($self->{file} eq '-') { $output .= "</body>\n</html>\n"; return (1, $output); @@ -900,7 +1089,7 @@ sub _cmd_signature { # file. We could use Number::Format here, but what we're doing is simple # enough and doesn't seem worth the trouble of another dependency. sub _cmd_size { - my ($self, $format, $file) = @_; + my ($self, $file) = @_; $file = $self->_parse($file); unless ($file) { $self->_warning("empty file name in \\size"); @@ -928,7 +1117,7 @@ sub _cmd_sitemap { $self->_warning("no sitemap file found"); return (1, ''); } - return (1, $self->_border() . $self->{sitemap}->sitemap()); + return (1, $self->_border_end() . $self->{sitemap}->sitemap()); } # Start a table. Takes any additional HTML attributes to set for the table @@ -946,9 +1135,8 @@ sub _cmd_tablehead { my ($self, $format, @cells) = @_; my $output = ' <tr' . $self->_format_string($format) . ">\n"; for (@cells) { - $output .= ' '; - $output .= $self->_enclose('th', $self->_parse ($_) . $self->_border()); - $output .= "\n"; + my $text = $self->_parse($_) . $self->_border_end(); + $output .= (q{ } x 4) . $self->_enclose('th', $text) . "\n"; } $output .= " </tr>\n"; return (1, $output); @@ -959,9 +1147,8 @@ sub _cmd_tablerow { my ($self, $format, @cells) = @_; my $output = ' <tr' . $self->_format_string($format) . ">\n"; for (@cells) { - $output .= ' '; - $output .= $self->_enclose('td', $self->_parse($_) . $self->_border()); - $output .= "\n"; + my $text = $self->_parse($_) . $self->_border_end(); + $output .= (q{ } x 4) . $self->_enclose('td', $text) . "\n"; } $output .= " </tr>\n"; return (1, $output); @@ -970,7 +1157,7 @@ sub _cmd_tablerow { # Given the name of a package, return the version number of its latest # release. sub _cmd_version { - my ($self, undef, $package) = @_; + my ($self, $package) = @_; if (!$self->{versions}) { $self->_warning("no package version information available"); return (0, q{}); @@ -1001,13 +1188,13 @@ sub _spin { # Initialize object state for a new document. $self->{files} = [[$in_fh, $in_path]]; $self->{id} = undef; - $self->{macros} = {}; + $self->{macro} = {}; $self->{out_fh} = $out_fh; $self->{out_path} = $out_path; $self->{rss} = []; $self->{space} = q{}; $self->{state} = ['BLOCK']; - $self->{strings} = {}; + $self->{variable} = {}; # Parse the thread file a paragraph at a time (but pick up macro contents # that are continued across paragraphs. @@ -1033,7 +1220,7 @@ sub _spin { $close_count += ($extra =~ tr{\]}{}); $para .= $extra; } - my $result = $self->_parse($self->_escape($para), 1); + my $result = $self->_parse(_escape($para), 1); $result =~ s{ \A (?:\s*\n)+ }{}xms; if ($result !~ m{ \A \s* \z }xms) { $self->_output($result); @@ -1044,7 +1231,7 @@ sub _spin { } # Close open tags and print any deferred whitespace. - _print_fh($out_fh, $out_path, $self->_border_clear(), $self->{space}); + _print_fh($out_fh, $out_path, $self->_block_end(), $self->{space}); } ############################################################################## diff --git a/lib/App/DocKnot/Spin/Sitemap.pm b/lib/App/DocKnot/Spin/Sitemap.pm index 4e266ee..15654be 100644 --- a/lib/App/DocKnot/Spin/Sitemap.pm +++ b/lib/App/DocKnot/Spin/Sitemap.pm @@ -111,7 +111,7 @@ sub _read_data { # # $desc - The string to escape # -# Returns: $desc escaped so that it's safe to interpolate into an attribute. +# Returns: $desc escaped so that it's safe to interpolate into an attribute sub _escape { my ($desc) = @_; $desc =~ s{ & }{&}xmsg; @@ -219,7 +219,7 @@ sub new { # # $path - Path to the output, relative to the top of the web site # -# Returns: List of lines to add to the <head> section. +# Returns: List of lines to add to the <head> section sub links { my ($self, $path) = @_; my @links = $self->_page_links($path); @@ -260,7 +260,7 @@ sub links { # # $path - Path to the output, relative to the top of the web site # -# Returns: List of lines that create the navbar. +# Returns: List of lines that create the navbar sub navbar { my ($self, $path) = @_; my ($prev, $next, @parents) = $self->_page_links($path); @@ -309,7 +309,7 @@ sub navbar { # Return the sitemap formatted as HTML. The resulting HTML will only be valid # from a page at the top of the output tree due to the relative links. # -# Returns: List of lines presenting the sitemap in HTML. +# Returns: List of lines presenting the sitemap in HTML sub sitemap { my ($self) = @_; my @output; diff --git a/t/data/generate/docknot/output/thread b/t/data/generate/docknot/output/thread index 1058b92..b5dfbe8 100644 --- a/t/data/generate/docknot/output/thread +++ b/t/data/generate/docknot/output/thread @@ -118,7 +118,6 @@ The following additional Perl modules are required to use it: \bullet(packed)[Perl6::Slurp] \bullet(packed)[Pod::Thread 2.00 or later] \bullet(packed)[Template (part of Template Toolkit)] -\bullet(packed)[Text::Balanced] \bullet(packed)[YAML::XS 0.81 or later] \h2[Download] |