summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRuss Allbery <rra@cpan.org>2021-09-08 21:08:44 -0700
committerRuss Allbery <rra@cpan.org>2021-09-08 21:08:44 -0700
commitcda2e4a52a464dc6360047cd3683e5d4ab21ba0a (patch)
tree0651187fc5cd2f6bb5951bf0823aaa60b336239c
parentc1c9bc717cf3b4684bd2d0e8211f674b4ba0f49b (diff)
Refactor the core spin parsing code
Start the grand refactor of App::DocKnot::Spin. Refactor the core parsing code and move handling of macro definitions, string definitions and \\ into command handlers. Tag commands with whether they care about format arguments and remove a bunch of useless arguments to commands that take arguments but not a format string.
-rw-r--r--README1
-rw-r--r--README.md1
-rw-r--r--docs/docknot.yaml1
-rw-r--r--lib/App/DocKnot/Spin.pm673
-rw-r--r--lib/App/DocKnot/Spin/Sitemap.pm8
-rw-r--r--t/data/generate/docknot/output/thread1
6 files changed, 434 insertions, 251 deletions
diff --git a/README b/README
index 43056c6..7a88461 100644
--- a/README
+++ b/README
@@ -71,7 +71,6 @@ REQUIREMENTS
* Perl6::Slurp
* Pod::Thread 2.00 or later
* Template (part of Template Toolkit)
- * Text::Balanced
* YAML::XS 0.81 or later
BUILDING AND INSTALLATION
diff --git a/README.md b/README.md
index e332474..d95f69c 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,6 @@ The following additional Perl modules are required to use it:
* Perl6::Slurp
* Pod::Thread 2.00 or later
* Template (part of Template Toolkit)
-* Text::Balanced
* YAML::XS 0.81 or later
## Building and Installation
diff --git a/docs/docknot.yaml b/docs/docknot.yaml
index f352ec0..1ce592d 100644
--- a/docs/docknot.yaml
+++ b/docs/docknot.yaml
@@ -127,7 +127,6 @@ requirements: |
* Perl6::Slurp
* Pod::Thread 2.00 or later
* Template (part of Template Toolkit)
- * Text::Balanced
* YAML::XS 0.81 or later
test:
diff --git a/lib/App/DocKnot/Spin.pm b/lib/App/DocKnot/Spin.pm
index 2c1594b..62b5d3f 100644
--- a/lib/App/DocKnot/Spin.pm
+++ b/lib/App/DocKnot/Spin.pm
@@ -30,7 +30,7 @@ use IPC::System::Simple qw(capture systemx);
use File::Basename qw(fileparse);
use File::Copy qw(copy);
use File::Find qw(find finddepth);
-use File::Spec ();
+use File::Spec ();
use Pod::Thread ();
use POSIX qw(strftime);
use Text::Balanced qw(extract_bracketed);
@@ -47,52 +47,57 @@ my @EXCLUDES = (
# used to embed a link to the software that generated the page.
my $URL = 'https://www.eyrie.org/~eagle/software/web/';
-# The table of available commands. First column is the number of arguments,
-# second column is the handler, and the third column is whether this is its
-# own top-level element or whether it needs to be wrapped in <p> tags. A
-# count of -1 means pull off as many arguments as we can find.
+# The table of available commands. The columns are:
+#
+# 1. Number of arguments or -1 to consume as many arguments as it can find.
+# 2. Name of the method to call with the arguments and (if wanted) format.
+# 3. Whether to look for a format in parens before the arguments.
my %COMMANDS = (
- block => [ 1, '_cmd_block' ],
- bold => [ 1, '_cmd_bold' ],
- break => [ 0, '_cmd_break' ],
- bullet => [ 1, '_cmd_bullet' ],
- class => [ 1, '_cmd_class' ],
- cite => [ 1, '_cmd_cite' ],
- code => [ 1, '_cmd_code' ],
- desc => [ 2, '_cmd_desc' ],
- div => [ 1, '_cmd_div' ],
- emph => [ 1, '_cmd_emph' ],
- entity => [ 1, '_cmd_entity' ],
- heading => [ 2, '_cmd_heading' ],
- h1 => [ 1, '_cmd_h1' ],
- h2 => [ 1, '_cmd_h2' ],
- h3 => [ 1, '_cmd_h3' ],
- h4 => [ 1, '_cmd_h4' ],
- h5 => [ 1, '_cmd_h5' ],
- h6 => [ 1, '_cmd_h6' ],
- id => [ 1, '_cmd_id' ],
- image => [ 2, '_cmd_image' ],
- include => [ 1, '_cmd_include' ],
- italic => [ 1, '_cmd_italic' ],
- link => [ 2, '_cmd_link' ],
- number => [ 1, '_cmd_number' ],
- pre => [ 1, '_cmd_pre' ],
- quote => [ 3, '_cmd_quote' ],
- release => [ 1, '_cmd_release' ],
- rss => [ 2, '_cmd_rss' ],
- rule => [ 0, '_cmd_rule' ],
- signature => [ 0, '_cmd_signature'],
- sitemap => [ 0, '_cmd_sitemap' ],
- size => [ 1, '_cmd_size' ],
- strike => [ 1, '_cmd_strike' ],
- strong => [ 1, '_cmd_strong' ],
- sub => [ 1, '_cmd_sub' ],
- sup => [ 1, '_cmd_sup' ],
- table => [ 2, '_cmd_table' ],
- tablehead => [-1, '_cmd_tablehead'],
- tablerow => [-1, '_cmd_tablerow' ],
- under => [ 1, '_cmd_under' ],
- version => [ 1, '_cmd_version' ],
+ # name args method want_format
+ block => [ 1, '_cmd_block', 1],
+ bold => [ 1, '_cmd_bold', 1],
+ break => [ 0, '_cmd_break', 0],
+ bullet => [ 1, '_cmd_bullet', 1],
+ class => [ 1, '_cmd_class', 1],
+ cite => [ 1, '_cmd_cite', 1],
+ code => [ 1, '_cmd_code', 1],
+ desc => [ 2, '_cmd_desc', 1],
+ div => [ 1, '_cmd_div', 1],
+ emph => [ 1, '_cmd_emph', 1],
+ entity => [ 1, '_cmd_entity', 0],
+ heading => [ 2, '_cmd_heading', 0],
+ h1 => [ 1, '_cmd_h1', 1],
+ h2 => [ 1, '_cmd_h2', 1],
+ h3 => [ 1, '_cmd_h3', 1],
+ h4 => [ 1, '_cmd_h4', 1],
+ h5 => [ 1, '_cmd_h5', 1],
+ h6 => [ 1, '_cmd_h6', 1],
+ id => [ 1, '_cmd_id', 0],
+ image => [ 2, '_cmd_image', 1],
+ include => [ 1, '_cmd_include', 0],
+ italic => [ 1, '_cmd_italic', 1],
+ link => [ 2, '_cmd_link', 1],
+ number => [ 1, '_cmd_number', 1],
+ pre => [ 1, '_cmd_pre', 1],
+ quote => [ 3, '_cmd_quote', 1],
+ release => [ 1, '_cmd_release', 0],
+ rss => [ 2, '_cmd_rss', 0],
+ rule => [ 0, '_cmd_rule', 0],
+ signature => [ 0, '_cmd_signature', 0],
+ sitemap => [ 0, '_cmd_sitemap', 0],
+ size => [ 1, '_cmd_size', 0],
+ strike => [ 1, '_cmd_strike', 1],
+ strong => [ 1, '_cmd_strong', 1],
+ sub => [ 1, '_cmd_sub', 1],
+ sup => [ 1, '_cmd_sup', 1],
+ table => [ 2, '_cmd_table', 1],
+ tablehead => [-1, '_cmd_tablehead', 1],
+ tablerow => [-1, '_cmd_tablerow', 1],
+ under => [ 1, '_cmd_under', 1],
+ version => [ 1, '_cmd_version', 0],
+ '=' => [ 2, '_define_variable', 0],
+ '==' => [ 3, '_define_macro', 0],
+ '\\' => [ 0, '_literal', 0],
);
##############################################################################
@@ -181,264 +186,397 @@ sub _warning {
# Basic parsing
##############################################################################
-# Escapes &, <, and > characters found in a string.
+# Escapes &, <, and > characters for HTML output.
+#
+# $string - Input string
+#
+# Returns: Escaped string
sub _escape {
- my ($self, $string) = @_;
- local $_ = $string;
- s/&/&amp;/g;
- s/</&lt;/g;
- s/>/&gt;/g;
- return $_;
+ my ($string) = @_;
+ $string =~ s{ & }{&amp;}xmsg;
+ $string =~ s{ < }{&lt;}xmsg;
+ $string =~ s{ > }{&gt;}xmsg;
+ return $string;
}
# Wrap something in paragraph markers, being careful to get newlines right.
# Special-case a paragraph consisting entirely of <span> by turning it into a
# <p> with the same class.
+#
+# $text - Text to wrap
+#
+# Returns: Text wrapped in <p> tags
sub _paragraph {
my ($self, $text) = @_;
- $text =~ s/^\n(\s*\n)*//;
- $text =~ s/(\S[ \t]*)\z/$1\n/;
- if ($text =~ m%^(\s*)<span(?!.*<span)([^>]*)>(.*)</span>(\s*)\z%s) {
- my ($lead, $class, $text, $trail) = ($1, $2, $3, $4);
- return "$lead<p$class>$text</p>$trail";
+
+ # Trim leading newline and whitespace and ensure the paragraph ends with a
+ # newline.
+ $text =~ s{ \A \n (\s*\n)* }{}xms;
+ $text =~ s{ ( \S [ \t]* ) \z }{$1\n}xms;
+
+ # If the whole paragraph is wrapped in <span>, lift its attributes into
+ # the <p> tag. Otherwise, just add the <p> tags.
+ if ($text =~ m{ \A (\s*) <span ([^>]*) > (.*) </span> (\s*) \z }xms) {
+ my ($lead, $attrs, $text, $trail) = ($1, $2, $3, $4);
+ return "$lead<p$attrs>$text</p>$trail";
} else {
- $text =~ s/^/<p>\n/;
- $text =~ s%(\n\s*)\z%\n</p>$1%;
+ $text =~ s{ \A }{<p>\n}xms;
+ $text =~ s{ (\n\s*) \z }{\n</p>$1}xms;
return $text;
}
}
-# Opens or closes a border of a continued structure. Either takes the name of
-# the state and its start and end tags, or takes no arguments to close all
-# open states.
-sub _border {
+# Opens the border of a continued structure.
+#
+# spin, unlike HTML, does not require declaring structures like lists in
+# advance of adding elements to them. You start a bullet list by simply
+# having a bullet item, and a list is started if one is not already open.
+# This is the method that does that: check whether the desired structure is
+# already open and, if not, open it and add it to the state stack.
+#
+# $border - Name of the border state to open
+# $start - The opening tag
+# $end - The closing tag
+#
+# Returns: Output to write to start the structure
+sub _border_start {
my ($self, $border, $start, $end) = @_;
- my $output = '';
- if ($border) {
- my $state = $self->{state}[-1];
- if ($state eq 'BLOCK' || $state->[0] ne $border) {
- $output .= $start;
- push($self->{state}->@*, [$border, $end]);
- }
- } else {
- while (defined(my $state = pop($self->{state}->@*))) {
- last if $state eq 'BLOCK';
- $output .= $state->[1];
- }
- push($self->{state}->@*, 'BLOCK');
+ my $state = $self->{state}[-1];
+ my $output = q{};
+
+ # If we're at the top-level block structure or inside a structure other
+ # than ours, open the structure and add it to the state stack.
+ if ($state eq 'BLOCK' || $state->[0] ne $border) {
+ $output .= $start;
+ push($self->{state}->@*, [$border, $end]);
}
+
+ return $output;
+}
+
+# Closes the border of any currently-open continued structure. This is done,
+# for example, when a new block structure is opened or a paragraph of regular
+# text is seen at the same level as the structure elements.
+#
+# Returns: Output to write to close the structure.
+sub _border_end {
+ my ($self) = @_;
+ my $output = q{};
+
+ # Find all open structures up to the first general block structure. We'll
+ # pop off the block structure so put it back when we're done.
+ while (defined(my $state = pop($self->{state}->@*))) {
+ last if $state eq 'BLOCK';
+ $output .= $state->[1];
+ }
+ push($self->{state}->@*, 'BLOCK');
+
return $output;
}
# Marks the beginning of major block structure. Within this structure,
# borders will only clear to the level of this structure.
-sub _border_start {
+sub _block_start {
my ($self) = @_;
push($self->{state}->@*, 'BLOCK');
}
# Clears a major block structure.
-sub _border_clear {
+sub _block_end {
my ($self) = @_;
- my $output = $self->_border();
+ my $output = $self->_border_end();
pop($self->{state}->@*);
return $output;
}
-# Extract some number of arguments from the front of the given string. If the
-# optional third argument is true, try to pull off a parenthesized formatting
-# instruction first, returning it as the first result (or undef if it's not
-# found). If the count is -1, pull off as many arguments as we can find.
+# Extract some number of arguments from the front of the given string.
+#
+# $text - Text to parse arguments from
+# $count - How many arguments to extract, or -1 for as many as possible
+# $want_format - If true, check for a parenthesized formatting instruction
+# first and extract it if present
+#
+# Returns: List of the following strings:
+# $format - Format or empty string, omitted if !$want_format
+# $text - The remaining unparsed text
+# @args - $count arguments (undef if the argument wasn't found)
sub _extract {
- my ($self, $text, $count, $format) = @_;
- my (@result, $code);
- $text =~ s/\s*//;
- if ($format && $text =~ /^\(/) {
- ($result[0], $text) = extract_bracketed ($text, '()');
- $result[0] = substr ($result[0], 1, -1);
- } else {
- $result[0] = '';
+ my ($self, $text, $count, $want_format) = @_;
+ my $format = q{};
+ my @args;
+
+ # Extract the format string if requested.
+ if ($want_format) {
+ $format = extract_bracketed($text, '()') // q{};
+ if ($format) {
+ $format = substr($format, 1, -1);
+ }
}
+
+ # Extract the desired number of arguments, or all arguments present if
+ # $count was negative.
if ($count >= 0) {
- for (1..$count) {
- ($result[$_], $text) = extract_bracketed ($text, '[]');
- if ($result[$_]) {
- $result[$_] = substr ($result[$_], 1, -1);
+ for my $i (1 .. $count) {
+ my $arg = extract_bracketed($text, '[]');
+ if (defined($arg)) {
+ $arg = substr($arg, 1, -1);
} else {
- $self->_warning("cannot find argument $_");
- $result[$_] = '';
+ $self->_warning("cannot find argument $i: $@");
+ $arg = q{};
}
+ push(@args, $arg);
}
} else {
- while ($text =~ /^\s*\[/) {
- my $result;
- ($result, $text) = extract_bracketed ($text, '[]');
- last unless $result;
- $result = substr ($result, 1, -1);
- push (@result, $result);
+ while (defined(my $arg = extract_bracketed($text, '[]'))) {
+ push(@args, substr($arg, 1, -1));
}
}
- unless ($format) { shift @result }
- (@result, $text);
+
+ # Return the results.
+ return $want_format ? ($format, $text, @args) : ($text, @args);
}
-# Process a macro. Takes the number of arguments, the definition of the
-# macro, a flag saying whether we're at a block level, and then the values of
-# all the arguments. Only straight substitution commands are allowed here, of
-# course.
+# Expand a macro invocation.
+#
+# $definition - Definition of the macro
+# $block - True if currently in block context
+# @args - The arguments to the macro
+#
+# Returns: List with the macro expansion and the block context flag
sub _macro {
- my ($self, $args, $definition, $block, @args) = @_;
- $definition =~ s/\\(\d+)/($1 > $args) ? "\\$1" : $args[$1 - 1]/ge;
+ my ($self, $definition, $block, @args) = @_;
+
+ # The function that expands a macro substitution marker. If the number of
+ # the marker is higher than the number of arguments of the macro, leave it
+ # as-is. (We will have already warned about this when defining the
+ # macro.)
+ my $expand = sub {
+ my ($n) = @_;
+ return ($n > scalar(@args)) ? "\\$n" : $args[$n - 1];
+ };
+
+ # Replace the substitution markers in the macro definition.
+ $definition =~ s{ \\(\d+) }{ $expand->($1) }xmsge;
+
+ # Now parse the result as if it were input thread and return the results.
return $self->_parse_context($definition, $block);
}
# Expand a given command into its representation. This function is mutually
-# recursive with parse. Takes a third argument indicating whether this is a
-# top-level element (if it is, and it doesn't generate its own container, it
-# may have to be wrapped in <p>). Returns the result of expanding the
-# command, a flag saying whether the command is block level, and the remaining
-# text in the paragraph.
+# recursive with _parse_context and _macro.
+#
+# $command - Name of the command
+# $text - Input text following the command
+# $block - True if currently in block context (if so, and if the command
+# doesn't generate its own container, it will need to be wrapped
+# in <p>
+#
+# Returns: List with the following elements:
+# $output - Output from expanding the command
+# $block - Whether the output is block context
+# $text - Remaining unparsed text
sub _expand {
my ($self, $command, $text, $block) = @_;
- if ($command eq '==') {
- my ($new, $args, $definition);
- ($new, $args, $definition, $text) = $self->_extract($text, 3);
- if (defined $definition) {
- $self->{macros}{$new} = [$args, $definition];
- return ('', 1, $text);
- }
- } elsif ($command eq '=') {
- my ($variable, $value);
- ($variable, $value, $text) = $self->_extract($text, 2);
- $self->{strings}{$variable} = $self->_parse($value);
- return ('', 1, $text);
- } elsif ($command =~ s/^=//) {
- if (exists($self->{strings}{$command})) {
- return ($self->{strings}{$command}, 0, $text);
+
+ # Special handling for expanding variables. These references look like
+ # \=NAME and expand to the value of the variable "NAME".
+ if ($command =~ m{ \A = \w }xms) {
+ my $variable = substr($command, 1);
+ if (exists($self->{variable}{$variable})) {
+ return ($self->{variable}{$variable}, 0, $text);
} else {
- $self->_warning("unknown string $command");
- return ('', 0, $text);
+ $self->_warning("unknown variable $variable");
+ return (q{}, 0, $text);
}
- } elsif ($command eq '\\') {
- return ('\\', 0, $text);
- } elsif (exists($self->{macros}{$command})) {
- my ($args, $definition) = $self->{macros}{$command}->@*;
+ }
+
+ # Special handling for macros. Macros shadow commands of the same name.
+ if (exists($self->{macro}{$command})) {
+ my ($args, $definition) = $self->{macro}{$command}->@*;
+
+ # Extract the macro arguments, if any were requested.
my @args;
if ($args != 0) {
- @args = $self->_extract($text, $args, 0);
- $text = pop @args;
+ ($text, @args) = $self->_extract($text, $args, 0);
}
- my $block = $block && ($text !~ /\S/);
- return ($self->_macro($args, $definition, $block, @args), $text);
+
+ # The macro runs in a block context if we're currently in block
+ # context and there is no remaining non-whitespace text. Otherwise,
+ # use an inline context.
+ $block &&= $text =~ m{ \A \s* \z }xms;
+
+ # Return the macro expansion.
+ return ($self->_macro($definition, $block, @args), $text);
+ }
+
+ # The normal command-handling case. Ensure it is a valid command.
+ if (!ref($COMMANDS{$command})) {
+ $self->_warning("unknown command or macro $command");
+ return (q{}, 1, $text);
+ }
+
+ # Dispatch the command to its handler.
+ my ($args, $handler, $want_format) = $COMMANDS{$command}->@*;
+ if ($want_format) {
+ my ($format, $text, @args) = $self->_extract($text, $args, 1);
+ my ($blocktag, $output) = $self->$handler($format, @args);
+ return ($output, $blocktag, $text);
} else {
- if (!ref($COMMANDS{$command})) {
- $self->_warning("bad command $command");
- return ('', 1, $text);
- }
- my ($args, $handler) = $COMMANDS{$command}->@*;
- my ($blocktag, $result);
- if ($args == 0) {
- ($blocktag, $result) = $self->$handler();
- } else {
- my @args = $self->_extract($text, $args, 1);
- $text = pop @args;
- my $format = shift @args;
- ($blocktag, $result) = $self->$handler($format, @args);
- }
- return ($result, $blocktag, $text);
+ my ($text, @args) = $self->_extract($text, $args);
+ my ($blocktag, $output) = $self->$handler(@args);
+ return ($output, $blocktag, $text);
}
}
-# Given a text string, check it for escape sequences and expand them. This
-# function is mutually recursive with expand. Takes one flag, saying whether
-# we're at the block level. Returns the expanded text and a flag saying
-# whether the result is suitable for block level.
+# This is the heart of the input parser. Take a string of raw input, expand
+# the commands in it, and format the results as HTML. This function is
+# mutually recursive with _expand and _macro.
+#
+# $text - Input text to parse
+# $block - True if the parse is done in a block context
+#
+# Returns: List of the following values:
+# $output - HTML output corresponding to $text
+# $block - Whether the result is suitable for block level
sub _parse_context {
my ($self, $text, $block) = @_;
- if (index ($text, '\\') == -1) {
+
+ # Check if there are any commands in the input. If not, we have a
+ # paragraph of regular text.
+ if (index($text, '\\') == -1) {
my $output = $text;
- $output = $self->_border() . $self->_paragraph($output) if $block;
+
+ # If we are at block context, we need to make the text into a block
+ # element, which means wrapping it in <p> tags. Since that is a
+ # top-level block construct, also close any open block structure.
+ if ($block) {
+ $output = $self->_border_end() . $self->_paragraph($output);
+ }
+
+ # Return the result.
return ($output, $block);
}
- # Chop off everything up to the first backslash and save it in output.
- # Then grab the escape and figure out what to do with it.
+ # The output seen so far.
+ my $output = q{};
+
+ # Output required to close any open block-level constructs that we saw
+ # prior to the text we're currently parsing.
+ my $border = q{};
+
+ # Output with inline context that needs to be wrapped in <p> tags.
+ my $paragraph = q{};
+
+ # Leading whitespace that should be added to a created paragraph. This is
+ # only non-empty if $paragraph is empty.
+ my $space = q{};
+
+ # Whether we saw a construct not suitable for block level.
+ my $nonblock = 0;
+
+ # We have at least one command. Parse the text into sections of regular
+ # text and commands, expand the commands, and glue the results together as
+ # HTML.
#
# If we are at block level, we have to distinguish between plain text and
# inline commands, which have to be wrapped in paragraph tags, and
- # block-level commands, which shouldn't be. We accumulate any output that
- # has to be wrapped in a paragraph in $paragraph (and put the border
- # before it in $border). Whenever we see a block-level command, we wrap
- # anything currently in $paragraph in a paragraph, tack it on to the
- # output, and then add on the results of the block command. $space holds
- # leading space, which we want to add to the paragraph if we end up
- # creating a paragraph.
- #
- # $nonblock is a flag indicating that we saw some construct that wasn't
- # suitable for block level.
- my $output = '';
- my ($border, $paragraph, $space) = ('', '', '');
- my $nonblock = 0;
- while ($text ne '') {
- unless ($text =~ s/^([^\\]+|\\([\w=]+|.))//) {
- my $error = substr ($text, 0, 20);
- $error =~ s/\n.*//s;
- $self->_fatal(qq(unable to parse at "$error"));
+ # block-level commands, which shouldn't be.
+ while ($text ne q{}) {
+ my ($string, $command);
+
+ # Extract text before the next command, or a command name (but none of
+ # its arguments). I think it's impossible for this regex to fail to
+ # match as long as $text is non-empty, but do error handling just in
+ # case.
+ if ($text =~ s{ \A ( [^\\]+ | \\ ([\w=]+ | .) ) }{}xms) {
+ ($string, $command) = ($1, $2);
+ } else {
+ my $context = substr($text, 0, 20);
+ $context =~ s{ \n .* }{}xms;
+ $self->_fatal(qq(unable to parse near "$context"));
}
- my $command;
- if (index ($1, '\\') == -1) {
- my $string = $1;
- if ($block && $string =~ /^\s+$/ && $paragraph eq '') {
- $space .= $string;
- } elsif ($block && ($string =~ /\S/ || $paragraph ne '')) {
- $border = $self->_border() if $paragraph eq '';
- $paragraph .= $space . $string;
- $space = '';
+
+ # If this is not a command, and we're not at the block level, just add
+ # it verbatim to the output.
+ #
+ # if we are at the block level, pull off any leading space. If there
+ # is still remaining text, add it plus any accumulated whitespace to a
+ # new paragraph.
+ if (index($string, '\\') == -1) {
+ if ($block) {
+ if ($string =~ s{ \A (\s+) }{}xms) {
+ $space .= $1;
+ }
+ if ($paragraph ne q{} || $string ne q{}) {
+ if ($paragraph eq q{}) {
+ $border = $self->_border_end();
+ }
+ $paragraph .= $space . $string;
+ $space = q{};
+ }
} else {
$output .= $string;
$nonblock = 1;
}
- } else {
- $command = $2;
+ }
+
+ # Otherwise, we have a command. Expand that command, setting block
+ # context if we haven't seen any inline content so far.
+ else {
my ($result, $blocktag);
- my $force = $block && $paragraph eq '';
($result, $blocktag, $text)
- = $self->_expand($command, $text, $force);
+ = $self->_expand($command, $text, $block && $paragraph eq q{});
+
+ # If the result requires block context, output any pending
+ # paragraph and then the result. Otherwise, if we are already at
+ # block context, start a new paragraph. Otherwise, just append
+ # the result to our output.
if ($blocktag) {
- if ($block && $paragraph ne '') {
- $output .= $border . $self->_paragraph($space . $paragraph);
- $border = '';
- $paragraph = '';
+ if ($block && $paragraph ne q{}) {
+ $output .= $border . $self->_paragraph($paragraph);
+ $border = q{};
+ $paragraph = q{};
} else {
$output .= $space;
}
$output .= $result;
} elsif ($block) {
- $border = $self->_border() if $paragraph eq '';
+ if ($paragraph eq q{}) {
+ $border = $self->_border_end();
+ }
$paragraph .= $space . $result;
$nonblock = 1;
} else {
$output .= $result;
$nonblock = 1;
}
- $space = '';
+ $space = q{};
}
- if ($text =~ s/^\n(\s*)//) {
- if ($paragraph ne '') {
+
+ # If the next bit of unparsed text starts with a newline, extract it
+ # and any following whitespace now. Add it to our paragraph if we're
+ # accumulating one; otherwise, add it to the output, but only add the
+ # newline if we saw inline elements or there is remaining text. This
+ # suppresses some useless black lines.
+ if ($text =~ s{ \A \n (\s*) }{}xms) {
+ if ($paragraph ne q{}) {
$paragraph .= "\n$1";
} else {
- $output .= "\n" if $text || $nonblock;
+ if ($text ne q{} || $nonblock) {
+ $output .= "\n";
+ }
$output .= $1;
}
}
}
- # Wrap any remaining output in paragraph tags and then return the output.
- # If we were at block level, our output is always suitable for block
- # level. Otherwise, it's suitable for block level only if all of our
- # output was from block commands.
- $output .= $border . $self->_paragraph($space . $paragraph)
- unless $paragraph eq '';
+ # If there is any remaining paragraph text, wrap it in tags and append it
+ # to the output. If we were at block level, our output is always suitable
+ # for block level. Otherwise, it's suitable for block level only if all
+ # of our output was from block commands.
+ if ($paragraph ne q{}) {
+ $output .= $border . $self->_paragraph($paragraph);
+ }
return ($output, $block || !$nonblock);
}
@@ -504,14 +642,14 @@ sub _split_paragraphs {
sub _block {
my ($self, $tag, $border, $format, $text) = @_;
my $output = $border . "<$tag" . $self->_format_string($format) . '>';
- $self->_border_start();
+ $self->_block_start();
if ($format eq 'packed') {
$output .= $self->_parse($text, 0);
} else {
my @paragraphs = $self->_split_paragraphs($text);
$output .= join('', map { $self->_parse($_, 1) } @paragraphs);
}
- $output .= $self->_border_clear();
+ $output .= $self->_block_end();
$output =~ s%\s*\z%</$tag>%;
$output .= "\n" unless $format eq 'packed';
return (1, $output);
@@ -522,7 +660,7 @@ sub _block {
# in old browsers.
sub _heading {
my ($self, $level, $format, $text) = @_;
- my $output = $self->_border();
+ my $output = $self->_border_end();
if ($format && $format =~ /^\#/) {
my $tag = $format;
$tag =~ s/^\#//;
@@ -620,7 +758,53 @@ sub _footer {
}
##############################################################################
-# Commands
+# Special commands
+##############################################################################
+
+# Define a new macro. This is the command handler for \==.
+#
+# $name - Name of the macro
+# $args - Number of arguments
+# $definition - Definition of the macro
+#
+# Returns: Block context, empty output
+sub _define_macro {
+ my ($self, $name, $args, $definition) = @_;
+ $args = $self->_parse($args);
+
+ # Verify the argument count and definition.
+ if ($args !~ m{ \A \d+ \z }xms) {
+ $self->_warning("invalid macro argument count for $name");
+ }
+ for my $arg ($definition =~ m{ \\(\d+) }xmsg) {
+ if ($1 > $args) {
+ my $msg = "invalid macro placeholder \\$1 (greater than $args)";
+ $self->_warning($msg);
+ }
+ }
+
+ # Define the macro.
+ $self->{macro}{$name} = [$self->_parse($args), $definition];
+ return (1, q{});
+}
+
+# Define a new variable. This is the command handler for \=.
+#
+# $name - Name of the variable
+# $value - Value of the variable
+#
+# Returns: Block context, empty output
+sub _define_variable {
+ my ($self, $name, $value) = @_;
+ $self->{variable}{$name} = $self->_parse($value);
+ return (1, q{});
+}
+
+# Literal backslash. This is the command handler for \\.
+sub _literal { return (0, q{\\}) }
+
+##############################################################################
+# Regular commands
##############################################################################
# Basic inline commands.
@@ -646,7 +830,10 @@ sub _cmd_h5 { my ($self, @a) = @_; $self->_heading(5, @a); }
sub _cmd_h6 { my ($self, @a) = @_; $self->_heading(6, @a); }
# A horizontal rule.
-sub _cmd_rule { my ($self) = @_; return (1, $self->_border() . "<hr />\n") }
+sub _cmd_rule {
+ my ($self) = @_;
+ return (1, $self->_border_end() . "<hr />\n");
+}
# Simple block commands.
sub _cmd_div {
@@ -661,12 +848,14 @@ sub _cmd_block {
sub _cmd_bullet {
my ($self, @args) = @_;
- $self->_block('li', $self->_border('bullet', "<ul>\n", "</ul>\n\n"), @args);
+ my $border = $self->_border_start('bullet', "<ul>\n", "</ul>\n\n");
+ $self->_block('li', $border, @args);
}
sub _cmd_number {
my ($self, @args) = @_;
- $self->_block('li', $self->_border('number', "<ol>\n", "</ol>\n\n"), @args);
+ my $border = $self->_border_start('number', "<ol>\n", "</ol>\n\n");
+ $self->_block('li', $border, @args);
}
# A description list entry, which takes the heading and the body as arguments.
@@ -674,7 +863,7 @@ sub _cmd_desc {
my ($self, $format, $heading, $text) = @_;
$heading = $self->_parse($heading);
my $format_attr = $self->_format_string($format);
- my $border = $self->_border('desc', "<dl>\n", "</dl>\n\n");
+ my $border = $self->_border_start('desc', "<dl>\n", "</dl>\n\n");
my $initial = $border . "<dt$format_attr>" . $heading . "</dt>\n";
return $self->_block('dd', $initial, $format, $text);
}
@@ -683,7 +872,7 @@ sub _cmd_desc {
# special-casing [ and ] since the user may have needed to use \entity to
# express text that contains literal brackets.
sub _cmd_entity {
- my ($self, $format, $char) = @_;
+ my ($self, $char) = @_;
$char = $self->_parse($char);
if ($char eq '91') {
return (0, '[');
@@ -700,7 +889,7 @@ sub _cmd_entity {
# the page title and the page style. This is where the XHTML declarations
# come from.
sub _cmd_heading {
- my ($self, $format, $title, $style) = @_;
+ my ($self, $title, $style) = @_;
$title = $self->_parse($title);
$style = $self->_parse($style);
@@ -756,7 +945,7 @@ sub _cmd_heading {
# Used to save the RCS Id for the document. Doesn't actually output anything
# (the identifier is later used in _cmd_heading).
sub _cmd_id {
- my ($self, $format, $id) = @_;
+ my ($self, $id) = @_;
$self->{id} = $id;
return (1, '');
}
@@ -780,7 +969,7 @@ sub _cmd_image {
# not immediately at the current point, which may be a bit surprising.
# Someday, I should fix that.
sub _cmd_include {
- my ($self, $format, $file) = @_;
+ my ($self, $file) = @_;
$file = $self->_parse($file);
my $fh = FileHandle->new ("< $file")
or $self->_fatal("cannot include $file: $!");
@@ -800,7 +989,7 @@ sub _cmd_link {
# Preformatted text, the same as the HTML tag.
sub _cmd_pre {
my ($self, $format, $text) = @_;
- my $output = $self->_border();
+ my $output = $self->_border_end();
$output .= '<pre' . $self->_format_string($format) . '>';
$output .= $self->_parse($text);
$output .= "</pre>\n";
@@ -812,11 +1001,11 @@ sub _cmd_pre {
# format is "broken", adds line breaks at the end of each line.
sub _cmd_quote {
my ($self, $format, $quote, $author, $cite) = @_;
- my $output = $self->_border() . '<blockquote class="quote">';
- $self->_border_start();
+ my $output = $self->_border_end() . '<blockquote class="quote">';
+ $self->_block_start();
my @paragraphs = $self->_split_paragraphs ($quote);
$quote = join ('', map { $self->_parse($_, 1) } @paragraphs);
- $quote .= $self->_border_clear();
+ $quote .= $self->_block_end();
if ($format && $format eq 'broken') {
$quote =~ s%(\S *)(\n\s*(?!</p>)\S)%$1<br />$2%g;
$quote =~ s%\n<br />%\n%g;
@@ -853,7 +1042,7 @@ sub _cmd_quote {
# Given the name of a product, return the release date of the product.
sub _cmd_release {
- my ($self, undef, $package) = @_;
+ my ($self, $package) = @_;
$package = $self->_parse($package);
if (!$self->{versions}) {
$self->_warning("no package release information available");
@@ -870,7 +1059,7 @@ sub _cmd_release {
# Used to save RSS feed information for the page. Doesn't output anything
# directly; the RSS feed information is used later in _cmd_heading.
sub _cmd_rss {
- my ($self, $format, $url, $title) = @_;
+ my ($self, $url, $title) = @_;
$url = $self->_parse($url);
$title = $self->_parse($title);
push($self->{rss}->@*, [$url, $title]);
@@ -881,7 +1070,7 @@ sub _cmd_rss {
# address block.
sub _cmd_signature {
my ($self) = @_;
- my $output = $self->_border();
+ my $output = $self->_border_end();
if ($self->{file} eq '-') {
$output .= "</body>\n</html>\n";
return (1, $output);
@@ -900,7 +1089,7 @@ sub _cmd_signature {
# file. We could use Number::Format here, but what we're doing is simple
# enough and doesn't seem worth the trouble of another dependency.
sub _cmd_size {
- my ($self, $format, $file) = @_;
+ my ($self, $file) = @_;
$file = $self->_parse($file);
unless ($file) {
$self->_warning("empty file name in \\size");
@@ -928,7 +1117,7 @@ sub _cmd_sitemap {
$self->_warning("no sitemap file found");
return (1, '');
}
- return (1, $self->_border() . $self->{sitemap}->sitemap());
+ return (1, $self->_border_end() . $self->{sitemap}->sitemap());
}
# Start a table. Takes any additional HTML attributes to set for the table
@@ -946,9 +1135,8 @@ sub _cmd_tablehead {
my ($self, $format, @cells) = @_;
my $output = ' <tr' . $self->_format_string($format) . ">\n";
for (@cells) {
- $output .= ' ';
- $output .= $self->_enclose('th', $self->_parse ($_) . $self->_border());
- $output .= "\n";
+ my $text = $self->_parse($_) . $self->_border_end();
+ $output .= (q{ } x 4) . $self->_enclose('th', $text) . "\n";
}
$output .= " </tr>\n";
return (1, $output);
@@ -959,9 +1147,8 @@ sub _cmd_tablerow {
my ($self, $format, @cells) = @_;
my $output = ' <tr' . $self->_format_string($format) . ">\n";
for (@cells) {
- $output .= ' ';
- $output .= $self->_enclose('td', $self->_parse($_) . $self->_border());
- $output .= "\n";
+ my $text = $self->_parse($_) . $self->_border_end();
+ $output .= (q{ } x 4) . $self->_enclose('td', $text) . "\n";
}
$output .= " </tr>\n";
return (1, $output);
@@ -970,7 +1157,7 @@ sub _cmd_tablerow {
# Given the name of a package, return the version number of its latest
# release.
sub _cmd_version {
- my ($self, undef, $package) = @_;
+ my ($self, $package) = @_;
if (!$self->{versions}) {
$self->_warning("no package version information available");
return (0, q{});
@@ -1001,13 +1188,13 @@ sub _spin {
# Initialize object state for a new document.
$self->{files} = [[$in_fh, $in_path]];
$self->{id} = undef;
- $self->{macros} = {};
+ $self->{macro} = {};
$self->{out_fh} = $out_fh;
$self->{out_path} = $out_path;
$self->{rss} = [];
$self->{space} = q{};
$self->{state} = ['BLOCK'];
- $self->{strings} = {};
+ $self->{variable} = {};
# Parse the thread file a paragraph at a time (but pick up macro contents
# that are continued across paragraphs.
@@ -1033,7 +1220,7 @@ sub _spin {
$close_count += ($extra =~ tr{\]}{});
$para .= $extra;
}
- my $result = $self->_parse($self->_escape($para), 1);
+ my $result = $self->_parse(_escape($para), 1);
$result =~ s{ \A (?:\s*\n)+ }{}xms;
if ($result !~ m{ \A \s* \z }xms) {
$self->_output($result);
@@ -1044,7 +1231,7 @@ sub _spin {
}
# Close open tags and print any deferred whitespace.
- _print_fh($out_fh, $out_path, $self->_border_clear(), $self->{space});
+ _print_fh($out_fh, $out_path, $self->_block_end(), $self->{space});
}
##############################################################################
diff --git a/lib/App/DocKnot/Spin/Sitemap.pm b/lib/App/DocKnot/Spin/Sitemap.pm
index 4e266ee..15654be 100644
--- a/lib/App/DocKnot/Spin/Sitemap.pm
+++ b/lib/App/DocKnot/Spin/Sitemap.pm
@@ -111,7 +111,7 @@ sub _read_data {
#
# $desc - The string to escape
#
-# Returns: $desc escaped so that it's safe to interpolate into an attribute.
+# Returns: $desc escaped so that it's safe to interpolate into an attribute
sub _escape {
my ($desc) = @_;
$desc =~ s{ & }{&amp;}xmsg;
@@ -219,7 +219,7 @@ sub new {
#
# $path - Path to the output, relative to the top of the web site
#
-# Returns: List of lines to add to the <head> section.
+# Returns: List of lines to add to the <head> section
sub links {
my ($self, $path) = @_;
my @links = $self->_page_links($path);
@@ -260,7 +260,7 @@ sub links {
#
# $path - Path to the output, relative to the top of the web site
#
-# Returns: List of lines that create the navbar.
+# Returns: List of lines that create the navbar
sub navbar {
my ($self, $path) = @_;
my ($prev, $next, @parents) = $self->_page_links($path);
@@ -309,7 +309,7 @@ sub navbar {
# Return the sitemap formatted as HTML. The resulting HTML will only be valid
# from a page at the top of the output tree due to the relative links.
#
-# Returns: List of lines presenting the sitemap in HTML.
+# Returns: List of lines presenting the sitemap in HTML
sub sitemap {
my ($self) = @_;
my @output;
diff --git a/t/data/generate/docknot/output/thread b/t/data/generate/docknot/output/thread
index 1058b92..b5dfbe8 100644
--- a/t/data/generate/docknot/output/thread
+++ b/t/data/generate/docknot/output/thread
@@ -118,7 +118,6 @@ The following additional Perl modules are required to use it:
\bullet(packed)[Perl6::Slurp]
\bullet(packed)[Pod::Thread 2.00 or later]
\bullet(packed)[Template (part of Template Toolkit)]
-\bullet(packed)[Text::Balanced]
\bullet(packed)[YAML::XS 0.81 or later]
\h2[Download]