with the same class. # # $text - Text to wrap # # Returns: Text wrapped in
tags
sub _paragraph {
my ($self, $text) = @_;
# Trim leading newline and whitespace and ensure the paragraph ends with a
# newline.
$text =~ s{ \A \n (\s*\n)* }{}xms;
$text =~ s{ ( \S [ \t]* ) \z }{$1\n}xms;
# If the whole paragraph is wrapped in , lift its attributes into
# the tag. Otherwise, just add the tags. This unfortunately
# means we also won't lift for any paragraph with nexted \class
# commands; doing that would require more HTML parsing than I want to do.
my $re = qr{
\A # start of paragraph
(\s*) # any whitespace (1)
]*)> # span tag before any text with class (2)
(?! .* # close span tag
(\s*) # any whitespace (4)
\z # end of paragraph without other text
}xms;
if ($text =~ $re) {
my ($lead, $attrs, $body, $trail) = ($1, $2, $3, $4);
return "$lead $body \n}xms;
$text =~ s{ (\n\s*) \z }{\n
#
# Returns: List with the following elements:
# $output - Output from expanding the command
# $block - Whether the output is block context
# $text - Remaining unparsed text
sub _expand {
my ($self, $command, $text, $block) = @_;
# Special handling for expanding variables. These references look like
# \=NAME and expand to the value of the variable "NAME".
if ($command =~ m{ \A = \w }xms) {
my $variable = substr($command, 1);
if (exists($self->{variable}{$variable})) {
return ($self->{variable}{$variable}, 0, $text);
} else {
$self->_warning("unknown variable \\=$variable");
return (q{}, 0, $text);
}
}
# Special handling for macros. Macros shadow commands of the same name.
if (exists($self->{macro}{$command})) {
my ($args, $definition) = $self->{macro}{$command}->@*;
# Extract the macro arguments, if any were requested.
my @args;
if ($args != 0) {
($text, @args) = $self->_extract($text, $args, 0);
}
# The macro runs in a block context if we're currently in block
# context and there is no remaining non-whitespace text. Otherwise,
# use an inline context.
$block &&= $text =~ m{ \A \s* \z }xms;
# Expand the macro.
my ($result, $blocktag) = $self->_macro($definition, $block, @args);
# We have now double-counted all of the lines in the macro body
# itself, so we need to subtract the line count in the macro
# definition from the line number.
#
# This unfortunately means that the line number of errors that happen
# inside macro arguments will be somewhat off if the macro definition
# itself contains newlines. I don't see a way to avoid that without
# much more complex parsing and state tracking.
$self->{input}[-1][2] -= $definition =~ tr{\n}{};
# Return the macro results.
return ($result, $blocktag, $text);
}
# The normal command-handling case. Ensure it is a valid command.
if (!ref($COMMANDS{$command})) {
$self->_warning("unknown command or macro \\$command");
return (q{}, 1, $text);
}
# Dispatch the command to its handler.
my ($args, $handler, $want_format) = $COMMANDS{$command}->@*;
if ($want_format) {
my ($format, $rest, @args) = $self->_extract($text, $args, 1);
my ($blocktag, $output) = $self->$handler($format, @args);
return ($output, $blocktag, $rest);
} else {
my ($rest, @args) = $self->_extract($text, $args);
my ($blocktag, $output) = $self->$handler(@args);
return ($output, $blocktag, $rest);
}
}
# This is the heart of the input parser. Take a string of raw input, expand
# the commands in it, and format the results as HTML. This function is
# mutually recursive with _expand and _macro.
#
# This function is responsible for maintaining the line number in the file
# currently being processed, for error reporting. The strategy used is to
# increment the line number whenever a newline is seen in processed text.
# This means that newlines are not seen until the text containing them is
# parsed, which in turn means that every argument that may contain a newline
# must be parsed or must update the line number.
#
# $text - Input text to parse
# $block - True if the parse is done in a block context
#
# Returns: List of the following values:
# $output - HTML output corresponding to $text
# $block - Whether the result is suitable for block level
#
## no critic (Subroutines::ProhibitExcessComplexity)
sub _parse_context {
my ($self, $text, $block) = @_;
# Check if there are any commands in the input. If not, we have a
# paragraph of regular text.
if (index($text, q{\\}) == -1) {
my $output = $text;
# Update the line number.
$self->{input}[-1][2] += $text =~ tr{\n}{};
# If we are at block context, we need to make the text into a block
# element, which means wrapping it in tags. Since that is a
# top-level block construct, also close any open block structure.
if ($block) {
$output = $self->_border_end() . $self->_paragraph($output);
}
# Return the result.
return ($output, $block);
}
# The output seen so far.
my $output = q{};
# Output required to close any open block-level constructs that we saw
# prior to the text we're currently parsing.
my $border = q{};
# Output with inline context that needs to be wrapped in tags.
my $paragraph = q{};
# Leading whitespace that should be added to a created paragraph. This is
# only non-empty if $paragraph is empty.
my $space = q{};
# Whether we saw a construct not suitable for block level.
my $nonblock = 0;
# We have at least one command. Parse the text into sections of regular
# text and commands, expand the commands, and glue the results together as
# HTML.
#
# If we are at block level, we have to distinguish between plain text and
# inline commands, which have to be wrapped in paragraph tags, and
# block-level commands, which shouldn't be.
while ($text ne q{}) {
my ($string, $command);
# Extract text before the next command, or a command name (but none of
# its arguments). I think it's impossible for this regex to fail to
# match as long as $text is non-empty, but do error handling just in
# case.
if ($text =~ s{ \A ( [^\\]+ | \\ ([\w=]+ | .) ) }{}xms) {
($string, $command) = ($1, $2);
} else {
my $context = substr($text, 0, 20);
$context =~ s{ \n .* }{}xms;
$self->_fatal(qq(unable to parse near "$context"));
}
# Update the line number.
$self->{input}[-1][2] += $string =~ tr{\n}{};
# If this is not a command, and we're not at the block level, just add
# it verbatim to the output.
#
# if we are at the block level, pull off any leading space. If there
# is still remaining text, add it plus any accumulated whitespace to a
# new paragraph.
if (index($string, q{\\}) == -1) {
if ($block) {
if ($string =~ s{ \A (\s+) }{}xms) {
$space .= $1;
}
if ($paragraph ne q{} || $string ne q{}) {
if ($paragraph eq q{}) {
$border = $self->_border_end();
}
$paragraph .= $space . $string;
$space = q{};
}
} else {
$output .= $string;
$nonblock = 1;
}
}
# Otherwise, we have a command. Expand that command, setting block
# context if we haven't seen any inline content so far.
else {
my ($result, $blocktag);
($result, $blocktag, $text)
= $self->_expand($command, $text, $block && $paragraph eq q{});
# If the result requires block context, output any pending
# paragraph and then the result. Otherwise, if we are already at
# block context, start a new paragraph. Otherwise, just append
# the result to our output.
if ($blocktag) {
if ($block && $paragraph ne q{}) {
$output .= $border . $self->_paragraph($paragraph);
$border = q{};
$paragraph = q{};
} else {
$output .= $space;
}
$output .= $result;
} elsif ($block) {
if ($paragraph eq q{}) {
$border = $self->_border_end();
}
$paragraph .= $space . $result;
$nonblock = 1;
} else {
$output .= $result;
$nonblock = 1;
}
$space = q{};
}
# If the next bit of unparsed text starts with a newline, extract it
# and any following whitespace now.
if ($text =~ s{ \A \n (\s*) }{}xms) {
my $spaces = $1;
# Update the line number.
$self->{input}[-1][2] += 1 + $spaces =~ tr{\n}{};
# Add it to our paragraph if we're accumulating one; otherwise,
# add it to the output, but only add the newline if we saw inline
# elements or there is remaining text. This suppresses some
# useless black lines.
if ($paragraph ne q{}) {
$paragraph .= "\n$spaces";
} else {
if ($text ne q{} || $nonblock) {
$output .= "\n";
}
$output .= $spaces;
}
}
}
# If there is any remaining paragraph text, wrap it in tags and append it
# to the output. If we were at block level, our output is always suitable
# for block level. Otherwise, it's suitable for block level only if all
# of our output was from block commands.
if ($paragraph ne q{}) {
$output .= $border . $self->_paragraph($paragraph);
}
return ($output, $block || !$nonblock);
}
## use critic
# A wrapper around parse_context for callers who don't care about the block
# level of the results.
#
# $text - Input text to parse
# $block - True if the parse is done in a block context
#
# Returns: HTML output corresponding to $text
sub _parse {
my ($self, $text, $block) = @_;
my ($output) = $self->_parse_context($text, $block);
return $output;
}
# The top-level function for parsing a thread document. Be aware that the
# working directory from which this function is run matters a great deal,
# since thread may contain relative paths to files that the spinning process
# needs to access.
#
# $thread - Thread to spin
# $in_path - Input file path as a Path::Tiny object, or undef
# $out_fh - Output file handle to which to write the HTML
# $out_path - Output file path as a Path::Tiny object, or undef
# $input_type - Optional one-word description of input type
sub _parse_document {
my ($self, $thread, $in_path, $out_fh, $out_path, $input_type) = @_;
# Parse the thread into paragraphs and reverse them to form a stack.
my @input = reverse($self->_split_paragraphs($thread));
# Initialize object state for a new document.
#<<<
$self->{input} = [[\@input, $in_path, 1]];
$self->{input_type} = $input_type // 'thread';
$self->{macro} = {};
$self->{out_fh} = $out_fh;
$self->{out_path} = $out_path;
$self->{rss} = [];
$self->{space} = q{};
$self->{state} = ['BLOCK'];
$self->{variable} = {};
#>>>
# Parse the thread file a paragraph at a time. _split_paragraphs takes
# care of ensuring that each paragraph contains the complete value of a
# command argument.
#
# The stack of parsed input is maintained in $self->{input} and the file
# being parsed at any given point is $self->{input}[-1]. _cmd_include
# will push new file information into this stack, and we pop off the top
# element of the stack when we exhaust its paragraphs.
while ($self->{input}->@*) {
while (defined(my $para = pop($self->{input}[-1][0]->@*))) {
my $result = $self->_parse(_escape($para), 1);
$result =~ s{ \A (?:\s*\n)+ }{}xms;
if ($result !~ m{ \A \s* \z }xms) {
$self->_output($result);
}
}
pop($self->{input}->@*);
}
# Close open tags and print any deferred whitespace.
print_fh($out_fh, $out_path, $self->_block_end(), $self->{space});
return;
}
##############################################################################
# Supporting functions
##############################################################################
# Generate the format attributes for an HTML tag.
#
# $format - Format argument to the command
#
# Returns: String suitable for interpolating into the tag, which means it
# starts with a space if non-empty
sub _format_attr {
my ($self, $format) = @_;
return q{} if !$format;
# Formats starting with # become id tags. Otherwise, it is a class.
if ($format =~ s{ \A \# }{}xms) {
if ($format =~ m{ \s }xms) {
$self->_warning(qq(space in anchor "#$format"));
}
return qq{ id="$format"};
} else {
return qq{ class="$format"};
}
}
# Split a block of text apart at paired newlines so that it can be reparsed as
# paragraphs, but combine a paragraph with the next one if it has an
# unbalanced number of open brackets. Used to parse the top-level structure
# of a file and by containiners like \block that can contain multiple
# paragraphs.
#
# $text - Text to split
#
# Returns: List of paragraphs
sub _split_paragraphs {
my ($self, $text) = @_;
my @paragraphs;
# Collapse any consecutive newlines at the start to a single newline.
$text =~ s{ \A \n (\s*\n)+ }{\n}xms;
# Pull paragraphs off the text one by one.
while ($text ne q{} && $text =~ s{ \A ( .*? (?: \n\n+ | \s*\z ) )}{}xms) {
my $para = $1;
my $open_count = ($para =~ tr{\[}{});
my $close_count = ($para =~ tr{\]}{});
while ($text ne q{} && $open_count > $close_count) {
if ($text =~ s{ \A ( .*? (?: \n\n+ | \s*\z ) )}{}xms) {
my $extra = $1;
$open_count += ($extra =~ tr{\[}{});
$close_count += ($extra =~ tr{\]}{});
$para .= $extra;
} else {
# This should be impossible.
break;
}
}
push(@paragraphs, $para);
}
# Return the paragraphs.
return @paragraphs;
}
# A simple block element. Handles splitting the argument on paragraph
# boundaries and surrounding things properly with the tag.
#
# $tag - Name of the tag
# $border - Initial string to output before the block
# $format - Format string for the block
# $text - Contents of the block
#
# Returns: Block context, output
sub _block {
my ($self, $tag, $border, $format, $text) = @_;
my $output = $border . "<$tag" . $self->_format_attr($format) . '>';
$self->_block_start();
# If the format is packed, the contents of the block should be treated as
# inline rather than block and not surrounded by . This is how compact
# bullet or number lists are done. Otherwise, parse each containing
# paragraph separately in block context.
if ($format eq 'packed') {
$output .= $self->_parse($text, 0);
} else {
my @paragraphs = $self->_split_paragraphs($text);
$output .= join(q{}, map { $self->_parse($_, 1) } @paragraphs);
}
$output .= $self->_block_end();
# Close the tag. The tag may have contained attributes, which aren't
# allowed in the closing tag.
$tag =~ s{ [ ] .* }{}xms;
$output =~ s{ \s* \z }{$tag>}xms;
if ($format ne 'packed') {
$output .= "\n";
}
return (1, $output);
}
# A simple inline element.
#
# $tag - Name of the tag
# $format - Format string
# $text - Contents of the element
#
# Returns: Inline context, output
sub _inline {
my ($self, $tag, $format, $text) = @_;
my $output = "<$tag" . $self->_format_attr($format) . '>';
$output .= $self->_parse($text) . "$tag>";
return (0, $output);
}
# A heading. Handles formats of #something specially by adding an
# tag inside the heading tag to make it a valid target for internal links even
# in old browsers.
#
# $level - Level of the heading
# $format - Format string
# $text - Content of the heading
#
# Returns: Block context, output
sub _heading {
my ($self, $level, $format, $text) = @_;
my $output = $self->_border_end();
$text = $self->_parse($text);
# Special handling for anchors in the format string.
if ($format =~ m{ \A \# }xms) {
my $tag = $format;
$tag =~ s{ \A \# }{}xms;
$text = qq{$text};
}
# Build the output.
$output .= " tag inside the
# }xmsg;
}
# If there was a format, apply it to every tag in the quote.
if ($format) {
my $format_attr = $self->_format_attr($format);
$quote =~ s{ }{ }xmsg;
}
# Done with the quote.
$output .= $quote;
# Format the author and citation.
if ($author) {
my $prefix = q{};
if ($format eq 'broken' || $format eq 'short') {
$output .= qq{ \n};
} else {
$output .= qq{ \n};
$prefix = '— ';
}
if ($cite) {
$output .= " $prefix$author,\n $cite\n";
} else {
$output .= " $prefix$author\n";
}
$output .= '
') }
sub _cmd_bold { my ($self, @a) = @_; return $self->_inline('b', @a) }
sub _cmd_cite { my ($self, @a) = @_; return $self->_inline('cite', @a) }
sub _cmd_class { my ($self, @a) = @_; return $self->_inline('span', @a) }
sub _cmd_code { my ($self, @a) = @_; return $self->_inline('code', @a) }
sub _cmd_emph { my ($self, @a) = @_; return $self->_inline('em', @a) }
sub _cmd_italic { my ($self, @a) = @_; return $self->_inline('i', @a) }
sub _cmd_strike { my ($self, @a) = @_; return $self->_inline('strike', @a) }
sub _cmd_strong { my ($self, @a) = @_; return $self->_inline('strong', @a) }
sub _cmd_sub { my ($self, @a) = @_; return $self->_inline('sub', @a) }
sub _cmd_sup { my ($self, @a) = @_; return $self->_inline('sup', @a) }
sub _cmd_under { my ($self, @a) = @_; return $self->_inline('u', @a) }
#>>>
# The headings.
sub _cmd_h1 { my ($self, @a) = @_; return $self->_heading(1, @a); }
sub _cmd_h2 { my ($self, @a) = @_; return $self->_heading(2, @a); }
sub _cmd_h3 { my ($self, @a) = @_; return $self->_heading(3, @a); }
sub _cmd_h4 { my ($self, @a) = @_; return $self->_heading(4, @a); }
sub _cmd_h5 { my ($self, @a) = @_; return $self->_heading(5, @a); }
sub _cmd_h6 { my ($self, @a) = @_; return $self->_heading(6, @a); }
# A horizontal rule.
sub _cmd_rule {
my ($self) = @_;
return (1, $self->_border_end() . "
\n");
}
# Simple block commands.
sub _cmd_div {
my ($self, $format, $text) = @_;
return $self->_block('div', q{}, $format, $text);
}
sub _cmd_block {
my ($self, $format, $text) = @_;
return $self->_block('blockquote', q{}, $format, $text);
}
sub _cmd_bullet {
my ($self, $format, $text) = @_;
my $border = $self->_border_start('bullet', "\n", "
\n\n");
return $self->_block('li', $border, $format, $text);
}
sub _cmd_number {
my ($self, $format, $text) = @_;
my $border = $self->_border_start('number', "\n", "
\n\n");
return $self->_block('li', $border, $format, $text);
}
# A description list entry.
#
# $format - Format string
# $heading - Initial heading
# $text - Body text
sub _cmd_desc {
my ($self, $format, $heading, $text) = @_;
$heading = $self->_parse($heading);
my $format_attr = $self->_format_attr($format);
my $border = $self->_border_start('desc', "\n", "
\n\n");
my $initial = $border . "_format_attr($format) . ' />';
return (1, $output);
}
# Include a file. Note that this includes a file after the current paragraph,
# not immediately, which may be a bit surprising.
sub _cmd_include {
my ($self, $file) = @_;
$file = $self->_file_path($self->_parse($file));
# Read the thread, split it on paragraphs, and reverse it to make a stack.
my $thread = $self->_read_file($file);
my @paragraphs = reverse($self->_split_paragraphs($thread));
# Add it to the file stack.
push($self->{input}->@*, [\@paragraphs, $file, 1]);
# Expand into empty output.
return (1, q{});
}
# A link to a URL or partial URL.
#
# $format - Format string
# $url - Target URL
# $text - Anchor text
sub _cmd_link {
my ($self, $format, $url, $text) = @_;
$url = $self->_parse($url);
$text = $self->_parse($text);
my $format_attr = $self->_format_attr($format);
return (0, qq{$text});
}
# Preformatted text. This does not use _block because we don't want to split
# the contained text into paragraphs and we want to parse it all in inline
# context always.
sub _cmd_pre {
my ($self, $format, $text) = @_;
my $output = $self->_border_end();
$output .= '
_format_attr($format) . '>';
$output .= $self->_parse($text);
$output .= "
\n";
return (1, $output);
}
# Used for the leading quotes that I have on many of my pages. If the format
# is "broken", adds line breaks at the end of each line.
#
# $format - Format string, used as the format for the main . Values broken and short trigger special handling,
# such as adding line breaks or changing the attribution class.
# $quote - Text of the quote
# $author - Author of the quote
# $cite - Attribution of the quote
sub _cmd_quote {
my ($self, $format, $quote, $author, $cite) = @_;
$author = $self->_parse($author);
$cite = $self->_parse($cite);
my $output = $self->_border_end() . q{
};
# Parse the contents of the quote in a new block context.
$self->_block_start();
my @paragraphs = $self->_split_paragraphs($quote);
$quote = join(q{}, map { $self->_parse($_, 1) } @paragraphs);
$quote .= $self->_block_end();
# Remove trailing newlines.
$quote =~ s{ \n+ \z }{}xms;
# If this is a broken quote, add line breaks to each line.
if ($format eq 'broken') {
$quote =~ s{ ( \S [ ]* ) ( \n\s* (?!)\S )}{$1
\n";
return (1, $output);
}
# Given the name of a product, return the release date of the product.
sub _cmd_release {
my ($self, $package) = @_;
$package = $self->_parse($package);
if (!$self->{versions}) {
$self->_warning('no package release information available');
return (0, q{});
}
my $date = $self->{versions}->release_date($package);
if (!defined($date)) {
$self->_warning(qq(no release date known for "$package"));
return (0, q{});
}
return (0, $date);
}
# Used to save RSS feed information for the page. Doesn't output anything
# directly; the RSS feed information is used later in _cmd_heading.
sub _cmd_rss {
my ($self, $url, $title) = @_;
$url = $self->_parse($url);
$title = $self->_parse($title);
push($self->{rss}->@*, [$url, $title]);
return (1, q{});
}
# Used to end each page, this adds the navigation links and my standard
# address block.
sub _cmd_signature {
my ($self) = @_;
my $input_path = $self->{input}[-1][1];
my $output = $self->_border_end();
# If we're spinning from standard input to standard output, don't add any
# of the standard footer, just close the HTML tags.
if (!defined($input_path) && !defined($self->{out_path})) {
$output .= "\n\n";
return (1, $output);
}
# Add the end-of-page navbar if we have sitemap information.
if ($self->{sitemap} && $self->{output}) {
my $page = $self->{out_path}->relative($self->{output});
$output .= join(q{}, $self->{sitemap}->navbar($page)) . "\n";
}
# Figure out the modification dates. Use the Git repository if available.
my $now = strftime('%Y-%m-%d', gmtime());
my $modified = $now;
if (defined($input_path)) {
$modified = strftime('%Y-%m-%d', gmtime($input_path->stat()->[9]));
}
if ($self->{repository} && $self->{source}) {
if (path($self->{source})->subsumes($input_path)) {
my $repository = $self->{repository};
$modified = $self->{repository}->run(
'log', '-1', '--format=%ct', "$input_path",
);
if ($modified) {
$modified = strftime('%Y-%m-%d', gmtime($modified));
}
}
}
# Determine which template to use and substitute in the appropriate times.
$output .= "\n";
my $link = qq{spun};
if ($modified eq $now) {
$output .= " Last modified and\n $link $modified\n";
} else {
$output .= " Last $link\n";
$output .= " $now from $self->{input_type} modified $modified\n";
}
# Close out the document.
$output .= "\n\n
$2}xmsg;
# Remove
tags for blank lines or at the start.
$quote =~ s{ \n
}{\n}xmsg;
$quote =~ s{
}{