Support spinning a scalar of thread

This should be the last major API change for now. Remove spin_fh and replace the public interface of App::DocKnot::Spin::Thread with spin_thread (which takes a scalar containing thread and returns the resulting HTML) and spin_thread_file (the renamed spin_file, which takes an optional input file and an optional output file).
author: Russ Allbery <rra@cpan.org> 2021-09-09 17:32:06 -0700
committer: Russ Allbery <rra@cpan.org> 2021-09-09 17:32:06 -0700
commit: a2dcb67d1dd33d97bc7cf7145e231612bd716432 (patch)
tree: faae99cb2ab69bb628e54a4d89aea9108a2912df
parent: f43e7f41e58cb096d0f275fb8ae6940d359d699e (diff)
8 files changed, 181 insertions, 112 deletions
diff --git a/lib/App/DocKnot/Command.pm b/lib/App/DocKnot/Command.pm
index 6d3adbc..50da739 100644
--- a/lib/App/DocKnot/Command.pm
+++ b/lib/App/DocKnot/Command.pm
@@ -84,7 +84,7 @@ our %COMMANDS = (
         maximum => 2,
     },
     'spin-thread' => {
-        method  => 'spin_file',
+        method  => 'spin_thread_file',
         module  => 'App::DocKnot::Spin::Thread',
         options => ['style-url|s=s'],
         maximum => 2,
diff --git a/lib/App/DocKnot/Spin.pm b/lib/App/DocKnot/Spin.pm
index 48fc398..2290e31 100644
--- a/lib/App/DocKnot/Spin.pm
+++ b/lib/App/DocKnot/Spin.pm
@@ -297,13 +297,8 @@ sub _pod2html {
     $podthread->output_string(\$data);
     $podthread->parse_file($source);
 
-    # Run that through spin to convert to HTML.
-    my $page;
-    open(my $in_fh,  '<', \$data);
-    open(my $out_fh, '>', \$page);
-    $self->{thread}->spin_fh($in_fh, q{-}, $out_fh, q{-});
-    close($in_fh);
-    close($out_fh);
+    # Spin that thread into HTML.
+    my $page = $self->{thread}->spin_thread($data);
 
     # Push the result through _write_converter_output.
     my $file = $source;
@@ -426,11 +421,7 @@ sub _process_file {
             return if (-M $file >= -M $output && (stat($output))[9] >= $time);
         }
         _print_checked("Spinning $shortout\n");
-        open(my $in_fh,  '<', $input);
-        open(my $out_fh, '>', $output);
-        $self->{thread}->spin_fh($in_fh, $input, $out_fh, $output);
-        close($in_fh);
-        close($out_fh);
+        $self->{thread}->spin_thread_file($input, $output);
     } else {
         my ($extension) = ($file =~ m{ [.] ([^.]+) \z }xms);
         if (defined($extension) && $rules{$extension}) {
diff --git a/lib/App/DocKnot/Spin/Thread.pm b/lib/App/DocKnot/Spin/Thread.pm
index d7cca5e..f9831c5 100644
--- a/lib/App/DocKnot/Spin/Thread.pm
+++ b/lib/App/DocKnot/Spin/Thread.pm
@@ -85,16 +85,11 @@ my %COMMANDS = (
 # Input and output
 ##############################################################################
 
-# Read a file and return its data in a form suitable for the processing stack.
+# Read a file and check it for bad line endings.
 #
-# $fh   - Input file handle
-# $path - Input path
+# $path - File path
 #
-# Returns: List suitable for the processing stack with the following elements:
-#            $paragraphs_ref - The input text split into paragraphs and
-#                              reversed so that it can be used as a stack
-#            $in_path        - Path to the input file
-#            $lineno         - Current processing line number
+# Returns: Contents of the file
 sub _read_file {
     my ($self, $fh, $path) = @_;
     my $text = slurp($fh);
@@ -105,9 +100,8 @@ sub _read_file {
         $self->_warning($m);
     }
 
-    # Parse the text into paragraphs and return the data for the files stack.
-    my @paragraphs = reverse($self->_split_paragraphs($text));
-    return [\@paragraphs, $path, 1];
+    # Return the contents.
+    return $text;
 }
 
 # print with error checking and an explicit file handle.  autodie
@@ -637,6 +631,56 @@ sub _parse {
     return $output;
 }
 
+# The top-level function for parsing a thread document.  Be aware that the
+# working directory from which this function is run matters a great deal,
+# since thread may contain relative paths to files that the spinning process
+# needs to access.
+#
+# $thread   - Thread to spin
+# $in_path  - Input file path if any, used for error reporting
+# $out_fh   - Output file handle to which to write the HTML
+# $out_path - Optional output file path for error reporting and page links
+sub _parse_document {
+    my ($self, $thread, $in_path, $out_fh, $out_path) = @_;
+
+    # Parse the thread into paragraphs and reverse them to form a stack.
+    my @input = reverse($self->_split_paragraphs($thread));
+
+    # Initialize object state for a new document.
+    $self->{input}    = [[\@input, $in_path, 1]];
+    $self->{id}       = undef;
+    $self->{macro}    = {};
+    $self->{out_fh}   = $out_fh;
+    $self->{out_path} = $out_path // q{-};
+    $self->{rss}      = [];
+    $self->{space}    = q{};
+    $self->{state}    = ['BLOCK'];
+    $self->{variable} = {};
+
+    # Parse the thread file a paragraph at a time.  _split_paragraphs takes
+    # care of ensuring that each paragraph contains the complete value of a
+    # command argument.
+    #
+    # The stack of parsed input is maintained in $self->{input} and the file
+    # being parsed at any given point is $self->{input}[-1].  _cmd_include
+    # will push new file information into this stack, and we pop off the top
+    # element of the stack when we exhaust its paragraphs.
+    while ($self->{input}->@*) {
+        while (defined(my $para = pop($self->{input}[-1][0]->@*))) {
+            my $result = $self->_parse(_escape($para), 1);
+            $result =~ s{ \A (?:\s*\n)+ }{}xms;
+            if ($result !~ m{ \A \s* \z }xms) {
+                $self->_output($result);
+            }
+        }
+        pop($self->{input}->@*);
+    }
+
+    # Close open tags and print any deferred whitespace.
+    _print_fh($out_fh, $out_path, $self->_block_end(), $self->{space});
+    return;
+}
+
 ##############################################################################
 # Supporting functions
 ##############################################################################
@@ -1121,9 +1165,15 @@ sub _cmd_image {
 sub _cmd_include {
     my ($self, $file) = @_;
     $file = realpath($self->_parse($file));
-    open(my $fh, '<', $file);
-    push($self->{input}->@*, $self->_read_file($fh, $file));
-    close($fh);
+
+    # Read the thread, split it on paragraphs, and reverse it to make a stack.
+    my $thread     = $self->_read_file($file);
+    my @paragraphs = reverse($self->_split_paragraphs($thread));
+
+    # Add it to the file stack.
+    push($self->{input}->@*, [\@paragraphs, $file, 1]);
+
+    # Expand into empty output.
     return (1, q{});
 }
 
@@ -1398,55 +1448,19 @@ sub new {
     return $self;
 }
 
-# Convert thread to HTML from a file descriptor.  Be aware that the working
-# directory from which this function is run matters a great deal, since thread
-# may contain relative paths to files that the spinning process needs to
-# access.
+# Convert thread to HTML and return the output as a string.  The working
+# directory still matters for file references in the thread.
 #
-# $in_fh    - Input file handle of thread
-# $in_path  - Input file path, used for error reporting
-# $out_fh   - Output file handle to which to write the HTML
-# $out_path - Output file path, used for error reporting and page links
-sub spin_fh {
-    my ($self, $in_fh, $in_path, $out_fh, $out_path) = @_;
-
-    # Initialize object state for a new document.
-    $self->{input}    = [$self->_read_file($in_fh, $in_path)];
-    $self->{id}       = undef;
-    $self->{macro}    = {};
-    $self->{out_fh}   = $out_fh;
-    $self->{out_path} = $out_path;
-    $self->{rss}      = [];
-    $self->{space}    = q{};
-    $self->{state}    = ['BLOCK'];
-    $self->{variable} = {};
-
-    # Read the entirety of the input file, split into paragraphs, and add it
-    # to the processing stack.
-    push($self->{input}->@*, $self->_read_file($in_fh, $in_path));
-
-    # Parse the thread file a paragraph at a time.  _split_paragraphs takes
-    # care of ensuring that each paragraph contains the complete value of a
-    # command argument.
-    #
-    # The stack of parsed input is maintained in $self->{input} and the file
-    # being parsed at any given point is $self->{input}[-1].  _cmd_include
-    # will push new file information into this stack, and we pop off the top
-    # element of the stack when we exhaust its paragraphs.
-    while ($self->{input}->@*) {
-        while (defined(my $para = pop($self->{input}[-1][0]->@*))) {
-            my $result = $self->_parse(_escape($para), 1);
-            $result =~ s{ \A (?:\s*\n)+ }{}xms;
-            if ($result !~ m{ \A \s* \z }xms) {
-                $self->_output($result);
-            }
-        }
-        pop($self->{input}->@*);
-    }
-
-    # Close open tags and print any deferred whitespace.
-    _print_fh($out_fh, $out_path, $self->_block_end(), $self->{space});
-    return;
+# $thread  - Thread to spin
+#
+# Returns: Resulting HTML
+sub spin_thread {
+    my ($self, $thread) = @_;
+    my $result;
+    open(my $out_fh, '>', \$result);
+    $self->_parse_document($thread, q{-}, $out_fh, q{-});
+    close($out_fh);
+    return $result;
 }
 
 # Spin a single file of thread to HTML.
@@ -1455,31 +1469,30 @@ sub spin_fh {
 # $output - Output file (if not given, assumes standard output)
 #
 # Raises: Text exception on processing error
-sub spin_file {
+sub spin_thread_file {
     my ($self, $input, $output) = @_;
     my $cwd = getcwd() or die "cannot get current directory: $!\n";
-    my ($in_fh, $out_fh);
+    my $out_fh;
+    my $thread;
 
-    # When spinning a single file, the input file must not be a directory.  We
-    # do the work from the directory of the file to ensure that relative file
-    # references resolve properly.
+    # Read the input file.  We do the work from the directory of the file to
+    # ensure that relative file references resolve properly.
     if (defined($input)) {
-        $input = realpath($input) or die "cannot canonicalize $input: $!\n";
-        if (-d $input) {
-            die "input file $input must be a regular file\n";
-        }
-        open($in_fh, '<', $input);
+        my $path = realpath($input) or die "cannot canonicalize $input: $!\n";
+        $input  = $path;
+        $thread = slurp($input);
         my (undef, $input_dir) = fileparse($input);
         chdir($input_dir);
     } else {
-        $input = q{-};
-        open($in_fh, '<&', 'STDIN');
+        $input  = q{-};
+        $thread = slurp(\*STDIN);
     }
 
     # Open the output file.
     if (defined($output)) {
-        $output = realpath($output) or die "cannot canonicalize $output: $!\n";
-        $output =~ s{ /+ \z }{}xms;
+        my $path = realpath($output)
+          or die "cannot canonicalize $output: $!\n";
+        $output = $path;
         open($out_fh, '>', $output);
     } else {
         $output = q{-};
@@ -1487,10 +1500,9 @@ sub spin_file {
     }
 
     # Do the work.
-    $self->spin_fh($in_fh, $input, $out_fh, $output);
+    $self->_parse_document($thread, $input, $out_fh, $output);
 
     # Clean up and restore the working directory.
-    close($in_fh);
     close($out_fh);
     chdir($cwd);
     return;
@@ -1598,14 +1610,22 @@ data for the C<\release> and C<\version> commands.
 
 =over 4
 
-=item spin_file([INPUT[, OUTPUT]])
+=item spin_thread(THREAD)
+
+Convert the given thread to HTML, returning the result.  When run via this
+API, App::DocKnot::Spin::Thread will not be able to obtain sitemap information
+even if a sitemap was provided and therefore will not add inter-page links.
+
+=item spin_thread_file([INPUT[, OUTPUT]])
 
 Convert a single thread file to HTML.  INPUT is the path of the thread file
 and OUTPUT is the path of the output file.  OUTPUT or both INPUT and OUTPUT
 may be omitted, in which case standard input or standard output, respectively,
-will be used.  If OUTPUT is omitted, App::DocKnot::Spin::Thread will not be
-able to obtain sitemap information even if a sitemap was provided and
-therefore will not add inter-page links.
+will be used.
+
+If OUTPUT is omitted, App::DocKnot::Spin::Thread will not be able to obtain
+sitemap information even if a sitemap was provided and therefore will not add
+inter-page links.
 
 =back
 
diff --git a/t/data/perl.conf b/t/data/perl.conf
index ae60011..3c4adcc 100644
--- a/t/data/perl.conf
+++ b/t/data/perl.conf
@@ -10,9 +10,5 @@ $COVERAGE_LEVEL = 85;
 # Default minimum version requirement.
 $MINIMUM_VERSION = '5.024';
 
-# Additional regexes that match methods that should be considered private and
-# therefore don't have to be documented in POD coverage.
-@POD_COVERAGE_EXCLUDE = (qr{ \A spin_fh \z }xms);
-
 # File must end with this line.
 1;
diff --git a/t/data/regenerate-data b/t/data/regenerate-data
index 4aa8ba7..a7f148b 100755
--- a/t/data/regenerate-data
+++ b/t/data/regenerate-data
@@ -57,12 +57,7 @@ my $spin      = App::DocKnot::Spin::Thread->new();
 my $thread;
 $podthread->output_string(\$thread);
 $podthread->parse_file($source);
-my $html;
-open(my $in_fh,  '<', \$thread);
-open(my $out_fh, '>', \$html);
-$spin->spin_fh($in_fh, q{-}, $out_fh, q{-});
-close($in_fh);
-close($out_fh);
+my $html = $spin->spin_thread($thread);
 
 # Add the additional metadata that should be added by spin.
 my $links = <<'EOD';
diff --git a/t/spin/errors.t b/t/spin/errors.t
index a9069ae..61d1538 100755
--- a/t/spin/errors.t
+++ b/t/spin/errors.t
@@ -38,7 +38,7 @@ require_ok('App::DocKnot::Spin::Thread');
 my $input = File::Spec->catfile('t', 'data', 'spin', 'errors', 'errors.th');
 my $spin  = App::DocKnot::Spin::Thread->new();
 my ($stdout, $stderr) = capture {
-    $spin->spin_file($input);
+    $spin->spin_thread_file($input);
 };
 
 # Strip off the prefix and simplify the file name, and then check against the
diff --git a/t/spin/file.t b/t/spin/file.t
index ddc563e..354ec77 100755
--- a/t/spin/file.t
+++ b/t/spin/file.t
@@ -12,21 +12,35 @@ use warnings;
 
 use lib 't/lib';
 
+use Capture::Tiny qw(capture_stdout);
+use Cwd qw(getcwd);
+use Fcntl qw(SEEK_SET);
 use File::Spec;
 use File::Temp;
 use Perl6::Slurp qw(slurp);
 use Test::DocKnot::Spin qw(is_spin_output);
 
-use Test::More tests => 2;
+use Test::More tests => 3;
 
 require_ok('App::DocKnot::Spin::Thread');
 
 # Spin a single file.
 my $tempfile = File::Temp->new();
-my $datadir  = File::Spec->catfile('t',      'data',   'spin');
-my $input    = File::Spec->catfile($datadir, 'input',  'index.th');
-my $expected = File::Spec->catfile($datadir, 'output', 'index.html');
+my $datadir  = File::Spec->catfile('t',       'data', 'spin');
+my $inputdir = File::Spec->catfile($datadir,  'input');
+my $input    = File::Spec->catfile($inputdir, 'index.th');
+my $expected = File::Spec->catfile($datadir,  'output', 'index.html');
 my $spin
   = App::DocKnot::Spin::Thread->new({ 'style-url' => '/~eagle/styles/' });
-$spin->spin_file($input, $tempfile->filename);
-is_spin_output($tempfile, $expected, 'Single file conversion');
+$spin->spin_thread_file($input, $tempfile->filename);
+is_spin_output($tempfile, $expected, 'spin_thread_file with output path');
+
+# The same but spin to standard output.
+my $html = capture_stdout {
+    $spin->spin_thread_file($input);
+};
+$tempfile->seek(0, SEEK_SET);
+$tempfile->truncate(0);
+print {$tempfile} $html or die "Cannot write to $tempfile: $!\n";
+$tempfile->flush();
+is_spin_output($tempfile->filename, $expected, 'spin_thread_file to stdout');
diff --git a/t/spin/thread.t b/t/spin/thread.t
new file mode 100755
index 0000000..8373eba
--- /dev/null
+++ b/t/spin/thread.t
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+#
+# Test running spin on a scalar containing thread source.
+#
+# Copyright 2021 Russ Allbery <rra@cpan.org>
+#
+# SPDX-License-Identifier: MIT
+
+use 5.024;
+use autodie;
+use warnings;
+
+use lib 't/lib';
+
+use Cwd qw(getcwd);
+use File::Spec;
+use File::Temp;
+use Perl6::Slurp qw(slurp);
+use Test::DocKnot::Spin qw(is_spin_output);
+
+use Test::More tests => 2;
+
+require_ok('App::DocKnot::Spin::Thread');
+
+# Test data file paths.
+my $datadir  = File::Spec->catfile('t',       'data', 'spin');
+my $inputdir = File::Spec->catfile($datadir,  'input');
+my $input    = File::Spec->catfile($inputdir, 'index.th');
+my $expected = File::Spec->catfile($datadir,  'output', 'index.html');
+
+# The expected output is a bit different since we won't add timestamp
+# information or the filename to the comment, so we have to generate our
+# expected output file.
+my $tempfile = File::Temp->new();
+my $output   = slurp($expected);
+$output =~ s{ from [ ] index[.]th [ ] }{}xms;
+$output =~ s{ <address> .* </address> \n }{}xms;
+print {$tempfile} $output or die "Cannot write to $tempfile: $!\n";
+$tempfile->flush();
+
+# Spin the file using the spin_thread() API, using the right working directory
+# to expand \image and the like.
+my $spin
+  = App::DocKnot::Spin::Thread->new({ 'style-url' => '/~eagle/styles/' });
+my $thread = slurp($input);
+my $cwd    = getcwd();
+chdir($inputdir);
+my $html = $spin->spin_thread($thread);
+chdir($cwd);
+my $outfile = File::Temp->new();
+print {$outfile} $html or die "Cannot write to $outfile: $!\n";
+$outfile->flush();
+is_spin_output($outfile->filename, $tempfile->filename, 'spin_thread');
author	Russ Allbery <rra@cpan.org>	2021-09-09 17:32:06 -0700
committer	Russ Allbery <rra@cpan.org>	2021-09-09 17:32:06 -0700
commit	a2dcb67d1dd33d97bc7cf7145e231612bd716432 (patch)
tree	faae99cb2ab69bb628e54a4d89aea9108a2912df
parent	f43e7f41e58cb096d0f275fb8ae6940d359d699e (diff)