summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2006-12-28 02:20:09 +0000
committer fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2006-12-28 02:20:09 +0000
commit a1a30d69bd83e57e494cd3ca5077086077731998 (patch)
tree 8193378eb51d13a20cf9f28ce479af431d3dcba4
parent a1539d9ab8141ab11add5b304792e7c9bc7a363a (diff)
+ Removed the convenience symlinks (which don't work on Windows under
  Cygwin, due to Windows' lack of true symbolic links).
+ Modified the wrappers to use 'pandoc' instead of the symlinks.
+ Modified the Makefile to remove all references to the symlinks.
+ Removed code from Main.hs that made pandoc's behavior depend on the
  name of the calling program.
+ Added code to Main.hs that sets default reader and writer based on
  extensions of input and output filenames (if provided). (Thanks to
  roktas for the idea.)
+ Modified README and man pages accordingly.
+ Removed README-WINDOWS target from Makefile. It is no longer needed
  now that we don't have the symlinks.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@295 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r-- Makefile 19
-rw-r--r-- README 139
-rw-r--r-- man/man1/html2markdown.1 1
-rw-r--r-- man/man1/latex2markdown.1 1
-rw-r--r-- man/man1/markdown2html.1 1
-rw-r--r-- man/man1/markdown2latex.1 1
-rw-r--r-- man/man1/markdown2pdf.1 27
-rw-r--r-- man/man1/markdown2rst.1 1
-rw-r--r-- man/man1/markdown2rtf.1 1
-rw-r--r-- man/man1/markdown2s5.1 1
-rw-r--r-- man/man1/pandoc.1 50
-rw-r--r-- man/man1/rst2markdown.1 1
-rw-r--r-- man/man1/web2markdown.1 3
-rw-r--r-- src/Main.hs 157
-rw-r--r-- src/wrappers/markdown2pdf.in 11
-rw-r--r-- src/wrappers/web2markdown.in 15
16 files changed, 198 insertions, 231 deletions
diff --git a/Makefile b/Makefile
index 5ea1ae24d..e6e87cea2 100644
--- a/Makefile
+++ b/Makefile
@@ -26,8 +26,6 @@ EXECSBASE := $(shell sed -ne 's/^[Ee]xecutable:[[:space:]]*//p' $(CABAL).in)
# Install targets
#-------------------------------------------------------------------------------
WRAPPERS := web2markdown markdown2pdf
-SYMLINKS := markdown2html markdown2latex markdown2s5 markdown2rst \
- markdown2rtf html2markdown latex2markdown rst2markdown
# Add .exe extensions if we're running Windows/Cygwin.
EXTENSION := $(shell uname | tr '[:upper:]' '[:lower:]' | \
sed -ne 's/^cygwin.*$$/\.exe/p')
@@ -96,12 +94,6 @@ all: build-program
templates: $(SRCDIR)/templates
$(MAKE) -C $(SRCDIR)/templates
-.PHONY: symlinks
-cleanup_files+=$(SYMLINKS)
-symlinks: $(SYMLINKS)
-$(SYMLINKS): $(MAIN)
- ln -sf ./$(MAIN) $@
-
define generate-shell-script
echo "Generating $@..."; \
awk ' \
@@ -141,7 +133,7 @@ build: configure
$(BUILDCMD) build
.PHONY: build-exec
-build-exec: $(PROGS) $(SYMLINKS)
+build-exec: $(PROGS)
cleanup_files+=$(EXECS)
$(EXECS): build
for f in $@; do \
@@ -201,9 +193,8 @@ install-exec: build-exec
fi; \
$(INSTALL_PROGRAM) $$f $(BINPATH)/; \
done
- cd $(BINPATH); for f in $(SYMLINKS); do ln -sf $(MAIN) $$f; done
uninstall-exec:
- -for f in $(notdir $(PROGS) $(SYMLINKS)); do rm -f $(BINPATH)/$$f; done ;
+ -for f in $(notdir $(PROGS)); do rm -f $(BINPATH)/$$f; done ;
# Program + user documents installation.
.PHONY: install-program uninstall-program
@@ -295,15 +286,11 @@ $(osx_dmg_name): $(osx_pkg_name)
.PHONY: win-pkg
win_pkg_name:=$(RELNAME).zip
-win_docs:=COPYING.txt COPYRIGHT.txt BUGS.txt README-WINDOWS.txt README-WINDOWS.html
+win_docs:=COPYING.txt COPYRIGHT.txt BUGS.txt README.txt README.html
cleanup_files+=$(win_pkg_name) $(win_docs)
win-pkg: $(win_pkg_name)
$(win_pkg_name): $(THIS).exe $(win_docs)
zip -r $(win_pkg_name) $(THIS).exe $(win_docs)
-cleanup_files+=README-WINDOWS
-README-WINDOWS: README
- sed -e '/^Requirements/,/^\[fancyvrb\]:/ d' \
- -e '/^Character encodings/,/mysite.com$$/ d' $< > $@
.PHONY: test test-markdown
test: $(MAIN)
diff --git a/README b/README
index 82537eb6a..6251cb58d 100644
--- a/README
+++ b/README
@@ -36,14 +36,11 @@ Requirements
============
The `pandoc` program itself does not depend on any external libraries
-or programs. The convenience programs `markdown2html`, `markdown2latex`,
-`markdown2rst`, `markdown2rtf`, `markdown2s5`, `html2markdown`,
-`latex2markdown`, and `rst2markdown` are implemented as symbolic links to
-`pandoc`.
+or programs.
The wrapper script `web2markdown` requires
- - `html2markdown` (included with Pandoc)
+ - `pandoc` (which must be in the PATH)
- a POSIX-compliant shell (installed by default on all linux and unix
systems, including Mac OS X, and in [Cygwin] for Windows),
- `HTML Tidy`
@@ -56,7 +53,7 @@ The wrapper script `web2markdown` requires
The wrapper script `markdown2pdf` requires
- - `markdown2latex` (included with Pandoc)
+ - `pandoc` (which must be in the PATH)
- a POSIX-compliant shell
- `pdflatex`, which should be part of any [LaTeX] distribution
- the [unicode] and [fancyvrb] LaTeX packages, which are included
@@ -80,47 +77,11 @@ Using Pandoc
If you run `pandoc` without arguments, it will accept input from
STDIN. If you run it with file names as arguments, it will take input
-from those files. It accepts several command-line options. For a
-list, type
-
- pandoc -h
-
-The most important options specify the format of the source file and
-the output. The default reader is markdown; the default writer is
-HTML. So if you don't specify a reader or writer, `pandoc` will
-convert markdown to HTML. For example,
-
- pandoc hello.txt
-
-will convert `hello.txt` from markdown to HTML. For other conversions,
-you must specify a reader and/or a writer using the `-r` and `-w`
-flags. To convert markdown to LaTeX, you would write:
-
- pandoc -w latex hello.txt
-
-To convert html to markdown:
-
- pandoc -r html -w markdown hello.txt
-
-Supported writers include `markdown`, `latex`, `html`, `rtf` (rich text
-format), `rst` (reStructuredText), and `s5` (which produces an HTML
-file that acts like powerpoint). Supported readers include `markdown`,
-`html`, `latex`, and `rst`. Note that the `rst` reader only parses
-a subset of reStructuredText syntax. For example, it doesn't handle
-tables, definition lists, option lists, or footnotes. It handles only the
-constructs expressible in unextended markdown. But for simple documents
-it should be adequate. The `latex` and `html` readers are also limited
-in what they can do. Because the `html` reader is picky about the HTML
-it parses, it is recommended that you pipe HTML through [HTML Tidy] before
-sending it to `pandoc`, or use the `web2markdown` script described below.
-
-By default, `pandoc` writes its output to STDOUT. If you want to
-write to a file, use the `-o` option or shell redirection:
+from those files. By default, `pandoc` writes its output to STDOUT.
+If you want to write to a file, use the `-o` option:
pandoc -o hello.html hello.txt
- pandoc hello.txt > hello.html
-
Note that you can specify multiple input files on the command line.
`pandoc` will concatenate them all (with blank lines between them)
before parsing:
@@ -131,6 +92,44 @@ before parsing:
with a proper header, rather than a fragment. For more details on this
and many other command-line options, see below.)
+The format of the input and output can be specified explicitly using
+command-line options. The input format can be specified using the
+`-r/--read` or `-f/--from` options, the output format using the
+`-w/--write` or `-t/--to` options. Thus, to convert `hello.txt` from
+markdown to LaTeX, you could type:
+
+ pandoc -f markdown -t latex hello.txt
+
+To convert `hello.html` from html to markdown:
+
+ pandoc -f html -t markdown hello.html
+
+Supported output formats include `markdown`, `latex`, `html`, `rtf`
+(rich text format), `rst` (reStructuredText), and `s5` (which produces
+an HTML file that acts like powerpoint). Supported input formats
+include `markdown`, `html`, `latex`, and `rst`. Note that the `rst`
+reader only parses a subset of reStructuredText syntax. For example,
+it doesn't handle tables, definition lists, option lists, or footnotes.
+It handles only the constructs expressible in unextended markdown.
+But for simple documents it should be adequate. The `latex` and `html`
+readers are also limited in what they can do. Because the `html`
+reader is picky about the HTML it parses, it is recommended that you
+pipe HTML through [HTML Tidy] before sending it to `pandoc`, or use the
+`web2markdown` script described below.
+
+If you don't specify a reader or writer explicitly, `pandoc` will
+try to determine the input and output format from the extensions of
+the input and output filenames. Thus, for example,
+
+ pandoc -o hello.tex hello.txt
+
+will convert `hello.txt` from markdown to LaTeX. If no output file
+is specified (so that output goes to STDOUT), or if the output file's
+extension is unknown, the output format will default to HTML.
+If no input file is specified (so that input comes from STDIN), or
+if the input files' extensions are unknown, the input format will
+be assumed to be markdown unless explicitly specified.
+
Character encodings
-------------------
@@ -150,31 +149,16 @@ The shell scripts (described below) automatically convert the input
from the local encoding to UTF-8 before running them through `pandoc`,
then convert the output back to the local encoding.
-Convenience programs and wrapper scripts
-========================================
-
-For convenience, eight variant programs are included with Pandoc:
-`markdown2html` (which is equivalent to `pandoc -w html`),
-`markdown2latex` (equivalent to `pandoc -w latex`), `markdown2rst`
-(equivalent to `pandoc -w rst`), `markdown2rtf` (equivalent to
-`pandoc -w rtf`), `markdown2s5` (equivalent to `pandoc -w s5`),
-`html2markdown` (equivalent to `pandoc -r html -w markdown`),
-`latex2markdown` (equivalent to `pandoc -r latex -w markdown`), and
-`rst2markdown` (equivalent to `pandoc -r rst -w markdown`). These
-programs take an appropriately restricted subset of `pandoc`'s
-options. (Run them with the `-h` flag for a full list of allowed
-options.)
-
-Like `pandoc`, all of these programs produce fragments by default.
-If you want to produce a standalone file, complete with a header
-and footer appropriate to the format, use the `-s` option:
+`markdown2pdf` and `web2markdown`
+=================================
- markdown2latex -s sample.txt > sample.tex
-
-Two shell scripts have also been included:
+Two shell scripts, `markdown2pdf` and `web2markdown`, are included in
+the standard Pandoc installation. (They are not included in the Windows
+binary package, as they require a POSIX shell, but they may be used
+in Windows under Cygwin.)
1. `markdown2pdf` produces a PDF file from markdown-formatted
- text, using `markdown2latex` and `pdflatex`. The default
+ text, using `pandoc` and `pdflatex`. The default
behavior of `markdown2pdf` is to create a file with the same
base name as the first argument and the extension `pdf`; thus,
for example,
@@ -190,7 +174,7 @@ Two shell scripts have also been included:
If no input file is specified, input will be taken from STDIN.
2. `web2markdown` grabs a web page from a file or URL and converts
- it to markdown-formatted text, using `tidy` and `html2markdown`.
+ it to markdown-formatted text, using `tidy` and `pandoc`.
Unless input is from STDIN, an attempt is made to determine the
character encoding of the page from the "Content-type" meta tag.
If this is not present, UTF-8 is assumed. Alternatively, a character
@@ -207,9 +191,20 @@ Command-line options
====================
Various command-line options can be used to customize the output.
-For a complete list, type
- pandoc --help
+`-f`, `--from`, `-r`, or `--read` can be used to specify the input
+format -- the format Pandoc will be converting *from*. Available
+formats are `native`, `markdown`, `rst`, `html`, and `latex`.
+
+`-t`, `--to`, `-w`, or `--write` can be used to specify the output
+format -- the format Pandoc will be converting *to*. Available formats
+are `native`, `html`, `s5`, `latex`, `markdown`, `rst`, and `rtf`.
+
+`-s` or `--standalone` indicates that a standalone document is to be
+produced (with appropriate headers and footers), rather than a fragment.
+
+`-o` or `--output` specifies the name of the output file. If no output
+filename is given, output will be sent to STDOUT.
`-p` or `--preserve-tabs` causes tabs in the source text to be
preserved, rather than converted to spaces (the default).
@@ -225,12 +220,6 @@ untranslatable HTML codes and LaTeX environments. (The LaTeX reader
does pass through untranslatable LaTeX commands, even if `-R` is not
specified.)
-`-s` or `--standalone` causes `pandoc` to produce a standalone file,
-complete with appropriate document headers. By default, `pandoc`
-produces a fragment.
-
-`-o` or `--output-file` can be used to specify an output file.
-
`-C` or `--custom-header` can be used to specify a custom document
header. To see the headers used by default, use the `-D` option:
for example, `pandoc -D html` prints the default HTML header.
diff --git a/man/man1/html2markdown.1 b/man/man1/html2markdown.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/html2markdown.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/latex2markdown.1 b/man/man1/latex2markdown.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/latex2markdown.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/markdown2html.1 b/man/man1/markdown2html.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/markdown2html.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/markdown2latex.1 b/man/man1/markdown2latex.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/markdown2latex.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/markdown2pdf.1 b/man/man1/markdown2pdf.1
index c15131a42..423ac6546 100644
--- a/man/man1/markdown2pdf.1
+++ b/man/man1/markdown2pdf.1
@@ -6,14 +6,13 @@ markdown2pdf \- converts markdown-formatted text to PDF, using pdflatex
.SH DESCRIPTION
\fBmarkdown2pdf\fR converts \fIinput\-file\fR (or text from standard
input) from markdown\-formatted plain text to PDF, using \fBpdflatex\fR.
-If no output filename is specified, the name of the output file is
-derived from the input file; thus, for example, if the input file
-is \fIhello.txt\fR, the output file will be \fIhello.pdf\fR. If
-the input is read from STDIN and no output filename is
-specified, the output file will be named \fIstdin.pdf\fR. If
-multiple input files are specified, they will be concatenated before
-conversion, and the name of the output file will be derived from
-the first input file.
+If no output filename is specified (using the \fB\-o\fR option),
+the name of the output file is derived from the input file; thus, for
+example, if the input file is \fIhello.txt\fR, the output file will be
+\fIhello.pdf\fR. If the input is read from STDIN and no output filename
+is specified, the output file will be named \fIstdin.pdf\fR. If multiple
+input files are specified, they will be concatenated before conversion,
+and the name of the output file will be derived from the first input file.
.PP
Input is assumed to be in the UTF\-8 character encoding. If your
local character encoding is not UTF\-8, you should pipe input and
@@ -21,11 +20,11 @@ output through \fBiconv\fR:
.IP
.B iconv \-t utf\-8 input.txt | pandoc | iconv \-f utf\-8
.PP
-\fBmarkdown2pdf\fR assumes that the 'unicode' package
-is in latex's search path. If this package is not included in your
-latex setup, it can be obtained from <http://ctan.org>.
+\fBmarkdown2pdf\fR assumes that the 'unicode' and 'fancyvrb' packages
+are in latex's search path. If these packages are not included in your
+latex setup, they can be obtained from <http://ctan.org>.
.PP
-\fBmarkdown2pdf\fR is a wrapper around \fBmarkdown2latex\fR.
+\fBmarkdown2pdf\fR is a wrapper around \fBpandoc\fR.
.SH OPTIONS
.TP
.B \-o FILE, \-\-output=FILE
@@ -37,10 +36,6 @@ Preserve tabs instead of converting them to spaces.
.B \-\-tab-stop=\fITABSTOP\fB
Specify tab stop (default is 4).
.TP
-.B \-R, \-\-parse-raw
-Parse untranslatable LaTeX environments as raw LaTeX,
-instead of ignoring them.
-.TP
.B \-N, \-\-number-sections
Number section headings in LaTeX output. (Default is not to number them.)
.TP
diff --git a/man/man1/markdown2rst.1 b/man/man1/markdown2rst.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/markdown2rst.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/markdown2rtf.1 b/man/man1/markdown2rtf.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/markdown2rtf.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/markdown2s5.1 b/man/man1/markdown2s5.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/markdown2s5.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/pandoc.1 b/man/man1/pandoc.1
index 82c9ae321..f6280f463 100644
--- a/man/man1/pandoc.1
+++ b/man/man1/pandoc.1
@@ -1,8 +1,6 @@
.TH PANDOC 1 "December 15, 2006" Pandoc "User Manuals"
.SH NAME
-pandoc, markdown2html, markdown2latex, markdown2rst, markdown2rtf,
-markdown2s5, html2markdown2, latex2markdown, rst2markdown \- general
-markup converter
+pandoc \- general markup converter
.SH SYNOPSIS
\fBpandoc\fR [\fIoptions\fR] [\fIinput\-file\fR]...
.SH DESCRIPTION
@@ -13,41 +11,37 @@ slide shows.
.PP
If no \fIinput\-file\fR is specified, input is read from STDIN.
Otherwise, the \fIinput\-files\fR are concatenated (with a blank
-line between each) and used as input. Output goes to standard
-output. If you want output to a file, use the \fB\-o\fR option or
-shell redirection:
+line between each) and used as input. Output goes to STDOUT by
+default. For output to a file, use the \fB\-o\fR option:
.IP
.B pandoc \-o output.html input.txt
-.IP
-.B pandoc input.txt > output.html
.PP
-The default behavior of \fIPandoc\fR is to convert the input from
-markdown\-formatted plain text to HTML. Different input and output
-formats can be specified using command\-line options. For example,
+The input and output formats may be specified using command-line options
+(see \fBOPTIONS\fR, below, for details). If these formats are not
+specified explicitly, \fIPandoc\fR will attempt to determine them
+from the extensions of the input and output filenames. If input comes
+from STDIN or from a file with an unknown extension, the input is assumed
+to be markdown. If no output filename is specified using the \fB\-o\fR
+option, or if a filename is specified but its extension is unknown,
+the output will default to HTML. Thus, for example,
.IP
-.B pandoc \-f latex \-t markdown chap1.tex > chap1.txt
+.B pandoc -o chap1.tex chap1.txt
.PP
-converts \fIchap1.tex\fR from LaTeX to markdown\-formatted plain text.
-See below for a detailed list of command\-line options.
+converts \fIchap1.txt\fR from markdown to LaTeX. And
+.IP
+.B pandoc README
.PP
-For convenience, eight variant programs are available:
-\fBmarkdown2html\fR (same as \fBpandoc \-w html\fR),
-\fBmarkdown2latex\fR (same as \fBpandoc \-w latex\fR),
-\fBmarkdown2rst\fR (same as \fBpandoc \-w rst\fR),
-\fBmarkdown2rtf\fR (same as \fBpandoc \-w rtf\fR),
-\fBmarkdown2s5\fR (same as \fBpandoc \-w s5\fR),
-\fBhtml2markdown\fR (same as \fBpandoc \-r html \-w markdown\fR),
-\fBlatex2markdown\fR (same as \fBpandoc \-r latex \-w markdown\fR),
-and \fBrst2markdown\fR (same as \fBpandoc \-r rst \-w markdown\fR).
-These programs take an appropriately restricted subset of \fBpandoc\fR's
-options. (Run them with the \fB-h\fR flag for a full list of allowed
-options.)
+converts \fIREADME\fR from markdown to HTML.
.PP
\fIPandoc\fR uses the UTF\-8 character encoding for both input and output.
If your local character encoding is not UTF\-8, you should pipe input
and output through \fBiconv\fR:
.IP
.B iconv \-t utf\-8 input.txt | pandoc | iconv \-f utf\-8
+.PP
+\fIPandoc\fR's HTML parser is not very forgiving. If your input is
+HTML, consider running it through \fBtidy\fR(1) before passing it
+to Pandoc. Or use \fBweb2markdown\fR(1), a wrapper around \fBpandoc\fR.
.SH OPTIONS
.TP
@@ -158,9 +152,7 @@ Show usage message.
.SH "SEE ALSO"
\fBweb2markdown\fR(1),
-\fBmarkdown2pdf\fR(1),
-\fBiconv\fR(1)
-
+\fBmarkdown2pdf\fR(1).
The
.I README
file distributed with Pandoc contains full documentation.
diff --git a/man/man1/rst2markdown.1 b/man/man1/rst2markdown.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/rst2markdown.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/web2markdown.1 b/man/man1/web2markdown.1
index a570cfc97..242b50671 100644
--- a/man/man1/web2markdown.1
+++ b/man/man1/web2markdown.1
@@ -16,7 +16,7 @@ option.
from STDIN, UTF-8 is assumed. A character encoding may be specified
explicitly using the \fB\-e\fR option.
.PP
-\fBweb2markdown\fR is a wrapper for \fBhtml2markdown\fR.
+\fBweb2markdown\fR is a wrapper for \fBpandoc\fR.
.SH OPTIONS
.TP
.B \-s, \-\-standalone
@@ -76,7 +76,6 @@ web2markdown \-g 'wget \-\-user=foo \-\-password=bar' mysite.com
.SH "SEE ALSO"
\fBpandoc\fR(1),
-\fBhtml2markdown\fR(1),
\fBiconv\fR(1)
.SH AUTHOR
John MacFarlane and Recai Oktas
diff --git a/src/Main.hs b/src/Main.hs
index 0f8567517..94be551d3 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -45,7 +45,7 @@ import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader,
defaultRTFHeader, defaultS5Header, defaultLaTeXHeader )
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Regex ( mkRegex, splitRegex )
+import Text.Regex ( mkRegex, matchRegex )
import System ( exitWith, getArgs, getProgName )
import System.Exit
import System.Console.GetOpt
@@ -94,8 +94,8 @@ data Opt = Opt
{ optPreserveTabs :: Bool -- ^ If @False@, convert tabs to spaces
, optTabStop :: Int -- ^ Number of spaces per tab
, optStandalone :: Bool -- ^ If @True@, include header, footer
- , optReader :: ParserState -> String -> Pandoc -- ^ Read format
- , optWriter :: WriterOptions -> Pandoc -> String -- ^ Write fmt
+ , optReader :: String -- ^ Reader format
+ , optWriter :: String -- ^ Writer format
, optParseRaw :: Bool -- ^ If @True@, parse unconvertable
-- HTML and TeX
, optCSS :: String -- ^ CSS file to link to
@@ -103,64 +103,55 @@ data Opt = Opt
, optIncludeBeforeBody :: String -- ^ File to include at top of body
, optIncludeAfterBody :: String -- ^ File to include at end of body
, optCustomHeader :: String -- ^ Custom header to use, or "DEFAULT"
- , optDefaultHeader :: String -- ^ Default header
, optTitlePrefix :: String -- ^ Optional prefix for HTML title
, optOutputFile :: String -- ^ Name of output file
, optNumberSections :: Bool -- ^ If @True@, number sections in LaTeX
, optIncremental :: Bool -- ^ If @True@, incremental lists in S5
, optSmart :: Bool -- ^ If @True@, use smart typography
, optASCIIMathML :: Bool -- ^ If @True@, use ASCIIMathML in HTML
- , optShowUsage :: Bool -- ^ If @True@, show usage message
, optDebug :: Bool -- ^ If @True@, output debug messages
}
-- | Defaults for command-line options.
-startOpt :: Opt
-startOpt = Opt
+defaultOpts :: Opt
+defaultOpts = Opt
{ optPreserveTabs = False
, optTabStop = 4
, optStandalone = False
- , optReader = readMarkdown
- , optWriter = writeHtml
+ , optReader = "" -- null for default reader
+ , optWriter = "" -- null for default writer
, optParseRaw = False
, optCSS = ""
, optIncludeInHeader = ""
, optIncludeBeforeBody = ""
, optIncludeAfterBody = ""
, optCustomHeader = "DEFAULT"
- , optDefaultHeader = defaultHtmlHeader
, optTitlePrefix = ""
, optOutputFile = "" -- null for stdout
, optNumberSections = False
, optIncremental = False
, optSmart = False
, optASCIIMathML = False
- , optShowUsage = False
, optDebug = False
}
--- | A list of functions, each transforming the options data structure in response
--- to a command-line option.
-allOptions :: [OptDescr (Opt -> IO Opt)]
-allOptions =
+-- | A list of functions, each transforming the options data structure
+-- in response to a command-line option.
+options :: [OptDescr (Opt -> IO Opt)]
+options =
[ Option "fr" ["from","read"]
(ReqArg
- (\arg opt -> case (lookup (map toLower arg) readers) of
- Just reader -> return opt { optReader = reader }
- Nothing -> error ("Unknown reader: " ++ arg) )
+ (\arg opt -> return opt { optReader = map toLower arg })
"FORMAT")
- ("Source format (" ++
- (concatMap (\(name, fn) -> " " ++ name) readers) ++ " )")
+ ("Input format (" ++ (joinWithSep ", " (map fst readers)) ++
+ ")")
, Option "tw" ["to","write"]
(ReqArg
- (\arg opt -> case (lookup (map toLower arg) writers) of
- Just (writer, defaultHeader) ->
- return opt { optWriter = writer,
- optDefaultHeader = defaultHeader }
- Nothing -> error ("Unknown writer: " ++ arg) )
+ (\arg opt -> return opt { optWriter = map toLower arg })
"FORMAT")
- ("Output format (" ++ (concatMap (\(name, fn) -> " " ++ name) writers) ++ " )")
+ ("Output format (" ++ (joinWithSep ", " (map fst writers)) ++
+ ")")
, Option "s" ["standalone"]
(NoArg
@@ -169,8 +160,7 @@ allOptions =
, Option "o" ["output"]
(ReqArg
- (\arg opt -> do
- return opt { optOutputFile = arg })
+ (\arg opt -> return opt { optOutputFile = arg })
"FILENAME")
"Name of output file"
@@ -286,57 +276,66 @@ allOptions =
, Option "h" ["help"]
(NoArg
- (\opt -> return opt { optShowUsage = True }))
+ (\_ -> do
+ prg <- getProgName
+ hPutStr stderr (reformatUsageInfo $
+ usageInfo (prg ++ " [OPTIONS] [FILES]") options)
+ exitWith $ ExitFailure 2))
"Show help"
]
--- parse name of calling program and return default reader and writer descriptions
-parseProgName name =
- case (splitRegex (mkRegex "2") (map toLower name)) of
- [from, to] -> (from, to)
- _ -> ("markdown", "html")
-
--- set default options based on reader and writer descriptions; start is starting options
-setDefaultOpts from to start =
- case ((lookup from readers), (lookup to writers)) of
- (Just reader, Just (writer, header)) -> start {optReader = reader,
- optWriter = writer,
- optDefaultHeader = header}
- _ -> start
-
--- True if single-letter option is in option list
-inOptList :: [Char] -> OptDescr (Opt -> IO Opt) -> Bool
-inOptList list desc =
- let (Option letters _ _ _) = desc in
- any (\x -> x `elem` list) letters
-
-- Reformat usage message so it doesn't wrap illegibly
+reformatUsageInfo :: String -> String
reformatUsageInfo = gsub " *--" " --" .
gsub "(-[A-Za-z0-9]) *--" "\\1, --" .
gsub " *([^- ])" "\n\t\\1"
-main = do
-
- name <- getProgName
- let (from, to) = parseProgName name
+-- Determine default reader based on source file extensions
+defaultReaderName :: [String] -> String
+defaultReaderName [] = "markdown"
+defaultReaderName (x:xs) =
+ let x' = map toLower x in
+ case (matchRegex (mkRegex ".*\\.(.*)") x') of
+ Nothing -> defaultReaderName xs -- no extension
+ Just ["xhtml"] -> "html"
+ Just ["html"] -> "html"
+ Just ["htm"] -> "html"
+ Just ["tex"] -> "latex"
+ Just ["latex"] -> "latex"
+ Just ["ltx"] -> "latex"
+ Just ["rst"] -> "rst"
+ Just ["native"] -> "native"
+ Just _ -> "markdown"
+
+-- Determine default writer based on output file extension
+defaultWriterName :: String -> String
+defaultWriterName "" = "html" -- no output file
+defaultWriterName x =
+ let x' = map toLower x in
+ case (matchRegex (mkRegex ".*\\.(.*)") x') of
+ Nothing -> "markdown" -- no extension
+ Just [""] -> "markdown" -- empty extension
+ Just ["tex"] -> "latex"
+ Just ["latex"] -> "latex"
+ Just ["ltx"] -> "latex"
+ Just ["rtf"] -> "rtf"
+ Just ["rst"] -> "rst"
+ Just ["s5"] -> "s5"
+ Just ["native"] -> "native"
+ Just ["txt"] -> "markdown"
+ Just ["text"] -> "markdown"
+ Just ["md"] -> "markdown"
+ Just ["markdown"] -> "markdown"
+ Just _ -> "html"
- let irrelevantOptions = if not ('2' `elem` name)
- then ""
- else "frtwD" ++
- (if (to /= "html" && to /= "s5") then "SmcT" else "") ++
- (if (to /= "latex") then "N" else "") ++
- (if (to /= "s5") then "i" else "") ++
- (if (from /= "html" && from /= "latex") then "R" else "")
-
- let options = filter (not . inOptList irrelevantOptions) allOptions
-
- let defaultOpts = setDefaultOpts from to startOpt
+main = do
args <- getArgs
let (actions, sources, errors) = getOpt Permute options args
if (not (null errors))
then do
+ name <- getProgName
mapM (\e -> hPutStrLn stderr e) errors
hPutStrLn stderr (reformatUsageInfo $
usageInfo (name ++ " [OPTIONS] [FILES]") options)
@@ -350,30 +349,39 @@ main = do
let Opt { optPreserveTabs = preserveTabs
, optTabStop = tabStop
, optStandalone = standalone
- , optReader = reader
- , optWriter = writer
+ , optReader = readerName
+ , optWriter = writerName
, optParseRaw = parseRaw
, optCSS = css
, optIncludeInHeader = includeHeader
, optIncludeBeforeBody = includeBefore
, optIncludeAfterBody = includeAfter
, optCustomHeader = customHeader
- , optDefaultHeader = defaultHeader
, optTitlePrefix = titlePrefix
, optOutputFile = outputFile
, optNumberSections = numberSections
, optIncremental = incremental
, optSmart = smart
, optASCIIMathML = asciiMathML
- , optShowUsage = showUsage
, optDebug = debug
} = opts
- if showUsage
- then do
- hPutStr stderr (reformatUsageInfo $ usageInfo (name ++ " [OPTIONS] [FILES]") options)
- exitWith $ ExitFailure 2
- else return ()
+ -- assign reader and writer based on options and filenames
+ let readerName' = if null readerName
+ then defaultReaderName sources
+ else readerName
+
+ let writerName' = if null writerName
+ then defaultWriterName outputFile
+ else writerName
+
+ reader <- case (lookup readerName' readers) of
+ Just r -> return r
+ Nothing -> error ("Unknown reader: " ++ readerName')
+
+ (writer, defaultHeader) <- case (lookup writerName' writers) of
+ Just (w,h) -> return (w, h)
+ Nothing -> error ("Unknown writer: " ++ writerName')
output <- if ((null outputFile) || debug)
then return stdout
@@ -385,7 +393,6 @@ main = do
hPutStr stderr $ concatMap (\s -> "INPUT=" ++ s ++ "\n") sources
else return ()
- let writingS5 = (defaultHeader == defaultS5Header)
let tabFilter = if preserveTabs then id else (tabsToSpaces tabStop)
let addBlank str = str ++ "\n\n"
let removeCRs str = filter (/= '\r') str -- remove DOS-style line endings
@@ -407,7 +414,7 @@ main = do
writerTitlePrefix = titlePrefix,
writerSmart = smart,
writerTabStop = tabStop,
- writerS5 = writingS5,
+ writerS5 = (writerName'=="s5"), -- compare the resolved name: writerName is "" when the writer is inferred from the output extension
writerIncremental = incremental,
writerNumberSections = numberSections,
writerIncludeBefore = includeBefore,
diff --git a/src/wrappers/markdown2pdf.in b/src/wrappers/markdown2pdf.in
index c222c1cbd..71d58a7cd 100644
--- a/src/wrappers/markdown2pdf.in
+++ b/src/wrappers/markdown2pdf.in
@@ -1,6 +1,6 @@
#!/bin/sh -e
-REQUIRED="markdown2latex pdflatex"
+REQUIRED="pdflatex"
### common.sh
@@ -9,9 +9,12 @@ REQUIRED="markdown2latex pdflatex"
texname=output
logfile=$THIS_TEMPDIR/log
-if ! markdown2latex -s -d "$@" >$THIS_TEMPDIR/$texname.tex 2>$logfile; then
- [ -f $logfile ] && sed -e 's/markdown2latex/markdown2pdf/g' \
- -e '/^INPUT=/d' -e '/^OUTPUT=/d' $logfile >&2
+if ! pandoc -s -d -r markdown -w latex "$@" >$THIS_TEMPDIR/$texname.tex \
+2>$logfile; then
+ [ -f $logfile ] && sed -e 's/^pandoc/markdown2pdf/g' \
+ -e '/^INPUT=/d' -e '/^OUTPUT=/d' \
+ -e '/^[[:space:]]*\(-f\|-t\|-s\|-R\|-S\|-m\|-i\|-c\|-T\|-D\|-d\)/,/./d'\
+ -e 's/(implies -s)//g' $logfile >&2
exit 1
fi
diff --git a/src/wrappers/web2markdown.in b/src/wrappers/web2markdown.in
index 64ff3db9b..89e884c3d 100644
--- a/src/wrappers/web2markdown.in
+++ b/src/wrappers/web2markdown.in
@@ -2,7 +2,7 @@
# converts HTML from a URL, file, or stdin to markdown
# uses an available program to fetch URL and tidy to normalize it first
-REQUIRED="tidy html2markdown"
+REQUIRED="tidy"
### common.sh
@@ -72,14 +72,16 @@ grabber=
while [ $# -gt 0 ]; do
case "$1" in
-h|--help)
- html2markdown -h 2>&1 | sed -e 's/html2markdown/web2markdown/' 1>&2
+ pandoc -h 2>&1 | sed -e 's/pandoc/web2markdown/' \
+ -e '/^[[:space:]]*\(-f\|-t\|-S\|-N\|-m\|-i\|-c\|-T\|-D\|-d\)/,/./d'\
+ 1>&2
err " -e ENCODING, --encoding=ENCODING"
err " Specify character encoding of input"
err " -g COMMAND, --grabber=COMMAND"
err " Specify command to be used to grab contents of URL"
exit 0 ;;
-v|--version)
- html2markdown -v
+ pandoc -v 2>&1 | sed -e 's/pandoc/web2markdown/' 1>&2
exit 0 ;;
-e)
shift
@@ -112,7 +114,7 @@ while [ $# -gt 0 ]; do
shift
done
-# Unpack options. Now "$@" will hold the html2markdown options.
+# Unpack options. Now "$@" will hold the pandoc options.
oldifs="$IFS"; IFS="$NEWLINE"; set -- $options; IFS="$oldifs"
inurl=
@@ -162,10 +164,11 @@ else # assume UTF-8
fi
if [ -z "$argument" ]; then
- tidy -utf8 2>/dev/null | html2markdown "$@"
+ tidy -utf8 2>/dev/null | pandoc -r html -w markdown "$@"
else
if [ -f "$argument" ]; then
- to_utf8 "$argument" | tidy -utf8 2>/dev/null | html2markdown "$@"
+ to_utf8 "$argument" |
+ tidy -utf8 2>/dev/null | pandoc -r html -w markdown "$@"
else
err "File '$argument' not found."
exit 1