+ Removed the convenience symlinks (which don't work on Windows under Cygwin, due to Windows' lack of true symbolic links).
+ Modified the wrappers to use 'pandoc' instead of the symlinks.
+ Modified the Makefile to remove all references to the symlinks.
+ Removed code from Main.hs that made pandoc's behavior depend on the name of the calling program.
+ Added code to Main.hs that sets default reader and writer based on extensions of input and output filenames (if provided). (Thanks to roktas for the idea.)
+ Modified README and man pages accordingly.
+ Removed README-WINDOWS target from Makefile. It is no longer needed now that we don't have the symlinks.
git-svn-id: https://pandoc.googlecode.com/svn/trunk@295 788f1e2b-df1e-0410-8736-df70ead52e1b
author: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2006-12-28 02:20:09 +0000
committer: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> 2006-12-28 02:20:09 +0000
commit: a1a30d69bd83e57e494cd3ca5077086077731998 (patch)
tree: 8193378eb51d13a20cf9f28ce479af431d3dcba4
parent: a1539d9ab8141ab11add5b304792e7c9bc7a363a (diff)
16 files changed, 198 insertions, 231 deletions
diff --git a/Makefile b/Makefile
index 5ea1ae24d..e6e87cea2 100644
--- a/Makefile
+++ b/Makefile
@@ -26,8 +26,6 @@ EXECSBASE := $(shell sed -ne 's/^[Ee]xecutable:[[:space:]]*//p' $(CABAL).in)
 # Install targets
 #-------------------------------------------------------------------------------
 WRAPPERS  := web2markdown markdown2pdf
-SYMLINKS  := markdown2html markdown2latex markdown2s5 markdown2rst \
-             markdown2rtf html2markdown latex2markdown rst2markdown
 # Add .exe extensions if we're running Windows/Cygwin.
 EXTENSION := $(shell uname | tr '[:upper:]' '[:lower:]' | \
                sed -ne 's/^cygwin.*$$/\.exe/p')
@@ -96,12 +94,6 @@ all: build-program
 templates: $(SRCDIR)/templates
 	$(MAKE) -C $(SRCDIR)/templates
 
-.PHONY: symlinks
-cleanup_files+=$(SYMLINKS)
-symlinks: $(SYMLINKS)
-$(SYMLINKS): $(MAIN)
-	ln -sf ./$(MAIN) $@ 
-
 define generate-shell-script
 echo "Generating $@...";                                 \
 awk '                                                    \
@@ -141,7 +133,7 @@ build: configure
 	$(BUILDCMD) build
 
 .PHONY: build-exec
-build-exec: $(PROGS) $(SYMLINKS)
+build-exec: $(PROGS)
 cleanup_files+=$(EXECS)
 $(EXECS): build
 	for f in $@; do \
@@ -201,9 +193,8 @@ install-exec: build-exec
 		fi; \
 		$(INSTALL_PROGRAM) $$f $(BINPATH)/; \
 	done
-	cd $(BINPATH); for f in $(SYMLINKS); do ln -sf $(MAIN) $$f; done
 uninstall-exec:
-	-for f in $(notdir $(PROGS) $(SYMLINKS)); do rm -f $(BINPATH)/$$f; done ;
+	-for f in $(notdir $(PROGS)); do rm -f $(BINPATH)/$$f; done ;
 
 # Program + user documents installation.
 .PHONY: install-program uninstall-program
@@ -295,15 +286,11 @@ $(osx_dmg_name): $(osx_pkg_name)
 
 .PHONY: win-pkg
 win_pkg_name:=$(RELNAME).zip
-win_docs:=COPYING.txt COPYRIGHT.txt BUGS.txt README-WINDOWS.txt README-WINDOWS.html
+win_docs:=COPYING.txt COPYRIGHT.txt BUGS.txt README.txt README.html
 cleanup_files+=$(win_pkg_name) $(win_docs)
 win-pkg: $(win_pkg_name)
 $(win_pkg_name): $(THIS).exe  $(win_docs)
 	zip -r $(win_pkg_name) $(THIS).exe $(win_docs)
-cleanup_files+=README-WINDOWS	
-README-WINDOWS: README
-	sed -e '/^Requirements/,/^\[fancyvrb\]:/ d' \
-        -e '/^Character encodings/,/mysite.com$$/ d' $< > $@	
 
 .PHONY: test test-markdown
 test: $(MAIN)
diff --git a/README b/README
index 82537eb6a..6251cb58d 100644
--- a/README
+++ b/README
@@ -36,14 +36,11 @@ Requirements
 ============
 
 The `pandoc` program itself does not depend on any external libraries
-or programs.  The convenience programs `markdown2html`, `markdown2latex`,
-`markdown2rst`, `markdown2rtf`, `markdown2s5`, `html2markdown`,
-`latex2markdown`, and `rst2markdown` are implemented as symbolic links to
-`pandoc`.
+or programs.
 
 The wrapper script `web2markdown` requires
 
-  - `html2markdown` (included with Pandoc)
+  - `pandoc` (which must be in the PATH)
   - a POSIX-compliant shell (installed by default on all linux and unix
     systems, including Mac OS X, and in [Cygwin] for Windows),
   - `HTML Tidy`
@@ -56,7 +53,7 @@ The wrapper script `web2markdown` requires
 
 The wrapper script `markdown2pdf` requires
 
-  - `markdown2latex` (included with Pandoc)
+  - `pandoc` (which must be in the PATH)
   - a POSIX-compliant shell
   - `pdflatex`, which should be part of any [LaTeX] distribution
   - the [unicode] and [fancyvrb] LaTeX packages, which are included
@@ -80,47 +77,11 @@ Using Pandoc
 
 If you run `pandoc` without arguments, it will accept input from
 STDIN.  If you run it with file names as arguments, it will take input
-from those files.  It accepts several command-line options.  For a
-list, type
-
-	pandoc -h
-
-The most important options specify the format of the source file and
-the output.  The default reader is markdown; the default writer is
-HTML.  So if you don't specify a reader or writer, `pandoc` will
-convert markdown to HTML.  For example,
-
-	pandoc hello.txt
-
-will convert `hello.txt` from markdown to HTML.  For other conversions,
-you must specify a reader and/or a writer using the `-r` and `-w`
-flags.  To convert markdown to LaTeX, you would write:
-
-	pandoc -w latex hello.txt
-
-To convert html to markdown:
-
-	pandoc -r html -w markdown hello.txt
-
-Supported writers include `markdown`, `latex`, `html`, `rtf` (rich text
-format), `rst` (reStructuredText), and `s5` (which produces an HTML
-file that acts like powerpoint).  Supported readers include `markdown`,
-`html`, `latex`, and `rst`.  Note that the `rst` reader only parses
-a subset of reStructuredText syntax.  For example, it doesn't handle
-tables, definition lists, option lists, or footnotes.  It handles only the
-constructs expressible in unextended markdown.  But for simple documents
-it should be adequate.  The `latex` and `html` readers are also limited
-in what they can do.  Because the `html` reader is picky about the HTML
-it parses, it is recommended that you pipe HTML through [HTML Tidy] before
-sending it to `pandoc`, or use the `web2markdown` script described below.
-
-By default, `pandoc` writes its output to STDOUT.  If you want to
-write to a file, use the `-o` option or shell redirection:
+from those files.  By default, `pandoc` writes its output to STDOUT.
+If you want to write to a file, use the `-o` option:
 
     pandoc -o hello.html hello.txt
 
-	pandoc hello.txt > hello.html
-
 Note that you can specify multiple input files on the command line.
 `pandoc` will concatenate them all (with blank lines between them)
 before parsing:
@@ -131,6 +92,44 @@ before parsing:
 with a proper header, rather than a fragment.  For more details on this
 and many other command-line options, see below.)
 
+The format of the input and output can be specified explicitly using
+command-line options.  The input format can be specified using the
+`-r/--read` or `-f/--from` options, the output format using the
+`-w/--write` or `-t/--to` options.  Thus, to convert `hello.txt` from
+markdown to LaTeX, you could type:
+
+	pandoc -f markdown -t latex hello.txt
+
+To convert `hello.html` from html to markdown:
+
+	pandoc -f html -t markdown hello.html
+
+Supported output formats include `markdown`, `latex`, `html`, `rtf`
+(rich text format), `rst` (reStructuredText), and `s5` (which produces
+an HTML file that acts like powerpoint).  Supported input formats
+include `markdown`, `html`, `latex`, and `rst`.  Note that the `rst`
+reader only parses a subset of reStructuredText syntax.  For example,
+it doesn't handle tables, definition lists, option lists, or footnotes.
+It handles only the constructs expressible in unextended markdown.
+But for simple documents it should be adequate.  The `latex` and `html`
+readers are also limited in what they can do.  Because the `html`
+reader is picky about the HTML it parses, it is recommended that you
+pipe HTML through [HTML Tidy] before sending it to `pandoc`, or use the
+`web2markdown` script described below.
+
+If you don't specify a reader or writer explicitly, `pandoc` will
+try to determine the input and output format from the extensions of
+the input and output filenames.  Thus, for example, 
+
+	pandoc -o hello.tex hello.txt
+
+will convert `hello.txt` from markdown to LaTeX.  If no output file
+is specified (so that output goes to STDOUT), or if the output file's
+extension is unknown, the output format will default to HTML.
+If no input file is specified (so that input comes from STDIN), or
+if the input files' extensions are unknown, the input format will
+be assumed to be markdown unless explicitly specified.
+
 Character encodings
 -------------------
 
@@ -150,31 +149,16 @@ The shell scripts (described below) automatically convert the input
 from the local encoding to UTF-8 before running them through `pandoc`,
 then convert the output back to the local encoding.
 
-Convenience programs and wrapper scripts 
-========================================
-
-For convenience, eight variant programs are included with Pandoc:
-`markdown2html` (which is equivalent to `pandoc  -w  html`),
-`markdown2latex` (equivalent to `pandoc -w latex`), `markdown2rst`
-(equivalent to `pandoc -w rst`), `markdown2rtf` (equivalent to
-`pandoc -w rtf`), `markdown2s5` (equivalent to `pandoc -w s5`),
-`html2markdown` (equivalent to `pandoc -r html -w markdown`),
-`latex2markdown` (equivalent to `pandoc -r latex -w markdown`), and
-`rst2markdown` (equivalent to `pandoc -r rst -w markdown`).  These
-programs take an appropriately restricted subset of `pandoc`'s
-options.  (Run them with the `-h` flag for a full list of allowed
-options.)
-
-Like `pandoc`, all of these programs produce fragments by default.
-If you want to produce a standalone file, complete with a header
-and footer appropriate to the format, use the `-s` option:
+`markdown2pdf` and `web2markdown`
+=================================
 
-   markdown2latex -s sample.txt > sample.tex 
-
-Two shell scripts have also been included:
+Two shell scripts, `markdown2pdf` and `web2markdown`, are included in
+the standard Pandoc installation.  (They are not included in the Windows
+binary package, as they require a POSIX shell, but they may be used
+in Windows under Cygwin.)
 
 1.  `markdown2pdf` produces a PDF file from markdown-formatted
-    text, using `markdown2latex` and `pdflatex`.  The default
+    text, using `pandoc` and `pdflatex`.  The default
     behavior of `markdown2pdf` is to create a file with the same
     base name as the first argument and the extension `pdf`; thus,
     for example,
@@ -190,7 +174,7 @@ Two shell scripts have also been included:
     If no input file is specified, input will be taken from STDIN.
 
 2.  `web2markdown` grabs a web page from a file or URL and converts
-    it to markdown-formatted text, using `tidy` and `html2markdown`.
+    it to markdown-formatted text, using `tidy` and `pandoc`.
     Unless input is from STDIN, an attempt is made to determine the
     character encoding of the page from the "Content-type" meta tag.
     If this is not present, UTF-8 is assumed.  Alternatively, a character
@@ -207,9 +191,20 @@ Command-line options
 ====================
 
 Various command-line options can be used to customize the output.
-For a complete list, type 
 
-	pandoc --help
+`-f`, `--from`, `-r`, or `--read` can be used to specify the input
+format -- the format Pandoc will be converting *from*.  Available
+formats are `native`, `markdown`, `rst`, `html`, and `latex`.
+
+`-t`, `--to`, `-w`, or `--write` can be used to specify the output
+format -- the format Pandoc will be converting *to*.  Available formats
+are `native`, `html`, `s5`, `latex`, `markdown`, `rst`, and `rtf`.
+
+`-s` or `--standalone` indicates that a standalone document is to be
+produced (with appropriate headers and footers), rather than a fragment.
+
+`-o` or `--output` specifies the name of the output file.  If no output
+filename is given, output will be sent to STDOUT.
 
 `-p` or `--preserve-tabs` causes tabs in the source text to be
 preserved, rather than converted to spaces (the default).
@@ -225,12 +220,6 @@ untranslatable HTML codes and LaTeX environments.  (The LaTeX reader
 does pass through untranslatable LaTeX commands, even if `-R` is not
 specified.)
 
-`-s` or `--standalone` causes `pandoc` to produce a standalone file,
-complete with appropriate document headers.  By default, `pandoc`
-produces a fragment.
-
-`-o` or `--output-file` can be used to specify an output file.
-
 `-C` or `--custom-header` can be used to specify a custom document
 header.  To see the headers used by default, use the `-D` option:
 for example, `pandoc -D html` prints the default HTML header.
diff --git a/man/man1/html2markdown.1 b/man/man1/html2markdown.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/html2markdown.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/latex2markdown.1 b/man/man1/latex2markdown.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/latex2markdown.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/markdown2html.1 b/man/man1/markdown2html.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/markdown2html.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/markdown2latex.1 b/man/man1/markdown2latex.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/markdown2latex.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/markdown2pdf.1 b/man/man1/markdown2pdf.1
index c15131a42..423ac6546 100644
--- a/man/man1/markdown2pdf.1
+++ b/man/man1/markdown2pdf.1
@@ -6,14 +6,13 @@ markdown2pdf \- converts markdown-formatted text to PDF, using pdflatex
 .SH DESCRIPTION
 \fBmarkdown2pdf\fR converts \fIinput\-file\fR (or text from standard 
 input) from markdown\-formatted plain text to PDF, using \fBpdflatex\fR.
-If no output filename is specified, the name of the output file is
-derived from the input file; thus, for example, if the input file
-is \fIhello.txt\fR, the output file will be \fIhello.pdf\fR.  If
-the input is read from STDIN and no output filename is
-specified, the output file will be named \fIstdin.pdf\fR.  If
-multiple input files are specified, they will be concatenated before
-conversion, and the name of the output file will be derived from
-the first input file.
+If no output filename is specified (using the \fB\-o\fR option),
+the name of the output file is derived from the input file; thus, for
+example, if the input file is \fIhello.txt\fR, the output file will be
+\fIhello.pdf\fR.  If the input is read from STDIN and no output filename
+is specified, the output file will be named \fIstdin.pdf\fR.  If multiple
+input files are specified, they will be concatenated before conversion,
+and the name of the output file will be derived from the first input file.
 .PP
 Input is assumed to be in the UTF\-8 character encoding.  If your
 local character encoding is not UTF\-8, you should pipe input and
@@ -21,11 +20,11 @@ output through \fBiconv\fR:
 .IP
 .B iconv \-t utf\-8 input.txt | pandoc | iconv \-f utf\-8
 .PP
-\fBmarkdown2pdf\fR assumes that the 'unicode' package
-is in latex's search path.  If this package is not included in your
-latex setup, it can be obtained from <http://ctan.org>.
+\fBmarkdown2pdf\fR assumes that the 'unicode' and 'fancyvrb' packages
+are in latex's search path.  If these packages are not included in your
+latex setup, they can be obtained from <http://ctan.org>.
 .PP
-\fBmarkdown2pdf\fR is a wrapper around \fBmarkdown2latex\fR.
+\fBmarkdown2pdf\fR is a wrapper around \fBpandoc\fR.
 .SH OPTIONS
 .TP
 .B \-o FILE, \-\-output=FILE
@@ -37,10 +36,6 @@ Preserve tabs instead of converting them to spaces.
 .B \-\-tab-stop=\fITABSTOP\fB
 Specify tab stop (default is 4).
 .TP
-.B \-R, \-\-parse-raw
-Parse untranslatable LaTeX environments as raw LaTeX,
-instead of ignoring them.
-.TP
 .B \-N, \-\-number-sections
 Number section headings in LaTeX output.  (Default is not to number them.)
 .TP
diff --git a/man/man1/markdown2rst.1 b/man/man1/markdown2rst.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/markdown2rst.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/markdown2rtf.1 b/man/man1/markdown2rtf.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/markdown2rtf.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/markdown2s5.1 b/man/man1/markdown2s5.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/markdown2s5.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/pandoc.1 b/man/man1/pandoc.1
index 82c9ae321..f6280f463 100644
--- a/man/man1/pandoc.1
+++ b/man/man1/pandoc.1
@@ -1,8 +1,6 @@
 .TH PANDOC 1 "December 15, 2006" Pandoc "User Manuals"
 .SH NAME
-pandoc, markdown2html, markdown2latex, markdown2rst, markdown2rtf,
-markdown2s5, html2markdown2, latex2markdown, rst2markdown \- general
-markup converter
+pandoc \- general markup converter
 .SH SYNOPSIS
 \fBpandoc\fR [\fIoptions\fR] [\fIinput\-file\fR]...
 .SH DESCRIPTION
@@ -13,41 +11,37 @@ slide shows.
 .PP
 If no \fIinput\-file\fR is specified, input is read from STDIN.
 Otherwise, the \fIinput\-files\fR are concatenated (with a blank
-line between each) and used as input.  Output goes to standard
-output.  If you want output to a file, use the \fB\-o\fR option or
-shell redirection:
+line between each) and used as input.  Output goes to STDOUT by
+default.  For output to a file, use the \fB\-o\fR option:
 .IP
 .B pandoc \-o output.html input.txt
-.IP
-.B pandoc input.txt > output.html
 .PP
-The default behavior of \fIPandoc\fR is to convert the input from
-markdown\-formatted plain text to HTML.  Different input and output
-formats can be specified using command\-line options.  For example,
+The input and output formats may be specified using command-line options
+(see \fBOPTIONS\fR, below, for details).  If these formats are not
+specified explicitly, \fIPandoc\fR will attempt to determine them
+from the extensions of the input and output filenames.  If input comes
+from STDIN or from a file with an unknown extension, the input is assumed
+to be markdown.  If no output filename is specified using the \fB\-o\fR
+option, or if a filename is specified but its extension is unknown,
+the output will default to HTML.  Thus, for example,
 .IP
-.B pandoc \-f latex \-t markdown chap1.tex > chap1.txt
+.B pandoc \-o chap1.tex chap1.txt
 .PP
-converts \fIchap1.tex\fR from LaTeX to markdown\-formatted plain text.
-See below for a detailed list of command\-line options.
+converts \fIchap1.txt\fR from markdown to LaTeX.  And
+.IP
+.B pandoc README
 .PP
-For convenience, eight variant programs are available:
-\fBmarkdown2html\fR (same as \fBpandoc \-w html\fR),
-\fBmarkdown2latex\fR (same as \fBpandoc \-w latex\fR),
-\fBmarkdown2rst\fR (same as \fBpandoc \-w rst\fR),
-\fBmarkdown2rtf\fR (same as \fBpandoc \-w rtf\fR),
-\fBmarkdown2s5\fR (same as \fBpandoc \-w s5\fR),
-\fBhtml2markdown\fR (same as \fBpandoc \-r html \-w markdown\fR),
-\fBlatex2markdown\fR (same as \fBpandoc \-r latex \-w markdown\fR),
-and \fBrst2markdown\fR (same as \fBpandoc \-r rst \-w markdown\fR).
-These programs take an appropriately restricted subset of \fBpandoc\fR's
-options.  (Run them with the \fB-h\fR flag for a full list of allowed
-options.)  
+converts \fIREADME\fR from markdown to HTML.
 .PP
 \fIPandoc\fR uses the UTF\-8 character encoding for both input and output.
 If your local character encoding is not UTF\-8, you should pipe input
 and output through \fBiconv\fR:
 .IP
 .B iconv \-t utf\-8 input.txt | pandoc | iconv \-f utf\-8
+.PP
+\fIPandoc\fR's HTML parser is not very forgiving.  If your input is
+HTML, consider running it through \fBtidy\fR(1) before passing it
+to Pandoc.  Or use \fBweb2markdown\fR(1), a wrapper around \fBpandoc\fR.
 
 .SH OPTIONS
 .TP
@@ -158,9 +152,7 @@ Show usage message.
 
 .SH "SEE ALSO"
 \fBweb2markdown\fR(1),
-\fBmarkdown2pdf\fR(1),
-\fBiconv\fR(1)
-
+\fBmarkdown2pdf\fR(1).
 The
 .I README
 file distributed with Pandoc contains full documentation.
diff --git a/man/man1/rst2markdown.1 b/man/man1/rst2markdown.1
deleted file mode 100644
index 7b82576d6..000000000
--- a/man/man1/rst2markdown.1
+++ /dev/null
@@ -1 +0,0 @@
-.so man1/pandoc.1
diff --git a/man/man1/web2markdown.1 b/man/man1/web2markdown.1
index a570cfc97..242b50671 100644
--- a/man/man1/web2markdown.1
+++ b/man/man1/web2markdown.1
@@ -16,7 +16,7 @@ option.
 from STDIN, UTF-8 is assumed.  A character encoding may be specified
 explicitly using the \fB\-e\fR option.
 .PP
-\fBweb2markdown\fR is a wrapper for \fBhtml2markdown\fR.
+\fBweb2markdown\fR is a wrapper for \fBpandoc\fR.
 .SH OPTIONS
 .TP
 .B \-s, \-\-standalone
@@ -76,7 +76,6 @@ web2markdown \-g 'wget \-\-user=foo \-\-password=bar' mysite.com
 
 .SH "SEE ALSO"
 \fBpandoc\fR(1),
-\fBhtml2markdown\fR(1),
 \fBiconv\fR(1)
 .SH AUTHOR
 John MacFarlane and Recai Oktas
diff --git a/src/Main.hs b/src/Main.hs
index 0f8567517..94be551d3 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -45,7 +45,7 @@ import Text.Pandoc.Writers.DefaultHeaders ( defaultHtmlHeader,
        defaultRTFHeader, defaultS5Header, defaultLaTeXHeader )
 import Text.Pandoc.Definition
 import Text.Pandoc.Shared
-import Text.Regex ( mkRegex, splitRegex )
+import Text.Regex ( mkRegex, matchRegex )
 import System ( exitWith, getArgs, getProgName )
 import System.Exit
 import System.Console.GetOpt
@@ -94,8 +94,8 @@ data Opt = Opt
     { optPreserveTabs      :: Bool    -- ^ If @False@, convert tabs to spaces
     , optTabStop           :: Int     -- ^ Number of spaces per tab
     , optStandalone        :: Bool    -- ^ If @True@, include header, footer
-    , optReader            :: ParserState -> String -> Pandoc -- ^ Read format
-    , optWriter            :: WriterOptions -> Pandoc -> String -- ^ Write fmt
+    , optReader            :: String  -- ^ Reader format
+    , optWriter            :: String  -- ^ Writer format
     , optParseRaw          :: Bool    -- ^ If @True@, parse unconvertable 
                                       -- HTML and TeX
     , optCSS               :: String  -- ^ CSS file to link to
@@ -103,64 +103,55 @@ data Opt = Opt
     , optIncludeBeforeBody :: String  -- ^ File to include at top of body
     , optIncludeAfterBody  :: String  -- ^ File to include at end of body
     , optCustomHeader      :: String  -- ^ Custom header to use, or "DEFAULT"
-    , optDefaultHeader     :: String  -- ^ Default header
     , optTitlePrefix       :: String  -- ^ Optional prefix for HTML title
     , optOutputFile        :: String  -- ^ Name of output file
     , optNumberSections    :: Bool    -- ^ If @True@, number sections in LaTeX
     , optIncremental       :: Bool    -- ^ If @True@, incremental lists in S5
     , optSmart             :: Bool    -- ^ If @True@, use smart typography
     , optASCIIMathML       :: Bool    -- ^ If @True@, use ASCIIMathML in HTML
-    , optShowUsage         :: Bool    -- ^ If @True@, show usage message
     , optDebug             :: Bool    -- ^ If @True@, output debug messages 
     }
 
 -- | Defaults for command-line options.
-startOpt :: Opt
-startOpt = Opt
+defaultOpts :: Opt
+defaultOpts = Opt
     { optPreserveTabs      = False
     , optTabStop           = 4
     , optStandalone        = False
-    , optReader            = readMarkdown
-    , optWriter            = writeHtml
+    , optReader            = ""    -- null for default reader
+    , optWriter            = ""    -- null for default writer
     , optParseRaw          = False
     , optCSS               = ""
     , optIncludeInHeader   = ""
     , optIncludeBeforeBody = ""
     , optIncludeAfterBody  = ""
     , optCustomHeader      = "DEFAULT"
-    , optDefaultHeader     = defaultHtmlHeader
     , optTitlePrefix       = ""
     , optOutputFile        = ""    -- null for stdout
     , optNumberSections    = False
     , optIncremental       = False
     , optSmart             = False
     , optASCIIMathML       = False
-    , optShowUsage         = False
     , optDebug             = False
     }
 
--- | A list of functions, each transforming the options data structure in response
--- to a command-line option.
-allOptions :: [OptDescr (Opt -> IO Opt)]
-allOptions =
+-- | A list of functions, each transforming the options data structure
+--   in response to a command-line option.
+options :: [OptDescr (Opt -> IO Opt)]
+options =
     [ Option "fr" ["from","read"]
                  (ReqArg
-                  (\arg opt -> case (lookup (map toLower arg) readers) of
-                      Just reader -> return opt { optReader = reader }
-                      Nothing     -> error ("Unknown reader: " ++ arg) )
+                  (\arg opt -> return opt { optReader = map toLower arg })
                   "FORMAT")
-                 ("Source format (" ++ 
-                  (concatMap (\(name, fn) -> " " ++ name) readers) ++ " )")
+                 ("Input format (" ++ (joinWithSep ", " (map fst readers)) ++
+                  ")")
 
     , Option "tw" ["to","write"]
                  (ReqArg
-                  (\arg opt -> case (lookup (map toLower arg) writers) of
-                      Just (writer, defaultHeader) -> 
-                              return opt { optWriter = writer, 
-                                           optDefaultHeader = defaultHeader }
-                      Nothing     -> error ("Unknown writer: " ++ arg) )
+                  (\arg opt -> return opt { optWriter = map toLower arg })
                   "FORMAT")
-                 ("Output format (" ++ (concatMap (\(name, fn) -> " " ++ name) writers) ++ " )")
+                 ("Output format (" ++ (joinWithSep ", " (map fst writers)) ++ 
+                  ")")
     
     , Option "s" ["standalone"]
                  (NoArg
@@ -169,8 +160,7 @@ allOptions =
 
     , Option "o" ["output"]
                  (ReqArg
-                  (\arg opt -> do
-                     return opt { optOutputFile = arg })
+                  (\arg opt -> return opt { optOutputFile = arg })
                   "FILENAME")
                  "Name of output file"
 
@@ -286,57 +276,66 @@ allOptions =
 
     , Option "h" ["help"]
                  (NoArg
-                  (\opt -> return opt { optShowUsage = True }))
+                  (\_ -> do
+                     prg <- getProgName
+                     hPutStr stderr (reformatUsageInfo $ 
+                             usageInfo (prg ++ " [OPTIONS] [FILES]") options)
+                     exitWith $ ExitFailure 2))
                  "Show help"
     ]
 
--- parse name of calling program and return default reader and writer descriptions
-parseProgName name =
-    case (splitRegex (mkRegex "2") (map toLower name)) of
-      [from, to] -> (from, to)
-      _          -> ("markdown", "html")
-
--- set default options based on reader and writer descriptions; start is starting options
-setDefaultOpts from to start =
-    case ((lookup from readers), (lookup to writers)) of
-      (Just reader, Just (writer, header)) -> start {optReader      = reader, 
-                                                     optWriter      = writer, 
-                                                     optDefaultHeader = header}
-      _                                    -> start
-
--- True if single-letter option is in option list
-inOptList :: [Char] -> OptDescr (Opt -> IO Opt) -> Bool
-inOptList list desc =
-  let (Option letters _ _ _) = desc in
-  any (\x -> x `elem` list) letters
-
 -- Reformat usage message so it doesn't wrap illegibly
+reformatUsageInfo :: String -> String
 reformatUsageInfo = gsub "   *--" "  --" .
                     gsub "(-[A-Za-z0-9])   *--" "\\1, --" . 
                     gsub "   *([^- ])" "\n\t\\1"
 
-main = do
-
-  name <- getProgName
-  let (from, to) = parseProgName name
+-- Determine default reader from the extension of the first source file that has one
+defaultReaderName :: [String] -> String
+defaultReaderName [] = "markdown"           -- no sources, or none with an extension
+defaultReaderName (x:xs) =
+  let x' = map toLower x in                 -- extensions are matched case-insensitively
+  case (matchRegex (mkRegex ".*\\.([^./\\\\]*)$") x') of -- after last '.', file name part only
+    Nothing         -> defaultReaderName xs -- no extension
+    Just ["xhtml"]  -> "html"
+    Just ["html"]   -> "html"
+    Just ["htm"]    -> "html"
+    Just ["tex"]    -> "latex"
+    Just ["latex"]  -> "latex"
+    Just ["ltx"]    -> "latex"
+    Just ["rst"]    -> "rst"
+    Just ["native"] -> "native"
+    Just _          -> "markdown"           -- unknown extension
+
+-- Determine default writer based on output file extension
+defaultWriterName :: String -> String
+defaultWriterName "" = "html" -- no output file
+defaultWriterName x =
+  let x' = map toLower x in   -- extensions are matched case-insensitively
+  case (matchRegex (mkRegex ".*\\.([^./\\\\]*)$") x') of -- after last '.', file name part only
+    Nothing           -> "markdown" -- no extension
+    Just [""]         -> "markdown" -- empty extension
+    Just ["tex"]      -> "latex"
+    Just ["latex"]    -> "latex"
+    Just ["ltx"]      -> "latex"
+    Just ["rtf"]      -> "rtf"
+    Just ["rst"]      -> "rst"
+    Just ["s5"]       -> "s5"
+    Just ["native"]   -> "native"
+    Just ["txt"]      -> "markdown"
+    Just ["text"]     -> "markdown"
+    Just ["md"]       -> "markdown"
+    Just ["markdown"] -> "markdown"
+    Just _            -> "html"     -- any other extension
 
-  let irrelevantOptions = if not ('2' `elem` name)
-         then ""
-         else "frtwD" ++
-              (if (to /= "html" && to /= "s5") then "SmcT" else "") ++
-              (if (to /= "latex") then "N" else "") ++
-              (if (to /= "s5") then "i" else "") ++
-              (if (from /= "html" && from /= "latex") then "R" else "")
-  
-  let options = filter (not . inOptList irrelevantOptions) allOptions
-
-  let defaultOpts = setDefaultOpts from to startOpt
+main = do
 
   args <- getArgs
   let (actions, sources, errors) = getOpt Permute options args
 
   if (not (null errors))
     then do
+      name <- getProgName
       mapM (\e -> hPutStrLn stderr e) errors
       hPutStrLn stderr (reformatUsageInfo $ 
                         usageInfo (name ++ " [OPTIONS] [FILES]") options)
@@ -350,30 +349,39 @@ main = do
   let Opt    { optPreserveTabs       = preserveTabs
               , optTabStop           = tabStop
               , optStandalone        = standalone
-              , optReader            = reader
-              , optWriter            = writer
+              , optReader            = readerName
+              , optWriter            = writerName
               , optParseRaw          = parseRaw
               , optCSS               = css
               , optIncludeInHeader   = includeHeader
               , optIncludeBeforeBody = includeBefore
               , optIncludeAfterBody  = includeAfter
               , optCustomHeader      = customHeader
-              , optDefaultHeader     = defaultHeader 
               , optTitlePrefix       = titlePrefix
               , optOutputFile        = outputFile
               , optNumberSections    = numberSections
               , optIncremental       = incremental
               , optSmart             = smart
               , optASCIIMathML       = asciiMathML
-              , optShowUsage         = showUsage
 			  , optDebug             = debug
              } = opts
 
-  if showUsage
-    then do
-        hPutStr stderr (reformatUsageInfo $ usageInfo (name ++ " [OPTIONS] [FILES]") options)
-        exitWith $ ExitFailure 2
-    else return ()
+  -- assign reader and writer based on options and filenames
+  let readerName' = if null readerName 
+                      then defaultReaderName sources
+                      else readerName
+
+  let writerName' = if null writerName 
+                      then defaultWriterName outputFile
+                      else writerName
+
+  reader <- case (lookup readerName' readers) of
+     Just r  -> return r
+     Nothing -> error ("Unknown reader: " ++ readerName')
+
+  (writer, defaultHeader) <- case (lookup writerName' writers) of
+     Just (w,h) -> return (w, h)
+     Nothing    -> error ("Unknown writer: " ++ writerName')
 
   output <- if ((null outputFile) || debug)
               then return stdout 
@@ -385,7 +393,6 @@ main = do
         hPutStr stderr $ concatMap (\s -> "INPUT=" ++ s ++ "\n") sources
     else return ()
 
-  let writingS5 = (defaultHeader == defaultS5Header)
   let tabFilter = if preserveTabs then id else (tabsToSpaces tabStop)
   let addBlank str = str ++ "\n\n"
   let removeCRs str = filter (/= '\r') str  -- remove DOS-style line endings
@@ -407,7 +414,7 @@ main = do
                                       writerTitlePrefix    = titlePrefix,
                                       writerSmart          = smart, 
                                       writerTabStop        = tabStop, 
-                                      writerS5             = writingS5,
+                                      writerS5             = (writerName=="s5"),
                                       writerIncremental    = incremental, 
                                       writerNumberSections = numberSections,
                                       writerIncludeBefore  = includeBefore, 
diff --git a/src/wrappers/markdown2pdf.in b/src/wrappers/markdown2pdf.in
index c222c1cbd..71d58a7cd 100644
--- a/src/wrappers/markdown2pdf.in
+++ b/src/wrappers/markdown2pdf.in
@@ -1,6 +1,6 @@
 #!/bin/sh -e
 
-REQUIRED="markdown2latex pdflatex"
+REQUIRED="pdflatex"
 
 ### common.sh
 
@@ -9,9 +9,12 @@ REQUIRED="markdown2latex pdflatex"
 texname=output
 logfile=$THIS_TEMPDIR/log
 
-if ! markdown2latex -s -d "$@" >$THIS_TEMPDIR/$texname.tex 2>$logfile; then
-    [ -f $logfile ] && sed -e 's/markdown2latex/markdown2pdf/g' \
-        -e '/^INPUT=/d' -e '/^OUTPUT=/d' $logfile >&2
+if ! pandoc -s -d -r markdown -w latex "$@" >$THIS_TEMPDIR/$texname.tex \
+2>$logfile; then
+    [ -f $logfile ] && sed -e 's/^pandoc/markdown2pdf/g' \
+        -e '/^INPUT=/d' -e '/^OUTPUT=/d' \
+        -e '/^[[:space:]]*\(-f\|-t\|-s\|-R\|-S\|-m\|-i\|-c\|-T\|-D\|-d\)/,/./d'\
+        -e 's/(implies -s)//g' $logfile >&2
     exit 1
 fi
 
diff --git a/src/wrappers/web2markdown.in b/src/wrappers/web2markdown.in
index 64ff3db9b..89e884c3d 100644
--- a/src/wrappers/web2markdown.in
+++ b/src/wrappers/web2markdown.in
@@ -2,7 +2,7 @@
 # converts HTML from a URL, file, or stdin to markdown
 # uses an available program to fetch URL and tidy to normalize it first
 
-REQUIRED="tidy html2markdown"
+REQUIRED="tidy"
 
 ### common.sh
 
@@ -72,14 +72,16 @@ grabber=
 while [ $# -gt 0 ]; do
     case "$1" in
         -h|--help)
-            html2markdown -h 2>&1 | sed -e 's/html2markdown/web2markdown/' 1>&2 
+            pandoc -h 2>&1 | sed -e 's/pandoc/web2markdown/' \
+            -e '/^[[:space:]]*\(-f\|-t\|-S\|-N\|-m\|-i\|-c\|-T\|-D\|-d\)/,/./d'\
+            1>&2 
             err "  -e ENCODING, --encoding=ENCODING"
             err "        Specify character encoding of input"
             err "  -g COMMAND, --grabber=COMMAND"
             err "        Specify command to be used to grab contents of URL"
             exit 0 ;;
         -v|--version) 
-            html2markdown -v
+            pandoc -v 2>&1 | sed -e 's/pandoc/web2markdown/' 1>&2
             exit 0 ;;
         -e)
             shift
@@ -112,7 +114,7 @@ while [ $# -gt 0 ]; do
     shift
 done
 
-# Unpack options.  Now "$@" will hold the html2markdown options.
+# Unpack options.  Now "$@" will hold the pandoc options.
 oldifs="$IFS"; IFS="$NEWLINE"; set -- $options; IFS="$oldifs"
 
 inurl=
@@ -162,10 +164,11 @@ else # assume UTF-8
 fi 
 
 if [ -z "$argument" ]; then
-    tidy -utf8 2>/dev/null | html2markdown "$@"
+    tidy -utf8 2>/dev/null | pandoc -r html -w markdown "$@"
 else
     if [ -f "$argument" ]; then
-        to_utf8 "$argument" | tidy -utf8 2>/dev/null | html2markdown "$@"
+        to_utf8 "$argument" | 
+        tidy -utf8 2>/dev/null | pandoc -r html -w markdown "$@"
     else
         err "File '$argument' not found."
         exit 1
author	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2006-12-28 02:20:09 +0000
committer	fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>	2006-12-28 02:20:09 +0000
commit	a1a30d69bd83e57e494cd3ca5077086077731998 (patch)
tree	8193378eb51d13a20cf9f28ce479af431d3dcba4
parent	a1539d9ab8141ab11add5b304792e7c9bc7a363a (diff)