summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJonas Smedegaard <dr@jones.dk>2011-08-23 00:00:32 +0200
committerJonas Smedegaard <dr@jones.dk>2011-08-23 00:00:32 +0200
commita242ebaf29539d6a9a4eec97e510b5f8e4b59b30 (patch)
treedfc3eab668562da5efbc9cd5577292ee891d9d94 /src
parent6479926bb9955dcbf0d175d053e2b38c44d59507 (diff)
Imported Upstream version 1.8.2
Diffstat (limited to 'src')
-rw-r--r--src/Tests/Readers/Markdown.hs33
-rw-r--r--src/Tests/Readers/RST.hs16
-rw-r--r--src/Tests/Writers/Markdown.hs34
-rw-r--r--src/Text/Pandoc/MIME.hs483
-rw-r--r--src/Text/Pandoc/Parsing.hs44
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs94
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs31
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs63
-rw-r--r--src/Text/Pandoc/Readers/RST.hs65
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs2
-rw-r--r--src/Text/Pandoc/S5.hs24
-rw-r--r--src/Text/Pandoc/Shared.hs3
-rw-r--r--src/Text/Pandoc/Templates.hs5
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs8
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs42
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs68
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs191
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs187
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs14
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs36
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs53
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs31
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs12
-rw-r--r--src/markdown2pdf.hs84
-rw-r--r--src/pandoc.hs66
-rw-r--r--src/test-pandoc.hs2
26 files changed, 1323 insertions, 368 deletions
diff --git a/src/Tests/Readers/Markdown.hs b/src/Tests/Readers/Markdown.hs
index feec8fa65..941762bd0 100644
--- a/src/Tests/Readers/Markdown.hs
+++ b/src/Tests/Readers/Markdown.hs
@@ -8,10 +8,14 @@ import Tests.Arbitrary()
import Text.Pandoc.Builder
-- import Text.Pandoc.Shared ( normalize )
import Text.Pandoc
+import Data.Sequence (singleton)
markdown :: String -> Pandoc
markdown = readMarkdown defaultParserState{ stateStandalone = True }
+markdownSmart :: String -> Pandoc
+markdownSmart = readMarkdown defaultParserState{ stateSmart = True }
+
infix 5 =:
(=:) :: ToString c
=> String -> (String, c) -> Test
@@ -40,6 +44,22 @@ tests = [ testGroup "inline code"
"`*` {.haskell .special x=\"7\"}"
=?> para (codeWith ("",["haskell","special"],[("x","7")]) "*")
]
+ , testGroup "smart punctuation"
+ [ test markdownSmart "quote before ellipses"
+ ("'...hi'"
+ =?> para (singleQuoted (singleton Ellipses +++ "hi")))
+ ]
+ , testGroup "mixed emphasis and strong"
+ [ "emph and strong emph alternating" =:
+ "*xxx* ***xxx*** xxx\n*xxx* ***xxx*** xxx"
+ =?> para (emph "xxx" +++ space +++ strong (emph "xxx") +++
+ space +++ "xxx" +++ space +++
+ emph "xxx" +++ space +++ strong (emph "xxx") +++
+ space +++ "xxx")
+ , "emph with spaced strong" =:
+ "*x **xx** x*"
+ =?> para (emph ("x" +++ space +++ strong "xx" +++ space +++ "x"))
+ ]
, testGroup "footnotes"
[ "indent followed by newline and flush-left text" =:
"[^1]\n\n[^1]: my note\n\n \nnot in note\n"
@@ -47,6 +67,19 @@ tests = [ testGroup "inline code"
, "indent followed by newline and indented text" =:
"[^1]\n\n[^1]: my note\n \n in note\n"
=?> para (note (para "my note" +++ para "in note"))
+ , "recursive note" =:
+ "[^1]\n\n[^1]: See [^1]\n"
+ =?> para (note (para "See [^1]"))
+ ]
+ , testGroup "lhs"
+ [ test (readMarkdown defaultParserState{stateLiterateHaskell = True})
+ "inverse bird tracks and html" $
+ "> a\n\n< b\n\n<div>\n"
+ =?> codeBlockWith ("",["sourceCode","literate","haskell"],[]) "a"
+ +++
+ codeBlockWith ("",["sourceCode","haskell"],[]) "b"
+ +++
+ rawBlock "html" "<div>\n\n"
]
-- the round-trip properties frequently fail
-- , testGroup "round trip"
diff --git a/src/Tests/Readers/RST.hs b/src/Tests/Readers/RST.hs
index c0f60ff51..4b8c9301b 100644
--- a/src/Tests/Readers/RST.hs
+++ b/src/Tests/Readers/RST.hs
@@ -17,7 +17,10 @@ infix 5 =:
(=:) = test rst
tests :: [Test]
-tests = [ "field list" =:
+tests = [ "line block with blank line" =:
+ "| a\n|\n| b" =?> para (str "a" +++ linebreak +++
+ linebreak +++ str " " +++ str "b")
+ , "field list" =:
[_LIT|
:Hostname: media08
:IP address: 10.0.0.19
@@ -32,6 +35,8 @@ tests = [ "field list" =:
with the first line, but they must be indented relative to the
field name marker, and they must line up with each other.
:Parameter i: integer
+:Final: item
+ on two lines
|] =?> ( setAuthors ["Me","Myself","I"]
$ setDate "2001-08-16"
$ doc
@@ -41,6 +46,15 @@ tests = [ "field list" =:
, (str "Version", [para "1"])
, (str "Indentation", [para "Since the field marker may be quite long, the second and subsequent lines of the field body do not have to line up with the first line, but they must be indented relative to the field name marker, and they must line up with each other."])
, (str "Parameter i", [para "integer"])
+ , (str "Final", [para "item on two lines"])
])
+ , "URLs with following punctuation" =:
+ ("http://google.com, http://yahoo.com; http://foo.bar.baz.\n" ++
+ "http://foo.bar/baz_(bam) (http://foo.bar)") =?>
+ para (link "http://google.com" "" "http://google.com" +++ ", " +++
+ link "http://yahoo.com" "" "http://yahoo.com" +++ "; " +++
+ link "http://foo.bar.baz" "" "http://foo.bar.baz" +++ ". " +++
+ link "http://foo.bar/baz_(bam)" "" "http://foo.bar/baz_(bam)"
+ +++ " (" +++ link "http://foo.bar" "" "http://foo.bar" +++ ")")
]
diff --git a/src/Tests/Writers/Markdown.hs b/src/Tests/Writers/Markdown.hs
new file mode 100644
index 000000000..70266a683
--- /dev/null
+++ b/src/Tests/Writers/Markdown.hs
@@ -0,0 +1,34 @@
+{-# LANGUAGE OverloadedStrings, QuasiQuotes #-}
+module Tests.Writers.Markdown (tests) where
+
+import Test.Framework
+import Text.Pandoc.Builder
+import Text.Pandoc
+import Tests.Helpers
+import Tests.Arbitrary()
+
+markdown :: (ToString a, ToPandoc a) => a -> String
+markdown = writeMarkdown defaultWriterOptions . toPandoc
+
+{-
+ "my test" =: X =?> Y
+
+is shorthand for
+
+ test markdown "my test" $ X =?> Y
+
+which is in turn shorthand for
+
+ test markdown "my test" (X,Y)
+-}
+
+infix 5 =:
+(=:) :: (ToString a, ToPandoc a)
+ => String -> (a, String) -> Test
+(=:) = test markdown
+
+tests :: [Test]
+tests = [ "indented code after list"
+ =: (orderedList [ para "one" +++ para "two" ] +++ codeBlock "test")
+ =?> "1. one\n\n two\n\n<!-- -->\n\n test"
+ ]
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
new file mode 100644
index 000000000..c52a4c475
--- /dev/null
+++ b/src/Text/Pandoc/MIME.hs
@@ -0,0 +1,483 @@
+{-
+Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.MIME
+ Copyright : Copyright (C) 2011 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Mime type lookup for ODT writer.
+-}
+module Text.Pandoc.MIME ( getMimeType )
+where
+import System.FilePath
+import Data.Char ( toLower )
+import qualified Data.Map as M
+
+-- | Determine mime type appropriate for file path.
+getMimeType :: FilePath -> Maybe String
+getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes
+ where mimeTypes = M.fromList -- List borrowed from happstack-server.
+ [("gz","application/x-gzip")
+ ,("cabal","application/x-cabal")
+ ,("%","application/x-trash")
+ ,("323","text/h323")
+ ,("3gp","video/3gpp")
+ ,("7z","application/x-7z-compressed")
+ ,("abw","application/x-abiword")
+ ,("ai","application/postscript")
+ ,("aif","audio/x-aiff")
+ ,("aifc","audio/x-aiff")
+ ,("aiff","audio/x-aiff")
+ ,("alc","chemical/x-alchemy")
+ ,("art","image/x-jg")
+ ,("asc","text/plain")
+ ,("asf","video/x-ms-asf")
+ ,("asn","chemical/x-ncbi-asn1")
+ ,("aso","chemical/x-ncbi-asn1-binary")
+ ,("asx","video/x-ms-asf")
+ ,("atom","application/atom")
+ ,("atomcat","application/atomcat+xml")
+ ,("atomsrv","application/atomserv+xml")
+ ,("au","audio/basic")
+ ,("avi","video/x-msvideo")
+ ,("b","chemical/x-molconn-Z")
+ ,("bak","application/x-trash")
+ ,("bat","application/x-msdos-program")
+ ,("bcpio","application/x-bcpio")
+ ,("bib","text/x-bibtex")
+ ,("bin","application/octet-stream")
+ ,("bmp","image/x-ms-bmp")
+ ,("boo","text/x-boo")
+ ,("book","application/x-maker")
+ ,("bsd","chemical/x-crossfire")
+ ,("c","text/x-csrc")
+ ,("c++","text/x-c++src")
+ ,("c3d","chemical/x-chem3d")
+ ,("cab","application/x-cab")
+ ,("cac","chemical/x-cache")
+ ,("cache","chemical/x-cache")
+ ,("cap","application/cap")
+ ,("cascii","chemical/x-cactvs-binary")
+ ,("cat","application/vnd.ms-pki.seccat")
+ ,("cbin","chemical/x-cactvs-binary")
+ ,("cbr","application/x-cbr")
+ ,("cbz","application/x-cbz")
+ ,("cc","text/x-c++src")
+ ,("cdf","application/x-cdf")
+ ,("cdr","image/x-coreldraw")
+ ,("cdt","image/x-coreldrawtemplate")
+ ,("cdx","chemical/x-cdx")
+ ,("cdy","application/vnd.cinderella")
+ ,("cef","chemical/x-cxf")
+ ,("cer","chemical/x-cerius")
+ ,("chm","chemical/x-chemdraw")
+ ,("chrt","application/x-kchart")
+ ,("cif","chemical/x-cif")
+ ,("class","application/java-vm")
+ ,("cls","text/x-tex")
+ ,("cmdf","chemical/x-cmdf")
+ ,("cml","chemical/x-cml")
+ ,("cod","application/vnd.rim.cod")
+ ,("com","application/x-msdos-program")
+ ,("cpa","chemical/x-compass")
+ ,("cpio","application/x-cpio")
+ ,("cpp","text/x-c++src")
+ ,("cpt","application/mac-compactpro")
+ ,("crl","application/x-pkcs7-crl")
+ ,("crt","application/x-x509-ca-cert")
+ ,("csf","chemical/x-cache-csf")
+ ,("csh","application/x-csh")
+ ,("csm","chemical/x-csml")
+ ,("csml","chemical/x-csml")
+ ,("css","text/css")
+ ,("csv","text/csv")
+ ,("ctab","chemical/x-cactvs-binary")
+ ,("ctx","chemical/x-ctx")
+ ,("cu","application/cu-seeme")
+ ,("cub","chemical/x-gaussian-cube")
+ ,("cxf","chemical/x-cxf")
+ ,("cxx","text/x-c++src")
+ ,("d","text/x-dsrc")
+ ,("dat","chemical/x-mopac-input")
+ ,("dcr","application/x-director")
+ ,("deb","application/x-debian-package")
+ ,("dif","video/dv")
+ ,("diff","text/x-diff")
+ ,("dir","application/x-director")
+ ,("djv","image/vnd.djvu")
+ ,("djvu","image/vnd.djvu")
+ ,("dl","video/dl")
+ ,("dll","application/x-msdos-program")
+ ,("dmg","application/x-apple-diskimage")
+ ,("dms","application/x-dms")
+ ,("doc","application/msword")
+ ,("dot","application/msword")
+ ,("dv","video/dv")
+ ,("dvi","application/x-dvi")
+ ,("dx","chemical/x-jcamp-dx")
+ ,("dxr","application/x-director")
+ ,("emb","chemical/x-embl-dl-nucleotide")
+ ,("embl","chemical/x-embl-dl-nucleotide")
+ ,("eml","message/rfc822")
+ ,("ent","chemical/x-ncbi-asn1-ascii")
+ ,("eps","application/postscript")
+ ,("etx","text/x-setext")
+ ,("exe","application/x-msdos-program")
+ ,("ez","application/andrew-inset")
+ ,("fb","application/x-maker")
+ ,("fbdoc","application/x-maker")
+ ,("fch","chemical/x-gaussian-checkpoint")
+ ,("fchk","chemical/x-gaussian-checkpoint")
+ ,("fig","application/x-xfig")
+ ,("flac","application/x-flac")
+ ,("fli","video/fli")
+ ,("fm","application/x-maker")
+ ,("frame","application/x-maker")
+ ,("frm","application/x-maker")
+ ,("gal","chemical/x-gaussian-log")
+ ,("gam","chemical/x-gamess-input")
+ ,("gamin","chemical/x-gamess-input")
+ ,("gau","chemical/x-gaussian-input")
+ ,("gcd","text/x-pcs-gcd")
+ ,("gcf","application/x-graphing-calculator")
+ ,("gcg","chemical/x-gcg8-sequence")
+ ,("gen","chemical/x-genbank")
+ ,("gf","application/x-tex-gf")
+ ,("gif","image/gif")
+ ,("gjc","chemical/x-gaussian-input")
+ ,("gjf","chemical/x-gaussian-input")
+ ,("gl","video/gl")
+ ,("gnumeric","application/x-gnumeric")
+ ,("gpt","chemical/x-mopac-graph")
+ ,("gsf","application/x-font")
+ ,("gsm","audio/x-gsm")
+ ,("gtar","application/x-gtar")
+ ,("h","text/x-chdr")
+ ,("h++","text/x-c++hdr")
+ ,("hdf","application/x-hdf")
+ ,("hh","text/x-c++hdr")
+ ,("hin","chemical/x-hin")
+ ,("hpp","text/x-c++hdr")
+ ,("hqx","application/mac-binhex40")
+ ,("hs","text/x-haskell")
+ ,("hta","application/hta")
+ ,("htc","text/x-component")
+ ,("htm","text/html")
+ ,("html","text/html")
+ ,("hxx","text/x-c++hdr")
+ ,("ica","application/x-ica")
+ ,("ice","x-conference/x-cooltalk")
+ ,("ico","image/x-icon")
+ ,("ics","text/calendar")
+ ,("icz","text/calendar")
+ ,("ief","image/ief")
+ ,("iges","model/iges")
+ ,("igs","model/iges")
+ ,("iii","application/x-iphone")
+ ,("inp","chemical/x-gamess-input")
+ ,("ins","application/x-internet-signup")
+ ,("iso","application/x-iso9660-image")
+ ,("isp","application/x-internet-signup")
+ ,("ist","chemical/x-isostar")
+ ,("istr","chemical/x-isostar")
+ ,("jad","text/vnd.sun.j2me.app-descriptor")
+ ,("jar","application/java-archive")
+ ,("java","text/x-java")
+ ,("jdx","chemical/x-jcamp-dx")
+ ,("jmz","application/x-jmol")
+ ,("jng","image/x-jng")
+ ,("jnlp","application/x-java-jnlp-file")
+ ,("jpe","image/jpeg")
+ ,("jpeg","image/jpeg")
+ ,("jpg","image/jpeg")
+ ,("js","application/x-javascript")
+ ,("kar","audio/midi")
+ ,("key","application/pgp-keys")
+ ,("kil","application/x-killustrator")
+ ,("kin","chemical/x-kinemage")
+ ,("kml","application/vnd.google-earth.kml+xml")
+ ,("kmz","application/vnd.google-earth.kmz")
+ ,("kpr","application/x-kpresenter")
+ ,("kpt","application/x-kpresenter")
+ ,("ksp","application/x-kspread")
+ ,("kwd","application/x-kword")
+ ,("kwt","application/x-kword")
+ ,("latex","application/x-latex")
+ ,("lha","application/x-lha")
+ ,("lhs","text/x-literate-haskell")
+ ,("lsf","video/x-la-asf")
+ ,("lsx","video/x-la-asf")
+ ,("ltx","text/x-tex")
+ ,("lyx","application/x-lyx")
+ ,("lzh","application/x-lzh")
+ ,("lzx","application/x-lzx")
+ ,("m3u","audio/mpegurl")
+ ,("m4a","audio/mpeg")
+ ,("maker","application/x-maker")
+ ,("man","application/x-troff-man")
+ ,("mcif","chemical/x-mmcif")
+ ,("mcm","chemical/x-macmolecule")
+ ,("mdb","application/msaccess")
+ ,("me","application/x-troff-me")
+ ,("mesh","model/mesh")
+ ,("mid","audio/midi")
+ ,("midi","audio/midi")
+ ,("mif","application/x-mif")
+ ,("mm","application/x-freemind")
+ ,("mmd","chemical/x-macromodel-input")
+ ,("mmf","application/vnd.smaf")
+ ,("mml","text/mathml")
+ ,("mmod","chemical/x-macromodel-input")
+ ,("mng","video/x-mng")
+ ,("moc","text/x-moc")
+ ,("mol","chemical/x-mdl-molfile")
+ ,("mol2","chemical/x-mol2")
+ ,("moo","chemical/x-mopac-out")
+ ,("mop","chemical/x-mopac-input")
+ ,("mopcrt","chemical/x-mopac-input")
+ ,("mov","video/quicktime")
+ ,("movie","video/x-sgi-movie")
+ ,("mp2","audio/mpeg")
+ ,("mp3","audio/mpeg")
+ ,("mp4","video/mp4")
+ ,("mpc","chemical/x-mopac-input")
+ ,("mpe","video/mpeg")
+ ,("mpeg","video/mpeg")
+ ,("mpega","audio/mpeg")
+ ,("mpg","video/mpeg")
+ ,("mpga","audio/mpeg")
+ ,("ms","application/x-troff-ms")
+ ,("msh","model/mesh")
+ ,("msi","application/x-msi")
+ ,("mvb","chemical/x-mopac-vib")
+ ,("mxu","video/vnd.mpegurl")
+ ,("nb","application/mathematica")
+ ,("nc","application/x-netcdf")
+ ,("nwc","application/x-nwc")
+ ,("o","application/x-object")
+ ,("oda","application/oda")
+ ,("odb","application/vnd.oasis.opendocument.database")
+ ,("odc","application/vnd.oasis.opendocument.chart")
+ ,("odf","application/vnd.oasis.opendocument.formula")
+ ,("odg","application/vnd.oasis.opendocument.graphics")
+ ,("odi","application/vnd.oasis.opendocument.image")
+ ,("odm","application/vnd.oasis.opendocument.text-master")
+ ,("odp","application/vnd.oasis.opendocument.presentation")
+ ,("ods","application/vnd.oasis.opendocument.spreadsheet")
+ ,("odt","application/vnd.oasis.opendocument.text")
+ ,("oga","audio/ogg")
+ ,("ogg","application/ogg")
+ ,("ogv","video/ogg")
+ ,("ogx","application/ogg")
+ ,("old","application/x-trash")
+ ,("otg","application/vnd.oasis.opendocument.graphics-template")
+ ,("oth","application/vnd.oasis.opendocument.text-web")
+ ,("otp","application/vnd.oasis.opendocument.presentation-template")
+ ,("ots","application/vnd.oasis.opendocument.spreadsheet-template")
+ ,("ott","application/vnd.oasis.opendocument.text-template")
+ ,("oza","application/x-oz-application")
+ ,("p","text/x-pascal")
+ ,("p7r","application/x-pkcs7-certreqresp")
+ ,("pac","application/x-ns-proxy-autoconfig")
+ ,("pas","text/x-pascal")
+ ,("pat","image/x-coreldrawpattern")
+ ,("patch","text/x-diff")
+ ,("pbm","image/x-portable-bitmap")
+ ,("pcap","application/cap")
+ ,("pcf","application/x-font")
+ ,("pcf.Z","application/x-font")
+ ,("pcx","image/pcx")
+ ,("pdb","chemical/x-pdb")
+ ,("pdf","application/pdf")
+ ,("pfa","application/x-font")
+ ,("pfb","application/x-font")
+ ,("pgm","image/x-portable-graymap")
+ ,("pgn","application/x-chess-pgn")
+ ,("pgp","application/pgp-signature")
+ ,("php","application/x-httpd-php")
+ ,("php3","application/x-httpd-php3")
+ ,("php3p","application/x-httpd-php3-preprocessed")
+ ,("php4","application/x-httpd-php4")
+ ,("phps","application/x-httpd-php-source")
+ ,("pht","application/x-httpd-php")
+ ,("phtml","application/x-httpd-php")
+ ,("pk","application/x-tex-pk")
+ ,("pl","text/x-perl")
+ ,("pls","audio/x-scpls")
+ ,("pm","text/x-perl")
+ ,("png","image/png")
+ ,("pnm","image/x-portable-anymap")
+ ,("pot","text/plain")
+ ,("ppm","image/x-portable-pixmap")
+ ,("pps","application/vnd.ms-powerpoint")
+ ,("ppt","application/vnd.ms-powerpoint")
+ ,("prf","application/pics-rules")
+ ,("prt","chemical/x-ncbi-asn1-ascii")
+ ,("ps","application/postscript")
+ ,("psd","image/x-photoshop")
+ ,("py","text/x-python")
+ ,("pyc","application/x-python-code")
+ ,("pyo","application/x-python-code")
+ ,("qt","video/quicktime")
+ ,("qtl","application/x-quicktimeplayer")
+ ,("ra","audio/x-pn-realaudio")
+ ,("ram","audio/x-pn-realaudio")
+ ,("rar","application/rar")
+ ,("ras","image/x-cmu-raster")
+ ,("rd","chemical/x-mdl-rdfile")
+ ,("rdf","application/rdf+xml")
+ ,("rgb","image/x-rgb")
+ ,("rhtml","application/x-httpd-eruby")
+ ,("rm","audio/x-pn-realaudio")
+ ,("roff","application/x-troff")
+ ,("ros","chemical/x-rosdal")
+ ,("rpm","application/x-redhat-package-manager")
+ ,("rss","application/rss+xml")
+ ,("rtf","application/rtf")
+ ,("rtx","text/richtext")
+ ,("rxn","chemical/x-mdl-rxnfile")
+ ,("sct","text/scriptlet")
+ ,("sd","chemical/x-mdl-sdfile")
+ ,("sd2","audio/x-sd2")
+ ,("sda","application/vnd.stardivision.draw")
+ ,("sdc","application/vnd.stardivision.calc")
+ ,("sdd","application/vnd.stardivision.impress")
+ ,("sdf","application/vnd.stardivision.math")
+ ,("sds","application/vnd.stardivision.chart")
+ ,("sdw","application/vnd.stardivision.writer")
+ ,("ser","application/java-serialized-object")
+ ,("sgf","application/x-go-sgf")
+ ,("sgl","application/vnd.stardivision.writer-global")
+ ,("sh","application/x-sh")
+ ,("shar","application/x-shar")
+ ,("shtml","text/html")
+ ,("sid","audio/prs.sid")
+ ,("sik","application/x-trash")
+ ,("silo","model/mesh")
+ ,("sis","application/vnd.symbian.install")
+ ,("sisx","x-epoc/x-sisx-app")
+ ,("sit","application/x-stuffit")
+ ,("sitx","application/x-stuffit")
+ ,("skd","application/x-koan")
+ ,("skm","application/x-koan")
+ ,("skp","application/x-koan")
+ ,("skt","application/x-koan")
+ ,("smi","application/smil")
+ ,("smil","application/smil")
+ ,("snd","audio/basic")
+ ,("spc","chemical/x-galactic-spc")
+ ,("spl","application/futuresplash")
+ ,("spx","audio/ogg")
+ ,("src","application/x-wais-source")
+ ,("stc","application/vnd.sun.xml.calc.template")
+ ,("std","application/vnd.sun.xml.draw.template")
+ ,("sti","application/vnd.sun.xml.impress.template")
+ ,("stl","application/vnd.ms-pki.stl")
+ ,("stw","application/vnd.sun.xml.writer.template")
+ ,("sty","text/x-tex")
+ ,("sv4cpio","application/x-sv4cpio")
+ ,("sv4crc","application/x-sv4crc")
+ ,("svg","image/svg+xml")
+ ,("svgz","image/svg+xml")
+ ,("sw","chemical/x-swissprot")
+ ,("swf","application/x-shockwave-flash")
+ ,("swfl","application/x-shockwave-flash")
+ ,("sxc","application/vnd.sun.xml.calc")
+ ,("sxd","application/vnd.sun.xml.draw")
+ ,("sxg","application/vnd.sun.xml.writer.global")
+ ,("sxi","application/vnd.sun.xml.impress")
+ ,("sxm","application/vnd.sun.xml.math")
+ ,("sxw","application/vnd.sun.xml.writer")
+ ,("t","application/x-troff")
+ ,("tar","application/x-tar")
+ ,("taz","application/x-gtar")
+ ,("tcl","application/x-tcl")
+ ,("tex","text/x-tex")
+ ,("texi","application/x-texinfo")
+ ,("texinfo","application/x-texinfo")
+ ,("text","text/plain")
+ ,("tgf","chemical/x-mdl-tgf")
+ ,("tgz","application/x-gtar")
+ ,("tif","image/tiff")
+ ,("tiff","image/tiff")
+ ,("tk","text/x-tcl")
+ ,("tm","text/texmacs")
+ ,("torrent","application/x-bittorrent")
+ ,("tr","application/x-troff")
+ ,("ts","text/texmacs")
+ ,("tsp","application/dsptype")
+ ,("tsv","text/tab-separated-values")
+ ,("txt","text/plain")
+ ,("udeb","application/x-debian-package")
+ ,("uls","text/iuls")
+ ,("ustar","application/x-ustar")
+ ,("val","chemical/x-ncbi-asn1-binary")
+ ,("vcd","application/x-cdlink")
+ ,("vcf","text/x-vcard")
+ ,("vcs","text/x-vcalendar")
+ ,("vmd","chemical/x-vmd")
+ ,("vms","chemical/x-vamas-iso14976")
+ ,("vrm","x-world/x-vrml")
+ ,("vrml","model/vrml")
+ ,("vsd","application/vnd.visio")
+ ,("wad","application/x-doom")
+ ,("wav","audio/x-wav")
+ ,("wax","audio/x-ms-wax")
+ ,("wbmp","image/vnd.wap.wbmp")
+ ,("wbxml","application/vnd.wap.wbxml")
+ ,("wk","application/x-123")
+ ,("wm","video/x-ms-wm")
+ ,("wma","audio/x-ms-wma")
+ ,("wmd","application/x-ms-wmd")
+ ,("wml","text/vnd.wap.wml")
+ ,("wmlc","application/vnd.wap.wmlc")
+ ,("wmls","text/vnd.wap.wmlscript")
+ ,("wmlsc","application/vnd.wap.wmlscriptc")
+ ,("wmv","video/x-ms-wmv")
+ ,("wmx","video/x-ms-wmx")
+ ,("wmz","application/x-ms-wmz")
+ ,("wp5","application/wordperfect5.1")
+ ,("wpd","application/wordperfect")
+ ,("wrl","model/vrml")
+ ,("wsc","text/scriptlet")
+ ,("wvx","video/x-ms-wvx")
+ ,("wz","application/x-wingz")
+ ,("xbm","image/x-xbitmap")
+ ,("xcf","application/x-xcf")
+ ,("xht","application/xhtml+xml")
+ ,("xhtml","application/xhtml+xml")
+ ,("xlb","application/vnd.ms-excel")
+ ,("xls","application/vnd.ms-excel")
+ ,("xlt","application/vnd.ms-excel")
+ ,("xml","application/xml")
+ ,("xpi","application/x-xpinstall")
+ ,("xpm","image/x-xpixmap")
+ ,("xsl","application/xml")
+ ,("xtel","chemical/x-xtel")
+ ,("xul","application/vnd.mozilla.xul+xml")
+ ,("xwd","image/x-xwindowdump")
+ ,("xyz","chemical/x-xyz")
+ ,("zip","application/zip")
+ ,("zmt","chemical/x-mopac-input")
+ ]
+
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 9ce064f91..eaf0c0f67 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -33,6 +33,7 @@ module Text.Pandoc.Parsing ( (>>~),
notFollowedBy',
oneOfStrings,
spaceChar,
+ nonspaceChar,
skipSpaces,
blankline,
blanklines,
@@ -78,7 +79,7 @@ import Text.Pandoc.Generic
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.ParserCombinators.Parsec
import Text.Pandoc.CharacterReferences ( characterReference )
-import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit )
+import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
import Data.List ( intercalate, transpose )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
import Control.Monad ( join, liftM, guard )
@@ -122,6 +123,10 @@ oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
spaceChar :: CharParser st Char
spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
+-- | Parses a nonspace, nonnewline character.
+nonspaceChar :: CharParser st Char
+nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r'
+
-- | Skips zero or more spaces or tabs.
skipSpaces :: GenParser Char st ()
skipSpaces = skipMany spaceChar
@@ -264,8 +269,24 @@ uri = try $ do
let protocols = [ "http:", "https:", "ftp:", "file:", "mailto:",
"news:", "telnet:" ]
lookAhead $ oneOfStrings protocols
- -- scan non-ascii characters and ascii characters allowed in a URI
- str <- many1 $ satisfy (\c -> not (isAscii c) || isAllowedInURI c)
+ -- Scan non-ascii characters and ascii characters allowed in a URI.
+ -- We allow punctuation except when followed by a space, since
+ -- we don't want the trailing '.' in 'http://google.com.'
+ let innerPunct = try $ satisfy isPunctuation >>~
+ notFollowedBy (newline <|> spaceChar)
+ let uriChar = innerPunct <|>
+ satisfy (\c -> not (isPunctuation c) &&
+ (not (isAscii c) || isAllowedInURI c))
+ -- We want to allow
+ -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
+ -- as a URL, while NOT picking up the closing paren in
+ -- (http://wikipedia.org)
+ -- So we include balanced parens in the URL.
+ let inParens = try $ do char '('
+ res <- many uriChar
+ char ')'
+ return $ '(' : res ++ ")"
+ str <- liftM concat $ many1 $ inParens <|> count 1 (innerPunct <|> uriChar)
-- now see if they amount to an absolute URI
case parseURI (escapeURI str) of
Just uri' -> if uriScheme uri' `elem` protocols
@@ -742,8 +763,9 @@ charOrRef cs =
singleQuoteStart :: GenParser Char ParserState ()
singleQuoteStart = do
failIfInQuoteContext InSingleQuote
- try $ do charOrRef "'\8216"
- notFollowedBy (oneOf ")!],.;:-? \t\n")
+ try $ do charOrRef "'\8216\145"
+ notFollowedBy (oneOf ")!],;:-? \t\n")
+ notFollowedBy (char '.') <|> lookAhead (string "..." >> return ())
notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
satisfy (not . isAlphaNum)))
-- possess/contraction
@@ -751,23 +773,23 @@ singleQuoteStart = do
singleQuoteEnd :: GenParser Char st ()
singleQuoteEnd = try $ do
- charOrRef "'\8217"
+ charOrRef "'\8217\146"
notFollowedBy alphaNum
doubleQuoteStart :: GenParser Char ParserState ()
doubleQuoteStart = do
failIfInQuoteContext InDoubleQuote
- try $ do charOrRef "\"\8220"
+ try $ do charOrRef "\"\8220\147"
notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n'))
doubleQuoteEnd :: GenParser Char st ()
doubleQuoteEnd = do
- charOrRef "\"\8221"
+ charOrRef "\"\8221\148"
return ()
ellipses :: GenParser Char st Inline
ellipses = do
- try (charOrRef "…") <|> try (string "..." >> return '…')
+ try (charOrRef "…\133") <|> try (string "..." >> return '…')
return Ellipses
dash :: GenParser Char st Inline
@@ -775,13 +797,13 @@ dash = enDash <|> emDash
enDash :: GenParser Char st Inline
enDash = do
- try (charOrRef "–") <|>
+ try (charOrRef "–\150") <|>
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
return EnDash
emDash :: GenParser Char st Inline
emDash = do
- try (charOrRef "—") <|> (try $ string "--" >> optional (char '-') >> return '—')
+ try (charOrRef "—\151") <|> (try $ string "--" >> optional (char '-') >> return '—')
return EmDash
--
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 18e3113d3..7c882f680 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -47,7 +47,7 @@ import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe, isJust )
import Data.List ( intercalate )
import Data.Char ( isSpace, isDigit )
-import Control.Monad ( liftM, guard )
+import Control.Monad ( liftM, guard, when )
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ParserState -- ^ Parser state
@@ -75,7 +75,7 @@ parseHeader tags = (Meta{docTitle = tit'', docAuthors = [], docDate = []}, rest)
t ~== TagOpen "body" []) tags
parseBody :: TagParser [Block]
-parseBody = liftM concat $ manyTill block eof
+parseBody = liftM (fixPlains False . concat) $ manyTill block eof
block :: TagParser [Block]
block = choice
@@ -107,7 +107,7 @@ pBulletList = try $ do
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul")
- return [BulletList items]
+ return [BulletList $ map (fixPlains True) items]
pOrderedList :: TagParser [Block]
pOrderedList = try $ do
@@ -138,7 +138,7 @@ pOrderedList = try $ do
-- treat it as a list item, though it's not valid xhtml...
skipMany nonItem
items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol")
- return [OrderedList (start, style, DefaultDelim) items]
+ return [OrderedList (start, style, DefaultDelim) $ map (fixPlains True) items]
pDefinitionList :: TagParser [Block]
pDefinitionList = try $ do
@@ -154,7 +154,22 @@ pDefListItem = try $ do
defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
skipMany nonItem
let term = intercalate [LineBreak] terms
- return (term, defs)
+ return (term, map (fixPlains True) defs)
+
+fixPlains :: Bool -> [Block] -> [Block]
+fixPlains inList bs = if any isParaish bs
+ then map plainToPara bs
+ else bs
+ where isParaish (Para _) = True
+ isParaish (CodeBlock _ _) = True
+ isParaish (Header _ _) = True
+ isParaish (BlockQuote _) = True
+ isParaish (BulletList _) = not inList
+ isParaish (OrderedList _ _) = not inList
+ isParaish (DefinitionList _) = not inList
+ isParaish _ = False
+ plainToPara (Plain xs) = Para xs
+ plainToPara x = x
pRawTag :: TagParser String
pRawTag = do
@@ -199,9 +214,9 @@ pSimpleTable :: TagParser [Block]
pSimpleTable = try $ do
TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
skipMany pBlank
- head' <- option [] $ pInTags "th" pTd
- rows <- many1 $ try $
- skipMany pBlank >> pInTags "tr" pTd
+ head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
+ rows <- pOptInTag "tbody"
+ $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
skipMany pBlank
TagClose _ <- pSatisfy (~== TagClose "table")
let cols = maximum $ map length rows
@@ -209,17 +224,17 @@ pSimpleTable = try $ do
let widths = replicate cols 0
return [Table [] aligns widths head' rows]
-pTd :: TagParser [TableCell]
-pTd = try $ do
+pCell :: String -> TagParser [TableCell]
+pCell celltype = try $ do
skipMany pBlank
- res <- pInTags "td" pPlain
+ res <- pInTags celltype pPlain
skipMany pBlank
return [res]
pBlockQuote :: TagParser [Block]
pBlockQuote = do
contents <- pInTags "blockquote" block
- return [BlockQuote contents]
+ return [BlockQuote $ fixPlains False contents]
pPlain :: TagParser [Block]
pPlain = do
@@ -358,7 +373,7 @@ pInlinesInTags :: String -> ([Inline] -> Inline)
-> TagParser [Inline]
pInlinesInTags tagtype f = do
contents <- pInTags tagtype inline
- return [f contents]
+ return [f $ normalizeSpaces contents]
pInTags :: String -> TagParser [a]
-> TagParser [a]
@@ -366,6 +381,16 @@ pInTags tagtype parser = try $ do
pSatisfy (~== TagOpen tagtype [])
liftM concat $ manyTill parser (pCloses tagtype <|> eof)
+pOptInTag :: String -> TagParser a
+ -> TagParser a
+pOptInTag tagtype parser = try $ do
+ open <- option False (pSatisfy (~== TagOpen tagtype []) >> return True)
+ skipMany pBlank
+ x <- parser
+ skipMany pBlank
+ when open $ pCloses tagtype
+ return x
+
pCloses :: String -> TagParser ()
pCloses tagtype = try $ do
t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag
@@ -391,10 +416,12 @@ pBlank = try $ do
guard $ all isSpace str
pTagContents :: GenParser Char ParserState Inline
-pTagContents = pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol
+pTagContents =
+ pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
pStr :: GenParser Char ParserState Inline
-pStr = liftM Str $ many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c)
+pStr = liftM Str $ many1 $ satisfy $ \c ->
+ not (isSpace c) && not (isSpecial c) && not (isBad c)
isSpecial :: Char -> Bool
isSpecial '"' = True
@@ -410,6 +437,43 @@ isSpecial _ = False
pSymbol :: GenParser Char ParserState Inline
pSymbol = satisfy isSpecial >>= return . Str . (:[])
+isBad :: Char -> Bool
+isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
+
+pBad :: GenParser Char ParserState Inline
+pBad = do
+ c <- satisfy isBad
+ let c' = case c of
+ '\128' -> '\8364'
+ '\130' -> '\8218'
+ '\131' -> '\402'
+ '\132' -> '\8222'
+ '\133' -> '\8230'
+ '\134' -> '\8224'
+ '\135' -> '\8225'
+ '\136' -> '\710'
+ '\137' -> '\8240'
+ '\138' -> '\352'
+ '\139' -> '\8249'
+ '\140' -> '\338'
+ '\142' -> '\381'
+ '\145' -> '\8216'
+ '\146' -> '\8217'
+ '\147' -> '\8220'
+ '\148' -> '\8221'
+ '\149' -> '\8226'
+ '\150' -> '\8211'
+ '\151' -> '\8212'
+ '\152' -> '\732'
+ '\153' -> '\8482'
+ '\154' -> '\353'
+ '\155' -> '\8250'
+ '\156' -> '\339'
+ '\158' -> '\382'
+ '\159' -> '\376'
+ _ -> '?'
+ return $ Str [c']
+
pSpace :: GenParser Char ParserState Inline
pSpace = many1 (satisfy isSpace) >> return Space
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index dca745b56..02c7361d7 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -391,7 +391,7 @@ para = do
--
bibliographic :: GenParser Char ParserState Block
-bibliographic = choice [ maketitle, title, authors, date ]
+bibliographic = choice [ maketitle, title, subtitle, authors, date ]
maketitle :: GenParser Char st Block
maketitle = try (string "\\maketitle") >> spaces >> return Null
@@ -404,11 +404,22 @@ title = try $ do
updateState (\state -> state { stateTitle = tit })
return Null
+subtitle :: GenParser Char ParserState Block
+subtitle = try $ do
+ string "\\subtitle{"
+ tit <- manyTill inline (char '}')
+ spaces
+ updateState (\state -> state { stateTitle = stateTitle state ++
+ Str ":" : LineBreak : tit })
+ return Null
+
authors :: GenParser Char ParserState Block
authors = try $ do
string "\\author{"
- raw <- many1 (notFollowedBy (char '}') >> inline)
- let authors' = map normalizeSpaces $ splitBy (== LineBreak) raw
+ let andsep = try $ string "\\and" >> notFollowedBy letter >>
+ spaces >> return '&'
+ raw <- sepBy (many $ notFollowedBy (char '}' <|> andsep) >> inline) andsep
+ let authors' = map normalizeSpaces raw
char '}'
spaces
updateState (\s -> s { stateAuthors = authors' })
@@ -777,8 +788,12 @@ doubleQuoteEnd :: CharParser st String
doubleQuoteEnd = try $ string "''"
ellipses :: GenParser Char st Inline
-ellipses = try $ string "\\ldots" >> optional (try $ string "{}") >>
- return Ellipses
+ellipses = try $ do
+ char '\\'
+ optional $ char 'l'
+ string "dots"
+ optional $ try $ string "{}"
+ return Ellipses
enDash :: GenParser Char st Inline
enDash = try (string "--") >> return EnDash
@@ -801,7 +816,11 @@ nonbreakingSpace = char '~' >> return (Str "\160")
-- hard line break
linebreak :: GenParser Char st Inline
-linebreak = try (string "\\\\") >> return LineBreak
+linebreak = try $ do
+ string "\\\\"
+ optional $ bracketedText '[' ']' -- e.g. \\[10pt]
+ spaces
+ return LineBreak
str :: GenParser Char st Inline
str = many1 (noneOf specialChars) >>= return . Str
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 01cc5e2e8..53531dc1a 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE RelaxedPolyRec #-} -- needed for inlinesBetween on GHC < 7
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -471,8 +472,10 @@ lhsCodeBlockBirdWith c = try $ do
return $ intercalate "\n" lns'
birdTrackLine :: Char -> GenParser Char st [Char]
-birdTrackLine c = do
+birdTrackLine c = try $ do
char c
+ -- allow html tags on left margin:
+ when (c == '<') $ notFollowedBy letter
manyTill anyChar newline
@@ -905,7 +908,7 @@ inlineParsers = [ whitespace
, str
, endline
, code
- , (fourOrMore '*' <|> fourOrMore '_')
+ , fours
, strong
, emph
, note
@@ -1016,24 +1019,45 @@ mathInline = try $ do
-- to avoid performance problems, treat 4 or more _ or * in a row as a literal
-- rather than attempting to parse for emph/strong
-fourOrMore :: Char -> GenParser Char st Inline
-fourOrMore c = try $ count 4 (char c) >> many (char c) >>= \s ->
- return (Str $ replicate 4 c ++ s)
+fours :: GenParser Char st Inline
+fours = try $ do
+ x <- char '*' <|> char '_'
+ count 2 $ satisfy (==x)
+ rest <- many1 (satisfy (==x))
+ return $ Str (x:x:x:rest)
+
+-- | Parses a list of inlines between start and end delimiters.
+inlinesBetween :: (Show b)
+ => GenParser Char ParserState a
+ -> GenParser Char ParserState b
+ -> GenParser Char ParserState [Inline]
+inlinesBetween start end =
+ normalizeSpaces `liftM` try (start >> many1Till inner end)
+ where inner = innerSpace <|> (notFollowedBy' whitespace >> inline)
+ innerSpace = try $ whitespace >>~ notFollowedBy' end
emph :: GenParser Char ParserState Inline
-emph = ((enclosed (char '*') (notFollowedBy' strong >> char '*') inline) <|>
- (enclosed (char '_') (notFollowedBy' strong >> char '_' >>
- notFollowedBy alphaNum) inline)) >>=
- return . Emph . normalizeSpaces
+emph = Emph `liftM`
+ (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
+ where starStart = char '*' >> lookAhead nonspaceChar
+ starEnd = notFollowedBy' strong >> char '*'
+ ulStart = char '_' >> lookAhead nonspaceChar
+ ulEnd = notFollowedBy' strong >> char '_'
strong :: GenParser Char ParserState Inline
-strong = ((enclosed (string "**") (try $ string "**") inline) <|>
- (enclosed (string "__") (try $ string "__") inline)) >>=
- return . Strong . normalizeSpaces
+strong = Strong `liftM`
+ (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
+ where starStart = string "**" >> lookAhead nonspaceChar
+ starEnd = try $ string "**"
+ ulStart = string "__" >> lookAhead nonspaceChar
+ ulEnd = try $ string "__"
strikeout :: GenParser Char ParserState Inline
-strikeout = failIfStrict >> enclosed (string "~~") (try $ string "~~") inline >>=
- return . Strikeout . normalizeSpaces
+strikeout = Strikeout `liftM`
+ (failIfStrict >> inlinesBetween strikeStart strikeEnd)
+ where strikeStart = string "~~" >> lookAhead nonspaceChar
+ >> notFollowedBy (char '~')
+ strikeEnd = try $ string "~~"
superscript :: GenParser Char ParserState Inline
superscript = failIfStrict >> enclosed (char '^') (char '^')
@@ -1185,7 +1209,14 @@ note = try $ do
let notes = stateNotes state
case lookup ref notes of
Nothing -> fail "note not found"
- Just raw -> liftM Note $ parseFromString parseBlocks raw
+ Just raw -> do
+ -- We temporarily empty the note list while parsing the note,
+ -- so that we don't get infinite loops with notes inside notes...
+ -- Note references inside other notes do not work.
+ updateState $ \st -> st{ stateNotes = [] }
+ contents <- parseFromString parseBlocks raw
+ updateState $ \st -> st{ stateNotes = notes }
+ return $ Note contents
inlineNote :: GenParser Char ParserState Inline
inlineNote = try $ do
@@ -1281,7 +1312,7 @@ citeKey = try $ do
suppress_author <- option False (char '-' >> return True)
char '@'
first <- letter
- rest <- many $ (noneOf ",;]@ \t\n")
+ rest <- many $ (noneOf ",;!?[]()@ \t\n")
let key = first:rest
st <- getState
guard $ key `elem` stateCitations st
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 32fae5ee7..7fda0da19 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -34,7 +34,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
-import Control.Monad ( when )
+import Control.Monad ( when, liftM )
import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy )
import qualified Data.Map as M
import Text.Printf ( printf )
@@ -58,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\\`|*_<>$:[-.\"'\8216\8217\8220\8221"
+specialChars = "\\`|*_<>$:[]()-.\"'\8216\8217\8220\8221"
--
-- parsing documents
@@ -162,6 +162,7 @@ fieldListItem indent = try $ do
(name, raw) <- rawFieldListItem indent
let term = [Str name]
contents <- parseFromString (many block) raw
+ optional blanklines
case (name, contents) of
("Author", x) -> do
updateState $ \st ->
@@ -187,7 +188,6 @@ fieldList :: GenParser Char ParserState Block
fieldList = try $ do
indent <- lookAhead $ many spaceChar
items <- many1 $ fieldListItem indent
- blanklines
if null items
then return Null
else return $ DefinitionList $ catMaybes items
@@ -198,11 +198,14 @@ fieldList = try $ do
lineBlockLine :: GenParser Char ParserState [Inline]
lineBlockLine = try $ do
- string "| "
+ char '|'
+ char ' ' <|> lookAhead (char '\n')
white <- many spaceChar
line <- many $ (notFollowedBy newline >> inline) <|> (try $ endline >>~ char ' ')
optional endline
- return $ normalizeSpaces $ (if null white then [] else [Str white]) ++ line
+ return $ if null white
+ then normalizeSpaces line
+ else Str white : normalizeSpaces line
lineBlock :: GenParser Char ParserState Block
lineBlock = try $ do
@@ -330,15 +333,14 @@ indentedLine indents = try $ do
string indents
manyTill anyChar newline
--- two or more indented lines, possibly separated by blank lines.
+-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: GenParser Char st [Char]
-indentedBlock = try $ do
+indentedBlock = try $ do
indents <- lookAhead $ many1 spaceChar
- lns <- many $ choice $ [ indentedLine indents,
- try $ do b <- blanklines
- l <- indentedLine indents
- return (b ++ l) ]
+ lns <- many1 $ try $ do b <- option "" blanklines
+ l <- indentedLine indents
+ return (b ++ l)
optional blanklines
return $ unlines lns
@@ -538,9 +540,15 @@ noteBlock = try $ do
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
noteMarker :: GenParser Char ParserState [Char]
-noteMarker = char '[' >> (many1 digit <|> count 1 (oneOf "#*")) >>~ char ']'
+noteMarker = do
+ char '['
+ res <- many1 digit
+ <|> (try $ char '#' >> liftM ('#':) simpleReferenceName')
+ <|> count 1 (oneOf "#*")
+ char ']'
+ return res
---
+--
-- reference key
--
@@ -555,13 +563,20 @@ unquotedReferenceName = try $ do
label' <- many1Till inline (lookAhead $ char ':')
return label'
-isolated :: Char -> GenParser Char st Char
-isolated ch = try $ char ch >>~ notFollowedBy (char ch)
+-- Simple reference names are single words consisting of alphanumerics
+-- plus isolated (no two adjacent) internal hyphens, underscores,
+-- periods, colons and plus signs; no whitespace or other characters
+-- are allowed.
+simpleReferenceName' :: GenParser Char st String
+simpleReferenceName' = do
+ x <- alphaNum
+ xs <- many $ alphaNum
+ <|> (try $ oneOf "-_:+." >> lookAhead alphaNum)
+ return (x:xs)
simpleReferenceName :: GenParser Char st [Inline]
simpleReferenceName = do
- raw <- many1 (alphaNum <|> isolated '-' <|> isolated '.' <|>
- (try $ char '_' >>~ lookAhead alphaNum))
+ raw <- simpleReferenceName'
return [Str raw]
referenceName :: GenParser Char ParserState [Inline]
@@ -864,10 +879,16 @@ note = try $ do
case lookup ref notes of
Nothing -> fail "note not found"
Just raw -> do
+ -- We temporarily empty the note list while parsing the note,
+ -- so that we don't get infinite loops with notes inside notes...
+ -- Note references inside other notes are allowed in reST, but
+ -- not yet in this implementation.
+ updateState $ \st -> st{ stateNotes = [] }
contents <- parseFromString parseBlocks raw
- when (ref == "*" || ref == "#") $ do -- auto-numbered
- -- delete the note so the next auto-numbered note
- -- doesn't get the same contents:
- let newnotes = deleteFirstsBy (==) notes [(ref,raw)]
- updateState $ \st -> st{ stateNotes = newnotes }
+ let newnotes = if (ref == "*" || ref == "#") -- auto-numbered
+ -- delete the note so the next auto-numbered note
+ -- doesn't get the same contents:
+ then deleteFirstsBy (==) notes [(ref,raw)]
+ else notes
+ updateState $ \st -> st{ stateNotes = newnotes }
return $ Note contents
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 19357b343..12d299aa4 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -465,7 +465,7 @@ link :: GenParser Char ParserState Inline
link = try $ do
name <- surrounded (char '"') inline
char ':'
- url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;," >> (space <|> newline))))
+ url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;,:" >> (space <|> newline))))
return $ Link name (url, "")
-- | Detect plain links to http or email.
diff --git a/src/Text/Pandoc/S5.hs b/src/Text/Pandoc/S5.hs
index 1567a3ede..b17b052c5 100644
--- a/src/Text/Pandoc/S5.hs
+++ b/src/Text/Pandoc/S5.hs
@@ -31,6 +31,8 @@ Definitions for creation of S5 powerpoint-like HTML.
module Text.Pandoc.S5 ( s5HeaderIncludes) where
import Text.Pandoc.Shared ( readDataFile )
import System.FilePath ( (</>) )
+import Data.ByteString.UTF8 ( toString, fromString )
+import Data.ByteString.Base64 ( encode )
s5HeaderIncludes :: Maybe FilePath -> IO String
s5HeaderIncludes datadir = do
@@ -40,10 +42,14 @@ s5HeaderIncludes datadir = do
s5Javascript :: Maybe FilePath -> IO String
s5Javascript datadir = do
- jsCom <- readDataFile datadir $ "s5" </> "default" </> "slides.js.comment"
- jsPacked <- readDataFile datadir $ "s5" </> "default" </> "slides.js.packed"
- return $ "<script type=\"text/javascript\">\n" ++ jsCom ++ jsPacked ++
- "</script>\n"
+ js <- readDataFile datadir $ "s5" </> "default" </> "slides.min.js"
+ return $ "<script type=\"text/javascript\">\n" ++ inCDATA js ++ "</script>\n"
+
+inCDATA :: String -> String
+inCDATA s = "/*<![CDATA[*/\n" ++ s ++ "\n/*]]>*/\n"
+
+base64 :: String -> String
+base64 = toString . encode . fromString
s5CSS :: Maybe FilePath -> IO String
s5CSS datadir = do
@@ -53,5 +59,11 @@ s5CSS datadir = do
s5OperaCSS <- readDataFile datadir $ "s5" </> "default" </> "opera.css"
s5OutlineCSS <- readDataFile datadir $ "s5" </> "default" </> "outline.css"
s5PrintCSS <- readDataFile datadir $ "s5" </> "default" </> "print.css"
- return $ "<style type=\"text/css\" media=\"projection\" id=\"slideProj\">\n" ++ s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS ++ "\n</style>\n<style type=\"text/css\" media=\"projection\" id=\"operaFix\">\n" ++ s5OperaCSS ++ "\n</style>\n<style type=\"text/css\" media=\"screen\" id=\"outlineStyle\">\n" ++ s5OutlineCSS ++ "\n</style>\n<style type=\"text/css\" media=\"print\" id=\"slidePrint\">\n" ++ s5PrintCSS ++ "\n</style>\n"
-
+ return $ "<link rel=\"stylesheet\" type=\"text/css\" media=\"projection\" id=\"slideProj\" href=\"data:text/css;charset=utf-8;base64," ++
+ base64 (s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS) ++ "\" />\n" ++
+ "<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" href=\"data:text/css;charset=utf-8;base64," ++
+ base64 s5OutlineCSS ++ "\" />\n" ++
+ "<link rel=\"stylesheet\" type=\"text/css\" media=\"print\" id=\"slidePrint\" href=\"data:text/css;charset=utf-8;base64," ++
+ base64 s5PrintCSS ++ "\" />\n" ++
+ "<link rel=\"stylesheet\" type=\"text/css\" media=\"projection\" id=\"operaFix\" href=\"data:text/css;charset=utf-8;base64," ++
+ base64 s5OperaCSS ++ "\" />\n"
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index c4bc66830..9717e1bc8 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -67,7 +67,7 @@ module Text.Pandoc.Shared (
-- * File handling
inDirectory,
findDataFile,
- readDataFile
+ readDataFile,
) where
import Text.Pandoc.Definition
@@ -493,6 +493,7 @@ data WriterOptions = WriterOptions
, writerAscii :: Bool -- ^ Avoid non-ascii characters
} deriving Show
+{-# DEPRECATED writerXeTeX "writerXeTeX no longer does anything" #-}
-- | Default writer options.
defaultWriterOptions :: WriterOptions
defaultWriterOptions =
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index b03e8c73f..19c9a808a 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
{-
Copyright (C) 2009-2010 John MacFarlane <jgm@berkeley.edu>
@@ -83,9 +83,10 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
-> IO (Either E.IOException String)
getDefaultTemplate _ "native" = return $ Right ""
getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument"
+getDefaultTemplate user "epub" = getDefaultTemplate user "html"
getDefaultTemplate user writer = do
let format = takeWhile (/='+') writer -- strip off "+lhs" if present
- let fname = "templates" </> format <.> "template"
+ let fname = "templates" </> "default" <.> format
E.try $ readDataFile user fname
data TemplateState = TemplateState Int [(String,String)]
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 0f6e00a3b..763f77d7c 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -36,6 +36,7 @@ import Data.List ( intercalate )
import Control.Monad.State
import Text.Pandoc.Pretty
import Text.Pandoc.Templates ( renderTemplate )
+import Network.URI ( isAbsoluteURI, unEscapeString )
data WriterState =
WriterState { stNextRef :: Int -- number of next URL reference
@@ -122,7 +123,7 @@ blockToConTeXt (BlockQuote lst) = do
contents <- blockListToConTeXt lst
return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline
blockToConTeXt (CodeBlock _ str) =
- return $ "\\starttyping" <> cr <> flush (text str) <> cr <> "\\stoptyping" $$ blankline
+ return $ flush ("\\starttyping" <> cr <> text str <> cr <> "\\stoptyping") $$ blankline
-- blankline because \stoptyping can't have anything after it, inc. '}'
blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
blockToConTeXt (RawBlock _ _ ) = return empty
@@ -282,7 +283,10 @@ inlineToConTeXt (Link txt (src, _)) = do
brackets empty <> brackets label <>
"\\from" <> brackets (text ref)
inlineToConTeXt (Image _ (src, _)) = do
- return $ braces $ "\\externalfigure" <> brackets (text src)
+ let src' = if isAbsoluteURI src
+ then src
+ else unEscapeString src
+ return $ braces $ "\\externalfigure" <> brackets (text src')
inlineToConTeXt (Note contents) = do
contents' <- blockListToConTeXt contents
return $ text "\\footnote{" <>
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 9d09d46e3..29c042cf9 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -147,9 +147,9 @@ blockToDocbook opts (Para lst) =
inTagsIndented "para" $ inlinesToDocbook opts lst
blockToDocbook opts (BlockQuote blocks) =
inTagsIndented "blockquote" $ blocksToDocbook opts blocks
-blockToDocbook _ (CodeBlock (_,classes,_) str) =
- text ("<screen" ++ lang ++ ">") <> cr <>
- flush (text (escapeStringForXML str) <> cr <> text "</screen>")
+blockToDocbook _ (CodeBlock (_,classes,_) str) =
+ text ("<programlisting" ++ lang ++ ">") <> cr <>
+ flush (text (escapeStringForXML str) <> cr <> text "</programlisting>")
where lang = if null langs
then ""
else " language=\"" ++ escapeStringForXML (head langs) ++
@@ -185,24 +185,24 @@ blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block
blockToDocbook _ (RawBlock _ _) = empty
blockToDocbook _ HorizontalRule = empty -- not semantic
blockToDocbook opts (Table caption aligns widths headers rows) =
- let alignStrings = map alignmentToString aligns
- captionDoc = if null caption
+ let captionDoc = if null caption
then empty
- else inTagsIndented "caption"
+ else inTagsIndented "title"
(inlinesToDocbook opts caption)
tableType = if isEmpty captionDoc then "informaltable" else "table"
- percent w = show (truncate (100*w) :: Integer) ++ "%"
- coltags = if all (== 0.0) widths
- then empty
- else vcat $ map (\w ->
- selfClosingTag "col" [("width", percent w)]) widths
+ percent w = show (truncate (100*w) :: Integer) ++ "*"
+ coltags = vcat $ zipWith (\w al -> selfClosingTag "colspec"
+ ([("colwidth", percent w) | w > 0] ++
+ [("align", alignmentToString al)])) widths aligns
head' = if all null headers
then empty
else inTagsIndented "thead" $
- tableRowToDocbook opts alignStrings "th" headers
+ tableRowToDocbook opts headers
body' = inTagsIndented "tbody" $
- vcat $ map (tableRowToDocbook opts alignStrings "td") rows
- in inTagsIndented tableType $ captionDoc $$ coltags $$ head' $$ body'
+ vcat $ map (tableRowToDocbook opts) rows
+ in inTagsIndented tableType $ captionDoc $$
+ (inTags True "tgroup" [("cols", show (length headers))] $
+ coltags $$ head' $$ body')
alignmentToString :: Alignment -> [Char]
alignmentToString alignment = case alignment of
@@ -212,22 +212,16 @@ alignmentToString alignment = case alignment of
AlignDefault -> "left"
tableRowToDocbook :: WriterOptions
- -> [String]
- -> String
-> [[Block]]
-> Doc
-tableRowToDocbook opts aligns celltype cols =
- inTagsIndented "tr" $ vcat $
- zipWith (tableItemToDocbook opts celltype) aligns cols
+tableRowToDocbook opts cols =
+ inTagsIndented "row" $ vcat $ map (tableItemToDocbook opts) cols
tableItemToDocbook :: WriterOptions
- -> [Char]
- -> [Char]
-> [Block]
-> Doc
-tableItemToDocbook opts tag align item =
- let attrib = [("align", align)]
- in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
+tableItemToDocbook opts item =
+ inTags True "entry" [] $ vcat $ map (blockToDocbook opts) item
-- | Convert a list of inline elements to Docbook.
inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index c0cc815d4..9fc393fed 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -46,6 +46,8 @@ import Text.Pandoc.UUID
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.Markdown ( writePlain )
import Data.Char ( toLower )
+import System.Directory ( copyFile )
+import Network.URI ( unEscapeString )
-- | Produce an EPUB file from a Pandoc document.
writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line
@@ -59,9 +61,26 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
, writerStandalone = True
, writerWrapText = False }
let sourceDir = writerSourceDirectory opts'
+ let vars = writerVariables opts'
+ let mbCoverImage = lookup "epub-cover-image" vars
+
+ -- cover page
+ (cpgEntry, cpicEntry) <-
+ case mbCoverImage of
+ Nothing -> return ([],[])
+ Just img -> do
+ let coverImage = "cover-image" ++ takeExtension img
+ copyFile img coverImage
+ let cpContent = fromString $ writeHtmlString
+ opts'{writerTemplate = pageTemplate
+ ,writerVariables =
+ ("coverimage",coverImage):vars}
+ (Pandoc meta [])
+ imgContent <- B.readFile img
+ return ( [mkEntry "cover.xhtml" cpContent]
+ , [mkEntry coverImage imgContent] )
-- title page
- let vars = writerVariables opts'
let tpContent = fromString $ writeHtmlString
opts'{writerTemplate = pageTemplate
,writerVariables = ("titlepage","yes"):vars}
@@ -84,8 +103,7 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
let chunks = splitByIndices h1Indices blocks
let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys
titleize xs = Pandoc meta xs
- let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate
- , writerHTMLMathMethod = PlainMath }
+ let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate }
let chapters = map titleize chunks
let chapterToEntry :: Int -> Pandoc -> Entry
chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $
@@ -117,17 +135,21 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
,("xmlns","http://www.idpf.org/2007/opf")
,("unique-identifier","BookId")] $
[ metadataElement (writerEPUBMetadata opts')
- uuid lang plainTitle plainAuthors
+ uuid lang plainTitle plainAuthors mbCoverImage
, unode "manifest" $
[ unode "item" ! [("id","ncx"), ("href","toc.ncx")
,("media-type","application/x-dtbncx+xml")] $ ()
, unode "item" ! [("id","style"), ("href","stylesheet.css")
,("media-type","text/css")] $ ()
] ++
- map chapterNode (tpEntry : chapterEntries) ++
- map pictureNode picEntries
+ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
+ map pictureNode (cpicEntry ++ picEntries)
, unode "spine" ! [("toc","ncx")] $
- map chapterRefNode (tpEntry : chapterEntries)
+ case mbCoverImage of
+ Nothing -> []
+ Just _ -> [ unode "itemref" !
+ [("idref", "cover"),("linear","no")] $ () ]
+ ++ map chapterRefNode (tpEntry : chapterEntries)
]
let contentsEntry = mkEntry "content.opf" contentsData
@@ -142,7 +164,7 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
let tocData = fromString $ ppTopElement $
unode "ncx" ! [("version","2005-1")
,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
- [ unode "head"
+ [ unode "head" $
[ unode "meta" ! [("name","dtb:uid")
,("content", show uuid)] $ ()
, unode "meta" ! [("name","dtb:depth")
@@ -151,7 +173,10 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
,("content", "0")] $ ()
, unode "meta" ! [("name","dtb:maxPageNumber")
,("content", "0")] $ ()
- ]
+ ] ++ case mbCoverImage of
+ Nothing -> []
+ Just _ -> [unode "meta" ! [("name","cover"),
+ ("content","cover-image")] $ ()]
, unode "docTitle" $ unode "text" $ plainTitle
, unode "navMap" $ zipWith3 navPointNode (tpEntry : chapterEntries)
[1..(length chapterEntries + 1)]
@@ -181,11 +206,12 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
-- construct archive
let archive = foldr addEntryToArchive emptyArchive
(mimetypeEntry : containerEntry : stylesheetEntry : tpEntry :
- contentsEntry : tocEntry : (picEntries ++ chapterEntries) )
+ contentsEntry : tocEntry :
+ (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries) )
return $ fromArchive archive
-metadataElement :: String -> UUID -> String -> String -> [String] -> Element
-metadataElement metadataXML uuid lang title authors =
+metadataElement :: String -> UUID -> String -> String -> [String] -> Maybe a -> Element
+metadataElement metadataXML uuid lang title authors mbCoverImage =
let userNodes = parseXML metadataXML
elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
,("xmlns:opf","http://www.idpf.org/2007/opf")] $
@@ -200,7 +226,9 @@ metadataElement metadataXML uuid lang title authors =
[ unode "dc:language" lang | not (elt `contains` "language") ] ++
[ unode "dc:identifier" ! [("id","BookId")] $ show uuid |
not (elt `contains` "identifier") ] ++
- [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ]
+ [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] ++
+ [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () |
+ not (isNothing mbCoverImage) ]
in elt{ elContent = elContent elt ++ map Elem newNodes }
transformInlines :: HTMLMathMethod
@@ -211,9 +239,10 @@ transformInlines :: HTMLMathMethod
transformInlines _ _ _ (Image lab (src,_) : xs) | isNothing (imageTypeOf src) =
return $ Emph lab : xs
transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do
+ let src' = unEscapeString src
pics <- readIORef picsRef
- let oldsrc = sourceDir </> src
- let ext = takeExtension src
+ let oldsrc = sourceDir </> src'
+ let ext = takeExtension src'
newsrc <- case lookup oldsrc pics of
Just n -> return n
Nothing -> do
@@ -266,9 +295,17 @@ pageTemplate = unlines
, "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
, "<head>"
, "<title>$title$</title>"
+ , "$if(coverimage)$"
+ , "<style type=\"text/css\">img{ max-width: 100%; }</style>"
+ , "$endif$"
, "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />"
, "</head>"
, "<body>"
+ , "$if(coverimage)$"
+ , "<div id=\"cover-image\">"
+ , "<img src=\"$coverimage$\" alt=\"$title$\" />"
+ , "</div>"
+ , "$else$"
, "$if(titlepage)$"
, "<h1 class=\"title\">$title$</h1>"
, "$for(author)$"
@@ -279,6 +316,7 @@ pageTemplate = unlines
, "$if(toc)$"
, "$toc$"
, "$endif$"
+ , "$endif$"
, "$body$"
, "$endif$"
, "</body>"
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index b3320fa06..573adbf4a 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -18,9 +18,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
- Module : Text.Pandoc.Writers.HTML
+ Module : Text.Pandoc.Writers.HTML
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -46,6 +46,7 @@ import Text.XHtml.Transitional hiding ( stringToHtml, unordList, ordList )
import qualified Text.XHtml.Transitional as XHtml
import Text.TeXMath
import Text.XML.Light.Output
+import System.FilePath (takeExtension)
data WriterState = WriterState
{ stNotes :: [Html] -- ^ List of notes
@@ -101,42 +102,37 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do
else return noHtml
auths <- if standalone
then mapM (inlineListToHtml opts) authors'
- else return []
+ else return []
date <- if standalone
then inlineListToHtml opts date'
- else return noHtml
- let sects = hierarchicalize blocks
- toc <- if writerTableOfContents opts
+ else return noHtml
+ let sects = hierarchicalize $
+ if writerSlideVariant opts == NoSlides
+ then blocks
+ else case blocks of
+ (Header 1 _ : _) -> blocks
+ _ ->
+ let isL1 (Header 1 _) = True
+ isL1 _ = False
+ (preBlocks, rest) = break isL1 blocks
+ in (RawBlock "html" "<div class=\"slide\">" :
+ preBlocks) ++ (RawBlock "html" "</div>" :
+ rest)
+ toc <- if writerTableOfContents opts
then tableOfContents opts sects
else return Nothing
- let startSlide = RawBlock "html" "<div class=\"slide\">\n"
- endSlide = RawBlock "html" "</div>\n"
- let cutUp (HorizontalRule : Header 1 ys : xs) = cutUp (Header 1 ys : xs)
- cutUp (HorizontalRule : xs) = [endSlide, startSlide] ++ cutUp xs
- cutUp (Header 1 ys : xs) = [endSlide, startSlide] ++
- (Header 1 ys : cutUp xs)
- cutUp (x:xs) = x : cutUp xs
- cutUp [] = []
- let slides = case blocks of
- (HorizontalRule : xs) -> [startSlide] ++ cutUp xs ++ [endSlide]
- (Header 1 ys : xs) -> [startSlide, Header 1 ys] ++
- cutUp xs ++ [endSlide]
- _ -> [startSlide] ++ cutUp blocks ++
- [endSlide]
blocks' <- liftM (toHtmlFromList . intersperse (nl opts)) $
- if writerSlideVariant opts `elem` [SlidySlides, S5Slides]
- then mapM (blockToHtml opts) slides
- else mapM (elementToHtml opts) sects
+ mapM (elementToHtml opts) sects
st <- get
let notes = reverse (stNotes st)
let thebody = blocks' +++ footnoteSection opts notes
let math = if stMath st
then case writerHTMLMathMethod opts of
LaTeXMathML (Just url) ->
- script !
+ script !
[src url, thetype "text/javascript"] $ noHtml
MathML (Just url) ->
- script !
+ script !
[src url, thetype "text/javascript"] $ noHtml
MathJax url ->
script ! [src url, thetype "text/javascript"] $ noHtml
@@ -144,9 +140,10 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do
script !
[src url, thetype "text/javascript"] $ noHtml
_ -> case lookup "mathml-script" (writerVariables opts) of
- Just s ->
+ Just s ->
script ! [thetype "text/javascript"] <<
- primHtml s
+ primHtml ("/*<![CDATA[*/\n" ++ s ++
+ "/*]]>*/\n")
Nothing -> noHtml
else noHtml
let newvars = [("highlighting-css", defaultHighlightingCss) |
@@ -173,7 +170,10 @@ inTemplate opts tit auths date toc body' newvars =
[ ("body", dropWhile (=='\n') $ showHtmlFragment body')
, ("pagetitle", topTitle')
, ("title", dropWhile (=='\n') $ showHtmlFragment tit)
- , ("date", date') ] ++
+ , ("date", date')
+ , ("idprefix", writerIdentifierPrefix opts)
+ , ("slidy-url", "http://www.w3.org/Talks/Tools/Slidy2")
+ , ("s5-url", "ui/default") ] ++
[ ("html5","true") | writerHtml5 opts ] ++
(case toc of
Just t -> [ ("toc", showHtmlFragment t)]
@@ -202,14 +202,7 @@ tableOfContents opts sects = do
let tocList = catMaybes contents
return $ if null tocList
then Nothing
- else Just $
- if writerHtml5 opts
- then (tag "nav" ! [prefixedId opts' "TOC"] $
- nl opts +++ unordList opts tocList +++ nl opts)
- +++ nl opts
- else (thediv ! [prefixedId opts' "TOC"] $
- nl opts +++ unordList opts tocList +++ nl opts)
- +++ nl opts
+ else Just $ unordList opts tocList
-- | Convert section number to string
showSecNum :: [Int] -> String
@@ -233,33 +226,35 @@ elementToListItem opts (Sec _ num id' headerText subsecs) = do
-- | Convert an Element to Html.
elementToHtml :: WriterOptions -> Element -> State WriterState Html
-elementToHtml opts (Blk block) = blockToHtml opts block
+elementToHtml opts (Blk HorizontalRule) | writerSlideVariant opts /= NoSlides =
+ return $ primHtml "</div>" +++ nl opts +++ primHtml "<div class=\"slide\">"
+elementToHtml opts (Blk block) = blockToHtml opts block
elementToHtml opts (Sec level num id' title' elements) = do
- innerContents <- mapM (elementToHtml opts) elements
modify $ \st -> st{stSecNum = num} -- update section number
header' <- blockToHtml opts (Header level title')
- let slides = writerSlideVariant opts `elem` [SlidySlides, S5Slides]
+ innerContents <- mapM (elementToHtml opts) elements
let header'' = header' ! [prefixedId opts id' |
not (writerStrictMarkdown opts ||
- writerSectionDivs opts || slides)]
+ writerSectionDivs opts ||
+ writerSlideVariant opts == S5Slides)]
let stuff = header'' : innerContents
- return $ if slides -- S5 gets confused by the extra divs around sections
- then toHtmlFromList $ intersperse (nl opts) stuff
- else if writerSectionDivs opts
- then if writerHtml5 opts
- then tag "section" ! [prefixedId opts id']
- << (nl opts : (intersperse (nl opts) stuff
- ++ [nl opts]))
- else thediv ! [prefixedId opts id'] <<
- (nl opts : (intersperse (nl opts) stuff
- ++ [nl opts]))
- else toHtmlFromList $ intersperse (nl opts) stuff
+ let slide = writerSlideVariant opts /= NoSlides && level == 1
+ let stuff' = if slide
+ then [thediv ! [theclass "slide"] <<
+ (nl opts : intersperse (nl opts) stuff ++ [nl opts])]
+ else intersperse (nl opts) stuff
+ let inNl x = nl opts : x ++ [nl opts]
+ return $ if writerSectionDivs opts
+ then if writerHtml5 opts
+ then tag "section" ! [prefixedId opts id'] << inNl stuff'
+ else thediv ! [prefixedId opts id'] << inNl stuff'
+ else toHtmlFromList stuff'
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
footnoteSection :: WriterOptions -> [Html] -> Html
footnoteSection opts notes =
- if null notes
+ if null notes
then noHtml
else nl opts +++ (thediv ! [theclass "footnotes"]
$ nl opts +++ hr +++ nl opts +++
@@ -272,7 +267,7 @@ parseMailto ('m':'a':'i':'l':'t':'o':':':addr) =
let (name', rest) = span (/='@') addr
domain = drop 1 rest
in Just (name', domain)
-parseMailto _ = Nothing
+parseMailto _ = Nothing
-- | Obfuscate a "mailto:" link.
obfuscateLink :: WriterOptions -> String -> String -> Html
@@ -280,15 +275,15 @@ obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation =
anchor ! [href s] << txt
obfuscateLink opts txt s =
let meth = writerEmailObfuscation opts
- s' = map toLower s
+ s' = map toLower s
in case parseMailto s' of
(Just (name', domain)) ->
let domain' = substitute "." " dot " domain
at' = obfuscateChar '@'
- (linkText, altText) =
+ (linkText, altText) =
if txt == drop 7 s' -- autolink
then ("'<code>'+e+'</code>'", name' ++ " at " ++ domain')
- else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++
+ else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++
domain' ++ ")")
in case meth of
ReferenceObfuscation ->
@@ -297,18 +292,18 @@ obfuscateLink opts txt s =
++ "\">" ++ (obfuscateString txt) ++ "</a>"
JavascriptObfuscation ->
(script ! [thetype "text/javascript"] $
- primHtml ("\n<!--\nh='" ++
- obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
+ primHtml ("\n<!--\nh='" ++
+ obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
obfuscateString name' ++ "';e=n+a+h;\n" ++
- "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
- linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
+ "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
+ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
noscript (primHtml $ obfuscateString altText)
_ -> error $ "Unknown obfuscation method: " ++ show meth
_ -> anchor ! [href s] $ stringToHtml opts txt -- malformed email
-- | Obfuscate character as entity.
obfuscateChar :: Char -> String
-obfuscateChar char =
+obfuscateChar char =
let num = ord char
numstr = if even num then show num else "x" ++ showHex num ""
in "&#" ++ numstr ++ ";"
@@ -323,6 +318,17 @@ attrsToHtml opts (id',classes',keyvals) =
[prefixedId opts id' | not (null id')] ++
map (\(x,y) -> strAttr x y) keyvals
+imageExts :: [String]
+imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf",
+ "gif", "ico", "ief", "jng", "jpg", "jpeg", "nef", "orf", "pat", "pbm",
+ "pcx", "pgm", "png", "pnm", "ppm", "psd", "ras", "rgb", "svg", "tiff",
+ "wbmp", "xbm", "xpm", "xwd" ]
+
+treatAsImage :: FilePath -> Bool
+treatAsImage fp =
+ let ext = map toLower $ drop 1 $ takeExtension fp
+ in null ext || ext `elem` imageExts
+
-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> State WriterState Html
blockToHtml _ Null = return noHtml
@@ -361,14 +367,14 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
return h
blockToHtml opts (BlockQuote blocks) =
-- in S5, treat list in blockquote specially
- -- if default is incremental, make it nonincremental;
+ -- if default is incremental, make it nonincremental;
-- otherwise incremental
if writerSlideVariant opts /= NoSlides
then let inc = not (writerIncremental opts) in
- case blocks of
+ case blocks of
[BulletList lst] -> blockToHtml (opts {writerIncremental = inc})
(BulletList lst)
- [OrderedList attribs lst] ->
+ [OrderedList attribs lst] ->
blockToHtml (opts {writerIncremental = inc})
(OrderedList attribs lst)
_ -> do contents <- blockListToHtml opts blocks
@@ -377,7 +383,7 @@ blockToHtml opts (BlockQuote blocks) =
else do
contents <- blockListToHtml opts blocks
return $ blockquote (nl opts +++ contents +++ nl opts)
-blockToHtml opts (Header level lst) = do
+blockToHtml opts (Header level lst) = do
contents <- inlineListToHtml opts lst
secnum <- liftM stSecNum get
let contents' = if writerNumberSections opts
@@ -470,8 +476,8 @@ tableRowToHtml opts aligns rownum cols' = do
0 -> "header"
x | x `rem` 2 == 1 -> "odd"
_ -> "even"
- cols'' <- sequence $ zipWith
- (\alignment item -> tableItemToHtml opts mkcell alignment item)
+ cols'' <- sequence $ zipWith
+ (\alignment item -> tableItemToHtml opts mkcell alignment item)
aligns cols'
return $ (tr ! [theclass rowclass] $ nl opts +++ toHtmlFromList cols'')
+++ nl opts
@@ -508,13 +514,13 @@ blockListToHtml opts lst =
-- | Convert list of Pandoc inline elements to HTML.
inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
-inlineListToHtml opts lst =
+inlineListToHtml opts lst =
mapM (inlineToHtml opts) lst >>= return . toHtmlFromList
-- | Convert Pandoc inline element to HTML.
inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
inlineToHtml opts inline =
- case inline of
+ case inline of
(Str str) -> return $ stringToHtml opts str
(Space) -> return $ stringToHtml opts " "
(LineBreak) -> return br
@@ -543,9 +549,9 @@ inlineToHtml opts inline =
stringToHtml opts "”")
in do contents <- inlineListToHtml opts lst
return $ leftQuote +++ contents +++ rightQuote
- (Math t str) -> modify (\st -> st {stMath = True}) >>
+ (Math t str) -> modify (\st -> st {stMath = True}) >>
(case writerHTMLMathMethod opts of
- LaTeXMathML _ ->
+ LaTeXMathML _ ->
-- putting LaTeXMathML in container with class "LaTeX" prevents
-- non-math elements on the page from being treated as math by
-- the javascript
@@ -594,37 +600,44 @@ inlineToHtml opts inline =
LaTeXMathML _ -> do modify (\st -> st {stMath = True})
return $ primHtml str
_ -> return noHtml
- (RawInline "html" str) -> return $ primHtml str
+ (RawInline "html" str) -> return $ primHtml str
(RawInline _ _) -> return noHtml
(Link [Code _ str] (s,_)) | "mailto:" `isPrefixOf` s ->
return $ obfuscateLink opts str s
(Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do
- linkText <- inlineListToHtml opts txt
+ linkText <- inlineListToHtml opts txt
return $ obfuscateLink opts (show linkText) s
(Link txt (s,tit)) -> do
linkText <- inlineListToHtml opts txt
- return $ anchor ! ([href s] ++
- if null tit then [] else [title tit]) $
+ return $ anchor ! ([href s] ++
+ if null tit then [] else [title tit]) $
linkText
- (Image txt (s,tit)) -> do
+ (Image txt (s,tit)) | treatAsImage s -> do
let alternate' = stringify txt
let attributes = [src s] ++
- (if null tit
- then []
- else [title tit]) ++
- if null txt
- then []
+ (if null tit
+ then []
+ else [title tit]) ++
+ if null txt
+ then []
else [alt alternate']
- return $ image ! attributes
- -- note: null title included, as in Markdown.pl
- (Note contents) -> do
+ return $ image ! attributes
+ -- note: null title included, as in Markdown.pl
+ (Image _ (s,tit)) -> do
+ let attributes = [src s] ++
+ (if null tit
+ then []
+ else [title tit])
+ return $ itag "embed" ! attributes
+ -- note: null title included, as in Markdown.pl
+ (Note contents) -> do
st <- get
let notes = stNotes st
let number = (length notes) + 1
let ref = show number
- htmlContents <- blockListToNote opts ref contents
+ htmlContents <- blockListToNote opts ref contents
-- push contents onto front of notes
- put $ st {stNotes = (htmlContents:notes)}
+ put $ st {stNotes = (htmlContents:notes)}
return $ sup <<
anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref),
theclass "footnoteRef",
@@ -635,9 +648,8 @@ blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
blockListToNote opts ref blocks =
-- If last block is Para or Plain, include the backlink at the end of
-- that block. Otherwise, insert a new Plain block with the backlink.
- let backlink = [RawInline "html" $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++
- "\" class=\"footnoteBackLink\"" ++
- " title=\"Jump back to footnote " ++ ref ++ "\">" ++
+ let backlink = [RawInline "html" $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++
+ "\" class=\"footnoteBackLink\">" ++
(if writerAscii opts then "&#8617;" else "↩") ++ "</a>"]
blocks' = if null blocks
then []
@@ -652,4 +664,3 @@ blockListToNote opts ref blocks =
Plain backlink]
in do contents <- blockListToHtml opts blocks'
return $ nl opts +++ (li ! [prefixedId opts ("fn" ++ ref)]) contents
-
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 7dd736da4..d925b2897 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -34,7 +34,9 @@ import Text.Pandoc.Generic
import Text.Pandoc.Shared
import Text.Pandoc.Templates
import Text.Printf ( printf )
-import Data.List ( (\\), isSuffixOf, isPrefixOf, intercalate, intersperse )
+import Network.URI ( isAbsoluteURI, unEscapeString )
+import Data.List ( (\\), isSuffixOf, isInfixOf,
+ isPrefixOf, intercalate, intersperse )
import Data.Char ( toLower, isPunctuation )
import Control.Monad.State
import Text.Pandoc.Pretty
@@ -42,6 +44,9 @@ import System.FilePath (dropExtension)
data WriterState =
WriterState { stInNote :: Bool -- @True@ if we're in a note
+ , stInTable :: Bool -- @True@ if we're in a table
+ , stTableNotes :: [(Char, Doc)] -- List of markers, notes
+ -- in current table
, stOLLevel :: Int -- level of ordered list nesting
, stOptions :: WriterOptions -- writer options, so they don't have to be parameter
, stVerbInNote :: Bool -- true if document has verbatim text in note
@@ -53,17 +58,20 @@ data WriterState =
, stGraphics :: Bool -- true if document contains images
, stLHS :: Bool -- true if document has literate haskell code
, stBook :: Bool -- true if document uses book or memoir class
+ , stCsquotes :: Bool -- true if document uses csquotes
}
-- | Convert Pandoc to LaTeX.
writeLaTeX :: WriterOptions -> Pandoc -> String
writeLaTeX options document =
evalState (pandocToLaTeX options document) $
- WriterState { stInNote = False, stOLLevel = 1, stOptions = options,
+ WriterState { stInNote = False, stInTable = False,
+ stTableNotes = [], stOLLevel = 1, stOptions = options,
stVerbInNote = False, stEnumerate = False,
stTable = False, stStrikeout = False, stSubscript = False,
stUrl = False, stGraphics = False,
- stLHS = False, stBook = writerChapters options }
+ stLHS = False, stBook = writerChapters options,
+ stCsquotes = False }
pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String
pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
@@ -73,6 +81,10 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
"{report}" `isSuffixOf` x)
when (any usesBookClass (lines template)) $
modify $ \s -> s{stBook = True}
+ -- check for \usepackage...{csquotes}; if present, we'll use
+ -- \enquote{...} for smart quotes:
+ when ("{csquotes}" `isInfixOf` template) $
+ modify $ \s -> s{stCsquotes = True}
opts <- liftM stOptions get
let colwidth = if writerWrapText opts
then Just $ writerColumns opts
@@ -106,7 +118,6 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
, ("title", titletext)
, ("date", dateText) ] ++
[ ("author", a) | a <- authorsText ] ++
- [ ("xetex", "yes") | writerXeTeX options ] ++
[ ("verbatim-in-note", "yes") | stVerbInNote st ] ++
[ ("fancy-enums", "yes") | stEnumerate st ] ++
[ ("tables", "yes") | stTable st ] ++
@@ -147,14 +158,6 @@ stringToLaTeX = escapeStringUsing latexEscapes
inCmd :: String -> Doc -> Doc
inCmd cmd contents = char '\\' <> text cmd <> braces contents
--- | Remove all code elements from list of inline elements
--- (because it's illegal to have verbatim inside some command arguments)
-deVerb :: [Inline] -> [Inline]
-deVerb [] = []
-deVerb ((Code _ str):rest) =
- (RawInline "latex" $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest)
-deVerb (other:rest) = other:(deVerb rest)
-
-- | Convert Pandoc block element to LaTeX.
blockToLaTeX :: Block -- ^ Block to convert
-> State WriterState Doc
@@ -211,8 +214,9 @@ blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do
| null params = empty
| otherwise = "[" <> hsep (intersperse "," (map text params)) <>
"]"
- return $ "\\begin{" <> text env <> "}" <> printParams $$ flush (text str) $$
- "\\end{" <> text env <> "}" $$ cr -- final cr needed because of footnotes
+ return $ flush ("\\begin{" <> text env <> "}" <> printParams $$ text str $$
+ "\\end{" <> text env <> "}") $$ cr
+ -- final cr needed because of footnotes
blockToLaTeX (RawBlock "latex" x) = return $ text x <> blankline
blockToLaTeX (RawBlock _ _) = return empty
blockToLaTeX (BulletList lst) = do
@@ -244,14 +248,13 @@ blockToLaTeX (DefinitionList lst) = do
blockToLaTeX HorizontalRule = return $
"\\begin{center}\\rule{3in}{0.4pt}\\end{center}" $$ blankline
blockToLaTeX (Header level lst) = do
- let lst' = deVerb lst
- txt <- inlineListToLaTeX lst'
+ txt <- inlineListToLaTeX lst
let noNote (Note _) = Str ""
noNote x = x
- let lstNoNotes = bottomUp noNote lst'
+ let lstNoNotes = bottomUp noNote lst
-- footnotes in sections don't work unless you specify an optional
-- argument: \section[mysec]{mysec\footnote{blah}}
- optional <- if lstNoNotes == lst'
+ optional <- if lstNoNotes == lst
then return empty
else do
res <- inlineListToLaTeX lstNoNotes
@@ -269,47 +272,61 @@ blockToLaTeX (Header level lst) = do
5 -> headerWith "\\subparagraph" stuffing
_ -> txt $$ blankline
blockToLaTeX (Table caption aligns widths heads rows) = do
+ modify $ \s -> s{ stInTable = True, stTableNotes = [] }
headers <- if all null heads
then return empty
- else liftM ($$ "\\hline") $ (tableRowToLaTeX widths) heads
+ else liftM ($$ "\\ML")
+ $ (tableRowToLaTeX True aligns widths) heads
captionText <- inlineListToLaTeX caption
- rows' <- mapM (tableRowToLaTeX widths) rows
- let colDescriptors = concat $ zipWith toColDescriptor widths aligns
- let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$
- headers $$ vcat rows' $$ "\\end{tabular}"
- let centered txt = "\\begin{center}" $$ txt $$ "\\end{center}"
- modify $ \s -> s{ stTable = True }
- return $ if isEmpty captionText
- then centered tableBody $$ blankline
- else "\\begin{table}[h]" $$ centered tableBody $$
- inCmd "caption" captionText $$ "\\end{table}" $$ blankline
-
-toColDescriptor :: Double -> Alignment -> String
-toColDescriptor 0 align =
+ let capt = if isEmpty captionText
+ then empty
+ else text "caption = " <> captionText <> "," <> space
+ rows' <- mapM (tableRowToLaTeX False aligns widths) rows
+ let rows'' = intersperse ("\\\\\\noalign{\\medskip}") rows'
+ tableNotes <- liftM (reverse . stTableNotes) get
+ let toNote (marker, x) = "\\tnote" <> brackets (char marker) <>
+ braces (nest 2 x)
+ let notes = vcat $ map toNote tableNotes
+ let colDescriptors = text $ concat $ map toColDescriptor aligns
+ let tableBody =
+ ("\\ctable" <> brackets (capt <> text "pos = H, center, botcap"))
+ <> braces colDescriptors
+ $$ braces ("% notes" <> cr <> notes <> cr)
+ $$ braces (text "% rows" $$ "\\FL" $$
+ vcat (headers : rows'') $$ "\\LL" <> cr)
+ modify $ \s -> s{ stTable = True, stInTable = False, stTableNotes = [] }
+ return $ tableBody $$ blankline
+
+toColDescriptor :: Alignment -> String
+toColDescriptor align =
case align of
AlignLeft -> "l"
AlignRight -> "r"
AlignCenter -> "c"
AlignDefault -> "l"
-toColDescriptor width align = ">{\\PBS" ++
- (case align of
- AlignLeft -> "\\raggedright"
- AlignRight -> "\\raggedleft"
- AlignCenter -> "\\centering"
- AlignDefault -> "\\raggedright") ++
- "\\hspace{0pt}}p{" ++ printf "%.2f" width ++ "\\columnwidth}"
blockListToLaTeX :: [Block] -> State WriterState Doc
blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat
-tableRowToLaTeX :: [Double] -> [[Block]] -> State WriterState Doc
-tableRowToLaTeX widths cols = do
+tableRowToLaTeX :: Bool
+ -> [Alignment]
+ -> [Double]
+ -> [[Block]]
+ -> State WriterState Doc
+tableRowToLaTeX header aligns widths cols = do
renderedCells <- mapM blockListToLaTeX cols
- let toCell 0 c = c
- toCell w c = "\\parbox{" <> text (printf "%.2f" w) <>
- "\\columnwidth}{" <> c <> cr <> "}"
- let cells = zipWith toCell widths renderedCells
- return $ (hcat $ intersperse (" & ") cells) <> "\\\\"
+ let valign = text $ if header then "[b]" else "[t]"
+ let halign x = case x of
+ AlignLeft -> "\\raggedright"
+ AlignRight -> "\\raggedleft"
+ AlignCenter -> "\\centering"
+ AlignDefault -> "\\raggedright"
+ let toCell 0 _ c = c
+ toCell w a c = "\\parbox" <> valign <>
+ braces (text (printf "%.2f\\columnwidth" w)) <>
+ braces (halign a <> cr <> c <> cr)
+ let cells = zipWith3 toCell widths aligns renderedCells
+ return $ hcat $ intersperse (" & ") cells
listItemToLaTeX :: [Block] -> State WriterState Doc
listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) .
@@ -317,7 +334,7 @@ listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) .
defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc
defListItemToLaTeX (term, defs) = do
- term' <- inlineListToLaTeX $ deVerb term
+ term' <- inlineListToLaTeX term
def' <- liftM vsep $ mapM blockListToLaTeX defs
return $ "\\item" <> brackets term' $$ def'
@@ -335,23 +352,23 @@ isQuoted _ = False
inlineToLaTeX :: Inline -- ^ Inline to convert
-> State WriterState Doc
inlineToLaTeX (Emph lst) =
- inlineListToLaTeX (deVerb lst) >>= return . inCmd "emph"
+ inlineListToLaTeX lst >>= return . inCmd "emph"
inlineToLaTeX (Strong lst) =
- inlineListToLaTeX (deVerb lst) >>= return . inCmd "textbf"
+ inlineListToLaTeX lst >>= return . inCmd "textbf"
inlineToLaTeX (Strikeout lst) = do
- contents <- inlineListToLaTeX $ deVerb lst
+ contents <- inlineListToLaTeX lst
modify $ \s -> s{ stStrikeout = True }
return $ inCmd "sout" contents
inlineToLaTeX (Superscript lst) =
- inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsuperscript"
+ inlineListToLaTeX lst >>= return . inCmd "textsuperscript"
inlineToLaTeX (Subscript lst) = do
modify $ \s -> s{ stSubscript = True }
- contents <- inlineListToLaTeX $ deVerb lst
+ contents <- inlineListToLaTeX lst
-- oddly, latex includes \textsuperscript but not \textsubscript
-- so we have to define it (using a different name so as not to conflict with memoir class):
return $ inCmd "textsubscr" contents
inlineToLaTeX (SmallCaps lst) =
- inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsc"
+ inlineListToLaTeX lst >>= return . inCmd "textsc"
inlineToLaTeX (Cite cits lst) = do
st <- get
let opts = stOptions st
@@ -362,29 +379,38 @@ inlineToLaTeX (Cite cits lst) = do
inlineToLaTeX (Code _ str) = do
st <- get
- when (stInNote st) $ modify $ \s -> s{ stVerbInNote = True }
- let chr = ((enumFromTo '!' '~') \\ str) !! 0
if writerListings (stOptions st)
- then return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr]
- else return $ text $ "\\verb" ++ [chr] ++ str ++ [chr]
+ then do
+ when (stInNote st) $ modify $ \s -> s{ stVerbInNote = True }
+ let chr = ((enumFromTo '!' '~') \\ str) !! 0
+ return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr]
+ else return $ text $ "\\texttt{" ++ stringToLaTeX str ++ "}"
inlineToLaTeX (Quoted SingleQuote lst) = do
contents <- inlineListToLaTeX lst
- let s1 = if (not (null lst)) && (isQuoted (head lst))
- then "\\,"
- else empty
- let s2 = if (not (null lst)) && (isQuoted (last lst))
- then "\\,"
- else empty
- return $ char '`' <> s1 <> contents <> s2 <> char '\''
+ csquotes <- liftM stCsquotes get
+ if csquotes
+ then return $ "\\enquote" <> braces contents
+ else do
+ let s1 = if (not (null lst)) && (isQuoted (head lst))
+ then "\\,"
+ else empty
+ let s2 = if (not (null lst)) && (isQuoted (last lst))
+ then "\\,"
+ else empty
+ return $ char '`' <> s1 <> contents <> s2 <> char '\''
inlineToLaTeX (Quoted DoubleQuote lst) = do
contents <- inlineListToLaTeX lst
- let s1 = if (not (null lst)) && (isQuoted (head lst))
- then "\\,"
- else empty
- let s2 = if (not (null lst)) && (isQuoted (last lst))
- then "\\,"
- else empty
- return $ "``" <> s1 <> contents <> s2 <> "''"
+ csquotes <- liftM stCsquotes get
+ if csquotes
+ then return $ "\\enquote" <> braces contents
+ else do
+ let s1 = if (not (null lst)) && (isQuoted (head lst))
+ then "\\,"
+ else empty
+ let s2 = if (not (null lst)) && (isQuoted (last lst))
+ then "\\,"
+ else empty
+ return $ "``" <> s1 <> contents <> s2 <> "''"
inlineToLaTeX Apostrophe = return $ char '\''
inlineToLaTeX EmDash = return "---"
inlineToLaTeX EnDash = return "--"
@@ -402,19 +428,28 @@ inlineToLaTeX (Link txt (src, _)) =
[Code _ x] | x == src -> -- autolink
do modify $ \s -> s{ stUrl = True }
return $ text $ "\\url{" ++ x ++ "}"
- _ -> do contents <- inlineListToLaTeX $ deVerb txt
+ _ -> do contents <- inlineListToLaTeX txt
return $ text ("\\href{" ++ stringToLaTeX src ++ "}{") <>
contents <> char '}'
inlineToLaTeX (Image _ (source, _)) = do
modify $ \s -> s{ stGraphics = True }
- return $ "\\includegraphics" <> braces (text source)
+ let source' = if isAbsoluteURI source
+ then source
+ else unEscapeString source
+ return $ "\\includegraphics" <> braces (text source')
inlineToLaTeX (Note contents) = do
modify (\s -> s{stInNote = True})
contents' <- blockListToLaTeX contents
modify (\s -> s {stInNote = False})
- -- note: a \n before } is needed when note ends with a Verbatim environment
- return $ "\\footnote" <> braces (nest 2 contents')
-
+ inTable <- liftM stInTable get
+ if inTable
+ then do
+ curnotes <- liftM stTableNotes get
+ let marker = cycle ['a'..'z'] !! length curnotes
+ modify $ \s -> s{ stTableNotes = (marker, contents') : curnotes }
+ return $ "\\tmark" <> brackets (char marker) <> space
+ else return $ "\\footnote" <> braces (nest 2 contents')
+ -- note: a \n before } needed when note ends with a Verbatim environment
citationsToNatbib :: [Citation] -> State WriterState Doc
citationsToNatbib (one:[])
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 5e12c4aca..48e9578b4 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -366,7 +366,19 @@ blockListToMarkdown :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
-> State WriterState Doc
blockListToMarkdown opts blocks =
- mapM (blockToMarkdown opts) blocks >>= return . cat
+ mapM (blockToMarkdown opts) (fixBlocks blocks) >>= return . cat
+ -- insert comment between list and indented code block, or the
+ -- code block will be treated as a list continuation paragraph
+ where fixBlocks (b : CodeBlock attr x : rest)
+ | (writerStrictMarkdown opts || attr == nullAttr) && isListBlock b =
+ b : RawBlock "html" "<!-- -->\n" : CodeBlock attr x :
+ fixBlocks rest
+ fixBlocks (x : xs) = x : fixBlocks xs
+ fixBlocks [] = []
+ isListBlock (BulletList _) = True
+ isListBlock (OrderedList _ _) = True
+ isListBlock (DefinitionList _) = True
+ isListBlock _ = False
-- | Get reference for target; if none exists, create unique one and return.
-- Prefer label if possible; otherwise, generate a unique key.
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index cf1be8755..f8030965c 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -29,6 +29,7 @@ Conversion of 'Pandoc' documents to ODT.
-}
module Text.Pandoc.Writers.ODT ( writeODT ) where
import Data.IORef
+import Data.List ( isPrefixOf )
import System.FilePath ( (</>), takeExtension )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( fromString )
@@ -36,11 +37,15 @@ import Codec.Archive.Zip
import System.Time
import Paths_pandoc ( getDataFileName )
import Text.Pandoc.Shared ( WriterOptions(..) )
+import Text.Pandoc.MIME ( getMimeType )
import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
import System.Directory
import Control.Monad (liftM)
+import Network.URI ( unEscapeString )
+import Text.Pandoc.XML
+import Text.Pandoc.Pretty
-- | Produce an ODT file from a Pandoc document.
writeODT :: Maybe FilePath -- ^ Path specified by --reference-odt
@@ -69,14 +74,37 @@ writeODT mbRefOdt opts doc = do
(TOD epochtime _) <- getClockTime
let contentEntry = toEntry "content.xml" epochtime $ fromString newContents
picEntries <- readIORef picEntriesRef
- let archive = foldr addEntryToArchive refArchive (contentEntry : picEntries)
- return $ fromArchive archive
+ let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries
+ -- construct META-INF/manifest.xml based on archive
+ let toFileEntry fp = case getMimeType fp of
+ Nothing -> empty
+ Just m -> selfClosingTag "manifest:file-entry"
+ [("manifest:media-type", m)
+ ,("manifest:full-path", fp)
+ ]
+ let files = [ ent | ent <- filesInArchive archive, not ("META-INF" `isPrefixOf` ent) ]
+ let manifestEntry = toEntry "META-INF/manifest.xml" epochtime
+ $ fromString $ show
+ $ text "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
+ $$
+ ( inTags True "manifest:manifest"
+ [("xmlns:manifest","urn:oasis:names:tc:opendocument:xmlns:manifest:1.0")]
+ $ ( selfClosingTag "manifest:file-entry"
+ [("manifest:media-type","application/vnd.oasis.opendocument.text")
+ ,("manifest:version","1.2")
+ ,("manifest:full-path","/")]
+ $$ vcat ( map toFileEntry $ files )
+ )
+ )
+ let archive' = addEntryToArchive manifestEntry archive
+ return $ fromArchive archive'
transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline
transformPic sourceDir entriesRef (Image lab (src,tit)) = do
+ let src' = unEscapeString src
entries <- readIORef entriesRef
- let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src
- catch (readEntry [] (sourceDir </> src) >>= \entry ->
+ let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src'
+ catch (readEntry [] (sourceDir </> src') >>= \entry ->
modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >>
return (Image lab (newsrc, tit)))
(\_ -> return (Emph lab))
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index b9444aac7..e675f4e65 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -62,6 +62,7 @@ data WriterState =
, stIndentPara :: Int
, stInDefinition :: Bool
, stTight :: Bool
+ , stFirstPara :: Bool
}
defaultWriterState :: WriterState
@@ -75,6 +76,7 @@ defaultWriterState =
, stIndentPara = 0
, stInDefinition = False
, stTight = False
+ , stFirstPara = False
}
when :: Bool -> Doc -> Doc
@@ -111,10 +113,18 @@ inTightList f = modify (\s -> s { stTight = True }) >> f >>= \r ->
setInDefinitionList :: Bool -> State WriterState ()
setInDefinitionList b = modify $ \s -> s { stInDefinition = b }
-inParagraphTags :: Doc -> Doc
-inParagraphTags d | isEmpty d = empty
-inParagraphTags d =
- inTags False "text:p" [("text:style-name", "Text_20_body")] d
+setFirstPara :: State WriterState ()
+setFirstPara = modify $ \s -> s { stFirstPara = True }
+
+inParagraphTags :: Doc -> State WriterState Doc
+inParagraphTags d | isEmpty d = return empty
+inParagraphTags d = do
+ b <- gets stFirstPara
+ a <- if b
+ then do modify $ \st -> st { stFirstPara = False }
+ return $ [("text:style-name", "First_20_paragraph")]
+ else return [("text:style-name", "Text_20_body")]
+ return $ inTags False "text:p" a d
inParagraphTagsWithStyle :: String -> Doc -> Doc
inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)]
@@ -138,9 +148,10 @@ inTextStyle d = do
$ selfClosingTag "style:text-properties" (concatMap snd $ Map.toList at)
return $ inTags False "text:span" [("text:style-name","T" ++ show tn)] d
-inHeaderTags :: Int -> Doc -> Doc
-inHeaderTags i = inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i)
- , ("text:outline-level", show i)]
+inHeaderTags :: Int -> Doc -> State WriterState Doc
+inHeaderTags i d =
+ return $ inTags False "text:h" [ ("text:style-name", "Heading_20_" ++ show i)
+ , ("text:outline-level", show i)] d
inQuotes :: QuoteType -> Doc -> Doc
inQuotes SingleQuote s = text "&#8216;" <> s <> text "&#8217;"
@@ -164,7 +175,7 @@ writeOpenDocument :: WriterOptions -> Pandoc -> String
writeOpenDocument opts (Pandoc (Meta title authors date) blocks) =
let ((doc, title', authors', date'),s) = flip runState
defaultWriterState $ do
- title'' <- inlinesToOpenDocument opts title
+ title'' <- inlinesToOpenDocument opts title
authors'' <- mapM (inlinesToOpenDocument opts) authors
date'' <- inlinesToOpenDocument opts date
doc'' <- blocksToOpenDocument opts blocks
@@ -274,18 +285,20 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b
-- | Convert a Pandoc block element to OpenDocument.
blockToOpenDocument :: WriterOptions -> Block -> State WriterState Doc
blockToOpenDocument o bs
- | Plain b <- bs = inParagraphTags <$> inlinesToOpenDocument o b
- | Para b <- bs = inParagraphTags <$> inlinesToOpenDocument o b
- | Header i b <- bs = inHeaderTags i <$> inlinesToOpenDocument o b
- | BlockQuote b <- bs = mkBlockQuote b
- | CodeBlock _ s <- bs = preformatted s
+ | Plain b <- bs = inParagraphTags =<< inlinesToOpenDocument o b
+ | Para b <- bs = inParagraphTags =<< inlinesToOpenDocument o b
+ | Header i b <- bs = setFirstPara >>
+ (inHeaderTags i =<< inlinesToOpenDocument o b)
+ | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b
+ | DefinitionList b <- bs = setFirstPara >> defList b
+ | BulletList b <- bs = setFirstPara >> bulletListToOpenDocument o b
+ | OrderedList a b <- bs = setFirstPara >> orderedList a b
+ | CodeBlock _ s <- bs = setFirstPara >> preformatted s
+ | Table c a w h r <- bs = setFirstPara >> table c a w h r
+ | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p"
+ [ ("text:style-name", "Horizontal_20_Line") ])
| RawBlock _ _ <- bs = return empty
- | DefinitionList b <- bs = defList b
- | BulletList b <- bs = bulletListToOpenDocument o b
- | OrderedList a b <- bs = orderedList a b
- | Table c a w h r <- bs = table c a w h r
| Null <- bs = return empty
- | HorizontalRule <- bs = return $ selfClosingTag "text:p" [ ("text:style-name", "Horizontal_20_Line") ]
| otherwise = return empty
where
defList b = do setInDefinitionList True
@@ -381,14 +394,14 @@ inlineToOpenDocument o ils
mkImg s = inTags False "draw:frame" [] $
selfClosingTag "draw:image" [ ("xlink:href" , s )
, ("xlink:type" , "simple")
- , (" xlink:show" , "embed" )
+ , ("xlink:show" , "embed" )
, ("xlink:actuate", "onLoad")]
mkNote l = do
n <- length <$> gets stNotes
let footNote t = inTags False "text:note"
[ ("text:id" , "ftn" ++ show n)
, ("text:note-class", "footnote" )] $
- inTagsSimple "text:note-citation" (text . show $ n + 1) <>
+ inTagsSimple "text:note-citation" (text . show $ n + 1) <>
inTagsSimple "text:note-body" t
nn <- footNote <$> withParagraphStyle o "Footnote" l
addNote nn
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index 605e4162b..eb36c1ca6 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -37,23 +37,28 @@ import Data.Char ( ord, isDigit, toLower )
import System.FilePath ( takeExtension )
import qualified Data.ByteString as B
import Text.Printf ( printf )
+import Network.URI ( isAbsoluteURI, unEscapeString )
-- | Convert Image inlines into a raw RTF embedded image, read from a file.
-- If file not found or filetype not jpeg or png, leave the inline unchanged.
rtfEmbedImage :: Inline -> IO Inline
-rtfEmbedImage x@(Image _ (src,_))
- | map toLower (takeExtension src) `elem` [".jpg",".jpeg",".png"] = do
- imgdata <- catch (B.readFile src) (\_ -> return B.empty)
- let bytes = map (printf "%02x") $ B.unpack imgdata
- let filetype = case map toLower (takeExtension src) of
- ".jpg" -> "\\jpegblip"
- ".jpeg" -> "\\jpegblip"
- ".png" -> "\\pngblip"
- _ -> error "Unknown file type"
- let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}"
- return $ if B.null imgdata
- then x
- else RawInline "rtf" raw
+rtfEmbedImage x@(Image _ (src,_)) = do
+ let ext = map toLower (takeExtension src)
+ if ext `elem` [".jpg",".jpeg",".png"] && not (isAbsoluteURI src)
+ then do
+ let src' = unEscapeString src
+ imgdata <- catch (B.readFile src') (\_ -> return B.empty)
+ let bytes = map (printf "%02x") $ B.unpack imgdata
+ let filetype = case ext of
+ ".jpg" -> "\\jpegblip"
+ ".jpeg" -> "\\jpegblip"
+ ".png" -> "\\pngblip"
+ _ -> error "Unknown file type"
+ let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}"
+ return $ if B.null imgdata
+ then x
+ else RawInline "rtf" raw
+ else return x
rtfEmbedImage x = return x
-- | Convert Pandoc to a string in rich text format.
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index c8638cdd7..4f6645cd5 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -37,6 +37,8 @@ import Data.Ord ( comparing )
import Data.Char ( chr, ord )
import Control.Monad.State
import Text.Pandoc.Pretty
+import Network.URI ( isAbsoluteURI, unEscapeString )
+import System.FilePath
data WriterState =
WriterState { stStrikeout :: Bool -- document contains strikeout
@@ -412,11 +414,11 @@ inlineToTexinfo (Image alternate (source, _)) = do
return $ text ("@image{" ++ base ++ ",,,") <> content <> text "," <>
text (ext ++ "}")
where
- (revext, revbase) = break (=='.') (reverse source)
- ext = reverse revext
- base = case revbase of
- ('.' : rest) -> reverse rest
- _ -> reverse revbase
+ ext = drop 1 $ takeExtension source'
+ base = takeBaseName source'
+ source' = if isAbsoluteURI source
+ then source
+ else unEscapeString source
inlineToTexinfo (Note contents) = do
contents' <- blockListToTexinfo contents
diff --git a/src/markdown2pdf.hs b/src/markdown2pdf.hs
index 005717bf0..d6ee39dab 100644
--- a/src/markdown2pdf.hs
+++ b/src/markdown2pdf.hs
@@ -2,24 +2,63 @@ module Main where
import Data.List (isInfixOf, intercalate, isPrefixOf)
import Data.Maybe (isNothing)
+import qualified Data.ByteString as BS
+import Codec.Binary.UTF8.String (decodeString, encodeString)
+import Data.ByteString.UTF8 (toString)
+import Control.Monad (unless, guard, liftM, when)
+import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
+import Control.Exception (tryJust, bracket, evaluate)
-import Control.Monad (unless, guard)
-import Control.Exception (tryJust, bracket)
-
-import System.IO (stderr)
+import System.IO
import System.IO.Error (isDoesNotExistError)
import System.Environment ( getArgs, getProgName )
import qualified Text.Pandoc.UTF8 as UTF8
import System.Exit (ExitCode (..), exitWith)
import System.FilePath
import System.Directory
-import System.Process (readProcessWithExitCode)
-import Codec.Binary.UTF8.String (decodeString, encodeString)
-import Control.Monad (liftM)
+import System.Process
+
+-- A variant of 'readProcessWithExitCode' that does not
+-- cause an error if the output is not UTF-8. (Copied
+-- with slight variants from 'System.Process'.)
+readProcessWithExitCode'
+ :: FilePath -- ^ command to run
+ -> [String] -- ^ any arguments
+ -> String -- ^ standard input
+ -> IO (ExitCode,String,String) -- ^ exitcode, stdout, stderr
+readProcessWithExitCode' cmd args input = do
+ (Just inh, Just outh, Just errh, pid) <-
+ createProcess (proc cmd args){ std_in = CreatePipe,
+ std_out = CreatePipe,
+ std_err = CreatePipe }
+
+ outMVar <- newEmptyMVar
+
+ -- fork off a thread to start consuming stdout
+ out <- liftM toString $ BS.hGetContents outh
+ _ <- forkIO $ evaluate (length out) >> putMVar outMVar ()
+
+ -- fork off a thread to start consuming stderr
+ err <- liftM toString $ BS.hGetContents errh
+ _ <- forkIO $ evaluate (length err) >> putMVar outMVar ()
+
+ -- now write and flush any input
+ when (not (null input)) $ do hPutStr inh input; hFlush inh
+ hClose inh -- done with stdin
+
+ -- wait on the output
+ takeMVar outMVar
+ takeMVar outMVar
+ hClose outh
+
+ -- wait on the process
+ ex <- waitForProcess pid
+
+ return (ex, out, err)
run :: FilePath -> [String] -> IO (Either String String)
run file opts = do
- (code, out, err) <- readProcessWithExitCode (encodeString file)
+ (code, out, err) <- readProcessWithExitCode' (encodeString file)
(map encodeString opts) ""
let msg = out ++ err
case code of
@@ -48,11 +87,12 @@ runPandoc inputsAndArgs output = do
runLatexRaw :: String -> FilePath -> IO (Either (Either String String) FilePath)
runLatexRaw latexProgram file = do
-- we ignore the ExitCode because pdflatex always fails the first time
- run latexProgram ["-interaction=batchmode", "-output-directory",
- takeDirectory file, dropExtension file] >> return ()
+ run latexProgram ["-halt-on-error", "-interaction", "nonstopmode",
+ "-output-directory", takeDirectory file, dropExtension file] >> return ()
let pdfFile = replaceExtension file "pdf"
let logFile = replaceExtension file "log"
- txt <- tryJust (guard . isDoesNotExistError) (UTF8.readFile logFile)
+ txt <- tryJust (guard . isDoesNotExistError)
+ (liftM toString $ BS.readFile logFile)
let checks = checkLatex $ either (const "") id txt
case checks of
-- err , bib , ref , msg
@@ -79,7 +119,9 @@ checkLatex "" = (True, False, False, "Could not read log file")
checkLatex txt = (err , bib, ref, unlines $! msgs ++ tips)
where
xs `oneOf` x = any (flip isInfixOf x) xs
- msgs = filter (oneOf ["Error:", "Warning:"]) (lines txt)
+ msgs = dropWhile (not . errorline) $ lines txt
+ errorline ('!':_) = True
+ errorline _ = False
tips = checkPackages msgs
err = any (oneOf ["!", "LaTeX Error:", "Latex Error:"]) msgs
bib = any (oneOf ["Warning: Citation"
@@ -122,7 +164,7 @@ exit x = do
saveStdin :: FilePath -> IO (Either String FilePath)
saveStdin file = do
- text <- UTF8.getContents
+ text <- liftM toString $ BS.getContents
UTF8.writeFile file text
fileExist <- doesFileExist (encodeString file)
case fileExist of
@@ -152,7 +194,7 @@ main = bracket
let goodopts = ["-f","-r","-N", "-p","-R","-H","-B","-A", "-C","-o","-V"]
let goodoptslong = ["--from","--read","--strict",
"--preserve-tabs","--tab-stop","--parse-raw",
- "--toc","--table-of-contents", "--xetex",
+ "--toc","--table-of-contents", "--xetex", "--luatex",
"--number-sections","--include-in-header",
"--include-before-body","--include-after-body",
"--custom-header","--output",
@@ -164,17 +206,23 @@ main = bracket
-- note that a long option can come in this form: --opt=val
let isGoodopt x = x `elem` (goodopts ++ goodoptslong) ||
any (\o -> (o ++ "=") `isPrefixOf` x) goodoptslong
+ let markdown2pdfOpts = ["--xetex","--luatex"]
unless (all isGoodopt opts) $ do
(code, out, _err) <- readProcessWithExitCode "pandoc" ["--help"] ""
UTF8.putStrLn "markdown2pdf [OPTIONS] [FILES]\nOptions:"
UTF8.putStr $ unlines $
- filter (\l -> any (`isInfixOf` l) goodoptslong) $ lines out
+ filter (\l -> any (`isInfixOf` l) goodoptslong) (lines out)
+ ++ map (replicate 24 ' ' ++) markdown2pdfOpts
exitWith code
+ let args' = filter (`notElem` markdown2pdfOpts) args
+
-- check for executable files
let latexProgram = if "--xetex" `elem` opts
then "xelatex"
- else "pdflatex"
+ else if "--luatex" `elem` opts
+ then "lualatex"
+ else "pdflatex"
let execs = ["pandoc", latexProgram, "bibtex"]
paths <- mapM findExecutable execs
let miss = map snd $ filter (isNothing . fst) $ zip paths execs
@@ -182,7 +230,7 @@ main = bracket
-- parse arguments
-- if no input given, use 'stdin'
- pandocArgs <- parsePandocArgs args
+ pandocArgs <- parsePandocArgs args'
(input, output) <- case pandocArgs of
Nothing -> exit "Could not parse arguments"
Just ([],out) -> do
@@ -193,7 +241,7 @@ main = bracket
-- no need because we'll pass all arguments to pandoc
Just (_ ,out) -> return ([], out)
-- run pandoc
- pandocRes <- runPandoc (input ++ args) $ replaceDirectory output tmp
+ pandocRes <- runPandoc (input ++ args') $ replaceDirectory output tmp
case pandocRes of
Left err -> exit err
Right texFile -> do
diff --git a/src/pandoc.hs b/src/pandoc.hs
index 27e4579aa..27bc2c25c 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -44,6 +44,8 @@ import Data.Char ( toLower )
import Data.List ( intercalate, isSuffixOf, isPrefixOf )
import System.Directory ( getAppUserDataDirectory, doesFileExist )
import System.IO ( stdout, stderr )
+import System.IO.Error ( isDoesNotExistError )
+import Control.Exception.Extensible ( throwIO )
import qualified Text.Pandoc.UTF8 as UTF8
import Text.CSL
import Text.Pandoc.Biblio
@@ -93,7 +95,7 @@ data Opt = Opt
, optParseRaw :: Bool -- ^ Parse unconvertable HTML and TeX
, optTableOfContents :: Bool -- ^ Include table of contents
, optTransforms :: [Pandoc -> Pandoc] -- ^ Doc transforms to apply
- , optTemplate :: String -- ^ Custom template
+ , optTemplate :: Maybe FilePath -- ^ Custom template
, optVariables :: [(String,String)] -- ^ Template variables to set
, optOutputFile :: String -- ^ Name of output file
, optNumberSections :: Bool -- ^ Number sections in LaTeX
@@ -137,7 +139,7 @@ defaultOpts = Opt
, optParseRaw = False
, optTableOfContents = False
, optTransforms = []
- , optTemplate = ""
+ , optTemplate = Nothing
, optVariables = []
, optOutputFile = "-" -- "-" means stdout
, optNumberSections = False
@@ -286,8 +288,12 @@ options =
"" -- "Use jsMath for HTML math"
, Option "" ["mathjax"]
- (ReqArg
- (\arg opt -> return opt { optHTMLMathMethod = MathJax arg})
+ (OptArg
+ (\arg opt -> do
+ let url' = case arg of
+ Just u -> u
+ Nothing -> "https://d3eoax9i5htok0.cloudfront.net/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
+ return opt { optHTMLMathMethod = MathJax url'})
"URL")
"" -- "Use MathJax for HTML math"
@@ -309,7 +315,10 @@ options =
, Option "" ["xetex"]
(NoArg
- (\opt -> return opt { optXeTeX = True }))
+ (\opt -> do
+ UTF8.hPutStrLn stderr $ "pandoc: --xetex is deprecated. "
+ ++ "It is no longer needed for use with XeTeX."
+ return opt { optXeTeX = True }))
"" -- "Format latex for processing by XeTeX"
, Option "" ["chapters"]
@@ -404,8 +413,7 @@ options =
, Option "" ["template"]
(ReqArg
(\arg opt -> do
- text <- UTF8.readFile arg
- return opt{ optTemplate = text,
+ return opt{ optTemplate = Just arg,
optStandalone = True })
"FILENAME")
"" -- "Use custom template"
@@ -490,6 +498,14 @@ options =
"FILENAME")
"" -- "Path of epub.css"
+ , Option "" ["epub-cover-image"]
+ (ReqArg
+ (\arg opt ->
+ return opt { optVariables =
+ ("epub-cover-image", arg) : optVariables opt })
+ "FILENAME")
+ "" -- "Path of epub cover image"
+
, Option "" ["epub-metadata"]
(ReqArg
(\arg opt -> do
@@ -661,13 +677,12 @@ main = do
, optVariables = variables
, optTableOfContents = toc
, optTransforms = transforms
- , optTemplate = template
+ , optTemplate = templatePath
, optOutputFile = outputFile
, optNumberSections = numberSections
, optSectionDivs = sectionDivs
, optIncremental = incremental
, optOffline = offline
- , optXeTeX = xetex
, optSmart = smart
, optHtml5 = html5
, optChapters = chapters
@@ -721,10 +736,25 @@ main = do
Just r -> return r
Nothing -> error ("Unknown reader: " ++ readerName')
- templ <- getDefaultTemplate datadir writerName'
- let defaultTemplate = case templ of
- Right t -> t
- Left e -> error (show e)
+ templ <- case templatePath of
+ Nothing -> do
+ deftemp <- getDefaultTemplate datadir writerName'
+ case deftemp of
+ Left e -> throwIO e
+ Right t -> return t
+ Just tp -> do
+ -- strip off "+lhs" if present
+ let format = takeWhile (/='+') writerName'
+ let tp' = case takeExtension tp of
+ "" -> tp <.> format
+ _ -> tp
+ catch (UTF8.readFile tp')
+ (\e -> if isDoesNotExistError e
+ then catch
+ (readDataFile datadir $
+ "templates" </> tp')
+ (\_ -> throwIO e)
+ else throwIO e)
let standalone' = standalone || isNonTextOutput writerName'
@@ -736,7 +766,7 @@ main = do
slidyJs <- readDataFile datadir $
"slidy" </> "slidy.min.js"
slidyCss <- readDataFile datadir $
- "slidy" </> "slidy.min.css"
+ "slidy" </> "slidy.css"
return $ ("slidy-js", slidyJs) :
("slidy-css", slidyCss) : variables
_ -> return variables
@@ -778,10 +808,9 @@ main = do
stateIndentedCodeClasses = codeBlockClasses,
stateApplyMacros = writerName' `notElem` ["latex", "latex+lhs"] }
- let writerOptions = WriterOptions { writerStandalone = standalone',
- writerTemplate = if null template
- then defaultTemplate
- else template,
+ let writerOptions = defaultWriterOptions
+ { writerStandalone = standalone',
+ writerTemplate = templ,
writerVariables = variables'',
writerEPUBMetadata = epubMetadata,
writerTabStop = tabStop,
@@ -790,7 +819,6 @@ main = do
writerHTMLMathMethod = mathMethod,
writerSlideVariant = slideVariant,
writerIncremental = incremental,
- writerXeTeX = xetex,
writerCiteMethod = citeMethod,
writerBiblioFiles = reffiles,
writerIgnoreNotes = False,
diff --git a/src/test-pandoc.hs b/src/test-pandoc.hs
index 2b6c2bb49..33c46f2ba 100644
--- a/src/test-pandoc.hs
+++ b/src/test-pandoc.hs
@@ -11,6 +11,7 @@ import qualified Tests.Readers.RST
import qualified Tests.Writers.ConTeXt
import qualified Tests.Writers.HTML
import qualified Tests.Writers.Native
+import qualified Tests.Writers.Markdown
import qualified Tests.Shared
tests :: [Test]
@@ -20,6 +21,7 @@ tests = [ testGroup "Old" Tests.Old.tests
[ testGroup "Native" Tests.Writers.Native.tests
, testGroup "ConTeXt" Tests.Writers.ConTeXt.tests
, testGroup "HTML" Tests.Writers.HTML.tests
+ , testGroup "Markdown" Tests.Writers.Markdown.tests
]
, testGroup "Readers"
[ testGroup "LaTeX" Tests.Readers.LaTeX.tests