summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Tests/Arbitrary.hs10
-rw-r--r--src/Tests/Helpers.hs12
-rw-r--r--src/Tests/Old.hs8
-rw-r--r--src/Tests/Readers/LaTeX.hs80
-rw-r--r--src/Tests/Readers/Markdown.hs43
-rw-r--r--src/Tests/Readers/RST.hs14
-rw-r--r--src/Tests/Writers/ConTeXt.hs6
-rw-r--r--src/Tests/Writers/HTML.hs4
-rw-r--r--src/Tests/Writers/Markdown.hs4
-rw-r--r--src/Text/Pandoc.hs61
-rw-r--r--src/Text/Pandoc/Biblio.hs75
-rw-r--r--src/Text/Pandoc/CharacterReferences.hs72
-rw-r--r--src/Text/Pandoc/Highlighting.hs78
-rw-r--r--src/Text/Pandoc/ImageSize.hs151
-rw-r--r--src/Text/Pandoc/MIME.hs3
-rw-r--r--src/Text/Pandoc/PDF.hs136
-rw-r--r--src/Text/Pandoc/Parsing.hs138
-rw-r--r--src/Text/Pandoc/Pretty.hs135
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs38
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs1790
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs219
-rw-r--r--src/Text/Pandoc/Readers/RST.hs53
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs19
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs5
-rw-r--r--src/Text/Pandoc/S5.hs69
-rw-r--r--src/Text/Pandoc/SelfContained.hs162
-rw-r--r--src/Text/Pandoc/Shared.hs105
-rw-r--r--src/Text/Pandoc/Slides.hs57
-rw-r--r--src/Text/Pandoc/Templates.hs6
-rw-r--r--src/Text/Pandoc/Writers/AsciiDoc.hs367
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs110
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs59
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs666
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs177
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs549
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs424
-rw-r--r--src/Text/Pandoc/Writers/Man.hs12
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs33
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs12
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs12
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs25
-rw-r--r--src/Text/Pandoc/Writers/Org.hs14
-rw-r--r--src/Text/Pandoc/Writers/RST.hs23
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs14
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs8
-rw-r--r--src/Text/Pandoc/Writers/Textile.hs30
-rw-r--r--src/Text/Pandoc/XML.hs25
-rw-r--r--src/markdown2pdf.hs256
-rw-r--r--src/pandoc.hs738
49 files changed, 4495 insertions, 2612 deletions
diff --git a/src/Tests/Arbitrary.hs b/src/Tests/Arbitrary.hs
index f5a44ceab..9d65e1f1f 100644
--- a/src/Tests/Arbitrary.hs
+++ b/src/Tests/Arbitrary.hs
@@ -1,5 +1,5 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
-{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, ScopedTypeVariables #-}
-- provides Arbitrary instance for Pandoc types
module Tests.Arbitrary ()
where
@@ -22,10 +22,10 @@ arbAttr = do
return (id',classes,keyvals)
instance Arbitrary Inlines where
- arbitrary = liftM fromList arbitrary
+ arbitrary = liftM (fromList :: [Inline] -> Inlines) arbitrary
instance Arbitrary Blocks where
- arbitrary = liftM fromList arbitrary
+ arbitrary = liftM (fromList :: [Block] -> Blocks) arbitrary
instance Arbitrary Inline where
arbitrary = resize 3 $ arbInline 2
@@ -41,10 +41,6 @@ arbInline :: Int -> Gen Inline
arbInline n = frequency $ [ (60, liftM Str realString)
, (60, return Space)
, (10, liftM2 Code arbAttr realString)
- , (5, return EmDash)
- , (5, return EnDash)
- , (5, return Apostrophe)
- , (5, return Ellipses)
, (5, elements [ RawInline "html" "<a id=\"eek\">"
, RawInline "latex" "\\my{command}" ])
] ++ [ x | x <- nesters, n > 1]
diff --git a/src/Tests/Helpers.hs b/src/Tests/Helpers.hs
index b8d6b83a7..66879efed 100644
--- a/src/Tests/Helpers.hs
+++ b/src/Tests/Helpers.hs
@@ -69,17 +69,15 @@ test fn name (input, expected) =
vividize :: (DI,String) -> String
vividize (B,s) = s
-vividize (_,s) = vivid s
+vividize (F,s) = s
+vividize (S,s) = setSGRCode [SetColor Background Dull Red
+ , SetColor Foreground Vivid White] ++ s
+ ++ setSGRCode [Reset]
property :: QP.Testable a => TestName -> a -> Test
property = testProperty
-vivid :: String -> String
-vivid s = setSGRCode [SetColor Background Dull Red
- , SetColor Foreground Vivid White] ++ s
- ++ setSGRCode [Reset]
-
-infix 6 =?>
+infix 5 =?>
(=?>) :: a -> b -> (a,b)
x =?> y = (x, y)
diff --git a/src/Tests/Old.hs b/src/Tests/Old.hs
index cb1417ffa..1ec32a30d 100644
--- a/src/Tests/Old.hs
+++ b/src/Tests/Old.hs
@@ -13,7 +13,6 @@ import Data.Algorithm.Diff
import Text.Pandoc.Shared ( normalize, defaultWriterOptions )
import Text.Pandoc.Writers.Native ( writeNative )
import Text.Pandoc.Readers.Native ( readNative )
-import Text.Pandoc.Highlighting ( languages )
import Prelude hiding ( readFile )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 (toString)
@@ -105,7 +104,7 @@ tests = [ testGroup "markdown"
]
, testGroup "other writers" $ map (\f -> testGroup f $ writerTests f)
[ "docbook", "opendocument" , "context" , "texinfo"
- , "man" , "plain" , "mediawiki", "rtf", "org"
+ , "man" , "plain" , "mediawiki", "rtf", "org", "asciidoc"
]
]
@@ -121,10 +120,7 @@ lhsWriterTests format
]
where
t n f = test n ["--columns=78", "-r", "native", "-s", "-w", f]
- "lhs-test.native" ("lhs-test" <.> ext f)
- ext f = if null languages && format == "html"
- then "nohl" <.> f
- else f
+ "lhs-test.native" ("lhs-test" <.> f)
lhsReaderTest :: String -> Test
lhsReaderTest format =
diff --git a/src/Tests/Readers/LaTeX.hs b/src/Tests/Readers/LaTeX.hs
index 6d28441f8..d60026b20 100644
--- a/src/Tests/Readers/LaTeX.hs
+++ b/src/Tests/Readers/LaTeX.hs
@@ -11,7 +11,7 @@ import Text.Pandoc
latex :: String -> Pandoc
latex = readLaTeX defaultParserState
-infix 5 =:
+infix 4 =:
(=:) :: ToString c
=> String -> (String, c) -> Test
(=:) = test latex
@@ -35,10 +35,15 @@ tests = [ testGroup "basic"
"\\subsubsection{header}" =?> header 3 "header"
, "emph" =:
"\\section{text \\emph{emph}}" =?>
- header 1 ("text" +++ space +++ emph "emph")
+ header 1 ("text" <> space <> emph "emph")
, "link" =:
"\\section{text \\href{/url}{link}}" =?>
- header 1 ("text" +++ space +++ link "/url" "" "link")
+ header 1 ("text" <> space <> link "/url" "" "link")
+ ]
+
+ , testGroup "math"
+ [ "escaped $" =:
+ "$x=\\$4$" =?> para (math "x=\\$4")
]
, testGroup "space and comments"
@@ -64,98 +69,101 @@ baseCitation = Citation{ citationId = "item1"
, citationNoteNum = 0
, citationHash = 0 }
+rt :: String -> Inlines
+rt = rawInline "latex"
+
natbibCitations :: Test
natbibCitations = testGroup "natbib"
[ "citet" =: "\\citet{item1}"
- =?> para (cite [baseCitation] empty)
+ =?> para (cite [baseCitation] (rt "\\citet{item1}"))
, "suffix" =: "\\citet[p.~30]{item1}"
=?> para
- (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] empty)
+ (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\citet[p.~30]{item1}"))
, "suffix long" =: "\\citet[p.~30, with suffix]{item1}"
=?> para (cite [baseCitation{ citationSuffix =
- toList $ text "p.\160\&30, with suffix" }] empty)
+ toList $ text ", p.\160\&30, with suffix" }] (rt "\\citet[p.~30, with suffix]{item1}"))
, "multiple" =: "\\citeauthor{item1} \\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}"
=?> para (cite [baseCitation{ citationMode = AuthorInText }
,baseCitation{ citationMode = SuppressAuthor
- , citationSuffix = [Str "p.\160\&30"]
+ , citationSuffix = [Str ",",Space,Str "p.\160\&30"]
, citationId = "item2" }
,baseCitation{ citationId = "item3"
, citationPrefix = [Str "see",Space,Str "also"]
, citationMode = NormalCitation }
- ] empty)
+ ] (rt "\\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}"))
, "group" =: "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}"
=?> para (cite [baseCitation{ citationMode = NormalCitation
, citationPrefix = [Str "see"]
- , citationSuffix = [Str "p.\160\&34",EnDash,Str "35"] }
+ , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] }
,baseCitation{ citationMode = NormalCitation
, citationId = "item3"
, citationPrefix = [Str "also"]
- , citationSuffix = [Str "chap.",Space,Str "3"] }
- ] empty)
+ , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] }
+ ] (rt "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}"))
, "suffix and locator" =: "\\citep[pp.~33, 35--37, and nowhere else]{item1}"
=?> para (cite [baseCitation{ citationMode = NormalCitation
- , citationSuffix = [Str "pp.\160\&33,",Space,Str "35",EnDash,Str "37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] empty)
+ , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\citep[pp.~33, 35--37, and nowhere else]{item1}"))
, "suffix only" =: "\\citep[and nowhere else]{item1}"
=?> para (cite [baseCitation{ citationMode = NormalCitation
- , citationSuffix = toList $ text "and nowhere else" }] empty)
+ , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\citep[and nowhere else]{item1}"))
, "no author" =: "\\citeyearpar{item1}, and now Doe with a locator \\citeyearpar[p.~44]{item2}"
- =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] empty +++
- text ", and now Doe with a locator " +++
+ =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\citeyearpar{item1}") <>
+ text ", and now Doe with a locator " <>
cite [baseCitation{ citationMode = SuppressAuthor
- , citationSuffix = [Str "p.\160\&44"]
- , citationId = "item2" }] empty)
+ , citationSuffix = [Str ",",Space,Str "p.\160\&44"]
+ , citationId = "item2" }] (rt "\\citeyearpar[p.~44]{item2}"))
, "markup" =: "\\citep[\\emph{see}][p. \\textbf{32}]{item1}"
=?> para (cite [baseCitation{ citationMode = NormalCitation
, citationPrefix = [Emph [Str "see"]]
- , citationSuffix = [Str "p.",Space,
- Strong [Str "32"]] }] empty)
+ , citationSuffix = [Str ",",Space,Str "p.",Space,
+ Strong [Str "32"]] }] (rt "\\citep[\\emph{see}][p. \\textbf{32}]{item1}"))
]
biblatexCitations :: Test
biblatexCitations = testGroup "biblatex"
[ "textcite" =: "\\textcite{item1}"
- =?> para (cite [baseCitation] empty)
+ =?> para (cite [baseCitation] (rt "\\textcite{item1}"))
, "suffix" =: "\\textcite[p.~30]{item1}"
=?> para
- (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] empty)
+ (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\textcite[p.~30]{item1}"))
, "suffix long" =: "\\textcite[p.~30, with suffix]{item1}"
=?> para (cite [baseCitation{ citationSuffix =
- toList $ text "p.\160\&30, with suffix" }] empty)
+ toList $ text ", p.\160\&30, with suffix" }] (rt "\\textcite[p.~30, with suffix]{item1}"))
, "multiple" =: "\\textcites{item1}[p.~30]{item2}[see also][]{item3}"
=?> para (cite [baseCitation{ citationMode = AuthorInText }
,baseCitation{ citationMode = NormalCitation
- , citationSuffix = [Str "p.\160\&30"]
+ , citationSuffix = [Str ",",Space,Str "p.\160\&30"]
, citationId = "item2" }
,baseCitation{ citationId = "item3"
, citationPrefix = [Str "see",Space,Str "also"]
, citationMode = NormalCitation }
- ] empty)
+ ] (rt "\\textcites{item1}[p.~30]{item2}[see also][]{item3}"))
, "group" =: "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}"
=?> para (cite [baseCitation{ citationMode = NormalCitation
, citationPrefix = [Str "see"]
- , citationSuffix = [Str "p.\160\&34",EnDash,Str "35"] }
+ , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] }
,baseCitation{ citationMode = NormalCitation
, citationId = "item3"
, citationPrefix = [Str "also"]
- , citationSuffix = [Str "chap.",Space,Str "3"] }
- ] empty)
+ , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] }
+ ] (rt "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}"))
, "suffix and locator" =: "\\autocite[pp.~33, 35--37, and nowhere else]{item1}"
=?> para (cite [baseCitation{ citationMode = NormalCitation
- , citationSuffix = [Str "pp.\160\&33,",Space,Str "35",EnDash,Str "37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] empty)
+ , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\autocite[pp.~33, 35--37, and nowhere else]{item1}"))
, "suffix only" =: "\\autocite[and nowhere else]{item1}"
=?> para (cite [baseCitation{ citationMode = NormalCitation
- , citationSuffix = toList $ text "and nowhere else" }] empty)
+ , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\autocite[and nowhere else]{item1}"))
, "no author" =: "\\autocite*{item1}, and now Doe with a locator \\autocite*[p.~44]{item2}"
- =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] empty +++
- text ", and now Doe with a locator " +++
+ =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\autocite*{item1}") <>
+ text ", and now Doe with a locator " <>
cite [baseCitation{ citationMode = SuppressAuthor
- , citationSuffix = [Str "p.\160\&44"]
- , citationId = "item2" }] empty)
+ , citationSuffix = [Str ",",Space,Str "p.\160\&44"]
+ , citationId = "item2" }] (rt "\\autocite*[p.~44]{item2}"))
, "markup" =: "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}"
=?> para (cite [baseCitation{ citationMode = NormalCitation
, citationPrefix = [Emph [Str "see"]]
- , citationSuffix = [Str "p.",Space,
- Strong [Str "32"]] }] empty)
+ , citationSuffix = [Str ",",Space,Str "p.",Space,
+ Strong [Str "32"]] }] (rt "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}"))
, "parencite" =: "\\parencite{item1}"
- =?> para (cite [baseCitation{ citationMode = NormalCitation }] empty)
+ =?> para (cite [baseCitation{ citationMode = NormalCitation }] (rt "\\parencite{item1}"))
]
diff --git a/src/Tests/Readers/Markdown.hs b/src/Tests/Readers/Markdown.hs
index 941762bd0..5ad974adf 100644
--- a/src/Tests/Readers/Markdown.hs
+++ b/src/Tests/Readers/Markdown.hs
@@ -8,7 +8,6 @@ import Tests.Arbitrary()
import Text.Pandoc.Builder
-- import Text.Pandoc.Shared ( normalize )
import Text.Pandoc
-import Data.Sequence (singleton)
markdown :: String -> Pandoc
markdown = readMarkdown defaultParserState{ stateStandalone = True }
@@ -16,7 +15,7 @@ markdown = readMarkdown defaultParserState{ stateStandalone = True }
markdownSmart :: String -> Pandoc
markdownSmart = readMarkdown defaultParserState{ stateSmart = True }
-infix 5 =:
+infix 4 =:
(=:) :: ToString c
=> String -> (String, c) -> Test
(=:) = test markdown
@@ -44,29 +43,49 @@ tests = [ testGroup "inline code"
"`*` {.haskell .special x=\"7\"}"
=?> para (codeWith ("",["haskell","special"],[("x","7")]) "*")
]
+ , testGroup "backslash escapes"
+ [ "in URL" =:
+ "[hi](/there\\))"
+ =?> para (link "/there)" "" "hi")
+ , "in title" =:
+ "[hi](/there \"a\\\"a\")"
+ =?> para (link "/there" "a\"a" "hi")
+ , "in reference link title" =:
+ "[hi]\n\n[hi]: /there (a\\)a)"
+ =?> para (link "/there" "a)a" "hi")
+ , "in reference link URL" =:
+ "[hi]\n\n[hi]: /there\\.0"
+ =?> para (link "/there.0" "" "hi")
+ ]
, testGroup "smart punctuation"
[ test markdownSmart "quote before ellipses"
("'...hi'"
- =?> para (singleQuoted (singleton Ellipses +++ "hi")))
+ =?> para (singleQuoted ("…hi")))
+ , test markdownSmart "apostrophe before emph"
+ ("D'oh! A l'*aide*!"
+ =?> para ("D’oh! A l’" <> emph "aide" <> "!"))
+ , test markdownSmart "apostrophe in French"
+ ("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»"
+ =?> para ("À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»"))
]
, testGroup "mixed emphasis and strong"
[ "emph and strong emph alternating" =:
"*xxx* ***xxx*** xxx\n*xxx* ***xxx*** xxx"
- =?> para (emph "xxx" +++ space +++ strong (emph "xxx") +++
- space +++ "xxx" +++ space +++
- emph "xxx" +++ space +++ strong (emph "xxx") +++
- space +++ "xxx")
+ =?> para (emph "xxx" <> space <> strong (emph "xxx") <>
+ space <> "xxx" <> space <>
+ emph "xxx" <> space <> strong (emph "xxx") <>
+ space <> "xxx")
, "emph with spaced strong" =:
"*x **xx** x*"
- =?> para (emph ("x" +++ space +++ strong "xx" +++ space +++ "x"))
+ =?> para (emph ("x" <> space <> strong "xx" <> space <> "x"))
]
, testGroup "footnotes"
[ "indent followed by newline and flush-left text" =:
"[^1]\n\n[^1]: my note\n\n \nnot in note\n"
- =?> para (note (para "my note")) +++ para "not in note"
+ =?> para (note (para "my note")) <> para "not in note"
, "indent followed by newline and indented text" =:
"[^1]\n\n[^1]: my note\n \n in note\n"
- =?> para (note (para "my note" +++ para "in note"))
+ =?> para (note (para "my note" <> para "in note"))
, "recursive note" =:
"[^1]\n\n[^1]: See [^1]\n"
=?> para (note (para "See [^1]"))
@@ -76,9 +95,9 @@ tests = [ testGroup "inline code"
"inverse bird tracks and html" $
"> a\n\n< b\n\n<div>\n"
=?> codeBlockWith ("",["sourceCode","literate","haskell"],[]) "a"
- +++
+ <>
codeBlockWith ("",["sourceCode","haskell"],[]) "b"
- +++
+ <>
rawBlock "html" "<div>\n\n"
]
-- the round-trip properties frequently fail
diff --git a/src/Tests/Readers/RST.hs b/src/Tests/Readers/RST.hs
index 4b8c9301b..3269092a6 100644
--- a/src/Tests/Readers/RST.hs
+++ b/src/Tests/Readers/RST.hs
@@ -11,15 +11,15 @@ import Text.Pandoc
rst :: String -> Pandoc
rst = readRST defaultParserState{ stateStandalone = True }
-infix 5 =:
+infix 4 =:
(=:) :: ToString c
=> String -> (String, c) -> Test
(=:) = test rst
tests :: [Test]
tests = [ "line block with blank line" =:
- "| a\n|\n| b" =?> para (str "a" +++ linebreak +++
- linebreak +++ str " " +++ str "b")
+ "| a\n|\n| b" =?> para (str "a" <> linebreak <>
+ linebreak <> str " " <> str "b")
, "field list" =:
[_LIT|
:Hostname: media08
@@ -51,10 +51,10 @@ tests = [ "line block with blank line" =:
, "URLs with following punctuation" =:
("http://google.com, http://yahoo.com; http://foo.bar.baz.\n" ++
"http://foo.bar/baz_(bam) (http://foo.bar)") =?>
- para (link "http://google.com" "" "http://google.com" +++ ", " +++
- link "http://yahoo.com" "" "http://yahoo.com" +++ "; " +++
- link "http://foo.bar.baz" "" "http://foo.bar.baz" +++ ". " +++
+ para (link "http://google.com" "" "http://google.com" <> ", " <>
+ link "http://yahoo.com" "" "http://yahoo.com" <> "; " <>
+ link "http://foo.bar.baz" "" "http://foo.bar.baz" <> ". " <>
link "http://foo.bar/baz_(bam)" "" "http://foo.bar/baz_(bam)"
- +++ " (" +++ link "http://foo.bar" "" "http://foo.bar" +++ ")")
+ <> " (" <> link "http://foo.bar" "" "http://foo.bar" <> ")")
]
diff --git a/src/Tests/Writers/ConTeXt.hs b/src/Tests/Writers/ConTeXt.hs
index 704571e95..506ff698f 100644
--- a/src/Tests/Writers/ConTeXt.hs
+++ b/src/Tests/Writers/ConTeXt.hs
@@ -26,7 +26,7 @@ which is in turn shorthand for
test context "my test" (X,Y)
-}
-infix 5 =:
+infix 4 =:
(=:) :: (ToString a, ToPandoc a)
=> String -> (a, String) -> Test
(=:) = test context
@@ -43,9 +43,7 @@ tests = [ testGroup "inline code"
]
, testGroup "headers"
[ "level 1" =:
- header 1 "My header" =?> "\\subject{My header}"
- , property "header 1 property" $ \ils ->
- context' (header 1 ils) == "\\subject{" ++ context' ils ++ "}"
+ header 1 "My header" =?> "\\section[my-header]{My header}"
]
, testGroup "bullet lists"
[ "nested" =:
diff --git a/src/Tests/Writers/HTML.hs b/src/Tests/Writers/HTML.hs
index 3e1e0ddc2..8561aa421 100644
--- a/src/Tests/Writers/HTML.hs
+++ b/src/Tests/Writers/HTML.hs
@@ -23,7 +23,7 @@ which is in turn shorthand for
test html "my test" (X,Y)
-}
-infix 5 =:
+infix 4 =:
(=:) :: (ToString a, ToPandoc a)
=> String -> (a, String) -> Test
(=:) = test html
@@ -40,7 +40,7 @@ tests = [ testGroup "inline code"
]
, testGroup "images"
[ "alt with formatting" =:
- image "/url" "title" ("my " +++ emph "image")
+ image "/url" "title" ("my " <> emph "image")
=?> "<img src=\"/url\" title=\"title\" alt=\"my image\" />"
]
]
diff --git a/src/Tests/Writers/Markdown.hs b/src/Tests/Writers/Markdown.hs
index 70266a683..d90dc83b1 100644
--- a/src/Tests/Writers/Markdown.hs
+++ b/src/Tests/Writers/Markdown.hs
@@ -22,13 +22,13 @@ which is in turn shorthand for
test markdown "my test" (X,Y)
-}
-infix 5 =:
+infix 4 =:
(=:) :: (ToString a, ToPandoc a)
=> String -> (a, String) -> Test
(=:) = test markdown
tests :: [Test]
tests = [ "indented code after list"
- =: (orderedList [ para "one" +++ para "two" ] +++ codeBlock "test")
+ =: (orderedList [ para "one" <> para "two" ] <> codeBlock "test")
=?> "1. one\n\n two\n\n<!-- -->\n\n test"
]
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index ef8560284..878f0e0dd 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -94,8 +95,10 @@ module Text.Pandoc
, writeTextile
, writeRTF
, writeODT
+ , writeDocx
, writeEPUB
, writeOrg
+ , writeAsciiDoc
-- * Writer options used in writers
, WriterOptions (..)
, HTMLSlideVariant (..)
@@ -109,6 +112,7 @@ module Text.Pandoc
-- * Miscellaneous
, rtfEmbedImage
, jsonFilter
+ , ToJsonFilter(..)
) where
import Text.Pandoc.Definition
@@ -127,6 +131,7 @@ import Text.Pandoc.Writers.ConTeXt
import Text.Pandoc.Writers.Texinfo
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.ODT
+import Text.Pandoc.Writers.Docx
import Text.Pandoc.Writers.EPUB
import Text.Pandoc.Writers.Docbook
import Text.Pandoc.Writers.OpenDocument
@@ -135,6 +140,7 @@ import Text.Pandoc.Writers.RTF
import Text.Pandoc.Writers.MediaWiki
import Text.Pandoc.Writers.Textile
import Text.Pandoc.Writers.Org
+import Text.Pandoc.Writers.AsciiDoc
import Text.Pandoc.Templates
import Text.Pandoc.Parsing
import Text.Pandoc.Shared
@@ -164,20 +170,28 @@ readers = [("native" , \_ -> readNative)
]
-- | Association list of formats and writers (omitting the
--- binary writers, odt and epub).
+-- binary writers, odt, docx, and epub).
writers :: [ ( String, WriterOptions -> Pandoc -> String ) ]
writers = [("native" , writeNative)
,("json" , \_ -> encodeJSON)
,("html" , writeHtmlString)
+ ,("html5" , \o ->
+ writeHtmlString o{ writerHtml5 = True })
,("html+lhs" , \o ->
writeHtmlString o{ writerLiterateHaskell = True })
+ ,("html5+lhs" , \o ->
+ writeHtmlString o{ writerLiterateHaskell = True,
+ writerHtml5 = True })
,("s5" , writeHtmlString)
,("slidy" , writeHtmlString)
+ ,("dzslides" , writeHtmlString)
,("docbook" , writeDocbook)
,("opendocument" , writeOpenDocument)
,("latex" , writeLaTeX)
,("latex+lhs" , \o ->
writeLaTeX o{ writerLiterateHaskell = True })
+ ,("beamer" , \o ->
+ writeLaTeX o{ writerBeamer = True })
,("context" , writeConTeXt)
,("texinfo" , writeTexinfo)
,("man" , writeMan)
@@ -192,10 +206,55 @@ writers = [("native" , writeNative)
,("textile" , writeTextile)
,("rtf" , writeRTF)
,("org" , writeOrg)
+ ,("asciidoc" , writeAsciiDoc)
]
+{-# DEPRECATED jsonFilter "Use toJsonFilter instead" #-}
-- | Converts a transformation on the Pandoc AST into a function
-- that reads and writes a JSON-encoded string. This is useful
-- for writing small scripts.
jsonFilter :: (Pandoc -> Pandoc) -> String -> String
jsonFilter f = encodeJSON . f . decodeJSON
+
+-- | 'toJsonFilter' convert a function into a filter that reads pandoc's json output
+-- from stdin, transforms it by walking the AST and applying the specified
+-- function, and writes the result as json to stdout. Usage example:
+--
+-- > -- capitalize.hs
+-- > -- compile with: ghc --make capitalize
+-- > -- run with: pandoc -t json | ./capitalize | pandoc -f json
+-- >
+-- > import Text.Pandoc
+-- > import Data.Char (toUpper)
+-- >
+-- > main :: IO ()
+-- > main = toJsonFilter capitalizeStrings
+-- >
+-- > capitalizeStrings :: Inline -> Inline
+-- > capitalizeStrings (Str s) = Str $ map toUpper s
+-- > capitalizeStrings x = x
+--
+-- The function can be any type @(a -> a)@, @(a -> IO a)@, @(a -> [a])@,
+-- or @(a -> IO [a])@, where @a@ is an instance of 'Data'.
+-- So, for example, @a@ can be 'Pandoc', 'Inline', 'Block', ['Inline'],
+-- ['Block'], 'Meta', 'ListNumberStyle', 'Alignment', 'ListNumberDelim',
+-- 'QuoteType', etc. See 'Text.Pandoc.Definition'.
+class ToJsonFilter a where
+ toJsonFilter :: a -> IO ()
+
+instance (Data a) => ToJsonFilter (a -> a) where
+ toJsonFilter f = getContents
+ >>= putStr . encodeJSON . (bottomUp f :: Pandoc -> Pandoc) . decodeJSON
+
+instance (Data a) => ToJsonFilter (a -> IO a) where
+ toJsonFilter f = getContents >>= (bottomUpM f :: Pandoc -> IO Pandoc) . decodeJSON
+ >>= putStr . encodeJSON
+
+instance (Data a) => ToJsonFilter (a -> [a]) where
+ toJsonFilter f = getContents
+ >>= putStr . encodeJSON . (bottomUp (concatMap f) :: Pandoc -> Pandoc) . decodeJSON
+
+instance (Data a) => ToJsonFilter (a -> IO [a]) where
+ toJsonFilter f = getContents
+ >>= (bottomUpM (fmap concat . mapM f) :: Pandoc -> IO Pandoc) . decodeJSON
+ >>= putStr . encodeJSON
diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index d65c9de1c..c8e87b2a0 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -31,7 +31,7 @@ module Text.Pandoc.Biblio ( processBiblio ) where
import Data.List
import Data.Unique
-import Data.Char ( isDigit, isPunctuation )
+import Data.Char ( isDigit )
import qualified Data.Map as M
import Text.CSL hiding ( Cite(..), Citation(..) )
import qualified Text.CSL as CSL ( Cite(..) )
@@ -43,11 +43,15 @@ import Control.Monad
-- | Process a 'Pandoc' document by adding citations formatted
-- according to a CSL style, using 'citeproc' from citeproc-hs.
-processBiblio :: FilePath -> [Reference] -> Pandoc -> IO Pandoc
-processBiblio cslfile r p
+processBiblio :: FilePath -> Maybe FilePath -> [Reference] -> Pandoc
+ -> IO Pandoc
+processBiblio cslfile abrfile r p
= if null r then return p
else do
csl <- readCSLFile cslfile
+ abbrevs <- case abrfile of
+ Just f -> readJsonAbbrevFile f
+ Nothing -> return []
p' <- bottomUpM setHash p
let (nts,grps) = if styleClass csl == "note"
then let cits = queryWith getCite p'
@@ -55,29 +59,36 @@ processBiblio cslfile r p
needNt = cits \\ concat ncits
in (,) needNt $ getNoteCitations needNt p'
else (,) [] $ queryWith getCitation p'
- result = citeproc procOpts csl r (setNearNote csl $
+ style = csl { styleAbbrevs = abbrevs }
+ result = citeproc procOpts style r (setNearNote style $
map (map toCslCite) grps)
cits_map = M.fromList $ zip grps (citations result)
- biblioList = map (renderPandoc' csl) (bibliography result)
- Pandoc m b = bottomUp (procInlines $ processCite csl cits_map) p'
+ biblioList = map (renderPandoc' style) (bibliography result)
+ Pandoc m b = bottomUp (procInlines $ processCite style cits_map) p'
return . generateNotes nts . Pandoc m $ b ++ biblioList
-- | Substitute 'Cite' elements with formatted citations.
processCite :: Style -> M.Map [Citation] [FormattedOutput] -> [Inline] -> [Inline]
+processCite s cs (Cite t _ : rest) =
+ case M.lookup t cs of
+ Just (x:xs) ->
+ if isTextualCitation t
+ then renderPandoc s [x] ++
+ if null xs
+ then processCite s cs rest
+ else [Space, Cite t (renderPandoc s xs)]
+ ++ processCite s cs rest
+ else Cite t (renderPandoc s (x:xs)) : processCite s cs rest
+ _ -> Str ("Error processing " ++ show t) : processCite s cs rest
+processCite s cs (x:xs) = x : processCite s cs xs
processCite _ _ [] = []
-processCite s cs (i:is)
- | Cite t _ <- i = process t ++ processCite s cs is
- | otherwise = i : processCite s cs is
- where
- addNt t x = if null x then [] else [Cite t $ renderPandoc s x]
- process t = case M.lookup t cs of
- Just x -> if isTextualCitation t && x /= []
- then renderPandoc s [head x] ++
- if tail x /= []
- then Space : addNt t (tail x)
- else []
- else [Cite t $ renderPandoc s x]
- Nothing -> [Str ("Error processing " ++ show t)]
+
+procInlines :: ([Inline] -> [Inline]) -> Block -> Block
+procInlines f b
+ | Plain inls <- b = Plain $ f inls
+ | Para inls <- b = Para $ f inls
+ | Header i inls <- b = Header i $ f inls
+ | otherwise = b
isTextualCitation :: [Citation] -> Bool
isTextualCitation (c:_) = citationMode c == AuthorInText
@@ -112,13 +123,6 @@ setHash (Citation i p s cm nn _)
generateNotes :: [Inline] -> Pandoc -> Pandoc
generateNotes needNote = bottomUp (mvCiteInNote needNote)
-procInlines :: ([Inline] -> [Inline]) -> Block -> Block
-procInlines f b
- | Plain inls <- b = Plain $ f inls
- | Para inls <- b = Para $ f inls
- | Header i inls <- b = Header i $ f inls
- | otherwise = b
-
mvCiteInNote :: [Inline] -> Block -> Block
mvCiteInNote is = procInlines mvCite
where
@@ -143,9 +147,8 @@ mvCiteInNote is = procInlines mvCite
| otherwise = toCapital (i ++ [Str "."])
checkPt i
- | Cite c o : xs <- i
- , endWithPunct o, startWithPunct xs
- , endWithPunct o = Cite c (initInline o) : checkPt xs
+ | Cite c o : xs <- i , endWithPunct o, startWithPunct xs
+ = Cite c (initInline o) : checkPt xs
| x:xs <- i = x : checkPt xs
| otherwise = []
checkNt = bottomUp $ procInlines checkPt
@@ -165,13 +168,9 @@ toCslCite c
AuthorInText -> (True, False)
SuppressAuthor -> (False,True )
NormalCitation -> (False,False)
- s' = case s of
- [] -> []
- (Str (y:_) : _) | isPunctuation y -> s
- _ -> Str "," : Space : s
in emptyCite { CSL.citeId = citationId c
, CSL.citePrefix = PandocText $ citationPrefix c
- , CSL.citeSuffix = PandocText $ s'
+ , CSL.citeSuffix = PandocText $ s
, CSL.citeLabel = la
, CSL.citeLocator = lo
, CSL.citeNoteNumber = show $ citationNoteNum c
@@ -182,9 +181,13 @@ toCslCite c
locatorWords :: [Inline] -> (String, [Inline])
locatorWords inp =
- case parse pLocatorWords "suffix" inp of
+ case parse pLocatorWords "suffix" $ breakup inp of
Right r -> r
Left _ -> ("",inp)
+ where breakup [] = []
+ breakup (Str x : xs) = map Str (splitup x) ++ breakup xs
+ breakup (x : xs) = x : breakup xs
+ splitup = groupBy (\x y -> x /= '\160' && y /= '\160')
pLocatorWords :: GenParser Inline st (String, [Inline])
pLocatorWords = do
@@ -201,7 +204,7 @@ pMatch condition = try $ do
return t
pSpace :: GenParser Inline st Inline
-pSpace = pMatch (== Space)
+pSpace = pMatch (\t -> t == Space || t == Str "\160")
pLocator :: GenParser Inline st String
pLocator = try $ do
diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs
deleted file mode 100644
index 8157d94d3..000000000
--- a/src/Text/Pandoc/CharacterReferences.hs
+++ /dev/null
@@ -1,72 +0,0 @@
-{-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.CharacterReferences
- Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Functions for parsing character references.
--}
-module Text.Pandoc.CharacterReferences (
- characterReference,
- decodeCharacterReferences,
- ) where
-import Text.ParserCombinators.Parsec
-import Text.HTML.TagSoup.Entity ( lookupNamedEntity, lookupNumericEntity )
-import Data.Maybe ( fromMaybe )
-
--- | Parse character entity.
-characterReference :: GenParser Char st Char
-characterReference = try $ do
- char '&'
- character <- numRef <|> entity
- char ';'
- return character
-
-numRef :: GenParser Char st Char
-numRef = do
- char '#'
- num <- hexNum <|> decNum
- return $ fromMaybe '?' $ lookupNumericEntity num
-
-hexNum :: GenParser Char st [Char]
-hexNum = do
- x <- oneOf "Xx"
- num <- many1 hexDigit
- return (x:num)
-
-decNum :: GenParser Char st [Char]
-decNum = many1 digit
-
-entity :: GenParser Char st Char
-entity = do
- body <- many1 alphaNum
- return $ fromMaybe '?' $ lookupNamedEntity body
-
--- | Convert entities in a string to characters.
-decodeCharacterReferences :: String -> String
-decodeCharacterReferences str =
- case parse (many (characterReference <|> anyChar)) str str of
- Left err -> error $ "\nError: " ++ show err
- Right result -> result
-
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index 5ddaf1379..4fb799cf1 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Highlighting
Copyright : Copyright (C) 2008 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -28,47 +28,49 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Exports functions for syntax highlighting.
-}
-module Text.Pandoc.Highlighting ( languages, highlightHtml, defaultHighlightingCss, languagesByExtension ) where
-import Text.XHtml
+module Text.Pandoc.Highlighting ( languages
+ , languagesByExtension
+ , highlight
+ , formatLaTeXInline
+ , formatLaTeXBlock
+ , styleToLaTeX
+ , formatHtmlInline
+ , formatHtmlBlock
+ , styleToCss
+ , pygments
+ , espresso
+ , tango
+ , kate
+ , monochrome
+ , haddock
+ , Style
+ ) where
import Text.Pandoc.Definition
-#ifdef _HIGHLIGHTING
-import Text.Highlighting.Kate ( languages, highlightAs, formatAsXHtml, FormatOption (..), defaultHighlightingCss, languagesByExtension )
+import Text.Highlighting.Kate
import Data.List (find)
import Data.Maybe (fromMaybe)
import Data.Char (toLower)
-highlightHtml :: Bool -- ^ True if inline HTML
- -> Attr -- ^ Attributes of the Code or CodeBlock
- -> String -- ^ Raw contents of the Code or CodeBlock
- -> Either String Html -- ^ An error or the formatted Html
-highlightHtml inline (_, classes, keyvals) rawCode =
- let firstNum = read $ fromMaybe "1" $ lookup "startFrom" keyvals
- fmtOpts = [OptNumberFrom firstNum] ++
- [OptInline | inline] ++
- case find (`elem` ["number","numberLines","number-lines"]) classes of
- Nothing -> []
- Just _ -> [OptNumberLines]
- addBirdTracks = "literate" `elem` classes
- lcLanguages = map (map toLower) languages
- in case find (\c -> (map toLower c) `elem` lcLanguages) classes of
- Nothing -> Left "Unknown or unsupported language"
- Just language -> case highlightAs language rawCode of
- Left err -> Left err
- Right hl -> Right $ formatAsXHtml fmtOpts language $
- if addBirdTracks
- then map ((["Special"],"> "):) hl
- else hl
+lcLanguages :: [String]
+lcLanguages = map (map toLower) languages
-#else
-defaultHighlightingCss :: String
-defaultHighlightingCss = ""
+highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter
+ -> Attr -- ^ Attributes of the CodeBlock
+ -> String -- ^ Raw contents of the CodeBlock
+ -> Maybe a -- ^ Maybe the formatted result
+highlight formatter (_, classes, keyvals) rawCode =
+ let firstNum = case reads (fromMaybe "1" $ lookup "startFrom" keyvals) of
+ ((n,_):_) -> n
+ [] -> 1
+ fmtOpts = defaultFormatOpts{
+ startNumber = firstNum,
+ numberLines = any (`elem`
+ ["number","numberLines", "number-lines"]) classes }
+ lcclasses = map (map toLower) classes
+ in case find (`elem` lcLanguages) lcclasses of
+ Nothing -> Nothing
+ Just language -> Just
+ $ formatter fmtOpts{ codeClasses = [language],
+ containerClasses = classes }
+ $ highlightAs language rawCode
-languages :: [String]
-languages = []
-
-languagesByExtension :: String -> [String]
-languagesByExtension _ = []
-
-highlightHtml :: Bool -> Attr -> String -> Either String Html
-highlightHtml _ _ _ = Left "Pandoc was not compiled with support for highlighting"
-#endif
diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs
new file mode 100644
index 000000000..d48c6a5ae
--- /dev/null
+++ b/src/Text/Pandoc/ImageSize.hs
@@ -0,0 +1,151 @@
+{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-}
+{-
+ Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+Module : Text.Pandoc.ImageSize
+Copyright : Copyright (C) 2011 John MacFarlane
+License : GNU GPL, version 2 or above
+
+Maintainer : John MacFarlane <jgm@berkeley.edu>
+Stability : alpha
+Portability : portable
+
+Functions for determining the size of a PNG, JPEG, or GIF image.
+-}
+module Text.Pandoc.ImageSize ( ImageType(..), imageType, imageSize,
+ sizeInPixels, sizeInPoints, readImageSize ) where
+import Data.ByteString.Lazy (ByteString, unpack)
+import qualified Data.ByteString.Lazy.Char8 as B
+import Control.Monad
+import Data.Bits
+
+-- quick and dirty functions to get image sizes
+-- algorithms borrowed from wwwis.pl
+
+data ImageType = Png | Gif | Jpeg deriving Show
+
+data ImageSize = ImageSize{
+ pxX :: Integer
+ , pxY :: Integer
+ , dpiX :: Integer
+ , dpiY :: Integer
+ } deriving (Read, Show, Eq)
+
+
+readImageSize :: FilePath -> IO (Maybe ImageSize)
+readImageSize fp = imageSize `fmap` B.readFile fp
+
+imageType :: ByteString -> Maybe ImageType
+imageType img = case B.take 4 img of
+ "\x89\x50\x4e\x47" -> return Png
+ "\x47\x49\x46\x38" -> return Gif
+ "\xff\xd8\xff\xe0" -> return Jpeg
+ _ -> fail "Unknown image type"
+
+imageSize :: ByteString -> Maybe ImageSize
+imageSize img = do
+ t <- imageType img
+ case t of
+ Png -> pngSize img
+ Gif -> gifSize img
+ Jpeg -> jpegSize img
+
+sizeInPixels :: ImageSize -> (Integer, Integer)
+sizeInPixels s = (pxX s, pxY s)
+
+sizeInPoints :: ImageSize -> (Integer, Integer)
+sizeInPoints s = (pxX s * 72 `div` dpiX s, pxY s * 72 `div` dpiY s)
+
+pngSize :: ByteString -> Maybe ImageSize
+pngSize img = do
+ let (h, rest) = B.splitAt 8 img
+ guard $ h == "\x8a\x4d\x4e\x47\x0d\x0a\x1a\x0a" ||
+ h == "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"
+ let (i, rest') = B.splitAt 4 $ B.drop 4 rest
+ guard $ i == "MHDR" || i == "IHDR"
+ let (sizes, rest'') = B.splitAt 8 rest'
+ (x,y) <- case map fromIntegral $ unpack $ sizes of
+ ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return
+ ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4,
+ (shift h1 24) + (shift h2 16) + (shift h3 8) + h4)
+ _ -> fail "PNG parse error"
+ let (dpix, dpiy) = findpHYs rest''
+ return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy }
+
+findpHYs :: ByteString -> (Integer, Integer)
+findpHYs x =
+ if B.null x || "IDAT" `B.isPrefixOf` x
+ then (72,72) -- default, no pHYs
+ else if "pHYs" `B.isPrefixOf` x
+ then let [x1,x2,x3,x4,y1,y2,y3,y4,u] = map fromIntegral
+ $ unpack $ B.take 9 $ B.drop 4 x
+ factor = if u == 1 -- dots per meter
+ then \z -> z * 254 `div` 10000
+ else const 72
+ in ( factor $ (shift x1 24) + (shift x2 16) + (shift x3 8) + x4,
+ factor $ (shift y1 24) + (shift y2 16) + (shift y3 8) + y4 )
+ else findpHYs $ B.drop 1 x -- read another byte
+
+gifSize :: ByteString -> Maybe ImageSize
+gifSize img = do
+ let (h, rest) = B.splitAt 6 img
+ guard $ h == "GIF87a" || h == "GIF89a"
+ case map fromIntegral $ unpack $ B.take 4 rest of
+ [w2,w1,h2,h1] -> return ImageSize {
+ pxX = shift w1 8 + w2,
+ pxY = shift h1 8 + h2,
+ dpiX = 72,
+ dpiY = 72
+ }
+ _ -> fail "GIF parse error"
+
+jpegSize :: ByteString -> Maybe ImageSize
+jpegSize img = do
+ let (hdr, rest) = B.splitAt 4 img
+ guard $ hdr == "\xff\xd8\xff\xe0"
+ guard $ B.length rest >= 14
+ let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral
+ $ unpack $ B.take 5 $ B.drop 9 $ rest
+ let factor = case dpiDensity of
+ 1 -> id
+ 2 -> \x -> (x * 254 `div` 10)
+ _ -> const 72
+ let dpix = factor (shift dpix1 8 + dpix2)
+ let dpiy = factor (shift dpiy1 8 + dpiy2)
+ (w,h) <- findJpegSize rest
+ return $ ImageSize { pxX = w, pxY = h, dpiX = dpix, dpiY = dpiy }
+
+findJpegSize :: ByteString -> Maybe (Integer,Integer)
+findJpegSize bs = do
+ let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs
+ case B.uncons bs' of
+ Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do
+ case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of
+ [h1,h2,w1,w2] -> return (shift w1 8 + w2, shift h1 8 + h2)
+ _ -> fail "JPEG parse error"
+ Just (_,bs'') -> do
+ case map fromIntegral $ unpack $ B.take 2 bs'' of
+ [c1,c2] -> do
+ let len = shift c1 8 + c2
+ -- skip variables
+ findJpegSize $ B.drop len bs''
+ _ -> fail "JPEG parse error"
+ Nothing -> fail "Did not find length record"
+
+
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index c52a4c475..d3df2f2e1 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -35,6 +35,7 @@ import qualified Data.Map as M
-- | Determine mime type appropriate for file path.
getMimeType :: FilePath -> Maybe String
+getMimeType "layout-cache" = Just "application/binary" -- in ODT
getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes
where mimeTypes = M.fromList -- List borrowed from happstack-server.
[("gz","application/x-gzip")
@@ -294,6 +295,7 @@ getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes
,("oth","application/vnd.oasis.opendocument.text-web")
,("otp","application/vnd.oasis.opendocument.presentation-template")
,("ots","application/vnd.oasis.opendocument.spreadsheet-template")
+ ,("otf","application/x-font-opentype")
,("ott","application/vnd.oasis.opendocument.text-template")
,("oza","application/x-oz-application")
,("p","text/x-pascal")
@@ -427,6 +429,7 @@ getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes
,("ts","text/texmacs")
,("tsp","application/dsptype")
,("tsv","text/tab-separated-values")
+ ,("ttf","application/x-font-truetype")
,("txt","text/plain")
,("udeb","application/x-debian-package")
,("uls","text/iuls")
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs
new file mode 100644
index 000000000..cc19e1c50
--- /dev/null
+++ b/src/Text/Pandoc/PDF.hs
@@ -0,0 +1,136 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.PDF
+ Copyright : Copyright (C) 2012 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of LaTeX documents to PDF.
+-}
+module Text.Pandoc.PDF ( tex2pdf ) where
+
+import System.IO.Temp
+import Data.ByteString.Lazy (ByteString)
+import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString.Lazy.Char8 as BC
+import System.Exit (ExitCode (..))
+import System.FilePath
+import System.Directory
+import System.Process
+import Control.Exception (evaluate)
+import System.IO (hClose)
+import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
+import Text.Pandoc.UTF8 as UTF8
+import Control.Monad (unless)
+import Data.List (isInfixOf)
+
+tex2pdf :: String -- ^ tex program (pdflatex, lualatex, xelatex)
+ -> String -- ^ latex source
+ -> IO (Either ByteString ByteString)
+tex2pdf program source = withSystemTempDirectory "tex2pdf" $ \tmpdir ->
+ tex2pdf' tmpdir program source
+
+tex2pdf' :: FilePath -- ^ temp directory for output
+ -> String -- ^ tex program
+ -> String -- ^ tex source
+ -> IO (Either ByteString ByteString)
+tex2pdf' tmpDir program source = do
+ let numruns = if "\\tableofcontents" `isInfixOf` source
+ then 2
+ else 1
+ (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source
+ let msg = "Error producing PDF from TeX source."
+ case (exit, mbPdf) of
+ (ExitFailure _, _) -> return $ Left $
+ msg <> "\n" <> extractMsg log'
+ (ExitSuccess, Nothing) -> return $ Left msg
+ (ExitSuccess, Just pdf) -> return $ Right pdf
+
+(<>) :: ByteString -> ByteString -> ByteString
+(<>) = B.append
+
+-- parsing output
+
+extractMsg :: ByteString -> ByteString
+extractMsg log' = do
+ let msg' = dropWhile (not . ("!" `BC.isPrefixOf`)) $ BC.lines log'
+ let (msg'',rest) = break ("l." `BC.isPrefixOf`) msg'
+ let lineno = take 1 rest
+ if null msg'
+ then log'
+ else BC.unlines (msg'' ++ lineno)
+
+-- running tex programs
+
+-- Run a TeX program on an input bytestring and return (exit code,
+-- contents of stdout, contents of produced PDF if any). Rerun
+-- a fixed number of times to resolve references.
+runTeXProgram :: String -> Int -> FilePath -> String
+ -> IO (ExitCode, ByteString, Maybe ByteString)
+runTeXProgram program runsLeft tmpDir source = do
+ let file = tmpDir </> "input.tex"
+ exists <- doesFileExist file
+ unless exists $ UTF8.writeFile file source
+ let programArgs = ["-halt-on-error", "-interaction", "nonstopmode",
+ "-output-directory", tmpDir, file]
+ (exit, out, err) <- readCommand program programArgs
+ if runsLeft > 1
+ then runTeXProgram program (runsLeft - 1) tmpDir source
+ else do
+ let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir
+ pdfExists <- doesFileExist pdfFile
+ pdf <- if pdfExists
+ then Just `fmap` B.readFile pdfFile
+ else return Nothing
+ return (exit, out <> err, pdf)
+
+-- utility functions
+
+-- Run a command and return exitcode, contents of stdout, and
+-- contents of stderr. (Based on
+-- 'readProcessWithExitCode' from 'System.Process'.)
+readCommand :: FilePath -- ^ command to run
+ -> [String] -- ^ any arguments
+ -> IO (ExitCode,ByteString,ByteString) -- ^ exit, stdout, stderr
+readCommand cmd args = do
+ (Just inh, Just outh, Just errh, pid) <-
+ createProcess (proc cmd args){ std_in = CreatePipe,
+ std_out = CreatePipe,
+ std_err = CreatePipe }
+ outMVar <- newEmptyMVar
+ -- fork off a thread to start consuming stdout
+ out <- B.hGetContents outh
+ _ <- forkIO $ evaluate (B.length out) >> putMVar outMVar ()
+ -- fork off a thread to start consuming stderr
+ err <- B.hGetContents errh
+ _ <- forkIO $ evaluate (B.length err) >> putMVar outMVar ()
+ -- now write and flush any input
+ hClose inh -- done with stdin
+ -- wait on the output
+ takeMVar outMVar
+ takeMVar outMVar
+ hClose outh
+ -- wait on the process
+ ex <- waitForProcess pid
+ return (ex, out, err)
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index eaf0c0f67..725621ce2 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -42,15 +42,16 @@ module Text.Pandoc.Parsing ( (>>~),
parseFromString,
lineClump,
charsInBalanced,
- charsInBalanced',
romanNumeral,
emailAddress,
uri,
withHorizDisplacement,
+ withRaw,
nullBlock,
failIfStrict,
failUnlessLHS,
escaped,
+ characterReference,
anyOrderedListMarker,
orderedListMarker,
charRef,
@@ -78,7 +79,6 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.ParserCombinators.Parsec
-import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
import Data.List ( intercalate, transpose )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
@@ -86,6 +86,7 @@ import Control.Monad ( join, liftM, guard )
import Text.Pandoc.Shared
import qualified Data.Map as M
import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
+import Text.HTML.TagSoup.Entity ( lookupEntity )
-- | Like >>, but returns the operation on the left.
-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
@@ -174,29 +175,23 @@ lineClump = blanklines
-- | Parse a string of characters between an open character
-- and a close character, including text between balanced
-- pairs of open and close, which must be different. For example,
--- @charsInBalanced '(' ')'@ will parse "(hello (there))"
--- and return "hello (there)". Stop if a blank line is
--- encountered.
-charsInBalanced :: Char -> Char -> GenParser Char st String
-charsInBalanced open close = try $ do
+-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))"
+-- and return "hello (there)".
+charsInBalanced :: Char -> Char -> GenParser Char st Char
+ -> GenParser Char st String
+charsInBalanced open close parser = try $ do
char open
- raw <- many $ (many1 (satisfy $ \c ->
- c /= open && c /= close && c /= '\n'))
- <|> (do res <- charsInBalanced open close
- return $ [open] ++ res ++ [close])
- <|> try (string "\n" >>~ notFollowedBy' blanklines)
+ let isDelim c = c == open || c == close
+ raw <- many $ many1 (notFollowedBy (satisfy isDelim) >> parser)
+ <|> (do res <- charsInBalanced open close parser
+ return $ [open] ++ res ++ [close])
char close
return $ concat raw
--- | Like @charsInBalanced@, but allow blank lines in the content.
-charsInBalanced' :: Char -> Char -> GenParser Char st String
-charsInBalanced' open close = try $ do
- char open
- raw <- many $ (many1 (satisfy $ \c -> c /= open && c /= close))
- <|> (do res <- charsInBalanced' open close
- return $ [open] ++ res ++ [close])
- char close
- return $ concat raw
+-- old charsInBalanced would be:
+-- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline)
+-- old charsInBalanced' would be:
+-- charsInBalanced open close anyChar
-- Auxiliary functions for romanNumeral:
@@ -306,6 +301,23 @@ withHorizDisplacement parser = do
pos2 <- getPosition
return (result, sourceColumn pos2 - sourceColumn pos1)
+-- | Applies a parser and returns the raw string that was parsed,
+-- along with the value produced by the parser.
+withRaw :: GenParser Char st a -> GenParser Char st (a, [Char])
+withRaw parser = do
+ pos1 <- getPosition
+ inp <- getInput
+ result <- parser
+ pos2 <- getPosition
+ let (l1,c1) = (sourceLine pos1, sourceColumn pos1)
+ let (l2,c2) = (sourceLine pos2, sourceColumn pos2)
+ let inplines = take ((l2 - l1) + 1) $ lines inp
+ let raw = case inplines of
+ [] -> error "raw: inplines is null" -- shouldn't happen
+ [l] -> take (c2 - c1) l
+ ls -> unlines (init ls) ++ take (c2 - 1) (last ls)
+ return (result, raw)
+
-- | Parses a character and returns 'Null' (so that the parser can move on
-- if it gets stuck).
nullBlock :: GenParser Char st Block
@@ -319,17 +331,21 @@ failIfStrict = do
-- | Fail unless we're in literate haskell mode.
failUnlessLHS :: GenParser tok ParserState ()
-failUnlessLHS = do
- state <- getState
- if stateLiterateHaskell state then return () else fail "Literate haskell feature"
+failUnlessLHS = getState >>= guard . stateLiterateHaskell
-- | Parses backslash, then applies character parser.
escaped :: GenParser Char st Char -- ^ Parser for character to escape
- -> GenParser Char st Inline
-escaped parser = try $ do
- char '\\'
- result <- parser
- return (Str [result])
+ -> GenParser Char st Char
+escaped parser = try $ char '\\' >> parser
+
+-- | Parse character entity.
+characterReference :: GenParser Char st Char
+characterReference = try $ do
+ char '&'
+ ent <- many1Till nonspaceChar (char ';')
+ case lookupEntity ent of
+ Just c -> return c
+ Nothing -> fail "entity not found"
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
upperRoman :: GenParser Char st (ListNumberStyle, Int)
@@ -512,7 +528,7 @@ gridTableWith block tableCaption headless =
gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar $ tail $
- splitByIndices (init indices) $ removeTrailingSpace line
+ splitStringByIndices (init indices) $ removeTrailingSpace line
gridPart :: Char -> GenParser Char st (Int, Int)
gridPart ch = do
@@ -598,7 +614,7 @@ readWith :: GenParser t ParserState a -- ^ parser
-> a
readWith parser state input =
case runParser parser state "source" input of
- Left err -> error $ "\nError:\n" ++ show err
+ Left err' -> error $ "\nError:\n" ++ show err'
Right result -> result
-- | Parse a string with @parser@ (for testing).
@@ -613,6 +629,8 @@ data ParserState = ParserState
{ stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
stateParserContext :: ParserContext, -- ^ Inside list?
stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
+ stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph
+ stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed
stateKeys :: KeyTable, -- ^ List of reference keys
stateCitations :: [String], -- ^ List of available citations
stateNotes :: NoteTable, -- ^ List of notes
@@ -623,6 +641,9 @@ data ParserState = ParserState
stateDate :: [Inline], -- ^ Date of document
stateStrict :: Bool, -- ^ Use strict markdown syntax?
stateSmart :: Bool, -- ^ Use smart typography?
+ stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior
+ -- in parsing dashes; -- is em-dash;
+ -- before numeral is en-dash
stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell
stateColumns :: Int, -- ^ Number of columns in terminal
stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used
@@ -640,6 +661,8 @@ defaultParserState =
ParserState { stateParseRaw = False,
stateParserContext = NullState,
stateQuoteContext = NoQuote,
+ stateMaxNestingLevel = 6,
+ stateLastStrPos = Nothing,
stateKeys = M.empty,
stateCitations = [],
stateNotes = [],
@@ -650,6 +673,7 @@ defaultParserState =
stateDate = [],
stateStrict = False,
stateSmart = False,
+ stateOldDashes = False,
stateLiterateHaskell = False,
stateColumns = 80,
stateHeaderTable = [],
@@ -714,7 +738,7 @@ smartPunctuation inlineParser = do
choice [ quoted inlineParser, apostrophe, dash, ellipses ]
apostrophe :: GenParser Char ParserState Inline
-apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
+apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019")
quoted :: GenParser Char ParserState Inline
-> GenParser Char ParserState Inline
@@ -761,8 +785,12 @@ charOrRef cs =
return c)
singleQuoteStart :: GenParser Char ParserState ()
-singleQuoteStart = do
+singleQuoteStart = do
failIfInQuoteContext InSingleQuote
+ pos <- getPosition
+ st <- getState
+ -- single quote start can't be right after str
+ guard $ stateLastStrPos st /= Just pos
try $ do charOrRef "'\8216\145"
notFollowedBy (oneOf ")!],;:-? \t\n")
notFollowedBy (char '.') <|> lookAhead (string "..." >> return ())
@@ -789,22 +817,42 @@ doubleQuoteEnd = do
ellipses :: GenParser Char st Inline
ellipses = do
- try (charOrRef "…\133") <|> try (string "..." >> return '…')
- return Ellipses
-
-dash :: GenParser Char st Inline
-dash = enDash <|> emDash
+ try (charOrRef "\8230\133") <|> try (string "..." >> return '…')
+ return (Str "\8230")
+
+dash :: GenParser Char ParserState Inline
+dash = do
+ oldDashes <- stateOldDashes `fmap` getState
+ if oldDashes
+ then emDashOld <|> enDashOld
+ else Str `fmap` (hyphenDash <|> emDash <|> enDash)
+
+-- Two hyphens = en-dash, three = em-dash
+hyphenDash :: GenParser Char st String
+hyphenDash = do
+ try $ string "--"
+ option "\8211" (char '-' >> return "\8212")
+
+emDash :: GenParser Char st String
+emDash = do
+ try (charOrRef "\8212\151")
+ return "\8212"
-enDash :: GenParser Char st Inline
+enDash :: GenParser Char st String
enDash = do
- try (charOrRef "–\150") <|>
+ try (charOrRef "\8212\151")
+ return "\8211"
+
+enDashOld :: GenParser Char st Inline
+enDashOld = do
+ try (charOrRef "\8211\150") <|>
try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
- return EnDash
+ return (Str "\8211")
-emDash :: GenParser Char st Inline
-emDash = do
- try (charOrRef "—\151") <|> (try $ string "--" >> optional (char '-') >> return '—')
- return EmDash
+emDashOld :: GenParser Char st Inline
+emDashOld = do
+ try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-')
+ return (Str "\8212")
--
-- Macros
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 54d65af6f..bf78b2594 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -42,6 +42,7 @@ module Text.Pandoc.Pretty (
, flush
, nest
, hang
+ , beforeNonBlank
, nowrap
, offset
, height
@@ -59,16 +60,20 @@ module Text.Pandoc.Pretty (
, hsep
, vcat
, vsep
+ , chomp
, inside
, braces
, brackets
, parens
, quotes
, doubleQuotes
+ , charWidth
+ , realLength
)
where
-import Data.DList (DList, fromList, toList, cons, singleton)
+import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex)
+import Data.Foldable (toList)
import Data.List (intercalate)
import Data.Monoid
import Data.String
@@ -90,6 +95,7 @@ type DocState a = State (RenderState a) ()
data D = Text Int String
| Block Int [String]
| Prefixed String Doc
+ | BeforeNonBlank Doc
| Flush Doc
| BreakingSpace
| CarriageReturn
@@ -97,7 +103,7 @@ data D = Text Int String
| BlankLine
deriving (Show)
-newtype Doc = Doc { unDoc :: DList D }
+newtype Doc = Doc { unDoc :: Seq D }
deriving (Monoid)
instance Show Doc where
@@ -106,6 +112,14 @@ instance Show Doc where
instance IsString Doc where
fromString = text
+isBlank :: D -> Bool
+isBlank BreakingSpace = True
+isBlank CarriageReturn = True
+isBlank NewLine = True
+isBlank BlankLine = True
+isBlank (Text _ (c:_)) = isSpace c
+isBlank _ = False
+
-- | True if the document is empty.
isEmpty :: Doc -> Bool
isEmpty = null . toList . unDoc
@@ -114,9 +128,17 @@ isEmpty = null . toList . unDoc
empty :: Doc
empty = mempty
--- | @a <> b@ is the result of concatenating @a@ with @b@.
-(<>) :: Doc -> Doc -> Doc
+#if MIN_VERSION_base(4,5,0)
+-- (<>) is defined in Data.Monoid
+#else
+infixr 6 <>
+
+-- | An infix synonym for 'mappend'.
+-- @a <> b@ is the result of concatenating @a@ with @b@.
+(<>) :: Monoid m => m -> m -> m
(<>) = mappend
+{-# INLINE (<>) #-}
+#endif
-- | Concatenate a list of 'Doc's.
cat :: [Doc] -> Doc
@@ -128,6 +150,7 @@ hcat = mconcat
-- | Concatenate a list of 'Doc's, putting breakable spaces
-- between them.
+infixr 6 <+>
(<+>) :: Doc -> Doc -> Doc
(<+>) x y = if isEmpty x
then y
@@ -140,6 +163,7 @@ hcat = mconcat
hsep :: [Doc] -> Doc
hsep = foldr (<+>) empty
+infixr 5 $$
-- | @a $$ b@ puts @a@ above @b@.
($$) :: Doc -> Doc -> Doc
($$) x y = if isEmpty x
@@ -148,6 +172,7 @@ hsep = foldr (<+>) empty
then x
else x <> cr <> y
+infixr 5 $+$
-- | @a $$ b@ puts @a@ above @b@, with a blank line between.
($+$) :: Doc -> Doc -> Doc
($+$) x y = if isEmpty x
@@ -164,6 +189,17 @@ vcat = foldr ($$) empty
vsep :: [Doc] -> Doc
vsep = foldr ($+$) empty
+-- | Chomps trailing blank space off of a 'Doc'.
+chomp :: Doc -> Doc
+chomp d = Doc (fromList dl')
+ where dl = toList (unDoc d)
+ dl' = reverse $ dropWhile removeable $ reverse dl
+ removeable BreakingSpace = True
+ removeable CarriageReturn = True
+ removeable NewLine = True
+ removeable BlankLine = True
+ removeable _ = False
+
outp :: (IsString a, Monoid a)
=> Int -> String -> DocState a
outp off s | off <= 0 = do
@@ -172,7 +208,7 @@ outp off s | off <= 0 = do
when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do
let pref = reverse $ dropWhile isSpace $ reverse rawpref
modify $ \st -> st{ output = fromString pref : output st
- , column = column st + length pref }
+ , column = column st + realLength pref }
when (off < 0) $ do
modify $ \st -> st { output = fromString s : output st
, column = 0
@@ -182,7 +218,7 @@ outp off s = do
let pref = prefix st'
when (column st' == 0 && usePrefix st' && not (null pref)) $ do
modify $ \st -> st{ output = fromString pref : output st
- , column = column st + length pref }
+ , column = column st + realLength pref }
modify $ \st -> st{ output = fromString s : output st
, column = column st + off
, newlines = 0 }
@@ -229,6 +265,12 @@ renderList (Flush d : xs) = do
modify $ \s -> s{ usePrefix = oldUsePrefix }
renderList xs
+renderList (BeforeNonBlank d : xs) =
+ case xs of
+ (x:_) | isBlank x -> renderList xs
+ | otherwise -> renderDoc d >> renderList xs
+ [] -> renderList xs
+
renderList (BlankLine : xs) = do
st <- get
case output st of
@@ -283,7 +325,7 @@ renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) =
renderList (Block width lns : xs) = do
st <- get
let oldPref = prefix st
- case column st - length oldPref of
+ case column st - realLength oldPref of
n | n > 0 -> modify $ \s -> s{ prefix = oldPref ++ replicate n ' ' }
_ -> return ()
renderDoc $ blockToDoc width lns
@@ -295,7 +337,7 @@ mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) =
Block (w1 + w2 + if addSpace then 1 else 0) $
zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties)
where empties = replicate (abs $ length lns1 - length lns2) ""
- pad n s = s ++ replicate (n - length s) ' '
+ pad n s = s ++ replicate (n - realLength s) ' '
sp "" = ""
sp xs = if addSpace then (' ' : xs) else xs
mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!"
@@ -312,13 +354,13 @@ offsetOf _ = 0
-- | A literal string.
text :: String -> Doc
text = Doc . toChunks
- where toChunks :: String -> DList D
+ where toChunks :: String -> Seq D
toChunks [] = mempty
toChunks s = case break (=='\n') s of
- ([], _:ys) -> NewLine `cons` toChunks ys
- (xs, _:ys) -> Text (length xs) xs `cons`
- NewLine `cons` toChunks ys
- (xs, []) -> singleton $ Text (length xs) xs
+ ([], _:ys) -> NewLine <| toChunks ys
+ (xs, _:ys) -> Text (realLength xs) xs <|
+ (NewLine <| toChunks ys)
+ (xs, []) -> singleton $ Text (realLength xs) xs
-- | A character.
char :: Char -> Doc
@@ -359,15 +401,20 @@ nest ind = prefixed (replicate ind ' ')
hang :: Int -> Doc -> Doc -> Doc
hang ind start doc = start <> nest ind doc
+-- | @beforeNonBlank d@ conditionally includes @d@ unless it is
+-- followed by blank space.
+beforeNonBlank :: Doc -> Doc
+beforeNonBlank d = Doc $ singleton (BeforeNonBlank d)
+
-- | Makes a 'Doc' non-reflowable.
nowrap :: Doc -> Doc
-nowrap doc = Doc $ fromList $ map replaceSpace $ toList $ unDoc doc
- where replaceSpace BreakingSpace = Text 1 " "
- replaceSpace x = x
+nowrap doc = Doc $ mapWithIndex replaceSpace $ unDoc doc
+ where replaceSpace _ BreakingSpace = Text 1 " "
+ replaceSpace _ x = x
-- | Returns the width of a 'Doc'.
offset :: Doc -> Int
-offset d = case map length . lines . render Nothing $ d of
+offset d = case map realLength . lines . render Nothing $ d of
[] -> 0
os -> maximum os
@@ -382,11 +429,11 @@ lblock = block id
-- | Like 'lblock' but aligned to the right.
rblock :: Int -> Doc -> Doc
-rblock w = block (\s -> replicate (w - length s) ' ' ++ s) w
+rblock w = block (\s -> replicate (w - realLength s) ' ' ++ s) w
-- | Like 'lblock' but aligned centered.
cblock :: Int -> Doc -> Doc
-cblock w = block (\s -> replicate ((w - length s) `div` 2) ' ' ++ s) w
+cblock w = block (\s -> replicate ((w - realLength s) `div` 2) ' ' ++ s) w
-- | Returns the height of a block or other 'Doc'.
height :: Doc -> Int
@@ -401,7 +448,7 @@ chop n cs = case break (=='\n') cs of
(_:[]) -> [xs, ""]
(_:zs) -> xs : chop n zs
else take n xs : chop n (drop n xs ++ ys)
- where len = length xs
+ where len = realLength xs
-- | Encloses a 'Doc' inside a start and end 'Doc'.
inside :: Doc -> Doc -> Doc -> Doc
@@ -427,3 +474,51 @@ quotes = inside (char '\'') (char '\'')
-- | Wraps a 'Doc' in double quotes.
doubleQuotes :: Doc -> Doc
doubleQuotes = inside (char '"') (char '"')
+
+-- | Returns width of a character in a monospace font: 0 for a combining
+-- character, 1 for a regular character, 2 for an East Asian wide character.
+charWidth :: Char -> Int
+charWidth c =
+ case c of
+ _ | c < '\x0300' -> 1
+ | c >= '\x0300' && c <= '\x036F' -> 0 -- combining
+ | c >= '\x0370' && c <= '\x10FC' -> 1
+ | c >= '\x1100' && c <= '\x115F' -> 2
+ | c >= '\x1160' && c <= '\x11A2' -> 1
+ | c >= '\x11A3' && c <= '\x11A7' -> 2
+ | c >= '\x11A8' && c <= '\x11F9' -> 1
+ | c >= '\x11FA' && c <= '\x11FF' -> 2
+ | c >= '\x1200' && c <= '\x2328' -> 1
+ | c >= '\x2329' && c <= '\x232A' -> 2
+ | c >= '\x232B' && c <= '\x2E31' -> 1
+ | c >= '\x2E80' && c <= '\x303E' -> 2
+ | c == '\x303F' -> 1
+ | c >= '\x3041' && c <= '\x3247' -> 2
+ | c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous
+ | c >= '\x3250' && c <= '\x4DBF' -> 2
+ | c >= '\x4DC0' && c <= '\x4DFF' -> 1
+ | c >= '\x4E00' && c <= '\xA4C6' -> 2
+ | c >= '\xA4D0' && c <= '\xA95F' -> 1
+ | c >= '\xA960' && c <= '\xA97C' -> 2
+ | c >= '\xA980' && c <= '\xABF9' -> 1
+ | c >= '\xAC00' && c <= '\xD7FB' -> 2
+ | c >= '\xD800' && c <= '\xDFFF' -> 1
+ | c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous
+ | c >= '\xF900' && c <= '\xFAFF' -> 2
+ | c >= '\xFB00' && c <= '\xFDFD' -> 1
+ | c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous
+ | c >= '\xFE10' && c <= '\xFE19' -> 2
+ | c >= '\xFE20' && c <= '\xFE26' -> 1
+ | c >= '\xFE30' && c <= '\xFE6B' -> 2
+ | c >= '\xFE70' && c <= '\x16A38' -> 1
+ | c >= '\x1B000' && c <= '\x1B001' -> 2
+ | c >= '\x1D000' && c <= '\x1F1FF' -> 1
+ | c >= '\x1F200' && c <= '\x1F251' -> 2
+ | c >= '\x1F300' && c <= '\x1F773' -> 1
+ | c >= '\x20000' && c <= '\x3FFFD' -> 2
+ | otherwise -> 1
+
+-- | Get real length of string, taking into account combining and double-wide
+-- characters.
+realLength :: String -> Int
+realLength = sum . map charWidth
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 7c882f680..43165ceb1 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -215,6 +215,7 @@ pSimpleTable = try $ do
TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
skipMany pBlank
head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th")
+ skipMany pBlank
rows <- pOptInTag "tbody"
$ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td")
skipMany pBlank
@@ -420,8 +421,12 @@ pTagContents =
pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad
pStr :: GenParser Char ParserState Inline
-pStr = liftM Str $ many1 $ satisfy $ \c ->
- not (isSpace c) && not (isSpecial c) && not (isBad c)
+pStr = do
+ result <- many1 $ satisfy $ \c ->
+ not (isSpace c) && not (isSpecial c) && not (isBad c)
+ pos <- getPosition
+ updateState $ \s -> s{ stateLastStrPos = Just pos }
+ return $ Str result
isSpecial :: Char -> Bool
isSpecial '"' = True
@@ -502,16 +507,35 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",
"dt", "frameset", "li", "tbody", "td", "tfoot",
"th", "thead", "tr", "script", "style"]
+-- We want to allow raw docbook in markdown documents, so we
+-- include docbook block tags here too.
+blockDocBookTags :: [String]
+blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist",
+ "orderedlist", "segmentedlist", "simplelist",
+ "variablelist", "caution", "important", "note", "tip",
+ "warning", "address", "literallayout", "programlisting",
+ "programlistingco", "screen", "screenco", "screenshot",
+ "synopsis", "example", "informalexample", "figure",
+ "informalfigure", "table", "informaltable", "para",
+ "simpara", "formalpara", "equation", "informalequation",
+ "figure", "screenshot", "mediaobject", "qandaset",
+ "procedure", "task", "cmdsynopsis", "funcsynopsis",
+ "classsynopsis", "blockquote", "epigraph", "msgset",
+ "sidebar", "title"]
+
+blockTags :: [String]
+blockTags = blockHtmlTags ++ blockDocBookTags
+
isInlineTag :: Tag String -> Bool
-isInlineTag t = tagOpen (`notElem` blockHtmlTags) (const True) t ||
- tagClose (`notElem` blockHtmlTags) t ||
+isInlineTag t = tagOpen (`notElem` blockTags) (const True) t ||
+ tagClose (`notElem` blockTags) t ||
tagComment (const True) t
isBlockTag :: Tag String -> Bool
isBlockTag t = tagOpen (`elem` blocktags) (const True) t ||
tagClose (`elem` blocktags) t ||
tagComment (const True) t
- where blocktags = blockHtmlTags ++ eitherBlockOrInline
+ where blocktags = blockTags ++ eitherBlockOrInline
isTextTag :: Tag String -> Bool
isTextTag = tagText (const True)
@@ -546,8 +570,8 @@ t `closes` t2 |
t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] &&
t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div"
t1 `closes` t2 |
- t1 `elem` blockHtmlTags &&
- t2 `notElem` (blockHtmlTags ++ eitherBlockOrInline) = True
+ t1 `elem` blockTags &&
+ t2 `notElem` (blockTags ++ eitherBlockOrInline) = True
_ `closes` _ = False
--- parsers for use in markdown, textile readers
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 02c7361d7..5e69347b6 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2012 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.LaTeX
- Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ Copyright : Copyright (C) 2006-2012 John MacFarlane
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
@@ -27,20 +27,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of LaTeX to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.LaTeX (
- readLaTeX,
- rawLaTeXInline,
- rawLaTeXEnvironment'
+module Text.Pandoc.Readers.LaTeX ( readLaTeX,
+ rawLaTeXInline,
+ rawLaTeXBlock,
+ handleIncludes
) where
-import Text.ParserCombinators.Parsec
+import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional)
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
-import Data.Maybe ( fromMaybe )
-import Data.Char ( chr, toUpper )
-import Data.List ( intercalate, isPrefixOf, isSuffixOf )
+import qualified Text.Pandoc.UTF8 as UTF8
+import Data.Char ( chr, ord )
import Control.Monad
+import Text.Pandoc.Builder
+import Data.Char (isLetter, isPunctuation, isSpace)
+import Control.Applicative
+import Data.Monoid
+import System.FilePath (replaceExtension)
+import Data.List (intercalate)
+import qualified Data.Map as M
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ParserState -- ^ Parser state, including options for parser
@@ -48,985 +54,867 @@ readLaTeX :: ParserState -- ^ Parser state, including options for parser
-> Pandoc
readLaTeX = readWith parseLaTeX
--- characters with special meaning
-specialChars :: [Char]
-specialChars = "\\`$%^&_~#{}[]\n \t|<>'\"-"
-
---
--- utility functions
---
-
--- | Returns text between brackets and its matching pair.
-bracketedText :: Char -> Char -> GenParser Char st [Char]
-bracketedText openB closeB = do
- result <- charsInBalanced' openB closeB
- return $ [openB] ++ result ++ [closeB]
-
--- | Returns an option or argument of a LaTeX command.
-optOrArg :: GenParser Char st [Char]
-optOrArg = try $ spaces >> (bracketedText '{' '}' <|> bracketedText '[' ']')
+parseLaTeX :: LP Pandoc
+parseLaTeX = do
+ bs <- blocks
+ eof
+ st <- getState
+ let title' = stateTitle st
+ let authors' = stateAuthors st
+ let date' = stateDate st
+ return $ Pandoc (Meta title' authors' date') $ toList bs
--- | True if the string begins with '{'.
-isArg :: [Char] -> Bool
-isArg ('{':_) = True
-isArg _ = False
+type LP = GenParser Char ParserState
--- | Returns list of options and arguments of a LaTeX command.
-commandArgs :: GenParser Char st [[Char]]
-commandArgs = many optOrArg
+anyControlSeq :: LP String
+anyControlSeq = do
+ char '\\'
+ next <- option '\n' anyChar
+ name <- case next of
+ '\n' -> return ""
+ c | isLetter c -> (c:) <$> (many letter <* optional sp)
+ | otherwise -> return [c]
+ return name
--- | Parses LaTeX command, returns (name, star, list of options or arguments).
-command :: GenParser Char st ([Char], [Char], [[Char]])
-command = do
+controlSeq :: String -> LP String
+controlSeq name = try $ do
char '\\'
- name <- many1 letter
- star <- option "" (string "*") -- some commands have starred versions
- args <- commandArgs
- return (name, star, args)
-
-begin :: [Char] -> GenParser Char st [Char]
-begin name = try $ do
- string "\\begin"
- spaces
- char '{'
- string name
- char '}'
- optional commandArgs
- spaces
+ case name of
+ "" -> mzero
+ [c] | not (isLetter c) -> string [c]
+ cs -> string cs <* optional sp
return name
-end :: [Char] -> GenParser Char st [Char]
-end name = try $ do
- string "\\end"
- spaces
+sp :: LP ()
+sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t')
+ <|> (try $ newline >>~ lookAhead anyChar >>~ notFollowedBy blankline)
+
+isLowerHex :: Char -> Bool
+isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f'
+
+tildeEscape :: LP Char
+tildeEscape = try $ do
+ string "^^"
+ c <- satisfy (\x -> x >= '\0' && x <= '\128')
+ d <- if isLowerHex c
+ then option "" $ count 1 (satisfy isLowerHex)
+ else return ""
+ if null d
+ then case ord c of
+ x | x >= 64 && x <= 127 -> return $ chr (x - 64)
+ | otherwise -> return $ chr (x + 64)
+ else return $ chr $ read ('0':'x':c:d)
+
+comment :: LP ()
+comment = do
+ char '%'
+ skipMany (satisfy (/='\n'))
+ newline
+ return ()
+
+grouped :: Monoid a => LP a -> LP a
+grouped parser = try $ char '{' *> (mconcat <$> manyTill parser (char '}'))
+
+braced :: LP String
+braced = char '{' *> (concat <$> manyTill
+ ( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{'))
+ <|> try (string "\\}")
+ <|> try (string "\\{")
+ <|> try (string "\\\\")
+ <|> ((\x -> "{" ++ x ++ "}") <$> braced)
+ <|> count 1 anyChar
+ ) (char '}'))
+
+bracketed :: Monoid a => LP a -> LP a
+bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']'))
+
+trim :: String -> String
+trim = removeLeadingTrailingSpace
+
+mathDisplay :: LP String -> LP Inlines
+mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim)
+
+mathInline :: LP String -> LP Inlines
+mathInline p = math <$> (try p >>= applyMacros')
+
+mathChars :: LP String
+mathChars = concat <$>
+ many ( many1 (satisfy (\c -> c /= '$' && c /='\\'))
+ <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar)
+ )
+
+double_quote :: LP Inlines
+double_quote = (doubleQuoted . mconcat) <$>
+ (try $ string "``" *> manyTill inline (try $ string "''"))
+
+single_quote :: LP Inlines
+single_quote = char '`' *>
+ ( try ((singleQuoted . mconcat) <$>
+ manyTill inline (try $ char '\'' >> notFollowedBy letter))
+ <|> lit "`")
+
+inline :: LP Inlines
+inline = (mempty <$ comment)
+ <|> (space <$ sp)
+ <|> inlineText
+ <|> inlineCommand
+ <|> grouped inline
+ <|> (char '-' *> option (str "-")
+ ((char '-') *> option (str "–") (str "—" <$ char '-')))
+ <|> double_quote
+ <|> single_quote
+ <|> (str "’" <$ char '\'')
+ <|> (str "\160" <$ char '~')
+ <|> (mathDisplay $ string "$$" *> mathChars <* string "$$")
+ <|> (mathInline $ char '$' *> mathChars <* char '$')
+ <|> (superscript <$> (char '^' *> tok))
+ <|> (subscript <$> (char '_' *> tok))
+ <|> (failUnlessLHS *> char '|' *> doLHSverb)
+ <|> (str <$> count 1 tildeEscape)
+ <|> (str <$> string "]")
+ <|> (str <$> string "#") -- TODO print warning?
+ <|> (str <$> string "&") -- TODO print warning?
+ -- <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters
+
+inlines :: LP Inlines
+inlines = mconcat <$> many (notFollowedBy (char '}') *> inline)
+
+block :: LP Blocks
+block = (mempty <$ comment)
+ <|> (mempty <$ ((spaceChar <|> blankline) *> spaces))
+ <|> environment
+ <|> mempty <$ macro -- TODO improve macros, make them work everywhere
+ <|> blockCommand
+ <|> grouped block
+ <|> paragraph
+ <|> (mempty <$ char '&') -- loose & in table environment
+
+
+blocks :: LP Blocks
+blocks = mconcat <$> many block
+
+blockCommand :: LP Blocks
+blockCommand = try $ do
+ name <- anyControlSeq
+ star <- option "" (string "*" <* optional sp)
+ let name' = name ++ star
+ case M.lookup name' blockCommands of
+ Just p -> p
+ Nothing -> case M.lookup name blockCommands of
+ Just p -> p
+ Nothing -> mzero
+
+inBrackets :: Inlines -> Inlines
+inBrackets x = (str "[") <> x <> (str "]")
+
+-- eat an optional argument and one or more arguments in braces
+ignoreInlines :: String -> (String, LP Inlines)
+ignoreInlines name = (name, doraw <|> (mempty <$ optargs))
+ where optargs = skipopts *> skipMany (try $ optional sp *> braced)
+ contseq = '\\':name
+ doraw = (rawInline "latex" . (contseq ++) . snd) <$>
+ (getState >>= guard . stateParseRaw >> (withRaw optargs))
+
+ignoreBlocks :: String -> (String, LP Blocks)
+ignoreBlocks name = (name, doraw <|> (mempty <$ optargs))
+ where optargs = skipopts *> skipMany (try $ optional sp *> braced)
+ contseq = '\\':name
+ doraw = (rawBlock "latex" . (contseq ++) . snd) <$>
+ (getState >>= guard . stateParseRaw >> (withRaw optargs))
+
+blockCommands :: M.Map String (LP Blocks)
+blockCommands = M.fromList $
+ [ ("par", mempty <$ skipopts)
+ , ("title", mempty <$ (skipopts *> tok >>= addTitle))
+ , ("subtitle", mempty <$ (skipopts *> tok >>= addSubtitle))
+ , ("author", mempty <$ (skipopts *> authors))
+ -- -- in letter class, temp. store address & sig as title, author
+ , ("address", mempty <$ (skipopts *> tok >>= addTitle))
+ , ("signature", mempty <$ (skipopts *> authors))
+ , ("date", mempty <$ (skipopts *> tok >>= addDate))
+ -- sectioning
+ , ("chapter", updateState (\s -> s{ stateHasChapters = True }) *> section 0)
+ , ("section", section 1)
+ , ("subsection", section 2)
+ , ("subsubsection", section 3)
+ , ("paragraph", section 4)
+ , ("subparagraph", section 5)
+ -- beamer slides
+ , ("frametitle", section 3)
+ , ("framesubtitle", section 4)
+ -- letters
+ , ("opening", (para . trimInlines) <$> (skipopts *> tok))
+ , ("closing", skipopts *> closing)
+ --
+ , ("rule", skipopts *> tok *> tok *> pure horizontalRule)
+ , ("begin", mzero) -- these are here so they won't be interpreted as inline
+ , ("end", mzero)
+ , ("item", skipopts *> loose_item)
+ , ("documentclass", skipopts *> braced *> preamble)
+ ] ++ map ignoreBlocks
+ -- these commands will be ignored unless --parse-raw is specified,
+ -- in which case they will appear as raw latex blocks
+ [ "newcommand", "renewcommand", "newenvironment", "renewenvironment"
+ -- newcommand, etc. should be parsed by macro, but we need this
+ -- here so these aren't parsed as inline commands to ignore
+ , "special", "pdfannot", "pdfstringdef"
+ , "bibliography", "bibliographystyle"
+ , "maketitle", "makeindex", "makeglossary"
+ , "addcontentsline", "addtocontents", "addtocounter"
+ -- \ignore{} is used conventionally in literate haskell for definitions
+ -- that are to be processed by the compiler but not printed.
+ , "ignore"
+ , "hyperdef"
+ , "noindent"
+ , "markboth", "markright", "markleft"
+ , "hspace", "vspace"
+ ]
+
+addTitle :: Inlines -> LP ()
+addTitle tit = updateState (\s -> s{ stateTitle = toList tit })
+
+addSubtitle :: Inlines -> LP ()
+addSubtitle tit = updateState (\s -> s{ stateTitle = stateTitle s ++
+ toList (str ":" <> linebreak <> tit) })
+
+authors :: LP ()
+authors = try $ do
char '{'
- string name
+ let oneAuthor = mconcat <$>
+ many1 (notFollowedBy' (controlSeq "and") >> inline)
+ auths <- sepBy oneAuthor (controlSeq "and")
char '}'
- return name
+ updateState (\s -> s { stateAuthors = map (normalizeSpaces . toList) auths })
+
+addDate :: Inlines -> LP ()
+addDate dat = updateState (\s -> s{ stateDate = toList dat })
+
+section :: Int -> LP Blocks
+section lvl = do
+ hasChapters <- stateHasChapters `fmap` getState
+ let lvl' = if hasChapters then lvl + 1 else lvl
+ skipopts
+ contents <- grouped inline
+ return $ header lvl' contents
+
+inlineCommand :: LP Inlines
+inlineCommand = try $ do
+ name <- anyControlSeq
+ guard $ not $ isBlockCommand name
+ parseRaw <- stateParseRaw `fmap` getState
+ star <- option "" (string "*")
+ let name' = name ++ star
+ case M.lookup name' inlineCommands of
+ Just p -> p
+ Nothing -> case M.lookup name inlineCommands of
+ Just p -> p
+ Nothing
+ | parseRaw ->
+ (rawInline "latex" . (('\\':name') ++)) <$>
+ (withRaw (skipopts *> many braced)
+ >>= applyMacros' . snd)
+ | otherwise -> return mempty
+
+isBlockCommand :: String -> Bool
+isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands
+
+inlineCommands :: M.Map String (LP Inlines)
+inlineCommands = M.fromList $
+ [ ("emph", emph <$> tok)
+ , ("textit", emph <$> tok)
+ , ("textsc", smallcaps <$> tok)
+ , ("sout", strikeout <$> tok)
+ , ("textsuperscript", superscript <$> tok)
+ , ("textsubscript", subscript <$> tok)
+ , ("textbackslash", lit "\\")
+ , ("backslash", lit "\\")
+ , ("textbf", strong <$> tok)
+ , ("ldots", lit "…")
+ , ("dots", lit "…")
+ , ("mdots", lit "…")
+ , ("sim", lit "~")
+ , ("label", inBrackets <$> tok)
+ , ("ref", inBrackets <$> tok)
+ , ("(", mathInline $ manyTill anyChar (try $ string "\\)"))
+ , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]"))
+ , ("ensuremath", mathInline $ braced)
+ , ("P", lit "¶")
+ , ("S", lit "§")
+ , ("$", lit "$")
+ , ("%", lit "%")
+ , ("&", lit "&")
+ , ("#", lit "#")
+ , ("_", lit "_")
+ , ("{", lit "{")
+ , ("}", lit "}")
+ -- old TeX commands
+ , ("em", emph <$> inlines)
+ , ("it", emph <$> inlines)
+ , ("sl", emph <$> inlines)
+ , ("bf", strong <$> inlines)
+ , ("rm", inlines)
+ , ("itshape", emph <$> inlines)
+ , ("slshape", emph <$> inlines)
+ , ("scshape", smallcaps <$> inlines)
+ , ("bfseries", strong <$> inlines)
+ , ("/", pure mempty) -- italic correction
+ , ("cc", lit "ç")
+ , ("cC", lit "Ç")
+ , ("aa", lit "å")
+ , ("AA", lit "Å")
+ , ("ss", lit "ß")
+ , ("o", lit "ø")
+ , ("O", lit "Ø")
+ , ("L", lit "Ł")
+ , ("l", lit "ł")
+ , ("ae", lit "æ")
+ , ("AE", lit "Æ")
+ , ("pounds", lit "£")
+ , ("euro", lit "€")
+ , ("copyright", lit "©")
+ , ("`", option (str "`") $ try $ tok >>= accent grave)
+ , ("'", option (str "'") $ try $ tok >>= accent acute)
+ , ("^", option (str "^") $ try $ tok >>= accent hat)
+ , ("~", option (str "~") $ try $ tok >>= accent circ)
+ , ("\"", option (str "\"") $ try $ tok >>= accent umlaut)
+ , (".", option (str ".") $ try $ tok >>= accent dot)
+ , ("=", option (str "=") $ try $ tok >>= accent macron)
+ , ("i", lit "i")
+ , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp))
+ , (",", pure mempty)
+ , ("@", pure mempty)
+ , (" ", lit "\160")
+ , ("ps", pure $ str "PS." <> space)
+ , ("TeX", lit "TeX")
+ , ("LaTeX", lit "LaTeX")
+ , ("bar", lit "|")
+ , ("textless", lit "<")
+ , ("textgreater", lit ">")
+ , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
+ , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}')))
+ , ("verb", doverb)
+ , ("lstinline", doverb)
+ , ("texttt", (code . stringify . toList) <$> tok)
+ , ("url", (unescapeURL <$> braced) >>= \url ->
+ pure (link url "" (codeWith ("",["url"],[]) url)))
+ , ("href", (unescapeURL <$> braced <* optional sp) >>= \url ->
+ tok >>= \lab ->
+ pure (link url "" lab))
+ , ("includegraphics", skipopts *> (unescapeURL <$> braced) >>=
+ (\src -> pure (image src "" (str "image"))))
+ , ("cite", citation "cite" NormalCitation False)
+ , ("citep", citation "citep" NormalCitation False)
+ , ("citep*", citation "citep*" NormalCitation False)
+ , ("citeal", citation "citeal" NormalCitation False)
+ , ("citealp", citation "citealp" NormalCitation False)
+ , ("citealp*", citation "citealp*" NormalCitation False)
+ , ("autocite", citation "autocite" NormalCitation False)
+ , ("footcite", citation "footcite" NormalCitation False)
+ , ("parencite", citation "parencite" NormalCitation False)
+ , ("supercite", citation "supercite" NormalCitation False)
+ , ("footcitetext", citation "footcitetext" NormalCitation False)
+ , ("citeyearpar", citation "citeyearpar" SuppressAuthor False)
+ , ("citeyear", citation "citeyear" SuppressAuthor False)
+ , ("autocite*", citation "autocite*" SuppressAuthor False)
+ , ("cite*", citation "cite*" SuppressAuthor False)
+ , ("parencite*", citation "parencite*" SuppressAuthor False)
+ , ("textcite", citation "textcite" AuthorInText False)
+ , ("citet", citation "citet" AuthorInText False)
+ , ("citet*", citation "citet*" AuthorInText False)
+ , ("citealt", citation "citealt" AuthorInText False)
+ , ("citealt*", citation "citealt*" AuthorInText False)
+ , ("textcites", citation "textcites" AuthorInText True)
+ , ("cites", citation "cites" NormalCitation True)
+ , ("autocites", citation "autocites" NormalCitation True)
+ , ("footcites", citation "footcites" NormalCitation True)
+ , ("parencites", citation "parencites" NormalCitation True)
+ , ("supercites", citation "supercites" NormalCitation True)
+ , ("footcitetexts", citation "footcitetexts" NormalCitation True)
+ , ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Footcite", citation "Footcite" NormalCitation False)
+ , ("Parencite", citation "Parencite" NormalCitation False)
+ , ("Supercite", citation "Supercite" NormalCitation False)
+ , ("Footcitetext", citation "Footcitetext" NormalCitation False)
+ , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False)
+ , ("Citeyear", citation "Citeyear" SuppressAuthor False)
+ , ("Autocite*", citation "Autocite*" SuppressAuthor False)
+ , ("Cite*", citation "Cite*" SuppressAuthor False)
+ , ("Parencite*", citation "Parencite*" SuppressAuthor False)
+ , ("Textcite", citation "Textcite" AuthorInText False)
+ , ("Textcites", citation "Textcites" AuthorInText True)
+ , ("Cites", citation "Cites" NormalCitation True)
+ , ("Autocites", citation "Autocites" NormalCitation True)
+ , ("Footcites", citation "Footcites" NormalCitation True)
+ , ("Parencites", citation "Parencites" NormalCitation True)
+ , ("Supercites", citation "Supercites" NormalCitation True)
+ , ("Footcitetexts", citation "Footcitetexts" NormalCitation True)
+ , ("citetext", complexNatbibCitation NormalCitation)
+ , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *>
+ complexNatbibCitation AuthorInText)
+ <|> citation "citeauthor" AuthorInText False)
+ ] ++ map ignoreInlines
+ -- these commands will be ignored unless --parse-raw is specified,
+ -- in which case they will appear as raw latex blocks:
+ [ "index", "nocite" ]
+
+unescapeURL :: String -> String
+unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
+ where isEscapable '%' = True
+ isEscapable '#' = True
+ isEscapable _ = False
+unescapeURL (x:xs) = x:unescapeURL xs
+unescapeURL [] = ""
+
+doverb :: LP Inlines
+doverb = do
+ marker <- anyChar
+ code <$> manyTill (satisfy (/='\n')) (char marker)
+
+doLHSverb :: LP Inlines
+doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|')
+
+lit :: String -> LP Inlines
+lit = pure . str
+
+accent :: (Char -> Char) -> Inlines -> LP Inlines
+accent f ils =
+ case toList ils of
+ (Str (x:xs) : ys) -> return $ fromList $ (Str (f x : xs) : ys)
+ [] -> mzero
+ _ -> return ils
+
+grave :: Char -> Char
+grave 'A' = 'À'
+grave 'E' = 'È'
+grave 'I' = 'Ì'
+grave 'O' = 'Ò'
+grave 'U' = 'Ù'
+grave 'a' = 'à'
+grave 'e' = 'è'
+grave 'i' = 'ì'
+grave 'o' = 'ò'
+grave 'u' = 'ù'
+grave c = c
+
+acute :: Char -> Char
+acute 'A' = 'Á'
+acute 'E' = 'É'
+acute 'I' = 'Í'
+acute 'O' = 'Ó'
+acute 'U' = 'Ú'
+acute 'a' = 'á'
+acute 'e' = 'é'
+acute 'i' = 'í'
+acute 'o' = 'ó'
+acute 'u' = 'ú'
+acute c = c
+
+hat :: Char -> Char
+hat 'A' = 'Â'
+hat 'E' = 'Ê'
+hat 'I' = 'Î'
+hat 'O' = 'Ô'
+hat 'U' = 'Û'
+hat 'a' = 'ã'
+hat 'e' = 'ê'
+hat 'i' = 'î'
+hat 'o' = 'ô'
+hat 'u' = 'û'
+hat c = c
+
+circ :: Char -> Char
+circ 'A' = 'Ã'
+circ 'O' = 'Õ'
+circ 'o' = 'õ'
+circ 'N' = 'Ñ'
+circ 'n' = 'ñ'
+circ c = c
+
+umlaut :: Char -> Char
+umlaut 'A' = 'Ä'
+umlaut 'E' = 'Ë'
+umlaut 'I' = 'Ï'
+umlaut 'O' = 'Ö'
+umlaut 'U' = 'Ü'
+umlaut 'a' = 'ä'
+umlaut 'e' = 'ë'
+umlaut 'i' = 'ï'
+umlaut 'o' = 'ö'
+umlaut 'u' = 'ü'
+umlaut c = c
+
+dot :: Char -> Char
+dot 'C' = 'Ċ'
+dot 'c' = 'ċ'
+dot 'E' = 'Ė'
+dot 'e' = 'ė'
+dot 'G' = 'Ġ'
+dot 'g' = 'ġ'
+dot 'I' = 'İ'
+dot 'Z' = 'Ż'
+dot 'z' = 'ż'
+dot c = c
+
+macron :: Char -> Char
+macron 'A' = 'Ā'
+macron 'E' = 'Ē'
+macron 'I' = 'Ī'
+macron 'O' = 'Ō'
+macron 'U' = 'Ū'
+macron 'a' = 'ā'
+macron 'e' = 'ē'
+macron 'i' = 'ī'
+macron 'o' = 'ō'
+macron 'u' = 'ū'
+macron c = c
+
+tok :: LP Inlines
+tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar)
+
+opt :: LP Inlines
+opt = bracketed inline <* optional sp
+
+skipopts :: LP ()
+skipopts = skipMany opt
+
+inlineText :: LP Inlines
+inlineText = str <$> many1 inlineChar
+
+inlineChar :: LP Char
+inlineChar = satisfy $ \c ->
+ not (c == '\\' || c == '$' || c == '%' || c == '^' || c == '_' ||
+ c == '&' || c == '~' || c == '#' || c == '{' || c == '}' ||
+ c == '^' || c == '\'' || c == '`' || c == '-' || c == ']' ||
+ c == ' ' || c == '\t' || c == '\n' )
+
+environment :: LP Blocks
+environment = do
+ controlSeq "begin"
+ name <- braced
+ case M.lookup name environments of
+ Just p -> p <|> rawEnv name
+ Nothing -> rawEnv name
+
+rawEnv :: String -> LP Blocks
+rawEnv name = do
+ let addBegin x = "\\begin{" ++ name ++ "}" ++ x
+ parseRaw <- stateParseRaw `fmap` getState
+ if parseRaw
+ then (rawBlock "latex" . addBegin) <$>
+ (withRaw (env name blocks) >>= applyMacros' . snd)
+ else env name blocks
+
+-- | Replace "include" commands with file contents.
+handleIncludes :: String -> IO String
+handleIncludes [] = return []
+handleIncludes ('\\':xs) =
+ case runParser include defaultParserState "input" ('\\':xs) of
+ Right (fs, rest) -> do let getfile f = catch (UTF8.readFile f)
+ (\_ -> return "")
+ yss <- mapM getfile fs
+ (intercalate "\n" yss ++) `fmap`
+ handleIncludes rest
+ _ -> case runParser (verbCmd <|> verbatimEnv) defaultParserState
+ "input" ('\\':xs) of
+ Right (r, rest) -> (r ++) `fmap` handleIncludes rest
+ _ -> ('\\':) `fmap` handleIncludes xs
+handleIncludes (x:xs) = (x:) `fmap` handleIncludes xs
+
+include :: LP ([FilePath], String)
+include = do
+ name <- controlSeq "include" <|> controlSeq "usepackage"
+ skipopts
+ fs <- (splitBy (==',')) <$> braced
+ rest <- getInput
+ let fs' = if name == "include"
+ then map (flip replaceExtension ".tex") fs
+ else map (flip replaceExtension ".sty") fs
+ return (fs', rest)
+
+verbCmd :: LP (String, String)
+verbCmd = do
+ (_,r) <- withRaw $ do
+ controlSeq "verb"
+ c <- anyChar
+ manyTill anyChar (char c)
+ rest <- getInput
+ return (r, rest)
+
+verbatimEnv :: LP (String, String)
+verbatimEnv = do
+ (_,r) <- withRaw $ do
+ controlSeq "begin"
+ name <- braced
+ guard $ name == "verbatim" || name == "Verbatim" ||
+ name == "lstlisting"
+ verbEnv name
+ rest <- getInput
+ return (r,rest)
--- | Returns a list of block elements containing the contents of an
--- environment.
-environment :: [Char] -> GenParser Char ParserState [Block]
-environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ spaces
+rawLaTeXBlock :: GenParser Char ParserState String
+rawLaTeXBlock = snd <$> withRaw (environment <|> blockCommand)
-anyEnvironment :: GenParser Char ParserState Block
-anyEnvironment = try $ do
- string "\\begin"
- spaces
- char '{'
- name <- many letter
- star <- option "" (string "*") -- some environments have starred variants
- char '}'
- optional commandArgs
+rawLaTeXInline :: GenParser Char ParserState Inline
+rawLaTeXInline = do
+ (res, raw) <- withRaw inlineCommand
+ if res == mempty
+ then return (Str "")
+ else RawInline "latex" <$> (applyMacros' raw)
+
+environments :: M.Map String (LP Blocks)
+environments = M.fromList
+ [ ("document", env "document" blocks <* skipMany anyChar)
+ , ("letter", env "letter" letter_contents)
+ , ("center", env "center" blocks)
+ , ("tabular", env "tabular" simpTable)
+ , ("quote", blockQuote <$> env "quote" blocks)
+ , ("quotation", blockQuote <$> env "quotation" blocks)
+ , ("verse", blockQuote <$> env "verse" blocks)
+ , ("itemize", bulletList <$> listenv "itemize" (many item))
+ , ("description", definitionList <$> listenv "description" (many descItem))
+ , ("enumerate", ordered_list)
+ , ("code", failUnlessLHS *>
+ (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$>
+ verbEnv "code"))
+ , ("verbatim", codeBlock <$> (verbEnv "verbatim"))
+ , ("Verbatim", codeBlock <$> (verbEnv "Verbatim"))
+ , ("lstlisting", codeBlock <$> (verbEnv "listlisting"))
+ , ("displaymath", mathEnv Nothing "displaymath")
+ , ("equation", mathEnv Nothing "equation")
+ , ("equation*", mathEnv Nothing "equation*")
+ , ("gather", mathEnv (Just "gathered") "gather")
+ , ("gather*", mathEnv (Just "gathered") "gather*")
+ , ("multiline", mathEnv (Just "gathered") "multiline")
+ , ("multiline*", mathEnv (Just "gathered") "multiline*")
+ , ("eqnarray", mathEnv (Just "aligned*") "eqnarray")
+ , ("eqnarray*", mathEnv (Just "aligned*") "eqnarray*")
+ , ("align", mathEnv (Just "aligned*") "align")
+ , ("align*", mathEnv (Just "aligned*") "align*")
+ , ("alignat", mathEnv (Just "aligned*") "alignat")
+ , ("alignat*", mathEnv (Just "aligned*") "alignat*")
+ ]
+
+letter_contents :: LP Blocks
+letter_contents = do
+ bs <- blocks
+ st <- getState
+ -- add signature (author) and address (title)
+ let addr = case stateTitle st of
+ [] -> mempty
+ x -> para $ trimInlines $ fromList x
+ updateState $ \s -> s{ stateAuthors = [], stateTitle = [] }
+ return $ addr <> bs -- sig added by \closing
+
+closing :: LP Blocks
+closing = do
+ contents <- tok
+ st <- getState
+ let sigs = case stateAuthors st of
+ [] -> mempty
+ xs -> para $ trimInlines $ fromList
+ $ intercalate [LineBreak] xs
+ return $ para (trimInlines contents) <> sigs
+
+item :: LP Blocks
+item = blocks *> controlSeq "item" *> skipopts *> blocks
+
+loose_item :: LP Blocks
+loose_item = do
+ ctx <- stateParserContext `fmap` getState
+ if ctx == ListItemState
+ then mzero
+ else return mempty
+
+descItem :: LP (Inlines, [Blocks])
+descItem = do
+ blocks -- skip blocks before item
+ controlSeq "item"
+ optional sp
+ ils <- opt
+ bs <- blocks
+ return (ils, [bs])
+
+env :: String -> LP a -> LP a
+env name p = p <* (controlSeq "end" *> braced >>= guard . (== name))
+
+listenv :: String -> LP a -> LP a
+listenv name p = try $ do
+ oldCtx <- stateParserContext `fmap` getState
+ updateState $ \st -> st{ stateParserContext = ListItemState }
+ res <- env name p
+ updateState $ \st -> st{ stateParserContext = oldCtx }
+ return res
+
+mathEnv :: Maybe String -> String -> LP Blocks
+mathEnv innerEnv name = para <$> mathDisplay (inner <$> verbEnv name)
+ where inner x = case innerEnv of
+ Nothing -> x
+ Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++
+ "\\end{" ++ y ++ "}"
+
+verbEnv :: String -> LP String
+verbEnv name = do
+ skipopts
+ optional blankline
+ let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name)
+ res <- manyTill anyChar endEnv
+ return $ stripTrailingNewlines res
+
+ordered_list :: LP Blocks
+ordered_list = do
+ optional sp
+ (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
+ try $ char '[' *> anyOrderedListMarker <* char ']'
spaces
- contents <- manyTill block (end (name ++ star))
+ optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced
spaces
- return $ BlockQuote contents
-
---
--- parsing documents
---
+ start <- option 1 $ try $ do controlSeq "setcounter"
+ grouped (string "enum" *> many1 (oneOf "iv"))
+ optional sp
+ num <- grouped (many1 digit)
+ spaces
+ return $ (read num + 1 :: Int)
+ bs <- listenv "enumerate" (many item)
+ return $ orderedListWith (start, style, delim) bs
+
+paragraph :: LP Blocks
+paragraph = do
+ x <- mconcat <$> many1 inline
+ if x == mempty
+ then return mempty
+ else return $ para $ trimInlines x
+
+preamble :: LP Blocks
+preamble = mempty <$> manyTill preambleBlock beginDoc
+ where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}"
+ preambleBlock = (mempty <$ comment)
+ <|> (mempty <$ sp)
+ <|> (mempty <$ blanklines)
+ <|> (mempty <$ macro)
+ <|> blockCommand
+ <|> (mempty <$ anyControlSeq)
+ <|> (mempty <$ braced)
+ <|> (mempty <$ anyChar)
+
+-------
+
+-- citations
--- | Process LaTeX preamble, extracting metadata.
-processLaTeXPreamble :: GenParser Char ParserState ()
-processLaTeXPreamble = do
- try $ string "\\documentclass"
- skipMany $ bibliographic <|> macro <|> commentBlock <|> skipChar
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
--- | Parse LaTeX and return 'Pandoc'.
-parseLaTeX :: GenParser Char ParserState Pandoc
-parseLaTeX = do
- spaces
- skipMany $ comment >> spaces
- blocks <- try (processLaTeXPreamble >> environment "document")
- <|> (many block >>~ (spaces >> eof))
- state <- getState
- let blocks' = filter (/= Null) blocks
- let title' = stateTitle state
- let authors' = stateAuthors state
- let date' = stateDate state
- return $ Pandoc (Meta title' authors' date') blocks'
-
---
--- parsing blocks
---
-
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = spaces >> many block
-
-block :: GenParser Char ParserState Block
-block = choice [ hrule
- , codeBlock
- , header
- , list
- , blockQuote
- , simpleTable
- , commentBlock
- , macro
- , bibliographic
- , para
- , itemBlock
- , unknownEnvironment
- , ignore
- , unknownCommand
- ] <?> "block"
-
---
--- header blocks
---
-
-header :: GenParser Char ParserState Block
-header = section <|> chapter
-
-chapter :: GenParser Char ParserState Block
-chapter = try $ do
- string "\\chapter"
- result <- headerWithLevel 1
- updateState $ \s -> s{ stateHasChapters = True }
- return result
-
-section :: GenParser Char ParserState Block
-section = try $ do
- char '\\'
- subs <- many (try (string "sub"))
- base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4)
- st <- getState
- let lev = if stateHasChapters st
- then length subs + base + 1
- else length subs + base
- headerWithLevel lev
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) =
+ let k = last ks
+ s' = case s of
+ (Str (c:_):_)
+ | not (isPunctuation c || isSpace c) -> Str "," : Space : s
+ _ -> s
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ s'}]
+addSuffix _ _ = []
-headerWithLevel :: Int -> GenParser Char ParserState Block
-headerWithLevel lev = try $ do
- spaces
- optional (char '*')
- spaces
- optional $ bracketedText '[' ']' -- alt title
- spaces
+simpleCiteArgs :: LP [Citation]
+simpleCiteArgs = try $ do
+ first <- optionMaybe $ toList <$> opt
+ second <- optionMaybe $ toList <$> opt
char '{'
- title' <- manyTill inline (char '}')
- spaces
- return $ Header lev (normalizeSpaces title')
-
---
--- hrule block
---
+ keys <- manyTill citationLabel (char '}')
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> (mempty, s )
+ (Just s , Just t ) -> (s , t )
+ _ -> (mempty, mempty)
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
-hrule :: GenParser Char st Block
-hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
- "\\newpage" ] >> spaces >> return HorizontalRule
+citationLabel :: LP String
+citationLabel = trim <$>
+ (many1 (satisfy $ \c -> c /=',' && c /='}') <* optional (char ',') <* optional sp)
--- tables
+cites :: CitationMode -> Bool -> LP [Citation]
+cites mode multi = try $ do
+ cits <- if multi
+ then many1 simpleCiteArgs
+ else count 1 simpleCiteArgs
+ let (c:cs) = concat cits
+ return $ case mode of
+ AuthorInText -> c {citationMode = mode} : cs
+ _ -> map (\a -> a {citationMode = mode}) (c:cs)
-simpleTable :: GenParser Char ParserState Block
-simpleTable = try $ do
- string "\\begin"
- spaces
- string "{tabular}"
- spaces
- aligns <- parseAligns
- let cols = length aligns
- optional hline
- header' <- option [] $ parseTableHeader cols
- rows <- many (parseTableRow cols >>~ optional hline)
- spaces
- end "tabular"
- spaces
- let header'' = if null header'
- then replicate cols []
- else header'
- return $ Table [] aligns (replicate cols 0) header'' rows
+citation :: String -> CitationMode -> Bool -> LP Inlines
+citation name mode multi = do
+ (c,raw) <- withRaw $ cites mode multi
+ return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw)
+
+complexNatbibCitation :: CitationMode -> LP Inlines
+complexNatbibCitation mode = try $ do
+ let ils = (toList . trimInlines . mconcat) <$>
+ many (notFollowedBy (oneOf "\\};") >> inline)
+ let parseOne = try $ do
+ skipSpaces
+ pref <- ils
+ cit' <- inline -- expect a citation
+ let citlist = toList cit'
+ cits' <- case citlist of
+ [Cite cs _] -> return cs
+ _ -> mzero
+ suff <- ils
+ skipSpaces
+ optional $ char ';'
+ return $ addPrefix pref $ addSuffix suff $ cits'
+ (c:cits, raw) <- withRaw $ grouped parseOne
+ return $ cite (c{ citationMode = mode }:cits)
+ (rawInline "latex" $ "\\citetext" ++ raw)
-hline :: GenParser Char st ()
-hline = try $ spaces >> string "\\hline" >> return ()
+-- tables
-parseAligns :: GenParser Char ParserState [Alignment]
+parseAligns :: LP [Alignment]
parseAligns = try $ do
char '{'
optional $ char '|'
let cAlign = char 'c' >> return AlignCenter
let lAlign = char 'l' >> return AlignLeft
let rAlign = char 'r' >> return AlignRight
- let alignChar = cAlign <|> lAlign <|> rAlign
+ let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign)
aligns' <- sepEndBy alignChar (optional $ char '|')
+ spaces
char '}'
spaces
return aligns'
-parseTableHeader :: Int -- ^ number of columns
- -> GenParser Char ParserState [TableCell]
-parseTableHeader cols = try $ do
- cells' <- parseTableRow cols
- hline
- return cells'
+hline :: LP ()
+hline = () <$ (try $ spaces >> controlSeq "hline")
parseTableRow :: Int -- ^ number of columns
- -> GenParser Char ParserState [TableCell]
+ -> LP [Blocks]
parseTableRow cols = try $ do
- let tableCellInline = notFollowedBy (char '&' <|>
- (try $ char '\\' >> char '\\')) >> inline
- cells' <- sepBy (spaces >> liftM ((:[]) . Plain . normalizeSpaces)
- (many tableCellInline)) (char '&')
+ let amp = try $ spaces *> string "&"
+ let tableCellInline = notFollowedBy (amp <|> controlSeq "\\") >> inline
+ cells' <- sepBy ((plain . trimInlines . mconcat) <$> many tableCellInline) amp
guard $ length cells' == cols
spaces
- (try $ string "\\\\" >> spaces) <|>
- (lookAhead (end "tabular") >> return ())
- return cells'
-
---
--- code blocks
---
-
-codeBlock :: GenParser Char ParserState Block
-codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> codeBlockWith "lstlisting" <|> lhsCodeBlock
--- Note: Verbatim is from fancyvrb.
-
-codeBlockWith :: String -> GenParser Char st Block
-codeBlockWith env = try $ do
- string "\\begin"
- spaces -- don't use begin function because it
- string $ "{" ++ env ++ "}" -- gobbles whitespace; we want to gobble
- optional blanklines -- blank lines, but not leading space
- contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}"))
- spaces
- let classes = if env == "code" then ["haskell"] else []
- return $ CodeBlock ("",classes,[]) (stripTrailingNewlines contents)
-
-lhsCodeBlock :: GenParser Char ParserState Block
-lhsCodeBlock = do
- failUnlessLHS
- (CodeBlock (_,_,_) cont) <- codeBlockWith "code"
- return $ CodeBlock ("", ["sourceCode","literate","haskell"], []) cont
-
---
--- block quotes
---
-
-blockQuote :: GenParser Char ParserState Block
-blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>=
- return . BlockQuote
-
---
--- list blocks
---
-
-list :: GenParser Char ParserState Block
-list = bulletList <|> orderedList <|> definitionList <?> "list"
-
-listItem :: GenParser Char ParserState ([Inline], [Block])
-listItem = try $ do
- ("item", _, args) <- command
- spaces
- state <- getState
- let oldParserContext = stateParserContext state
- updateState (\s -> s {stateParserContext = ListItemState})
- blocks <- many block
- updateState (\s -> s {stateParserContext = oldParserContext})
- opt <- case args of
- ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x ->
- parseFromString (many inline) $ tail $ init x
- _ -> return []
- return (opt, blocks)
-
-orderedList :: GenParser Char ParserState Block
-orderedList = try $ do
- string "\\begin"
- spaces
- string "{enumerate}"
- spaces
- (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
- try $ do failIfStrict
- char '['
- res <- anyOrderedListMarker
- char ']'
- return res
- spaces
- option "" $ try $ do string "\\setlength{\\itemindent}"
- char '{'
- manyTill anyChar (char '}')
- spaces
- start <- option 1 $ try $ do failIfStrict
- string "\\setcounter{enum"
- many1 (oneOf "iv")
- string "}{"
- num <- many1 digit
- char '}'
- spaces
- return $ (read num) + 1
- items <- many listItem
- end "enumerate"
- spaces
- return $ OrderedList (start, style, delim) $ map snd items
-
-bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
- begin "itemize"
- items <- many listItem
- end "itemize"
- spaces
- return (BulletList $ map snd items)
-
-definitionList :: GenParser Char ParserState Block
-definitionList = try $ do
- begin "description"
- items <- many listItem
- end "description"
- spaces
- return $ DefinitionList $ map (\(t,d) -> (t,[d])) items
-
---
--- paragraph block
---
-
-para :: GenParser Char ParserState Block
-para = do
- res <- many1 inline
- spaces
- return $ if null (filter (`notElem` [Str "", Space]) res)
- then Null
- else Para $ normalizeSpaces res
-
---
--- title authors date
---
-
-bibliographic :: GenParser Char ParserState Block
-bibliographic = choice [ maketitle, title, subtitle, authors, date ]
-
-maketitle :: GenParser Char st Block
-maketitle = try (string "\\maketitle") >> spaces >> return Null
-
-title :: GenParser Char ParserState Block
-title = try $ do
- string "\\title{"
- tit <- manyTill inline (char '}')
- spaces
- updateState (\state -> state { stateTitle = tit })
- return Null
-
-subtitle :: GenParser Char ParserState Block
-subtitle = try $ do
- string "\\subtitle{"
- tit <- manyTill inline (char '}')
- spaces
- updateState (\state -> state { stateTitle = stateTitle state ++
- Str ":" : LineBreak : tit })
- return Null
-
-authors :: GenParser Char ParserState Block
-authors = try $ do
- string "\\author{"
- let andsep = try $ string "\\and" >> notFollowedBy letter >>
- spaces >> return '&'
- raw <- sepBy (many $ notFollowedBy (char '}' <|> andsep) >> inline) andsep
- let authors' = map normalizeSpaces raw
- char '}'
+ optional $ controlSeq "\\"
spaces
- updateState (\s -> s { stateAuthors = authors' })
- return Null
-
-date :: GenParser Char ParserState Block
-date = try $ do
- string "\\date{"
- date' <- manyTill inline (char '}')
- spaces
- updateState (\state -> state { stateDate = normalizeSpaces date' })
- return Null
-
---
--- item block
--- for use in unknown environments that aren't being parsed as raw latex
---
-
--- this forces items to be parsed in different blocks
-itemBlock :: GenParser Char ParserState Block
-itemBlock = try $ do
- ("item", _, args) <- command
- state <- getState
- if stateParserContext state == ListItemState
- then fail "item should be handled by list block"
- else if null args
- then return Null
- else return $ Plain [Str (stripFirstAndLast (head args))]
-
---
--- raw LaTeX
---
-
--- | Parse any LaTeX environment and return a Para block containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment :: GenParser Char st Block
-rawLaTeXEnvironment = do
- contents <- rawLaTeXEnvironment'
- spaces
- return $ RawBlock "latex" contents
+ return cells'
--- | Parse any LaTeX environment and return a string containing
--- the whole literal environment as raw TeX.
-rawLaTeXEnvironment' :: GenParser Char st String
-rawLaTeXEnvironment' = try $ do
- string "\\begin"
- spaces
- char '{'
- name <- many1 letter
- star <- option "" (string "*") -- for starred variants
- let name' = name ++ star
- char '}'
- args <- option [] commandArgs
- let argStr = concat args
- contents <- manyTill (choice [ (many1 (noneOf "\\")),
- rawLaTeXEnvironment',
- string "\\" ])
- (end name')
- return $ "\\begin{" ++ name' ++ "}" ++ argStr ++
- concat contents ++ "\\end{" ++ name' ++ "}"
-
-unknownEnvironment :: GenParser Char ParserState Block
-unknownEnvironment = try $ do
- state <- getState
- result <- if stateParseRaw state -- check whether we should include raw TeX
- then rawLaTeXEnvironment -- if so, get whole raw environment
- else anyEnvironment -- otherwise just the contents
- return result
-
--- \ignore{} is used conventionally in literate haskell for definitions
--- that are to be processed by the compiler but not printed.
-ignore :: GenParser Char ParserState Block
-ignore = try $ do
- ("ignore", _, _) <- command
- spaces
- return Null
-
-demacro :: (String, String, [String]) -> GenParser Char ParserState Inline
-demacro (n,st,args) = try $ do
- let raw = "\\" ++ n ++ st ++ concat args
- s' <- applyMacros' raw
- if raw == s'
- then return $ RawInline "latex" raw
- else do
- inp <- getInput
- setInput $ s' ++ inp
- return $ Str ""
-
-unknownCommand :: GenParser Char ParserState Block
-unknownCommand = try $ do
- spaces
- notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"]
- state <- getState
- when (stateParserContext state == ListItemState) $
- notFollowedBy' (string "\\item")
- if stateParseRaw state
- then command >>= demacro >>= return . Plain . (:[])
- else do
- (name, _, args) <- command
- spaces
- unless (name `elem` commandsToIgnore) $ do
- -- put arguments back in input to be parsed
- inp <- getInput
- setInput $ intercalate " " args ++ inp
- return Null
-
-commandsToIgnore :: [String]
-commandsToIgnore = ["special","pdfannot","pdfstringdef", "index","bibliography"]
-
-skipChar :: GenParser Char ParserState Block
-skipChar = do
- satisfy (/='\\') <|>
- (notFollowedBy' (try $
- string "\\begin" >> spaces >> string "{document}") >>
- anyChar)
+simpTable :: LP Blocks
+simpTable = try $ do
spaces
- return Null
-
-commentBlock :: GenParser Char st Block
-commentBlock = many1 (comment >> spaces) >> return Null
-
---
--- inline
---
-
-inline :: GenParser Char ParserState Inline
-inline = choice [ str
- , endline
- , whitespace
- , quoted
- , apostrophe
- , strong
- , math
- , ellipses
- , emDash
- , enDash
- , hyphen
- , emph
- , strikeout
- , superscript
- , subscript
- , code
- , url
- , link
- , image
- , footnote
- , linebreak
- , accentedChar
- , nonbreakingSpace
- , cite
- , specialChar
- , ensureMath
- , rawLaTeXInline'
- , escapedChar
- , unescapedChar
- , comment
- ] <?> "inline"
-
-
--- latex comment
-comment :: GenParser Char st Inline
-comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return (Str "")
-
-accentedChar :: GenParser Char st Inline
-accentedChar = normalAccentedChar <|> specialAccentedChar
-
-normalAccentedChar :: GenParser Char st Inline
-normalAccentedChar = try $ do
- char '\\'
- accent <- oneOf "'`^\"~"
- character <- (try $ char '{' >> letter >>~ char '}') <|> letter
- let table = fromMaybe [] $ lookup character accentTable
- let result = case lookup accent table of
- Just num -> chr num
- Nothing -> '?'
- return $ Str [result]
-
--- an association list of letters and association list of accents
--- and decimal character numbers.
-accentTable :: [(Char, [(Char, Int)])]
-accentTable =
- [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]),
- ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]),
- ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]),
- ('N', [('~', 209)]),
- ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]),
- ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]),
- ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]),
- ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]),
- ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]),
- ('n', [('~', 241)]),
- ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]),
- ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
-
-specialAccentedChar :: GenParser Char st Inline
-specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, lslash,
- oslash, pound, euro, copyright, sect ]
-
-ccedil :: GenParser Char st Inline
-ccedil = try $ do
- char '\\'
- letter' <- oneOfStrings ["cc", "cC"]
- let num = if letter' == "cc" then 231 else 199
- return $ Str [chr num]
-
-aring :: GenParser Char st Inline
-aring = try $ do
- char '\\'
- letter' <- oneOfStrings ["aa", "AA"]
- let num = if letter' == "aa" then 229 else 197
- return $ Str [chr num]
-
-iuml :: GenParser Char st Inline
-iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >>
- return (Str [chr 239])
-
-szlig :: GenParser Char st Inline
-szlig = try (string "\\ss") >> return (Str [chr 223])
-
-oslash :: GenParser Char st Inline
-oslash = try $ do
- char '\\'
- letter' <- choice [char 'o', char 'O']
- let num = if letter' == 'o' then 248 else 216
- return $ Str [chr num]
-
-lslash :: GenParser Char st Inline
-lslash = try $ do
- cmd <- oneOfStrings ["{\\L}","{\\l}","\\L ","\\l "]
- return $ if 'l' `elem` cmd
- then Str "\x142"
- else Str "\x141"
-
-aelig :: GenParser Char st Inline
-aelig = try $ do
- char '\\'
- letter' <- oneOfStrings ["ae", "AE"]
- let num = if letter' == "ae" then 230 else 198
- return $ Str [chr num]
-
-pound :: GenParser Char st Inline
-pound = try (string "\\pounds") >> return (Str [chr 163])
-
-euro :: GenParser Char st Inline
-euro = try (string "\\euro") >> return (Str [chr 8364])
-
-copyright :: GenParser Char st Inline
-copyright = try (string "\\copyright") >> return (Str [chr 169])
-
-sect :: GenParser Char st Inline
-sect = try (string "\\S") >> return (Str [chr 167])
-
-escapedChar :: GenParser Char st Inline
-escapedChar = do
- result <- escaped (oneOf specialChars)
- return $ if result == Str "\n" then Str " " else result
-
--- nonescaped special characters
-unescapedChar :: GenParser Char st Inline
-unescapedChar = oneOf "`$^&_#{}[]|<>" >>= return . (\c -> Str [c])
-
-specialChar :: GenParser Char st Inline
-specialChar = choice [ spacer, interwordSpace,
- backslash, tilde, caret,
- bar, lt, gt, doubleQuote ]
-
-spacer :: GenParser Char st Inline
-spacer = try (string "\\,") >> return (Str "")
-
-interwordSpace :: GenParser Char st Inline
-interwordSpace = try (string "\\ ") >> return (Str "\160")
-
-backslash :: GenParser Char st Inline
-backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\")
-
-tilde :: GenParser Char st Inline
-tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~")
-
-caret :: GenParser Char st Inline
-caret = try (string "\\^{}") >> return (Str "^")
-
-bar :: GenParser Char st Inline
-bar = try (string "\\textbar") >> optional (try $ string "{}") >> return (Str "\\")
-
-lt :: GenParser Char st Inline
-lt = try (string "\\textless") >> optional (try $ string "{}") >> return (Str "<")
-
-gt :: GenParser Char st Inline
-gt = try (string "\\textgreater") >> optional (try $ string "{}") >> return (Str ">")
-
-doubleQuote :: GenParser Char st Inline
-doubleQuote = char '"' >> return (Str "\"")
-
-code :: GenParser Char ParserState Inline
-code = code1 <|> code2 <|> code3 <|> lhsInlineCode
-
-code1 :: GenParser Char st Inline
-code1 = try $ do
- string "\\verb"
- marker <- anyChar
- result <- manyTill anyChar (char marker)
- return $ Code nullAttr $ removeLeadingTrailingSpace result
-
-code2 :: GenParser Char st Inline
-code2 = try $ do
- string "\\texttt{"
- result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
- return $ Code nullAttr result
-
-code3 :: GenParser Char st Inline
-code3 = try $ do
- string "\\lstinline"
- marker <- anyChar
- result <- manyTill anyChar (char marker)
- return $ Code nullAttr $ removeLeadingTrailingSpace result
-
-lhsInlineCode :: GenParser Char ParserState Inline
-lhsInlineCode = try $ do
- failUnlessLHS
- char '|'
- result <- manyTill (noneOf "|\n") (char '|')
- return $ Code ("",["haskell"],[]) result
-
-emph :: GenParser Char ParserState Inline
-emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
- manyTill inline (char '}') >>= return . Emph
-
-strikeout :: GenParser Char ParserState Inline
-strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>=
- return . Strikeout
-
-superscript :: GenParser Char ParserState Inline
-superscript = try $ string "\\textsuperscript{" >>
- manyTill inline (char '}') >>= return . Superscript
-
--- note: \textsubscript isn't a standard latex command, but we use
--- a defined version in pandoc.
-subscript :: GenParser Char ParserState Inline
-subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>=
- return . Subscript
-
-apostrophe :: GenParser Char ParserState Inline
-apostrophe = char '\'' >> return Apostrophe
-
-quoted :: GenParser Char ParserState Inline
-quoted = doubleQuoted <|> singleQuoted
-
-singleQuoted :: GenParser Char ParserState Inline
-singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>=
- return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted :: GenParser Char ParserState Inline
-doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>=
- return . Quoted DoubleQuote . normalizeSpaces
-
-singleQuoteStart :: GenParser Char st Char
-singleQuoteStart = char '`'
-
-singleQuoteEnd :: GenParser Char st ()
-singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum
-
-doubleQuoteStart :: CharParser st String
-doubleQuoteStart = string "``"
-
-doubleQuoteEnd :: CharParser st String
-doubleQuoteEnd = try $ string "''"
-
-ellipses :: GenParser Char st Inline
-ellipses = try $ do
- char '\\'
- optional $ char 'l'
- string "dots"
- optional $ try $ string "{}"
- return Ellipses
-
-enDash :: GenParser Char st Inline
-enDash = try (string "--") >> return EnDash
-
-emDash :: GenParser Char st Inline
-emDash = try (string "---") >> return EmDash
-
-hyphen :: GenParser Char st Inline
-hyphen = char '-' >> return (Str "-")
-
-strong :: GenParser Char ParserState Inline
-strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
- return . Strong
-
-whitespace :: GenParser Char st Inline
-whitespace = many1 (oneOf " \t") >> return Space
-
-nonbreakingSpace :: GenParser Char st Inline
-nonbreakingSpace = char '~' >> return (Str "\160")
-
--- hard line break
-linebreak :: GenParser Char st Inline
-linebreak = try $ do
- string "\\\\"
- optional $ bracketedText '[' ']' -- e.g. \\[10pt]
+ aligns <- parseAligns
+ let cols = length aligns
+ optional hline
+ header' <- option [] $ try (parseTableRow cols <* hline)
+ rows <- many (parseTableRow cols <* optional hline)
spaces
- return LineBreak
-
-str :: GenParser Char st Inline
-str = many1 (noneOf specialChars) >>= return . Str
-
--- endline internal to paragraph
-endline :: GenParser Char st Inline
-endline = try $ newline >> notFollowedBy blankline >> return Space
-
--- math
-math :: GenParser Char ParserState Inline
-math = (math3 >>= applyMacros' >>= return . Math DisplayMath)
- <|> (math1 >>= applyMacros' >>= return . Math InlineMath)
- <|> (math2 >>= applyMacros' >>= return . Math InlineMath)
- <|> (math4 >>= applyMacros' >>= return . Math DisplayMath)
- <|> (math5 >>= applyMacros' >>= return . Math DisplayMath)
- <|> (math6 >>= applyMacros' >>= return . Math DisplayMath)
- <?> "math"
-
-math1 :: GenParser Char st String
-math1 = try $ char '$' >> manyTill anyChar (char '$')
-
-math2 :: GenParser Char st String
-math2 = try $ string "\\(" >> manyTill anyChar (try $ string "\\)")
-
-math3 :: GenParser Char st String
-math3 = try $ char '$' >> math1 >>~ char '$'
-
-math4 :: GenParser Char st String
-math4 = try $ do
- name <- begin "displaymath" <|> begin "equation" <|> begin "equation*" <|>
- begin "gather" <|> begin "gather*" <|> begin "gathered" <|>
- begin "multline" <|> begin "multline*"
- manyTill anyChar (end name)
-
-math5 :: GenParser Char st String
-math5 = try $ (string "\\[") >> spaces >> manyTill anyChar (try $ string "\\]")
-
-math6 :: GenParser Char st String
-math6 = try $ do
- name <- begin "eqnarray" <|> begin "eqnarray*" <|> begin "align" <|>
- begin "align*" <|> begin "alignat" <|> begin "alignat*" <|>
- begin "split" <|> begin "aligned" <|> begin "alignedat"
- res <- manyTill anyChar (end name)
- return $ filter (/= '&') res -- remove alignment codes
-
-ensureMath :: GenParser Char st Inline
-ensureMath = try $ do
- (n, _, args) <- command
- guard $ n == "ensuremath" && not (null args)
- return $ Math InlineMath $ tail $ init $ head args
-
---
--- links and images
---
-
-url :: GenParser Char ParserState Inline
-url = try $ do
- string "\\url"
- url' <- charsInBalanced '{' '}'
- return $ Link [Code ("",["url"],[]) url'] (escapeURI url', "")
-
-link :: GenParser Char ParserState Inline
-link = try $ do
- string "\\href{"
- url' <- manyTill anyChar (char '}')
- char '{'
- label' <- manyTill inline (char '}')
- return $ Link (normalizeSpaces label') (escapeURI url', "")
-
-image :: GenParser Char ParserState Inline
-image = try $ do
- ("includegraphics", _, args) <- command
- let args' = filter isArg args -- filter out options
- let (src,tit) = case args' of
- [] -> ("", "")
- (x:_) -> (stripFirstAndLast x, "")
- return $ Image [Str "image"] (escapeURI src, tit)
-
-footnote :: GenParser Char ParserState Inline
-footnote = try $ do
- (name, _, (contents:[])) <- command
- if ((name == "footnote") || (name == "thanks"))
- then string ""
- else fail "not a footnote or thanks command"
- let contents' = stripFirstAndLast contents
- -- parse the extracted block, which may contain various block elements:
- rest <- getInput
- setInput $ contents'
- blocks <- parseBlocks
- setInput rest
- return $ Note blocks
-
--- | citations
-cite :: GenParser Char ParserState Inline
-cite = simpleCite <|> complexNatbibCites
-
-simpleCiteArgs :: GenParser Char ParserState [Citation]
-simpleCiteArgs = try $ do
- first <- optionMaybe $ (char '[') >> manyTill inline (char ']')
- second <- optionMaybe $ (char '[') >> manyTill inline (char ']')
- char '{'
- keys <- many1Till citationLabel (char '}')
- let (pre, suf) = case (first , second ) of
- (Just s , Nothing) -> ([], s )
- (Just s , Just t ) -> (s , t )
- _ -> ([], [])
- conv k = Citation { citationId = k
- , citationPrefix = []
- , citationSuffix = []
- , citationMode = NormalCitation
- , citationHash = 0
- , citationNoteNum = 0
- }
- return $ addPrefix pre $ addSuffix suf $ map conv keys
-
-
-simpleCite :: GenParser Char ParserState Inline
-simpleCite = try $ do
- char '\\'
- let biblatex = [a ++ "cite" | a <- ["auto", "foot", "paren", "super", ""]]
- ++ ["footcitetext"]
- normal = ["cite" ++ a ++ b | a <- ["al", ""], b <- ["p", "p*", ""]]
- ++ biblatex
- supress = ["citeyearpar", "citeyear", "autocite*", "cite*", "parencite*"]
- intext = ["textcite"] ++ ["cite" ++ a ++ b | a <- ["al", ""], b <- ["t", "t*"]]
- mintext = ["textcites"]
- mnormal = map (++ "s") biblatex
- cmdend = notFollowedBy (letter <|> char '*')
- capit [] = []
- capit (x:xs) = toUpper x : xs
- addUpper xs = xs ++ map capit xs
- toparser l t = try $ oneOfStrings (addUpper l) >> cmdend >> return t
- (mode, multi) <- toparser normal (NormalCitation, False)
- <|> toparser supress (SuppressAuthor, False)
- <|> toparser intext (AuthorInText , False)
- <|> toparser mnormal (NormalCitation, True )
- <|> toparser mintext (AuthorInText , True )
- cits <- if multi then
- many1 simpleCiteArgs
- else
- simpleCiteArgs >>= \c -> return [c]
- let (c:cs) = concat cits
- cits' = case mode of
- AuthorInText -> c {citationMode = mode} : cs
- _ -> map (\a -> a {citationMode = mode}) (c:cs)
- return $ Cite cits' []
-
-complexNatbibCites :: GenParser Char ParserState Inline
-complexNatbibCites = complexNatbibTextual <|> complexNatbibParenthetical
-
-complexNatbibTextual :: GenParser Char ParserState Inline
-complexNatbibTextual = try $ do
- string "\\citeauthor{"
- manyTill (noneOf "}") (char '}')
- skipSpaces
- Cite (c:cs) _ <- complexNatbibParenthetical
- return $ Cite (c {citationMode = AuthorInText} : cs) []
-
-
-complexNatbibParenthetical :: GenParser Char ParserState Inline
-complexNatbibParenthetical = try $ do
- string "\\citetext{"
- cits <- many1Till parseOne (char '}')
- return $ Cite (concat cits) []
- where
- parseOne = do
- skipSpaces
- pref <- many (notFollowedBy (oneOf "\\}") >> inline)
- (Cite cites _) <- simpleCite
- suff <- many (notFollowedBy (oneOf "\\};") >> inline)
- skipSpaces
- optional $ char ';'
- return $ addPrefix pref $ addSuffix suff $ cites
-
-addPrefix :: [Inline] -> [Citation] -> [Citation]
-addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
-addPrefix _ _ = []
-
-addSuffix :: [Inline] -> [Citation] -> [Citation]
-addSuffix s ks@(_:_) = let k = last ks
- in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
-addSuffix _ _ = []
-
-citationLabel :: GenParser Char ParserState String
-citationLabel = do
- res <- many1 $ noneOf ",}"
- optional $ char ','
- return $ removeLeadingTrailingSpace res
-
--- | Parse any LaTeX inline command and return it in a raw TeX inline element.
-rawLaTeXInline' :: GenParser Char ParserState Inline
-rawLaTeXInline' = do
- notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore",
- "\\section"]
- rawLaTeXInline
+ let header'' = if null header'
+ then replicate cols mempty
+ else header'
+ lookAhead $ controlSeq "end" -- make sure we're at end
+ return $ table mempty (zip aligns (repeat 0)) header'' rows
--- | Parse any LaTeX command and return it in a raw TeX inline element.
-rawLaTeXInline :: GenParser Char ParserState Inline
-rawLaTeXInline = try $ do
- state <- getState
- if stateParseRaw state
- then command >>= demacro
- else do
- (name,st,args) <- command
- x <- demacro (name,st,args)
- unless (x == Str "" || name `elem` commandsToIgnore) $ do
- inp <- getInput
- setInput $ intercalate " " args ++ inp
- return $ Str ""
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 53531dc1a..666265935 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -39,12 +39,12 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
-import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
+import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
+import Text.Pandoc.XML ( fromEntities )
import Text.ParserCombinators.Parsec
-import Control.Monad (when, liftM, guard)
+import Control.Monad (when, liftM, guard, mzero)
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match (tagOpen)
@@ -108,6 +108,11 @@ atMostSpaces :: Int -> GenParser Char ParserState ()
atMostSpaces 0 = notFollowedBy (char ' ')
atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return ()
+litChar :: GenParser Char ParserState Char
+litChar = escapedChar'
+ <|> noneOf "\n"
+ <|> (newline >> notFollowedBy blankline >> return ' ')
+
-- | Fail unless we're at beginning of a line.
failUnlessBeginningOfLine :: GenParser tok st ()
failUnlessBeginningOfLine = do
@@ -212,16 +217,15 @@ referenceKey = try $ do
lab <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
- let nl = char '\n' >> notFollowedBy blankline >> return ' '
let sourceURL = liftM unwords $ many $ try $ do
notFollowedBy' referenceTitle
skipMany spaceChar
- optional nl
+ optional $ newline >> notFollowedBy blankline
skipMany spaceChar
notFollowedBy' reference
- many1 (satisfy $ not . isBlank)
+ many1 $ escapedChar' <|> satisfy (not . isBlank)
let betweenAngles = try $ char '<' >>
- manyTill (noneOf ">\n" <|> nl) (char '>')
+ manyTill (escapedChar' <|> litChar) (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
blanklines
@@ -233,14 +237,14 @@ referenceKey = try $ do
-- return blanks so line count isn't affected
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
-referenceTitle :: GenParser Char st String
-referenceTitle = try $ do
+referenceTitle :: GenParser Char ParserState String
+referenceTitle = try $ do
skipSpaces >> optional newline >> skipSpaces
- tit <- (charsInBalanced '(' ')' >>= return . unwords . words)
+ tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words)
<|> do delim <- char '\'' <|> char '"'
- manyTill anyChar (try (char delim >> skipSpaces >>
+ manyTill litChar (try (char delim >> skipSpaces >>
notFollowedBy (noneOf ")\n")))
- return $ decodeCharacterReferences tit
+ return $ fromEntities tit
noteMarker :: GenParser Char ParserState [Char]
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
@@ -367,32 +371,37 @@ hrule = try $ do
indentedLine :: GenParser Char ParserState [Char]
indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n")
-codeBlockDelimiter :: Maybe Int
- -> GenParser Char st (Int, ([Char], [[Char]], [([Char], [Char])]))
-codeBlockDelimiter len = try $ do
+blockDelimiter :: (Char -> Bool)
+ -> Maybe Int
+ -> GenParser Char st (Int, (String, [String], [(String, String)]), Char)
+blockDelimiter f len = try $ do
+ c <- lookAhead (satisfy f)
size <- case len of
- Just l -> count l (char '~') >> many (char '~') >> return l
- Nothing -> count 3 (char '~') >> many (char '~') >>=
- return . (+ 3) . length
+ Just l -> count l (char c) >> many (char c) >> return l
+ Nothing -> count 3 (char c) >> many (char c) >>=
+ return . (+ 3) . length
many spaceChar
- attr <- option ([],[],[]) attributes
+ attr <- option ([],[],[])
+ $ attributes -- ~~~ {.ruby}
+ <|> (many1 alphaNum >>= \x -> return ([],[x],[])) -- github variant ```ruby
blankline
- return (size, attr)
+ return (size, attr, c)
attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
attributes = try $ do
char '{'
- many spaceChar
- attrs <- many (attribute >>~ many spaceChar)
+ spnl
+ attrs <- many (attribute >>~ spnl)
char '}'
let (ids, classes, keyvals) = unzip3 attrs
- let id' = if null ids then "" else head ids
- return (id', concat classes, concat keyvals)
+ let firstNonNull [] = ""
+ firstNonNull (x:xs) | not (null x) = x
+ | otherwise = firstNonNull xs
+ return (firstNonNull $ reverse ids, concat classes, concat keyvals)
attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
attribute = identifierAttr <|> classAttr <|> keyValAttr
-
identifier :: GenParser Char st [Char]
identifier = do
first <- letter
@@ -415,14 +424,15 @@ keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])])
keyValAttr = try $ do
key <- identifier
char '='
- char '"'
- val <- manyTill (satisfy (/='\n')) (char '"')
+ val <- enclosed (char '"') (char '"') anyChar
+ <|> enclosed (char '\'') (char '\'') anyChar
+ <|> many nonspaceChar
return ("",[],[(key,val)])
codeBlockDelimited :: GenParser Char st Block
codeBlockDelimited = try $ do
- (size, attr) <- codeBlockDelimiter Nothing
- contents <- manyTill anyLine (codeBlockDelimiter (Just size))
+ (size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing
+ contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
blanklines
return $ CodeBlock attr $ intercalate "\n" contents
@@ -552,9 +562,9 @@ listLine = try $ do
return $ concat chunks ++ "\n"
-- parse raw text for one list item, excluding start marker and continuations
-rawListItem :: GenParser Char ParserState [Char]
-rawListItem = try $ do
- listStart
+rawListItem :: GenParser Char ParserState a -> GenParser Char ParserState [Char]
+rawListItem start = try $ do
+ start
result <- many1 listLine
blanks <- many blankline
return $ concat result ++ blanks
@@ -577,9 +587,9 @@ listContinuationLine = try $ do
result <- manyTill anyChar newline
return $ result ++ "\n"
-listItem :: GenParser Char ParserState [Block]
-listItem = try $ do
- first <- rawListItem
+listItem :: GenParser Char ParserState a -> GenParser Char ParserState [Block]
+listItem start = try $ do
+ first <- rawListItem start
continuations <- many listContinuation
-- parsing with ListItemState forces markers at beginning of lines to
-- count as list item markers, even if not separated by blank space.
@@ -596,13 +606,15 @@ listItem = try $ do
orderedList :: GenParser Char ParserState Block
orderedList = try $ do
(start, style, delim) <- lookAhead anyOrderedListStart
- items <- many1 listItem
+ items <- many1 $ listItem $ try $
+ do optional newline -- if preceded by a Plain block in a list context
+ skipNonindentSpaces
+ orderedListMarker style delim
return $ OrderedList (start, style, delim) $ compactify items
bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
- lookAhead bulletListStart
- many1 listItem >>= return . BulletList . compactify
+bulletList =
+ many1 (listItem bulletListStart) >>= return . BulletList . compactify
-- definition lists
@@ -718,8 +730,8 @@ rawVerbatimBlock = try $ do
rawTeXBlock :: GenParser Char ParserState Block
rawTeXBlock = do
failIfStrict
- result <- liftM (RawBlock "latex") rawLaTeXEnvironment'
- <|> liftM (RawBlock "context") rawConTeXtEnvironment'
+ result <- liftM (RawBlock "latex") rawLaTeXBlock
+ <|> liftM (RawBlock "context") rawConTeXtEnvironment
spaces
return result
@@ -767,7 +779,7 @@ simpleTableHeader headless = try $ do
let (lengths, lines') = unzip dashes
let indices = scanl (+) (length initSp) lines'
-- If no header, calculate alignment on basis of first row of text
- rawHeads <- liftM (tail . splitByIndices (init indices)) $
+ rawHeads <- liftM (tail . splitStringByIndices (init indices)) $
if headless
then lookAhead anyLine
else return rawContent
@@ -794,7 +806,7 @@ rawTableLine indices = do
notFollowedBy' (blanklines <|> tableFooter)
line <- many1Till anyChar newline
return $ map removeLeadingTrailingSpace $ tail $
- splitByIndices (init indices) line
+ splitStringByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
tableLine :: [Int]
@@ -844,7 +856,7 @@ multilineTableHeader :: Bool -- ^ Headerless table
multilineTableHeader headless = try $ do
if headless
then return '\n'
- else tableSep
+ else tableSep >>~ notFollowedBy blankline
rawContent <- if headless
then return $ repeat ""
else many1
@@ -856,9 +868,9 @@ multilineTableHeader headless = try $ do
let indices = scanl (+) (length initSp) lines'
rawHeadsList <- if headless
then liftM (map (:[]) . tail .
- splitByIndices (init indices)) $ lookAhead anyLine
+ splitStringByIndices (init indices)) $ lookAhead anyLine
else return $ transpose $ map
- (\ln -> tail $ splitByIndices (init indices) ln)
+ (\ln -> tail $ splitStringByIndices (init indices) ln)
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
@@ -922,30 +934,25 @@ inlineParsers = [ whitespace
, inlineNote -- after superscript because of ^[link](/foo)^
, autoLink
, rawHtmlInline
- , rawLaTeXInline'
, escapedChar
+ , rawLaTeXInline'
, exampleRef
, smartPunctuation inline
, charRef
, symbol
, ltSign ]
-inlineNonLink :: GenParser Char ParserState Inline
-inlineNonLink = (choice $
- map (\parser -> try (parser >>= failIfLink)) inlineParsers)
- <?> "inline (non-link)"
-
-failIfLink :: Inline -> GenParser tok st Inline
-failIfLink (Link _ _) = pzero
-failIfLink elt = return elt
-
-escapedChar :: GenParser Char ParserState Inline
-escapedChar = try $ do
+escapedChar' :: GenParser Char ParserState Char
+escapedChar' = try $ do
char '\\'
state <- getState
- result <- if stateStrict state
- then oneOf "\\`*_{}[]()>#+-.!~"
- else satisfy (not . isAlphaNum)
+ if stateStrict state
+ then oneOf "\\`*_{}[]()>#+-.!~"
+ else satisfy (not . isAlphaNum)
+
+escapedChar :: GenParser Char ParserState Inline
+escapedChar = do
+ result <- escapedChar'
return $ case result of
' ' -> Str "\160" -- "\ " is a nonbreaking space
'\n' -> LineBreak -- "\[newline]" is a linebreak
@@ -971,8 +978,7 @@ symbol :: GenParser Char ParserState Inline
symbol = do
result <- noneOf "<\\\n\t "
<|> try (do lookAhead $ char '\\'
- notFollowedBy' $ rawLaTeXEnvironment'
- <|> rawConTeXtEnvironment'
+ notFollowedBy' rawTeXBlock
char '\\')
return $ Str [result]
@@ -1036,8 +1042,20 @@ inlinesBetween start end =
where inner = innerSpace <|> (notFollowedBy' whitespace >> inline)
innerSpace = try $ whitespace >>~ notFollowedBy' end
+-- This is used to prevent exponential blowups for things like:
+-- a**a*a**a*a**a*a**a*a**a*a**a*a**
+nested :: GenParser Char ParserState a
+ -> GenParser Char ParserState a
+nested p = do
+ nestlevel <- stateMaxNestingLevel `fmap` getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
emph :: GenParser Char ParserState Inline
-emph = Emph `liftM`
+emph = Emph `fmap` nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
where starStart = char '*' >> lookAhead nonspaceChar
starEnd = notFollowedBy' strong >> char '*'
@@ -1045,7 +1063,7 @@ emph = Emph `liftM`
ulEnd = notFollowedBy' strong >> char '_'
strong :: GenParser Char ParserState Inline
-strong = Strong `liftM`
+strong = Strong `liftM` nested
(inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd)
where starStart = string "**" >> lookAhead nonspaceChar
starEnd = try $ string "**"
@@ -1079,12 +1097,20 @@ nonEndline = satisfy (/='\n')
str :: GenParser Char ParserState Inline
str = do
+ smart <- stateSmart `fmap` getState
a <- alphaNum
- as <- many $ alphaNum <|> (try $ char '_' >>~ lookAhead alphaNum)
+ as <- many $ alphaNum
+ <|> (try $ char '_' >>~ lookAhead alphaNum)
+ <|> if smart
+ then (try $ satisfy (\c -> c == '\'' || c == '\x2019') >>
+ lookAhead alphaNum >> return '\x2019')
+ -- for things like l'aide
+ else mzero
+ pos <- getPosition
+ updateState $ \s -> s{ stateLastStrPos = Just pos }
let result = a:as
- state <- getState
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
- if stateSmart state
+ if smart
then case likelyAbbrev result of
[] -> return $ Str result
xs -> choice (map (\x ->
@@ -1128,19 +1154,18 @@ endline = try $ do
-- a reference label for a link
reference :: GenParser Char ParserState [Inline]
reference = do notFollowedBy' (string "[^") -- footnote reference
- result <- inlinesInBalancedBrackets inlineNonLink
+ result <- inlinesInBalancedBrackets inline
return $ normalizeSpaces result
-- source for a link, with optional title
-source :: GenParser Char st (String, [Char])
+source :: GenParser Char ParserState (String, [Char])
source =
- (try $ charsInBalanced '(' ')' >>= parseFromString source') <|>
+ (try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|>
-- the following is needed for cases like: [ref](/url(a).
- (enclosed (char '(') (char ')') anyChar >>=
- parseFromString source')
+ (enclosed (char '(') (char ')') litChar >>= parseFromString source')
-- auxiliary function for source
-source' :: GenParser Char st (String, [Char])
+source' :: GenParser Char ParserState (String, [Char])
source' = do
skipSpaces
let nl = char '\n' >>~ notFollowedBy blankline
@@ -1149,29 +1174,33 @@ source' = do
skipMany spaceChar
optional nl
skipMany spaceChar
- many1 (satisfy $ not . isBlank)
- let betweenAngles = try $ char '<' >>
- manyTill (noneOf ">\n" <|> nl) (char '>')
+ many1 $ escapedChar' <|> satisfy (not . isBlank)
+ let betweenAngles = try $
+ char '<' >> manyTill (escapedChar' <|> noneOf ">\n" <|> nl) (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" linkTitle
skipSpaces
eof
return (escapeURI $ removeTrailingSpace src, tit)
-linkTitle :: GenParser Char st String
-linkTitle = try $ do
+linkTitle :: GenParser Char ParserState String
+linkTitle = try $ do
(many1 spaceChar >> option '\n' newline) <|> newline
skipSpaces
delim <- oneOf "'\""
- tit <- manyTill (optional (char '\\') >> anyChar)
- (try (char delim >> skipSpaces >> eof))
- return $ decodeCharacterReferences tit
+ tit <- manyTill litChar (try (char delim >> skipSpaces >> eof))
+ return $ fromEntities tit
link :: GenParser Char ParserState Inline
link = try $ do
lab <- reference
(src, tit) <- source <|> referenceLink lab
- return $ Link lab (src, tit)
+ return $ Link (delinkify lab) (src, tit)
+
+delinkify :: [Inline] -> [Inline]
+delinkify = bottomUp $ concatMap go
+ where go (Link lab _) = lab
+ go x = [x]
-- a link like [this][ref] or [this][] or [this]
referenceLink :: [Inline]
@@ -1198,8 +1227,9 @@ autoLink = try $ do
image :: GenParser Char ParserState Inline
image = try $ do
char '!'
- (Link lab src) <- link
- return $ Image lab src
+ lab <- reference
+ (src, tit) <- source <|> referenceLink lab
+ return $ Image lab (src,tit)
note :: GenParser Char ParserState Inline
note = try $ do
@@ -1228,18 +1258,16 @@ inlineNote = try $ do
rawLaTeXInline' :: GenParser Char ParserState Inline
rawLaTeXInline' = try $ do
failIfStrict
- lookAhead $ char '\\'
- notFollowedBy' $ rawLaTeXEnvironment'
- <|> rawConTeXtEnvironment'
+ lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
RawInline _ s <- rawLaTeXInline
return $ RawInline "tex" s -- "tex" because it might be context or latex
-rawConTeXtEnvironment' :: GenParser Char st String
-rawConTeXtEnvironment' = try $ do
+rawConTeXtEnvironment :: GenParser Char st String
+rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
<|> (many1 letter)
- contents <- manyTill (rawConTeXtEnvironment' <|> (count 1 anyChar))
+ contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar))
(try $ string "\\stop" >> string completion)
return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
@@ -1312,7 +1340,8 @@ citeKey = try $ do
suppress_author <- option False (char '-' >> return True)
char '@'
first <- letter
- rest <- many $ (noneOf ",;!?[]()@ \t\n")
+ let internal p = try $ p >>~ lookAhead (letter <|> digit)
+ rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_?<>~")
let key = first:rest
st <- getState
guard $ key `elem` stateCitations st
@@ -1320,8 +1349,12 @@ citeKey = try $ do
suffix :: GenParser Char ParserState [Inline]
suffix = try $ do
+ hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
spnl
- liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline
+ rest <- liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline
+ return $ if hasSpace
+ then Space : rest
+ else rest
prefix :: GenParser Char ParserState [Inline]
prefix = liftM normalizeSpaces $
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 7fda0da19..456b23ce8 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -128,6 +128,7 @@ block = choice [ codeBlock
, fieldList
, imageBlock
, customCodeBlock
+ , mathBlock
, unknownDirective
, header
, hrule
@@ -360,6 +361,33 @@ customCodeBlock = try $ do
result <- indentedBlock
return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result
+-- | The 'math' directive (from Sphinx) for display math.
+mathBlock :: GenParser Char st Block
+mathBlock = try $ do
+ string ".. math::"
+ mathBlockMultiline <|> mathBlockOneLine
+
+mathBlockOneLine :: GenParser Char st Block
+mathBlockOneLine = try $ do
+ result <- manyTill anyChar newline
+ blanklines
+ return $ Para [Math DisplayMath $ removeLeadingTrailingSpace result]
+
+mathBlockMultiline :: GenParser Char st Block
+mathBlockMultiline = try $ do
+ blanklines
+ result <- indentedBlock
+ -- a single block can contain multiple equations, which need to go
+ -- in separate Pandoc math elements
+ let lns = map removeLeadingTrailingSpace $ lines result
+ -- drop :label, :nowrap, etc.
+ let startsWithColon (':':_) = True
+ startsWithColon _ = False
+ let lns' = dropWhile startsWithColon lns
+ let eqs = map (removeLeadingTrailingSpace . unlines)
+ $ filter (not . null) $ splitBy null lns'
+ return $ Para $ map (Math DisplayMath) eqs
+
lhsCodeBlock :: GenParser Char ParserState Block
lhsCodeBlock = try $ do
failUnlessLHS
@@ -526,8 +554,8 @@ noteBlock = try $ do
string ".."
spaceChar >> skipMany spaceChar
ref <- noteMarker
- spaceChar >> skipMany spaceChar
- first <- anyLine
+ first <- (spaceChar >> skipMany spaceChar >> anyLine)
+ <|> (newline >> return "")
blanks <- option "" blanklines
rest <- option "" indentedBlock
endPos <- getPosition
@@ -736,6 +764,7 @@ inline = choice [ whitespace
, image
, superscript
, subscript
+ , math
, note
, smartPunctuation inline
, hyphens
@@ -750,7 +779,8 @@ hyphens = do
return $ Str result
escapedChar :: GenParser Char st Inline
-escapedChar = escaped anyChar
+escapedChar = do c <- escaped anyChar
+ return $ Str [c]
symbol :: GenParser Char ParserState Inline
symbol = do
@@ -773,24 +803,31 @@ strong :: GenParser Char ParserState Inline
strong = enclosed (string "**") (try $ string "**") inline >>=
return . Strong . normalizeSpaces
-interpreted :: [Char] -> GenParser Char st [Inline]
+interpreted :: [Char] -> GenParser Char st [Char]
interpreted role = try $ do
optional $ try $ string "\\ "
result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar
try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "")
- return [Str result]
+ return result
superscript :: GenParser Char ParserState Inline
-superscript = interpreted "sup" >>= (return . Superscript)
+superscript = interpreted "sup" >>= \x -> return (Superscript [Str x])
subscript :: GenParser Char ParserState Inline
-subscript = interpreted "sub" >>= (return . Subscript)
+subscript = interpreted "sub" >>= \x -> return (Subscript [Str x])
+
+math :: GenParser Char ParserState Inline
+math = interpreted "math" >>= \x -> return (Math InlineMath x)
whitespace :: GenParser Char ParserState Inline
whitespace = many1 spaceChar >> return Space <?> "whitespace"
str :: GenParser Char ParserState Inline
-str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str
+str = do
+ result <- many1 (noneOf (specialChars ++ "\t\n "))
+ pos <- getPosition
+ updateState $ \s -> s{ stateLastStrPos = Just pos }
+ return $ Str result
-- an endline character that can be treated as a space, not a structural break
endline :: GenParser Char ParserState Inline
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index b9a46e8ff..67dfe6753 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -69,8 +69,17 @@ expToInlines (ESymbol t s) = Just $ addSpace t (Str s)
medspace = Str "\x2005"
widespace = Str "\x2004"
expToInlines (EStretchy x) = expToInlines x
+expToInlines (EDelimited start end xs) = do
+ xs' <- mapM expToInlines xs
+ return $ [Str start] ++ concat xs' ++ [Str end]
expToInlines (EGrouped xs) = expsToInlines xs
-expToInlines (ESpace _) = Just [Str " "] -- variable widths not supported
+expToInlines (ESpace "0.167em") = Just [Str "\x2009"]
+expToInlines (ESpace "0.222em") = Just [Str "\x2005"]
+expToInlines (ESpace "0.278em") = Just [Str "\x2004"]
+expToInlines (ESpace "0.333em") = Just [Str "\x2004"]
+expToInlines (ESpace "1em") = Just [Str "\x2001"]
+expToInlines (ESpace "2em") = Just [Str "\x2001\x2001"]
+expToInlines (ESpace _) = Just [Str " "]
expToInlines (EBinary _ _ _) = Nothing
expToInlines (ESub x y) = do
x' <- expToInlines x
@@ -88,10 +97,10 @@ expToInlines (ESubsup x y z) = do
expToInlines (EDown x y) = expToInlines (ESub x y)
expToInlines (EUp x y) = expToInlines (ESuper x y)
expToInlines (EDownup x y z) = expToInlines (ESubsup x y z)
-expToInlines (EText "normal" x) = Just [Str x]
-expToInlines (EText "bold" x) = Just [Strong [Str x]]
-expToInlines (EText "monospace" x) = Just [Code nullAttr x]
-expToInlines (EText "italic" x) = Just [Emph [Str x]]
+expToInlines (EText TextNormal x) = Just [Str x]
+expToInlines (EText TextBold x) = Just [Strong [Str x]]
+expToInlines (EText TextMonospace x) = Just [Code nullAttr x]
+expToInlines (EText TextItalic x) = Just [Emph [Str x]]
expToInlines (EText _ x) = Just [Str x]
expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) =
case accent of
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 12d299aa4..3b5954368 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -68,7 +68,8 @@ import Control.Monad ( guard, liftM )
readTextile :: ParserState -- ^ Parser state, including options for parser
-> String -- ^ String to parse (assuming @'\n'@ line endings)
-> Pandoc
-readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
+readTextile state s =
+ (readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n")
--
@@ -436,6 +437,8 @@ str = do
next <- lookAhead letter
guard $ isLetter (last xs) || isLetter next
return $ xs ++ "-"
+ pos <- getPosition
+ updateState $ \s -> s{ stateLastStrPos = Just pos }
return $ Str result
-- | Textile allows HTML span infos, we discard them
diff --git a/src/Text/Pandoc/S5.hs b/src/Text/Pandoc/S5.hs
deleted file mode 100644
index b17b052c5..000000000
--- a/src/Text/Pandoc/S5.hs
+++ /dev/null
@@ -1,69 +0,0 @@
-{-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.S5
- Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Definitions for creation of S5 powerpoint-like HTML.
-(See <http://meyerweb.com/eric/tools/s5/>.)
--}
-module Text.Pandoc.S5 ( s5HeaderIncludes) where
-import Text.Pandoc.Shared ( readDataFile )
-import System.FilePath ( (</>) )
-import Data.ByteString.UTF8 ( toString, fromString )
-import Data.ByteString.Base64 ( encode )
-
-s5HeaderIncludes :: Maybe FilePath -> IO String
-s5HeaderIncludes datadir = do
- c <- s5CSS datadir
- j <- s5Javascript datadir
- return $ c ++ j
-
-s5Javascript :: Maybe FilePath -> IO String
-s5Javascript datadir = do
- js <- readDataFile datadir $ "s5" </> "default" </> "slides.min.js"
- return $ "<script type=\"text/javascript\">\n" ++ inCDATA js ++ "</script>\n"
-
-inCDATA :: String -> String
-inCDATA s = "/*<![CDATA[*/\n" ++ s ++ "\n/*]]>*/\n"
-
-base64 :: String -> String
-base64 = toString . encode . fromString
-
-s5CSS :: Maybe FilePath -> IO String
-s5CSS datadir = do
- s5CoreCSS <- readDataFile datadir $ "s5" </> "default" </> "s5-core.css"
- s5FramingCSS <- readDataFile datadir $ "s5" </> "default" </> "framing.css"
- s5PrettyCSS <- readDataFile datadir $ "s5" </> "default" </> "pretty.css"
- s5OperaCSS <- readDataFile datadir $ "s5" </> "default" </> "opera.css"
- s5OutlineCSS <- readDataFile datadir $ "s5" </> "default" </> "outline.css"
- s5PrintCSS <- readDataFile datadir $ "s5" </> "default" </> "print.css"
- return $ "<link rel=\"stylesheet\" type=\"text/css\" media=\"projection\" id=\"slideProj\" href=\"data:text/css;charset=utf-8;base64," ++
- base64 (s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS) ++ "\" />\n" ++
- "<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" href=\"data:text/css;charset=utf-8;base64," ++
- base64 s5OutlineCSS ++ "\" />\n" ++
- "<link rel=\"stylesheet\" type=\"text/css\" media=\"print\" id=\"slidePrint\" href=\"data:text/css;charset=utf-8;base64," ++
- base64 s5PrintCSS ++ "\" />\n" ++
- "<link rel=\"stylesheet\" type=\"text/css\" media=\"projection\" id=\"operaFix\" href=\"data:text/css;charset=utf-8;base64," ++
- base64 s5OperaCSS ++ "\" />\n"
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
new file mode 100644
index 000000000..9c609b8fe
--- /dev/null
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -0,0 +1,162 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.SelfContained
+ Copyright : Copyright (C) 2011 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Functions for converting an HTML file into one that can be viewed
+offline, by incorporating linked images, CSS, and scripts into
+the HTML using data URIs.
+-}
+module Text.Pandoc.SelfContained ( makeSelfContained ) where
+import Text.HTML.TagSoup
+import Network.URI (isAbsoluteURI, parseURI, escapeURIString)
+import Network.HTTP
+import Data.ByteString.Base64
+import qualified Data.ByteString.Char8 as B
+import Data.ByteString (ByteString)
+import Data.ByteString.UTF8 (toString, fromString)
+import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
+import Data.Char (toLower, isAscii, isAlphaNum)
+import Codec.Compression.GZip as Gzip
+import qualified Data.ByteString.Lazy as L
+import Text.Pandoc.Shared (findDataFile)
+import Text.Pandoc.MIME (getMimeType)
+import System.Directory (doesFileExist)
+
+getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String)
+getItem userdata f =
+ if isAbsoluteURI f
+ then openURL f
+ else do
+ let mime = case takeExtension f of
+ ".gz" -> getMimeType $ dropExtension f
+ x -> getMimeType x
+ exists <- doesFileExist f
+ if exists
+ then do
+ cont <- B.readFile f
+ return (cont, mime)
+ else do
+ res <- findDataFile userdata f
+ exists' <- doesFileExist res
+ if exists'
+ then do
+ cont <- B.readFile res
+ return (cont, mime)
+ else error $ "Could not find `" ++ f ++ "'"
+
+-- TODO - have this return mime type too - then it can work for google
+-- chart API, e.g.
+openURL :: String -> IO (ByteString, Maybe String)
+openURL u = getBodyAndMimeType =<< simpleHTTP (getReq u)
+ where getReq v = case parseURI v of
+ Nothing -> error $ "Could not parse URI: " ++ v
+ Just u' -> mkRequest GET u'
+ getBodyAndMimeType (Left err) = fail (show err)
+ getBodyAndMimeType (Right r) = return (rspBody r, findHeader HdrContentType r)
+
+isOk :: Char -> Bool
+isOk c = isAscii c && isAlphaNum c
+
+convertTag :: Maybe FilePath -> Tag String -> IO (Tag String)
+convertTag userdata t@(TagOpen "img" as) =
+ case fromAttrib "src" t of
+ [] -> return t
+ src -> do
+ (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+ let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
+ return $ TagOpen "img" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+convertTag userdata t@(TagOpen "video" as) =
+ case fromAttrib "src" t of
+ [] -> return t
+ src -> do
+ (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+ let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw)
+ return $ TagOpen "video" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+convertTag userdata t@(TagOpen "script" as) =
+ case fromAttrib "src" t of
+ [] -> return t
+ src -> do
+ (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+ let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
+ return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"])
+convertTag userdata t@(TagOpen "link" as) =
+ case fromAttrib "href" t of
+ [] -> return t
+ src -> do
+ (raw, mime) <- getRaw userdata (fromAttrib "type" t) src
+ let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw)
+ return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"])
+convertTag _ t = return t
+
+cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString
+cssURLs userdata d orig =
+ case B.breakSubstring "url(" orig of
+ (x,y) | B.null y -> return orig
+ | otherwise -> do
+ let (u,v) = B.breakSubstring ")" $ B.drop 4 y
+ let url = toString
+ $ case B.take 1 u of
+ "\"" -> B.takeWhile (/='"') $ B.drop 1 u
+ _ -> u
+ (raw, mime) <- getRaw userdata "" (d </> url)
+ rest <- cssURLs userdata d v
+ let enc = "data:" `B.append` fromString mime `B.append`
+ ";base64," `B.append` (encode raw)
+ return $ x `B.append` "url(" `B.append` enc `B.append` rest
+
+getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String)
+getRaw userdata mimetype src = do
+ let ext = map toLower $ takeExtension src
+ (raw, respMime) <- getItem userdata src
+ let raw' = if ext == ".gz"
+ then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks
+ $ [raw]
+ else raw
+ let mime = case (mimetype, respMime) of
+ ("",Nothing) -> error
+ $ "Could not determine mime type for `" ++ src ++ "'"
+ (x, Nothing) -> x
+ (_, Just x ) -> x
+ result <- if mime == "text/css"
+ then cssURLs userdata (takeDirectory src) raw'
+ else return raw'
+ return (result, mime)
+
+-- | Convert HTML into self-contained HTML, incorporating images,
+-- scripts, and CSS using data: URIs. Items specified using absolute
+-- URLs will be downloaded; those specified using relative URLs will
+-- be sought first relative to the working directory, then relative
+-- to the user data directory (if the first parameter is 'Just'
+-- a directory), and finally relative to pandoc's default data
+-- directory.
+makeSelfContained :: Maybe FilePath -> String -> IO String
+makeSelfContained userdata inp = do
+ let tags = parseTags inp
+ out' <- mapM (convertTag userdata) tags
+ return $ renderTagsOptions renderOptions{ optMinimize = (\t -> t == "br"
+ || t == "img" || t == "meta" || t == "link" ) } out'
+
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 9717e1bc8..cd5b19164 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -32,6 +32,7 @@ module Text.Pandoc.Shared (
-- * List processing
splitBy,
splitByIndices,
+ splitStringByIndices,
substitute,
-- * Text processing
backslashEscapes,
@@ -44,8 +45,9 @@ module Text.Pandoc.Shared (
camelCaseToHyphenated,
toRomanNumeral,
escapeURI,
- unescapeURI,
tabFilter,
+ -- * Date/time
+ normalizeDate,
-- * Pandoc block and inline list processing
orderedListMarkers,
normalizeSpaces,
@@ -68,21 +70,31 @@ module Text.Pandoc.Shared (
inDirectory,
findDataFile,
readDataFile,
+ -- * Error handling
+ err,
+ warn,
) where
import Text.Pandoc.Definition
import Text.Pandoc.Generic
-import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
-import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
- isLetter, isDigit )
+import qualified Text.Pandoc.UTF8 as UTF8
+import System.Environment (getProgName)
+import System.Exit (exitWith, ExitCode(..))
+import Data.Char ( toLower, isLower, isUpper, isAlpha,
+ isLetter, isDigit, isSpace )
import Data.List ( find, isPrefixOf, intercalate )
-import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
-import Codec.Binary.UTF8.String ( encodeString, decodeString )
+import Network.URI ( escapeURIString )
import System.Directory
import System.FilePath ( (</>) )
import Data.Generics (Typeable, Data)
import qualified Control.Monad.State as S
+import Control.Monad (msum)
import Paths_pandoc (getDataFileName)
+import Text.Pandoc.Highlighting (Style, pygments)
+import Text.Pandoc.Pretty (charWidth)
+import System.Locale (defaultTimeLocale)
+import Data.Time
+import System.IO (stderr)
--
-- List processing
@@ -96,12 +108,23 @@ splitBy isSep lst =
rest' = dropWhile isSep rest
in first:(splitBy isSep rest')
--- | Split list into chunks divided at specified indices.
splitByIndices :: [Int] -> [a] -> [[a]]
splitByIndices [] lst = [lst]
-splitByIndices (x:xs) lst =
- let (first, rest) = splitAt x lst in
- first:(splitByIndices (map (\y -> y - x) xs) rest)
+splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest)
+ where (first, rest) = splitAt x lst
+
+-- | Split string into chunks divided at specified indices.
+splitStringByIndices :: [Int] -> [Char] -> [[Char]]
+splitStringByIndices [] lst = [lst]
+splitStringByIndices (x:xs) lst =
+ let (first, rest) = splitAt' x lst in
+ first : (splitStringByIndices (map (\y -> y - x) xs) rest)
+
+splitAt' :: Int -> [Char] -> ([Char],[Char])
+splitAt' _ [] = ([],[])
+splitAt' n xs | n <= 0 = ([],xs)
+splitAt' n (x:xs) = (x:ys,zs)
+ where (ys,zs) = splitAt' (n - charWidth x) xs
-- | Replace each occurrence of one sublist in a list with another.
substitute :: (Eq a) => [a] -> [a] -> [a] -> [a]
@@ -181,16 +204,9 @@ toRomanNumeral x =
_ | x >= 1 -> "I" ++ toRomanNumeral (x - 1)
_ -> ""
--- | Escape unicode characters in a URI. Characters that are
--- already valid in a URI, including % and ?, are left alone.
+-- | Escape whitespace in URI.
escapeURI :: String -> String
-escapeURI = escapeURIString isAllowedInURI . encodeString
-
--- | Unescape unicode and some special characters in a URI, but
--- without introducing spaces.
-unescapeURI :: String -> String
-unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
- decodeString . unEscapeString
+escapeURI = escapeURIString (not . isSpace)
-- | Convert tabs to spaces and filter out DOS line endings.
-- Tabs will be preserved if tab stop is set to 0.
@@ -213,6 +229,18 @@ tabFilter tabStop =
in go tabStop
--
+-- Date/time
+--
+
+-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format.
+normalizeDate :: String -> Maybe String
+normalizeDate s = fmap (formatTime defaultTimeLocale "%F")
+ (msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day)
+ where parsetimeWith = parseTime defaultTimeLocale
+ formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y",
+ "%d %B %Y", "%b. %d, %Y", "%B %d, %Y"]
+
+--
-- Pandoc block and inline list processing
--
@@ -304,9 +332,9 @@ consolidateInlines (Str x : ys) =
fromStr (Str z) = z
fromStr _ = error "consolidateInlines - fromStr - not a Str"
consolidateInlines (Space : ys) = Space : rest
- where isSpace Space = True
- isSpace _ = False
- rest = consolidateInlines $ dropWhile isSpace ys
+ where isSp Space = True
+ isSp _ = False
+ rest = consolidateInlines $ dropWhile isSp ys
consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
Emph (xs ++ ys) : zs
consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
@@ -334,10 +362,6 @@ stringify = queryWith go
go (Str x) = x
go (Code _ x) = x
go (Math _ x) = x
- go EmDash = "--"
- go EnDash = "-"
- go Apostrophe = "'"
- go Ellipses = "..."
go LineBreak = " "
go _ = ""
@@ -458,6 +482,7 @@ data ObfuscationMethod = NoObfuscation
-- | Varieties of HTML slide shows.
data HTMLSlideVariant = S5Slides
| SlidySlides
+ | DZSlides
| NoSlides
deriving (Show, Read, Eq)
@@ -488,9 +513,13 @@ data WriterOptions = WriterOptions
, writerCiteMethod :: CiteMethod -- ^ How to print cites
, writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations
, writerHtml5 :: Bool -- ^ Produce HTML5
+ , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show
+ , writerSlideLevel :: Maybe Int -- ^ Force header level of slides
, writerChapters :: Bool -- ^ Use "chapter" for top-level sects
, writerListings :: Bool -- ^ Use listings package for code
- , writerAscii :: Bool -- ^ Avoid non-ascii characters
+ , writerHighlight :: Bool -- ^ Highlight source code
+ , writerHighlightStyle :: Style -- ^ Style to use for highlighting
+ , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown
} deriving Show
{-# DEPRECATED writerXeTeX "writerXeTeX no longer does anything" #-}
@@ -522,9 +551,13 @@ defaultWriterOptions =
, writerCiteMethod = Citeproc
, writerBiblioFiles = []
, writerHtml5 = False
+ , writerBeamer = False
+ , writerSlideLevel = Nothing
, writerChapters = False
, writerListings = False
- , writerAscii = False
+ , writerHighlight = False
+ , writerHighlightStyle = pygments
+ , writerSetextHeaders = True
}
--
@@ -554,3 +587,19 @@ findDataFile (Just u) f = do
-- Cabal data directory.
readDataFile :: Maybe FilePath -> FilePath -> IO String
readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile
+
+--
+-- Error reporting
+--
+
+err :: Int -> String -> IO a
+err exitCode msg = do
+ name <- getProgName
+ UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
+ exitWith $ ExitFailure exitCode
+ return undefined
+
+warn :: String -> IO ()
+warn msg = do
+ name <- getProgName
+ UTF8.hPutStrLn stderr $ name ++ ": " ++ msg
diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs
new file mode 100644
index 000000000..1df556d38
--- /dev/null
+++ b/src/Text/Pandoc/Slides.hs
@@ -0,0 +1,57 @@
+{-
+Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Slides
+ Copyright : Copyright (C) 2012 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Utility functions for splitting documents into slides for slide
+show formats (dzslides, s5, slidy, beamer).
+-}
+module Text.Pandoc.Slides ( getSlideLevel, prepSlides ) where
+import Text.Pandoc.Definition
+
+-- | Find level of header that starts slides (defined as the least header
+-- level that occurs before a non-header/non-hrule in the blocks).
+getSlideLevel :: [Block] -> Int
+getSlideLevel = go 6
+ where go least (Header n _ : x : xs)
+ | n < least && nonHOrHR x = go n xs
+ | otherwise = go least (x:xs)
+ go least (_ : xs) = go least xs
+ go least [] = least
+ nonHOrHR (Header _ _) = False
+ nonHOrHR (HorizontalRule) = False
+ nonHOrHR _ = True
+
+-- | Prepare a block list to be passed to hierarchicalize.
+prepSlides :: Int -> [Block] -> [Block]
+prepSlides slideLevel = ensureStartWithH . splitHrule
+ where splitHrule (HorizontalRule : Header n xs : ys)
+ | n == slideLevel = Header slideLevel xs : splitHrule ys
+ splitHrule (HorizontalRule : xs) = Header slideLevel [] : splitHrule xs
+ splitHrule (x : xs) = x : splitHrule xs
+ splitHrule [] = []
+ ensureStartWithH bs@(Header n _:_)
+ | n <= slideLevel = bs
+ ensureStartWithH bs = Header slideLevel [] : bs
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index 19c9a808a..336efe453 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -72,7 +72,7 @@ import Text.ParserCombinators.Parsec
import Control.Monad (liftM, when, forM)
import System.FilePath
import Data.List (intercalate, intersperse)
-import Text.XHtml (primHtml, Html)
+import Text.Blaze (preEscapedString, Html)
import Data.ByteString.Lazy.UTF8 (ByteString, fromString)
import Text.Pandoc.Shared (readDataFile)
import qualified Control.Exception.Extensible as E (try, IOException)
@@ -82,6 +82,8 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first
-> String -- ^ Name of writer
-> IO (Either E.IOException String)
getDefaultTemplate _ "native" = return $ Right ""
+getDefaultTemplate _ "json" = return $ Right ""
+getDefaultTemplate _ "docx" = return $ Right ""
getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument"
getDefaultTemplate user "epub" = getDefaultTemplate user "html"
getDefaultTemplate user writer = do
@@ -110,7 +112,7 @@ instance TemplateTarget ByteString where
toTarget = fromString
instance TemplateTarget Html where
- toTarget = primHtml
+ toTarget = preEscapedString
-- | Renders a template
renderTemplate :: TemplateTarget a
diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs
new file mode 100644
index 000000000..1913eb92b
--- /dev/null
+++ b/src/Text/Pandoc/Writers/AsciiDoc.hs
@@ -0,0 +1,367 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.AsciiDoc
+ Copyright : Copyright (C) 2006-2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to asciidoc.
+
+Note that some information may be lost in conversion, due to
+expressive limitations of asciidoc. Footnotes and table cells with
+paragraphs (or other block items) are not possible in asciidoc.
+If pandoc encounters one of these, it will insert a message indicating
+that it has omitted the construct.
+
+AsciiDoc: <http://www.methods.co.nz/asciidoc/>
+-}
+module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Templates (renderTemplate)
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing hiding (blankline)
+import Text.ParserCombinators.Parsec ( runParser, GenParser )
+import Data.List ( isPrefixOf, intersperse, intercalate )
+import Text.Pandoc.Pretty
+import Control.Monad.State
+
+data WriterState = WriterState { defListMarker :: String
+ , orderedListLevel :: Int
+ , bulletListLevel :: Int
+ }
+
+-- | Convert Pandoc to AsciiDoc.
+writeAsciiDoc :: WriterOptions -> Pandoc -> String
+writeAsciiDoc opts document =
+ evalState (pandocToAsciiDoc opts document) WriterState{
+ defListMarker = "::"
+ , orderedListLevel = 1
+ , bulletListLevel = 1
+ }
+
+-- | Return asciidoc representation of document.
+pandocToAsciiDoc :: WriterOptions -> Pandoc -> State WriterState String
+pandocToAsciiDoc opts (Pandoc (Meta title authors date) blocks) = do
+ title' <- inlineListToAsciiDoc opts title
+ let title'' = title' $$ text (replicate (offset title') '=')
+ authors' <- mapM (inlineListToAsciiDoc opts) authors
+ -- asciidoc only allows a singel author
+ date' <- inlineListToAsciiDoc opts date
+ let titleblock = not $ null title && null authors && null date
+ body <- blockListToAsciiDoc opts blocks
+ let colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ let main = render colwidth body
+ let context = writerVariables opts ++
+ [ ("body", main)
+ , ("title", render colwidth title'')
+ , ("date", render colwidth date')
+ ] ++
+ [ ("toc", "yes") | writerTableOfContents opts &&
+ writerStandalone opts ] ++
+ [ ("titleblock", "yes") | titleblock ] ++
+ [ ("author", render colwidth a) | a <- authors' ]
+ if writerStandalone opts
+ then return $ renderTemplate context $ writerTemplate opts
+ else return main
+
+-- | Escape special characters for AsciiDoc.
+escapeString :: String -> String
+escapeString = escapeStringUsing escs
+ where escs = backslashEscapes "{"
+
+-- | Ordered list start parser for use in Para below.
+olMarker :: GenParser Char ParserState Char
+olMarker = do (start, style', delim) <- anyOrderedListMarker
+ if delim == Period &&
+ (style' == UpperAlpha || (style' == UpperRoman &&
+ start `elem` [1, 5, 10, 50, 100, 500, 1000]))
+ then spaceChar >> spaceChar
+ else spaceChar
+
+-- | True if string begins with an ordered list marker
+beginsWithOrderedListMarker :: String -> Bool
+beginsWithOrderedListMarker str =
+ case runParser olMarker defaultParserState "para start" (take 10 str) of
+ Left _ -> False
+ Right _ -> True
+
+-- | Convert Pandoc block element to asciidoc.
+blockToAsciiDoc :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState Doc
+blockToAsciiDoc _ Null = return empty
+blockToAsciiDoc opts (Plain inlines) = do
+ contents <- inlineListToAsciiDoc opts inlines
+ return $ contents <> cr
+blockToAsciiDoc opts (Para inlines) = do
+ contents <- inlineListToAsciiDoc opts inlines
+ -- escape if para starts with ordered list marker
+ let esc = if beginsWithOrderedListMarker (render Nothing contents)
+ then text "\\"
+ else empty
+ return $ esc <> contents <> blankline
+blockToAsciiDoc _ (RawBlock _ _) = return empty
+blockToAsciiDoc _ HorizontalRule =
+ return $ blankline <> text "'''''" <> blankline
+blockToAsciiDoc opts (Header level inlines) = do
+ contents <- inlineListToAsciiDoc opts inlines
+ let len = offset contents
+ return $ contents <> cr <>
+ (case level of
+ 1 -> text $ replicate len '-'
+ 2 -> text $ replicate len '~'
+ 3 -> text $ replicate len '^'
+ 4 -> text $ replicate len '+'
+ _ -> empty) <> blankline
+blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $
+ flush (attrs <> dashes <> space <> attrs <> cr <> text str <>
+ cr <> dashes) <> blankline
+ where dashes = text $ replicate (maximum $ map length $ lines str) '-'
+ attrs = if null classes
+ then empty
+ else text $ intercalate "," $ "code" : classes
+blockToAsciiDoc opts (BlockQuote blocks) = do
+ contents <- blockListToAsciiDoc opts blocks
+ let isBlock (BlockQuote _) = True
+ isBlock _ = False
+ -- if there are nested block quotes, put in an open block
+ let contents' = if any isBlock blocks
+ then "--" $$ contents $$ "--"
+ else contents
+ let cols = offset contents'
+ let bar = text $ replicate cols '_'
+ return $ bar $$ chomp contents' $$ bar <> blankline
+blockToAsciiDoc opts (Table caption aligns widths headers rows) = do
+ caption' <- inlineListToAsciiDoc opts caption
+ let caption'' = if null caption
+ then empty
+ else "." <> caption' <> cr
+ let isSimple = all (== 0) widths
+ let relativePercentWidths = if isSimple
+ then widths
+ else map (/ (sum widths)) widths
+ let widths'' :: [Integer]
+ widths'' = map (floor . (* 100)) relativePercentWidths
+ -- ensure that the widths sum to 100
+ let widths' = case widths'' of
+ _ | isSimple -> widths''
+ (w:ws) | sum (w:ws) < 100
+ -> (100 - sum ws) : ws
+ ws -> ws
+ let totalwidth :: Integer
+ totalwidth = floor $ sum widths * 100
+ let colspec al wi = (case al of
+ AlignLeft -> "<"
+ AlignCenter -> "^"
+ AlignRight -> ">"
+ AlignDefault -> "") ++
+ if wi == 0 then "" else (show wi ++ "%")
+ let headerspec = if all null headers
+ then empty
+ else text "options=\"header\","
+ let widthspec = if totalwidth == 0
+ then empty
+ else text "width="
+ <> doubleQuotes (text $ show totalwidth ++ "%")
+ <> text ","
+ let tablespec = text "["
+ <> widthspec
+ <> text "cols="
+ <> doubleQuotes (text $ intercalate ","
+ $ zipWith colspec aligns widths')
+ <> text ","
+ <> headerspec <> text "]"
+ let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x]
+ return $ text "|" <> chomp d
+ makeCell [Para x] = makeCell [Plain x]
+ makeCell _ = return $ text "|" <> "[multiblock cell omitted]"
+ let makeRow cells = hsep `fmap` mapM makeCell cells
+ rows' <- mapM makeRow rows
+ head' <- makeRow headers
+ let head'' = if all null headers then empty else head'
+ let colwidth = if writerWrapText opts
+ then writerColumns opts
+ else 100000
+ let maxwidth = maximum $ map offset (head':rows')
+ let body = if maxwidth > colwidth then vsep rows' else vcat rows'
+ let border = text $ "|" ++ replicate ((min maxwidth colwidth) - 1) '='
+ return $
+ caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline
+blockToAsciiDoc opts (BulletList items) = do
+ contents <- mapM (bulletListItemToAsciiDoc opts) items
+ return $ cat contents <> blankline
+blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do
+ let sty' = case sty of
+ UpperRoman -> UpperAlpha
+ LowerRoman -> LowerAlpha
+ x -> x
+ let markers = orderedListMarkers (1, sty', Period) -- start num not used
+ let markers' = map (\m -> if length m < 3
+ then m ++ replicate (3 - length m) ' '
+ else m) markers
+ contents <- mapM (\(item, num) -> orderedListItemToAsciiDoc opts item num) $
+ zip markers' items
+ return $ cat contents <> blankline
+blockToAsciiDoc opts (DefinitionList items) = do
+ contents <- mapM (definitionListItemToAsciiDoc opts) items
+ return $ cat contents <> blankline
+
+-- | Convert bullet list item (list of blocks) to asciidoc.
+bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc
+bulletListItemToAsciiDoc opts blocks = do
+ let addBlock :: Doc -> Block -> State WriterState Doc
+ addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b
+ addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> chomp x
+ addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> chomp x
+ addBlock d b = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> text "+" <> cr <> chomp x
+ lev <- bulletListLevel `fmap` get
+ modify $ \s -> s{ bulletListLevel = lev + 1 }
+ contents <- foldM addBlock empty blocks
+ modify $ \s -> s{ bulletListLevel = lev }
+ let marker = text (replicate lev '*')
+ return $ marker <> space <> contents <> cr
+
+-- | Convert ordered list item (a list of blocks) to asciidoc.
+orderedListItemToAsciiDoc :: WriterOptions -- ^ options
+ -> String -- ^ list item marker
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToAsciiDoc opts marker blocks = do
+ let addBlock :: Doc -> Block -> State WriterState Doc
+ addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b
+ addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> chomp x
+ addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> chomp x
+ addBlock d b = do x <- blockToAsciiDoc opts b
+ return $ d <> cr <> text "+" <> cr <> chomp x
+ lev <- orderedListLevel `fmap` get
+ modify $ \s -> s{ orderedListLevel = lev + 1 }
+ contents <- foldM addBlock empty blocks
+ modify $ \s -> s{ orderedListLevel = lev }
+ return $ text marker <> space <> contents <> cr
+
+-- | Convert definition list item (label, list of blocks) to asciidoc.
+definitionListItemToAsciiDoc :: WriterOptions
+ -> ([Inline],[[Block]])
+ -> State WriterState Doc
+definitionListItemToAsciiDoc opts (label, defs) = do
+ labelText <- inlineListToAsciiDoc opts label
+ marker <- defListMarker `fmap` get
+ if marker == "::"
+ then modify (\st -> st{ defListMarker = ";;"})
+ else modify (\st -> st{ defListMarker = "::"})
+ let divider = cr <> text "+" <> cr
+ let defsToAsciiDoc :: [Block] -> State WriterState Doc
+ defsToAsciiDoc ds = (vcat . intersperse divider . map chomp)
+ `fmap` mapM (blockToAsciiDoc opts) ds
+ defs' <- mapM defsToAsciiDoc defs
+ modify (\st -> st{ defListMarker = marker })
+ let contents = nest 2 $ vcat $ intersperse divider $ map chomp defs'
+ return $ labelText <> text marker <> cr <> contents <> cr
+
+-- | Convert list of Pandoc block elements to asciidoc.
+blockListToAsciiDoc :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks
+
+-- | Convert list of Pandoc inline elements to asciidoc.
+inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc
+inlineListToAsciiDoc opts lst =
+ mapM (inlineToAsciiDoc opts) lst >>= return . cat
+
+-- | Convert Pandoc inline element to asciidoc.
+inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc
+inlineToAsciiDoc opts (Emph lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ return $ "_" <> contents <> "_"
+inlineToAsciiDoc opts (Strong lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ return $ "*" <> contents <> "*"
+inlineToAsciiDoc opts (Strikeout lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ return $ "[line-through]*" <> contents <> "*"
+inlineToAsciiDoc opts (Superscript lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ return $ "^" <> contents <> "^"
+inlineToAsciiDoc opts (Subscript lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ return $ "~" <> contents <> "~"
+inlineToAsciiDoc opts (SmallCaps lst) = inlineListToAsciiDoc opts lst
+inlineToAsciiDoc opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ return $ "`" <> contents <> "'"
+inlineToAsciiDoc opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToAsciiDoc opts lst
+ return $ "``" <> contents <> "''"
+inlineToAsciiDoc _ (Code _ str) = return $
+ text "`" <> text (escapeStringUsing (backslashEscapes "`") str) <> "`"
+inlineToAsciiDoc _ (Str str) = return $ text $ escapeString str
+inlineToAsciiDoc _ (Math InlineMath str) =
+ return $ "latexmath:[$" <> text str <> "$]"
+inlineToAsciiDoc _ (Math DisplayMath str) =
+ return $ "latexmath:[\\[" <> text str <> "\\]]"
+inlineToAsciiDoc _ (RawInline _ _) = return empty
+inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr
+inlineToAsciiDoc _ Space = return space
+inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst
+inlineToAsciiDoc opts (Link txt (src, _tit)) = do
+-- relative: link:downloads/foo.zip[download foo.zip]
+-- abs: http://google.cod[Google]
+-- or my@email.com[email john]
+ linktext <- inlineListToAsciiDoc opts txt
+ let isRelative = ':' `notElem` src
+ let prefix = if isRelative
+ then text "link:"
+ else empty
+ let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
+ let useAuto = case txt of
+ [Code _ s] | s == srcSuffix -> True
+ _ -> False
+ return $ if useAuto
+ then text srcSuffix
+ else prefix <> text src <> "[" <> linktext <> "]"
+inlineToAsciiDoc opts (Image alternate (src, tit)) = do
+-- image:images/logo.png[Company logo, title="blah"]
+ let txt = if (null alternate) || (alternate == [Str ""])
+ then [Str "image"]
+ else alternate
+ linktext <- inlineListToAsciiDoc opts txt
+ let linktitle = if null tit
+ then empty
+ else text $ ",title=\"" ++ tit ++ "\""
+ return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]"
+inlineToAsciiDoc opts (Note [Para inlines]) =
+ inlineToAsciiDoc opts (Note [Plain inlines])
+inlineToAsciiDoc opts (Note [Plain inlines]) = do
+ contents <- inlineListToAsciiDoc opts inlines
+ return $ text "footnote:[" <> contents <> "]"
+-- asciidoc can't handle blank lines in notes
+inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]"
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 763f77d7c..dfdf7a140 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -31,12 +31,13 @@ Conversion of 'Pandoc' format into ConTeXt.
module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
+import Text.Pandoc.Generic (queryWith)
import Text.Printf ( printf )
import Data.List ( intercalate )
import Control.Monad.State
import Text.Pandoc.Pretty
import Text.Pandoc.Templates ( renderTemplate )
-import Network.URI ( isAbsoluteURI, unEscapeString )
+import Network.URI ( isURI, unEscapeString )
data WriterState =
WriterState { stNextRef :: Int -- number of next URL reference
@@ -68,13 +69,14 @@ pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do
datetext <- if null date
then return ""
else liftM (render colwidth) $ inlineListToConTeXt date
- body <- blockListToConTeXt blocks
- let main = render colwidth $ body
+ body <- mapM (elementToConTeXt options) $ hierarchicalize blocks
+ let main = (render colwidth . vcat) body
let context = writerVariables options ++
[ ("toc", if writerTableOfContents options then "yes" else "")
, ("body", main)
, ("title", titletext)
, ("date", datetext) ] ++
+ [ ("number-sections", "yes") | writerNumberSections options ] ++
[ ("author", a) | a <- authorstext ]
return $ if writerStandalone options
then renderTemplate context $ writerTemplate options
@@ -101,12 +103,24 @@ escapeCharForConTeXt ch =
']' -> "{]}"
'_' -> "\\letterunderscore{}"
'\160' -> "~"
+ '\x2014' -> "---"
+ '\x2013' -> "--"
+ '\x2019' -> "'"
+ '\x2026' -> "\\ldots{}"
x -> [x]
-- | Escape string for ConTeXt
stringToConTeXt :: String -> String
stringToConTeXt = concatMap escapeCharForConTeXt
+-- | Convert Elements to ConTeXt
+elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc
+elementToConTeXt _ (Blk block) = blockToConTeXt block
+elementToConTeXt opts (Sec level _ id' title' elements) = do
+ header' <- sectionHeader id' level title'
+ innerContents <- mapM (elementToConTeXt opts) elements
+ return $ vcat (header' : innerContents)
+
-- | Convert Pandoc block element to ConTeXt.
blockToConTeXt :: Block
-> State WriterState Doc
@@ -166,18 +180,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
blockToConTeXt (DefinitionList lst) =
liftM vcat $ mapM defListItemToConTeXt lst
blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline
-blockToConTeXt (Header level lst) = do
- contents <- inlineListToConTeXt lst
- st <- get
- let opts = stOptions st
- let base = if writerNumberSections opts then "section" else "subject"
- let level' = if writerChapters opts then level - 1 else level
- return $ if level' >= 1 && level' <= 5
- then char '\\' <> text (concat (replicate (level' - 1) "sub")) <>
- text base <> char '{' <> contents <> char '}' <> blankline
- else if level' == 0
- then "\\chapter{" <> contents <> "}"
- else contents <> blankline
+-- If this is ever executed, provide a default for the reference identifier.
+blockToConTeXt (Header level lst) = sectionHeader "" level lst
blockToConTeXt (Table caption aligns widths heads rows) = do
let colDescriptor colWidth alignment = (case alignment of
AlignLeft -> 'l'
@@ -213,8 +217,8 @@ defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc
defListItemToConTeXt (term, defs) = do
term' <- inlineListToConTeXt term
def' <- liftM vsep $ mapM blockListToConTeXt defs
- return $ "\\startdescr" <> braces term' $$ nest 2 def' $$
- "\\stopdescr" <> blankline
+ return $ "\\startdescription" <> braces term' $$ nest 2 def' $$
+ "\\stopdescription" <> blankline
-- | Convert list of block elements to ConTeXt.
blockListToConTeXt :: [Block] -> State WriterState Doc
@@ -257,10 +261,6 @@ inlineToConTeXt (Quoted DoubleQuote lst) = do
contents <- inlineListToConTeXt lst
return $ "\\quotation" <> braces contents
inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst
-inlineToConTeXt Apostrophe = return $ char '\''
-inlineToConTeXt EmDash = return "---"
-inlineToConTeXt EnDash = return "--"
-inlineToConTeXt Ellipses = return "\\ldots{}"
inlineToConTeXt (Str str) = return $ text $ stringToConTeXt str
inlineToConTeXt (Math InlineMath str) =
return $ char '$' <> text str <> char '$'
@@ -271,23 +271,69 @@ inlineToConTeXt (RawInline "tex" str) = return $ text str
inlineToConTeXt (RawInline _ _) = return empty
inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr
inlineToConTeXt Space = return space
-inlineToConTeXt (Link [Code _ str] (src, tit)) = -- since ConTeXt has its own
- inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links...
-inlineToConTeXt (Link txt (src, _)) = do
+-- autolink
+inlineToConTeXt (Link [Code _ str] (src, tit)) = inlineToConTeXt (Link
+ [RawInline "context" "\\hyphenatedurl{", Str str, RawInline "context" "}"]
+ (src, tit))
+-- Handle HTML-like internal document references to sections
+inlineToConTeXt (Link txt (('#' : ref), _)) = do
+ opts <- gets stOptions
+ label <- inlineListToConTeXt txt
+ return $ text "\\in"
+ <> braces (if writerNumberSections opts
+ then label <+> text "(\\S"
+ else label) -- prefix
+ <> braces (if writerNumberSections opts
+ then text ")"
+ else empty) -- suffix
+ <> brackets (text ref)
+
+inlineToConTeXt (Link txt (src, _)) = do
st <- get
let next = stNextRef st
put $ st {stNextRef = next + 1}
- let ref = show next
- label <- inlineListToConTeXt txt
- return $ "\\useURL" <> brackets (text ref) <> brackets (text src) <>
- brackets empty <> brackets label <>
- "\\from" <> brackets (text ref)
+ let ref = "url" ++ show next
+ label <- inlineListToConTeXt txt
+ return $ "\\useURL"
+ <> brackets (text ref)
+ <> brackets (text $ escapeStringUsing [('#',"\\#")] src)
+ <> brackets empty
+ <> brackets label
+ <> "\\from"
+ <> brackets (text ref)
inlineToConTeXt (Image _ (src, _)) = do
- let src' = if isAbsoluteURI src
+ let src' = if isURI src
then src
else unEscapeString src
return $ braces $ "\\externalfigure" <> brackets (text src')
inlineToConTeXt (Note contents) = do
contents' <- blockListToConTeXt contents
- return $ text "\\footnote{" <>
- nest 2 contents' <> char '}'
+ let codeBlock x@(CodeBlock _ _) = [x]
+ codeBlock _ = []
+ let codeBlocks = queryWith codeBlock contents
+ return $ if null codeBlocks
+ then text "\\footnote{" <> nest 2 contents' <> char '}'
+ else text "\\startbuffer " <> nest 2 contents' <>
+ text "\\stopbuffer\\footnote{\\getbuffer}"
+
+-- | Craft the section header, inserting the secton reference, if supplied.
+sectionHeader :: [Char]
+ -> Int
+ -> [Inline]
+ -> State WriterState Doc
+sectionHeader ident hdrLevel lst = do
+ contents <- inlineListToConTeXt lst
+ st <- get
+ let opts = stOptions st
+ let level' = if writerChapters opts then hdrLevel - 1 else hdrLevel
+ return $ if level' >= 1 && level' <= 5
+ then char '\\'
+ <> text (concat (replicate (level' - 1) "sub"))
+ <> text "section"
+ <> (if (not . null) ident then brackets (text ident) else empty)
+ <> braces contents
+ <> blankline
+ else if level' == 0
+ then "\\chapter{" <> contents <> "}"
+ else contents <> blankline
+
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 29c042cf9..1bcf99dcf 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -35,8 +35,11 @@ import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.Readers.TeXMath
import Data.List ( isPrefixOf, intercalate, isSuffixOf )
import Data.Char ( toLower )
-import Text.Pandoc.Highlighting (languages, languagesByExtension)
+import Text.Pandoc.Highlighting ( languages, languagesByExtension )
import Text.Pandoc.Pretty
+import Text.TeXMath
+import qualified Text.XML.Light as Xml
+import Data.Generics (everywhere, mkT)
-- | Convert list of authors to a docbook <author> section
authorToDocbook :: WriterOptions -> [Inline] -> Doc
@@ -69,34 +72,39 @@ writeDocbook opts (Pandoc (Meta tit auths dat) blocks) =
then Just $ writerColumns opts
else Nothing
render' = render colwidth
- opts' = if "</book>" `isSuffixOf`
+ opts' = if "/book>" `isSuffixOf`
(removeTrailingSpace $ writerTemplate opts)
then opts{ writerChapters = True }
else opts
- main = render' $ vcat (map (elementToDocbook opts') elements)
+ startLvl = if writerChapters opts' then 0 else 1
+ main = render' $ vcat (map (elementToDocbook opts' startLvl) elements)
context = writerVariables opts ++
[ ("body", main)
, ("title", render' title)
, ("date", render' date) ] ++
- [ ("author", render' a) | a <- authors ]
+ [ ("author", render' a) | a <- authors ] ++
+ [ ("mathml", "yes") | case writerHTMLMathMethod opts of
+ MathML _ -> True
+ _ -> False ]
in if writerStandalone opts
then renderTemplate context $ writerTemplate opts
else main
-- | Convert an Element to Docbook.
-elementToDocbook :: WriterOptions -> Element -> Doc
-elementToDocbook opts (Blk block) = blockToDocbook opts block
-elementToDocbook opts (Sec _ _num id' title elements) =
+elementToDocbook :: WriterOptions -> Int -> Element -> Doc
+elementToDocbook opts _ (Blk block) = blockToDocbook opts block
+elementToDocbook opts lvl (Sec _ _num id' title elements) =
-- Docbook doesn't allow sections with no content, so insert some if needed
let elements' = if null elements
then [Blk (Para [])]
else elements
- tag = if writerChapters opts
- then "chapter"
- else "section"
+ tag = case lvl of
+ n | n == 0 -> "chapter"
+ | n >= 1 && n <= 5 -> "sect" ++ show n
+ | otherwise -> "simplesect"
in inTags True tag [("id",id')] $
inTagsSimple "title" (inlinesToDocbook opts title) $$
- vcat (map (elementToDocbook opts{ writerChapters = False }) elements')
+ vcat (map (elementToDocbook opts (lvl + 1)) elements')
-- | Convert a list of Pandoc blocks to Docbook.
blocksToDocbook :: WriterOptions -> [Block] -> Doc
@@ -248,14 +256,27 @@ inlineToDocbook opts (Quoted _ lst) =
inTagsSimple "quote" $ inlinesToDocbook opts lst
inlineToDocbook opts (Cite _ lst) =
inlinesToDocbook opts lst
-inlineToDocbook _ Apostrophe = char '\''
-inlineToDocbook _ Ellipses = text "…"
-inlineToDocbook _ EmDash = text "—"
-inlineToDocbook _ EnDash = text "–"
inlineToDocbook _ (Code _ str) =
inTagsSimple "literal" $ text (escapeStringForXML str)
-inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str
-inlineToDocbook _ (RawInline _ _) = empty
+inlineToDocbook opts (Math t str)
+ | isMathML (writerHTMLMathMethod opts) =
+ case texMathToMathML dt str of
+ Right r -> inTagsSimple tagtype
+ $ text $ Xml.ppcElement conf
+ $ fixNS
+ $ removeAttr r
+ Left _ -> inlinesToDocbook opts
+ $ readTeXMath str
+ | otherwise = inlinesToDocbook opts $ readTeXMath str
+ where (dt, tagtype) = case t of
+ InlineMath -> (DisplayInline,"inlineequation")
+ DisplayMath -> (DisplayBlock,"informalequation")
+ conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP
+ removeAttr e = e{ Xml.elAttribs = [] }
+ fixNS' qname = qname{ Xml.qPrefix = Just "mml" }
+ fixNS = everywhere (mkT fixNS')
+inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x
+ | otherwise = empty
inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty
inlineToDocbook _ Space = space
inlineToDocbook opts (Link txt (src, _)) =
@@ -280,3 +301,7 @@ inlineToDocbook _ (Image _ (src, tit)) =
titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
inlineToDocbook opts (Note contents) =
inTagsIndented "footnote" $ blocksToDocbook opts contents
+
+isMathML :: HTMLMathMethod -> Bool
+isMathML (MathML _) = True
+isMathML _ = False
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
new file mode 100644
index 000000000..22278be7e
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -0,0 +1,666 @@
+{-
+Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Docx
+ Copyright : Copyright (C) 2012 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to docx.
+-}
+module Text.Pandoc.Writers.Docx ( writeDocx ) where
+import Data.List ( intercalate )
+import System.FilePath ( (</>) )
+import qualified Data.ByteString.Lazy as B
+import qualified Data.Map as M
+import Data.ByteString.Lazy.UTF8 ( fromString, toString )
+import Text.Pandoc.UTF8 as UTF8
+import System.IO ( stderr )
+import Codec.Archive.Zip
+import Data.Time.Clock.POSIX
+import Paths_pandoc ( getDataFileName )
+import Text.Pandoc.Definition
+import Text.Pandoc.Generic
+import System.Directory
+import Text.Pandoc.ImageSize
+import Text.Pandoc.Shared hiding (Element)
+import Text.Pandoc.Readers.TeXMath
+import Text.Pandoc.Highlighting ( highlight )
+import Text.Highlighting.Kate.Types ()
+import Text.XML.Light
+import Text.TeXMath
+import Control.Monad.State
+import Text.Highlighting.Kate
+
+data WriterState = WriterState{
+ stTextProperties :: [Element]
+ , stParaProperties :: [Element]
+ , stFootnotes :: [Element]
+ , stSectionIds :: [String]
+ , stExternalLinks :: M.Map String String
+ , stImages :: M.Map FilePath (String, B.ByteString)
+ , stListLevel :: Int
+ , stListMarker :: ListMarker
+ , stNumStyles :: M.Map ListMarker Int
+ , stLists :: [ListMarker]
+ }
+
+data ListMarker = NoMarker
+ | BulletMarker
+ | NumberMarker ListNumberStyle ListNumberDelim Int
+ deriving (Show, Read, Eq, Ord)
+
+defaultWriterState :: WriterState
+defaultWriterState = WriterState{
+ stTextProperties = []
+ , stParaProperties = []
+ , stFootnotes = []
+ , stSectionIds = []
+ , stExternalLinks = M.empty
+ , stImages = M.empty
+ , stListLevel = -1
+ , stListMarker = NoMarker
+ , stNumStyles = M.fromList [(NoMarker, 0)]
+ , stLists = [NoMarker]
+ }
+
+type WS a = StateT WriterState IO a
+
+showTopElement' :: Element -> String
+showTopElement' x = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++ showElement x
+
+mknode :: Node t => String -> [(String,String)] -> t -> Element
+mknode s attrs =
+ add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s)
+
+-- | Produce an Docx file from a Pandoc document.
+writeDocx :: Maybe FilePath -- ^ Path specified by --reference-docx
+ -> WriterOptions -- ^ Writer options
+ -> Pandoc -- ^ Document to convert
+ -> IO B.ByteString
+writeDocx mbRefDocx opts doc@(Pandoc (Meta tit auths date) _) = do
+ let datadir = writerUserDataDir opts
+ refArchive <- liftM toArchive $
+ case mbRefDocx of
+ Just f -> B.readFile f
+ Nothing -> do
+ let defaultDocx = getDataFileName "reference.docx" >>= B.readFile
+ case datadir of
+ Nothing -> defaultDocx
+ Just d -> do
+ exists <- doesFileExist (d </> "reference.docx")
+ if exists
+ then B.readFile (d </> "reference.docx")
+ else defaultDocx
+
+ (newContents, st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc)
+ defaultWriterState
+ epochtime <- floor `fmap` getPOSIXTime
+ let imgs = M.elems $ stImages st
+ let imgPath ident img = "media/" ++ ident ++
+ case imageType img of
+ Just Png -> ".png"
+ Just Jpeg -> ".jpeg"
+ Just Gif -> ".gif"
+ Nothing -> ""
+ let toImgRel (ident,img) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",imgPath ident img)] ()
+ let newrels = map toImgRel imgs
+ let relpath = "word/_rels/document.xml.rels"
+ let reldoc = case findEntryByPath relpath refArchive >>=
+ parseXMLDoc . toString . fromEntry of
+ Just d -> d
+ Nothing -> error $ relpath ++ "missing in reference docx"
+ let reldoc' = reldoc{ elContent = elContent reldoc ++ map Elem newrels }
+ -- create entries for images
+ let toImageEntry (ident,img) = toEntry ("word/" ++ imgPath ident img)
+ epochtime img
+ let imageEntries = map toImageEntry imgs
+ -- NOW get list of external links and images from this, and do what's needed
+ let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] ()
+ let newrels' = map toLinkRel $ M.toList $ stExternalLinks st
+ let reldoc'' = reldoc' { elContent = elContent reldoc' ++ map Elem newrels' }
+ let relEntry = toEntry relpath epochtime $ fromString $ showTopElement' reldoc''
+ let contentEntry = toEntry "word/document.xml" epochtime $ fromString $ showTopElement' newContents
+ -- styles
+ let newstyles = styleToOpenXml $ writerHighlightStyle opts
+ let stylepath = "word/styles.xml"
+ let styledoc = case findEntryByPath stylepath refArchive >>=
+ parseXMLDoc . toString . fromEntry of
+ Just d -> d
+ Nothing -> error $ stylepath ++ "missing in reference docx"
+ let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles }
+ let styleEntry = toEntry stylepath epochtime $ fromString $ showTopElement' styledoc'
+ -- construct word/numbering.xml
+ let numpath = "word/numbering.xml"
+ let numEntry = toEntry numpath epochtime $ fromString $ showTopElement'
+ $ mkNumbering (stNumStyles st) (stLists st)
+ let docPropsPath = "docProps/core.xml"
+ let docProps = mknode "cp:coreProperties"
+ [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties")
+ ,("xmlns:dc","http://purl.org/dc/elements/1.1/")
+ ,("xmlns:dcterms","http://purl.org/dc/terms/")
+ ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/")
+ ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")]
+ $ mknode "dc:title" [] (stringify tit)
+ : mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")]
+ (maybe "" id $ normalizeDate $ stringify date)
+ : mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here
+ : map (mknode "dc:creator" [] . stringify) auths
+ let docPropsEntry = toEntry docPropsPath epochtime $ fromString $ showTopElement' docProps
+ let relsPath = "_rels/.rels"
+ rels <- case findEntryByPath relsPath refArchive of
+ Just e -> return $ toString $ fromEntry e
+ Nothing -> err 57 "could not find .rels/_rels in reference docx"
+ -- fix .rels/_rels, which can get screwed up when reference.docx is edited by Word
+ let rels' = substitute "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
+ "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties"
+ rels
+ let relsEntry = toEntry relsPath epochtime $ fromString rels'
+ let archive = foldr addEntryToArchive refArchive $
+ relsEntry : contentEntry : relEntry : numEntry : styleEntry : docPropsEntry : imageEntries
+ return $ fromArchive archive
+
+styleToOpenXml :: Style -> [Element]
+styleToOpenXml style = parStyle : map toStyle alltoktypes
+ where alltoktypes = enumFromTo KeywordTok NormalTok
+ toStyle toktype = mknode "w:style" [("w:type","character"),
+ ("w:customStyle","1"),("w:styleId",show toktype)]
+ [ mknode "w:name" [("w:val",show toktype)] ()
+ , mknode "w:basedOn" [("w:val","VerbatimChar")] ()
+ , mknode "w:rPr" [] $
+ [ mknode "w:color" [("w:val",tokCol toktype)] ()
+ | tokCol toktype /= "auto" ] ++
+ [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] ()
+ | tokBg toktype /= "auto" ] ++
+ [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++
+ [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++
+ [ mknode "w:u" [] () | tokFeature tokenUnderline toktype ]
+ ]
+ tokStyles = tokenStyles style
+ tokFeature f toktype = maybe False f $ lookup toktype tokStyles
+ tokCol toktype = maybe "auto" (drop 1 . fromColor)
+ $ (tokenColor =<< lookup toktype tokStyles)
+ `mplus` defaultColor style
+ tokBg toktype = maybe "auto" (drop 1 . fromColor)
+ $ (tokenBackground =<< lookup toktype tokStyles)
+ `mplus` backgroundColor style
+ parStyle = mknode "w:style" [("w:type","paragraph"),
+ ("w:customStyle","1"),("w:styleId","SourceCode")]
+ [ mknode "w:name" [("w:val","Source Code")] ()
+ , mknode "w:basedOn" [("w:val","Normal")] ()
+ , mknode "w:link" [("w:val","VerbatimChar")] ()
+ , mknode "w:pPr" []
+ $ mknode "w:wordWrap" [("w:val","off")] ()
+ : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()])
+ $ backgroundColor style )
+ ]
+
+mkNumbering :: M.Map ListMarker Int -> [ListMarker] -> Element
+mkNumbering markers lists =
+ mknode "w:numbering" [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")]
+ $ map mkAbstractNum (M.toList markers)
+ ++ zipWith (mkNum markers) lists [1..(length lists)]
+
+mkNum :: M.Map ListMarker Int -> ListMarker -> Int -> Element
+mkNum markers marker numid =
+ mknode "w:num" [("w:numId",show numid)]
+ $ mknode "w:abstractNumId" [("w:val",show absnumid)] ()
+ : case marker of
+ NoMarker -> []
+ BulletMarker -> []
+ NumberMarker _ _ start ->
+ map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))]
+ $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6]
+ where absnumid = maybe 0 id $ M.lookup marker markers
+
+mkAbstractNum :: (ListMarker,Int) -> Element
+mkAbstractNum (marker,numid) =
+ mknode "w:abstractNum" [("w:abstractNumId",show numid)]
+ $ mknode "w:multiLevelType" [("w:val","multilevel")] ()
+ : map (mkLvl marker) [0..6]
+
+mkLvl :: ListMarker -> Int -> Element
+mkLvl marker lvl =
+ mknode "w:lvl" [("w:ilvl",show lvl)] $
+ [ mknode "w:start" [("w:val",start)] ()
+ | marker /= NoMarker && marker /= BulletMarker ] ++
+ [ mknode "w:numFmt" [("w:val",fmt)] ()
+ , mknode "w:lvlText" [("w:val",lvltxt)] ()
+ , mknode "w:lvlJc" [("w:val","left")] ()
+ , mknode "w:pPr" []
+ [ mknode "w:tabs" []
+ $ mknode "w:tab" [("w:val","num"),("w:pos",show $ lvl * step)] ()
+ , mknode "w:ind" [("w:left",show $ lvl * step + hang),("w:hanging",show hang)] ()
+ ]
+ ]
+ where (fmt, lvltxt, start) =
+ case marker of
+ NoMarker -> ("bullet"," ","1")
+ BulletMarker -> ("bullet",bulletFor lvl,"1")
+ NumberMarker st de n -> (styleFor st lvl
+ ,patternFor de ("%" ++ show (lvl + 1))
+ ,show n)
+ step = 720
+ hang = 480
+ bulletFor 0 = "\8226"
+ bulletFor 1 = "\9702"
+ bulletFor 2 = "\8227"
+ bulletFor 3 = "\8259"
+ bulletFor 4 = "\8226"
+ bulletFor 5 = "\9702"
+ bulletFor _ = "\8227"
+ styleFor UpperAlpha _ = "upperLetter"
+ styleFor LowerAlpha _ = "lowerLetter"
+ styleFor UpperRoman _ = "upperRoman"
+ styleFor LowerRoman _ = "lowerRoman"
+ styleFor Decimal _ = "decimal"
+ styleFor DefaultStyle 1 = "decimal"
+ styleFor DefaultStyle 2 = "lowerLetter"
+ styleFor DefaultStyle 3 = "lowerRoman"
+ styleFor DefaultStyle 4 = "decimal"
+ styleFor DefaultStyle 5 = "lowerLetter"
+ styleFor DefaultStyle 6 = "lowerRoman"
+ styleFor _ _ = "decimal"
+ patternFor OneParen s = s ++ ")"
+ patternFor TwoParens s = "(" ++ s ++ ")"
+ patternFor _ s = s ++ "."
+
+-- | Convert Pandoc document to string in OpenXML format.
+writeOpenXML :: WriterOptions -> Pandoc -> WS Element
+writeOpenXML opts (Pandoc (Meta tit auths dat) blocks) = do
+ title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)]
+ authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts
+ [Para (intercalate [LineBreak] auths) | not (null auths)]
+ date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)]
+ let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs
+ convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs
+ convertSpace xs = xs
+ let blocks' = bottomUp convertSpace $ blocks
+ doc <- blocksToOpenXML opts blocks'
+ notes' <- reverse `fmap` gets stFootnotes
+ let notes = case notes' of
+ [] -> []
+ ns -> [mknode "w:footnotes" [] ns]
+ let meta = title ++ authors ++ date
+ return $ mknode "w:document"
+ [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")
+ ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math")
+ ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships")
+ ,("xmlns:o","urn:schemas-microsoft-com:office:office")
+ ,("xmlns:v","urn:schemas-microsoft-com:vml")
+ ,("xmlns:w10","urn:schemas-microsoft-com:office:word")
+ ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main")
+ ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture")
+ ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")]
+ $ mknode "w:body" [] (meta ++ doc ++ notes)
+
+-- | Convert a list of Pandoc blocks to OpenXML.
+blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element]
+blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls
+
+pStyle :: String -> Element
+pStyle sty = mknode "w:pStyle" [("w:val",sty)] ()
+
+rStyle :: String -> Element
+rStyle sty = mknode "w:rStyle" [("w:val",sty)] ()
+
+-- | Convert a Pandoc block element to OpenXML.
+blockToOpenXML :: WriterOptions -> Block -> WS [Element]
+blockToOpenXML _ Null = return []
+blockToOpenXML opts (Header lev lst) = do
+ contents <- withParaProp (pStyle $ "Heading" ++ show lev) $
+ blockToOpenXML opts (Para lst)
+ usedIdents <- gets stSectionIds
+ let ident = uniqueIdent lst usedIdents
+ modify $ \s -> s{ stSectionIds = ident : stSectionIds s }
+ let bookmarkStart = mknode "w:bookmarkStart" [("w:id",ident)
+ ,("w:name",ident)] ()
+ let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id",ident)] ()
+ return $ [bookmarkStart] ++ contents ++ [bookmarkEnd]
+blockToOpenXML opts (Plain lst) = blockToOpenXML opts (Para lst)
+blockToOpenXML opts (Para x@[Image alt _]) = do
+ paraProps <- getParaProps
+ contents <- inlinesToOpenXML opts x
+ captionNode <- withParaProp (pStyle "ImageCaption")
+ $ blockToOpenXML opts (Para alt)
+ return $ mknode "w:p" [] (paraProps ++ contents) : captionNode
+blockToOpenXML opts (Para lst) = do
+ paraProps <- getParaProps
+ contents <- inlinesToOpenXML opts lst
+ return [mknode "w:p" [] (paraProps ++ contents)]
+blockToOpenXML _ (RawBlock format str)
+ | format == "openxml" = return [ x | Elem x <- parseXML str ]
+ | otherwise = return []
+blockToOpenXML opts (BlockQuote blocks) =
+ withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks
+blockToOpenXML opts (CodeBlock attrs str) =
+ withParaProp (pStyle "SourceCode") $ blockToOpenXML opts $ Para [Code attrs str]
+blockToOpenXML _ HorizontalRule = return [
+ mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" []
+ $ mknode "v:rect" [("style","width:0;height:1.5pt"),
+ ("o:hralign","center"),
+ ("o:hrstd","t"),("o:hr","t")] () ]
+blockToOpenXML opts (Table caption aligns widths headers rows) = do
+ let captionStr = stringify caption
+ caption' <- if null caption
+ then return []
+ else withParaProp (pStyle "TableCaption")
+ $ blockToOpenXML opts (Para caption)
+ let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] ()
+ let cellToOpenXML (al, cell) = withParaProp (alignmentFor al)
+ $ blocksToOpenXML opts cell
+ headers' <- mapM cellToOpenXML $ zip aligns headers
+ rows' <- mapM (\cells -> mapM cellToOpenXML $ zip aligns cells)
+ $ rows
+ let borderProps = mknode "w:tcPr" []
+ [ mknode "w:tcBorders" []
+ $ mknode "w:bottom" [("w:val","single")] ()
+ , mknode "w:vAlign" [("w:val","bottom")] () ]
+ let mkcell border contents = mknode "w:tc" []
+ $ [ borderProps | border ] ++
+ if null contents
+ then [mknode "w:p" [] ()]
+ else contents
+ let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells
+ let textwidth = 7920 -- 5.5 in in twips, 1/20 pt
+ let mkgridcol w = mknode "w:gridCol"
+ [("w:w", show $ (floor (textwidth * w) :: Integer))] ()
+ return $
+ [ mknode "w:tbl" []
+ ( mknode "w:tblPr" []
+ [ mknode "w:tblCaption" [("w:val", captionStr)] ()
+ | not (null caption) ]
+ : mknode "w:tblGrid" []
+ (if all (==0) widths
+ then []
+ else map mkgridcol widths)
+ : [ mkrow True headers' | not (all null headers) ] ++
+ map (mkrow False) rows'
+ )
+ ] ++ caption'
+blockToOpenXML opts (BulletList lst) = do
+ let marker = BulletMarker
+ addList marker
+ asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst
+blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do
+ let marker = NumberMarker numstyle numdelim start
+ addList marker
+ asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst
+blockToOpenXML opts (DefinitionList items) =
+ concat `fmap` mapM (definitionListItemToOpenXML opts) items
+
+definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element]
+definitionListItemToOpenXML opts (term,defs) = do
+ term' <- withParaProp (pStyle "DefinitionTerm")
+ $ blockToOpenXML opts (Para term)
+ defs' <- withParaProp (pStyle "Definition")
+ $ concat `fmap` mapM (blocksToOpenXML opts) defs
+ return $ term' ++ defs'
+
+getNumId :: WS Int
+getNumId = length `fmap` gets stLists
+
+addList :: ListMarker -> WS ()
+addList marker = do
+ lists <- gets stLists
+ modify $ \st -> st{ stLists = lists ++ [marker] }
+ numStyles <- gets stNumStyles
+ case M.lookup marker numStyles of
+ Just _ -> return ()
+ Nothing -> modify $ \st ->
+ st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles }
+
+listItemToOpenXML :: WriterOptions -> ListMarker -> [Block] -> WS [Element]
+listItemToOpenXML _ _ [] = return []
+listItemToOpenXML opts marker (first:rest) = do
+ first' <- withMarker marker $ blockToOpenXML opts first
+ rest' <- withMarker NoMarker $ blocksToOpenXML opts rest
+ return $ first' ++ rest'
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+-- | Convert a list of inline elements to OpenXML.
+inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element]
+inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst
+
+withMarker :: ListMarker -> WS a -> WS a
+withMarker m p = do
+ origMarker <- gets stListMarker
+ modify $ \st -> st{ stListMarker = m }
+ result <- p
+ modify $ \st -> st{ stListMarker = origMarker }
+ return result
+
+asList :: WS a -> WS a
+asList p = do
+ origListLevel <- gets stListLevel
+ modify $ \st -> st{ stListLevel = stListLevel st + 1 }
+ result <- p
+ modify $ \st -> st{ stListLevel = origListLevel }
+ return result
+
+getTextProps :: WS [Element]
+getTextProps = do
+ props <- gets stTextProperties
+ return $ if null props
+ then []
+ else [mknode "w:rPr" [] $ props]
+
+pushTextProp :: Element -> WS ()
+pushTextProp d = modify $ \s -> s{ stTextProperties = d : stTextProperties s }
+
+popTextProp :: WS ()
+popTextProp = modify $ \s -> s{ stTextProperties = drop 1 $ stTextProperties s }
+
+withTextProp :: Element -> WS a -> WS a
+withTextProp d p = do
+ pushTextProp d
+ res <- p
+ popTextProp
+ return res
+
+getParaProps :: WS [Element]
+getParaProps = do
+ props <- gets stParaProperties
+ listLevel <- gets stListLevel
+ numid <- getNumId
+ let listPr = if listLevel >= 0
+ then [ mknode "w:numPr" []
+ [ mknode "w:numId" [("w:val",show numid)] ()
+ , mknode "w:ilvl" [("w:val",show listLevel)] () ]
+ ]
+ else []
+ return $ case props ++ listPr of
+ [] -> []
+ ps -> [mknode "w:pPr" [] ps]
+
+pushParaProp :: Element -> WS ()
+pushParaProp d = modify $ \s -> s{ stParaProperties = d : stParaProperties s }
+
+popParaProp :: WS ()
+popParaProp = modify $ \s -> s{ stParaProperties = drop 1 $ stParaProperties s }
+
+withParaProp :: Element -> WS a -> WS a
+withParaProp d p = do
+ pushParaProp d
+ res <- p
+ popParaProp
+ return res
+
+formattedString :: String -> WS [Element]
+formattedString str = do
+ props <- getTextProps
+ return [ mknode "w:r" [] $
+ props ++
+ [ mknode "w:t" [("xml:space","preserve")] str ] ]
+
+-- | Convert an inline element to OpenXML.
+inlineToOpenXML :: WriterOptions -> Inline -> WS [Element]
+inlineToOpenXML _ (Str str) = formattedString str
+inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ")
+inlineToOpenXML opts (Strong lst) =
+ withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst
+inlineToOpenXML opts (Emph lst) =
+ withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst
+inlineToOpenXML opts (Subscript lst) =
+ withTextProp (mknode "w:vertAlign" [("w:val","subscript")] ())
+ $ inlinesToOpenXML opts lst
+inlineToOpenXML opts (Superscript lst) =
+ withTextProp (mknode "w:vertAlign" [("w:val","superscript")] ())
+ $ inlinesToOpenXML opts lst
+inlineToOpenXML opts (SmallCaps lst) =
+ withTextProp (mknode "w:smallCaps" [] ())
+ $ inlinesToOpenXML opts lst
+inlineToOpenXML opts (Strikeout lst) =
+ withTextProp (mknode "w:strike" [] ())
+ $ inlinesToOpenXML opts lst
+inlineToOpenXML _ LineBreak = return [ mknode "w:br" [] () ]
+inlineToOpenXML _ (RawInline f str)
+ | f == "openxml" = return [ x | Elem x <- parseXML str ]
+ | otherwise = return []
+inlineToOpenXML opts (Quoted quoteType lst) =
+ inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close]
+ where (open, close) = case quoteType of
+ SingleQuote -> ("\x2018", "\x2019")
+ DoubleQuote -> ("\x201C", "\x201D")
+inlineToOpenXML opts (Math InlineMath str) =
+ case texMathToOMML DisplayInline str of
+ Right r -> return [r]
+ Left _ -> inlinesToOpenXML opts (readTeXMath str)
+inlineToOpenXML opts (Math DisplayMath str) =
+ case texMathToOMML DisplayBlock str of
+ Right r -> return [br, r, br]
+ Left _ -> do
+ fallback <- inlinesToOpenXML opts (readTeXMath str)
+ return $ [br] ++ fallback ++ [br]
+ where br = mknode "w:br" [] ()
+inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst
+inlineToOpenXML _ (Code attrs str) =
+ withTextProp (rStyle "VerbatimChar")
+ $ case highlight formatOpenXML attrs str of
+ Nothing -> intercalate [mknode "w:br" [] ()]
+ `fmap` (mapM formattedString $ lines str)
+ Just h -> return h
+ where formatOpenXML _fmtOpts = intercalate [mknode "w:br" [] ()] .
+ map (map toHlTok)
+ toHlTok (toktype,tok) = mknode "w:r" []
+ [ mknode "w:rPr" []
+ [ rStyle $ show toktype ]
+ , mknode "w:t" [("xml:space","preserve")] tok ]
+inlineToOpenXML opts (Note bs) = do
+ notes <- gets stFootnotes
+ let notenum = length notes + 1
+ let notemarker = mknode "w:r" []
+ [ mknode "w:rPr" [] (rStyle "FootnoteReference")
+ , mknode "w:footnoteRef" [] () ]
+ let notemarkerXml = RawInline "openxml" $ ppElement notemarker
+ let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs
+ insertNoteRef (Para ils : xs) = Para (notemarkerXml : ils) : xs
+ insertNoteRef xs = Para [notemarkerXml] : xs
+ oldListLevel <- gets stListLevel
+ oldParaProperties <- gets stParaProperties
+ oldTextProperties <- gets stTextProperties
+ modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] }
+ contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts
+ $ insertNoteRef bs
+ modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties,
+ stTextProperties = oldTextProperties }
+ let newnote = mknode "w:footnote" [("w:id",show notenum)] $ contents
+ modify $ \s -> s{ stFootnotes = newnote : notes }
+ return [ mknode "w:r" []
+ [ mknode "w:rPr" [] (rStyle "FootnoteReference")
+ , mknode "w:footnoteReference" [("w:id", show notenum)] () ] ]
+-- internal link:
+inlineToOpenXML opts (Link txt ('#':xs,_)) = do
+ contents <- withTextProp (rStyle "Hyperlink") $ inlinesToOpenXML opts txt
+ return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ]
+-- external link:
+inlineToOpenXML opts (Link txt (src,_)) = do
+ contents <- withTextProp (rStyle "Hyperlink") $ inlinesToOpenXML opts txt
+ extlinks <- gets stExternalLinks
+ ind <- case M.lookup src extlinks of
+ Just i -> return i
+ Nothing -> do
+ let i = "link" ++ show (M.size extlinks)
+ modify $ \st -> st{ stExternalLinks =
+ M.insert src i extlinks }
+ return i
+ return [ mknode "w:hyperlink" [("r:id",ind)] contents ]
+inlineToOpenXML opts (Image alt (src, tit)) = do
+ exists <- liftIO $ doesFileExist src
+ if exists
+ then do
+ imgs <- gets stImages
+ (ident,size) <- case M.lookup src imgs of
+ Just (i,img) -> return (i, imageSize img)
+ Nothing -> do
+ img <- liftIO $ B.readFile src
+ let ident' = "image" ++ show (M.size imgs + 1)
+ let size' = imageSize img
+ modify $ \st -> st{
+ stImages = M.insert src (ident',img) $ stImages st }
+ return (ident',size')
+ let (xpt,ypt) = maybe (120,120) sizeInPoints size
+ -- 12700 emu = 1 pt
+ let (xemu,yemu) = (xpt * 12700, ypt * 12700)
+ let cNvPicPr = mknode "pic:cNvPicPr" [] $
+ mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] ()
+ let nvPicPr = mknode "pic:nvPicPr" []
+ [ mknode "pic:cNvPr"
+ [("descr",src),("id","0"),("name","Picture")] ()
+ , cNvPicPr ]
+ let blipFill = mknode "pic:blipFill" []
+ [ mknode "a:blip" [("r:embed",ident)] ()
+ , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ]
+ let xfrm = mknode "a:xfrm" []
+ [ mknode "a:off" [("x","0"),("y","0")] ()
+ , mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ]
+ let prstGeom = mknode "a:prstGeom" [("prst","rect")] $
+ mknode "a:avLst" [] ()
+ let ln = mknode "a:ln" [("w","9525")]
+ [ mknode "a:noFill" [] ()
+ , mknode "a:headEnd" [] ()
+ , mknode "a:tailEnd" [] () ]
+ let spPr = mknode "pic:spPr" [("bwMode","auto")]
+ [xfrm, prstGeom, mknode "a:noFill" [] (), ln]
+ let graphic = mknode "a:graphic" [] $
+ mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")]
+ [ mknode "pic:pic" []
+ [ nvPicPr
+ , blipFill
+ , spPr ] ]
+ return [ mknode "w:r" [] $
+ mknode "w:drawing" [] $
+ mknode "wp:inline" []
+ [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] ()
+ , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] ()
+ , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] ()
+ , graphic ] ]
+ else do
+ liftIO $ UTF8.hPutStrLn stderr $
+ "Could not find image `" ++ src ++ "', skipping..."
+ inlinesToOpenXML opts alt
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 9fc393fed..67048348e 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -32,30 +32,31 @@ import Data.IORef
import Data.Maybe ( fromMaybe, isNothing )
import Data.List ( findIndices, isPrefixOf )
import System.Environment ( getEnv )
-import System.FilePath ( (</>), takeBaseName, takeExtension )
+import System.FilePath ( (</>), (<.>), takeBaseName, takeExtension, takeFileName )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( fromString )
import Codec.Archive.Zip
-import System.Time
+import Data.Time.Clock.POSIX
import Text.Pandoc.Shared hiding ( Element )
import Text.Pandoc.Definition
import Text.Pandoc.Generic
-import Control.Monad (liftM)
+import Control.Monad.State
import Text.XML.Light hiding (ppTopElement)
import Text.Pandoc.UUID
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.Markdown ( writePlain )
import Data.Char ( toLower )
-import System.Directory ( copyFile )
import Network.URI ( unEscapeString )
+import Text.Pandoc.MIME (getMimeType)
-- | Produce an EPUB file from a Pandoc document.
writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line
+ -> [FilePath] -- ^ Paths to fonts to embed
-> WriterOptions -- ^ Writer options
-> Pandoc -- ^ Document to convert
-> IO B.ByteString
-writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
- (TOD epochtime _) <- getClockTime
+writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do
+ epochtime <- floor `fmap` getPOSIXTime
let mkEntry path content = toEntry path epochtime content
let opts' = opts{ writerEmailObfuscation = NoObfuscation
, writerStandalone = True
@@ -64,17 +65,24 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
let vars = writerVariables opts'
let mbCoverImage = lookup "epub-cover-image" vars
+ titlePageTemplate <- readDataFile (writerUserDataDir opts)
+ $ "templates" </> "epub-titlepage" <.> "html"
+
+ coverImageTemplate <- readDataFile (writerUserDataDir opts)
+ $ "templates" </> "epub-coverimage" <.> "html"
+
+ pageTemplate <- readDataFile (writerUserDataDir opts)
+ $ "templates" </> "epub-page" <.> "html"
+
-- cover page
(cpgEntry, cpicEntry) <-
case mbCoverImage of
Nothing -> return ([],[])
Just img -> do
let coverImage = "cover-image" ++ takeExtension img
- copyFile img coverImage
let cpContent = fromString $ writeHtmlString
- opts'{writerTemplate = pageTemplate
- ,writerVariables =
- ("coverimage",coverImage):vars}
+ opts'{writerTemplate = coverImageTemplate,
+ writerVariables = ("coverimage",coverImage):vars}
(Pandoc meta [])
imgContent <- B.readFile img
return ( [mkEntry "cover.xhtml" cpContent]
@@ -82,37 +90,47 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
-- title page
let tpContent = fromString $ writeHtmlString
- opts'{writerTemplate = pageTemplate
- ,writerVariables = ("titlepage","yes"):vars}
+ opts'{writerTemplate = titlePageTemplate}
(Pandoc meta [])
let tpEntry = mkEntry "title_page.xhtml" tpContent
-- handle pictures
picsRef <- newIORef []
- Pandoc _ blocks <- liftM (bottomUp transformBlock) $ bottomUpM
+ Pandoc _ blocks <- bottomUpM
(transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc
pics <- readIORef picsRef
let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e ->
return e{ eRelativePath = newsrc }
picEntries <- mapM readPicEntry pics
+ -- handle fonts
+ let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
+ fontEntries <- mapM mkFontEntry fonts
+
-- body pages
let isH1 (Header 1 _) = True
isH1 _ = False
- let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks
- let chunks = splitByIndices h1Indices blocks
+ -- internal reference IDs change when we chunk the file,
+ -- so the next two lines fix that:
+ let reftable = correlateRefs blocks
+ let blocks' = replaceRefs reftable blocks
+ let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks'
+ let chunks = splitByIndices h1Indices blocks'
let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys
titleize xs = Pandoc meta xs
- let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate }
let chapters = map titleize chunks
+ let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate }
let chapterToEntry :: Int -> Pandoc -> Entry
chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $
fromString $ chapToHtml chap
let chapterEntries = zipWith chapterToEntry [1..] chapters
-- contents.opf
- lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang")
- (\_ -> return "en-US")
+ localeLang <- catch (liftM (takeWhile (/='.')) $ getEnv "LANG")
+ (\_ -> return "en-US")
+ let lang = case lookup "lang" (writerVariables opts') of
+ Just x -> x
+ Nothing -> localeLang
uuid <- getRandomUUID
let chapterNode ent = unode "item" !
[("id", takeBaseName $ eRelativePath ent),
@@ -125,17 +143,22 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
("href", eRelativePath ent),
("media-type", fromMaybe "application/octet-stream"
$ imageTypeOf $ eRelativePath ent)] $ ()
+ let fontNode ent = unode "item" !
+ [("id", takeBaseName $ eRelativePath ent),
+ ("href", eRelativePath ent),
+ ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ ()
let plainify t = removeTrailingSpace $
writePlain opts'{ writerStandalone = False } $
Pandoc meta [Plain t]
let plainTitle = plainify $ docTitle meta
let plainAuthors = map plainify $ docAuthors meta
+ let plainDate = maybe "" id $ normalizeDate $ stringify $ docDate meta
let contentsData = fromString $ ppTopElement $
unode "package" ! [("version","2.0")
,("xmlns","http://www.idpf.org/2007/opf")
,("unique-identifier","BookId")] $
[ metadataElement (writerEPUBMetadata opts')
- uuid lang plainTitle plainAuthors mbCoverImage
+ uuid lang plainTitle plainAuthors plainDate mbCoverImage
, unode "manifest" $
[ unode "item" ! [("id","ncx"), ("href","toc.ncx")
,("media-type","application/x-dtbncx+xml")] $ ()
@@ -143,7 +166,8 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
,("media-type","text/css")] $ ()
] ++
map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
- map pictureNode (cpicEntry ++ picEntries)
+ map pictureNode (cpicEntry ++ picEntries) ++
+ map fontNode fontEntries
, unode "spine" ! [("toc","ncx")] $
case mbCoverImage of
Nothing -> []
@@ -197,6 +221,13 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
,("media-type","application/oebps-package+xml")] $ ()
let containerEntry = mkEntry "META-INF/container.xml" containerData
+ -- com.apple.ibooks.display-options.xml
+ let apple = fromString $ ppTopElement $
+ unode "display_options" $
+ unode "platform" ! [("name","*")] $
+ unode "option" ! [("name","specified-fonts")] $ "true"
+ let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
+
-- stylesheet
stylesheet <- case mbStylesheet of
Just s -> return s
@@ -205,13 +236,13 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
-- construct archive
let archive = foldr addEntryToArchive emptyArchive
- (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry :
+ (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry :
contentsEntry : tocEntry :
- (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries) )
+ (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries) )
return $ fromArchive archive
-metadataElement :: String -> UUID -> String -> String -> [String] -> Maybe a -> Element
-metadataElement metadataXML uuid lang title authors mbCoverImage =
+metadataElement :: String -> UUID -> String -> String -> [String] -> String -> Maybe a -> Element
+metadataElement metadataXML uuid lang title authors date mbCoverImage =
let userNodes = parseXML metadataXML
elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
,("xmlns:opf","http://www.idpf.org/2007/opf")] $
@@ -227,6 +258,7 @@ metadataElement metadataXML uuid lang title authors mbCoverImage =
[ unode "dc:identifier" ! [("id","BookId")] $ show uuid |
not (elt `contains` "identifier") ] ++
[ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] ++
+ [ unode "dc:date" date | not (elt `contains` "date") ] ++
[ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () |
not (isNothing mbCoverImage) ]
in elt{ elContent = elContent elt ++ map Elem newNodes }
@@ -263,20 +295,24 @@ transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do
"</ops:switch>"
result = if "<math" `isPrefixOf` mathml then inOps else mathml
return $ RawInline "html" result : xs
-transformInlines _ _ _ (RawInline _ _ : xs) = return $ Str "" : xs
-transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs
transformInlines _ _ _ xs = return xs
-transformBlock :: Block -> Block
-transformBlock (RawBlock _ _) = Null
-transformBlock x = x
-
(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element
(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
-- | Version of 'ppTopElement' that specifies UTF-8 encoding.
ppTopElement :: Element -> String
-ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement
+ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
+ -- unEntity removes numeric entities introduced by ppElement
+ -- (kindlegen seems to choke on these).
+ where unEntity [] = ""
+ unEntity ('&':'#':xs) =
+ let (ds,ys) = break (==';') xs
+ rest = drop 1 ys
+ in case reads ('\'':'\\':ds ++ "'") of
+ ((x,_):_) -> x : unEntity rest
+ _ -> '&':'#':unEntity xs
+ unEntity (x:xs) = x : unEntity xs
imageTypeOf :: FilePath -> Maybe String
imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
@@ -288,38 +324,49 @@ imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
"svg" -> Just "image/svg+xml"
_ -> Nothing
-pageTemplate :: String
-pageTemplate = unlines
- [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
- , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">"
- , "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
- , "<head>"
- , "<title>$title$</title>"
- , "$if(coverimage)$"
- , "<style type=\"text/css\">img{ max-width: 100%; }</style>"
- , "$endif$"
- , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />"
- , "</head>"
- , "<body>"
- , "$if(coverimage)$"
- , "<div id=\"cover-image\">"
- , "<img src=\"$coverimage$\" alt=\"$title$\" />"
- , "</div>"
- , "$else$"
- , "$if(titlepage)$"
- , "<h1 class=\"title\">$title$</h1>"
- , "$for(author)$"
- , "<h2 class=\"author\">$author$</h2>"
- , "$endfor$"
- , "$else$"
- , "<h1>$title$</h1>"
- , "$if(toc)$"
- , "$toc$"
- , "$endif$"
- , "$endif$"
- , "$body$"
- , "$endif$"
- , "</body>"
- , "</html>"
- ]
+data IdentState = IdentState{
+ chapterNumber :: Int,
+ runningIdents :: [String],
+ chapterIdents :: [String],
+ identTable :: [(String,String)]
+ } deriving (Read, Show)
+
+-- Go through a block list and construct a table
+-- correlating the automatically constructed references
+-- that would be used in a normal pandoc document with
+-- new URLs to be used in the EPUB. For example, what
+-- was "header-1" might turn into "ch6.xhtml#header".
+correlateRefs :: [Block] -> [(String,String)]
+correlateRefs bs = identTable $ execState (mapM_ go bs)
+ IdentState{ chapterNumber = 0
+ , runningIdents = []
+ , chapterIdents = []
+ , identTable = [] }
+ where go :: Block -> State IdentState ()
+ go (Header n ils) = do
+ when (n == 1) $
+ modify $ \s -> s{ chapterNumber = chapterNumber s + 1
+ , chapterIdents = [] }
+ st <- get
+ let runningid = uniqueIdent ils (runningIdents st)
+ let chapid = if n == 1
+ then Nothing
+ else Just $ uniqueIdent ils (chapterIdents st)
+ modify $ \s -> s{ runningIdents = runningid : runningIdents st
+ , chapterIdents = maybe (chapterIdents st)
+ (: chapterIdents st) chapid
+ , identTable = (runningid, "ch" ++ show (chapterNumber st) ++
+ ".xhtml" ++ maybe "" ('#':) chapid) : identTable st
+ }
+ go _ = return ()
+
+-- Replace internal link references using the table produced
+-- by correlateRefs.
+replaceRefs :: [(String,String)] -> [Block] -> [Block]
+replaceRefs refTable = bottomUp replaceOneRef
+ where replaceOneRef x@(Link lab ('#':xs,tit)) =
+ case lookup xs refTable of
+ Just url -> Link lab (url,tit)
+ Nothing -> x
+ replaceOneRef x = x
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index 573adbf4a..f35b29370 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -30,168 +31,186 @@ Conversion of 'Pandoc' documents to HTML.
-}
module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
import Text.Pandoc.Definition
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.Pandoc.Shared
import Text.Pandoc.Templates
+import Text.Pandoc.Generic
import Text.Pandoc.Readers.TeXMath
-import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss )
-import Text.Pandoc.XML (stripTags, escapeStringForXML)
+import Text.Pandoc.Slides
+import Text.Pandoc.Highlighting ( highlight, styleToCss,
+ formatHtmlInline, formatHtmlBlock )
+import Text.Pandoc.XML (stripTags, escapeStringForXML, fromEntities)
import Network.HTTP ( urlEncode )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
import Data.List ( isPrefixOf, intersperse )
+import Data.String ( fromString )
import Data.Maybe ( catMaybes )
import Control.Monad.State
-import Text.XHtml.Transitional hiding ( stringToHtml, unordList, ordList )
-import qualified Text.XHtml.Transitional as XHtml
+import Text.Blaze
+import qualified Text.Blaze.Html5 as H5
+import qualified Text.Blaze.XHtml1.Transitional as H
+import qualified Text.Blaze.XHtml1.Transitional.Attributes as A
+import Text.Blaze.Renderer.String (renderHtml)
import Text.TeXMath
import Text.XML.Light.Output
import System.FilePath (takeExtension)
+import Data.Monoid (mempty, mconcat)
data WriterState = WriterState
{ stNotes :: [Html] -- ^ List of notes
, stMath :: Bool -- ^ Math is used in document
, stHighlighting :: Bool -- ^ Syntax highlighting is used
, stSecNum :: [Int] -- ^ Number of current section
- } deriving Show
+ }
defaultWriterState :: WriterState
defaultWriterState = WriterState {stNotes= [], stMath = False, stHighlighting = False, stSecNum = []}
-- Helpers to render HTML with the appropriate function.
--- | Modified version of Text.XHtml's stringToHtml.
--- Use unicode characters wherever possible.
-stringToHtml :: WriterOptions -> String -> Html
-stringToHtml opts = if writerAscii opts
- then XHtml.stringToHtml
- else primHtml . escapeStringForXML
+strToHtml :: String -> Html
+strToHtml = preEscapedString . escapeStringForXML
+-- strToHtml = toHtml
-- | Hard linebreak.
nl :: WriterOptions -> Html
nl opts = if writerWrapText opts
- then primHtml "\n"
- else noHtml
+ then preEscapedString "\n"
+ else mempty
-- | Convert Pandoc document to Html string.
writeHtmlString :: WriterOptions -> Pandoc -> String
writeHtmlString opts d =
- let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d)
- defaultWriterState
+ let (tit, auths, authsMeta, date, toc, body', newvars) = evalState (pandocToHtml opts d)
+ defaultWriterState
in if writerStandalone opts
- then inTemplate opts tit auths date toc body' newvars
- else dropWhile (=='\n') $ showHtmlFragment body'
+ then inTemplate opts tit auths authsMeta date toc body' newvars
+ else renderHtml body'
-- | Convert Pandoc document to Html structure.
writeHtml :: WriterOptions -> Pandoc -> Html
writeHtml opts d =
- let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d)
- defaultWriterState
+ let (tit, auths, authsMeta, date, toc, body', newvars) = evalState (pandocToHtml opts d)
+ defaultWriterState
in if writerStandalone opts
- then inTemplate opts tit auths date toc body' newvars
+ then inTemplate opts tit auths authsMeta date toc body' newvars
else body'
-- result is (title, authors, date, toc, body, new variables)
pandocToHtml :: WriterOptions
-> Pandoc
- -> State WriterState (Html, [Html], Html, Maybe Html, Html, [(String,String)])
+ -> State WriterState (Html, [Html], [Html], Html, Maybe Html, Html, [(String,String)])
pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do
let standalone = writerStandalone opts
tit <- if standalone
then inlineListToHtml opts title'
- else return noHtml
+ else return mempty
auths <- if standalone
then mapM (inlineListToHtml opts) authors'
else return []
+ authsMeta <- if standalone
+ then mapM (inlineListToHtml opts . prepForMeta) authors'
+ else return []
date <- if standalone
then inlineListToHtml opts date'
- else return noHtml
+ else return mempty
+ let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts
let sects = hierarchicalize $
if writerSlideVariant opts == NoSlides
then blocks
- else case blocks of
- (Header 1 _ : _) -> blocks
- _ ->
- let isL1 (Header 1 _) = True
- isL1 _ = False
- (preBlocks, rest) = break isL1 blocks
- in (RawBlock "html" "<div class=\"slide\">" :
- preBlocks) ++ (RawBlock "html" "</div>" :
- rest)
+ else prepSlides slideLevel blocks
toc <- if writerTableOfContents opts
then tableOfContents opts sects
else return Nothing
- blocks' <- liftM (toHtmlFromList . intersperse (nl opts)) $
- mapM (elementToHtml opts) sects
+ blocks' <- liftM (mconcat . intersperse (nl opts)) $
+ mapM (elementToHtml slideLevel opts) sects
st <- get
let notes = reverse (stNotes st)
- let thebody = blocks' +++ footnoteSection opts notes
+ let thebody = blocks' >> footnoteSection opts notes
let math = if stMath st
then case writerHTMLMathMethod opts of
LaTeXMathML (Just url) ->
- script !
- [src url, thetype "text/javascript"] $ noHtml
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ mempty
MathML (Just url) ->
- script !
- [src url, thetype "text/javascript"] $ noHtml
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ mempty
MathJax url ->
- script ! [src url, thetype "text/javascript"] $ noHtml
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ mempty
JsMath (Just url) ->
- script !
- [src url, thetype "text/javascript"] $ noHtml
+ H.script ! A.src (toValue url)
+ ! A.type_ "text/javascript"
+ $ mempty
_ -> case lookup "mathml-script" (writerVariables opts) of
- Just s ->
- script ! [thetype "text/javascript"] <<
- primHtml ("/*<![CDATA[*/\n" ++ s ++
- "/*]]>*/\n")
- Nothing -> noHtml
- else noHtml
- let newvars = [("highlighting-css", defaultHighlightingCss) |
+ Just s | not (writerHtml5 opts) ->
+ H.script ! A.type_ "text/javascript"
+ $ preEscapedString
+ ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n")
+ | otherwise -> mempty
+ Nothing -> mempty
+ else mempty
+ let newvars = [("highlighting-css",
+ styleToCss $ writerHighlightStyle opts) |
stHighlighting st] ++
- [("math", showHtmlFragment math) | stMath st]
- return (tit, auths, date, toc, thebody, newvars)
+ [("math", renderHtml math) | stMath st]
+ return (tit, auths, authsMeta, date, toc, thebody, newvars)
+
+-- | Prepare author for meta tag, converting notes into
+-- bracketed text and removing links.
+prepForMeta :: [Inline] -> [Inline]
+prepForMeta = bottomUp (concatMap fixInline)
+ where fixInline (Note [Para xs]) = [Str " ["] ++ xs ++ [Str "]"]
+ fixInline (Note [Plain xs]) = [Str " ["] ++ xs ++ [Str "]"]
+ fixInline (Link lab _) = lab
+ fixInline (Image lab _) = lab
+ fixInline x = [x]
inTemplate :: TemplateTarget a
=> WriterOptions
-> Html
-> [Html]
+ -> [Html]
-> Html
-> Maybe Html
-> Html
-> [(String,String)]
-> a
-inTemplate opts tit auths date toc body' newvars =
- let renderedTit = showHtmlFragment tit
- topTitle' = stripTags renderedTit
- authors = map (stripTags . showHtmlFragment) auths
- date' = stripTags $ showHtmlFragment date
+inTemplate opts tit auths authsMeta date toc body' newvars =
+ let title' = renderHtml tit
+ date' = renderHtml date
+ dateMeta = maybe [] (\x -> [("date-meta",x)]) $ normalizeDate date'
variables = writerVariables opts ++ newvars
- context = variables ++
- [ ("body", dropWhile (=='\n') $ showHtmlFragment body')
- , ("pagetitle", topTitle')
- , ("title", dropWhile (=='\n') $ showHtmlFragment tit)
+ context = variables ++ dateMeta ++
+ [ ("body", dropWhile (=='\n') $ renderHtml body')
+ , ("pagetitle", stripTags title')
+ , ("title", title')
, ("date", date')
, ("idprefix", writerIdentifierPrefix opts)
, ("slidy-url", "http://www.w3.org/Talks/Tools/Slidy2")
- , ("s5-url", "ui/default") ] ++
+ , ("s5-url", "s5/default") ] ++
[ ("html5","true") | writerHtml5 opts ] ++
(case toc of
- Just t -> [ ("toc", showHtmlFragment t)]
+ Just t -> [ ("toc", renderHtml t)]
Nothing -> []) ++
- [ ("author", a) | a <- authors ]
+ [ ("author", renderHtml a) | a <- auths ] ++
+ [ ("author-meta", stripTags $ renderHtml a) | a <- authsMeta ]
in renderTemplate context $ writerTemplate opts
-- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix
-prefixedId :: WriterOptions -> String -> HtmlAttr
-prefixedId opts s = identifier $ writerIdentifierPrefix opts ++ s
+prefixedId :: WriterOptions -> String -> Attribute
+prefixedId opts s = A.id $ toValue $ writerIdentifierPrefix opts ++ s
-- | Replacement for Text.XHtml's unordList.
unordList :: WriterOptions -> ([Html] -> Html)
-unordList opts items = ulist << toListItems opts items
+unordList opts items = H.ul $ mconcat $ toListItems opts items
-- | Replacement for Text.XHtml's ordList.
ordList :: WriterOptions -> ([Html] -> Html)
-ordList opts items = olist << toListItems opts items
+ordList opts items = H.ol $ mconcat $ toListItems opts items
-- | Construct table of contents from list of elements.
tableOfContents :: WriterOptions -> [Element] -> State WriterState (Maybe Html)
@@ -214,52 +233,66 @@ elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html)
elementToListItem _ (Blk _) = return Nothing
elementToListItem opts (Sec _ num id' headerText subsecs) = do
let sectnum = if writerNumberSections opts
- then (thespan ! [theclass "toc-section-number"] << showSecNum num) +++
- stringToHtml opts" "
- else noHtml
- txt <- liftM (sectnum +++) $ inlineListToHtml opts headerText
+ then (H.span ! A.class_ "toc-section-number" $ toHtml $ showSecNum num) >>
+ preEscapedString " "
+ else mempty
+ txt <- liftM (sectnum >>) $ inlineListToHtml opts headerText
subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes
let subList = if null subHeads
- then noHtml
+ then mempty
else unordList opts subHeads
- return $ Just $ (anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ id')] $ txt) +++ subList
+ return $ Just $ (H.a ! A.href (toValue $ "#" ++ writerIdentifierPrefix opts ++ id')
+ $ toHtml txt) >> subList
-- | Convert an Element to Html.
-elementToHtml :: WriterOptions -> Element -> State WriterState Html
-elementToHtml opts (Blk HorizontalRule) | writerSlideVariant opts /= NoSlides =
- return $ primHtml "</div>" +++ nl opts +++ primHtml "<div class=\"slide\">"
-elementToHtml opts (Blk block) = blockToHtml opts block
-elementToHtml opts (Sec level num id' title' elements) = do
+elementToHtml :: Int -> WriterOptions -> Element -> State WriterState Html
+elementToHtml _slideLevel opts (Blk block) = blockToHtml opts block
+elementToHtml slideLevel opts (Sec level num id' title' elements) = do
+ let slide = writerSlideVariant opts /= NoSlides && level <= slideLevel
modify $ \st -> st{stSecNum = num} -- update section number
- header' <- blockToHtml opts (Header level title')
- innerContents <- mapM (elementToHtml opts) elements
- let header'' = header' ! [prefixedId opts id' |
- not (writerStrictMarkdown opts ||
- writerSectionDivs opts ||
- writerSlideVariant opts == S5Slides)]
- let stuff = header'' : innerContents
- let slide = writerSlideVariant opts /= NoSlides && level == 1
- let stuff' = if slide
- then [thediv ! [theclass "slide"] <<
- (nl opts : intersperse (nl opts) stuff ++ [nl opts])]
- else intersperse (nl opts) stuff
- let inNl x = nl opts : x ++ [nl opts]
- return $ if writerSectionDivs opts
- then if writerHtml5 opts
- then tag "section" ! [prefixedId opts id'] << inNl stuff'
- else thediv ! [prefixedId opts id'] << inNl stuff'
- else toHtmlFromList stuff'
+ -- always use level 1 for slide titles
+ let level' = if slide then 1 else level
+ let titleSlide = slide && level < slideLevel
+ header' <- blockToHtml opts (Header level' title')
+ let isSec (Sec _ _ _ _ _) = True
+ isSec (Blk _) = False
+ innerContents <- mapM (elementToHtml slideLevel opts)
+ $ if titleSlide
+ -- title slides have no content of their own
+ then filter isSec elements
+ else elements
+ let header'' = if (writerStrictMarkdown opts ||
+ writerSectionDivs opts ||
+ writerSlideVariant opts == S5Slides)
+ then header'
+ else header' ! prefixedId opts id'
+ let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts]
+ let classes = ["titleslide" | titleSlide] ++ ["slide" | slide] ++
+ ["level" ++ show level]
+ let secttag = if writerHtml5 opts
+ then H5.section ! A.class_ (toValue $ unwords classes)
+ else H.div ! A.class_ (toValue $ unwords ("section":classes))
+ return $ if titleSlide
+ then mconcat $ (secttag ! prefixedId opts id' $ header'') : innerContents
+ else if writerSectionDivs opts || slide
+ then secttag ! prefixedId opts id' $ inNl $ header'' : innerContents
+ else mconcat $ intersperse (nl opts) $ header'' : innerContents
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
footnoteSection :: WriterOptions -> [Html] -> Html
footnoteSection opts notes =
if null notes
- then noHtml
- else nl opts +++ (thediv ! [theclass "footnotes"]
- $ nl opts +++ hr +++ nl opts +++
- (olist << (notes ++ [nl opts])) +++ nl opts)
-
+ then mempty
+ else nl opts >> (container
+ $ nl opts >> hrtag >> nl opts >>
+ H.ol (mconcat notes >> nl opts) >> nl opts)
+ where container x = if writerHtml5 opts
+ then H5.section ! A.class_ "footnotes" $ x
+ else if writerSlideVariant opts /= NoSlides
+ then H.div ! A.class_ "footnotes slide" $ x
+ else H.div ! A.class_ "footnotes" $ x
+ hrtag = if writerHtml5 opts then H5.hr else H.hr
-- | Parse a mailto link; return Just (name, domain) or Nothing.
parseMailto :: String -> Maybe (String, String)
@@ -272,7 +305,7 @@ parseMailto _ = Nothing
-- | Obfuscate a "mailto:" link.
obfuscateLink :: WriterOptions -> String -> String -> Html
obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation =
- anchor ! [href s] << txt
+ H.a ! A.href (toValue s) $ toHtml txt
obfuscateLink opts txt s =
let meth = writerEmailObfuscation opts
s' = map toLower s
@@ -287,19 +320,19 @@ obfuscateLink opts txt s =
domain' ++ ")")
in case meth of
ReferenceObfuscation ->
- -- need to use primHtml or &'s are escaped to &amp; in URL
- primHtml $ "<a href=\"" ++ (obfuscateString s')
+ -- need to use preEscapedString or &'s are escaped to &amp; in URL
+ preEscapedString $ "<a href=\"" ++ (obfuscateString s')
++ "\">" ++ (obfuscateString txt) ++ "</a>"
JavascriptObfuscation ->
- (script ! [thetype "text/javascript"] $
- primHtml ("\n<!--\nh='" ++
+ (H.script ! A.type_ "text/javascript" $
+ preEscapedString ("\n<!--\nh='" ++
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
obfuscateString name' ++ "';e=n+a+h;\n" ++
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
- linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
- noscript (primHtml $ obfuscateString altText)
+ linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >>
+ H.noscript (preEscapedString $ obfuscateString altText)
_ -> error $ "Unknown obfuscation method: " ++ show meth
- _ -> anchor ! [href s] $ stringToHtml opts txt -- malformed email
+ _ -> H.a ! A.href (toValue s) $ toHtml txt -- malformed email
-- | Obfuscate character as entity.
obfuscateChar :: Char -> String
@@ -310,13 +343,13 @@ obfuscateChar char =
-- | Obfuscate string using entities.
obfuscateString :: String -> String
-obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
+obfuscateString = concatMap obfuscateChar . fromEntities
-attrsToHtml :: WriterOptions -> Attr -> [HtmlAttr]
+attrsToHtml :: WriterOptions -> Attr -> [Attribute]
attrsToHtml opts (id',classes',keyvals) =
- [theclass (unwords classes') | not (null classes')] ++
+ [A.class_ (toValue $ unwords classes') | not (null classes')] ++
[prefixedId opts id' | not (null id')] ++
- map (\(x,y) -> strAttr x y) keyvals
+ map (\(x,y) -> customAttribute (fromString x) (toValue y)) keyvals
imageExts :: [String]
imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf",
@@ -331,40 +364,41 @@ treatAsImage fp =
-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> State WriterState Html
-blockToHtml _ Null = return noHtml
+blockToHtml _ Null = return mempty
blockToHtml opts (Plain lst) = inlineListToHtml opts lst
blockToHtml opts (Para [Image txt (s,tit)]) = do
img <- inlineToHtml opts (Image txt (s,tit))
capt <- inlineListToHtml opts txt
return $ if writerHtml5 opts
- then tag "figure" <<
- [nl opts, img, tag "figcaption" << capt, nl opts]
- else thediv ! [theclass "figure"] <<
- [nl opts, img, paragraph ! [theclass "caption"] << capt,
+ then H5.figure $ mconcat
+ [nl opts, img, H5.figcaption capt, nl opts]
+ else H.div ! A.class_ "figure" $ mconcat
+ [nl opts, img, H.p ! A.class_ "caption" $ capt,
nl opts]
blockToHtml opts (Para lst) = do
contents <- inlineListToHtml opts lst
- return $ paragraph contents
-blockToHtml _ (RawBlock "html" str) = return $ primHtml str
-blockToHtml _ (RawBlock _ _) = return noHtml
-blockToHtml _ (HorizontalRule) = return hr
+ return $ H.p contents
+blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str
+blockToHtml _ (RawBlock _ _) = return mempty
+blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr
blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
- let classes' = if writerLiterateHaskell opts
- then classes
+ let tolhs = writerLiterateHaskell opts &&
+ any (\c -> map toLower c == "haskell") classes &&
+ any (\c -> map toLower c == "literate") classes
+ classes' = if tolhs
+ then map (\c -> if map toLower c == "haskell"
+ then "literatehaskell"
+ else c) classes
else filter (/= "literate") classes
- case highlightHtml False (id',classes',keyvals) rawCode of
- Left _ -> -- change leading newlines into <br /> tags, because some
- -- browsers ignore leading newlines in pre blocks
- let (leadingBreaks, rawCode') = span (=='\n') rawCode
- attrs = attrsToHtml opts (id', classes', keyvals)
- addBird = if "literate" `elem` classes'
- then unlines . map ("> " ++) . lines
- else unlines . lines
- in return $ pre ! attrs $ thecode <<
- (replicate (length leadingBreaks) br +++
- [stringToHtml opts $ addBird rawCode'])
- Right h -> modify (\st -> st{ stHighlighting = True }) >>
- return h
+ adjCode = if tolhs
+ then unlines . map ("> " ++) . lines $ rawCode
+ else rawCode
+ case highlight formatHtmlBlock (id',classes,keyvals) adjCode of
+ Nothing -> let attrs = attrsToHtml opts (id', classes', keyvals)
+ in return $ foldl (!) H.pre attrs $ H.code
+ $ toHtml adjCode
+ Just h -> modify (\st -> st{ stHighlighting = True }) >>
+ return (foldl (!) h (attrsToHtml opts (id',[],keyvals)))
blockToHtml opts (BlockQuote blocks) =
-- in S5, treat list in blockquote specially
-- if default is incremental, make it nonincremental;
@@ -378,47 +412,48 @@ blockToHtml opts (BlockQuote blocks) =
blockToHtml (opts {writerIncremental = inc})
(OrderedList attribs lst)
_ -> do contents <- blockListToHtml opts blocks
- return $ blockquote (nl opts +++
- contents +++ nl opts)
+ return $ H.blockquote
+ $ nl opts >> contents >> nl opts
else do
contents <- blockListToHtml opts blocks
- return $ blockquote (nl opts +++ contents +++ nl opts)
+ return $ H.blockquote $ nl opts >> contents >> nl opts
blockToHtml opts (Header level lst) = do
contents <- inlineListToHtml opts lst
secnum <- liftM stSecNum get
let contents' = if writerNumberSections opts
- then (thespan ! [theclass "header-section-number"] << showSecNum secnum) +++
- stringToHtml opts " " +++ contents
+ then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >>
+ strToHtml " " >> contents
else contents
let contents'' = if writerTableOfContents opts
- then anchor ! [href $ "#" ++ writerIdentifierPrefix opts ++ "TOC"] $ contents'
+ then H.a ! A.href (toValue $ "#" ++ writerIdentifierPrefix opts ++ "TOC") $ contents'
else contents'
return $ (case level of
- 1 -> h1 contents''
- 2 -> h2 contents''
- 3 -> h3 contents''
- 4 -> h4 contents''
- 5 -> h5 contents''
- 6 -> h6 contents''
- _ -> paragraph contents'')
+ 1 -> H.h1 contents''
+ 2 -> H.h2 contents''
+ 3 -> H.h3 contents''
+ 4 -> H.h4 contents''
+ 5 -> H.h5 contents''
+ 6 -> H.h6 contents''
+ _ -> H.p contents'')
blockToHtml opts (BulletList lst) = do
contents <- mapM (blockListToHtml opts) lst
- let attribs = if writerIncremental opts
- then [theclass "incremental"]
- else []
- return $ (unordList opts contents) ! attribs
+ let lst' = unordList opts contents
+ let lst'' = if writerIncremental opts
+ then lst' ! A.class_ "incremental"
+ else lst'
+ return lst''
blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
contents <- mapM (blockListToHtml opts) lst
let numstyle' = camelCaseToHyphenated $ show numstyle
let attribs = (if writerIncremental opts
- then [theclass "incremental"]
+ then [A.class_ "incremental"]
else []) ++
(if startnum /= 1
- then [start startnum]
+ then [A.start $ toValue startnum]
else []) ++
(if numstyle /= DefaultStyle
then if writerHtml5 opts
- then [strAttr "type" $
+ then [A.type_ $
case numstyle of
Decimal -> "1"
LowerAlpha -> "a"
@@ -426,44 +461,44 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
LowerRoman -> "i"
UpperRoman -> "I"
_ -> "1"]
- else [thestyle $ "list-style-type: " ++
+ else [A.style $ toValue $ "list-style-type: " ++
numstyle']
else [])
- return $ (ordList opts contents) ! attribs
+ return $ foldl (!) (ordList opts contents) attribs
blockToHtml opts (DefinitionList lst) = do
contents <- mapM (\(term, defs) ->
- do term' <- liftM (dterm <<) $ inlineListToHtml opts term
- defs' <- mapM ((liftM (\x -> ddef << (x +++ nl opts))) .
+ do term' <- liftM (H.dt) $ inlineListToHtml opts term
+ defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) .
blockListToHtml opts) defs
- return $ nl opts : term' : nl opts : defs') lst
- let attribs = if writerIncremental opts
- then [theclass "incremental"]
- else []
- return $ dlist ! attribs << (concat contents +++ nl opts)
+ return $ mconcat $ nl opts : term' : nl opts : defs') lst
+ let lst' = H.dl $ mconcat contents >> nl opts
+ let lst'' = if writerIncremental opts
+ then lst' ! A.class_ "incremental"
+ else lst'
+ return lst''
blockToHtml opts (Table capt aligns widths headers rows') = do
captionDoc <- if null capt
- then return noHtml
+ then return mempty
else do
cs <- inlineListToHtml opts capt
- return $ caption cs +++ nl opts
+ return $ H.caption cs >> nl opts
let percent w = show (truncate (100*w) :: Integer) ++ "%"
- let widthAttrs w = if writerHtml5 opts
- then [thestyle $ "width: " ++ percent w]
- else [width $ percent w]
let coltags = if all (== 0.0) widths
- then noHtml
- else concatHtml $ map
- (\w -> (col ! (widthAttrs w)) noHtml +++ nl opts)
- widths
+ then mempty
+ else mconcat $ map (\w ->
+ if writerHtml5 opts
+ then H.col ! A.style (toValue $ "width: " ++ percent w)
+ else H.col ! A.width (toValue $ percent w) >> nl opts)
+ widths
head' <- if all null headers
- then return noHtml
+ then return mempty
else do
contents <- tableRowToHtml opts aligns 0 headers
- return $ thead << (nl opts +++ contents) +++ nl opts
- body' <- liftM (\x -> tbody << (nl opts +++ x)) $
+ return $ H.thead (nl opts >> contents) >> nl opts
+ body' <- liftM (\x -> H.tbody (nl opts >> mconcat x)) $
zipWithM (tableRowToHtml opts aligns) [1..] rows'
- return $ table $ nl opts +++ captionDoc +++ coltags +++ head' +++
- body' +++ nl opts
+ return $ H.table $ nl opts >> captionDoc >> coltags >> head' >>
+ body' >> nl opts
tableRowToHtml :: WriterOptions
-> [Alignment]
@@ -471,7 +506,7 @@ tableRowToHtml :: WriterOptions
-> [[Block]]
-> State WriterState Html
tableRowToHtml opts aligns rownum cols' = do
- let mkcell = if rownum == 0 then th else td
+ let mkcell = if rownum == 0 then H.th else H.td
let rowclass = case rownum of
0 -> "header"
x | x `rem` 2 == 1 -> "odd"
@@ -479,8 +514,8 @@ tableRowToHtml opts aligns rownum cols' = do
cols'' <- sequence $ zipWith
(\alignment item -> tableItemToHtml opts mkcell alignment item)
aligns cols'
- return $ (tr ! [theclass rowclass] $ nl opts +++ toHtmlFromList cols'')
- +++ nl opts
+ return $ (H.tr ! A.class_ rowclass $ nl opts >> mconcat cols'')
+ >> nl opts
alignmentToString :: Alignment -> [Char]
alignmentToString alignment = case alignment of
@@ -496,84 +531,87 @@ tableItemToHtml :: WriterOptions
-> State WriterState Html
tableItemToHtml opts tag' align' item = do
contents <- blockListToHtml opts item
- let alignAttrs = if writerHtml5 opts
- then [thestyle $ "align: " ++ alignmentToString align']
- else [align $ alignmentToString align']
- return $ (tag' ! alignAttrs) contents +++ nl opts
+ let alignStr = alignmentToString align'
+ let attribs = if writerHtml5 opts
+ then A.style (toValue $ "text-align: " ++ alignStr ++ ";")
+ else A.align (toValue alignStr)
+ return $ (tag' ! attribs $ contents) >> nl opts
toListItems :: WriterOptions -> [Html] -> [Html]
toListItems opts items = map (toListItem opts) items ++ [nl opts]
toListItem :: WriterOptions -> Html -> Html
-toListItem opts item = nl opts +++ li item
+toListItem opts item = nl opts >> H.li item
blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
blockListToHtml opts lst =
mapM (blockToHtml opts) lst >>=
- return . toHtmlFromList . intersperse (nl opts)
+ return . mconcat . intersperse (nl opts)
-- | Convert list of Pandoc inline elements to HTML.
inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html
inlineListToHtml opts lst =
- mapM (inlineToHtml opts) lst >>= return . toHtmlFromList
+ mapM (inlineToHtml opts) lst >>= return . mconcat
-- | Convert Pandoc inline element to HTML.
inlineToHtml :: WriterOptions -> Inline -> State WriterState Html
inlineToHtml opts inline =
case inline of
- (Str str) -> return $ stringToHtml opts str
- (Space) -> return $ stringToHtml opts " "
- (LineBreak) -> return br
- (EmDash) -> return $ stringToHtml opts "—"
- (EnDash) -> return $ stringToHtml opts "–"
- (Ellipses) -> return $ stringToHtml opts "…"
- (Apostrophe) -> return $ stringToHtml opts "’"
- (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize
- (Strong lst) -> inlineListToHtml opts lst >>= return . strong
- (Code attr str) -> case highlightHtml True attr str of
- Left _ -> return
- $ thecode ! (attrsToHtml opts attr)
- $ stringToHtml opts str
- Right h -> return h
+ (Str str) -> return $ strToHtml str
+ (Space) -> return $ strToHtml " "
+ (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br
+ (Emph lst) -> inlineListToHtml opts lst >>= return . H.em
+ (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong
+ (Code attr str) -> case highlight formatHtmlInline attr str of
+ Nothing -> return
+ $ foldl (!) H.code (attrsToHtml opts attr)
+ $ strToHtml str
+ Just h -> return $ foldl (!) h $
+ attrsToHtml opts (id',[],keyvals)
+ where (id',_,keyvals) = attr
(Strikeout lst) -> inlineListToHtml opts lst >>=
- return . (thespan ! [thestyle "text-decoration: line-through;"])
+ return . H.del
(SmallCaps lst) -> inlineListToHtml opts lst >>=
- return . (thespan ! [thestyle "font-variant: small-caps;"])
- (Superscript lst) -> inlineListToHtml opts lst >>= return . sup
- (Subscript lst) -> inlineListToHtml opts lst >>= return . sub
+ return . (H.span ! A.style "font-variant: small-caps;")
+ (Superscript lst) -> inlineListToHtml opts lst >>= return . H.sup
+ (Subscript lst) -> inlineListToHtml opts lst >>= return . H.sub
(Quoted quoteType lst) ->
let (leftQuote, rightQuote) = case quoteType of
- SingleQuote -> (stringToHtml opts "‘",
- stringToHtml opts "’")
- DoubleQuote -> (stringToHtml opts "“",
- stringToHtml opts "”")
+ SingleQuote -> (strToHtml "‘",
+ strToHtml "’")
+ DoubleQuote -> (strToHtml "“",
+ strToHtml "”")
in do contents <- inlineListToHtml opts lst
- return $ leftQuote +++ contents +++ rightQuote
+ return $ leftQuote >> contents >> rightQuote
(Math t str) -> modify (\st -> st {stMath = True}) >>
(case writerHTMLMathMethod opts of
LaTeXMathML _ ->
-- putting LaTeXMathML in container with class "LaTeX" prevents
-- non-math elements on the page from being treated as math by
-- the javascript
- return $ thespan ! [theclass "LaTeX"] $
+ return $ H.span ! A.class_ "LaTeX" $
case t of
- InlineMath -> primHtml ("$" ++ str ++ "$")
- DisplayMath -> primHtml ("$$" ++ str ++ "$$")
+ InlineMath -> toHtml ("$" ++ str ++ "$")
+ DisplayMath -> toHtml ("$$" ++ str ++ "$$")
JsMath _ -> do
- let m = primHtml str
+ let m = preEscapedString str
return $ case t of
- InlineMath -> thespan ! [theclass "math"] $ m
- DisplayMath -> thediv ! [theclass "math"] $ m
+ InlineMath -> H.span ! A.class_ "math" $ m
+ DisplayMath -> H.div ! A.class_ "math" $ m
WebTeX url -> do
- let m = image ! [src (url ++ urlEncode str),
- alt str, title str]
+ let imtag = if writerHtml5 opts then H5.img else H.img
+ let m = imtag ! A.style "vertical-align:middle"
+ ! A.src (toValue $ url ++ urlEncode str)
+ ! A.alt (toValue str)
+ ! A.title (toValue str)
+ let brtag = if writerHtml5 opts then H5.br else H.br
return $ case t of
InlineMath -> m
- DisplayMath -> br +++ m +++ br
+ DisplayMath -> brtag >> m >> brtag
GladTeX ->
return $ case t of
- InlineMath -> primHtml $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>"
- DisplayMath -> primHtml $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>"
+ InlineMath -> preEscapedString "<EQ ENV=\"math\">" >> toHtml str >> preEscapedString "</EQ>"
+ DisplayMath -> preEscapedString "<EQ ENV=\"displaymath\">" >> toHtml str >> preEscapedString "</EQ>"
MathML _ -> do
let dt = if t == InlineMath
then DisplayInline
@@ -581,54 +619,57 @@ inlineToHtml opts inline =
let conf = useShortEmptyTags (const False)
defaultConfigPP
case texMathToMathML dt str of
- Right r -> return $ primHtml $
+ Right r -> return $ preEscapedString $
ppcElement conf r
Left _ -> inlineListToHtml opts
(readTeXMath str) >>= return .
- (thespan ! [theclass "math"])
- MathJax _ -> return $ primHtml $
+ (H.span ! A.class_ "math")
+ MathJax _ -> return $ toHtml $
case t of
InlineMath -> "\\(" ++ str ++ "\\)"
DisplayMath -> "\\[" ++ str ++ "\\]"
PlainMath -> do
x <- inlineListToHtml opts (readTeXMath str)
- let m = thespan ! [theclass "math"] $ x
+ let m = H.span ! A.class_ "math" $ x
+ let brtag = if writerHtml5 opts then H5.br else H.br
return $ case t of
InlineMath -> m
- DisplayMath -> br +++ m +++ br )
+ DisplayMath -> brtag >> m >> brtag )
(RawInline "latex" str) -> case writerHTMLMathMethod opts of
LaTeXMathML _ -> do modify (\st -> st {stMath = True})
- return $ primHtml str
- _ -> return noHtml
- (RawInline "html" str) -> return $ primHtml str
- (RawInline _ _) -> return noHtml
+ return $ toHtml str
+ _ -> return mempty
+ (RawInline "html" str) -> return $ preEscapedString str
+ (RawInline _ _) -> return mempty
(Link [Code _ str] (s,_)) | "mailto:" `isPrefixOf` s ->
return $ obfuscateLink opts str s
(Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do
linkText <- inlineListToHtml opts txt
- return $ obfuscateLink opts (show linkText) s
+ return $ obfuscateLink opts (renderHtml linkText) s
(Link txt (s,tit)) -> do
linkText <- inlineListToHtml opts txt
- return $ anchor ! ([href s] ++
- if null tit then [] else [title tit]) $
- linkText
+ let link = H.a ! A.href (toValue s) $ linkText
+ return $ if null tit
+ then link
+ else link ! A.title (toValue tit)
(Image txt (s,tit)) | treatAsImage s -> do
let alternate' = stringify txt
- let attributes = [src s] ++
+ let attributes = [A.src $ toValue s] ++
(if null tit
then []
- else [title tit]) ++
+ else [A.title $ toValue tit]) ++
if null txt
then []
- else [alt alternate']
- return $ image ! attributes
+ else [A.alt $ toValue alternate']
+ let tag = if writerHtml5 opts then H5.img else H.img
+ return $ foldl (!) tag attributes
-- note: null title included, as in Markdown.pl
(Image _ (s,tit)) -> do
- let attributes = [src s] ++
+ let attributes = [A.src $ toValue s] ++
(if null tit
then []
- else [title tit])
- return $ itag "embed" ! attributes
+ else [A.title $ toValue tit])
+ return $ foldl (!) H5.embed attributes
-- note: null title included, as in Markdown.pl
(Note contents) -> do
st <- get
@@ -638,19 +679,19 @@ inlineToHtml opts inline =
htmlContents <- blockListToNote opts ref contents
-- push contents onto front of notes
put $ st {stNotes = (htmlContents:notes)}
- return $ sup <<
- anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref),
- theclass "footnoteRef",
- prefixedId opts ("fnref" ++ ref)] << ref
- (Cite _ il) -> inlineListToHtml opts il
+ return $ H.sup $
+ H.a ! A.href (toValue $ "#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref)
+ ! A.class_ "footnoteRef"
+ ! prefixedId opts ("fnref" ++ ref)
+ $ toHtml ref
+ (Cite _ il) -> do contents <- inlineListToHtml opts il
+ return $ H.span ! A.class_ "citation" $ contents
blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
blockListToNote opts ref blocks =
-- If last block is Para or Plain, include the backlink at the end of
-- that block. Otherwise, insert a new Plain block with the backlink.
- let backlink = [RawInline "html" $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++
- "\" class=\"footnoteBackLink\">" ++
- (if writerAscii opts then "&#8617;" else "↩") ++ "</a>"]
+ let backlink = [Link [Str "↩"] ("#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref,[])]
blocks' = if null blocks
then []
else let lastBlock = last blocks
@@ -663,4 +704,4 @@ blockListToNote opts ref blocks =
_ -> otherBlocks ++ [lastBlock,
Plain backlink]
in do contents <- blockListToHtml opts blocks'
- return $ nl opts +++ (li ! [prefixedId opts ("fn" ++ ref)]) contents
+ return $ nl opts >> (H.li ! (prefixedId opts ("fn" ++ ref)) $ contents)
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index d925b2897..e99b20c60 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Writers.LaTeX
Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
+ License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
+ Stability : alpha
Portability : portable
Conversion of 'Pandoc' format into LaTeX.
@@ -41,53 +41,68 @@ import Data.Char ( toLower, isPunctuation )
import Control.Monad.State
import Text.Pandoc.Pretty
import System.FilePath (dropExtension)
+import Text.Pandoc.Slides
+import Text.Pandoc.Highlighting (highlight, styleToLaTeX,
+ formatLaTeXInline, formatLaTeXBlock)
-data WriterState =
- WriterState { stInNote :: Bool -- @True@ if we're in a note
- , stInTable :: Bool -- @True@ if we're in a table
- , stTableNotes :: [(Char, Doc)] -- List of markers, notes
- -- in current table
- , stOLLevel :: Int -- level of ordered list nesting
- , stOptions :: WriterOptions -- writer options, so they don't have to be parameter
- , stVerbInNote :: Bool -- true if document has verbatim text in note
- , stEnumerate :: Bool -- true if document needs fancy enumerated lists
- , stTable :: Bool -- true if document has a table
- , stStrikeout :: Bool -- true if document has strikeout
- , stSubscript :: Bool -- true if document has subscript
- , stUrl :: Bool -- true if document has visible URL link
- , stGraphics :: Bool -- true if document contains images
- , stLHS :: Bool -- true if document has literate haskell code
- , stBook :: Bool -- true if document uses book or memoir class
- , stCsquotes :: Bool -- true if document uses csquotes
+data WriterState =
+ WriterState { stInNote :: Bool -- true if we're in a note
+ , stInTable :: Bool -- true if we're in a table
+ , stTableNotes :: [(Char, Doc)] -- List of markers, notes
+ -- in current table
+ , stOLLevel :: Int -- level of ordered list nesting
+ , stOptions :: WriterOptions -- writer options, so they don't have to be parameter
+ , stVerbInNote :: Bool -- true if document has verbatim text in note
+ , stEnumerate :: Bool -- true if document needs fancy enumerated lists
+ , stTable :: Bool -- true if document has a table
+ , stStrikeout :: Bool -- true if document has strikeout
+ , stSubscript :: Bool -- true if document has subscript
+ , stUrl :: Bool -- true if document has visible URL link
+ , stGraphics :: Bool -- true if document contains images
+ , stLHS :: Bool -- true if document has literate haskell code
+ , stBook :: Bool -- true if document uses book or memoir class
+ , stCsquotes :: Bool -- true if document uses csquotes
+ , stHighlighting :: Bool -- true if document has highlighted code
+ , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit
+ , stInternalLinks :: [String] -- list of internal link targets
}
-- | Convert Pandoc to LaTeX.
writeLaTeX :: WriterOptions -> Pandoc -> String
-writeLaTeX options document =
- evalState (pandocToLaTeX options document) $
+writeLaTeX options document =
+ evalState (pandocToLaTeX options document) $
WriterState { stInNote = False, stInTable = False,
stTableNotes = [], stOLLevel = 1, stOptions = options,
stVerbInNote = False, stEnumerate = False,
stTable = False, stStrikeout = False, stSubscript = False,
stUrl = False, stGraphics = False,
stLHS = False, stBook = writerChapters options,
- stCsquotes = False }
+ stCsquotes = False, stHighlighting = False,
+ stIncremental = writerIncremental options,
+ stInternalLinks = [] }
pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String
pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
+ -- see if there are internal links
+ let isInternalLink (Link _ ('#':xs,_)) = [xs]
+ isInternalLink _ = []
+ modify $ \s -> s{ stInternalLinks = queryWith isInternalLink blocks }
let template = writerTemplate options
- let usesBookClass x = "\\documentclass" `isPrefixOf` x &&
- ("{memoir}" `isSuffixOf` x || "{book}" `isSuffixOf` x ||
- "{report}" `isSuffixOf` x)
- when (any usesBookClass (lines template)) $
- modify $ \s -> s{stBook = True}
+ -- set stBook depending on documentclass
+ let bookClasses = ["memoir","book","report","scrreprt","scrbook"]
+ case lookup "documentclass" (writerVariables options) of
+ Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True}
+ | otherwise -> return ()
+ Nothing | any (\x -> "\\documentclass" `isPrefixOf` x &&
+ (any (`isSuffixOf` x) bookClasses))
+ (lines template) -> modify $ \s -> s{stBook = True}
+ | otherwise -> return ()
-- check for \usepackage...{csquotes}; if present, we'll use
-- \enquote{...} for smart quotes:
when ("{csquotes}" `isInfixOf` template) $
modify $ \s -> s{stCsquotes = True}
- opts <- liftM stOptions get
- let colwidth = if writerWrapText opts
- then Just $ writerColumns opts
+ let colwidth = if writerWrapText options
+ then Just $ writerColumns options
else Nothing
titletext <- liftM (render colwidth) $ inlineListToLaTeX title
authorsText <- mapM (liftM (render colwidth) . inlineListToLaTeX) authors
@@ -97,9 +112,12 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
else case last blocks of
Header 1 il -> (init blocks, il)
_ -> (blocks, [])
- body <- blockListToLaTeX blocks'
+ blocks'' <- if writerBeamer options
+ then toSlides blocks'
+ else return blocks'
+ body <- mapM (elementToLaTeX options) $ hierarchicalize blocks''
biblioTitle <- liftM (render colwidth) $ inlineListToLaTeX lastHeader
- let main = render colwidth body
+ let main = render colwidth $ vcat body
st <- get
let biblioFiles = intercalate "," $ map dropExtension $ writerBiblioFiles options
citecontext = case writerCiteMethod options of
@@ -116,7 +134,12 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
[ ("toc", if writerTableOfContents options then "yes" else "")
, ("body", main)
, ("title", titletext)
- , ("date", dateText) ] ++
+ , ("date", dateText)
+ , ("documentclass", if writerBeamer options
+ then "beamer"
+ else if writerChapters options
+ then "book"
+ else "article") ] ++
[ ("author", a) | a <- authorsText ] ++
[ ("verbatim-in-note", "yes") | stVerbInNote st ] ++
[ ("fancy-enums", "yes") | stEnumerate st ] ++
@@ -128,36 +151,102 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
[ ("lhs", "yes") | stLHS st ] ++
[ ("graphics", "yes") | stGraphics st ] ++
[ ("book-class", "yes") | stBook st] ++
- [ ("listings", "yes") | writerListings options ] ++
+ [ ("listings", "yes") | writerListings options || stLHS st ] ++
+ [ ("beamer", "yes") | writerBeamer options ] ++
+ [ ("highlighting-macros", styleToLaTeX
+ $ writerHighlightStyle options ) | stHighlighting st ] ++
citecontext
return $ if writerStandalone options
then renderTemplate context template
else main
--- escape things as needed for LaTeX
+-- | Convert Elements to LaTeX
+elementToLaTeX :: WriterOptions -> Element -> State WriterState Doc
+elementToLaTeX _ (Blk block) = blockToLaTeX block
+elementToLaTeX opts (Sec level _ id' title' elements) = do
+ header' <- sectionHeader id' level title'
+ innerContents <- mapM (elementToLaTeX opts) elements
+ return $ vcat (header' : innerContents)
-stringToLaTeX :: String -> String
-stringToLaTeX = escapeStringUsing latexEscapes
- where latexEscapes = backslashEscapes "{}$%&_#" ++
- [ ('^', "\\^{}")
- , ('\\', "\\textbackslash{}")
- , ('~', "\\ensuremath{\\sim}")
- , ('|', "\\textbar{}")
- , ('<', "\\textless{}")
- , ('>', "\\textgreater{}")
- , ('[', "{[}") -- to avoid interpretation as
- , (']', "{]}") -- optional arguments
- , ('\160', "~")
- , ('\x2018', "`")
- , ('\x2019', "'")
- , ('\x201C', "``")
- , ('\x201D', "''")
- ]
+-- escape things as needed for LaTeX
+stringToLaTeX :: Bool -> String -> String
+stringToLaTeX _ [] = ""
+stringToLaTeX isUrl (x:xs) =
+ case x of
+ '{' -> "\\{" ++ rest
+ '}' -> "\\}" ++ rest
+ '$' -> "\\$" ++ rest
+ '%' -> "\\%" ++ rest
+ '&' -> "\\&" ++ rest
+ '_' -> "\\_" ++ rest
+ '#' -> "\\#" ++ rest
+ '-' -> case xs of -- prevent adjacent hyphens from forming ligatures
+ ('-':_) -> "-{}" ++ rest
+ _ -> '-' : rest
+ '~' | not isUrl -> "\\ensuremath{\\sim}"
+ '^' -> "\\^{}" ++ rest
+ '\\' -> "\\textbackslash{}" ++ rest
+ '€' -> "\\euro{}" ++ rest
+ '|' -> "\\textbar{}" ++ rest
+ '<' -> "\\textless{}" ++ rest
+ '>' -> "\\textgreater{}" ++ rest
+ '[' -> "{[}" ++ rest -- to avoid interpretation as
+ ']' -> "{]}" ++ rest -- optional arguments
+ '\160' -> "~" ++ rest
+ '\x2018' -> "`" ++ rest
+ '\x2019' -> "'" ++ rest
+ '\x201C' -> "``" ++ rest
+ '\x201D' -> "''" ++ rest
+ '\x2026' -> "\\ldots{}" ++ rest
+ '\x2014' -> "---" ++ rest
+ '\x2013' -> "--" ++ rest
+ _ -> x : rest
+ where rest = stringToLaTeX isUrl xs
-- | Puts contents into LaTeX command.
inCmd :: String -> Doc -> Doc
inCmd cmd contents = char '\\' <> text cmd <> braces contents
+toSlides :: [Block] -> State WriterState [Block]
+toSlides bs = do
+ opts <- gets stOptions
+ let slideLevel = maybe (getSlideLevel bs) id $ writerSlideLevel opts
+ let bs' = prepSlides slideLevel bs
+ concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs')
+
+elementToBeamer :: Int -> Element -> State WriterState [Block]
+elementToBeamer _slideLevel (Blk b) = return [b]
+elementToBeamer slideLevel (Sec lvl _num _ident tit elts)
+ | lvl > slideLevel = do
+ bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
+ return $ Para ( RawInline "latex" "\\begin{block}{"
+ : tit ++ [RawInline "latex" "}"] )
+ : bs ++ [RawBlock "latex" "\\end{block}"]
+ | lvl < slideLevel = do
+ bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
+ return $ (Header lvl tit) : bs
+ | otherwise = do -- lvl == slideLevel
+ -- note: [fragile] is required or verbatim breaks
+ let hasCodeBlock (CodeBlock _ _) = [True]
+ hasCodeBlock _ = []
+ let hasCode (Code _ _) = [True]
+ hasCode _ = []
+ let fragile = if not $ null $ queryWith hasCodeBlock elts ++ queryWith hasCode elts
+ then "[fragile]"
+ else ""
+ let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ fragile ++
+ "\\frametitle{") : tit ++ [RawInline "latex" "}"]
+ let slideEnd = RawBlock "latex" "\\end{frame}"
+ -- now carve up slide into blocks if there are sections inside
+ bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts
+ return $ slideStart : bs ++ [slideEnd]
+
+isListBlock :: Block -> Bool
+isListBlock (BulletList _) = True
+isListBlock (OrderedList _ _) = True
+isListBlock (DefinitionList _) = True
+isListBlock _ = False
+
-- | Convert Pandoc block element to LaTeX.
blockToLaTeX :: Block -- ^ Block to convert
-> State WriterState Doc
@@ -172,58 +261,80 @@ blockToLaTeX (Para lst) = do
result <- inlineListToLaTeX lst
return $ result <> blankline
blockToLaTeX (BlockQuote lst) = do
- contents <- blockListToLaTeX lst
- return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
+ beamer <- writerBeamer `fmap` gets stOptions
+ case lst of
+ [b] | beamer && isListBlock b -> do
+ oldIncremental <- gets stIncremental
+ modify $ \s -> s{ stIncremental = True }
+ result <- blockToLaTeX b
+ modify $ \s -> s{ stIncremental = oldIncremental }
+ return result
+ _ -> do
+ contents <- blockListToLaTeX lst
+ return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do
- st <- get
- env <- if writerLiterateHaskell (stOptions st) && "haskell" `elem` classes &&
- "literate" `elem` classes
- then do
- modify $ \s -> s{ stLHS = True }
- return "code"
- else if writerListings (stOptions st)
- then return "lstlisting"
- else if stInNote st
- then do
- modify $ \s -> s{ stVerbInNote = True }
- return "Verbatim"
- else return "verbatim"
- let params = if writerListings (stOptions st)
- then take 1
- [ "language=" ++ lang | lang <- classes
- , lang `elem` ["ABAP","IDL","Plasm","ACSL","inform"
- ,"POV","Ada","Java","Prolog","Algol"
- ,"JVMIS","Promela","Ant","ksh","Python"
- ,"Assembler","Lisp","R","Awk","Logo"
- ,"Reduce","bash","make","Rexx","Basic"
- ,"Mathematica","RSL","C","Matlab","Ruby"
- ,"C++","Mercury","S","Caml","MetaPost"
- ,"SAS","Clean","Miranda","Scilab","Cobol"
- ,"Mizar","sh","Comal","ML","SHELXL","csh"
- ,"Modula-2","Simula","Delphi","MuPAD"
- ,"SQL","Eiffel","NASTRAN","tcl","Elan"
- ,"Oberon-2","TeX","erlang","OCL"
- ,"VBScript","Euphoria","Octave","Verilog"
- ,"Fortran","Oz","VHDL","GCL","Pascal"
- ,"VRML","Gnuplot","Perl","XML","Haskell"
- ,"PHP","XSLT","HTML","PL/I"]
- ] ++
- [ key ++ "=" ++ attr | (key,attr) <- keyvalAttr ]
- else []
- printParams
- | null params = empty
- | otherwise = "[" <> hsep (intersperse "," (map text params)) <>
- "]"
- return $ flush ("\\begin{" <> text env <> "}" <> printParams $$ text str $$
- "\\end{" <> text env <> "}") $$ cr
- -- final cr needed because of footnotes
+ opts <- gets stOptions
+ case () of
+ _ | writerLiterateHaskell opts && "haskell" `elem` classes &&
+ "literate" `elem` classes -> lhsCodeBlock
+ | writerListings opts -> listingsCodeBlock
+ | writerHighlight opts && not (null classes) -> highlightedCodeBlock
+ | otherwise -> rawCodeBlock
+ where lhsCodeBlock = do
+ modify $ \s -> s{ stLHS = True }
+ return $ flush ("\\begin{code}" $$ text str $$ "\\end{code}") $$ cr
+ rawCodeBlock = do
+ st <- get
+ env <- if stInNote st
+ then modify (\s -> s{ stVerbInNote = True }) >>
+ return "Verbatim"
+ else return "verbatim"
+ return $ flush (text ("\\begin{" ++ env ++ "}") $$ text str $$
+ text ("\\end{" ++ env ++ "}")) $$ cr -- final cr because of notes
+ listingsCodeBlock = do
+ st <- get
+ let params = if writerListings (stOptions st)
+ then take 1
+ [ "language=" ++ lang | lang <- classes
+ , lang `elem` ["ABAP","IDL","Plasm","ACSL","inform"
+ ,"POV","Ada","Java","Prolog","Algol"
+ ,"JVMIS","Promela","Ant","ksh","Python"
+ ,"Assembler","Lisp","R","Awk","Logo"
+ ,"Reduce","bash","make","Rexx","Basic"
+ ,"Mathematica","RSL","C","Matlab","Ruby"
+ ,"C++","Mercury","S","Caml","MetaPost"
+ ,"SAS","Clean","Miranda","Scilab","Cobol"
+ ,"Mizar","sh","Comal","ML","SHELXL","csh"
+ ,"Modula-2","Simula","Delphi","MuPAD"
+ ,"SQL","Eiffel","NASTRAN","tcl","Elan"
+ ,"Oberon-2","TeX","erlang","OCL"
+ ,"VBScript","Euphoria","Octave","Verilog"
+ ,"Fortran","Oz","VHDL","GCL","Pascal"
+ ,"VRML","Gnuplot","Perl","XML","Haskell"
+ ,"PHP","XSLT","HTML","PL/I"]
+ ] ++
+ [ key ++ "=" ++ attr | (key,attr) <- keyvalAttr ]
+ else []
+ printParams
+ | null params = empty
+ | otherwise = brackets $ hsep (intersperse "," (map text params))
+ return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$
+ "\\end{lstlisting}") $$ cr
+ highlightedCodeBlock =
+ case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of
+ Nothing -> rawCodeBlock
+ Just h -> modify (\st -> st{ stHighlighting = True }) >>
+ return (flush $ text h)
blockToLaTeX (RawBlock "latex" x) = return $ text x <> blankline
blockToLaTeX (RawBlock _ _) = return empty
blockToLaTeX (BulletList lst) = do
+ incremental <- gets stIncremental
+ let inc = if incremental then "[<+->]" else ""
items <- mapM listItemToLaTeX lst
- return $ "\\begin{itemize}" $$ vcat items $$ "\\end{itemize}"
+ return $ text ("\\begin{itemize}" ++ inc) $$ vcat items $$ "\\end{itemize}"
blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
st <- get
+ let inc = if stIncremental st then "[<+->]" else ""
let oldlevel = stOLLevel st
put $ st {stOLLevel = oldlevel + 1}
items <- mapM listItemToLaTeX lst
@@ -231,46 +342,25 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim
then do
modify $ \s -> s{ stEnumerate = True }
- return $ char '[' <>
+ return $ char '[' <>
text (head (orderedListMarkers (1, numstyle,
numdelim))) <> char ']'
else return empty
let resetcounter = if start /= 1 && oldlevel <= 4
- then text $ "\\setcounter{enum" ++
+ then text $ "\\setcounter{enum" ++
map toLower (toRomanNumeral oldlevel) ++
"}{" ++ show (start - 1) ++ "}"
- else empty
- return $ "\\begin{enumerate}" <> exemplar $$ resetcounter $$
+ else empty
+ return $ text ("\\begin{enumerate}" ++ inc) <> exemplar $$ resetcounter $$
vcat items $$ "\\end{enumerate}"
blockToLaTeX (DefinitionList lst) = do
+ incremental <- gets stIncremental
+ let inc = if incremental then "[<+->]" else ""
items <- mapM defListItemToLaTeX lst
- return $ "\\begin{description}" $$ vcat items $$ "\\end{description}"
+ return $ text ("\\begin{description}" ++ inc) $$ vcat items $$ "\\end{description}"
blockToLaTeX HorizontalRule = return $
"\\begin{center}\\rule{3in}{0.4pt}\\end{center}" $$ blankline
-blockToLaTeX (Header level lst) = do
- txt <- inlineListToLaTeX lst
- let noNote (Note _) = Str ""
- noNote x = x
- let lstNoNotes = bottomUp noNote lst
- -- footnotes in sections don't work unless you specify an optional
- -- argument: \section[mysec]{mysec\footnote{blah}}
- optional <- if lstNoNotes == lst
- then return empty
- else do
- res <- inlineListToLaTeX lstNoNotes
- return $ char '[' <> res <> char ']'
- let stuffing = optional <> char '{' <> txt <> char '}'
- book <- liftM stBook get
- let level' = if book then level - 1 else level
- let headerWith x y = text x <> y $$ blankline
- return $ case level' of
- 0 -> headerWith "\\chapter" stuffing
- 1 -> headerWith "\\section" stuffing
- 2 -> headerWith "\\subsection" stuffing
- 3 -> headerWith "\\subsubsection" stuffing
- 4 -> headerWith "\\paragraph" stuffing
- 5 -> headerWith "\\subparagraph" stuffing
- _ -> txt $$ blankline
+blockToLaTeX (Header level lst) = sectionHeader "" level lst
blockToLaTeX (Table caption aligns widths heads rows) = do
modify $ \s -> s{ stInTable = True, stTableNotes = [] }
headers <- if all null heads
@@ -338,6 +428,49 @@ defListItemToLaTeX (term, defs) = do
def' <- liftM vsep $ mapM blockListToLaTeX defs
return $ "\\item" <> brackets term' $$ def'
+-- | Craft the section header, inserting the secton reference, if supplied.
+sectionHeader :: [Char]
+ -> Int
+ -> [Inline]
+ -> State WriterState Doc
+sectionHeader ref level lst = do
+ txt <- inlineListToLaTeX lst
+ let noNote (Note _) = Str ""
+ noNote x = x
+ let lstNoNotes = bottomUp noNote lst
+ -- footnotes in sections don't work unless you specify an optional
+ -- argument: \section[mysec]{mysec\footnote{blah}}
+ optional <- if lstNoNotes == lst
+ then return empty
+ else do
+ res <- inlineListToLaTeX lstNoNotes
+ return $ char '[' <> res <> char ']'
+ let stuffing = optional <> char '{' <> txt <> char '}'
+ book <- gets stBook
+ opts <- gets stOptions
+ let level' = if book || writerChapters opts then level - 1 else level
+ internalLinks <- gets stInternalLinks
+ let refLabel lab = (if ref `elem` internalLinks
+ then text "\\hyperdef"
+ <> braces empty
+ <> braces (text ref)
+ <> braces (lab <> text "\\label"
+ <> braces (text ref))
+ else lab)
+ $$ blankline
+ let headerWith x y = refLabel $ text x <> y
+ return $ case level' of
+ 0 -> if writerBeamer opts
+ then headerWith "\\part" stuffing
+ else headerWith "\\chapter" stuffing
+ 1 -> headerWith "\\section" stuffing
+ 2 -> headerWith "\\subsection" stuffing
+ 3 -> headerWith "\\subsubsection" stuffing
+ 4 -> headerWith "\\paragraph" stuffing
+ 5 -> headerWith "\\subparagraph" stuffing
+ _ -> txt $$ blankline
+
+
-- | Convert list of inline elements to LaTeX.
inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
-> State WriterState Doc
@@ -345,7 +478,6 @@ inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat
isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True
-isQuoted Apostrophe = True
isQuoted _ = False
-- | Convert inline element to LaTeX
@@ -353,8 +485,8 @@ inlineToLaTeX :: Inline -- ^ Inline to convert
-> State WriterState Doc
inlineToLaTeX (Emph lst) =
inlineListToLaTeX lst >>= return . inCmd "emph"
-inlineToLaTeX (Strong lst) =
- inlineListToLaTeX lst >>= return . inCmd "textbf"
+inlineToLaTeX (Strong lst) =
+ inlineListToLaTeX lst >>= return . inCmd "textbf"
inlineToLaTeX (Strikeout lst) = do
contents <- inlineListToLaTeX lst
modify $ \s -> s{ stStrikeout = True }
@@ -377,14 +509,24 @@ inlineToLaTeX (Cite cits lst) = do
Biblatex -> citationsToBiblatex cits
_ -> inlineListToLaTeX lst
-inlineToLaTeX (Code _ str) = do
- st <- get
- if writerListings (stOptions st)
- then do
- when (stInNote st) $ modify $ \s -> s{ stVerbInNote = True }
- let chr = ((enumFromTo '!' '~') \\ str) !! 0
- return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr]
- else return $ text $ "\\texttt{" ++ stringToLaTeX str ++ "}"
+inlineToLaTeX (Code (_,classes,_) str) = do
+ opts <- gets stOptions
+ case () of
+ _ | writerListings opts -> listingsCode
+ | writerHighlight opts && not (null classes) -> highlightCode
+ | otherwise -> rawCode
+ where listingsCode = do
+ inNote <- gets stInNote
+ when inNote $ modify $ \s -> s{ stVerbInNote = True }
+ let chr = ((enumFromTo '!' '~') \\ str) !! 0
+ return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr]
+ highlightCode = do
+ case highlight formatLaTeXInline ("",classes,[]) str of
+ Nothing -> rawCode
+ Just h -> modify (\st -> st{ stHighlighting = True }) >>
+ return (text h)
+ rawCode = return
+ $ text $ "\\texttt{" ++ stringToLaTeX False str ++ "}"
inlineToLaTeX (Quoted SingleQuote lst) = do
contents <- inlineListToLaTeX lst
csquotes <- liftM stCsquotes get
@@ -411,11 +553,7 @@ inlineToLaTeX (Quoted DoubleQuote lst) = do
then "\\,"
else empty
return $ "``" <> s1 <> contents <> s2 <> "''"
-inlineToLaTeX Apostrophe = return $ char '\''
-inlineToLaTeX EmDash = return "---"
-inlineToLaTeX EnDash = return "--"
-inlineToLaTeX Ellipses = return "\\ldots{}"
-inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str
+inlineToLaTeX (Str str) = return $ text $ stringToLaTeX False str
inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$'
inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]"
inlineToLaTeX (RawInline "latex" str) = return $ text str
@@ -429,7 +567,7 @@ inlineToLaTeX (Link txt (src, _)) =
do modify $ \s -> s{ stUrl = True }
return $ text $ "\\url{" ++ x ++ "}"
_ -> do contents <- inlineListToLaTeX txt
- return $ text ("\\href{" ++ stringToLaTeX src ++ "}{") <>
+ return $ text ("\\href{" ++ stringToLaTeX True src ++ "}{") <>
contents <> char '}'
inlineToLaTeX (Image _ (source, _)) = do
modify $ \s -> s{ stGraphics = True }
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index 78b9274d6..d3735efa7 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -98,7 +98,13 @@ noteToMan opts num note = do
-- | Association list of characters to escape.
manEscapes :: [(Char, String)]
-manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "@\\"
+manEscapes = [ ('\160', "\\ ")
+ , ('\'', "\\[aq]")
+ , ('’', "'")
+ , ('\x2014', "\\[em]")
+ , ('\x2013', "\\[en]")
+ , ('\x2026', "\\&...")
+ ] ++ backslashEscapes "@\\"
-- | Escape special characters for Man.
escapeString :: String -> String
@@ -303,10 +309,6 @@ inlineToMan opts (Quoted DoubleQuote lst) = do
return $ text "\\[lq]" <> contents <> text "\\[rq]"
inlineToMan opts (Cite _ lst) =
inlineListToMan opts lst
-inlineToMan _ EmDash = return $ text "\\[em]"
-inlineToMan _ EnDash = return $ text "\\[en]"
-inlineToMan _ Apostrophe = return $ char '\''
-inlineToMan _ Ellipses = return $ text "\\&..."
inlineToMan _ (Code _ str) =
return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]"
inlineToMan _ (Str str) = return $ text $ escapeString str
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 48e9578b4..7ce939395 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -233,17 +233,19 @@ blockToMarkdown _ HorizontalRule =
blockToMarkdown opts (Header level inlines) = do
contents <- inlineListToMarkdown opts inlines
st <- get
- -- use setext style headers if in literate haskell mode.
- -- ghc interprets '#' characters in column 1 as line number specifiers.
- if writerLiterateHaskell opts || stPlain st
- then let len = offset contents
- in return $ contents <> cr <>
- (case level of
- 1 -> text $ replicate len '='
- 2 -> text $ replicate len '-'
- _ -> empty) <> blankline
- else return $
- text ((replicate level '#') ++ " ") <> contents <> blankline
+ let setext = writerSetextHeaders opts
+ return $ nowrap
+ $ case level of
+ 1 | setext ->
+ contents <> cr <> text (replicate (offset contents) '=') <>
+ blankline
+ 2 | setext ->
+ contents <> cr <> text (replicate (offset contents) '-') <>
+ blankline
+ -- ghc interprets '#' characters in column 1 as linenum specifiers.
+ _ | stPlain st || writerLiterateHaskell opts ->
+ contents <> blankline
+ _ -> text (replicate level '#') <> space <> contents <> blankline
blockToMarkdown opts (CodeBlock (_,classes,_) str)
| "haskell" `elem` classes && "literate" `elem` classes &&
writerLiterateHaskell opts =
@@ -434,10 +436,6 @@ inlineToMarkdown opts (Quoted SingleQuote lst) = do
inlineToMarkdown opts (Quoted DoubleQuote lst) = do
contents <- inlineListToMarkdown opts lst
return $ "“" <> contents <> "”"
-inlineToMarkdown _ EmDash = return "\8212"
-inlineToMarkdown _ EnDash = return "\8211"
-inlineToMarkdown _ Apostrophe = return "\8217"
-inlineToMarkdown _ Ellipses = return "\8230"
inlineToMarkdown opts (Code attr str) =
let tickGroups = filter (\s -> '`' `elem` s) $ group str
longest = if null tickGroups
@@ -495,17 +493,16 @@ inlineToMarkdown opts (Cite (c:cs) lst)
modekey SuppressAuthor = "-"
modekey _ = ""
inlineToMarkdown _ (Cite _ _) = return $ text ""
-inlineToMarkdown opts (Link txt (src', tit)) = do
+inlineToMarkdown opts (Link txt (src, tit)) = do
linktext <- inlineListToMarkdown opts txt
let linktitle = if null tit
then empty
else text $ " \"" ++ tit ++ "\""
- let src = unescapeURI src'
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
- let useRefLinks = writerReferenceLinks opts
let useAuto = case (tit,txt) of
("", [Code _ s]) | s == srcSuffix -> True
_ -> False
+ let useRefLinks = writerReferenceLinks opts && not useAuto
ref <- if useRefLinks then getReference txt (src, tit) else return []
reftext <- inlineListToMarkdown opts ref
return $ if useAuto
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index a7c7fc482..f31a2c2d1 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -346,22 +346,14 @@ inlineToMediaWiki opts (SmallCaps lst) = inlineListToMediaWiki opts lst
inlineToMediaWiki opts (Quoted SingleQuote lst) = do
contents <- inlineListToMediaWiki opts lst
- return $ "&lsquo;" ++ contents ++ "&rsquo;"
+ return $ "\8216" ++ contents ++ "\8217"
inlineToMediaWiki opts (Quoted DoubleQuote lst) = do
contents <- inlineListToMediaWiki opts lst
- return $ "&ldquo;" ++ contents ++ "&rdquo;"
+ return $ "\8220" ++ contents ++ "\8221"
inlineToMediaWiki opts (Cite _ lst) = inlineListToMediaWiki opts lst
-inlineToMediaWiki _ EmDash = return "&mdash;"
-
-inlineToMediaWiki _ EnDash = return "&ndash;"
-
-inlineToMediaWiki _ Apostrophe = return "&rsquo;"
-
-inlineToMediaWiki _ Ellipses = return "&hellip;"
-
inlineToMediaWiki _ (Code _ str) =
return $ "<tt>" ++ (escapeString str) ++ "</tt>"
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index f8030965c..f8f22494f 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -34,9 +34,10 @@ import System.FilePath ( (</>), takeExtension )
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 ( fromString )
import Codec.Archive.Zip
-import System.Time
+import Data.Time.Clock.POSIX
import Paths_pandoc ( getDataFileName )
import Text.Pandoc.Shared ( WriterOptions(..) )
+import Text.Pandoc.ImageSize ( readImageSize, sizeInPoints )
import Text.Pandoc.MIME ( getMimeType )
import Text.Pandoc.Definition
import Text.Pandoc.Generic
@@ -71,7 +72,7 @@ writeODT mbRefOdt opts doc = do
let sourceDir = writerSourceDirectory opts
doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc
let newContents = writeOpenDocument opts{writerWrapText = False} doc'
- (TOD epochtime _) <- getClockTime
+ epochtime <- floor `fmap` getPOSIXTime
let contentEntry = toEntry "content.xml" epochtime $ fromString newContents
picEntries <- readIORef picEntriesRef
let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries
@@ -102,11 +103,16 @@ writeODT mbRefOdt opts doc = do
transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline
transformPic sourceDir entriesRef (Image lab (src,tit)) = do
let src' = unEscapeString src
+ mbSize <- readImageSize src'
+ let tit' = case mbSize of
+ Just s -> let (w,h) = sizeInPoints s
+ in show w ++ "x" ++ show h
+ Nothing -> tit
entries <- readIORef entriesRef
let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src'
catch (readEntry [] (sourceDir </> src') >>= \entry ->
modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >>
- return (Image lab (newsrc, tit)))
+ return (Image lab (newsrc, tit')))
(\_ -> return (Emph lab))
transformPic _ _ x = return x
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index e675f4e65..a0317511a 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -40,7 +40,7 @@ import Text.Printf ( printf )
import Control.Applicative ( (<$>) )
import Control.Arrow ( (***), (>>>) )
import Control.Monad.State hiding ( when )
-import Data.Char (chr)
+import Data.Char (chr, isDigit)
import qualified Data.Map as Map
-- | Auxiliary function to convert Plain block to Para.
@@ -154,8 +154,8 @@ inHeaderTags i d =
, ("text:outline-level", show i)] d
inQuotes :: QuoteType -> Doc -> Doc
-inQuotes SingleQuote s = text "&#8216;" <> s <> text "&#8217;"
-inQuotes DoubleQuote s = text "&#8220;" <> s <> text "&#8221;"
+inQuotes SingleQuote s = char '\8216' <> s <> char '\8217'
+inQuotes DoubleQuote s = char '\8220' <> s <> char '\8221'
handleSpaces :: String -> Doc
handleSpaces s
@@ -361,10 +361,6 @@ inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l
-- | Convert an inline element to OpenDocument.
inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc
inlineToOpenDocument o ils
- | Ellipses <- ils = inTextStyle $ text "&#8230;"
- | EmDash <- ils = inTextStyle $ text "&#8212;"
- | EnDash <- ils = inTextStyle $ text "&#8211;"
- | Apostrophe <- ils = inTextStyle $ text "&#8217;"
| Space <- ils = inTextStyle space
| LineBreak <- ils = return $ selfClosingTag "text:line-break" []
| Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s
@@ -382,7 +378,7 @@ inlineToOpenDocument o ils
| RawInline "html" s <- ils = preformatted s -- for backwards compat.
| RawInline _ _ <- ils = return empty
| Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l
- | Image _ (s,_) <- ils = return $ mkImg s
+ | Image _ (s,t) <- ils = return $ mkImg s t
| Note l <- ils = mkNote l
| otherwise = return empty
where
@@ -391,7 +387,7 @@ inlineToOpenDocument o ils
, ("xlink:href" , s )
, ("office:name", t )
] . inSpanTags "Definition"
- mkImg s = inTags False "draw:frame" [] $
+ mkImg s t = inTags False "draw:frame" (attrsFromTitle t) $
selfClosingTag "draw:image" [ ("xlink:href" , s )
, ("xlink:type" , "simple")
, ("xlink:show" , "embed" )
@@ -407,6 +403,17 @@ inlineToOpenDocument o ils
addNote nn
return nn
+-- a title of the form "120x140" will be interpreted as image
+-- size in points.
+attrsFromTitle :: String -> [(String,String)]
+attrsFromTitle s = if null xs || null ys
+ then []
+ else [("svg:width",xs ++ "pt"),("svg:height",ys ++ "pt")]
+ where (xs,rest) = span isDigit s
+ ys = case rest of
+ ('x':zs) | all isDigit zs -> zs
+ _ -> ""
+
bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc]))
bulletListStyle l =
let doStyles i = inTags True "text:list-level-style-bullet"
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
index f7f314428..4c77ba7c6 100644
--- a/src/Text/Pandoc/Writers/Org.hs
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -95,7 +95,12 @@ noteToOrg num note = do
-- | Escape special characters for Org.
escapeString :: String -> String
-escapeString = escapeStringUsing (backslashEscapes "^_")
+escapeString = escapeStringUsing $
+ [ ('\x2014',"---")
+ , ('\x2013',"--")
+ , ('\x2019',"'")
+ , ('\x2026',"...")
+ ] ++ backslashEscapes "^_"
titleToOrg :: [Inline] -> State WriterState Doc
titleToOrg [] = return empty
@@ -249,10 +254,6 @@ inlineToOrg (Quoted DoubleQuote lst) = do
contents <- inlineListToOrg lst
return $ "\"" <> contents <> "\""
inlineToOrg (Cite _ lst) = inlineListToOrg lst
-inlineToOrg EmDash = return "---"
-inlineToOrg EnDash = return "--"
-inlineToOrg Apostrophe = return "'"
-inlineToOrg Ellipses = return "..."
inlineToOrg (Code _ str) = return $ "=" <> text str <> "="
inlineToOrg (Str str) = return $ text $ escapeString str
inlineToOrg (Math t str) = do
@@ -272,8 +273,7 @@ inlineToOrg (Link txt (src, _)) = do
_ -> do contents <- inlineListToOrg txt
modify $ \s -> s{ stLinks = True }
return $ "[[" <> text src <> "][" <> contents <> "]]"
-inlineToOrg (Image _ (source', _)) = do
- let source = unescapeURI source'
+inlineToOrg (Image _ (source, _)) = do
modify $ \s -> s{ stImages = True }
return $ "[[" <> text source <> "]]"
inlineToOrg (Note contents) = do
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index d4adaa929..d6e5b5c9e 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -157,7 +157,7 @@ blockToRST (Header level inlines) = do
contents <- inlineListToRST inlines
let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
let border = text $ replicate (offset contents) headerChar
- return $ contents $$ border $$ blankline
+ return $ nowrap $ contents $$ border $$ blankline
blockToRST (CodeBlock (_,classes,_) str) = do
opts <- stOptions <$> get
let tabstop = writerTabStop opts
@@ -176,7 +176,7 @@ blockToRST (Table caption _ widths headers rows) = do
else blankline <> text "Table: " <> caption'
headers' <- mapM blockListToRST headers
rawRows <- mapM (mapM blockListToRST) rows
- let isSimple = all (==0) widths && all (all (\bs -> length bs == 1)) rows
+ let isSimple = all (==0) widths && all (all (\bs -> length bs <= 1)) rows
let numChars = maximum . map offset
opts <- get >>= return . stOptions
let widthsInChars =
@@ -281,26 +281,24 @@ inlineToRST (Quoted DoubleQuote lst) = do
return $ "“" <> contents <> "”"
inlineToRST (Cite _ lst) =
inlineListToRST lst
-inlineToRST EmDash = return $ char '\8212'
-inlineToRST EnDash = return $ char '\8211'
-inlineToRST Apostrophe = return $ char '\8217'
-inlineToRST Ellipses = return $ char '\8230'
inlineToRST (Code _ str) = return $ "``" <> text str <> "``"
inlineToRST (Str str) = return $ text $ escapeString str
inlineToRST (Math t str) = do
modify $ \st -> st{ stHasMath = True }
return $ if t == InlineMath
- then ":math:`$" <> text str <> "$`"
- else ":math:`$$" <> text str <> "$$`"
+ then ":math:`" <> text str <> "`" <> beforeNonBlank "\\ "
+ else if '\n' `elem` str
+ then blankline $$ ".. math::" $$
+ blankline $$ nest 3 (text str) $$ blankline
+ else blankline $$ (".. math:: " <> text str) $$ blankline
inlineToRST (RawInline _ _) = return empty
inlineToRST (LineBreak) = return cr -- there's no line break in RST
inlineToRST Space = return space
inlineToRST (Link [Code _ str] (src, _)) | src == str ||
src == "mailto:" ++ str = do
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
- return $ text $ unescapeURI srcSuffix
-inlineToRST (Link txt (src', tit)) = do
- let src = unescapeURI src'
+ return $ text srcSuffix
+inlineToRST (Link txt (src, tit)) = do
useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
linktext <- inlineListToRST $ normalizeSpaces txt
if useReferenceLinks
@@ -311,8 +309,7 @@ inlineToRST (Link txt (src', tit)) = do
modify $ \st -> st { stLinks = refs' }
return $ "`" <> linktext <> "`_"
else return $ "`" <> linktext <> " <" <> text src <> ">`_"
-inlineToRST (Image alternate (source', tit)) = do
- let source = unescapeURI source'
+inlineToRST (Image alternate (source, tit)) = do
pics <- get >>= return . stImages
let labelsUsed = map fst pics
let txt = if null alternate || alternate == [Str ""] ||
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index eb36c1ca6..4e7c2a7cd 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -106,7 +106,15 @@ handleUnicode (c:cs) =
-- | Escape special characters.
escapeSpecial :: String -> String
-escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}"))
+escapeSpecial = escapeStringUsing $
+ [ ('\t',"\\tab ")
+ , ('\8216',"\\u8216'")
+ , ('\8217',"\\u8217'")
+ , ('\8220',"\\u8220\"")
+ , ('\8221',"\\u8221\"")
+ , ('\8211',"\\u8211-")
+ , ('\8212',"\\u8212-")
+ ] ++ backslashEscapes "{\\}"
-- | Escape strings as needed for rich text format.
stringToRTF :: String -> String
@@ -287,10 +295,6 @@ inlineToRTF (Quoted SingleQuote lst) =
"\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'"
inlineToRTF (Quoted DoubleQuote lst) =
"\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\""
-inlineToRTF Apostrophe = "\\u8217'"
-inlineToRTF Ellipses = "\\u8230?"
-inlineToRTF EmDash = "\\u8212-"
-inlineToRTF EnDash = "\\u8211-"
inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}"
inlineToRTF (Str str) = stringToRTF str
inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 4f6645cd5..563ad7044 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -96,6 +96,10 @@ stringToTexinfo = escapeStringUsing texinfoEscapes
, ('@', "@@")
, (',', "@comma{}") -- only needed in argument lists
, ('\160', "@ ")
+ , ('\x2014', "---")
+ , ('\x2013', "--")
+ , ('\x2026', "@dots{}")
+ , ('\x2019', "'")
]
-- | Puts contents into Texinfo command.
@@ -387,10 +391,6 @@ inlineToTexinfo (Quoted DoubleQuote lst) = do
inlineToTexinfo (Cite _ lst) =
inlineListToTexinfo lst
-inlineToTexinfo Apostrophe = return $ char '\''
-inlineToTexinfo EmDash = return $ text "---"
-inlineToTexinfo EnDash = return $ text "--"
-inlineToTexinfo Ellipses = return $ text "@dots{}"
inlineToTexinfo (Str str) = return $ text (stringToTexinfo str)
inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str
inlineToTexinfo (RawInline f str) | f == "latex" || f == "tex" =
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
index 6614ec28e..26d5ec6d7 100644
--- a/src/Text/Pandoc/Writers/Textile.hs
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -72,15 +72,19 @@ withUseTags action = do
-- | Escape one character as needed for Textile.
escapeCharForTextile :: Char -> String
escapeCharForTextile x = case x of
- '&' -> "&amp;"
- '<' -> "&lt;"
- '>' -> "&gt;"
- '"' -> "&quot;"
- '*' -> "&#42;"
- '_' -> "&#95;"
- '@' -> "&#64;"
- '|' -> "&#124;"
- c -> [c]
+ '&' -> "&amp;"
+ '<' -> "&lt;"
+ '>' -> "&gt;"
+ '"' -> "&quot;"
+ '*' -> "&#42;"
+ '_' -> "&#95;"
+ '@' -> "&#64;"
+ '|' -> "&#124;"
+ '\x2014' -> " -- "
+ '\x2013' -> " - "
+ '\x2019' -> "'"
+ '\x2026' -> "..."
+ c -> [c]
-- | Escape string as needed for Textile.
escapeStringForTextile :: String -> String
@@ -370,14 +374,6 @@ inlineToTextile opts (Quoted DoubleQuote lst) = do
inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst
-inlineToTextile _ EmDash = return " -- "
-
-inlineToTextile _ EnDash = return " - "
-
-inlineToTextile _ Apostrophe = return "'"
-
-inlineToTextile _ Ellipses = return "..."
-
inlineToTextile _ (Code _ str) =
return $ if '@' `elem` str
then "<tt>" ++ escapeStringForXML str ++ "</tt>"
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index e21525018..7a1c8bdd8 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -33,9 +33,13 @@ module Text.Pandoc.XML ( stripTags,
inTags,
selfClosingTag,
inTagsSimple,
- inTagsIndented ) where
+ inTagsIndented,
+ toEntities,
+ fromEntities ) where
import Text.Pandoc.Pretty
+import Data.Char (ord, isAscii)
+import Text.HTML.TagSoup.Entity (lookupEntity)
-- | Remove everything between <...>
stripTags :: String -> String
@@ -89,3 +93,22 @@ inTagsSimple tagType = inTags False tagType []
-- | Put the supplied contents in indented block btw start and end tags.
inTagsIndented :: String -> Doc -> Doc
inTagsIndented tagType = inTags True tagType []
+
+-- | Escape all non-ascii characters using numerical entities.
+toEntities :: String -> String
+toEntities [] = ""
+toEntities (c:cs)
+ | isAscii c = c : toEntities cs
+ | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs
+
+-- Unescapes XML entities
+fromEntities :: String -> String
+fromEntities ('&':xs) =
+ case lookupEntity ent of
+ Just c -> c : fromEntities rest
+ Nothing -> '&' : fromEntities rest
+ where (ent, rest) = case break (==';') xs of
+ (zs,';':ys) -> (zs,ys)
+ _ -> ("",xs)
+fromEntities (x:xs) = x : fromEntities xs
+fromEntities [] = []
diff --git a/src/markdown2pdf.hs b/src/markdown2pdf.hs
deleted file mode 100644
index d6ee39dab..000000000
--- a/src/markdown2pdf.hs
+++ /dev/null
@@ -1,256 +0,0 @@
-module Main where
-
-import Data.List (isInfixOf, intercalate, isPrefixOf)
-import Data.Maybe (isNothing)
-import qualified Data.ByteString as BS
-import Codec.Binary.UTF8.String (decodeString, encodeString)
-import Data.ByteString.UTF8 (toString)
-import Control.Monad (unless, guard, liftM, when)
-import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO)
-import Control.Exception (tryJust, bracket, evaluate)
-
-import System.IO
-import System.IO.Error (isDoesNotExistError)
-import System.Environment ( getArgs, getProgName )
-import qualified Text.Pandoc.UTF8 as UTF8
-import System.Exit (ExitCode (..), exitWith)
-import System.FilePath
-import System.Directory
-import System.Process
-
--- A variant of 'readProcessWithExitCode' that does not
--- cause an error if the output is not UTF-8. (Copied
--- with slight variants from 'System.Process'.)
-readProcessWithExitCode'
- :: FilePath -- ^ command to run
- -> [String] -- ^ any arguments
- -> String -- ^ standard input
- -> IO (ExitCode,String,String) -- ^ exitcode, stdout, stderr
-readProcessWithExitCode' cmd args input = do
- (Just inh, Just outh, Just errh, pid) <-
- createProcess (proc cmd args){ std_in = CreatePipe,
- std_out = CreatePipe,
- std_err = CreatePipe }
-
- outMVar <- newEmptyMVar
-
- -- fork off a thread to start consuming stdout
- out <- liftM toString $ BS.hGetContents outh
- _ <- forkIO $ evaluate (length out) >> putMVar outMVar ()
-
- -- fork off a thread to start consuming stderr
- err <- liftM toString $ BS.hGetContents errh
- _ <- forkIO $ evaluate (length err) >> putMVar outMVar ()
-
- -- now write and flush any input
- when (not (null input)) $ do hPutStr inh input; hFlush inh
- hClose inh -- done with stdin
-
- -- wait on the output
- takeMVar outMVar
- takeMVar outMVar
- hClose outh
-
- -- wait on the process
- ex <- waitForProcess pid
-
- return (ex, out, err)
-
-run :: FilePath -> [String] -> IO (Either String String)
-run file opts = do
- (code, out, err) <- readProcessWithExitCode' (encodeString file)
- (map encodeString opts) ""
- let msg = out ++ err
- case code of
- ExitFailure _ -> return $ Left $! msg
- ExitSuccess -> return $ Right $! msg
-
-parsePandocArgs :: [String] -> IO (Maybe ([String], String))
-parsePandocArgs args = do
- result <- run "pandoc" $ ["--dump-args"] ++ args
- return $ either error (parse . map trim . lines) result
- where parse [] = Nothing
- parse ("-":[]) = Just ([], "stdin") -- no output or input
- parse ("-":x:xs) = Just (x:xs, dropExtension x) -- no output
- parse ( x :xs) = Just (xs, dropExtension x) -- at least output
- --trim = reverse . dropWhile isSpace . reverse . dropWhile isSpace
- trim = takeWhile (/='\r') . dropWhile (=='\r')
-
-runPandoc :: [String] -> FilePath -> IO (Either String FilePath)
-runPandoc inputsAndArgs output = do
- let texFile = addExtension output "tex"
- result <- run "pandoc" $
- ["-s", "--no-wrap", "-r", "markdown", "-w", "latex"]
- ++ inputsAndArgs ++ ["-o", texFile]
- return $ either Left (const $ Right texFile) result
-
-runLatexRaw :: String -> FilePath -> IO (Either (Either String String) FilePath)
-runLatexRaw latexProgram file = do
- -- we ignore the ExitCode because pdflatex always fails the first time
- run latexProgram ["-halt-on-error", "-interaction", "nonstopmode",
- "-output-directory", takeDirectory file, dropExtension file] >> return ()
- let pdfFile = replaceExtension file "pdf"
- let logFile = replaceExtension file "log"
- txt <- tryJust (guard . isDoesNotExistError)
- (liftM toString $ BS.readFile logFile)
- let checks = checkLatex $ either (const "") id txt
- case checks of
- -- err , bib , ref , msg
- (True , _ , _ , msg) -> return $ Left $ Left msg -- failure
- (False, True , _ , msg) -> runBibtex file >>
- (return $ Left $ Right msg) -- citations
- (False, _ , True, msg) -> return $ Left $ Right msg -- references
- (False, False, False, _ ) -> return $ Right pdfFile -- success
-
-runLatex :: String -> FilePath -> IO (Either String FilePath)
-runLatex latexProgram file = step 3
- where
- step n = do
- result <- runLatexRaw latexProgram file
- case result of
- Left (Left err) -> return $ Left err
- Left (Right _) | n > 1 -> step (n-1 :: Int)
- Right _ | n > 2 -> step (n-1 :: Int)
- Left (Right msg) -> return $ Left msg
- Right pdfFile -> return $ Right pdfFile
-
-checkLatex :: String -> (Bool, Bool, Bool, String)
-checkLatex "" = (True, False, False, "Could not read log file")
-checkLatex txt = (err , bib, ref, unlines $! msgs ++ tips)
- where
- xs `oneOf` x = any (flip isInfixOf x) xs
- msgs = dropWhile (not . errorline) $ lines txt
- errorline ('!':_) = True
- errorline _ = False
- tips = checkPackages msgs
- err = any (oneOf ["!", "LaTeX Error:", "Latex Error:"]) msgs
- bib = any (oneOf ["Warning: Citation"
- ,"Warning: There were undefined citations"]) msgs
- ref = any (oneOf ["Warning: Reference"
- ,"Warning: Label"
- ,"Warning: There were undefined references"
- ]) msgs
-
-checkPackages :: [String] -> [String]
-checkPackages = concatMap chks
- where -- for each message, search 'pks' for matches and give a hint
- chks x = concatMap (chk x) pks
- chk x (k,v) = if sub k `isInfixOf` x then tip k v else []
- sub k = "`" ++ k ++ ".sty' not found"
- tip k v = ["Please install the '" ++ k ++
- "' package from CTAN:", " " ++ v]
- pks = [("ucs"
- ,"http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/")
- ,("ulem"
- ,"http://www.ctan.org/tex-archive/macros/latex/contrib/misc/")
- ,("graphicx"
- ,"http://www.ctan.org/tex-archive/macros/latex/required/graphics/")
- ,("fancyhdr"
- ,"http://www.ctan.org/tex-archive/macros/latex/contrib/fancyhdr/")
- ,("array"
- ,"http://www.ctan.org/tex-archive/macros/latex/required/tools/")]
-
-runBibtex :: FilePath -> IO (Either String FilePath)
-runBibtex file = do
- let auxFile = replaceExtension file "aux"
- result <- run "bibtex" [auxFile]
- return $ either Left (const $ Right auxFile) result
-
-exit :: String -> IO a
-exit x = do
- progName <- getProgName
- UTF8.hPutStrLn stderr $ progName ++ ": " ++ x
- exitWith $ ExitFailure 1
-
-saveStdin :: FilePath -> IO (Either String FilePath)
-saveStdin file = do
- text <- liftM toString $ BS.getContents
- UTF8.writeFile file text
- fileExist <- doesFileExist (encodeString file)
- case fileExist of
- False -> return $ Left $! "Could not create " ++ file
- True -> return $ Right file
-
-saveOutput :: FilePath -> FilePath -> IO ()
-saveOutput input output = do
- copyFile (encodeString input) (encodeString output)
- UTF8.hPutStrLn stderr $! "Created " ++ output
-
-main :: IO ()
-main = bracket
- -- acquire resource
- (do dir <- getTemporaryDirectory
- let tmp = dir </> "pandoc"
- createDirectoryIfMissing True tmp
- return tmp)
-
- -- release resource
- ( \tmp -> removeDirectoryRecursive tmp)
-
- -- run computation
- $ \tmp -> do
- args <- liftM (map decodeString) getArgs
- -- check for invalid arguments and print help message if needed
- let goodopts = ["-f","-r","-N", "-p","-R","-H","-B","-A", "-C","-o","-V"]
- let goodoptslong = ["--from","--read","--strict",
- "--preserve-tabs","--tab-stop","--parse-raw",
- "--toc","--table-of-contents", "--xetex", "--luatex",
- "--number-sections","--include-in-header",
- "--include-before-body","--include-after-body",
- "--custom-header","--output",
- "--template", "--variable",
- "--csl", "--bibliography", "--data-dir", "--listings"]
- let isOpt ('-':_) = True
- isOpt _ = False
- let opts = filter isOpt args
- -- note that a long option can come in this form: --opt=val
- let isGoodopt x = x `elem` (goodopts ++ goodoptslong) ||
- any (\o -> (o ++ "=") `isPrefixOf` x) goodoptslong
- let markdown2pdfOpts = ["--xetex","--luatex"]
- unless (all isGoodopt opts) $ do
- (code, out, _err) <- readProcessWithExitCode "pandoc" ["--help"] ""
- UTF8.putStrLn "markdown2pdf [OPTIONS] [FILES]\nOptions:"
- UTF8.putStr $ unlines $
- filter (\l -> any (`isInfixOf` l) goodoptslong) (lines out)
- ++ map (replicate 24 ' ' ++) markdown2pdfOpts
- exitWith code
-
- let args' = filter (`notElem` markdown2pdfOpts) args
-
- -- check for executable files
- let latexProgram = if "--xetex" `elem` opts
- then "xelatex"
- else if "--luatex" `elem` opts
- then "lualatex"
- else "pdflatex"
- let execs = ["pandoc", latexProgram, "bibtex"]
- paths <- mapM findExecutable execs
- let miss = map snd $ filter (isNothing . fst) $ zip paths execs
- unless (null miss) $ exit $! "Could not find " ++ intercalate ", " miss
-
- -- parse arguments
- -- if no input given, use 'stdin'
- pandocArgs <- parsePandocArgs args'
- (input, output) <- case pandocArgs of
- Nothing -> exit "Could not parse arguments"
- Just ([],out) -> do
- stdinFile <- saveStdin (replaceDirectory (takeBaseName out) tmp)
- case stdinFile of
- Left err -> exit err
- Right f -> return ([f], out)
- -- no need because we'll pass all arguments to pandoc
- Just (_ ,out) -> return ([], out)
- -- run pandoc
- pandocRes <- runPandoc (input ++ args') $ replaceDirectory output tmp
- case pandocRes of
- Left err -> exit err
- Right texFile -> do
- -- run pdflatex
- latexRes <- runLatex latexProgram texFile
- case latexRes of
- Left err -> exit err
- Right pdfFile -> do
- -- save the output creating a backup if necessary
- saveOutput pdfFile $
- replaceDirectory pdfFile (takeDirectory output)
-
diff --git a/src/pandoc.hs b/src/pandoc.hs
index 27bc2c25c..3853d360a 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -1,5 +1,5 @@
{-
-Copyright (C) 2006-2011 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2012 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Main
- Copyright : Copyright (C) 2006-2011 John MacFarlane
+ Copyright : Copyright (C) 2006-2012 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley@edu>
@@ -30,24 +30,26 @@ writers.
-}
module Main where
import Text.Pandoc
-import Text.Pandoc.S5 (s5HeaderIncludes)
+import Text.Pandoc.PDF (tex2pdf)
+import Text.Pandoc.Readers.LaTeX (handleIncludes)
import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile,
- headerShift, findDataFile, normalize )
-#ifdef _HIGHLIGHTING
-import Text.Pandoc.Highlighting ( languages )
-#endif
+ headerShift, findDataFile, normalize, err, warn )
+import Text.Pandoc.XML ( toEntities, fromEntities )
+import Text.Pandoc.SelfContained ( makeSelfContained )
+import Text.Pandoc.Highlighting ( languages, Style, tango, pygments,
+ espresso, kate, haddock, monochrome )
import System.Environment ( getArgs, getProgName )
import System.Exit ( exitWith, ExitCode (..) )
import System.FilePath
import System.Console.GetOpt
import Data.Char ( toLower )
import Data.List ( intercalate, isSuffixOf, isPrefixOf )
-import System.Directory ( getAppUserDataDirectory, doesFileExist )
-import System.IO ( stdout, stderr )
+import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable )
+import System.IO ( stdout )
import System.IO.Error ( isDoesNotExistError )
import Control.Exception.Extensible ( throwIO )
import qualified Text.Pandoc.UTF8 as UTF8
-import Text.CSL
+import qualified Text.CSL as CSL
import Text.Pandoc.Biblio
import Control.Monad (when, unless, liftM)
import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
@@ -55,35 +57,34 @@ import Network.URI (parseURI, isURI, URI(..))
import qualified Data.ByteString.Lazy as B
import Data.ByteString.Lazy.UTF8 (toString )
import Codec.Binary.UTF8.String (decodeString, encodeString)
+import Text.CSL.Reference (Reference(..))
copyrightMessage :: String
-copyrightMessage = "\nCopyright (C) 2006-2011 John MacFarlane\n" ++
+copyrightMessage = "\nCopyright (C) 2006-2012 John MacFarlane\n" ++
"Web: http://johnmacfarlane.net/pandoc\n" ++
"This is free software; see the source for copying conditions. There is no\n" ++
"warranty, not even for merchantability or fitness for a particular purpose."
compileInfo :: String
compileInfo =
- "\nCompiled with citeproc support." ++
-#ifdef _HIGHLIGHTING
- "\nCompiled with syntax highlighting support for:\n" ++
- wrapWords 78 languages ++
-#endif
- ""
+ "\nCompiled with citeproc-hs " ++ VERSION_citeproc_hs ++ ", texmath " ++
+ VERSION_texmath ++ ", highlighting-kate " ++ VERSION_highlighting_kate ++
+ ".\nSyntax highlighting is supported for the following languages:\n " ++
+ wrapWords 4 78 languages
-- | Converts a list of strings into a single string with the items printed as
-- comma separated words in lines with a maximum line length.
-wrapWords :: Int -> [String] -> String
-wrapWords c = wrap' c c where
- wrap' _ _ [] = ""
+wrapWords :: Int -> Int -> [String] -> String
+wrapWords indent c = wrap' (c - indent) (c - indent)
+ where wrap' _ _ [] = ""
wrap' cols remaining (x:xs) = if remaining == cols
then x ++ wrap' cols (remaining - length x) xs
else if (length x + 1) > remaining
- then ",\n" ++ x ++ wrap' cols (cols - length x) xs
+ then ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs
else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs
-isNonTextOutput :: String -> Bool
-isNonTextOutput = (`elem` ["odt","epub"])
+nonTextFormats :: [String]
+nonTextFormats = ["odt","docx","epub"]
-- | Data structure for command line options.
data Opt = Opt
@@ -101,15 +102,19 @@ data Opt = Opt
, optNumberSections :: Bool -- ^ Number sections in LaTeX
, optSectionDivs :: Bool -- ^ Put sections in div tags in HTML
, optIncremental :: Bool -- ^ Use incremental lists in Slidy/S5
- , optOffline :: Bool -- ^ Make slideshow accessible offline
- , optXeTeX :: Bool -- ^ Format latex for xetex
+ , optSelfContained :: Bool -- ^ Make HTML accessible offline
, optSmart :: Bool -- ^ Use smart typography
+ , optOldDashes :: Bool -- ^ Parse dashes like pandoc <=1.8.2.1
, optHtml5 :: Bool -- ^ Produce HTML5 in HTML
+ , optHighlight :: Bool -- ^ Highlight source code
+ , optHighlightStyle :: Style -- ^ Style to use for highlighted code
, optChapters :: Bool -- ^ Use chapter for top-level sects
, optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
, optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt
+ , optReferenceDocx :: Maybe FilePath -- ^ Path of reference.docx
, optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet
, optEPUBMetadata :: String -- ^ EPUB metadata
+ , optEPUBFonts :: [FilePath] -- ^ EPUB fonts to embed
, optDumpArgs :: Bool -- ^ Output command-line arguments
, optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
, optStrict :: Bool -- ^ Use strict markdown syntax
@@ -124,8 +129,12 @@ data Opt = Opt
, optCiteMethod :: CiteMethod -- ^ Method to output cites
, optBibliography :: [String]
, optCslFile :: FilePath
+ , optAbbrevsFile :: Maybe FilePath
, optListings :: Bool -- ^ Use listings package for code blocks
- , optAscii :: Bool -- ^ Avoid using nonascii characters
+ , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf
+ , optSlideLevel :: Maybe Int -- ^ Header level that creates slides
+ , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2
+ , optAscii :: Bool -- ^ Use ascii characters only in html
}
-- | Defaults for command-line options.
@@ -145,15 +154,19 @@ defaultOpts = Opt
, optNumberSections = False
, optSectionDivs = False
, optIncremental = False
- , optOffline = False
- , optXeTeX = False
+ , optSelfContained = False
, optSmart = False
+ , optOldDashes = False
, optHtml5 = False
+ , optHighlight = True
+ , optHighlightStyle = pygments
, optChapters = False
, optHTMLMathMethod = PlainMath
, optReferenceODT = Nothing
+ , optReferenceDocx = Nothing
, optEPUBStylesheet = Nothing
, optEPUBMetadata = ""
+ , optEPUBFonts = []
, optDumpArgs = False
, optIgnoreArgs = False
, optStrict = False
@@ -168,7 +181,11 @@ defaultOpts = Opt
, optCiteMethod = Citeproc
, optBibliography = []
, optCslFile = ""
+ , optAbbrevsFile = Nothing
, optListings = False
+ , optLaTeXEngine = "pdflatex"
+ , optSlideLevel = Nothing
+ , optSetextHeaders = True
, optAscii = False
}
@@ -188,50 +205,23 @@ options =
"FORMAT")
""
- , Option "s" ["standalone"]
- (NoArg
- (\opt -> return opt { optStandalone = True }))
- "" -- "Include needed header and footer on output"
-
, Option "o" ["output"]
(ReqArg
(\arg opt -> return opt { optOutputFile = arg })
"FILENAME")
"" -- "Name of output file"
- , Option "p" ["preserve-tabs"]
- (NoArg
- (\opt -> return opt { optPreserveTabs = True }))
- "" -- "Preserve tabs instead of converting to spaces"
-
- , Option "" ["tab-stop"]
+ , Option "" ["data-dir"]
(ReqArg
- (\arg opt ->
- case reads arg of
- [(t,"")] | t > 0 -> return opt { optTabStop = t }
- _ -> do
- UTF8.hPutStrLn stderr $
- "tab-stop must be a number greater than 0"
- exitWith $ ExitFailure 31)
- "NUMBER")
- "" -- "Tab stop (default 4)"
+ (\arg opt -> return opt { optDataDir = Just arg })
+ "DIRECTORY") -- "Directory containing pandoc data files."
+ ""
, Option "" ["strict"]
(NoArg
(\opt -> return opt { optStrict = True } ))
"" -- "Disable markdown syntax extensions"
- , Option "" ["normalize"]
- (NoArg
- (\opt -> return opt { optTransforms =
- normalize : optTransforms opt } ))
- "" -- "Normalize the Pandoc AST"
-
- , Option "" ["reference-links"]
- (NoArg
- (\opt -> return opt { optReferenceLinks = True } ))
- "" -- "Use reference links in parsing HTML"
-
, Option "R" ["parse-raw"]
(NoArg
(\opt -> return opt { optParseRaw = True }))
@@ -242,145 +232,25 @@ options =
(\opt -> return opt { optSmart = True }))
"" -- "Use smart quotes, dashes, and ellipses"
- , Option "5" ["html5"]
- (NoArg
- (\opt -> return opt { optHtml5 = True }))
- "" -- "Produce HTML5 in HTML output"
-
- , Option "m" ["latexmathml", "asciimathml"]
- (OptArg
- (\arg opt ->
- return opt { optHTMLMathMethod = LaTeXMathML arg })
- "URL")
- "" -- "Use LaTeXMathML script in html output"
-
- , Option "" ["mathml"]
- (OptArg
- (\arg opt ->
- return opt { optHTMLMathMethod = MathML arg })
- "URL")
- "" -- "Use mathml for HTML math"
-
- , Option "" ["mimetex"]
- (OptArg
- (\arg opt -> do
- let url' = case arg of
- Just u -> u ++ "?"
- Nothing -> "/cgi-bin/mimetex.cgi?"
- return opt { optHTMLMathMethod = WebTeX url' })
- "URL")
- "" -- "Use mimetex for HTML math"
-
- , Option "" ["webtex"]
- (OptArg
- (\arg opt -> do
- let url' = case arg of
- Just u -> u
- Nothing -> "http://chart.apis.google.com/chart?cht=tx&chl="
- return opt { optHTMLMathMethod = WebTeX url' })
- "URL")
- "" -- "Use web service for HTML math"
-
- , Option "" ["jsmath"]
- (OptArg
- (\arg opt -> return opt { optHTMLMathMethod = JsMath arg})
- "URL")
- "" -- "Use jsMath for HTML math"
-
- , Option "" ["mathjax"]
- (OptArg
- (\arg opt -> do
- let url' = case arg of
- Just u -> u
- Nothing -> "https://d3eoax9i5htok0.cloudfront.net/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
- return opt { optHTMLMathMethod = MathJax url'})
- "URL")
- "" -- "Use MathJax for HTML math"
-
- , Option "" ["gladtex"]
- (NoArg
- (\opt -> return opt { optHTMLMathMethod = GladTeX }))
- "" -- "Use gladtex for HTML math"
-
- , Option "i" ["incremental"]
- (NoArg
- (\opt -> return opt { optIncremental = True }))
- "" -- "Make list items display incrementally in Slidy/S5"
-
- , Option "" ["offline"]
- (NoArg
- (\opt -> return opt { optOffline = True,
- optStandalone = True }))
- "" -- "Make slide shows include all the needed js and css"
-
- , Option "" ["xetex"]
+ , Option "" ["old-dashes"]
(NoArg
- (\opt -> do
- UTF8.hPutStrLn stderr $ "pandoc: --xetex is deprecated. "
- ++ "It is no longer needed for use with XeTeX."
- return opt { optXeTeX = True }))
- "" -- "Format latex for processing by XeTeX"
-
- , Option "" ["chapters"]
- (NoArg
- (\opt -> return opt { optChapters = True }))
- "" -- "Use chapter for top-level sections in LaTeX, DocBook"
-
- , Option "N" ["number-sections"]
- (NoArg
- (\opt -> return opt { optNumberSections = True }))
- "" -- "Number sections in LaTeX"
-
- , Option "" ["listings"]
- (NoArg
- (\opt -> return opt { optListings = True }))
- "" -- "Use listings package for LaTeX code blocks"
-
- , Option "" ["section-divs"]
- (NoArg
- (\opt -> return opt { optSectionDivs = True }))
- "" -- "Put sections in div tags in HTML"
-
- , Option "" ["no-wrap"]
- (NoArg
- (\opt -> return opt { optWrapText = False }))
- "" -- "Do not wrap text in output"
+ (\opt -> return opt { optSmart = True
+ , optOldDashes = True }))
+ "" -- "Use smart quotes, dashes, and ellipses"
- , Option "" ["columns"]
+ , Option "" ["base-header-level"]
(ReqArg
(\arg opt ->
case reads arg of
- [(t,"")] | t > 0 -> return opt { optColumns = t }
- _ -> do
- UTF8.hPutStrLn stderr $
- "columns must be a number greater than 0"
- exitWith $ ExitFailure 33)
- "NUMBER")
- "" -- "Length of line in characters"
-
- , Option "" ["ascii"]
- (NoArg
- (\opt -> return opt { optAscii = True }))
- "" -- "Avoid using non-ascii characters in output"
-
- , Option "" ["email-obfuscation"]
- (ReqArg
- (\arg opt -> do
- method <- case arg of
- "references" -> return ReferenceObfuscation
- "javascript" -> return JavascriptObfuscation
- "none" -> return NoObfuscation
- _ -> UTF8.hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >>
- exitWith (ExitFailure 6)
- return opt { optEmailObfuscation = method })
- "none|javascript|references")
- "" -- "Method for obfuscating email in HTML"
-
- , Option "" ["id-prefix"]
- (ReqArg
- (\arg opt -> return opt { optIdentifierPrefix = arg })
- "STRING")
- "" -- "Prefix to add to automatically generated HTML identifiers"
+ [(t,"")] | t > 0 -> do
+ let oldTransforms = optTransforms opt
+ let shift = t - 1
+ return opt{ optTransforms =
+ headerShift shift : oldTransforms }
+ _ -> err 19
+ "base-header-level must be a number > 0")
+ "NUMBER")
+ "" -- "Headers base level"
, Option "" ["indented-code-classes"]
(ReqArg
@@ -389,26 +259,31 @@ options =
"STRING")
"" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks"
- , Option "" ["toc", "table-of-contents"]
- (NoArg
- (\opt -> return opt { optTableOfContents = True }))
- "" -- "Include table of contents"
+ , Option "" ["normalize"]
+ (NoArg
+ (\opt -> return opt { optTransforms =
+ normalize : optTransforms opt } ))
+ "" -- "Normalize the Pandoc AST"
- , Option "" ["base-header-level"]
+ , Option "p" ["preserve-tabs"]
+ (NoArg
+ (\opt -> return opt { optPreserveTabs = True }))
+ "" -- "Preserve tabs instead of converting to spaces"
+
+ , Option "" ["tab-stop"]
(ReqArg
(\arg opt ->
case reads arg of
- [(t,"")] | t > 0 -> do
- let oldTransforms = optTransforms opt
- let shift = t - 1
- return opt{ optTransforms =
- headerShift shift : oldTransforms }
- _ -> do
- UTF8.hPutStrLn stderr $
- "base-header-level must be a number > 0"
- exitWith $ ExitFailure 19)
+ [(t,"")] | t > 0 -> return opt { optTabStop = t }
+ _ -> err 31
+ "tab-stop must be a number greater than 0")
"NUMBER")
- "" -- "Headers base level"
+ "" -- "Tab stop (default 4)"
+
+ , Option "s" ["standalone"]
+ (NoArg
+ (\opt -> return opt { optStandalone = True }))
+ "" -- "Include needed header and footer on output"
, Option "" ["template"]
(ReqArg
@@ -425,21 +300,62 @@ options =
(k,_:v) -> do
let newvars = optVariables opt ++ [(k,v)]
return opt{ optVariables = newvars }
- _ -> do
- UTF8.hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)"
- exitWith $ ExitFailure 17)
+ _ -> err 17 $
+ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)")
"KEY:VALUE")
"" -- "Use custom template"
- , Option "c" ["css"]
+ , Option "D" ["print-default-template"]
(ReqArg
- (\arg opt -> do
- -- add new link to end, so it is included in proper order
- let newvars = optVariables opt ++ [("css",arg)]
- return opt { optVariables = newvars,
- optStandalone = True })
- "URL")
- "" -- "Link to CSS style sheet"
+ (\arg _ -> do
+ templ <- getDefaultTemplate Nothing arg
+ case templ of
+ Right t -> UTF8.hPutStr stdout t
+ Left e -> error $ show e
+ exitWith ExitSuccess)
+ "FORMAT")
+ "" -- "Print default template for FORMAT"
+
+ , Option "" ["no-wrap"]
+ (NoArg
+ (\opt -> return opt { optWrapText = False }))
+ "" -- "Do not wrap text in output"
+
+ , Option "" ["columns"]
+ (ReqArg
+ (\arg opt ->
+ case reads arg of
+ [(t,"")] | t > 0 -> return opt { optColumns = t }
+ _ -> err 33 $
+ "columns must be a number greater than 0")
+ "NUMBER")
+ "" -- "Length of line in characters"
+
+ , Option "" ["toc", "table-of-contents"]
+ (NoArg
+ (\opt -> return opt { optTableOfContents = True }))
+ "" -- "Include table of contents"
+
+ , Option "" ["no-highlight"]
+ (NoArg
+ (\opt -> return opt { optHighlight = False }))
+ "" -- "Don't highlight source code"
+
+ , Option "" ["highlight-style"]
+ (ReqArg
+ (\arg opt -> do
+ newStyle <- case map toLower arg of
+ "pygments" -> return pygments
+ "tango" -> return tango
+ "espresso" -> return espresso
+ "kate" -> return kate
+ "monochrome" -> return monochrome
+ "haddock" -> return haddock
+ _ -> err 39 $
+ "Unknown style :" ++ arg
+ return opt{ optHighlightStyle = newStyle })
+ "STYLE")
+ "" -- "Style for highlighted code"
, Option "H" ["include-in-header"]
(ReqArg
@@ -474,6 +390,100 @@ options =
"FILENAME")
"" -- "File to include after document body"
+ , Option "" ["self-contained"]
+ (NoArg
+ (\opt -> return opt { optSelfContained = True,
+ optVariables = ("slidy-url","slidy") :
+ optVariables opt,
+ optStandalone = True }))
+ "" -- "Make slide shows include all the needed js and css"
+
+ , Option "" ["offline"]
+ (NoArg
+ (\opt -> do warn $ "--offline is deprecated. Use --self-contained instead."
+ return opt { optSelfContained = True,
+ optStandalone = True }))
+ "" -- "Make slide shows include all the needed js and css"
+ -- deprecated synonym for --self-contained
+
+ , Option "5" ["html5"]
+ (NoArg
+ (\opt -> do
+ warn $ "--html5 is deprecated. "
+ ++ "Use the html5 output format instead."
+ return opt { optHtml5 = True }))
+ "" -- "Produce HTML5 in HTML output"
+
+ , Option "" ["ascii"]
+ (NoArg
+ (\opt -> return opt { optAscii = True }))
+ "" -- "Use ascii characters only in HTML output"
+
+ , Option "" ["reference-links"]
+ (NoArg
+ (\opt -> return opt { optReferenceLinks = True } ))
+ "" -- "Use reference links in parsing HTML"
+
+ , Option "" ["atx-headers"]
+ (NoArg
+ (\opt -> return opt { optSetextHeaders = False } ))
+ "" -- "Use atx-style headers for markdown"
+
+ , Option "" ["chapters"]
+ (NoArg
+ (\opt -> return opt { optChapters = True }))
+ "" -- "Use chapter for top-level sections in LaTeX, DocBook"
+
+ , Option "N" ["number-sections"]
+ (NoArg
+ (\opt -> return opt { optNumberSections = True }))
+ "" -- "Number sections in LaTeX"
+
+ , Option "" ["listings"]
+ (NoArg
+ (\opt -> return opt { optListings = True }))
+ "" -- "Use listings package for LaTeX code blocks"
+
+ , Option "i" ["incremental"]
+ (NoArg
+ (\opt -> return opt { optIncremental = True }))
+ "" -- "Make list items display incrementally in Slidy/S5"
+
+ , Option "" ["slide-level"]
+ (ReqArg
+ (\arg opt -> do
+ case reads arg of
+ [(t,"")] | t >= 1 && t <= 6 ->
+ return opt { optSlideLevel = Just t }
+ _ -> err 39 $
+ "slide level must be a number between 1 and 6")
+ "NUMBER")
+ "" -- "Force header level for slides"
+
+ , Option "" ["section-divs"]
+ (NoArg
+ (\opt -> return opt { optSectionDivs = True }))
+ "" -- "Put sections in div tags in HTML"
+
+ , Option "" ["email-obfuscation"]
+ (ReqArg
+ (\arg opt -> do
+ method <- case arg of
+ "references" -> return ReferenceObfuscation
+ "javascript" -> return JavascriptObfuscation
+ "none" -> return NoObfuscation
+ _ -> err 6
+ ("Unknown obfuscation method: " ++ arg)
+ return opt { optEmailObfuscation = method })
+ "none|javascript|references")
+ "" -- "Method for obfuscating email in HTML"
+
+ , Option "" ["id-prefix"]
+ (ReqArg
+ (\arg opt -> return opt { optIdentifierPrefix = arg })
+ "STRING")
+ "" -- "Prefix to add to automatically generated HTML identifiers"
+
, Option "T" ["title-prefix"]
(ReqArg
(\arg opt -> do
@@ -483,6 +493,16 @@ options =
"STRING")
"" -- "String to prefix to HTML window title"
+ , Option "c" ["css"]
+ (ReqArg
+ (\arg opt -> do
+ -- add new link to end, so it is included in proper order
+ let newvars = optVariables opt ++ [("css",arg)]
+ return opt { optVariables = newvars,
+ optStandalone = True })
+ "URL")
+ "" -- "Link to CSS style sheet"
+
, Option "" ["reference-odt"]
(ReqArg
(\arg opt -> do
@@ -490,6 +510,13 @@ options =
"FILENAME")
"" -- "Path of custom reference.odt"
+ , Option "" ["reference-docx"]
+ (ReqArg
+ (\arg opt -> do
+ return opt { optReferenceDocx = Just arg })
+ "FILENAME")
+ "" -- "Path of custom reference.docx"
+
, Option "" ["epub-stylesheet"]
(ReqArg
(\arg opt -> do
@@ -514,16 +541,22 @@ options =
"FILENAME")
"" -- "Path of epub metadata file"
- , Option "D" ["print-default-template"]
+ , Option "" ["epub-embed-font"]
(ReqArg
- (\arg _ -> do
- templ <- getDefaultTemplate Nothing arg
- case templ of
- Right t -> UTF8.hPutStr stdout t
- Left e -> error $ show e
- exitWith ExitSuccess)
- "FORMAT")
- "" -- "Print default template for FORMAT"
+ (\arg opt -> do
+ return opt{ optEPUBFonts = arg : optEPUBFonts opt })
+ "FILE")
+ "" -- "Directory of fonts to embed"
+
+ , Option "" ["latex-engine"]
+ (ReqArg
+ (\arg opt -> do
+ let b = takeBaseName arg
+ if (b == "pdflatex" || b == "lualatex" || b == "xelatex")
+ then return opt { optLaTeXEngine = arg }
+ else err 45 "latex-engine must be pdflatex, lualatex, or xelatex.")
+ "PROGRAM")
+ "" -- "Name of latex program to use in generating PDF"
, Option "" ["bibliography"]
(ReqArg
@@ -537,6 +570,12 @@ options =
"FILENAME")
""
+ , Option "" ["citation-abbreviations"]
+ (ReqArg
+ (\arg opt -> return opt { optAbbrevsFile = Just arg })
+ "FILENAME")
+ ""
+
, Option "" ["natbib"]
(NoArg
(\opt -> return opt { optCiteMethod = Natbib }))
@@ -547,11 +586,60 @@ options =
(\opt -> return opt { optCiteMethod = Biblatex }))
"" -- "Use biblatex cite commands in LaTeX output"
- , Option "" ["data-dir"]
- (ReqArg
- (\arg opt -> return opt { optDataDir = Just arg })
- "DIRECTORY") -- "Directory containing pandoc data files."
- ""
+ , Option "m" ["latexmathml", "asciimathml"]
+ (OptArg
+ (\arg opt ->
+ return opt { optHTMLMathMethod = LaTeXMathML arg })
+ "URL")
+ "" -- "Use LaTeXMathML script in html output"
+
+ , Option "" ["mathml"]
+ (OptArg
+ (\arg opt ->
+ return opt { optHTMLMathMethod = MathML arg })
+ "URL")
+ "" -- "Use mathml for HTML math"
+
+ , Option "" ["mimetex"]
+ (OptArg
+ (\arg opt -> do
+ let url' = case arg of
+ Just u -> u ++ "?"
+ Nothing -> "/cgi-bin/mimetex.cgi?"
+ return opt { optHTMLMathMethod = WebTeX url' })
+ "URL")
+ "" -- "Use mimetex for HTML math"
+
+ , Option "" ["webtex"]
+ (OptArg
+ (\arg opt -> do
+ let url' = case arg of
+ Just u -> u
+ Nothing -> "http://chart.apis.google.com/chart?cht=tx&chl="
+ return opt { optHTMLMathMethod = WebTeX url' })
+ "URL")
+ "" -- "Use web service for HTML math"
+
+ , Option "" ["jsmath"]
+ (OptArg
+ (\arg opt -> return opt { optHTMLMathMethod = JsMath arg})
+ "URL")
+ "" -- "Use jsMath for HTML math"
+
+ , Option "" ["mathjax"]
+ (OptArg
+ (\arg opt -> do
+ let url' = case arg of
+ Just u -> u
+ Nothing -> "https://d3eoax9i5htok0.cloudfront.net/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"
+ return opt { optHTMLMathMethod = MathJax url'})
+ "URL")
+ "" -- "Use MathJax for HTML math"
+
+ , Option "" ["gladtex"]
+ (NoArg
+ (\opt -> return opt { optHTMLMathMethod = GladTeX }))
+ "" -- "Use gladtex for HTML math"
, Option "" ["dump-args"]
(NoArg
@@ -579,14 +667,15 @@ options =
UTF8.hPutStr stdout (usageMessage prg options)
exitWith ExitSuccess ))
"" -- "Show help"
+
]
-- Returns usage message
usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
usageMessage programName = usageInfo
(programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++
- (intercalate ", " $ map fst readers) ++ "\nOutput formats: " ++
- (intercalate ", " $ map fst writers ++ ["odt","epub"]) ++ "\nOptions:")
+ (wrapWords 16 78 $ map fst readers) ++ "\nOutput formats: " ++
+ (wrapWords 16 78 $ map fst writers ++ nonTextFormats) ++ "\nOptions:")
-- Determine default reader based on source file extensions
defaultReaderName :: String -> [FilePath] -> String
@@ -637,8 +726,11 @@ defaultWriterName x =
".texinfo" -> "texinfo"
".db" -> "docbook"
".odt" -> "odt"
+ ".docx" -> "docx"
".epub" -> "epub"
".org" -> "org"
+ ".asciidoc" -> "asciidoc"
+ ".pdf" -> "latex"
['.',y] | y `elem` ['1'..'9'] -> "man"
_ -> "html"
@@ -654,10 +746,8 @@ main = do
else getOpt Permute options rawArgs
unless (null errors) $
- do name <- getProgName
- mapM_ (\e -> UTF8.hPutStr stderr (name ++ ": ") >> UTF8.hPutStr stderr e) errors
- UTF8.hPutStrLn stderr $ "Try " ++ name ++ " --help for more information."
- exitWith $ ExitFailure 2
+ err 2 $ concat $ errors ++
+ ["Try " ++ prg ++ " --help for more information."]
let defaultOpts' = if compatMode
then defaultOpts { optReader = "markdown"
@@ -682,14 +772,19 @@ main = do
, optNumberSections = numberSections
, optSectionDivs = sectionDivs
, optIncremental = incremental
- , optOffline = offline
+ , optSelfContained = selfContained
, optSmart = smart
+ , optOldDashes = oldDashes
, optHtml5 = html5
+ , optHighlight = highlight
+ , optHighlightStyle = highlightStyle
, optChapters = chapters
, optHTMLMathMethod = mathMethod
, optReferenceODT = referenceODT
+ , optReferenceDocx = referenceDocx
, optEPUBStylesheet = epubStylesheet
, optEPUBMetadata = epubMetadata
+ , optEPUBFonts = epubFonts
, optDumpArgs = dumpArgs
, optIgnoreArgs = ignoreArgs
, optStrict = strict
@@ -702,8 +797,12 @@ main = do
, optDataDir = mbDataDir
, optBibliography = reffiles
, optCslFile = cslfile
+ , optAbbrevsFile = cslabbrevs
, optCiteMethod = citeMethod
, optListings = listings
+ , optLaTeXEngine = latexEngine
+ , optSlideLevel = slideLevel
+ , optSetextHeaders = setextHeaders
, optAscii = ascii
} = opts
@@ -726,17 +825,35 @@ main = do
then "html"
else "markdown"
in defaultReaderName fallback sources
- else readerName
+ else readerName
let writerName' = if null writerName
then defaultWriterName outputFile
else writerName
+ let pdfOutput = map toLower (takeExtension outputFile) == ".pdf"
+
+ when pdfOutput $ do
+ -- make sure writer is latex or beamer
+ unless (writerName' == "latex" || writerName' == "beamer" ||
+ writerName' == "latex+lhs") $
+ err 47 $ "cannot produce pdf output with " ++ writerName' ++ " writer"
+ -- check for latex program
+ mbLatex <- findExecutable latexEngine
+ case mbLatex of
+ Nothing -> err 41 $
+ latexEngine ++ " not found. " ++
+ latexEngine ++ " is needed for pdf output."
+ Just _ -> return ()
+
reader <- case (lookup readerName' readers) of
Just r -> return r
- Nothing -> error ("Unknown reader: " ++ readerName')
+ Nothing -> err 7 ("Unknown reader: " ++ readerName')
+
+ let standalone' = standalone || writerName' `elem` nonTextFormats || pdfOutput
templ <- case templatePath of
+ _ | not standalone' -> return ""
Nothing -> do
deftemp <- getDefaultTemplate datadir writerName'
case deftemp of
@@ -756,57 +873,57 @@ main = do
(\_ -> throwIO e)
else throwIO e)
- let standalone' = standalone || isNonTextOutput writerName'
-
- variables' <- case (writerName', standalone', offline) of
- ("s5", True, True) -> do
- inc <- s5HeaderIncludes datadir
- return $ ("s5includes", inc) : variables
- ("slidy", True, True) -> do
- slidyJs <- readDataFile datadir $
- "slidy" </> "slidy.min.js"
- slidyCss <- readDataFile datadir $
- "slidy" </> "slidy.css"
- return $ ("slidy-js", slidyJs) :
- ("slidy-css", slidyCss) : variables
- _ -> return variables
+ let slideVariant = case writerName' of
+ "s5" -> S5Slides
+ "slidy" -> SlidySlides
+ "dzslides" -> DZSlides
+ _ -> NoSlides
- variables'' <- case mathMethod of
+ variables' <- case mathMethod of
LaTeXMathML Nothing -> do
s <- readDataFile datadir $ "data" </> "LaTeXMathML.js"
- return $ ("mathml-script", s) : variables'
+ return $ ("mathml-script", s) : variables
MathML Nothing -> do
s <- readDataFile datadir $ "data"</>"MathMLinHTML.js"
- return $ ("mathml-script", s) : variables'
- _ -> return variables'
+ return $ ("mathml-script", s) : variables
+ _ -> return variables
+
+ variables'' <- case slideVariant of
+ DZSlides -> do
+ dztempl <- readDataFile datadir $ "dzslides" </> "template.html"
+ let dzcore = unlines $ dropWhile (not . isPrefixOf "<!-- {{{{ dzslides core")
+ $ lines dztempl
+ return $ ("dzslides-core", dzcore) : variables'
+ _ -> return variables'
- refs <- mapM (\f -> catch (readBiblioFile f) $ \e -> do
- UTF8.hPutStrLn stderr $ "Error reading bibliography `" ++ f ++ "'"
- UTF8.hPutStrLn stderr $ show e
- exitWith (ExitFailure 23)) reffiles >>= \rs -> return $ concat rs
+ -- unescape reference ids, which may contain XML entities, so
+ -- that we can do lookups with regular string equality
+ let unescapeRefId ref = ref{ refId = fromEntities (refId ref) }
+
+ refs <- mapM (\f -> catch (CSL.readBiblioFile f) $ \e ->
+ err 23 $ "Error reading bibliography `" ++ f ++ "'" ++ "\n" ++ show e)
+ reffiles >>=
+ return . map unescapeRefId . concat
let sourceDir = if null sources
then "."
else takeDirectory (head sources)
- let slideVariant = case writerName' of
- "s5" -> S5Slides
- "slidy" -> SlidySlides
- _ -> NoSlides
-
let startParserState =
defaultParserState { stateParseRaw = parseRaw,
stateTabStop = tabStop,
stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' ||
lhsExtension sources,
stateStandalone = standalone',
- stateCitations = map refId refs,
+ stateCitations = map CSL.refId refs,
stateSmart = smart || writerName' `elem`
- ["latex", "context", "latex+lhs", "man"],
+ ["latex", "context", "latex+lhs", "beamer"],
+ stateOldDashes = oldDashes,
stateColumns = columns,
stateStrict = strict,
stateIndentedCodeClasses = codeBlockClasses,
- stateApplyMacros = writerName' `notElem` ["latex", "latex+lhs"] }
+ stateApplyMacros = writerName' `notElem`
+ ["latex", "latex+lhs", "beamer"] }
let writerOptions = defaultWriterOptions
{ writerStandalone = standalone',
@@ -836,16 +953,20 @@ main = do
writerIdentifierPrefix = idPrefix,
writerSourceDirectory = sourceDir,
writerUserDataDir = datadir,
- writerHtml5 = html5 &&
- "html" `isPrefixOf` writerName',
- writerChapters = chapters,
+ writerHtml5 = html5 ||
+ slideVariant == DZSlides,
+ writerChapters = chapters,
writerListings = listings,
- writerAscii = ascii }
+ writerBeamer = writerName' == "beamer",
+ writerSlideLevel = slideLevel,
+ writerHighlight = highlight,
+ writerHighlightStyle = highlightStyle,
+ writerSetextHeaders = setextHeaders
+ }
- when (isNonTextOutput writerName' && outputFile == "-") $
- do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
- "Specify an output file using the -o option.")
- exitWith $ ExitFailure 5
+ when (writerName' `elem` nonTextFormats&& outputFile == "-") $
+ err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++
+ "Specify an output file using the -o option."
let readSources [] = mapM readSource ["-"]
readSources srcs = mapM readSource srcs
@@ -859,7 +980,14 @@ main = do
let convertTabs = tabFilter (if preserveTabs then 0 else tabStop)
- doc <- fmap (reader startParserState . convertTabs . intercalate "\n") (readSources sources)
+ let handleIncludes' = if readerName' == "latex" || readerName' == "beamer" ||
+ readerName' == "latex+lhs" ||
+ readerName' == "context"
+ then handleIncludes
+ else return
+
+ doc <- (reader startParserState) `fmap` (readSources sources >>=
+ handleIncludes' . convertTabs . intercalate "\n")
let doc0 = foldr ($) doc transforms
@@ -881,19 +1009,41 @@ main = do
replaceDirectory
(replaceExtension cslfile "csl")
csldir
- processBiblio cslfile' refs doc1
+ processBiblio cslfile' cslabbrevs refs doc1
else return doc1
+ let writeBinary :: B.ByteString -> IO ()
+ writeBinary = B.writeFile (encodeString outputFile)
+
+ let writerFn :: FilePath -> String -> IO ()
+ writerFn "-" = UTF8.putStr
+ writerFn f = UTF8.writeFile f
+
case lookup writerName' writers of
- Nothing | writerName' == "epub" ->
- writeEPUB epubStylesheet writerOptions doc2
- >>= B.writeFile (encodeString outputFile)
- Nothing | writerName' == "odt" ->
- writeODT referenceODT writerOptions doc2
- >>= B.writeFile (encodeString outputFile)
- Just r -> writerFn outputFile result
- where writerFn "-" = UTF8.putStr
- writerFn f = UTF8.writeFile f
- result = r writerOptions doc2 ++
- ['\n' | not standalone']
- Nothing -> error $ "Unknown writer: " ++ writerName'
+ Nothing
+ | writerName' == "epub" ->
+ writeEPUB epubStylesheet epubFonts writerOptions doc2
+ >>= writeBinary
+ | writerName' == "odt" ->
+ writeODT referenceODT writerOptions doc2 >>= writeBinary
+ | writerName' == "docx" ->
+ writeDocx referenceDocx writerOptions doc2 >>= writeBinary
+ | otherwise -> err 9 ("Unknown writer: " ++ writerName')
+ Just _
+ | pdfOutput -> do
+ res <- tex2pdf latexEngine $ writeLaTeX writerOptions doc2
+ case res of
+ Right pdf -> writeBinary pdf
+ Left err' -> err 43 $ toString err'
+ Just r
+ | htmlFormat && ascii ->
+ writerFn outputFile =<< selfcontain (toEntities result)
+ | otherwise ->
+ writerFn outputFile =<< selfcontain result
+ where result = r writerOptions doc2 ++ ['\n' | not standalone']
+ htmlFormat = writerName' `elem`
+ ["html","html+lhs","html5","html5+lhs",
+ "s5","slidy","dzslides"]
+ selfcontain = if selfContained && htmlFormat
+ then makeSelfContained datadir
+ else return