diff options
Diffstat (limited to 'src')
49 files changed, 4495 insertions, 2612 deletions
diff --git a/src/Tests/Arbitrary.hs b/src/Tests/Arbitrary.hs index f5a44ceab..9d65e1f1f 100644 --- a/src/Tests/Arbitrary.hs +++ b/src/Tests/Arbitrary.hs @@ -1,5 +1,5 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} -{-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, ScopedTypeVariables #-} -- provides Arbitrary instance for Pandoc types module Tests.Arbitrary () where @@ -22,10 +22,10 @@ arbAttr = do return (id',classes,keyvals) instance Arbitrary Inlines where - arbitrary = liftM fromList arbitrary + arbitrary = liftM (fromList :: [Inline] -> Inlines) arbitrary instance Arbitrary Blocks where - arbitrary = liftM fromList arbitrary + arbitrary = liftM (fromList :: [Block] -> Blocks) arbitrary instance Arbitrary Inline where arbitrary = resize 3 $ arbInline 2 @@ -41,10 +41,6 @@ arbInline :: Int -> Gen Inline arbInline n = frequency $ [ (60, liftM Str realString) , (60, return Space) , (10, liftM2 Code arbAttr realString) - , (5, return EmDash) - , (5, return EnDash) - , (5, return Apostrophe) - , (5, return Ellipses) , (5, elements [ RawInline "html" "<a id=\"eek\">" , RawInline "latex" "\\my{command}" ]) ] ++ [ x | x <- nesters, n > 1] diff --git a/src/Tests/Helpers.hs b/src/Tests/Helpers.hs index b8d6b83a7..66879efed 100644 --- a/src/Tests/Helpers.hs +++ b/src/Tests/Helpers.hs @@ -69,17 +69,15 @@ test fn name (input, expected) = vividize :: (DI,String) -> String vividize (B,s) = s -vividize (_,s) = vivid s +vividize (F,s) = s +vividize (S,s) = setSGRCode [SetColor Background Dull Red + , SetColor Foreground Vivid White] ++ s + ++ setSGRCode [Reset] property :: QP.Testable a => TestName -> a -> Test property = testProperty -vivid :: String -> String -vivid s = setSGRCode [SetColor Background Dull Red - , SetColor Foreground Vivid White] ++ s - ++ setSGRCode [Reset] - -infix 6 =?> +infix 5 =?> (=?>) :: a -> b -> (a,b) x =?> y = (x, y) diff --git a/src/Tests/Old.hs b/src/Tests/Old.hs index cb1417ffa..1ec32a30d 100644 --- a/src/Tests/Old.hs +++ b/src/Tests/Old.hs @@ -13,7 +13,6 @@ import Data.Algorithm.Diff import Text.Pandoc.Shared ( normalize, defaultWriterOptions ) import Text.Pandoc.Writers.Native ( writeNative ) import Text.Pandoc.Readers.Native ( readNative ) -import Text.Pandoc.Highlighting ( languages ) import Prelude hiding ( readFile ) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 (toString) @@ -105,7 +104,7 @@ tests = [ testGroup "markdown" ] , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f) [ "docbook", "opendocument" , "context" , "texinfo" - , "man" , "plain" , "mediawiki", "rtf", "org" + , "man" , "plain" , "mediawiki", "rtf", "org", "asciidoc" ] ] @@ -121,10 +120,7 @@ lhsWriterTests format ] where t n f = test n ["--columns=78", "-r", "native", "-s", "-w", f] - "lhs-test.native" ("lhs-test" <.> ext f) - ext f = if null languages && format == "html" - then "nohl" <.> f - else f + "lhs-test.native" ("lhs-test" <.> f) lhsReaderTest :: String -> Test lhsReaderTest format = diff --git a/src/Tests/Readers/LaTeX.hs b/src/Tests/Readers/LaTeX.hs index 6d28441f8..d60026b20 100644 --- a/src/Tests/Readers/LaTeX.hs +++ b/src/Tests/Readers/LaTeX.hs @@ -11,7 +11,7 @@ import Text.Pandoc latex :: String -> Pandoc latex = readLaTeX defaultParserState -infix 5 =: +infix 4 =: (=:) :: ToString c => String -> (String, c) -> Test (=:) = test latex @@ -35,10 +35,15 @@ tests = [ testGroup "basic" "\\subsubsection{header}" =?> header 3 "header" , "emph" =: "\\section{text \\emph{emph}}" =?> - header 1 ("text" +++ space +++ emph "emph") + header 1 ("text" <> space <> emph "emph") , "link" =: "\\section{text \\href{/url}{link}}" =?> - header 1 ("text" +++ space +++ link "/url" "" "link") + header 1 ("text" <> space <> link "/url" "" "link") + ] + + , testGroup "math" + [ "escaped $" =: + "$x=\\$4$" =?> para (math "x=\\$4") ] , testGroup "space and comments" @@ -64,98 +69,101 @@ baseCitation = Citation{ citationId = "item1" , citationNoteNum = 0 , citationHash = 0 } +rt :: String -> Inlines +rt = rawInline "latex" + natbibCitations :: Test natbibCitations = testGroup "natbib" [ "citet" =: "\\citet{item1}" - =?> para (cite [baseCitation] empty) + =?> para (cite [baseCitation] (rt "\\citet{item1}")) , "suffix" =: "\\citet[p.~30]{item1}" =?> para - (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] empty) + (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\citet[p.~30]{item1}")) , "suffix long" =: "\\citet[p.~30, with suffix]{item1}" =?> para (cite [baseCitation{ citationSuffix = - toList $ text "p.\160\&30, with suffix" }] empty) + toList $ text ", p.\160\&30, with suffix" }] (rt "\\citet[p.~30, with suffix]{item1}")) , "multiple" =: "\\citeauthor{item1} \\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}" =?> para (cite [baseCitation{ citationMode = AuthorInText } ,baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str "p.\160\&30"] + , citationSuffix = [Str ",",Space,Str "p.\160\&30"] , citationId = "item2" } ,baseCitation{ citationId = "item3" , citationPrefix = [Str "see",Space,Str "also"] , citationMode = NormalCitation } - ] empty) + ] (rt "\\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}")) , "group" =: "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Str "see"] - , citationSuffix = [Str "p.\160\&34",EnDash,Str "35"] } + , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] } ,baseCitation{ citationMode = NormalCitation , citationId = "item3" , citationPrefix = [Str "also"] - , citationSuffix = [Str "chap.",Space,Str "3"] } - ] empty) + , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] } + ] (rt "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}")) , "suffix and locator" =: "\\citep[pp.~33, 35--37, and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str "pp.\160\&33,",Space,Str "35",EnDash,Str "37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] empty) + , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\citep[pp.~33, 35--37, and nowhere else]{item1}")) , "suffix only" =: "\\citep[and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = toList $ text "and nowhere else" }] empty) + , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\citep[and nowhere else]{item1}")) , "no author" =: "\\citeyearpar{item1}, and now Doe with a locator \\citeyearpar[p.~44]{item2}" - =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] empty +++ - text ", and now Doe with a locator " +++ + =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\citeyearpar{item1}") <> + text ", and now Doe with a locator " <> cite [baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str "p.\160\&44"] - , citationId = "item2" }] empty) + , citationSuffix = [Str ",",Space,Str "p.\160\&44"] + , citationId = "item2" }] (rt "\\citeyearpar[p.~44]{item2}")) , "markup" =: "\\citep[\\emph{see}][p. \\textbf{32}]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Emph [Str "see"]] - , citationSuffix = [Str "p.",Space, - Strong [Str "32"]] }] empty) + , citationSuffix = [Str ",",Space,Str "p.",Space, + Strong [Str "32"]] }] (rt "\\citep[\\emph{see}][p. \\textbf{32}]{item1}")) ] biblatexCitations :: Test biblatexCitations = testGroup "biblatex" [ "textcite" =: "\\textcite{item1}" - =?> para (cite [baseCitation] empty) + =?> para (cite [baseCitation] (rt "\\textcite{item1}")) , "suffix" =: "\\textcite[p.~30]{item1}" =?> para - (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] empty) + (cite [baseCitation{ citationSuffix = toList $ text ", p.\160\&30" }] (rt "\\textcite[p.~30]{item1}")) , "suffix long" =: "\\textcite[p.~30, with suffix]{item1}" =?> para (cite [baseCitation{ citationSuffix = - toList $ text "p.\160\&30, with suffix" }] empty) + toList $ text ", p.\160\&30, with suffix" }] (rt "\\textcite[p.~30, with suffix]{item1}")) , "multiple" =: "\\textcites{item1}[p.~30]{item2}[see also][]{item3}" =?> para (cite [baseCitation{ citationMode = AuthorInText } ,baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str "p.\160\&30"] + , citationSuffix = [Str ",",Space,Str "p.\160\&30"] , citationId = "item2" } ,baseCitation{ citationId = "item3" , citationPrefix = [Str "see",Space,Str "also"] , citationMode = NormalCitation } - ] empty) + ] (rt "\\textcites{item1}[p.~30]{item2}[see also][]{item3}")) , "group" =: "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Str "see"] - , citationSuffix = [Str "p.\160\&34",EnDash,Str "35"] } + , citationSuffix = [Str ",",Space,Str "p.\160\&34\8211\&35"] } ,baseCitation{ citationMode = NormalCitation , citationId = "item3" , citationPrefix = [Str "also"] - , citationSuffix = [Str "chap.",Space,Str "3"] } - ] empty) + , citationSuffix = [Str ",",Space,Str "chap.",Space,Str "3"] } + ] (rt "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}")) , "suffix and locator" =: "\\autocite[pp.~33, 35--37, and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = [Str "pp.\160\&33,",Space,Str "35",EnDash,Str "37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] empty) + , citationSuffix = [Str ",",Space,Str "pp.\160\&33,",Space,Str "35\8211\&37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] (rt "\\autocite[pp.~33, 35--37, and nowhere else]{item1}")) , "suffix only" =: "\\autocite[and nowhere else]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation - , citationSuffix = toList $ text "and nowhere else" }] empty) + , citationSuffix = toList $ text ", and nowhere else" }] (rt "\\autocite[and nowhere else]{item1}")) , "no author" =: "\\autocite*{item1}, and now Doe with a locator \\autocite*[p.~44]{item2}" - =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] empty +++ - text ", and now Doe with a locator " +++ + =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] (rt "\\autocite*{item1}") <> + text ", and now Doe with a locator " <> cite [baseCitation{ citationMode = SuppressAuthor - , citationSuffix = [Str "p.\160\&44"] - , citationId = "item2" }] empty) + , citationSuffix = [Str ",",Space,Str "p.\160\&44"] + , citationId = "item2" }] (rt "\\autocite*[p.~44]{item2}")) , "markup" =: "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}" =?> para (cite [baseCitation{ citationMode = NormalCitation , citationPrefix = [Emph [Str "see"]] - , citationSuffix = [Str "p.",Space, - Strong [Str "32"]] }] empty) + , citationSuffix = [Str ",",Space,Str "p.",Space, + Strong [Str "32"]] }] (rt "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}")) , "parencite" =: "\\parencite{item1}" - =?> para (cite [baseCitation{ citationMode = NormalCitation }] empty) + =?> para (cite [baseCitation{ citationMode = NormalCitation }] (rt "\\parencite{item1}")) ] diff --git a/src/Tests/Readers/Markdown.hs b/src/Tests/Readers/Markdown.hs index 941762bd0..5ad974adf 100644 --- a/src/Tests/Readers/Markdown.hs +++ b/src/Tests/Readers/Markdown.hs @@ -8,7 +8,6 @@ import Tests.Arbitrary() import Text.Pandoc.Builder -- import Text.Pandoc.Shared ( normalize ) import Text.Pandoc -import Data.Sequence (singleton) markdown :: String -> Pandoc markdown = readMarkdown defaultParserState{ stateStandalone = True } @@ -16,7 +15,7 @@ markdown = readMarkdown defaultParserState{ stateStandalone = True } markdownSmart :: String -> Pandoc markdownSmart = readMarkdown defaultParserState{ stateSmart = True } -infix 5 =: +infix 4 =: (=:) :: ToString c => String -> (String, c) -> Test (=:) = test markdown @@ -44,29 +43,49 @@ tests = [ testGroup "inline code" "`*` {.haskell .special x=\"7\"}" =?> para (codeWith ("",["haskell","special"],[("x","7")]) "*") ] + , testGroup "backslash escapes" + [ "in URL" =: + "[hi](/there\\))" + =?> para (link "/there)" "" "hi") + , "in title" =: + "[hi](/there \"a\\\"a\")" + =?> para (link "/there" "a\"a" "hi") + , "in reference link title" =: + "[hi]\n\n[hi]: /there (a\\)a)" + =?> para (link "/there" "a)a" "hi") + , "in reference link URL" =: + "[hi]\n\n[hi]: /there\\.0" + =?> para (link "/there.0" "" "hi") + ] , testGroup "smart punctuation" [ test markdownSmart "quote before ellipses" ("'...hi'" - =?> para (singleQuoted (singleton Ellipses +++ "hi"))) + =?> para (singleQuoted ("…hi"))) + , test markdownSmart "apostrophe before emph" + ("D'oh! A l'*aide*!" + =?> para ("D’oh! A l’" <> emph "aide" <> "!")) + , test markdownSmart "apostrophe in French" + ("À l'arrivée de la guerre, le thème de l'«impossibilité du socialisme»" + =?> para ("À l’arrivée de la guerre, le thème de l’«impossibilité du socialisme»")) ] , testGroup "mixed emphasis and strong" [ "emph and strong emph alternating" =: "*xxx* ***xxx*** xxx\n*xxx* ***xxx*** xxx" - =?> para (emph "xxx" +++ space +++ strong (emph "xxx") +++ - space +++ "xxx" +++ space +++ - emph "xxx" +++ space +++ strong (emph "xxx") +++ - space +++ "xxx") + =?> para (emph "xxx" <> space <> strong (emph "xxx") <> + space <> "xxx" <> space <> + emph "xxx" <> space <> strong (emph "xxx") <> + space <> "xxx") , "emph with spaced strong" =: "*x **xx** x*" - =?> para (emph ("x" +++ space +++ strong "xx" +++ space +++ "x")) + =?> para (emph ("x" <> space <> strong "xx" <> space <> "x")) ] , testGroup "footnotes" [ "indent followed by newline and flush-left text" =: "[^1]\n\n[^1]: my note\n\n \nnot in note\n" - =?> para (note (para "my note")) +++ para "not in note" + =?> para (note (para "my note")) <> para "not in note" , "indent followed by newline and indented text" =: "[^1]\n\n[^1]: my note\n \n in note\n" - =?> para (note (para "my note" +++ para "in note")) + =?> para (note (para "my note" <> para "in note")) , "recursive note" =: "[^1]\n\n[^1]: See [^1]\n" =?> para (note (para "See [^1]")) @@ -76,9 +95,9 @@ tests = [ testGroup "inline code" "inverse bird tracks and html" $ "> a\n\n< b\n\n<div>\n" =?> codeBlockWith ("",["sourceCode","literate","haskell"],[]) "a" - +++ + <> codeBlockWith ("",["sourceCode","haskell"],[]) "b" - +++ + <> rawBlock "html" "<div>\n\n" ] -- the round-trip properties frequently fail diff --git a/src/Tests/Readers/RST.hs b/src/Tests/Readers/RST.hs index 4b8c9301b..3269092a6 100644 --- a/src/Tests/Readers/RST.hs +++ b/src/Tests/Readers/RST.hs @@ -11,15 +11,15 @@ import Text.Pandoc rst :: String -> Pandoc rst = readRST defaultParserState{ stateStandalone = True } -infix 5 =: +infix 4 =: (=:) :: ToString c => String -> (String, c) -> Test (=:) = test rst tests :: [Test] tests = [ "line block with blank line" =: - "| a\n|\n| b" =?> para (str "a" +++ linebreak +++ - linebreak +++ str " " +++ str "b") + "| a\n|\n| b" =?> para (str "a" <> linebreak <> + linebreak <> str " " <> str "b") , "field list" =: [_LIT| :Hostname: media08 @@ -51,10 +51,10 @@ tests = [ "line block with blank line" =: , "URLs with following punctuation" =: ("http://google.com, http://yahoo.com; http://foo.bar.baz.\n" ++ "http://foo.bar/baz_(bam) (http://foo.bar)") =?> - para (link "http://google.com" "" "http://google.com" +++ ", " +++ - link "http://yahoo.com" "" "http://yahoo.com" +++ "; " +++ - link "http://foo.bar.baz" "" "http://foo.bar.baz" +++ ". " +++ + para (link "http://google.com" "" "http://google.com" <> ", " <> + link "http://yahoo.com" "" "http://yahoo.com" <> "; " <> + link "http://foo.bar.baz" "" "http://foo.bar.baz" <> ". " <> link "http://foo.bar/baz_(bam)" "" "http://foo.bar/baz_(bam)" - +++ " (" +++ link "http://foo.bar" "" "http://foo.bar" +++ ")") + <> " (" <> link "http://foo.bar" "" "http://foo.bar" <> ")") ] diff --git a/src/Tests/Writers/ConTeXt.hs b/src/Tests/Writers/ConTeXt.hs index 704571e95..506ff698f 100644 --- a/src/Tests/Writers/ConTeXt.hs +++ b/src/Tests/Writers/ConTeXt.hs @@ -26,7 +26,7 @@ which is in turn shorthand for test context "my test" (X,Y) -} -infix 5 =: +infix 4 =: (=:) :: (ToString a, ToPandoc a) => String -> (a, String) -> Test (=:) = test context @@ -43,9 +43,7 @@ tests = [ testGroup "inline code" ] , testGroup "headers" [ "level 1" =: - header 1 "My header" =?> "\\subject{My header}" - , property "header 1 property" $ \ils -> - context' (header 1 ils) == "\\subject{" ++ context' ils ++ "}" + header 1 "My header" =?> "\\section[my-header]{My header}" ] , testGroup "bullet lists" [ "nested" =: diff --git a/src/Tests/Writers/HTML.hs b/src/Tests/Writers/HTML.hs index 3e1e0ddc2..8561aa421 100644 --- a/src/Tests/Writers/HTML.hs +++ b/src/Tests/Writers/HTML.hs @@ -23,7 +23,7 @@ which is in turn shorthand for test html "my test" (X,Y) -} -infix 5 =: +infix 4 =: (=:) :: (ToString a, ToPandoc a) => String -> (a, String) -> Test (=:) = test html @@ -40,7 +40,7 @@ tests = [ testGroup "inline code" ] , testGroup "images" [ "alt with formatting" =: - image "/url" "title" ("my " +++ emph "image") + image "/url" "title" ("my " <> emph "image") =?> "<img src=\"/url\" title=\"title\" alt=\"my image\" />" ] ] diff --git a/src/Tests/Writers/Markdown.hs b/src/Tests/Writers/Markdown.hs index 70266a683..d90dc83b1 100644 --- a/src/Tests/Writers/Markdown.hs +++ b/src/Tests/Writers/Markdown.hs @@ -22,13 +22,13 @@ which is in turn shorthand for test markdown "my test" (X,Y) -} -infix 5 =: +infix 4 =: (=:) :: (ToString a, ToPandoc a) => String -> (a, String) -> Test (=:) = test markdown tests :: [Test] tests = [ "indented code after list" - =: (orderedList [ para "one" +++ para "two" ] +++ codeBlock "test") + =: (orderedList [ para "one" <> para "two" ] <> codeBlock "test") =?> "1. one\n\n two\n\n<!-- -->\n\n test" ] diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index ef8560284..878f0e0dd 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ScopedTypeVariables, FlexibleInstances #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -94,8 +95,10 @@ module Text.Pandoc , writeTextile , writeRTF , writeODT + , writeDocx , writeEPUB , writeOrg + , writeAsciiDoc -- * Writer options used in writers , WriterOptions (..) , HTMLSlideVariant (..) @@ -109,6 +112,7 @@ module Text.Pandoc -- * Miscellaneous , rtfEmbedImage , jsonFilter + , ToJsonFilter(..) ) where import Text.Pandoc.Definition @@ -127,6 +131,7 @@ import Text.Pandoc.Writers.ConTeXt import Text.Pandoc.Writers.Texinfo import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.ODT +import Text.Pandoc.Writers.Docx import Text.Pandoc.Writers.EPUB import Text.Pandoc.Writers.Docbook import Text.Pandoc.Writers.OpenDocument @@ -135,6 +140,7 @@ import Text.Pandoc.Writers.RTF import Text.Pandoc.Writers.MediaWiki import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.Org +import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Templates import Text.Pandoc.Parsing import Text.Pandoc.Shared @@ -164,20 +170,28 @@ readers = [("native" , \_ -> readNative) ] -- | Association list of formats and writers (omitting the --- binary writers, odt and epub). +-- binary writers, odt, docx, and epub). writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] writers = [("native" , writeNative) ,("json" , \_ -> encodeJSON) ,("html" , writeHtmlString) + ,("html5" , \o -> + writeHtmlString o{ writerHtml5 = True }) ,("html+lhs" , \o -> writeHtmlString o{ writerLiterateHaskell = True }) + ,("html5+lhs" , \o -> + writeHtmlString o{ writerLiterateHaskell = True, + writerHtml5 = True }) ,("s5" , writeHtmlString) ,("slidy" , writeHtmlString) + ,("dzslides" , writeHtmlString) ,("docbook" , writeDocbook) ,("opendocument" , writeOpenDocument) ,("latex" , writeLaTeX) ,("latex+lhs" , \o -> writeLaTeX o{ writerLiterateHaskell = True }) + ,("beamer" , \o -> + writeLaTeX o{ writerBeamer = True }) ,("context" , writeConTeXt) ,("texinfo" , writeTexinfo) ,("man" , writeMan) @@ -192,10 +206,55 @@ writers = [("native" , writeNative) ,("textile" , writeTextile) ,("rtf" , writeRTF) ,("org" , writeOrg) + ,("asciidoc" , writeAsciiDoc) ] +{-# DEPRECATED jsonFilter "Use toJsonFilter instead" #-} -- | Converts a transformation on the Pandoc AST into a function -- that reads and writes a JSON-encoded string. This is useful -- for writing small scripts. jsonFilter :: (Pandoc -> Pandoc) -> String -> String jsonFilter f = encodeJSON . f . decodeJSON + +-- | 'toJsonFilter' convert a function into a filter that reads pandoc's json output +-- from stdin, transforms it by walking the AST and applying the specified +-- function, and writes the result as json to stdout. Usage example: +-- +-- > -- capitalize.hs +-- > -- compile with: ghc --make capitalize +-- > -- run with: pandoc -t json | ./capitalize | pandoc -f json +-- > +-- > import Text.Pandoc +-- > import Data.Char (toUpper) +-- > +-- > main :: IO () +-- > main = toJsonFilter capitalizeStrings +-- > +-- > capitalizeStrings :: Inline -> Inline +-- > capitalizeStrings (Str s) = Str $ map toUpper s +-- > capitalizeStrings x = x +-- +-- The function can be any type @(a -> a)@, @(a -> IO a)@, @(a -> [a])@, +-- or @(a -> IO [a])@, where @a@ is an instance of 'Data'. +-- So, for example, @a@ can be 'Pandoc', 'Inline', 'Block', ['Inline'], +-- ['Block'], 'Meta', 'ListNumberStyle', 'Alignment', 'ListNumberDelim', +-- 'QuoteType', etc. See 'Text.Pandoc.Definition'. +class ToJsonFilter a where + toJsonFilter :: a -> IO () + +instance (Data a) => ToJsonFilter (a -> a) where + toJsonFilter f = getContents + >>= putStr . encodeJSON . (bottomUp f :: Pandoc -> Pandoc) . decodeJSON + +instance (Data a) => ToJsonFilter (a -> IO a) where + toJsonFilter f = getContents >>= (bottomUpM f :: Pandoc -> IO Pandoc) . decodeJSON + >>= putStr . encodeJSON + +instance (Data a) => ToJsonFilter (a -> [a]) where + toJsonFilter f = getContents + >>= putStr . encodeJSON . (bottomUp (concatMap f) :: Pandoc -> Pandoc) . decodeJSON + +instance (Data a) => ToJsonFilter (a -> IO [a]) where + toJsonFilter f = getContents + >>= (bottomUpM (fmap concat . mapM f) :: Pandoc -> IO Pandoc) . decodeJSON + >>= putStr . encodeJSON diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index d65c9de1c..c8e87b2a0 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -31,7 +31,7 @@ module Text.Pandoc.Biblio ( processBiblio ) where import Data.List import Data.Unique -import Data.Char ( isDigit, isPunctuation ) +import Data.Char ( isDigit ) import qualified Data.Map as M import Text.CSL hiding ( Cite(..), Citation(..) ) import qualified Text.CSL as CSL ( Cite(..) ) @@ -43,11 +43,15 @@ import Control.Monad -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style, using 'citeproc' from citeproc-hs. -processBiblio :: FilePath -> [Reference] -> Pandoc -> IO Pandoc -processBiblio cslfile r p +processBiblio :: FilePath -> Maybe FilePath -> [Reference] -> Pandoc + -> IO Pandoc +processBiblio cslfile abrfile r p = if null r then return p else do csl <- readCSLFile cslfile + abbrevs <- case abrfile of + Just f -> readJsonAbbrevFile f + Nothing -> return [] p' <- bottomUpM setHash p let (nts,grps) = if styleClass csl == "note" then let cits = queryWith getCite p' @@ -55,29 +59,36 @@ processBiblio cslfile r p needNt = cits \\ concat ncits in (,) needNt $ getNoteCitations needNt p' else (,) [] $ queryWith getCitation p' - result = citeproc procOpts csl r (setNearNote csl $ + style = csl { styleAbbrevs = abbrevs } + result = citeproc procOpts style r (setNearNote style $ map (map toCslCite) grps) cits_map = M.fromList $ zip grps (citations result) - biblioList = map (renderPandoc' csl) (bibliography result) - Pandoc m b = bottomUp (procInlines $ processCite csl cits_map) p' + biblioList = map (renderPandoc' style) (bibliography result) + Pandoc m b = bottomUp (procInlines $ processCite style cits_map) p' return . generateNotes nts . Pandoc m $ b ++ biblioList -- | Substitute 'Cite' elements with formatted citations. processCite :: Style -> M.Map [Citation] [FormattedOutput] -> [Inline] -> [Inline] +processCite s cs (Cite t _ : rest) = + case M.lookup t cs of + Just (x:xs) -> + if isTextualCitation t + then renderPandoc s [x] ++ + if null xs + then processCite s cs rest + else [Space, Cite t (renderPandoc s xs)] + ++ processCite s cs rest + else Cite t (renderPandoc s (x:xs)) : processCite s cs rest + _ -> Str ("Error processing " ++ show t) : processCite s cs rest +processCite s cs (x:xs) = x : processCite s cs xs processCite _ _ [] = [] -processCite s cs (i:is) - | Cite t _ <- i = process t ++ processCite s cs is - | otherwise = i : processCite s cs is - where - addNt t x = if null x then [] else [Cite t $ renderPandoc s x] - process t = case M.lookup t cs of - Just x -> if isTextualCitation t && x /= [] - then renderPandoc s [head x] ++ - if tail x /= [] - then Space : addNt t (tail x) - else [] - else [Cite t $ renderPandoc s x] - Nothing -> [Str ("Error processing " ++ show t)] + +procInlines :: ([Inline] -> [Inline]) -> Block -> Block +procInlines f b + | Plain inls <- b = Plain $ f inls + | Para inls <- b = Para $ f inls + | Header i inls <- b = Header i $ f inls + | otherwise = b isTextualCitation :: [Citation] -> Bool isTextualCitation (c:_) = citationMode c == AuthorInText @@ -112,13 +123,6 @@ setHash (Citation i p s cm nn _) generateNotes :: [Inline] -> Pandoc -> Pandoc generateNotes needNote = bottomUp (mvCiteInNote needNote) -procInlines :: ([Inline] -> [Inline]) -> Block -> Block -procInlines f b - | Plain inls <- b = Plain $ f inls - | Para inls <- b = Para $ f inls - | Header i inls <- b = Header i $ f inls - | otherwise = b - mvCiteInNote :: [Inline] -> Block -> Block mvCiteInNote is = procInlines mvCite where @@ -143,9 +147,8 @@ mvCiteInNote is = procInlines mvCite | otherwise = toCapital (i ++ [Str "."]) checkPt i - | Cite c o : xs <- i - , endWithPunct o, startWithPunct xs - , endWithPunct o = Cite c (initInline o) : checkPt xs + | Cite c o : xs <- i , endWithPunct o, startWithPunct xs + = Cite c (initInline o) : checkPt xs | x:xs <- i = x : checkPt xs | otherwise = [] checkNt = bottomUp $ procInlines checkPt @@ -165,13 +168,9 @@ toCslCite c AuthorInText -> (True, False) SuppressAuthor -> (False,True ) NormalCitation -> (False,False) - s' = case s of - [] -> [] - (Str (y:_) : _) | isPunctuation y -> s - _ -> Str "," : Space : s in emptyCite { CSL.citeId = citationId c , CSL.citePrefix = PandocText $ citationPrefix c - , CSL.citeSuffix = PandocText $ s' + , CSL.citeSuffix = PandocText $ s , CSL.citeLabel = la , CSL.citeLocator = lo , CSL.citeNoteNumber = show $ citationNoteNum c @@ -182,9 +181,13 @@ toCslCite c locatorWords :: [Inline] -> (String, [Inline]) locatorWords inp = - case parse pLocatorWords "suffix" inp of + case parse pLocatorWords "suffix" $ breakup inp of Right r -> r Left _ -> ("",inp) + where breakup [] = [] + breakup (Str x : xs) = map Str (splitup x) ++ breakup xs + breakup (x : xs) = x : breakup xs + splitup = groupBy (\x y -> x /= '\160' && y /= '\160') pLocatorWords :: GenParser Inline st (String, [Inline]) pLocatorWords = do @@ -201,7 +204,7 @@ pMatch condition = try $ do return t pSpace :: GenParser Inline st Inline -pSpace = pMatch (== Space) +pSpace = pMatch (\t -> t == Space || t == Str "\160") pLocator :: GenParser Inline st String pLocator = try $ do diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs deleted file mode 100644 index 8157d94d3..000000000 --- a/src/Text/Pandoc/CharacterReferences.hs +++ /dev/null @@ -1,72 +0,0 @@ -{- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.CharacterReferences - Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Functions for parsing character references. --} -module Text.Pandoc.CharacterReferences ( - characterReference, - decodeCharacterReferences, - ) where -import Text.ParserCombinators.Parsec -import Text.HTML.TagSoup.Entity ( lookupNamedEntity, lookupNumericEntity ) -import Data.Maybe ( fromMaybe ) - --- | Parse character entity. -characterReference :: GenParser Char st Char -characterReference = try $ do - char '&' - character <- numRef <|> entity - char ';' - return character - -numRef :: GenParser Char st Char -numRef = do - char '#' - num <- hexNum <|> decNum - return $ fromMaybe '?' $ lookupNumericEntity num - -hexNum :: GenParser Char st [Char] -hexNum = do - x <- oneOf "Xx" - num <- many1 hexDigit - return (x:num) - -decNum :: GenParser Char st [Char] -decNum = many1 digit - -entity :: GenParser Char st Char -entity = do - body <- many1 alphaNum - return $ fromMaybe '?' $ lookupNamedEntity body - --- | Convert entities in a string to characters. -decodeCharacterReferences :: String -> String -decodeCharacterReferences str = - case parse (many (characterReference <|> anyChar)) str str of - Left err -> error $ "\nError: " ++ show err - Right result -> result - diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 5ddaf1379..4fb799cf1 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -19,7 +19,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Highlighting Copyright : Copyright (C) 2008 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -28,47 +28,49 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Exports functions for syntax highlighting. -} -module Text.Pandoc.Highlighting ( languages, highlightHtml, defaultHighlightingCss, languagesByExtension ) where -import Text.XHtml +module Text.Pandoc.Highlighting ( languages + , languagesByExtension + , highlight + , formatLaTeXInline + , formatLaTeXBlock + , styleToLaTeX + , formatHtmlInline + , formatHtmlBlock + , styleToCss + , pygments + , espresso + , tango + , kate + , monochrome + , haddock + , Style + ) where import Text.Pandoc.Definition -#ifdef _HIGHLIGHTING -import Text.Highlighting.Kate ( languages, highlightAs, formatAsXHtml, FormatOption (..), defaultHighlightingCss, languagesByExtension ) +import Text.Highlighting.Kate import Data.List (find) import Data.Maybe (fromMaybe) import Data.Char (toLower) -highlightHtml :: Bool -- ^ True if inline HTML - -> Attr -- ^ Attributes of the Code or CodeBlock - -> String -- ^ Raw contents of the Code or CodeBlock - -> Either String Html -- ^ An error or the formatted Html -highlightHtml inline (_, classes, keyvals) rawCode = - let firstNum = read $ fromMaybe "1" $ lookup "startFrom" keyvals - fmtOpts = [OptNumberFrom firstNum] ++ - [OptInline | inline] ++ - case find (`elem` ["number","numberLines","number-lines"]) classes of - Nothing -> [] - Just _ -> [OptNumberLines] - addBirdTracks = "literate" `elem` classes - lcLanguages = map (map toLower) languages - in case find (\c -> (map toLower c) `elem` lcLanguages) classes of - Nothing -> Left "Unknown or unsupported language" - Just language -> case highlightAs language rawCode of - Left err -> Left err - Right hl -> Right $ formatAsXHtml fmtOpts language $ - if addBirdTracks - then map ((["Special"],"> "):) hl - else hl +lcLanguages :: [String] +lcLanguages = map (map toLower) languages -#else -defaultHighlightingCss :: String -defaultHighlightingCss = "" +highlight :: (FormatOptions -> [SourceLine] -> a) -- ^ Formatter + -> Attr -- ^ Attributes of the CodeBlock + -> String -- ^ Raw contents of the CodeBlock + -> Maybe a -- ^ Maybe the formatted result +highlight formatter (_, classes, keyvals) rawCode = + let firstNum = case reads (fromMaybe "1" $ lookup "startFrom" keyvals) of + ((n,_):_) -> n + [] -> 1 + fmtOpts = defaultFormatOpts{ + startNumber = firstNum, + numberLines = any (`elem` + ["number","numberLines", "number-lines"]) classes } + lcclasses = map (map toLower) classes + in case find (`elem` lcLanguages) lcclasses of + Nothing -> Nothing + Just language -> Just + $ formatter fmtOpts{ codeClasses = [language], + containerClasses = classes } + $ highlightAs language rawCode -languages :: [String] -languages = [] - -languagesByExtension :: String -> [String] -languagesByExtension _ = [] - -highlightHtml :: Bool -> Attr -> String -> Either String Html -highlightHtml _ _ _ = Left "Pandoc was not compiled with support for highlighting" -#endif diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs new file mode 100644 index 000000000..d48c6a5ae --- /dev/null +++ b/src/Text/Pandoc/ImageSize.hs @@ -0,0 +1,151 @@ +{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} +{- + Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | +Module : Text.Pandoc.ImageSize +Copyright : Copyright (C) 2011 John MacFarlane +License : GNU GPL, version 2 or above + +Maintainer : John MacFarlane <jgm@berkeley.edu> +Stability : alpha +Portability : portable + +Functions for determining the size of a PNG, JPEG, or GIF image. +-} +module Text.Pandoc.ImageSize ( ImageType(..), imageType, imageSize, + sizeInPixels, sizeInPoints, readImageSize ) where +import Data.ByteString.Lazy (ByteString, unpack) +import qualified Data.ByteString.Lazy.Char8 as B +import Control.Monad +import Data.Bits + +-- quick and dirty functions to get image sizes +-- algorithms borrowed from wwwis.pl + +data ImageType = Png | Gif | Jpeg deriving Show + +data ImageSize = ImageSize{ + pxX :: Integer + , pxY :: Integer + , dpiX :: Integer + , dpiY :: Integer + } deriving (Read, Show, Eq) + + +readImageSize :: FilePath -> IO (Maybe ImageSize) +readImageSize fp = imageSize `fmap` B.readFile fp + +imageType :: ByteString -> Maybe ImageType +imageType img = case B.take 4 img of + "\x89\x50\x4e\x47" -> return Png + "\x47\x49\x46\x38" -> return Gif + "\xff\xd8\xff\xe0" -> return Jpeg + _ -> fail "Unknown image type" + +imageSize :: ByteString -> Maybe ImageSize +imageSize img = do + t <- imageType img + case t of + Png -> pngSize img + Gif -> gifSize img + Jpeg -> jpegSize img + +sizeInPixels :: ImageSize -> (Integer, Integer) +sizeInPixels s = (pxX s, pxY s) + +sizeInPoints :: ImageSize -> (Integer, Integer) +sizeInPoints s = (pxX s * 72 `div` dpiX s, pxY s * 72 `div` dpiY s) + +pngSize :: ByteString -> Maybe ImageSize +pngSize img = do + let (h, rest) = B.splitAt 8 img + guard $ h == "\x8a\x4d\x4e\x47\x0d\x0a\x1a\x0a" || + h == "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a" + let (i, rest') = B.splitAt 4 $ B.drop 4 rest + guard $ i == "MHDR" || i == "IHDR" + let (sizes, rest'') = B.splitAt 8 rest' + (x,y) <- case map fromIntegral $ unpack $ sizes of + ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return + ((shift w1 24) + (shift w2 16) + (shift w3 8) + w4, + (shift h1 24) + (shift h2 16) + (shift h3 8) + h4) + _ -> fail "PNG parse error" + let (dpix, dpiy) = findpHYs rest'' + return $ ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } + +findpHYs :: ByteString -> (Integer, Integer) +findpHYs x = + if B.null x || "IDAT" `B.isPrefixOf` x + then (72,72) -- default, no pHYs + else if "pHYs" `B.isPrefixOf` x + then let [x1,x2,x3,x4,y1,y2,y3,y4,u] = map fromIntegral + $ unpack $ B.take 9 $ B.drop 4 x + factor = if u == 1 -- dots per meter + then \z -> z * 254 `div` 10000 + else const 72 + in ( factor $ (shift x1 24) + (shift x2 16) + (shift x3 8) + x4, + factor $ (shift y1 24) + (shift y2 16) + (shift y3 8) + y4 ) + else findpHYs $ B.drop 1 x -- read another byte + +gifSize :: ByteString -> Maybe ImageSize +gifSize img = do + let (h, rest) = B.splitAt 6 img + guard $ h == "GIF87a" || h == "GIF89a" + case map fromIntegral $ unpack $ B.take 4 rest of + [w2,w1,h2,h1] -> return ImageSize { + pxX = shift w1 8 + w2, + pxY = shift h1 8 + h2, + dpiX = 72, + dpiY = 72 + } + _ -> fail "GIF parse error" + +jpegSize :: ByteString -> Maybe ImageSize +jpegSize img = do + let (hdr, rest) = B.splitAt 4 img + guard $ hdr == "\xff\xd8\xff\xe0" + guard $ B.length rest >= 14 + let [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] = map fromIntegral + $ unpack $ B.take 5 $ B.drop 9 $ rest + let factor = case dpiDensity of + 1 -> id + 2 -> \x -> (x * 254 `div` 10) + _ -> const 72 + let dpix = factor (shift dpix1 8 + dpix2) + let dpiy = factor (shift dpiy1 8 + dpiy2) + (w,h) <- findJpegSize rest + return $ ImageSize { pxX = w, pxY = h, dpiX = dpix, dpiY = dpiy } + +findJpegSize :: ByteString -> Maybe (Integer,Integer) +findJpegSize bs = do + let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs + case B.uncons bs' of + Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> do + case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of + [h1,h2,w1,w2] -> return (shift w1 8 + w2, shift h1 8 + h2) + _ -> fail "JPEG parse error" + Just (_,bs'') -> do + case map fromIntegral $ unpack $ B.take 2 bs'' of + [c1,c2] -> do + let len = shift c1 8 + c2 + -- skip variables + findJpegSize $ B.drop len bs'' + _ -> fail "JPEG parse error" + Nothing -> fail "Did not find length record" + + diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index c52a4c475..d3df2f2e1 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -35,6 +35,7 @@ import qualified Data.Map as M -- | Determine mime type appropriate for file path. getMimeType :: FilePath -> Maybe String +getMimeType "layout-cache" = Just "application/binary" -- in ODT getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes where mimeTypes = M.fromList -- List borrowed from happstack-server. [("gz","application/x-gzip") @@ -294,6 +295,7 @@ getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes ,("oth","application/vnd.oasis.opendocument.text-web") ,("otp","application/vnd.oasis.opendocument.presentation-template") ,("ots","application/vnd.oasis.opendocument.spreadsheet-template") + ,("otf","application/x-font-opentype") ,("ott","application/vnd.oasis.opendocument.text-template") ,("oza","application/x-oz-application") ,("p","text/x-pascal") @@ -427,6 +429,7 @@ getMimeType f = M.lookup (map toLower $ drop 1 $ takeExtension f) mimeTypes ,("ts","text/texmacs") ,("tsp","application/dsptype") ,("tsv","text/tab-separated-values") + ,("ttf","application/x-font-truetype") ,("txt","text/plain") ,("udeb","application/x-debian-package") ,("uls","text/iuls") diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs new file mode 100644 index 000000000..cc19e1c50 --- /dev/null +++ b/src/Text/Pandoc/PDF.hs @@ -0,0 +1,136 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.PDF + Copyright : Copyright (C) 2012 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of LaTeX documents to PDF. +-} +module Text.Pandoc.PDF ( tex2pdf ) where + +import System.IO.Temp +import Data.ByteString.Lazy (ByteString) +import qualified Data.ByteString.Lazy as B +import qualified Data.ByteString.Lazy.Char8 as BC +import System.Exit (ExitCode (..)) +import System.FilePath +import System.Directory +import System.Process +import Control.Exception (evaluate) +import System.IO (hClose) +import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO) +import Text.Pandoc.UTF8 as UTF8 +import Control.Monad (unless) +import Data.List (isInfixOf) + +tex2pdf :: String -- ^ tex program (pdflatex, lualatex, xelatex) + -> String -- ^ latex source + -> IO (Either ByteString ByteString) +tex2pdf program source = withSystemTempDirectory "tex2pdf" $ \tmpdir -> + tex2pdf' tmpdir program source + +tex2pdf' :: FilePath -- ^ temp directory for output + -> String -- ^ tex program + -> String -- ^ tex source + -> IO (Either ByteString ByteString) +tex2pdf' tmpDir program source = do + let numruns = if "\\tableofcontents" `isInfixOf` source + then 2 + else 1 + (exit, log', mbPdf) <- runTeXProgram program numruns tmpDir source + let msg = "Error producing PDF from TeX source." + case (exit, mbPdf) of + (ExitFailure _, _) -> return $ Left $ + msg <> "\n" <> extractMsg log' + (ExitSuccess, Nothing) -> return $ Left msg + (ExitSuccess, Just pdf) -> return $ Right pdf + +(<>) :: ByteString -> ByteString -> ByteString +(<>) = B.append + +-- parsing output + +extractMsg :: ByteString -> ByteString +extractMsg log' = do + let msg' = dropWhile (not . ("!" `BC.isPrefixOf`)) $ BC.lines log' + let (msg'',rest) = break ("l." `BC.isPrefixOf`) msg' + let lineno = take 1 rest + if null msg' + then log' + else BC.unlines (msg'' ++ lineno) + +-- running tex programs + +-- Run a TeX program on an input bytestring and return (exit code, +-- contents of stdout, contents of produced PDF if any). Rerun +-- a fixed number of times to resolve references. +runTeXProgram :: String -> Int -> FilePath -> String + -> IO (ExitCode, ByteString, Maybe ByteString) +runTeXProgram program runsLeft tmpDir source = do + let file = tmpDir </> "input.tex" + exists <- doesFileExist file + unless exists $ UTF8.writeFile file source + let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", + "-output-directory", tmpDir, file] + (exit, out, err) <- readCommand program programArgs + if runsLeft > 1 + then runTeXProgram program (runsLeft - 1) tmpDir source + else do + let pdfFile = replaceDirectory (replaceExtension file ".pdf") tmpDir + pdfExists <- doesFileExist pdfFile + pdf <- if pdfExists + then Just `fmap` B.readFile pdfFile + else return Nothing + return (exit, out <> err, pdf) + +-- utility functions + +-- Run a command and return exitcode, contents of stdout, and +-- contents of stderr. (Based on +-- 'readProcessWithExitCode' from 'System.Process'.) +readCommand :: FilePath -- ^ command to run + -> [String] -- ^ any arguments + -> IO (ExitCode,ByteString,ByteString) -- ^ exit, stdout, stderr +readCommand cmd args = do + (Just inh, Just outh, Just errh, pid) <- + createProcess (proc cmd args){ std_in = CreatePipe, + std_out = CreatePipe, + std_err = CreatePipe } + outMVar <- newEmptyMVar + -- fork off a thread to start consuming stdout + out <- B.hGetContents outh + _ <- forkIO $ evaluate (B.length out) >> putMVar outMVar () + -- fork off a thread to start consuming stderr + err <- B.hGetContents errh + _ <- forkIO $ evaluate (B.length err) >> putMVar outMVar () + -- now write and flush any input + hClose inh -- done with stdin + -- wait on the output + takeMVar outMVar + takeMVar outMVar + hClose outh + -- wait on the process + ex <- waitForProcess pid + return (ex, out, err) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index eaf0c0f67..725621ce2 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -42,15 +42,16 @@ module Text.Pandoc.Parsing ( (>>~), parseFromString, lineClump, charsInBalanced, - charsInBalanced', romanNumeral, emailAddress, uri, withHorizDisplacement, + withRaw, nullBlock, failIfStrict, failUnlessLHS, escaped, + characterReference, anyOrderedListMarker, orderedListMarker, charRef, @@ -78,7 +79,6 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.ParserCombinators.Parsec -import Text.Pandoc.CharacterReferences ( characterReference ) import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) @@ -86,6 +86,7 @@ import Control.Monad ( join, liftM, guard ) import Text.Pandoc.Shared import qualified Data.Map as M import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions) +import Text.HTML.TagSoup.Entity ( lookupEntity ) -- | Like >>, but returns the operation on the left. -- (Suggested by Tillmann Rendel on Haskell-cafe list.) @@ -174,29 +175,23 @@ lineClump = blanklines -- | Parse a string of characters between an open character -- and a close character, including text between balanced -- pairs of open and close, which must be different. For example, --- @charsInBalanced '(' ')'@ will parse "(hello (there))" --- and return "hello (there)". Stop if a blank line is --- encountered. -charsInBalanced :: Char -> Char -> GenParser Char st String -charsInBalanced open close = try $ do +-- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" +-- and return "hello (there)". +charsInBalanced :: Char -> Char -> GenParser Char st Char + -> GenParser Char st String +charsInBalanced open close parser = try $ do char open - raw <- many $ (many1 (satisfy $ \c -> - c /= open && c /= close && c /= '\n')) - <|> (do res <- charsInBalanced open close - return $ [open] ++ res ++ [close]) - <|> try (string "\n" >>~ notFollowedBy' blanklines) + let isDelim c = c == open || c == close + raw <- many $ many1 (notFollowedBy (satisfy isDelim) >> parser) + <|> (do res <- charsInBalanced open close parser + return $ [open] ++ res ++ [close]) char close return $ concat raw --- | Like @charsInBalanced@, but allow blank lines in the content. -charsInBalanced' :: Char -> Char -> GenParser Char st String -charsInBalanced' open close = try $ do - char open - raw <- many $ (many1 (satisfy $ \c -> c /= open && c /= close)) - <|> (do res <- charsInBalanced' open close - return $ [open] ++ res ++ [close]) - char close - return $ concat raw +-- old charsInBalanced would be: +-- charsInBalanced open close (noneOf "\n" <|> char '\n' >> notFollowedBy blankline) +-- old charsInBalanced' would be: +-- charsInBalanced open close anyChar -- Auxiliary functions for romanNumeral: @@ -306,6 +301,23 @@ withHorizDisplacement parser = do pos2 <- getPosition return (result, sourceColumn pos2 - sourceColumn pos1) +-- | Applies a parser and returns the raw string that was parsed, +-- along with the value produced by the parser. +withRaw :: GenParser Char st a -> GenParser Char st (a, [Char]) +withRaw parser = do + pos1 <- getPosition + inp <- getInput + result <- parser + pos2 <- getPosition + let (l1,c1) = (sourceLine pos1, sourceColumn pos1) + let (l2,c2) = (sourceLine pos2, sourceColumn pos2) + let inplines = take ((l2 - l1) + 1) $ lines inp + let raw = case inplines of + [] -> error "raw: inplines is null" -- shouldn't happen + [l] -> take (c2 - c1) l + ls -> unlines (init ls) ++ take (c2 - 1) (last ls) + return (result, raw) + -- | Parses a character and returns 'Null' (so that the parser can move on -- if it gets stuck). nullBlock :: GenParser Char st Block @@ -319,17 +331,21 @@ failIfStrict = do -- | Fail unless we're in literate haskell mode. failUnlessLHS :: GenParser tok ParserState () -failUnlessLHS = do - state <- getState - if stateLiterateHaskell state then return () else fail "Literate haskell feature" +failUnlessLHS = getState >>= guard . stateLiterateHaskell -- | Parses backslash, then applies character parser. escaped :: GenParser Char st Char -- ^ Parser for character to escape - -> GenParser Char st Inline -escaped parser = try $ do - char '\\' - result <- parser - return (Str [result]) + -> GenParser Char st Char +escaped parser = try $ char '\\' >> parser + +-- | Parse character entity. +characterReference :: GenParser Char st Char +characterReference = try $ do + char '&' + ent <- many1Till nonspaceChar (char ';') + case lookupEntity ent of + Just c -> return c + Nothing -> fail "entity not found" -- | Parses an uppercase roman numeral and returns (UpperRoman, number). upperRoman :: GenParser Char st (ListNumberStyle, Int) @@ -512,7 +528,7 @@ gridTableWith block tableCaption headless = gridTableSplitLine :: [Int] -> String -> [String] gridTableSplitLine indices line = map removeFinalBar $ tail $ - splitByIndices (init indices) $ removeTrailingSpace line + splitStringByIndices (init indices) $ removeTrailingSpace line gridPart :: Char -> GenParser Char st (Int, Int) gridPart ch = do @@ -598,7 +614,7 @@ readWith :: GenParser t ParserState a -- ^ parser -> a readWith parser state input = case runParser parser state "source" input of - Left err -> error $ "\nError:\n" ++ show err + Left err' -> error $ "\nError:\n" ++ show err' Right result -> result -- | Parse a string with @parser@ (for testing). @@ -613,6 +629,8 @@ data ParserState = ParserState { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? stateParserContext :: ParserContext, -- ^ Inside list? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? + stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph + stateLastStrPos :: Maybe SourcePos, -- ^ Position after last str parsed stateKeys :: KeyTable, -- ^ List of reference keys stateCitations :: [String], -- ^ List of available citations stateNotes :: NoteTable, -- ^ List of notes @@ -623,6 +641,9 @@ data ParserState = ParserState stateDate :: [Inline], -- ^ Date of document stateStrict :: Bool, -- ^ Use strict markdown syntax? stateSmart :: Bool, -- ^ Use smart typography? + stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior + -- in parsing dashes; -- is em-dash; + -- before numeral is en-dash stateLiterateHaskell :: Bool, -- ^ Treat input as literate haskell stateColumns :: Int, -- ^ Number of columns in terminal stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used @@ -640,6 +661,8 @@ defaultParserState = ParserState { stateParseRaw = False, stateParserContext = NullState, stateQuoteContext = NoQuote, + stateMaxNestingLevel = 6, + stateLastStrPos = Nothing, stateKeys = M.empty, stateCitations = [], stateNotes = [], @@ -650,6 +673,7 @@ defaultParserState = stateDate = [], stateStrict = False, stateSmart = False, + stateOldDashes = False, stateLiterateHaskell = False, stateColumns = 80, stateHeaderTable = [], @@ -714,7 +738,7 @@ smartPunctuation inlineParser = do choice [ quoted inlineParser, apostrophe, dash, ellipses ] apostrophe :: GenParser Char ParserState Inline -apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe +apostrophe = (char '\'' <|> char '\8217') >> return (Str "\x2019") quoted :: GenParser Char ParserState Inline -> GenParser Char ParserState Inline @@ -761,8 +785,12 @@ charOrRef cs = return c) singleQuoteStart :: GenParser Char ParserState () -singleQuoteStart = do +singleQuoteStart = do failIfInQuoteContext InSingleQuote + pos <- getPosition + st <- getState + -- single quote start can't be right after str + guard $ stateLastStrPos st /= Just pos try $ do charOrRef "'\8216\145" notFollowedBy (oneOf ")!],;:-? \t\n") notFollowedBy (char '.') <|> lookAhead (string "..." >> return ()) @@ -789,22 +817,42 @@ doubleQuoteEnd = do ellipses :: GenParser Char st Inline ellipses = do - try (charOrRef "…\133") <|> try (string "..." >> return '…') - return Ellipses - -dash :: GenParser Char st Inline -dash = enDash <|> emDash + try (charOrRef "\8230\133") <|> try (string "..." >> return '…') + return (Str "\8230") + +dash :: GenParser Char ParserState Inline +dash = do + oldDashes <- stateOldDashes `fmap` getState + if oldDashes + then emDashOld <|> enDashOld + else Str `fmap` (hyphenDash <|> emDash <|> enDash) + +-- Two hyphens = en-dash, three = em-dash +hyphenDash :: GenParser Char st String +hyphenDash = do + try $ string "--" + option "\8211" (char '-' >> return "\8212") + +emDash :: GenParser Char st String +emDash = do + try (charOrRef "\8212\151") + return "\8212" -enDash :: GenParser Char st Inline +enDash :: GenParser Char st String enDash = do - try (charOrRef "–\150") <|> + try (charOrRef "\8212\151") + return "\8211" + +enDashOld :: GenParser Char st Inline +enDashOld = do + try (charOrRef "\8211\150") <|> try (char '-' >> lookAhead (satisfy isDigit) >> return '–') - return EnDash + return (Str "\8211") -emDash :: GenParser Char st Inline -emDash = do - try (charOrRef "—\151") <|> (try $ string "--" >> optional (char '-') >> return '—') - return EmDash +emDashOld :: GenParser Char st Inline +emDashOld = do + try (charOrRef "\8212\151") <|> (try $ string "--" >> optional (char '-') >> return '-') + return (Str "\8212") -- -- Macros diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 54d65af6f..bf78b2594 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -42,6 +42,7 @@ module Text.Pandoc.Pretty ( , flush , nest , hang + , beforeNonBlank , nowrap , offset , height @@ -59,16 +60,20 @@ module Text.Pandoc.Pretty ( , hsep , vcat , vsep + , chomp , inside , braces , brackets , parens , quotes , doubleQuotes + , charWidth + , realLength ) where -import Data.DList (DList, fromList, toList, cons, singleton) +import Data.Sequence (Seq, fromList, (<|), singleton, mapWithIndex) +import Data.Foldable (toList) import Data.List (intercalate) import Data.Monoid import Data.String @@ -90,6 +95,7 @@ type DocState a = State (RenderState a) () data D = Text Int String | Block Int [String] | Prefixed String Doc + | BeforeNonBlank Doc | Flush Doc | BreakingSpace | CarriageReturn @@ -97,7 +103,7 @@ data D = Text Int String | BlankLine deriving (Show) -newtype Doc = Doc { unDoc :: DList D } +newtype Doc = Doc { unDoc :: Seq D } deriving (Monoid) instance Show Doc where @@ -106,6 +112,14 @@ instance Show Doc where instance IsString Doc where fromString = text +isBlank :: D -> Bool +isBlank BreakingSpace = True +isBlank CarriageReturn = True +isBlank NewLine = True +isBlank BlankLine = True +isBlank (Text _ (c:_)) = isSpace c +isBlank _ = False + -- | True if the document is empty. isEmpty :: Doc -> Bool isEmpty = null . toList . unDoc @@ -114,9 +128,17 @@ isEmpty = null . toList . unDoc empty :: Doc empty = mempty --- | @a <> b@ is the result of concatenating @a@ with @b@. -(<>) :: Doc -> Doc -> Doc +#if MIN_VERSION_base(4,5,0) +-- (<>) is defined in Data.Monoid +#else +infixr 6 <> + +-- | An infix synonym for 'mappend'. +-- @a <> b@ is the result of concatenating @a@ with @b@. +(<>) :: Monoid m => m -> m -> m (<>) = mappend +{-# INLINE (<>) #-} +#endif -- | Concatenate a list of 'Doc's. cat :: [Doc] -> Doc @@ -128,6 +150,7 @@ hcat = mconcat -- | Concatenate a list of 'Doc's, putting breakable spaces -- between them. +infixr 6 <+> (<+>) :: Doc -> Doc -> Doc (<+>) x y = if isEmpty x then y @@ -140,6 +163,7 @@ hcat = mconcat hsep :: [Doc] -> Doc hsep = foldr (<+>) empty +infixr 5 $$ -- | @a $$ b@ puts @a@ above @b@. ($$) :: Doc -> Doc -> Doc ($$) x y = if isEmpty x @@ -148,6 +172,7 @@ hsep = foldr (<+>) empty then x else x <> cr <> y +infixr 5 $+$ -- | @a $$ b@ puts @a@ above @b@, with a blank line between. ($+$) :: Doc -> Doc -> Doc ($+$) x y = if isEmpty x @@ -164,6 +189,17 @@ vcat = foldr ($$) empty vsep :: [Doc] -> Doc vsep = foldr ($+$) empty +-- | Chomps trailing blank space off of a 'Doc'. +chomp :: Doc -> Doc +chomp d = Doc (fromList dl') + where dl = toList (unDoc d) + dl' = reverse $ dropWhile removeable $ reverse dl + removeable BreakingSpace = True + removeable CarriageReturn = True + removeable NewLine = True + removeable BlankLine = True + removeable _ = False + outp :: (IsString a, Monoid a) => Int -> String -> DocState a outp off s | off <= 0 = do @@ -172,7 +208,7 @@ outp off s | off <= 0 = do when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do let pref = reverse $ dropWhile isSpace $ reverse rawpref modify $ \st -> st{ output = fromString pref : output st - , column = column st + length pref } + , column = column st + realLength pref } when (off < 0) $ do modify $ \st -> st { output = fromString s : output st , column = 0 @@ -182,7 +218,7 @@ outp off s = do let pref = prefix st' when (column st' == 0 && usePrefix st' && not (null pref)) $ do modify $ \st -> st{ output = fromString pref : output st - , column = column st + length pref } + , column = column st + realLength pref } modify $ \st -> st{ output = fromString s : output st , column = column st + off , newlines = 0 } @@ -229,6 +265,12 @@ renderList (Flush d : xs) = do modify $ \s -> s{ usePrefix = oldUsePrefix } renderList xs +renderList (BeforeNonBlank d : xs) = + case xs of + (x:_) | isBlank x -> renderList xs + | otherwise -> renderDoc d >> renderList xs + [] -> renderList xs + renderList (BlankLine : xs) = do st <- get case output st of @@ -283,7 +325,7 @@ renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) = renderList (Block width lns : xs) = do st <- get let oldPref = prefix st - case column st - length oldPref of + case column st - realLength oldPref of n | n > 0 -> modify $ \s -> s{ prefix = oldPref ++ replicate n ' ' } _ -> return () renderDoc $ blockToDoc width lns @@ -295,7 +337,7 @@ mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) = Block (w1 + w2 + if addSpace then 1 else 0) $ zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties) where empties = replicate (abs $ length lns1 - length lns2) "" - pad n s = s ++ replicate (n - length s) ' ' + pad n s = s ++ replicate (n - realLength s) ' ' sp "" = "" sp xs = if addSpace then (' ' : xs) else xs mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!" @@ -312,13 +354,13 @@ offsetOf _ = 0 -- | A literal string. text :: String -> Doc text = Doc . toChunks - where toChunks :: String -> DList D + where toChunks :: String -> Seq D toChunks [] = mempty toChunks s = case break (=='\n') s of - ([], _:ys) -> NewLine `cons` toChunks ys - (xs, _:ys) -> Text (length xs) xs `cons` - NewLine `cons` toChunks ys - (xs, []) -> singleton $ Text (length xs) xs + ([], _:ys) -> NewLine <| toChunks ys + (xs, _:ys) -> Text (realLength xs) xs <| + (NewLine <| toChunks ys) + (xs, []) -> singleton $ Text (realLength xs) xs -- | A character. char :: Char -> Doc @@ -359,15 +401,20 @@ nest ind = prefixed (replicate ind ' ') hang :: Int -> Doc -> Doc -> Doc hang ind start doc = start <> nest ind doc +-- | @beforeNonBlank d@ conditionally includes @d@ unless it is +-- followed by blank space. +beforeNonBlank :: Doc -> Doc +beforeNonBlank d = Doc $ singleton (BeforeNonBlank d) + -- | Makes a 'Doc' non-reflowable. nowrap :: Doc -> Doc -nowrap doc = Doc $ fromList $ map replaceSpace $ toList $ unDoc doc - where replaceSpace BreakingSpace = Text 1 " " - replaceSpace x = x +nowrap doc = Doc $ mapWithIndex replaceSpace $ unDoc doc + where replaceSpace _ BreakingSpace = Text 1 " " + replaceSpace _ x = x -- | Returns the width of a 'Doc'. offset :: Doc -> Int -offset d = case map length . lines . render Nothing $ d of +offset d = case map realLength . lines . render Nothing $ d of [] -> 0 os -> maximum os @@ -382,11 +429,11 @@ lblock = block id -- | Like 'lblock' but aligned to the right. rblock :: Int -> Doc -> Doc -rblock w = block (\s -> replicate (w - length s) ' ' ++ s) w +rblock w = block (\s -> replicate (w - realLength s) ' ' ++ s) w -- | Like 'lblock' but aligned centered. cblock :: Int -> Doc -> Doc -cblock w = block (\s -> replicate ((w - length s) `div` 2) ' ' ++ s) w +cblock w = block (\s -> replicate ((w - realLength s) `div` 2) ' ' ++ s) w -- | Returns the height of a block or other 'Doc'. height :: Doc -> Int @@ -401,7 +448,7 @@ chop n cs = case break (=='\n') cs of (_:[]) -> [xs, ""] (_:zs) -> xs : chop n zs else take n xs : chop n (drop n xs ++ ys) - where len = length xs + where len = realLength xs -- | Encloses a 'Doc' inside a start and end 'Doc'. inside :: Doc -> Doc -> Doc -> Doc @@ -427,3 +474,51 @@ quotes = inside (char '\'') (char '\'') -- | Wraps a 'Doc' in double quotes. doubleQuotes :: Doc -> Doc doubleQuotes = inside (char '"') (char '"') + +-- | Returns width of a character in a monospace font: 0 for a combining +-- character, 1 for a regular character, 2 for an East Asian wide character. +charWidth :: Char -> Int +charWidth c = + case c of + _ | c < '\x0300' -> 1 + | c >= '\x0300' && c <= '\x036F' -> 0 -- combining + | c >= '\x0370' && c <= '\x10FC' -> 1 + | c >= '\x1100' && c <= '\x115F' -> 2 + | c >= '\x1160' && c <= '\x11A2' -> 1 + | c >= '\x11A3' && c <= '\x11A7' -> 2 + | c >= '\x11A8' && c <= '\x11F9' -> 1 + | c >= '\x11FA' && c <= '\x11FF' -> 2 + | c >= '\x1200' && c <= '\x2328' -> 1 + | c >= '\x2329' && c <= '\x232A' -> 2 + | c >= '\x232B' && c <= '\x2E31' -> 1 + | c >= '\x2E80' && c <= '\x303E' -> 2 + | c == '\x303F' -> 1 + | c >= '\x3041' && c <= '\x3247' -> 2 + | c >= '\x3248' && c <= '\x324F' -> 1 -- ambiguous + | c >= '\x3250' && c <= '\x4DBF' -> 2 + | c >= '\x4DC0' && c <= '\x4DFF' -> 1 + | c >= '\x4E00' && c <= '\xA4C6' -> 2 + | c >= '\xA4D0' && c <= '\xA95F' -> 1 + | c >= '\xA960' && c <= '\xA97C' -> 2 + | c >= '\xA980' && c <= '\xABF9' -> 1 + | c >= '\xAC00' && c <= '\xD7FB' -> 2 + | c >= '\xD800' && c <= '\xDFFF' -> 1 + | c >= '\xE000' && c <= '\xF8FF' -> 1 -- ambiguous + | c >= '\xF900' && c <= '\xFAFF' -> 2 + | c >= '\xFB00' && c <= '\xFDFD' -> 1 + | c >= '\xFE00' && c <= '\xFE0F' -> 1 -- ambiguous + | c >= '\xFE10' && c <= '\xFE19' -> 2 + | c >= '\xFE20' && c <= '\xFE26' -> 1 + | c >= '\xFE30' && c <= '\xFE6B' -> 2 + | c >= '\xFE70' && c <= '\x16A38' -> 1 + | c >= '\x1B000' && c <= '\x1B001' -> 2 + | c >= '\x1D000' && c <= '\x1F1FF' -> 1 + | c >= '\x1F200' && c <= '\x1F251' -> 2 + | c >= '\x1F300' && c <= '\x1F773' -> 1 + | c >= '\x20000' && c <= '\x3FFFD' -> 2 + | otherwise -> 1 + +-- | Get real length of string, taking into account combining and double-wide +-- characters. +realLength :: String -> Int +realLength = sum . map charWidth diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 7c882f680..43165ceb1 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -215,6 +215,7 @@ pSimpleTable = try $ do TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) skipMany pBlank head' <- option [] $ pOptInTag "thead" $ pInTags "tr" (pCell "th") + skipMany pBlank rows <- pOptInTag "tbody" $ many1 $ try $ skipMany pBlank >> pInTags "tr" (pCell "td") skipMany pBlank @@ -420,8 +421,12 @@ pTagContents = pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad pStr :: GenParser Char ParserState Inline -pStr = liftM Str $ many1 $ satisfy $ \c -> - not (isSpace c) && not (isSpecial c) && not (isBad c) +pStr = do + result <- many1 $ satisfy $ \c -> + not (isSpace c) && not (isSpecial c) && not (isBad c) + pos <- getPosition + updateState $ \s -> s{ stateLastStrPos = Just pos } + return $ Str result isSpecial :: Char -> Bool isSpecial '"' = True @@ -502,16 +507,35 @@ blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div", "dt", "frameset", "li", "tbody", "td", "tfoot", "th", "thead", "tr", "script", "style"] +-- We want to allow raw docbook in markdown documents, so we +-- include docbook block tags here too. +blockDocBookTags :: [String] +blockDocBookTags = ["calloutlist", "bibliolist", "glosslist", "itemizedlist", + "orderedlist", "segmentedlist", "simplelist", + "variablelist", "caution", "important", "note", "tip", + "warning", "address", "literallayout", "programlisting", + "programlistingco", "screen", "screenco", "screenshot", + "synopsis", "example", "informalexample", "figure", + "informalfigure", "table", "informaltable", "para", + "simpara", "formalpara", "equation", "informalequation", + "figure", "screenshot", "mediaobject", "qandaset", + "procedure", "task", "cmdsynopsis", "funcsynopsis", + "classsynopsis", "blockquote", "epigraph", "msgset", + "sidebar", "title"] + +blockTags :: [String] +blockTags = blockHtmlTags ++ blockDocBookTags + isInlineTag :: Tag String -> Bool -isInlineTag t = tagOpen (`notElem` blockHtmlTags) (const True) t || - tagClose (`notElem` blockHtmlTags) t || +isInlineTag t = tagOpen (`notElem` blockTags) (const True) t || + tagClose (`notElem` blockTags) t || tagComment (const True) t isBlockTag :: Tag String -> Bool isBlockTag t = tagOpen (`elem` blocktags) (const True) t || tagClose (`elem` blocktags) t || tagComment (const True) t - where blocktags = blockHtmlTags ++ eitherBlockOrInline + where blocktags = blockTags ++ eitherBlockOrInline isTextTag :: Tag String -> Bool isTextTag = tagText (const True) @@ -546,8 +570,8 @@ t `closes` t2 | t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] && t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" t1 `closes` t2 | - t1 `elem` blockHtmlTags && - t2 `notElem` (blockHtmlTags ++ eitherBlockOrInline) = True + t1 `elem` blockTags && + t2 `notElem` (blockTags ++ eitherBlockOrInline) = True _ `closes` _ = False --- parsers for use in markdown, textile readers diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 02c7361d7..5e69347b6 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2012 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,8 +18,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + Copyright : Copyright (C) 2006-2012 John MacFarlane + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha @@ -27,20 +27,26 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of LaTeX to 'Pandoc' document. -} -module Text.Pandoc.Readers.LaTeX ( - readLaTeX, - rawLaTeXInline, - rawLaTeXEnvironment' +module Text.Pandoc.Readers.LaTeX ( readLaTeX, + rawLaTeXInline, + rawLaTeXBlock, + handleIncludes ) where -import Text.ParserCombinators.Parsec +import Text.ParserCombinators.Parsec hiding ((<|>), space, many, optional) import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing -import Data.Maybe ( fromMaybe ) -import Data.Char ( chr, toUpper ) -import Data.List ( intercalate, isPrefixOf, isSuffixOf ) +import qualified Text.Pandoc.UTF8 as UTF8 +import Data.Char ( chr, ord ) import Control.Monad +import Text.Pandoc.Builder +import Data.Char (isLetter, isPunctuation, isSpace) +import Control.Applicative +import Data.Monoid +import System.FilePath (replaceExtension) +import Data.List (intercalate) +import qualified Data.Map as M -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: ParserState -- ^ Parser state, including options for parser @@ -48,985 +54,867 @@ readLaTeX :: ParserState -- ^ Parser state, including options for parser -> Pandoc readLaTeX = readWith parseLaTeX --- characters with special meaning -specialChars :: [Char] -specialChars = "\\`$%^&_~#{}[]\n \t|<>'\"-" - --- --- utility functions --- - --- | Returns text between brackets and its matching pair. -bracketedText :: Char -> Char -> GenParser Char st [Char] -bracketedText openB closeB = do - result <- charsInBalanced' openB closeB - return $ [openB] ++ result ++ [closeB] - --- | Returns an option or argument of a LaTeX command. -optOrArg :: GenParser Char st [Char] -optOrArg = try $ spaces >> (bracketedText '{' '}' <|> bracketedText '[' ']') +parseLaTeX :: LP Pandoc +parseLaTeX = do + bs <- blocks + eof + st <- getState + let title' = stateTitle st + let authors' = stateAuthors st + let date' = stateDate st + return $ Pandoc (Meta title' authors' date') $ toList bs --- | True if the string begins with '{'. -isArg :: [Char] -> Bool -isArg ('{':_) = True -isArg _ = False +type LP = GenParser Char ParserState --- | Returns list of options and arguments of a LaTeX command. -commandArgs :: GenParser Char st [[Char]] -commandArgs = many optOrArg +anyControlSeq :: LP String +anyControlSeq = do + char '\\' + next <- option '\n' anyChar + name <- case next of + '\n' -> return "" + c | isLetter c -> (c:) <$> (many letter <* optional sp) + | otherwise -> return [c] + return name --- | Parses LaTeX command, returns (name, star, list of options or arguments). -command :: GenParser Char st ([Char], [Char], [[Char]]) -command = do +controlSeq :: String -> LP String +controlSeq name = try $ do char '\\' - name <- many1 letter - star <- option "" (string "*") -- some commands have starred versions - args <- commandArgs - return (name, star, args) - -begin :: [Char] -> GenParser Char st [Char] -begin name = try $ do - string "\\begin" - spaces - char '{' - string name - char '}' - optional commandArgs - spaces + case name of + "" -> mzero + [c] | not (isLetter c) -> string [c] + cs -> string cs <* optional sp return name -end :: [Char] -> GenParser Char st [Char] -end name = try $ do - string "\\end" - spaces +sp :: LP () +sp = skipMany1 $ satisfy (\c -> c == ' ' || c == '\t') + <|> (try $ newline >>~ lookAhead anyChar >>~ notFollowedBy blankline) + +isLowerHex :: Char -> Bool +isLowerHex x = x >= '0' && x <= '9' || x >= 'a' && x <= 'f' + +tildeEscape :: LP Char +tildeEscape = try $ do + string "^^" + c <- satisfy (\x -> x >= '\0' && x <= '\128') + d <- if isLowerHex c + then option "" $ count 1 (satisfy isLowerHex) + else return "" + if null d + then case ord c of + x | x >= 64 && x <= 127 -> return $ chr (x - 64) + | otherwise -> return $ chr (x + 64) + else return $ chr $ read ('0':'x':c:d) + +comment :: LP () +comment = do + char '%' + skipMany (satisfy (/='\n')) + newline + return () + +grouped :: Monoid a => LP a -> LP a +grouped parser = try $ char '{' *> (mconcat <$> manyTill parser (char '}')) + +braced :: LP String +braced = char '{' *> (concat <$> manyTill + ( many1 (satisfy (\c -> c /= '\\' && c /= '}' && c /= '{')) + <|> try (string "\\}") + <|> try (string "\\{") + <|> try (string "\\\\") + <|> ((\x -> "{" ++ x ++ "}") <$> braced) + <|> count 1 anyChar + ) (char '}')) + +bracketed :: Monoid a => LP a -> LP a +bracketed parser = try $ char '[' *> (mconcat <$> manyTill parser (char ']')) + +trim :: String -> String +trim = removeLeadingTrailingSpace + +mathDisplay :: LP String -> LP Inlines +mathDisplay p = displayMath <$> (try p >>= applyMacros' . trim) + +mathInline :: LP String -> LP Inlines +mathInline p = math <$> (try p >>= applyMacros') + +mathChars :: LP String +mathChars = concat <$> + many ( many1 (satisfy (\c -> c /= '$' && c /='\\')) + <|> (\c -> ['\\',c]) <$> (try $ char '\\' *> anyChar) + ) + +double_quote :: LP Inlines +double_quote = (doubleQuoted . mconcat) <$> + (try $ string "``" *> manyTill inline (try $ string "''")) + +single_quote :: LP Inlines +single_quote = char '`' *> + ( try ((singleQuoted . mconcat) <$> + manyTill inline (try $ char '\'' >> notFollowedBy letter)) + <|> lit "`") + +inline :: LP Inlines +inline = (mempty <$ comment) + <|> (space <$ sp) + <|> inlineText + <|> inlineCommand + <|> grouped inline + <|> (char '-' *> option (str "-") + ((char '-') *> option (str "–") (str "—" <$ char '-'))) + <|> double_quote + <|> single_quote + <|> (str "’" <$ char '\'') + <|> (str "\160" <$ char '~') + <|> (mathDisplay $ string "$$" *> mathChars <* string "$$") + <|> (mathInline $ char '$' *> mathChars <* char '$') + <|> (superscript <$> (char '^' *> tok)) + <|> (subscript <$> (char '_' *> tok)) + <|> (failUnlessLHS *> char '|' *> doLHSverb) + <|> (str <$> count 1 tildeEscape) + <|> (str <$> string "]") + <|> (str <$> string "#") -- TODO print warning? + <|> (str <$> string "&") -- TODO print warning? + -- <|> (str <$> count 1 (satisfy (\c -> c /= '\\' && c /='\n' && c /='}' && c /='{'))) -- eat random leftover characters + +inlines :: LP Inlines +inlines = mconcat <$> many (notFollowedBy (char '}') *> inline) + +block :: LP Blocks +block = (mempty <$ comment) + <|> (mempty <$ ((spaceChar <|> blankline) *> spaces)) + <|> environment + <|> mempty <$ macro -- TODO improve macros, make them work everywhere + <|> blockCommand + <|> grouped block + <|> paragraph + <|> (mempty <$ char '&') -- loose & in table environment + + +blocks :: LP Blocks +blocks = mconcat <$> many block + +blockCommand :: LP Blocks +blockCommand = try $ do + name <- anyControlSeq + star <- option "" (string "*" <* optional sp) + let name' = name ++ star + case M.lookup name' blockCommands of + Just p -> p + Nothing -> case M.lookup name blockCommands of + Just p -> p + Nothing -> mzero + +inBrackets :: Inlines -> Inlines +inBrackets x = (str "[") <> x <> (str "]") + +-- eat an optional argument and one or more arguments in braces +ignoreInlines :: String -> (String, LP Inlines) +ignoreInlines name = (name, doraw <|> (mempty <$ optargs)) + where optargs = skipopts *> skipMany (try $ optional sp *> braced) + contseq = '\\':name + doraw = (rawInline "latex" . (contseq ++) . snd) <$> + (getState >>= guard . stateParseRaw >> (withRaw optargs)) + +ignoreBlocks :: String -> (String, LP Blocks) +ignoreBlocks name = (name, doraw <|> (mempty <$ optargs)) + where optargs = skipopts *> skipMany (try $ optional sp *> braced) + contseq = '\\':name + doraw = (rawBlock "latex" . (contseq ++) . snd) <$> + (getState >>= guard . stateParseRaw >> (withRaw optargs)) + +blockCommands :: M.Map String (LP Blocks) +blockCommands = M.fromList $ + [ ("par", mempty <$ skipopts) + , ("title", mempty <$ (skipopts *> tok >>= addTitle)) + , ("subtitle", mempty <$ (skipopts *> tok >>= addSubtitle)) + , ("author", mempty <$ (skipopts *> authors)) + -- -- in letter class, temp. store address & sig as title, author + , ("address", mempty <$ (skipopts *> tok >>= addTitle)) + , ("signature", mempty <$ (skipopts *> authors)) + , ("date", mempty <$ (skipopts *> tok >>= addDate)) + -- sectioning + , ("chapter", updateState (\s -> s{ stateHasChapters = True }) *> section 0) + , ("section", section 1) + , ("subsection", section 2) + , ("subsubsection", section 3) + , ("paragraph", section 4) + , ("subparagraph", section 5) + -- beamer slides + , ("frametitle", section 3) + , ("framesubtitle", section 4) + -- letters + , ("opening", (para . trimInlines) <$> (skipopts *> tok)) + , ("closing", skipopts *> closing) + -- + , ("rule", skipopts *> tok *> tok *> pure horizontalRule) + , ("begin", mzero) -- these are here so they won't be interpreted as inline + , ("end", mzero) + , ("item", skipopts *> loose_item) + , ("documentclass", skipopts *> braced *> preamble) + ] ++ map ignoreBlocks + -- these commands will be ignored unless --parse-raw is specified, + -- in which case they will appear as raw latex blocks + [ "newcommand", "renewcommand", "newenvironment", "renewenvironment" + -- newcommand, etc. should be parsed by macro, but we need this + -- here so these aren't parsed as inline commands to ignore + , "special", "pdfannot", "pdfstringdef" + , "bibliography", "bibliographystyle" + , "maketitle", "makeindex", "makeglossary" + , "addcontentsline", "addtocontents", "addtocounter" + -- \ignore{} is used conventionally in literate haskell for definitions + -- that are to be processed by the compiler but not printed. + , "ignore" + , "hyperdef" + , "noindent" + , "markboth", "markright", "markleft" + , "hspace", "vspace" + ] + +addTitle :: Inlines -> LP () +addTitle tit = updateState (\s -> s{ stateTitle = toList tit }) + +addSubtitle :: Inlines -> LP () +addSubtitle tit = updateState (\s -> s{ stateTitle = stateTitle s ++ + toList (str ":" <> linebreak <> tit) }) + +authors :: LP () +authors = try $ do char '{' - string name + let oneAuthor = mconcat <$> + many1 (notFollowedBy' (controlSeq "and") >> inline) + auths <- sepBy oneAuthor (controlSeq "and") char '}' - return name + updateState (\s -> s { stateAuthors = map (normalizeSpaces . toList) auths }) + +addDate :: Inlines -> LP () +addDate dat = updateState (\s -> s{ stateDate = toList dat }) + +section :: Int -> LP Blocks +section lvl = do + hasChapters <- stateHasChapters `fmap` getState + let lvl' = if hasChapters then lvl + 1 else lvl + skipopts + contents <- grouped inline + return $ header lvl' contents + +inlineCommand :: LP Inlines +inlineCommand = try $ do + name <- anyControlSeq + guard $ not $ isBlockCommand name + parseRaw <- stateParseRaw `fmap` getState + star <- option "" (string "*") + let name' = name ++ star + case M.lookup name' inlineCommands of + Just p -> p + Nothing -> case M.lookup name inlineCommands of + Just p -> p + Nothing + | parseRaw -> + (rawInline "latex" . (('\\':name') ++)) <$> + (withRaw (skipopts *> many braced) + >>= applyMacros' . snd) + | otherwise -> return mempty + +isBlockCommand :: String -> Bool +isBlockCommand s = maybe False (const True) $ M.lookup s blockCommands + +inlineCommands :: M.Map String (LP Inlines) +inlineCommands = M.fromList $ + [ ("emph", emph <$> tok) + , ("textit", emph <$> tok) + , ("textsc", smallcaps <$> tok) + , ("sout", strikeout <$> tok) + , ("textsuperscript", superscript <$> tok) + , ("textsubscript", subscript <$> tok) + , ("textbackslash", lit "\\") + , ("backslash", lit "\\") + , ("textbf", strong <$> tok) + , ("ldots", lit "…") + , ("dots", lit "…") + , ("mdots", lit "…") + , ("sim", lit "~") + , ("label", inBrackets <$> tok) + , ("ref", inBrackets <$> tok) + , ("(", mathInline $ manyTill anyChar (try $ string "\\)")) + , ("[", mathDisplay $ manyTill anyChar (try $ string "\\]")) + , ("ensuremath", mathInline $ braced) + , ("P", lit "¶") + , ("S", lit "§") + , ("$", lit "$") + , ("%", lit "%") + , ("&", lit "&") + , ("#", lit "#") + , ("_", lit "_") + , ("{", lit "{") + , ("}", lit "}") + -- old TeX commands + , ("em", emph <$> inlines) + , ("it", emph <$> inlines) + , ("sl", emph <$> inlines) + , ("bf", strong <$> inlines) + , ("rm", inlines) + , ("itshape", emph <$> inlines) + , ("slshape", emph <$> inlines) + , ("scshape", smallcaps <$> inlines) + , ("bfseries", strong <$> inlines) + , ("/", pure mempty) -- italic correction + , ("cc", lit "ç") + , ("cC", lit "Ç") + , ("aa", lit "å") + , ("AA", lit "Å") + , ("ss", lit "ß") + , ("o", lit "ø") + , ("O", lit "Ø") + , ("L", lit "Ł") + , ("l", lit "ł") + , ("ae", lit "æ") + , ("AE", lit "Æ") + , ("pounds", lit "£") + , ("euro", lit "€") + , ("copyright", lit "©") + , ("`", option (str "`") $ try $ tok >>= accent grave) + , ("'", option (str "'") $ try $ tok >>= accent acute) + , ("^", option (str "^") $ try $ tok >>= accent hat) + , ("~", option (str "~") $ try $ tok >>= accent circ) + , ("\"", option (str "\"") $ try $ tok >>= accent umlaut) + , (".", option (str ".") $ try $ tok >>= accent dot) + , ("=", option (str "=") $ try $ tok >>= accent macron) + , ("i", lit "i") + , ("\\", linebreak <$ (optional (bracketed inline) *> optional sp)) + , (",", pure mempty) + , ("@", pure mempty) + , (" ", lit "\160") + , ("ps", pure $ str "PS." <> space) + , ("TeX", lit "TeX") + , ("LaTeX", lit "LaTeX") + , ("bar", lit "|") + , ("textless", lit "<") + , ("textgreater", lit ">") + , ("thanks", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) + , ("footnote", (note . mconcat) <$> (char '{' *> manyTill block (char '}'))) + , ("verb", doverb) + , ("lstinline", doverb) + , ("texttt", (code . stringify . toList) <$> tok) + , ("url", (unescapeURL <$> braced) >>= \url -> + pure (link url "" (codeWith ("",["url"],[]) url))) + , ("href", (unescapeURL <$> braced <* optional sp) >>= \url -> + tok >>= \lab -> + pure (link url "" lab)) + , ("includegraphics", skipopts *> (unescapeURL <$> braced) >>= + (\src -> pure (image src "" (str "image")))) + , ("cite", citation "cite" NormalCitation False) + , ("citep", citation "citep" NormalCitation False) + , ("citep*", citation "citep*" NormalCitation False) + , ("citeal", citation "citeal" NormalCitation False) + , ("citealp", citation "citealp" NormalCitation False) + , ("citealp*", citation "citealp*" NormalCitation False) + , ("autocite", citation "autocite" NormalCitation False) + , ("footcite", citation "footcite" NormalCitation False) + , ("parencite", citation "parencite" NormalCitation False) + , ("supercite", citation "supercite" NormalCitation False) + , ("footcitetext", citation "footcitetext" NormalCitation False) + , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) + , ("citeyear", citation "citeyear" SuppressAuthor False) + , ("autocite*", citation "autocite*" SuppressAuthor False) + , ("cite*", citation "cite*" SuppressAuthor False) + , ("parencite*", citation "parencite*" SuppressAuthor False) + , ("textcite", citation "textcite" AuthorInText False) + , ("citet", citation "citet" AuthorInText False) + , ("citet*", citation "citet*" AuthorInText False) + , ("citealt", citation "citealt" AuthorInText False) + , ("citealt*", citation "citealt*" AuthorInText False) + , ("textcites", citation "textcites" AuthorInText True) + , ("cites", citation "cites" NormalCitation True) + , ("autocites", citation "autocites" NormalCitation True) + , ("footcites", citation "footcites" NormalCitation True) + , ("parencites", citation "parencites" NormalCitation True) + , ("supercites", citation "supercites" NormalCitation True) + , ("footcitetexts", citation "footcitetexts" NormalCitation True) + , ("Autocite", citation "Autocite" NormalCitation False) + , ("Footcite", citation "Footcite" NormalCitation False) + , ("Parencite", citation "Parencite" NormalCitation False) + , ("Supercite", citation "Supercite" NormalCitation False) + , ("Footcitetext", citation "Footcitetext" NormalCitation False) + , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) + , ("Citeyear", citation "Citeyear" SuppressAuthor False) + , ("Autocite*", citation "Autocite*" SuppressAuthor False) + , ("Cite*", citation "Cite*" SuppressAuthor False) + , ("Parencite*", citation "Parencite*" SuppressAuthor False) + , ("Textcite", citation "Textcite" AuthorInText False) + , ("Textcites", citation "Textcites" AuthorInText True) + , ("Cites", citation "Cites" NormalCitation True) + , ("Autocites", citation "Autocites" NormalCitation True) + , ("Footcites", citation "Footcites" NormalCitation True) + , ("Parencites", citation "Parencites" NormalCitation True) + , ("Supercites", citation "Supercites" NormalCitation True) + , ("Footcitetexts", citation "Footcitetexts" NormalCitation True) + , ("citetext", complexNatbibCitation NormalCitation) + , ("citeauthor", (try (tok *> optional sp *> controlSeq "citetext") *> + complexNatbibCitation AuthorInText) + <|> citation "citeauthor" AuthorInText False) + ] ++ map ignoreInlines + -- these commands will be ignored unless --parse-raw is specified, + -- in which case they will appear as raw latex blocks: + [ "index", "nocite" ] + +unescapeURL :: String -> String +unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs + where isEscapable '%' = True + isEscapable '#' = True + isEscapable _ = False +unescapeURL (x:xs) = x:unescapeURL xs +unescapeURL [] = "" + +doverb :: LP Inlines +doverb = do + marker <- anyChar + code <$> manyTill (satisfy (/='\n')) (char marker) + +doLHSverb :: LP Inlines +doLHSverb = codeWith ("",["haskell"],[]) <$> manyTill (satisfy (/='\n')) (char '|') + +lit :: String -> LP Inlines +lit = pure . str + +accent :: (Char -> Char) -> Inlines -> LP Inlines +accent f ils = + case toList ils of + (Str (x:xs) : ys) -> return $ fromList $ (Str (f x : xs) : ys) + [] -> mzero + _ -> return ils + +grave :: Char -> Char +grave 'A' = 'À' +grave 'E' = 'È' +grave 'I' = 'Ì' +grave 'O' = 'Ò' +grave 'U' = 'Ù' +grave 'a' = 'à' +grave 'e' = 'è' +grave 'i' = 'ì' +grave 'o' = 'ò' +grave 'u' = 'ù' +grave c = c + +acute :: Char -> Char +acute 'A' = 'Á' +acute 'E' = 'É' +acute 'I' = 'Í' +acute 'O' = 'Ó' +acute 'U' = 'Ú' +acute 'a' = 'á' +acute 'e' = 'é' +acute 'i' = 'í' +acute 'o' = 'ó' +acute 'u' = 'ú' +acute c = c + +hat :: Char -> Char +hat 'A' = 'Â' +hat 'E' = 'Ê' +hat 'I' = 'Î' +hat 'O' = 'Ô' +hat 'U' = 'Û' +hat 'a' = 'ã' +hat 'e' = 'ê' +hat 'i' = 'î' +hat 'o' = 'ô' +hat 'u' = 'û' +hat c = c + +circ :: Char -> Char +circ 'A' = 'Ã' +circ 'O' = 'Õ' +circ 'o' = 'õ' +circ 'N' = 'Ñ' +circ 'n' = 'ñ' +circ c = c + +umlaut :: Char -> Char +umlaut 'A' = 'Ä' +umlaut 'E' = 'Ë' +umlaut 'I' = 'Ï' +umlaut 'O' = 'Ö' +umlaut 'U' = 'Ü' +umlaut 'a' = 'ä' +umlaut 'e' = 'ë' +umlaut 'i' = 'ï' +umlaut 'o' = 'ö' +umlaut 'u' = 'ü' +umlaut c = c + +dot :: Char -> Char +dot 'C' = 'Ċ' +dot 'c' = 'ċ' +dot 'E' = 'Ė' +dot 'e' = 'ė' +dot 'G' = 'Ġ' +dot 'g' = 'ġ' +dot 'I' = 'İ' +dot 'Z' = 'Ż' +dot 'z' = 'ż' +dot c = c + +macron :: Char -> Char +macron 'A' = 'Ā' +macron 'E' = 'Ē' +macron 'I' = 'Ī' +macron 'O' = 'Ō' +macron 'U' = 'Ū' +macron 'a' = 'ā' +macron 'e' = 'ē' +macron 'i' = 'ī' +macron 'o' = 'ō' +macron 'u' = 'ū' +macron c = c + +tok :: LP Inlines +tok = try $ grouped inline <|> inlineCommand <|> str <$> (count 1 $ inlineChar) + +opt :: LP Inlines +opt = bracketed inline <* optional sp + +skipopts :: LP () +skipopts = skipMany opt + +inlineText :: LP Inlines +inlineText = str <$> many1 inlineChar + +inlineChar :: LP Char +inlineChar = satisfy $ \c -> + not (c == '\\' || c == '$' || c == '%' || c == '^' || c == '_' || + c == '&' || c == '~' || c == '#' || c == '{' || c == '}' || + c == '^' || c == '\'' || c == '`' || c == '-' || c == ']' || + c == ' ' || c == '\t' || c == '\n' ) + +environment :: LP Blocks +environment = do + controlSeq "begin" + name <- braced + case M.lookup name environments of + Just p -> p <|> rawEnv name + Nothing -> rawEnv name + +rawEnv :: String -> LP Blocks +rawEnv name = do + let addBegin x = "\\begin{" ++ name ++ "}" ++ x + parseRaw <- stateParseRaw `fmap` getState + if parseRaw + then (rawBlock "latex" . addBegin) <$> + (withRaw (env name blocks) >>= applyMacros' . snd) + else env name blocks + +-- | Replace "include" commands with file contents. +handleIncludes :: String -> IO String +handleIncludes [] = return [] +handleIncludes ('\\':xs) = + case runParser include defaultParserState "input" ('\\':xs) of + Right (fs, rest) -> do let getfile f = catch (UTF8.readFile f) + (\_ -> return "") + yss <- mapM getfile fs + (intercalate "\n" yss ++) `fmap` + handleIncludes rest + _ -> case runParser (verbCmd <|> verbatimEnv) defaultParserState + "input" ('\\':xs) of + Right (r, rest) -> (r ++) `fmap` handleIncludes rest + _ -> ('\\':) `fmap` handleIncludes xs +handleIncludes (x:xs) = (x:) `fmap` handleIncludes xs + +include :: LP ([FilePath], String) +include = do + name <- controlSeq "include" <|> controlSeq "usepackage" + skipopts + fs <- (splitBy (==',')) <$> braced + rest <- getInput + let fs' = if name == "include" + then map (flip replaceExtension ".tex") fs + else map (flip replaceExtension ".sty") fs + return (fs', rest) + +verbCmd :: LP (String, String) +verbCmd = do + (_,r) <- withRaw $ do + controlSeq "verb" + c <- anyChar + manyTill anyChar (char c) + rest <- getInput + return (r, rest) + +verbatimEnv :: LP (String, String) +verbatimEnv = do + (_,r) <- withRaw $ do + controlSeq "begin" + name <- braced + guard $ name == "verbatim" || name == "Verbatim" || + name == "lstlisting" + verbEnv name + rest <- getInput + return (r,rest) --- | Returns a list of block elements containing the contents of an --- environment. -environment :: [Char] -> GenParser Char ParserState [Block] -environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ spaces +rawLaTeXBlock :: GenParser Char ParserState String +rawLaTeXBlock = snd <$> withRaw (environment <|> blockCommand) -anyEnvironment :: GenParser Char ParserState Block -anyEnvironment = try $ do - string "\\begin" - spaces - char '{' - name <- many letter - star <- option "" (string "*") -- some environments have starred variants - char '}' - optional commandArgs +rawLaTeXInline :: GenParser Char ParserState Inline +rawLaTeXInline = do + (res, raw) <- withRaw inlineCommand + if res == mempty + then return (Str "") + else RawInline "latex" <$> (applyMacros' raw) + +environments :: M.Map String (LP Blocks) +environments = M.fromList + [ ("document", env "document" blocks <* skipMany anyChar) + , ("letter", env "letter" letter_contents) + , ("center", env "center" blocks) + , ("tabular", env "tabular" simpTable) + , ("quote", blockQuote <$> env "quote" blocks) + , ("quotation", blockQuote <$> env "quotation" blocks) + , ("verse", blockQuote <$> env "verse" blocks) + , ("itemize", bulletList <$> listenv "itemize" (many item)) + , ("description", definitionList <$> listenv "description" (many descItem)) + , ("enumerate", ordered_list) + , ("code", failUnlessLHS *> + (codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> + verbEnv "code")) + , ("verbatim", codeBlock <$> (verbEnv "verbatim")) + , ("Verbatim", codeBlock <$> (verbEnv "Verbatim")) + , ("lstlisting", codeBlock <$> (verbEnv "listlisting")) + , ("displaymath", mathEnv Nothing "displaymath") + , ("equation", mathEnv Nothing "equation") + , ("equation*", mathEnv Nothing "equation*") + , ("gather", mathEnv (Just "gathered") "gather") + , ("gather*", mathEnv (Just "gathered") "gather*") + , ("multiline", mathEnv (Just "gathered") "multiline") + , ("multiline*", mathEnv (Just "gathered") "multiline*") + , ("eqnarray", mathEnv (Just "aligned*") "eqnarray") + , ("eqnarray*", mathEnv (Just "aligned*") "eqnarray*") + , ("align", mathEnv (Just "aligned*") "align") + , ("align*", mathEnv (Just "aligned*") "align*") + , ("alignat", mathEnv (Just "aligned*") "alignat") + , ("alignat*", mathEnv (Just "aligned*") "alignat*") + ] + +letter_contents :: LP Blocks +letter_contents = do + bs <- blocks + st <- getState + -- add signature (author) and address (title) + let addr = case stateTitle st of + [] -> mempty + x -> para $ trimInlines $ fromList x + updateState $ \s -> s{ stateAuthors = [], stateTitle = [] } + return $ addr <> bs -- sig added by \closing + +closing :: LP Blocks +closing = do + contents <- tok + st <- getState + let sigs = case stateAuthors st of + [] -> mempty + xs -> para $ trimInlines $ fromList + $ intercalate [LineBreak] xs + return $ para (trimInlines contents) <> sigs + +item :: LP Blocks +item = blocks *> controlSeq "item" *> skipopts *> blocks + +loose_item :: LP Blocks +loose_item = do + ctx <- stateParserContext `fmap` getState + if ctx == ListItemState + then mzero + else return mempty + +descItem :: LP (Inlines, [Blocks]) +descItem = do + blocks -- skip blocks before item + controlSeq "item" + optional sp + ils <- opt + bs <- blocks + return (ils, [bs]) + +env :: String -> LP a -> LP a +env name p = p <* (controlSeq "end" *> braced >>= guard . (== name)) + +listenv :: String -> LP a -> LP a +listenv name p = try $ do + oldCtx <- stateParserContext `fmap` getState + updateState $ \st -> st{ stateParserContext = ListItemState } + res <- env name p + updateState $ \st -> st{ stateParserContext = oldCtx } + return res + +mathEnv :: Maybe String -> String -> LP Blocks +mathEnv innerEnv name = para <$> mathDisplay (inner <$> verbEnv name) + where inner x = case innerEnv of + Nothing -> x + Just y -> "\\begin{" ++ y ++ "}\n" ++ x ++ + "\\end{" ++ y ++ "}" + +verbEnv :: String -> LP String +verbEnv name = do + skipopts + optional blankline + let endEnv = try $ controlSeq "end" *> braced >>= guard . (== name) + res <- manyTill anyChar endEnv + return $ stripTrailingNewlines res + +ordered_list :: LP Blocks +ordered_list = do + optional sp + (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ + try $ char '[' *> anyOrderedListMarker <* char ']' spaces - contents <- manyTill block (end (name ++ star)) + optional $ try $ controlSeq "setlength" *> grouped (controlSeq "itemindent") *> braced spaces - return $ BlockQuote contents - --- --- parsing documents --- + start <- option 1 $ try $ do controlSeq "setcounter" + grouped (string "enum" *> many1 (oneOf "iv")) + optional sp + num <- grouped (many1 digit) + spaces + return $ (read num + 1 :: Int) + bs <- listenv "enumerate" (many item) + return $ orderedListWith (start, style, delim) bs + +paragraph :: LP Blocks +paragraph = do + x <- mconcat <$> many1 inline + if x == mempty + then return mempty + else return $ para $ trimInlines x + +preamble :: LP Blocks +preamble = mempty <$> manyTill preambleBlock beginDoc + where beginDoc = lookAhead $ controlSeq "begin" *> string "{document}" + preambleBlock = (mempty <$ comment) + <|> (mempty <$ sp) + <|> (mempty <$ blanklines) + <|> (mempty <$ macro) + <|> blockCommand + <|> (mempty <$ anyControlSeq) + <|> (mempty <$ braced) + <|> (mempty <$ anyChar) + +------- + +-- citations --- | Process LaTeX preamble, extracting metadata. -processLaTeXPreamble :: GenParser Char ParserState () -processLaTeXPreamble = do - try $ string "\\documentclass" - skipMany $ bibliographic <|> macro <|> commentBlock <|> skipChar +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _ = [] --- | Parse LaTeX and return 'Pandoc'. -parseLaTeX :: GenParser Char ParserState Pandoc -parseLaTeX = do - spaces - skipMany $ comment >> spaces - blocks <- try (processLaTeXPreamble >> environment "document") - <|> (many block >>~ (spaces >> eof)) - state <- getState - let blocks' = filter (/= Null) blocks - let title' = stateTitle state - let authors' = stateAuthors state - let date' = stateDate state - return $ Pandoc (Meta title' authors' date') blocks' - --- --- parsing blocks --- - -parseBlocks :: GenParser Char ParserState [Block] -parseBlocks = spaces >> many block - -block :: GenParser Char ParserState Block -block = choice [ hrule - , codeBlock - , header - , list - , blockQuote - , simpleTable - , commentBlock - , macro - , bibliographic - , para - , itemBlock - , unknownEnvironment - , ignore - , unknownCommand - ] <?> "block" - --- --- header blocks --- - -header :: GenParser Char ParserState Block -header = section <|> chapter - -chapter :: GenParser Char ParserState Block -chapter = try $ do - string "\\chapter" - result <- headerWithLevel 1 - updateState $ \s -> s{ stateHasChapters = True } - return result - -section :: GenParser Char ParserState Block -section = try $ do - char '\\' - subs <- many (try (string "sub")) - base <- try (string "section" >> return 1) <|> (string "paragraph" >> return 4) - st <- getState - let lev = if stateHasChapters st - then length subs + base + 1 - else length subs + base - headerWithLevel lev +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = + let k = last ks + s' = case s of + (Str (c:_):_) + | not (isPunctuation c || isSpace c) -> Str "," : Space : s + _ -> s + in init ks ++ [k {citationSuffix = citationSuffix k ++ s'}] +addSuffix _ _ = [] -headerWithLevel :: Int -> GenParser Char ParserState Block -headerWithLevel lev = try $ do - spaces - optional (char '*') - spaces - optional $ bracketedText '[' ']' -- alt title - spaces +simpleCiteArgs :: LP [Citation] +simpleCiteArgs = try $ do + first <- optionMaybe $ toList <$> opt + second <- optionMaybe $ toList <$> opt char '{' - title' <- manyTill inline (char '}') - spaces - return $ Header lev (normalizeSpaces title') - --- --- hrule block --- + keys <- manyTill citationLabel (char '}') + let (pre, suf) = case (first , second ) of + (Just s , Nothing) -> (mempty, s ) + (Just s , Just t ) -> (s , t ) + _ -> (mempty, mempty) + conv k = Citation { citationId = k + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationHash = 0 + , citationNoteNum = 0 + } + return $ addPrefix pre $ addSuffix suf $ map conv keys -hrule :: GenParser Char st Block -hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", - "\\newpage" ] >> spaces >> return HorizontalRule +citationLabel :: LP String +citationLabel = trim <$> + (many1 (satisfy $ \c -> c /=',' && c /='}') <* optional (char ',') <* optional sp) --- tables +cites :: CitationMode -> Bool -> LP [Citation] +cites mode multi = try $ do + cits <- if multi + then many1 simpleCiteArgs + else count 1 simpleCiteArgs + let (c:cs) = concat cits + return $ case mode of + AuthorInText -> c {citationMode = mode} : cs + _ -> map (\a -> a {citationMode = mode}) (c:cs) -simpleTable :: GenParser Char ParserState Block -simpleTable = try $ do - string "\\begin" - spaces - string "{tabular}" - spaces - aligns <- parseAligns - let cols = length aligns - optional hline - header' <- option [] $ parseTableHeader cols - rows <- many (parseTableRow cols >>~ optional hline) - spaces - end "tabular" - spaces - let header'' = if null header' - then replicate cols [] - else header' - return $ Table [] aligns (replicate cols 0) header'' rows +citation :: String -> CitationMode -> Bool -> LP Inlines +citation name mode multi = do + (c,raw) <- withRaw $ cites mode multi + return $ cite c (rawInline "latex" $ "\\" ++ name ++ raw) + +complexNatbibCitation :: CitationMode -> LP Inlines +complexNatbibCitation mode = try $ do + let ils = (toList . trimInlines . mconcat) <$> + many (notFollowedBy (oneOf "\\};") >> inline) + let parseOne = try $ do + skipSpaces + pref <- ils + cit' <- inline -- expect a citation + let citlist = toList cit' + cits' <- case citlist of + [Cite cs _] -> return cs + _ -> mzero + suff <- ils + skipSpaces + optional $ char ';' + return $ addPrefix pref $ addSuffix suff $ cits' + (c:cits, raw) <- withRaw $ grouped parseOne + return $ cite (c{ citationMode = mode }:cits) + (rawInline "latex" $ "\\citetext" ++ raw) -hline :: GenParser Char st () -hline = try $ spaces >> string "\\hline" >> return () +-- tables -parseAligns :: GenParser Char ParserState [Alignment] +parseAligns :: LP [Alignment] parseAligns = try $ do char '{' optional $ char '|' let cAlign = char 'c' >> return AlignCenter let lAlign = char 'l' >> return AlignLeft let rAlign = char 'r' >> return AlignRight - let alignChar = cAlign <|> lAlign <|> rAlign + let alignChar = optional sp *> (cAlign <|> lAlign <|> rAlign) aligns' <- sepEndBy alignChar (optional $ char '|') + spaces char '}' spaces return aligns' -parseTableHeader :: Int -- ^ number of columns - -> GenParser Char ParserState [TableCell] -parseTableHeader cols = try $ do - cells' <- parseTableRow cols - hline - return cells' +hline :: LP () +hline = () <$ (try $ spaces >> controlSeq "hline") parseTableRow :: Int -- ^ number of columns - -> GenParser Char ParserState [TableCell] + -> LP [Blocks] parseTableRow cols = try $ do - let tableCellInline = notFollowedBy (char '&' <|> - (try $ char '\\' >> char '\\')) >> inline - cells' <- sepBy (spaces >> liftM ((:[]) . Plain . normalizeSpaces) - (many tableCellInline)) (char '&') + let amp = try $ spaces *> string "&" + let tableCellInline = notFollowedBy (amp <|> controlSeq "\\") >> inline + cells' <- sepBy ((plain . trimInlines . mconcat) <$> many tableCellInline) amp guard $ length cells' == cols spaces - (try $ string "\\\\" >> spaces) <|> - (lookAhead (end "tabular") >> return ()) - return cells' - --- --- code blocks --- - -codeBlock :: GenParser Char ParserState Block -codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> codeBlockWith "lstlisting" <|> lhsCodeBlock --- Note: Verbatim is from fancyvrb. - -codeBlockWith :: String -> GenParser Char st Block -codeBlockWith env = try $ do - string "\\begin" - spaces -- don't use begin function because it - string $ "{" ++ env ++ "}" -- gobbles whitespace; we want to gobble - optional blanklines -- blank lines, but not leading space - contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}")) - spaces - let classes = if env == "code" then ["haskell"] else [] - return $ CodeBlock ("",classes,[]) (stripTrailingNewlines contents) - -lhsCodeBlock :: GenParser Char ParserState Block -lhsCodeBlock = do - failUnlessLHS - (CodeBlock (_,_,_) cont) <- codeBlockWith "code" - return $ CodeBlock ("", ["sourceCode","literate","haskell"], []) cont - --- --- block quotes --- - -blockQuote :: GenParser Char ParserState Block -blockQuote = (environment "quote" <|> environment "quotation") >>~ spaces >>= - return . BlockQuote - --- --- list blocks --- - -list :: GenParser Char ParserState Block -list = bulletList <|> orderedList <|> definitionList <?> "list" - -listItem :: GenParser Char ParserState ([Inline], [Block]) -listItem = try $ do - ("item", _, args) <- command - spaces - state <- getState - let oldParserContext = stateParserContext state - updateState (\s -> s {stateParserContext = ListItemState}) - blocks <- many block - updateState (\s -> s {stateParserContext = oldParserContext}) - opt <- case args of - ([x]) | "[" `isPrefixOf` x && "]" `isSuffixOf` x -> - parseFromString (many inline) $ tail $ init x - _ -> return [] - return (opt, blocks) - -orderedList :: GenParser Char ParserState Block -orderedList = try $ do - string "\\begin" - spaces - string "{enumerate}" - spaces - (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ - try $ do failIfStrict - char '[' - res <- anyOrderedListMarker - char ']' - return res - spaces - option "" $ try $ do string "\\setlength{\\itemindent}" - char '{' - manyTill anyChar (char '}') - spaces - start <- option 1 $ try $ do failIfStrict - string "\\setcounter{enum" - many1 (oneOf "iv") - string "}{" - num <- many1 digit - char '}' - spaces - return $ (read num) + 1 - items <- many listItem - end "enumerate" - spaces - return $ OrderedList (start, style, delim) $ map snd items - -bulletList :: GenParser Char ParserState Block -bulletList = try $ do - begin "itemize" - items <- many listItem - end "itemize" - spaces - return (BulletList $ map snd items) - -definitionList :: GenParser Char ParserState Block -definitionList = try $ do - begin "description" - items <- many listItem - end "description" - spaces - return $ DefinitionList $ map (\(t,d) -> (t,[d])) items - --- --- paragraph block --- - -para :: GenParser Char ParserState Block -para = do - res <- many1 inline - spaces - return $ if null (filter (`notElem` [Str "", Space]) res) - then Null - else Para $ normalizeSpaces res - --- --- title authors date --- - -bibliographic :: GenParser Char ParserState Block -bibliographic = choice [ maketitle, title, subtitle, authors, date ] - -maketitle :: GenParser Char st Block -maketitle = try (string "\\maketitle") >> spaces >> return Null - -title :: GenParser Char ParserState Block -title = try $ do - string "\\title{" - tit <- manyTill inline (char '}') - spaces - updateState (\state -> state { stateTitle = tit }) - return Null - -subtitle :: GenParser Char ParserState Block -subtitle = try $ do - string "\\subtitle{" - tit <- manyTill inline (char '}') - spaces - updateState (\state -> state { stateTitle = stateTitle state ++ - Str ":" : LineBreak : tit }) - return Null - -authors :: GenParser Char ParserState Block -authors = try $ do - string "\\author{" - let andsep = try $ string "\\and" >> notFollowedBy letter >> - spaces >> return '&' - raw <- sepBy (many $ notFollowedBy (char '}' <|> andsep) >> inline) andsep - let authors' = map normalizeSpaces raw - char '}' + optional $ controlSeq "\\" spaces - updateState (\s -> s { stateAuthors = authors' }) - return Null - -date :: GenParser Char ParserState Block -date = try $ do - string "\\date{" - date' <- manyTill inline (char '}') - spaces - updateState (\state -> state { stateDate = normalizeSpaces date' }) - return Null - --- --- item block --- for use in unknown environments that aren't being parsed as raw latex --- - --- this forces items to be parsed in different blocks -itemBlock :: GenParser Char ParserState Block -itemBlock = try $ do - ("item", _, args) <- command - state <- getState - if stateParserContext state == ListItemState - then fail "item should be handled by list block" - else if null args - then return Null - else return $ Plain [Str (stripFirstAndLast (head args))] - --- --- raw LaTeX --- - --- | Parse any LaTeX environment and return a Para block containing --- the whole literal environment as raw TeX. -rawLaTeXEnvironment :: GenParser Char st Block -rawLaTeXEnvironment = do - contents <- rawLaTeXEnvironment' - spaces - return $ RawBlock "latex" contents + return cells' --- | Parse any LaTeX environment and return a string containing --- the whole literal environment as raw TeX. -rawLaTeXEnvironment' :: GenParser Char st String -rawLaTeXEnvironment' = try $ do - string "\\begin" - spaces - char '{' - name <- many1 letter - star <- option "" (string "*") -- for starred variants - let name' = name ++ star - char '}' - args <- option [] commandArgs - let argStr = concat args - contents <- manyTill (choice [ (many1 (noneOf "\\")), - rawLaTeXEnvironment', - string "\\" ]) - (end name') - return $ "\\begin{" ++ name' ++ "}" ++ argStr ++ - concat contents ++ "\\end{" ++ name' ++ "}" - -unknownEnvironment :: GenParser Char ParserState Block -unknownEnvironment = try $ do - state <- getState - result <- if stateParseRaw state -- check whether we should include raw TeX - then rawLaTeXEnvironment -- if so, get whole raw environment - else anyEnvironment -- otherwise just the contents - return result - --- \ignore{} is used conventionally in literate haskell for definitions --- that are to be processed by the compiler but not printed. -ignore :: GenParser Char ParserState Block -ignore = try $ do - ("ignore", _, _) <- command - spaces - return Null - -demacro :: (String, String, [String]) -> GenParser Char ParserState Inline -demacro (n,st,args) = try $ do - let raw = "\\" ++ n ++ st ++ concat args - s' <- applyMacros' raw - if raw == s' - then return $ RawInline "latex" raw - else do - inp <- getInput - setInput $ s' ++ inp - return $ Str "" - -unknownCommand :: GenParser Char ParserState Block -unknownCommand = try $ do - spaces - notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"] - state <- getState - when (stateParserContext state == ListItemState) $ - notFollowedBy' (string "\\item") - if stateParseRaw state - then command >>= demacro >>= return . Plain . (:[]) - else do - (name, _, args) <- command - spaces - unless (name `elem` commandsToIgnore) $ do - -- put arguments back in input to be parsed - inp <- getInput - setInput $ intercalate " " args ++ inp - return Null - -commandsToIgnore :: [String] -commandsToIgnore = ["special","pdfannot","pdfstringdef", "index","bibliography"] - -skipChar :: GenParser Char ParserState Block -skipChar = do - satisfy (/='\\') <|> - (notFollowedBy' (try $ - string "\\begin" >> spaces >> string "{document}") >> - anyChar) +simpTable :: LP Blocks +simpTable = try $ do spaces - return Null - -commentBlock :: GenParser Char st Block -commentBlock = many1 (comment >> spaces) >> return Null - --- --- inline --- - -inline :: GenParser Char ParserState Inline -inline = choice [ str - , endline - , whitespace - , quoted - , apostrophe - , strong - , math - , ellipses - , emDash - , enDash - , hyphen - , emph - , strikeout - , superscript - , subscript - , code - , url - , link - , image - , footnote - , linebreak - , accentedChar - , nonbreakingSpace - , cite - , specialChar - , ensureMath - , rawLaTeXInline' - , escapedChar - , unescapedChar - , comment - ] <?> "inline" - - --- latex comment -comment :: GenParser Char st Inline -comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return (Str "") - -accentedChar :: GenParser Char st Inline -accentedChar = normalAccentedChar <|> specialAccentedChar - -normalAccentedChar :: GenParser Char st Inline -normalAccentedChar = try $ do - char '\\' - accent <- oneOf "'`^\"~" - character <- (try $ char '{' >> letter >>~ char '}') <|> letter - let table = fromMaybe [] $ lookup character accentTable - let result = case lookup accent table of - Just num -> chr num - Nothing -> '?' - return $ Str [result] - --- an association list of letters and association list of accents --- and decimal character numbers. -accentTable :: [(Char, [(Char, Int)])] -accentTable = - [ ('A', [('`', 192), ('\'', 193), ('^', 194), ('~', 195), ('"', 196)]), - ('E', [('`', 200), ('\'', 201), ('^', 202), ('"', 203)]), - ('I', [('`', 204), ('\'', 205), ('^', 206), ('"', 207)]), - ('N', [('~', 209)]), - ('O', [('`', 210), ('\'', 211), ('^', 212), ('~', 213), ('"', 214)]), - ('U', [('`', 217), ('\'', 218), ('^', 219), ('"', 220)]), - ('a', [('`', 224), ('\'', 225), ('^', 227), ('"', 228)]), - ('e', [('`', 232), ('\'', 233), ('^', 234), ('"', 235)]), - ('i', [('`', 236), ('\'', 237), ('^', 238), ('"', 239)]), - ('n', [('~', 241)]), - ('o', [('`', 242), ('\'', 243), ('^', 244), ('~', 245), ('"', 246)]), - ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] - -specialAccentedChar :: GenParser Char st Inline -specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, lslash, - oslash, pound, euro, copyright, sect ] - -ccedil :: GenParser Char st Inline -ccedil = try $ do - char '\\' - letter' <- oneOfStrings ["cc", "cC"] - let num = if letter' == "cc" then 231 else 199 - return $ Str [chr num] - -aring :: GenParser Char st Inline -aring = try $ do - char '\\' - letter' <- oneOfStrings ["aa", "AA"] - let num = if letter' == "aa" then 229 else 197 - return $ Str [chr num] - -iuml :: GenParser Char st Inline -iuml = try (string "\\\"") >> oneOfStrings ["\\i", "{\\i}"] >> - return (Str [chr 239]) - -szlig :: GenParser Char st Inline -szlig = try (string "\\ss") >> return (Str [chr 223]) - -oslash :: GenParser Char st Inline -oslash = try $ do - char '\\' - letter' <- choice [char 'o', char 'O'] - let num = if letter' == 'o' then 248 else 216 - return $ Str [chr num] - -lslash :: GenParser Char st Inline -lslash = try $ do - cmd <- oneOfStrings ["{\\L}","{\\l}","\\L ","\\l "] - return $ if 'l' `elem` cmd - then Str "\x142" - else Str "\x141" - -aelig :: GenParser Char st Inline -aelig = try $ do - char '\\' - letter' <- oneOfStrings ["ae", "AE"] - let num = if letter' == "ae" then 230 else 198 - return $ Str [chr num] - -pound :: GenParser Char st Inline -pound = try (string "\\pounds") >> return (Str [chr 163]) - -euro :: GenParser Char st Inline -euro = try (string "\\euro") >> return (Str [chr 8364]) - -copyright :: GenParser Char st Inline -copyright = try (string "\\copyright") >> return (Str [chr 169]) - -sect :: GenParser Char st Inline -sect = try (string "\\S") >> return (Str [chr 167]) - -escapedChar :: GenParser Char st Inline -escapedChar = do - result <- escaped (oneOf specialChars) - return $ if result == Str "\n" then Str " " else result - --- nonescaped special characters -unescapedChar :: GenParser Char st Inline -unescapedChar = oneOf "`$^&_#{}[]|<>" >>= return . (\c -> Str [c]) - -specialChar :: GenParser Char st Inline -specialChar = choice [ spacer, interwordSpace, - backslash, tilde, caret, - bar, lt, gt, doubleQuote ] - -spacer :: GenParser Char st Inline -spacer = try (string "\\,") >> return (Str "") - -interwordSpace :: GenParser Char st Inline -interwordSpace = try (string "\\ ") >> return (Str "\160") - -backslash :: GenParser Char st Inline -backslash = try (string "\\textbackslash") >> optional (try $ string "{}") >> return (Str "\\") - -tilde :: GenParser Char st Inline -tilde = try (string "\\ensuremath{\\sim}") >> return (Str "~") - -caret :: GenParser Char st Inline -caret = try (string "\\^{}") >> return (Str "^") - -bar :: GenParser Char st Inline -bar = try (string "\\textbar") >> optional (try $ string "{}") >> return (Str "\\") - -lt :: GenParser Char st Inline -lt = try (string "\\textless") >> optional (try $ string "{}") >> return (Str "<") - -gt :: GenParser Char st Inline -gt = try (string "\\textgreater") >> optional (try $ string "{}") >> return (Str ">") - -doubleQuote :: GenParser Char st Inline -doubleQuote = char '"' >> return (Str "\"") - -code :: GenParser Char ParserState Inline -code = code1 <|> code2 <|> code3 <|> lhsInlineCode - -code1 :: GenParser Char st Inline -code1 = try $ do - string "\\verb" - marker <- anyChar - result <- manyTill anyChar (char marker) - return $ Code nullAttr $ removeLeadingTrailingSpace result - -code2 :: GenParser Char st Inline -code2 = try $ do - string "\\texttt{" - result <- manyTill (noneOf "\\\n~$%^&{}") (char '}') - return $ Code nullAttr result - -code3 :: GenParser Char st Inline -code3 = try $ do - string "\\lstinline" - marker <- anyChar - result <- manyTill anyChar (char marker) - return $ Code nullAttr $ removeLeadingTrailingSpace result - -lhsInlineCode :: GenParser Char ParserState Inline -lhsInlineCode = try $ do - failUnlessLHS - char '|' - result <- manyTill (noneOf "|\n") (char '|') - return $ Code ("",["haskell"],[]) result - -emph :: GenParser Char ParserState Inline -emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >> - manyTill inline (char '}') >>= return . Emph - -strikeout :: GenParser Char ParserState Inline -strikeout = try $ string "\\sout{" >> manyTill inline (char '}') >>= - return . Strikeout - -superscript :: GenParser Char ParserState Inline -superscript = try $ string "\\textsuperscript{" >> - manyTill inline (char '}') >>= return . Superscript - --- note: \textsubscript isn't a standard latex command, but we use --- a defined version in pandoc. -subscript :: GenParser Char ParserState Inline -subscript = try $ string "\\textsubscript{" >> manyTill inline (char '}') >>= - return . Subscript - -apostrophe :: GenParser Char ParserState Inline -apostrophe = char '\'' >> return Apostrophe - -quoted :: GenParser Char ParserState Inline -quoted = doubleQuoted <|> singleQuoted - -singleQuoted :: GenParser Char ParserState Inline -singleQuoted = enclosed singleQuoteStart singleQuoteEnd inline >>= - return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted :: GenParser Char ParserState Inline -doubleQuoted = enclosed doubleQuoteStart doubleQuoteEnd inline >>= - return . Quoted DoubleQuote . normalizeSpaces - -singleQuoteStart :: GenParser Char st Char -singleQuoteStart = char '`' - -singleQuoteEnd :: GenParser Char st () -singleQuoteEnd = try $ char '\'' >> notFollowedBy alphaNum - -doubleQuoteStart :: CharParser st String -doubleQuoteStart = string "``" - -doubleQuoteEnd :: CharParser st String -doubleQuoteEnd = try $ string "''" - -ellipses :: GenParser Char st Inline -ellipses = try $ do - char '\\' - optional $ char 'l' - string "dots" - optional $ try $ string "{}" - return Ellipses - -enDash :: GenParser Char st Inline -enDash = try (string "--") >> return EnDash - -emDash :: GenParser Char st Inline -emDash = try (string "---") >> return EmDash - -hyphen :: GenParser Char st Inline -hyphen = char '-' >> return (Str "-") - -strong :: GenParser Char ParserState Inline -strong = try (string "\\textbf{") >> manyTill inline (char '}') >>= - return . Strong - -whitespace :: GenParser Char st Inline -whitespace = many1 (oneOf " \t") >> return Space - -nonbreakingSpace :: GenParser Char st Inline -nonbreakingSpace = char '~' >> return (Str "\160") - --- hard line break -linebreak :: GenParser Char st Inline -linebreak = try $ do - string "\\\\" - optional $ bracketedText '[' ']' -- e.g. \\[10pt] + aligns <- parseAligns + let cols = length aligns + optional hline + header' <- option [] $ try (parseTableRow cols <* hline) + rows <- many (parseTableRow cols <* optional hline) spaces - return LineBreak - -str :: GenParser Char st Inline -str = many1 (noneOf specialChars) >>= return . Str - --- endline internal to paragraph -endline :: GenParser Char st Inline -endline = try $ newline >> notFollowedBy blankline >> return Space - --- math -math :: GenParser Char ParserState Inline -math = (math3 >>= applyMacros' >>= return . Math DisplayMath) - <|> (math1 >>= applyMacros' >>= return . Math InlineMath) - <|> (math2 >>= applyMacros' >>= return . Math InlineMath) - <|> (math4 >>= applyMacros' >>= return . Math DisplayMath) - <|> (math5 >>= applyMacros' >>= return . Math DisplayMath) - <|> (math6 >>= applyMacros' >>= return . Math DisplayMath) - <?> "math" - -math1 :: GenParser Char st String -math1 = try $ char '$' >> manyTill anyChar (char '$') - -math2 :: GenParser Char st String -math2 = try $ string "\\(" >> manyTill anyChar (try $ string "\\)") - -math3 :: GenParser Char st String -math3 = try $ char '$' >> math1 >>~ char '$' - -math4 :: GenParser Char st String -math4 = try $ do - name <- begin "displaymath" <|> begin "equation" <|> begin "equation*" <|> - begin "gather" <|> begin "gather*" <|> begin "gathered" <|> - begin "multline" <|> begin "multline*" - manyTill anyChar (end name) - -math5 :: GenParser Char st String -math5 = try $ (string "\\[") >> spaces >> manyTill anyChar (try $ string "\\]") - -math6 :: GenParser Char st String -math6 = try $ do - name <- begin "eqnarray" <|> begin "eqnarray*" <|> begin "align" <|> - begin "align*" <|> begin "alignat" <|> begin "alignat*" <|> - begin "split" <|> begin "aligned" <|> begin "alignedat" - res <- manyTill anyChar (end name) - return $ filter (/= '&') res -- remove alignment codes - -ensureMath :: GenParser Char st Inline -ensureMath = try $ do - (n, _, args) <- command - guard $ n == "ensuremath" && not (null args) - return $ Math InlineMath $ tail $ init $ head args - --- --- links and images --- - -url :: GenParser Char ParserState Inline -url = try $ do - string "\\url" - url' <- charsInBalanced '{' '}' - return $ Link [Code ("",["url"],[]) url'] (escapeURI url', "") - -link :: GenParser Char ParserState Inline -link = try $ do - string "\\href{" - url' <- manyTill anyChar (char '}') - char '{' - label' <- manyTill inline (char '}') - return $ Link (normalizeSpaces label') (escapeURI url', "") - -image :: GenParser Char ParserState Inline -image = try $ do - ("includegraphics", _, args) <- command - let args' = filter isArg args -- filter out options - let (src,tit) = case args' of - [] -> ("", "") - (x:_) -> (stripFirstAndLast x, "") - return $ Image [Str "image"] (escapeURI src, tit) - -footnote :: GenParser Char ParserState Inline -footnote = try $ do - (name, _, (contents:[])) <- command - if ((name == "footnote") || (name == "thanks")) - then string "" - else fail "not a footnote or thanks command" - let contents' = stripFirstAndLast contents - -- parse the extracted block, which may contain various block elements: - rest <- getInput - setInput $ contents' - blocks <- parseBlocks - setInput rest - return $ Note blocks - --- | citations -cite :: GenParser Char ParserState Inline -cite = simpleCite <|> complexNatbibCites - -simpleCiteArgs :: GenParser Char ParserState [Citation] -simpleCiteArgs = try $ do - first <- optionMaybe $ (char '[') >> manyTill inline (char ']') - second <- optionMaybe $ (char '[') >> manyTill inline (char ']') - char '{' - keys <- many1Till citationLabel (char '}') - let (pre, suf) = case (first , second ) of - (Just s , Nothing) -> ([], s ) - (Just s , Just t ) -> (s , t ) - _ -> ([], []) - conv k = Citation { citationId = k - , citationPrefix = [] - , citationSuffix = [] - , citationMode = NormalCitation - , citationHash = 0 - , citationNoteNum = 0 - } - return $ addPrefix pre $ addSuffix suf $ map conv keys - - -simpleCite :: GenParser Char ParserState Inline -simpleCite = try $ do - char '\\' - let biblatex = [a ++ "cite" | a <- ["auto", "foot", "paren", "super", ""]] - ++ ["footcitetext"] - normal = ["cite" ++ a ++ b | a <- ["al", ""], b <- ["p", "p*", ""]] - ++ biblatex - supress = ["citeyearpar", "citeyear", "autocite*", "cite*", "parencite*"] - intext = ["textcite"] ++ ["cite" ++ a ++ b | a <- ["al", ""], b <- ["t", "t*"]] - mintext = ["textcites"] - mnormal = map (++ "s") biblatex - cmdend = notFollowedBy (letter <|> char '*') - capit [] = [] - capit (x:xs) = toUpper x : xs - addUpper xs = xs ++ map capit xs - toparser l t = try $ oneOfStrings (addUpper l) >> cmdend >> return t - (mode, multi) <- toparser normal (NormalCitation, False) - <|> toparser supress (SuppressAuthor, False) - <|> toparser intext (AuthorInText , False) - <|> toparser mnormal (NormalCitation, True ) - <|> toparser mintext (AuthorInText , True ) - cits <- if multi then - many1 simpleCiteArgs - else - simpleCiteArgs >>= \c -> return [c] - let (c:cs) = concat cits - cits' = case mode of - AuthorInText -> c {citationMode = mode} : cs - _ -> map (\a -> a {citationMode = mode}) (c:cs) - return $ Cite cits' [] - -complexNatbibCites :: GenParser Char ParserState Inline -complexNatbibCites = complexNatbibTextual <|> complexNatbibParenthetical - -complexNatbibTextual :: GenParser Char ParserState Inline -complexNatbibTextual = try $ do - string "\\citeauthor{" - manyTill (noneOf "}") (char '}') - skipSpaces - Cite (c:cs) _ <- complexNatbibParenthetical - return $ Cite (c {citationMode = AuthorInText} : cs) [] - - -complexNatbibParenthetical :: GenParser Char ParserState Inline -complexNatbibParenthetical = try $ do - string "\\citetext{" - cits <- many1Till parseOne (char '}') - return $ Cite (concat cits) [] - where - parseOne = do - skipSpaces - pref <- many (notFollowedBy (oneOf "\\}") >> inline) - (Cite cites _) <- simpleCite - suff <- many (notFollowedBy (oneOf "\\};") >> inline) - skipSpaces - optional $ char ';' - return $ addPrefix pref $ addSuffix suff $ cites - -addPrefix :: [Inline] -> [Citation] -> [Citation] -addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks -addPrefix _ _ = [] - -addSuffix :: [Inline] -> [Citation] -> [Citation] -addSuffix s ks@(_:_) = let k = last ks - in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] -addSuffix _ _ = [] - -citationLabel :: GenParser Char ParserState String -citationLabel = do - res <- many1 $ noneOf ",}" - optional $ char ',' - return $ removeLeadingTrailingSpace res - --- | Parse any LaTeX inline command and return it in a raw TeX inline element. -rawLaTeXInline' :: GenParser Char ParserState Inline -rawLaTeXInline' = do - notFollowedBy' $ oneOfStrings ["\\begin", "\\end", "\\item", "\\ignore", - "\\section"] - rawLaTeXInline + let header'' = if null header' + then replicate cols mempty + else header' + lookAhead $ controlSeq "end" -- make sure we're at end + return $ table mempty (zip aligns (repeat 0)) header'' rows --- | Parse any LaTeX command and return it in a raw TeX inline element. -rawLaTeXInline :: GenParser Char ParserState Inline -rawLaTeXInline = try $ do - state <- getState - if stateParseRaw state - then command >>= demacro - else do - (name,st,args) <- command - x <- demacro (name,st,args) - unless (x == Str "" || name `elem` commandsToIgnore) $ do - inp <- getInput - setInput $ intercalate " " args ++ inp - return $ Str "" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 53531dc1a..666265935 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -39,12 +39,12 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Shared import Text.Pandoc.Parsing -import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' ) +import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, isTextTag, isCommentTag ) -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) +import Text.Pandoc.XML ( fromEntities ) import Text.ParserCombinators.Parsec -import Control.Monad (when, liftM, guard) +import Control.Monad (when, liftM, guard, mzero) import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) @@ -108,6 +108,11 @@ atMostSpaces :: Int -> GenParser Char ParserState () atMostSpaces 0 = notFollowedBy (char ' ') atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return () +litChar :: GenParser Char ParserState Char +litChar = escapedChar' + <|> noneOf "\n" + <|> (newline >> notFollowedBy blankline >> return ' ') + -- | Fail unless we're at beginning of a line. failUnlessBeginningOfLine :: GenParser tok st () failUnlessBeginningOfLine = do @@ -212,16 +217,15 @@ referenceKey = try $ do lab <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let nl = char '\n' >> notFollowedBy blankline >> return ' ' let sourceURL = liftM unwords $ many $ try $ do notFollowedBy' referenceTitle skipMany spaceChar - optional nl + optional $ newline >> notFollowedBy blankline skipMany spaceChar notFollowedBy' reference - many1 (satisfy $ not . isBlank) + many1 $ escapedChar' <|> satisfy (not . isBlank) let betweenAngles = try $ char '<' >> - manyTill (noneOf ">\n" <|> nl) (char '>') + manyTill (escapedChar' <|> litChar) (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle blanklines @@ -233,14 +237,14 @@ referenceKey = try $ do -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' -referenceTitle :: GenParser Char st String -referenceTitle = try $ do +referenceTitle :: GenParser Char ParserState String +referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces - tit <- (charsInBalanced '(' ')' >>= return . unwords . words) + tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words) <|> do delim <- char '\'' <|> char '"' - manyTill anyChar (try (char delim >> skipSpaces >> + manyTill litChar (try (char delim >> skipSpaces >> notFollowedBy (noneOf ")\n"))) - return $ decodeCharacterReferences tit + return $ fromEntities tit noteMarker :: GenParser Char ParserState [Char] noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') @@ -367,32 +371,37 @@ hrule = try $ do indentedLine :: GenParser Char ParserState [Char] indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") -codeBlockDelimiter :: Maybe Int - -> GenParser Char st (Int, ([Char], [[Char]], [([Char], [Char])])) -codeBlockDelimiter len = try $ do +blockDelimiter :: (Char -> Bool) + -> Maybe Int + -> GenParser Char st (Int, (String, [String], [(String, String)]), Char) +blockDelimiter f len = try $ do + c <- lookAhead (satisfy f) size <- case len of - Just l -> count l (char '~') >> many (char '~') >> return l - Nothing -> count 3 (char '~') >> many (char '~') >>= - return . (+ 3) . length + Just l -> count l (char c) >> many (char c) >> return l + Nothing -> count 3 (char c) >> many (char c) >>= + return . (+ 3) . length many spaceChar - attr <- option ([],[],[]) attributes + attr <- option ([],[],[]) + $ attributes -- ~~~ {.ruby} + <|> (many1 alphaNum >>= \x -> return ([],[x],[])) -- github variant ```ruby blankline - return (size, attr) + return (size, attr, c) attributes :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) attributes = try $ do char '{' - many spaceChar - attrs <- many (attribute >>~ many spaceChar) + spnl + attrs <- many (attribute >>~ spnl) char '}' let (ids, classes, keyvals) = unzip3 attrs - let id' = if null ids then "" else head ids - return (id', concat classes, concat keyvals) + let firstNonNull [] = "" + firstNonNull (x:xs) | not (null x) = x + | otherwise = firstNonNull xs + return (firstNonNull $ reverse ids, concat classes, concat keyvals) attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) attribute = identifierAttr <|> classAttr <|> keyValAttr - identifier :: GenParser Char st [Char] identifier = do first <- letter @@ -415,14 +424,15 @@ keyValAttr :: GenParser Char st ([Char], [a], [([Char], [Char])]) keyValAttr = try $ do key <- identifier char '=' - char '"' - val <- manyTill (satisfy (/='\n')) (char '"') + val <- enclosed (char '"') (char '"') anyChar + <|> enclosed (char '\'') (char '\'') anyChar + <|> many nonspaceChar return ("",[],[(key,val)]) codeBlockDelimited :: GenParser Char st Block codeBlockDelimited = try $ do - (size, attr) <- codeBlockDelimiter Nothing - contents <- manyTill anyLine (codeBlockDelimiter (Just size)) + (size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing + contents <- manyTill anyLine (blockDelimiter (== c) (Just size)) blanklines return $ CodeBlock attr $ intercalate "\n" contents @@ -552,9 +562,9 @@ listLine = try $ do return $ concat chunks ++ "\n" -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: GenParser Char ParserState [Char] -rawListItem = try $ do - listStart +rawListItem :: GenParser Char ParserState a -> GenParser Char ParserState [Char] +rawListItem start = try $ do + start result <- many1 listLine blanks <- many blankline return $ concat result ++ blanks @@ -577,9 +587,9 @@ listContinuationLine = try $ do result <- manyTill anyChar newline return $ result ++ "\n" -listItem :: GenParser Char ParserState [Block] -listItem = try $ do - first <- rawListItem +listItem :: GenParser Char ParserState a -> GenParser Char ParserState [Block] +listItem start = try $ do + first <- rawListItem start continuations <- many listContinuation -- parsing with ListItemState forces markers at beginning of lines to -- count as list item markers, even if not separated by blank space. @@ -596,13 +606,15 @@ listItem = try $ do orderedList :: GenParser Char ParserState Block orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart - items <- many1 listItem + items <- many1 $ listItem $ try $ + do optional newline -- if preceded by a Plain block in a list context + skipNonindentSpaces + orderedListMarker style delim return $ OrderedList (start, style, delim) $ compactify items bulletList :: GenParser Char ParserState Block -bulletList = try $ do - lookAhead bulletListStart - many1 listItem >>= return . BulletList . compactify +bulletList = + many1 (listItem bulletListStart) >>= return . BulletList . compactify -- definition lists @@ -718,8 +730,8 @@ rawVerbatimBlock = try $ do rawTeXBlock :: GenParser Char ParserState Block rawTeXBlock = do failIfStrict - result <- liftM (RawBlock "latex") rawLaTeXEnvironment' - <|> liftM (RawBlock "context") rawConTeXtEnvironment' + result <- liftM (RawBlock "latex") rawLaTeXBlock + <|> liftM (RawBlock "context") rawConTeXtEnvironment spaces return result @@ -767,7 +779,7 @@ simpleTableHeader headless = try $ do let (lengths, lines') = unzip dashes let indices = scanl (+) (length initSp) lines' -- If no header, calculate alignment on basis of first row of text - rawHeads <- liftM (tail . splitByIndices (init indices)) $ + rawHeads <- liftM (tail . splitStringByIndices (init indices)) $ if headless then lookAhead anyLine else return rawContent @@ -794,7 +806,7 @@ rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline return $ map removeLeadingTrailingSpace $ tail $ - splitByIndices (init indices) line + splitStringByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). tableLine :: [Int] @@ -844,7 +856,7 @@ multilineTableHeader :: Bool -- ^ Headerless table multilineTableHeader headless = try $ do if headless then return '\n' - else tableSep + else tableSep >>~ notFollowedBy blankline rawContent <- if headless then return $ repeat "" else many1 @@ -856,9 +868,9 @@ multilineTableHeader headless = try $ do let indices = scanl (+) (length initSp) lines' rawHeadsList <- if headless then liftM (map (:[]) . tail . - splitByIndices (init indices)) $ lookAhead anyLine + splitStringByIndices (init indices)) $ lookAhead anyLine else return $ transpose $ map - (\ln -> tail $ splitByIndices (init indices) ln) + (\ln -> tail $ splitStringByIndices (init indices) ln) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless @@ -922,30 +934,25 @@ inlineParsers = [ whitespace , inlineNote -- after superscript because of ^[link](/foo)^ , autoLink , rawHtmlInline - , rawLaTeXInline' , escapedChar + , rawLaTeXInline' , exampleRef , smartPunctuation inline , charRef , symbol , ltSign ] -inlineNonLink :: GenParser Char ParserState Inline -inlineNonLink = (choice $ - map (\parser -> try (parser >>= failIfLink)) inlineParsers) - <?> "inline (non-link)" - -failIfLink :: Inline -> GenParser tok st Inline -failIfLink (Link _ _) = pzero -failIfLink elt = return elt - -escapedChar :: GenParser Char ParserState Inline -escapedChar = try $ do +escapedChar' :: GenParser Char ParserState Char +escapedChar' = try $ do char '\\' state <- getState - result <- if stateStrict state - then oneOf "\\`*_{}[]()>#+-.!~" - else satisfy (not . isAlphaNum) + if stateStrict state + then oneOf "\\`*_{}[]()>#+-.!~" + else satisfy (not . isAlphaNum) + +escapedChar :: GenParser Char ParserState Inline +escapedChar = do + result <- escapedChar' return $ case result of ' ' -> Str "\160" -- "\ " is a nonbreaking space '\n' -> LineBreak -- "\[newline]" is a linebreak @@ -971,8 +978,7 @@ symbol :: GenParser Char ParserState Inline symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' - notFollowedBy' $ rawLaTeXEnvironment' - <|> rawConTeXtEnvironment' + notFollowedBy' rawTeXBlock char '\\') return $ Str [result] @@ -1036,8 +1042,20 @@ inlinesBetween start end = where inner = innerSpace <|> (notFollowedBy' whitespace >> inline) innerSpace = try $ whitespace >>~ notFollowedBy' end +-- This is used to prevent exponential blowups for things like: +-- a**a*a**a*a**a*a**a*a**a*a**a*a** +nested :: GenParser Char ParserState a + -> GenParser Char ParserState a +nested p = do + nestlevel <- stateMaxNestingLevel `fmap` getState + guard $ nestlevel > 0 + updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 } + res <- p + updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } + return res + emph :: GenParser Char ParserState Inline -emph = Emph `liftM` +emph = Emph `fmap` nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = char '*' >> lookAhead nonspaceChar starEnd = notFollowedBy' strong >> char '*' @@ -1045,7 +1063,7 @@ emph = Emph `liftM` ulEnd = notFollowedBy' strong >> char '_' strong :: GenParser Char ParserState Inline -strong = Strong `liftM` +strong = Strong `liftM` nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = string "**" >> lookAhead nonspaceChar starEnd = try $ string "**" @@ -1079,12 +1097,20 @@ nonEndline = satisfy (/='\n') str :: GenParser Char ParserState Inline str = do + smart <- stateSmart `fmap` getState a <- alphaNum - as <- many $ alphaNum <|> (try $ char '_' >>~ lookAhead alphaNum) + as <- many $ alphaNum + <|> (try $ char '_' >>~ lookAhead alphaNum) + <|> if smart + then (try $ satisfy (\c -> c == '\'' || c == '\x2019') >> + lookAhead alphaNum >> return '\x2019') + -- for things like l'aide + else mzero + pos <- getPosition + updateState $ \s -> s{ stateLastStrPos = Just pos } let result = a:as - state <- getState let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) - if stateSmart state + if smart then case likelyAbbrev result of [] -> return $ Str result xs -> choice (map (\x -> @@ -1128,19 +1154,18 @@ endline = try $ do -- a reference label for a link reference :: GenParser Char ParserState [Inline] reference = do notFollowedBy' (string "[^") -- footnote reference - result <- inlinesInBalancedBrackets inlineNonLink + result <- inlinesInBalancedBrackets inline return $ normalizeSpaces result -- source for a link, with optional title -source :: GenParser Char st (String, [Char]) +source :: GenParser Char ParserState (String, [Char]) source = - (try $ charsInBalanced '(' ')' >>= parseFromString source') <|> + (try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|> -- the following is needed for cases like: [ref](/url(a). - (enclosed (char '(') (char ')') anyChar >>= - parseFromString source') + (enclosed (char '(') (char ')') litChar >>= parseFromString source') -- auxiliary function for source -source' :: GenParser Char st (String, [Char]) +source' :: GenParser Char ParserState (String, [Char]) source' = do skipSpaces let nl = char '\n' >>~ notFollowedBy blankline @@ -1149,29 +1174,33 @@ source' = do skipMany spaceChar optional nl skipMany spaceChar - many1 (satisfy $ not . isBlank) - let betweenAngles = try $ char '<' >> - manyTill (noneOf ">\n" <|> nl) (char '>') + many1 $ escapedChar' <|> satisfy (not . isBlank) + let betweenAngles = try $ + char '<' >> manyTill (escapedChar' <|> noneOf ">\n" <|> nl) (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" linkTitle skipSpaces eof return (escapeURI $ removeTrailingSpace src, tit) -linkTitle :: GenParser Char st String -linkTitle = try $ do +linkTitle :: GenParser Char ParserState String +linkTitle = try $ do (many1 spaceChar >> option '\n' newline) <|> newline skipSpaces delim <- oneOf "'\"" - tit <- manyTill (optional (char '\\') >> anyChar) - (try (char delim >> skipSpaces >> eof)) - return $ decodeCharacterReferences tit + tit <- manyTill litChar (try (char delim >> skipSpaces >> eof)) + return $ fromEntities tit link :: GenParser Char ParserState Inline link = try $ do lab <- reference (src, tit) <- source <|> referenceLink lab - return $ Link lab (src, tit) + return $ Link (delinkify lab) (src, tit) + +delinkify :: [Inline] -> [Inline] +delinkify = bottomUp $ concatMap go + where go (Link lab _) = lab + go x = [x] -- a link like [this][ref] or [this][] or [this] referenceLink :: [Inline] @@ -1198,8 +1227,9 @@ autoLink = try $ do image :: GenParser Char ParserState Inline image = try $ do char '!' - (Link lab src) <- link - return $ Image lab src + lab <- reference + (src, tit) <- source <|> referenceLink lab + return $ Image lab (src,tit) note :: GenParser Char ParserState Inline note = try $ do @@ -1228,18 +1258,16 @@ inlineNote = try $ do rawLaTeXInline' :: GenParser Char ParserState Inline rawLaTeXInline' = try $ do failIfStrict - lookAhead $ char '\\' - notFollowedBy' $ rawLaTeXEnvironment' - <|> rawConTeXtEnvironment' + lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env RawInline _ s <- rawLaTeXInline return $ RawInline "tex" s -- "tex" because it might be context or latex -rawConTeXtEnvironment' :: GenParser Char st String -rawConTeXtEnvironment' = try $ do +rawConTeXtEnvironment :: GenParser Char st String +rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) <|> (many1 letter) - contents <- manyTill (rawConTeXtEnvironment' <|> (count 1 anyChar)) + contents <- manyTill (rawConTeXtEnvironment <|> (count 1 anyChar)) (try $ string "\\stop" >> string completion) return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion @@ -1312,7 +1340,8 @@ citeKey = try $ do suppress_author <- option False (char '-' >> return True) char '@' first <- letter - rest <- many $ (noneOf ",;!?[]()@ \t\n") + let internal p = try $ p >>~ lookAhead (letter <|> digit) + rest <- many $ letter <|> digit <|> internal (oneOf ":.#$%&-_?<>~") let key = first:rest st <- getState guard $ key `elem` stateCitations st @@ -1320,8 +1349,12 @@ citeKey = try $ do suffix :: GenParser Char ParserState [Inline] suffix = try $ do + hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl - liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline + rest <- liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline + return $ if hasSpace + then Space : rest + else rest prefix :: GenParser Char ParserState [Inline] prefix = liftM normalizeSpaces $ diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7fda0da19..456b23ce8 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -128,6 +128,7 @@ block = choice [ codeBlock , fieldList , imageBlock , customCodeBlock + , mathBlock , unknownDirective , header , hrule @@ -360,6 +361,33 @@ customCodeBlock = try $ do result <- indentedBlock return $ CodeBlock ("", ["sourceCode", language], []) $ stripTrailingNewlines result +-- | The 'math' directive (from Sphinx) for display math. +mathBlock :: GenParser Char st Block +mathBlock = try $ do + string ".. math::" + mathBlockMultiline <|> mathBlockOneLine + +mathBlockOneLine :: GenParser Char st Block +mathBlockOneLine = try $ do + result <- manyTill anyChar newline + blanklines + return $ Para [Math DisplayMath $ removeLeadingTrailingSpace result] + +mathBlockMultiline :: GenParser Char st Block +mathBlockMultiline = try $ do + blanklines + result <- indentedBlock + -- a single block can contain multiple equations, which need to go + -- in separate Pandoc math elements + let lns = map removeLeadingTrailingSpace $ lines result + -- drop :label, :nowrap, etc. + let startsWithColon (':':_) = True + startsWithColon _ = False + let lns' = dropWhile startsWithColon lns + let eqs = map (removeLeadingTrailingSpace . unlines) + $ filter (not . null) $ splitBy null lns' + return $ Para $ map (Math DisplayMath) eqs + lhsCodeBlock :: GenParser Char ParserState Block lhsCodeBlock = try $ do failUnlessLHS @@ -526,8 +554,8 @@ noteBlock = try $ do string ".." spaceChar >> skipMany spaceChar ref <- noteMarker - spaceChar >> skipMany spaceChar - first <- anyLine + first <- (spaceChar >> skipMany spaceChar >> anyLine) + <|> (newline >> return "") blanks <- option "" blanklines rest <- option "" indentedBlock endPos <- getPosition @@ -736,6 +764,7 @@ inline = choice [ whitespace , image , superscript , subscript + , math , note , smartPunctuation inline , hyphens @@ -750,7 +779,8 @@ hyphens = do return $ Str result escapedChar :: GenParser Char st Inline -escapedChar = escaped anyChar +escapedChar = do c <- escaped anyChar + return $ Str [c] symbol :: GenParser Char ParserState Inline symbol = do @@ -773,24 +803,31 @@ strong :: GenParser Char ParserState Inline strong = enclosed (string "**") (try $ string "**") inline >>= return . Strong . normalizeSpaces -interpreted :: [Char] -> GenParser Char st [Inline] +interpreted :: [Char] -> GenParser Char st [Char] interpreted role = try $ do optional $ try $ string "\\ " result <- enclosed (string $ ":" ++ role ++ ":`") (char '`') anyChar try (string "\\ ") <|> lookAhead (count 1 $ oneOf " \t\n") <|> (eof >> return "") - return [Str result] + return result superscript :: GenParser Char ParserState Inline -superscript = interpreted "sup" >>= (return . Superscript) +superscript = interpreted "sup" >>= \x -> return (Superscript [Str x]) subscript :: GenParser Char ParserState Inline -subscript = interpreted "sub" >>= (return . Subscript) +subscript = interpreted "sub" >>= \x -> return (Subscript [Str x]) + +math :: GenParser Char ParserState Inline +math = interpreted "math" >>= \x -> return (Math InlineMath x) whitespace :: GenParser Char ParserState Inline whitespace = many1 spaceChar >> return Space <?> "whitespace" str :: GenParser Char ParserState Inline -str = many1 (noneOf (specialChars ++ "\t\n ")) >>= return . Str +str = do + result <- many1 (noneOf (specialChars ++ "\t\n ")) + pos <- getPosition + updateState $ \s -> s{ stateLastStrPos = Just pos } + return $ Str result -- an endline character that can be treated as a space, not a structural break endline :: GenParser Char ParserState Inline diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index b9a46e8ff..67dfe6753 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -69,8 +69,17 @@ expToInlines (ESymbol t s) = Just $ addSpace t (Str s) medspace = Str "\x2005" widespace = Str "\x2004" expToInlines (EStretchy x) = expToInlines x +expToInlines (EDelimited start end xs) = do + xs' <- mapM expToInlines xs + return $ [Str start] ++ concat xs' ++ [Str end] expToInlines (EGrouped xs) = expsToInlines xs -expToInlines (ESpace _) = Just [Str " "] -- variable widths not supported +expToInlines (ESpace "0.167em") = Just [Str "\x2009"] +expToInlines (ESpace "0.222em") = Just [Str "\x2005"] +expToInlines (ESpace "0.278em") = Just [Str "\x2004"] +expToInlines (ESpace "0.333em") = Just [Str "\x2004"] +expToInlines (ESpace "1em") = Just [Str "\x2001"] +expToInlines (ESpace "2em") = Just [Str "\x2001\x2001"] +expToInlines (ESpace _) = Just [Str " "] expToInlines (EBinary _ _ _) = Nothing expToInlines (ESub x y) = do x' <- expToInlines x @@ -88,10 +97,10 @@ expToInlines (ESubsup x y z) = do expToInlines (EDown x y) = expToInlines (ESub x y) expToInlines (EUp x y) = expToInlines (ESuper x y) expToInlines (EDownup x y z) = expToInlines (ESubsup x y z) -expToInlines (EText "normal" x) = Just [Str x] -expToInlines (EText "bold" x) = Just [Strong [Str x]] -expToInlines (EText "monospace" x) = Just [Code nullAttr x] -expToInlines (EText "italic" x) = Just [Emph [Str x]] +expToInlines (EText TextNormal x) = Just [Str x] +expToInlines (EText TextBold x) = Just [Strong [Str x]] +expToInlines (EText TextMonospace x) = Just [Code nullAttr x] +expToInlines (EText TextItalic x) = Just [Emph [Str x]] expToInlines (EText _ x) = Just [Str x] expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) = case accent of diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 12d299aa4..3b5954368 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -68,7 +68,8 @@ import Control.Monad ( guard, liftM ) readTextile :: ParserState -- ^ Parser state, including options for parser -> String -- ^ String to parse (assuming @'\n'@ line endings) -> Pandoc -readTextile state s = (readWith parseTextile) state (s ++ "\n\n") +readTextile state s = + (readWith parseTextile) state{ stateOldDashes = True } (s ++ "\n\n") -- @@ -436,6 +437,8 @@ str = do next <- lookAhead letter guard $ isLetter (last xs) || isLetter next return $ xs ++ "-" + pos <- getPosition + updateState $ \s -> s{ stateLastStrPos = Just pos } return $ Str result -- | Textile allows HTML span infos, we discard them diff --git a/src/Text/Pandoc/S5.hs b/src/Text/Pandoc/S5.hs deleted file mode 100644 index b17b052c5..000000000 --- a/src/Text/Pandoc/S5.hs +++ /dev/null @@ -1,69 +0,0 @@ -{- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.S5 - Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Definitions for creation of S5 powerpoint-like HTML. -(See <http://meyerweb.com/eric/tools/s5/>.) --} -module Text.Pandoc.S5 ( s5HeaderIncludes) where -import Text.Pandoc.Shared ( readDataFile ) -import System.FilePath ( (</>) ) -import Data.ByteString.UTF8 ( toString, fromString ) -import Data.ByteString.Base64 ( encode ) - -s5HeaderIncludes :: Maybe FilePath -> IO String -s5HeaderIncludes datadir = do - c <- s5CSS datadir - j <- s5Javascript datadir - return $ c ++ j - -s5Javascript :: Maybe FilePath -> IO String -s5Javascript datadir = do - js <- readDataFile datadir $ "s5" </> "default" </> "slides.min.js" - return $ "<script type=\"text/javascript\">\n" ++ inCDATA js ++ "</script>\n" - -inCDATA :: String -> String -inCDATA s = "/*<![CDATA[*/\n" ++ s ++ "\n/*]]>*/\n" - -base64 :: String -> String -base64 = toString . encode . fromString - -s5CSS :: Maybe FilePath -> IO String -s5CSS datadir = do - s5CoreCSS <- readDataFile datadir $ "s5" </> "default" </> "s5-core.css" - s5FramingCSS <- readDataFile datadir $ "s5" </> "default" </> "framing.css" - s5PrettyCSS <- readDataFile datadir $ "s5" </> "default" </> "pretty.css" - s5OperaCSS <- readDataFile datadir $ "s5" </> "default" </> "opera.css" - s5OutlineCSS <- readDataFile datadir $ "s5" </> "default" </> "outline.css" - s5PrintCSS <- readDataFile datadir $ "s5" </> "default" </> "print.css" - return $ "<link rel=\"stylesheet\" type=\"text/css\" media=\"projection\" id=\"slideProj\" href=\"data:text/css;charset=utf-8;base64," ++ - base64 (s5CoreCSS ++ "\n" ++ s5FramingCSS ++ "\n" ++ s5PrettyCSS) ++ "\" />\n" ++ - "<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" id=\"outlineStyle\" href=\"data:text/css;charset=utf-8;base64," ++ - base64 s5OutlineCSS ++ "\" />\n" ++ - "<link rel=\"stylesheet\" type=\"text/css\" media=\"print\" id=\"slidePrint\" href=\"data:text/css;charset=utf-8;base64," ++ - base64 s5PrintCSS ++ "\" />\n" ++ - "<link rel=\"stylesheet\" type=\"text/css\" media=\"projection\" id=\"operaFix\" href=\"data:text/css;charset=utf-8;base64," ++ - base64 s5OperaCSS ++ "\" />\n" diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs new file mode 100644 index 000000000..9c609b8fe --- /dev/null +++ b/src/Text/Pandoc/SelfContained.hs @@ -0,0 +1,162 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.SelfContained + Copyright : Copyright (C) 2011 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Functions for converting an HTML file into one that can be viewed +offline, by incorporating linked images, CSS, and scripts into +the HTML using data URIs. +-} +module Text.Pandoc.SelfContained ( makeSelfContained ) where +import Text.HTML.TagSoup +import Network.URI (isAbsoluteURI, parseURI, escapeURIString) +import Network.HTTP +import Data.ByteString.Base64 +import qualified Data.ByteString.Char8 as B +import Data.ByteString (ByteString) +import Data.ByteString.UTF8 (toString, fromString) +import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>)) +import Data.Char (toLower, isAscii, isAlphaNum) +import Codec.Compression.GZip as Gzip +import qualified Data.ByteString.Lazy as L +import Text.Pandoc.Shared (findDataFile) +import Text.Pandoc.MIME (getMimeType) +import System.Directory (doesFileExist) + +getItem :: Maybe FilePath -> String -> IO (ByteString, Maybe String) +getItem userdata f = + if isAbsoluteURI f + then openURL f + else do + let mime = case takeExtension f of + ".gz" -> getMimeType $ dropExtension f + x -> getMimeType x + exists <- doesFileExist f + if exists + then do + cont <- B.readFile f + return (cont, mime) + else do + res <- findDataFile userdata f + exists' <- doesFileExist res + if exists' + then do + cont <- B.readFile res + return (cont, mime) + else error $ "Could not find `" ++ f ++ "'" + +-- TODO - have this return mime type too - then it can work for google +-- chart API, e.g. +openURL :: String -> IO (ByteString, Maybe String) +openURL u = getBodyAndMimeType =<< simpleHTTP (getReq u) + where getReq v = case parseURI v of + Nothing -> error $ "Could not parse URI: " ++ v + Just u' -> mkRequest GET u' + getBodyAndMimeType (Left err) = fail (show err) + getBodyAndMimeType (Right r) = return (rspBody r, findHeader HdrContentType r) + +isOk :: Char -> Bool +isOk c = isAscii c && isAlphaNum c + +convertTag :: Maybe FilePath -> Tag String -> IO (Tag String) +convertTag userdata t@(TagOpen "img" as) = + case fromAttrib "src" t of + [] -> return t + src -> do + (raw, mime) <- getRaw userdata (fromAttrib "type" t) src + let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) + return $ TagOpen "img" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) +convertTag userdata t@(TagOpen "video" as) = + case fromAttrib "src" t of + [] -> return t + src -> do + (raw, mime) <- getRaw userdata (fromAttrib "type" t) src + let enc = "data:" ++ mime ++ ";base64," ++ toString (encode raw) + return $ TagOpen "video" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) +convertTag userdata t@(TagOpen "script" as) = + case fromAttrib "src" t of + [] -> return t + src -> do + (raw, mime) <- getRaw userdata (fromAttrib "type" t) src + let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) + return $ TagOpen "script" (("src",enc) : [(x,y) | (x,y) <- as, x /= "src"]) +convertTag userdata t@(TagOpen "link" as) = + case fromAttrib "href" t of + [] -> return t + src -> do + (raw, mime) <- getRaw userdata (fromAttrib "type" t) src + let enc = "data:" ++ mime ++ "," ++ escapeURIString isOk (toString raw) + return $ TagOpen "link" (("href",enc) : [(x,y) | (x,y) <- as, x /= "href"]) +convertTag _ t = return t + +cssURLs :: Maybe FilePath -> FilePath -> ByteString -> IO ByteString +cssURLs userdata d orig = + case B.breakSubstring "url(" orig of + (x,y) | B.null y -> return orig + | otherwise -> do + let (u,v) = B.breakSubstring ")" $ B.drop 4 y + let url = toString + $ case B.take 1 u of + "\"" -> B.takeWhile (/='"') $ B.drop 1 u + _ -> u + (raw, mime) <- getRaw userdata "" (d </> url) + rest <- cssURLs userdata d v + let enc = "data:" `B.append` fromString mime `B.append` + ";base64," `B.append` (encode raw) + return $ x `B.append` "url(" `B.append` enc `B.append` rest + +getRaw :: Maybe FilePath -> String -> String -> IO (ByteString, String) +getRaw userdata mimetype src = do + let ext = map toLower $ takeExtension src + (raw, respMime) <- getItem userdata src + let raw' = if ext == ".gz" + then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks + $ [raw] + else raw + let mime = case (mimetype, respMime) of + ("",Nothing) -> error + $ "Could not determine mime type for `" ++ src ++ "'" + (x, Nothing) -> x + (_, Just x ) -> x + result <- if mime == "text/css" + then cssURLs userdata (takeDirectory src) raw' + else return raw' + return (result, mime) + +-- | Convert HTML into self-contained HTML, incorporating images, +-- scripts, and CSS using data: URIs. Items specified using absolute +-- URLs will be downloaded; those specified using relative URLs will +-- be sought first relative to the working directory, then relative +-- to the user data directory (if the first parameter is 'Just' +-- a directory), and finally relative to pandoc's default data +-- directory. +makeSelfContained :: Maybe FilePath -> String -> IO String +makeSelfContained userdata inp = do + let tags = parseTags inp + out' <- mapM (convertTag userdata) tags + return $ renderTagsOptions renderOptions{ optMinimize = (\t -> t == "br" + || t == "img" || t == "meta" || t == "link" ) } out' + diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 9717e1bc8..cd5b19164 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -32,6 +32,7 @@ module Text.Pandoc.Shared ( -- * List processing splitBy, splitByIndices, + splitStringByIndices, substitute, -- * Text processing backslashEscapes, @@ -44,8 +45,9 @@ module Text.Pandoc.Shared ( camelCaseToHyphenated, toRomanNumeral, escapeURI, - unescapeURI, tabFilter, + -- * Date/time + normalizeDate, -- * Pandoc block and inline list processing orderedListMarkers, normalizeSpaces, @@ -68,21 +70,31 @@ module Text.Pandoc.Shared ( inDirectory, findDataFile, readDataFile, + -- * Error handling + err, + warn, ) where import Text.Pandoc.Definition import Text.Pandoc.Generic -import qualified Text.Pandoc.UTF8 as UTF8 (readFile) -import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii, - isLetter, isDigit ) +import qualified Text.Pandoc.UTF8 as UTF8 +import System.Environment (getProgName) +import System.Exit (exitWith, ExitCode(..)) +import Data.Char ( toLower, isLower, isUpper, isAlpha, + isLetter, isDigit, isSpace ) import Data.List ( find, isPrefixOf, intercalate ) -import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString ) -import Codec.Binary.UTF8.String ( encodeString, decodeString ) +import Network.URI ( escapeURIString ) import System.Directory import System.FilePath ( (</>) ) import Data.Generics (Typeable, Data) import qualified Control.Monad.State as S +import Control.Monad (msum) import Paths_pandoc (getDataFileName) +import Text.Pandoc.Highlighting (Style, pygments) +import Text.Pandoc.Pretty (charWidth) +import System.Locale (defaultTimeLocale) +import Data.Time +import System.IO (stderr) -- -- List processing @@ -96,12 +108,23 @@ splitBy isSep lst = rest' = dropWhile isSep rest in first:(splitBy isSep rest') --- | Split list into chunks divided at specified indices. splitByIndices :: [Int] -> [a] -> [[a]] splitByIndices [] lst = [lst] -splitByIndices (x:xs) lst = - let (first, rest) = splitAt x lst in - first:(splitByIndices (map (\y -> y - x) xs) rest) +splitByIndices (x:xs) lst = first:(splitByIndices (map (\y -> y - x) xs) rest) + where (first, rest) = splitAt x lst + +-- | Split string into chunks divided at specified indices. +splitStringByIndices :: [Int] -> [Char] -> [[Char]] +splitStringByIndices [] lst = [lst] +splitStringByIndices (x:xs) lst = + let (first, rest) = splitAt' x lst in + first : (splitStringByIndices (map (\y -> y - x) xs) rest) + +splitAt' :: Int -> [Char] -> ([Char],[Char]) +splitAt' _ [] = ([],[]) +splitAt' n xs | n <= 0 = ([],xs) +splitAt' n (x:xs) = (x:ys,zs) + where (ys,zs) = splitAt' (n - charWidth x) xs -- | Replace each occurrence of one sublist in a list with another. substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] @@ -181,16 +204,9 @@ toRomanNumeral x = _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) _ -> "" --- | Escape unicode characters in a URI. Characters that are --- already valid in a URI, including % and ?, are left alone. +-- | Escape whitespace in URI. escapeURI :: String -> String -escapeURI = escapeURIString isAllowedInURI . encodeString - --- | Unescape unicode and some special characters in a URI, but --- without introducing spaces. -unescapeURI :: String -> String -unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) . - decodeString . unEscapeString +escapeURI = escapeURIString (not . isSpace) -- | Convert tabs to spaces and filter out DOS line endings. -- Tabs will be preserved if tab stop is set to 0. @@ -213,6 +229,18 @@ tabFilter tabStop = in go tabStop -- +-- Date/time +-- + +-- | Parse a date and convert (if possible) to "YYYY-MM-DD" format. +normalizeDate :: String -> Maybe String +normalizeDate s = fmap (formatTime defaultTimeLocale "%F") + (msum $ map (\fs -> parsetimeWith fs s) formats :: Maybe Day) + where parsetimeWith = parseTime defaultTimeLocale + formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", + "%d %B %Y", "%b. %d, %Y", "%B %d, %Y"] + +-- -- Pandoc block and inline list processing -- @@ -304,9 +332,9 @@ consolidateInlines (Str x : ys) = fromStr (Str z) = z fromStr _ = error "consolidateInlines - fromStr - not a Str" consolidateInlines (Space : ys) = Space : rest - where isSpace Space = True - isSpace _ = False - rest = consolidateInlines $ dropWhile isSpace ys + where isSp Space = True + isSp _ = False + rest = consolidateInlines $ dropWhile isSp ys consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $ Emph (xs ++ ys) : zs consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $ @@ -334,10 +362,6 @@ stringify = queryWith go go (Str x) = x go (Code _ x) = x go (Math _ x) = x - go EmDash = "--" - go EnDash = "-" - go Apostrophe = "'" - go Ellipses = "..." go LineBreak = " " go _ = "" @@ -458,6 +482,7 @@ data ObfuscationMethod = NoObfuscation -- | Varieties of HTML slide shows. data HTMLSlideVariant = S5Slides | SlidySlides + | DZSlides | NoSlides deriving (Show, Read, Eq) @@ -488,9 +513,13 @@ data WriterOptions = WriterOptions , writerCiteMethod :: CiteMethod -- ^ How to print cites , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations , writerHtml5 :: Bool -- ^ Produce HTML5 + , writerBeamer :: Bool -- ^ Produce beamer LaTeX slide show + , writerSlideLevel :: Maybe Int -- ^ Force header level of slides , writerChapters :: Bool -- ^ Use "chapter" for top-level sects , writerListings :: Bool -- ^ Use listings package for code - , writerAscii :: Bool -- ^ Avoid non-ascii characters + , writerHighlight :: Bool -- ^ Highlight source code + , writerHighlightStyle :: Style -- ^ Style to use for highlighting + , writerSetextHeaders :: Bool -- ^ Use setext headers for levels 1-2 in markdown } deriving Show {-# DEPRECATED writerXeTeX "writerXeTeX no longer does anything" #-} @@ -522,9 +551,13 @@ defaultWriterOptions = , writerCiteMethod = Citeproc , writerBiblioFiles = [] , writerHtml5 = False + , writerBeamer = False + , writerSlideLevel = Nothing , writerChapters = False , writerListings = False - , writerAscii = False + , writerHighlight = False + , writerHighlightStyle = pygments + , writerSetextHeaders = True } -- @@ -554,3 +587,19 @@ findDataFile (Just u) f = do -- Cabal data directory. readDataFile :: Maybe FilePath -> FilePath -> IO String readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile + +-- +-- Error reporting +-- + +err :: Int -> String -> IO a +err exitCode msg = do + name <- getProgName + UTF8.hPutStrLn stderr $ name ++ ": " ++ msg + exitWith $ ExitFailure exitCode + return undefined + +warn :: String -> IO () +warn msg = do + name <- getProgName + UTF8.hPutStrLn stderr $ name ++ ": " ++ msg diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs new file mode 100644 index 000000000..1df556d38 --- /dev/null +++ b/src/Text/Pandoc/Slides.hs @@ -0,0 +1,57 @@ +{- +Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Slides + Copyright : Copyright (C) 2012 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Utility functions for splitting documents into slides for slide +show formats (dzslides, s5, slidy, beamer). +-} +module Text.Pandoc.Slides ( getSlideLevel, prepSlides ) where +import Text.Pandoc.Definition + +-- | Find level of header that starts slides (defined as the least header +-- level that occurs before a non-header/non-hrule in the blocks). +getSlideLevel :: [Block] -> Int +getSlideLevel = go 6 + where go least (Header n _ : x : xs) + | n < least && nonHOrHR x = go n xs + | otherwise = go least (x:xs) + go least (_ : xs) = go least xs + go least [] = least + nonHOrHR (Header _ _) = False + nonHOrHR (HorizontalRule) = False + nonHOrHR _ = True + +-- | Prepare a block list to be passed to hierarchicalize. +prepSlides :: Int -> [Block] -> [Block] +prepSlides slideLevel = ensureStartWithH . splitHrule + where splitHrule (HorizontalRule : Header n xs : ys) + | n == slideLevel = Header slideLevel xs : splitHrule ys + splitHrule (HorizontalRule : xs) = Header slideLevel [] : splitHrule xs + splitHrule (x : xs) = x : splitHrule xs + splitHrule [] = [] + ensureStartWithH bs@(Header n _:_) + | n <= slideLevel = bs + ensureStartWithH bs = Header slideLevel [] : bs diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 19c9a808a..336efe453 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -72,7 +72,7 @@ import Text.ParserCombinators.Parsec import Control.Monad (liftM, when, forM) import System.FilePath import Data.List (intercalate, intersperse) -import Text.XHtml (primHtml, Html) +import Text.Blaze (preEscapedString, Html) import Data.ByteString.Lazy.UTF8 (ByteString, fromString) import Text.Pandoc.Shared (readDataFile) import qualified Control.Exception.Extensible as E (try, IOException) @@ -82,6 +82,8 @@ getDefaultTemplate :: (Maybe FilePath) -- ^ User data directory to search first -> String -- ^ Name of writer -> IO (Either E.IOException String) getDefaultTemplate _ "native" = return $ Right "" +getDefaultTemplate _ "json" = return $ Right "" +getDefaultTemplate _ "docx" = return $ Right "" getDefaultTemplate user "odt" = getDefaultTemplate user "opendocument" getDefaultTemplate user "epub" = getDefaultTemplate user "html" getDefaultTemplate user writer = do @@ -110,7 +112,7 @@ instance TemplateTarget ByteString where toTarget = fromString instance TemplateTarget Html where - toTarget = primHtml + toTarget = preEscapedString -- | Renders a template renderTemplate :: TemplateTarget a diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs new file mode 100644 index 000000000..1913eb92b --- /dev/null +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -0,0 +1,367 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.AsciiDoc + Copyright : Copyright (C) 2006-2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to asciidoc. + +Note that some information may be lost in conversion, due to +expressive limitations of asciidoc. Footnotes and table cells with +paragraphs (or other block items) are not possible in asciidoc. +If pandoc encounters one of these, it will insert a message indicating +that it has omitted the construct. + +AsciiDoc: <http://www.methods.co.nz/asciidoc/> +-} +module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc) where +import Text.Pandoc.Definition +import Text.Pandoc.Templates (renderTemplate) +import Text.Pandoc.Shared +import Text.Pandoc.Parsing hiding (blankline) +import Text.ParserCombinators.Parsec ( runParser, GenParser ) +import Data.List ( isPrefixOf, intersperse, intercalate ) +import Text.Pandoc.Pretty +import Control.Monad.State + +data WriterState = WriterState { defListMarker :: String + , orderedListLevel :: Int + , bulletListLevel :: Int + } + +-- | Convert Pandoc to AsciiDoc. +writeAsciiDoc :: WriterOptions -> Pandoc -> String +writeAsciiDoc opts document = + evalState (pandocToAsciiDoc opts document) WriterState{ + defListMarker = "::" + , orderedListLevel = 1 + , bulletListLevel = 1 + } + +-- | Return asciidoc representation of document. +pandocToAsciiDoc :: WriterOptions -> Pandoc -> State WriterState String +pandocToAsciiDoc opts (Pandoc (Meta title authors date) blocks) = do + title' <- inlineListToAsciiDoc opts title + let title'' = title' $$ text (replicate (offset title') '=') + authors' <- mapM (inlineListToAsciiDoc opts) authors + -- asciidoc only allows a singel author + date' <- inlineListToAsciiDoc opts date + let titleblock = not $ null title && null authors && null date + body <- blockListToAsciiDoc opts blocks + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + let main = render colwidth body + let context = writerVariables opts ++ + [ ("body", main) + , ("title", render colwidth title'') + , ("date", render colwidth date') + ] ++ + [ ("toc", "yes") | writerTableOfContents opts && + writerStandalone opts ] ++ + [ ("titleblock", "yes") | titleblock ] ++ + [ ("author", render colwidth a) | a <- authors' ] + if writerStandalone opts + then return $ renderTemplate context $ writerTemplate opts + else return main + +-- | Escape special characters for AsciiDoc. +escapeString :: String -> String +escapeString = escapeStringUsing escs + where escs = backslashEscapes "{" + +-- | Ordered list start parser for use in Para below. +olMarker :: GenParser Char ParserState Char +olMarker = do (start, style', delim) <- anyOrderedListMarker + if delim == Period && + (style' == UpperAlpha || (style' == UpperRoman && + start `elem` [1, 5, 10, 50, 100, 500, 1000])) + then spaceChar >> spaceChar + else spaceChar + +-- | True if string begins with an ordered list marker +beginsWithOrderedListMarker :: String -> Bool +beginsWithOrderedListMarker str = + case runParser olMarker defaultParserState "para start" (take 10 str) of + Left _ -> False + Right _ -> True + +-- | Convert Pandoc block element to asciidoc. +blockToAsciiDoc :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState Doc +blockToAsciiDoc _ Null = return empty +blockToAsciiDoc opts (Plain inlines) = do + contents <- inlineListToAsciiDoc opts inlines + return $ contents <> cr +blockToAsciiDoc opts (Para inlines) = do + contents <- inlineListToAsciiDoc opts inlines + -- escape if para starts with ordered list marker + let esc = if beginsWithOrderedListMarker (render Nothing contents) + then text "\\" + else empty + return $ esc <> contents <> blankline +blockToAsciiDoc _ (RawBlock _ _) = return empty +blockToAsciiDoc _ HorizontalRule = + return $ blankline <> text "'''''" <> blankline +blockToAsciiDoc opts (Header level inlines) = do + contents <- inlineListToAsciiDoc opts inlines + let len = offset contents + return $ contents <> cr <> + (case level of + 1 -> text $ replicate len '-' + 2 -> text $ replicate len '~' + 3 -> text $ replicate len '^' + 4 -> text $ replicate len '+' + _ -> empty) <> blankline +blockToAsciiDoc _ (CodeBlock (_,classes,_) str) = return $ + flush (attrs <> dashes <> space <> attrs <> cr <> text str <> + cr <> dashes) <> blankline + where dashes = text $ replicate (maximum $ map length $ lines str) '-' + attrs = if null classes + then empty + else text $ intercalate "," $ "code" : classes +blockToAsciiDoc opts (BlockQuote blocks) = do + contents <- blockListToAsciiDoc opts blocks + let isBlock (BlockQuote _) = True + isBlock _ = False + -- if there are nested block quotes, put in an open block + let contents' = if any isBlock blocks + then "--" $$ contents $$ "--" + else contents + let cols = offset contents' + let bar = text $ replicate cols '_' + return $ bar $$ chomp contents' $$ bar <> blankline +blockToAsciiDoc opts (Table caption aligns widths headers rows) = do + caption' <- inlineListToAsciiDoc opts caption + let caption'' = if null caption + then empty + else "." <> caption' <> cr + let isSimple = all (== 0) widths + let relativePercentWidths = if isSimple + then widths + else map (/ (sum widths)) widths + let widths'' :: [Integer] + widths'' = map (floor . (* 100)) relativePercentWidths + -- ensure that the widths sum to 100 + let widths' = case widths'' of + _ | isSimple -> widths'' + (w:ws) | sum (w:ws) < 100 + -> (100 - sum ws) : ws + ws -> ws + let totalwidth :: Integer + totalwidth = floor $ sum widths * 100 + let colspec al wi = (case al of + AlignLeft -> "<" + AlignCenter -> "^" + AlignRight -> ">" + AlignDefault -> "") ++ + if wi == 0 then "" else (show wi ++ "%") + let headerspec = if all null headers + then empty + else text "options=\"header\"," + let widthspec = if totalwidth == 0 + then empty + else text "width=" + <> doubleQuotes (text $ show totalwidth ++ "%") + <> text "," + let tablespec = text "[" + <> widthspec + <> text "cols=" + <> doubleQuotes (text $ intercalate "," + $ zipWith colspec aligns widths') + <> text "," + <> headerspec <> text "]" + let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x] + return $ text "|" <> chomp d + makeCell [Para x] = makeCell [Plain x] + makeCell _ = return $ text "|" <> "[multiblock cell omitted]" + let makeRow cells = hsep `fmap` mapM makeCell cells + rows' <- mapM makeRow rows + head' <- makeRow headers + let head'' = if all null headers then empty else head' + let colwidth = if writerWrapText opts + then writerColumns opts + else 100000 + let maxwidth = maximum $ map offset (head':rows') + let body = if maxwidth > colwidth then vsep rows' else vcat rows' + let border = text $ "|" ++ replicate ((min maxwidth colwidth) - 1) '=' + return $ + caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline +blockToAsciiDoc opts (BulletList items) = do + contents <- mapM (bulletListItemToAsciiDoc opts) items + return $ cat contents <> blankline +blockToAsciiDoc opts (OrderedList (_start, sty, _delim) items) = do + let sty' = case sty of + UpperRoman -> UpperAlpha + LowerRoman -> LowerAlpha + x -> x + let markers = orderedListMarkers (1, sty', Period) -- start num not used + let markers' = map (\m -> if length m < 3 + then m ++ replicate (3 - length m) ' ' + else m) markers + contents <- mapM (\(item, num) -> orderedListItemToAsciiDoc opts item num) $ + zip markers' items + return $ cat contents <> blankline +blockToAsciiDoc opts (DefinitionList items) = do + contents <- mapM (definitionListItemToAsciiDoc opts) items + return $ cat contents <> blankline + +-- | Convert bullet list item (list of blocks) to asciidoc. +bulletListItemToAsciiDoc :: WriterOptions -> [Block] -> State WriterState Doc +bulletListItemToAsciiDoc opts blocks = do + let addBlock :: Doc -> Block -> State WriterState Doc + addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b + addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b + return $ d <> cr <> chomp x + addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b + return $ d <> cr <> chomp x + addBlock d b = do x <- blockToAsciiDoc opts b + return $ d <> cr <> text "+" <> cr <> chomp x + lev <- bulletListLevel `fmap` get + modify $ \s -> s{ bulletListLevel = lev + 1 } + contents <- foldM addBlock empty blocks + modify $ \s -> s{ bulletListLevel = lev } + let marker = text (replicate lev '*') + return $ marker <> space <> contents <> cr + +-- | Convert ordered list item (a list of blocks) to asciidoc. +orderedListItemToAsciiDoc :: WriterOptions -- ^ options + -> String -- ^ list item marker + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToAsciiDoc opts marker blocks = do + let addBlock :: Doc -> Block -> State WriterState Doc + addBlock d b | isEmpty d = chomp `fmap` blockToAsciiDoc opts b + addBlock d b@(BulletList _) = do x <- blockToAsciiDoc opts b + return $ d <> cr <> chomp x + addBlock d b@(OrderedList _ _) = do x <- blockToAsciiDoc opts b + return $ d <> cr <> chomp x + addBlock d b = do x <- blockToAsciiDoc opts b + return $ d <> cr <> text "+" <> cr <> chomp x + lev <- orderedListLevel `fmap` get + modify $ \s -> s{ orderedListLevel = lev + 1 } + contents <- foldM addBlock empty blocks + modify $ \s -> s{ orderedListLevel = lev } + return $ text marker <> space <> contents <> cr + +-- | Convert definition list item (label, list of blocks) to asciidoc. +definitionListItemToAsciiDoc :: WriterOptions + -> ([Inline],[[Block]]) + -> State WriterState Doc +definitionListItemToAsciiDoc opts (label, defs) = do + labelText <- inlineListToAsciiDoc opts label + marker <- defListMarker `fmap` get + if marker == "::" + then modify (\st -> st{ defListMarker = ";;"}) + else modify (\st -> st{ defListMarker = "::"}) + let divider = cr <> text "+" <> cr + let defsToAsciiDoc :: [Block] -> State WriterState Doc + defsToAsciiDoc ds = (vcat . intersperse divider . map chomp) + `fmap` mapM (blockToAsciiDoc opts) ds + defs' <- mapM defsToAsciiDoc defs + modify (\st -> st{ defListMarker = marker }) + let contents = nest 2 $ vcat $ intersperse divider $ map chomp defs' + return $ labelText <> text marker <> cr <> contents <> cr + +-- | Convert list of Pandoc block elements to asciidoc. +blockListToAsciiDoc :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToAsciiDoc opts blocks = cat `fmap` mapM (blockToAsciiDoc opts) blocks + +-- | Convert list of Pandoc inline elements to asciidoc. +inlineListToAsciiDoc :: WriterOptions -> [Inline] -> State WriterState Doc +inlineListToAsciiDoc opts lst = + mapM (inlineToAsciiDoc opts) lst >>= return . cat + +-- | Convert Pandoc inline element to asciidoc. +inlineToAsciiDoc :: WriterOptions -> Inline -> State WriterState Doc +inlineToAsciiDoc opts (Emph lst) = do + contents <- inlineListToAsciiDoc opts lst + return $ "_" <> contents <> "_" +inlineToAsciiDoc opts (Strong lst) = do + contents <- inlineListToAsciiDoc opts lst + return $ "*" <> contents <> "*" +inlineToAsciiDoc opts (Strikeout lst) = do + contents <- inlineListToAsciiDoc opts lst + return $ "[line-through]*" <> contents <> "*" +inlineToAsciiDoc opts (Superscript lst) = do + contents <- inlineListToAsciiDoc opts lst + return $ "^" <> contents <> "^" +inlineToAsciiDoc opts (Subscript lst) = do + contents <- inlineListToAsciiDoc opts lst + return $ "~" <> contents <> "~" +inlineToAsciiDoc opts (SmallCaps lst) = inlineListToAsciiDoc opts lst +inlineToAsciiDoc opts (Quoted SingleQuote lst) = do + contents <- inlineListToAsciiDoc opts lst + return $ "`" <> contents <> "'" +inlineToAsciiDoc opts (Quoted DoubleQuote lst) = do + contents <- inlineListToAsciiDoc opts lst + return $ "``" <> contents <> "''" +inlineToAsciiDoc _ (Code _ str) = return $ + text "`" <> text (escapeStringUsing (backslashEscapes "`") str) <> "`" +inlineToAsciiDoc _ (Str str) = return $ text $ escapeString str +inlineToAsciiDoc _ (Math InlineMath str) = + return $ "latexmath:[$" <> text str <> "$]" +inlineToAsciiDoc _ (Math DisplayMath str) = + return $ "latexmath:[\\[" <> text str <> "\\]]" +inlineToAsciiDoc _ (RawInline _ _) = return empty +inlineToAsciiDoc _ (LineBreak) = return $ " +" <> cr +inlineToAsciiDoc _ Space = return space +inlineToAsciiDoc opts (Cite _ lst) = inlineListToAsciiDoc opts lst +inlineToAsciiDoc opts (Link txt (src, _tit)) = do +-- relative: link:downloads/foo.zip[download foo.zip] +-- abs: http://google.cod[Google] +-- or my@email.com[email john] + linktext <- inlineListToAsciiDoc opts txt + let isRelative = ':' `notElem` src + let prefix = if isRelative + then text "link:" + else empty + let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src + let useAuto = case txt of + [Code _ s] | s == srcSuffix -> True + _ -> False + return $ if useAuto + then text srcSuffix + else prefix <> text src <> "[" <> linktext <> "]" +inlineToAsciiDoc opts (Image alternate (src, tit)) = do +-- image:images/logo.png[Company logo, title="blah"] + let txt = if (null alternate) || (alternate == [Str ""]) + then [Str "image"] + else alternate + linktext <- inlineListToAsciiDoc opts txt + let linktitle = if null tit + then empty + else text $ ",title=\"" ++ tit ++ "\"" + return $ "image:" <> text src <> "[" <> linktext <> linktitle <> "]" +inlineToAsciiDoc opts (Note [Para inlines]) = + inlineToAsciiDoc opts (Note [Plain inlines]) +inlineToAsciiDoc opts (Note [Plain inlines]) = do + contents <- inlineListToAsciiDoc opts inlines + return $ text "footnote:[" <> contents <> "]" +-- asciidoc can't handle blank lines in notes +inlineToAsciiDoc _ (Note _) = return "[multiblock footnote omitted]" diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 763f77d7c..dfdf7a140 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -31,12 +31,13 @@ Conversion of 'Pandoc' format into ConTeXt. module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where import Text.Pandoc.Definition import Text.Pandoc.Shared +import Text.Pandoc.Generic (queryWith) import Text.Printf ( printf ) import Data.List ( intercalate ) import Control.Monad.State import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate ) -import Network.URI ( isAbsoluteURI, unEscapeString ) +import Network.URI ( isURI, unEscapeString ) data WriterState = WriterState { stNextRef :: Int -- number of next URL reference @@ -68,13 +69,14 @@ pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do datetext <- if null date then return "" else liftM (render colwidth) $ inlineListToConTeXt date - body <- blockListToConTeXt blocks - let main = render colwidth $ body + body <- mapM (elementToConTeXt options) $ hierarchicalize blocks + let main = (render colwidth . vcat) body let context = writerVariables options ++ [ ("toc", if writerTableOfContents options then "yes" else "") , ("body", main) , ("title", titletext) , ("date", datetext) ] ++ + [ ("number-sections", "yes") | writerNumberSections options ] ++ [ ("author", a) | a <- authorstext ] return $ if writerStandalone options then renderTemplate context $ writerTemplate options @@ -101,12 +103,24 @@ escapeCharForConTeXt ch = ']' -> "{]}" '_' -> "\\letterunderscore{}" '\160' -> "~" + '\x2014' -> "---" + '\x2013' -> "--" + '\x2019' -> "'" + '\x2026' -> "\\ldots{}" x -> [x] -- | Escape string for ConTeXt stringToConTeXt :: String -> String stringToConTeXt = concatMap escapeCharForConTeXt +-- | Convert Elements to ConTeXt +elementToConTeXt :: WriterOptions -> Element -> State WriterState Doc +elementToConTeXt _ (Blk block) = blockToConTeXt block +elementToConTeXt opts (Sec level _ id' title' elements) = do + header' <- sectionHeader id' level title' + innerContents <- mapM (elementToConTeXt opts) elements + return $ vcat (header' : innerContents) + -- | Convert Pandoc block element to ConTeXt. blockToConTeXt :: Block -> State WriterState Doc @@ -166,18 +180,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do blockToConTeXt (DefinitionList lst) = liftM vcat $ mapM defListItemToConTeXt lst blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline -blockToConTeXt (Header level lst) = do - contents <- inlineListToConTeXt lst - st <- get - let opts = stOptions st - let base = if writerNumberSections opts then "section" else "subject" - let level' = if writerChapters opts then level - 1 else level - return $ if level' >= 1 && level' <= 5 - then char '\\' <> text (concat (replicate (level' - 1) "sub")) <> - text base <> char '{' <> contents <> char '}' <> blankline - else if level' == 0 - then "\\chapter{" <> contents <> "}" - else contents <> blankline +-- If this is ever executed, provide a default for the reference identifier. +blockToConTeXt (Header level lst) = sectionHeader "" level lst blockToConTeXt (Table caption aligns widths heads rows) = do let colDescriptor colWidth alignment = (case alignment of AlignLeft -> 'l' @@ -213,8 +217,8 @@ defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc defListItemToConTeXt (term, defs) = do term' <- inlineListToConTeXt term def' <- liftM vsep $ mapM blockListToConTeXt defs - return $ "\\startdescr" <> braces term' $$ nest 2 def' $$ - "\\stopdescr" <> blankline + return $ "\\startdescription" <> braces term' $$ nest 2 def' $$ + "\\stopdescription" <> blankline -- | Convert list of block elements to ConTeXt. blockListToConTeXt :: [Block] -> State WriterState Doc @@ -257,10 +261,6 @@ inlineToConTeXt (Quoted DoubleQuote lst) = do contents <- inlineListToConTeXt lst return $ "\\quotation" <> braces contents inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst -inlineToConTeXt Apostrophe = return $ char '\'' -inlineToConTeXt EmDash = return "---" -inlineToConTeXt EnDash = return "--" -inlineToConTeXt Ellipses = return "\\ldots{}" inlineToConTeXt (Str str) = return $ text $ stringToConTeXt str inlineToConTeXt (Math InlineMath str) = return $ char '$' <> text str <> char '$' @@ -271,23 +271,69 @@ inlineToConTeXt (RawInline "tex" str) = return $ text str inlineToConTeXt (RawInline _ _) = return empty inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr inlineToConTeXt Space = return space -inlineToConTeXt (Link [Code _ str] (src, tit)) = -- since ConTeXt has its own - inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links... -inlineToConTeXt (Link txt (src, _)) = do +-- autolink +inlineToConTeXt (Link [Code _ str] (src, tit)) = inlineToConTeXt (Link + [RawInline "context" "\\hyphenatedurl{", Str str, RawInline "context" "}"] + (src, tit)) +-- Handle HTML-like internal document references to sections +inlineToConTeXt (Link txt (('#' : ref), _)) = do + opts <- gets stOptions + label <- inlineListToConTeXt txt + return $ text "\\in" + <> braces (if writerNumberSections opts + then label <+> text "(\\S" + else label) -- prefix + <> braces (if writerNumberSections opts + then text ")" + else empty) -- suffix + <> brackets (text ref) + +inlineToConTeXt (Link txt (src, _)) = do st <- get let next = stNextRef st put $ st {stNextRef = next + 1} - let ref = show next - label <- inlineListToConTeXt txt - return $ "\\useURL" <> brackets (text ref) <> brackets (text src) <> - brackets empty <> brackets label <> - "\\from" <> brackets (text ref) + let ref = "url" ++ show next + label <- inlineListToConTeXt txt + return $ "\\useURL" + <> brackets (text ref) + <> brackets (text $ escapeStringUsing [('#',"\\#")] src) + <> brackets empty + <> brackets label + <> "\\from" + <> brackets (text ref) inlineToConTeXt (Image _ (src, _)) = do - let src' = if isAbsoluteURI src + let src' = if isURI src then src else unEscapeString src return $ braces $ "\\externalfigure" <> brackets (text src') inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents - return $ text "\\footnote{" <> - nest 2 contents' <> char '}' + let codeBlock x@(CodeBlock _ _) = [x] + codeBlock _ = [] + let codeBlocks = queryWith codeBlock contents + return $ if null codeBlocks + then text "\\footnote{" <> nest 2 contents' <> char '}' + else text "\\startbuffer " <> nest 2 contents' <> + text "\\stopbuffer\\footnote{\\getbuffer}" + +-- | Craft the section header, inserting the secton reference, if supplied. +sectionHeader :: [Char] + -> Int + -> [Inline] + -> State WriterState Doc +sectionHeader ident hdrLevel lst = do + contents <- inlineListToConTeXt lst + st <- get + let opts = stOptions st + let level' = if writerChapters opts then hdrLevel - 1 else hdrLevel + return $ if level' >= 1 && level' <= 5 + then char '\\' + <> text (concat (replicate (level' - 1) "sub")) + <> text "section" + <> (if (not . null) ident then brackets (text ident) else empty) + <> braces contents + <> blankline + else if level' == 0 + then "\\chapter{" <> contents <> "}" + else contents <> blankline + diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 29c042cf9..1bcf99dcf 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -35,8 +35,11 @@ import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath import Data.List ( isPrefixOf, intercalate, isSuffixOf ) import Data.Char ( toLower ) -import Text.Pandoc.Highlighting (languages, languagesByExtension) +import Text.Pandoc.Highlighting ( languages, languagesByExtension ) import Text.Pandoc.Pretty +import Text.TeXMath +import qualified Text.XML.Light as Xml +import Data.Generics (everywhere, mkT) -- | Convert list of authors to a docbook <author> section authorToDocbook :: WriterOptions -> [Inline] -> Doc @@ -69,34 +72,39 @@ writeDocbook opts (Pandoc (Meta tit auths dat) blocks) = then Just $ writerColumns opts else Nothing render' = render colwidth - opts' = if "</book>" `isSuffixOf` + opts' = if "/book>" `isSuffixOf` (removeTrailingSpace $ writerTemplate opts) then opts{ writerChapters = True } else opts - main = render' $ vcat (map (elementToDocbook opts') elements) + startLvl = if writerChapters opts' then 0 else 1 + main = render' $ vcat (map (elementToDocbook opts' startLvl) elements) context = writerVariables opts ++ [ ("body", main) , ("title", render' title) , ("date", render' date) ] ++ - [ ("author", render' a) | a <- authors ] + [ ("author", render' a) | a <- authors ] ++ + [ ("mathml", "yes") | case writerHTMLMathMethod opts of + MathML _ -> True + _ -> False ] in if writerStandalone opts then renderTemplate context $ writerTemplate opts else main -- | Convert an Element to Docbook. -elementToDocbook :: WriterOptions -> Element -> Doc -elementToDocbook opts (Blk block) = blockToDocbook opts block -elementToDocbook opts (Sec _ _num id' title elements) = +elementToDocbook :: WriterOptions -> Int -> Element -> Doc +elementToDocbook opts _ (Blk block) = blockToDocbook opts block +elementToDocbook opts lvl (Sec _ _num id' title elements) = -- Docbook doesn't allow sections with no content, so insert some if needed let elements' = if null elements then [Blk (Para [])] else elements - tag = if writerChapters opts - then "chapter" - else "section" + tag = case lvl of + n | n == 0 -> "chapter" + | n >= 1 && n <= 5 -> "sect" ++ show n + | otherwise -> "simplesect" in inTags True tag [("id",id')] $ inTagsSimple "title" (inlinesToDocbook opts title) $$ - vcat (map (elementToDocbook opts{ writerChapters = False }) elements') + vcat (map (elementToDocbook opts (lvl + 1)) elements') -- | Convert a list of Pandoc blocks to Docbook. blocksToDocbook :: WriterOptions -> [Block] -> Doc @@ -248,14 +256,27 @@ inlineToDocbook opts (Quoted _ lst) = inTagsSimple "quote" $ inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst -inlineToDocbook _ Apostrophe = char '\'' -inlineToDocbook _ Ellipses = text "…" -inlineToDocbook _ EmDash = text "—" -inlineToDocbook _ EnDash = text "–" inlineToDocbook _ (Code _ str) = inTagsSimple "literal" $ text (escapeStringForXML str) -inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str -inlineToDocbook _ (RawInline _ _) = empty +inlineToDocbook opts (Math t str) + | isMathML (writerHTMLMathMethod opts) = + case texMathToMathML dt str of + Right r -> inTagsSimple tagtype + $ text $ Xml.ppcElement conf + $ fixNS + $ removeAttr r + Left _ -> inlinesToDocbook opts + $ readTeXMath str + | otherwise = inlinesToDocbook opts $ readTeXMath str + where (dt, tagtype) = case t of + InlineMath -> (DisplayInline,"inlineequation") + DisplayMath -> (DisplayBlock,"informalequation") + conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP + removeAttr e = e{ Xml.elAttribs = [] } + fixNS' qname = qname{ Xml.qPrefix = Just "mml" } + fixNS = everywhere (mkT fixNS') +inlineToDocbook _ (RawInline f x) | f == "html" || f == "docbook" = text x + | otherwise = empty inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty inlineToDocbook _ Space = space inlineToDocbook opts (Link txt (src, _)) = @@ -280,3 +301,7 @@ inlineToDocbook _ (Image _ (src, tit)) = titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] inlineToDocbook opts (Note contents) = inTagsIndented "footnote" $ blocksToDocbook opts contents + +isMathML :: HTMLMathMethod -> Bool +isMathML (MathML _) = True +isMathML _ = False diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs new file mode 100644 index 000000000..22278be7e --- /dev/null +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -0,0 +1,666 @@ +{- +Copyright (C) 2012 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Docx + Copyright : Copyright (C) 2012 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to docx. +-} +module Text.Pandoc.Writers.Docx ( writeDocx ) where +import Data.List ( intercalate ) +import System.FilePath ( (</>) ) +import qualified Data.ByteString.Lazy as B +import qualified Data.Map as M +import Data.ByteString.Lazy.UTF8 ( fromString, toString ) +import Text.Pandoc.UTF8 as UTF8 +import System.IO ( stderr ) +import Codec.Archive.Zip +import Data.Time.Clock.POSIX +import Paths_pandoc ( getDataFileName ) +import Text.Pandoc.Definition +import Text.Pandoc.Generic +import System.Directory +import Text.Pandoc.ImageSize +import Text.Pandoc.Shared hiding (Element) +import Text.Pandoc.Readers.TeXMath +import Text.Pandoc.Highlighting ( highlight ) +import Text.Highlighting.Kate.Types () +import Text.XML.Light +import Text.TeXMath +import Control.Monad.State +import Text.Highlighting.Kate + +data WriterState = WriterState{ + stTextProperties :: [Element] + , stParaProperties :: [Element] + , stFootnotes :: [Element] + , stSectionIds :: [String] + , stExternalLinks :: M.Map String String + , stImages :: M.Map FilePath (String, B.ByteString) + , stListLevel :: Int + , stListMarker :: ListMarker + , stNumStyles :: M.Map ListMarker Int + , stLists :: [ListMarker] + } + +data ListMarker = NoMarker + | BulletMarker + | NumberMarker ListNumberStyle ListNumberDelim Int + deriving (Show, Read, Eq, Ord) + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ + stTextProperties = [] + , stParaProperties = [] + , stFootnotes = [] + , stSectionIds = [] + , stExternalLinks = M.empty + , stImages = M.empty + , stListLevel = -1 + , stListMarker = NoMarker + , stNumStyles = M.fromList [(NoMarker, 0)] + , stLists = [NoMarker] + } + +type WS a = StateT WriterState IO a + +showTopElement' :: Element -> String +showTopElement' x = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++ showElement x + +mknode :: Node t => String -> [(String,String)] -> t -> Element +mknode s attrs = + add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) . node (unqual s) + +-- | Produce an Docx file from a Pandoc document. +writeDocx :: Maybe FilePath -- ^ Path specified by --reference-docx + -> WriterOptions -- ^ Writer options + -> Pandoc -- ^ Document to convert + -> IO B.ByteString +writeDocx mbRefDocx opts doc@(Pandoc (Meta tit auths date) _) = do + let datadir = writerUserDataDir opts + refArchive <- liftM toArchive $ + case mbRefDocx of + Just f -> B.readFile f + Nothing -> do + let defaultDocx = getDataFileName "reference.docx" >>= B.readFile + case datadir of + Nothing -> defaultDocx + Just d -> do + exists <- doesFileExist (d </> "reference.docx") + if exists + then B.readFile (d </> "reference.docx") + else defaultDocx + + (newContents, st) <- runStateT (writeOpenXML opts{writerWrapText = False} doc) + defaultWriterState + epochtime <- floor `fmap` getPOSIXTime + let imgs = M.elems $ stImages st + let imgPath ident img = "media/" ++ ident ++ + case imageType img of + Just Png -> ".png" + Just Jpeg -> ".jpeg" + Just Gif -> ".gif" + Nothing -> "" + let toImgRel (ident,img) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",imgPath ident img)] () + let newrels = map toImgRel imgs + let relpath = "word/_rels/document.xml.rels" + let reldoc = case findEntryByPath relpath refArchive >>= + parseXMLDoc . toString . fromEntry of + Just d -> d + Nothing -> error $ relpath ++ "missing in reference docx" + let reldoc' = reldoc{ elContent = elContent reldoc ++ map Elem newrels } + -- create entries for images + let toImageEntry (ident,img) = toEntry ("word/" ++ imgPath ident img) + epochtime img + let imageEntries = map toImageEntry imgs + -- NOW get list of external links and images from this, and do what's needed + let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () + let newrels' = map toLinkRel $ M.toList $ stExternalLinks st + let reldoc'' = reldoc' { elContent = elContent reldoc' ++ map Elem newrels' } + let relEntry = toEntry relpath epochtime $ fromString $ showTopElement' reldoc'' + let contentEntry = toEntry "word/document.xml" epochtime $ fromString $ showTopElement' newContents + -- styles + let newstyles = styleToOpenXml $ writerHighlightStyle opts + let stylepath = "word/styles.xml" + let styledoc = case findEntryByPath stylepath refArchive >>= + parseXMLDoc . toString . fromEntry of + Just d -> d + Nothing -> error $ stylepath ++ "missing in reference docx" + let styledoc' = styledoc{ elContent = elContent styledoc ++ map Elem newstyles } + let styleEntry = toEntry stylepath epochtime $ fromString $ showTopElement' styledoc' + -- construct word/numbering.xml + let numpath = "word/numbering.xml" + let numEntry = toEntry numpath epochtime $ fromString $ showTopElement' + $ mkNumbering (stNumStyles st) (stLists st) + let docPropsPath = "docProps/core.xml" + let docProps = mknode "cp:coreProperties" + [("xmlns:cp","http://schemas.openxmlformats.org/package/2006/metadata/core-properties") + ,("xmlns:dc","http://purl.org/dc/elements/1.1/") + ,("xmlns:dcterms","http://purl.org/dc/terms/") + ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") + ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] + $ mknode "dc:title" [] (stringify tit) + : mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] + (maybe "" id $ normalizeDate $ stringify date) + : mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] () -- put current time here + : map (mknode "dc:creator" [] . stringify) auths + let docPropsEntry = toEntry docPropsPath epochtime $ fromString $ showTopElement' docProps + let relsPath = "_rels/.rels" + rels <- case findEntryByPath relsPath refArchive of + Just e -> return $ toString $ fromEntry e + Nothing -> err 57 "could not find .rels/_rels in reference docx" + -- fix .rels/_rels, which can get screwed up when reference.docx is edited by Word + let rels' = substitute "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" + "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties" + rels + let relsEntry = toEntry relsPath epochtime $ fromString rels' + let archive = foldr addEntryToArchive refArchive $ + relsEntry : contentEntry : relEntry : numEntry : styleEntry : docPropsEntry : imageEntries + return $ fromArchive archive + +styleToOpenXml :: Style -> [Element] +styleToOpenXml style = parStyle : map toStyle alltoktypes + where alltoktypes = enumFromTo KeywordTok NormalTok + toStyle toktype = mknode "w:style" [("w:type","character"), + ("w:customStyle","1"),("w:styleId",show toktype)] + [ mknode "w:name" [("w:val",show toktype)] () + , mknode "w:basedOn" [("w:val","VerbatimChar")] () + , mknode "w:rPr" [] $ + [ mknode "w:color" [("w:val",tokCol toktype)] () + | tokCol toktype /= "auto" ] ++ + [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] () + | tokBg toktype /= "auto" ] ++ + [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++ + [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++ + [ mknode "w:u" [] () | tokFeature tokenUnderline toktype ] + ] + tokStyles = tokenStyles style + tokFeature f toktype = maybe False f $ lookup toktype tokStyles + tokCol toktype = maybe "auto" (drop 1 . fromColor) + $ (tokenColor =<< lookup toktype tokStyles) + `mplus` defaultColor style + tokBg toktype = maybe "auto" (drop 1 . fromColor) + $ (tokenBackground =<< lookup toktype tokStyles) + `mplus` backgroundColor style + parStyle = mknode "w:style" [("w:type","paragraph"), + ("w:customStyle","1"),("w:styleId","SourceCode")] + [ mknode "w:name" [("w:val","Source Code")] () + , mknode "w:basedOn" [("w:val","Normal")] () + , mknode "w:link" [("w:val","VerbatimChar")] () + , mknode "w:pPr" [] + $ mknode "w:wordWrap" [("w:val","off")] () + : ( maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) + $ backgroundColor style ) + ] + +mkNumbering :: M.Map ListMarker Int -> [ListMarker] -> Element +mkNumbering markers lists = + mknode "w:numbering" [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main")] + $ map mkAbstractNum (M.toList markers) + ++ zipWith (mkNum markers) lists [1..(length lists)] + +mkNum :: M.Map ListMarker Int -> ListMarker -> Int -> Element +mkNum markers marker numid = + mknode "w:num" [("w:numId",show numid)] + $ mknode "w:abstractNumId" [("w:val",show absnumid)] () + : case marker of + NoMarker -> [] + BulletMarker -> [] + NumberMarker _ _ start -> + map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] + $ mknode "w:startOverride" [("w:val",show start)] ()) [0..6] + where absnumid = maybe 0 id $ M.lookup marker markers + +mkAbstractNum :: (ListMarker,Int) -> Element +mkAbstractNum (marker,numid) = + mknode "w:abstractNum" [("w:abstractNumId",show numid)] + $ mknode "w:multiLevelType" [("w:val","multilevel")] () + : map (mkLvl marker) [0..6] + +mkLvl :: ListMarker -> Int -> Element +mkLvl marker lvl = + mknode "w:lvl" [("w:ilvl",show lvl)] $ + [ mknode "w:start" [("w:val",start)] () + | marker /= NoMarker && marker /= BulletMarker ] ++ + [ mknode "w:numFmt" [("w:val",fmt)] () + , mknode "w:lvlText" [("w:val",lvltxt)] () + , mknode "w:lvlJc" [("w:val","left")] () + , mknode "w:pPr" [] + [ mknode "w:tabs" [] + $ mknode "w:tab" [("w:val","num"),("w:pos",show $ lvl * step)] () + , mknode "w:ind" [("w:left",show $ lvl * step + hang),("w:hanging",show hang)] () + ] + ] + where (fmt, lvltxt, start) = + case marker of + NoMarker -> ("bullet"," ","1") + BulletMarker -> ("bullet",bulletFor lvl,"1") + NumberMarker st de n -> (styleFor st lvl + ,patternFor de ("%" ++ show (lvl + 1)) + ,show n) + step = 720 + hang = 480 + bulletFor 0 = "\8226" + bulletFor 1 = "\9702" + bulletFor 2 = "\8227" + bulletFor 3 = "\8259" + bulletFor 4 = "\8226" + bulletFor 5 = "\9702" + bulletFor _ = "\8227" + styleFor UpperAlpha _ = "upperLetter" + styleFor LowerAlpha _ = "lowerLetter" + styleFor UpperRoman _ = "upperRoman" + styleFor LowerRoman _ = "lowerRoman" + styleFor Decimal _ = "decimal" + styleFor DefaultStyle 1 = "decimal" + styleFor DefaultStyle 2 = "lowerLetter" + styleFor DefaultStyle 3 = "lowerRoman" + styleFor DefaultStyle 4 = "decimal" + styleFor DefaultStyle 5 = "lowerLetter" + styleFor DefaultStyle 6 = "lowerRoman" + styleFor _ _ = "decimal" + patternFor OneParen s = s ++ ")" + patternFor TwoParens s = "(" ++ s ++ ")" + patternFor _ s = s ++ "." + +-- | Convert Pandoc document to string in OpenXML format. +writeOpenXML :: WriterOptions -> Pandoc -> WS Element +writeOpenXML opts (Pandoc (Meta tit auths dat) blocks) = do + title <- withParaProp (pStyle "Title") $ blocksToOpenXML opts [Para tit | not (null tit)] + authors <- withParaProp (pStyle "Authors") $ blocksToOpenXML opts + [Para (intercalate [LineBreak] auths) | not (null auths)] + date <- withParaProp (pStyle "Date") $ blocksToOpenXML opts [Para dat | not (null dat)] + let convertSpace (Str x : Space : Str y : xs) = Str (x ++ " " ++ y) : xs + convertSpace (Str x : Str y : xs) = Str (x ++ y) : xs + convertSpace xs = xs + let blocks' = bottomUp convertSpace $ blocks + doc <- blocksToOpenXML opts blocks' + notes' <- reverse `fmap` gets stFootnotes + let notes = case notes' of + [] -> [] + ns -> [mknode "w:footnotes" [] ns] + let meta = title ++ authors ++ date + return $ mknode "w:document" + [("xmlns:w","http://schemas.openxmlformats.org/wordprocessingml/2006/main") + ,("xmlns:m","http://schemas.openxmlformats.org/officeDocument/2006/math") + ,("xmlns:r","http://schemas.openxmlformats.org/officeDocument/2006/relationships") + ,("xmlns:o","urn:schemas-microsoft-com:office:office") + ,("xmlns:v","urn:schemas-microsoft-com:vml") + ,("xmlns:w10","urn:schemas-microsoft-com:office:word") + ,("xmlns:a","http://schemas.openxmlformats.org/drawingml/2006/main") + ,("xmlns:pic","http://schemas.openxmlformats.org/drawingml/2006/picture") + ,("xmlns:wp","http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing")] + $ mknode "w:body" [] (meta ++ doc ++ notes) + +-- | Convert a list of Pandoc blocks to OpenXML. +blocksToOpenXML :: WriterOptions -> [Block] -> WS [Element] +blocksToOpenXML opts bls = concat `fmap` mapM (blockToOpenXML opts) bls + +pStyle :: String -> Element +pStyle sty = mknode "w:pStyle" [("w:val",sty)] () + +rStyle :: String -> Element +rStyle sty = mknode "w:rStyle" [("w:val",sty)] () + +-- | Convert a Pandoc block element to OpenXML. +blockToOpenXML :: WriterOptions -> Block -> WS [Element] +blockToOpenXML _ Null = return [] +blockToOpenXML opts (Header lev lst) = do + contents <- withParaProp (pStyle $ "Heading" ++ show lev) $ + blockToOpenXML opts (Para lst) + usedIdents <- gets stSectionIds + let ident = uniqueIdent lst usedIdents + modify $ \s -> s{ stSectionIds = ident : stSectionIds s } + let bookmarkStart = mknode "w:bookmarkStart" [("w:id",ident) + ,("w:name",ident)] () + let bookmarkEnd = mknode "w:bookmarkEnd" [("w:id",ident)] () + return $ [bookmarkStart] ++ contents ++ [bookmarkEnd] +blockToOpenXML opts (Plain lst) = blockToOpenXML opts (Para lst) +blockToOpenXML opts (Para x@[Image alt _]) = do + paraProps <- getParaProps + contents <- inlinesToOpenXML opts x + captionNode <- withParaProp (pStyle "ImageCaption") + $ blockToOpenXML opts (Para alt) + return $ mknode "w:p" [] (paraProps ++ contents) : captionNode +blockToOpenXML opts (Para lst) = do + paraProps <- getParaProps + contents <- inlinesToOpenXML opts lst + return [mknode "w:p" [] (paraProps ++ contents)] +blockToOpenXML _ (RawBlock format str) + | format == "openxml" = return [ x | Elem x <- parseXML str ] + | otherwise = return [] +blockToOpenXML opts (BlockQuote blocks) = + withParaProp (pStyle "BlockQuote") $ blocksToOpenXML opts blocks +blockToOpenXML opts (CodeBlock attrs str) = + withParaProp (pStyle "SourceCode") $ blockToOpenXML opts $ Para [Code attrs str] +blockToOpenXML _ HorizontalRule = return [ + mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] + $ mknode "v:rect" [("style","width:0;height:1.5pt"), + ("o:hralign","center"), + ("o:hrstd","t"),("o:hr","t")] () ] +blockToOpenXML opts (Table caption aligns widths headers rows) = do + let captionStr = stringify caption + caption' <- if null caption + then return [] + else withParaProp (pStyle "TableCaption") + $ blockToOpenXML opts (Para caption) + let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () + let cellToOpenXML (al, cell) = withParaProp (alignmentFor al) + $ blocksToOpenXML opts cell + headers' <- mapM cellToOpenXML $ zip aligns headers + rows' <- mapM (\cells -> mapM cellToOpenXML $ zip aligns cells) + $ rows + let borderProps = mknode "w:tcPr" [] + [ mknode "w:tcBorders" [] + $ mknode "w:bottom" [("w:val","single")] () + , mknode "w:vAlign" [("w:val","bottom")] () ] + let mkcell border contents = mknode "w:tc" [] + $ [ borderProps | border ] ++ + if null contents + then [mknode "w:p" [] ()] + else contents + let mkrow border cells = mknode "w:tr" [] $ map (mkcell border) cells + let textwidth = 7920 -- 5.5 in in twips, 1/20 pt + let mkgridcol w = mknode "w:gridCol" + [("w:w", show $ (floor (textwidth * w) :: Integer))] () + return $ + [ mknode "w:tbl" [] + ( mknode "w:tblPr" [] + [ mknode "w:tblCaption" [("w:val", captionStr)] () + | not (null caption) ] + : mknode "w:tblGrid" [] + (if all (==0) widths + then [] + else map mkgridcol widths) + : [ mkrow True headers' | not (all null headers) ] ++ + map (mkrow False) rows' + ) + ] ++ caption' +blockToOpenXML opts (BulletList lst) = do + let marker = BulletMarker + addList marker + asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst +blockToOpenXML opts (OrderedList (start, numstyle, numdelim) lst) = do + let marker = NumberMarker numstyle numdelim start + addList marker + asList $ concat `fmap` mapM (listItemToOpenXML opts marker) lst +blockToOpenXML opts (DefinitionList items) = + concat `fmap` mapM (definitionListItemToOpenXML opts) items + +definitionListItemToOpenXML :: WriterOptions -> ([Inline],[[Block]]) -> WS [Element] +definitionListItemToOpenXML opts (term,defs) = do + term' <- withParaProp (pStyle "DefinitionTerm") + $ blockToOpenXML opts (Para term) + defs' <- withParaProp (pStyle "Definition") + $ concat `fmap` mapM (blocksToOpenXML opts) defs + return $ term' ++ defs' + +getNumId :: WS Int +getNumId = length `fmap` gets stLists + +addList :: ListMarker -> WS () +addList marker = do + lists <- gets stLists + modify $ \st -> st{ stLists = lists ++ [marker] } + numStyles <- gets stNumStyles + case M.lookup marker numStyles of + Just _ -> return () + Nothing -> modify $ \st -> + st{ stNumStyles = M.insert marker (M.size numStyles + 1) numStyles } + +listItemToOpenXML :: WriterOptions -> ListMarker -> [Block] -> WS [Element] +listItemToOpenXML _ _ [] = return [] +listItemToOpenXML opts marker (first:rest) = do + first' <- withMarker marker $ blockToOpenXML opts first + rest' <- withMarker NoMarker $ blocksToOpenXML opts rest + return $ first' ++ rest' + +alignmentToString :: Alignment -> [Char] +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +-- | Convert a list of inline elements to OpenXML. +inlinesToOpenXML :: WriterOptions -> [Inline] -> WS [Element] +inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst + +withMarker :: ListMarker -> WS a -> WS a +withMarker m p = do + origMarker <- gets stListMarker + modify $ \st -> st{ stListMarker = m } + result <- p + modify $ \st -> st{ stListMarker = origMarker } + return result + +asList :: WS a -> WS a +asList p = do + origListLevel <- gets stListLevel + modify $ \st -> st{ stListLevel = stListLevel st + 1 } + result <- p + modify $ \st -> st{ stListLevel = origListLevel } + return result + +getTextProps :: WS [Element] +getTextProps = do + props <- gets stTextProperties + return $ if null props + then [] + else [mknode "w:rPr" [] $ props] + +pushTextProp :: Element -> WS () +pushTextProp d = modify $ \s -> s{ stTextProperties = d : stTextProperties s } + +popTextProp :: WS () +popTextProp = modify $ \s -> s{ stTextProperties = drop 1 $ stTextProperties s } + +withTextProp :: Element -> WS a -> WS a +withTextProp d p = do + pushTextProp d + res <- p + popTextProp + return res + +getParaProps :: WS [Element] +getParaProps = do + props <- gets stParaProperties + listLevel <- gets stListLevel + numid <- getNumId + let listPr = if listLevel >= 0 + then [ mknode "w:numPr" [] + [ mknode "w:numId" [("w:val",show numid)] () + , mknode "w:ilvl" [("w:val",show listLevel)] () ] + ] + else [] + return $ case props ++ listPr of + [] -> [] + ps -> [mknode "w:pPr" [] ps] + +pushParaProp :: Element -> WS () +pushParaProp d = modify $ \s -> s{ stParaProperties = d : stParaProperties s } + +popParaProp :: WS () +popParaProp = modify $ \s -> s{ stParaProperties = drop 1 $ stParaProperties s } + +withParaProp :: Element -> WS a -> WS a +withParaProp d p = do + pushParaProp d + res <- p + popParaProp + return res + +formattedString :: String -> WS [Element] +formattedString str = do + props <- getTextProps + return [ mknode "w:r" [] $ + props ++ + [ mknode "w:t" [("xml:space","preserve")] str ] ] + +-- | Convert an inline element to OpenXML. +inlineToOpenXML :: WriterOptions -> Inline -> WS [Element] +inlineToOpenXML _ (Str str) = formattedString str +inlineToOpenXML opts Space = inlineToOpenXML opts (Str " ") +inlineToOpenXML opts (Strong lst) = + withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst +inlineToOpenXML opts (Emph lst) = + withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst +inlineToOpenXML opts (Subscript lst) = + withTextProp (mknode "w:vertAlign" [("w:val","subscript")] ()) + $ inlinesToOpenXML opts lst +inlineToOpenXML opts (Superscript lst) = + withTextProp (mknode "w:vertAlign" [("w:val","superscript")] ()) + $ inlinesToOpenXML opts lst +inlineToOpenXML opts (SmallCaps lst) = + withTextProp (mknode "w:smallCaps" [] ()) + $ inlinesToOpenXML opts lst +inlineToOpenXML opts (Strikeout lst) = + withTextProp (mknode "w:strike" [] ()) + $ inlinesToOpenXML opts lst +inlineToOpenXML _ LineBreak = return [ mknode "w:br" [] () ] +inlineToOpenXML _ (RawInline f str) + | f == "openxml" = return [ x | Elem x <- parseXML str ] + | otherwise = return [] +inlineToOpenXML opts (Quoted quoteType lst) = + inlinesToOpenXML opts $ [Str open] ++ lst ++ [Str close] + where (open, close) = case quoteType of + SingleQuote -> ("\x2018", "\x2019") + DoubleQuote -> ("\x201C", "\x201D") +inlineToOpenXML opts (Math InlineMath str) = + case texMathToOMML DisplayInline str of + Right r -> return [r] + Left _ -> inlinesToOpenXML opts (readTeXMath str) +inlineToOpenXML opts (Math DisplayMath str) = + case texMathToOMML DisplayBlock str of + Right r -> return [br, r, br] + Left _ -> do + fallback <- inlinesToOpenXML opts (readTeXMath str) + return $ [br] ++ fallback ++ [br] + where br = mknode "w:br" [] () +inlineToOpenXML opts (Cite _ lst) = inlinesToOpenXML opts lst +inlineToOpenXML _ (Code attrs str) = + withTextProp (rStyle "VerbatimChar") + $ case highlight formatOpenXML attrs str of + Nothing -> intercalate [mknode "w:br" [] ()] + `fmap` (mapM formattedString $ lines str) + Just h -> return h + where formatOpenXML _fmtOpts = intercalate [mknode "w:br" [] ()] . + map (map toHlTok) + toHlTok (toktype,tok) = mknode "w:r" [] + [ mknode "w:rPr" [] + [ rStyle $ show toktype ] + , mknode "w:t" [("xml:space","preserve")] tok ] +inlineToOpenXML opts (Note bs) = do + notes <- gets stFootnotes + let notenum = length notes + 1 + let notemarker = mknode "w:r" [] + [ mknode "w:rPr" [] (rStyle "FootnoteReference") + , mknode "w:footnoteRef" [] () ] + let notemarkerXml = RawInline "openxml" $ ppElement notemarker + let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : ils) : xs + insertNoteRef (Para ils : xs) = Para (notemarkerXml : ils) : xs + insertNoteRef xs = Para [notemarkerXml] : xs + oldListLevel <- gets stListLevel + oldParaProperties <- gets stParaProperties + oldTextProperties <- gets stTextProperties + modify $ \st -> st{ stListLevel = -1, stParaProperties = [], stTextProperties = [] } + contents <- withParaProp (pStyle "FootnoteText") $ blocksToOpenXML opts + $ insertNoteRef bs + modify $ \st -> st{ stListLevel = oldListLevel, stParaProperties = oldParaProperties, + stTextProperties = oldTextProperties } + let newnote = mknode "w:footnote" [("w:id",show notenum)] $ contents + modify $ \s -> s{ stFootnotes = newnote : notes } + return [ mknode "w:r" [] + [ mknode "w:rPr" [] (rStyle "FootnoteReference") + , mknode "w:footnoteReference" [("w:id", show notenum)] () ] ] +-- internal link: +inlineToOpenXML opts (Link txt ('#':xs,_)) = do + contents <- withTextProp (rStyle "Hyperlink") $ inlinesToOpenXML opts txt + return [ mknode "w:hyperlink" [("w:anchor",xs)] contents ] +-- external link: +inlineToOpenXML opts (Link txt (src,_)) = do + contents <- withTextProp (rStyle "Hyperlink") $ inlinesToOpenXML opts txt + extlinks <- gets stExternalLinks + ind <- case M.lookup src extlinks of + Just i -> return i + Nothing -> do + let i = "link" ++ show (M.size extlinks) + modify $ \st -> st{ stExternalLinks = + M.insert src i extlinks } + return i + return [ mknode "w:hyperlink" [("r:id",ind)] contents ] +inlineToOpenXML opts (Image alt (src, tit)) = do + exists <- liftIO $ doesFileExist src + if exists + then do + imgs <- gets stImages + (ident,size) <- case M.lookup src imgs of + Just (i,img) -> return (i, imageSize img) + Nothing -> do + img <- liftIO $ B.readFile src + let ident' = "image" ++ show (M.size imgs + 1) + let size' = imageSize img + modify $ \st -> st{ + stImages = M.insert src (ident',img) $ stImages st } + return (ident',size') + let (xpt,ypt) = maybe (120,120) sizeInPoints size + -- 12700 emu = 1 pt + let (xemu,yemu) = (xpt * 12700, ypt * 12700) + let cNvPicPr = mknode "pic:cNvPicPr" [] $ + mknode "a:picLocks" [("noChangeArrowheads","1"),("noChangeAspect","1")] () + let nvPicPr = mknode "pic:nvPicPr" [] + [ mknode "pic:cNvPr" + [("descr",src),("id","0"),("name","Picture")] () + , cNvPicPr ] + let blipFill = mknode "pic:blipFill" [] + [ mknode "a:blip" [("r:embed",ident)] () + , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] + let xfrm = mknode "a:xfrm" [] + [ mknode "a:off" [("x","0"),("y","0")] () + , mknode "a:ext" [("cx",show xemu),("cy",show yemu)] () ] + let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ + mknode "a:avLst" [] () + let ln = mknode "a:ln" [("w","9525")] + [ mknode "a:noFill" [] () + , mknode "a:headEnd" [] () + , mknode "a:tailEnd" [] () ] + let spPr = mknode "pic:spPr" [("bwMode","auto")] + [xfrm, prstGeom, mknode "a:noFill" [] (), ln] + let graphic = mknode "a:graphic" [] $ + mknode "a:graphicData" [("uri","http://schemas.openxmlformats.org/drawingml/2006/picture")] + [ mknode "pic:pic" [] + [ nvPicPr + , blipFill + , spPr ] ] + return [ mknode "w:r" [] $ + mknode "w:drawing" [] $ + mknode "wp:inline" [] + [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () + , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () + , mknode "wp:docPr" [("descr",tit),("id","1"),("name","Picture")] () + , graphic ] ] + else do + liftIO $ UTF8.hPutStrLn stderr $ + "Could not find image `" ++ src ++ "', skipping..." + inlinesToOpenXML opts alt diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 9fc393fed..67048348e 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -32,30 +32,31 @@ import Data.IORef import Data.Maybe ( fromMaybe, isNothing ) import Data.List ( findIndices, isPrefixOf ) import System.Environment ( getEnv ) -import System.FilePath ( (</>), takeBaseName, takeExtension ) +import System.FilePath ( (</>), (<.>), takeBaseName, takeExtension, takeFileName ) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 ( fromString ) import Codec.Archive.Zip -import System.Time +import Data.Time.Clock.POSIX import Text.Pandoc.Shared hiding ( Element ) import Text.Pandoc.Definition import Text.Pandoc.Generic -import Control.Monad (liftM) +import Control.Monad.State import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) import Data.Char ( toLower ) -import System.Directory ( copyFile ) import Network.URI ( unEscapeString ) +import Text.Pandoc.MIME (getMimeType) -- | Produce an EPUB file from a Pandoc document. writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line + -> [FilePath] -- ^ Paths to fonts to embed -> WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString -writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do - (TOD epochtime _) <- getClockTime +writeEPUB mbStylesheet fonts opts doc@(Pandoc meta _) = do + epochtime <- floor `fmap` getPOSIXTime let mkEntry path content = toEntry path epochtime content let opts' = opts{ writerEmailObfuscation = NoObfuscation , writerStandalone = True @@ -64,17 +65,24 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do let vars = writerVariables opts' let mbCoverImage = lookup "epub-cover-image" vars + titlePageTemplate <- readDataFile (writerUserDataDir opts) + $ "templates" </> "epub-titlepage" <.> "html" + + coverImageTemplate <- readDataFile (writerUserDataDir opts) + $ "templates" </> "epub-coverimage" <.> "html" + + pageTemplate <- readDataFile (writerUserDataDir opts) + $ "templates" </> "epub-page" <.> "html" + -- cover page (cpgEntry, cpicEntry) <- case mbCoverImage of Nothing -> return ([],[]) Just img -> do let coverImage = "cover-image" ++ takeExtension img - copyFile img coverImage let cpContent = fromString $ writeHtmlString - opts'{writerTemplate = pageTemplate - ,writerVariables = - ("coverimage",coverImage):vars} + opts'{writerTemplate = coverImageTemplate, + writerVariables = ("coverimage",coverImage):vars} (Pandoc meta []) imgContent <- B.readFile img return ( [mkEntry "cover.xhtml" cpContent] @@ -82,37 +90,47 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do -- title page let tpContent = fromString $ writeHtmlString - opts'{writerTemplate = pageTemplate - ,writerVariables = ("titlepage","yes"):vars} + opts'{writerTemplate = titlePageTemplate} (Pandoc meta []) let tpEntry = mkEntry "title_page.xhtml" tpContent -- handle pictures picsRef <- newIORef [] - Pandoc _ blocks <- liftM (bottomUp transformBlock) $ bottomUpM + Pandoc _ blocks <- bottomUpM (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc pics <- readIORef picsRef let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e -> return e{ eRelativePath = newsrc } picEntries <- mapM readPicEntry pics + -- handle fonts + let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f + fontEntries <- mapM mkFontEntry fonts + -- body pages let isH1 (Header 1 _) = True isH1 _ = False - let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks - let chunks = splitByIndices h1Indices blocks + -- internal reference IDs change when we chunk the file, + -- so the next two lines fix that: + let reftable = correlateRefs blocks + let blocks' = replaceRefs reftable blocks + let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks' + let chunks = splitByIndices h1Indices blocks' let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys titleize xs = Pandoc meta xs - let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate } let chapters = map titleize chunks + let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate } let chapterToEntry :: Int -> Pandoc -> Entry chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $ fromString $ chapToHtml chap let chapterEntries = zipWith chapterToEntry [1..] chapters -- contents.opf - lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang") - (\_ -> return "en-US") + localeLang <- catch (liftM (takeWhile (/='.')) $ getEnv "LANG") + (\_ -> return "en-US") + let lang = case lookup "lang" (writerVariables opts') of + Just x -> x + Nothing -> localeLang uuid <- getRandomUUID let chapterNode ent = unode "item" ! [("id", takeBaseName $ eRelativePath ent), @@ -125,17 +143,22 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do ("href", eRelativePath ent), ("media-type", fromMaybe "application/octet-stream" $ imageTypeOf $ eRelativePath ent)] $ () + let fontNode ent = unode "item" ! + [("id", takeBaseName $ eRelativePath ent), + ("href", eRelativePath ent), + ("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ () let plainify t = removeTrailingSpace $ writePlain opts'{ writerStandalone = False } $ Pandoc meta [Plain t] let plainTitle = plainify $ docTitle meta let plainAuthors = map plainify $ docAuthors meta + let plainDate = maybe "" id $ normalizeDate $ stringify $ docDate meta let contentsData = fromString $ ppTopElement $ unode "package" ! [("version","2.0") ,("xmlns","http://www.idpf.org/2007/opf") ,("unique-identifier","BookId")] $ [ metadataElement (writerEPUBMetadata opts') - uuid lang plainTitle plainAuthors mbCoverImage + uuid lang plainTitle plainAuthors plainDate mbCoverImage , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () @@ -143,7 +166,8 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do ,("media-type","text/css")] $ () ] ++ map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ - map pictureNode (cpicEntry ++ picEntries) + map pictureNode (cpicEntry ++ picEntries) ++ + map fontNode fontEntries , unode "spine" ! [("toc","ncx")] $ case mbCoverImage of Nothing -> [] @@ -197,6 +221,13 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do ,("media-type","application/oebps-package+xml")] $ () let containerEntry = mkEntry "META-INF/container.xml" containerData + -- com.apple.ibooks.display-options.xml + let apple = fromString $ ppTopElement $ + unode "display_options" $ + unode "platform" ! [("name","*")] $ + unode "option" ! [("name","specified-fonts")] $ "true" + let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple + -- stylesheet stylesheet <- case mbStylesheet of Just s -> return s @@ -205,13 +236,13 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do -- construct archive let archive = foldr addEntryToArchive emptyArchive - (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry : + (mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry : contentsEntry : tocEntry : - (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries) ) + (picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries) ) return $ fromArchive archive -metadataElement :: String -> UUID -> String -> String -> [String] -> Maybe a -> Element -metadataElement metadataXML uuid lang title authors mbCoverImage = +metadataElement :: String -> UUID -> String -> String -> [String] -> String -> Maybe a -> Element +metadataElement metadataXML uuid lang title authors date mbCoverImage = let userNodes = parseXML metadataXML elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ @@ -227,6 +258,7 @@ metadataElement metadataXML uuid lang title authors mbCoverImage = [ unode "dc:identifier" ! [("id","BookId")] $ show uuid | not (elt `contains` "identifier") ] ++ [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] ++ + [ unode "dc:date" date | not (elt `contains` "date") ] ++ [ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () | not (isNothing mbCoverImage) ] in elt{ elContent = elContent elt ++ map Elem newNodes } @@ -263,20 +295,24 @@ transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do "</ops:switch>" result = if "<math" `isPrefixOf` mathml then inOps else mathml return $ RawInline "html" result : xs -transformInlines _ _ _ (RawInline _ _ : xs) = return $ Str "" : xs -transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs transformInlines _ _ _ xs = return xs -transformBlock :: Block -> Block -transformBlock (RawBlock _ _) = Null -transformBlock x = x - (!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) -- | Version of 'ppTopElement' that specifies UTF-8 encoding. ppTopElement :: Element -> String -ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . ppElement +ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement + -- unEntity removes numeric entities introduced by ppElement + -- (kindlegen seems to choke on these). + where unEntity [] = "" + unEntity ('&':'#':xs) = + let (ds,ys) = break (==';') xs + rest = drop 1 ys + in case reads ('\'':'\\':ds ++ "'") of + ((x,_):_) -> x : unEntity rest + _ -> '&':'#':unEntity xs + unEntity (x:xs) = x : unEntity xs imageTypeOf :: FilePath -> Maybe String imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of @@ -288,38 +324,49 @@ imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of "svg" -> Just "image/svg+xml" _ -> Nothing -pageTemplate :: String -pageTemplate = unlines - [ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" - , "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">" - , "<html xmlns=\"http://www.w3.org/1999/xhtml\">" - , "<head>" - , "<title>$title$</title>" - , "$if(coverimage)$" - , "<style type=\"text/css\">img{ max-width: 100%; }</style>" - , "$endif$" - , "<link href=\"stylesheet.css\" type=\"text/css\" rel=\"stylesheet\" />" - , "</head>" - , "<body>" - , "$if(coverimage)$" - , "<div id=\"cover-image\">" - , "<img src=\"$coverimage$\" alt=\"$title$\" />" - , "</div>" - , "$else$" - , "$if(titlepage)$" - , "<h1 class=\"title\">$title$</h1>" - , "$for(author)$" - , "<h2 class=\"author\">$author$</h2>" - , "$endfor$" - , "$else$" - , "<h1>$title$</h1>" - , "$if(toc)$" - , "$toc$" - , "$endif$" - , "$endif$" - , "$body$" - , "$endif$" - , "</body>" - , "</html>" - ] +data IdentState = IdentState{ + chapterNumber :: Int, + runningIdents :: [String], + chapterIdents :: [String], + identTable :: [(String,String)] + } deriving (Read, Show) + +-- Go through a block list and construct a table +-- correlating the automatically constructed references +-- that would be used in a normal pandoc document with +-- new URLs to be used in the EPUB. For example, what +-- was "header-1" might turn into "ch6.xhtml#header". +correlateRefs :: [Block] -> [(String,String)] +correlateRefs bs = identTable $ execState (mapM_ go bs) + IdentState{ chapterNumber = 0 + , runningIdents = [] + , chapterIdents = [] + , identTable = [] } + where go :: Block -> State IdentState () + go (Header n ils) = do + when (n == 1) $ + modify $ \s -> s{ chapterNumber = chapterNumber s + 1 + , chapterIdents = [] } + st <- get + let runningid = uniqueIdent ils (runningIdents st) + let chapid = if n == 1 + then Nothing + else Just $ uniqueIdent ils (chapterIdents st) + modify $ \s -> s{ runningIdents = runningid : runningIdents st + , chapterIdents = maybe (chapterIdents st) + (: chapterIdents st) chapid + , identTable = (runningid, "ch" ++ show (chapterNumber st) ++ + ".xhtml" ++ maybe "" ('#':) chapid) : identTable st + } + go _ = return () + +-- Replace internal link references using the table produced +-- by correlateRefs. +replaceRefs :: [(String,String)] -> [Block] -> [Block] +replaceRefs refTable = bottomUp replaceOneRef + where replaceOneRef x@(Link lab ('#':xs,tit)) = + case lookup xs refTable of + Just url -> Link lab (url,tit) + Nothing -> x + replaceOneRef x = x diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 573adbf4a..f35b29370 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-deprecations #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -30,168 +31,186 @@ Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where import Text.Pandoc.Definition -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.Pandoc.Shared import Text.Pandoc.Templates +import Text.Pandoc.Generic import Text.Pandoc.Readers.TeXMath -import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss ) -import Text.Pandoc.XML (stripTags, escapeStringForXML) +import Text.Pandoc.Slides +import Text.Pandoc.Highlighting ( highlight, styleToCss, + formatHtmlInline, formatHtmlBlock ) +import Text.Pandoc.XML (stripTags, escapeStringForXML, fromEntities) import Network.HTTP ( urlEncode ) import Numeric ( showHex ) import Data.Char ( ord, toLower ) import Data.List ( isPrefixOf, intersperse ) +import Data.String ( fromString ) import Data.Maybe ( catMaybes ) import Control.Monad.State -import Text.XHtml.Transitional hiding ( stringToHtml, unordList, ordList ) -import qualified Text.XHtml.Transitional as XHtml +import Text.Blaze +import qualified Text.Blaze.Html5 as H5 +import qualified Text.Blaze.XHtml1.Transitional as H +import qualified Text.Blaze.XHtml1.Transitional.Attributes as A +import Text.Blaze.Renderer.String (renderHtml) import Text.TeXMath import Text.XML.Light.Output import System.FilePath (takeExtension) +import Data.Monoid (mempty, mconcat) data WriterState = WriterState { stNotes :: [Html] -- ^ List of notes , stMath :: Bool -- ^ Math is used in document , stHighlighting :: Bool -- ^ Syntax highlighting is used , stSecNum :: [Int] -- ^ Number of current section - } deriving Show + } defaultWriterState :: WriterState defaultWriterState = WriterState {stNotes= [], stMath = False, stHighlighting = False, stSecNum = []} -- Helpers to render HTML with the appropriate function. --- | Modified version of Text.XHtml's stringToHtml. --- Use unicode characters wherever possible. -stringToHtml :: WriterOptions -> String -> Html -stringToHtml opts = if writerAscii opts - then XHtml.stringToHtml - else primHtml . escapeStringForXML +strToHtml :: String -> Html +strToHtml = preEscapedString . escapeStringForXML +-- strToHtml = toHtml -- | Hard linebreak. nl :: WriterOptions -> Html nl opts = if writerWrapText opts - then primHtml "\n" - else noHtml + then preEscapedString "\n" + else mempty -- | Convert Pandoc document to Html string. writeHtmlString :: WriterOptions -> Pandoc -> String writeHtmlString opts d = - let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d) - defaultWriterState + let (tit, auths, authsMeta, date, toc, body', newvars) = evalState (pandocToHtml opts d) + defaultWriterState in if writerStandalone opts - then inTemplate opts tit auths date toc body' newvars - else dropWhile (=='\n') $ showHtmlFragment body' + then inTemplate opts tit auths authsMeta date toc body' newvars + else renderHtml body' -- | Convert Pandoc document to Html structure. writeHtml :: WriterOptions -> Pandoc -> Html writeHtml opts d = - let (tit, auths, date, toc, body', newvars) = evalState (pandocToHtml opts d) - defaultWriterState + let (tit, auths, authsMeta, date, toc, body', newvars) = evalState (pandocToHtml opts d) + defaultWriterState in if writerStandalone opts - then inTemplate opts tit auths date toc body' newvars + then inTemplate opts tit auths authsMeta date toc body' newvars else body' -- result is (title, authors, date, toc, body, new variables) pandocToHtml :: WriterOptions -> Pandoc - -> State WriterState (Html, [Html], Html, Maybe Html, Html, [(String,String)]) + -> State WriterState (Html, [Html], [Html], Html, Maybe Html, Html, [(String,String)]) pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do let standalone = writerStandalone opts tit <- if standalone then inlineListToHtml opts title' - else return noHtml + else return mempty auths <- if standalone then mapM (inlineListToHtml opts) authors' else return [] + authsMeta <- if standalone + then mapM (inlineListToHtml opts . prepForMeta) authors' + else return [] date <- if standalone then inlineListToHtml opts date' - else return noHtml + else return mempty + let slideLevel = maybe (getSlideLevel blocks) id $ writerSlideLevel opts let sects = hierarchicalize $ if writerSlideVariant opts == NoSlides then blocks - else case blocks of - (Header 1 _ : _) -> blocks - _ -> - let isL1 (Header 1 _) = True - isL1 _ = False - (preBlocks, rest) = break isL1 blocks - in (RawBlock "html" "<div class=\"slide\">" : - preBlocks) ++ (RawBlock "html" "</div>" : - rest) + else prepSlides slideLevel blocks toc <- if writerTableOfContents opts then tableOfContents opts sects else return Nothing - blocks' <- liftM (toHtmlFromList . intersperse (nl opts)) $ - mapM (elementToHtml opts) sects + blocks' <- liftM (mconcat . intersperse (nl opts)) $ + mapM (elementToHtml slideLevel opts) sects st <- get let notes = reverse (stNotes st) - let thebody = blocks' +++ footnoteSection opts notes + let thebody = blocks' >> footnoteSection opts notes let math = if stMath st then case writerHTMLMathMethod opts of LaTeXMathML (Just url) -> - script ! - [src url, thetype "text/javascript"] $ noHtml + H.script ! A.src (toValue url) + ! A.type_ "text/javascript" + $ mempty MathML (Just url) -> - script ! - [src url, thetype "text/javascript"] $ noHtml + H.script ! A.src (toValue url) + ! A.type_ "text/javascript" + $ mempty MathJax url -> - script ! [src url, thetype "text/javascript"] $ noHtml + H.script ! A.src (toValue url) + ! A.type_ "text/javascript" + $ mempty JsMath (Just url) -> - script ! - [src url, thetype "text/javascript"] $ noHtml + H.script ! A.src (toValue url) + ! A.type_ "text/javascript" + $ mempty _ -> case lookup "mathml-script" (writerVariables opts) of - Just s -> - script ! [thetype "text/javascript"] << - primHtml ("/*<![CDATA[*/\n" ++ s ++ - "/*]]>*/\n") - Nothing -> noHtml - else noHtml - let newvars = [("highlighting-css", defaultHighlightingCss) | + Just s | not (writerHtml5 opts) -> + H.script ! A.type_ "text/javascript" + $ preEscapedString + ("/*<![CDATA[*/\n" ++ s ++ "/*]]>*/\n") + | otherwise -> mempty + Nothing -> mempty + else mempty + let newvars = [("highlighting-css", + styleToCss $ writerHighlightStyle opts) | stHighlighting st] ++ - [("math", showHtmlFragment math) | stMath st] - return (tit, auths, date, toc, thebody, newvars) + [("math", renderHtml math) | stMath st] + return (tit, auths, authsMeta, date, toc, thebody, newvars) + +-- | Prepare author for meta tag, converting notes into +-- bracketed text and removing links. +prepForMeta :: [Inline] -> [Inline] +prepForMeta = bottomUp (concatMap fixInline) + where fixInline (Note [Para xs]) = [Str " ["] ++ xs ++ [Str "]"] + fixInline (Note [Plain xs]) = [Str " ["] ++ xs ++ [Str "]"] + fixInline (Link lab _) = lab + fixInline (Image lab _) = lab + fixInline x = [x] inTemplate :: TemplateTarget a => WriterOptions -> Html -> [Html] + -> [Html] -> Html -> Maybe Html -> Html -> [(String,String)] -> a -inTemplate opts tit auths date toc body' newvars = - let renderedTit = showHtmlFragment tit - topTitle' = stripTags renderedTit - authors = map (stripTags . showHtmlFragment) auths - date' = stripTags $ showHtmlFragment date +inTemplate opts tit auths authsMeta date toc body' newvars = + let title' = renderHtml tit + date' = renderHtml date + dateMeta = maybe [] (\x -> [("date-meta",x)]) $ normalizeDate date' variables = writerVariables opts ++ newvars - context = variables ++ - [ ("body", dropWhile (=='\n') $ showHtmlFragment body') - , ("pagetitle", topTitle') - , ("title", dropWhile (=='\n') $ showHtmlFragment tit) + context = variables ++ dateMeta ++ + [ ("body", dropWhile (=='\n') $ renderHtml body') + , ("pagetitle", stripTags title') + , ("title", title') , ("date", date') , ("idprefix", writerIdentifierPrefix opts) , ("slidy-url", "http://www.w3.org/Talks/Tools/Slidy2") - , ("s5-url", "ui/default") ] ++ + , ("s5-url", "s5/default") ] ++ [ ("html5","true") | writerHtml5 opts ] ++ (case toc of - Just t -> [ ("toc", showHtmlFragment t)] + Just t -> [ ("toc", renderHtml t)] Nothing -> []) ++ - [ ("author", a) | a <- authors ] + [ ("author", renderHtml a) | a <- auths ] ++ + [ ("author-meta", stripTags $ renderHtml a) | a <- authsMeta ] in renderTemplate context $ writerTemplate opts -- | Like Text.XHtml's identifier, but adds the writerIdentifierPrefix -prefixedId :: WriterOptions -> String -> HtmlAttr -prefixedId opts s = identifier $ writerIdentifierPrefix opts ++ s +prefixedId :: WriterOptions -> String -> Attribute +prefixedId opts s = A.id $ toValue $ writerIdentifierPrefix opts ++ s -- | Replacement for Text.XHtml's unordList. unordList :: WriterOptions -> ([Html] -> Html) -unordList opts items = ulist << toListItems opts items +unordList opts items = H.ul $ mconcat $ toListItems opts items -- | Replacement for Text.XHtml's ordList. ordList :: WriterOptions -> ([Html] -> Html) -ordList opts items = olist << toListItems opts items +ordList opts items = H.ol $ mconcat $ toListItems opts items -- | Construct table of contents from list of elements. tableOfContents :: WriterOptions -> [Element] -> State WriterState (Maybe Html) @@ -214,52 +233,66 @@ elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html) elementToListItem _ (Blk _) = return Nothing elementToListItem opts (Sec _ num id' headerText subsecs) = do let sectnum = if writerNumberSections opts - then (thespan ! [theclass "toc-section-number"] << showSecNum num) +++ - stringToHtml opts" " - else noHtml - txt <- liftM (sectnum +++) $ inlineListToHtml opts headerText + then (H.span ! A.class_ "toc-section-number" $ toHtml $ showSecNum num) >> + preEscapedString " " + else mempty + txt <- liftM (sectnum >>) $ inlineListToHtml opts headerText subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes let subList = if null subHeads - then noHtml + then mempty else unordList opts subHeads - return $ Just $ (anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ id')] $ txt) +++ subList + return $ Just $ (H.a ! A.href (toValue $ "#" ++ writerIdentifierPrefix opts ++ id') + $ toHtml txt) >> subList -- | Convert an Element to Html. -elementToHtml :: WriterOptions -> Element -> State WriterState Html -elementToHtml opts (Blk HorizontalRule) | writerSlideVariant opts /= NoSlides = - return $ primHtml "</div>" +++ nl opts +++ primHtml "<div class=\"slide\">" -elementToHtml opts (Blk block) = blockToHtml opts block -elementToHtml opts (Sec level num id' title' elements) = do +elementToHtml :: Int -> WriterOptions -> Element -> State WriterState Html +elementToHtml _slideLevel opts (Blk block) = blockToHtml opts block +elementToHtml slideLevel opts (Sec level num id' title' elements) = do + let slide = writerSlideVariant opts /= NoSlides && level <= slideLevel modify $ \st -> st{stSecNum = num} -- update section number - header' <- blockToHtml opts (Header level title') - innerContents <- mapM (elementToHtml opts) elements - let header'' = header' ! [prefixedId opts id' | - not (writerStrictMarkdown opts || - writerSectionDivs opts || - writerSlideVariant opts == S5Slides)] - let stuff = header'' : innerContents - let slide = writerSlideVariant opts /= NoSlides && level == 1 - let stuff' = if slide - then [thediv ! [theclass "slide"] << - (nl opts : intersperse (nl opts) stuff ++ [nl opts])] - else intersperse (nl opts) stuff - let inNl x = nl opts : x ++ [nl opts] - return $ if writerSectionDivs opts - then if writerHtml5 opts - then tag "section" ! [prefixedId opts id'] << inNl stuff' - else thediv ! [prefixedId opts id'] << inNl stuff' - else toHtmlFromList stuff' + -- always use level 1 for slide titles + let level' = if slide then 1 else level + let titleSlide = slide && level < slideLevel + header' <- blockToHtml opts (Header level' title') + let isSec (Sec _ _ _ _ _) = True + isSec (Blk _) = False + innerContents <- mapM (elementToHtml slideLevel opts) + $ if titleSlide + -- title slides have no content of their own + then filter isSec elements + else elements + let header'' = if (writerStrictMarkdown opts || + writerSectionDivs opts || + writerSlideVariant opts == S5Slides) + then header' + else header' ! prefixedId opts id' + let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] + let classes = ["titleslide" | titleSlide] ++ ["slide" | slide] ++ + ["level" ++ show level] + let secttag = if writerHtml5 opts + then H5.section ! A.class_ (toValue $ unwords classes) + else H.div ! A.class_ (toValue $ unwords ("section":classes)) + return $ if titleSlide + then mconcat $ (secttag ! prefixedId opts id' $ header'') : innerContents + else if writerSectionDivs opts || slide + then secttag ! prefixedId opts id' $ inNl $ header'' : innerContents + else mconcat $ intersperse (nl opts) $ header'' : innerContents -- | Convert list of Note blocks to a footnote <div>. -- Assumes notes are sorted. footnoteSection :: WriterOptions -> [Html] -> Html footnoteSection opts notes = if null notes - then noHtml - else nl opts +++ (thediv ! [theclass "footnotes"] - $ nl opts +++ hr +++ nl opts +++ - (olist << (notes ++ [nl opts])) +++ nl opts) - + then mempty + else nl opts >> (container + $ nl opts >> hrtag >> nl opts >> + H.ol (mconcat notes >> nl opts) >> nl opts) + where container x = if writerHtml5 opts + then H5.section ! A.class_ "footnotes" $ x + else if writerSlideVariant opts /= NoSlides + then H.div ! A.class_ "footnotes slide" $ x + else H.div ! A.class_ "footnotes" $ x + hrtag = if writerHtml5 opts then H5.hr else H.hr -- | Parse a mailto link; return Just (name, domain) or Nothing. parseMailto :: String -> Maybe (String, String) @@ -272,7 +305,7 @@ parseMailto _ = Nothing -- | Obfuscate a "mailto:" link. obfuscateLink :: WriterOptions -> String -> String -> Html obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation = - anchor ! [href s] << txt + H.a ! A.href (toValue s) $ toHtml txt obfuscateLink opts txt s = let meth = writerEmailObfuscation opts s' = map toLower s @@ -287,19 +320,19 @@ obfuscateLink opts txt s = domain' ++ ")") in case meth of ReferenceObfuscation -> - -- need to use primHtml or &'s are escaped to & in URL - primHtml $ "<a href=\"" ++ (obfuscateString s') + -- need to use preEscapedString or &'s are escaped to & in URL + preEscapedString $ "<a href=\"" ++ (obfuscateString s') ++ "\">" ++ (obfuscateString txt) ++ "</a>" JavascriptObfuscation -> - (script ! [thetype "text/javascript"] $ - primHtml ("\n<!--\nh='" ++ + (H.script ! A.type_ "text/javascript" $ + preEscapedString ("\n<!--\nh='" ++ obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++ obfuscateString name' ++ "';e=n+a+h;\n" ++ "document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++ - linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ - noscript (primHtml $ obfuscateString altText) + linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) >> + H.noscript (preEscapedString $ obfuscateString altText) _ -> error $ "Unknown obfuscation method: " ++ show meth - _ -> anchor ! [href s] $ stringToHtml opts txt -- malformed email + _ -> H.a ! A.href (toValue s) $ toHtml txt -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String @@ -310,13 +343,13 @@ obfuscateChar char = -- | Obfuscate string using entities. obfuscateString :: String -> String -obfuscateString = concatMap obfuscateChar . decodeCharacterReferences +obfuscateString = concatMap obfuscateChar . fromEntities -attrsToHtml :: WriterOptions -> Attr -> [HtmlAttr] +attrsToHtml :: WriterOptions -> Attr -> [Attribute] attrsToHtml opts (id',classes',keyvals) = - [theclass (unwords classes') | not (null classes')] ++ + [A.class_ (toValue $ unwords classes') | not (null classes')] ++ [prefixedId opts id' | not (null id')] ++ - map (\(x,y) -> strAttr x y) keyvals + map (\(x,y) -> customAttribute (fromString x) (toValue y)) keyvals imageExts :: [String] imageExts = [ "art", "bmp", "cdr", "cdt", "cpt", "cr2", "crw", "djvu", "erf", @@ -331,40 +364,41 @@ treatAsImage fp = -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> State WriterState Html -blockToHtml _ Null = return noHtml +blockToHtml _ Null = return mempty blockToHtml opts (Plain lst) = inlineListToHtml opts lst blockToHtml opts (Para [Image txt (s,tit)]) = do img <- inlineToHtml opts (Image txt (s,tit)) capt <- inlineListToHtml opts txt return $ if writerHtml5 opts - then tag "figure" << - [nl opts, img, tag "figcaption" << capt, nl opts] - else thediv ! [theclass "figure"] << - [nl opts, img, paragraph ! [theclass "caption"] << capt, + then H5.figure $ mconcat + [nl opts, img, H5.figcaption capt, nl opts] + else H.div ! A.class_ "figure" $ mconcat + [nl opts, img, H.p ! A.class_ "caption" $ capt, nl opts] blockToHtml opts (Para lst) = do contents <- inlineListToHtml opts lst - return $ paragraph contents -blockToHtml _ (RawBlock "html" str) = return $ primHtml str -blockToHtml _ (RawBlock _ _) = return noHtml -blockToHtml _ (HorizontalRule) = return hr + return $ H.p contents +blockToHtml _ (RawBlock "html" str) = return $ preEscapedString str +blockToHtml _ (RawBlock _ _) = return mempty +blockToHtml opts (HorizontalRule) = return $ if writerHtml5 opts then H5.hr else H.hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do - let classes' = if writerLiterateHaskell opts - then classes + let tolhs = writerLiterateHaskell opts && + any (\c -> map toLower c == "haskell") classes && + any (\c -> map toLower c == "literate") classes + classes' = if tolhs + then map (\c -> if map toLower c == "haskell" + then "literatehaskell" + else c) classes else filter (/= "literate") classes - case highlightHtml False (id',classes',keyvals) rawCode of - Left _ -> -- change leading newlines into <br /> tags, because some - -- browsers ignore leading newlines in pre blocks - let (leadingBreaks, rawCode') = span (=='\n') rawCode - attrs = attrsToHtml opts (id', classes', keyvals) - addBird = if "literate" `elem` classes' - then unlines . map ("> " ++) . lines - else unlines . lines - in return $ pre ! attrs $ thecode << - (replicate (length leadingBreaks) br +++ - [stringToHtml opts $ addBird rawCode']) - Right h -> modify (\st -> st{ stHighlighting = True }) >> - return h + adjCode = if tolhs + then unlines . map ("> " ++) . lines $ rawCode + else rawCode + case highlight formatHtmlBlock (id',classes,keyvals) adjCode of + Nothing -> let attrs = attrsToHtml opts (id', classes', keyvals) + in return $ foldl (!) H.pre attrs $ H.code + $ toHtml adjCode + Just h -> modify (\st -> st{ stHighlighting = True }) >> + return (foldl (!) h (attrsToHtml opts (id',[],keyvals))) blockToHtml opts (BlockQuote blocks) = -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; @@ -378,47 +412,48 @@ blockToHtml opts (BlockQuote blocks) = blockToHtml (opts {writerIncremental = inc}) (OrderedList attribs lst) _ -> do contents <- blockListToHtml opts blocks - return $ blockquote (nl opts +++ - contents +++ nl opts) + return $ H.blockquote + $ nl opts >> contents >> nl opts else do contents <- blockListToHtml opts blocks - return $ blockquote (nl opts +++ contents +++ nl opts) + return $ H.blockquote $ nl opts >> contents >> nl opts blockToHtml opts (Header level lst) = do contents <- inlineListToHtml opts lst secnum <- liftM stSecNum get let contents' = if writerNumberSections opts - then (thespan ! [theclass "header-section-number"] << showSecNum secnum) +++ - stringToHtml opts " " +++ contents + then (H.span ! A.class_ "header-section-number" $ toHtml $ showSecNum secnum) >> + strToHtml " " >> contents else contents let contents'' = if writerTableOfContents opts - then anchor ! [href $ "#" ++ writerIdentifierPrefix opts ++ "TOC"] $ contents' + then H.a ! A.href (toValue $ "#" ++ writerIdentifierPrefix opts ++ "TOC") $ contents' else contents' return $ (case level of - 1 -> h1 contents'' - 2 -> h2 contents'' - 3 -> h3 contents'' - 4 -> h4 contents'' - 5 -> h5 contents'' - 6 -> h6 contents'' - _ -> paragraph contents'') + 1 -> H.h1 contents'' + 2 -> H.h2 contents'' + 3 -> H.h3 contents'' + 4 -> H.h4 contents'' + 5 -> H.h5 contents'' + 6 -> H.h6 contents'' + _ -> H.p contents'') blockToHtml opts (BulletList lst) = do contents <- mapM (blockListToHtml opts) lst - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] - return $ (unordList opts contents) ! attribs + let lst' = unordList opts contents + let lst'' = if writerIncremental opts + then lst' ! A.class_ "incremental" + else lst' + return lst'' blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (blockListToHtml opts) lst let numstyle' = camelCaseToHyphenated $ show numstyle let attribs = (if writerIncremental opts - then [theclass "incremental"] + then [A.class_ "incremental"] else []) ++ (if startnum /= 1 - then [start startnum] + then [A.start $ toValue startnum] else []) ++ (if numstyle /= DefaultStyle then if writerHtml5 opts - then [strAttr "type" $ + then [A.type_ $ case numstyle of Decimal -> "1" LowerAlpha -> "a" @@ -426,44 +461,44 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do LowerRoman -> "i" UpperRoman -> "I" _ -> "1"] - else [thestyle $ "list-style-type: " ++ + else [A.style $ toValue $ "list-style-type: " ++ numstyle'] else []) - return $ (ordList opts contents) ! attribs + return $ foldl (!) (ordList opts contents) attribs blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> - do term' <- liftM (dterm <<) $ inlineListToHtml opts term - defs' <- mapM ((liftM (\x -> ddef << (x +++ nl opts))) . + do term' <- liftM (H.dt) $ inlineListToHtml opts term + defs' <- mapM ((liftM (\x -> H.dd $ (x >> nl opts))) . blockListToHtml opts) defs - return $ nl opts : term' : nl opts : defs') lst - let attribs = if writerIncremental opts - then [theclass "incremental"] - else [] - return $ dlist ! attribs << (concat contents +++ nl opts) + return $ mconcat $ nl opts : term' : nl opts : defs') lst + let lst' = H.dl $ mconcat contents >> nl opts + let lst'' = if writerIncremental opts + then lst' ! A.class_ "incremental" + else lst' + return lst'' blockToHtml opts (Table capt aligns widths headers rows') = do captionDoc <- if null capt - then return noHtml + then return mempty else do cs <- inlineListToHtml opts capt - return $ caption cs +++ nl opts + return $ H.caption cs >> nl opts let percent w = show (truncate (100*w) :: Integer) ++ "%" - let widthAttrs w = if writerHtml5 opts - then [thestyle $ "width: " ++ percent w] - else [width $ percent w] let coltags = if all (== 0.0) widths - then noHtml - else concatHtml $ map - (\w -> (col ! (widthAttrs w)) noHtml +++ nl opts) - widths + then mempty + else mconcat $ map (\w -> + if writerHtml5 opts + then H.col ! A.style (toValue $ "width: " ++ percent w) + else H.col ! A.width (toValue $ percent w) >> nl opts) + widths head' <- if all null headers - then return noHtml + then return mempty else do contents <- tableRowToHtml opts aligns 0 headers - return $ thead << (nl opts +++ contents) +++ nl opts - body' <- liftM (\x -> tbody << (nl opts +++ x)) $ + return $ H.thead (nl opts >> contents) >> nl opts + body' <- liftM (\x -> H.tbody (nl opts >> mconcat x)) $ zipWithM (tableRowToHtml opts aligns) [1..] rows' - return $ table $ nl opts +++ captionDoc +++ coltags +++ head' +++ - body' +++ nl opts + return $ H.table $ nl opts >> captionDoc >> coltags >> head' >> + body' >> nl opts tableRowToHtml :: WriterOptions -> [Alignment] @@ -471,7 +506,7 @@ tableRowToHtml :: WriterOptions -> [[Block]] -> State WriterState Html tableRowToHtml opts aligns rownum cols' = do - let mkcell = if rownum == 0 then th else td + let mkcell = if rownum == 0 then H.th else H.td let rowclass = case rownum of 0 -> "header" x | x `rem` 2 == 1 -> "odd" @@ -479,8 +514,8 @@ tableRowToHtml opts aligns rownum cols' = do cols'' <- sequence $ zipWith (\alignment item -> tableItemToHtml opts mkcell alignment item) aligns cols' - return $ (tr ! [theclass rowclass] $ nl opts +++ toHtmlFromList cols'') - +++ nl opts + return $ (H.tr ! A.class_ rowclass $ nl opts >> mconcat cols'') + >> nl opts alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of @@ -496,84 +531,87 @@ tableItemToHtml :: WriterOptions -> State WriterState Html tableItemToHtml opts tag' align' item = do contents <- blockListToHtml opts item - let alignAttrs = if writerHtml5 opts - then [thestyle $ "align: " ++ alignmentToString align'] - else [align $ alignmentToString align'] - return $ (tag' ! alignAttrs) contents +++ nl opts + let alignStr = alignmentToString align' + let attribs = if writerHtml5 opts + then A.style (toValue $ "text-align: " ++ alignStr ++ ";") + else A.align (toValue alignStr) + return $ (tag' ! attribs $ contents) >> nl opts toListItems :: WriterOptions -> [Html] -> [Html] toListItems opts items = map (toListItem opts) items ++ [nl opts] toListItem :: WriterOptions -> Html -> Html -toListItem opts item = nl opts +++ li item +toListItem opts item = nl opts >> H.li item blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html blockListToHtml opts lst = mapM (blockToHtml opts) lst >>= - return . toHtmlFromList . intersperse (nl opts) + return . mconcat . intersperse (nl opts) -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html inlineListToHtml opts lst = - mapM (inlineToHtml opts) lst >>= return . toHtmlFromList + mapM (inlineToHtml opts) lst >>= return . mconcat -- | Convert Pandoc inline element to HTML. inlineToHtml :: WriterOptions -> Inline -> State WriterState Html inlineToHtml opts inline = case inline of - (Str str) -> return $ stringToHtml opts str - (Space) -> return $ stringToHtml opts " " - (LineBreak) -> return br - (EmDash) -> return $ stringToHtml opts "—" - (EnDash) -> return $ stringToHtml opts "–" - (Ellipses) -> return $ stringToHtml opts "…" - (Apostrophe) -> return $ stringToHtml opts "’" - (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize - (Strong lst) -> inlineListToHtml opts lst >>= return . strong - (Code attr str) -> case highlightHtml True attr str of - Left _ -> return - $ thecode ! (attrsToHtml opts attr) - $ stringToHtml opts str - Right h -> return h + (Str str) -> return $ strToHtml str + (Space) -> return $ strToHtml " " + (LineBreak) -> return $ if writerHtml5 opts then H5.br else H.br + (Emph lst) -> inlineListToHtml opts lst >>= return . H.em + (Strong lst) -> inlineListToHtml opts lst >>= return . H.strong + (Code attr str) -> case highlight formatHtmlInline attr str of + Nothing -> return + $ foldl (!) H.code (attrsToHtml opts attr) + $ strToHtml str + Just h -> return $ foldl (!) h $ + attrsToHtml opts (id',[],keyvals) + where (id',_,keyvals) = attr (Strikeout lst) -> inlineListToHtml opts lst >>= - return . (thespan ! [thestyle "text-decoration: line-through;"]) + return . H.del (SmallCaps lst) -> inlineListToHtml opts lst >>= - return . (thespan ! [thestyle "font-variant: small-caps;"]) - (Superscript lst) -> inlineListToHtml opts lst >>= return . sup - (Subscript lst) -> inlineListToHtml opts lst >>= return . sub + return . (H.span ! A.style "font-variant: small-caps;") + (Superscript lst) -> inlineListToHtml opts lst >>= return . H.sup + (Subscript lst) -> inlineListToHtml opts lst >>= return . H.sub (Quoted quoteType lst) -> let (leftQuote, rightQuote) = case quoteType of - SingleQuote -> (stringToHtml opts "‘", - stringToHtml opts "’") - DoubleQuote -> (stringToHtml opts "“", - stringToHtml opts "”") + SingleQuote -> (strToHtml "‘", + strToHtml "’") + DoubleQuote -> (strToHtml "“", + strToHtml "”") in do contents <- inlineListToHtml opts lst - return $ leftQuote +++ contents +++ rightQuote + return $ leftQuote >> contents >> rightQuote (Math t str) -> modify (\st -> st {stMath = True}) >> (case writerHTMLMathMethod opts of LaTeXMathML _ -> -- putting LaTeXMathML in container with class "LaTeX" prevents -- non-math elements on the page from being treated as math by -- the javascript - return $ thespan ! [theclass "LaTeX"] $ + return $ H.span ! A.class_ "LaTeX" $ case t of - InlineMath -> primHtml ("$" ++ str ++ "$") - DisplayMath -> primHtml ("$$" ++ str ++ "$$") + InlineMath -> toHtml ("$" ++ str ++ "$") + DisplayMath -> toHtml ("$$" ++ str ++ "$$") JsMath _ -> do - let m = primHtml str + let m = preEscapedString str return $ case t of - InlineMath -> thespan ! [theclass "math"] $ m - DisplayMath -> thediv ! [theclass "math"] $ m + InlineMath -> H.span ! A.class_ "math" $ m + DisplayMath -> H.div ! A.class_ "math" $ m WebTeX url -> do - let m = image ! [src (url ++ urlEncode str), - alt str, title str] + let imtag = if writerHtml5 opts then H5.img else H.img + let m = imtag ! A.style "vertical-align:middle" + ! A.src (toValue $ url ++ urlEncode str) + ! A.alt (toValue str) + ! A.title (toValue str) + let brtag = if writerHtml5 opts then H5.br else H.br return $ case t of InlineMath -> m - DisplayMath -> br +++ m +++ br + DisplayMath -> brtag >> m >> brtag GladTeX -> return $ case t of - InlineMath -> primHtml $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" - DisplayMath -> primHtml $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" + InlineMath -> preEscapedString "<EQ ENV=\"math\">" >> toHtml str >> preEscapedString "</EQ>" + DisplayMath -> preEscapedString "<EQ ENV=\"displaymath\">" >> toHtml str >> preEscapedString "</EQ>" MathML _ -> do let dt = if t == InlineMath then DisplayInline @@ -581,54 +619,57 @@ inlineToHtml opts inline = let conf = useShortEmptyTags (const False) defaultConfigPP case texMathToMathML dt str of - Right r -> return $ primHtml $ + Right r -> return $ preEscapedString $ ppcElement conf r Left _ -> inlineListToHtml opts (readTeXMath str) >>= return . - (thespan ! [theclass "math"]) - MathJax _ -> return $ primHtml $ + (H.span ! A.class_ "math") + MathJax _ -> return $ toHtml $ case t of InlineMath -> "\\(" ++ str ++ "\\)" DisplayMath -> "\\[" ++ str ++ "\\]" PlainMath -> do x <- inlineListToHtml opts (readTeXMath str) - let m = thespan ! [theclass "math"] $ x + let m = H.span ! A.class_ "math" $ x + let brtag = if writerHtml5 opts then H5.br else H.br return $ case t of InlineMath -> m - DisplayMath -> br +++ m +++ br ) + DisplayMath -> brtag >> m >> brtag ) (RawInline "latex" str) -> case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) - return $ primHtml str - _ -> return noHtml - (RawInline "html" str) -> return $ primHtml str - (RawInline _ _) -> return noHtml + return $ toHtml str + _ -> return mempty + (RawInline "html" str) -> return $ preEscapedString str + (RawInline _ _) -> return mempty (Link [Code _ str] (s,_)) | "mailto:" `isPrefixOf` s -> return $ obfuscateLink opts str s (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt - return $ obfuscateLink opts (show linkText) s + return $ obfuscateLink opts (renderHtml linkText) s (Link txt (s,tit)) -> do linkText <- inlineListToHtml opts txt - return $ anchor ! ([href s] ++ - if null tit then [] else [title tit]) $ - linkText + let link = H.a ! A.href (toValue s) $ linkText + return $ if null tit + then link + else link ! A.title (toValue tit) (Image txt (s,tit)) | treatAsImage s -> do let alternate' = stringify txt - let attributes = [src s] ++ + let attributes = [A.src $ toValue s] ++ (if null tit then [] - else [title tit]) ++ + else [A.title $ toValue tit]) ++ if null txt then [] - else [alt alternate'] - return $ image ! attributes + else [A.alt $ toValue alternate'] + let tag = if writerHtml5 opts then H5.img else H.img + return $ foldl (!) tag attributes -- note: null title included, as in Markdown.pl (Image _ (s,tit)) -> do - let attributes = [src s] ++ + let attributes = [A.src $ toValue s] ++ (if null tit then [] - else [title tit]) - return $ itag "embed" ! attributes + else [A.title $ toValue tit]) + return $ foldl (!) H5.embed attributes -- note: null title included, as in Markdown.pl (Note contents) -> do st <- get @@ -638,19 +679,19 @@ inlineToHtml opts inline = htmlContents <- blockListToNote opts ref contents -- push contents onto front of notes put $ st {stNotes = (htmlContents:notes)} - return $ sup << - anchor ! [href ("#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref), - theclass "footnoteRef", - prefixedId opts ("fnref" ++ ref)] << ref - (Cite _ il) -> inlineListToHtml opts il + return $ H.sup $ + H.a ! A.href (toValue $ "#" ++ writerIdentifierPrefix opts ++ "fn" ++ ref) + ! A.class_ "footnoteRef" + ! prefixedId opts ("fnref" ++ ref) + $ toHtml ref + (Cite _ il) -> do contents <- inlineListToHtml opts il + return $ H.span ! A.class_ "citation" $ contents blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html blockListToNote opts ref blocks = -- If last block is Para or Plain, include the backlink at the end of -- that block. Otherwise, insert a new Plain block with the backlink. - let backlink = [RawInline "html" $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++ - "\" class=\"footnoteBackLink\">" ++ - (if writerAscii opts then "↩" else "↩") ++ "</a>"] + let backlink = [Link [Str "↩"] ("#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref,[])] blocks' = if null blocks then [] else let lastBlock = last blocks @@ -663,4 +704,4 @@ blockListToNote opts ref blocks = _ -> otherBlocks ++ [lastBlock, Plain backlink] in do contents <- blockListToHtml opts blocks' - return $ nl opts +++ (li ! [prefixedId opts ("fn" ++ ref)]) contents + return $ nl opts >> (H.li ! (prefixedId opts ("fn" ++ ref)) $ contents) diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index d925b2897..e99b20c60 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -20,10 +20,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Writers.LaTeX Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above + License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha + Stability : alpha Portability : portable Conversion of 'Pandoc' format into LaTeX. @@ -41,53 +41,68 @@ import Data.Char ( toLower, isPunctuation ) import Control.Monad.State import Text.Pandoc.Pretty import System.FilePath (dropExtension) +import Text.Pandoc.Slides +import Text.Pandoc.Highlighting (highlight, styleToLaTeX, + formatLaTeXInline, formatLaTeXBlock) -data WriterState = - WriterState { stInNote :: Bool -- @True@ if we're in a note - , stInTable :: Bool -- @True@ if we're in a table - , stTableNotes :: [(Char, Doc)] -- List of markers, notes - -- in current table - , stOLLevel :: Int -- level of ordered list nesting - , stOptions :: WriterOptions -- writer options, so they don't have to be parameter - , stVerbInNote :: Bool -- true if document has verbatim text in note - , stEnumerate :: Bool -- true if document needs fancy enumerated lists - , stTable :: Bool -- true if document has a table - , stStrikeout :: Bool -- true if document has strikeout - , stSubscript :: Bool -- true if document has subscript - , stUrl :: Bool -- true if document has visible URL link - , stGraphics :: Bool -- true if document contains images - , stLHS :: Bool -- true if document has literate haskell code - , stBook :: Bool -- true if document uses book or memoir class - , stCsquotes :: Bool -- true if document uses csquotes +data WriterState = + WriterState { stInNote :: Bool -- true if we're in a note + , stInTable :: Bool -- true if we're in a table + , stTableNotes :: [(Char, Doc)] -- List of markers, notes + -- in current table + , stOLLevel :: Int -- level of ordered list nesting + , stOptions :: WriterOptions -- writer options, so they don't have to be parameter + , stVerbInNote :: Bool -- true if document has verbatim text in note + , stEnumerate :: Bool -- true if document needs fancy enumerated lists + , stTable :: Bool -- true if document has a table + , stStrikeout :: Bool -- true if document has strikeout + , stSubscript :: Bool -- true if document has subscript + , stUrl :: Bool -- true if document has visible URL link + , stGraphics :: Bool -- true if document contains images + , stLHS :: Bool -- true if document has literate haskell code + , stBook :: Bool -- true if document uses book or memoir class + , stCsquotes :: Bool -- true if document uses csquotes + , stHighlighting :: Bool -- true if document has highlighted code + , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit + , stInternalLinks :: [String] -- list of internal link targets } -- | Convert Pandoc to LaTeX. writeLaTeX :: WriterOptions -> Pandoc -> String -writeLaTeX options document = - evalState (pandocToLaTeX options document) $ +writeLaTeX options document = + evalState (pandocToLaTeX options document) $ WriterState { stInNote = False, stInTable = False, stTableNotes = [], stOLLevel = 1, stOptions = options, stVerbInNote = False, stEnumerate = False, stTable = False, stStrikeout = False, stSubscript = False, stUrl = False, stGraphics = False, stLHS = False, stBook = writerChapters options, - stCsquotes = False } + stCsquotes = False, stHighlighting = False, + stIncremental = writerIncremental options, + stInternalLinks = [] } pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do + -- see if there are internal links + let isInternalLink (Link _ ('#':xs,_)) = [xs] + isInternalLink _ = [] + modify $ \s -> s{ stInternalLinks = queryWith isInternalLink blocks } let template = writerTemplate options - let usesBookClass x = "\\documentclass" `isPrefixOf` x && - ("{memoir}" `isSuffixOf` x || "{book}" `isSuffixOf` x || - "{report}" `isSuffixOf` x) - when (any usesBookClass (lines template)) $ - modify $ \s -> s{stBook = True} + -- set stBook depending on documentclass + let bookClasses = ["memoir","book","report","scrreprt","scrbook"] + case lookup "documentclass" (writerVariables options) of + Just x | x `elem` bookClasses -> modify $ \s -> s{stBook = True} + | otherwise -> return () + Nothing | any (\x -> "\\documentclass" `isPrefixOf` x && + (any (`isSuffixOf` x) bookClasses)) + (lines template) -> modify $ \s -> s{stBook = True} + | otherwise -> return () -- check for \usepackage...{csquotes}; if present, we'll use -- \enquote{...} for smart quotes: when ("{csquotes}" `isInfixOf` template) $ modify $ \s -> s{stCsquotes = True} - opts <- liftM stOptions get - let colwidth = if writerWrapText opts - then Just $ writerColumns opts + let colwidth = if writerWrapText options + then Just $ writerColumns options else Nothing titletext <- liftM (render colwidth) $ inlineListToLaTeX title authorsText <- mapM (liftM (render colwidth) . inlineListToLaTeX) authors @@ -97,9 +112,12 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do else case last blocks of Header 1 il -> (init blocks, il) _ -> (blocks, []) - body <- blockListToLaTeX blocks' + blocks'' <- if writerBeamer options + then toSlides blocks' + else return blocks' + body <- mapM (elementToLaTeX options) $ hierarchicalize blocks'' biblioTitle <- liftM (render colwidth) $ inlineListToLaTeX lastHeader - let main = render colwidth body + let main = render colwidth $ vcat body st <- get let biblioFiles = intercalate "," $ map dropExtension $ writerBiblioFiles options citecontext = case writerCiteMethod options of @@ -116,7 +134,12 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do [ ("toc", if writerTableOfContents options then "yes" else "") , ("body", main) , ("title", titletext) - , ("date", dateText) ] ++ + , ("date", dateText) + , ("documentclass", if writerBeamer options + then "beamer" + else if writerChapters options + then "book" + else "article") ] ++ [ ("author", a) | a <- authorsText ] ++ [ ("verbatim-in-note", "yes") | stVerbInNote st ] ++ [ ("fancy-enums", "yes") | stEnumerate st ] ++ @@ -128,36 +151,102 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do [ ("lhs", "yes") | stLHS st ] ++ [ ("graphics", "yes") | stGraphics st ] ++ [ ("book-class", "yes") | stBook st] ++ - [ ("listings", "yes") | writerListings options ] ++ + [ ("listings", "yes") | writerListings options || stLHS st ] ++ + [ ("beamer", "yes") | writerBeamer options ] ++ + [ ("highlighting-macros", styleToLaTeX + $ writerHighlightStyle options ) | stHighlighting st ] ++ citecontext return $ if writerStandalone options then renderTemplate context template else main --- escape things as needed for LaTeX +-- | Convert Elements to LaTeX +elementToLaTeX :: WriterOptions -> Element -> State WriterState Doc +elementToLaTeX _ (Blk block) = blockToLaTeX block +elementToLaTeX opts (Sec level _ id' title' elements) = do + header' <- sectionHeader id' level title' + innerContents <- mapM (elementToLaTeX opts) elements + return $ vcat (header' : innerContents) -stringToLaTeX :: String -> String -stringToLaTeX = escapeStringUsing latexEscapes - where latexEscapes = backslashEscapes "{}$%&_#" ++ - [ ('^', "\\^{}") - , ('\\', "\\textbackslash{}") - , ('~', "\\ensuremath{\\sim}") - , ('|', "\\textbar{}") - , ('<', "\\textless{}") - , ('>', "\\textgreater{}") - , ('[', "{[}") -- to avoid interpretation as - , (']', "{]}") -- optional arguments - , ('\160', "~") - , ('\x2018', "`") - , ('\x2019', "'") - , ('\x201C', "``") - , ('\x201D', "''") - ] +-- escape things as needed for LaTeX +stringToLaTeX :: Bool -> String -> String +stringToLaTeX _ [] = "" +stringToLaTeX isUrl (x:xs) = + case x of + '{' -> "\\{" ++ rest + '}' -> "\\}" ++ rest + '$' -> "\\$" ++ rest + '%' -> "\\%" ++ rest + '&' -> "\\&" ++ rest + '_' -> "\\_" ++ rest + '#' -> "\\#" ++ rest + '-' -> case xs of -- prevent adjacent hyphens from forming ligatures + ('-':_) -> "-{}" ++ rest + _ -> '-' : rest + '~' | not isUrl -> "\\ensuremath{\\sim}" + '^' -> "\\^{}" ++ rest + '\\' -> "\\textbackslash{}" ++ rest + '€' -> "\\euro{}" ++ rest + '|' -> "\\textbar{}" ++ rest + '<' -> "\\textless{}" ++ rest + '>' -> "\\textgreater{}" ++ rest + '[' -> "{[}" ++ rest -- to avoid interpretation as + ']' -> "{]}" ++ rest -- optional arguments + '\160' -> "~" ++ rest + '\x2018' -> "`" ++ rest + '\x2019' -> "'" ++ rest + '\x201C' -> "``" ++ rest + '\x201D' -> "''" ++ rest + '\x2026' -> "\\ldots{}" ++ rest + '\x2014' -> "---" ++ rest + '\x2013' -> "--" ++ rest + _ -> x : rest + where rest = stringToLaTeX isUrl xs -- | Puts contents into LaTeX command. inCmd :: String -> Doc -> Doc inCmd cmd contents = char '\\' <> text cmd <> braces contents +toSlides :: [Block] -> State WriterState [Block] +toSlides bs = do + opts <- gets stOptions + let slideLevel = maybe (getSlideLevel bs) id $ writerSlideLevel opts + let bs' = prepSlides slideLevel bs + concat `fmap` (mapM (elementToBeamer slideLevel) $ hierarchicalize bs') + +elementToBeamer :: Int -> Element -> State WriterState [Block] +elementToBeamer _slideLevel (Blk b) = return [b] +elementToBeamer slideLevel (Sec lvl _num _ident tit elts) + | lvl > slideLevel = do + bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts + return $ Para ( RawInline "latex" "\\begin{block}{" + : tit ++ [RawInline "latex" "}"] ) + : bs ++ [RawBlock "latex" "\\end{block}"] + | lvl < slideLevel = do + bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts + return $ (Header lvl tit) : bs + | otherwise = do -- lvl == slideLevel + -- note: [fragile] is required or verbatim breaks + let hasCodeBlock (CodeBlock _ _) = [True] + hasCodeBlock _ = [] + let hasCode (Code _ _) = [True] + hasCode _ = [] + let fragile = if not $ null $ queryWith hasCodeBlock elts ++ queryWith hasCode elts + then "[fragile]" + else "" + let slideStart = Para $ RawInline "latex" ("\\begin{frame}" ++ fragile ++ + "\\frametitle{") : tit ++ [RawInline "latex" "}"] + let slideEnd = RawBlock "latex" "\\end{frame}" + -- now carve up slide into blocks if there are sections inside + bs <- concat `fmap` mapM (elementToBeamer slideLevel) elts + return $ slideStart : bs ++ [slideEnd] + +isListBlock :: Block -> Bool +isListBlock (BulletList _) = True +isListBlock (OrderedList _ _) = True +isListBlock (DefinitionList _) = True +isListBlock _ = False + -- | Convert Pandoc block element to LaTeX. blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc @@ -172,58 +261,80 @@ blockToLaTeX (Para lst) = do result <- inlineListToLaTeX lst return $ result <> blankline blockToLaTeX (BlockQuote lst) = do - contents <- blockListToLaTeX lst - return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" + beamer <- writerBeamer `fmap` gets stOptions + case lst of + [b] | beamer && isListBlock b -> do + oldIncremental <- gets stIncremental + modify $ \s -> s{ stIncremental = True } + result <- blockToLaTeX b + modify $ \s -> s{ stIncremental = oldIncremental } + return result + _ -> do + contents <- blockListToLaTeX lst + return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do - st <- get - env <- if writerLiterateHaskell (stOptions st) && "haskell" `elem` classes && - "literate" `elem` classes - then do - modify $ \s -> s{ stLHS = True } - return "code" - else if writerListings (stOptions st) - then return "lstlisting" - else if stInNote st - then do - modify $ \s -> s{ stVerbInNote = True } - return "Verbatim" - else return "verbatim" - let params = if writerListings (stOptions st) - then take 1 - [ "language=" ++ lang | lang <- classes - , lang `elem` ["ABAP","IDL","Plasm","ACSL","inform" - ,"POV","Ada","Java","Prolog","Algol" - ,"JVMIS","Promela","Ant","ksh","Python" - ,"Assembler","Lisp","R","Awk","Logo" - ,"Reduce","bash","make","Rexx","Basic" - ,"Mathematica","RSL","C","Matlab","Ruby" - ,"C++","Mercury","S","Caml","MetaPost" - ,"SAS","Clean","Miranda","Scilab","Cobol" - ,"Mizar","sh","Comal","ML","SHELXL","csh" - ,"Modula-2","Simula","Delphi","MuPAD" - ,"SQL","Eiffel","NASTRAN","tcl","Elan" - ,"Oberon-2","TeX","erlang","OCL" - ,"VBScript","Euphoria","Octave","Verilog" - ,"Fortran","Oz","VHDL","GCL","Pascal" - ,"VRML","Gnuplot","Perl","XML","Haskell" - ,"PHP","XSLT","HTML","PL/I"] - ] ++ - [ key ++ "=" ++ attr | (key,attr) <- keyvalAttr ] - else [] - printParams - | null params = empty - | otherwise = "[" <> hsep (intersperse "," (map text params)) <> - "]" - return $ flush ("\\begin{" <> text env <> "}" <> printParams $$ text str $$ - "\\end{" <> text env <> "}") $$ cr - -- final cr needed because of footnotes + opts <- gets stOptions + case () of + _ | writerLiterateHaskell opts && "haskell" `elem` classes && + "literate" `elem` classes -> lhsCodeBlock + | writerListings opts -> listingsCodeBlock + | writerHighlight opts && not (null classes) -> highlightedCodeBlock + | otherwise -> rawCodeBlock + where lhsCodeBlock = do + modify $ \s -> s{ stLHS = True } + return $ flush ("\\begin{code}" $$ text str $$ "\\end{code}") $$ cr + rawCodeBlock = do + st <- get + env <- if stInNote st + then modify (\s -> s{ stVerbInNote = True }) >> + return "Verbatim" + else return "verbatim" + return $ flush (text ("\\begin{" ++ env ++ "}") $$ text str $$ + text ("\\end{" ++ env ++ "}")) $$ cr -- final cr because of notes + listingsCodeBlock = do + st <- get + let params = if writerListings (stOptions st) + then take 1 + [ "language=" ++ lang | lang <- classes + , lang `elem` ["ABAP","IDL","Plasm","ACSL","inform" + ,"POV","Ada","Java","Prolog","Algol" + ,"JVMIS","Promela","Ant","ksh","Python" + ,"Assembler","Lisp","R","Awk","Logo" + ,"Reduce","bash","make","Rexx","Basic" + ,"Mathematica","RSL","C","Matlab","Ruby" + ,"C++","Mercury","S","Caml","MetaPost" + ,"SAS","Clean","Miranda","Scilab","Cobol" + ,"Mizar","sh","Comal","ML","SHELXL","csh" + ,"Modula-2","Simula","Delphi","MuPAD" + ,"SQL","Eiffel","NASTRAN","tcl","Elan" + ,"Oberon-2","TeX","erlang","OCL" + ,"VBScript","Euphoria","Octave","Verilog" + ,"Fortran","Oz","VHDL","GCL","Pascal" + ,"VRML","Gnuplot","Perl","XML","Haskell" + ,"PHP","XSLT","HTML","PL/I"] + ] ++ + [ key ++ "=" ++ attr | (key,attr) <- keyvalAttr ] + else [] + printParams + | null params = empty + | otherwise = brackets $ hsep (intersperse "," (map text params)) + return $ flush ("\\begin{lstlisting}" <> printParams $$ text str $$ + "\\end{lstlisting}") $$ cr + highlightedCodeBlock = + case highlight formatLaTeXBlock ("",classes,keyvalAttr) str of + Nothing -> rawCodeBlock + Just h -> modify (\st -> st{ stHighlighting = True }) >> + return (flush $ text h) blockToLaTeX (RawBlock "latex" x) = return $ text x <> blankline blockToLaTeX (RawBlock _ _) = return empty blockToLaTeX (BulletList lst) = do + incremental <- gets stIncremental + let inc = if incremental then "[<+->]" else "" items <- mapM listItemToLaTeX lst - return $ "\\begin{itemize}" $$ vcat items $$ "\\end{itemize}" + return $ text ("\\begin{itemize}" ++ inc) $$ vcat items $$ "\\end{itemize}" blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do st <- get + let inc = if stIncremental st then "[<+->]" else "" let oldlevel = stOLLevel st put $ st {stOLLevel = oldlevel + 1} items <- mapM listItemToLaTeX lst @@ -231,46 +342,25 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do exemplar <- if numstyle /= DefaultStyle || numdelim /= DefaultDelim then do modify $ \s -> s{ stEnumerate = True } - return $ char '[' <> + return $ char '[' <> text (head (orderedListMarkers (1, numstyle, numdelim))) <> char ']' else return empty let resetcounter = if start /= 1 && oldlevel <= 4 - then text $ "\\setcounter{enum" ++ + then text $ "\\setcounter{enum" ++ map toLower (toRomanNumeral oldlevel) ++ "}{" ++ show (start - 1) ++ "}" - else empty - return $ "\\begin{enumerate}" <> exemplar $$ resetcounter $$ + else empty + return $ text ("\\begin{enumerate}" ++ inc) <> exemplar $$ resetcounter $$ vcat items $$ "\\end{enumerate}" blockToLaTeX (DefinitionList lst) = do + incremental <- gets stIncremental + let inc = if incremental then "[<+->]" else "" items <- mapM defListItemToLaTeX lst - return $ "\\begin{description}" $$ vcat items $$ "\\end{description}" + return $ text ("\\begin{description}" ++ inc) $$ vcat items $$ "\\end{description}" blockToLaTeX HorizontalRule = return $ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}" $$ blankline -blockToLaTeX (Header level lst) = do - txt <- inlineListToLaTeX lst - let noNote (Note _) = Str "" - noNote x = x - let lstNoNotes = bottomUp noNote lst - -- footnotes in sections don't work unless you specify an optional - -- argument: \section[mysec]{mysec\footnote{blah}} - optional <- if lstNoNotes == lst - then return empty - else do - res <- inlineListToLaTeX lstNoNotes - return $ char '[' <> res <> char ']' - let stuffing = optional <> char '{' <> txt <> char '}' - book <- liftM stBook get - let level' = if book then level - 1 else level - let headerWith x y = text x <> y $$ blankline - return $ case level' of - 0 -> headerWith "\\chapter" stuffing - 1 -> headerWith "\\section" stuffing - 2 -> headerWith "\\subsection" stuffing - 3 -> headerWith "\\subsubsection" stuffing - 4 -> headerWith "\\paragraph" stuffing - 5 -> headerWith "\\subparagraph" stuffing - _ -> txt $$ blankline +blockToLaTeX (Header level lst) = sectionHeader "" level lst blockToLaTeX (Table caption aligns widths heads rows) = do modify $ \s -> s{ stInTable = True, stTableNotes = [] } headers <- if all null heads @@ -338,6 +428,49 @@ defListItemToLaTeX (term, defs) = do def' <- liftM vsep $ mapM blockListToLaTeX defs return $ "\\item" <> brackets term' $$ def' +-- | Craft the section header, inserting the secton reference, if supplied. +sectionHeader :: [Char] + -> Int + -> [Inline] + -> State WriterState Doc +sectionHeader ref level lst = do + txt <- inlineListToLaTeX lst + let noNote (Note _) = Str "" + noNote x = x + let lstNoNotes = bottomUp noNote lst + -- footnotes in sections don't work unless you specify an optional + -- argument: \section[mysec]{mysec\footnote{blah}} + optional <- if lstNoNotes == lst + then return empty + else do + res <- inlineListToLaTeX lstNoNotes + return $ char '[' <> res <> char ']' + let stuffing = optional <> char '{' <> txt <> char '}' + book <- gets stBook + opts <- gets stOptions + let level' = if book || writerChapters opts then level - 1 else level + internalLinks <- gets stInternalLinks + let refLabel lab = (if ref `elem` internalLinks + then text "\\hyperdef" + <> braces empty + <> braces (text ref) + <> braces (lab <> text "\\label" + <> braces (text ref)) + else lab) + $$ blankline + let headerWith x y = refLabel $ text x <> y + return $ case level' of + 0 -> if writerBeamer opts + then headerWith "\\part" stuffing + else headerWith "\\chapter" stuffing + 1 -> headerWith "\\section" stuffing + 2 -> headerWith "\\subsection" stuffing + 3 -> headerWith "\\subsubsection" stuffing + 4 -> headerWith "\\paragraph" stuffing + 5 -> headerWith "\\subparagraph" stuffing + _ -> txt $$ blankline + + -- | Convert list of inline elements to LaTeX. inlineListToLaTeX :: [Inline] -- ^ Inlines to convert -> State WriterState Doc @@ -345,7 +478,6 @@ inlineListToLaTeX lst = mapM inlineToLaTeX lst >>= return . hcat isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True -isQuoted Apostrophe = True isQuoted _ = False -- | Convert inline element to LaTeX @@ -353,8 +485,8 @@ inlineToLaTeX :: Inline -- ^ Inline to convert -> State WriterState Doc inlineToLaTeX (Emph lst) = inlineListToLaTeX lst >>= return . inCmd "emph" -inlineToLaTeX (Strong lst) = - inlineListToLaTeX lst >>= return . inCmd "textbf" +inlineToLaTeX (Strong lst) = + inlineListToLaTeX lst >>= return . inCmd "textbf" inlineToLaTeX (Strikeout lst) = do contents <- inlineListToLaTeX lst modify $ \s -> s{ stStrikeout = True } @@ -377,14 +509,24 @@ inlineToLaTeX (Cite cits lst) = do Biblatex -> citationsToBiblatex cits _ -> inlineListToLaTeX lst -inlineToLaTeX (Code _ str) = do - st <- get - if writerListings (stOptions st) - then do - when (stInNote st) $ modify $ \s -> s{ stVerbInNote = True } - let chr = ((enumFromTo '!' '~') \\ str) !! 0 - return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr] - else return $ text $ "\\texttt{" ++ stringToLaTeX str ++ "}" +inlineToLaTeX (Code (_,classes,_) str) = do + opts <- gets stOptions + case () of + _ | writerListings opts -> listingsCode + | writerHighlight opts && not (null classes) -> highlightCode + | otherwise -> rawCode + where listingsCode = do + inNote <- gets stInNote + when inNote $ modify $ \s -> s{ stVerbInNote = True } + let chr = ((enumFromTo '!' '~') \\ str) !! 0 + return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr] + highlightCode = do + case highlight formatLaTeXInline ("",classes,[]) str of + Nothing -> rawCode + Just h -> modify (\st -> st{ stHighlighting = True }) >> + return (text h) + rawCode = return + $ text $ "\\texttt{" ++ stringToLaTeX False str ++ "}" inlineToLaTeX (Quoted SingleQuote lst) = do contents <- inlineListToLaTeX lst csquotes <- liftM stCsquotes get @@ -411,11 +553,7 @@ inlineToLaTeX (Quoted DoubleQuote lst) = do then "\\," else empty return $ "``" <> s1 <> contents <> s2 <> "''" -inlineToLaTeX Apostrophe = return $ char '\'' -inlineToLaTeX EmDash = return "---" -inlineToLaTeX EnDash = return "--" -inlineToLaTeX Ellipses = return "\\ldots{}" -inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str +inlineToLaTeX (Str str) = return $ text $ stringToLaTeX False str inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$' inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]" inlineToLaTeX (RawInline "latex" str) = return $ text str @@ -429,7 +567,7 @@ inlineToLaTeX (Link txt (src, _)) = do modify $ \s -> s{ stUrl = True } return $ text $ "\\url{" ++ x ++ "}" _ -> do contents <- inlineListToLaTeX txt - return $ text ("\\href{" ++ stringToLaTeX src ++ "}{") <> + return $ text ("\\href{" ++ stringToLaTeX True src ++ "}{") <> contents <> char '}' inlineToLaTeX (Image _ (source, _)) = do modify $ \s -> s{ stGraphics = True } diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 78b9274d6..d3735efa7 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -98,7 +98,13 @@ noteToMan opts num note = do -- | Association list of characters to escape. manEscapes :: [(Char, String)] -manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "@\\" +manEscapes = [ ('\160', "\\ ") + , ('\'', "\\[aq]") + , ('’', "'") + , ('\x2014', "\\[em]") + , ('\x2013', "\\[en]") + , ('\x2026', "\\&...") + ] ++ backslashEscapes "@\\" -- | Escape special characters for Man. escapeString :: String -> String @@ -303,10 +309,6 @@ inlineToMan opts (Quoted DoubleQuote lst) = do return $ text "\\[lq]" <> contents <> text "\\[rq]" inlineToMan opts (Cite _ lst) = inlineListToMan opts lst -inlineToMan _ EmDash = return $ text "\\[em]" -inlineToMan _ EnDash = return $ text "\\[en]" -inlineToMan _ Apostrophe = return $ char '\'' -inlineToMan _ Ellipses = return $ text "\\&..." inlineToMan _ (Code _ str) = return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]" inlineToMan _ (Str str) = return $ text $ escapeString str diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 48e9578b4..7ce939395 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -233,17 +233,19 @@ blockToMarkdown _ HorizontalRule = blockToMarkdown opts (Header level inlines) = do contents <- inlineListToMarkdown opts inlines st <- get - -- use setext style headers if in literate haskell mode. - -- ghc interprets '#' characters in column 1 as line number specifiers. - if writerLiterateHaskell opts || stPlain st - then let len = offset contents - in return $ contents <> cr <> - (case level of - 1 -> text $ replicate len '=' - 2 -> text $ replicate len '-' - _ -> empty) <> blankline - else return $ - text ((replicate level '#') ++ " ") <> contents <> blankline + let setext = writerSetextHeaders opts + return $ nowrap + $ case level of + 1 | setext -> + contents <> cr <> text (replicate (offset contents) '=') <> + blankline + 2 | setext -> + contents <> cr <> text (replicate (offset contents) '-') <> + blankline + -- ghc interprets '#' characters in column 1 as linenum specifiers. + _ | stPlain st || writerLiterateHaskell opts -> + contents <> blankline + _ -> text (replicate level '#') <> space <> contents <> blankline blockToMarkdown opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes && "literate" `elem` classes && writerLiterateHaskell opts = @@ -434,10 +436,6 @@ inlineToMarkdown opts (Quoted SingleQuote lst) = do inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst return $ "“" <> contents <> "”" -inlineToMarkdown _ EmDash = return "\8212" -inlineToMarkdown _ EnDash = return "\8211" -inlineToMarkdown _ Apostrophe = return "\8217" -inlineToMarkdown _ Ellipses = return "\8230" inlineToMarkdown opts (Code attr str) = let tickGroups = filter (\s -> '`' `elem` s) $ group str longest = if null tickGroups @@ -495,17 +493,16 @@ inlineToMarkdown opts (Cite (c:cs) lst) modekey SuppressAuthor = "-" modekey _ = "" inlineToMarkdown _ (Cite _ _) = return $ text "" -inlineToMarkdown opts (Link txt (src', tit)) = do +inlineToMarkdown opts (Link txt (src, tit)) = do linktext <- inlineListToMarkdown opts txt let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" - let src = unescapeURI src' let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - let useRefLinks = writerReferenceLinks opts let useAuto = case (tit,txt) of ("", [Code _ s]) | s == srcSuffix -> True _ -> False + let useRefLinks = writerReferenceLinks opts && not useAuto ref <- if useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index a7c7fc482..f31a2c2d1 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -346,22 +346,14 @@ inlineToMediaWiki opts (SmallCaps lst) = inlineListToMediaWiki opts lst inlineToMediaWiki opts (Quoted SingleQuote lst) = do contents <- inlineListToMediaWiki opts lst - return $ "‘" ++ contents ++ "’" + return $ "\8216" ++ contents ++ "\8217" inlineToMediaWiki opts (Quoted DoubleQuote lst) = do contents <- inlineListToMediaWiki opts lst - return $ "“" ++ contents ++ "”" + return $ "\8220" ++ contents ++ "\8221" inlineToMediaWiki opts (Cite _ lst) = inlineListToMediaWiki opts lst -inlineToMediaWiki _ EmDash = return "—" - -inlineToMediaWiki _ EnDash = return "–" - -inlineToMediaWiki _ Apostrophe = return "’" - -inlineToMediaWiki _ Ellipses = return "…" - inlineToMediaWiki _ (Code _ str) = return $ "<tt>" ++ (escapeString str) ++ "</tt>" diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index f8030965c..f8f22494f 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -34,9 +34,10 @@ import System.FilePath ( (</>), takeExtension ) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 ( fromString ) import Codec.Archive.Zip -import System.Time +import Data.Time.Clock.POSIX import Paths_pandoc ( getDataFileName ) import Text.Pandoc.Shared ( WriterOptions(..) ) +import Text.Pandoc.ImageSize ( readImageSize, sizeInPoints ) import Text.Pandoc.MIME ( getMimeType ) import Text.Pandoc.Definition import Text.Pandoc.Generic @@ -71,7 +72,7 @@ writeODT mbRefOdt opts doc = do let sourceDir = writerSourceDirectory opts doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc let newContents = writeOpenDocument opts{writerWrapText = False} doc' - (TOD epochtime _) <- getClockTime + epochtime <- floor `fmap` getPOSIXTime let contentEntry = toEntry "content.xml" epochtime $ fromString newContents picEntries <- readIORef picEntriesRef let archive = foldr addEntryToArchive refArchive $ contentEntry : picEntries @@ -102,11 +103,16 @@ writeODT mbRefOdt opts doc = do transformPic :: FilePath -> IORef [Entry] -> Inline -> IO Inline transformPic sourceDir entriesRef (Image lab (src,tit)) = do let src' = unEscapeString src + mbSize <- readImageSize src' + let tit' = case mbSize of + Just s -> let (w,h) = sizeInPoints s + in show w ++ "x" ++ show h + Nothing -> tit entries <- readIORef entriesRef let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src' catch (readEntry [] (sourceDir </> src') >>= \entry -> modifyIORef entriesRef (entry{ eRelativePath = newsrc } :) >> - return (Image lab (newsrc, tit))) + return (Image lab (newsrc, tit'))) (\_ -> return (Emph lab)) transformPic _ _ x = return x diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index e675f4e65..a0317511a 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -40,7 +40,7 @@ import Text.Printf ( printf ) import Control.Applicative ( (<$>) ) import Control.Arrow ( (***), (>>>) ) import Control.Monad.State hiding ( when ) -import Data.Char (chr) +import Data.Char (chr, isDigit) import qualified Data.Map as Map -- | Auxiliary function to convert Plain block to Para. @@ -154,8 +154,8 @@ inHeaderTags i d = , ("text:outline-level", show i)] d inQuotes :: QuoteType -> Doc -> Doc -inQuotes SingleQuote s = text "‘" <> s <> text "’" -inQuotes DoubleQuote s = text "“" <> s <> text "”" +inQuotes SingleQuote s = char '\8216' <> s <> char '\8217' +inQuotes DoubleQuote s = char '\8220' <> s <> char '\8221' handleSpaces :: String -> Doc handleSpaces s @@ -361,10 +361,6 @@ inlinesToOpenDocument o l = hcat <$> mapM (inlineToOpenDocument o) l -- | Convert an inline element to OpenDocument. inlineToOpenDocument :: WriterOptions -> Inline -> State WriterState Doc inlineToOpenDocument o ils - | Ellipses <- ils = inTextStyle $ text "…" - | EmDash <- ils = inTextStyle $ text "—" - | EnDash <- ils = inTextStyle $ text "–" - | Apostrophe <- ils = inTextStyle $ text "’" | Space <- ils = inTextStyle space | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s @@ -382,7 +378,7 @@ inlineToOpenDocument o ils | RawInline "html" s <- ils = preformatted s -- for backwards compat. | RawInline _ _ <- ils = return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l - | Image _ (s,_) <- ils = return $ mkImg s + | Image _ (s,t) <- ils = return $ mkImg s t | Note l <- ils = mkNote l | otherwise = return empty where @@ -391,7 +387,7 @@ inlineToOpenDocument o ils , ("xlink:href" , s ) , ("office:name", t ) ] . inSpanTags "Definition" - mkImg s = inTags False "draw:frame" [] $ + mkImg s t = inTags False "draw:frame" (attrsFromTitle t) $ selfClosingTag "draw:image" [ ("xlink:href" , s ) , ("xlink:type" , "simple") , ("xlink:show" , "embed" ) @@ -407,6 +403,17 @@ inlineToOpenDocument o ils addNote nn return nn +-- a title of the form "120x140" will be interpreted as image +-- size in points. +attrsFromTitle :: String -> [(String,String)] +attrsFromTitle s = if null xs || null ys + then [] + else [("svg:width",xs ++ "pt"),("svg:height",ys ++ "pt")] + where (xs,rest) = span isDigit s + ys = case rest of + ('x':zs) | all isDigit zs -> zs + _ -> "" + bulletListStyle :: Int -> State WriterState (Int,(Int,[Doc])) bulletListStyle l = let doStyles i = inTags True "text:list-level-style-bullet" diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index f7f314428..4c77ba7c6 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -95,7 +95,12 @@ noteToOrg num note = do -- | Escape special characters for Org. escapeString :: String -> String -escapeString = escapeStringUsing (backslashEscapes "^_") +escapeString = escapeStringUsing $ + [ ('\x2014',"---") + , ('\x2013',"--") + , ('\x2019',"'") + , ('\x2026',"...") + ] ++ backslashEscapes "^_" titleToOrg :: [Inline] -> State WriterState Doc titleToOrg [] = return empty @@ -249,10 +254,6 @@ inlineToOrg (Quoted DoubleQuote lst) = do contents <- inlineListToOrg lst return $ "\"" <> contents <> "\"" inlineToOrg (Cite _ lst) = inlineListToOrg lst -inlineToOrg EmDash = return "---" -inlineToOrg EnDash = return "--" -inlineToOrg Apostrophe = return "'" -inlineToOrg Ellipses = return "..." inlineToOrg (Code _ str) = return $ "=" <> text str <> "=" inlineToOrg (Str str) = return $ text $ escapeString str inlineToOrg (Math t str) = do @@ -272,8 +273,7 @@ inlineToOrg (Link txt (src, _)) = do _ -> do contents <- inlineListToOrg txt modify $ \s -> s{ stLinks = True } return $ "[[" <> text src <> "][" <> contents <> "]]" -inlineToOrg (Image _ (source', _)) = do - let source = unescapeURI source' +inlineToOrg (Image _ (source, _)) = do modify $ \s -> s{ stImages = True } return $ "[[" <> text source <> "]]" inlineToOrg (Note contents) = do diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index d4adaa929..d6e5b5c9e 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -157,7 +157,7 @@ blockToRST (Header level inlines) = do contents <- inlineListToRST inlines let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) let border = text $ replicate (offset contents) headerChar - return $ contents $$ border $$ blankline + return $ nowrap $ contents $$ border $$ blankline blockToRST (CodeBlock (_,classes,_) str) = do opts <- stOptions <$> get let tabstop = writerTabStop opts @@ -176,7 +176,7 @@ blockToRST (Table caption _ widths headers rows) = do else blankline <> text "Table: " <> caption' headers' <- mapM blockListToRST headers rawRows <- mapM (mapM blockListToRST) rows - let isSimple = all (==0) widths && all (all (\bs -> length bs == 1)) rows + let isSimple = all (==0) widths && all (all (\bs -> length bs <= 1)) rows let numChars = maximum . map offset opts <- get >>= return . stOptions let widthsInChars = @@ -281,26 +281,24 @@ inlineToRST (Quoted DoubleQuote lst) = do return $ "“" <> contents <> "”" inlineToRST (Cite _ lst) = inlineListToRST lst -inlineToRST EmDash = return $ char '\8212' -inlineToRST EnDash = return $ char '\8211' -inlineToRST Apostrophe = return $ char '\8217' -inlineToRST Ellipses = return $ char '\8230' inlineToRST (Code _ str) = return $ "``" <> text str <> "``" inlineToRST (Str str) = return $ text $ escapeString str inlineToRST (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath - then ":math:`$" <> text str <> "$`" - else ":math:`$$" <> text str <> "$$`" + then ":math:`" <> text str <> "`" <> beforeNonBlank "\\ " + else if '\n' `elem` str + then blankline $$ ".. math::" $$ + blankline $$ nest 3 (text str) $$ blankline + else blankline $$ (".. math:: " <> text str) $$ blankline inlineToRST (RawInline _ _) = return empty inlineToRST (LineBreak) = return cr -- there's no line break in RST inlineToRST Space = return space inlineToRST (Link [Code _ str] (src, _)) | src == str || src == "mailto:" ++ str = do let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - return $ text $ unescapeURI srcSuffix -inlineToRST (Link txt (src', tit)) = do - let src = unescapeURI src' + return $ text srcSuffix +inlineToRST (Link txt (src, tit)) = do useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions linktext <- inlineListToRST $ normalizeSpaces txt if useReferenceLinks @@ -311,8 +309,7 @@ inlineToRST (Link txt (src', tit)) = do modify $ \st -> st { stLinks = refs' } return $ "`" <> linktext <> "`_" else return $ "`" <> linktext <> " <" <> text src <> ">`_" -inlineToRST (Image alternate (source', tit)) = do - let source = unescapeURI source' +inlineToRST (Image alternate (source, tit)) = do pics <- get >>= return . stImages let labelsUsed = map fst pics let txt = if null alternate || alternate == [Str ""] || diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index eb36c1ca6..4e7c2a7cd 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -106,7 +106,15 @@ handleUnicode (c:cs) = -- | Escape special characters. escapeSpecial :: String -> String -escapeSpecial = escapeStringUsing (('\t',"\\tab "):(backslashEscapes "{\\}")) +escapeSpecial = escapeStringUsing $ + [ ('\t',"\\tab ") + , ('\8216',"\\u8216'") + , ('\8217',"\\u8217'") + , ('\8220',"\\u8220\"") + , ('\8221',"\\u8221\"") + , ('\8211',"\\u8211-") + , ('\8212',"\\u8212-") + ] ++ backslashEscapes "{\\}" -- | Escape strings as needed for rich text format. stringToRTF :: String -> String @@ -287,10 +295,6 @@ inlineToRTF (Quoted SingleQuote lst) = "\\u8216'" ++ (inlineListToRTF lst) ++ "\\u8217'" inlineToRTF (Quoted DoubleQuote lst) = "\\u8220\"" ++ (inlineListToRTF lst) ++ "\\u8221\"" -inlineToRTF Apostrophe = "\\u8217'" -inlineToRTF Ellipses = "\\u8230?" -inlineToRTF EmDash = "\\u8212-" -inlineToRTF EnDash = "\\u8211-" inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" inlineToRTF (Str str) = stringToRTF str inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 4f6645cd5..563ad7044 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -96,6 +96,10 @@ stringToTexinfo = escapeStringUsing texinfoEscapes , ('@', "@@") , (',', "@comma{}") -- only needed in argument lists , ('\160', "@ ") + , ('\x2014', "---") + , ('\x2013', "--") + , ('\x2026', "@dots{}") + , ('\x2019', "'") ] -- | Puts contents into Texinfo command. @@ -387,10 +391,6 @@ inlineToTexinfo (Quoted DoubleQuote lst) = do inlineToTexinfo (Cite _ lst) = inlineListToTexinfo lst -inlineToTexinfo Apostrophe = return $ char '\'' -inlineToTexinfo EmDash = return $ text "---" -inlineToTexinfo EnDash = return $ text "--" -inlineToTexinfo Ellipses = return $ text "@dots{}" inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str inlineToTexinfo (RawInline f str) | f == "latex" || f == "tex" = diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 6614ec28e..26d5ec6d7 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -72,15 +72,19 @@ withUseTags action = do -- | Escape one character as needed for Textile. escapeCharForTextile :: Char -> String escapeCharForTextile x = case x of - '&' -> "&" - '<' -> "<" - '>' -> ">" - '"' -> """ - '*' -> "*" - '_' -> "_" - '@' -> "@" - '|' -> "|" - c -> [c] + '&' -> "&" + '<' -> "<" + '>' -> ">" + '"' -> """ + '*' -> "*" + '_' -> "_" + '@' -> "@" + '|' -> "|" + '\x2014' -> " -- " + '\x2013' -> " - " + '\x2019' -> "'" + '\x2026' -> "..." + c -> [c] -- | Escape string as needed for Textile. escapeStringForTextile :: String -> String @@ -370,14 +374,6 @@ inlineToTextile opts (Quoted DoubleQuote lst) = do inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst -inlineToTextile _ EmDash = return " -- " - -inlineToTextile _ EnDash = return " - " - -inlineToTextile _ Apostrophe = return "'" - -inlineToTextile _ Ellipses = return "..." - inlineToTextile _ (Code _ str) = return $ if '@' `elem` str then "<tt>" ++ escapeStringForXML str ++ "</tt>" diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index e21525018..7a1c8bdd8 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -33,9 +33,13 @@ module Text.Pandoc.XML ( stripTags, inTags, selfClosingTag, inTagsSimple, - inTagsIndented ) where + inTagsIndented, + toEntities, + fromEntities ) where import Text.Pandoc.Pretty +import Data.Char (ord, isAscii) +import Text.HTML.TagSoup.Entity (lookupEntity) -- | Remove everything between <...> stripTags :: String -> String @@ -89,3 +93,22 @@ inTagsSimple tagType = inTags False tagType [] -- | Put the supplied contents in indented block btw start and end tags. inTagsIndented :: String -> Doc -> Doc inTagsIndented tagType = inTags True tagType [] + +-- | Escape all non-ascii characters using numerical entities. +toEntities :: String -> String +toEntities [] = "" +toEntities (c:cs) + | isAscii c = c : toEntities cs + | otherwise = "&#" ++ show (ord c) ++ ";" ++ toEntities cs + +-- Unescapes XML entities +fromEntities :: String -> String +fromEntities ('&':xs) = + case lookupEntity ent of + Just c -> c : fromEntities rest + Nothing -> '&' : fromEntities rest + where (ent, rest) = case break (==';') xs of + (zs,';':ys) -> (zs,ys) + _ -> ("",xs) +fromEntities (x:xs) = x : fromEntities xs +fromEntities [] = [] diff --git a/src/markdown2pdf.hs b/src/markdown2pdf.hs deleted file mode 100644 index d6ee39dab..000000000 --- a/src/markdown2pdf.hs +++ /dev/null @@ -1,256 +0,0 @@ -module Main where - -import Data.List (isInfixOf, intercalate, isPrefixOf) -import Data.Maybe (isNothing) -import qualified Data.ByteString as BS -import Codec.Binary.UTF8.String (decodeString, encodeString) -import Data.ByteString.UTF8 (toString) -import Control.Monad (unless, guard, liftM, when) -import Control.Concurrent (putMVar, takeMVar, newEmptyMVar, forkIO) -import Control.Exception (tryJust, bracket, evaluate) - -import System.IO -import System.IO.Error (isDoesNotExistError) -import System.Environment ( getArgs, getProgName ) -import qualified Text.Pandoc.UTF8 as UTF8 -import System.Exit (ExitCode (..), exitWith) -import System.FilePath -import System.Directory -import System.Process - --- A variant of 'readProcessWithExitCode' that does not --- cause an error if the output is not UTF-8. (Copied --- with slight variants from 'System.Process'.) -readProcessWithExitCode' - :: FilePath -- ^ command to run - -> [String] -- ^ any arguments - -> String -- ^ standard input - -> IO (ExitCode,String,String) -- ^ exitcode, stdout, stderr -readProcessWithExitCode' cmd args input = do - (Just inh, Just outh, Just errh, pid) <- - createProcess (proc cmd args){ std_in = CreatePipe, - std_out = CreatePipe, - std_err = CreatePipe } - - outMVar <- newEmptyMVar - - -- fork off a thread to start consuming stdout - out <- liftM toString $ BS.hGetContents outh - _ <- forkIO $ evaluate (length out) >> putMVar outMVar () - - -- fork off a thread to start consuming stderr - err <- liftM toString $ BS.hGetContents errh - _ <- forkIO $ evaluate (length err) >> putMVar outMVar () - - -- now write and flush any input - when (not (null input)) $ do hPutStr inh input; hFlush inh - hClose inh -- done with stdin - - -- wait on the output - takeMVar outMVar - takeMVar outMVar - hClose outh - - -- wait on the process - ex <- waitForProcess pid - - return (ex, out, err) - -run :: FilePath -> [String] -> IO (Either String String) -run file opts = do - (code, out, err) <- readProcessWithExitCode' (encodeString file) - (map encodeString opts) "" - let msg = out ++ err - case code of - ExitFailure _ -> return $ Left $! msg - ExitSuccess -> return $ Right $! msg - -parsePandocArgs :: [String] -> IO (Maybe ([String], String)) -parsePandocArgs args = do - result <- run "pandoc" $ ["--dump-args"] ++ args - return $ either error (parse . map trim . lines) result - where parse [] = Nothing - parse ("-":[]) = Just ([], "stdin") -- no output or input - parse ("-":x:xs) = Just (x:xs, dropExtension x) -- no output - parse ( x :xs) = Just (xs, dropExtension x) -- at least output - --trim = reverse . dropWhile isSpace . reverse . dropWhile isSpace - trim = takeWhile (/='\r') . dropWhile (=='\r') - -runPandoc :: [String] -> FilePath -> IO (Either String FilePath) -runPandoc inputsAndArgs output = do - let texFile = addExtension output "tex" - result <- run "pandoc" $ - ["-s", "--no-wrap", "-r", "markdown", "-w", "latex"] - ++ inputsAndArgs ++ ["-o", texFile] - return $ either Left (const $ Right texFile) result - -runLatexRaw :: String -> FilePath -> IO (Either (Either String String) FilePath) -runLatexRaw latexProgram file = do - -- we ignore the ExitCode because pdflatex always fails the first time - run latexProgram ["-halt-on-error", "-interaction", "nonstopmode", - "-output-directory", takeDirectory file, dropExtension file] >> return () - let pdfFile = replaceExtension file "pdf" - let logFile = replaceExtension file "log" - txt <- tryJust (guard . isDoesNotExistError) - (liftM toString $ BS.readFile logFile) - let checks = checkLatex $ either (const "") id txt - case checks of - -- err , bib , ref , msg - (True , _ , _ , msg) -> return $ Left $ Left msg -- failure - (False, True , _ , msg) -> runBibtex file >> - (return $ Left $ Right msg) -- citations - (False, _ , True, msg) -> return $ Left $ Right msg -- references - (False, False, False, _ ) -> return $ Right pdfFile -- success - -runLatex :: String -> FilePath -> IO (Either String FilePath) -runLatex latexProgram file = step 3 - where - step n = do - result <- runLatexRaw latexProgram file - case result of - Left (Left err) -> return $ Left err - Left (Right _) | n > 1 -> step (n-1 :: Int) - Right _ | n > 2 -> step (n-1 :: Int) - Left (Right msg) -> return $ Left msg - Right pdfFile -> return $ Right pdfFile - -checkLatex :: String -> (Bool, Bool, Bool, String) -checkLatex "" = (True, False, False, "Could not read log file") -checkLatex txt = (err , bib, ref, unlines $! msgs ++ tips) - where - xs `oneOf` x = any (flip isInfixOf x) xs - msgs = dropWhile (not . errorline) $ lines txt - errorline ('!':_) = True - errorline _ = False - tips = checkPackages msgs - err = any (oneOf ["!", "LaTeX Error:", "Latex Error:"]) msgs - bib = any (oneOf ["Warning: Citation" - ,"Warning: There were undefined citations"]) msgs - ref = any (oneOf ["Warning: Reference" - ,"Warning: Label" - ,"Warning: There were undefined references" - ]) msgs - -checkPackages :: [String] -> [String] -checkPackages = concatMap chks - where -- for each message, search 'pks' for matches and give a hint - chks x = concatMap (chk x) pks - chk x (k,v) = if sub k `isInfixOf` x then tip k v else [] - sub k = "`" ++ k ++ ".sty' not found" - tip k v = ["Please install the '" ++ k ++ - "' package from CTAN:", " " ++ v] - pks = [("ucs" - ,"http://www.ctan.org/tex-archive/macros/latex/contrib/unicode/") - ,("ulem" - ,"http://www.ctan.org/tex-archive/macros/latex/contrib/misc/") - ,("graphicx" - ,"http://www.ctan.org/tex-archive/macros/latex/required/graphics/") - ,("fancyhdr" - ,"http://www.ctan.org/tex-archive/macros/latex/contrib/fancyhdr/") - ,("array" - ,"http://www.ctan.org/tex-archive/macros/latex/required/tools/")] - -runBibtex :: FilePath -> IO (Either String FilePath) -runBibtex file = do - let auxFile = replaceExtension file "aux" - result <- run "bibtex" [auxFile] - return $ either Left (const $ Right auxFile) result - -exit :: String -> IO a -exit x = do - progName <- getProgName - UTF8.hPutStrLn stderr $ progName ++ ": " ++ x - exitWith $ ExitFailure 1 - -saveStdin :: FilePath -> IO (Either String FilePath) -saveStdin file = do - text <- liftM toString $ BS.getContents - UTF8.writeFile file text - fileExist <- doesFileExist (encodeString file) - case fileExist of - False -> return $ Left $! "Could not create " ++ file - True -> return $ Right file - -saveOutput :: FilePath -> FilePath -> IO () -saveOutput input output = do - copyFile (encodeString input) (encodeString output) - UTF8.hPutStrLn stderr $! "Created " ++ output - -main :: IO () -main = bracket - -- acquire resource - (do dir <- getTemporaryDirectory - let tmp = dir </> "pandoc" - createDirectoryIfMissing True tmp - return tmp) - - -- release resource - ( \tmp -> removeDirectoryRecursive tmp) - - -- run computation - $ \tmp -> do - args <- liftM (map decodeString) getArgs - -- check for invalid arguments and print help message if needed - let goodopts = ["-f","-r","-N", "-p","-R","-H","-B","-A", "-C","-o","-V"] - let goodoptslong = ["--from","--read","--strict", - "--preserve-tabs","--tab-stop","--parse-raw", - "--toc","--table-of-contents", "--xetex", "--luatex", - "--number-sections","--include-in-header", - "--include-before-body","--include-after-body", - "--custom-header","--output", - "--template", "--variable", - "--csl", "--bibliography", "--data-dir", "--listings"] - let isOpt ('-':_) = True - isOpt _ = False - let opts = filter isOpt args - -- note that a long option can come in this form: --opt=val - let isGoodopt x = x `elem` (goodopts ++ goodoptslong) || - any (\o -> (o ++ "=") `isPrefixOf` x) goodoptslong - let markdown2pdfOpts = ["--xetex","--luatex"] - unless (all isGoodopt opts) $ do - (code, out, _err) <- readProcessWithExitCode "pandoc" ["--help"] "" - UTF8.putStrLn "markdown2pdf [OPTIONS] [FILES]\nOptions:" - UTF8.putStr $ unlines $ - filter (\l -> any (`isInfixOf` l) goodoptslong) (lines out) - ++ map (replicate 24 ' ' ++) markdown2pdfOpts - exitWith code - - let args' = filter (`notElem` markdown2pdfOpts) args - - -- check for executable files - let latexProgram = if "--xetex" `elem` opts - then "xelatex" - else if "--luatex" `elem` opts - then "lualatex" - else "pdflatex" - let execs = ["pandoc", latexProgram, "bibtex"] - paths <- mapM findExecutable execs - let miss = map snd $ filter (isNothing . fst) $ zip paths execs - unless (null miss) $ exit $! "Could not find " ++ intercalate ", " miss - - -- parse arguments - -- if no input given, use 'stdin' - pandocArgs <- parsePandocArgs args' - (input, output) <- case pandocArgs of - Nothing -> exit "Could not parse arguments" - Just ([],out) -> do - stdinFile <- saveStdin (replaceDirectory (takeBaseName out) tmp) - case stdinFile of - Left err -> exit err - Right f -> return ([f], out) - -- no need because we'll pass all arguments to pandoc - Just (_ ,out) -> return ([], out) - -- run pandoc - pandocRes <- runPandoc (input ++ args') $ replaceDirectory output tmp - case pandocRes of - Left err -> exit err - Right texFile -> do - -- run pdflatex - latexRes <- runLatex latexProgram texFile - case latexRes of - Left err -> exit err - Right pdfFile -> do - -- save the output creating a backup if necessary - saveOutput pdfFile $ - replaceDirectory pdfFile (takeDirectory output) - diff --git a/src/pandoc.hs b/src/pandoc.hs index 27bc2c25c..3853d360a 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -1,5 +1,5 @@ {- -Copyright (C) 2006-2011 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2006-2012 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,7 +18,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Main - Copyright : Copyright (C) 2006-2011 John MacFarlane + Copyright : Copyright (C) 2006-2012 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> @@ -30,24 +30,26 @@ writers. -} module Main where import Text.Pandoc -import Text.Pandoc.S5 (s5HeaderIncludes) +import Text.Pandoc.PDF (tex2pdf) +import Text.Pandoc.Readers.LaTeX (handleIncludes) import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile, - headerShift, findDataFile, normalize ) -#ifdef _HIGHLIGHTING -import Text.Pandoc.Highlighting ( languages ) -#endif + headerShift, findDataFile, normalize, err, warn ) +import Text.Pandoc.XML ( toEntities, fromEntities ) +import Text.Pandoc.SelfContained ( makeSelfContained ) +import Text.Pandoc.Highlighting ( languages, Style, tango, pygments, + espresso, kate, haddock, monochrome ) import System.Environment ( getArgs, getProgName ) import System.Exit ( exitWith, ExitCode (..) ) import System.FilePath import System.Console.GetOpt import Data.Char ( toLower ) import Data.List ( intercalate, isSuffixOf, isPrefixOf ) -import System.Directory ( getAppUserDataDirectory, doesFileExist ) -import System.IO ( stdout, stderr ) +import System.Directory ( getAppUserDataDirectory, doesFileExist, findExecutable ) +import System.IO ( stdout ) import System.IO.Error ( isDoesNotExistError ) import Control.Exception.Extensible ( throwIO ) import qualified Text.Pandoc.UTF8 as UTF8 -import Text.CSL +import qualified Text.CSL as CSL import Text.Pandoc.Biblio import Control.Monad (when, unless, liftM) import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..)) @@ -55,35 +57,34 @@ import Network.URI (parseURI, isURI, URI(..)) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 (toString ) import Codec.Binary.UTF8.String (decodeString, encodeString) +import Text.CSL.Reference (Reference(..)) copyrightMessage :: String -copyrightMessage = "\nCopyright (C) 2006-2011 John MacFarlane\n" ++ +copyrightMessage = "\nCopyright (C) 2006-2012 John MacFarlane\n" ++ "Web: http://johnmacfarlane.net/pandoc\n" ++ "This is free software; see the source for copying conditions. There is no\n" ++ "warranty, not even for merchantability or fitness for a particular purpose." compileInfo :: String compileInfo = - "\nCompiled with citeproc support." ++ -#ifdef _HIGHLIGHTING - "\nCompiled with syntax highlighting support for:\n" ++ - wrapWords 78 languages ++ -#endif - "" + "\nCompiled with citeproc-hs " ++ VERSION_citeproc_hs ++ ", texmath " ++ + VERSION_texmath ++ ", highlighting-kate " ++ VERSION_highlighting_kate ++ + ".\nSyntax highlighting is supported for the following languages:\n " ++ + wrapWords 4 78 languages -- | Converts a list of strings into a single string with the items printed as -- comma separated words in lines with a maximum line length. -wrapWords :: Int -> [String] -> String -wrapWords c = wrap' c c where - wrap' _ _ [] = "" +wrapWords :: Int -> Int -> [String] -> String +wrapWords indent c = wrap' (c - indent) (c - indent) + where wrap' _ _ [] = "" wrap' cols remaining (x:xs) = if remaining == cols then x ++ wrap' cols (remaining - length x) xs else if (length x + 1) > remaining - then ",\n" ++ x ++ wrap' cols (cols - length x) xs + then ",\n" ++ replicate indent ' ' ++ x ++ wrap' cols (cols - length x) xs else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs -isNonTextOutput :: String -> Bool -isNonTextOutput = (`elem` ["odt","epub"]) +nonTextFormats :: [String] +nonTextFormats = ["odt","docx","epub"] -- | Data structure for command line options. data Opt = Opt @@ -101,15 +102,19 @@ data Opt = Opt , optNumberSections :: Bool -- ^ Number sections in LaTeX , optSectionDivs :: Bool -- ^ Put sections in div tags in HTML , optIncremental :: Bool -- ^ Use incremental lists in Slidy/S5 - , optOffline :: Bool -- ^ Make slideshow accessible offline - , optXeTeX :: Bool -- ^ Format latex for xetex + , optSelfContained :: Bool -- ^ Make HTML accessible offline , optSmart :: Bool -- ^ Use smart typography + , optOldDashes :: Bool -- ^ Parse dashes like pandoc <=1.8.2.1 , optHtml5 :: Bool -- ^ Produce HTML5 in HTML + , optHighlight :: Bool -- ^ Highlight source code + , optHighlightStyle :: Style -- ^ Style to use for highlighted code , optChapters :: Bool -- ^ Use chapter for top-level sects , optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math , optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt + , optReferenceDocx :: Maybe FilePath -- ^ Path of reference.docx , optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet , optEPUBMetadata :: String -- ^ EPUB metadata + , optEPUBFonts :: [FilePath] -- ^ EPUB fonts to embed , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments , optStrict :: Bool -- ^ Use strict markdown syntax @@ -124,8 +129,12 @@ data Opt = Opt , optCiteMethod :: CiteMethod -- ^ Method to output cites , optBibliography :: [String] , optCslFile :: FilePath + , optAbbrevsFile :: Maybe FilePath , optListings :: Bool -- ^ Use listings package for code blocks - , optAscii :: Bool -- ^ Avoid using nonascii characters + , optLaTeXEngine :: String -- ^ Program to use for latex -> pdf + , optSlideLevel :: Maybe Int -- ^ Header level that creates slides + , optSetextHeaders :: Bool -- ^ Use atx headers for markdown level 1-2 + , optAscii :: Bool -- ^ Use ascii characters only in html } -- | Defaults for command-line options. @@ -145,15 +154,19 @@ defaultOpts = Opt , optNumberSections = False , optSectionDivs = False , optIncremental = False - , optOffline = False - , optXeTeX = False + , optSelfContained = False , optSmart = False + , optOldDashes = False , optHtml5 = False + , optHighlight = True + , optHighlightStyle = pygments , optChapters = False , optHTMLMathMethod = PlainMath , optReferenceODT = Nothing + , optReferenceDocx = Nothing , optEPUBStylesheet = Nothing , optEPUBMetadata = "" + , optEPUBFonts = [] , optDumpArgs = False , optIgnoreArgs = False , optStrict = False @@ -168,7 +181,11 @@ defaultOpts = Opt , optCiteMethod = Citeproc , optBibliography = [] , optCslFile = "" + , optAbbrevsFile = Nothing , optListings = False + , optLaTeXEngine = "pdflatex" + , optSlideLevel = Nothing + , optSetextHeaders = True , optAscii = False } @@ -188,50 +205,23 @@ options = "FORMAT") "" - , Option "s" ["standalone"] - (NoArg - (\opt -> return opt { optStandalone = True })) - "" -- "Include needed header and footer on output" - , Option "o" ["output"] (ReqArg (\arg opt -> return opt { optOutputFile = arg }) "FILENAME") "" -- "Name of output file" - , Option "p" ["preserve-tabs"] - (NoArg - (\opt -> return opt { optPreserveTabs = True })) - "" -- "Preserve tabs instead of converting to spaces" - - , Option "" ["tab-stop"] + , Option "" ["data-dir"] (ReqArg - (\arg opt -> - case reads arg of - [(t,"")] | t > 0 -> return opt { optTabStop = t } - _ -> do - UTF8.hPutStrLn stderr $ - "tab-stop must be a number greater than 0" - exitWith $ ExitFailure 31) - "NUMBER") - "" -- "Tab stop (default 4)" + (\arg opt -> return opt { optDataDir = Just arg }) + "DIRECTORY") -- "Directory containing pandoc data files." + "" , Option "" ["strict"] (NoArg (\opt -> return opt { optStrict = True } )) "" -- "Disable markdown syntax extensions" - , Option "" ["normalize"] - (NoArg - (\opt -> return opt { optTransforms = - normalize : optTransforms opt } )) - "" -- "Normalize the Pandoc AST" - - , Option "" ["reference-links"] - (NoArg - (\opt -> return opt { optReferenceLinks = True } )) - "" -- "Use reference links in parsing HTML" - , Option "R" ["parse-raw"] (NoArg (\opt -> return opt { optParseRaw = True })) @@ -242,145 +232,25 @@ options = (\opt -> return opt { optSmart = True })) "" -- "Use smart quotes, dashes, and ellipses" - , Option "5" ["html5"] - (NoArg - (\opt -> return opt { optHtml5 = True })) - "" -- "Produce HTML5 in HTML output" - - , Option "m" ["latexmathml", "asciimathml"] - (OptArg - (\arg opt -> - return opt { optHTMLMathMethod = LaTeXMathML arg }) - "URL") - "" -- "Use LaTeXMathML script in html output" - - , Option "" ["mathml"] - (OptArg - (\arg opt -> - return opt { optHTMLMathMethod = MathML arg }) - "URL") - "" -- "Use mathml for HTML math" - - , Option "" ["mimetex"] - (OptArg - (\arg opt -> do - let url' = case arg of - Just u -> u ++ "?" - Nothing -> "/cgi-bin/mimetex.cgi?" - return opt { optHTMLMathMethod = WebTeX url' }) - "URL") - "" -- "Use mimetex for HTML math" - - , Option "" ["webtex"] - (OptArg - (\arg opt -> do - let url' = case arg of - Just u -> u - Nothing -> "http://chart.apis.google.com/chart?cht=tx&chl=" - return opt { optHTMLMathMethod = WebTeX url' }) - "URL") - "" -- "Use web service for HTML math" - - , Option "" ["jsmath"] - (OptArg - (\arg opt -> return opt { optHTMLMathMethod = JsMath arg}) - "URL") - "" -- "Use jsMath for HTML math" - - , Option "" ["mathjax"] - (OptArg - (\arg opt -> do - let url' = case arg of - Just u -> u - Nothing -> "https://d3eoax9i5htok0.cloudfront.net/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" - return opt { optHTMLMathMethod = MathJax url'}) - "URL") - "" -- "Use MathJax for HTML math" - - , Option "" ["gladtex"] - (NoArg - (\opt -> return opt { optHTMLMathMethod = GladTeX })) - "" -- "Use gladtex for HTML math" - - , Option "i" ["incremental"] - (NoArg - (\opt -> return opt { optIncremental = True })) - "" -- "Make list items display incrementally in Slidy/S5" - - , Option "" ["offline"] - (NoArg - (\opt -> return opt { optOffline = True, - optStandalone = True })) - "" -- "Make slide shows include all the needed js and css" - - , Option "" ["xetex"] + , Option "" ["old-dashes"] (NoArg - (\opt -> do - UTF8.hPutStrLn stderr $ "pandoc: --xetex is deprecated. " - ++ "It is no longer needed for use with XeTeX." - return opt { optXeTeX = True })) - "" -- "Format latex for processing by XeTeX" - - , Option "" ["chapters"] - (NoArg - (\opt -> return opt { optChapters = True })) - "" -- "Use chapter for top-level sections in LaTeX, DocBook" - - , Option "N" ["number-sections"] - (NoArg - (\opt -> return opt { optNumberSections = True })) - "" -- "Number sections in LaTeX" - - , Option "" ["listings"] - (NoArg - (\opt -> return opt { optListings = True })) - "" -- "Use listings package for LaTeX code blocks" - - , Option "" ["section-divs"] - (NoArg - (\opt -> return opt { optSectionDivs = True })) - "" -- "Put sections in div tags in HTML" - - , Option "" ["no-wrap"] - (NoArg - (\opt -> return opt { optWrapText = False })) - "" -- "Do not wrap text in output" + (\opt -> return opt { optSmart = True + , optOldDashes = True })) + "" -- "Use smart quotes, dashes, and ellipses" - , Option "" ["columns"] + , Option "" ["base-header-level"] (ReqArg (\arg opt -> case reads arg of - [(t,"")] | t > 0 -> return opt { optColumns = t } - _ -> do - UTF8.hPutStrLn stderr $ - "columns must be a number greater than 0" - exitWith $ ExitFailure 33) - "NUMBER") - "" -- "Length of line in characters" - - , Option "" ["ascii"] - (NoArg - (\opt -> return opt { optAscii = True })) - "" -- "Avoid using non-ascii characters in output" - - , Option "" ["email-obfuscation"] - (ReqArg - (\arg opt -> do - method <- case arg of - "references" -> return ReferenceObfuscation - "javascript" -> return JavascriptObfuscation - "none" -> return NoObfuscation - _ -> UTF8.hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >> - exitWith (ExitFailure 6) - return opt { optEmailObfuscation = method }) - "none|javascript|references") - "" -- "Method for obfuscating email in HTML" - - , Option "" ["id-prefix"] - (ReqArg - (\arg opt -> return opt { optIdentifierPrefix = arg }) - "STRING") - "" -- "Prefix to add to automatically generated HTML identifiers" + [(t,"")] | t > 0 -> do + let oldTransforms = optTransforms opt + let shift = t - 1 + return opt{ optTransforms = + headerShift shift : oldTransforms } + _ -> err 19 + "base-header-level must be a number > 0") + "NUMBER") + "" -- "Headers base level" , Option "" ["indented-code-classes"] (ReqArg @@ -389,26 +259,31 @@ options = "STRING") "" -- "Classes (whitespace- or comma-separated) to use for indented code-blocks" - , Option "" ["toc", "table-of-contents"] - (NoArg - (\opt -> return opt { optTableOfContents = True })) - "" -- "Include table of contents" + , Option "" ["normalize"] + (NoArg + (\opt -> return opt { optTransforms = + normalize : optTransforms opt } )) + "" -- "Normalize the Pandoc AST" - , Option "" ["base-header-level"] + , Option "p" ["preserve-tabs"] + (NoArg + (\opt -> return opt { optPreserveTabs = True })) + "" -- "Preserve tabs instead of converting to spaces" + + , Option "" ["tab-stop"] (ReqArg (\arg opt -> case reads arg of - [(t,"")] | t > 0 -> do - let oldTransforms = optTransforms opt - let shift = t - 1 - return opt{ optTransforms = - headerShift shift : oldTransforms } - _ -> do - UTF8.hPutStrLn stderr $ - "base-header-level must be a number > 0" - exitWith $ ExitFailure 19) + [(t,"")] | t > 0 -> return opt { optTabStop = t } + _ -> err 31 + "tab-stop must be a number greater than 0") "NUMBER") - "" -- "Headers base level" + "" -- "Tab stop (default 4)" + + , Option "s" ["standalone"] + (NoArg + (\opt -> return opt { optStandalone = True })) + "" -- "Include needed header and footer on output" , Option "" ["template"] (ReqArg @@ -425,21 +300,62 @@ options = (k,_:v) -> do let newvars = optVariables opt ++ [(k,v)] return opt{ optVariables = newvars } - _ -> do - UTF8.hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)" - exitWith $ ExitFailure 17) + _ -> err 17 $ + "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)") "KEY:VALUE") "" -- "Use custom template" - , Option "c" ["css"] + , Option "D" ["print-default-template"] (ReqArg - (\arg opt -> do - -- add new link to end, so it is included in proper order - let newvars = optVariables opt ++ [("css",arg)] - return opt { optVariables = newvars, - optStandalone = True }) - "URL") - "" -- "Link to CSS style sheet" + (\arg _ -> do + templ <- getDefaultTemplate Nothing arg + case templ of + Right t -> UTF8.hPutStr stdout t + Left e -> error $ show e + exitWith ExitSuccess) + "FORMAT") + "" -- "Print default template for FORMAT" + + , Option "" ["no-wrap"] + (NoArg + (\opt -> return opt { optWrapText = False })) + "" -- "Do not wrap text in output" + + , Option "" ["columns"] + (ReqArg + (\arg opt -> + case reads arg of + [(t,"")] | t > 0 -> return opt { optColumns = t } + _ -> err 33 $ + "columns must be a number greater than 0") + "NUMBER") + "" -- "Length of line in characters" + + , Option "" ["toc", "table-of-contents"] + (NoArg + (\opt -> return opt { optTableOfContents = True })) + "" -- "Include table of contents" + + , Option "" ["no-highlight"] + (NoArg + (\opt -> return opt { optHighlight = False })) + "" -- "Don't highlight source code" + + , Option "" ["highlight-style"] + (ReqArg + (\arg opt -> do + newStyle <- case map toLower arg of + "pygments" -> return pygments + "tango" -> return tango + "espresso" -> return espresso + "kate" -> return kate + "monochrome" -> return monochrome + "haddock" -> return haddock + _ -> err 39 $ + "Unknown style :" ++ arg + return opt{ optHighlightStyle = newStyle }) + "STYLE") + "" -- "Style for highlighted code" , Option "H" ["include-in-header"] (ReqArg @@ -474,6 +390,100 @@ options = "FILENAME") "" -- "File to include after document body" + , Option "" ["self-contained"] + (NoArg + (\opt -> return opt { optSelfContained = True, + optVariables = ("slidy-url","slidy") : + optVariables opt, + optStandalone = True })) + "" -- "Make slide shows include all the needed js and css" + + , Option "" ["offline"] + (NoArg + (\opt -> do warn $ "--offline is deprecated. Use --self-contained instead." + return opt { optSelfContained = True, + optStandalone = True })) + "" -- "Make slide shows include all the needed js and css" + -- deprecated synonym for --self-contained + + , Option "5" ["html5"] + (NoArg + (\opt -> do + warn $ "--html5 is deprecated. " + ++ "Use the html5 output format instead." + return opt { optHtml5 = True })) + "" -- "Produce HTML5 in HTML output" + + , Option "" ["ascii"] + (NoArg + (\opt -> return opt { optAscii = True })) + "" -- "Use ascii characters only in HTML output" + + , Option "" ["reference-links"] + (NoArg + (\opt -> return opt { optReferenceLinks = True } )) + "" -- "Use reference links in parsing HTML" + + , Option "" ["atx-headers"] + (NoArg + (\opt -> return opt { optSetextHeaders = False } )) + "" -- "Use atx-style headers for markdown" + + , Option "" ["chapters"] + (NoArg + (\opt -> return opt { optChapters = True })) + "" -- "Use chapter for top-level sections in LaTeX, DocBook" + + , Option "N" ["number-sections"] + (NoArg + (\opt -> return opt { optNumberSections = True })) + "" -- "Number sections in LaTeX" + + , Option "" ["listings"] + (NoArg + (\opt -> return opt { optListings = True })) + "" -- "Use listings package for LaTeX code blocks" + + , Option "i" ["incremental"] + (NoArg + (\opt -> return opt { optIncremental = True })) + "" -- "Make list items display incrementally in Slidy/S5" + + , Option "" ["slide-level"] + (ReqArg + (\arg opt -> do + case reads arg of + [(t,"")] | t >= 1 && t <= 6 -> + return opt { optSlideLevel = Just t } + _ -> err 39 $ + "slide level must be a number between 1 and 6") + "NUMBER") + "" -- "Force header level for slides" + + , Option "" ["section-divs"] + (NoArg + (\opt -> return opt { optSectionDivs = True })) + "" -- "Put sections in div tags in HTML" + + , Option "" ["email-obfuscation"] + (ReqArg + (\arg opt -> do + method <- case arg of + "references" -> return ReferenceObfuscation + "javascript" -> return JavascriptObfuscation + "none" -> return NoObfuscation + _ -> err 6 + ("Unknown obfuscation method: " ++ arg) + return opt { optEmailObfuscation = method }) + "none|javascript|references") + "" -- "Method for obfuscating email in HTML" + + , Option "" ["id-prefix"] + (ReqArg + (\arg opt -> return opt { optIdentifierPrefix = arg }) + "STRING") + "" -- "Prefix to add to automatically generated HTML identifiers" + , Option "T" ["title-prefix"] (ReqArg (\arg opt -> do @@ -483,6 +493,16 @@ options = "STRING") "" -- "String to prefix to HTML window title" + , Option "c" ["css"] + (ReqArg + (\arg opt -> do + -- add new link to end, so it is included in proper order + let newvars = optVariables opt ++ [("css",arg)] + return opt { optVariables = newvars, + optStandalone = True }) + "URL") + "" -- "Link to CSS style sheet" + , Option "" ["reference-odt"] (ReqArg (\arg opt -> do @@ -490,6 +510,13 @@ options = "FILENAME") "" -- "Path of custom reference.odt" + , Option "" ["reference-docx"] + (ReqArg + (\arg opt -> do + return opt { optReferenceDocx = Just arg }) + "FILENAME") + "" -- "Path of custom reference.docx" + , Option "" ["epub-stylesheet"] (ReqArg (\arg opt -> do @@ -514,16 +541,22 @@ options = "FILENAME") "" -- "Path of epub metadata file" - , Option "D" ["print-default-template"] + , Option "" ["epub-embed-font"] (ReqArg - (\arg _ -> do - templ <- getDefaultTemplate Nothing arg - case templ of - Right t -> UTF8.hPutStr stdout t - Left e -> error $ show e - exitWith ExitSuccess) - "FORMAT") - "" -- "Print default template for FORMAT" + (\arg opt -> do + return opt{ optEPUBFonts = arg : optEPUBFonts opt }) + "FILE") + "" -- "Directory of fonts to embed" + + , Option "" ["latex-engine"] + (ReqArg + (\arg opt -> do + let b = takeBaseName arg + if (b == "pdflatex" || b == "lualatex" || b == "xelatex") + then return opt { optLaTeXEngine = arg } + else err 45 "latex-engine must be pdflatex, lualatex, or xelatex.") + "PROGRAM") + "" -- "Name of latex program to use in generating PDF" , Option "" ["bibliography"] (ReqArg @@ -537,6 +570,12 @@ options = "FILENAME") "" + , Option "" ["citation-abbreviations"] + (ReqArg + (\arg opt -> return opt { optAbbrevsFile = Just arg }) + "FILENAME") + "" + , Option "" ["natbib"] (NoArg (\opt -> return opt { optCiteMethod = Natbib })) @@ -547,11 +586,60 @@ options = (\opt -> return opt { optCiteMethod = Biblatex })) "" -- "Use biblatex cite commands in LaTeX output" - , Option "" ["data-dir"] - (ReqArg - (\arg opt -> return opt { optDataDir = Just arg }) - "DIRECTORY") -- "Directory containing pandoc data files." - "" + , Option "m" ["latexmathml", "asciimathml"] + (OptArg + (\arg opt -> + return opt { optHTMLMathMethod = LaTeXMathML arg }) + "URL") + "" -- "Use LaTeXMathML script in html output" + + , Option "" ["mathml"] + (OptArg + (\arg opt -> + return opt { optHTMLMathMethod = MathML arg }) + "URL") + "" -- "Use mathml for HTML math" + + , Option "" ["mimetex"] + (OptArg + (\arg opt -> do + let url' = case arg of + Just u -> u ++ "?" + Nothing -> "/cgi-bin/mimetex.cgi?" + return opt { optHTMLMathMethod = WebTeX url' }) + "URL") + "" -- "Use mimetex for HTML math" + + , Option "" ["webtex"] + (OptArg + (\arg opt -> do + let url' = case arg of + Just u -> u + Nothing -> "http://chart.apis.google.com/chart?cht=tx&chl=" + return opt { optHTMLMathMethod = WebTeX url' }) + "URL") + "" -- "Use web service for HTML math" + + , Option "" ["jsmath"] + (OptArg + (\arg opt -> return opt { optHTMLMathMethod = JsMath arg}) + "URL") + "" -- "Use jsMath for HTML math" + + , Option "" ["mathjax"] + (OptArg + (\arg opt -> do + let url' = case arg of + Just u -> u + Nothing -> "https://d3eoax9i5htok0.cloudfront.net/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" + return opt { optHTMLMathMethod = MathJax url'}) + "URL") + "" -- "Use MathJax for HTML math" + + , Option "" ["gladtex"] + (NoArg + (\opt -> return opt { optHTMLMathMethod = GladTeX })) + "" -- "Use gladtex for HTML math" , Option "" ["dump-args"] (NoArg @@ -579,14 +667,15 @@ options = UTF8.hPutStr stdout (usageMessage prg options) exitWith ExitSuccess )) "" -- "Show help" + ] -- Returns usage message usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++ - (intercalate ", " $ map fst readers) ++ "\nOutput formats: " ++ - (intercalate ", " $ map fst writers ++ ["odt","epub"]) ++ "\nOptions:") + (wrapWords 16 78 $ map fst readers) ++ "\nOutput formats: " ++ + (wrapWords 16 78 $ map fst writers ++ nonTextFormats) ++ "\nOptions:") -- Determine default reader based on source file extensions defaultReaderName :: String -> [FilePath] -> String @@ -637,8 +726,11 @@ defaultWriterName x = ".texinfo" -> "texinfo" ".db" -> "docbook" ".odt" -> "odt" + ".docx" -> "docx" ".epub" -> "epub" ".org" -> "org" + ".asciidoc" -> "asciidoc" + ".pdf" -> "latex" ['.',y] | y `elem` ['1'..'9'] -> "man" _ -> "html" @@ -654,10 +746,8 @@ main = do else getOpt Permute options rawArgs unless (null errors) $ - do name <- getProgName - mapM_ (\e -> UTF8.hPutStr stderr (name ++ ": ") >> UTF8.hPutStr stderr e) errors - UTF8.hPutStrLn stderr $ "Try " ++ name ++ " --help for more information." - exitWith $ ExitFailure 2 + err 2 $ concat $ errors ++ + ["Try " ++ prg ++ " --help for more information."] let defaultOpts' = if compatMode then defaultOpts { optReader = "markdown" @@ -682,14 +772,19 @@ main = do , optNumberSections = numberSections , optSectionDivs = sectionDivs , optIncremental = incremental - , optOffline = offline + , optSelfContained = selfContained , optSmart = smart + , optOldDashes = oldDashes , optHtml5 = html5 + , optHighlight = highlight + , optHighlightStyle = highlightStyle , optChapters = chapters , optHTMLMathMethod = mathMethod , optReferenceODT = referenceODT + , optReferenceDocx = referenceDocx , optEPUBStylesheet = epubStylesheet , optEPUBMetadata = epubMetadata + , optEPUBFonts = epubFonts , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs , optStrict = strict @@ -702,8 +797,12 @@ main = do , optDataDir = mbDataDir , optBibliography = reffiles , optCslFile = cslfile + , optAbbrevsFile = cslabbrevs , optCiteMethod = citeMethod , optListings = listings + , optLaTeXEngine = latexEngine + , optSlideLevel = slideLevel + , optSetextHeaders = setextHeaders , optAscii = ascii } = opts @@ -726,17 +825,35 @@ main = do then "html" else "markdown" in defaultReaderName fallback sources - else readerName + else readerName let writerName' = if null writerName then defaultWriterName outputFile else writerName + let pdfOutput = map toLower (takeExtension outputFile) == ".pdf" + + when pdfOutput $ do + -- make sure writer is latex or beamer + unless (writerName' == "latex" || writerName' == "beamer" || + writerName' == "latex+lhs") $ + err 47 $ "cannot produce pdf output with " ++ writerName' ++ " writer" + -- check for latex program + mbLatex <- findExecutable latexEngine + case mbLatex of + Nothing -> err 41 $ + latexEngine ++ " not found. " ++ + latexEngine ++ " is needed for pdf output." + Just _ -> return () + reader <- case (lookup readerName' readers) of Just r -> return r - Nothing -> error ("Unknown reader: " ++ readerName') + Nothing -> err 7 ("Unknown reader: " ++ readerName') + + let standalone' = standalone || writerName' `elem` nonTextFormats || pdfOutput templ <- case templatePath of + _ | not standalone' -> return "" Nothing -> do deftemp <- getDefaultTemplate datadir writerName' case deftemp of @@ -756,57 +873,57 @@ main = do (\_ -> throwIO e) else throwIO e) - let standalone' = standalone || isNonTextOutput writerName' - - variables' <- case (writerName', standalone', offline) of - ("s5", True, True) -> do - inc <- s5HeaderIncludes datadir - return $ ("s5includes", inc) : variables - ("slidy", True, True) -> do - slidyJs <- readDataFile datadir $ - "slidy" </> "slidy.min.js" - slidyCss <- readDataFile datadir $ - "slidy" </> "slidy.css" - return $ ("slidy-js", slidyJs) : - ("slidy-css", slidyCss) : variables - _ -> return variables + let slideVariant = case writerName' of + "s5" -> S5Slides + "slidy" -> SlidySlides + "dzslides" -> DZSlides + _ -> NoSlides - variables'' <- case mathMethod of + variables' <- case mathMethod of LaTeXMathML Nothing -> do s <- readDataFile datadir $ "data" </> "LaTeXMathML.js" - return $ ("mathml-script", s) : variables' + return $ ("mathml-script", s) : variables MathML Nothing -> do s <- readDataFile datadir $ "data"</>"MathMLinHTML.js" - return $ ("mathml-script", s) : variables' - _ -> return variables' + return $ ("mathml-script", s) : variables + _ -> return variables + + variables'' <- case slideVariant of + DZSlides -> do + dztempl <- readDataFile datadir $ "dzslides" </> "template.html" + let dzcore = unlines $ dropWhile (not . isPrefixOf "<!-- {{{{ dzslides core") + $ lines dztempl + return $ ("dzslides-core", dzcore) : variables' + _ -> return variables' - refs <- mapM (\f -> catch (readBiblioFile f) $ \e -> do - UTF8.hPutStrLn stderr $ "Error reading bibliography `" ++ f ++ "'" - UTF8.hPutStrLn stderr $ show e - exitWith (ExitFailure 23)) reffiles >>= \rs -> return $ concat rs + -- unescape reference ids, which may contain XML entities, so + -- that we can do lookups with regular string equality + let unescapeRefId ref = ref{ refId = fromEntities (refId ref) } + + refs <- mapM (\f -> catch (CSL.readBiblioFile f) $ \e -> + err 23 $ "Error reading bibliography `" ++ f ++ "'" ++ "\n" ++ show e) + reffiles >>= + return . map unescapeRefId . concat let sourceDir = if null sources then "." else takeDirectory (head sources) - let slideVariant = case writerName' of - "s5" -> S5Slides - "slidy" -> SlidySlides - _ -> NoSlides - let startParserState = defaultParserState { stateParseRaw = parseRaw, stateTabStop = tabStop, stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' || lhsExtension sources, stateStandalone = standalone', - stateCitations = map refId refs, + stateCitations = map CSL.refId refs, stateSmart = smart || writerName' `elem` - ["latex", "context", "latex+lhs", "man"], + ["latex", "context", "latex+lhs", "beamer"], + stateOldDashes = oldDashes, stateColumns = columns, stateStrict = strict, stateIndentedCodeClasses = codeBlockClasses, - stateApplyMacros = writerName' `notElem` ["latex", "latex+lhs"] } + stateApplyMacros = writerName' `notElem` + ["latex", "latex+lhs", "beamer"] } let writerOptions = defaultWriterOptions { writerStandalone = standalone', @@ -836,16 +953,20 @@ main = do writerIdentifierPrefix = idPrefix, writerSourceDirectory = sourceDir, writerUserDataDir = datadir, - writerHtml5 = html5 && - "html" `isPrefixOf` writerName', - writerChapters = chapters, + writerHtml5 = html5 || + slideVariant == DZSlides, + writerChapters = chapters, writerListings = listings, - writerAscii = ascii } + writerBeamer = writerName' == "beamer", + writerSlideLevel = slideLevel, + writerHighlight = highlight, + writerHighlightStyle = highlightStyle, + writerSetextHeaders = setextHeaders + } - when (isNonTextOutput writerName' && outputFile == "-") $ - do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++ - "Specify an output file using the -o option.") - exitWith $ ExitFailure 5 + when (writerName' `elem` nonTextFormats&& outputFile == "-") $ + err 5 $ "Cannot write " ++ writerName' ++ " output to stdout.\n" ++ + "Specify an output file using the -o option." let readSources [] = mapM readSource ["-"] readSources srcs = mapM readSource srcs @@ -859,7 +980,14 @@ main = do let convertTabs = tabFilter (if preserveTabs then 0 else tabStop) - doc <- fmap (reader startParserState . convertTabs . intercalate "\n") (readSources sources) + let handleIncludes' = if readerName' == "latex" || readerName' == "beamer" || + readerName' == "latex+lhs" || + readerName' == "context" + then handleIncludes + else return + + doc <- (reader startParserState) `fmap` (readSources sources >>= + handleIncludes' . convertTabs . intercalate "\n") let doc0 = foldr ($) doc transforms @@ -881,19 +1009,41 @@ main = do replaceDirectory (replaceExtension cslfile "csl") csldir - processBiblio cslfile' refs doc1 + processBiblio cslfile' cslabbrevs refs doc1 else return doc1 + let writeBinary :: B.ByteString -> IO () + writeBinary = B.writeFile (encodeString outputFile) + + let writerFn :: FilePath -> String -> IO () + writerFn "-" = UTF8.putStr + writerFn f = UTF8.writeFile f + case lookup writerName' writers of - Nothing | writerName' == "epub" -> - writeEPUB epubStylesheet writerOptions doc2 - >>= B.writeFile (encodeString outputFile) - Nothing | writerName' == "odt" -> - writeODT referenceODT writerOptions doc2 - >>= B.writeFile (encodeString outputFile) - Just r -> writerFn outputFile result - where writerFn "-" = UTF8.putStr - writerFn f = UTF8.writeFile f - result = r writerOptions doc2 ++ - ['\n' | not standalone'] - Nothing -> error $ "Unknown writer: " ++ writerName' + Nothing + | writerName' == "epub" -> + writeEPUB epubStylesheet epubFonts writerOptions doc2 + >>= writeBinary + | writerName' == "odt" -> + writeODT referenceODT writerOptions doc2 >>= writeBinary + | writerName' == "docx" -> + writeDocx referenceDocx writerOptions doc2 >>= writeBinary + | otherwise -> err 9 ("Unknown writer: " ++ writerName') + Just _ + | pdfOutput -> do + res <- tex2pdf latexEngine $ writeLaTeX writerOptions doc2 + case res of + Right pdf -> writeBinary pdf + Left err' -> err 43 $ toString err' + Just r + | htmlFormat && ascii -> + writerFn outputFile =<< selfcontain (toEntities result) + | otherwise -> + writerFn outputFile =<< selfcontain result + where result = r writerOptions doc2 ++ ['\n' | not standalone'] + htmlFormat = writerName' `elem` + ["html","html+lhs","html5","html5+lhs", + "s5","slidy","dzslides"] + selfcontain = if selfContained && htmlFormat + then makeSelfContained datadir + else return |