summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordr@jones.dk <dr@jones.dk>2011-02-04 00:01:35 +0100
committerdr@jones.dk <dr@jones.dk>2011-02-04 00:01:35 +0100
commit91179df4907bec919e0884019da785be1ceb01b3 (patch)
tree2a6655fb4ec4655c554ea17ad074859d707b7709 /src
parent1f6b4aee268fefc72c84bd305b10d4f9103901eb (diff)
Imported Upstream version 1.8.0.1
Diffstat (limited to 'src')
-rw-r--r--src/Tests/Arbitrary.hs181
-rw-r--r--src/Tests/Helpers.hs116
-rw-r--r--src/Tests/Old.hs201
-rw-r--r--src/Tests/Readers/LaTeX.hs161
-rw-r--r--src/Tests/Readers/Markdown.hs29
-rw-r--r--src/Tests/Readers/RST.hs46
-rw-r--r--src/Tests/Shared.hs21
-rw-r--r--src/Tests/Writers/ConTeXt.hs72
-rw-r--r--src/Tests/Writers/HTML.hs41
-rw-r--r--src/Tests/Writers/Native.hs20
-rw-r--r--src/Text/Pandoc.hs73
-rw-r--r--src/Text/Pandoc/Biblio.hs199
-rw-r--r--src/Text/Pandoc/Blocks.hs146
-rw-r--r--src/Text/Pandoc/CharacterReferences.hs277
-rw-r--r--src/Text/Pandoc/Definition.hs151
-rw-r--r--src/Text/Pandoc/Highlighting.hs12
-rw-r--r--src/Text/Pandoc/Parsing.hs235
-rw-r--r--src/Text/Pandoc/Pretty.hs429
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs1105
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs357
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs481
-rw-r--r--src/Text/Pandoc/Readers/Native.hs81
-rw-r--r--src/Text/Pandoc/Readers/RST.hs203
-rw-r--r--src/Text/Pandoc/Readers/TeXMath.hs47
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs523
-rw-r--r--src/Text/Pandoc/Shared.hs274
-rw-r--r--src/Text/Pandoc/Templates.hs4
-rw-r--r--src/Text/Pandoc/UTF8.hs50
-rw-r--r--src/Text/Pandoc/Writers/ConTeXt.hs159
-rw-r--r--src/Text/Pandoc/Writers/Docbook.hs72
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs9
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs108
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs349
-rw-r--r--src/Text/Pandoc/Writers/Man.hs58
-rw-r--r--src/Text/Pandoc/Writers/Markdown.hs382
-rw-r--r--src/Text/Pandoc/Writers/MediaWiki.hs24
-rw-r--r--src/Text/Pandoc/Writers/Native.hs86
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs5
-rw-r--r--src/Text/Pandoc/Writers/OpenDocument.hs33
-rw-r--r--src/Text/Pandoc/Writers/Org.hs284
-rw-r--r--src/Text/Pandoc/Writers/RST.hs177
-rw-r--r--src/Text/Pandoc/Writers/RTF.hs34
-rw-r--r--src/Text/Pandoc/Writers/Texinfo.hs108
-rw-r--r--src/Text/Pandoc/Writers/Textile.hs422
-rw-r--r--src/Text/Pandoc/XML.hs19
-rw-r--r--src/markdown2pdf.hs2
-rw-r--r--src/pandoc.hs327
-rw-r--r--src/test-pandoc.hs32
48 files changed, 5519 insertions, 2706 deletions
diff --git a/src/Tests/Arbitrary.hs b/src/Tests/Arbitrary.hs
new file mode 100644
index 000000000..978717bef
--- /dev/null
+++ b/src/Tests/Arbitrary.hs
@@ -0,0 +1,181 @@
+{-# OPTIONS_GHC -fno-warn-orphans #-}
+{-# LANGUAGE TypeSynonymInstances #-}
+-- provides Arbitrary instance for Pandoc types
+module Tests.Arbitrary ()
+where
+import Test.QuickCheck.Gen
+import Test.QuickCheck.Arbitrary
+import Control.Monad (liftM, liftM2)
+import Text.Pandoc
+import Text.Pandoc.Shared
+import Text.Pandoc.Builder
+
+realString :: Gen String
+realString = resize 8 arbitrary -- elements wordlist
+
+{-
+wordlist :: [String]
+wordlist = ["foo","Bar","baz","\\","/",":","\"","'","féé"]
+-}
+
+instance Arbitrary Inlines where
+ arbitrary = liftM fromList arbitrary
+
+instance Arbitrary Blocks where
+ arbitrary = liftM fromList arbitrary
+
+instance Arbitrary Inline where
+ arbitrary = resize 3 $ arbInline 3
+
+-- restrict to 3 levels of nesting max; otherwise we get
+-- bogged down in indefinitely large structures
+arbInline :: Int -> Gen Inline
+arbInline n = frequency $ [ (60, liftM Str realString)
+ , (60, return Space)
+ , (10, liftM2 Code arbitrary realString)
+ , (5, return EmDash)
+ , (5, return EnDash)
+ , (5, return Apostrophe)
+ , (5, return Ellipses)
+ , (5, elements [ RawInline "html" "<a>*&amp;*</a>"
+ , RawInline "latex" "\\my{command}" ])
+ ] ++ [ x | x <- nesters, n > 1]
+ where nesters = [ (10, liftM Emph $ listOf $ arbInline (n-1))
+ , (10, liftM Strong $ listOf $ arbInline (n-1))
+ , (10, liftM Strikeout $ listOf $ arbInline (n-1))
+ , (10, liftM Superscript $ listOf $ arbInline (n-1))
+ , (10, liftM Subscript $ listOf $ arbInline (n-1))
+ , (10, liftM SmallCaps $ listOf $ arbInline (n-1))
+ , (10, do x1 <- arbitrary
+ x2 <- listOf $ arbInline (n-1)
+ return $ Quoted x1 x2)
+ , (10, do x1 <- arbitrary
+ x2 <- realString
+ return $ Math x1 x2)
+ , (10, do x1 <- listOf $ arbInline (n-1)
+ x3 <- realString
+ x2 <- realString
+ return $ Link x1 (x2,x3))
+ , (10, do x1 <- listOf $ arbInline (n-1)
+ x3 <- realString
+ x2 <- realString
+ return $ Image x1 (x2,x3))
+ , (2, liftM Note $ resize 3 $ listOf1 arbitrary)
+ ]
+
+instance Arbitrary Block where
+ arbitrary = resize 3 $ arbBlock 3
+
+arbBlock :: Int -> Gen Block
+arbBlock n = frequency $ [ (10, liftM Plain arbitrary)
+ , (15, liftM Para arbitrary)
+ , (5, liftM2 CodeBlock arbitrary realString)
+ , (2, elements [ RawBlock "html"
+ "<div>\n*&amp;*\n</div>"
+ , RawBlock "latex"
+ "\\begin[opt]{env}\nhi\n{\\end{env}"
+ ])
+ , (5, do x1 <- choose (1 :: Int, 6)
+ x2 <- arbitrary
+ return (Header x1 x2))
+ , (2, return HorizontalRule)
+ ] ++ [x | x <- nesters, n > 0]
+ where nesters = [ (5, liftM BlockQuote $ listOf $ arbBlock (n-1))
+ , (5, liftM2 OrderedList arbitrary
+ $ (listOf1 $ listOf1 $ arbBlock (n-1)))
+ , (5, liftM BulletList $ (listOf1 $ listOf1 $ arbBlock (n-1)))
+ , (5, do x1 <- listOf $ listOf1 $ listOf1 $ arbBlock (n-1)
+ x2 <- arbitrary
+ return (DefinitionList $ zip x2 x1))
+ , (2, do rs <- choose (1 :: Int, 4)
+ cs <- choose (1 :: Int, 4)
+ x1 <- arbitrary
+ x2 <- vector cs
+ x3 <- vectorOf cs $ elements [0, 0.25]
+ x4 <- vectorOf cs $ listOf $ arbBlock (n-1)
+ x5 <- vectorOf rs $ vectorOf cs
+ $ listOf $ arbBlock (n-1)
+ return (Table x1 x2 x3 x4 x5))
+ ]
+
+instance Arbitrary Pandoc where
+ arbitrary = resize 8 $ liftM normalize
+ $ liftM2 Pandoc arbitrary arbitrary
+
+{-
+instance Arbitrary CitationMode where
+ arbitrary
+ = do x <- choose (0 :: Int, 2)
+ case x of
+ 0 -> return AuthorInText
+ 1 -> return SuppressAuthor
+ 2 -> return NormalCitation
+ _ -> error "FATAL ERROR: Arbitrary instance, logic bug"
+
+instance Arbitrary Citation where
+ arbitrary
+ = do x1 <- liftM (filter (`notElem` ",;]@ \t\n")) arbitrary
+ x2 <- arbitrary
+ x3 <- arbitrary
+ x4 <- arbitrary
+ x5 <- arbitrary
+ x6 <- arbitrary
+ return (Citation x1 x2 x3 x4 x5 x6)
+-}
+
+instance Arbitrary MathType where
+ arbitrary
+ = do x <- choose (0 :: Int, 1)
+ case x of
+ 0 -> return DisplayMath
+ 1 -> return InlineMath
+ _ -> error "FATAL ERROR: Arbitrary instance, logic bug"
+
+instance Arbitrary QuoteType where
+ arbitrary
+ = do x <- choose (0 :: Int, 1)
+ case x of
+ 0 -> return SingleQuote
+ 1 -> return DoubleQuote
+ _ -> error "FATAL ERROR: Arbitrary instance, logic bug"
+
+instance Arbitrary Meta where
+ arbitrary
+ = do x1 <- arbitrary
+ x2 <- liftM (filter (not . null)) arbitrary
+ x3 <- arbitrary
+ return (Meta x1 x2 x3)
+
+instance Arbitrary Alignment where
+ arbitrary
+ = do x <- choose (0 :: Int, 3)
+ case x of
+ 0 -> return AlignLeft
+ 1 -> return AlignRight
+ 2 -> return AlignCenter
+ 3 -> return AlignDefault
+ _ -> error "FATAL ERROR: Arbitrary instance, logic bug"
+
+instance Arbitrary ListNumberStyle where
+ arbitrary
+ = do x <- choose (0 :: Int, 6)
+ case x of
+ 0 -> return DefaultStyle
+ 1 -> return Example
+ 2 -> return Decimal
+ 3 -> return LowerRoman
+ 4 -> return UpperRoman
+ 5 -> return LowerAlpha
+ 6 -> return UpperAlpha
+ _ -> error "FATAL ERROR: Arbitrary instance, logic bug"
+
+instance Arbitrary ListNumberDelim where
+ arbitrary
+ = do x <- choose (0 :: Int, 3)
+ case x of
+ 0 -> return DefaultDelim
+ 1 -> return Period
+ 2 -> return OneParen
+ 3 -> return TwoParens
+ _ -> error "FATAL ERROR: Arbitrary instance, logic bug"
+
diff --git a/src/Tests/Helpers.hs b/src/Tests/Helpers.hs
new file mode 100644
index 000000000..b8d6b83a7
--- /dev/null
+++ b/src/Tests/Helpers.hs
@@ -0,0 +1,116 @@
+{-# LANGUAGE TypeSynonymInstances, FlexibleInstances, TemplateHaskell #-}
+-- Utility functions for the test suite.
+
+module Tests.Helpers ( lit
+ , file
+ , test
+ , (=?>)
+ , property
+ , ToString(..)
+ , ToPandoc(..)
+ )
+ where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder (Inlines, Blocks, doc, plain)
+import Test.Framework
+import Test.Framework.Providers.HUnit
+import Test.Framework.Providers.QuickCheck2
+import Test.HUnit (assertBool)
+import Text.Pandoc.Shared (normalize, defaultWriterOptions,
+ WriterOptions(..), removeTrailingSpace)
+import Text.Pandoc.Writers.Native (writeNative)
+import Language.Haskell.TH.Quote (QuasiQuoter(..))
+import Language.Haskell.TH.Syntax (Q, runIO)
+import qualified Test.QuickCheck.Property as QP
+import System.Console.ANSI
+import Data.Algorithm.Diff
+
+lit :: QuasiQuoter
+lit = QuasiQuoter {
+ quoteExp = (\a -> let b = rnl a in [|b|]) . filter (/= '\r')
+ , quotePat = error "Cannot use lit as a pattern"
+ }
+ where rnl ('\n':xs) = xs
+ rnl xs = xs
+
+file :: QuasiQuoter
+file = quoteFile lit
+
+-- adapted from TH 2.5 code
+quoteFile :: QuasiQuoter -> QuasiQuoter
+quoteFile (QuasiQuoter { quoteExp = qe, quotePat = qp }) =
+ QuasiQuoter { quoteExp = get qe, quotePat = get qp }
+ where
+ get :: (String -> Q a) -> String -> Q a
+ get old_quoter file_name = do { file_cts <- runIO (readFile file_name)
+ ; old_quoter file_cts }
+
+test :: (ToString a, ToString b, ToString c)
+ => (a -> b) -- ^ function to test
+ -> String -- ^ name of test case
+ -> (a, c) -- ^ (input, expected value)
+ -> Test
+test fn name (input, expected) =
+ testCase name $ assertBool msg (actual' == expected')
+ where msg = nl ++ dashes "input" ++ nl ++ input' ++ nl ++
+ dashes "expected" ++ nl ++ expected'' ++
+ dashes "got" ++ nl ++ actual'' ++
+ dashes ""
+ nl = "\n"
+ input' = toString input
+ actual' = toString $ fn input
+ expected' = toString expected
+ diff = getDiff (lines expected') (lines actual')
+ expected'' = unlines $ map vividize $ filter (\(d,_) -> d /= S) diff
+ actual'' = unlines $ map vividize $ filter (\(d,_) -> d /= F) diff
+ dashes "" = replicate 72 '-'
+ dashes x = replicate (72 - length x - 5) '-' ++ " " ++ x ++ " ---"
+
+vividize :: (DI,String) -> String
+vividize (B,s) = s
+vividize (_,s) = vivid s
+
+property :: QP.Testable a => TestName -> a -> Test
+property = testProperty
+
+vivid :: String -> String
+vivid s = setSGRCode [SetColor Background Dull Red
+ , SetColor Foreground Vivid White] ++ s
+ ++ setSGRCode [Reset]
+
+infix 6 =?>
+(=?>) :: a -> b -> (a,b)
+x =?> y = (x, y)
+
+class ToString a where
+ toString :: a -> String
+
+instance ToString Pandoc where
+ toString d = writeNative defaultWriterOptions{ writerStandalone = s }
+ $ toPandoc d
+ where s = case d of
+ (Pandoc (Meta [] [] []) _) -> False
+ _ -> True
+
+instance ToString Blocks where
+ toString = writeNative defaultWriterOptions . toPandoc
+
+instance ToString Inlines where
+ toString = removeTrailingSpace . writeNative defaultWriterOptions .
+ toPandoc
+
+instance ToString String where
+ toString = id
+
+class ToPandoc a where
+ toPandoc :: a -> Pandoc
+
+instance ToPandoc Pandoc where
+ toPandoc = normalize
+
+instance ToPandoc Blocks where
+ toPandoc = normalize . doc
+
+instance ToPandoc Inlines where
+ toPandoc = normalize . doc . plain
diff --git a/src/Tests/Old.hs b/src/Tests/Old.hs
new file mode 100644
index 000000000..cb1417ffa
--- /dev/null
+++ b/src/Tests/Old.hs
@@ -0,0 +1,201 @@
+module Tests.Old (tests) where
+
+import Test.Framework (testGroup, Test )
+import Test.Framework.Providers.HUnit
+import Test.HUnit ( assertBool )
+
+import System.IO ( openTempFile, stderr )
+import System.Process ( runProcess, waitForProcess )
+import System.FilePath ( (</>), (<.>) )
+import System.Directory
+import System.Exit
+import Data.Algorithm.Diff
+import Text.Pandoc.Shared ( normalize, defaultWriterOptions )
+import Text.Pandoc.Writers.Native ( writeNative )
+import Text.Pandoc.Readers.Native ( readNative )
+import Text.Pandoc.Highlighting ( languages )
+import Prelude hiding ( readFile )
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy.UTF8 (toString)
+import Text.Printf
+
+readFileUTF8 :: FilePath -> IO String
+readFileUTF8 f = B.readFile f >>= return . toString
+
+pandocPath :: FilePath
+pandocPath = ".." </> "dist" </> "build" </> "pandoc" </> "pandoc"
+
+data TestResult = TestPassed
+ | TestError ExitCode
+ | TestFailed String FilePath [(DI, String)]
+ deriving (Eq)
+
+instance Show TestResult where
+ show TestPassed = "PASSED"
+ show (TestError ec) = "ERROR " ++ show ec
+ show (TestFailed cmd file d) = '\n' : dash ++
+ "\n--- " ++ file ++
+ "\n+++ " ++ cmd ++ "\n" ++ showDiff (1,1) d ++
+ dash
+ where dash = replicate 72 '-'
+
+showDiff :: (Int,Int) -> [(DI, String)] -> String
+showDiff _ [] = ""
+showDiff (l,r) ((F, ln) : ds) =
+ printf "+%4d " l ++ ln ++ "\n" ++ showDiff (l+1,r) ds
+showDiff (l,r) ((S, ln) : ds) =
+ printf "-%4d " r ++ ln ++ "\n" ++ showDiff (l,r+1) ds
+showDiff (l,r) ((B, _ ) : ds) =
+ showDiff (l+1,r+1) ds
+
+tests :: [Test]
+tests = [ testGroup "markdown"
+ [ testGroup "writer"
+ $ writerTests "markdown" ++ lhsWriterTests "markdown"
+ , testGroup "reader"
+ [ test "basic" ["-r", "markdown", "-w", "native", "-s", "-S"]
+ "testsuite.txt" "testsuite.native"
+ , test "tables" ["-r", "markdown", "-w", "native", "--columns=80"]
+ "tables.txt" "tables.native"
+ , test "more" ["-r", "markdown", "-w", "native", "-S"]
+ "markdown-reader-more.txt" "markdown-reader-more.native"
+ , lhsReaderTest "markdown+lhs"
+ ]
+ , testGroup "citations" markdownCitationTests
+ ]
+ , testGroup "rst"
+ [ testGroup "writer" (writerTests "rst" ++ lhsWriterTests "rst")
+ , testGroup "reader"
+ [ test "basic" ["-r", "rst", "-w", "native",
+ "-s", "-S", "--columns=80"] "rst-reader.rst" "rst-reader.native"
+ , test "tables" ["-r", "rst", "-w", "native", "--columns=80"]
+ "tables.rst" "tables-rstsubset.native"
+ , lhsReaderTest "rst+lhs"
+ ]
+ ]
+ , testGroup "latex"
+ [ testGroup "writer" (writerTests "latex" ++ lhsWriterTests "latex")
+ , testGroup "reader"
+ [ test "basic" ["-r", "latex", "-w", "native", "-s", "-R"]
+ "latex-reader.latex" "latex-reader.native"
+ , lhsReaderTest "latex+lhs"
+ ]
+ ]
+ , testGroup "html"
+ [ testGroup "writer" (writerTests "html" ++ lhsWriterTests "html")
+ , test "reader" ["-r", "html", "-w", "native", "-s"]
+ "html-reader.html" "html-reader.native"
+ ]
+ , testGroup "s5"
+ [ s5WriterTest "basic" ["-s"] "s5"
+ , s5WriterTest "fancy" ["-s","-m","-i"] "s5"
+ , s5WriterTest "fragment" [] "html"
+ , s5WriterTest "inserts" ["-s", "-H", "insert",
+ "-B", "insert", "-A", "insert", "-c", "main.css"] "html"
+ ]
+ , testGroup "textile"
+ [ testGroup "writer" $ writerTests "textile"
+ , test "reader" ["-r", "textile", "-w", "native", "-s"]
+ "textile-reader.textile" "textile-reader.native"
+ ]
+ , testGroup "native"
+ [ testGroup "writer" $ writerTests "native"
+ , test "reader" ["-r", "native", "-w", "native", "-s"]
+ "testsuite.native" "testsuite.native"
+ ]
+ , testGroup "other writers" $ map (\f -> testGroup f $ writerTests f)
+ [ "docbook", "opendocument" , "context" , "texinfo"
+ , "man" , "plain" , "mediawiki", "rtf", "org"
+ ]
+ ]
+
+-- makes sure file is fully closed after reading
+readFile' :: FilePath -> IO String
+readFile' f = do s <- readFileUTF8 f
+ return $! (length s `seq` s)
+
+lhsWriterTests :: String -> [Test]
+lhsWriterTests format
+ = [ t "lhs to normal" format
+ , t "lhs to lhs" (format ++ "+lhs")
+ ]
+ where
+ t n f = test n ["--columns=78", "-r", "native", "-s", "-w", f]
+ "lhs-test.native" ("lhs-test" <.> ext f)
+ ext f = if null languages && format == "html"
+ then "nohl" <.> f
+ else f
+
+lhsReaderTest :: String -> Test
+lhsReaderTest format =
+ testWithNormalize normalizer "lhs" ["-r", format, "-w", "native"]
+ ("lhs-test" <.> format) "lhs-test.native"
+ where normalizer = writeNative defaultWriterOptions . normalize . readNative
+
+writerTests :: String -> [Test]
+writerTests format
+ = [ test "basic" (opts ++ ["-s"]) "testsuite.native" ("writer" <.> format)
+ , test "tables" opts "tables.native" ("tables" <.> format)
+ ]
+ where
+ opts = ["-r", "native", "-w", format, "--columns=78"]
+
+s5WriterTest :: String -> [String] -> String -> Test
+s5WriterTest modifier opts format
+ = test (format ++ " writer (" ++ modifier ++ ")")
+ (["-r", "native", "-w", format] ++ opts)
+ "s5.native" ("s5." ++ modifier <.> "html")
+
+markdownCitationTests :: [Test]
+markdownCitationTests
+ = map styleToTest ["chicago-author-date","ieee","mhra"]
+ ++ [test "natbib" wopts "markdown-citations.txt"
+ "markdown-citations.txt"]
+ where
+ ropts = ["-r", "markdown", "-w", "markdown", "--bibliography",
+ "biblio.bib", "--no-wrap"]
+ wopts = ropts ++ ["--natbib"]
+ styleToTest style = test style (ropts ++ ["--csl", style ++ ".csl"])
+ "markdown-citations.txt"
+ ("markdown-citations." ++ style ++ ".txt")
+
+-- | Run a test without normalize function, return True if test passed.
+test :: String -- ^ Title of test
+ -> [String] -- ^ Options to pass to pandoc
+ -> String -- ^ Input filepath
+ -> FilePath -- ^ Norm (for test results) filepath
+ -> Test
+test = testWithNormalize id
+
+-- | Run a test with normalize function, return True if test passed.
+testWithNormalize :: (String -> String) -- ^ Normalize function for output
+ -> String -- ^ Title of test
+ -> [String] -- ^ Options to pass to pandoc
+ -> String -- ^ Input filepath
+ -> FilePath -- ^ Norm (for test results) filepath
+ -> Test
+testWithNormalize normalizer testname opts inp norm = testCase testname $ do
+ (outputPath, hOut) <- openTempFile "" "pandoc-test"
+ let inpPath = inp
+ let normPath = norm
+ let options = ["--data-dir", ".."] ++ [inpPath] ++ opts
+ let cmd = pandocPath ++ " " ++ unwords options
+ ph <- runProcess pandocPath options Nothing
+ (Just [("LANG","en_US.UTF-8"),("HOME", "./")]) Nothing (Just hOut)
+ (Just stderr)
+ ec <- waitForProcess ph
+ result <- if ec == ExitSuccess
+ then do
+ -- filter \r so the tests will work on Windows machines
+ outputContents <- readFile' outputPath >>=
+ return . filter (/='\r') . normalizer
+ normContents <- readFile' normPath >>=
+ return . filter (/='\r') . normalizer
+ if outputContents == normContents
+ then return TestPassed
+ else return
+ $ TestFailed cmd normPath
+ $ getDiff (lines outputContents) (lines normContents)
+ else return $ TestError ec
+ removeFile outputPath
+ assertBool (show result) (result == TestPassed)
diff --git a/src/Tests/Readers/LaTeX.hs b/src/Tests/Readers/LaTeX.hs
new file mode 100644
index 000000000..6d28441f8
--- /dev/null
+++ b/src/Tests/Readers/LaTeX.hs
@@ -0,0 +1,161 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Tests.Readers.LaTeX (tests) where
+
+import Text.Pandoc.Definition
+import Test.Framework
+import Tests.Helpers
+import Tests.Arbitrary()
+import Text.Pandoc.Builder
+import Text.Pandoc
+
+latex :: String -> Pandoc
+latex = readLaTeX defaultParserState
+
+infix 5 =:
+(=:) :: ToString c
+ => String -> (String, c) -> Test
+(=:) = test latex
+
+tests :: [Test]
+tests = [ testGroup "basic"
+ [ "simple" =:
+ "word" =?> para "word"
+ , "space" =:
+ "some text" =?> para ("some text")
+ , "emphasized" =:
+ "\\emph{emphasized}" =?> para (emph "emphasized")
+ ]
+
+ , testGroup "headers"
+ [ "level 1" =:
+ "\\section{header}" =?> header 1 "header"
+ , "level 2" =:
+ "\\subsection{header}" =?> header 2 "header"
+ , "level 3" =:
+ "\\subsubsection{header}" =?> header 3 "header"
+ , "emph" =:
+ "\\section{text \\emph{emph}}" =?>
+ header 1 ("text" +++ space +++ emph "emph")
+ , "link" =:
+ "\\section{text \\href{/url}{link}}" =?>
+ header 1 ("text" +++ space +++ link "/url" "" "link")
+ ]
+
+ , testGroup "space and comments"
+ [ "blank lines + space at beginning" =:
+ "\n \n hi" =?> para "hi"
+ , "blank lines + space + comments" =:
+ "% my comment\n\n \n % another\n\nhi" =?> para "hi"
+ , "comment in paragraph" =:
+ "hi % this is a comment\nthere\n" =?> para "hi there"
+ ]
+
+ , testGroup "citations"
+ [ natbibCitations
+ , biblatexCitations
+ ]
+ ]
+
+baseCitation :: Citation
+baseCitation = Citation{ citationId = "item1"
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = AuthorInText
+ , citationNoteNum = 0
+ , citationHash = 0 }
+
+natbibCitations :: Test
+natbibCitations = testGroup "natbib"
+ [ "citet" =: "\\citet{item1}"
+ =?> para (cite [baseCitation] empty)
+ , "suffix" =: "\\citet[p.~30]{item1}"
+ =?> para
+ (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] empty)
+ , "suffix long" =: "\\citet[p.~30, with suffix]{item1}"
+ =?> para (cite [baseCitation{ citationSuffix =
+ toList $ text "p.\160\&30, with suffix" }] empty)
+ , "multiple" =: "\\citeauthor{item1} \\citetext{\\citeyear{item1}; \\citeyear[p.~30]{item2}; \\citealp[see also][]{item3}}"
+ =?> para (cite [baseCitation{ citationMode = AuthorInText }
+ ,baseCitation{ citationMode = SuppressAuthor
+ , citationSuffix = [Str "p.\160\&30"]
+ , citationId = "item2" }
+ ,baseCitation{ citationId = "item3"
+ , citationPrefix = [Str "see",Space,Str "also"]
+ , citationMode = NormalCitation }
+ ] empty)
+ , "group" =: "\\citetext{\\citealp[see][p.~34--35]{item1}; \\citealp[also][chap. 3]{item3}}"
+ =?> para (cite [baseCitation{ citationMode = NormalCitation
+ , citationPrefix = [Str "see"]
+ , citationSuffix = [Str "p.\160\&34",EnDash,Str "35"] }
+ ,baseCitation{ citationMode = NormalCitation
+ , citationId = "item3"
+ , citationPrefix = [Str "also"]
+ , citationSuffix = [Str "chap.",Space,Str "3"] }
+ ] empty)
+ , "suffix and locator" =: "\\citep[pp.~33, 35--37, and nowhere else]{item1}"
+ =?> para (cite [baseCitation{ citationMode = NormalCitation
+ , citationSuffix = [Str "pp.\160\&33,",Space,Str "35",EnDash,Str "37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] empty)
+ , "suffix only" =: "\\citep[and nowhere else]{item1}"
+ =?> para (cite [baseCitation{ citationMode = NormalCitation
+ , citationSuffix = toList $ text "and nowhere else" }] empty)
+ , "no author" =: "\\citeyearpar{item1}, and now Doe with a locator \\citeyearpar[p.~44]{item2}"
+ =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] empty +++
+ text ", and now Doe with a locator " +++
+ cite [baseCitation{ citationMode = SuppressAuthor
+ , citationSuffix = [Str "p.\160\&44"]
+ , citationId = "item2" }] empty)
+ , "markup" =: "\\citep[\\emph{see}][p. \\textbf{32}]{item1}"
+ =?> para (cite [baseCitation{ citationMode = NormalCitation
+ , citationPrefix = [Emph [Str "see"]]
+ , citationSuffix = [Str "p.",Space,
+ Strong [Str "32"]] }] empty)
+ ]
+
+biblatexCitations :: Test
+biblatexCitations = testGroup "biblatex"
+ [ "textcite" =: "\\textcite{item1}"
+ =?> para (cite [baseCitation] empty)
+ , "suffix" =: "\\textcite[p.~30]{item1}"
+ =?> para
+ (cite [baseCitation{ citationSuffix = toList $ text "p.\160\&30" }] empty)
+ , "suffix long" =: "\\textcite[p.~30, with suffix]{item1}"
+ =?> para (cite [baseCitation{ citationSuffix =
+ toList $ text "p.\160\&30, with suffix" }] empty)
+ , "multiple" =: "\\textcites{item1}[p.~30]{item2}[see also][]{item3}"
+ =?> para (cite [baseCitation{ citationMode = AuthorInText }
+ ,baseCitation{ citationMode = NormalCitation
+ , citationSuffix = [Str "p.\160\&30"]
+ , citationId = "item2" }
+ ,baseCitation{ citationId = "item3"
+ , citationPrefix = [Str "see",Space,Str "also"]
+ , citationMode = NormalCitation }
+ ] empty)
+ , "group" =: "\\autocites[see][p.~34--35]{item1}[also][chap. 3]{item3}"
+ =?> para (cite [baseCitation{ citationMode = NormalCitation
+ , citationPrefix = [Str "see"]
+ , citationSuffix = [Str "p.\160\&34",EnDash,Str "35"] }
+ ,baseCitation{ citationMode = NormalCitation
+ , citationId = "item3"
+ , citationPrefix = [Str "also"]
+ , citationSuffix = [Str "chap.",Space,Str "3"] }
+ ] empty)
+ , "suffix and locator" =: "\\autocite[pp.~33, 35--37, and nowhere else]{item1}"
+ =?> para (cite [baseCitation{ citationMode = NormalCitation
+ , citationSuffix = [Str "pp.\160\&33,",Space,Str "35",EnDash,Str "37,",Space,Str "and",Space,Str "nowhere",Space, Str "else"] }] empty)
+ , "suffix only" =: "\\autocite[and nowhere else]{item1}"
+ =?> para (cite [baseCitation{ citationMode = NormalCitation
+ , citationSuffix = toList $ text "and nowhere else" }] empty)
+ , "no author" =: "\\autocite*{item1}, and now Doe with a locator \\autocite*[p.~44]{item2}"
+ =?> para (cite [baseCitation{ citationMode = SuppressAuthor }] empty +++
+ text ", and now Doe with a locator " +++
+ cite [baseCitation{ citationMode = SuppressAuthor
+ , citationSuffix = [Str "p.\160\&44"]
+ , citationId = "item2" }] empty)
+ , "markup" =: "\\autocite[\\emph{see}][p. \\textbf{32}]{item1}"
+ =?> para (cite [baseCitation{ citationMode = NormalCitation
+ , citationPrefix = [Emph [Str "see"]]
+ , citationSuffix = [Str "p.",Space,
+ Strong [Str "32"]] }] empty)
+ , "parencite" =: "\\parencite{item1}"
+ =?> para (cite [baseCitation{ citationMode = NormalCitation }] empty)
+ ]
diff --git a/src/Tests/Readers/Markdown.hs b/src/Tests/Readers/Markdown.hs
new file mode 100644
index 000000000..722a45bdb
--- /dev/null
+++ b/src/Tests/Readers/Markdown.hs
@@ -0,0 +1,29 @@
+{-# LANGUAGE OverloadedStrings, QuasiQuotes #-}
+module Tests.Readers.Markdown (tests) where
+
+import Text.Pandoc.Definition
+import Test.Framework
+import Tests.Helpers
+import Tests.Arbitrary()
+import Text.Pandoc.Builder
+import Text.Pandoc
+
+markdown :: String -> Pandoc
+markdown = readMarkdown defaultParserState{ stateStandalone = True }
+
+infix 5 =:
+(=:) :: ToString c
+ => String -> (String, c) -> Test
+(=:) = test markdown
+
+tests :: [Test]
+tests = [ testGroup "inline code"
+ [ "with attribute" =:
+ "`document.write(\"Hello\");`{.javascript}"
+ =?> para
+ (codeWith ("",["javascript"],[]) "document.write(\"Hello\");")
+ , "with attribute space" =:
+ "`*` {.haskell .special x=\"7\"}"
+ =?> para (codeWith ("",["haskell","special"],[("x","7")]) "*")
+ ]
+ ]
diff --git a/src/Tests/Readers/RST.hs b/src/Tests/Readers/RST.hs
new file mode 100644
index 000000000..c0f60ff51
--- /dev/null
+++ b/src/Tests/Readers/RST.hs
@@ -0,0 +1,46 @@
+{-# LANGUAGE OverloadedStrings, QuasiQuotes #-}
+module Tests.Readers.RST (tests) where
+
+import Text.Pandoc.Definition
+import Test.Framework
+import Tests.Helpers
+import Tests.Arbitrary()
+import Text.Pandoc.Builder
+import Text.Pandoc
+
+rst :: String -> Pandoc
+rst = readRST defaultParserState{ stateStandalone = True }
+
+infix 5 =:
+(=:) :: ToString c
+ => String -> (String, c) -> Test
+(=:) = test rst
+
+tests :: [Test]
+tests = [ "field list" =:
+ [_LIT|
+:Hostname: media08
+:IP address: 10.0.0.19
+:Size: 3ru
+:Date: 2001-08-16
+:Version: 1
+:Authors: - Me
+ - Myself
+ - I
+:Indentation: Since the field marker may be quite long, the second
+ and subsequent lines of the field body do not have to line up
+ with the first line, but they must be indented relative to the
+ field name marker, and they must line up with each other.
+:Parameter i: integer
+|] =?> ( setAuthors ["Me","Myself","I"]
+ $ setDate "2001-08-16"
+ $ doc
+ $ definitionList [ (str "Hostname", [para "media08"])
+ , (str "IP address", [para "10.0.0.19"])
+ , (str "Size", [para "3ru"])
+ , (str "Version", [para "1"])
+ , (str "Indentation", [para "Since the field marker may be quite long, the second and subsequent lines of the field body do not have to line up with the first line, but they must be indented relative to the field name marker, and they must line up with each other."])
+ , (str "Parameter i", [para "integer"])
+ ])
+ ]
+
diff --git a/src/Tests/Shared.hs b/src/Tests/Shared.hs
new file mode 100644
index 000000000..c35a158c1
--- /dev/null
+++ b/src/Tests/Shared.hs
@@ -0,0 +1,21 @@
+module Tests.Shared (tests) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Test.Framework
+import Tests.Helpers
+import Tests.Arbitrary()
+
+tests :: [Test]
+tests = [ testGroup "normalize"
+ [ property "p_normalize_blocks_rt" p_normalize_blocks_rt
+ , property "p_normalize_inlines_rt" p_normalize_inlines_rt
+ ]
+ ]
+
+p_normalize_blocks_rt :: [Block] -> Bool
+p_normalize_blocks_rt bs = normalize bs == normalize (normalize bs)
+
+p_normalize_inlines_rt :: [Inline] -> Bool
+p_normalize_inlines_rt ils = normalize ils == normalize (normalize ils)
+
diff --git a/src/Tests/Writers/ConTeXt.hs b/src/Tests/Writers/ConTeXt.hs
new file mode 100644
index 000000000..704571e95
--- /dev/null
+++ b/src/Tests/Writers/ConTeXt.hs
@@ -0,0 +1,72 @@
+{-# LANGUAGE OverloadedStrings, QuasiQuotes #-}
+module Tests.Writers.ConTeXt (tests) where
+
+import Test.Framework
+import Text.Pandoc.Builder
+import Text.Pandoc
+import Tests.Helpers
+import Tests.Arbitrary()
+
+context :: (ToString a, ToPandoc a) => a -> String
+context = writeConTeXt defaultWriterOptions . toPandoc
+
+context' :: (ToString a, ToPandoc a) => a -> String
+context' = writeConTeXt defaultWriterOptions{ writerWrapText = False }
+ . toPandoc
+
+{-
+ "my test" =: X =?> Y
+
+is shorthand for
+
+ test context "my test" $ X =?> Y
+
+which is in turn shorthand for
+
+ test context "my test" (X,Y)
+-}
+
+infix 5 =:
+(=:) :: (ToString a, ToPandoc a)
+ => String -> (a, String) -> Test
+(=:) = test context
+
+tests :: [Test]
+tests = [ testGroup "inline code"
+ [ "with '}'" =: code "}" =?> "\\mono{\\letterclosebrace{}}"
+ , "without '}'" =: code "]" =?> "\\type{]}"
+ , property "code property" $ \s -> null s ||
+ if '{' `elem` s || '}' `elem` s
+ then (context' $ code s) == "\\mono{" ++
+ (context' $ str s) ++ "}"
+ else (context' $ code s) == "\\type{" ++ s ++ "}"
+ ]
+ , testGroup "headers"
+ [ "level 1" =:
+ header 1 "My header" =?> "\\subject{My header}"
+ , property "header 1 property" $ \ils ->
+ context' (header 1 ils) == "\\subject{" ++ context' ils ++ "}"
+ ]
+ , testGroup "bullet lists"
+ [ "nested" =:
+ bulletList [plain (text "top")
+ ,bulletList [plain (text "next")
+ ,bulletList [plain (text "bot")]]]
+ =?> [_LIT|
+\startitemize
+\item
+ top
+\item
+ \startitemize
+ \item
+ next
+ \item
+ \startitemize
+ \item
+ bot
+ \stopitemize
+ \stopitemize
+\stopitemize|]
+ ]
+ ]
+
diff --git a/src/Tests/Writers/HTML.hs b/src/Tests/Writers/HTML.hs
new file mode 100644
index 000000000..e13d0dc87
--- /dev/null
+++ b/src/Tests/Writers/HTML.hs
@@ -0,0 +1,41 @@
+{-# LANGUAGE OverloadedStrings, QuasiQuotes #-}
+module Tests.Writers.HTML (tests) where
+
+import Test.Framework
+import Text.Pandoc.Builder
+import Text.Pandoc
+import Tests.Helpers
+import Tests.Arbitrary()
+import Text.Pandoc.Highlighting (languages) -- null if no hl support
+
+html :: (ToString a, ToPandoc a) => a -> String
+html = writeHtmlString defaultWriterOptions{ writerWrapText = False } . toPandoc
+
+{-
+ "my test" =: X =?> Y
+
+is shorthand for
+
+ test html "my test" $ X =?> Y
+
+which is in turn shorthand for
+
+ test html "my test" (X,Y)
+-}
+
+infix 5 =:
+(=:) :: (ToString a, ToPandoc a)
+ => String -> (a, String) -> Test
+(=:) = test html
+
+tests :: [Test]
+tests = [ testGroup "inline code"
+ [ "basic" =: code "@&" =?> "<code>@&amp;</code>"
+ , "haskell" =: codeWith ("",["haskell"],[]) ">>="
+ =?> if null languages
+ then "<code class=\"haskell\">&gt;&gt;=</code>"
+ else "<code class=\"sourceCode haskell\"><span class=\"fu\">&gt;&gt;=</span></code>"
+ , "nolanguage" =: codeWith ("",["nolanguage"],[]) ">>="
+ =?> "<code class=\"nolanguage\">&gt;&gt;=</code>"
+ ]
+ ]
diff --git a/src/Tests/Writers/Native.hs b/src/Tests/Writers/Native.hs
new file mode 100644
index 000000000..234fe938a
--- /dev/null
+++ b/src/Tests/Writers/Native.hs
@@ -0,0 +1,20 @@
+module Tests.Writers.Native (tests) where
+
+import Test.Framework
+import Text.Pandoc.Builder
+import Text.Pandoc
+import Tests.Helpers
+import Tests.Arbitrary()
+
+p_write_rt :: Pandoc -> Bool
+p_write_rt d =
+ read (writeNative defaultWriterOptions{ writerStandalone = True } d) == d
+
+p_write_blocks_rt :: [Block] -> Bool
+p_write_blocks_rt bs =
+ read (writeNative defaultWriterOptions (Pandoc (Meta [] [] []) bs)) == bs
+
+tests :: [Test]
+tests = [ property "p_write_rt" p_write_rt
+ , property "p_write_blocks_rt" p_write_blocks_rt
+ ]
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index ad429bc93..ef8560284 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -57,11 +57,18 @@ module Text.Pandoc
(
-- * Definitions
module Text.Pandoc.Definition
+ -- * Generics
+ , module Text.Pandoc.Generic
+ -- * Lists of readers and writers
+ , readers
+ , writers
-- * Readers: converting /to/ Pandoc format
, readMarkdown
, readRST
, readLaTeX
, readHtml
+ , readTextile
+ , readNative
-- * Parser state used in readers
, ParserState (..)
, defaultParserState
@@ -84,25 +91,34 @@ module Text.Pandoc
, writeOpenDocument
, writeMan
, writeMediaWiki
+ , writeTextile
, writeRTF
, writeODT
, writeEPUB
+ , writeOrg
-- * Writer options used in writers
, WriterOptions (..)
, HTMLSlideVariant (..)
, HTMLMathMethod (..)
+ , CiteMethod (..)
, defaultWriterOptions
-- * Rendering templates and default templates
, module Text.Pandoc.Templates
-- * Version
, pandocVersion
+ -- * Miscellaneous
+ , rtfEmbedImage
+ , jsonFilter
) where
import Text.Pandoc.Definition
+import Text.Pandoc.Generic
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.HTML
+import Text.Pandoc.Readers.Textile
+import Text.Pandoc.Readers.Native
import Text.Pandoc.Writers.Native
import Text.Pandoc.Writers.Markdown
import Text.Pandoc.Writers.RST
@@ -117,12 +133,69 @@ import Text.Pandoc.Writers.OpenDocument
import Text.Pandoc.Writers.Man
import Text.Pandoc.Writers.RTF
import Text.Pandoc.Writers.MediaWiki
+import Text.Pandoc.Writers.Textile
+import Text.Pandoc.Writers.Org
import Text.Pandoc.Templates
import Text.Pandoc.Parsing
import Text.Pandoc.Shared
import Data.Version (showVersion)
+import Text.JSON.Generic
import Paths_pandoc (version)
-- | Version number of pandoc library.
pandocVersion :: String
pandocVersion = showVersion version
+
+-- | Association list of formats and readers.
+readers :: [(String, ParserState -> String -> Pandoc)]
+readers = [("native" , \_ -> readNative)
+ ,("json" , \_ -> decodeJSON)
+ ,("markdown" , readMarkdown)
+ ,("markdown+lhs" , \st ->
+ readMarkdown st{ stateLiterateHaskell = True})
+ ,("rst" , readRST)
+ ,("rst+lhs" , \st ->
+ readRST st{ stateLiterateHaskell = True})
+ ,("textile" , readTextile) -- TODO : textile+lhs
+ ,("html" , readHtml)
+ ,("latex" , readLaTeX)
+ ,("latex+lhs" , \st ->
+ readLaTeX st{ stateLiterateHaskell = True})
+ ]
+
+-- | Association list of formats and writers (omitting the
+-- binary writers, odt and epub).
+writers :: [ ( String, WriterOptions -> Pandoc -> String ) ]
+writers = [("native" , writeNative)
+ ,("json" , \_ -> encodeJSON)
+ ,("html" , writeHtmlString)
+ ,("html+lhs" , \o ->
+ writeHtmlString o{ writerLiterateHaskell = True })
+ ,("s5" , writeHtmlString)
+ ,("slidy" , writeHtmlString)
+ ,("docbook" , writeDocbook)
+ ,("opendocument" , writeOpenDocument)
+ ,("latex" , writeLaTeX)
+ ,("latex+lhs" , \o ->
+ writeLaTeX o{ writerLiterateHaskell = True })
+ ,("context" , writeConTeXt)
+ ,("texinfo" , writeTexinfo)
+ ,("man" , writeMan)
+ ,("markdown" , writeMarkdown)
+ ,("markdown+lhs" , \o ->
+ writeMarkdown o{ writerLiterateHaskell = True })
+ ,("plain" , writePlain)
+ ,("rst" , writeRST)
+ ,("rst+lhs" , \o ->
+ writeRST o{ writerLiterateHaskell = True })
+ ,("mediawiki" , writeMediaWiki)
+ ,("textile" , writeTextile)
+ ,("rtf" , writeRTF)
+ ,("org" , writeOrg)
+ ]
+
+-- | Converts a transformation on the Pandoc AST into a function
+-- that reads and writes a JSON-encoded string. This is useful
+-- for writing small scripts.
+jsonFilter :: (Pandoc -> Pandoc) -> String -> String
+jsonFilter f = encodeJSON . f . decodeJSON
diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs
index 436eadd68..d65c9de1c 100644
--- a/src/Text/Pandoc/Biblio.hs
+++ b/src/Text/Pandoc/Biblio.hs
@@ -19,48 +19,203 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Biblio
- Copyright : Copyright (C) 2008 Andrea Rossato
+ Copyright : Copyright (C) 2008-2010 Andrea Rossato
License : GNU GPL, version 2 or above
- Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it>
+ Maintainer : Andrea Rossato <andrea.rossato@unitn.it>
Stability : alpha
Portability : portable
-}
module Text.Pandoc.Biblio ( processBiblio ) where
-import Control.Monad ( when )
import Data.List
-import Text.CSL
+import Data.Unique
+import Data.Char ( isDigit, isPunctuation )
+import qualified Data.Map as M
+import Text.CSL hiding ( Cite(..), Citation(..) )
+import qualified Text.CSL as CSL ( Cite(..) )
import Text.Pandoc.Definition
+import Text.Pandoc.Generic
+import Text.Pandoc.Shared (stringify)
+import Text.ParserCombinators.Parsec
+import Control.Monad
-- | Process a 'Pandoc' document by adding citations formatted
-- according to a CSL style, using 'citeproc' from citeproc-hs.
-processBiblio :: String -> [Reference] -> Pandoc -> IO Pandoc
-processBiblio cf r p
+processBiblio :: FilePath -> [Reference] -> Pandoc -> IO Pandoc
+processBiblio cslfile r p
= if null r then return p
else do
- when (null cf) $ error "Missing the needed citation style file"
- csl <- readCSLFile cf
- let groups = queryWith getCite p
- result = citeproc csl r groups
- cits_map = zip groups (citations result)
- biblioList = map (read . renderPandoc' csl) (bibliography result)
- Pandoc m b = processWith (processCite csl cits_map) p
- return $ Pandoc m $ b ++ biblioList
+ csl <- readCSLFile cslfile
+ p' <- bottomUpM setHash p
+ let (nts,grps) = if styleClass csl == "note"
+ then let cits = queryWith getCite p'
+ ncits = map (queryWith getCite) $ queryWith getNote p'
+ needNt = cits \\ concat ncits
+ in (,) needNt $ getNoteCitations needNt p'
+ else (,) [] $ queryWith getCitation p'
+ result = citeproc procOpts csl r (setNearNote csl $
+ map (map toCslCite) grps)
+ cits_map = M.fromList $ zip grps (citations result)
+ biblioList = map (renderPandoc' csl) (bibliography result)
+ Pandoc m b = bottomUp (procInlines $ processCite csl cits_map) p'
+ return . generateNotes nts . Pandoc m $ b ++ biblioList
-- | Substitute 'Cite' elements with formatted citations.
-processCite :: Style -> [([Target],[FormattedOutput])] -> Inline -> Inline
-processCite s cs il
- | Cite t _ <- il = Cite t (process t)
- | otherwise = il
+processCite :: Style -> M.Map [Citation] [FormattedOutput] -> [Inline] -> [Inline]
+processCite _ _ [] = []
+processCite s cs (i:is)
+ | Cite t _ <- i = process t ++ processCite s cs is
+ | otherwise = i : processCite s cs is
where
- process t = case elemIndex t (map fst cs) of
- Just i -> read . renderPandoc s $ snd (cs !! i)
+ addNt t x = if null x then [] else [Cite t $ renderPandoc s x]
+ process t = case M.lookup t cs of
+ Just x -> if isTextualCitation t && x /= []
+ then renderPandoc s [head x] ++
+ if tail x /= []
+ then Space : addNt t (tail x)
+ else []
+ else [Cite t $ renderPandoc s x]
Nothing -> [Str ("Error processing " ++ show t)]
+isTextualCitation :: [Citation] -> Bool
+isTextualCitation (c:_) = citationMode c == AuthorInText
+isTextualCitation _ = False
+
-- | Retrieve all citations from a 'Pandoc' docuument. To be used with
-- 'queryWith'.
-getCite :: Inline -> [[(String,String)]]
-getCite i | Cite t _ <- i = [t]
+getCitation :: Inline -> [[Citation]]
+getCitation i | Cite t _ <- i = [t]
+ | otherwise = []
+
+getNote :: Inline -> [Inline]
+getNote i | Note _ <- i = [i]
+ | otherwise = []
+
+getCite :: Inline -> [Inline]
+getCite i | Cite _ _ <- i = [i]
| otherwise = []
+
+getNoteCitations :: [Inline] -> Pandoc -> [[Citation]]
+getNoteCitations needNote
+ = let mvCite i = if i `elem` needNote then Note [Para [i]] else i
+ setNote = bottomUp mvCite
+ getCits = concat . flip (zipWith $ setCiteNoteNum) [1..] .
+ map (queryWith getCite) . queryWith getNote . setNote
+ in queryWith getCitation . getCits
+
+setHash :: Citation -> IO Citation
+setHash (Citation i p s cm nn _)
+ = hashUnique `fmap` newUnique >>= return . Citation i p s cm nn
+
+generateNotes :: [Inline] -> Pandoc -> Pandoc
+generateNotes needNote = bottomUp (mvCiteInNote needNote)
+
+procInlines :: ([Inline] -> [Inline]) -> Block -> Block
+procInlines f b
+ | Plain inls <- b = Plain $ f inls
+ | Para inls <- b = Para $ f inls
+ | Header i inls <- b = Header i $ f inls
+ | otherwise = b
+
+mvCiteInNote :: [Inline] -> Block -> Block
+mvCiteInNote is = procInlines mvCite
+ where
+ mvCite :: [Inline] -> [Inline]
+ mvCite inls
+ | x:i:xs <- inls, startWithPunct xs
+ , x == Space, i `elem_` is = switch i xs ++ mvCite (tailFirstInlineStr xs)
+ | x:i:xs <- inls
+ , x == Space, i `elem_` is = mvInNote i : mvCite xs
+ | i:xs <- inls, i `elem_` is
+ , startWithPunct xs = switch i xs ++ mvCite (tailFirstInlineStr xs)
+ | i:xs <- inls, Note _ <- i = checkNt i : mvCite xs
+ | i:xs <- inls = i : mvCite xs
+ | otherwise = []
+ elem_ x xs = case x of Cite cs _ -> (Cite cs []) `elem` xs; _ -> False
+ switch i xs = Str (headInline xs) : mvInNote i : []
+ mvInNote i
+ | Cite t o <- i = Note [Para [Cite t $ sanitize o]]
+ | otherwise = Note [Para [i ]]
+ sanitize i
+ | endWithPunct i = toCapital i
+ | otherwise = toCapital (i ++ [Str "."])
+
+ checkPt i
+ | Cite c o : xs <- i
+ , endWithPunct o, startWithPunct xs
+ , endWithPunct o = Cite c (initInline o) : checkPt xs
+ | x:xs <- i = x : checkPt xs
+ | otherwise = []
+ checkNt = bottomUp $ procInlines checkPt
+
+setCiteNoteNum :: [Inline] -> Int -> [Inline]
+setCiteNoteNum ((Cite cs o):xs) n = Cite (setCitationNoteNum n cs) o : setCiteNoteNum xs n
+setCiteNoteNum _ _ = []
+
+setCitationNoteNum :: Int -> [Citation] -> [Citation]
+setCitationNoteNum i = map $ \c -> c { citationNoteNum = i}
+
+toCslCite :: Citation -> CSL.Cite
+toCslCite c
+ = let (l, s) = locatorWords $ citationSuffix c
+ (la,lo) = parseLocator l
+ citMode = case citationMode c of
+ AuthorInText -> (True, False)
+ SuppressAuthor -> (False,True )
+ NormalCitation -> (False,False)
+ s' = case s of
+ [] -> []
+ (Str (y:_) : _) | isPunctuation y -> s
+ _ -> Str "," : Space : s
+ in emptyCite { CSL.citeId = citationId c
+ , CSL.citePrefix = PandocText $ citationPrefix c
+ , CSL.citeSuffix = PandocText $ s'
+ , CSL.citeLabel = la
+ , CSL.citeLocator = lo
+ , CSL.citeNoteNumber = show $ citationNoteNum c
+ , CSL.authorInText = fst citMode
+ , CSL.suppressAuthor = snd citMode
+ , CSL.citeHash = citationHash c
+ }
+
+locatorWords :: [Inline] -> (String, [Inline])
+locatorWords inp =
+ case parse pLocatorWords "suffix" inp of
+ Right r -> r
+ Left _ -> ("",inp)
+
+pLocatorWords :: GenParser Inline st (String, [Inline])
+pLocatorWords = do
+ l <- pLocator
+ s <- getInput -- rest is suffix
+ if length l > 0 && last l == ','
+ then return (init l, Str "," : s)
+ else return (l, s)
+
+pMatch :: (Inline -> Bool) -> GenParser Inline st Inline
+pMatch condition = try $ do
+ t <- anyToken
+ guard $ condition t
+ return t
+
+pSpace :: GenParser Inline st Inline
+pSpace = pMatch (== Space)
+
+pLocator :: GenParser Inline st String
+pLocator = try $ do
+ optional $ pMatch (== Str ",")
+ optional pSpace
+ f <- many1 (notFollowedBy pSpace >> anyToken)
+ gs <- many1 pWordWithDigits
+ return $ stringify f ++ (' ' : unwords gs)
+
+pWordWithDigits :: GenParser Inline st String
+pWordWithDigits = try $ do
+ pSpace
+ r <- many1 (notFollowedBy pSpace >> anyToken)
+ let s = stringify r
+ guard $ any isDigit s
+ return s
+
diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs
deleted file mode 100644
index 122931773..000000000
--- a/src/Text/Pandoc/Blocks.hs
+++ /dev/null
@@ -1,146 +0,0 @@
-{-
-Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Blocks
- Copyright : Copyright (C) 2007 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Functions for the manipulation of fixed-width blocks of text.
-These are used in the construction of plain-text tables.
--}
-
-module Text.Pandoc.Blocks
- (
- TextBlock (..),
- docToBlock,
- blockToDoc,
- widthOfBlock,
- heightOfBlock,
- hcatBlocks,
- hsepBlocks,
- centerAlignBlock,
- leftAlignBlock,
- rightAlignBlock
- )
-where
-import Text.PrettyPrint
-import Data.List ( intersperse )
-
--- | A fixed-width block of text. Parameters are width of block,
--- height of block, and list of lines.
-data TextBlock = TextBlock Int Int [String]
-instance Show TextBlock where
- show x = show $ blockToDoc x
-
--- | Break lines in a list of lines so that none are greater than
--- a given width.
-breakLines :: Int -- ^ Maximum length of lines.
- -> [String] -- ^ List of lines.
- -> [String]
-breakLines _ [] = []
-breakLines width (l:ls) =
- if length l > width
- then (take width l):(breakLines width ((drop width l):ls))
- else l:(breakLines width ls)
-
--- | Convert a @Doc@ element into a @TextBlock@ with a specified width.
-docToBlock :: Int -- ^ Width of text block.
- -> Doc -- ^ @Doc@ to convert.
- -> TextBlock
-docToBlock width doc =
- let rendered = renderStyle (style {lineLength = width,
- ribbonsPerLine = 1}) doc
- lns = breakLines width $ lines rendered
- in TextBlock width (length lns) lns
-
--- | Convert a @TextBlock@ to a @Doc@ element.
-blockToDoc :: TextBlock -> Doc
-blockToDoc (TextBlock _ _ lns) =
- if null lns
- then empty
- else vcat $ map text lns
-
--- | Returns width of a @TextBlock@ (number of columns).
-widthOfBlock :: TextBlock -> Int
-widthOfBlock (TextBlock width _ _) = width
-
--- | Returns height of a @TextBlock@ (number of rows).
-heightOfBlock :: TextBlock -> Int
-heightOfBlock (TextBlock _ height _) = height
-
--- | Pads a string out to a given width using spaces.
-hPad :: Int -- ^ Desired width.
- -> String -- ^ String to pad.
- -> String
-hPad width line =
- let linelen = length line
- in if linelen <= width
- then line ++ replicate (width - linelen) ' '
- else take width line
-
--- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in
--- which they appear side by side.
-hcatBlocks :: [TextBlock] -> TextBlock
-hcatBlocks [] = TextBlock 0 0 []
-hcatBlocks [x] = x -- This is not redundant! We don't want last item hPad'd.
-hcatBlocks ((TextBlock width1 height1 lns1):xs) =
- let (TextBlock width2 height2 lns2) = hcatBlocks xs
- height = max height1 height2
- width = width1 + width2
- lns1' = map (hPad width1) $ lns1 ++ replicate (height - height1) ""
- lns2' = lns2 ++ replicate (height - height2) ""
- lns = zipWith (++) lns1' lns2'
- in TextBlock width height lns
-
--- | Like @hcatBlocks@, but inserts space between the @TextBlock@s.
-hsepBlocks :: [TextBlock] -> TextBlock
-hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "]))
-
-isWhitespace :: Char -> Bool
-isWhitespace x = x `elem` " \t"
-
--- | Left-aligns the contents of a @TextBlock@ within the block.
-leftAlignBlock :: TextBlock -> TextBlock
-leftAlignBlock (TextBlock width height lns) =
- TextBlock width height $ map (dropWhile isWhitespace) lns
-
--- | Right-aligns the contents of a @TextBlock@ within the block.
-rightAlignBlock :: TextBlock -> TextBlock
-rightAlignBlock (TextBlock width height lns) =
- let rightAlignLine ln =
- let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln
- in reverse (rest ++ spaces)
- in TextBlock width height $ map rightAlignLine lns
-
--- | Centers the contents of a @TextBlock@ within the block.
-centerAlignBlock :: TextBlock -> TextBlock
-centerAlignBlock (TextBlock width height lns) =
- let centerAlignLine ln =
- let ln' = hPad width ln
- (startSpaces, rest) = span isWhitespace ln'
- endSpaces = takeWhile isWhitespace (reverse ln')
- numSpaces = length (startSpaces ++ endSpaces)
- startSpaces' = replicate (quot numSpaces 2) ' '
- in startSpaces' ++ rest
- in TextBlock width height $ map centerAlignLine lns
-
diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs
index 8ac55fc61..8157d94d3 100644
--- a/src/Text/Pandoc/CharacterReferences.hs
+++ b/src/Text/Pandoc/CharacterReferences.hs
@@ -31,9 +31,9 @@ module Text.Pandoc.CharacterReferences (
characterReference,
decodeCharacterReferences,
) where
-import Data.Char ( chr )
import Text.ParserCombinators.Parsec
-import qualified Data.Map as Map
+import Text.HTML.TagSoup.Entity ( lookupNamedEntity, lookupNumericEntity )
+import Data.Maybe ( fromMaybe )
-- | Parse character entity.
characterReference :: GenParser Char st Char
@@ -47,18 +47,21 @@ numRef :: GenParser Char st Char
numRef = do
char '#'
num <- hexNum <|> decNum
- return $ chr $ num
+ return $ fromMaybe '?' $ lookupNumericEntity num
-hexNum :: GenParser Char st Int
-hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . (\xs -> '0':'x':xs)
+hexNum :: GenParser Char st [Char]
+hexNum = do
+ x <- oneOf "Xx"
+ num <- many1 hexDigit
+ return (x:num)
-decNum :: GenParser Char st Int
-decNum = many1 digit >>= return . read
+decNum :: GenParser Char st [Char]
+decNum = many1 digit
entity :: GenParser Char st Char
entity = do
body <- many1 alphaNum
- return $ Map.findWithDefault '?' body entityTable
+ return $ fromMaybe '?' $ lookupNamedEntity body
-- | Convert entities in a string to characters.
decodeCharacterReferences :: String -> String
@@ -67,261 +70,3 @@ decodeCharacterReferences str =
Left err -> error $ "\nError: " ++ show err
Right result -> result
-entityTable :: Map.Map String Char
-entityTable = Map.fromList entityTableList
-
-entityTableList :: [(String, Char)]
-entityTableList = [
- ("quot", chr 34),
- ("amp", chr 38),
- ("lt", chr 60),
- ("gt", chr 62),
- ("nbsp", chr 160),
- ("iexcl", chr 161),
- ("cent", chr 162),
- ("pound", chr 163),
- ("curren", chr 164),
- ("yen", chr 165),
- ("brvbar", chr 166),
- ("sect", chr 167),
- ("uml", chr 168),
- ("copy", chr 169),
- ("ordf", chr 170),
- ("laquo", chr 171),
- ("not", chr 172),
- ("shy", chr 173),
- ("reg", chr 174),
- ("macr", chr 175),
- ("deg", chr 176),
- ("plusmn", chr 177),
- ("sup2", chr 178),
- ("sup3", chr 179),
- ("acute", chr 180),
- ("micro", chr 181),
- ("para", chr 182),
- ("middot", chr 183),
- ("cedil", chr 184),
- ("sup1", chr 185),
- ("ordm", chr 186),
- ("raquo", chr 187),
- ("frac14", chr 188),
- ("frac12", chr 189),
- ("frac34", chr 190),
- ("iquest", chr 191),
- ("Agrave", chr 192),
- ("Aacute", chr 193),
- ("Acirc", chr 194),
- ("Atilde", chr 195),
- ("Auml", chr 196),
- ("Aring", chr 197),
- ("AElig", chr 198),
- ("Ccedil", chr 199),
- ("Egrave", chr 200),
- ("Eacute", chr 201),
- ("Ecirc", chr 202),
- ("Euml", chr 203),
- ("Igrave", chr 204),
- ("Iacute", chr 205),
- ("Icirc", chr 206),
- ("Iuml", chr 207),
- ("ETH", chr 208),
- ("Ntilde", chr 209),
- ("Ograve", chr 210),
- ("Oacute", chr 211),
- ("Ocirc", chr 212),
- ("Otilde", chr 213),
- ("Ouml", chr 214),
- ("times", chr 215),
- ("Oslash", chr 216),
- ("Ugrave", chr 217),
- ("Uacute", chr 218),
- ("Ucirc", chr 219),
- ("Uuml", chr 220),
- ("Yacute", chr 221),
- ("THORN", chr 222),
- ("szlig", chr 223),
- ("agrave", chr 224),
- ("aacute", chr 225),
- ("acirc", chr 226),
- ("atilde", chr 227),
- ("auml", chr 228),
- ("aring", chr 229),
- ("aelig", chr 230),
- ("ccedil", chr 231),
- ("egrave", chr 232),
- ("eacute", chr 233),
- ("ecirc", chr 234),
- ("euml", chr 235),
- ("igrave", chr 236),
- ("iacute", chr 237),
- ("icirc", chr 238),
- ("iuml", chr 239),
- ("eth", chr 240),
- ("ntilde", chr 241),
- ("ograve", chr 242),
- ("oacute", chr 243),
- ("ocirc", chr 244),
- ("otilde", chr 245),
- ("ouml", chr 246),
- ("divide", chr 247),
- ("oslash", chr 248),
- ("ugrave", chr 249),
- ("uacute", chr 250),
- ("ucirc", chr 251),
- ("uuml", chr 252),
- ("yacute", chr 253),
- ("thorn", chr 254),
- ("yuml", chr 255),
- ("OElig", chr 338),
- ("oelig", chr 339),
- ("Scaron", chr 352),
- ("scaron", chr 353),
- ("Yuml", chr 376),
- ("fnof", chr 402),
- ("circ", chr 710),
- ("tilde", chr 732),
- ("Alpha", chr 913),
- ("Beta", chr 914),
- ("Gamma", chr 915),
- ("Delta", chr 916),
- ("Epsilon", chr 917),
- ("Zeta", chr 918),
- ("Eta", chr 919),
- ("Theta", chr 920),
- ("Iota", chr 921),
- ("Kappa", chr 922),
- ("Lambda", chr 923),
- ("Mu", chr 924),
- ("Nu", chr 925),
- ("Xi", chr 926),
- ("Omicron", chr 927),
- ("Pi", chr 928),
- ("Rho", chr 929),
- ("Sigma", chr 931),
- ("Tau", chr 932),
- ("Upsilon", chr 933),
- ("Phi", chr 934),
- ("Chi", chr 935),
- ("Psi", chr 936),
- ("Omega", chr 937),
- ("alpha", chr 945),
- ("beta", chr 946),
- ("gamma", chr 947),
- ("delta", chr 948),
- ("epsilon", chr 949),
- ("zeta", chr 950),
- ("eta", chr 951),
- ("theta", chr 952),
- ("iota", chr 953),
- ("kappa", chr 954),
- ("lambda", chr 955),
- ("mu", chr 956),
- ("nu", chr 957),
- ("xi", chr 958),
- ("omicron", chr 959),
- ("pi", chr 960),
- ("rho", chr 961),
- ("sigmaf", chr 962),
- ("sigma", chr 963),
- ("tau", chr 964),
- ("upsilon", chr 965),
- ("phi", chr 966),
- ("chi", chr 967),
- ("psi", chr 968),
- ("omega", chr 969),
- ("thetasym", chr 977),
- ("upsih", chr 978),
- ("piv", chr 982),
- ("ensp", chr 8194),
- ("emsp", chr 8195),
- ("thinsp", chr 8201),
- ("zwnj", chr 8204),
- ("zwj", chr 8205),
- ("lrm", chr 8206),
- ("rlm", chr 8207),
- ("ndash", chr 8211),
- ("mdash", chr 8212),
- ("lsquo", chr 8216),
- ("rsquo", chr 8217),
- ("sbquo", chr 8218),
- ("ldquo", chr 8220),
- ("rdquo", chr 8221),
- ("bdquo", chr 8222),
- ("dagger", chr 8224),
- ("Dagger", chr 8225),
- ("bull", chr 8226),
- ("hellip", chr 8230),
- ("permil", chr 8240),
- ("prime", chr 8242),
- ("Prime", chr 8243),
- ("lsaquo", chr 8249),
- ("rsaquo", chr 8250),
- ("oline", chr 8254),
- ("frasl", chr 8260),
- ("euro", chr 8364),
- ("image", chr 8465),
- ("weierp", chr 8472),
- ("real", chr 8476),
- ("trade", chr 8482),
- ("alefsym", chr 8501),
- ("larr", chr 8592),
- ("uarr", chr 8593),
- ("rarr", chr 8594),
- ("darr", chr 8595),
- ("harr", chr 8596),
- ("crarr", chr 8629),
- ("lArr", chr 8656),
- ("uArr", chr 8657),
- ("rArr", chr 8658),
- ("dArr", chr 8659),
- ("hArr", chr 8660),
- ("forall", chr 8704),
- ("part", chr 8706),
- ("exist", chr 8707),
- ("empty", chr 8709),
- ("nabla", chr 8711),
- ("isin", chr 8712),
- ("notin", chr 8713),
- ("ni", chr 8715),
- ("prod", chr 8719),
- ("sum", chr 8721),
- ("minus", chr 8722),
- ("lowast", chr 8727),
- ("radic", chr 8730),
- ("prop", chr 8733),
- ("infin", chr 8734),
- ("ang", chr 8736),
- ("and", chr 8743),
- ("or", chr 8744),
- ("cap", chr 8745),
- ("cup", chr 8746),
- ("int", chr 8747),
- ("there4", chr 8756),
- ("sim", chr 8764),
- ("cong", chr 8773),
- ("asymp", chr 8776),
- ("ne", chr 8800),
- ("equiv", chr 8801),
- ("le", chr 8804),
- ("ge", chr 8805),
- ("sub", chr 8834),
- ("sup", chr 8835),
- ("nsub", chr 8836),
- ("sube", chr 8838),
- ("supe", chr 8839),
- ("oplus", chr 8853),
- ("otimes", chr 8855),
- ("perp", chr 8869),
- ("sdot", chr 8901),
- ("lceil", chr 8968),
- ("rceil", chr 8969),
- ("lfloor", chr 8970),
- ("rfloor", chr 8971),
- ("lang", chr 9001),
- ("rang", chr 9002),
- ("loz", chr 9674),
- ("spades", chr 9824),
- ("clubs", chr 9827),
- ("hearts", chr 9829),
- ("diams", chr 9830)
- ]
diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs
deleted file mode 100644
index fffca3b2e..000000000
--- a/src/Text/Pandoc/Definition.hs
+++ /dev/null
@@ -1,151 +0,0 @@
-{-# OPTIONS_GHC -fglasgow-exts #-} -- for deriving Typeable
-{-
-Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
--}
-
-{- |
- Module : Text.Pandoc.Definition
- Copyright : Copyright (C) 2006-2010 John MacFarlane
- License : GNU GPL, version 2 or above
-
- Maintainer : John MacFarlane <jgm@berkeley.edu>
- Stability : alpha
- Portability : portable
-
-Definition of 'Pandoc' data structure for format-neutral representation
-of documents.
--}
-module Text.Pandoc.Definition where
-
-import Data.Generics
-
-data Pandoc = Pandoc Meta [Block] deriving (Eq, Ord, Read, Show, Typeable, Data)
-
--- | Bibliographic information for the document: title, authors, date.
-data Meta = Meta { docTitle :: [Inline]
- , docAuthors :: [[Inline]]
- , docDate :: [Inline] }
- deriving (Eq, Ord, Show, Read, Typeable, Data)
-
--- | Alignment of a table column.
-data Alignment = AlignLeft
- | AlignRight
- | AlignCenter
- | AlignDefault deriving (Eq, Ord, Show, Read, Typeable, Data)
-
--- | List attributes.
-type ListAttributes = (Int, ListNumberStyle, ListNumberDelim)
-
--- | Style of list numbers.
-data ListNumberStyle = DefaultStyle
- | Example
- | Decimal
- | LowerRoman
- | UpperRoman
- | LowerAlpha
- | UpperAlpha deriving (Eq, Ord, Show, Read, Typeable, Data)
-
--- | Delimiter of list numbers.
-data ListNumberDelim = DefaultDelim
- | Period
- | OneParen
- | TwoParens deriving (Eq, Ord, Show, Read, Typeable, Data)
-
--- | Attributes: identifier, classes, key-value pairs
-type Attr = (String, [String], [(String, String)])
-
--- | Block element.
-data Block
- = Plain [Inline] -- ^ Plain text, not a paragraph
- | Para [Inline] -- ^ Paragraph
- | CodeBlock Attr String -- ^ Code block (literal) with attributes
- | RawHtml String -- ^ Raw HTML block (literal)
- | BlockQuote [Block] -- ^ Block quote (list of blocks)
- | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes
- -- and a list of items, each a list of blocks)
- | BulletList [[Block]] -- ^ Bullet list (list of items, each
- -- a list of blocks)
- | DefinitionList [([Inline],[[Block]])] -- ^ Definition list
- -- Each list item is a pair consisting of a
- -- term (a list of inlines) and one or more
- -- definitions (each a list of blocks)
- | Header Int [Inline] -- ^ Header - level (integer) and text (inlines)
- | HorizontalRule -- ^ Horizontal rule
- | Table [Inline] [Alignment] [Double] [[Block]] [[[Block]]] -- ^ Table,
- -- with caption, column alignments,
- -- relative column widths (0 = default),
- -- column headers (each a list of blocks), and
- -- rows (each a list of lists of blocks)
- | Null -- ^ Nothing
- deriving (Eq, Ord, Read, Show, Typeable, Data)
-
--- | Type of quotation marks to use in Quoted inline.
-data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Ord, Read, Typeable, Data)
-
--- | Link target (URL, title).
-type Target = (String, String)
-
--- | Type of math element (display or inline).
-data MathType = DisplayMath | InlineMath deriving (Show, Eq, Ord, Read, Typeable, Data)
-
--- | Inline elements.
-data Inline
- = Str String -- ^ Text (string)
- | Emph [Inline] -- ^ Emphasized text (list of inlines)
- | Strong [Inline] -- ^ Strongly emphasized text (list of inlines)
- | Strikeout [Inline] -- ^ Strikeout text (list of inlines)
- | Superscript [Inline] -- ^ Superscripted text (list of inlines)
- | Subscript [Inline] -- ^ Subscripted text (list of inlines)
- | SmallCaps [Inline] -- ^ Small caps text (list of inlines)
- | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines)
- | Cite [Target] [Inline] -- ^ Citation (list of inlines)
- | Code String -- ^ Inline code (literal)
- | Space -- ^ Inter-word space
- | EmDash -- ^ Em dash
- | EnDash -- ^ En dash
- | Apostrophe -- ^ Apostrophe
- | Ellipses -- ^ Ellipses
- | LineBreak -- ^ Hard line break
- | Math MathType String -- ^ TeX math (literal)
- | TeX String -- ^ LaTeX code (literal)
- | HtmlInline String -- ^ HTML code (literal)
- | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target
- | Image [Inline] Target -- ^ Image: alt text (list of inlines), target
- -- and target
- | Note [Block] -- ^ Footnote or endnote
- deriving (Show, Eq, Ord, Read, Typeable, Data)
-
--- | Applies a transformation on @a@s to matching elements in a @b@.
-processWith :: (Data a, Data b) => (a -> a) -> b -> b
-processWith f = everywhere (mkT f)
-
--- | Like 'processWith', but with monadic transformations.
-processWithM :: (Monad m, Data a, Data b) => (a -> m a) -> b -> m b
-processWithM f = everywhereM (mkM f)
-
--- | Runs a query on matching @a@ elements in a @c@.
-queryWith :: (Data a, Data c) => (a -> [b]) -> c -> [b]
-queryWith f = everything (++) ([] `mkQ` f)
-
-{-# DEPRECATED processPandoc "Use processWith instead" #-}
-processPandoc :: Data a => (a -> a) -> Pandoc -> Pandoc
-processPandoc = processWith
-
-{-# DEPRECATED queryPandoc "Use queryWith instead" #-}
-queryPandoc :: Data a => (a -> [b]) -> Pandoc -> [b]
-queryPandoc = queryWith
-
diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs
index f29106262..5ddaf1379 100644
--- a/src/Text/Pandoc/Highlighting.hs
+++ b/src/Text/Pandoc/Highlighting.hs
@@ -37,10 +37,14 @@ import Data.List (find)
import Data.Maybe (fromMaybe)
import Data.Char (toLower)
-highlightHtml :: Attr -> String -> Either String Html
-highlightHtml (_, classes, keyvals) rawCode =
+highlightHtml :: Bool -- ^ True if inline HTML
+ -> Attr -- ^ Attributes of the Code or CodeBlock
+ -> String -- ^ Raw contents of the Code or CodeBlock
+ -> Either String Html -- ^ An error or the formatted Html
+highlightHtml inline (_, classes, keyvals) rawCode =
let firstNum = read $ fromMaybe "1" $ lookup "startFrom" keyvals
fmtOpts = [OptNumberFrom firstNum] ++
+ [OptInline | inline] ++
case find (`elem` ["number","numberLines","number-lines"]) classes of
Nothing -> []
Just _ -> [OptNumberLines]
@@ -65,6 +69,6 @@ languages = []
languagesByExtension :: String -> [String]
languagesByExtension _ = []
-highlightHtml :: Attr -> String -> Either String Html
-highlightHtml _ _ = Left "Pandoc was not compiled with support for highlighting"
+highlightHtml :: Bool -> Attr -> String -> Either String Html
+highlightHtml _ _ _ = Left "Pandoc was not compiled with support for highlighting"
#endif
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 3678fc22a..9ce064f91 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -64,21 +64,27 @@ module Text.Pandoc.Parsing ( (>>~),
QuoteContext (..),
NoteTable,
KeyTable,
- Key (..),
+ Key,
+ toKey,
+ fromKey,
lookupKeySrc,
- refsMatch )
+ smartPunctuation,
+ macro,
+ applyMacros' )
where
import Text.Pandoc.Definition
+import Text.Pandoc.Generic
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.ParserCombinators.Parsec
import Text.Pandoc.CharacterReferences ( characterReference )
-import Data.Char ( toLower, toUpper, ord, isAscii )
+import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit )
import Data.List ( intercalate, transpose )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
-import Control.Monad ( join, liftM )
+import Control.Monad ( join, liftM, guard )
import Text.Pandoc.Shared
import qualified Data.Map as M
+import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
-- | Like >>, but returns the operation on the left.
-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
@@ -114,7 +120,7 @@ oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings
-- | Parses a space or tab.
spaceChar :: CharParser st Char
-spaceChar = char ' ' <|> char '\t'
+spaceChar = satisfy $ \c -> c == ' ' || c == '\t'
-- | Skips zero or more spaces or tabs.
skipSpaces :: GenParser Char st ()
@@ -169,7 +175,8 @@ lineClump = blanklines
charsInBalanced :: Char -> Char -> GenParser Char st String
charsInBalanced open close = try $ do
char open
- raw <- many $ (many1 (noneOf [open, close, '\n']))
+ raw <- many $ (many1 (satisfy $ \c ->
+ c /= open && c /= close && c /= '\n'))
<|> (do res <- charsInBalanced open close
return $ [open] ++ res ++ [close])
<|> try (string "\n" >>~ notFollowedBy' blanklines)
@@ -180,7 +187,7 @@ charsInBalanced open close = try $ do
charsInBalanced' :: Char -> Char -> GenParser Char st String
charsInBalanced' open close = try $ do
char open
- raw <- many $ (many1 (noneOf [open, close]))
+ raw <- many $ (many1 (satisfy $ \c -> c /= open && c /= close))
<|> (do res <- charsInBalanced' open close
return $ [open] ++ res ++ [close])
char close
@@ -201,7 +208,7 @@ romanNumeral upperCase = do
let romanDigits = if upperCase
then uppercaseRomanDigits
else lowercaseRomanDigits
- lookAhead $ oneOf romanDigits
+ lookAhead $ oneOf romanDigits
let [one, five, ten, fifty, hundred, fivehundred, thousand] =
map char romanDigits
thousands <- many thousand >>= (return . (1000 *) . length)
@@ -227,7 +234,8 @@ romanNumeral upperCase = do
-- Parsers for email addresses and URIs
emailChar :: GenParser Char st Char
-emailChar = alphaNum <|> oneOf "-+_."
+emailChar = alphaNum <|>
+ satisfy (\c -> c == '-' || c == '+' || c == '_' || c == '.')
domainChar :: GenParser Char st Char
domainChar = alphaNum <|> char '-'
@@ -283,7 +291,7 @@ nullBlock :: GenParser Char st Block
nullBlock = anyChar >> return Null
-- | Fail if reader is in strict markdown syntax mode.
-failIfStrict :: GenParser Char ParserState ()
+failIfStrict :: GenParser a ParserState ()
failIfStrict = do
state <- getState
if stateStrict state then fail "strict mode" else return ()
@@ -327,7 +335,7 @@ decimal = do
exampleNum :: GenParser Char ParserState (ListNumberStyle, Int)
exampleNum = do
char '@'
- lab <- many (alphaNum <|> oneOf "_-")
+ lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-'))
st <- getState
let num = stateNextExample st
let newlabels = if null lab
@@ -450,8 +458,9 @@ widthsFromIndices :: Int -- Number of columns on terminal
-> [Int] -- Indices
-> [Double] -- Fractional relative sizes of columns
widthsFromIndices _ [] = []
-widthsFromIndices numColumns indices =
- let lengths' = zipWith (-) indices (0:indices)
+widthsFromIndices numColumns' indices =
+ let numColumns = max numColumns' (if null indices then 0 else last indices)
+ lengths' = zipWith (-) indices (0:indices)
lengths = reverse $
case reverse lengths' of
[] -> []
@@ -481,8 +490,8 @@ gridTableWith block tableCaption headless =
tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption
gridTableSplitLine :: [Int] -> String -> [String]
-gridTableSplitLine indices line =
- map removeFinalBar $ tail $ splitByIndices (init indices) line
+gridTableSplitLine indices line = map removeFinalBar $ tail $
+ splitByIndices (init indices) $ removeTrailingSpace line
gridPart :: Char -> GenParser Char st (Int, Int)
gridPart ch = do
@@ -494,8 +503,8 @@ gridDashedLines :: Char -> GenParser Char st [(Int,Int)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline
removeFinalBar :: String -> String
-removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") .
- reverse
+removeFinalBar =
+ reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-- | Separator between rows of grid table.
gridTableSep :: Char -> GenParser Char ParserState Char
@@ -532,7 +541,7 @@ gridTableRawLine :: [Int] -> GenParser Char ParserState [String]
gridTableRawLine indices = do
char '|'
line <- many1Till anyChar newline
- return (gridTableSplitLine indices $ removeTrailingSpace line)
+ return (gridTableSplitLine indices line)
-- | Parse row of grid table.
gridTableRow :: GenParser Char ParserState Block
@@ -562,9 +571,9 @@ gridTableFooter = blanklines
---
-- | Parse a string with a given parser and state.
-readWith :: GenParser Char ParserState a -- ^ parser
+readWith :: GenParser t ParserState a -- ^ parser
-> ParserState -- ^ initial state
- -> String -- ^ input string
+ -> [t] -- ^ input
-> a
readWith parser state input =
case runParser parser state "source" input of
@@ -583,11 +592,8 @@ data ParserState = ParserState
{ stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX?
stateParserContext :: ParserContext, -- ^ Inside list?
stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment?
- stateSanitizeHTML :: Bool, -- ^ Sanitize HTML?
stateKeys :: KeyTable, -- ^ List of reference keys
-#ifdef _CITEPROC
stateCitations :: [String], -- ^ List of available citations
-#endif
stateNotes :: NoteTable, -- ^ List of notes
stateTabStop :: Int, -- ^ Tab stop
stateStandalone :: Bool, -- ^ Parse bibliographic info?
@@ -602,7 +608,9 @@ data ParserState = ParserState
stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks
stateNextExample :: Int, -- ^ Number of next example
stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers
- stateHasChapters :: Bool -- ^ True if \chapter encountered
+ stateHasChapters :: Bool, -- ^ True if \chapter encountered
+ stateApplyMacros :: Bool, -- ^ Apply LaTeX macros?
+ stateMacros :: [Macro] -- ^ List of macros defined so far
}
deriving Show
@@ -611,11 +619,8 @@ defaultParserState =
ParserState { stateParseRaw = False,
stateParserContext = NullState,
stateQuoteContext = NoQuote,
- stateSanitizeHTML = False,
stateKeys = M.empty,
-#ifdef _CITEPROC
stateCitations = [],
-#endif
stateNotes = [],
stateTabStop = 4,
stateStandalone = False,
@@ -630,7 +635,9 @@ defaultParserState =
stateIndentedCodeClasses = [],
stateNextExample = 1,
stateExamples = M.empty,
- stateHasChapters = False }
+ stateHasChapters = False,
+ stateApplyMacros = True,
+ stateMacros = []}
data HeaderType
= SingleHeader Char -- ^ Single line of characters underneath
@@ -650,13 +657,20 @@ data QuoteContext
type NoteTable = [(String, String)]
-newtype Key = Key [Inline] deriving (Show, Read)
+newtype Key = Key [Inline] deriving (Show, Read, Eq, Ord)
-instance Eq Key where
- Key a == Key b = refsMatch a b
+toKey :: [Inline] -> Key
+toKey = Key . bottomUp lowercase
+ where lowercase :: Inline -> Inline
+ lowercase (Str xs) = Str (map toLower xs)
+ lowercase (Math t xs) = Math t (map toLower xs)
+ lowercase (Code attr xs) = Code attr (map toLower xs)
+ lowercase (RawInline f xs) = RawInline f (map toLower xs)
+ lowercase LineBreak = Space
+ lowercase x = x
-instance Ord Key where
- compare (Key a) (Key b) = if a == b then EQ else compare a b
+fromKey :: Key -> [Inline]
+fromKey (Key xs) = xs
type KeyTable = M.Map Key Target
@@ -668,33 +682,130 @@ lookupKeySrc table key = case M.lookup key table of
Nothing -> Nothing
Just src -> Just src
--- | Returns @True@ if keys match (case insensitive).
-refsMatch :: [Inline] -> [Inline] -> Bool
-refsMatch ((Str x):restx) ((Str y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((Emph x):restx) ((Emph y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Strong x):restx) ((Strong y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Strikeout x):restx) ((Strikeout y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Superscript x):restx) ((Superscript y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Subscript x):restx) ((Subscript y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) =
- refsMatch x y && refsMatch restx resty
-refsMatch ((Quoted t x):restx) ((Quoted u y):resty) =
- t == u && refsMatch x y && refsMatch restx resty
-refsMatch ((Code x):restx) ((Code y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((Math t x):restx) ((Math u y):resty) =
- ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty
-refsMatch ((TeX x):restx) ((TeX y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) =
- ((map toLower x) == (map toLower y)) && refsMatch restx resty
-refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty
-refsMatch [] x = null x
-refsMatch x [] = null x
+-- | Fail unless we're in "smart typography" mode.
+failUnlessSmart :: GenParser tok ParserState ()
+failUnlessSmart = getState >>= guard . stateSmart
+
+smartPunctuation :: GenParser Char ParserState Inline
+ -> GenParser Char ParserState Inline
+smartPunctuation inlineParser = do
+ failUnlessSmart
+ choice [ quoted inlineParser, apostrophe, dash, ellipses ]
+
+apostrophe :: GenParser Char ParserState Inline
+apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
+
+quoted :: GenParser Char ParserState Inline
+ -> GenParser Char ParserState Inline
+quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser
+
+withQuoteContext :: QuoteContext
+ -> (GenParser Char ParserState Inline)
+ -> GenParser Char ParserState Inline
+withQuoteContext context parser = do
+ oldState <- getState
+ let oldQuoteContext = stateQuoteContext oldState
+ setState oldState { stateQuoteContext = context }
+ result <- parser
+ newState <- getState
+ setState newState { stateQuoteContext = oldQuoteContext }
+ return result
+
+singleQuoted :: GenParser Char ParserState Inline
+ -> GenParser Char ParserState Inline
+singleQuoted inlineParser = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>=
+ return . Quoted SingleQuote . normalizeSpaces
+
+doubleQuoted :: GenParser Char ParserState Inline
+ -> GenParser Char ParserState Inline
+doubleQuoted inlineParser = try $ do
+ doubleQuoteStart
+ withQuoteContext InDoubleQuote $ do
+ contents <- manyTill inlineParser doubleQuoteEnd
+ return . Quoted DoubleQuote . normalizeSpaces $ contents
+
+failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
+failIfInQuoteContext context = do
+ st <- getState
+ if stateQuoteContext st == context
+ then fail "already inside quotes"
+ else return ()
+
+charOrRef :: [Char] -> GenParser Char st Char
+charOrRef cs =
+ oneOf cs <|> try (do c <- characterReference
+ guard (c `elem` cs)
+ return c)
+
+singleQuoteStart :: GenParser Char ParserState ()
+singleQuoteStart = do
+ failIfInQuoteContext InSingleQuote
+ try $ do charOrRef "'\8216"
+ notFollowedBy (oneOf ")!],.;:-? \t\n")
+ notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
+ satisfy (not . isAlphaNum)))
+ -- possess/contraction
+ return ()
+
+singleQuoteEnd :: GenParser Char st ()
+singleQuoteEnd = try $ do
+ charOrRef "'\8217"
+ notFollowedBy alphaNum
+
+doubleQuoteStart :: GenParser Char ParserState ()
+doubleQuoteStart = do
+ failIfInQuoteContext InDoubleQuote
+ try $ do charOrRef "\"\8220"
+ notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n'))
+
+doubleQuoteEnd :: GenParser Char st ()
+doubleQuoteEnd = do
+ charOrRef "\"\8221"
+ return ()
+
+ellipses :: GenParser Char st Inline
+ellipses = do
+ try (charOrRef "…") <|> try (string "..." >> return '…')
+ return Ellipses
+
+dash :: GenParser Char st Inline
+dash = enDash <|> emDash
+
+enDash :: GenParser Char st Inline
+enDash = do
+ try (charOrRef "–") <|>
+ try (char '-' >> lookAhead (satisfy isDigit) >> return '–')
+ return EnDash
+
+emDash :: GenParser Char st Inline
+emDash = do
+ try (charOrRef "—") <|> (try $ string "--" >> optional (char '-') >> return '—')
+ return EmDash
+
+--
+-- Macros
+--
+
+-- | Parse a \newcommand or \renewcommand macro definition.
+macro :: GenParser Char ParserState Block
+macro = do
+ getState >>= guard . stateApplyMacros
+ inp <- getInput
+ case parseMacroDefinitions inp of
+ ([], _) -> pzero
+ (ms, rest) -> do count (length inp - length rest) anyChar
+ updateState $ \st ->
+ st { stateMacros = ms ++ stateMacros st }
+ return Null
+
+-- | Apply current macros to string.
+applyMacros' :: String -> GenParser Char ParserState String
+applyMacros' target = do
+ apply <- liftM stateApplyMacros getState
+ if apply
+ then do macros <- liftM stateMacros getState
+ return $ applyMacros macros target
+ else return target
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
new file mode 100644
index 000000000..54d65af6f
--- /dev/null
+++ b/src/Text/Pandoc/Pretty.hs
@@ -0,0 +1,429 @@
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-
+Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Pretty
+ Copyright : Copyright (C) 2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+A prettyprinting library for the production of text documents,
+including wrapped text, indentated blocks, and tables.
+-}
+
+module Text.Pandoc.Pretty (
+ Doc
+ , render
+ , cr
+ , blankline
+ , space
+ , text
+ , char
+ , prefixed
+ , flush
+ , nest
+ , hang
+ , nowrap
+ , offset
+ , height
+ , lblock
+ , cblock
+ , rblock
+ , (<>)
+ , (<+>)
+ , ($$)
+ , ($+$)
+ , isEmpty
+ , empty
+ , cat
+ , hcat
+ , hsep
+ , vcat
+ , vsep
+ , inside
+ , braces
+ , brackets
+ , parens
+ , quotes
+ , doubleQuotes
+ )
+
+where
+import Data.DList (DList, fromList, toList, cons, singleton)
+import Data.List (intercalate)
+import Data.Monoid
+import Data.String
+import Control.Monad.State
+import Data.Char (isSpace)
+
+data Monoid a =>
+ RenderState a = RenderState{
+ output :: [a] -- ^ In reverse order
+ , prefix :: String
+ , usePrefix :: Bool
+ , lineLength :: Maybe Int -- ^ 'Nothing' means no wrapping
+ , column :: Int
+ , newlines :: Int -- ^ Number of preceding newlines
+ }
+
+type DocState a = State (RenderState a) ()
+
+data D = Text Int String
+ | Block Int [String]
+ | Prefixed String Doc
+ | Flush Doc
+ | BreakingSpace
+ | CarriageReturn
+ | NewLine
+ | BlankLine
+ deriving (Show)
+
+newtype Doc = Doc { unDoc :: DList D }
+ deriving (Monoid)
+
+instance Show Doc where
+ show = render Nothing
+
+instance IsString Doc where
+ fromString = text
+
+-- | True if the document is empty.
+isEmpty :: Doc -> Bool
+isEmpty = null . toList . unDoc
+
+-- | The empty document.
+empty :: Doc
+empty = mempty
+
+-- | @a <> b@ is the result of concatenating @a@ with @b@.
+(<>) :: Doc -> Doc -> Doc
+(<>) = mappend
+
+-- | Concatenate a list of 'Doc's.
+cat :: [Doc] -> Doc
+cat = mconcat
+
+-- | Same as 'cat'.
+hcat :: [Doc] -> Doc
+hcat = mconcat
+
+-- | Concatenate a list of 'Doc's, putting breakable spaces
+-- between them.
+(<+>) :: Doc -> Doc -> Doc
+(<+>) x y = if isEmpty x
+ then y
+ else if isEmpty y
+ then x
+ else x <> space <> y
+
+-- | Same as 'cat', but putting breakable spaces between the
+-- 'Doc's.
+hsep :: [Doc] -> Doc
+hsep = foldr (<+>) empty
+
+-- | @a $$ b@ puts @a@ above @b@.
+($$) :: Doc -> Doc -> Doc
+($$) x y = if isEmpty x
+ then y
+ else if isEmpty y
+ then x
+ else x <> cr <> y
+
+-- | @a $$ b@ puts @a@ above @b@, with a blank line between.
+($+$) :: Doc -> Doc -> Doc
+($+$) x y = if isEmpty x
+ then y
+ else if isEmpty y
+ then x
+ else x <> blankline <> y
+
+-- | List version of '$$'.
+vcat :: [Doc] -> Doc
+vcat = foldr ($$) empty
+
+-- | List version of '$+$'.
+vsep :: [Doc] -> Doc
+vsep = foldr ($+$) empty
+
+outp :: (IsString a, Monoid a)
+ => Int -> String -> DocState a
+outp off s | off <= 0 = do
+ st' <- get
+ let rawpref = prefix st'
+ when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do
+ let pref = reverse $ dropWhile isSpace $ reverse rawpref
+ modify $ \st -> st{ output = fromString pref : output st
+ , column = column st + length pref }
+ when (off < 0) $ do
+ modify $ \st -> st { output = fromString s : output st
+ , column = 0
+ , newlines = newlines st + 1 }
+outp off s = do
+ st' <- get
+ let pref = prefix st'
+ when (column st' == 0 && usePrefix st' && not (null pref)) $ do
+ modify $ \st -> st{ output = fromString pref : output st
+ , column = column st + length pref }
+ modify $ \st -> st{ output = fromString s : output st
+ , column = column st + off
+ , newlines = 0 }
+
+-- | Renders a 'Doc'. @render (Just n)@ will use
+-- a line length of @n@ to reflow text on breakable spaces.
+-- @render Nothing@ will not reflow text.
+render :: (Monoid a, IsString a)
+ => Maybe Int -> Doc -> a
+render linelen doc = fromString . mconcat . reverse . output $
+ execState (renderDoc doc) startingState
+ where startingState = RenderState{
+ output = mempty
+ , prefix = ""
+ , usePrefix = True
+ , lineLength = linelen
+ , column = 0
+ , newlines = 2 }
+
+renderDoc :: (IsString a, Monoid a)
+ => Doc -> DocState a
+renderDoc = renderList . toList . unDoc
+
+renderList :: (IsString a, Monoid a)
+ => [D] -> DocState a
+renderList [] = return ()
+renderList (Text off s : xs) = do
+ outp off s
+ renderList xs
+
+renderList (Prefixed pref d : xs) = do
+ st <- get
+ let oldPref = prefix st
+ put st{ prefix = prefix st ++ pref }
+ renderDoc d
+ modify $ \s -> s{ prefix = oldPref }
+ renderList xs
+
+renderList (Flush d : xs) = do
+ st <- get
+ let oldUsePrefix = usePrefix st
+ put st{ usePrefix = False }
+ renderDoc d
+ modify $ \s -> s{ usePrefix = oldUsePrefix }
+ renderList xs
+
+renderList (BlankLine : xs) = do
+ st <- get
+ case output st of
+ _ | newlines st > 1 || null xs -> return ()
+ _ | column st == 0 -> do
+ outp (-1) "\n"
+ _ -> do
+ outp (-1) "\n"
+ outp (-1) "\n"
+ renderList xs
+
+renderList (CarriageReturn : xs) = do
+ st <- get
+ if newlines st > 0 || null xs
+ then renderList xs
+ else do
+ outp (-1) "\n"
+ renderList xs
+
+renderList (NewLine : xs) = do
+ outp (-1) "\n"
+ renderList xs
+
+renderList (BreakingSpace : CarriageReturn : xs) = renderList (CarriageReturn:xs)
+renderList (BreakingSpace : NewLine : xs) = renderList (NewLine:xs)
+renderList (BreakingSpace : BlankLine : xs) = renderList (BlankLine:xs)
+renderList (BreakingSpace : BreakingSpace : xs) = renderList (BreakingSpace:xs)
+renderList (BreakingSpace : xs) = do
+ let isText (Text _ _) = True
+ isText (Block _ _) = True
+ isText _ = False
+ let isBreakingSpace BreakingSpace = True
+ isBreakingSpace _ = False
+ let xs' = dropWhile isBreakingSpace xs
+ let next = takeWhile isText xs'
+ st <- get
+ let off = sum $ map offsetOf next
+ case lineLength st of
+ Just l | column st + 1 + off > l -> do
+ outp (-1) "\n"
+ renderList xs'
+ _ -> do
+ outp 1 " "
+ renderList xs'
+
+renderList (b1@Block{} : b2@Block{} : xs) =
+ renderList (mergeBlocks False b1 b2 : xs)
+
+renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) =
+ renderList (mergeBlocks True b1 b2 : xs)
+
+renderList (Block width lns : xs) = do
+ st <- get
+ let oldPref = prefix st
+ case column st - length oldPref of
+ n | n > 0 -> modify $ \s -> s{ prefix = oldPref ++ replicate n ' ' }
+ _ -> return ()
+ renderDoc $ blockToDoc width lns
+ modify $ \s -> s{ prefix = oldPref }
+ renderList xs
+
+mergeBlocks :: Bool -> D -> D -> D
+mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) =
+ Block (w1 + w2 + if addSpace then 1 else 0) $
+ zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties)
+ where empties = replicate (abs $ length lns1 - length lns2) ""
+ pad n s = s ++ replicate (n - length s) ' '
+ sp "" = ""
+ sp xs = if addSpace then (' ' : xs) else xs
+mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!"
+
+blockToDoc :: Int -> [String] -> Doc
+blockToDoc _ lns = text $ intercalate "\n" lns
+
+offsetOf :: D -> Int
+offsetOf (Text o _) = o
+offsetOf (Block w _) = w
+offsetOf BreakingSpace = 1
+offsetOf _ = 0
+
+-- | A literal string.
+text :: String -> Doc
+text = Doc . toChunks
+ where toChunks :: String -> DList D
+ toChunks [] = mempty
+ toChunks s = case break (=='\n') s of
+ ([], _:ys) -> NewLine `cons` toChunks ys
+ (xs, _:ys) -> Text (length xs) xs `cons`
+ NewLine `cons` toChunks ys
+ (xs, []) -> singleton $ Text (length xs) xs
+
+-- | A character.
+char :: Char -> Doc
+char c = text [c]
+
+-- | A breaking (reflowable) space.
+space :: Doc
+space = Doc $ singleton BreakingSpace
+
+-- | A carriage return. Does nothing if we're at the beginning of
+-- a line; otherwise inserts a newline.
+cr :: Doc
+cr = Doc $ singleton CarriageReturn
+
+-- | Inserts a blank line unless one exists already.
+-- (@blankline <> blankline@ has the same effect as @blankline@.
+-- If you want multiple blank lines, use @text "\\n\\n"@.
+blankline :: Doc
+blankline = Doc $ singleton BlankLine
+
+-- | Uses the specified string as a prefix for every line of
+-- the inside document (except the first, if not at the beginning
+-- of the line).
+prefixed :: String -> Doc -> Doc
+prefixed pref doc = Doc $ singleton $ Prefixed pref doc
+
+-- | Makes a 'Doc' flush against the left margin.
+flush :: Doc -> Doc
+flush doc = Doc $ singleton $ Flush doc
+
+-- | Indents a 'Doc' by the specified number of spaces.
+nest :: Int -> Doc -> Doc
+nest ind = prefixed (replicate ind ' ')
+
+-- | A hanging indent. @hang ind start doc@ prints @start@,
+-- then @doc@, leaving an indent of @ind@ spaces on every
+-- line but the first.
+hang :: Int -> Doc -> Doc -> Doc
+hang ind start doc = start <> nest ind doc
+
+-- | Makes a 'Doc' non-reflowable.
+nowrap :: Doc -> Doc
+nowrap doc = Doc $ fromList $ map replaceSpace $ toList $ unDoc doc
+ where replaceSpace BreakingSpace = Text 1 " "
+ replaceSpace x = x
+
+-- | Returns the width of a 'Doc'.
+offset :: Doc -> Int
+offset d = case map length . lines . render Nothing $ d of
+ [] -> 0
+ os -> maximum os
+
+block :: (String -> String) -> Int -> Doc -> Doc
+block filler width = Doc . singleton . Block width .
+ map filler . chop width . render (Just width)
+
+-- | @lblock n d@ is a block of width @n@ characters, with
+-- text derived from @d@ and aligned to the left.
+lblock :: Int -> Doc -> Doc
+lblock = block id
+
+-- | Like 'lblock' but aligned to the right.
+rblock :: Int -> Doc -> Doc
+rblock w = block (\s -> replicate (w - length s) ' ' ++ s) w
+
+-- | Like 'lblock' but aligned centered.
+cblock :: Int -> Doc -> Doc
+cblock w = block (\s -> replicate ((w - length s) `div` 2) ' ' ++ s) w
+
+-- | Returns the height of a block or other 'Doc'.
+height :: Doc -> Int
+height = length . lines . render Nothing
+
+chop :: Int -> String -> [String]
+chop _ [] = []
+chop n cs = case break (=='\n') cs of
+ (xs, ys) -> if len <= n
+ then case ys of
+ [] -> [xs]
+ (_:[]) -> [xs, ""]
+ (_:zs) -> xs : chop n zs
+ else take n xs : chop n (drop n xs ++ ys)
+ where len = length xs
+
+-- | Encloses a 'Doc' inside a start and end 'Doc'.
+inside :: Doc -> Doc -> Doc -> Doc
+inside start end contents =
+ start <> contents <> end
+
+-- | Puts a 'Doc' in curly braces.
+braces :: Doc -> Doc
+braces = inside (char '{') (char '}')
+
+-- | Puts a 'Doc' in square brackets.
+brackets :: Doc -> Doc
+brackets = inside (char '[') (char ']')
+
+-- | Puts a 'Doc' in parentheses.
+parens :: Doc -> Doc
+parens = inside (char '(') (char ')')
+
+-- | Wraps a 'Doc' in single quotes.
+quotes :: Doc -> Doc
+quotes = inside (char '\'') (char '\'')
+
+-- | Wraps a 'Doc' in double quotes.
+doubleQuotes :: Doc -> Doc
+doubleQuotes = inside (char '"') (char '"')
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f47309d3f..18e3113d3 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -27,43 +27,397 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of HTML to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.HTML (
- readHtml,
- rawHtmlInline,
- rawHtmlBlock,
- anyHtmlBlockTag,
- anyHtmlInlineTag,
- anyHtmlTag,
- anyHtmlEndTag,
- htmlEndTag,
- extractTagType,
- htmlBlockElement,
- htmlComment,
- unsanitaryURI
+module Text.Pandoc.Readers.HTML ( readHtml
+ , htmlTag
+ , htmlInBalanced
+ , isInlineTag
+ , isBlockTag
+ , isTextTag
+ , isCommentTag
) where
import Text.ParserCombinators.Parsec
+import Text.ParserCombinators.Parsec.Pos
+import Text.HTML.TagSoup
+import Text.HTML.TagSoup.Match
import Text.Pandoc.Definition
+import Text.Pandoc.Builder (text, toList)
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
-import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
-import Data.Maybe ( fromMaybe )
-import Data.List ( isPrefixOf, isSuffixOf, intercalate )
-import Data.Char ( toLower, isAlphaNum )
-import Network.URI ( parseURIReference, URI (..) )
-import Control.Monad ( liftM, when )
+import Data.Maybe ( fromMaybe, isJust )
+import Data.List ( intercalate )
+import Data.Char ( isSpace, isDigit )
+import Control.Monad ( liftM, guard )
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: ParserState -- ^ Parser state
-> String -- ^ String to parse (assumes @'\n'@ line endings)
-> Pandoc
-readHtml = readWith parseHtml
+readHtml st inp = Pandoc meta blocks
+ where blocks = readWith parseBody st rest
+ tags = canonicalizeTags $
+ parseTagsOptions parseOptions{ optTagPosition = True } inp
+ hasHeader = any (~== TagOpen "head" []) tags
+ (meta, rest) = if hasHeader
+ then parseHeader tags
+ else (Meta [] [] [], tags)
+
+type TagParser = GenParser (Tag String) ParserState
+
+-- TODO - fix this - not every header has a title tag
+parseHeader :: [Tag String] -> (Meta, [Tag String])
+parseHeader tags = (Meta{docTitle = tit'', docAuthors = [], docDate = []}, rest)
+ where (tit,_) = break (~== TagClose "title") $ drop 1 $
+ dropWhile (\t -> not $ t ~== TagOpen "title" []) tags
+ tit' = concatMap fromTagText $ filter isTagText tit
+ tit'' = normalizeSpaces $ toList $ text tit'
+ rest = drop 1 $ dropWhile (\t -> not $ t ~== TagClose "head" ||
+ t ~== TagOpen "body" []) tags
+
+parseBody :: TagParser [Block]
+parseBody = liftM concat $ manyTill block eof
+
+block :: TagParser [Block]
+block = choice
+ [ pPara
+ , pHeader
+ , pBlockQuote
+ , pCodeBlock
+ , pList
+ , pHrule
+ , pSimpleTable
+ , pPlain
+ , pRawHtmlBlock
+ ]
+
+renderTags' :: [Tag String] -> String
+renderTags' = renderTagsOptions
+ renderOptions{ optMinimize = (`elem` ["hr","br","img"]) }
+
+pList :: TagParser [Block]
+pList = pBulletList <|> pOrderedList <|> pDefinitionList
+
+pBulletList :: TagParser [Block]
+pBulletList = try $ do
+ pSatisfy (~== TagOpen "ul" [])
+ let nonItem = pSatisfy (\t ->
+ not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
+ not (t ~== TagClose "ul"))
+ -- note: if they have an <ol> or <ul> not in scope of a <li>,
+ -- treat it as a list item, though it's not valid xhtml...
+ skipMany nonItem
+ items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul")
+ return [BulletList items]
+
+pOrderedList :: TagParser [Block]
+pOrderedList = try $ do
+ TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
+ st <- getState
+ let (start, style) = if stateStrict st
+ then (1, DefaultStyle)
+ else (sta', sty')
+ where sta = fromMaybe "1" $
+ lookup "start" attribs
+ sta' = if all isDigit sta
+ then read sta
+ else 1
+ sty = fromMaybe (fromMaybe "" $
+ lookup "style" attribs) $
+ lookup "class" attribs
+ sty' = case sty of
+ "lower-roman" -> LowerRoman
+ "upper-roman" -> UpperRoman
+ "lower-alpha" -> LowerAlpha
+ "upper-alpha" -> UpperAlpha
+ "decimal" -> Decimal
+ _ -> DefaultStyle
+ let nonItem = pSatisfy (\t ->
+ not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
+ not (t ~== TagClose "ol"))
+ -- note: if they have an <ol> or <ul> not in scope of a <li>,
+ -- treat it as a list item, though it's not valid xhtml...
+ skipMany nonItem
+ items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol")
+ return [OrderedList (start, style, DefaultDelim) items]
+
+pDefinitionList :: TagParser [Block]
+pDefinitionList = try $ do
+ pSatisfy (~== TagOpen "dl" [])
+ items <- manyTill pDefListItem (pCloses "dl")
+ return [DefinitionList items]
+
+pDefListItem :: TagParser ([Inline],[[Block]])
+pDefListItem = try $ do
+ let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) &&
+ not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl"))
+ terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline)
+ defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block)
+ skipMany nonItem
+ let term = intercalate [LineBreak] terms
+ return (term, defs)
+
+pRawTag :: TagParser String
+pRawTag = do
+ tag <- pAnyTag
+ let ignorable x = x `elem` ["html","head","body"]
+ if tagOpen ignorable (const True) tag || tagClose ignorable tag
+ then return []
+ else return $ renderTags' [tag]
+
+pRawHtmlBlock :: TagParser [Block]
+pRawHtmlBlock = do
+ raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag
+ state <- getState
+ if stateParseRaw state && not (null raw)
+ then return [RawBlock "html" raw]
+ else return []
+
+pHtmlBlock :: String -> TagParser String
+pHtmlBlock t = try $ do
+ open <- pSatisfy (~== TagOpen t [])
+ contents <- manyTill pAnyTag (pSatisfy (~== TagClose t))
+ return $ renderTags' $ [open] ++ contents ++ [TagClose t]
+
+pHeader :: TagParser [Block]
+pHeader = try $ do
+ TagOpen tagtype attr <- pSatisfy $
+ tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"])
+ (const True)
+ let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")]
+ let level = read (drop 1 tagtype)
+ contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof)
+ return $ if bodyTitle
+ then [] -- skip a representation of the title in the body
+ else [Header level $ normalizeSpaces contents]
+
+pHrule :: TagParser [Block]
+pHrule = do
+ pSelfClosing (=="hr") (const True)
+ return [HorizontalRule]
+
+pSimpleTable :: TagParser [Block]
+pSimpleTable = try $ do
+ TagOpen _ _ <- pSatisfy (~== TagOpen "table" [])
+ skipMany pBlank
+ head' <- option [] $ pInTags "th" pTd
+ rows <- many1 $ try $
+ skipMany pBlank >> pInTags "tr" pTd
+ skipMany pBlank
+ TagClose _ <- pSatisfy (~== TagClose "table")
+ let cols = maximum $ map length rows
+ let aligns = replicate cols AlignLeft
+ let widths = replicate cols 0
+ return [Table [] aligns widths head' rows]
+
+pTd :: TagParser [TableCell]
+pTd = try $ do
+ skipMany pBlank
+ res <- pInTags "td" pPlain
+ skipMany pBlank
+ return [res]
+
+pBlockQuote :: TagParser [Block]
+pBlockQuote = do
+ contents <- pInTags "blockquote" block
+ return [BlockQuote contents]
+
+pPlain :: TagParser [Block]
+pPlain = do
+ contents <- liftM (normalizeSpaces . concat) $ many1 inline
+ if null contents
+ then return []
+ else return [Plain contents]
+
+pPara :: TagParser [Block]
+pPara = do
+ contents <- pInTags "p" inline
+ return [Para $ normalizeSpaces contents]
+
+pCodeBlock :: TagParser [Block]
+pCodeBlock = try $ do
+ TagOpen _ attr <- pSatisfy (~== TagOpen "pre" [])
+ contents <- manyTill pAnyTag (pCloses "pre" <|> eof)
+ let rawText = concatMap fromTagText $ filter isTagText contents
+ -- drop leading newline if any
+ let result' = case rawText of
+ '\n':xs -> xs
+ _ -> rawText
+ -- drop trailing newline if any
+ let result = case reverse result' of
+ '\n':_ -> init result'
+ _ -> result'
+ let attribsId = fromMaybe "" $ lookup "id" attr
+ let attribsClasses = words $ fromMaybe "" $ lookup "class" attr
+ let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
+ st <- getState
+ let attribs = if stateStrict st
+ then ("",[],[])
+ else (attribsId, attribsClasses, attribsKV)
+ return [CodeBlock attribs result]
+
+inline :: TagParser [Inline]
+inline = choice
+ [ pTagText
+ , pEmph
+ , pStrong
+ , pSuperscript
+ , pSubscript
+ , pStrikeout
+ , pLineBreak
+ , pLink
+ , pImage
+ , pCode
+ , pRawHtmlInline
+ ]
+
+pLocation :: TagParser ()
+pLocation = do
+ (TagPosition r c) <- pSat isTagPosition
+ setPosition $ newPos "input" r c
+
+pSat :: (Tag String -> Bool) -> TagParser (Tag String)
+pSat f = do
+ pos <- getPosition
+ token show (const pos) (\x -> if f x then Just x else Nothing)
+
+pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String)
+pSatisfy f = try $ optional pLocation >> pSat f
+
+pAnyTag :: TagParser (Tag String)
+pAnyTag = pSatisfy (const True)
+
+pSelfClosing :: (String -> Bool) -> ([Attribute String] -> Bool)
+ -> TagParser (Tag String)
+pSelfClosing f g = do
+ open <- pSatisfy (tagOpen f g)
+ optional $ pSatisfy (tagClose f)
+ return open
+
+pEmph :: TagParser [Inline]
+pEmph = pInlinesInTags "em" Emph <|> pInlinesInTags "i" Emph
+
+pStrong :: TagParser [Inline]
+pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong
+
+pSuperscript :: TagParser [Inline]
+pSuperscript = failIfStrict >> pInlinesInTags "sup" Superscript
+
+pSubscript :: TagParser [Inline]
+pSubscript = failIfStrict >> pInlinesInTags "sub" Subscript
+
+pStrikeout :: TagParser [Inline]
+pStrikeout = do
+ failIfStrict
+ pInlinesInTags "s" Strikeout <|>
+ pInlinesInTags "strike" Strikeout <|>
+ pInlinesInTags "del" Strikeout <|>
+ try (do pSatisfy (~== TagOpen "span" [("class","strikeout")])
+ contents <- liftM concat $ manyTill inline (pCloses "span")
+ return [Strikeout contents])
+
+pLineBreak :: TagParser [Inline]
+pLineBreak = do
+ pSelfClosing (=="br") (const True)
+ return [LineBreak]
+
+pLink :: TagParser [Inline]
+pLink = try $ do
+ tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href"))
+ let url = fromAttrib "href" tag
+ let title = fromAttrib "title" tag
+ lab <- liftM concat $ manyTill inline (pCloses "a")
+ return [Link (normalizeSpaces lab) (escapeURI url, title)]
+
+pImage :: TagParser [Inline]
+pImage = do
+ tag <- pSelfClosing (=="img") (isJust . lookup "src")
+ let url = fromAttrib "src" tag
+ let title = fromAttrib "title" tag
+ let alt = fromAttrib "alt" tag
+ return [Image (toList $ text alt) (escapeURI url, title)]
+
+pCode :: TagParser [Inline]
+pCode = try $ do
+ (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
+ result <- manyTill pAnyTag (pCloses open)
+ let ident = fromMaybe "" $ lookup "id" attr
+ let classes = words $ fromMaybe [] $ lookup "class" attr
+ let rest = filter (\(x,_) -> x /= "id" && x /= "class") attr
+ return [Code (ident,classes,rest)
+ $ intercalate " " $ lines $ innerText result]
+
+pRawHtmlInline :: TagParser [Inline]
+pRawHtmlInline = do
+ result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag
+ state <- getState
+ if stateParseRaw state
+ then return [RawInline "html" $ renderTags' [result]]
+ else return []
+
+pInlinesInTags :: String -> ([Inline] -> Inline)
+ -> TagParser [Inline]
+pInlinesInTags tagtype f = do
+ contents <- pInTags tagtype inline
+ return [f contents]
+
+pInTags :: String -> TagParser [a]
+ -> TagParser [a]
+pInTags tagtype parser = try $ do
+ pSatisfy (~== TagOpen tagtype [])
+ liftM concat $ manyTill parser (pCloses tagtype <|> eof)
+
+pCloses :: String -> TagParser ()
+pCloses tagtype = try $ do
+ t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag
+ case t of
+ (TagClose t') | t' == tagtype -> pAnyTag >> return ()
+ (TagOpen t' _) | t' `closes` tagtype -> return ()
+ (TagClose "ul") | tagtype == "li" -> return ()
+ (TagClose "ol") | tagtype == "li" -> return ()
+ (TagClose "dl") | tagtype == "li" -> return ()
+ _ -> pzero
+
+pTagText :: TagParser [Inline]
+pTagText = try $ do
+ (TagText str) <- pSatisfy isTagText
+ st <- getState
+ case runParser (many pTagContents) st "text" str of
+ Left _ -> fail $ "Could not parse `" ++ str ++ "'"
+ Right result -> return result
+
+pBlank :: TagParser ()
+pBlank = try $ do
+ (TagText str) <- pSatisfy isTagText
+ guard $ all isSpace str
+
+pTagContents :: GenParser Char ParserState Inline
+pTagContents = pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol
+
+pStr :: GenParser Char ParserState Inline
+pStr = liftM Str $ many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c)
+
+isSpecial :: Char -> Bool
+isSpecial '"' = True
+isSpecial '\'' = True
+isSpecial '.' = True
+isSpecial '-' = True
+isSpecial '\8216' = True
+isSpecial '\8217' = True
+isSpecial '\8220' = True
+isSpecial '\8221' = True
+isSpecial _ = False
+
+pSymbol :: GenParser Char ParserState Inline
+pSymbol = satisfy isSpecial >>= return . Str . (:[])
+
+pSpace :: GenParser Char ParserState Inline
+pSpace = many1 (satisfy isSpace) >> return Space
--
-- Constants
--
-eitherBlockOrInline :: [[Char]]
+eitherBlockOrInline :: [String]
eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
"map", "area", "object"]
@@ -76,57 +430,41 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big",
"textarea", "tt", "u", "var"]
-}
-blockHtmlTags :: [[Char]]
+blockHtmlTags :: [String]
blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div",
"dl", "fieldset", "form", "h1", "h2", "h3", "h4",
- "h5", "h6", "head", "hr", "html", "isindex", "menu", "noframes",
- "noscript", "ol", "p", "pre", "table", "ul", "dd",
+ "h5", "h6", "head", "hr", "html", "isindex", "menu",
+ "noframes", "noscript", "ol", "p", "pre", "table", "ul", "dd",
"dt", "frameset", "li", "tbody", "td", "tfoot",
"th", "thead", "tr", "script", "style"]
-sanitaryTags :: [[Char]]
-sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big",
- "blockquote", "br", "button", "caption", "center",
- "cite", "code", "col", "colgroup", "dd", "del", "dfn",
- "dir", "div", "dl", "dt", "em", "fieldset", "font",
- "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
- "i", "img", "input", "ins", "kbd", "label", "legend",
- "li", "map", "menu", "ol", "optgroup", "option", "p",
- "pre", "q", "s", "samp", "select", "small", "span",
- "strike", "strong", "sub", "sup", "table", "tbody",
- "td", "textarea", "tfoot", "th", "thead", "tr", "tt",
- "u", "ul", "var"]
-
-sanitaryAttributes :: [[Char]]
-sanitaryAttributes = ["abbr", "accept", "accept-charset",
- "accesskey", "action", "align", "alt", "axis",
- "border", "cellpadding", "cellspacing", "char",
- "charoff", "charset", "checked", "cite", "class",
- "clear", "cols", "colspan", "color", "compact",
- "coords", "datetime", "dir", "disabled",
- "enctype", "for", "frame", "headers", "height",
- "href", "hreflang", "hspace", "id", "ismap",
- "label", "lang", "longdesc", "maxlength", "media",
- "method", "multiple", "name", "nohref", "noshade",
- "nowrap", "prompt", "readonly", "rel", "rev",
- "rows", "rowspan", "rules", "scope", "selected",
- "shape", "size", "span", "src", "start",
- "summary", "tabindex", "target", "title", "type",
- "usemap", "valign", "value", "vspace", "width"]
+isInlineTag :: Tag String -> Bool
+isInlineTag t = tagOpen (`notElem` blockHtmlTags) (const True) t ||
+ tagClose (`notElem` blockHtmlTags) t ||
+ tagComment (const True) t
+
+isBlockTag :: Tag String -> Bool
+isBlockTag t = tagOpen (`elem` blocktags) (const True) t ||
+ tagClose (`elem` blocktags) t ||
+ tagComment (const True) t
+ where blocktags = blockHtmlTags ++ eitherBlockOrInline
+
+isTextTag :: Tag String -> Bool
+isTextTag = tagText (const True)
+
+isCommentTag :: Tag String -> Bool
+isCommentTag = tagComment (const True)
-- taken from HXT and extended
closes :: String -> String -> Bool
-"EOF" `closes` _ = True
_ `closes` "body" = False
_ `closes` "html" = False
"a" `closes` "a" = True
"li" `closes` "li" = True
"th" `closes` t | t `elem` ["th","td"] = True
-"td" `closes` t | t `elem` ["th","td"] = True
"tr" `closes` t | t `elem` ["th","td","tr"] = True
"dt" `closes` t | t `elem` ["dt","dd"] = True
-"dd" `closes` t | t `elem` ["dt","dd"] = True
"hr" `closes` "p" = True
"p" `closes` "p" = True
"meta" `closes` "meta" = True
@@ -148,627 +486,34 @@ t1 `closes` t2 |
t2 `notElem` (blockHtmlTags ++ eitherBlockOrInline) = True
_ `closes` _ = False
---
--- HTML utility functions
---
-
--- | Returns @True@ if sanitization is specified and the specified tag is
--- not on the sanitized tag list.
-unsanitaryTag :: [Char]
- -> GenParser tok ParserState Bool
-unsanitaryTag tag = do
- st <- getState
- return $ stateSanitizeHTML st && tag `notElem` sanitaryTags
-
--- | returns @True@ if sanitization is specified and the specified attribute
--- is not on the sanitized attribute list.
-unsanitaryAttribute :: ([Char], String, t)
- -> GenParser tok ParserState Bool
-unsanitaryAttribute (attr, val, _) = do
- st <- getState
- return $ stateSanitizeHTML st &&
- (attr `notElem` sanitaryAttributes ||
- (attr `elem` ["href","src"] && unsanitaryURI val))
-
--- | Returns @True@ if the specified URI is potentially a security risk.
-unsanitaryURI :: String -> Bool
-unsanitaryURI u =
- let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:",
- "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:",
- "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:",
- "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:",
- "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:",
- "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:",
- "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:",
- "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
- "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
- "snews:", "webcal:", "ymsgr:"]
- in case parseURIReference (escapeURI u) of
- Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes
- Nothing -> True
-
--- | Read blocks until end tag.
-blocksTilEnd :: String -> GenParser Char ParserState [Block]
-blocksTilEnd tag = do
- blocks <- manyTill (block >>~ spaces) (htmlEndTag tag)
- return $ filter (/= Null) blocks
-
--- | Read inlines until end tag.
-inlinesTilEnd :: String -> GenParser Char ParserState [Inline]
-inlinesTilEnd tag = manyTill inline (htmlEndTag tag)
-
--- | Parse blocks between open and close tag.
-blocksIn :: String -> GenParser Char ParserState [Block]
-blocksIn tag = try $ htmlOpenTag tag >> spaces >> blocksTilEnd tag
-
--- | Parse inlines between open and close tag.
-inlinesIn :: String -> GenParser Char ParserState [Inline]
-inlinesIn tag = try $ htmlOpenTag tag >> spaces >> inlinesTilEnd tag
-
--- | Extract type from a tag: e.g. @br@ from @\<br\>@
-extractTagType :: String -> String
-extractTagType ('<':rest) =
- let isSpaceOrSlash c = c `elem` "/ \n\t" in
- map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest
-extractTagType _ = ""
-
--- Parse any HTML tag (opening or self-closing) and return tag type
-anyOpener :: GenParser Char ParserState [Char]
-anyOpener = try $ do
- char '<'
- spaces
- tag <- many1 alphaNum
- skipMany htmlAttribute
- spaces
- option "" (string "/")
- spaces
- char '>'
- return $ map toLower tag
-
--- | Parse any HTML tag (opening or self-closing) and return text of tag
-anyHtmlTag :: GenParser Char ParserState [Char]
-anyHtmlTag = try $ do
- char '<'
- spaces
- tag <- many1 alphaNum
- attribs <- many htmlAttribute
- spaces
- ender <- option "" (string "/")
- let ender' = if null ender then "" else " /"
- spaces
- char '>'
- let result = "<" ++ tag ++
- concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">"
- unsanitary <- unsanitaryTag tag
- if unsanitary
- then return $ "<!-- unsafe HTML removed -->"
- else return result
-
-anyHtmlEndTag :: GenParser Char ParserState [Char]
-anyHtmlEndTag = try $ do
- char '<'
- spaces
- char '/'
- spaces
- tag <- many1 alphaNum
- spaces
- char '>'
- let result = "</" ++ tag ++ ">"
- unsanitary <- unsanitaryTag tag
- if unsanitary
- then return $ "<!-- unsafe HTML removed -->"
- else return result
-
-htmlTag :: Bool
- -> String
- -> GenParser Char ParserState (String, [(String, String)])
-htmlTag selfClosing tag = try $ do
- char '<'
- spaces
- stringAnyCase tag
- attribs <- many htmlAttribute
- spaces
- -- note: we want to handle both HTML and XHTML,
- -- so we don't require the /
- when selfClosing $ optional $ char '/' >> spaces
- char '>'
- return (tag, (map (\(name, content, _) -> (name, content)) attribs))
-
-htmlOpenTag :: String
- -> GenParser Char ParserState (String, [(String, String)])
-htmlOpenTag = htmlTag False
-
-htmlCloseTag :: String
- -> GenParser Char ParserState (String, [(String, String)])
-htmlCloseTag = htmlTag False . ('/':)
-
-htmlSelfClosingTag :: String
- -> GenParser Char ParserState (String, [(String, String)])
-htmlSelfClosingTag = htmlTag True
-
--- parses a quoted html attribute value
-quoted :: Char -> GenParser Char st (String, String)
-quoted quoteChar = do
- result <- between (char quoteChar) (char quoteChar)
- (many (noneOf [quoteChar]))
- return (result, [quoteChar])
-
-nullAttribute :: ([Char], [Char], [Char])
-nullAttribute = ("", "", "")
-
-htmlAttribute :: GenParser Char ParserState ([Char], [Char], [Char])
-htmlAttribute = do
- attr <- htmlRegularAttribute <|> htmlMinimizedAttribute
- unsanitary <- unsanitaryAttribute attr
- if unsanitary
- then return nullAttribute
- else return attr
-
--- minimized boolean attribute
-htmlMinimizedAttribute :: GenParser Char st ([Char], [Char], [Char])
-htmlMinimizedAttribute = try $ do
- many1 space
- name <- many1 (choice [letter, oneOf ".-_:"])
- return (name, name, name)
-
-htmlRegularAttribute :: GenParser Char st ([Char], [Char], [Char])
-htmlRegularAttribute = try $ do
- many1 space
- name <- many1 (choice [letter, oneOf ".-_:"])
- spaces
- char '='
- spaces
- (content, quoteStr) <- choice [ (quoted '\''),
- (quoted '"'),
- (do
- a <- many (noneOf " \t\n\r\"'<>")
- return (a,"")) ]
- return (name, content,
- (name ++ "=" ++ quoteStr ++ content ++ quoteStr))
-
--- | Parse an end tag of type 'tag'
-htmlEndTag :: [Char] -> GenParser Char ParserState [Char]
-htmlEndTag tag = try $ do
- closedByNext <- lookAhead $ option False $ liftM (`closes` tag) $
- anyOpener <|> (eof >> return "EOF")
- if closedByNext
- then return ""
- else do char '<'
- spaces
- char '/'
- spaces
- stringAnyCase tag
- spaces
- char '>'
- return $ "</" ++ tag ++ ">"
-
--- | Returns @True@ if the tag is (or can be) a block tag.
-isBlock :: String -> Bool
-isBlock tag = (extractTagType tag) `elem` (blockHtmlTags ++ eitherBlockOrInline)
-
-anyHtmlBlockTag :: GenParser Char ParserState [Char]
-anyHtmlBlockTag = try $ do
- tag <- anyHtmlTag <|> anyHtmlEndTag
- if isBlock tag then return tag else fail "not a block tag"
-
-anyHtmlInlineTag :: GenParser Char ParserState [Char]
-anyHtmlInlineTag = try $ do
- tag <- anyHtmlTag <|> anyHtmlEndTag
- if not (isBlock tag) then return tag else fail "not an inline tag"
-
--- | Parses material between script tags.
--- Scripts must be treated differently, because they can contain '<>' etc.
-htmlScript :: GenParser Char ParserState [Char]
-htmlScript = try $ do
- lookAhead $ htmlOpenTag "script"
- open <- anyHtmlTag
- rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script")
- st <- getState
- if stateSanitizeHTML st && not ("script" `elem` sanitaryTags)
- then return "<!-- unsafe HTML removed -->"
- else return $ open ++ rest ++ "</script>"
-
-scriptChunk :: GenParser Char ParserState [Char]
-scriptChunk = jsComment <|> jsString <|> jsChars
- where jsComment = jsEndlineComment <|> jsMultilineComment
- jsString = jsSingleQuoteString <|> jsDoubleQuoteString
- jsChars = many1 (noneOf "<\"'*/") <|> count 1 anyChar
- jsEndlineComment = try $ do
- string "//"
- res <- manyTill anyChar newline
- return ("//" ++ res)
- jsMultilineComment = try $ do
- string "/*"
- res <- manyTill anyChar (try $ string "*/")
- return ("/*" ++ res ++ "*/")
- jsSingleQuoteString = stringwith '\''
- jsDoubleQuoteString = stringwith '"'
- charWithEsc escapable = try $
- (try $ char '\\' >> oneOf ('\\':escapable) >>= \x -> return ['\\',x])
- <|> count 1 anyChar
- stringwith c = try $ do
- char c
- res <- liftM concat $ manyTill (charWithEsc [c]) (char c)
- return (c : (res ++ [c]))
-
--- | Parses material between style tags.
--- Style tags must be treated differently, because they can contain CSS
-htmlStyle :: GenParser Char ParserState [Char]
-htmlStyle = try $ do
- lookAhead $ htmlOpenTag "style"
- open <- anyHtmlTag
- rest <- manyTill anyChar (htmlEndTag "style")
- st <- getState
- if stateSanitizeHTML st && not ("style" `elem` sanitaryTags)
- then return "<!-- unsafe HTML removed -->"
- else return $ open ++ rest ++ "</style>"
-
-htmlBlockElement :: GenParser Char ParserState [Char]
-htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definition ]
-
-rawHtmlBlock :: GenParser Char ParserState Block
-rawHtmlBlock = try $ do
- body <- htmlBlockElement <|> rawVerbatimBlock <|> anyHtmlBlockTag
- state <- getState
- if stateParseRaw state then return (RawHtml body) else return Null
-
--- This is a block whose contents should be passed through verbatim, not interpreted.
-rawVerbatimBlock :: GenParser Char ParserState [Char]
-rawVerbatimBlock = try $ do
- start <- anyHtmlBlockTag
- let tagtype = extractTagType start
- if tagtype `elem` ["pre"]
- then do
- contents <- many (notFollowedBy' (htmlEndTag tagtype) >> anyChar)
- end <- htmlEndTag tagtype
- return $ start ++ contents ++ end
- else fail "Not a verbatim block"
-
--- We don't want to parse </body> or </html> as raw HTML, since these
--- are handled in parseHtml.
-rawHtmlBlock' :: GenParser Char ParserState Block
-rawHtmlBlock' = do notFollowedBy' (htmlCloseTag "body" <|>
- htmlCloseTag "html")
- rawHtmlBlock
-
--- | Parses an HTML comment.
-htmlComment :: GenParser Char st [Char]
-htmlComment = try $ do
- string "<!--"
- comment <- many $ noneOf "-"
- <|> try (char '-' >>~ notFollowedBy (try (char '-' >> char '>')))
- string "-->"
- return $ "<!--" ++ comment ++ "-->"
-
---
--- parsing documents
---
-
-xmlDec :: GenParser Char st [Char]
-xmlDec = try $ do
- string "<?"
- rest <- manyTill anyChar (char '>')
- return $ "<?" ++ rest ++ ">"
-
-definition :: GenParser Char st [Char]
-definition = try $ do
- string "<!"
- rest <- manyTill anyChar (char '>')
- return $ "<!" ++ rest ++ ">"
-
-nonTitleNonHead :: GenParser Char ParserState Char
-nonTitleNonHead = try $ do
- notFollowedBy $ (htmlOpenTag "title" >> return ' ') <|>
- (htmlEndTag "head" >> return ' ')
- (rawHtmlBlock >> return ' ') <|> anyChar
-
-parseTitle :: GenParser Char ParserState [Inline]
-parseTitle = try $ do
- (tag, _) <- htmlOpenTag "title"
- contents <- inlinesTilEnd tag
- spaces
- return contents
-
--- parse header and return meta-information (for now, just title)
-parseHead :: GenParser Char ParserState Meta
-parseHead = try $ do
- htmlOpenTag "head"
- spaces
- skipMany nonTitleNonHead
- contents <- option [] parseTitle
- skipMany nonTitleNonHead
- htmlEndTag "head"
- return $ Meta contents [] []
-
--- h1 class="title" representation of title in body
-bodyTitle :: GenParser Char ParserState [Inline]
-bodyTitle = try $ do
- (_, attribs) <- htmlOpenTag "h1"
- case (extractAttribute "class" attribs) of
- Just "title" -> return ""
- _ -> fail "not title"
- inlinesTilEnd "h1"
-
-endOfDoc :: GenParser Char ParserState ()
-endOfDoc = try $ do
- spaces
- optional (htmlEndTag "body")
- spaces
- optional (htmlEndTag "html" >> many anyChar) -- ignore stuff after </html>
- eof
-
-parseHtml :: GenParser Char ParserState Pandoc
-parseHtml = do
- sepEndBy (choice [xmlDec, definition, htmlComment]) spaces
- spaces
- optional $ htmlOpenTag "html"
- spaces
- meta <- option (Meta [] [] []) parseHead
- spaces
- optional $ htmlOpenTag "body"
- spaces
- optional bodyTitle -- skip title in body, because it's represented in meta
- blocks <- parseBlocks
- endOfDoc
- return $ Pandoc meta blocks
-
---
--- parsing blocks
---
-
-parseBlocks :: GenParser Char ParserState [Block]
-parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null))
-
-block :: GenParser Char ParserState Block
-block = choice [ codeBlock
- , header
- , hrule
- , list
- , blockQuote
- , para
- , plain
- , rawHtmlBlock'
- , notFollowedBy' endOfDoc >> char '<' >> return Null
- ] <?> "block"
-
---
--- header blocks
---
-
-header :: GenParser Char ParserState Block
-header = choice (map headerLevel (enumFromTo 1 5)) <?> "header"
-
-headerLevel :: Int -> GenParser Char ParserState Block
-headerLevel n = try $ do
- let level = "h" ++ show n
- htmlOpenTag level
- contents <- inlinesTilEnd level
- return $ Header n (normalizeSpaces contents)
-
---
--- hrule block
---
-
-hrule :: GenParser Char ParserState Block
-hrule = try $ do
- (_, attribs) <- htmlSelfClosingTag "hr"
- state <- getState
- if not (null attribs) && stateParseRaw state
- then unexpected "attributes in hr" -- parse as raw in this case
- else return HorizontalRule
-
---
--- code blocks
---
-
--- Note: HTML tags in code blocks (e.g. for syntax highlighting) are
--- skipped, because they are not portable to output formats other than HTML.
-codeBlock :: GenParser Char ParserState Block
-codeBlock = try $ do
- htmlOpenTag "pre"
- result <- manyTill
- (many1 (satisfy (/= '<')) <|>
- ((anyHtmlTag <|> anyHtmlEndTag) >> return ""))
- (htmlEndTag "pre")
- let result' = concat result
- -- drop leading newline if any
- let result'' = if "\n" `isPrefixOf` result'
- then drop 1 result'
- else result'
- -- drop trailing newline if any
- let result''' = if "\n" `isSuffixOf` result''
- then init result''
- else result''
- return $ CodeBlock ("",[],[]) $ decodeCharacterReferences result'''
-
---
--- block quotes
---
-
-blockQuote :: GenParser Char ParserState Block
-blockQuote = try $ htmlOpenTag "blockquote" >> spaces >>
- blocksTilEnd "blockquote" >>= (return . BlockQuote)
-
---
--- list blocks
---
-
-list :: GenParser Char ParserState Block
-list = choice [ bulletList, orderedList, definitionList ] <?> "list"
-
-orderedList :: GenParser Char ParserState Block
-orderedList = try $ do
- (_, attribs) <- htmlOpenTag "ol"
- (start, style) <- option (1, DefaultStyle) $
- do failIfStrict
- let sta = fromMaybe "1" $
- lookup "start" attribs
- let sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- let sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ -> DefaultStyle
- return (read sta, sty')
- spaces
- -- note: if they have an <ol> or <ul> not in scope of a <li>,
- -- treat it as a list item, though it's not valid xhtml...
- items <- sepEndBy1 (blocksIn "li" <|> liftM (:[]) list) spaces
- htmlEndTag "ol"
- return $ OrderedList (start, style, DefaultDelim) items
-
-bulletList :: GenParser Char ParserState Block
-bulletList = try $ do
- htmlOpenTag "ul"
- spaces
- -- note: if they have an <ol> or <ul> not in scope of a <li>,
- -- treat it as a list item, though it's not valid xhtml...
- items <- sepEndBy1 (blocksIn "li" <|> liftM (:[]) list) spaces
- htmlEndTag "ul"
- return $ BulletList items
-
-definitionList :: GenParser Char ParserState Block
-definitionList = try $ do
- failIfStrict -- def lists not part of standard markdown
- htmlOpenTag "dl"
- spaces
- items <- sepEndBy1 definitionListItem spaces
- htmlEndTag "dl"
- return $ DefinitionList items
-
-definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
-definitionListItem = try $ do
- terms <- sepEndBy1 (inlinesIn "dt") spaces
- defs <- sepEndBy1 (blocksIn "dd") spaces
- let term = intercalate [LineBreak] terms
- return (term, defs)
-
---
--- paragraph block
---
-
-para :: GenParser Char ParserState Block
-para = try $ htmlOpenTag "p" >> inlinesTilEnd "p" >>=
- return . Para . normalizeSpaces
-
---
--- plain block
---
-
-plain :: GenParser Char ParserState Block
-plain = many1 inline >>= return . Plain . normalizeSpaces
-
---
--- inline
---
-
-inline :: GenParser Char ParserState Inline
-inline = choice [ charRef
- , strong
- , emph
- , superscript
- , subscript
- , strikeout
- , spanStrikeout
- , code
- , str
- , linebreak
- , whitespace
- , link
- , image
- , rawHtmlInline
- , char '&' >> return (Str "&") -- common HTML error
- ] <?> "inline"
-
-code :: GenParser Char ParserState Inline
-code = try $ do
- result <- (htmlOpenTag "code" >> manyTill anyChar (htmlEndTag "code"))
- <|> (htmlOpenTag "tt" >> manyTill anyChar (htmlEndTag "tt"))
- -- remove internal line breaks, leading and trailing space,
- -- and decode character references
- return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $
- intercalate " " $ lines result
-
-rawHtmlInline :: GenParser Char ParserState Inline
-rawHtmlInline = do
- result <- anyHtmlInlineTag <|> htmlComment
- state <- getState
- if stateParseRaw state then return (HtmlInline result) else return (Str "")
-
-betweenTags :: [Char] -> GenParser Char ParserState [Inline]
-betweenTags tag = try $ htmlOpenTag tag >> inlinesTilEnd tag >>=
- return . normalizeSpaces
-
-emph :: GenParser Char ParserState Inline
-emph = (betweenTags "em" <|> betweenTags "i") >>= return . Emph
-
-strong :: GenParser Char ParserState Inline
-strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong
-
-superscript :: GenParser Char ParserState Inline
-superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript
-
-subscript :: GenParser Char ParserState Inline
-subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript
-
-strikeout :: GenParser Char ParserState Inline
-strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>=
- return . Strikeout
-
-spanStrikeout :: GenParser Char ParserState Inline
-spanStrikeout = try $ do
- failIfStrict -- strict markdown has no strikeout, so treat as raw HTML
- (_, attributes) <- htmlOpenTag "span"
- result <- case (extractAttribute "class" attributes) of
- Just "strikeout" -> inlinesTilEnd "span"
- _ -> fail "not a strikeout"
- return $ Strikeout result
-
-whitespace :: GenParser Char st Inline
-whitespace = many1 space >> return Space
-
--- hard line break
-linebreak :: GenParser Char ParserState Inline
-linebreak = htmlSelfClosingTag "br" >> optional newline >> return LineBreak
-
-str :: GenParser Char st Inline
-str = many1 (noneOf "< \t\n&") >>= return . Str
-
---
--- links and images
---
-
--- extract contents of attribute (attribute names are case-insensitive)
-extractAttribute :: [Char] -> [([Char], String)] -> Maybe String
-extractAttribute _ [] = Nothing
-extractAttribute name ((attrName, contents):rest) =
- let name' = map toLower name
- attrName' = map toLower attrName
- in if attrName' == name'
- then Just (decodeCharacterReferences contents)
- else extractAttribute name rest
-
-link :: GenParser Char ParserState Inline
-link = try $ do
- (_, attributes) <- htmlOpenTag "a"
- url <- case (extractAttribute "href" attributes) of
- Just url -> return url
- Nothing -> fail "no href"
- let title = fromMaybe "" $ extractAttribute "title" attributes
- lab <- inlinesTilEnd "a"
- return $ Link (normalizeSpaces lab) (escapeURI url, title)
-
-image :: GenParser Char ParserState Inline
-image = try $ do
- (_, attributes) <- htmlSelfClosingTag "img"
- url <- case (extractAttribute "src" attributes) of
- Just url -> return url
- Nothing -> fail "no src"
- let title = fromMaybe "" $ extractAttribute "title" attributes
- let alt = fromMaybe "" (extractAttribute "alt" attributes)
- return $ Image [Str alt] (escapeURI url, title)
-
+--- parsers for use in markdown, textile readers
+
+-- | Matches a stretch of HTML in balanced tags.
+htmlInBalanced :: (Tag String -> Bool) -> GenParser Char ParserState String
+htmlInBalanced f = try $ do
+ (TagOpen t _, tag) <- htmlTag f
+ guard $ '/' `notElem` tag -- not a self-closing tag
+ let nonTagChunk = many1 $ satisfy (/= '<')
+ let stopper = htmlTag (~== TagClose t)
+ let anytag = liftM snd $ htmlTag (const True)
+ contents <- many $ notFollowedBy' stopper >>
+ (nonTagChunk <|> htmlInBalanced (const True) <|> anytag)
+ endtag <- liftM snd stopper
+ return $ tag ++ concat contents ++ endtag
+
+-- | Matches a tag meeting a certain condition.
+htmlTag :: (Tag String -> Bool) -> GenParser Char ParserState (Tag String, String)
+htmlTag f = try $ do
+ lookAhead (char '<')
+ (next : _) <- getInput >>= return . canonicalizeTags . parseTags
+ guard $ f next
+ -- advance the parser
+ case next of
+ TagComment s -> do
+ count (length s + 4) anyChar
+ skipMany (satisfy (/='>'))
+ char '>'
+ return (next, "<!--" ++ s ++ "-->")
+ _ -> do
+ rendered <- manyTill anyChar (char '>')
+ return (next, rendered ++ ">")
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 406809dfc..dca745b56 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -38,9 +38,9 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe )
-import Data.Char ( chr )
-import Data.List ( isPrefixOf, isSuffixOf )
-import Control.Monad ( when )
+import Data.Char ( chr, toUpper )
+import Data.List ( intercalate, isPrefixOf, isSuffixOf )
+import Control.Monad
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: ParserState -- ^ Parser state, including options for parser
@@ -50,7 +50,7 @@ readLaTeX = readWith parseLaTeX
-- characters with special meaning
specialChars :: [Char]
-specialChars = "\\`$%^&_~#{}\n \t|<>'\"-"
+specialChars = "\\`$%^&_~#{}[]\n \t|<>'\"-"
--
-- utility functions
@@ -64,7 +64,7 @@ bracketedText openB closeB = do
-- | Returns an option or argument of a LaTeX command.
optOrArg :: GenParser Char st [Char]
-optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']'
+optOrArg = try $ spaces >> (bracketedText '{' '}' <|> bracketedText '[' ']')
-- | True if the string begins with '{'.
isArg :: [Char] -> Bool
@@ -86,14 +86,22 @@ command = do
begin :: [Char] -> GenParser Char st [Char]
begin name = try $ do
- string $ "\\begin{" ++ name ++ "}"
+ string "\\begin"
+ spaces
+ char '{'
+ string name
+ char '}'
optional commandArgs
spaces
return name
end :: [Char] -> GenParser Char st [Char]
end name = try $ do
- string $ "\\end{" ++ name ++ "}"
+ string "\\end"
+ spaces
+ char '{'
+ string name
+ char '}'
return name
-- | Returns a list of block elements containing the contents of an
@@ -103,7 +111,9 @@ environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ s
anyEnvironment :: GenParser Char ParserState Block
anyEnvironment = try $ do
- string "\\begin{"
+ string "\\begin"
+ spaces
+ char '{'
name <- many letter
star <- option "" (string "*") -- some environments have starred variants
char '}'
@@ -119,22 +129,17 @@ anyEnvironment = try $ do
-- | Process LaTeX preamble, extracting metadata.
processLaTeXPreamble :: GenParser Char ParserState ()
-processLaTeXPreamble = try $ manyTill
- (choice [bibliographic, comment, unknownCommand, nullBlock])
- (try (string "\\begin{document}")) >>
- spaces
+processLaTeXPreamble = do
+ try $ string "\\documentclass"
+ skipMany $ bibliographic <|> macro <|> commentBlock <|> skipChar
-- | Parse LaTeX and return 'Pandoc'.
parseLaTeX :: GenParser Char ParserState Pandoc
parseLaTeX = do
- optional processLaTeXPreamble -- preamble might not be present (fragment)
- spaces
- blocks <- parseBlocks
spaces
- optional $ try (string "\\end{document}" >> many anyChar)
- -- might not be present (fragment)
- spaces
- eof
+ skipMany $ comment >> spaces
+ blocks <- try (processLaTeXPreamble >> environment "document")
+ <|> (many block >>~ (spaces >> eof))
state <- getState
let blocks' = filter (/= Null) blocks
let title' = stateTitle state
@@ -155,13 +160,16 @@ block = choice [ hrule
, header
, list
, blockQuote
- , comment
+ , simpleTable
+ , commentBlock
+ , macro
, bibliographic
, para
, itemBlock
, unknownEnvironment
, ignore
- , unknownCommand ] <?> "block"
+ , unknownCommand
+ ] <?> "block"
--
-- header blocks
@@ -208,20 +216,77 @@ hrule :: GenParser Char st Block
hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n",
"\\newpage" ] >> spaces >> return HorizontalRule
+-- tables
+
+simpleTable :: GenParser Char ParserState Block
+simpleTable = try $ do
+ string "\\begin"
+ spaces
+ string "{tabular}"
+ spaces
+ aligns <- parseAligns
+ let cols = length aligns
+ optional hline
+ header' <- option [] $ parseTableHeader cols
+ rows <- many (parseTableRow cols >>~ optional hline)
+ spaces
+ end "tabular"
+ spaces
+ let header'' = if null header'
+ then replicate cols []
+ else header'
+ return $ Table [] aligns (replicate cols 0) header'' rows
+
+hline :: GenParser Char st ()
+hline = try $ spaces >> string "\\hline" >> return ()
+
+parseAligns :: GenParser Char ParserState [Alignment]
+parseAligns = try $ do
+ char '{'
+ optional $ char '|'
+ let cAlign = char 'c' >> return AlignCenter
+ let lAlign = char 'l' >> return AlignLeft
+ let rAlign = char 'r' >> return AlignRight
+ let alignChar = cAlign <|> lAlign <|> rAlign
+ aligns' <- sepEndBy alignChar (optional $ char '|')
+ char '}'
+ spaces
+ return aligns'
+
+parseTableHeader :: Int -- ^ number of columns
+ -> GenParser Char ParserState [TableCell]
+parseTableHeader cols = try $ do
+ cells' <- parseTableRow cols
+ hline
+ return cells'
+
+parseTableRow :: Int -- ^ number of columns
+ -> GenParser Char ParserState [TableCell]
+parseTableRow cols = try $ do
+ let tableCellInline = notFollowedBy (char '&' <|>
+ (try $ char '\\' >> char '\\')) >> inline
+ cells' <- sepBy (spaces >> liftM ((:[]) . Plain . normalizeSpaces)
+ (many tableCellInline)) (char '&')
+ guard $ length cells' == cols
+ spaces
+ (try $ string "\\\\" >> spaces) <|>
+ (lookAhead (end "tabular") >> return ())
+ return cells'
+
--
-- code blocks
--
codeBlock :: GenParser Char ParserState Block
-codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> lhsCodeBlock
+codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> codeBlockWith "lstlisting" <|> lhsCodeBlock
-- Note: Verbatim is from fancyvrb.
codeBlockWith :: String -> GenParser Char st Block
codeBlockWith env = try $ do
- string ("\\begin{" ++ env ++ "}") -- don't use begin function because it
- -- gobbles whitespace
- optional blanklines -- we want to gobble blank lines, but not
- -- leading space
+ string "\\begin"
+ spaces -- don't use begin function because it
+ string $ "{" ++ env ++ "}" -- gobbles whitespace; we want to gobble
+ optional blanklines -- blank lines, but not leading space
contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}"))
spaces
let classes = if env == "code" then ["haskell"] else []
@@ -265,7 +330,10 @@ listItem = try $ do
orderedList :: GenParser Char ParserState Block
orderedList = try $ do
- string "\\begin{enumerate}"
+ string "\\begin"
+ spaces
+ string "{enumerate}"
+ spaces
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $
try $ do failIfStrict
char '['
@@ -293,7 +361,6 @@ orderedList = try $ do
bulletList :: GenParser Char ParserState Block
bulletList = try $ do
begin "itemize"
- spaces
items <- many listItem
end "itemize"
spaces
@@ -302,7 +369,6 @@ bulletList = try $ do
definitionList :: GenParser Char ParserState Block
definitionList = try $ do
begin "description"
- spaces
items <- many listItem
end "description"
spaces
@@ -342,7 +408,7 @@ authors :: GenParser Char ParserState Block
authors = try $ do
string "\\author{"
raw <- many1 (notFollowedBy (char '}') >> inline)
- let authors' = map normalizeSpaces $ splitBy LineBreak raw
+ let authors' = map normalizeSpaces $ splitBy (== LineBreak) raw
char '}'
spaces
updateState (\s -> s { stateAuthors = authors' })
@@ -382,13 +448,15 @@ rawLaTeXEnvironment :: GenParser Char st Block
rawLaTeXEnvironment = do
contents <- rawLaTeXEnvironment'
spaces
- return $ Para [TeX contents]
+ return $ RawBlock "latex" contents
-- | Parse any LaTeX environment and return a string containing
-- the whole literal environment as raw TeX.
rawLaTeXEnvironment' :: GenParser Char st String
rawLaTeXEnvironment' = try $ do
- string "\\begin{"
+ string "\\begin"
+ spaces
+ char '{'
name <- many1 letter
star <- option "" (string "*") -- for starred variants
let name' = name ++ star
@@ -418,31 +486,49 @@ ignore = try $ do
spaces
return Null
+demacro :: (String, String, [String]) -> GenParser Char ParserState Inline
+demacro (n,st,args) = try $ do
+ let raw = "\\" ++ n ++ st ++ concat args
+ s' <- applyMacros' raw
+ if raw == s'
+ then return $ RawInline "latex" raw
+ else do
+ inp <- getInput
+ setInput $ s' ++ inp
+ return $ Str ""
+
unknownCommand :: GenParser Char ParserState Block
unknownCommand = try $ do
- notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description",
- "document"]
+ spaces
+ notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"]
state <- getState
when (stateParserContext state == ListItemState) $
notFollowedBy' (string "\\item")
if stateParseRaw state
- then do
- (name, star, args) <- command
- spaces
- return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)]
+ then command >>= demacro >>= return . Plain . (:[])
else do
(name, _, args) <- command
spaces
- if name `elem` commandsToIgnore
- then return Null
- else return $ Plain [Str $ concat args]
+ unless (name `elem` commandsToIgnore) $ do
+ -- put arguments back in input to be parsed
+ inp <- getInput
+ setInput $ intercalate " " args ++ inp
+ return Null
commandsToIgnore :: [String]
-commandsToIgnore = ["special","pdfannot","pdfstringdef"]
+commandsToIgnore = ["special","pdfannot","pdfstringdef", "index","bibliography"]
+
+skipChar :: GenParser Char ParserState Block
+skipChar = do
+ satisfy (/='\\') <|>
+ (notFollowedBy' (try $
+ string "\\begin" >> spaces >> string "{document}") >>
+ anyChar)
+ spaces
+ return Null
--- latex comment
-comment :: GenParser Char st Block
-comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null
+commentBlock :: GenParser Char st Block
+commentBlock = many1 (comment >> spaces) >> return Null
--
-- inline
@@ -464,8 +550,6 @@ inline = choice [ str
, strikeout
, superscript
, subscript
- , ref
- , lab
, code
, url
, link
@@ -474,12 +558,20 @@ inline = choice [ str
, linebreak
, accentedChar
, nonbreakingSpace
+ , cite
, specialChar
+ , ensureMath
, rawLaTeXInline'
, escapedChar
, unescapedChar
+ , comment
] <?> "inline"
+
+-- latex comment
+comment :: GenParser Char st Inline
+comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return (Str "")
+
accentedChar :: GenParser Char st Inline
accentedChar = normalAccentedChar <|> specialAccentedChar
@@ -512,7 +604,7 @@ accentTable =
('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ]
specialAccentedChar :: GenParser Char st Inline
-specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig,
+specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, lslash,
oslash, pound, euro, copyright, sect ]
ccedil :: GenParser Char st Inline
@@ -543,6 +635,13 @@ oslash = try $ do
let num = if letter' == 'o' then 248 else 216
return $ Str [chr num]
+lslash :: GenParser Char st Inline
+lslash = try $ do
+ cmd <- oneOfStrings ["{\\L}","{\\l}","\\L ","\\l "]
+ return $ if 'l' `elem` cmd
+ then Str "\x142"
+ else Str "\x141"
+
aelig :: GenParser Char st Inline
aelig = try $ do
char '\\'
@@ -569,7 +668,7 @@ escapedChar = do
-- nonescaped special characters
unescapedChar :: GenParser Char st Inline
-unescapedChar = oneOf "`$^&_#{}|<>" >>= return . (\c -> Str [c])
+unescapedChar = oneOf "`$^&_#{}[]|<>" >>= return . (\c -> Str [c])
specialChar :: GenParser Char st Inline
specialChar = choice [ spacer, interwordSpace,
@@ -604,27 +703,34 @@ doubleQuote :: GenParser Char st Inline
doubleQuote = char '"' >> return (Str "\"")
code :: GenParser Char ParserState Inline
-code = code1 <|> code2 <|> lhsInlineCode
+code = code1 <|> code2 <|> code3 <|> lhsInlineCode
code1 :: GenParser Char st Inline
code1 = try $ do
string "\\verb"
marker <- anyChar
result <- manyTill anyChar (char marker)
- return $ Code $ removeLeadingTrailingSpace result
+ return $ Code nullAttr $ removeLeadingTrailingSpace result
code2 :: GenParser Char st Inline
code2 = try $ do
string "\\texttt{"
result <- manyTill (noneOf "\\\n~$%^&{}") (char '}')
- return $ Code result
+ return $ Code nullAttr result
+
+code3 :: GenParser Char st Inline
+code3 = try $ do
+ string "\\lstinline"
+ marker <- anyChar
+ result <- manyTill anyChar (char marker)
+ return $ Code nullAttr $ removeLeadingTrailingSpace result
lhsInlineCode :: GenParser Char ParserState Inline
lhsInlineCode = try $ do
failUnlessLHS
char '|'
result <- manyTill (noneOf "|\n") (char '|')
- return $ Code result
+ return $ Code ("",["haskell"],[]) result
emph :: GenParser Char ParserState Inline
emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >>
@@ -683,15 +789,6 @@ emDash = try (string "---") >> return EmDash
hyphen :: GenParser Char st Inline
hyphen = char '-' >> return (Str "-")
-lab :: GenParser Char st Inline
-lab = try $ do
- string "\\label{"
- result <- manyTill anyChar (char '}')
- return $ Str $ "(" ++ result ++ ")"
-
-ref :: GenParser Char st Inline
-ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str
-
strong :: GenParser Char ParserState Inline
strong = try (string "\\textbf{") >> manyTill inline (char '}') >>=
return . Strong
@@ -714,13 +811,13 @@ endline :: GenParser Char st Inline
endline = try $ newline >> notFollowedBy blankline >> return Space
-- math
-math :: GenParser Char st Inline
-math = (math3 >>= return . Math DisplayMath)
- <|> (math1 >>= return . Math InlineMath)
- <|> (math2 >>= return . Math InlineMath)
- <|> (math4 >>= return . Math DisplayMath)
- <|> (math5 >>= return . Math DisplayMath)
- <|> (math6 >>= return . Math DisplayMath)
+math :: GenParser Char ParserState Inline
+math = (math3 >>= applyMacros' >>= return . Math DisplayMath)
+ <|> (math1 >>= applyMacros' >>= return . Math InlineMath)
+ <|> (math2 >>= applyMacros' >>= return . Math InlineMath)
+ <|> (math4 >>= applyMacros' >>= return . Math DisplayMath)
+ <|> (math5 >>= applyMacros' >>= return . Math DisplayMath)
+ <|> (math6 >>= applyMacros' >>= return . Math DisplayMath)
<?> "math"
math1 :: GenParser Char st String
@@ -737,7 +834,6 @@ math4 = try $ do
name <- begin "displaymath" <|> begin "equation" <|> begin "equation*" <|>
begin "gather" <|> begin "gather*" <|> begin "gathered" <|>
begin "multline" <|> begin "multline*"
- spaces
manyTill anyChar (end name)
math5 :: GenParser Char st String
@@ -748,10 +844,15 @@ math6 = try $ do
name <- begin "eqnarray" <|> begin "eqnarray*" <|> begin "align" <|>
begin "align*" <|> begin "alignat" <|> begin "alignat*" <|>
begin "split" <|> begin "aligned" <|> begin "alignedat"
- spaces
res <- manyTill anyChar (end name)
return $ filter (/= '&') res -- remove alignment codes
+ensureMath :: GenParser Char st Inline
+ensureMath = try $ do
+ (n, _, args) <- command
+ guard $ n == "ensuremath" && not (null args)
+ return $ Math InlineMath $ tail $ init $ head args
+
--
-- links and images
--
@@ -760,7 +861,7 @@ url :: GenParser Char ParserState Inline
url = try $ do
string "\\url"
url' <- charsInBalanced '{' '}'
- return $ Link [Code url'] (escapeURI url', "")
+ return $ Link [Code ("",["url"],[]) url'] (escapeURI url', "")
link :: GenParser Char ParserState Inline
link = try $ do
@@ -793,6 +894,103 @@ footnote = try $ do
setInput rest
return $ Note blocks
+-- | citations
+cite :: GenParser Char ParserState Inline
+cite = simpleCite <|> complexNatbibCites
+
+simpleCiteArgs :: GenParser Char ParserState [Citation]
+simpleCiteArgs = try $ do
+ first <- optionMaybe $ (char '[') >> manyTill inline (char ']')
+ second <- optionMaybe $ (char '[') >> manyTill inline (char ']')
+ char '{'
+ keys <- many1Till citationLabel (char '}')
+ let (pre, suf) = case (first , second ) of
+ (Just s , Nothing) -> ([], s )
+ (Just s , Just t ) -> (s , t )
+ _ -> ([], [])
+ conv k = Citation { citationId = k
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = NormalCitation
+ , citationHash = 0
+ , citationNoteNum = 0
+ }
+ return $ addPrefix pre $ addSuffix suf $ map conv keys
+
+
+simpleCite :: GenParser Char ParserState Inline
+simpleCite = try $ do
+ char '\\'
+ let biblatex = [a ++ "cite" | a <- ["auto", "foot", "paren", "super", ""]]
+ ++ ["footcitetext"]
+ normal = ["cite" ++ a ++ b | a <- ["al", ""], b <- ["p", "p*", ""]]
+ ++ biblatex
+ supress = ["citeyearpar", "citeyear", "autocite*", "cite*", "parencite*"]
+ intext = ["textcite"] ++ ["cite" ++ a ++ b | a <- ["al", ""], b <- ["t", "t*"]]
+ mintext = ["textcites"]
+ mnormal = map (++ "s") biblatex
+ cmdend = notFollowedBy (letter <|> char '*')
+ capit [] = []
+ capit (x:xs) = toUpper x : xs
+ addUpper xs = xs ++ map capit xs
+ toparser l t = try $ oneOfStrings (addUpper l) >> cmdend >> return t
+ (mode, multi) <- toparser normal (NormalCitation, False)
+ <|> toparser supress (SuppressAuthor, False)
+ <|> toparser intext (AuthorInText , False)
+ <|> toparser mnormal (NormalCitation, True )
+ <|> toparser mintext (AuthorInText , True )
+ cits <- if multi then
+ many1 simpleCiteArgs
+ else
+ simpleCiteArgs >>= \c -> return [c]
+ let (c:cs) = concat cits
+ cits' = case mode of
+ AuthorInText -> c {citationMode = mode} : cs
+ _ -> map (\a -> a {citationMode = mode}) (c:cs)
+ return $ Cite cits' []
+
+complexNatbibCites :: GenParser Char ParserState Inline
+complexNatbibCites = complexNatbibTextual <|> complexNatbibParenthetical
+
+complexNatbibTextual :: GenParser Char ParserState Inline
+complexNatbibTextual = try $ do
+ string "\\citeauthor{"
+ manyTill (noneOf "}") (char '}')
+ skipSpaces
+ Cite (c:cs) _ <- complexNatbibParenthetical
+ return $ Cite (c {citationMode = AuthorInText} : cs) []
+
+
+complexNatbibParenthetical :: GenParser Char ParserState Inline
+complexNatbibParenthetical = try $ do
+ string "\\citetext{"
+ cits <- many1Till parseOne (char '}')
+ return $ Cite (concat cits) []
+ where
+ parseOne = do
+ skipSpaces
+ pref <- many (notFollowedBy (oneOf "\\}") >> inline)
+ (Cite cites _) <- simpleCite
+ suff <- many (notFollowedBy (oneOf "\\};") >> inline)
+ skipSpaces
+ optional $ char ';'
+ return $ addPrefix pref $ addSuffix suff $ cites
+
+addPrefix :: [Inline] -> [Citation] -> [Citation]
+addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks
+addPrefix _ _ = []
+
+addSuffix :: [Inline] -> [Citation] -> [Citation]
+addSuffix s ks@(_:_) = let k = last ks
+ in init ks ++ [k {citationSuffix = citationSuffix k ++ s}]
+addSuffix _ _ = []
+
+citationLabel :: GenParser Char ParserState String
+citationLabel = do
+ res <- many1 $ noneOf ",}"
+ optional $ char ','
+ return $ removeLeadingTrailingSpace res
+
-- | Parse any LaTeX inline command and return it in a raw TeX inline element.
rawLaTeXInline' :: GenParser Char ParserState Inline
rawLaTeXInline' = do
@@ -805,12 +1003,11 @@ rawLaTeXInline :: GenParser Char ParserState Inline
rawLaTeXInline = try $ do
state <- getState
if stateParseRaw state
- then do
- (name, star, args) <- command
- return $ TeX ("\\" ++ name ++ star ++ concat args)
+ then command >>= demacro
else do
- (name, _, args) <- command
- spaces
- if name `elem` commandsToIgnore
- then return $ Str ""
- else return $ Str (concat args)
+ (name,st,args) <- command
+ x <- demacro (name,st,args)
+ unless (x == Str "" || name `elem` commandsToIgnore) $ do
+ inp <- getInput
+ setInput $ intercalate " " args ++ inp
+ return $ Str ""
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 33fb3d8e6..58d2158bf 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -27,26 +27,25 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
-module Text.Pandoc.Readers.Markdown (
- readMarkdown
- ) where
+module Text.Pandoc.Readers.Markdown ( readMarkdown ) where
-import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate )
+import Data.List ( transpose, sortBy, findIndex, intercalate )
import qualified Data.Map as M
import Data.Ord ( comparing )
import Data.Char ( isAlphaNum )
import Data.Maybe
import Text.Pandoc.Definition
+import Text.Pandoc.Generic
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' )
-import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag,
- anyHtmlInlineTag, anyHtmlTag,
- anyHtmlEndTag, htmlEndTag, extractTagType,
- htmlBlockElement, htmlComment, unsanitaryURI )
+import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
+ isTextTag, isCommentTag )
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.ParserCombinators.Parsec
-import Control.Monad (when, liftM, unless)
+import Control.Monad (when, liftM, guard)
+import Text.HTML.TagSoup
+import Text.HTML.TagSoup.Match (tagOpen)
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: ParserState -- ^ Parser state, including options for parser
@@ -58,18 +57,26 @@ readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n")
-- Constants and data structure definitions
--
-bulletListMarkers :: [Char]
-bulletListMarkers = "*+-"
+isBulletListMarker :: Char -> Bool
+isBulletListMarker '*' = True
+isBulletListMarker '+' = True
+isBulletListMarker '-' = True
+isBulletListMarker _ = False
-hruleChars :: [Char]
-hruleChars = "*-_"
+isHruleChar :: Char -> Bool
+isHruleChar '*' = True
+isHruleChar '-' = True
+isHruleChar '_' = True
+isHruleChar _ = False
setextHChars :: [Char]
setextHChars = "=-"
--- treat these as potentially non-text when parsing inline:
-specialChars :: [Char]
-specialChars = "\\[]*_~`<>$!^-.&@'\";"
+isBlank :: Char -> Bool
+isBlank ' ' = True
+isBlank '\t' = True
+isBlank '\n' = True
+isBlank _ = False
--
-- auxiliary functions
@@ -106,12 +113,6 @@ failUnlessBeginningOfLine = do
pos <- getPosition
if sourceColumn pos == 1 then return () else fail "not beginning of line"
--- | Fail unless we're in "smart typography" mode.
-failUnlessSmart :: GenParser tok ParserState ()
-failUnlessSmart = do
- state <- getState
- if stateSmart state then return () else pzero
-
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
inlinesInBalancedBrackets :: GenParser Char ParserState Inline
@@ -119,7 +120,7 @@ inlinesInBalancedBrackets :: GenParser Char ParserState Inline
inlinesInBalancedBrackets parser = try $ do
char '['
result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser
- unless (res == "[") pzero
+ guard (res == "[")
bal <- inlinesInBalancedBrackets parser
return $ [Str "["] ++ bal ++ [Str "]"])
<|> (count 1 parser))
@@ -143,7 +144,8 @@ authorsLine :: GenParser Char ParserState [[Inline]]
authorsLine = try $ do
char '%'
skipSpaces
- authors <- sepEndBy (many (notFollowedBy (oneOf ";\n") >> inline))
+ authors <- sepEndBy (many (notFollowedBy (satisfy $ \c ->
+ c == ';' || c == '\n') >> inline))
(char ';' <|>
try (newline >> notFollowedBy blankline >> spaceChar))
newline
@@ -196,7 +198,7 @@ parseMarkdown = do
handleExampleRef z = z
if M.null examples
then return doc
- else return $ processWith handleExampleRef doc
+ else return $ bottomUp handleExampleRef doc
--
-- initial pass for references and notes
@@ -209,16 +211,24 @@ referenceKey = try $ do
lab <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
- let sourceURL excludes = many $
- optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' referenceTitle >> char ' '))
- src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n"
+ let nl = char '\n' >> notFollowedBy blankline >> return ' '
+ let sourceURL = liftM unwords $ many $ try $ do
+ notFollowedBy' referenceTitle
+ skipMany spaceChar
+ optional nl
+ skipMany spaceChar
+ notFollowedBy' reference
+ many1 (satisfy $ not . isBlank)
+ let betweenAngles = try $ char '<' >>
+ manyTill (noneOf ">\n" <|> nl) (char '>')
+ src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
blanklines
endPos <- getPosition
let target = (escapeURI $ removeTrailingSpace src, tit)
st <- getState
let oldkeys = stateKeys st
- updateState $ \s -> s { stateKeys = M.insert (Key lab) target oldkeys }
+ updateState $ \s -> s { stateKeys = M.insert (toKey lab) target oldkeys }
-- return blanks so line count isn't affected
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
@@ -232,12 +242,12 @@ referenceTitle = try $ do
return $ decodeCharacterReferences tit
noteMarker :: GenParser Char ParserState [Char]
-noteMarker = skipNonindentSpaces >> string "[^" >> manyTill (noneOf " \t\n") (char ']')
+noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
rawLine :: GenParser Char ParserState [Char]
rawLine = do
notFollowedBy blankline
- notFollowedBy' noteMarker
+ notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
contents <- many1 nonEndline
end <- option "" (newline >> optional indentSpaces >> return "\n")
return $ contents ++ end
@@ -248,6 +258,7 @@ rawLines = many1 rawLine >>= return . concat
noteBlock :: GenParser Char ParserState [Char]
noteBlock = try $ do
startPos <- getPosition
+ skipNonindentSpaces
ref <- noteMarker
char ':'
optional blankline
@@ -284,6 +295,7 @@ block = do
, plain
, nullBlock ]
else [ codeBlockDelimited
+ , macro
, header
, table
, codeBlockIndented
@@ -293,6 +305,7 @@ block = do
, bulletList
, orderedList
, definitionList
+ , rawTeXBlock
, para
, rawHtmlBlocks
, plain
@@ -318,6 +331,9 @@ atxClosing = try $ skipMany (char '#') >> blanklines
setextHeader :: GenParser Char ParserState Block
setextHeader = try $ do
+ -- This lookahead prevents us from wasting time parsing Inlines
+ -- unless necessary -- it gives a significant performance boost.
+ lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
text <- many1Till inline newline
underlineChar <- oneOf setextHChars
many (char underlineChar)
@@ -332,7 +348,7 @@ setextHeader = try $ do
hrule :: GenParser Char st Block
hrule = try $ do
skipSpaces
- start <- oneOf hruleChars
+ start <- satisfy isHruleChar
count 2 (skipSpaces >> char start)
skipMany (spaceChar <|> char start)
newline
@@ -371,6 +387,7 @@ attributes = try $ do
attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])])
attribute = identifierAttr <|> classAttr <|> keyValAttr
+
identifier :: GenParser Char st [Char]
identifier = do
first <- letter
@@ -394,7 +411,7 @@ keyValAttr = try $ do
key <- identifier
char '='
char '"'
- val <- manyTill (noneOf "\n") (char '"')
+ val <- manyTill (satisfy (/='\n')) (char '"')
return ("",[],[(key,val)])
codeBlockDelimited :: GenParser Char st Block
@@ -489,7 +506,7 @@ bulletListStart = try $ do
optional newline -- if preceded by a Plain block in a list context
skipNonindentSpaces
notFollowedBy' hrule -- because hrules start out just like lists
- oneOf bulletListMarkers
+ satisfy isBulletListMarker
spaceChar
skipSpaces
@@ -524,7 +541,7 @@ listLine = try $ do
notFollowedBy' (do indentSpaces
many (spaceChar)
listStart)
- chunks <- manyTill (htmlComment <|> count 1 anyChar) newline
+ chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline
return $ concat chunks ++ "\n"
-- parse raw text for one list item, excluding start marker and continuations
@@ -644,23 +661,21 @@ definitionList = do
--
isHtmlOrBlank :: Inline -> Bool
-isHtmlOrBlank (HtmlInline _) = True
-isHtmlOrBlank (Space) = True
-isHtmlOrBlank (LineBreak) = True
-isHtmlOrBlank _ = False
+isHtmlOrBlank (RawInline "html" _) = True
+isHtmlOrBlank (Space) = True
+isHtmlOrBlank (LineBreak) = True
+isHtmlOrBlank _ = False
para :: GenParser Char ParserState Block
para = try $ do
- result <- many1 inline
- if all isHtmlOrBlank result
- then fail "treat as raw HTML"
- else return ()
- newline
- blanklines <|> do st <- getState
- if stateStrict st
- then lookAhead (blockQuote <|> header) >> return ""
- else pzero
- return $ Para $ normalizeSpaces result
+ result <- liftM normalizeSpaces $ many1 inline
+ guard $ not . all isHtmlOrBlank $ result
+ option (Plain result) $ try $ do
+ newline
+ blanklines <|>
+ (getState >>= guard . stateStrict >>
+ lookAhead (blockQuote <|> header) >> return "")
+ return $ Para result
plain :: GenParser Char ParserState Block
plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces
@@ -670,7 +685,7 @@ plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces
--
htmlElement :: GenParser Char ParserState [Char]
-htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element"
+htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
htmlBlock :: GenParser Char ParserState Block
htmlBlock = try $ do
@@ -678,27 +693,33 @@ htmlBlock = try $ do
first <- htmlElement
finalSpace <- many spaceChar
finalNewlines <- many newline
- return $ RawHtml $ first ++ finalSpace ++ finalNewlines
-
--- True if tag is self-closing
-isSelfClosing :: [Char] -> Bool
-isSelfClosing tag =
- isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag
+ return $ RawBlock "html" $ first ++ finalSpace ++ finalNewlines
strictHtmlBlock :: GenParser Char ParserState [Char]
-strictHtmlBlock = try $ do
- tag <- anyHtmlBlockTag
- let tag' = extractTagType tag
- if isSelfClosing tag || tag' == "hr"
- then return tag
- else do contents <- many (notFollowedBy' (htmlEndTag tag') >>
- (htmlElement <|> (count 1 anyChar)))
- end <- htmlEndTag tag'
- return $ tag ++ concat contents ++ end
+strictHtmlBlock = do
+ failUnlessBeginningOfLine
+ htmlInBalanced (not . isInlineTag)
+
+rawVerbatimBlock :: GenParser Char ParserState String
+rawVerbatimBlock = try $ do
+ (TagOpen tag _, open) <- htmlTag (tagOpen (\t ->
+ t == "pre" || t == "style" || t == "script")
+ (const True))
+ contents <- manyTill anyChar (htmlTag (~== TagClose tag))
+ return $ open ++ contents ++ renderTags [TagClose tag]
+
+rawTeXBlock :: GenParser Char ParserState Block
+rawTeXBlock = do
+ failIfStrict
+ result <- liftM (RawBlock "latex") rawLaTeXEnvironment'
+ <|> liftM (RawBlock "context") rawConTeXtEnvironment'
+ spaces
+ return result
rawHtmlBlocks :: GenParser Char ParserState Block
rawHtmlBlocks = do
- htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock
+ htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|>
+ liftM snd (htmlTag isBlockTag)
sps <- do sp1 <- many spaceChar
sp2 <- option "" (blankline >> return "\n")
sp3 <- many spaceChar
@@ -710,7 +731,7 @@ rawHtmlBlocks = do
return $ blk ++ sps
let combined = concat htmlBlocks
let combined' = if last combined == '\n' then init combined else combined
- return $ RawHtml combined'
+ return $ RawBlock "html" combined'
--
-- Tables
@@ -848,10 +869,11 @@ alignType :: [String]
-> Alignment
alignType [] _ = AlignDefault
alignType strLst len =
- let s = head $ sortBy (comparing length) $
- map removeTrailingSpace strLst
- leftSpace = if null s then False else (s !! 0) `elem` " \t"
- rightSpace = length s < len || (s !! (len - 1)) `elem` " \t"
+ let nonempties = filter (not . null) $ map removeTrailingSpace strLst
+ (leftSpace, rightSpace) =
+ case sortBy (comparing length) nonempties of
+ (x:_) -> (head x `elem` " \t", length x < len)
+ [] -> (False, False)
in case (leftSpace, rightSpace) of
(True, False) -> AlignRight
(False, True) -> AlignLeft
@@ -875,31 +897,29 @@ inline :: GenParser Char ParserState Inline
inline = choice inlineParsers <?> "inline"
inlineParsers :: [GenParser Char ParserState Inline]
-inlineParsers = [ str
- , smartPunctuation
- , whitespace
+inlineParsers = [ whitespace
+ , str
, endline
, code
- , charRef
, (fourOrMore '*' <|> fourOrMore '_')
, strong
, emph
, note
- , inlineNote
, link
-#ifdef _CITEPROC
- , inlineCitation
-#endif
+ , cite
, image
, math
, strikeout
, superscript
, subscript
+ , inlineNote -- after superscript because of ^[link](/foo)^
, autoLink
- , rawHtmlInline'
+ , rawHtmlInline
, rawLaTeXInline'
, escapedChar
, exampleRef
+ , smartPunctuation inline
+ , charRef
, symbol
, ltSign ]
@@ -913,12 +933,12 @@ failIfLink (Link _ _) = pzero
failIfLink elt = return elt
escapedChar :: GenParser Char ParserState Inline
-escapedChar = do
+escapedChar = try $ do
char '\\'
state <- getState
- result <- option '\\' $ if stateStrict state
- then oneOf "\\`*_{}[]()>#+-.!~"
- else satisfy (not . isAlphaNum)
+ result <- if stateStrict state
+ then oneOf "\\`*_{}[]()>#+-.!~"
+ else satisfy (not . isAlphaNum)
return $ case result of
' ' -> Str "\160" -- "\ " is a nonbreaking space
'\n' -> LineBreak -- "\[newline]" is a linebreak
@@ -932,9 +952,6 @@ ltSign = do
else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
return $ Str ['<']
-specialCharsMinusLt :: [Char]
-specialCharsMinusLt = filter (/= '<') specialChars
-
exampleRef :: GenParser Char ParserState Inline
exampleRef = try $ do
char '@'
@@ -945,7 +962,11 @@ exampleRef = try $ do
symbol :: GenParser Char ParserState Inline
symbol = do
- result <- oneOf specialCharsMinusLt
+ result <- noneOf "<\\\n\t "
+ <|> try (do lookAhead $ char '\\'
+ notFollowedBy' $ rawLaTeXEnvironment'
+ <|> rawConTeXtEnvironment'
+ char '\\')
return $ Str [result]
-- parses inline code, between n `s and n `s
@@ -957,7 +978,8 @@ code = try $ do
(char '\n' >> notFollowedBy' blankline >> return " "))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
- return $ Code $ removeLeadingTrailingSpace $ concat result
+ attr <- option ([],[],[]) (try $ optional whitespace >> attributes)
+ return $ Code attr $ removeLeadingTrailingSpace $ concat result
mathWord :: GenParser Char st [Char]
mathWord = liftM concat $ many1 mathChunk
@@ -966,11 +988,11 @@ mathChunk :: GenParser Char st [Char]
mathChunk = do char '\\'
c <- anyChar
return ['\\',c]
- <|> many1 (noneOf " \t\n\\$")
+ <|> many1 (satisfy $ \c -> not (isBlank c || c == '\\' || c == '$'))
math :: GenParser Char ParserState Inline
-math = (mathDisplay >>= return . Math DisplayMath)
- <|> (mathInline >>= return . Math InlineMath)
+math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath)
+ <|> (mathInline >>= applyMacros' >>= return . Math InlineMath)
mathDisplay :: GenParser Char ParserState String
mathDisplay = try $ do
@@ -1019,85 +1041,6 @@ subscript = failIfStrict >> enclosed (char '~') (char '~')
(notFollowedBy spaceChar >> inline) >>= -- may not contain Space
return . Subscript
-smartPunctuation :: GenParser Char ParserState Inline
-smartPunctuation = failUnlessSmart >>
- choice [ quoted, apostrophe, dash, ellipses ]
-
-apostrophe :: GenParser Char ParserState Inline
-apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe
-
-quoted :: GenParser Char ParserState Inline
-quoted = doubleQuoted <|> singleQuoted
-
-withQuoteContext :: QuoteContext
- -> (GenParser Char ParserState Inline)
- -> GenParser Char ParserState Inline
-withQuoteContext context parser = do
- oldState <- getState
- let oldQuoteContext = stateQuoteContext oldState
- setState oldState { stateQuoteContext = context }
- result <- parser
- newState <- getState
- setState newState { stateQuoteContext = oldQuoteContext }
- return result
-
-singleQuoted :: GenParser Char ParserState Inline
-singleQuoted = try $ do
- singleQuoteStart
- withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>=
- return . Quoted SingleQuote . normalizeSpaces
-
-doubleQuoted :: GenParser Char ParserState Inline
-doubleQuoted = try $ do
- doubleQuoteStart
- withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>=
- return . Quoted DoubleQuote . normalizeSpaces
-
-failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState ()
-failIfInQuoteContext context = do
- st <- getState
- if stateQuoteContext st == context
- then fail "already inside quotes"
- else return ()
-
-singleQuoteStart :: GenParser Char ParserState Char
-singleQuoteStart = do
- failIfInQuoteContext InSingleQuote
- try $ do char '\''
- notFollowedBy (oneOf ")!],.;:-? \t\n")
- notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >>
- satisfy (not . isAlphaNum)))
- -- possess/contraction
- return '\''
-
-singleQuoteEnd :: GenParser Char st Char
-singleQuoteEnd = try $ do
- char '\''
- notFollowedBy alphaNum
- return '\''
-
-doubleQuoteStart :: GenParser Char ParserState Char
-doubleQuoteStart = do
- failIfInQuoteContext InDoubleQuote
- try $ do char '"'
- notFollowedBy (oneOf " \t\n")
- return '"'
-
-doubleQuoteEnd :: GenParser Char st Char
-doubleQuoteEnd = char '"'
-
-ellipses :: GenParser Char st Inline
-ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses
-
-dash :: GenParser Char st Inline
-dash = enDash <|> emDash
-
-enDash :: GenParser Char st Inline
-enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash
-
-emDash :: GenParser Char st Inline
-emDash = oneOfStrings ["---", "--"] >> return EmDash
-
whitespace :: GenParser Char ParserState Inline
whitespace = spaceChar >>
( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak))
@@ -1106,20 +1049,19 @@ whitespace = spaceChar >>
nonEndline :: GenParser Char st Char
nonEndline = satisfy (/='\n')
-strChar :: GenParser Char st Char
-strChar = noneOf (specialChars ++ " \t\n")
-
str :: GenParser Char ParserState Inline
str = do
- result <- many1 strChar
+ a <- alphaNum
+ as <- many $ alphaNum <|> (try $ char '_' >>~ lookAhead alphaNum)
+ let result = a:as
state <- getState
let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
if stateSmart state
then case likelyAbbrev result of
[] -> return $ Str result
xs -> choice (map (\x ->
- try (string x >> char ' ' >>
- notFollowedBy spaceChar >>
+ try (string x >> oneOf " \n" >>
+ lookAhead alphaNum >>
return (Str $ result ++ spacesToNbr x ++ "\160"))) xs)
<|> (return $ Str result)
else return $ Str result
@@ -1142,15 +1084,13 @@ endline = try $ do
newline
notFollowedBy blankline
st <- getState
- if stateStrict st
- then do notFollowedBy emailBlockQuoteStart
- notFollowedBy (char '#') -- atx header
- else return ()
+ when (stateStrict st) $ do
+ notFollowedBy emailBlockQuoteStart
+ notFollowedBy (char '#') -- atx header
-- parse potential list-starts differently if in a list:
- if stateParserContext st == ListItemState
- then notFollowedBy' (bulletListStart <|>
- (anyOrderedListStart >> return ()))
- else return ()
+ when (stateParserContext st == ListItemState) $ do
+ notFollowedBy' bulletListStart
+ notFollowedBy' anyOrderedListStart
return Space
--
@@ -1175,9 +1115,16 @@ source =
source' :: GenParser Char st (String, [Char])
source' = do
skipSpaces
- let sourceURL excludes = many $
- optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' '))
- src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n"
+ let nl = char '\n' >>~ notFollowedBy blankline
+ let sourceURL = liftM unwords $ many $ try $ do
+ notFollowedBy' linkTitle
+ skipMany spaceChar
+ optional nl
+ skipMany spaceChar
+ many1 (satisfy $ not . isBlank)
+ let betweenAngles = try $ char '<' >>
+ manyTill (noneOf ">\n" <|> nl) (char '>')
+ src <- try betweenAngles <|> sourceURL
tit <- option "" linkTitle
skipSpaces
eof
@@ -1196,10 +1143,7 @@ link :: GenParser Char ParserState Inline
link = try $ do
lab <- reference
(src, tit) <- source <|> referenceLink lab
- sanitize <- getState >>= return . stateSanitizeHTML
- if sanitize && unsanitaryURI src
- then fail "Unsanitary URI"
- else return $ Link lab (src, tit)
+ return $ Link lab (src, tit)
-- a link like [this][ref] or [this][] or [this]
referenceLink :: [Inline]
@@ -1209,7 +1153,7 @@ referenceLink lab = do
optional (newline >> skipSpaces) >> reference))
let ref' = if null ref then lab else ref
state <- getState
- case lookupKeySrc (stateKeys state) (Key ref') of
+ case lookupKeySrc (stateKeys state) (toKey ref') of
Nothing -> fail "no corresponding key"
Just target -> return target
@@ -1219,12 +1163,9 @@ autoLink = try $ do
(orig, src) <- uri <|> emailAddress
char '>'
st <- getState
- let sanitize = stateSanitizeHTML st
- if sanitize && unsanitaryURI src
- then fail "Unsanitary URI"
- else return $ if stateStrict st
- then Link [Str orig] (src, "")
- else Link [Code orig] (src, "")
+ return $ if stateStrict st
+ then Link [Str orig] (src, "")
+ else Link [Code ("",["url"],[]) orig] (src, "")
image :: GenParser Char ParserState Inline
image = try $ do
@@ -1250,11 +1191,13 @@ inlineNote = try $ do
return $ Note [Para contents]
rawLaTeXInline' :: GenParser Char ParserState Inline
-rawLaTeXInline' = do
+rawLaTeXInline' = try $ do
failIfStrict
- (rawConTeXtEnvironment' >>= return . TeX)
- <|> (rawLaTeXEnvironment' >>= return . TeX)
- <|> rawLaTeXInline
+ lookAhead $ char '\\'
+ notFollowedBy' $ rawLaTeXEnvironment'
+ <|> rawConTeXtEnvironment'
+ RawInline _ s <- rawLaTeXInline
+ return $ RawInline "tex" s -- "tex" because it might be context or latex
rawConTeXtEnvironment' :: GenParser Char st String
rawConTeXtEnvironment' = try $ do
@@ -1272,46 +1215,98 @@ inBrackets parser = do
char ']'
return $ "[" ++ contents ++ "]"
-rawHtmlInline' :: GenParser Char ParserState Inline
-rawHtmlInline' = do
+rawHtmlInline :: GenParser Char ParserState Inline
+rawHtmlInline = do
st <- getState
- result <- if stateStrict st
- then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag]
- else anyHtmlInlineTag
- return $ HtmlInline result
-
-#ifdef _CITEPROC
-inlineCitation :: GenParser Char ParserState Inline
-inlineCitation = try $ do
+ (_,result) <- if stateStrict st
+ then htmlTag (not . isTextTag)
+ else htmlTag isInlineTag
+ return $ RawInline "html" result
+
+-- Citations
+
+cite :: GenParser Char ParserState Inline
+cite = do
failIfStrict
- cit <- citeMarker
- let citations = readWith parseCitation defaultParserState cit
- mr <- mapM chkCit citations
- if catMaybes mr /= []
- then return $ Cite citations []
- else fail "no citation found"
-
-chkCit :: Target -> GenParser Char ParserState (Maybe Target)
-chkCit t = do
+ citations <- textualCite <|> normalCite
+ return $ Cite citations []
+
+spnl :: GenParser Char st ()
+spnl = try $ do
+ skipSpaces
+ optional newline
+ skipSpaces
+ notFollowedBy (char '\n')
+
+textualCite :: GenParser Char ParserState [Citation]
+textualCite = try $ do
+ (_, key) <- citeKey
+ let first = Citation{ citationId = key
+ , citationPrefix = []
+ , citationSuffix = []
+ , citationMode = AuthorInText
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+ rest <- option [] $ try $ spnl >> normalCite
+ if null rest
+ then option [first] $ bareloc first
+ else return $ first : rest
+
+bareloc :: Citation -> GenParser Char ParserState [Citation]
+bareloc c = try $ do
+ spnl
+ char '['
+ suff <- suffix
+ rest <- option [] $ try $ char ';' >> citeList
+ spnl
+ char ']'
+ return $ c{ citationSuffix = suff } : rest
+
+normalCite :: GenParser Char ParserState [Citation]
+normalCite = try $ do
+ char '['
+ spnl
+ citations <- citeList
+ spnl
+ char ']'
+ return citations
+
+citeKey :: GenParser Char ParserState (Bool, String)
+citeKey = try $ do
+ suppress_author <- option False (char '-' >> return True)
+ char '@'
+ first <- letter
+ rest <- many $ (noneOf ",;]@ \t\n")
+ let key = first:rest
st <- getState
- case lookupKeySrc (stateKeys st) (Key [Str $ fst t]) of
- Just _ -> fail "This is a link"
- Nothing -> if elem (fst t) $ stateCitations st
- then return $ Just t
- else return $ Nothing
-
-citeMarker :: GenParser Char ParserState String
-citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']')
-
-parseCitation :: GenParser Char ParserState [(String,String)]
-parseCitation = try $ sepBy (parseLabel) (oneOf ";")
-
-parseLabel :: GenParser Char ParserState (String,String)
-parseLabel = try $ do
- res <- sepBy (skipSpaces >> optional newline >> skipSpaces >> many1 (noneOf "@;")) (oneOf "@")
- case res of
- [lab,loc] -> return (lab, loc)
- [lab] -> return (lab, "" )
- _ -> return ("" , "" )
-
-#endif
+ guard $ key `elem` stateCitations st
+ return (suppress_author, key)
+
+suffix :: GenParser Char ParserState [Inline]
+suffix = try $ do
+ spnl
+ liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline
+
+prefix :: GenParser Char ParserState [Inline]
+prefix = liftM normalizeSpaces $
+ manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey))
+
+citeList :: GenParser Char ParserState [Citation]
+citeList = sepBy1 citation (try $ char ';' >> spnl)
+
+citation :: GenParser Char ParserState Citation
+citation = try $ do
+ pref <- prefix
+ (suppress_author, key) <- citeKey
+ suff <- suffix
+ return $ Citation{ citationId = key
+ , citationPrefix = pref
+ , citationSuffix = suff
+ , citationMode = if suppress_author
+ then SuppressAuthor
+ else NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
new file mode 100644
index 000000000..2c6fcc6e6
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -0,0 +1,81 @@
+{-
+Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Native
+ Copyright : Copyright (C) 2011 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of a string representation of a pandoc type (@Pandoc@,
+@[Block]@, @Block@, @[Inline]@, or @Inline@) to a @Pandoc@ document.
+-}
+module Text.Pandoc.Readers.Native ( readNative ) where
+
+import Text.Pandoc.Definition
+
+nullMeta :: Meta
+nullMeta = Meta{ docTitle = []
+ , docAuthors = []
+ , docDate = []
+ }
+
+-- | Read native formatted text and return a Pandoc document.
+-- The input may be a full pandoc document, a block list, a block,
+-- an inline list, or an inline. Thus, for example,
+--
+-- > Str "hi"
+--
+-- will be treated as if it were
+--
+-- > Pandoc (Meta [] [] []) [Plain [Str "hi"]]
+--
+readNative :: String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Pandoc
+readNative s =
+ case reads s of
+ (d,_):_ -> d
+ [] -> Pandoc nullMeta $ readBlocks s
+
+readBlocks :: String -> [Block]
+readBlocks s =
+ case reads s of
+ (d,_):_ -> d
+ [] -> [readBlock s]
+
+readBlock :: String -> Block
+readBlock s =
+ case reads s of
+ (d,_):_ -> d
+ [] -> Plain $ readInlines s
+
+readInlines :: String -> [Inline]
+readInlines s =
+ case reads s of
+ (d,_):_ -> d
+ [] -> [readInline s]
+
+readInline :: String -> Inline
+readInline s =
+ case reads s of
+ (d,_):_ -> d
+ [] -> error "Cannot parse document"
+
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 13afe5053..32fae5ee7 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -34,10 +34,11 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Parsing
import Text.ParserCombinators.Parsec
-import Control.Monad ( when, unless )
-import Data.List ( findIndex, intercalate, transpose, sort )
+import Control.Monad ( when )
+import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy )
import qualified Data.Map as M
import Text.Printf ( printf )
+import Data.Maybe ( catMaybes )
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ParserState -- ^ Parser state, including options for parser
@@ -57,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\\`|*_<>$:[-"
+specialChars = "\\`|*_<>$:[-.\"'\8216\8217\8220\8221"
--
-- parsing documents
@@ -90,12 +91,17 @@ titleTransform blocks = (blocks, [])
parseRST :: GenParser Char ParserState Pandoc
parseRST = do
+ optional blanklines -- skip blank lines at beginning of file
startPos <- getPosition
- -- go through once just to get list of reference keys
+ -- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat
+ docMinusKeys <- manyTill (referenceKey <|> noteBlock <|> lineClump) eof >>=
+ return . concat
setInput docMinusKeys
setPosition startPos
+ st' <- getState
+ let reversedNotes = stateNotes st'
+ updateState $ \s -> s { stateNotes = reverse reversedNotes }
-- now parse it for real...
blocks <- parseBlocks
let blocks' = filter (/= Null) blocks
@@ -117,10 +123,9 @@ parseBlocks = manyTill block eof
block :: GenParser Char ParserState Block
block = choice [ codeBlock
- , rawHtmlBlock
- , rawLaTeXBlock
- , fieldList
+ , rawBlock
, blockQuote
+ , fieldList
, imageBlock
, customCodeBlock
, unknownDirective
@@ -138,46 +143,54 @@ block = choice [ codeBlock
-- field list
--
-fieldListItem :: String -> GenParser Char st ([Char], [Char])
-fieldListItem indent = try $ do
+rawFieldListItem :: String -> GenParser Char ParserState (String, String)
+rawFieldListItem indent = try $ do
string indent
char ':'
- name <- many1 alphaNum
+ name <- many1 $ alphaNum <|> spaceChar
string ": "
skipSpaces
first <- manyTill anyChar newline
- rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >>
- indentedBlock
- return (name, intercalate " " (first:(lines rest)))
+ rest <- option "" $ try $ do lookAhead (string indent >> spaceChar)
+ indentedBlock
+ let raw = first ++ "\n" ++ rest ++ "\n"
+ return (name, raw)
+
+fieldListItem :: String
+ -> GenParser Char ParserState (Maybe ([Inline], [[Block]]))
+fieldListItem indent = try $ do
+ (name, raw) <- rawFieldListItem indent
+ let term = [Str name]
+ contents <- parseFromString (many block) raw
+ case (name, contents) of
+ ("Author", x) -> do
+ updateState $ \st ->
+ st{ stateAuthors = stateAuthors st ++ [extractContents x] }
+ return Nothing
+ ("Authors", [BulletList auths]) -> do
+ updateState $ \st -> st{ stateAuthors = map extractContents auths }
+ return Nothing
+ ("Date", x) -> do
+ updateState $ \st -> st{ stateDate = extractContents x }
+ return Nothing
+ ("Title", x) -> do
+ updateState $ \st -> st{ stateTitle = extractContents x }
+ return Nothing
+ _ -> return $ Just (term, [contents])
+
+extractContents :: [Block] -> [Inline]
+extractContents [Plain auth] = auth
+extractContents [Para auth] = auth
+extractContents _ = []
fieldList :: GenParser Char ParserState Block
fieldList = try $ do
- indent <- lookAhead $ many (oneOf " \t")
+ indent <- lookAhead $ many spaceChar
items <- many1 $ fieldListItem indent
blanklines
- let authors = case lookup "Authors" items of
- Just auth -> [auth]
- Nothing -> map snd (filter (\(x,_) -> x == "Author") items)
- unless (null authors) $ do
- authors' <- mapM (parseFromString (many inline)) authors
- updateState $ \st -> st {stateAuthors = map normalizeSpaces authors'}
- case (lookup "Date" items) of
- Just dat -> do
- dat' <- parseFromString (many inline) dat
- updateState $ \st -> st{ stateDate = normalizeSpaces dat' }
- Nothing -> return ()
- case (lookup "Title" items) of
- Just tit -> parseFromString (many inline) tit >>=
- \t -> updateState $ \st -> st {stateTitle = t}
- Nothing -> return ()
- let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") &&
- (x /= "Date") && (x /= "Title")) items
- if null remaining
- then return Null
- else do terms <- mapM (return . (:[]) . Str . fst) remaining
- defs <- mapM (parseFromString (many block) . snd)
- remaining
- return $ DefinitionList $ zip terms $ map (:[]) defs
+ if null items
+ then return Null
+ else return $ DefinitionList $ catMaybes items
--
-- line block
@@ -186,7 +199,7 @@ fieldList = try $ do
lineBlockLine :: GenParser Char ParserState [Inline]
lineBlockLine = try $ do
string "| "
- white <- many (oneOf " \t")
+ white <- many spaceChar
line <- many $ (notFollowedBy newline >> inline) <|> (try $ endline >>~ char ' ')
optional endline
return $ normalizeSpaces $ (if null white then [] else [Str white]) ++ line
@@ -231,15 +244,16 @@ plain = many1 inline >>= return . Plain . normalizeSpaces
-- image block
--
-imageBlock :: GenParser Char st Block
+imageBlock :: GenParser Char ParserState Block
imageBlock = try $ do
string ".. image:: "
src <- manyTill anyChar newline
- fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t")
- many1 $ fieldListItem indent
+ fields <- try $ do indent <- lookAhead $ many (oneOf " /t")
+ many $ rawFieldListItem indent
optional blanklines
case lookup "alt" fields of
- Just alt -> return $ Plain [Image [Str alt] (src, alt)]
+ Just alt -> return $ Plain [Image [Str $ removeTrailingSpace alt]
+ (src, "")]
Nothing -> return $ Plain [Image [Str "image"] (src, "")]
--
-- header blocks
@@ -314,20 +328,19 @@ hrule = try $ do
indentedLine :: String -> GenParser Char st [Char]
indentedLine indents = try $ do
string indents
- result <- manyTill anyChar newline
- return $ result ++ "\n"
+ manyTill anyChar newline
-- two or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: GenParser Char st [Char]
-indentedBlock = do
- indents <- lookAhead $ many1 (oneOf " \t")
+indentedBlock = try $ do
+ indents <- lookAhead $ many1 spaceChar
lns <- many $ choice $ [ indentedLine indents,
try $ do b <- blanklines
l <- indentedLine indents
return (b ++ l) ]
- optional blanklines
- return $ concat lns
+ optional blanklines
+ return $ unlines lns
codeBlock :: GenParser Char st Block
codeBlock = try $ do
@@ -365,23 +378,16 @@ birdTrackLine = do
manyTill anyChar newline
--
--- raw html
+-- raw html/latex/etc
--
-rawHtmlBlock :: GenParser Char st Block
-rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >>
- indentedBlock >>= return . RawHtml
-
---
--- raw latex
---
-
-rawLaTeXBlock :: GenParser Char st Block
-rawLaTeXBlock = try $ do
- string ".. raw:: latex"
+rawBlock :: GenParser Char st Block
+rawBlock = try $ do
+ string ".. raw:: "
+ lang <- many1 (letter <|> digit)
blanklines
result <- indentedBlock
- return $ Para [(TeX result)]
+ return $ RawBlock lang result
--
-- block quotes
@@ -408,7 +414,7 @@ definitionListItem = try $ do
term <- many1Till inline endline
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString parseBlocks $ raw ++ "\n\n"
+ contents <- parseFromString parseBlocks $ raw ++ "\n"
return (normalizeSpaces term, [contents])
definitionList :: GenParser Char ParserState Block
@@ -505,9 +511,35 @@ unknownDirective = try $ do
string ".."
notFollowedBy (noneOf " \t\n")
manyTill anyChar newline
- many $ blanklines <|> (oneOf " \t" >> manyTill anyChar newline)
+ many $ blanklines <|> (spaceChar >> manyTill anyChar newline)
return Null
+---
+--- note block
+---
+
+noteBlock :: GenParser Char ParserState [Char]
+noteBlock = try $ do
+ startPos <- getPosition
+ string ".."
+ spaceChar >> skipMany spaceChar
+ ref <- noteMarker
+ spaceChar >> skipMany spaceChar
+ first <- anyLine
+ blanks <- option "" blanklines
+ rest <- option "" indentedBlock
+ endPos <- getPosition
+ let raw = first ++ "\n" ++ blanks ++ rest ++ "\n"
+ let newnote = (ref, raw)
+ st <- getState
+ let oldnotes = stateNotes st
+ updateState $ \s -> s { stateNotes = newnote : oldnotes }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+noteMarker :: GenParser Char ParserState [Char]
+noteMarker = char '[' >> (many1 digit <|> count 1 (oneOf "#*")) >>~ char ']'
+
--
-- reference key
--
@@ -565,14 +597,14 @@ imageKey = try $ do
skipSpaces
string "image::"
src <- targetURI
- return (Key (normalizeSpaces ref), (src, ""))
+ return (toKey (normalizeSpaces ref), (src, ""))
anonymousKey :: GenParser Char st (Key, Target)
anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
pos <- getPosition
- return (Key [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, ""))
+ return (toKey [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, ""))
regularKey :: GenParser Char ParserState (Key, Target)
regularKey = try $ do
@@ -580,7 +612,7 @@ regularKey = try $ do
ref <- referenceName
char ':'
src <- targetURI
- return (Key (normalizeSpaces ref), (src, ""))
+ return (toKey (normalizeSpaces ref), (src, ""))
--
-- tables
@@ -679,17 +711,19 @@ table = gridTable False <|> simpleTable False <|>
--
inline :: GenParser Char ParserState Inline
-inline = choice [ link
+inline = choice [ whitespace
+ , link
, str
- , whitespace
, endline
, strong
, emph
, code
, image
- , hyphens
, superscript
, subscript
+ , note
+ , smartPunctuation inline
+ , hyphens
, escapedChar
, symbol ] <?> "inline"
@@ -713,7 +747,8 @@ code :: GenParser Char ParserState Inline
code = try $ do
string "``"
result <- manyTill anyChar (try (string "``"))
- return $ Code $ removeLeadingTrailingSpace $ intercalate " " $ lines result
+ return $ Code nullAttr
+ $ removeLeadingTrailingSpace $ intercalate " " $ lines result
emph :: GenParser Char ParserState Inline
emph = enclosed (char '*') (char '*') inline >>=
@@ -779,9 +814,10 @@ referenceLink = try $ do
label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_'
state <- getState
let keyTable = stateKeys state
- let isAnonKey (Key [Str ('_':_)]) = True
- isAnonKey _ = False
- key <- option (Key label') $
+ let isAnonKey x = case fromKey x of
+ [Str ('_':_)] -> True
+ _ -> False
+ key <- option (toKey label') $
do char '_'
let anonKeys = sort $ filter isAnonKey $ M.keys keyTable
if null anonKeys
@@ -814,7 +850,24 @@ image = try $ do
ref <- manyTill inline (char '|')
state <- getState
let keyTable = stateKeys state
- (src,tit) <- case lookupKeySrc keyTable (Key ref) of
+ (src,tit) <- case lookupKeySrc keyTable (toKey ref) of
Nothing -> fail "no corresponding key"
Just target -> return target
return $ Image (normalizeSpaces ref) (src, tit)
+
+note :: GenParser Char ParserState Inline
+note = try $ do
+ ref <- noteMarker
+ char '_'
+ state <- getState
+ let notes = stateNotes state
+ case lookup ref notes of
+ Nothing -> fail "note not found"
+ Just raw -> do
+ contents <- parseFromString parseBlocks raw
+ when (ref == "*" || ref == "#") $ do -- auto-numbered
+ -- delete the note so the next auto-numbered note
+ -- doesn't get the same contents:
+ let newnotes = deleteFirstsBy (==) notes [(ref,raw)]
+ updateState $ \st -> st{ stateNotes = newnotes }
+ return $ Note contents
diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs
index b0c6e86d4..b9a46e8ff 100644
--- a/src/Text/Pandoc/Readers/TeXMath.hs
+++ b/src/Text/Pandoc/Readers/TeXMath.hs
@@ -27,12 +27,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of TeX math to a list of 'Pandoc' inline elements.
-}
-module Text.Pandoc.Readers.TeXMath (
- readTeXMath
- ) where
+module Text.Pandoc.Readers.TeXMath ( readTeXMath ) where
-import Text.ParserCombinators.Parsec
import Text.Pandoc.Definition
+import Text.TeXMath.Types
import Text.TeXMath.Parser
-- | Converts a raw TeX math formula to a list of 'Pandoc' inlines.
@@ -40,16 +38,17 @@ import Text.TeXMath.Parser
-- can't be converted.
readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings)
-> [Inline]
-readTeXMath inp = case readTeXMath' inp of
- Nothing -> [Str ("$" ++ inp ++ "$")]
- Just res -> res
+readTeXMath inp = case texMathToPandoc inp of
+ Left _ -> [Str ("$" ++ inp ++ "$")]
+ Right res -> res
--- | Like 'readTeXMath', but without the default.
-readTeXMath' :: String -- ^ String to parse (assumes @'\n'@ line endings)
- -> Maybe [Inline]
-readTeXMath' inp = case parse formula "formula" inp of
- Left _ -> Just [Str inp]
- Right exps -> expsToInlines exps
+texMathToPandoc :: String -> Either String [Inline]
+texMathToPandoc inp = inp `seq`
+ case parseFormula inp of
+ Left err -> Left err
+ Right exps -> case expsToInlines exps of
+ Nothing -> Left "Formula too complex for [Inline]"
+ Just r -> Right r
expsToInlines :: [Exp] -> Maybe [Inline]
expsToInlines xs = do
@@ -89,6 +88,26 @@ expToInlines (ESubsup x y z) = do
expToInlines (EDown x y) = expToInlines (ESub x y)
expToInlines (EUp x y) = expToInlines (ESuper x y)
expToInlines (EDownup x y z) = expToInlines (ESubsup x y z)
-expToInlines (EText _ x) = Just [Emph [Str x]]
+expToInlines (EText "normal" x) = Just [Str x]
+expToInlines (EText "bold" x) = Just [Strong [Str x]]
+expToInlines (EText "monospace" x) = Just [Code nullAttr x]
+expToInlines (EText "italic" x) = Just [Emph [Str x]]
+expToInlines (EText _ x) = Just [Str x]
+expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) =
+ case accent of
+ '\x203E' -> Just [Emph [Str [c,'\x0304']]] -- bar
+ '\x00B4' -> Just [Emph [Str [c,'\x0301']]] -- acute
+ '\x0060' -> Just [Emph [Str [c,'\x0300']]] -- grave
+ '\x02D8' -> Just [Emph [Str [c,'\x0306']]] -- breve
+ '\x02C7' -> Just [Emph [Str [c,'\x030C']]] -- check
+ '.' -> Just [Emph [Str [c,'\x0307']]] -- dot
+ '\x00B0' -> Just [Emph [Str [c,'\x030A']]] -- ring
+ '\x20D7' -> Just [Emph [Str [c,'\x20D7']]] -- arrow right
+ '\x20D6' -> Just [Emph [Str [c,'\x20D6']]] -- arrow left
+ '\x005E' -> Just [Emph [Str [c,'\x0302']]] -- hat
+ '\x0302' -> Just [Emph [Str [c,'\x0302']]] -- hat
+ '~' -> Just [Emph [Str [c,'\x0303']]] -- tilde
+ _ -> Nothing
expToInlines _ = Nothing
+
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
new file mode 100644
index 000000000..19357b343
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -0,0 +1,523 @@
+{-
+Copyright (C) 2010 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Textile
+ Copyright : Copyright (C) 2010-2011 Paul Rivier and John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Paul Rivier <paul*rivier#demotera*com>
+ Stability : alpha
+ Portability : portable
+
+Conversion from Textile to 'Pandoc' document, based on the spec
+available at http://redcloth.org/textile.
+
+Implemented and parsed:
+ - Paragraphs
+ - Code blocks
+ - Lists
+ - blockquote
+ - Inlines : strong, emph, cite, code, deleted, superscript,
+ subscript, links
+ - footnotes
+
+Implemented but discarded:
+ - HTML-specific and CSS-specific attributes
+
+Left to be implemented:
+ - dimension sign
+ - all caps
+ - continued blocks (ex bq..)
+
+TODO : refactor common patterns across readers :
+ - autolink
+ - smartPunctuation
+ - more ...
+
+-}
+
+
+module Text.Pandoc.Readers.Textile ( readTextile) where
+
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Parsing
+import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag )
+import Text.ParserCombinators.Parsec
+import Text.HTML.TagSoup.Match
+import Data.Char ( digitToInt, isLetter )
+import Control.Monad ( guard, liftM )
+
+-- | Parse a Textile text and return a Pandoc document.
+readTextile :: ParserState -- ^ Parser state, including options for parser
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Pandoc
+readTextile state s = (readWith parseTextile) state (s ++ "\n\n")
+
+
+--
+-- Constants and data structure definitions
+--
+
+-- | Special chars border strings parsing
+specialChars :: [Char]
+specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%()"
+
+-- | Generate a Pandoc ADT from a textile document
+parseTextile :: GenParser Char ParserState Pandoc
+parseTextile = do
+ -- textile allows raw HTML and does smart punctuation by default
+ updateState (\state -> state { stateParseRaw = True, stateSmart = True })
+ many blankline
+ startPos <- getPosition
+ -- go through once just to get list of reference keys and notes
+ -- docMinusKeys is the raw document with blanks where the keys/notes were...
+ let firstPassParser = noteBlock <|> lineClump
+ manyTill firstPassParser eof >>= setInput . concat
+ setPosition startPos
+ st' <- getState
+ let reversedNotes = stateNotes st'
+ updateState $ \s -> s { stateNotes = reverse reversedNotes }
+ -- now parse it for real...
+ blocks <- parseBlocks
+ return $ Pandoc (Meta [] [] []) blocks -- FIXME
+
+noteMarker :: GenParser Char ParserState [Char]
+noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
+
+noteBlock :: GenParser Char ParserState [Char]
+noteBlock = try $ do
+ startPos <- getPosition
+ ref <- noteMarker
+ optional blankline
+ contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock)
+ endPos <- getPosition
+ let newnote = (ref, contents ++ "\n")
+ st <- getState
+ let oldnotes = stateNotes st
+ updateState $ \s -> s { stateNotes = newnote : oldnotes }
+ -- return blanks so line count isn't affected
+ return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+
+-- | Parse document blocks
+parseBlocks :: GenParser Char ParserState [Block]
+parseBlocks = manyTill block eof
+
+-- | Block parsers list tried in definition order
+blockParsers :: [GenParser Char ParserState Block]
+blockParsers = [ codeBlock
+ , header
+ , blockQuote
+ , hrule
+ , anyList
+ , rawHtmlBlock
+ , maybeExplicitBlock "table" table
+ , maybeExplicitBlock "p" para
+ , nullBlock ]
+
+-- | Any block in the order of definition of blockParsers
+block :: GenParser Char ParserState Block
+block = choice blockParsers <?> "block"
+
+codeBlock :: GenParser Char ParserState Block
+codeBlock = codeBlockBc <|> codeBlockPre
+
+codeBlockBc :: GenParser Char ParserState Block
+codeBlockBc = try $ do
+ string "bc. "
+ contents <- manyTill anyLine blanklines
+ return $ CodeBlock ("",[],[]) $ unlines contents
+
+-- | Code Blocks in Textile are between <pre> and </pre>
+codeBlockPre :: GenParser Char ParserState Block
+codeBlockPre = try $ do
+ htmlTag (tagOpen (=="pre") null)
+ result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak)
+ -- drop leading newline if any
+ let result'' = case result' of
+ '\n':xs -> xs
+ _ -> result'
+ -- drop trailing newline if any
+ let result''' = case reverse result'' of
+ '\n':_ -> init result''
+ _ -> result''
+ return $ CodeBlock ("",[],[]) result'''
+
+-- | Header of the form "hN. content" with N in 1..6
+header :: GenParser Char ParserState Block
+header = try $ do
+ char 'h'
+ level <- oneOf "123456" >>= return . digitToInt
+ optional attributes
+ char '.'
+ whitespace
+ name <- manyTill inline blockBreak
+ return $ Header level (normalizeSpaces name)
+
+-- | Blockquote of the form "bq. content"
+blockQuote :: GenParser Char ParserState Block
+blockQuote = try $ do
+ string "bq"
+ optional attributes
+ char '.'
+ whitespace
+ para >>= return . BlockQuote . (:[])
+
+-- Horizontal rule
+
+hrule :: GenParser Char st Block
+hrule = try $ do
+ skipSpaces
+ start <- oneOf "-*"
+ count 2 (skipSpaces >> char start)
+ skipMany (spaceChar <|> char start)
+ newline
+ optional blanklines
+ return HorizontalRule
+
+-- Lists handling
+
+-- | Can be a bullet list or an ordered list. This implementation is
+-- strict in the nesting, sublist must start at exactly "parent depth
+-- plus one"
+anyList :: GenParser Char ParserState Block
+anyList = try $ do
+ l <- anyListAtDepth 1
+ blanklines
+ return l
+
+-- | This allow one type of list to be nested into an other type,
+-- provided correct nesting
+anyListAtDepth :: Int -> GenParser Char ParserState Block
+anyListAtDepth depth = choice [ bulletListAtDepth depth,
+ orderedListAtDepth depth,
+ definitionList ]
+
+-- | Bullet List of given depth, depth being the number of leading '*'
+bulletListAtDepth :: Int -> GenParser Char ParserState Block
+bulletListAtDepth depth = try $ do
+ items <- many1 (bulletListItemAtDepth depth)
+ return (BulletList items)
+
+-- | Bullet List Item of given depth, depth being the number of
+-- leading '*'
+bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block]
+bulletListItemAtDepth depth = try $ do
+ count depth (char '*')
+ optional attributes
+ whitespace
+ p <- inlines >>= return . Plain
+ sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[]))
+ return (p:sublist)
+
+-- | Ordered List of given depth, depth being the number of
+-- leading '#'
+orderedListAtDepth :: Int -> GenParser Char ParserState Block
+orderedListAtDepth depth = try $ do
+ items <- many1 (orderedListItemAtDepth depth)
+ return (OrderedList (1, DefaultStyle, DefaultDelim) items)
+
+-- | Ordered List Item of given depth, depth being the number of
+-- leading '#'
+orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block]
+orderedListItemAtDepth depth = try $ do
+ count depth (char '#')
+ optional attributes
+ whitespace
+ p <- inlines >>= return . Plain
+ sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[]))
+ return (p:sublist)
+
+-- | A definition list is a set of consecutive definition items
+definitionList :: GenParser Char ParserState Block
+definitionList = try $ do
+ items <- many1 definitionListItem
+ return $ DefinitionList items
+
+-- | A definition list item in textile begins with '- ', followed by
+-- the term defined, then spaces and ":=". The definition follows, on
+-- the same single line, or spaned on multiple line, after a line
+-- break.
+definitionListItem :: GenParser Char ParserState ([Inline], [[Block]])
+definitionListItem = try $ do
+ string "- "
+ term <- many1Till inline (try (whitespace >> string ":="))
+ def <- inlineDef <|> multilineDef
+ return (term, def)
+ where inlineDef :: GenParser Char ParserState [[Block]]
+ inlineDef = liftM (\d -> [[Plain d]]) $ try (whitespace >> inlines)
+ multilineDef :: GenParser Char ParserState [[Block]]
+ multilineDef = try $ do
+ optional whitespace >> newline
+ s <- many1Till anyChar (try (string "=:" >> newline))
+ -- this ++ "\n\n" does not look very good
+ ds <- parseFromString parseBlocks (s ++ "\n\n")
+ return [ds]
+
+-- | This terminates a block such as a paragraph. Because of raw html
+-- blocks support, we have to lookAhead for a rawHtmlBlock.
+blockBreak :: GenParser Char ParserState ()
+blockBreak = try (newline >> blanklines >> return ()) <|>
+ (lookAhead rawHtmlBlock >> return ())
+
+-- | A raw Html Block, optionally followed by blanklines
+rawHtmlBlock :: GenParser Char ParserState Block
+rawHtmlBlock = try $ do
+ (_,b) <- htmlTag isBlockTag
+ optional blanklines
+ return $ RawBlock "html" b
+
+-- | In textile, paragraphs are separated by blank lines.
+para :: GenParser Char ParserState Block
+para = try $ do
+ content <- manyTill inline blockBreak
+ return $ Para $ normalizeSpaces content
+
+
+-- Tables
+
+-- | A table cell spans until a pipe |
+tableCell :: GenParser Char ParserState TableCell
+tableCell = do
+ c <- many1 (noneOf "|\n")
+ content <- parseFromString (many1 inline) c
+ return $ [ Plain $ normalizeSpaces content ]
+
+-- | A table row is made of many table cells
+tableRow :: GenParser Char ParserState [TableCell]
+tableRow = try $ do
+ char '|'
+ cells <- endBy1 tableCell (char '|')
+ newline
+ return cells
+
+-- | Many table rows
+tableRows :: GenParser Char ParserState [[TableCell]]
+tableRows = many1 tableRow
+
+-- | Table headers are made of cells separated by a tag "|_."
+tableHeaders :: GenParser Char ParserState [TableCell]
+tableHeaders = try $ do
+ let separator = (try $ string "|_.")
+ separator
+ headers <- sepBy1 tableCell separator
+ char '|'
+ newline
+ return headers
+
+-- | A table with an optional header. Current implementation can
+-- handle tables with and without header, but will parse cells
+-- alignment attributes as content.
+table :: GenParser Char ParserState Block
+table = try $ do
+ headers <- option [] tableHeaders
+ rows <- tableRows
+ blanklines
+ let nbOfCols = max (length headers) (length $ head rows)
+ return $ Table []
+ (replicate nbOfCols AlignDefault)
+ (replicate nbOfCols 0.0)
+ headers
+ rows
+
+
+-- | Blocks like 'p' and 'table' do not need explicit block tag.
+-- However, they can be used to set HTML/CSS attributes when needed.
+maybeExplicitBlock :: String -- ^ block tag name
+ -> GenParser Char ParserState Block -- ^ implicit block
+ -> GenParser Char ParserState Block
+maybeExplicitBlock name blk = try $ do
+ optional $ try $ string name >> optional attributes >> char '.' >>
+ ((try whitespace) <|> endline)
+ blk
+
+
+
+----------
+-- Inlines
+----------
+
+
+-- | Any inline element
+inline :: GenParser Char ParserState Inline
+inline = choice inlineParsers <?> "inline"
+
+-- | List of consecutive inlines before a newline
+inlines :: GenParser Char ParserState [Inline]
+inlines = manyTill inline newline
+
+-- | Inline parsers tried in order
+inlineParsers :: [GenParser Char ParserState Inline]
+inlineParsers = [ autoLink
+ , str
+ , whitespace
+ , endline
+ , code
+ , htmlSpan
+ , rawHtmlInline
+ , note
+ , simpleInline (string "??") (Cite [])
+ , simpleInline (string "**") Strong
+ , simpleInline (string "__") Emph
+ , simpleInline (char '*') Strong
+ , simpleInline (char '_') Emph
+ , simpleInline (char '-') Strikeout
+ , simpleInline (char '^') Superscript
+ , simpleInline (char '~') Subscript
+ , link
+ , image
+ , mark
+ , smartPunctuation inline
+ , symbol
+ ]
+
+-- | Trademark, registered, copyright
+mark :: GenParser Char st Inline
+mark = try $ char '(' >> (try tm <|> try reg <|> copy)
+
+reg :: GenParser Char st Inline
+reg = do
+ oneOf "Rr"
+ char ')'
+ return $ Str "\174"
+
+tm :: GenParser Char st Inline
+tm = do
+ oneOf "Tt"
+ oneOf "Mm"
+ char ')'
+ return $ Str "\8482"
+
+copy :: GenParser Char st Inline
+copy = do
+ oneOf "Cc"
+ char ')'
+ return $ Str "\169"
+
+note :: GenParser Char ParserState Inline
+note = try $ do
+ char '['
+ ref <- many1 digit
+ char ']'
+ state <- getState
+ let notes = stateNotes state
+ case lookup ref notes of
+ Nothing -> fail "note not found"
+ Just raw -> liftM Note $ parseFromString parseBlocks raw
+
+-- | Any string
+str :: GenParser Char ParserState Inline
+str = do
+ xs <- many1 (noneOf (specialChars ++ "\t\n "))
+ optional $ try $ do
+ lookAhead (char '(')
+ notFollowedBy' mark
+ getInput >>= setInput . (' ':) -- add space before acronym explanation
+ -- parse a following hyphen if followed by a letter
+ -- (this prevents unwanted interpretation as starting a strikeout section)
+ result <- option xs $ try $ do
+ char '-'
+ next <- lookAhead letter
+ guard $ isLetter (last xs) || isLetter next
+ return $ xs ++ "-"
+ return $ Str result
+
+-- | Textile allows HTML span infos, we discard them
+htmlSpan :: GenParser Char ParserState Inline
+htmlSpan = try $ do
+ char '%'
+ _ <- attributes
+ content <- manyTill anyChar (char '%')
+ return $ Str content
+
+-- | Some number of space chars
+whitespace :: GenParser Char ParserState Inline
+whitespace = many1 spaceChar >> return Space <?> "whitespace"
+
+-- | In Textile, an isolated endline character is a line break
+endline :: GenParser Char ParserState Inline
+endline = try $ do
+ newline >> notFollowedBy blankline
+ return LineBreak
+
+rawHtmlInline :: GenParser Char ParserState Inline
+rawHtmlInline = liftM (RawInline "html" . snd)
+ $ htmlTag isInlineTag
+
+-- | Textile standard link syntax is "label":target
+link :: GenParser Char ParserState Inline
+link = try $ do
+ name <- surrounded (char '"') inline
+ char ':'
+ url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;," >> (space <|> newline))))
+ return $ Link name (url, "")
+
+-- | Detect plain links to http or email.
+autoLink :: GenParser Char ParserState Inline
+autoLink = do
+ (orig, src) <- (try uri <|> try emailAddress)
+ return $ Link [Str orig] (src, "")
+
+-- | image embedding
+image :: GenParser Char ParserState Inline
+image = try $ do
+ char '!' >> notFollowedBy space
+ src <- manyTill anyChar (lookAhead $ oneOf "!(")
+ alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')')))
+ char '!'
+ return $ Image [Str alt] (src, alt)
+
+-- | Any special symbol defined in specialChars
+symbol :: GenParser Char ParserState Inline
+symbol = do
+ result <- oneOf specialChars
+ return $ Str [result]
+
+-- | Inline code
+code :: GenParser Char ParserState Inline
+code = code1 <|> code2
+
+code1 :: GenParser Char ParserState Inline
+code1 = surrounded (char '@') anyChar >>= return . Code nullAttr
+
+code2 :: GenParser Char ParserState Inline
+code2 = do
+ htmlTag (tagOpen (=="tt") null)
+ result' <- manyTill anyChar (try $ htmlTag $ tagClose (=="tt"))
+ return $ Code nullAttr result'
+
+-- | Html / CSS attributes
+attributes :: GenParser Char ParserState String
+attributes = choice [ enclosed (char '(') (char ')') anyChar,
+ enclosed (char '{') (char '}') anyChar,
+ enclosed (char '[') (char ']') anyChar]
+
+-- | Parses material surrounded by a parser.
+surrounded :: GenParser Char st t -- ^ surrounding parser
+ -> GenParser Char st a -- ^ content parser (to be used repeatedly)
+ -> GenParser Char st [a]
+surrounded border = enclosed border border
+
+-- | Inlines are most of the time of the same form
+simpleInline :: GenParser Char ParserState t -- ^ surrounding parser
+ -> ([Inline] -> Inline) -- ^ Inline constructor
+ -> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly)
+simpleInline border construct = surrounded border (inlineWithAttribute) >>=
+ return . construct . normalizeSpaces
+ where inlineWithAttribute = (try $ optional attributes) >> inline
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 633708046..b1d5de63f 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -46,17 +46,11 @@ module Text.Pandoc.Shared (
escapeURI,
unescapeURI,
tabFilter,
- -- * Prettyprinting
- wrapped,
- wrapIfNeeded,
- wrappedTeX,
- wrapTeXIfNeeded,
- BlockWrapper (..),
- wrappedBlocksToDoc,
- hang',
-- * Pandoc block and inline list processing
orderedListMarkers,
normalizeSpaces,
+ normalize,
+ stringify,
compactify,
Element (..),
hierarchicalize,
@@ -65,19 +59,20 @@ module Text.Pandoc.Shared (
headerShift,
-- * Writer options
HTMLMathMethod (..),
+ CiteMethod (..),
ObfuscationMethod (..),
HTMLSlideVariant (..),
WriterOptions (..),
defaultWriterOptions,
-- * File handling
inDirectory,
+ findDataFile,
readDataFile
) where
import Text.Pandoc.Definition
+import Text.Pandoc.Generic
import qualified Text.Pandoc.UTF8 as UTF8 (readFile)
-import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
-import qualified Text.PrettyPrint.HughesPJ as PP
import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
isLetter, isDigit )
import Data.List ( find, isPrefixOf, intercalate )
@@ -94,12 +89,12 @@ import Paths_pandoc (getDataFileName)
--
-- | Split list by groups of one or more sep.
-splitBy :: (Eq a) => a -> [a] -> [[a]]
+splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
-splitBy sep lst =
- let (first, rest) = break (== sep) lst
- rest' = dropWhile (== sep) rest
- in first:(splitBy sep rest')
+splitBy isSep lst =
+ let (first, rest) = break isSep lst
+ rest' = dropWhile isSep rest
+ in first:(splitBy isSep rest')
-- | Split list into chunks divided at specified indices.
splitByIndices :: [Int] -> [a] -> [[a]]
@@ -218,83 +213,6 @@ tabFilter tabStop =
in go tabStop
--
--- Prettyprinting
---
-
--- | Wrap inlines to line length.
-wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc
-wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>=
- return . fsep
-
--- | Wrap inlines if the text wrap option is selected.
-wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) ->
- [Inline] -> m Doc
-wrapIfNeeded opts = if writerWrapText opts
- then wrapped
- else ($)
-
--- auxiliary function for wrappedTeX
-isNote :: Inline -> Bool
-isNote (Note _) = True
-isNote _ = False
-
--- | Wrap inlines to line length, treating footnotes in a way that
--- makes sense in LaTeX and ConTeXt.
-wrappedTeX :: Monad m
- => Bool
- -> ([Inline] -> m Doc)
- -> [Inline]
- -> m Doc
-wrappedTeX includePercent listWriter sect = do
- let (firstpart, rest) = break isNote sect
- firstpartWrapped <- wrapped listWriter firstpart
- if null rest
- then return firstpartWrapped
- else do let (note:rest') = rest
- let (rest1, rest2) = break (== Space) rest'
- -- rest1 is whatever comes between the note and a Space.
- -- if the note is followed directly by a Space, rest1 is null.
- -- rest1 is printed after the note but before the line break,
- -- to avoid spurious blank space the note and immediately
- -- following punctuation.
- rest1Out <- if null rest1
- then return empty
- else listWriter rest1
- rest2Wrapped <- if null rest2
- then return empty
- else wrappedTeX includePercent listWriter (tail rest2)
- noteText <- listWriter [note]
- return $ (firstpartWrapped <> if includePercent then PP.char '%' else empty) $$
- (noteText <> rest1Out) $$
- rest2Wrapped
-
--- | Wrap inlines if the text wrap option is selected, specialized
--- for LaTeX and ConTeXt.
-wrapTeXIfNeeded :: Monad m
- => WriterOptions
- -> Bool
- -> ([Inline] -> m Doc)
- -> [Inline]
- -> m Doc
-wrapTeXIfNeeded opts includePercent = if writerWrapText opts
- then wrappedTeX includePercent
- else ($)
-
--- | Indicates whether block should be surrounded by blank lines (@Pad@) or not (@Reg@).
-data BlockWrapper = Pad Doc | Reg Doc
-
--- | Converts a list of wrapped blocks to a Doc, with appropriate spaces around blocks.
-wrappedBlocksToDoc :: [BlockWrapper] -> Doc
-wrappedBlocksToDoc = foldr addBlock empty
- where addBlock (Pad d) accum | isEmpty accum = d
- addBlock (Pad d) accum = d $$ text "" $$ accum
- addBlock (Reg d) accum = d $$ accum
-
--- | A version of hang that works like the version in pretty-1.0.0.0
-hang' :: Doc -> Int -> Doc -> Doc
-hang' d1 n d2 = d1 $$ (nest n d2)
-
---
-- Pandoc block and inline list processing
--
@@ -324,20 +242,96 @@ orderedListMarkers (start, numstyle, numdelim) =
-- @Space@ elements, collapse double @Space@s into singles, and
-- remove empty Str elements.
normalizeSpaces :: [Inline] -> [Inline]
-normalizeSpaces [] = []
-normalizeSpaces list =
- let removeDoubles [] = []
- removeDoubles (Space:Space:rest) = removeDoubles (Space:rest)
- removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest)
- removeDoubles ((Str ""):rest) = removeDoubles rest
- removeDoubles (x:rest) = x:(removeDoubles rest)
- removeLeading (Space:xs) = removeLeading xs
- removeLeading x = x
- removeTrailing [] = []
- removeTrailing lst = if (last lst == Space)
- then init lst
- else lst
- in removeLeading $ removeTrailing $ removeDoubles list
+normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty
+ where isSpaceOrEmpty Space = True
+ isSpaceOrEmpty (Str "") = True
+ isSpaceOrEmpty _ = False
+ cleanup [] = []
+ cleanup (Space:rest) = let rest' = dropWhile isSpaceOrEmpty rest
+ in case rest' of
+ [] -> []
+ _ -> Space : cleanup rest'
+ cleanup ((Str ""):rest) = cleanup rest
+ cleanup (x:rest) = x : cleanup rest
+
+-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
+-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
+-- empty elements, etc.
+normalize :: (Eq a, Data a) => a -> a
+normalize = topDown removeEmptyBlocks .
+ topDown consolidateInlines .
+ bottomUp removeEmptyInlines
+
+removeEmptyBlocks :: [Block] -> [Block]
+removeEmptyBlocks (Null : xs) = removeEmptyBlocks xs
+removeEmptyBlocks (BulletList [] : xs) = removeEmptyBlocks xs
+removeEmptyBlocks (OrderedList _ [] : xs) = removeEmptyBlocks xs
+removeEmptyBlocks (DefinitionList [] : xs) = removeEmptyBlocks xs
+removeEmptyBlocks (RawBlock _ [] : xs) = removeEmptyBlocks xs
+removeEmptyBlocks (x:xs) = x : removeEmptyBlocks xs
+removeEmptyBlocks [] = []
+
+removeEmptyInlines :: [Inline] -> [Inline]
+removeEmptyInlines (Emph [] : zs) = removeEmptyInlines zs
+removeEmptyInlines (Strong [] : zs) = removeEmptyInlines zs
+removeEmptyInlines (Subscript [] : zs) = removeEmptyInlines zs
+removeEmptyInlines (Superscript [] : zs) = removeEmptyInlines zs
+removeEmptyInlines (SmallCaps [] : zs) = removeEmptyInlines zs
+removeEmptyInlines (Strikeout [] : zs) = removeEmptyInlines zs
+removeEmptyInlines (RawInline _ [] : zs) = removeEmptyInlines zs
+removeEmptyInlines (Code _ [] : zs) = removeEmptyInlines zs
+removeEmptyInlines (Str "" : zs) = removeEmptyInlines zs
+removeEmptyInlines (x : xs) = x : removeEmptyInlines xs
+removeEmptyInlines [] = []
+
+consolidateInlines :: [Inline] -> [Inline]
+consolidateInlines (Str x : ys) =
+ case concat (x : map fromStr strs) of
+ "" -> consolidateInlines rest
+ n -> Str n : consolidateInlines rest
+ where
+ (strs, rest) = span isStr ys
+ isStr (Str _) = True
+ isStr _ = False
+ fromStr (Str z) = z
+ fromStr _ = error "consolidateInlines - fromStr - not a Str"
+consolidateInlines (Space : ys) = Space : rest
+ where isSpace Space = True
+ isSpace _ = False
+ rest = consolidateInlines $ dropWhile isSpace ys
+consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $
+ Emph (xs ++ ys) : zs
+consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $
+ Strong (xs ++ ys) : zs
+consolidateInlines (Subscript xs : Subscript ys : zs) = consolidateInlines $
+ Subscript (xs ++ ys) : zs
+consolidateInlines (Superscript xs : Superscript ys : zs) = consolidateInlines $
+ Superscript (xs ++ ys) : zs
+consolidateInlines (SmallCaps xs : SmallCaps ys : zs) = consolidateInlines $
+ SmallCaps (xs ++ ys) : zs
+consolidateInlines (Strikeout xs : Strikeout ys : zs) = consolidateInlines $
+ Strikeout (xs ++ ys) : zs
+consolidateInlines (RawInline f x : RawInline f' y : zs) | f == f' =
+ consolidateInlines $ RawInline f (x ++ y) : zs
+consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 =
+ consolidateInlines $ Code a1 (x ++ y) : zs
+consolidateInlines (x : xs) = x : consolidateInlines xs
+consolidateInlines [] = []
+
+-- | Convert list of inlines to a string with formatting removed.
+stringify :: [Inline] -> String
+stringify = queryWith go
+ where go :: Inline -> [Char]
+ go Space = " "
+ go (Str x) = x
+ go (Code _ x) = x
+ go (Math _ x) = x
+ go EmDash = "--"
+ go EnDash = "-"
+ go Apostrophe = "'"
+ go Ellipses = "..."
+ go LineBreak = " "
+ go _ = ""
-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
@@ -370,32 +364,12 @@ data Element = Blk Block
-- letters, digits, and the characters _-.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier =
- dropWhile (not . isAlpha) . intercalate "-" . words . map toLower .
- filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
- concatMap extractText
- where extractText x = case x of
- Str s -> s
- Emph lst -> concatMap extractText lst
- Strikeout lst -> concatMap extractText lst
- Superscript lst -> concatMap extractText lst
- SmallCaps lst -> concatMap extractText lst
- Subscript lst -> concatMap extractText lst
- Strong lst -> concatMap extractText lst
- Quoted _ lst -> concatMap extractText lst
- Cite _ lst -> concatMap extractText lst
- Code s -> s
- Space -> " "
- EmDash -> "---"
- EnDash -> "--"
- Apostrophe -> ""
- Ellipses -> "..."
- LineBreak -> " "
- Math _ s -> s
- TeX _ -> ""
- HtmlInline _ -> ""
- Link lst _ -> concatMap extractText lst
- Image lst _ -> concatMap extractText lst
- Note _ -> ""
+ dropWhile (not . isAlpha) . intercalate "-" . words .
+ map (nbspToSp . toLower) .
+ filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") .
+ stringify
+ where nbspToSp '\160' = ' '
+ nbspToSp x = x
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
@@ -444,7 +418,7 @@ isHeaderBlock _ = False
-- | Shift header levels up or down.
headerShift :: Int -> Pandoc -> Pandoc
-headerShift n = processWith shift
+headerShift n = bottomUp shift
where shift :: Block -> Block
shift (Header level inner) = Header (level + n) inner
shift x = x
@@ -459,8 +433,14 @@ data HTMLMathMethod = PlainMath
| GladTeX
| WebTeX String -- url of TeX->image script.
| MathML (Maybe String) -- url of MathMLinHTML.js
+ | MathJax String -- url of MathJax.js
deriving (Show, Read, Eq)
+data CiteMethod = Citeproc -- use citeproc to render them
+ | Natbib -- output natbib cite commands
+ | Biblatex -- output biblatex cite commands
+ deriving (Show, Read, Eq)
+
-- | Methods for obfuscating email addresses in HTML.
data ObfuscationMethod = NoObfuscation
| ReferenceObfuscation
@@ -491,11 +471,17 @@ data WriterOptions = WriterOptions
, writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
, writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
, writerWrapText :: Bool -- ^ Wrap text to line length
+ , writerColumns :: Int -- ^ Characters in a line (for text wrapping)
, writerLiterateHaskell :: Bool -- ^ Write as literate haskell
, writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
, writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML
, writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file
, writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory
+ , writerCiteMethod :: CiteMethod -- ^ How to print cites
+ , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations
+ , writerHtml5 :: Bool -- ^ Produce HTML5
+ , writerChapters :: Bool -- ^ Use "chapter" for top-level sects
+ , writerListings :: Bool -- ^ Use listings package for code
} deriving Show
-- | Default writer options.
@@ -517,11 +503,17 @@ defaultWriterOptions =
, writerStrictMarkdown = False
, writerReferenceLinks = False
, writerWrapText = True
+ , writerColumns = 72
, writerLiterateHaskell = False
, writerEmailObfuscation = JavascriptObfuscation
, writerIdentifierPrefix = ""
, writerSourceDirectory = "."
, writerUserDataDir = Nothing
+ , writerCiteMethod = Citeproc
+ , writerBiblioFiles = []
+ , writerHtml5 = False
+ , writerChapters = False
+ , writerListings = False
}
--
@@ -537,11 +529,17 @@ inDirectory path action = do
setCurrentDirectory oldDir
return result
+-- | Get file path for data file, either from specified user data directory,
+-- or, if not found there, from Cabal data directory.
+findDataFile :: Maybe FilePath -> FilePath -> IO FilePath
+findDataFile Nothing f = getDataFileName f
+findDataFile (Just u) f = do
+ ex <- doesFileExist (u </> f)
+ if ex
+ then return (u </> f)
+ else getDataFileName f
+
-- | Read file from specified user data directory or, if not found there, from
-- Cabal data directory.
readDataFile :: Maybe FilePath -> FilePath -> IO String
-readDataFile userDir fname =
- case userDir of
- Nothing -> getDataFileName fname >>= UTF8.readFile
- Just u -> catch (UTF8.readFile $ u </> fname)
- (\_ -> getDataFileName fname >>= UTF8.readFile)
+readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile
diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs
index c8ddc3abf..b03e8c73f 100644
--- a/src/Text/Pandoc/Templates.hs
+++ b/src/Text/Pandoc/Templates.hs
@@ -72,7 +72,6 @@ import Text.ParserCombinators.Parsec
import Control.Monad (liftM, when, forM)
import System.FilePath
import Data.List (intercalate, intersperse)
-import Text.PrettyPrint (text, Doc)
import Text.XHtml (primHtml, Html)
import Data.ByteString.Lazy.UTF8 (ByteString, fromString)
import Text.Pandoc.Shared (readDataFile)
@@ -112,9 +111,6 @@ instance TemplateTarget ByteString where
instance TemplateTarget Html where
toTarget = primHtml
-instance TemplateTarget Doc where
- toTarget = text
-
-- | Renders a template
renderTemplate :: TemplateTarget a
=> [(String,String)] -- ^ Assoc. list of values for variables
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index 96d6e6218..a77f92cdc 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Stability : alpha
Portability : portable
-UTF-8 aware string IO functions that will work with GHC 6.10 or 6.12.
+UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
-}
module Text.Pandoc.UTF8 ( readFile
, writeFile
@@ -34,15 +34,54 @@ module Text.Pandoc.UTF8 ( readFile
, putStrLn
, hPutStr
, hPutStrLn
+ , hGetContents
)
where
+
+#if MIN_VERSION_base(4,2,0)
+
+import System.IO hiding (readFile, writeFile, getContents,
+ putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
+import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
+import qualified System.IO as IO
+
+readFile :: FilePath -> IO String
+readFile f = do
+ h <- openFile f ReadMode
+ hGetContents h
+
+writeFile :: FilePath -> String -> IO ()
+writeFile f s = withFile f WriteMode $ \h -> hPutStr h s
+
+getContents :: IO String
+getContents = hGetContents stdin
+
+putStr :: String -> IO ()
+putStr s = hPutStr stdout s
+
+putStrLn :: String -> IO ()
+putStrLn s = hPutStrLn stdout s
+
+hPutStr :: Handle -> String -> IO ()
+hPutStr h s = hSetEncoding h utf8 >> IO.hPutStr h s
+
+hPutStrLn :: Handle -> String -> IO ()
+hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
+
+hGetContents :: Handle -> IO String
+hGetContents h = hSetEncoding h utf8_bom >> IO.hGetContents h
+
+#else
+
import qualified Data.ByteString as B
+import Codec.Binary.UTF8.String (encodeString)
import Data.ByteString.UTF8 (toString, fromString)
import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn)
import System.IO (Handle)
import Control.Monad (liftM)
+
bom :: B.ByteString
bom = B.pack [0xEF, 0xBB, 0xBF]
@@ -51,14 +90,17 @@ stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s
stripBOM s = s
readFile :: FilePath -> IO String
-readFile = liftM (toString . stripBOM) . B.readFile
+readFile = liftM (toString . stripBOM) . B.readFile . encodeString
writeFile :: FilePath -> String -> IO ()
-writeFile f = B.writeFile f . fromString
+writeFile f = B.writeFile (encodeString f) . fromString
getContents :: IO String
getContents = liftM (toString . stripBOM) B.getContents
+hGetContents :: Handle -> IO String
+hGetContents h = liftM (toString . stripBOM) (B.hGetContents h)
+
putStr :: String -> IO ()
putStr = B.putStr . fromString
@@ -70,3 +112,5 @@ hPutStr h = B.hPutStr h . fromString
hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h s = hPutStr h (s ++ "\n")
+
+#endif
diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs
index 395bc2d30..0f6e00a3b 100644
--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-
Copyright (C) 2007-2010 John MacFarlane <jgm@berkeley.edu>
@@ -31,9 +32,9 @@ module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Printf ( printf )
-import Data.List ( isSuffixOf, intercalate, intersperse )
+import Data.List ( intercalate )
import Control.Monad.State
-import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Text.Pandoc.Pretty
import Text.Pandoc.Templates ( renderTemplate )
data WriterState =
@@ -56,15 +57,18 @@ writeConTeXt options document =
pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String
pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do
+ let colwidth = if writerWrapText options
+ then Just $ writerColumns options
+ else Nothing
titletext <- if null title
then return ""
- else liftM render $ inlineListToConTeXt title
- authorstext <- mapM (liftM render . inlineListToConTeXt) authors
+ else liftM (render colwidth) $ inlineListToConTeXt title
+ authorstext <- mapM (liftM (render colwidth) . inlineListToConTeXt) authors
datetext <- if null date
then return ""
- else liftM render $ inlineListToConTeXt date
- body <- blockListToConTeXt blocks
- let main = render $ body $$ text ""
+ else liftM (render colwidth) $ inlineListToConTeXt date
+ body <- blockListToConTeXt blocks
+ let main = render colwidth $ body
let context = writerVariables options ++
[ ("toc", if writerTableOfContents options then "yes" else "")
, ("body", main)
@@ -92,6 +96,8 @@ escapeCharForConTeXt ch =
'#' -> "\\#"
'<' -> "\\letterless{}"
'>' -> "\\lettermore{}"
+ '[' -> "{[}"
+ ']' -> "{]}"
'_' -> "\\letterunderscore{}"
'\160' -> "~"
x -> [x]
@@ -102,32 +108,27 @@ stringToConTeXt = concatMap escapeCharForConTeXt
-- | Convert Pandoc block element to ConTeXt.
blockToConTeXt :: Block
- -> State WriterState BlockWrapper
-blockToConTeXt Null = return $ Reg empty
-blockToConTeXt (Plain lst) = do
- st <- get
- let options = stOptions st
- contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst
- return $ Reg contents
+ -> State WriterState Doc
+blockToConTeXt Null = return empty
+blockToConTeXt (Plain lst) = inlineListToConTeXt lst
blockToConTeXt (Para [Image txt (src,_)]) = do
capt <- inlineListToConTeXt txt
- return $ Pad $ text "\\placefigure[here,nonumber]{" <> capt <>
- text "}{\\externalfigure[" <> text src <> text "]}"
+ return $ blankline $$ "\\placefigure[here,nonumber]" <> braces capt <>
+ braces ("\\externalfigure" <> brackets (text src)) <> blankline
blockToConTeXt (Para lst) = do
- st <- get
- let options = stOptions st
- contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst
- return $ Pad contents
+ contents <- inlineListToConTeXt lst
+ return $ contents <> blankline
blockToConTeXt (BlockQuote lst) = do
contents <- blockListToConTeXt lst
- return $ Pad $ text "\\startblockquote" $$ contents $$ text "\\stopblockquote"
-blockToConTeXt (CodeBlock _ str) =
- return $ Reg $ text $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n"
- -- \n because \stoptyping can't have anything after it, inc. }
-blockToConTeXt (RawHtml _) = return $ Reg empty
-blockToConTeXt (BulletList lst) = do
+ return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline
+blockToConTeXt (CodeBlock _ str) =
+ return $ "\\starttyping" <> cr <> flush (text str) <> cr <> "\\stoptyping" $$ blankline
+ -- blankline because \stoptyping can't have anything after it, inc. '}'
+blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
+blockToConTeXt (RawBlock _ _ ) = return empty
+blockToConTeXt (BulletList lst) = do
contents <- mapM listItemToConTeXt lst
- return $ Pad $ text "\\startitemize" $$ vcat contents $$ text "\\stopitemize"
+ return $ "\\startitemize" $$ vcat contents $$ text "\\stopitemize" <> blankline
blockToConTeXt (OrderedList (start, style', delim) lst) = do
st <- get
let level = stOrderedListLevel st
@@ -159,20 +160,23 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do
LowerAlpha -> "[a]"
UpperAlpha -> "[A]"
let specs = style'' ++ specs2
- return $ Pad $ text ("\\startitemize" ++ specs) $$ vcat contents $$
- text "\\stopitemize"
+ return $ "\\startitemize" <> text specs $$ vcat contents $$
+ "\\stopitemize" <> blankline
blockToConTeXt (DefinitionList lst) =
- mapM defListItemToConTeXt lst >>= return . Pad . wrappedBlocksToDoc
-blockToConTeXt HorizontalRule = return $ Pad $ text "\\thinrule"
+ liftM vcat $ mapM defListItemToConTeXt lst
+blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline
blockToConTeXt (Header level lst) = do
contents <- inlineListToConTeXt lst
st <- get
let opts = stOptions st
let base = if writerNumberSections opts then "section" else "subject"
- return $ Pad $ if level >= 1 && level <= 5
- then char '\\' <> text (concat (replicate (level - 1) "sub")) <>
- text base <> char '{' <> contents <> char '}'
- else contents
+ let level' = if writerChapters opts then level - 1 else level
+ return $ if level' >= 1 && level' <= 5
+ then char '\\' <> text (concat (replicate (level' - 1) "sub")) <>
+ text base <> char '{' <> contents <> char '}' <> blankline
+ else if level' == 0
+ then "\\chapter{" <> contents <> "}"
+ else contents <> blankline
blockToConTeXt (Table caption aligns widths heads rows) = do
let colDescriptor colWidth alignment = (case alignment of
AlignLeft -> 'l'
@@ -186,81 +190,87 @@ blockToConTeXt (Table caption aligns widths heads rows) = do
zipWith colDescriptor widths aligns)
headers <- if all null heads
then return empty
- else liftM ($$ text "\\HL") $ tableRowToConTeXt heads
+ else liftM ($$ "\\HL") $ tableRowToConTeXt heads
captionText <- inlineListToConTeXt caption
let captionText' = if null caption then text "none" else captionText
rows' <- mapM tableRowToConTeXt rows
- return $ Pad $ text "\\placetable[here]{" <> captionText' <> char '}' $$
- text "\\starttable[" <> text colDescriptors <> char ']' $$
- text "\\HL" $$ headers $$
- vcat rows' $$ text "\\HL\n\\stoptable"
+ return $ "\\placetable[here]" <> braces captionText' $$
+ "\\starttable" <> brackets (text colDescriptors) $$
+ "\\HL" $$ headers $$
+ vcat rows' $$ "\\HL" $$ "\\stoptable" <> blankline
tableRowToConTeXt :: [[Block]] -> State WriterState Doc
tableRowToConTeXt cols = do
cols' <- mapM blockListToConTeXt cols
- return $ (vcat (map (text "\\NC " <>) cols')) $$
- text "\\NC\\AR"
+ return $ (vcat (map ("\\NC " <>) cols')) $$ "\\NC\\AR"
listItemToConTeXt :: [Block] -> State WriterState Doc
listItemToConTeXt list = blockListToConTeXt list >>=
- return . (text "\\item" $$) . (nest 2)
+ return . ("\\item" $$) . (nest 2)
-defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState BlockWrapper
+defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc
defListItemToConTeXt (term, defs) = do
term' <- inlineListToConTeXt term
- def' <- liftM (vcat . intersperse (text "")) $ mapM blockListToConTeXt defs
- return $ Pad $ text "\\startdescr{" <> term' <> char '}' $$ def' $$ text "\\stopdescr"
+ def' <- liftM vsep $ mapM blockListToConTeXt defs
+ return $ "\\startdescr" <> braces term' $$ nest 2 def' $$
+ "\\stopdescr" <> blankline
-- | Convert list of block elements to ConTeXt.
blockListToConTeXt :: [Block] -> State WriterState Doc
-blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . wrappedBlocksToDoc
+blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst
-- | Convert list of inline elements to ConTeXt.
inlineListToConTeXt :: [Inline] -- ^ Inlines to convert
-> State WriterState Doc
-inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . hcat
+inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt lst
-- | Convert inline element to ConTeXt
inlineToConTeXt :: Inline -- ^ Inline to convert
-> State WriterState Doc
inlineToConTeXt (Emph lst) = do
contents <- inlineListToConTeXt lst
- return $ text "{\\em " <> contents <> char '}'
+ return $ braces $ "\\em " <> contents
inlineToConTeXt (Strong lst) = do
contents <- inlineListToConTeXt lst
- return $ text "{\\bf " <> contents <> char '}'
+ return $ braces $ "\\bf " <> contents
inlineToConTeXt (Strikeout lst) = do
contents <- inlineListToConTeXt lst
- return $ text "\\overstrikes{" <> contents <> char '}'
+ return $ "\\overstrikes" <> braces contents
inlineToConTeXt (Superscript lst) = do
contents <- inlineListToConTeXt lst
- return $ text "\\high{" <> contents <> char '}'
+ return $ "\\high" <> braces contents
inlineToConTeXt (Subscript lst) = do
contents <- inlineListToConTeXt lst
- return $ text "\\low{" <> contents <> char '}'
+ return $ "\\low" <> braces contents
inlineToConTeXt (SmallCaps lst) = do
contents <- inlineListToConTeXt lst
- return $ text "{\\sc " <> contents <> char '}'
-inlineToConTeXt (Code str) = return $ text $ "\\type{" ++ str ++ "}"
+ return $ braces $ "\\sc " <> contents
+inlineToConTeXt (Code _ str) | not ('{' `elem` str || '}' `elem` str) =
+ return $ "\\type" <> braces (text str)
+inlineToConTeXt (Code _ str) =
+ return $ "\\mono" <> braces (text $ stringToConTeXt str)
inlineToConTeXt (Quoted SingleQuote lst) = do
contents <- inlineListToConTeXt lst
- return $ text "\\quote{" <> contents <> char '}'
+ return $ "\\quote" <> braces contents
inlineToConTeXt (Quoted DoubleQuote lst) = do
contents <- inlineListToConTeXt lst
- return $ text "\\quotation{" <> contents <> char '}'
+ return $ "\\quotation" <> braces contents
inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst
inlineToConTeXt Apostrophe = return $ char '\''
-inlineToConTeXt EmDash = return $ text "---"
-inlineToConTeXt EnDash = return $ text "--"
-inlineToConTeXt Ellipses = return $ text "\\ldots{}"
+inlineToConTeXt EmDash = return "---"
+inlineToConTeXt EnDash = return "--"
+inlineToConTeXt Ellipses = return "\\ldots{}"
inlineToConTeXt (Str str) = return $ text $ stringToConTeXt str
-inlineToConTeXt (Math InlineMath str) = return $ char '$' <> text str <> char '$'
-inlineToConTeXt (Math DisplayMath str) = return $ text "\\startformula " <> text str <> text " \\stopformula"
-inlineToConTeXt (TeX str) = return $ text str
-inlineToConTeXt (HtmlInline _) = return empty
-inlineToConTeXt (LineBreak) = return $ text "\\crlf\n"
-inlineToConTeXt Space = return $ char ' '
-inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own
+inlineToConTeXt (Math InlineMath str) =
+ return $ char '$' <> text str <> char '$'
+inlineToConTeXt (Math DisplayMath str) =
+ return $ text "\\startformula " <> text str <> text " \\stopformula"
+inlineToConTeXt (RawInline "context" str) = return $ text str
+inlineToConTeXt (RawInline "tex" str) = return $ text str
+inlineToConTeXt (RawInline _ _) = return empty
+inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr
+inlineToConTeXt Space = return space
+inlineToConTeXt (Link [Code _ str] (src, tit)) = -- since ConTeXt has its own
inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links...
inlineToConTeXt (Link txt (src, _)) = do
st <- get
@@ -268,15 +278,12 @@ inlineToConTeXt (Link txt (src, _)) = do
put $ st {stNextRef = next + 1}
let ref = show next
label <- inlineListToConTeXt txt
- return $ text "\\useURL[" <> text ref <> text "][" <> text src <>
- text "][][" <> label <> text "]\\from[" <> text ref <> char ']'
+ return $ "\\useURL" <> brackets (text ref) <> brackets (text src) <>
+ brackets empty <> brackets label <>
+ "\\from" <> brackets (text ref)
inlineToConTeXt (Image _ (src, _)) = do
- return $ text "{\\externalfigure[" <> text src <> text "]}"
+ return $ braces $ "\\externalfigure" <> brackets (text src)
inlineToConTeXt (Note contents) = do
contents' <- blockListToConTeXt contents
- let rawnote = stripTrailingNewlines $ render contents'
- -- note: a \n before } is needed when note ends with a \stoptyping
- let optNewline = "\\stoptyping" `isSuffixOf` rawnote
- return $ text "\\footnote{" <>
- text rawnote <> (if optNewline then char '\n' else empty) <> char '}'
-
+ return $ text "\\footnote{" <>
+ nest 2 contents' <> char '}'
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs
index 5223259eb..9d09d46e3 100644
--- a/src/Text/Pandoc/Writers/Docbook.hs
+++ b/src/Text/Pandoc/Writers/Docbook.hs
@@ -33,15 +33,15 @@ import Text.Pandoc.XML
import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.Readers.TeXMath
-import Data.List ( isPrefixOf, intercalate )
+import Data.List ( isPrefixOf, intercalate, isSuffixOf )
import Data.Char ( toLower )
-import Text.PrettyPrint.HughesPJ hiding ( Str )
import Text.Pandoc.Highlighting (languages, languagesByExtension)
+import Text.Pandoc.Pretty
-- | Convert list of authors to a docbook <author> section
authorToDocbook :: WriterOptions -> [Inline] -> Doc
authorToDocbook opts name' =
- let name = render $ inlinesToDocbook opts name'
+ let name = render Nothing $ inlinesToDocbook opts name'
in if ',' `elem` name
then -- last name first
let (lastname, rest) = break (==',') name
@@ -61,16 +61,24 @@ authorToDocbook opts name' =
-- | Convert Pandoc document to string in Docbook format.
writeDocbook :: WriterOptions -> Pandoc -> String
writeDocbook opts (Pandoc (Meta tit auths dat) blocks) =
- let title = wrap opts tit
+ let title = inlinesToDocbook opts tit
authors = map (authorToDocbook opts) auths
date = inlinesToDocbook opts dat
elements = hierarchicalize blocks
- main = render $ vcat (map (elementToDocbook opts) elements)
+ colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ render' = render colwidth
+ opts' = if "</book>" `isSuffixOf`
+ (removeTrailingSpace $ writerTemplate opts)
+ then opts{ writerChapters = True }
+ else opts
+ main = render' $ vcat (map (elementToDocbook opts') elements)
context = writerVariables opts ++
[ ("body", main)
- , ("title", render title)
- , ("date", render date) ] ++
- [ ("author", render a) | a <- authors ]
+ , ("title", render' title)
+ , ("date", render' date) ] ++
+ [ ("author", render' a) | a <- authors ]
in if writerStandalone opts
then renderTemplate context $ writerTemplate opts
else main
@@ -83,9 +91,12 @@ elementToDocbook opts (Sec _ _num id' title elements) =
let elements' = if null elements
then [Blk (Para [])]
else elements
- in inTags True "section" [("id",id')] $
- inTagsSimple "title" (wrap opts title) $$
- vcat (map (elementToDocbook opts) elements')
+ tag = if writerChapters opts
+ then "chapter"
+ else "section"
+ in inTags True tag [("id",id')] $
+ inTagsSimple "title" (inlinesToDocbook opts title) $$
+ vcat (map (elementToDocbook opts{ writerChapters = False }) elements')
-- | Convert a list of Pandoc blocks to Docbook.
blocksToDocbook :: WriterOptions -> [Block] -> Doc
@@ -123,7 +134,7 @@ listItemToDocbook opts item =
blockToDocbook :: WriterOptions -> Block -> Doc
blockToDocbook _ Null = empty
blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize
-blockToDocbook opts (Plain lst) = wrap opts lst
+blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst
blockToDocbook opts (Para [Image txt (src,_)]) =
let capt = inlinesToDocbook opts txt
in inTagsIndented "figure" $
@@ -132,12 +143,13 @@ blockToDocbook opts (Para [Image txt (src,_)]) =
(inTagsIndented "imageobject"
(selfClosingTag "imagedata" [("fileref",src)])) $$
inTagsSimple "textobject" (inTagsSimple "phrase" capt))
-blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst
+blockToDocbook opts (Para lst) =
+ inTagsIndented "para" $ inlinesToDocbook opts lst
blockToDocbook opts (BlockQuote blocks) =
inTagsIndented "blockquote" $ blocksToDocbook opts blocks
blockToDocbook _ (CodeBlock (_,classes,_) str) =
- text ("<screen" ++ lang ++ ">\n") <>
- text (escapeStringForXML str) <> text "\n</screen>"
+ text ("<screen" ++ lang ++ ">") <> cr <>
+ flush (text (escapeStringForXML str) <> cr <> text "</screen>")
where lang = if null langs
then ""
else " language=\"" ++ escapeStringForXML (head langs) ++
@@ -167,7 +179,10 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) =
in inTags True "orderedlist" attribs items
blockToDocbook opts (DefinitionList lst) =
inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst
-blockToDocbook _ (RawHtml str) = text str -- raw XML block
+blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block
+-- we allow html for compatibility with earlier versions of pandoc
+blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block
+blockToDocbook _ (RawBlock _ _) = empty
blockToDocbook _ HorizontalRule = empty -- not semantic
blockToDocbook opts (Table caption aligns widths headers rows) =
let alignStrings = map alignmentToString aligns
@@ -214,12 +229,6 @@ tableItemToDocbook opts tag align item =
let attrib = [("align", align)]
in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item
--- | Take list of inline elements and return wrapped doc.
-wrap :: WriterOptions -> [Inline] -> Doc
-wrap opts lst = if writerWrapText opts
- then fsep $ map (inlinesToDocbook opts) (splitBy Space lst)
- else inlinesToDocbook opts lst
-
-- | Convert a list of inline elements to Docbook.
inlinesToDocbook :: WriterOptions -> [Inline] -> Doc
inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst
@@ -249,22 +258,21 @@ inlineToDocbook _ Apostrophe = char '\''
inlineToDocbook _ Ellipses = text "…"
inlineToDocbook _ EmDash = text "—"
inlineToDocbook _ EnDash = text "–"
-inlineToDocbook _ (Code str) =
+inlineToDocbook _ (Code _ str) =
inTagsSimple "literal" $ text (escapeStringForXML str)
inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str
-inlineToDocbook _ (TeX _) = empty
-inlineToDocbook _ (HtmlInline _) = empty
-inlineToDocbook _ LineBreak = text $ "<literallayout></literallayout>"
-inlineToDocbook _ Space = char ' '
+inlineToDocbook _ (RawInline _ _) = empty
+inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty
+inlineToDocbook _ Space = space
inlineToDocbook opts (Link txt (src, _)) =
if isPrefixOf "mailto:" src
then let src' = drop 7 src
emailLink = inTagsSimple "email" $ text $
escapeStringForXML $ src'
- in if txt == [Code src']
- then emailLink
- else inlinesToDocbook opts txt <+> char '(' <> emailLink <>
- char ')'
+ in case txt of
+ [Code _ s] | s == src' -> emailLink
+ _ -> inlinesToDocbook opts txt <+>
+ char '(' <> emailLink <> char ')'
else (if isPrefixOf "#" src
then inTags False "link" [("linkend", drop 1 src)]
else inTags False "ulink" [("url", src)]) $
@@ -275,6 +283,6 @@ inlineToDocbook _ (Image _ (src, tit)) =
else inTagsIndented "objectinfo" $
inTagsIndented "title" (text $ escapeStringForXML tit)
in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $
- titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
+ titleDoc $$ selfClosingTag "imagedata" [("fileref", src)]
inlineToDocbook opts (Note contents) =
inTagsIndented "footnote" $ blocksToDocbook opts contents
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index deaa2fe33..33b8aa76a 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -39,6 +39,7 @@ import Codec.Archive.Zip
import System.Time
import Text.Pandoc.Shared hiding ( Element )
import Text.Pandoc.Definition
+import Text.Pandoc.Generic
import Control.Monad (liftM)
import Text.XML.Light hiding (ppTopElement)
import Text.Pandoc.UUID
@@ -69,7 +70,7 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do
-- handle pictures
picsRef <- newIORef []
- Pandoc _ blocks <- liftM (processWith transformBlock) $ processWithM
+ Pandoc _ blocks <- liftM (bottomUp transformBlock) $ bottomUpM
(transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc
pics <- readIORef picsRef
let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e ->
@@ -232,13 +233,13 @@ transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do
mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++
"</ops:switch>"
result = if "<math" `isPrefixOf` mathml then inOps else mathml
- return $ HtmlInline result : xs
-transformInlines _ _ _ (HtmlInline _ : xs) = return $ Str "" : xs
+ return $ RawInline "html" result : xs
+transformInlines _ _ _ (RawInline _ _ : xs) = return $ Str "" : xs
transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs
transformInlines _ _ _ xs = return xs
transformBlock :: Block -> Block
-transformBlock (RawHtml _) = Null
+transformBlock (RawBlock _ _) = Null
transformBlock x = x
(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index d2a400c5c..ef14b6809 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -105,8 +105,8 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do
toc <- if writerTableOfContents opts
then tableOfContents opts sects
else return Nothing
- let startSlide = RawHtml "<div class=\"slide\">\n"
- endSlide = RawHtml "</div>\n"
+ let startSlide = RawBlock "html" "<div class=\"slide\">\n"
+ endSlide = RawBlock "html" "</div>\n"
let cutUp (HorizontalRule : Header 1 ys : xs) = cutUp (Header 1 ys : xs)
cutUp (HorizontalRule : xs) = [endSlide, startSlide] ++ cutUp xs
cutUp (Header 1 ys : xs) = [endSlide, startSlide] ++
@@ -134,6 +134,8 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do
MathML (Just url) ->
script !
[src url, thetype "text/javascript"] $ noHtml
+ MathJax url ->
+ script ! [src url, thetype "text/javascript"] $ noHtml
JsMath (Just url) ->
script !
[src url, thetype "text/javascript"] $ noHtml
@@ -168,6 +170,7 @@ inTemplate opts tit auths date toc body' newvars =
, ("pagetitle", topTitle')
, ("title", renderHtmlFragment tit)
, ("date", date') ] ++
+ [ ("html5","true") | writerHtml5 opts ] ++
(case toc of
Just t -> [ ("toc", renderHtmlFragment t)]
Nothing -> []) ++
@@ -187,7 +190,12 @@ tableOfContents opts sects = do
let tocList = catMaybes contents
return $ if null tocList
then Nothing
- else Just $ thediv ! [prefixedId opts' "TOC"] $ unordList tocList
+ else Just $
+ if writerHtml5 opts
+ then tag "nav" ! [prefixedId opts' "TOC"] $
+ unordList tocList
+ else thediv ! [prefixedId opts' "TOC"] $
+ unordList tocList
-- | Convert section number to string
showSecNum :: [Int] -> String
@@ -224,7 +232,10 @@ elementToHtml opts (Sec level num id' title' elements) = do
return $ if slides -- S5 gets confused by the extra divs around sections
then toHtmlFromList stuff
else if writerSectionDivs opts
- then thediv ! [prefixedId opts id'] << stuff
+ then if writerHtml5 opts
+ then tag "section" ! [prefixedId opts id']
+ << stuff
+ else thediv ! [prefixedId opts id'] << stuff
else toHtmlFromList stuff
-- | Convert list of Note blocks to a footnote <div>.
@@ -287,6 +298,12 @@ obfuscateChar char =
obfuscateString :: String -> String
obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
+attrsToHtml :: WriterOptions -> Attr -> [HtmlAttr]
+attrsToHtml opts (id',classes',keyvals) =
+ [theclass (unwords classes') | not (null classes')] ++
+ [prefixedId opts id' | not (null id')] ++
+ map (\(x,y) -> strAttr x y) keyvals
+
-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> State WriterState Html
blockToHtml _ Null = return $ noHtml
@@ -294,22 +311,24 @@ blockToHtml opts (Plain lst) = inlineListToHtml opts lst
blockToHtml opts (Para [Image txt (s,tit)]) = do
img <- inlineToHtml opts (Image txt (s,tit))
capt <- inlineListToHtml opts txt
- return $ thediv ! [theclass "figure"] <<
- [img, paragraph ! [theclass "caption"] << capt]
+ return $ if writerHtml5 opts
+ then tag "figure" <<
+ [img, tag "figcaption" << capt]
+ else thediv ! [theclass "figure"] <<
+ [img, paragraph ! [theclass "caption"] << capt]
blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph)
-blockToHtml _ (RawHtml str) = return $ primHtml str
+blockToHtml _ (RawBlock "html" str) = return $ primHtml str
+blockToHtml _ (RawBlock _ _) = return noHtml
blockToHtml _ (HorizontalRule) = return $ hr
blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do
let classes' = if writerLiterateHaskell opts
then classes
else filter (/= "literate") classes
- case highlightHtml (id',classes',keyvals) rawCode of
+ case highlightHtml False (id',classes',keyvals) rawCode of
Left _ -> -- change leading newlines into <br /> tags, because some
-- browsers ignore leading newlines in pre blocks
let (leadingBreaks, rawCode') = span (=='\n') rawCode
- attrs = [theclass (unwords classes') | not (null classes')] ++
- [prefixedId opts id' | not (null id')] ++
- map (\(x,y) -> strAttr x y) keyvals
+ attrs = attrsToHtml opts (id', classes', keyvals)
addBird = if "literate" `elem` classes'
then unlines . map ("> " ++) . lines
else unlines . lines
@@ -366,7 +385,17 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do
then [start startnum]
else []) ++
(if numstyle /= DefaultStyle
- then [thestyle $ "list-style-type: " ++ numstyle' ++ ";"]
+ then if writerHtml5 opts
+ then [strAttr "type" $
+ case numstyle of
+ Decimal -> "1"
+ LowerAlpha -> "a"
+ UpperAlpha -> "A"
+ LowerRoman -> "i"
+ UpperRoman -> "I"
+ _ -> "1"]
+ else [thestyle $ "list-style-type: " ++
+ numstyle']
else [])
return $ ordList ! attribs $ contents
blockToHtml opts (DefinitionList lst) = do
@@ -379,28 +408,30 @@ blockToHtml opts (DefinitionList lst) = do
else []
return $ dlist ! attribs << concat contents
blockToHtml opts (Table capt aligns widths headers rows') = do
- let alignStrings = map alignmentToString aligns
captionDoc <- if null capt
then return noHtml
else inlineListToHtml opts capt >>= return . caption
let percent w = show (truncate (100*w) :: Integer) ++ "%"
+ let widthAttrs w = if writerHtml5 opts
+ then [thestyle $ "width: " ++ percent w]
+ else [width $ percent w]
let coltags = if all (== 0.0) widths
then noHtml
else concatHtml $ map
- (\w -> col ! [width $ percent w] $ noHtml) widths
+ (\w -> col ! (widthAttrs w) $ noHtml) widths
head' <- if all null headers
then return noHtml
- else liftM (thead <<) $ tableRowToHtml opts alignStrings 0 headers
+ else liftM (thead <<) $ tableRowToHtml opts aligns 0 headers
body' <- liftM (tbody <<) $
- zipWithM (tableRowToHtml opts alignStrings) [1..] rows'
+ zipWithM (tableRowToHtml opts aligns) [1..] rows'
return $ table $ captionDoc +++ coltags +++ head' +++ body'
tableRowToHtml :: WriterOptions
- -> [String]
+ -> [Alignment]
-> Int
-> [[Block]]
-> State WriterState Html
-tableRowToHtml opts alignStrings rownum cols' = do
+tableRowToHtml opts aligns rownum cols' = do
let mkcell = if rownum == 0 then th else td
let rowclass = case rownum of
0 -> "header"
@@ -408,7 +439,7 @@ tableRowToHtml opts alignStrings rownum cols' = do
_ -> "even"
cols'' <- sequence $ zipWith
(\alignment item -> tableItemToHtml opts mkcell alignment item)
- alignStrings cols'
+ aligns cols'
return $ tr ! [theclass rowclass] $ toHtmlFromList cols''
alignmentToString :: Alignment -> [Char]
@@ -420,12 +451,15 @@ alignmentToString alignment = case alignment of
tableItemToHtml :: WriterOptions
-> (Html -> Html)
- -> [Char]
+ -> Alignment
-> [Block]
-> State WriterState Html
tableItemToHtml opts tag' align' item = do
contents <- blockListToHtml opts item
- return $ tag' ! [align align'] $ contents
+ let alignAttrs = if writerHtml5 opts
+ then [thestyle $ "align: " ++ alignmentToString align']
+ else [align $ alignmentToString align']
+ return $ tag' ! alignAttrs $ contents
blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html
blockListToHtml opts lst =
@@ -449,7 +483,11 @@ inlineToHtml opts inline =
(Apostrophe) -> return $ stringToHtml "’"
(Emph lst) -> inlineListToHtml opts lst >>= return . emphasize
(Strong lst) -> inlineListToHtml opts lst >>= return . strong
- (Code str) -> return $ thecode << str
+ (Code attr str) -> case highlightHtml True attr str of
+ Left _ -> return
+ $ thecode ! (attrsToHtml opts attr)
+ $ stringToHtml str
+ Right h -> return h
(Strikeout lst) -> inlineListToHtml opts lst >>=
return . (thespan ! [thestyle "text-decoration: line-through;"])
(SmallCaps lst) -> inlineListToHtml opts lst >>=
@@ -464,8 +502,7 @@ inlineToHtml opts inline =
stringToHtml "”")
in do contents <- inlineListToHtml opts lst
return $ leftQuote +++ contents +++ rightQuote
- (Math t str) ->
- modify (\st -> st {stMath = True}) >>
+ (Math t str) -> modify (\st -> st {stMath = True}) >>
(case writerHTMLMathMethod opts of
LaTeXMathML _ ->
-- putting LaTeXMathML in container with class "LaTeX" prevents
@@ -487,7 +524,9 @@ inlineToHtml opts inline =
InlineMath -> m
DisplayMath -> br +++ m +++ br
GladTeX ->
- return $ primHtml $ "<EQ>" ++ str ++ "</EQ>"
+ return $ case t of
+ InlineMath -> primHtml $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>"
+ DisplayMath -> primHtml $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>"
MathML _ -> do
let dt = if t == InlineMath
then DisplayInline
@@ -500,18 +539,23 @@ inlineToHtml opts inline =
Left _ -> inlineListToHtml opts
(readTeXMath str) >>= return .
(thespan ! [theclass "math"])
+ MathJax _ -> return $ primHtml $
+ case t of
+ InlineMath -> "\\(" ++ str ++ "\\)"
+ DisplayMath -> "\\[" ++ str ++ "\\]"
PlainMath -> do
x <- inlineListToHtml opts (readTeXMath str)
let m = thespan ! [theclass "math"] $ x
return $ case t of
InlineMath -> m
DisplayMath -> br +++ m +++ br )
- (TeX str) -> case writerHTMLMathMethod opts of
- LaTeXMathML _ -> do modify (\st -> st {stMath = True})
- return $ primHtml str
- _ -> return noHtml
- (HtmlInline str) -> return $ primHtml str
- (Link [Code str] (s,_)) | "mailto:" `isPrefixOf` s ->
+ (RawInline "latex" str) -> case writerHTMLMathMethod opts of
+ LaTeXMathML _ -> do modify (\st -> st {stMath = True})
+ return $ primHtml str
+ _ -> return noHtml
+ (RawInline "html" str) -> return $ primHtml str
+ (RawInline _ _) -> return noHtml
+ (Link [Code _ str] (s,_)) | "mailto:" `isPrefixOf` s ->
return $ obfuscateLink opts str s
(Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do
linkText <- inlineListToHtml opts txt
@@ -551,7 +595,7 @@ blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html
blockListToNote opts ref blocks =
-- If last block is Para or Plain, include the backlink at the end of
-- that block. Otherwise, insert a new Plain block with the backlink.
- let backlink = [HtmlInline $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++
+ let backlink = [RawInline "html" $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++
"\" class=\"footnoteBackLink\"" ++
" title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"]
blocks' = if null blocks
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 720c00ac8..28a1e7174 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -29,13 +30,15 @@ Conversion of 'Pandoc' format into LaTeX.
-}
module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where
import Text.Pandoc.Definition
+import Text.Pandoc.Generic
import Text.Pandoc.Shared
import Text.Pandoc.Templates
import Text.Printf ( printf )
-import Data.List ( (\\), isSuffixOf, isPrefixOf, intersperse )
-import Data.Char ( toLower )
+import Data.List ( (\\), isSuffixOf, isPrefixOf, intercalate, intersperse )
+import Data.Char ( toLower, isPunctuation )
import Control.Monad.State
-import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Text.Pandoc.Pretty
+import System.FilePath (dropExtension)
data WriterState =
WriterState { stInNote :: Bool -- @True@ if we're in a note
@@ -60,7 +63,7 @@ writeLaTeX options document =
stVerbInNote = False, stEnumerate = False,
stTable = False, stStrikeout = False, stSubscript = False,
stUrl = False, stGraphics = False,
- stLHS = False, stBook = False }
+ stLHS = False, stBook = writerChapters options }
pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String
pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
@@ -70,13 +73,34 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
"{report}" `isSuffixOf` x)
when (any usesBookClass (lines template)) $
modify $ \s -> s{stBook = True}
- titletext <- liftM render $ inlineListToLaTeX title
- authorsText <- mapM (liftM render . inlineListToLaTeX) authors
- dateText <- liftM render $ inlineListToLaTeX date
- body <- blockListToLaTeX blocks
- let main = render body
+ opts <- liftM stOptions get
+ let colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ titletext <- liftM (render colwidth) $ inlineListToLaTeX title
+ authorsText <- mapM (liftM (render colwidth) . inlineListToLaTeX) authors
+ dateText <- liftM (render colwidth) $ inlineListToLaTeX date
+ let (blocks', lastHeader) = if writerCiteMethod options == Citeproc then
+ (blocks, [])
+ else case last blocks of
+ Header 1 il -> (init blocks, il)
+ _ -> (blocks, [])
+ body <- blockListToLaTeX blocks'
+ biblioTitle <- liftM (render colwidth) $ inlineListToLaTeX lastHeader
+ let main = render colwidth body
st <- get
- let context = writerVariables options ++
+ let biblioFiles = intercalate "," $ map dropExtension $ writerBiblioFiles options
+ citecontext = case writerCiteMethod options of
+ Natbib -> [ ("biblio-files", biblioFiles)
+ , ("biblio-title", biblioTitle)
+ , ("natbib", "yes")
+ ]
+ Biblatex -> [ ("biblio-files", biblioFiles)
+ , ("biblio-title", biblioTitle)
+ , ("biblatex", "yes")
+ ]
+ _ -> []
+ context = writerVariables options ++
[ ("toc", if writerTableOfContents options then "yes" else "")
, ("body", main)
, ("title", titletext)
@@ -91,7 +115,10 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do
[ ("url", "yes") | stUrl st ] ++
[ ("numbersections", "yes") | writerNumberSections options ] ++
[ ("lhs", "yes") | stLHS st ] ++
- [ ("graphics", "yes") | stGraphics st ]
+ [ ("graphics", "yes") | stGraphics st ] ++
+ [ ("book-class", "yes") | stBook st] ++
+ [ ("listings", "yes") | writerListings options ] ++
+ citecontext
return $ if writerStandalone options
then renderTemplate context template
else main
@@ -107,7 +134,13 @@ stringToLaTeX = escapeStringUsing latexEscapes
, ('|', "\\textbar{}")
, ('<', "\\textless{}")
, ('>', "\\textgreater{}")
+ , ('[', "{[}") -- to avoid interpretation as
+ , (']', "{]}") -- optional arguments
, ('\160', "~")
+ , ('\x2018', "`")
+ , ('\x2019', "'")
+ , ('\x201C', "``")
+ , ('\x201D', "''")
]
-- | Puts contents into LaTeX command.
@@ -118,49 +151,73 @@ inCmd cmd contents = char '\\' <> text cmd <> braces contents
-- (because it's illegal to have verbatim inside some command arguments)
deVerb :: [Inline] -> [Inline]
deVerb [] = []
-deVerb ((Code str):rest) =
- (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest)
+deVerb ((Code _ str):rest) =
+ (RawInline "latex" $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest)
deVerb (other:rest) = other:(deVerb rest)
-- | Convert Pandoc block element to LaTeX.
blockToLaTeX :: Block -- ^ Block to convert
-> State WriterState Doc
blockToLaTeX Null = return empty
-blockToLaTeX (Plain lst) = do
- st <- get
- let opts = stOptions st
- wrapTeXIfNeeded opts True inlineListToLaTeX lst
+blockToLaTeX (Plain lst) = inlineListToLaTeX lst
blockToLaTeX (Para [Image txt (src,tit)]) = do
capt <- inlineListToLaTeX txt
img <- inlineToLaTeX (Image txt (src,tit))
- return $ text "\\begin{figure}[htb]" $$ text "\\centering" $$ img $$
- (text "\\caption{" <> capt <> char '}') $$ text "\\end{figure}\n"
+ return $ "\\begin{figure}[htb]" $$ "\\centering" $$ img $$
+ ("\\caption{" <> capt <> char '}') $$ "\\end{figure}" $$ blankline
blockToLaTeX (Para lst) = do
- st <- get
- let opts = stOptions st
- result <- wrapTeXIfNeeded opts True inlineListToLaTeX lst
- return $ result <> char '\n'
+ result <- inlineListToLaTeX lst
+ return $ result <> blankline
blockToLaTeX (BlockQuote lst) = do
contents <- blockListToLaTeX lst
- return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}"
-blockToLaTeX (CodeBlock (_,classes,_) str) = do
+ return $ "\\begin{quote}" $$ contents $$ "\\end{quote}"
+blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do
st <- get
env <- if writerLiterateHaskell (stOptions st) && "haskell" `elem` classes &&
"literate" `elem` classes
then do
modify $ \s -> s{ stLHS = True }
return "code"
- else if stInNote st
- then do
- modify $ \s -> s{ stVerbInNote = True }
- return "Verbatim"
- else return "verbatim"
- return $ text ("\\begin{" ++ env ++ "}\n") <> text str <>
- text ("\n\\end{" ++ env ++ "}")
-blockToLaTeX (RawHtml _) = return empty
+ else if writerListings (stOptions st)
+ then return "lstlisting"
+ else if stInNote st
+ then do
+ modify $ \s -> s{ stVerbInNote = True }
+ return "Verbatim"
+ else return "verbatim"
+ let params = if writerListings (stOptions st)
+ then take 1
+ [ "language=" ++ lang | lang <- classes
+ , lang `elem` ["ABAP","IDL","Plasm","ACSL","inform"
+ ,"POV","Ada","Java","Prolog","Algol"
+ ,"JVMIS","Promela","Ant","ksh","Python"
+ ,"Assembler","Lisp","R","Awk","Logo"
+ ,"Reduce","bash","make","Rexx","Basic"
+ ,"Mathematica","RSL","C","Matlab","Ruby"
+ ,"C++","Mercury","S","Caml","MetaPost"
+ ,"SAS","Clean","Miranda","Scilab","Cobol"
+ ,"Mizar","sh","Comal","ML","SHELXL","csh"
+ ,"Modula-2","Simula","Delphi","MuPAD"
+ ,"SQL","Eiffel","NASTRAN","tcl","Elan"
+ ,"Oberon-2","TeX","erlang","OCL"
+ ,"VBScript","Euphoria","Octave","Verilog"
+ ,"Fortran","Oz","VHDL","GCL","Pascal"
+ ,"VRML","Gnuplot","Perl","XML","Haskell"
+ ,"PHP","XSLT","HTML","PL/I"]
+ ] ++
+ [ key ++ "=" ++ attr | (key,attr) <- keyvalAttr ]
+ else []
+ printParams
+ | null params = empty
+ | otherwise = "[" <> hsep (intersperse "," (map text params)) <>
+ "]"
+ return $ "\\begin{" <> text env <> "}" <> printParams $$ flush (text str) $$
+ "\\end{" <> text env <> "}" $$ cr -- final cr needed because of footnotes
+blockToLaTeX (RawBlock "latex" x) = return $ text x <> blankline
+blockToLaTeX (RawBlock _ _) = return empty
blockToLaTeX (BulletList lst) = do
items <- mapM listItemToLaTeX lst
- return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}"
+ return $ "\\begin{itemize}" $$ vcat items $$ "\\end{itemize}"
blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
st <- get
let oldlevel = stOLLevel st
@@ -179,20 +236,19 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do
map toLower (toRomanNumeral oldlevel) ++
"}{" ++ show (start - 1) ++ "}"
else empty
- return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$
- vcat items $$ text "\\end{enumerate}"
+ return $ "\\begin{enumerate}" <> exemplar $$ resetcounter $$
+ vcat items $$ "\\end{enumerate}"
blockToLaTeX (DefinitionList lst) = do
items <- mapM defListItemToLaTeX lst
- return $ text "\\begin{description}" $$ vcat items $$
- text "\\end{description}"
-blockToLaTeX HorizontalRule = return $ text $
- "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n"
+ return $ "\\begin{description}" $$ vcat items $$ "\\end{description}"
+blockToLaTeX HorizontalRule = return $
+ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}" $$ blankline
blockToLaTeX (Header level lst) = do
let lst' = deVerb lst
txt <- inlineListToLaTeX lst'
let noNote (Note _) = Str ""
noNote x = x
- let lstNoNotes = processWith noNote lst'
+ let lstNoNotes = bottomUp noNote lst'
-- footnotes in sections don't work unless you specify an optional
-- argument: \section[mysec]{mysec\footnote{blah}}
optional <- if lstNoNotes == lst'
@@ -202,30 +258,31 @@ blockToLaTeX (Header level lst) = do
return $ char '[' <> res <> char ']'
let stuffing = optional <> char '{' <> txt <> char '}'
book <- liftM stBook get
- return $ case (book, level) of
- (True, 1) -> text "\\chapter" <> stuffing <> char '\n'
- (True, 2) -> text "\\section" <> stuffing <> char '\n'
- (True, 3) -> text "\\subsection" <> stuffing <> char '\n'
- (True, 4) -> text "\\subsubsection" <> stuffing <> char '\n'
- (False, 1) -> text "\\section" <> stuffing <> char '\n'
- (False, 2) -> text "\\subsection" <> stuffing <> char '\n'
- (False, 3) -> text "\\subsubsection" <> stuffing <> char '\n'
- _ -> txt <> char '\n'
+ let level' = if book then level - 1 else level
+ let headerWith x y = text x <> y $$ blankline
+ return $ case level' of
+ 0 -> headerWith "\\chapter" stuffing
+ 1 -> headerWith "\\section" stuffing
+ 2 -> headerWith "\\subsection" stuffing
+ 3 -> headerWith "\\subsubsection" stuffing
+ 4 -> headerWith "\\paragraph" stuffing
+ 5 -> headerWith "\\subparagraph" stuffing
+ _ -> txt $$ blankline
blockToLaTeX (Table caption aligns widths heads rows) = do
headers <- if all null heads
then return empty
- else liftM ($$ text "\\hline") $ tableRowToLaTeX heads
+ else liftM ($$ "\\hline") $ (tableRowToLaTeX widths) heads
captionText <- inlineListToLaTeX caption
- rows' <- mapM tableRowToLaTeX rows
+ rows' <- mapM (tableRowToLaTeX widths) rows
let colDescriptors = concat $ zipWith toColDescriptor widths aligns
let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$
- headers $$ vcat rows' $$ text "\\end{tabular}"
- let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}"
+ headers $$ vcat rows' $$ "\\end{tabular}"
+ let centered txt = "\\begin{center}" $$ txt $$ "\\end{center}"
modify $ \s -> s{ stTable = True }
return $ if isEmpty captionText
- then centered tableBody <> char '\n'
- else text "\\begin{table}[h]" $$ centered tableBody $$
- inCmd "caption" captionText $$ text "\\end{table}\n"
+ then centered tableBody $$ blankline
+ else "\\begin{table}[h]" $$ centered tableBody $$
+ inCmd "caption" captionText $$ "\\end{table}" $$ blankline
toColDescriptor :: Double -> Alignment -> String
toColDescriptor 0 align =
@@ -240,16 +297,19 @@ toColDescriptor width align = ">{\\PBS" ++
AlignRight -> "\\raggedleft"
AlignCenter -> "\\centering"
AlignDefault -> "\\raggedright") ++
- "\\hspace{0pt}}p{" ++ printf "%.2f" width ++
- "\\columnwidth}"
+ "\\hspace{0pt}}p{" ++ printf "%.2f" width ++ "\\columnwidth}"
blockListToLaTeX :: [Block] -> State WriterState Doc
blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat
-tableRowToLaTeX :: [[Block]] -> State WriterState Doc
-tableRowToLaTeX cols = mapM blockListToLaTeX cols >>=
- return . ($$ text "\\\\") . foldl (\row item -> row $$
- (if isEmpty row then text "" else text " & ") <> item) empty
+tableRowToLaTeX :: [Double] -> [[Block]] -> State WriterState Doc
+tableRowToLaTeX widths cols = do
+ renderedCells <- mapM blockListToLaTeX cols
+ let toCell 0 c = c
+ toCell w c = "\\parbox{" <> text (printf "%.2f" w) <>
+ "\\columnwidth}{" <> c <> cr <> "}"
+ let cells = zipWith toCell widths renderedCells
+ return $ (hcat $ intersperse (" & ") cells) <> "\\\\"
listItemToLaTeX :: [Block] -> State WriterState Doc
listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) .
@@ -258,8 +318,8 @@ listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) .
defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc
defListItemToLaTeX (term, defs) = do
term' <- inlineListToLaTeX $ deVerb term
- def' <- liftM (vcat . intersperse (text "")) $ mapM blockListToLaTeX defs
- return $ text "\\item[" <> term' <> text "]" $$ def'
+ def' <- liftM vsep $ mapM blockListToLaTeX defs
+ return $ "\\item" <> brackets term' $$ def'
-- | Convert list of inline elements to LaTeX.
inlineListToLaTeX :: [Inline] -- ^ Inlines to convert
@@ -292,60 +352,161 @@ inlineToLaTeX (Subscript lst) = do
return $ inCmd "textsubscr" contents
inlineToLaTeX (SmallCaps lst) =
inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsc"
-inlineToLaTeX (Cite _ lst) =
- inlineListToLaTeX lst
-inlineToLaTeX (Code str) = do
+inlineToLaTeX (Cite cits lst) = do
+ st <- get
+ let opts = stOptions st
+ case writerCiteMethod opts of
+ Natbib -> citationsToNatbib cits
+ Biblatex -> citationsToBiblatex cits
+ _ -> inlineListToLaTeX lst
+
+inlineToLaTeX (Code _ str) = do
st <- get
when (stInNote st) $ modify $ \s -> s{ stVerbInNote = True }
let chr = ((enumFromTo '!' '~') \\ str) !! 0
- return $ text $ "\\verb" ++ [chr] ++ str ++ [chr]
+ if writerListings (stOptions st)
+ then return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr]
+ else return $ text $ "\\verb" ++ [chr] ++ str ++ [chr]
inlineToLaTeX (Quoted SingleQuote lst) = do
contents <- inlineListToLaTeX lst
let s1 = if (not (null lst)) && (isQuoted (head lst))
- then text "\\,"
- else empty
+ then "\\,"
+ else empty
let s2 = if (not (null lst)) && (isQuoted (last lst))
- then text "\\,"
+ then "\\,"
else empty
return $ char '`' <> s1 <> contents <> s2 <> char '\''
inlineToLaTeX (Quoted DoubleQuote lst) = do
contents <- inlineListToLaTeX lst
let s1 = if (not (null lst)) && (isQuoted (head lst))
- then text "\\,"
- else empty
+ then "\\,"
+ else empty
let s2 = if (not (null lst)) && (isQuoted (last lst))
- then text "\\,"
+ then "\\,"
else empty
- return $ text "``" <> s1 <> contents <> s2 <> text "''"
+ return $ "``" <> s1 <> contents <> s2 <> "''"
inlineToLaTeX Apostrophe = return $ char '\''
-inlineToLaTeX EmDash = return $ text "---"
-inlineToLaTeX EnDash = return $ text "--"
-inlineToLaTeX Ellipses = return $ text "\\ldots{}"
+inlineToLaTeX EmDash = return "---"
+inlineToLaTeX EnDash = return "--"
+inlineToLaTeX Ellipses = return "\\ldots{}"
inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str
inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$'
-inlineToLaTeX (Math DisplayMath str) = return $ text "\\[" <> text str <> text "\\]"
-inlineToLaTeX (TeX str) = return $ text str
-inlineToLaTeX (HtmlInline _) = return empty
-inlineToLaTeX (LineBreak) = return $ text "\\\\"
-inlineToLaTeX Space = return $ char ' '
+inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]"
+inlineToLaTeX (RawInline "latex" str) = return $ text str
+inlineToLaTeX (RawInline "tex" str) = return $ text str
+inlineToLaTeX (RawInline _ _) = return empty
+inlineToLaTeX (LineBreak) = return "\\\\"
+inlineToLaTeX Space = return space
inlineToLaTeX (Link txt (src, _)) =
case txt of
- [Code x] | x == src -> -- autolink
+ [Code _ x] | x == src -> -- autolink
do modify $ \s -> s{ stUrl = True }
return $ text $ "\\url{" ++ x ++ "}"
_ -> do contents <- inlineListToLaTeX $ deVerb txt
- return $ text ("\\href{" ++ src ++ "}{") <> contents <>
- char '}'
+ return $ text ("\\href{" ++ stringToLaTeX src ++ "}{") <>
+ contents <> char '}'
inlineToLaTeX (Image _ (source, _)) = do
modify $ \s -> s{ stGraphics = True }
- return $ text $ "\\includegraphics{" ++ source ++ "}"
+ return $ "\\includegraphics" <> braces (text source)
inlineToLaTeX (Note contents) = do
- st <- get
- put (st {stInNote = True})
+ modify (\s -> s{stInNote = True})
contents' <- blockListToLaTeX contents
modify (\s -> s {stInNote = False})
- let rawnote = stripTrailingNewlines $ render contents'
-- note: a \n before } is needed when note ends with a Verbatim environment
- let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote
- return $ text "\\footnote{" <>
- text rawnote <> (if optNewline then char '\n' else empty) <> char '}'
+ return $ "\\footnote" <> braces (nest 2 contents')
+
+
+citationsToNatbib :: [Citation] -> State WriterState Doc
+citationsToNatbib (one:[])
+ = citeCommand c p s k
+ where
+ Citation { citationId = k
+ , citationPrefix = p
+ , citationSuffix = s
+ , citationMode = m
+ }
+ = one
+ c = case m of
+ AuthorInText -> "citet"
+ SuppressAuthor -> "citeyearpar"
+ NormalCitation -> "citep"
+
+citationsToNatbib cits
+ | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits
+ = citeCommand "citep" p s ks
+ where
+ noPrefix = and . map (null . citationPrefix)
+ noSuffix = and . map (null . citationSuffix)
+ ismode m = and . map (((==) m) . citationMode)
+ p = citationPrefix $ head $ cits
+ s = citationSuffix $ last $ cits
+ ks = intercalate ", " $ map citationId cits
+
+citationsToNatbib (c:cs) | citationMode c == AuthorInText = do
+ author <- citeCommand "citeauthor" [] [] (citationId c)
+ cits <- citationsToNatbib (c { citationMode = SuppressAuthor } : cs)
+ return $ author <+> cits
+
+citationsToNatbib cits = do
+ cits' <- mapM convertOne cits
+ return $ text "\\citetext{" <> foldl combineTwo empty cits' <> text "}"
+ where
+ combineTwo a b | isEmpty a = b
+ | otherwise = a <> text "; " <> b
+ convertOne Citation { citationId = k
+ , citationPrefix = p
+ , citationSuffix = s
+ , citationMode = m
+ }
+ = case m of
+ AuthorInText -> citeCommand "citealt" p s k
+ SuppressAuthor -> citeCommand "citeyear" p s k
+ NormalCitation -> citeCommand "citealp" p s k
+
+citeCommand :: String -> [Inline] -> [Inline] -> String -> State WriterState Doc
+citeCommand c p s k = do
+ args <- citeArguments p s k
+ return $ text ("\\" ++ c) <> args
+
+citeArguments :: [Inline] -> [Inline] -> String -> State WriterState Doc
+citeArguments p s k = do
+ let s' = case s of
+ (Str (x:[]) : r) | isPunctuation x -> dropWhile (== Space) r
+ (Str (x:xs) : r) | isPunctuation x -> Str xs : r
+ _ -> s
+ pdoc <- inlineListToLaTeX p
+ sdoc <- inlineListToLaTeX s'
+ let optargs = case (isEmpty pdoc, isEmpty sdoc) of
+ (True, True ) -> empty
+ (True, False) -> brackets sdoc
+ (_ , _ ) -> brackets pdoc <> brackets sdoc
+ return $ optargs <> braces (text k)
+
+citationsToBiblatex :: [Citation] -> State WriterState Doc
+citationsToBiblatex (one:[])
+ = citeCommand cmd p s k
+ where
+ Citation { citationId = k
+ , citationPrefix = p
+ , citationSuffix = s
+ , citationMode = m
+ } = one
+ cmd = case m of
+ SuppressAuthor -> "autocite*"
+ AuthorInText -> "textcite"
+ NormalCitation -> "autocite"
+
+citationsToBiblatex (c:cs) = do
+ args <- mapM convertOne (c:cs)
+ return $ text cmd <> foldl (<>) empty args
+ where
+ cmd = case citationMode c of
+ AuthorInText -> "\\textcites"
+ _ -> "\\autocites"
+ convertOne Citation { citationId = k
+ , citationPrefix = p
+ , citationSuffix = s
+ }
+ = citeArguments p s k
+
+citationsToBiblatex _ = return empty
diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs
index a46a18893..78b9274d6 100644
--- a/src/Text/Pandoc/Writers/Man.hs
+++ b/src/Text/Pandoc/Writers/Man.hs
@@ -35,7 +35,7 @@ import Text.Pandoc.Shared
import Text.Pandoc.Readers.TeXMath
import Text.Printf ( printf )
import Data.List ( isPrefixOf, intersperse, intercalate )
-import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Text.Pandoc.Pretty
import Control.Monad.State
type Notes = [[Block]]
@@ -52,27 +52,31 @@ pandocToMan opts (Pandoc (Meta title authors date) blocks) = do
titleText <- inlineListToMan opts title
authors' <- mapM (inlineListToMan opts) authors
date' <- inlineListToMan opts date
- let (cmdName, rest) = break (== ' ') $ render titleText
+ let colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ let render' = render colwidth
+ let (cmdName, rest) = break (== ' ') $ render' titleText
let (title', section) = case reverse cmdName of
(')':d:'(':xs) | d `elem` ['0'..'9'] ->
(text (reverse xs), char d)
- xs -> (text (reverse xs), doubleQuotes empty)
+ xs -> (text (reverse xs), doubleQuotes empty)
let description = hsep $
map (doubleQuotes . text . removeLeadingTrailingSpace) $
- splitBy '|' rest
+ splitBy (== '|') rest
body <- blockListToMan opts blocks
notes <- liftM stNotes get
notes' <- notesToMan opts (reverse notes)
- let main = render $ body $$ notes' $$ text ""
+ let main = render' $ body $$ notes' $$ text ""
hasTables <- liftM stHasTables get
let context = writerVariables opts ++
[ ("body", main)
- , ("title", render title')
- , ("section", render section)
- , ("date", render date')
- , ("description", render description) ] ++
+ , ("title", render' title')
+ , ("section", render' section)
+ , ("date", render' date')
+ , ("description", render' description) ] ++
[ ("has-tables", "yes") | hasTables ] ++
- [ ("author", render a) | a <- authors' ]
+ [ ("author", render' a) | a <- authors' ]
if writerStandalone opts
then return $ renderTemplate context $ writerTemplate opts
else return main
@@ -89,7 +93,7 @@ notesToMan opts notes =
noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc
noteToMan opts num note = do
contents <- blockListToMan opts note
- let marker = text "\n.SS [" <> text (show num) <> char ']'
+ let marker = cr <> text ".SS " <> brackets (text (show num))
return $ marker $$ contents
-- | Association list of characters to escape.
@@ -136,14 +140,14 @@ blockToMan :: WriterOptions -- ^ Options
-> State WriterState Doc
blockToMan _ Null = return empty
blockToMan opts (Plain inlines) =
- liftM vcat $ mapM (wrapIfNeeded opts (inlineListToMan opts)) $
- splitSentences inlines
+ liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines
blockToMan opts (Para inlines) = do
- contents <- liftM vcat $ mapM (wrapIfNeeded opts (inlineListToMan opts)) $
+ contents <- liftM vcat $ mapM (inlineListToMan opts) $
splitSentences inlines
return $ text ".PP" $$ contents
-blockToMan _ (RawHtml _) = return empty
-blockToMan _ HorizontalRule = return $ text $ ".PP\n * * * * *"
+blockToMan _ (RawBlock "man" str) = return $ text str
+blockToMan _ (RawBlock _ _) = return empty
+blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *"
blockToMan opts (Header level inlines) = do
contents <- inlineListToMan opts inlines
let heading = case level of
@@ -256,7 +260,7 @@ definitionListItemToMan opts (label, defs) = do
mapM (\item -> blockToMan opts item) rest
first' <- blockToMan opts first
return $ first' $$ text ".RS" $$ rest' $$ text ".RE"
- return $ text ".TP\n.B " <> labelText $+$ contents
+ return $ text ".TP" $$ text ".B " <> labelText $$ contents
-- | Convert list of Pandoc block elements to man.
blockListToMan :: WriterOptions -- ^ Options
@@ -303,23 +307,25 @@ inlineToMan _ EmDash = return $ text "\\[em]"
inlineToMan _ EnDash = return $ text "\\[en]"
inlineToMan _ Apostrophe = return $ char '\''
inlineToMan _ Ellipses = return $ text "\\&..."
-inlineToMan _ (Code str) =
+inlineToMan _ (Code _ str) =
return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]"
inlineToMan _ (Str str) = return $ text $ escapeString str
inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str
inlineToMan opts (Math DisplayMath str) = do
contents <- inlineListToMan opts $ readTeXMath str
- return $ text ".RS" $$ contents $$ text ".RE"
-inlineToMan _ (TeX _) = return empty
-inlineToMan _ (HtmlInline _) = return empty
-inlineToMan _ (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n"
-inlineToMan _ Space = return $ char ' '
+ return $ cr <> text ".RS" $$ contents $$ text ".RE"
+inlineToMan _ (RawInline "man" str) = return $ text str
+inlineToMan _ (RawInline _ _) = return empty
+inlineToMan _ (LineBreak) = return $
+ cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr
+inlineToMan _ Space = return space
inlineToMan opts (Link txt (src, _)) = do
linktext <- inlineListToMan opts txt
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
- return $ if txt == [Code srcSuffix]
- then char '<' <> text srcSuffix <> char '>'
- else linktext <> text " (" <> text src <> char ')'
+ return $ case txt of
+ [Code _ s]
+ | s == srcSuffix -> char '<' <> text srcSuffix <> char '>'
+ _ -> linktext <> text " (" <> text src <> char ')'
inlineToMan opts (Image alternate (source, tit)) = do
let txt = if (null alternate) || (alternate == [Str ""]) ||
(alternate == [Str source]) -- to prevent autolinks
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index 1b612006b..5e12c4aca 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -31,13 +32,13 @@ Markdown: <http://daringfireball.net/projects/markdown/>
-}
module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where
import Text.Pandoc.Definition
+import Text.Pandoc.Generic
import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.Shared
-import Text.Pandoc.Parsing
-import Text.Pandoc.Blocks
+import Text.Pandoc.Parsing hiding (blankline)
import Text.ParserCombinators.Parsec ( runParser, GenParser )
import Data.List ( group, isPrefixOf, find, intersperse, transpose )
-import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Text.Pandoc.Pretty
import Control.Monad.State
type Notes = [[Block]]
@@ -57,28 +58,28 @@ writeMarkdown opts document =
-- pictures, or inline formatting).
writePlain :: WriterOptions -> Pandoc -> String
writePlain opts document =
- evalState (pandocToMarkdown opts document') WriterState{ stNotes = []
- , stRefs = []
- , stPlain = True }
+ evalState (pandocToMarkdown opts{writerStrictMarkdown = True}
+ document') WriterState{ stNotes = []
+ , stRefs = []
+ , stPlain = True }
where document' = plainify document
plainify :: Pandoc -> Pandoc
-plainify = processWith go
- where go :: [Inline] -> [Inline]
- go (Emph xs : ys) = go xs ++ go ys
- go (Strong xs : ys) = go xs ++ go ys
- go (Strikeout xs : ys) = go xs ++ go ys
- go (Superscript xs : ys) = go xs ++ go ys
- go (Subscript xs : ys) = go xs ++ go ys
- go (SmallCaps xs : ys) = go xs ++ go ys
- go (Code s : ys) = Str s : go ys
- go (Math _ s : ys) = Str s : go ys
- go (TeX _ : ys) = Str "" : go ys
- go (HtmlInline _ : ys) = Str "" : go ys
- go (Link xs _ : ys) = go xs ++ go ys
- go (Image _ _ : ys) = go ys
- go (x : ys) = x : go ys
- go [] = []
+plainify = bottomUp go
+ where go :: Inline -> Inline
+ go (Emph xs) = SmallCaps xs
+ go (Strong xs) = SmallCaps xs
+ go (Strikeout xs) = SmallCaps xs
+ go (Superscript xs) = SmallCaps xs
+ go (Subscript xs) = SmallCaps xs
+ go (SmallCaps xs) = SmallCaps xs
+ go (Code _ s) = Str s
+ go (Math _ s) = Str s
+ go (RawInline _ _) = Str ""
+ go (Link xs _) = SmallCaps xs
+ go (Image xs _) = SmallCaps $ [Str "["] ++ xs ++ [Str "]"]
+ go (Cite _ cits) = SmallCaps cits
+ go x = x
-- | Return markdown representation of document.
pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState String
@@ -96,15 +97,20 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do
notes' <- notesToMarkdown opts (reverse $ stNotes st)
st' <- get -- note that the notes may contain refs
refs' <- refsToMarkdown opts (reverse $ stRefs st')
- let main = render $ foldl ($+$) empty $ [body, notes', refs']
+ let colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ let main = render colwidth $ body <>
+ (if isEmpty notes' then empty else blankline <> notes') <>
+ (if isEmpty refs' then empty else blankline <> refs')
let context = writerVariables opts ++
- [ ("toc", render toc)
+ [ ("toc", render colwidth toc)
, ("body", main)
- , ("title", render title')
- , ("date", render date')
+ , ("title", render colwidth title')
+ , ("date", render colwidth date')
] ++
[ ("titleblock", "yes") | titleblock ] ++
- [ ("author", render a) | a <- authors' ]
+ [ ("author", render colwidth a) | a <- authors' ]
if writerStandalone opts
then return $ renderTemplate context $ writerTemplate opts
else return main
@@ -112,29 +118,36 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do
-- | Return markdown representation of reference key table.
refsToMarkdown :: WriterOptions -> Refs -> State WriterState Doc
refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat
-
+
-- | Return markdown representation of a reference key.
keyToMarkdown :: WriterOptions
-> ([Inline], (String, String))
-> State WriterState Doc
keyToMarkdown opts (label, (src, tit)) = do
label' <- inlineListToMarkdown opts label
- let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\""
- return $ text " " <> char '[' <> label' <> char ']' <> text ": " <>
- text src <> tit'
+ let tit' = if null tit
+ then empty
+ else space <> "\"" <> text tit <> "\""
+ return $ nest 2 $ hang 2
+ ("[" <> label' <> "]:" <> space) (text src <> tit')
-- | Return markdown representation of notes.
notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc
notesToMarkdown opts notes =
- mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
- return . vcat
+ mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>=
+ return . vsep
-- | Return markdown representation of a note.
noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc
noteToMarkdown opts num blocks = do
contents <- blockListToMarkdown opts blocks
- let marker = text "[^" <> text (show num) <> text "]:"
- return $ hang' marker (writerTabStop opts) contents
+ let num' = text $ show num
+ let marker = text "[^" <> num' <> text "]:"
+ let markerSize = 4 + offset num'
+ let spacer = case writerTabStop opts - markerSize of
+ n | n > 0 -> text $ replicate n ' '
+ _ -> text " "
+ return $ hang (writerTabStop opts) (marker <> spacer) contents
-- | Escape special characters for Markdown.
escapeString :: String -> String
@@ -158,6 +171,22 @@ elementToListItem (Sec _ _ _ headerText subsecs) = [Plain headerText] ++
then []
else [BulletList $ map elementToListItem subsecs]
+attrsToMarkdown :: Attr -> Doc
+attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys]
+ where attribId = case attribs of
+ ([],_,_) -> empty
+ (i,_,_) -> "#" <> text i
+ attribClasses = case attribs of
+ (_,[],_) -> empty
+ (_,cs,_) -> hsep $
+ map (text . ('.':))
+ cs
+ attribKeys = case attribs of
+ (_,_,[]) -> empty
+ (_,_,ks) -> hsep $
+ map (\(k,v) -> text k
+ <> "=\"" <> text v <> "\"") ks
+
-- | Ordered list start parser for use in Para below.
olMarker :: GenParser Char ParserState Char
olMarker = do (start, style', delim) <- anyOrderedListMarker
@@ -169,134 +198,139 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker
-- | True if string begins with an ordered list marker
beginsWithOrderedListMarker :: String -> Bool
-beginsWithOrderedListMarker str =
- case runParser olMarker defaultParserState "para start" str of
- Left _ -> False
+beginsWithOrderedListMarker str =
+ case runParser olMarker defaultParserState "para start" (take 10 str) of
+ Left _ -> False
Right _ -> True
-wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
-wrappedMarkdown opts inlines = do
- let chunks = splitBy LineBreak inlines
- let chunks' = if null chunks
- then []
- else (map (++ [Str " "]) $ init chunks) ++ [last chunks]
- lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks'
- return $ vcat lns
-
-- | Convert Pandoc block element to markdown.
blockToMarkdown :: WriterOptions -- ^ Options
-> Block -- ^ Block element
-> State WriterState Doc
blockToMarkdown _ Null = return empty
-blockToMarkdown opts (Plain inlines) =
- wrappedMarkdown opts inlines
+blockToMarkdown opts (Plain inlines) = do
+ contents <- inlineListToMarkdown opts inlines
+ return $ contents <> cr
blockToMarkdown opts (Para inlines) = do
- contents <- wrappedMarkdown opts inlines
+ contents <- inlineListToMarkdown opts inlines
-- escape if para starts with ordered list marker
- let esc = if (not (writerStrictMarkdown opts)) &&
- beginsWithOrderedListMarker (render contents)
- then char '\\'
- else empty
- return $ esc <> contents <> text "\n"
-blockToMarkdown _ (RawHtml str) = do
st <- get
- if stPlain st
- then return empty
- else return $ text str
-blockToMarkdown _ HorizontalRule = return $ text "\n* * * * *\n"
+ let esc = if (not (writerStrictMarkdown opts)) &&
+ not (stPlain st) &&
+ beginsWithOrderedListMarker (render Nothing contents)
+ then text "\\"
+ else empty
+ return $ esc <> contents <> blankline
+blockToMarkdown _ (RawBlock f str)
+ | f == "html" || f == "latex" || f == "tex" || f == "markdown" = do
+ st <- get
+ if stPlain st
+ then return empty
+ else return $ text str <> text "\n"
+blockToMarkdown _ (RawBlock _ _) = return empty
+blockToMarkdown _ HorizontalRule =
+ return $ blankline <> text "* * * * *" <> blankline
blockToMarkdown opts (Header level inlines) = do
contents <- inlineListToMarkdown opts inlines
st <- get
-- use setext style headers if in literate haskell mode.
-- ghc interprets '#' characters in column 1 as line number specifiers.
if writerLiterateHaskell opts || stPlain st
- then let len = length $ render contents
- in return $ contents <> text "\n" <>
- case level of
- 1 -> text $ replicate len '=' ++ "\n"
- 2 -> text $ replicate len '-' ++ "\n"
- _ -> empty
- else return $ text ((replicate level '#') ++ " ") <> contents <> text "\n"
-blockToMarkdown opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes &&
- "literate" `elem` classes &&
- writerLiterateHaskell opts =
- return $ (vcat $ map (text "> " <>) $ map text (lines str)) <> text "\n"
-blockToMarkdown opts (CodeBlock _ str) = return $
- (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n"
+ then let len = offset contents
+ in return $ contents <> cr <>
+ (case level of
+ 1 -> text $ replicate len '='
+ 2 -> text $ replicate len '-'
+ _ -> empty) <> blankline
+ else return $
+ text ((replicate level '#') ++ " ") <> contents <> blankline
+blockToMarkdown opts (CodeBlock (_,classes,_) str)
+ | "haskell" `elem` classes && "literate" `elem` classes &&
+ writerLiterateHaskell opts =
+ return $ prefixed "> " (text str) <> blankline
+blockToMarkdown opts (CodeBlock attribs str) = return $
+ if writerStrictMarkdown opts || attribs == nullAttr
+ then nest (writerTabStop opts) (text str) <> blankline
+ else -- use delimited code block
+ flush (tildes <> space <> attrs <> cr <> text str <>
+ cr <> tildes) <> blankline
+ where tildes = text "~~~~"
+ attrs = attrsToMarkdown attribs
blockToMarkdown opts (BlockQuote blocks) = do
st <- get
-- if we're writing literate haskell, put a space before the bird tracks
-- so they won't be interpreted as lhs...
let leader = if writerLiterateHaskell opts
- then text . (" > " ++)
+ then " > "
else if stPlain st
- then text . (" " ++)
- else text . ("> " ++)
+ then " "
+ else "> "
contents <- blockListToMarkdown opts blocks
- return $ (vcat $ map leader $ lines $ render contents) <>
- text "\n"
+ return $ (prefixed leader contents) <> blankline
blockToMarkdown opts (Table caption aligns widths headers rows) = do
caption' <- inlineListToMarkdown opts caption
let caption'' = if null caption
then empty
- else text "" $+$ (text ": " <> caption')
+ else blankline <> ": " <> caption' <> blankline
headers' <- mapM (blockListToMarkdown opts) headers
let alignHeader alignment = case alignment of
- AlignLeft -> leftAlignBlock
- AlignCenter -> centerAlignBlock
- AlignRight -> rightAlignBlock
- AlignDefault -> leftAlignBlock
+ AlignLeft -> lblock
+ AlignCenter -> cblock
+ AlignRight -> rblock
+ AlignDefault -> lblock
rawRows <- mapM (mapM (blockListToMarkdown opts)) rows
let isSimple = all (==0) widths
- let numChars = maximum . map (length . render)
+ let numChars = maximum . map offset
let widthsInChars =
if isSimple
then map ((+2) . numChars) $ transpose (headers' : rawRows)
- else map (floor . (78 *)) widths
- let makeRow = hsepBlocks . (zipWith alignHeader aligns) .
- (zipWith docToBlock widthsInChars)
+ else map (floor . (fromIntegral (writerColumns opts) *)) widths
+ let makeRow = hcat . intersperse (lblock 1 (text " ")) .
+ (zipWith3 alignHeader aligns widthsInChars)
let rows' = map makeRow rawRows
let head' = makeRow headers'
- let maxRowHeight = maximum $ map heightOfBlock (head':rows')
- let underline = hsep $
- map (\width -> text $ replicate width '-') widthsInChars
+ let maxRowHeight = maximum $ map height (head':rows')
+ let underline = cat $ intersperse (text " ") $
+ map (\width -> text (replicate width '-')) widthsInChars
let border = if maxRowHeight > 1
- then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-'
+ then text (replicate (sum widthsInChars +
+ length widthsInChars - 1) '-')
else if all null headers
then underline
else empty
let head'' = if all null headers
then empty
- else border $+$ blockToDoc head'
- let spacer = if maxRowHeight > 1
- then text ""
- else empty
- let body = vcat $ intersperse spacer $ map blockToDoc rows'
+ else border <> cr <> head'
+ let body = if maxRowHeight > 1
+ then vsep rows'
+ else vcat rows'
let bottom = if all null headers
then underline
else border
- return $ (nest 2 $ head'' $+$ underline $+$ body $+$
- bottom $+$ caption'') <> text "\n"
+ return $ nest 2 $ head'' $$ underline $$ body $$
+ bottom $$ blankline $$ caption'' $$ blankline
blockToMarkdown opts (BulletList items) = do
contents <- mapM (bulletListItemToMarkdown opts) items
- return $ (vcat contents) <> text "\n"
+ return $ cat contents <> blankline
blockToMarkdown opts (OrderedList attribs items) = do
let markers = orderedListMarkers attribs
let markers' = map (\m -> if length m < 3
then m ++ replicate (3 - length m) ' '
- else m) markers
+ else m) markers
contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $
- zip markers' items
- return $ (vcat contents) <> text "\n"
+ zip markers' items
+ return $ cat contents <> blankline
blockToMarkdown opts (DefinitionList items) = do
contents <- mapM (definitionListItemToMarkdown opts) items
- return $ (vcat contents) <> text "\n"
+ return $ cat contents <> blankline
-- | Convert bullet list item (list of blocks) to markdown.
bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc
bulletListItemToMarkdown opts items = do
contents <- blockListToMarkdown opts items
- return $ hang' (text "- ") (writerTabStop opts) contents
+ let sps = replicate (writerTabStop opts - 2) ' '
+ let start = text ('-' : ' ' : sps)
+ return $ hang (writerTabStop opts) start $ contents <> cr
-- | Convert ordered list item (a list of blocks) to markdown.
orderedListItemToMarkdown :: WriterOptions -- ^ options
@@ -305,8 +339,11 @@ orderedListItemToMarkdown :: WriterOptions -- ^ options
-> State WriterState Doc
orderedListItemToMarkdown opts marker items = do
contents <- blockListToMarkdown opts items
- return $ hsep [nest (min (3 - length marker) 0) (text marker),
- nest (writerTabStop opts) contents]
+ let sps = case length marker - writerTabStop opts of
+ n | n > 0 -> text $ replicate n ' '
+ _ -> text " "
+ let start = text marker <> sps
+ return $ hang (writerTabStop opts) start $ contents <> cr
-- | Convert definition list item (label, list of blocks) to markdown.
definitionListItemToMarkdown :: WriterOptions
@@ -316,17 +353,20 @@ definitionListItemToMarkdown opts (label, defs) = do
labelText <- inlineListToMarkdown opts label
let tabStop = writerTabStop opts
st <- get
- let leader = if stPlain st then empty else text " ~"
- contents <- liftM vcat $
- mapM (liftM ((leader $$) . nest tabStop . vcat) . mapM (blockToMarkdown opts)) defs
- return $ labelText $+$ contents
+ let leader = if stPlain st then " " else " ~"
+ let sps = case writerTabStop opts - 3 of
+ n | n > 0 -> text $ replicate n ' '
+ _ -> text " "
+ defs' <- mapM (mapM (blockToMarkdown opts)) defs
+ let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs'
+ return $ labelText <> cr <> contents <> cr
-- | Convert list of Pandoc block elements to markdown.
blockListToMarkdown :: WriterOptions -- ^ Options
-> [Block] -- ^ List of block elements
-> State WriterState Doc
blockListToMarkdown opts blocks =
- mapM (blockToMarkdown opts) blocks >>= return . vcat
+ mapM (blockToMarkdown opts) blocks >>= return . cat
-- | Get reference for target; if none exists, create unique one and return.
-- Prefer label if possible; otherwise, generate a unique key.
@@ -349,86 +389,132 @@ getReference label (src, tit) = do
-- | Convert list of Pandoc inline elements to markdown.
inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
inlineListToMarkdown opts lst =
- mapM (inlineToMarkdown opts) lst >>= return . hcat
+ mapM (inlineToMarkdown opts) lst >>= return . cat
+
+escapeSpaces :: Inline -> Inline
+escapeSpaces (Str s) = Str $ substitute " " "\\ " s
+escapeSpaces Space = Str "\\ "
+escapeSpaces x = x
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
inlineToMarkdown opts (Emph lst) = do
contents <- inlineListToMarkdown opts lst
- return $ char '*' <> contents <> char '*'
+ return $ "*" <> contents <> "*"
inlineToMarkdown opts (Strong lst) = do
contents <- inlineListToMarkdown opts lst
- return $ text "**" <> contents <> text "**"
+ return $ "**" <> contents <> "**"
inlineToMarkdown opts (Strikeout lst) = do
contents <- inlineListToMarkdown opts lst
- return $ text "~~" <> contents <> text "~~"
+ return $ "~~" <> contents <> "~~"
inlineToMarkdown opts (Superscript lst) = do
- contents <- inlineListToMarkdown opts lst
- let contents' = text $ substitute " " "\\ " $ render contents
- return $ char '^' <> contents' <> char '^'
+ let lst' = bottomUp escapeSpaces lst
+ contents <- inlineListToMarkdown opts lst'
+ return $ "^" <> contents <> "^"
inlineToMarkdown opts (Subscript lst) = do
- contents <- inlineListToMarkdown opts lst
- let contents' = text $ substitute " " "\\ " $ render contents
- return $ char '~' <> contents' <> char '~'
+ let lst' = bottomUp escapeSpaces lst
+ contents <- inlineListToMarkdown opts lst'
+ return $ "~" <> contents <> "~"
inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst
inlineToMarkdown opts (Quoted SingleQuote lst) = do
contents <- inlineListToMarkdown opts lst
- return $ char '‘' <> contents <> char '’'
+ return $ "‘" <> contents <> "’"
inlineToMarkdown opts (Quoted DoubleQuote lst) = do
contents <- inlineListToMarkdown opts lst
- return $ char '“' <> contents <> char '”'
-inlineToMarkdown _ EmDash = return $ char '\8212'
-inlineToMarkdown _ EnDash = return $ char '\8211'
-inlineToMarkdown _ Apostrophe = return $ char '\8217'
-inlineToMarkdown _ Ellipses = return $ char '\8230'
-inlineToMarkdown _ (Code str) =
+ return $ "“" <> contents <> "”"
+inlineToMarkdown _ EmDash = return "\8212"
+inlineToMarkdown _ EnDash = return "\8211"
+inlineToMarkdown _ Apostrophe = return "\8217"
+inlineToMarkdown _ Ellipses = return "\8230"
+inlineToMarkdown opts (Code attr str) =
let tickGroups = filter (\s -> '`' `elem` s) $ group str
longest = if null tickGroups
then 0
else maximum $ map length tickGroups
marker = replicate (longest + 1) '`'
- spacer = if (longest == 0) then "" else " " in
- return $ text (marker ++ spacer ++ str ++ spacer ++ marker)
+ spacer = if (longest == 0) then "" else " "
+ attrs = if writerStrictMarkdown opts || attr == nullAttr
+ then empty
+ else attrsToMarkdown attr
+ in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs
inlineToMarkdown _ (Str str) = do
st <- get
if stPlain st
then return $ text str
else return $ text $ escapeString str
-inlineToMarkdown _ (Math InlineMath str) = return $ char '$' <> text str <> char '$'
-inlineToMarkdown _ (Math DisplayMath str) = return $ text "$$" <> text str <> text "$$"
-inlineToMarkdown _ (TeX str) = return $ text str
-inlineToMarkdown _ (HtmlInline str) = return $ text str
-inlineToMarkdown _ (LineBreak) = return $ text " \n"
-inlineToMarkdown _ Space = return $ char ' '
-inlineToMarkdown opts (Cite _ cits) = inlineListToMarkdown opts cits
+inlineToMarkdown _ (Math InlineMath str) =
+ return $ "$" <> text str <> "$"
+inlineToMarkdown _ (Math DisplayMath str) =
+ return $ "$$" <> text str <> "$$"
+inlineToMarkdown _ (RawInline f str)
+ | f == "html" || f == "latex" || f == "tex" || f == "markdown" =
+ return $ text str
+inlineToMarkdown _ (RawInline _ _) = return empty
+inlineToMarkdown opts (LineBreak) = return $
+ if writerStrictMarkdown opts
+ then " " <> cr
+ else "\\" <> cr
+inlineToMarkdown _ Space = return space
+inlineToMarkdown opts (Cite (c:cs) lst)
+ | writerCiteMethod opts == Citeproc = inlineListToMarkdown opts lst
+ | citationMode c == AuthorInText = do
+ suffs <- inlineListToMarkdown opts $ citationSuffix c
+ rest <- mapM convertOne cs
+ let inbr = suffs <+> joincits rest
+ br = if isEmpty inbr then empty else char '[' <> inbr <> char ']'
+ return $ text ("@" ++ citationId c) <+> br
+ | otherwise = do
+ cits <- mapM convertOne (c:cs)
+ return $ text "[" <> joincits cits <> text "]"
+ where
+ joincits = hcat . intersperse (text "; ") . filter (not . isEmpty)
+ convertOne Citation { citationId = k
+ , citationPrefix = pinlines
+ , citationSuffix = sinlines
+ , citationMode = m }
+ = do
+ pdoc <- inlineListToMarkdown opts pinlines
+ sdoc <- inlineListToMarkdown opts sinlines
+ let k' = text (modekey m ++ "@" ++ k)
+ r = case sinlines of
+ Str (y:_):_ | y `elem` ",;]@" -> k' <> sdoc
+ _ -> k' <+> sdoc
+ return $ pdoc <+> r
+ modekey SuppressAuthor = "-"
+ modekey _ = ""
+inlineToMarkdown _ (Cite _ _) = return $ text ""
inlineToMarkdown opts (Link txt (src', tit)) = do
linktext <- inlineListToMarkdown opts txt
- let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\""
+ let linktitle = if null tit
+ then empty
+ else text $ " \"" ++ tit ++ "\""
let src = unescapeURI src'
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
let useRefLinks = writerReferenceLinks opts
- let useAuto = null tit && txt == [Code srcSuffix]
+ let useAuto = case (tit,txt) of
+ ("", [Code _ s]) | s == srcSuffix -> True
+ _ -> False
ref <- if useRefLinks then getReference txt (src, tit) else return []
reftext <- inlineListToMarkdown opts ref
return $ if useAuto
- then char '<' <> text srcSuffix <> char '>'
+ then "<" <> text srcSuffix <> ">"
else if useRefLinks
- then let first = char '[' <> linktext <> char ']'
+ then let first = "[" <> linktext <> "]"
second = if txt == ref
- then text "[]"
- else char '[' <> reftext <> char ']'
+ then "[]"
+ else "[" <> reftext <> "]"
in first <> second
- else char '[' <> linktext <> char ']' <>
- char '(' <> text src <> linktitle <> char ')'
+ else "[" <> linktext <> "](" <>
+ text src <> linktitle <> ")"
inlineToMarkdown opts (Image alternate (source, tit)) = do
let txt = if (null alternate) || (alternate == [Str ""]) ||
(alternate == [Str source]) -- to prevent autolinks
then [Str "image"]
else alternate
- linkPart <- inlineToMarkdown opts (Link txt (unescapeURI source, tit))
- return $ char '!' <> linkPart
+ linkPart <- inlineToMarkdown opts (Link txt (source, tit))
+ return $ "!" <> linkPart
inlineToMarkdown _ (Note contents) = do
modify (\st -> st{ stNotes = contents : stNotes st })
st <- get
let ref = show $ (length $ stNotes st)
- return $ text "[^" <> text ref <> char ']'
+ return $ "[^" <> text ref <> "]"
diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs
index e8cb33caf..a7c7fc482 100644
--- a/src/Text/Pandoc/Writers/MediaWiki.hs
+++ b/src/Text/Pandoc/Writers/MediaWiki.hs
@@ -96,7 +96,9 @@ blockToMediaWiki opts (Para inlines) = do
then "<p>" ++ contents ++ "</p>"
else contents ++ if null listLevel then "\n" else ""
-blockToMediaWiki _ (RawHtml str) = return str
+blockToMediaWiki _ (RawBlock "mediawiki" str) = return str
+blockToMediaWiki _ (RawBlock "html" str) = return str
+blockToMediaWiki _ (RawBlock _ _) = return ""
blockToMediaWiki _ HorizontalRule = return "\n-----\n"
@@ -360,7 +362,7 @@ inlineToMediaWiki _ Apostrophe = return "&rsquo;"
inlineToMediaWiki _ Ellipses = return "&hellip;"
-inlineToMediaWiki _ (Code str) =
+inlineToMediaWiki _ (Code _ str) =
return $ "<tt>" ++ (escapeString str) ++ "</tt>"
inlineToMediaWiki _ (Str str) = return $ escapeString str
@@ -368,9 +370,9 @@ inlineToMediaWiki _ (Str str) = return $ escapeString str
inlineToMediaWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>"
-- note: str should NOT be escaped
-inlineToMediaWiki _ (TeX _) = return ""
-
-inlineToMediaWiki _ (HtmlInline str) = return str
+inlineToMediaWiki _ (RawInline "mediawiki" str) = return str
+inlineToMediaWiki _ (RawInline "html" str) = return str
+inlineToMediaWiki _ (RawInline _ _) = return ""
inlineToMediaWiki _ (LineBreak) = return "<br />\n"
@@ -378,12 +380,12 @@ inlineToMediaWiki _ Space = return " "
inlineToMediaWiki opts (Link txt (src, _)) = do
label <- inlineListToMediaWiki opts txt
- if txt == [Code src] -- autolink
- then return src
- else if isURI src
- then return $ "[" ++ src ++ " " ++ label ++ "]"
- else return $ "[[" ++ src' ++ "|" ++ label ++ "]]"
- where src' = case src of
+ case txt of
+ [Code _ s] | s == src -> return src
+ _ -> if isURI src
+ then return $ "[" ++ src ++ " " ++ label ++ "]"
+ else return $ "[[" ++ src' ++ "|" ++ label ++ "]]"
+ where src' = case src of
'/':xs -> xs -- with leading / it's a
_ -> src -- link to a help page
inlineToMediaWiki opts (Image alt (source, tit)) = do
diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs
index 3b5ea7481..d2b56cd17 100644
--- a/src/Text/Pandoc/Writers/Native.hs
+++ b/src/Text/Pandoc/Writers/Native.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -25,62 +26,53 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Stability : alpha
Portability : portable
-Utility functions and definitions used by the various Pandoc modules.
+Conversion of a 'Pandoc' document to a string representation.
+
+Note: If @writerStandalone@ is @False@, only the document body
+is represented; otherwise, the full 'Pandoc' document, including the
+metadata.
-}
module Text.Pandoc.Writers.Native ( writeNative )
where
-import Text.Pandoc.Shared ( WriterOptions )
-import Data.List ( intercalate )
+import Text.Pandoc.Shared ( WriterOptions(..) )
+import Data.List ( intersperse )
import Text.Pandoc.Definition
+import Text.Pandoc.Pretty
--- | Indent string as a block.
-indentBy :: Int -- ^ Number of spaces to indent the block
- -> Int -- ^ Number of spaces (rel to block) to indent first line
- -> String -- ^ Contents of block to indent
- -> String
-indentBy _ _ [] = ""
-indentBy num first str =
- let (firstLine:restLines) = lines str
- firstLineIndent = num + first
- in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++
- (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines)
-
--- | Prettyprint list of Pandoc blocks elements.
-prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks
- -> [Block] -- ^ List of blocks
- -> String
-prettyBlockList indent [] = indentBy indent 0 "[]"
-prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++
- (intercalate "\n, " (map prettyBlock blocks)) ++ " ]"
+prettyList :: [Doc] -> Doc
+prettyList ds =
+ "[" <> (cat $ intersperse (cr <> ",") $ map (nest 1) ds) <> "]"
-- | Prettyprint Pandoc block element.
-prettyBlock :: Block -> String
-prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++
- (prettyBlockList 2 blocks)
+prettyBlock :: Block -> Doc
+prettyBlock (BlockQuote blocks) =
+ "BlockQuote" $$ prettyList (map prettyBlock blocks)
prettyBlock (OrderedList attribs blockLists) =
- "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++
- (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks)
- blockLists)) ++ " ]"
-prettyBlock (BulletList blockLists) = "BulletList\n" ++
- indentBy 2 0 ("[ " ++ (intercalate ", "
- (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]"
-prettyBlock (DefinitionList items) = "DefinitionList\n" ++
- indentBy 2 0 ("[ " ++ (intercalate "\n, "
- (map (\(term, defs) -> "(" ++ show term ++ ",\n" ++
- indentBy 3 0 ("[ " ++ (intercalate ", "
- (map (\blocks -> prettyBlockList 2 blocks) defs)) ++ "]") ++
- ")") items))) ++ " ]"
+ "OrderedList" <> space <> text (show attribs) $$
+ (prettyList $ map (prettyList . map prettyBlock) blockLists)
+prettyBlock (BulletList blockLists) =
+ "BulletList" $$
+ (prettyList $ map (prettyList . map prettyBlock) blockLists)
+prettyBlock (DefinitionList items) = "DefinitionList" $$
+ (prettyList $ map deflistitem items)
+ where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <>
+ nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")"
prettyBlock (Table caption aligns widths header rows) =
- "Table " ++ show caption ++ " " ++ show aligns ++ " " ++
- show widths ++ "\n" ++ prettyRow header ++ " [\n" ++
- (intercalate ",\n" (map prettyRow rows)) ++ " ]"
- where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", "
- (map (\blocks -> prettyBlockList 2 blocks)
- cols))) ++ " ]"
-prettyBlock block = show block
+ "Table " <> text (show caption) <> " " <> text (show aligns) <> " " <>
+ text (show widths) $$
+ prettyRow header $$
+ prettyList (map prettyRow rows)
+ where prettyRow cols = prettyList (map (prettyList . map prettyBlock) cols)
+prettyBlock block = text $ show block
-- | Prettyprint Pandoc document.
writeNative :: WriterOptions -> Pandoc -> String
-writeNative _ (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++
- ")\n" ++ (prettyBlockList 0 blocks) ++ "\n"
-
+writeNative opts (Pandoc meta blocks) =
+ let colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ withHead = if writerStandalone opts
+ then \bs -> text ("Pandoc " ++ "(" ++ show meta ++ ")") $$
+ bs $$ cr
+ else id
+ in render colwidth $ withHead $ prettyList $ map prettyBlock blocks
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 5aa0fd310..cf1be8755 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -37,6 +37,7 @@ import System.Time
import Paths_pandoc ( getDataFileName )
import Text.Pandoc.Shared ( WriterOptions(..) )
import Text.Pandoc.Definition
+import Text.Pandoc.Generic
import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument )
import System.Directory
import Control.Monad (liftM)
@@ -63,8 +64,8 @@ writeODT mbRefOdt opts doc = do
-- handle pictures
picEntriesRef <- newIORef ([] :: [Entry])
let sourceDir = writerSourceDirectory opts
- doc' <- processWithM (transformPic sourceDir picEntriesRef) doc
- let newContents = writeOpenDocument opts doc'
+ doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc
+ let newContents = writeOpenDocument opts{writerWrapText = False} doc'
(TOD epochtime _) <- getClockTime
let contentEntry = toEntry "content.xml" epochtime $ fromString newContents
picEntries <- readIORef picEntriesRef
diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs
index 4e3979c07..b9444aac7 100644
--- a/src/Text/Pandoc/Writers/OpenDocument.hs
+++ b/src/Text/Pandoc/Writers/OpenDocument.hs
@@ -35,7 +35,7 @@ import Text.Pandoc.Shared
import Text.Pandoc.XML
import Text.Pandoc.Templates (renderTemplate)
import Text.Pandoc.Readers.TeXMath
-import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Text.Pandoc.Pretty
import Text.Printf ( printf )
import Control.Applicative ( (<$>) )
import Control.Arrow ( (***), (>>>) )
@@ -112,7 +112,9 @@ setInDefinitionList :: Bool -> State WriterState ()
setInDefinitionList b = modify $ \s -> s { stInDefinition = b }
inParagraphTags :: Doc -> Doc
-inParagraphTags = inTags False "text:p" [("text:style-name", "Text_20_body")]
+inParagraphTags d | isEmpty d = empty
+inParagraphTags d =
+ inTags False "text:p" [("text:style-name", "Text_20_body")] d
inParagraphTagsWithStyle :: String -> Doc -> Doc
inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)]
@@ -167,7 +169,11 @@ writeOpenDocument opts (Pandoc (Meta title authors date) blocks) =
date'' <- inlinesToOpenDocument opts date
doc'' <- blocksToOpenDocument opts blocks
return (doc'', title'', authors'', date'')
- body' = render doc
+ colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ render' = render colwidth
+ body' = render' doc
styles = stTableStyles s ++ stParaStyles s ++ stTextStyles s
listStyle (n,l) = inTags True "text:list-style"
[("style:name", "L" ++ show n)] (vcat l)
@@ -176,10 +182,10 @@ writeOpenDocument opts (Pandoc (Meta title authors date) blocks) =
reverse $ styles ++ listStyles
context = writerVariables opts ++
[ ("body", body')
- , ("automatic-styles", render automaticStyles)
- , ("title", render title')
- , ("date", render date') ] ++
- [ ("author", render a) | a <- authors' ]
+ , ("automatic-styles", render' automaticStyles)
+ , ("title", render' title')
+ , ("date", render' date') ] ++
+ [ ("author", render' a) | a <- authors' ]
in if writerStandalone opts
then renderTemplate context $ writerTemplate opts
else body'
@@ -273,7 +279,7 @@ blockToOpenDocument o bs
| Header i b <- bs = inHeaderTags i <$> inlinesToOpenDocument o b
| BlockQuote b <- bs = mkBlockQuote b
| CodeBlock _ s <- bs = preformatted s
- | RawHtml _ <- bs = return empty
+ | RawBlock _ _ <- bs = return empty
| DefinitionList b <- bs = defList b
| BulletList b <- bs = bulletListToOpenDocument o b
| OrderedList a b <- bs = orderedList a b
@@ -286,7 +292,7 @@ blockToOpenDocument o bs
r <- vcat <$> mapM (deflistItemToOpenDocument o) b
setInDefinitionList False
return r
- preformatted s = vcat <$> mapM (inPreformattedTags . escapeStringForXML) (lines s)
+ preformatted s = (flush . vcat) <$> mapM (inPreformattedTags . escapeStringForXML) (lines s)
mkBlockQuote b = do increaseIndent
i <- paraStyle "Quotations" []
inBlockQuote o i (map plainToPara b)
@@ -346,7 +352,7 @@ inlineToOpenDocument o ils
| EmDash <- ils = inTextStyle $ text "&#8212;"
| EnDash <- ils = inTextStyle $ text "&#8211;"
| Apostrophe <- ils = inTextStyle $ text "&#8217;"
- | Space <- ils = inTextStyle $ char ' '
+ | Space <- ils = inTextStyle space
| LineBreak <- ils = return $ selfClosingTag "text:line-break" []
| Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s
| Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l
@@ -356,11 +362,12 @@ inlineToOpenDocument o ils
| Subscript l <- ils = withTextStyle Sub $ inlinesToOpenDocument o l
| SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l
| Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l
- | Code s <- ils = preformatted s
+ | Code _ s <- ils = preformatted s
| Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s)
| Cite _ l <- ils = inlinesToOpenDocument o l
- | TeX s <- ils = preformatted s
- | HtmlInline s <- ils = preformatted s
+ | RawInline "opendocument" s <- ils = preformatted s
+ | RawInline "html" s <- ils = preformatted s -- for backwards compat.
+ | RawInline _ _ <- ils = return empty
| Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l
| Image _ (s,_) <- ils = return $ mkImg s
| Note l <- ils = mkNote l
diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs
new file mode 100644
index 000000000..f7f314428
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Org.hs
@@ -0,0 +1,284 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-
+Copyright (C) 2006-2010 Puneeth Chaganti <punchagan@gmail.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Org
+ Copyright : Copyright (C) 2010 Puneeth Chaganti
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Puneeth Chaganti <punchagan@gmail.com>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Emacs Org-Mode.
+
+Org-Mode: <http://orgmode.org>
+-}
+module Text.Pandoc.Writers.Org ( writeOrg) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Pretty
+import Text.Pandoc.Templates (renderTemplate)
+import Data.List ( intersect, intersperse, transpose )
+import Control.Monad.State
+import Control.Applicative ( (<$>) )
+
+data WriterState =
+ WriterState { stNotes :: [[Block]]
+ , stLinks :: Bool
+ , stImages :: Bool
+ , stHasMath :: Bool
+ , stOptions :: WriterOptions
+ }
+
+-- | Convert Pandoc to Org.
+writeOrg :: WriterOptions -> Pandoc -> String
+writeOrg opts document =
+ let st = WriterState { stNotes = [], stLinks = False,
+ stImages = False, stHasMath = False,
+ stOptions = opts }
+ in evalState (pandocToOrg document) st
+
+-- | Return Org representation of document.
+pandocToOrg :: Pandoc -> State WriterState String
+pandocToOrg (Pandoc (Meta tit auth dat) blocks) = do
+ opts <- liftM stOptions get
+ title <- titleToOrg tit
+ authors <- mapM inlineListToOrg auth
+ date <- inlineListToOrg dat
+ body <- blockListToOrg blocks
+ notes <- liftM (reverse . stNotes) get >>= notesToOrg
+ -- note that the notes may contain refs, so we do them first
+ hasMath <- liftM stHasMath get
+ let colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ let main = render colwidth $ foldl ($+$) empty $ [body, notes]
+ let context = writerVariables opts ++
+ [ ("body", main)
+ , ("title", render Nothing title)
+ , ("date", render Nothing date) ] ++
+ [ ("math", "yes") | hasMath ] ++
+ [ ("author", render Nothing a) | a <- authors ]
+ if writerStandalone opts
+ then return $ renderTemplate context $ writerTemplate opts
+ else return main
+
+-- | Return Org representation of notes.
+notesToOrg :: [[Block]] -> State WriterState Doc
+notesToOrg notes =
+ mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>=
+ return . vsep
+
+-- | Return Org representation of a note.
+noteToOrg :: Int -> [Block] -> State WriterState Doc
+noteToOrg num note = do
+ contents <- blockListToOrg note
+ let marker = "[" ++ show num ++ "] "
+ return $ hang (length marker) (text marker) contents
+
+-- | Escape special characters for Org.
+escapeString :: String -> String
+escapeString = escapeStringUsing (backslashEscapes "^_")
+
+titleToOrg :: [Inline] -> State WriterState Doc
+titleToOrg [] = return empty
+titleToOrg lst = do
+ contents <- inlineListToOrg lst
+ return $ "#+TITLE: " <> contents
+
+-- | Convert Pandoc block element to Org.
+blockToOrg :: Block -- ^ Block element
+ -> State WriterState Doc
+blockToOrg Null = return empty
+blockToOrg (Plain inlines) = inlineListToOrg inlines
+blockToOrg (Para [Image txt (src,tit)]) = do
+ capt <- inlineListToOrg txt
+ img <- inlineToOrg (Image txt (src,tit))
+ return $ "#+CAPTION: " <> capt <> blankline <> img
+blockToOrg (Para inlines) = do
+ contents <- inlineListToOrg inlines
+ return $ contents <> blankline
+blockToOrg (RawBlock "html" str) =
+ return $ blankline $$ "#+BEGIN_HTML" $$
+ nest 2 (text str) $$ "#+END_HTML" $$ blankline
+blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" =
+ return $ text str
+blockToOrg (RawBlock _ _) = return empty
+blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline
+blockToOrg (Header level inlines) = do
+ contents <- inlineListToOrg inlines
+ let headerStr = text $ if level > 999 then " " else replicate level '*'
+ return $ headerStr <> " " <> contents <> blankline
+blockToOrg (CodeBlock (_,classes,_) str) = do
+ opts <- stOptions <$> get
+ let tabstop = writerTabStop opts
+ let at = classes `intersect` ["asymptote", "C", "clojure", "css", "ditaa",
+ "dot", "emacs-lisp", "gnuplot", "haskell", "js", "latex",
+ "ledger", "lisp", "matlab", "mscgen", "ocaml", "octave",
+ "oz", "perl", "plantuml", "python", "R", "ruby", "sass",
+ "scheme", "screen", "sh", "sql", "sqlite"]
+ let (beg, end) = if null at
+ then ("#+BEGIN_EXAMPLE", "#+END_EXAMPLE")
+ else ("#+BEGIN_SRC" ++ head at, "#+END_SRC")
+ return $ text beg $$ nest tabstop (text str) $$ text end $$ blankline
+blockToOrg (BlockQuote blocks) = do
+ contents <- blockListToOrg blocks
+ return $ blankline $$ "#+BEGIN_QUOTE" $$
+ nest 2 contents $$ "#+END_QUOTE" $$ blankline
+blockToOrg (Table caption' _ _ headers rows) = do
+ caption'' <- inlineListToOrg caption'
+ let caption = if null caption'
+ then empty
+ else ("#+CAPTION: " <> caption'')
+ headers' <- mapM blockListToOrg headers
+ rawRows <- mapM (mapM blockListToOrg) rows
+ let numChars = maximum . map offset
+ -- FIXME: width is not being used.
+ let widthsInChars =
+ map ((+2) . numChars) $ transpose (headers' : rawRows)
+ -- FIXME: Org doesn't allow blocks with height more than 1.
+ let hpipeBlocks blocks = hcat [beg, middle, end]
+ where h = maximum (map height blocks)
+ sep' = lblock 3 $ vcat (map text $ replicate h " | ")
+ beg = lblock 2 $ vcat (map text $ replicate h "| ")
+ end = lblock 2 $ vcat (map text $ replicate h " |")
+ middle = hcat $ intersperse sep' blocks
+ let makeRow = hpipeBlocks . zipWith lblock widthsInChars
+ let head' = makeRow headers'
+ rows' <- mapM (\row -> do cols <- mapM blockListToOrg row
+ return $ makeRow cols) rows
+ let border ch = char '|' <> char ch <>
+ (hcat $ intersperse (char ch <> char '+' <> char ch) $
+ map (\l -> text $ replicate l ch) widthsInChars) <>
+ char ch <> char '|'
+ let body = vcat rows'
+ let head'' = if all null headers
+ then empty
+ else head' $$ border '-'
+ return $ head'' $$ body $$ caption $$ blankline
+blockToOrg (BulletList items) = do
+ contents <- mapM bulletListItemToOrg items
+ -- ensure that sublists have preceding blank line
+ return $ blankline $+$ vcat contents $$ blankline
+blockToOrg (OrderedList (start, _, delim) items) = do
+ let delim' = case delim of
+ TwoParens -> OneParen
+ x -> x
+ let markers = take (length items) $ orderedListMarkers
+ (start, Decimal, delim')
+ let maxMarkerLength = maximum $ map length markers
+ let markers' = map (\m -> let s = maxMarkerLength - length m
+ in m ++ replicate s ' ') markers
+ contents <- mapM (\(item, num) -> orderedListItemToOrg item num) $
+ zip markers' items
+ -- ensure that sublists have preceding blank line
+ return $ blankline $$ vcat contents $$ blankline
+blockToOrg (DefinitionList items) = do
+ contents <- mapM definitionListItemToOrg items
+ return $ vcat contents $$ blankline
+
+-- | Convert bullet list item (list of blocks) to Org.
+bulletListItemToOrg :: [Block] -> State WriterState Doc
+bulletListItemToOrg items = do
+ contents <- blockListToOrg items
+ return $ hang 3 "- " (contents <> cr)
+
+-- | Convert ordered list item (a list of blocks) to Org.
+orderedListItemToOrg :: String -- ^ marker for list item
+ -> [Block] -- ^ list item (list of blocks)
+ -> State WriterState Doc
+orderedListItemToOrg marker items = do
+ contents <- blockListToOrg items
+ return $ hang (length marker + 1) (text marker <> space) (contents <> cr)
+
+-- | Convert defintion list item (label, list of blocks) to Org.
+definitionListItemToOrg :: ([Inline], [[Block]]) -> State WriterState Doc
+definitionListItemToOrg (label, defs) = do
+ label' <- inlineListToOrg label
+ contents <- liftM vcat $ mapM blockListToOrg defs
+ return $ hang 3 "- " $ label' <> " :: " <> (contents <> cr)
+
+-- | Convert list of Pandoc block elements to Org.
+blockListToOrg :: [Block] -- ^ List of block elements
+ -> State WriterState Doc
+blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat
+
+-- | Convert list of Pandoc inline elements to Org.
+inlineListToOrg :: [Inline] -> State WriterState Doc
+inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat
+
+-- | Convert Pandoc inline element to Org.
+inlineToOrg :: Inline -> State WriterState Doc
+inlineToOrg (Emph lst) = do
+ contents <- inlineListToOrg lst
+ return $ "/" <> contents <> "/"
+inlineToOrg (Strong lst) = do
+ contents <- inlineListToOrg lst
+ return $ "*" <> contents <> "*"
+inlineToOrg (Strikeout lst) = do
+ contents <- inlineListToOrg lst
+ return $ "+" <> contents <> "+"
+inlineToOrg (Superscript lst) = do
+ contents <- inlineListToOrg lst
+ return $ "^{" <> contents <> "}"
+inlineToOrg (Subscript lst) = do
+ contents <- inlineListToOrg lst
+ return $ "_{" <> contents <> "}"
+inlineToOrg (SmallCaps lst) = inlineListToOrg lst
+inlineToOrg (Quoted SingleQuote lst) = do
+ contents <- inlineListToOrg lst
+ return $ "'" <> contents <> "'"
+inlineToOrg (Quoted DoubleQuote lst) = do
+ contents <- inlineListToOrg lst
+ return $ "\"" <> contents <> "\""
+inlineToOrg (Cite _ lst) = inlineListToOrg lst
+inlineToOrg EmDash = return "---"
+inlineToOrg EnDash = return "--"
+inlineToOrg Apostrophe = return "'"
+inlineToOrg Ellipses = return "..."
+inlineToOrg (Code _ str) = return $ "=" <> text str <> "="
+inlineToOrg (Str str) = return $ text $ escapeString str
+inlineToOrg (Math t str) = do
+ modify $ \st -> st{ stHasMath = True }
+ return $ if t == InlineMath
+ then "$" <> text str <> "$"
+ else "$$" <> text str <> "$$"
+inlineToOrg (RawInline f str) | f == "tex" || f == "latex" = return $ text str
+inlineToOrg (RawInline _ _) = return empty
+inlineToOrg (LineBreak) = return cr -- there's no line break in Org
+inlineToOrg Space = return space
+inlineToOrg (Link txt (src, _)) = do
+ case txt of
+ [Code _ x] | x == src -> -- autolink
+ do modify $ \s -> s{ stLinks = True }
+ return $ "[[" <> text x <> "]]"
+ _ -> do contents <- inlineListToOrg txt
+ modify $ \s -> s{ stLinks = True }
+ return $ "[[" <> text src <> "][" <> contents <> "]]"
+inlineToOrg (Image _ (source', _)) = do
+ let source = unescapeURI source'
+ modify $ \s -> s{ stImages = True }
+ return $ "[[" <> text source <> "]]"
+inlineToOrg (Note contents) = do
+ -- add to notes in state
+ notes <- get >>= (return . stNotes)
+ modify $ \st -> st { stNotes = contents:notes }
+ let ref = show $ (length notes) + 1
+ return $ " [" <> text ref <> "]"
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index e79f97b33..d4adaa929 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
@@ -32,10 +33,9 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html>
module Text.Pandoc.Writers.RST ( writeRST) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
-import Text.Pandoc.Blocks
import Text.Pandoc.Templates (renderTemplate)
-import Data.List ( isPrefixOf, isSuffixOf, intersperse, transpose )
-import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Data.List ( isPrefixOf, intersperse, transpose )
+import Text.Pandoc.Pretty
import Control.Monad.State
import Control.Applicative ( (<$>) )
@@ -70,13 +70,16 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do
refs <- liftM (reverse . stLinks) get >>= refsToRST
pics <- liftM (reverse . stImages) get >>= pictRefsToRST
hasMath <- liftM stHasMath get
- let main = render $ foldl ($+$) empty $ [body, notes, refs, pics]
+ let colwidth = if writerWrapText opts
+ then Just $ writerColumns opts
+ else Nothing
+ let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics]
let context = writerVariables opts ++
[ ("body", main)
- , ("title", render title)
- , ("date", render date) ] ++
+ , ("title", render Nothing title)
+ , ("date", render colwidth date) ] ++
[ ("math", "yes") | hasMath ] ++
- [ ("author", render a) | a <- authors ]
+ [ ("author", render colwidth a) | a <- authors ]
if writerStandalone opts
then return $ renderTemplate context $ writerTemplate opts
else return main
@@ -84,49 +87,40 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do
-- | Return RST representation of reference key table.
refsToRST :: Refs -> State WriterState Doc
refsToRST refs = mapM keyToRST refs >>= return . vcat
-
+
-- | Return RST representation of a reference key.
keyToRST :: ([Inline], (String, String))
-> State WriterState Doc
keyToRST (label, (src, _)) = do
label' <- inlineListToRST label
- let label'' = if ':' `elem` (render label')
+ let label'' = if ':' `elem` (render Nothing label')
then char '`' <> label' <> char '`'
else label'
- return $ text ".. _" <> label'' <> text ": " <> text src
+ return $ ".. _" <> label'' <> ": " <> text src
-- | Return RST representation of notes.
notesToRST :: [[Block]] -> State WriterState Doc
notesToRST notes =
- mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>=
- return . vcat
+ mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>=
+ return . vsep
-- | Return RST representation of a note.
noteToRST :: Int -> [Block] -> State WriterState Doc
noteToRST num note = do
contents <- blockListToRST note
- let marker = text ".. [" <> text (show num) <> text "]"
+ let marker = ".. [" <> text (show num) <> "]"
return $ marker $$ nest 3 contents
-- | Return RST representation of picture reference table.
pictRefsToRST :: Refs -> State WriterState Doc
pictRefsToRST refs = mapM pictToRST refs >>= return . vcat
-
+
-- | Return RST representation of a picture substitution reference.
pictToRST :: ([Inline], (String, String))
-> State WriterState Doc
pictToRST (label, (src, _)) = do
label' <- inlineListToRST label
- return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <>
- text src
-
--- | Take list of inline elements and return wrapped doc.
-wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc
-wrappedRST opts inlines = do
- lineBreakDoc <- inlineToRST LineBreak
- chunks <- mapM (wrapIfNeeded opts inlineListToRST)
- (splitBy LineBreak inlines)
- return $ vcat $ intersperse lineBreakDoc chunks
+ return $ ".. |" <> label' <> "| image:: " <> text src
-- | Escape special characters for RST.
escapeString :: String -> String
@@ -136,69 +130,66 @@ titleToRST :: [Inline] -> State WriterState Doc
titleToRST [] = return empty
titleToRST lst = do
contents <- inlineListToRST lst
- let titleLength = length $ render contents
+ let titleLength = length $ (render Nothing contents :: String)
let border = text (replicate titleLength '=')
- return $ border $+$ contents $+$ border
+ return $ border $$ contents $$ border
-- | Convert Pandoc block element to RST.
blockToRST :: Block -- ^ Block element
-> State WriterState Doc
blockToRST Null = return empty
-blockToRST (Plain inlines) = do
- opts <- get >>= (return . stOptions)
- wrappedRST opts inlines
+blockToRST (Plain inlines) = inlineListToRST inlines
blockToRST (Para [Image txt (src,tit)]) = do
capt <- inlineListToRST txt
- let fig = text "figure:: " <> text src
- let align = text ":align: center"
- let alt = text ":alt: " <> if null tit then capt else text tit
- return $ (text ".. " <> (fig $$ align $$ alt $$ text "" $$ capt)) $$ text ""
+ let fig = "figure:: " <> text src
+ let align = ":align: center"
+ let alt = ":alt: " <> if null tit then capt else text tit
+ return $ hang 3 ".. " $ fig $$ align $$ alt $+$ capt $$ blankline
blockToRST (Para inlines) = do
- opts <- get >>= (return . stOptions)
- contents <- wrappedRST opts inlines
- return $ contents <> text "\n"
-blockToRST (RawHtml str) =
- let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in
- return $ (text "\n.. raw:: html\n") $$ (nest 3 $ vcat $ map text (lines str'))
-blockToRST HorizontalRule = return $ text "--------------\n"
+ contents <- inlineListToRST inlines
+ return $ contents <> blankline
+blockToRST (RawBlock f str) =
+ return $ blankline <> ".. raw:: " <> text f $+$
+ (nest 3 $ text str) $$ blankline
+blockToRST HorizontalRule =
+ return $ blankline $$ "--------------" $$ blankline
blockToRST (Header level inlines) = do
contents <- inlineListToRST inlines
- let headerLength = length $ render contents
let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1)
- let border = text $ replicate headerLength headerChar
- return $ contents $+$ border <> text "\n"
+ let border = text $ replicate (offset contents) headerChar
+ return $ contents $$ border $$ blankline
blockToRST (CodeBlock (_,classes,_) str) = do
opts <- stOptions <$> get
let tabstop = writerTabStop opts
if "haskell" `elem` classes && "literate" `elem` classes &&
writerLiterateHaskell opts
- then return $ (vcat $ map (text "> " <>) $ map text (lines str)) <> text "\n"
- else return $ (text "::\n") $+$
- (nest tabstop $ vcat $ map text (lines str)) <> text "\n"
+ then return $ prefixed "> " (text str) $$ blankline
+ else return $ "::" $+$ nest tabstop (text str) $$ blankline
blockToRST (BlockQuote blocks) = do
tabstop <- get >>= (return . writerTabStop . stOptions)
contents <- blockListToRST blocks
- return $ (nest tabstop contents) <> text "\n"
+ return $ (nest tabstop contents) <> blankline
blockToRST (Table caption _ widths headers rows) = do
caption' <- inlineListToRST caption
let caption'' = if null caption
then empty
- else text "" $+$ (text "Table: " <> caption')
+ else blankline <> text "Table: " <> caption'
headers' <- mapM blockListToRST headers
rawRows <- mapM (mapM blockListToRST) rows
let isSimple = all (==0) widths && all (all (\bs -> length bs == 1)) rows
- let numChars = maximum . map (length . render)
+ let numChars = maximum . map offset
+ opts <- get >>= return . stOptions
let widthsInChars =
if isSimple
then map ((+2) . numChars) $ transpose (headers' : rawRows)
- else map (floor . (78 *)) widths
- let hpipeBlocks blocks = hcatBlocks [beg, middle, end]
- where height = maximum (map heightOfBlock blocks)
- sep' = TextBlock 3 height (replicate height " | ")
- beg = TextBlock 2 height (replicate height "| ")
- end = TextBlock 2 height (replicate height " |")
- middle = hcatBlocks $ intersperse sep' blocks
- let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars
+ else map (floor . (fromIntegral (writerColumns opts) *)) widths
+ let hpipeBlocks blocks = hcat [beg, middle, end]
+ where h = maximum (map height blocks)
+ sep' = lblock 3 $ vcat (map text $ replicate h " | ")
+ beg = lblock 2 $ vcat (map text $ replicate h "| ")
+ end = lblock 2 $ vcat (map text $ replicate h " |")
+ middle = hcat $ intersperse sep' blocks
+ let makeRow = hpipeBlocks . zipWith lblock widthsInChars
let head' = makeRow headers'
rows' <- mapM (\row -> do cols <- mapM blockListToRST row
return $ makeRow cols) rows
@@ -206,15 +197,15 @@ blockToRST (Table caption _ widths headers rows) = do
(hcat $ intersperse (char ch <> char '+' <> char ch) $
map (\l -> text $ replicate l ch) widthsInChars) <>
char ch <> char '+'
- let body = vcat $ intersperse (border '-') $ map blockToDoc rows'
+ let body = vcat $ intersperse (border '-') rows'
let head'' = if all null headers
then empty
- else blockToDoc head' $+$ border '='
- return $ border '-' $+$ head'' $+$ body $+$ border '-' $$ caption'' $$ text ""
+ else head' $$ border '='
+ return $ border '-' $$ head'' $$ body $$ border '-' $$ caption'' $$ blankline
blockToRST (BulletList items) = do
contents <- mapM bulletListItemToRST items
-- ensure that sublists have preceding blank line
- return $ text "" $+$ vcat contents <> text "\n"
+ return $ blankline $$ vcat contents $$ blankline
blockToRST (OrderedList (start, style', delim) items) = do
let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim
then take (length items) $ repeat "#."
@@ -224,18 +215,19 @@ blockToRST (OrderedList (start, style', delim) items) = do
let markers' = map (\m -> let s = maxMarkerLength - length m
in m ++ replicate s ' ') markers
contents <- mapM (\(item, num) -> orderedListItemToRST item num) $
- zip markers' items
+ zip markers' items
-- ensure that sublists have preceding blank line
- return $ text "" $+$ vcat contents <> text "\n"
+ return $ blankline $$ vcat contents $$ blankline
blockToRST (DefinitionList items) = do
contents <- mapM definitionListItemToRST items
- return $ (vcat contents) <> text "\n"
+ -- ensure that sublists have preceding blank line
+ return $ blankline $$ vcat contents $$ blankline
-- | Convert bullet list item (list of blocks) to RST.
bulletListItemToRST :: [Block] -> State WriterState Doc
bulletListItemToRST items = do
contents <- blockListToRST items
- return $ (text "- ") <> contents
+ return $ hang 3 "- " $ contents <> cr
-- | Convert ordered list item (a list of blocks) to RST.
orderedListItemToRST :: String -- ^ marker for list item
@@ -243,7 +235,8 @@ orderedListItemToRST :: String -- ^ marker for list item
-> State WriterState Doc
orderedListItemToRST marker items = do
contents <- blockListToRST items
- return $ (text marker <> char ' ') <> contents
+ let marker' = marker ++ " "
+ return $ hang (length marker') (text marker') $ contents <> cr
-- | Convert defintion list item (label, list of blocks) to RST.
definitionListItemToRST :: ([Inline], [[Block]]) -> State WriterState Doc
@@ -251,7 +244,7 @@ definitionListItemToRST (label, defs) = do
label' <- inlineListToRST label
contents <- liftM vcat $ mapM blockListToRST defs
tabstop <- get >>= (return . writerTabStop . stOptions)
- return $ label' $+$ nest tabstop contents
+ return $ label' $$ nest tabstop (contents <> cr)
-- | Convert list of Pandoc block elements to RST.
blockListToRST :: [Block] -- ^ List of block elements
@@ -266,65 +259,63 @@ inlineListToRST lst = mapM inlineToRST lst >>= return . hcat
inlineToRST :: Inline -> State WriterState Doc
inlineToRST (Emph lst) = do
contents <- inlineListToRST lst
- return $ char '*' <> contents <> char '*'
+ return $ "*" <> contents <> "*"
inlineToRST (Strong lst) = do
contents <- inlineListToRST lst
- return $ text "**" <> contents <> text "**"
+ return $ "**" <> contents <> "**"
inlineToRST (Strikeout lst) = do
contents <- inlineListToRST lst
- return $ text "[STRIKEOUT:" <> contents <> char ']'
+ return $ "[STRIKEOUT:" <> contents <> "]"
inlineToRST (Superscript lst) = do
contents <- inlineListToRST lst
- return $ text "\\ :sup:`" <> contents <> text "`\\ "
+ return $ "\\ :sup:`" <> contents <> "`\\ "
inlineToRST (Subscript lst) = do
contents <- inlineListToRST lst
- return $ text "\\ :sub:`" <> contents <> text "`\\ "
+ return $ "\\ :sub:`" <> contents <> "`\\ "
inlineToRST (SmallCaps lst) = inlineListToRST lst
inlineToRST (Quoted SingleQuote lst) = do
contents <- inlineListToRST lst
- return $ char '‘' <> contents <> char '’'
+ return $ "‘" <> contents <> "’"
inlineToRST (Quoted DoubleQuote lst) = do
contents <- inlineListToRST lst
- return $ char '“' <> contents <> char '”'
+ return $ "“" <> contents <> "”"
inlineToRST (Cite _ lst) =
inlineListToRST lst
inlineToRST EmDash = return $ char '\8212'
inlineToRST EnDash = return $ char '\8211'
inlineToRST Apostrophe = return $ char '\8217'
inlineToRST Ellipses = return $ char '\8230'
-inlineToRST (Code str) = return $ text $ "``" ++ str ++ "``"
+inlineToRST (Code _ str) = return $ "``" <> text str <> "``"
inlineToRST (Str str) = return $ text $ escapeString str
inlineToRST (Math t str) = do
modify $ \st -> st{ stHasMath = True }
return $ if t == InlineMath
- then text $ ":math:`$" ++ str ++ "$`"
- else text $ ":math:`$$" ++ str ++ "$$`"
-inlineToRST (TeX _) = return empty
-inlineToRST (HtmlInline _) = return empty
-inlineToRST (LineBreak) = do
- return $ empty -- there's no line break in RST
-inlineToRST Space = return $ char ' '
-inlineToRST (Link [Code str] (src, _)) | src == str ||
- src == "mailto:" ++ str = do
+ then ":math:`$" <> text str <> "$`"
+ else ":math:`$$" <> text str <> "$$`"
+inlineToRST (RawInline _ _) = return empty
+inlineToRST (LineBreak) = return cr -- there's no line break in RST
+inlineToRST Space = return space
+inlineToRST (Link [Code _ str] (src, _)) | src == str ||
+ src == "mailto:" ++ str = do
let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src
return $ text $ unescapeURI srcSuffix
inlineToRST (Link txt (src', tit)) = do
let src = unescapeURI src'
- useReferenceLinks <- get >>= (return . writerReferenceLinks . stOptions)
+ useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions
linktext <- inlineListToRST $ normalizeSpaces txt
if useReferenceLinks
- then do refs <- get >>= (return . stLinks)
+ then do refs <- get >>= return . stLinks
let refs' = if (txt, (src, tit)) `elem` refs
then refs
else (txt, (src, tit)):refs
modify $ \st -> st { stLinks = refs' }
- return $ char '`' <> linktext <> text "`_"
- else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_"
+ return $ "`" <> linktext <> "`_"
+ else return $ "`" <> linktext <> " <" <> text src <> ">`_"
inlineToRST (Image alternate (source', tit)) = do
let source = unescapeURI source'
- pics <- get >>= (return . stImages)
+ pics <- get >>= return . stImages
let labelsUsed = map fst pics
- let txt = if null alternate || alternate == [Str ""] ||
+ let txt = if null alternate || alternate == [Str ""] ||
alternate `elem` labelsUsed
then [Str $ "image" ++ show (length pics)]
else alternate
@@ -333,10 +324,10 @@ inlineToRST (Image alternate (source', tit)) = do
else (txt, (source, tit)):pics
modify $ \st -> st { stImages = pics' }
label <- inlineListToRST txt
- return $ char '|' <> label <> char '|'
+ return $ "|" <> label <> "|"
inlineToRST (Note contents) = do
-- add to notes in state
- notes <- get >>= (return . stNotes)
+ notes <- get >>= return . stNotes
modify $ \st -> st { stNotes = contents:notes }
let ref = show $ (length notes) + 1
- return $ text " [" <> text ref <> text "]_"
+ return $ " [" <> text ref <> "]_"
diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs
index ae71e1307..605e4162b 100644
--- a/src/Text/Pandoc/Writers/RTF.hs
+++ b/src/Text/Pandoc/Writers/RTF.hs
@@ -27,13 +27,34 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Conversion of 'Pandoc' documents to RTF (rich text format).
-}
-module Text.Pandoc.Writers.RTF ( writeRTF ) where
+module Text.Pandoc.Writers.RTF ( writeRTF, rtfEmbedImage ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Templates (renderTemplate)
import Data.List ( isSuffixOf, intercalate )
-import Data.Char ( ord, isDigit )
+import Data.Char ( ord, isDigit, toLower )
+import System.FilePath ( takeExtension )
+import qualified Data.ByteString as B
+import Text.Printf ( printf )
+
+-- | Convert Image inlines into a raw RTF embedded image, read from a file.
+-- If file not found or filetype not jpeg or png, leave the inline unchanged.
+rtfEmbedImage :: Inline -> IO Inline
+rtfEmbedImage x@(Image _ (src,_))
+ | map toLower (takeExtension src) `elem` [".jpg",".jpeg",".png"] = do
+ imgdata <- catch (B.readFile src) (\_ -> return B.empty)
+ let bytes = map (printf "%02x") $ B.unpack imgdata
+ let filetype = case map toLower (takeExtension src) of
+ ".jpg" -> "\\jpegblip"
+ ".jpeg" -> "\\jpegblip"
+ ".png" -> "\\pngblip"
+ _ -> error "Unknown file type"
+ let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}"
+ return $ if B.null imgdata
+ then x
+ else RawInline "rtf" raw
+rtfEmbedImage x = return x
-- | Convert Pandoc to a string in rich text format.
writeRTF :: WriterOptions -> Pandoc -> String
@@ -159,7 +180,8 @@ blockToRTF indent alignment (BlockQuote lst) =
concatMap (blockToRTF (indent + indentIncrement) alignment) lst
blockToRTF indent _ (CodeBlock _ str) =
rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str))
-blockToRTF _ _ (RawHtml _) = ""
+blockToRTF _ _ (RawBlock "rtf" str) = str
+blockToRTF _ _ (RawBlock _ _) = ""
blockToRTF indent alignment (BulletList lst) = spaceAtEnd $
concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst
blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $
@@ -264,12 +286,12 @@ inlineToRTF Apostrophe = "\\u8217'"
inlineToRTF Ellipses = "\\u8230?"
inlineToRTF EmDash = "\\u8212-"
inlineToRTF EnDash = "\\u8211-"
-inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}"
+inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}"
inlineToRTF (Str str) = stringToRTF str
inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str
inlineToRTF (Cite _ lst) = inlineListToRTF lst
-inlineToRTF (TeX _) = ""
-inlineToRTF (HtmlInline _) = ""
+inlineToRTF (RawInline "rtf" str) = str
+inlineToRTF (RawInline _ _) = ""
inlineToRTF (LineBreak) = "\\line "
inlineToRTF Space = " "
inlineToRTF (Link text (src, _)) =
diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs
index 65e053827..c8638cdd7 100644
--- a/src/Text/Pandoc/Writers/Texinfo.hs
+++ b/src/Text/Pandoc/Writers/Texinfo.hs
@@ -31,13 +31,12 @@ module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Text.Pandoc.Templates (renderTemplate)
-import Text.Pandoc.Readers.TeXMath
import Text.Printf ( printf )
-import Data.List ( isSuffixOf, transpose, maximumBy )
+import Data.List ( transpose, maximumBy )
import Data.Ord ( comparing )
import Data.Char ( chr, ord )
import Control.Monad.State
-import Text.PrettyPrint.HughesPJ hiding ( Str )
+import Text.Pandoc.Pretty
data WriterState =
WriterState { stStrikeout :: Bool -- document contains strikeout
@@ -69,17 +68,20 @@ pandocToTexinfo options (Pandoc (Meta title authors date) blocks) = do
let titlePage = not $ all null $ title : date : authors
main <- blockListToTexinfo blocks
st <- get
- let body = render main
+ let colwidth = if writerWrapText options
+ then Just $ writerColumns options
+ else Nothing
+ let body = render colwidth main
let context = writerVariables options ++
[ ("body", body)
- , ("title", render titleText)
- , ("date", render dateText) ] ++
+ , ("title", render colwidth titleText)
+ , ("date", render colwidth dateText) ] ++
[ ("toc", "yes") | writerTableOfContents options ] ++
[ ("titlepage", "yes") | titlePage ] ++
[ ("subscript", "yes") | stSubscript st ] ++
[ ("superscript", "yes") | stSuperscript st ] ++
[ ("strikeout", "yes") | stStrikeout st ] ++
- [ ("author", render a) | a <- authorsText ]
+ [ ("author", render colwidth a) | a <- authorsText ]
if writerStandalone options
then return $ renderTemplate context $ writerTemplate options
else return body
@@ -124,22 +126,25 @@ blockToTexinfo (BlockQuote lst) = do
blockToTexinfo (CodeBlock _ str) = do
return $ text "@verbatim" $$
- vcat (map text (lines str)) $$
- text "@end verbatim\n"
+ flush (text str) $$
+ text "@end verbatim" <> blankline
-blockToTexinfo (RawHtml _) = return empty
+blockToTexinfo (RawBlock "texinfo" str) = return $ text str
+blockToTexinfo (RawBlock "latex" str) =
+ return $ text "@tex" $$ text str $$ text "@end tex"
+blockToTexinfo (RawBlock _ _) = return empty
blockToTexinfo (BulletList lst) = do
items <- mapM listItemToTexinfo lst
return $ text "@itemize" $$
vcat items $$
- text "@end itemize\n"
+ text "@end itemize" <> blankline
blockToTexinfo (OrderedList (start, numstyle, _) lst) = do
items <- mapM listItemToTexinfo lst
return $ text "@enumerate " <> exemplar $$
vcat items $$
- text "@end enumerate\n"
+ text "@end enumerate" <> blankline
where
exemplar = case numstyle of
DefaultStyle -> decimal
@@ -159,7 +164,7 @@ blockToTexinfo (DefinitionList lst) = do
items <- mapM defListItemToTexinfo lst
return $ text "@table @asis" $$
vcat items $$
- text "@end table\n"
+ text "@end table" <> blankline
blockToTexinfo HorizontalRule =
-- XXX can't get the equivalent from LaTeX.hs to work
@@ -175,13 +180,13 @@ blockToTexinfo (Header 0 lst) = do
then return $ text "Top"
else inlineListToTexinfo lst
return $ text "@node Top" $$
- text "@top " <> txt <> char '\n'
+ text "@top " <> txt <> blankline
blockToTexinfo (Header level lst) = do
node <- inlineListForNode lst
txt <- inlineListToTexinfo lst
return $ if (level > 0) && (level <= 4)
- then text "\n@node " <> node <> char '\n' <>
+ then blankline <> text "@node " <> node <> cr <>
text (seccmd level) <> txt
else txt
where
@@ -200,18 +205,18 @@ blockToTexinfo (Table caption aligns widths heads rows) = do
colDescriptors <-
if all (== 0) widths
then do -- use longest entry instead of column widths
- cols <- mapM (mapM (liftM (render . hcat) . mapM blockToTexinfo)) $
- transpose $ heads : rows
+ cols <- mapM (mapM (liftM (render Nothing . hcat) . mapM blockToTexinfo)) $
+ transpose $ heads : rows
return $ concatMap ((\x -> "{"++x++"} ") . maximumBy (comparing length)) cols
else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths
let tableBody = text ("@multitable " ++ colDescriptors) $$
headers $$
vcat rowsText $$
- text "@end multitable"
+ text "@end multitable"
return $ if isEmpty captionText
- then tableBody <> char '\n'
+ then tableBody <> blankline
else text "@float" $$
- tableBody $$
+ tableBody $$
inCmd "caption" captionText $$
text "@end float"
@@ -253,7 +258,7 @@ alignedBlock _ col = blockListToTexinfo col
-- | Convert Pandoc block elements to Texinfo.
blockListToTexinfo :: [Block]
-> State WriterState Doc
-blockListToTexinfo [] = return $ empty
+blockListToTexinfo [] = return empty
blockListToTexinfo (x:xs) = do
x' <- blockToTexinfo x
case x of
@@ -276,7 +281,7 @@ blockListToTexinfo (x:xs) = do
xs' <- blockListToTexinfo xs
case xs of
((CodeBlock _ _):_) -> return $ x' $$ xs'
- _ -> return $ x' $$ text "" $$ xs'
+ _ -> return $ x' $+$ xs'
_ -> do
xs' <- blockListToTexinfo xs
return $ x' $$ xs'
@@ -307,15 +312,23 @@ makeMenuLine _ = error "makeMenuLine called with non-Header block"
listItemToTexinfo :: [Block]
-> State WriterState Doc
-listItemToTexinfo lst = blockListToTexinfo lst >>=
- return . (text "@item" $$)
+listItemToTexinfo lst = do
+ contents <- blockListToTexinfo lst
+ let spacer = case reverse lst of
+ (Para{}:_) -> blankline
+ _ -> empty
+ return $ text "@item" $$ contents <> spacer
defListItemToTexinfo :: ([Inline], [[Block]])
-> State WriterState Doc
defListItemToTexinfo (term, defs) = do
term' <- inlineListToTexinfo term
- def' <- liftM vcat $ mapM blockListToTexinfo defs
- return $ text "@item " <> term' <> text "\n" $$ def'
+ let defToTexinfo bs = do d <- blockListToTexinfo bs
+ case reverse bs of
+ (Para{}:_) -> return $ d <> blankline
+ _ -> return d
+ defs' <- mapM defToTexinfo defs
+ return $ text "@item " <> term' $+$ vcat defs'
-- | Convert list of inline elements to Texinfo.
inlineListToTexinfo :: [Inline] -- ^ Inlines to convert
@@ -325,31 +338,7 @@ inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat
-- | Convert list of inline elements to Texinfo acceptable for a node name.
inlineListForNode :: [Inline] -- ^ Inlines to convert
-> State WriterState Doc
-inlineListForNode lst = mapM inlineForNode lst >>= return . hcat
-
-inlineForNode :: Inline -> State WriterState Doc
-inlineForNode (Str str) = return $ text $ filter (not.disallowedInNode) str
-inlineForNode (Emph lst) = inlineListForNode lst
-inlineForNode (Strong lst) = inlineListForNode lst
-inlineForNode (Strikeout lst) = inlineListForNode lst
-inlineForNode (Superscript lst) = inlineListForNode lst
-inlineForNode (Subscript lst) = inlineListForNode lst
-inlineForNode (SmallCaps lst) = inlineListForNode lst
-inlineForNode (Quoted _ lst) = inlineListForNode lst
-inlineForNode (Cite _ lst) = inlineListForNode lst
-inlineForNode (Code str) = inlineForNode (Str str)
-inlineForNode Space = return $ char ' '
-inlineForNode EmDash = return $ text "---"
-inlineForNode EnDash = return $ text "--"
-inlineForNode Apostrophe = return $ char '\''
-inlineForNode Ellipses = return $ text "..."
-inlineForNode LineBreak = return empty
-inlineForNode (Math _ str) = inlineListForNode $ readTeXMath str
-inlineForNode (TeX _) = return empty
-inlineForNode (HtmlInline _) = return empty
-inlineForNode (Link lst _) = inlineListForNode lst
-inlineForNode (Image lst _) = inlineListForNode lst
-inlineForNode (Note _) = return empty
+inlineListForNode = return . text . filter (not . disallowedInNode) . stringify
-- periods, commas, colons, and parentheses are disallowed in node names
disallowedInNode :: Char -> Bool
@@ -383,7 +372,7 @@ inlineToTexinfo (Subscript lst) = do
inlineToTexinfo (SmallCaps lst) =
inlineListToTexinfo lst >>= return . inCmd "sc"
-inlineToTexinfo (Code str) = do
+inlineToTexinfo (Code _ str) = do
return $ text $ "@code{" ++ stringToTexinfo str ++ "}"
inlineToTexinfo (Quoted SingleQuote lst) = do
@@ -402,14 +391,16 @@ inlineToTexinfo EnDash = return $ text "--"
inlineToTexinfo Ellipses = return $ text "@dots{}"
inlineToTexinfo (Str str) = return $ text (stringToTexinfo str)
inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str
-inlineToTexinfo (TeX str) = return $ text "@tex" $$ text str $$ text "@end tex"
-inlineToTexinfo (HtmlInline _) = return empty
+inlineToTexinfo (RawInline f str) | f == "latex" || f == "tex" =
+ return $ text "@tex" $$ text str $$ text "@end tex"
+inlineToTexinfo (RawInline "texinfo" str) = return $ text str
+inlineToTexinfo (RawInline _ _) = return empty
inlineToTexinfo (LineBreak) = return $ text "@*"
inlineToTexinfo Space = return $ char ' '
inlineToTexinfo (Link txt (src, _)) = do
case txt of
- [Code x] | x == src -> -- autolink
+ [Code _ x] | x == src -> -- autolink
do return $ text $ "@url{" ++ x ++ "}"
_ -> do contents <- inlineListToTexinfo txt
let src1 = stringToTexinfo src
@@ -429,9 +420,4 @@ inlineToTexinfo (Image alternate (source, _)) = do
inlineToTexinfo (Note contents) = do
contents' <- blockListToTexinfo contents
- let rawnote = stripTrailingNewlines $ render contents'
- let optNewline = "@end verbatim" `isSuffixOf` rawnote
- return $ text "@footnote{" <>
- text rawnote <>
- (if optNewline then char '\n' else empty) <>
- char '}'
+ return $ text "@footnote" <> braces contents'
diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs
new file mode 100644
index 000000000..6614ec28e
--- /dev/null
+++ b/src/Text/Pandoc/Writers/Textile.hs
@@ -0,0 +1,422 @@
+{-
+Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Writers.Textile
+ Copyright : Copyright (C) 2010 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of 'Pandoc' documents to Textile markup.
+
+Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual>
+-}
+module Text.Pandoc.Writers.Textile ( writeTextile ) where
+import Text.Pandoc.Definition
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates (renderTemplate)
+import Text.Pandoc.XML ( escapeStringForXML )
+import Data.List ( intercalate )
+import Control.Monad.State
+import Data.Char ( isSpace )
+
+data WriterState = WriterState {
+ stNotes :: [String] -- Footnotes
+ , stListLevel :: [Char] -- String at beginning of list items, e.g. "**"
+ , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list
+ }
+
+-- | Convert Pandoc to Textile.
+writeTextile :: WriterOptions -> Pandoc -> String
+writeTextile opts document =
+ evalState (pandocToTextile opts document)
+ (WriterState { stNotes = [], stListLevel = [], stUseTags = False })
+
+-- | Return Textile representation of document.
+pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String
+pandocToTextile opts (Pandoc _ blocks) = do
+ body <- blockListToTextile opts blocks
+ notes <- liftM (unlines . reverse . stNotes) get
+ let main = body ++ if null notes then "" else ("\n\n" ++ notes)
+ let context = writerVariables opts ++ [ ("body", main) ]
+ if writerStandalone opts
+ then return $ renderTemplate context $ writerTemplate opts
+ else return main
+
+withUseTags :: State WriterState a -> State WriterState a
+withUseTags action = do
+ oldUseTags <- liftM stUseTags get
+ modify $ \s -> s { stUseTags = True }
+ result <- action
+ modify $ \s -> s { stUseTags = oldUseTags }
+ return result
+
+-- | Escape one character as needed for Textile.
+escapeCharForTextile :: Char -> String
+escapeCharForTextile x = case x of
+ '&' -> "&amp;"
+ '<' -> "&lt;"
+ '>' -> "&gt;"
+ '"' -> "&quot;"
+ '*' -> "&#42;"
+ '_' -> "&#95;"
+ '@' -> "&#64;"
+ '|' -> "&#124;"
+ c -> [c]
+
+-- | Escape string as needed for Textile.
+escapeStringForTextile :: String -> String
+escapeStringForTextile = concatMap escapeCharForTextile
+
+-- | Convert Pandoc block element to Textile.
+blockToTextile :: WriterOptions -- ^ Options
+ -> Block -- ^ Block element
+ -> State WriterState String
+
+blockToTextile _ Null = return ""
+
+blockToTextile opts (Plain inlines) =
+ inlineListToTextile opts inlines
+
+blockToTextile opts (Para [Image txt (src,tit)]) = do
+ capt <- blockToTextile opts (Para txt)
+ im <- inlineToTextile opts (Image txt (src,tit))
+ return $ im ++ "\n" ++ capt
+
+blockToTextile opts (Para inlines) = do
+ useTags <- liftM stUseTags get
+ listLevel <- liftM stListLevel get
+ contents <- inlineListToTextile opts inlines
+ return $ if useTags
+ then "<p>" ++ contents ++ "</p>"
+ else contents ++ if null listLevel then "\n" else ""
+
+blockToTextile _ (RawBlock f str) =
+ if f == "html" || f == "textile"
+ then return str
+ else return ""
+
+blockToTextile _ HorizontalRule = return "<hr />\n"
+
+blockToTextile opts (Header level inlines) = do
+ contents <- inlineListToTextile opts inlines
+ let prefix = 'h' : (show level ++ ". ")
+ return $ prefix ++ contents ++ "\n"
+
+blockToTextile _ (CodeBlock (_,classes,_) str) | any (all isSpace) (lines str) =
+ return $ "<pre" ++ classes' ++ ">\n" ++ escapeStringForXML str ++
+ "\n</pre>\n"
+ where classes' = if null classes
+ then ""
+ else " class=\"" ++ unwords classes ++ "\""
+
+blockToTextile _ (CodeBlock (_,classes,_) str) =
+ return $ "bc" ++ classes' ++ ". " ++ str ++ "\n\n"
+ where classes' = if null classes
+ then ""
+ else "(" ++ unwords classes ++ ")"
+
+blockToTextile opts (BlockQuote bs@[Para _]) = do
+ contents <- blockListToTextile opts bs
+ return $ "bq. " ++ contents ++ "\n\n"
+
+blockToTextile opts (BlockQuote blocks) = do
+ contents <- blockListToTextile opts blocks
+ return $ "<blockquote>\n\n" ++ contents ++ "\n</blockquote>\n"
+
+blockToTextile opts (Table [] aligns widths headers rows') |
+ all (==0) widths && all (`elem` [AlignLeft,AlignDefault]) aligns = do
+ hs <- mapM (liftM (("_. " ++) . stripTrailingNewlines) . blockListToTextile opts) headers
+ let cellsToRow cells = "|" ++ intercalate "|" cells ++ "|"
+ let header = if all null headers then "" else cellsToRow hs
+ let rowToCells = mapM (liftM stripTrailingNewlines . blockListToTextile opts)
+ bs <- mapM rowToCells rows'
+ let body = unlines $ map cellsToRow bs
+ return $ header ++ "\n" ++ body ++ "\n"
+
+blockToTextile opts (Table capt aligns widths headers rows') = do
+ let alignStrings = map alignmentToString aligns
+ captionDoc <- if null capt
+ then return ""
+ else do
+ c <- inlineListToTextile opts capt
+ return $ "<caption>" ++ c ++ "</caption>\n"
+ let percent w = show (truncate (100*w) :: Integer) ++ "%"
+ let coltags = if all (== 0.0) widths
+ then ""
+ else unlines $ map
+ (\w -> "<col width=\"" ++ percent w ++ "\" />") widths
+ head' <- if all null headers
+ then return ""
+ else do
+ hs <- tableRowToTextile opts alignStrings 0 headers
+ return $ "<thead>\n" ++ hs ++ "\n</thead>\n"
+ body' <- zipWithM (tableRowToTextile opts alignStrings) [1..] rows'
+ return $ "<table>\n" ++ captionDoc ++ coltags ++ head' ++
+ "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n"
+
+blockToTextile opts x@(BulletList items) = do
+ oldUseTags <- liftM stUseTags get
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ contents <- withUseTags $ mapM (listItemToTextile opts) items
+ return $ "<ul>\n" ++ vcat contents ++ "\n</ul>\n"
+ else do
+ modify $ \s -> s { stListLevel = stListLevel s ++ "*" }
+ level <- get >>= return . length . stListLevel
+ contents <- mapM (listItemToTextile opts) items
+ modify $ \s -> s { stListLevel = init (stListLevel s) }
+ return $ vcat contents ++ (if level > 1 then "" else "\n")
+
+blockToTextile opts x@(OrderedList attribs items) = do
+ oldUseTags <- liftM stUseTags get
+ let useTags = oldUseTags || not (isSimpleList x)
+ if useTags
+ then do
+ contents <- withUseTags $ mapM (listItemToTextile opts) items
+ return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++
+ "\n</ol>\n"
+ else do
+ modify $ \s -> s { stListLevel = stListLevel s ++ "#" }
+ level <- get >>= return . length . stListLevel
+ contents <- mapM (listItemToTextile opts) items
+ modify $ \s -> s { stListLevel = init (stListLevel s) }
+ return $ vcat contents ++ (if level > 1 then "" else "\n")
+
+blockToTextile opts (DefinitionList items) = do
+ contents <- withUseTags $ mapM (definitionListItemToTextile opts) items
+ return $ "<dl>\n" ++ vcat contents ++ "\n</dl>\n"
+
+-- Auxiliary functions for lists:
+
+-- | Convert ordered list attributes to HTML attribute string
+listAttribsToString :: ListAttributes -> String
+listAttribsToString (startnum, numstyle, _) =
+ let numstyle' = camelCaseToHyphenated $ show numstyle
+ in (if startnum /= 1
+ then " start=\"" ++ show startnum ++ "\""
+ else "") ++
+ (if numstyle /= DefaultStyle
+ then " style=\"list-style-type: " ++ numstyle' ++ ";\""
+ else "")
+
+-- | Convert bullet or ordered list item (list of blocks) to Textile.
+listItemToTextile :: WriterOptions -> [Block] -> State WriterState String
+listItemToTextile opts items = do
+ contents <- blockListToTextile opts items
+ useTags <- get >>= return . stUseTags
+ if useTags
+ then return $ "<li>" ++ contents ++ "</li>"
+ else do
+ marker <- get >>= return . stListLevel
+ return $ marker ++ " " ++ contents
+
+-- | Convert definition list item (label, list of blocks) to Textile.
+definitionListItemToTextile :: WriterOptions
+ -> ([Inline],[[Block]])
+ -> State WriterState String
+definitionListItemToTextile opts (label, items) = do
+ labelText <- inlineListToTextile opts label
+ contents <- mapM (blockListToTextile opts) items
+ return $ "<dt>" ++ labelText ++ "</dt>\n" ++
+ (intercalate "\n" $ map (\d -> "<dd>" ++ d ++ "</dd>") contents)
+
+-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed.
+isSimpleList :: Block -> Bool
+isSimpleList x =
+ case x of
+ BulletList items -> all isSimpleListItem items
+ OrderedList (num, sty, _) items -> all isSimpleListItem items &&
+ num == 1 && sty `elem` [DefaultStyle, Decimal]
+ _ -> False
+
+-- | True if list item can be handled with the simple wiki syntax. False if
+-- HTML tags will be needed.
+isSimpleListItem :: [Block] -> Bool
+isSimpleListItem [] = True
+isSimpleListItem [x] =
+ case x of
+ Plain _ -> True
+ Para _ -> True
+ BulletList _ -> isSimpleList x
+ OrderedList _ _ -> isSimpleList x
+ _ -> False
+isSimpleListItem [x, y] | isPlainOrPara x =
+ case y of
+ BulletList _ -> isSimpleList y
+ OrderedList _ _ -> isSimpleList y
+ _ -> False
+isSimpleListItem _ = False
+
+isPlainOrPara :: Block -> Bool
+isPlainOrPara (Plain _) = True
+isPlainOrPara (Para _) = True
+isPlainOrPara _ = False
+
+-- | Concatenates strings with line breaks between them.
+vcat :: [String] -> String
+vcat = intercalate "\n"
+
+-- Auxiliary functions for tables. (TODO: these are common to HTML, MediaWiki,
+-- and Textile writers, and should be abstracted out.)
+
+tableRowToTextile :: WriterOptions
+ -> [String]
+ -> Int
+ -> [[Block]]
+ -> State WriterState String
+tableRowToTextile opts alignStrings rownum cols' = do
+ let celltype = if rownum == 0 then "th" else "td"
+ let rowclass = case rownum of
+ 0 -> "header"
+ x | x `rem` 2 == 1 -> "odd"
+ _ -> "even"
+ cols'' <- sequence $ zipWith
+ (\alignment item -> tableItemToTextile opts celltype alignment item)
+ alignStrings cols'
+ return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>"
+
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+ AlignLeft -> "left"
+ AlignRight -> "right"
+ AlignCenter -> "center"
+ AlignDefault -> "left"
+
+tableItemToTextile :: WriterOptions
+ -> String
+ -> String
+ -> [Block]
+ -> State WriterState String
+tableItemToTextile opts celltype align' item = do
+ let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++
+ x ++ "</" ++ celltype ++ ">"
+ contents <- blockListToTextile opts item
+ return $ mkcell contents
+
+-- | Convert list of Pandoc block elements to Textile.
+blockListToTextile :: WriterOptions -- ^ Options
+ -> [Block] -- ^ List of block elements
+ -> State WriterState String
+blockListToTextile opts blocks =
+ mapM (blockToTextile opts) blocks >>= return . vcat
+
+-- | Convert list of Pandoc inline elements to Textile.
+inlineListToTextile :: WriterOptions -> [Inline] -> State WriterState String
+inlineListToTextile opts lst =
+ mapM (inlineToTextile opts) lst >>= return . concat
+
+-- | Convert Pandoc inline element to Textile.
+inlineToTextile :: WriterOptions -> Inline -> State WriterState String
+
+inlineToTextile opts (Emph lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ if '_' `elem` contents
+ then "<em>" ++ contents ++ "</em>"
+ else "_" ++ contents ++ "_"
+
+inlineToTextile opts (Strong lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ if '*' `elem` contents
+ then "<strong>" ++ contents ++ "</strong>"
+ else "*" ++ contents ++ "*"
+
+inlineToTextile opts (Strikeout lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ if '-' `elem` contents
+ then "<del>" ++ contents ++ "</del>"
+ else "-" ++ contents ++ "-"
+
+inlineToTextile opts (Superscript lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ if '^' `elem` contents
+ then "<sup>" ++ contents ++ "</sup>"
+ else "[^" ++ contents ++ "^]"
+
+inlineToTextile opts (Subscript lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ if '~' `elem` contents
+ then "<sub>" ++ contents ++ "</sub>"
+ else "[~" ++ contents ++ "~]"
+
+inlineToTextile opts (SmallCaps lst) = inlineListToTextile opts lst
+
+inlineToTextile opts (Quoted SingleQuote lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ "'" ++ contents ++ "'"
+
+inlineToTextile opts (Quoted DoubleQuote lst) = do
+ contents <- inlineListToTextile opts lst
+ return $ "\"" ++ contents ++ "\""
+
+inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst
+
+inlineToTextile _ EmDash = return " -- "
+
+inlineToTextile _ EnDash = return " - "
+
+inlineToTextile _ Apostrophe = return "'"
+
+inlineToTextile _ Ellipses = return "..."
+
+inlineToTextile _ (Code _ str) =
+ return $ if '@' `elem` str
+ then "<tt>" ++ escapeStringForXML str ++ "</tt>"
+ else "@" ++ str ++ "@"
+
+inlineToTextile _ (Str str) = return $ escapeStringForTextile str
+
+inlineToTextile _ (Math _ str) =
+ return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</math>"
+
+inlineToTextile _ (RawInline f str) =
+ if f == "html" || f == "textile"
+ then return str
+ else return ""
+
+inlineToTextile _ (LineBreak) = return "\n"
+
+inlineToTextile _ Space = return " "
+
+inlineToTextile opts (Link txt (src, _)) = do
+ label <- case txt of
+ [Code _ s] -> return s
+ _ -> inlineListToTextile opts txt
+ return $ "\"" ++ label ++ "\":" ++ src
+
+inlineToTextile opts (Image alt (source, tit)) = do
+ alt' <- inlineListToTextile opts alt
+ let txt = if null tit
+ then if null alt'
+ then ""
+ else "(" ++ alt' ++ ")"
+ else "(" ++ tit ++ ")"
+ return $ "!" ++ source ++ txt ++ "!"
+
+inlineToTextile opts (Note contents) = do
+ curNotes <- liftM stNotes get
+ let newnum = length curNotes + 1
+ contents' <- blockListToTextile opts contents
+ let thisnote = "fn" ++ show newnum ++ ". " ++ contents' ++ "\n"
+ modify $ \s -> s { stNotes = thisnote : curNotes }
+ return $ "[" ++ show newnum ++ "]"
+ -- note - may not work for notes with multiple blocks
diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs
index 0c48b48df..e21525018 100644
--- a/src/Text/Pandoc/XML.hs
+++ b/src/Text/Pandoc/XML.hs
@@ -34,7 +34,8 @@ module Text.Pandoc.XML ( stripTags,
selfClosingTag,
inTagsSimple,
inTagsIndented ) where
-import Text.PrettyPrint.HughesPJ
+
+import Text.Pandoc.Pretty
-- | Remove everything between <...>
stripTags :: String -> String
@@ -55,23 +56,15 @@ escapeCharForXML x = case x of
'"' -> "&quot;"
c -> [c]
--- | True if the character needs to be escaped.
-needsEscaping :: Char -> Bool
-needsEscaping c = c `elem` "&<>\""
-
-- | Escape string as needed for XML. Entity references are not preserved.
escapeStringForXML :: String -> String
-escapeStringForXML "" = ""
-escapeStringForXML str =
- case break needsEscaping str of
- (okay, "") -> okay
- (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs
+escapeStringForXML = concatMap escapeCharForXML
-- | Return a text object with a string of formatted XML attributes.
attributeList :: [(String, String)] -> Doc
-attributeList = text . concatMap
- (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++
- escapeStringForXML b ++ "\"")
+attributeList = hcat . map
+ (\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++
+ escapeStringForXML b ++ "\""))
-- | Put the supplied contents between start and end tags of tagType,
-- with specified attributes and (if specified) indentation.
diff --git a/src/markdown2pdf.hs b/src/markdown2pdf.hs
index cc6a034c0..4070001f1 100644
--- a/src/markdown2pdf.hs
+++ b/src/markdown2pdf.hs
@@ -155,7 +155,7 @@ main = bracket
"--include-before-body","--include-after-body",
"--custom-header","--output",
"--template", "--variable",
- "--csl", "--biblio", "--biblio-format"]
+ "--csl", "--bibliography", "--data-dir", "--listings"]
let isOpt ('-':_) = True
isOpt _ = False
let opts = filter isOpt args
diff --git a/src/pandoc.hs b/src/pandoc.hs
index ef38c0332..c0f457449 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -32,29 +32,27 @@ module Main where
import Text.Pandoc
import Text.Pandoc.S5 (s5HeaderIncludes)
import Text.Pandoc.Shared ( tabFilter, ObfuscationMethod (..), readDataFile,
- headerShift )
+ headerShift, findDataFile, normalize )
#ifdef _HIGHLIGHTING
import Text.Pandoc.Highlighting ( languages )
#endif
-import System.Environment ( getArgs, getProgName, getEnvironment )
+import System.Environment ( getArgs, getProgName )
import System.Exit ( exitWith, ExitCode (..) )
import System.FilePath
import System.Console.GetOpt
-import Data.Char ( toLower, isDigit )
-import Data.List ( intercalate, isSuffixOf )
-import System.Directory ( getAppUserDataDirectory )
+import Data.Char ( toLower )
+import Data.List ( intercalate, isSuffixOf, isPrefixOf )
+import System.Directory ( getAppUserDataDirectory, doesFileExist )
import System.IO ( stdout, stderr )
import qualified Text.Pandoc.UTF8 as UTF8
-#ifdef _CITEPROC
import Text.CSL
import Text.Pandoc.Biblio
-#endif
import Control.Monad (when, unless, liftM)
import Network.HTTP (simpleHTTP, mkRequest, getResponseBody, RequestMethod(..))
-import Network.URI (parseURI, isURI)
+import Network.URI (parseURI, isURI, URI(..))
import qualified Data.ByteString.Lazy as B
-import Data.ByteString.Lazy.UTF8 (toString, fromString)
-import Codec.Binary.UTF8.String (decodeString)
+import Data.ByteString.Lazy.UTF8 (toString )
+import Codec.Binary.UTF8.String (decodeString, encodeString)
copyrightMessage :: String
copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++
@@ -64,9 +62,7 @@ copyrightMessage = "\nCopyright (C) 2006-2010 John MacFarlane\n" ++
compileInfo :: String
compileInfo =
-#ifdef _CITEPROC
"\nCompiled with citeproc support." ++
-#endif
#ifdef _HIGHLIGHTING
"\nCompiled with syntax highlighting support for:\n" ++
wrapWords 78 languages ++
@@ -84,47 +80,6 @@ wrapWords c = wrap' c c where
then ",\n" ++ x ++ wrap' cols (cols - length x) xs
else ", " ++ x ++ wrap' cols (remaining - (length x + 2)) xs
--- | Association list of formats and readers.
-readers :: [(String, ParserState -> String -> Pandoc)]
-readers = [("native" , readPandoc)
- ,("markdown" , readMarkdown)
- ,("markdown+lhs" , readMarkdown)
- ,("rst" , readRST)
- ,("rst+lhs" , readRST)
- ,("html" , readHtml)
- ,("latex" , readLaTeX)
- ,("latex+lhs" , readLaTeX)
- ]
-
--- | Reader for native Pandoc format.
-readPandoc :: ParserState -> String -> Pandoc
-readPandoc _ = read
-
--- | Association list of formats and writers.
-writers :: [ ( String, WriterOptions -> Pandoc -> String ) ]
-writers = [("native" , writeNative)
- ,("html" , writeHtmlString)
- ,("html+lhs" , writeHtmlString)
- ,("s5" , writeHtmlString)
- ,("slidy" , writeHtmlString)
- ,("docbook" , writeDocbook)
- ,("opendocument" , writeOpenDocument)
- ,("odt" , \_ _ -> "")
- ,("epub" , \_ _ -> "")
- ,("latex" , writeLaTeX)
- ,("latex+lhs" , writeLaTeX)
- ,("context" , writeConTeXt)
- ,("texinfo" , writeTexinfo)
- ,("man" , writeMan)
- ,("markdown" , writeMarkdown)
- ,("markdown+lhs" , writeMarkdown)
- ,("plain" , writePlain)
- ,("rst" , writeRST)
- ,("rst+lhs" , writeRST)
- ,("mediawiki" , writeMediaWiki)
- ,("rtf" , writeRTF)
- ]
-
isNonTextOutput :: String -> Bool
isNonTextOutput = (`elem` ["odt","epub"])
@@ -147,6 +102,8 @@ data Opt = Opt
, optOffline :: Bool -- ^ Make slideshow accessible offline
, optXeTeX :: Bool -- ^ Format latex for xetex
, optSmart :: Bool -- ^ Use smart typography
+ , optHtml5 :: Bool -- ^ Produce HTML5 in HTML
+ , optChapters :: Bool -- ^ Use chapter for top-level sects
, optHTMLMathMethod :: HTMLMathMethod -- ^ Method to print HTML math
, optReferenceODT :: Maybe FilePath -- ^ Path of reference.odt
, optEPUBStylesheet :: Maybe String -- ^ EPUB stylesheet
@@ -156,17 +113,16 @@ data Opt = Opt
, optStrict :: Bool -- ^ Use strict markdown syntax
, optReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
, optWrapText :: Bool -- ^ Wrap text
- , optSanitizeHTML :: Bool -- ^ Sanitize HTML
+ , optColumns :: Int -- ^ Line length in characters
, optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply
, optEmailObfuscation :: ObfuscationMethod
, optIdentifierPrefix :: String
, optIndentedCodeClasses :: [String] -- ^ Default classes for indented code blocks
, optDataDir :: Maybe FilePath
-#ifdef _CITEPROC
- , optBiblioFile :: String
- , optBiblioFormat :: String
- , optCslFile :: String
-#endif
+ , optCiteMethod :: CiteMethod -- ^ Method to output cites
+ , optBibliography :: [String]
+ , optCslFile :: FilePath
+ , optListings :: Bool -- ^ Use listings package for code blocks
}
-- | Defaults for command-line options.
@@ -189,6 +145,8 @@ defaultOpts = Opt
, optOffline = False
, optXeTeX = False
, optSmart = False
+ , optHtml5 = False
+ , optChapters = False
, optHTMLMathMethod = PlainMath
, optReferenceODT = Nothing
, optEPUBStylesheet = Nothing
@@ -198,17 +156,16 @@ defaultOpts = Opt
, optStrict = False
, optReferenceLinks = False
, optWrapText = True
- , optSanitizeHTML = False
+ , optColumns = 72
, optPlugins = []
, optEmailObfuscation = JavascriptObfuscation
, optIdentifierPrefix = ""
, optIndentedCodeClasses = []
, optDataDir = Nothing
-#ifdef _CITEPROC
- , optBiblioFile = []
- , optBiblioFormat = []
- , optCslFile = []
-#endif
+ , optCiteMethod = Citeproc
+ , optBibliography = []
+ , optCslFile = ""
+ , optListings = False
}
-- | A list of functions, each transforming the options data structure
@@ -219,13 +176,13 @@ options =
(ReqArg
(\arg opt -> return opt { optReader = map toLower arg })
"FORMAT")
- "" -- ("(" ++ (intercalate ", " $ map fst readers) ++ ")")
+ ""
, Option "tw" ["to","write"]
(ReqArg
(\arg opt -> return opt { optWriter = map toLower arg })
"FORMAT")
- "" -- ("(" ++ (intercalate ", " $ map fst writers) ++ ")")
+ ""
, Option "s" ["standalone"]
(NoArg
@@ -245,8 +202,14 @@ options =
, Option "" ["tab-stop"]
(ReqArg
- (\arg opt -> return opt { optTabStop = (read arg) } )
- "TABSTOP")
+ (\arg opt ->
+ case reads arg of
+ [(t,"")] | t > 0 -> return opt { optTabStop = t }
+ _ -> do
+ UTF8.hPutStrLn stderr $
+ "tab-stop must be a number greater than 0"
+ exitWith $ ExitFailure 31)
+ "NUMBER")
"" -- "Tab stop (default 4)"
, Option "" ["strict"]
@@ -254,6 +217,12 @@ options =
(\opt -> return opt { optStrict = True } ))
"" -- "Disable markdown syntax extensions"
+ , Option "" ["normalize"]
+ (NoArg
+ (\opt -> return opt { optTransforms =
+ normalize : optTransforms opt } ))
+ "" -- "Normalize the Pandoc AST"
+
, Option "" ["reference-links"]
(NoArg
(\opt -> return opt { optReferenceLinks = True } ))
@@ -269,6 +238,11 @@ options =
(\opt -> return opt { optSmart = True }))
"" -- "Use smart quotes, dashes, and ellipses"
+ , Option "5" ["html5"]
+ (NoArg
+ (\opt -> return opt { optHtml5 = True }))
+ "" -- "Produce HTML5 in HTML output"
+
, Option "m" ["latexmathml", "asciimathml"]
(OptArg
(\arg opt ->
@@ -309,6 +283,12 @@ options =
"URL")
"" -- "Use jsMath for HTML math"
+ , Option "" ["mathjax"]
+ (ReqArg
+ (\arg opt -> return opt { optHTMLMathMethod = MathJax arg})
+ "URL")
+ "" -- "Use MathJax for HTML math"
+
, Option "" ["gladtex"]
(NoArg
(\opt -> return opt { optHTMLMathMethod = GladTeX }))
@@ -330,11 +310,21 @@ options =
(\opt -> return opt { optXeTeX = True }))
"" -- "Format latex for processing by XeTeX"
+ , Option "" ["chapters"]
+ (NoArg
+ (\opt -> return opt { optChapters = True }))
+ "" -- "Use chapter for top-level sections in LaTeX, DocBook"
+
, Option "N" ["number-sections"]
(NoArg
(\opt -> return opt { optNumberSections = True }))
"" -- "Number sections in LaTeX"
+ , Option "" ["listings"]
+ (NoArg
+ (\opt -> return opt { optListings = True }))
+ "" -- "Use listings package for LaTeX code blocks"
+
, Option "" ["section-divs"]
(NoArg
(\opt -> return opt { optSectionDivs = True }))
@@ -345,10 +335,17 @@ options =
(\opt -> return opt { optWrapText = False }))
"" -- "Do not wrap text in output"
- , Option "" ["sanitize-html"]
- (NoArg
- (\opt -> return opt { optSanitizeHTML = True }))
- "" -- "Sanitize HTML"
+ , Option "" ["columns"]
+ (ReqArg
+ (\arg opt ->
+ case reads arg of
+ [(t,"")] | t > 0 -> return opt { optColumns = t }
+ _ -> do
+ UTF8.hPutStrLn stderr $
+ "columns must be a number greater than 0"
+ exitWith $ ExitFailure 33)
+ "NUMBER")
+ "" -- "Length of line in characters"
, Option "" ["email-obfuscation"]
(ReqArg
@@ -383,17 +380,18 @@ options =
, Option "" ["base-header-level"]
(ReqArg
- (\arg opt -> do
- if all isDigit arg && (read arg :: Int) >= 1
- then do
- let oldTransforms = optTransforms opt
- let shift = read arg - 1
- return opt{ optTransforms =
- headerShift shift : oldTransforms }
- else do
- UTF8.hPutStrLn stderr $ "base-header-level must be a number >= 1"
- exitWith $ ExitFailure 19)
- "LEVEL")
+ (\arg opt ->
+ case reads arg of
+ [(t,"")] | t > 0 -> do
+ let oldTransforms = optTransforms opt
+ let shift = t - 1
+ return opt{ optTransforms =
+ headerShift shift : oldTransforms }
+ _ -> do
+ UTF8.hPutStrLn stderr $
+ "base-header-level must be a number > 0"
+ exitWith $ ExitFailure 19)
+ "NUMBER")
"" -- "Headers base level"
, Option "" ["template"]
@@ -415,7 +413,7 @@ options =
_ -> do
UTF8.hPutStrLn stderr $ "Could not parse `" ++ arg ++ "' as a key/value pair (k=v or k:v)"
exitWith $ ExitFailure 17)
- "FILENAME")
+ "KEY:VALUE")
"" -- "Use custom template"
, Option "c" ["css"]
@@ -461,16 +459,6 @@ options =
"FILENAME")
"" -- "File to include after document body"
- , Option "C" ["custom-header"]
- (ReqArg
- (\arg opt -> do
- text <- UTF8.readFile arg
- let newVars = ("legacy-header", text) : optVariables opt
- return opt { optVariables = newVars
- , optStandalone = True })
- "FILENAME")
- "" -- "File to use for custom header (implies -s)"
-
, Option "T" ["title-prefix"]
(ReqArg
(\arg opt -> do
@@ -513,23 +501,29 @@ options =
exitWith ExitSuccess)
"FORMAT")
"" -- "Print default template for FORMAT"
-#ifdef _CITEPROC
- , Option "" ["biblio"]
+
+ , Option "" ["bibliography"]
(ReqArg
- (\arg opt -> return opt { optBiblioFile = arg} )
+ (\arg opt -> return opt { optBibliography = (optBibliography opt) ++ [arg] })
"FILENAME")
""
- , Option "" ["biblio-format"]
- (ReqArg
- (\arg opt -> return opt { optBiblioFormat = arg} )
- "STRING")
- ""
+
, Option "" ["csl"]
(ReqArg
- (\arg opt -> return opt { optCslFile = arg} )
+ (\arg opt -> return opt { optCslFile = arg })
"FILENAME")
""
-#endif
+
+ , Option "" ["natbib"]
+ (NoArg
+ (\opt -> return opt { optCiteMethod = Natbib }))
+ "" -- "Use natbib cite commands in LaTeX output"
+
+ , Option "" ["biblatex"]
+ (NoArg
+ (\opt -> return opt { optCiteMethod = Biblatex }))
+ "" -- "Use biblatex cite commands in LaTeX output"
+
, Option "" ["data-dir"]
(ReqArg
(\arg opt -> return opt { optDataDir = Just arg })
@@ -569,7 +563,7 @@ usageMessage :: String -> [OptDescr (Opt -> IO Opt)] -> String
usageMessage programName = usageInfo
(programName ++ " [OPTIONS] [FILES]" ++ "\nInput formats: " ++
(intercalate ", " $ map fst readers) ++ "\nOutput formats: " ++
- (intercalate ", " $ map fst writers) ++ "\nOptions:")
+ (intercalate ", " $ map fst writers ++ ["odt","epub"]) ++ "\nOptions:")
-- Determine default reader based on source file extensions
defaultReaderName :: String -> [FilePath] -> String
@@ -584,7 +578,9 @@ defaultReaderName fallback (x:xs) =
".ltx" -> "latex"
".rst" -> "rst"
".lhs" -> "markdown+lhs"
+ ".textile" -> "textile"
".native" -> "native"
+ ".json" -> "json"
_ -> defaultReaderName fallback xs
-- Returns True if extension of first source is .lhs
@@ -607,16 +603,19 @@ defaultWriterName x =
".rst" -> "rst"
".s5" -> "s5"
".native" -> "native"
+ ".json" -> "json"
".txt" -> "markdown"
".text" -> "markdown"
".md" -> "markdown"
".markdown" -> "markdown"
+ ".textile" -> "textile"
".lhs" -> "markdown+lhs"
".texi" -> "texinfo"
".texinfo" -> "texinfo"
".db" -> "docbook"
".odt" -> "odt"
".epub" -> "epub"
+ ".org" -> "org"
['.',y] | y `elem` ['1'..'9'] -> "man"
_ -> "html"
@@ -663,6 +662,8 @@ main = do
, optOffline = offline
, optXeTeX = xetex
, optSmart = smart
+ , optHtml5 = html5
+ , optChapters = chapters
, optHTMLMathMethod = mathMethod
, optReferenceODT = referenceODT
, optEPUBStylesheet = epubStylesheet
@@ -672,16 +673,15 @@ main = do
, optStrict = strict
, optReferenceLinks = referenceLinks
, optWrapText = wrap
- , optSanitizeHTML = sanitize
+ , optColumns = columns
, optEmailObfuscation = obfuscationMethod
, optIdentifierPrefix = idPrefix
, optIndentedCodeClasses = codeBlockClasses
, optDataDir = mbDataDir
-#ifdef _CITEPROC
- , optBiblioFile = biblioFile
- , optBiblioFormat = biblioFormat
- , optCslFile = cslFile
-#endif
+ , optBibliography = reffiles
+ , optCslFile = cslfile
+ , optCiteMethod = citeMethod
+ , optListings = listings
} = opts
when dumpArgs $
@@ -689,13 +689,6 @@ main = do
mapM_ (\arg -> UTF8.hPutStrLn stdout arg) args
exitWith ExitSuccess
- -- warn about deprecated options
- case lookup "legacy-header" variables of
- Just _ -> UTF8.hPutStrLn stderr $
- "Warning: The -C/--custom-header is deprecated.\n" ++
- "Please transition to using --template instead."
- Nothing -> return ()
-
let sources = if ignoreArgs then [] else args
datadir <- case mbDataDir of
@@ -720,30 +713,13 @@ main = do
Just r -> return r
Nothing -> error ("Unknown reader: " ++ readerName')
- let writer = case lookup writerName' writers of
- Just _ | writerName' == "epub" -> writeEPUB epubStylesheet
- Just _ | writerName' == "odt" -> writeODT referenceODT
- Just r -> \o ->
- return . fromString . r o
- Nothing -> error $ "Unknown writer: " ++
- writerName'
-
templ <- getDefaultTemplate datadir writerName'
let defaultTemplate = case templ of
Right t -> t
Left e -> error (show e)
- environment <- getEnvironment
- let columns = case lookup "COLUMNS" environment of
- Just cols -> read cols
- Nothing -> stateColumns defaultParserState
-
let standalone' = standalone || isNonTextOutput writerName'
-#ifdef _CITEPROC
- refs <- if null biblioFile then return [] else readBiblioFile biblioFile biblioFormat
-#endif
-
variables' <- case (writerName', standalone', offline) of
("s5", True, True) -> do
inc <- s5HeaderIncludes datadir
@@ -766,6 +742,11 @@ main = do
return $ ("mathml-script", s) : variables'
_ -> return variables'
+ refs <- mapM (\f -> catch (readBiblioFile f) $ \e -> do
+ UTF8.hPutStrLn stderr $ "Error reading bibliography `" ++ f ++ "'"
+ UTF8.hPutStrLn stderr $ show e
+ exitWith (ExitFailure 23)) reffiles >>= \rs -> return $ concat rs
+
let sourceDir = if null sources
then "."
else takeDirectory (head sources)
@@ -778,18 +759,16 @@ main = do
let startParserState =
defaultParserState { stateParseRaw = parseRaw,
stateTabStop = tabStop,
- stateSanitizeHTML = sanitize,
stateLiterateHaskell = "+lhs" `isSuffixOf` readerName' ||
lhsExtension sources,
stateStandalone = standalone',
-#ifdef _CITEPROC
- stateCitations = map citeKey refs,
-#endif
+ stateCitations = map refId refs,
stateSmart = smart || writerName' `elem`
- ["latex", "context", "man"],
+ ["latex", "context", "latex+lhs", "man"],
stateColumns = columns,
stateStrict = strict,
- stateIndentedCodeClasses = codeBlockClasses }
+ stateIndentedCodeClasses = codeBlockClasses,
+ stateApplyMacros = writerName' `notElem` ["latex", "latex+lhs"] }
let writerOptions = WriterOptions { writerStandalone = standalone',
writerTemplate = if null template
@@ -804,12 +783,15 @@ main = do
writerSlideVariant = slideVariant,
writerIncremental = incremental,
writerXeTeX = xetex,
+ writerCiteMethod = citeMethod,
+ writerBiblioFiles = reffiles,
writerIgnoreNotes = False,
writerNumberSections = numberSections,
writerSectionDivs = sectionDivs,
writerStrictMarkdown = strict,
writerReferenceLinks = referenceLinks,
writerWrapText = wrap,
+ writerColumns = columns,
writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' ||
lhsExtension [outputFile],
writerEmailObfuscation = if strict
@@ -817,7 +799,11 @@ main = do
else obfuscationMethod,
writerIdentifierPrefix = idPrefix,
writerSourceDirectory = sourceDir,
- writerUserDataDir = datadir }
+ writerUserDataDir = datadir,
+ writerHtml5 = html5 &&
+ "html" `isPrefixOf` writerName',
+ writerChapters = chapters,
+ writerListings = listings }
when (isNonTextOutput writerName' && outputFile == "-") $
do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
@@ -828,8 +814,9 @@ main = do
readSources srcs = mapM readSource srcs
readSource "-" = UTF8.getContents
readSource src = case parseURI src of
- Just u -> readURI u
- Nothing -> UTF8.readFile src
+ Just u | uriScheme u `elem` ["http:","https:"] ->
+ readURI u
+ _ -> UTF8.readFile src
readURI uri = simpleHTTP (mkRequest GET uri) >>= getResponseBody >>=
return . toString -- treat all as UTF8
@@ -837,17 +824,39 @@ main = do
doc <- fmap (reader startParserState . convertTabs . intercalate "\n") (readSources sources)
- let doc' = foldr ($) doc transforms
-
- doc'' <- do
-#ifdef _CITEPROC
- processBiblio cslFile refs doc'
-#else
- return doc'
-#endif
-
- writerOutput <- writer writerOptions doc''
-
- if outputFile == "-"
- then B.putStr writerOutput
- else B.writeFile outputFile writerOutput
+ let doc0 = foldr ($) doc transforms
+
+ doc1 <- if writerName' == "rtf"
+ then bottomUpM rtfEmbedImage doc0
+ else return doc0
+
+ doc2 <- do
+ if citeMethod == Citeproc && not (null refs)
+ then do
+ csldir <- getAppUserDataDirectory "csl"
+ cslfile' <- if null cslfile
+ then findDataFile datadir "default.csl"
+ else do
+ ex <- doesFileExist cslfile
+ if ex
+ then return cslfile
+ else findDataFile datadir $
+ replaceDirectory
+ (replaceExtension cslfile "csl")
+ csldir
+ processBiblio cslfile' refs doc1
+ else return doc1
+
+ case lookup writerName' writers of
+ Nothing | writerName' == "epub" ->
+ writeEPUB epubStylesheet writerOptions doc2
+ >>= B.writeFile (encodeString outputFile)
+ Nothing | writerName' == "odt" ->
+ writeODT referenceODT writerOptions doc2
+ >>= B.writeFile (encodeString outputFile)
+ Just r -> writerFn outputFile result
+ where writerFn "-" = UTF8.putStr
+ writerFn f = UTF8.writeFile f
+ result = r writerOptions doc2 ++
+ ['\n' | not standalone']
+ Nothing -> error $ "Unknown writer: " ++ writerName'
diff --git a/src/test-pandoc.hs b/src/test-pandoc.hs
new file mode 100644
index 000000000..2b6c2bb49
--- /dev/null
+++ b/src/test-pandoc.hs
@@ -0,0 +1,32 @@
+{-# OPTIONS_GHC -Wall #-}
+
+module Main where
+
+import Test.Framework
+
+import qualified Tests.Old
+import qualified Tests.Readers.LaTeX
+import qualified Tests.Readers.Markdown
+import qualified Tests.Readers.RST
+import qualified Tests.Writers.ConTeXt
+import qualified Tests.Writers.HTML
+import qualified Tests.Writers.Native
+import qualified Tests.Shared
+
+tests :: [Test]
+tests = [ testGroup "Old" Tests.Old.tests
+ , testGroup "Shared" Tests.Shared.tests
+ , testGroup "Writers"
+ [ testGroup "Native" Tests.Writers.Native.tests
+ , testGroup "ConTeXt" Tests.Writers.ConTeXt.tests
+ , testGroup "HTML" Tests.Writers.HTML.tests
+ ]
+ , testGroup "Readers"
+ [ testGroup "LaTeX" Tests.Readers.LaTeX.tests
+ , testGroup "Markdown" Tests.Readers.Markdown.tests
+ , testGroup "RST" Tests.Readers.RST.tests
+ ]
+ ]
+
+main :: IO ()
+main = defaultMain tests