diff options
author | dr@jones.dk <dr@jones.dk> | 2011-02-04 00:01:35 +0100 |
---|---|---|
committer | dr@jones.dk <dr@jones.dk> | 2011-02-04 00:01:35 +0100 |
commit | 91179df4907bec919e0884019da785be1ceb01b3 (patch) | |
tree | 2a6655fb4ec4655c554ea17ad074859d707b7709 /src/Text | |
parent | 1f6b4aee268fefc72c84bd305b10d4f9103901eb (diff) |
Imported Upstream version 1.8.0.1
Diffstat (limited to 'src/Text')
35 files changed, 4430 insertions, 2546 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index ad429bc93..ef8560284 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -57,11 +57,18 @@ module Text.Pandoc ( -- * Definitions module Text.Pandoc.Definition + -- * Generics + , module Text.Pandoc.Generic + -- * Lists of readers and writers + , readers + , writers -- * Readers: converting /to/ Pandoc format , readMarkdown , readRST , readLaTeX , readHtml + , readTextile + , readNative -- * Parser state used in readers , ParserState (..) , defaultParserState @@ -84,25 +91,34 @@ module Text.Pandoc , writeOpenDocument , writeMan , writeMediaWiki + , writeTextile , writeRTF , writeODT , writeEPUB + , writeOrg -- * Writer options used in writers , WriterOptions (..) , HTMLSlideVariant (..) , HTMLMathMethod (..) + , CiteMethod (..) , defaultWriterOptions -- * Rendering templates and default templates , module Text.Pandoc.Templates -- * Version , pandocVersion + -- * Miscellaneous + , rtfEmbedImage + , jsonFilter ) where import Text.Pandoc.Definition +import Text.Pandoc.Generic import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.RST import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.HTML +import Text.Pandoc.Readers.Textile +import Text.Pandoc.Readers.Native import Text.Pandoc.Writers.Native import Text.Pandoc.Writers.Markdown import Text.Pandoc.Writers.RST @@ -117,12 +133,69 @@ import Text.Pandoc.Writers.OpenDocument import Text.Pandoc.Writers.Man import Text.Pandoc.Writers.RTF import Text.Pandoc.Writers.MediaWiki +import Text.Pandoc.Writers.Textile +import Text.Pandoc.Writers.Org import Text.Pandoc.Templates import Text.Pandoc.Parsing import Text.Pandoc.Shared import Data.Version (showVersion) +import Text.JSON.Generic import Paths_pandoc (version) -- | Version number of pandoc library. pandocVersion :: String pandocVersion = showVersion version + +-- | Association list of formats and readers. +readers :: [(String, ParserState -> String -> Pandoc)] +readers = [("native" , \_ -> readNative) + ,("json" , \_ -> decodeJSON) + ,("markdown" , readMarkdown) + ,("markdown+lhs" , \st -> + readMarkdown st{ stateLiterateHaskell = True}) + ,("rst" , readRST) + ,("rst+lhs" , \st -> + readRST st{ stateLiterateHaskell = True}) + ,("textile" , readTextile) -- TODO : textile+lhs + ,("html" , readHtml) + ,("latex" , readLaTeX) + ,("latex+lhs" , \st -> + readLaTeX st{ stateLiterateHaskell = True}) + ] + +-- | Association list of formats and writers (omitting the +-- binary writers, odt and epub). +writers :: [ ( String, WriterOptions -> Pandoc -> String ) ] +writers = [("native" , writeNative) + ,("json" , \_ -> encodeJSON) + ,("html" , writeHtmlString) + ,("html+lhs" , \o -> + writeHtmlString o{ writerLiterateHaskell = True }) + ,("s5" , writeHtmlString) + ,("slidy" , writeHtmlString) + ,("docbook" , writeDocbook) + ,("opendocument" , writeOpenDocument) + ,("latex" , writeLaTeX) + ,("latex+lhs" , \o -> + writeLaTeX o{ writerLiterateHaskell = True }) + ,("context" , writeConTeXt) + ,("texinfo" , writeTexinfo) + ,("man" , writeMan) + ,("markdown" , writeMarkdown) + ,("markdown+lhs" , \o -> + writeMarkdown o{ writerLiterateHaskell = True }) + ,("plain" , writePlain) + ,("rst" , writeRST) + ,("rst+lhs" , \o -> + writeRST o{ writerLiterateHaskell = True }) + ,("mediawiki" , writeMediaWiki) + ,("textile" , writeTextile) + ,("rtf" , writeRTF) + ,("org" , writeOrg) + ] + +-- | Converts a transformation on the Pandoc AST into a function +-- that reads and writes a JSON-encoded string. This is useful +-- for writing small scripts. +jsonFilter :: (Pandoc -> Pandoc) -> String -> String +jsonFilter f = encodeJSON . f . decodeJSON diff --git a/src/Text/Pandoc/Biblio.hs b/src/Text/Pandoc/Biblio.hs index 436eadd68..d65c9de1c 100644 --- a/src/Text/Pandoc/Biblio.hs +++ b/src/Text/Pandoc/Biblio.hs @@ -19,48 +19,203 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Biblio - Copyright : Copyright (C) 2008 Andrea Rossato + Copyright : Copyright (C) 2008-2010 Andrea Rossato License : GNU GPL, version 2 or above - Maintainer : Andrea Rossato <andrea.rossato@ing.unitn.it> + Maintainer : Andrea Rossato <andrea.rossato@unitn.it> Stability : alpha Portability : portable -} module Text.Pandoc.Biblio ( processBiblio ) where -import Control.Monad ( when ) import Data.List -import Text.CSL +import Data.Unique +import Data.Char ( isDigit, isPunctuation ) +import qualified Data.Map as M +import Text.CSL hiding ( Cite(..), Citation(..) ) +import qualified Text.CSL as CSL ( Cite(..) ) import Text.Pandoc.Definition +import Text.Pandoc.Generic +import Text.Pandoc.Shared (stringify) +import Text.ParserCombinators.Parsec +import Control.Monad -- | Process a 'Pandoc' document by adding citations formatted -- according to a CSL style, using 'citeproc' from citeproc-hs. -processBiblio :: String -> [Reference] -> Pandoc -> IO Pandoc -processBiblio cf r p +processBiblio :: FilePath -> [Reference] -> Pandoc -> IO Pandoc +processBiblio cslfile r p = if null r then return p else do - when (null cf) $ error "Missing the needed citation style file" - csl <- readCSLFile cf - let groups = queryWith getCite p - result = citeproc csl r groups - cits_map = zip groups (citations result) - biblioList = map (read . renderPandoc' csl) (bibliography result) - Pandoc m b = processWith (processCite csl cits_map) p - return $ Pandoc m $ b ++ biblioList + csl <- readCSLFile cslfile + p' <- bottomUpM setHash p + let (nts,grps) = if styleClass csl == "note" + then let cits = queryWith getCite p' + ncits = map (queryWith getCite) $ queryWith getNote p' + needNt = cits \\ concat ncits + in (,) needNt $ getNoteCitations needNt p' + else (,) [] $ queryWith getCitation p' + result = citeproc procOpts csl r (setNearNote csl $ + map (map toCslCite) grps) + cits_map = M.fromList $ zip grps (citations result) + biblioList = map (renderPandoc' csl) (bibliography result) + Pandoc m b = bottomUp (procInlines $ processCite csl cits_map) p' + return . generateNotes nts . Pandoc m $ b ++ biblioList -- | Substitute 'Cite' elements with formatted citations. -processCite :: Style -> [([Target],[FormattedOutput])] -> Inline -> Inline -processCite s cs il - | Cite t _ <- il = Cite t (process t) - | otherwise = il +processCite :: Style -> M.Map [Citation] [FormattedOutput] -> [Inline] -> [Inline] +processCite _ _ [] = [] +processCite s cs (i:is) + | Cite t _ <- i = process t ++ processCite s cs is + | otherwise = i : processCite s cs is where - process t = case elemIndex t (map fst cs) of - Just i -> read . renderPandoc s $ snd (cs !! i) + addNt t x = if null x then [] else [Cite t $ renderPandoc s x] + process t = case M.lookup t cs of + Just x -> if isTextualCitation t && x /= [] + then renderPandoc s [head x] ++ + if tail x /= [] + then Space : addNt t (tail x) + else [] + else [Cite t $ renderPandoc s x] Nothing -> [Str ("Error processing " ++ show t)] +isTextualCitation :: [Citation] -> Bool +isTextualCitation (c:_) = citationMode c == AuthorInText +isTextualCitation _ = False + -- | Retrieve all citations from a 'Pandoc' docuument. To be used with -- 'queryWith'. -getCite :: Inline -> [[(String,String)]] -getCite i | Cite t _ <- i = [t] +getCitation :: Inline -> [[Citation]] +getCitation i | Cite t _ <- i = [t] + | otherwise = [] + +getNote :: Inline -> [Inline] +getNote i | Note _ <- i = [i] + | otherwise = [] + +getCite :: Inline -> [Inline] +getCite i | Cite _ _ <- i = [i] | otherwise = [] + +getNoteCitations :: [Inline] -> Pandoc -> [[Citation]] +getNoteCitations needNote + = let mvCite i = if i `elem` needNote then Note [Para [i]] else i + setNote = bottomUp mvCite + getCits = concat . flip (zipWith $ setCiteNoteNum) [1..] . + map (queryWith getCite) . queryWith getNote . setNote + in queryWith getCitation . getCits + +setHash :: Citation -> IO Citation +setHash (Citation i p s cm nn _) + = hashUnique `fmap` newUnique >>= return . Citation i p s cm nn + +generateNotes :: [Inline] -> Pandoc -> Pandoc +generateNotes needNote = bottomUp (mvCiteInNote needNote) + +procInlines :: ([Inline] -> [Inline]) -> Block -> Block +procInlines f b + | Plain inls <- b = Plain $ f inls + | Para inls <- b = Para $ f inls + | Header i inls <- b = Header i $ f inls + | otherwise = b + +mvCiteInNote :: [Inline] -> Block -> Block +mvCiteInNote is = procInlines mvCite + where + mvCite :: [Inline] -> [Inline] + mvCite inls + | x:i:xs <- inls, startWithPunct xs + , x == Space, i `elem_` is = switch i xs ++ mvCite (tailFirstInlineStr xs) + | x:i:xs <- inls + , x == Space, i `elem_` is = mvInNote i : mvCite xs + | i:xs <- inls, i `elem_` is + , startWithPunct xs = switch i xs ++ mvCite (tailFirstInlineStr xs) + | i:xs <- inls, Note _ <- i = checkNt i : mvCite xs + | i:xs <- inls = i : mvCite xs + | otherwise = [] + elem_ x xs = case x of Cite cs _ -> (Cite cs []) `elem` xs; _ -> False + switch i xs = Str (headInline xs) : mvInNote i : [] + mvInNote i + | Cite t o <- i = Note [Para [Cite t $ sanitize o]] + | otherwise = Note [Para [i ]] + sanitize i + | endWithPunct i = toCapital i + | otherwise = toCapital (i ++ [Str "."]) + + checkPt i + | Cite c o : xs <- i + , endWithPunct o, startWithPunct xs + , endWithPunct o = Cite c (initInline o) : checkPt xs + | x:xs <- i = x : checkPt xs + | otherwise = [] + checkNt = bottomUp $ procInlines checkPt + +setCiteNoteNum :: [Inline] -> Int -> [Inline] +setCiteNoteNum ((Cite cs o):xs) n = Cite (setCitationNoteNum n cs) o : setCiteNoteNum xs n +setCiteNoteNum _ _ = [] + +setCitationNoteNum :: Int -> [Citation] -> [Citation] +setCitationNoteNum i = map $ \c -> c { citationNoteNum = i} + +toCslCite :: Citation -> CSL.Cite +toCslCite c + = let (l, s) = locatorWords $ citationSuffix c + (la,lo) = parseLocator l + citMode = case citationMode c of + AuthorInText -> (True, False) + SuppressAuthor -> (False,True ) + NormalCitation -> (False,False) + s' = case s of + [] -> [] + (Str (y:_) : _) | isPunctuation y -> s + _ -> Str "," : Space : s + in emptyCite { CSL.citeId = citationId c + , CSL.citePrefix = PandocText $ citationPrefix c + , CSL.citeSuffix = PandocText $ s' + , CSL.citeLabel = la + , CSL.citeLocator = lo + , CSL.citeNoteNumber = show $ citationNoteNum c + , CSL.authorInText = fst citMode + , CSL.suppressAuthor = snd citMode + , CSL.citeHash = citationHash c + } + +locatorWords :: [Inline] -> (String, [Inline]) +locatorWords inp = + case parse pLocatorWords "suffix" inp of + Right r -> r + Left _ -> ("",inp) + +pLocatorWords :: GenParser Inline st (String, [Inline]) +pLocatorWords = do + l <- pLocator + s <- getInput -- rest is suffix + if length l > 0 && last l == ',' + then return (init l, Str "," : s) + else return (l, s) + +pMatch :: (Inline -> Bool) -> GenParser Inline st Inline +pMatch condition = try $ do + t <- anyToken + guard $ condition t + return t + +pSpace :: GenParser Inline st Inline +pSpace = pMatch (== Space) + +pLocator :: GenParser Inline st String +pLocator = try $ do + optional $ pMatch (== Str ",") + optional pSpace + f <- many1 (notFollowedBy pSpace >> anyToken) + gs <- many1 pWordWithDigits + return $ stringify f ++ (' ' : unwords gs) + +pWordWithDigits :: GenParser Inline st String +pWordWithDigits = try $ do + pSpace + r <- many1 (notFollowedBy pSpace >> anyToken) + let s = stringify r + guard $ any isDigit s + return s + diff --git a/src/Text/Pandoc/Blocks.hs b/src/Text/Pandoc/Blocks.hs deleted file mode 100644 index 122931773..000000000 --- a/src/Text/Pandoc/Blocks.hs +++ /dev/null @@ -1,146 +0,0 @@ -{- -Copyright (C) 2007 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Blocks - Copyright : Copyright (C) 2007 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Functions for the manipulation of fixed-width blocks of text. -These are used in the construction of plain-text tables. --} - -module Text.Pandoc.Blocks - ( - TextBlock (..), - docToBlock, - blockToDoc, - widthOfBlock, - heightOfBlock, - hcatBlocks, - hsepBlocks, - centerAlignBlock, - leftAlignBlock, - rightAlignBlock - ) -where -import Text.PrettyPrint -import Data.List ( intersperse ) - --- | A fixed-width block of text. Parameters are width of block, --- height of block, and list of lines. -data TextBlock = TextBlock Int Int [String] -instance Show TextBlock where - show x = show $ blockToDoc x - --- | Break lines in a list of lines so that none are greater than --- a given width. -breakLines :: Int -- ^ Maximum length of lines. - -> [String] -- ^ List of lines. - -> [String] -breakLines _ [] = [] -breakLines width (l:ls) = - if length l > width - then (take width l):(breakLines width ((drop width l):ls)) - else l:(breakLines width ls) - --- | Convert a @Doc@ element into a @TextBlock@ with a specified width. -docToBlock :: Int -- ^ Width of text block. - -> Doc -- ^ @Doc@ to convert. - -> TextBlock -docToBlock width doc = - let rendered = renderStyle (style {lineLength = width, - ribbonsPerLine = 1}) doc - lns = breakLines width $ lines rendered - in TextBlock width (length lns) lns - --- | Convert a @TextBlock@ to a @Doc@ element. -blockToDoc :: TextBlock -> Doc -blockToDoc (TextBlock _ _ lns) = - if null lns - then empty - else vcat $ map text lns - --- | Returns width of a @TextBlock@ (number of columns). -widthOfBlock :: TextBlock -> Int -widthOfBlock (TextBlock width _ _) = width - --- | Returns height of a @TextBlock@ (number of rows). -heightOfBlock :: TextBlock -> Int -heightOfBlock (TextBlock _ height _) = height - --- | Pads a string out to a given width using spaces. -hPad :: Int -- ^ Desired width. - -> String -- ^ String to pad. - -> String -hPad width line = - let linelen = length line - in if linelen <= width - then line ++ replicate (width - linelen) ' ' - else take width line - --- | Concatenates a list of @TextBlock@s into a new @TextBlock@ in --- which they appear side by side. -hcatBlocks :: [TextBlock] -> TextBlock -hcatBlocks [] = TextBlock 0 0 [] -hcatBlocks [x] = x -- This is not redundant! We don't want last item hPad'd. -hcatBlocks ((TextBlock width1 height1 lns1):xs) = - let (TextBlock width2 height2 lns2) = hcatBlocks xs - height = max height1 height2 - width = width1 + width2 - lns1' = map (hPad width1) $ lns1 ++ replicate (height - height1) "" - lns2' = lns2 ++ replicate (height - height2) "" - lns = zipWith (++) lns1' lns2' - in TextBlock width height lns - --- | Like @hcatBlocks@, but inserts space between the @TextBlock@s. -hsepBlocks :: [TextBlock] -> TextBlock -hsepBlocks = hcatBlocks . (intersperse (TextBlock 1 1 [" "])) - -isWhitespace :: Char -> Bool -isWhitespace x = x `elem` " \t" - --- | Left-aligns the contents of a @TextBlock@ within the block. -leftAlignBlock :: TextBlock -> TextBlock -leftAlignBlock (TextBlock width height lns) = - TextBlock width height $ map (dropWhile isWhitespace) lns - --- | Right-aligns the contents of a @TextBlock@ within the block. -rightAlignBlock :: TextBlock -> TextBlock -rightAlignBlock (TextBlock width height lns) = - let rightAlignLine ln = - let (spaces, rest) = span isWhitespace $ reverse $ hPad width ln - in reverse (rest ++ spaces) - in TextBlock width height $ map rightAlignLine lns - --- | Centers the contents of a @TextBlock@ within the block. -centerAlignBlock :: TextBlock -> TextBlock -centerAlignBlock (TextBlock width height lns) = - let centerAlignLine ln = - let ln' = hPad width ln - (startSpaces, rest) = span isWhitespace ln' - endSpaces = takeWhile isWhitespace (reverse ln') - numSpaces = length (startSpaces ++ endSpaces) - startSpaces' = replicate (quot numSpaces 2) ' ' - in startSpaces' ++ rest - in TextBlock width height $ map centerAlignLine lns - diff --git a/src/Text/Pandoc/CharacterReferences.hs b/src/Text/Pandoc/CharacterReferences.hs index 8ac55fc61..8157d94d3 100644 --- a/src/Text/Pandoc/CharacterReferences.hs +++ b/src/Text/Pandoc/CharacterReferences.hs @@ -31,9 +31,9 @@ module Text.Pandoc.CharacterReferences ( characterReference, decodeCharacterReferences, ) where -import Data.Char ( chr ) import Text.ParserCombinators.Parsec -import qualified Data.Map as Map +import Text.HTML.TagSoup.Entity ( lookupNamedEntity, lookupNumericEntity ) +import Data.Maybe ( fromMaybe ) -- | Parse character entity. characterReference :: GenParser Char st Char @@ -47,18 +47,21 @@ numRef :: GenParser Char st Char numRef = do char '#' num <- hexNum <|> decNum - return $ chr $ num + return $ fromMaybe '?' $ lookupNumericEntity num -hexNum :: GenParser Char st Int -hexNum = oneOf "Xx" >> many1 hexDigit >>= return . read . (\xs -> '0':'x':xs) +hexNum :: GenParser Char st [Char] +hexNum = do + x <- oneOf "Xx" + num <- many1 hexDigit + return (x:num) -decNum :: GenParser Char st Int -decNum = many1 digit >>= return . read +decNum :: GenParser Char st [Char] +decNum = many1 digit entity :: GenParser Char st Char entity = do body <- many1 alphaNum - return $ Map.findWithDefault '?' body entityTable + return $ fromMaybe '?' $ lookupNamedEntity body -- | Convert entities in a string to characters. decodeCharacterReferences :: String -> String @@ -67,261 +70,3 @@ decodeCharacterReferences str = Left err -> error $ "\nError: " ++ show err Right result -> result -entityTable :: Map.Map String Char -entityTable = Map.fromList entityTableList - -entityTableList :: [(String, Char)] -entityTableList = [ - ("quot", chr 34), - ("amp", chr 38), - ("lt", chr 60), - ("gt", chr 62), - ("nbsp", chr 160), - ("iexcl", chr 161), - ("cent", chr 162), - ("pound", chr 163), - ("curren", chr 164), - ("yen", chr 165), - ("brvbar", chr 166), - ("sect", chr 167), - ("uml", chr 168), - ("copy", chr 169), - ("ordf", chr 170), - ("laquo", chr 171), - ("not", chr 172), - ("shy", chr 173), - ("reg", chr 174), - ("macr", chr 175), - ("deg", chr 176), - ("plusmn", chr 177), - ("sup2", chr 178), - ("sup3", chr 179), - ("acute", chr 180), - ("micro", chr 181), - ("para", chr 182), - ("middot", chr 183), - ("cedil", chr 184), - ("sup1", chr 185), - ("ordm", chr 186), - ("raquo", chr 187), - ("frac14", chr 188), - ("frac12", chr 189), - ("frac34", chr 190), - ("iquest", chr 191), - ("Agrave", chr 192), - ("Aacute", chr 193), - ("Acirc", chr 194), - ("Atilde", chr 195), - ("Auml", chr 196), - ("Aring", chr 197), - ("AElig", chr 198), - ("Ccedil", chr 199), - ("Egrave", chr 200), - ("Eacute", chr 201), - ("Ecirc", chr 202), - ("Euml", chr 203), - ("Igrave", chr 204), - ("Iacute", chr 205), - ("Icirc", chr 206), - ("Iuml", chr 207), - ("ETH", chr 208), - ("Ntilde", chr 209), - ("Ograve", chr 210), - ("Oacute", chr 211), - ("Ocirc", chr 212), - ("Otilde", chr 213), - ("Ouml", chr 214), - ("times", chr 215), - ("Oslash", chr 216), - ("Ugrave", chr 217), - ("Uacute", chr 218), - ("Ucirc", chr 219), - ("Uuml", chr 220), - ("Yacute", chr 221), - ("THORN", chr 222), - ("szlig", chr 223), - ("agrave", chr 224), - ("aacute", chr 225), - ("acirc", chr 226), - ("atilde", chr 227), - ("auml", chr 228), - ("aring", chr 229), - ("aelig", chr 230), - ("ccedil", chr 231), - ("egrave", chr 232), - ("eacute", chr 233), - ("ecirc", chr 234), - ("euml", chr 235), - ("igrave", chr 236), - ("iacute", chr 237), - ("icirc", chr 238), - ("iuml", chr 239), - ("eth", chr 240), - ("ntilde", chr 241), - ("ograve", chr 242), - ("oacute", chr 243), - ("ocirc", chr 244), - ("otilde", chr 245), - ("ouml", chr 246), - ("divide", chr 247), - ("oslash", chr 248), - ("ugrave", chr 249), - ("uacute", chr 250), - ("ucirc", chr 251), - ("uuml", chr 252), - ("yacute", chr 253), - ("thorn", chr 254), - ("yuml", chr 255), - ("OElig", chr 338), - ("oelig", chr 339), - ("Scaron", chr 352), - ("scaron", chr 353), - ("Yuml", chr 376), - ("fnof", chr 402), - ("circ", chr 710), - ("tilde", chr 732), - ("Alpha", chr 913), - ("Beta", chr 914), - ("Gamma", chr 915), - ("Delta", chr 916), - ("Epsilon", chr 917), - ("Zeta", chr 918), - ("Eta", chr 919), - ("Theta", chr 920), - ("Iota", chr 921), - ("Kappa", chr 922), - ("Lambda", chr 923), - ("Mu", chr 924), - ("Nu", chr 925), - ("Xi", chr 926), - ("Omicron", chr 927), - ("Pi", chr 928), - ("Rho", chr 929), - ("Sigma", chr 931), - ("Tau", chr 932), - ("Upsilon", chr 933), - ("Phi", chr 934), - ("Chi", chr 935), - ("Psi", chr 936), - ("Omega", chr 937), - ("alpha", chr 945), - ("beta", chr 946), - ("gamma", chr 947), - ("delta", chr 948), - ("epsilon", chr 949), - ("zeta", chr 950), - ("eta", chr 951), - ("theta", chr 952), - ("iota", chr 953), - ("kappa", chr 954), - ("lambda", chr 955), - ("mu", chr 956), - ("nu", chr 957), - ("xi", chr 958), - ("omicron", chr 959), - ("pi", chr 960), - ("rho", chr 961), - ("sigmaf", chr 962), - ("sigma", chr 963), - ("tau", chr 964), - ("upsilon", chr 965), - ("phi", chr 966), - ("chi", chr 967), - ("psi", chr 968), - ("omega", chr 969), - ("thetasym", chr 977), - ("upsih", chr 978), - ("piv", chr 982), - ("ensp", chr 8194), - ("emsp", chr 8195), - ("thinsp", chr 8201), - ("zwnj", chr 8204), - ("zwj", chr 8205), - ("lrm", chr 8206), - ("rlm", chr 8207), - ("ndash", chr 8211), - ("mdash", chr 8212), - ("lsquo", chr 8216), - ("rsquo", chr 8217), - ("sbquo", chr 8218), - ("ldquo", chr 8220), - ("rdquo", chr 8221), - ("bdquo", chr 8222), - ("dagger", chr 8224), - ("Dagger", chr 8225), - ("bull", chr 8226), - ("hellip", chr 8230), - ("permil", chr 8240), - ("prime", chr 8242), - ("Prime", chr 8243), - ("lsaquo", chr 8249), - ("rsaquo", chr 8250), - ("oline", chr 8254), - ("frasl", chr 8260), - ("euro", chr 8364), - ("image", chr 8465), - ("weierp", chr 8472), - ("real", chr 8476), - ("trade", chr 8482), - ("alefsym", chr 8501), - ("larr", chr 8592), - ("uarr", chr 8593), - ("rarr", chr 8594), - ("darr", chr 8595), - ("harr", chr 8596), - ("crarr", chr 8629), - ("lArr", chr 8656), - ("uArr", chr 8657), - ("rArr", chr 8658), - ("dArr", chr 8659), - ("hArr", chr 8660), - ("forall", chr 8704), - ("part", chr 8706), - ("exist", chr 8707), - ("empty", chr 8709), - ("nabla", chr 8711), - ("isin", chr 8712), - ("notin", chr 8713), - ("ni", chr 8715), - ("prod", chr 8719), - ("sum", chr 8721), - ("minus", chr 8722), - ("lowast", chr 8727), - ("radic", chr 8730), - ("prop", chr 8733), - ("infin", chr 8734), - ("ang", chr 8736), - ("and", chr 8743), - ("or", chr 8744), - ("cap", chr 8745), - ("cup", chr 8746), - ("int", chr 8747), - ("there4", chr 8756), - ("sim", chr 8764), - ("cong", chr 8773), - ("asymp", chr 8776), - ("ne", chr 8800), - ("equiv", chr 8801), - ("le", chr 8804), - ("ge", chr 8805), - ("sub", chr 8834), - ("sup", chr 8835), - ("nsub", chr 8836), - ("sube", chr 8838), - ("supe", chr 8839), - ("oplus", chr 8853), - ("otimes", chr 8855), - ("perp", chr 8869), - ("sdot", chr 8901), - ("lceil", chr 8968), - ("rceil", chr 8969), - ("lfloor", chr 8970), - ("rfloor", chr 8971), - ("lang", chr 9001), - ("rang", chr 9002), - ("loz", chr 9674), - ("spades", chr 9824), - ("clubs", chr 9827), - ("hearts", chr 9829), - ("diams", chr 9830) - ] diff --git a/src/Text/Pandoc/Definition.hs b/src/Text/Pandoc/Definition.hs deleted file mode 100644 index fffca3b2e..000000000 --- a/src/Text/Pandoc/Definition.hs +++ /dev/null @@ -1,151 +0,0 @@ -{-# OPTIONS_GHC -fglasgow-exts #-} -- for deriving Typeable -{- -Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Definition - Copyright : Copyright (C) 2006-2010 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Definition of 'Pandoc' data structure for format-neutral representation -of documents. --} -module Text.Pandoc.Definition where - -import Data.Generics - -data Pandoc = Pandoc Meta [Block] deriving (Eq, Ord, Read, Show, Typeable, Data) - --- | Bibliographic information for the document: title, authors, date. -data Meta = Meta { docTitle :: [Inline] - , docAuthors :: [[Inline]] - , docDate :: [Inline] } - deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | Alignment of a table column. -data Alignment = AlignLeft - | AlignRight - | AlignCenter - | AlignDefault deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | List attributes. -type ListAttributes = (Int, ListNumberStyle, ListNumberDelim) - --- | Style of list numbers. -data ListNumberStyle = DefaultStyle - | Example - | Decimal - | LowerRoman - | UpperRoman - | LowerAlpha - | UpperAlpha deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | Delimiter of list numbers. -data ListNumberDelim = DefaultDelim - | Period - | OneParen - | TwoParens deriving (Eq, Ord, Show, Read, Typeable, Data) - --- | Attributes: identifier, classes, key-value pairs -type Attr = (String, [String], [(String, String)]) - --- | Block element. -data Block - = Plain [Inline] -- ^ Plain text, not a paragraph - | Para [Inline] -- ^ Paragraph - | CodeBlock Attr String -- ^ Code block (literal) with attributes - | RawHtml String -- ^ Raw HTML block (literal) - | BlockQuote [Block] -- ^ Block quote (list of blocks) - | OrderedList ListAttributes [[Block]] -- ^ Ordered list (attributes - -- and a list of items, each a list of blocks) - | BulletList [[Block]] -- ^ Bullet list (list of items, each - -- a list of blocks) - | DefinitionList [([Inline],[[Block]])] -- ^ Definition list - -- Each list item is a pair consisting of a - -- term (a list of inlines) and one or more - -- definitions (each a list of blocks) - | Header Int [Inline] -- ^ Header - level (integer) and text (inlines) - | HorizontalRule -- ^ Horizontal rule - | Table [Inline] [Alignment] [Double] [[Block]] [[[Block]]] -- ^ Table, - -- with caption, column alignments, - -- relative column widths (0 = default), - -- column headers (each a list of blocks), and - -- rows (each a list of lists of blocks) - | Null -- ^ Nothing - deriving (Eq, Ord, Read, Show, Typeable, Data) - --- | Type of quotation marks to use in Quoted inline. -data QuoteType = SingleQuote | DoubleQuote deriving (Show, Eq, Ord, Read, Typeable, Data) - --- | Link target (URL, title). -type Target = (String, String) - --- | Type of math element (display or inline). -data MathType = DisplayMath | InlineMath deriving (Show, Eq, Ord, Read, Typeable, Data) - --- | Inline elements. -data Inline - = Str String -- ^ Text (string) - | Emph [Inline] -- ^ Emphasized text (list of inlines) - | Strong [Inline] -- ^ Strongly emphasized text (list of inlines) - | Strikeout [Inline] -- ^ Strikeout text (list of inlines) - | Superscript [Inline] -- ^ Superscripted text (list of inlines) - | Subscript [Inline] -- ^ Subscripted text (list of inlines) - | SmallCaps [Inline] -- ^ Small caps text (list of inlines) - | Quoted QuoteType [Inline] -- ^ Quoted text (list of inlines) - | Cite [Target] [Inline] -- ^ Citation (list of inlines) - | Code String -- ^ Inline code (literal) - | Space -- ^ Inter-word space - | EmDash -- ^ Em dash - | EnDash -- ^ En dash - | Apostrophe -- ^ Apostrophe - | Ellipses -- ^ Ellipses - | LineBreak -- ^ Hard line break - | Math MathType String -- ^ TeX math (literal) - | TeX String -- ^ LaTeX code (literal) - | HtmlInline String -- ^ HTML code (literal) - | Link [Inline] Target -- ^ Hyperlink: text (list of inlines), target - | Image [Inline] Target -- ^ Image: alt text (list of inlines), target - -- and target - | Note [Block] -- ^ Footnote or endnote - deriving (Show, Eq, Ord, Read, Typeable, Data) - --- | Applies a transformation on @a@s to matching elements in a @b@. -processWith :: (Data a, Data b) => (a -> a) -> b -> b -processWith f = everywhere (mkT f) - --- | Like 'processWith', but with monadic transformations. -processWithM :: (Monad m, Data a, Data b) => (a -> m a) -> b -> m b -processWithM f = everywhereM (mkM f) - --- | Runs a query on matching @a@ elements in a @c@. -queryWith :: (Data a, Data c) => (a -> [b]) -> c -> [b] -queryWith f = everything (++) ([] `mkQ` f) - -{-# DEPRECATED processPandoc "Use processWith instead" #-} -processPandoc :: Data a => (a -> a) -> Pandoc -> Pandoc -processPandoc = processWith - -{-# DEPRECATED queryPandoc "Use queryWith instead" #-} -queryPandoc :: Data a => (a -> [b]) -> Pandoc -> [b] -queryPandoc = queryWith - diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index f29106262..5ddaf1379 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -37,10 +37,14 @@ import Data.List (find) import Data.Maybe (fromMaybe) import Data.Char (toLower) -highlightHtml :: Attr -> String -> Either String Html -highlightHtml (_, classes, keyvals) rawCode = +highlightHtml :: Bool -- ^ True if inline HTML + -> Attr -- ^ Attributes of the Code or CodeBlock + -> String -- ^ Raw contents of the Code or CodeBlock + -> Either String Html -- ^ An error or the formatted Html +highlightHtml inline (_, classes, keyvals) rawCode = let firstNum = read $ fromMaybe "1" $ lookup "startFrom" keyvals fmtOpts = [OptNumberFrom firstNum] ++ + [OptInline | inline] ++ case find (`elem` ["number","numberLines","number-lines"]) classes of Nothing -> [] Just _ -> [OptNumberLines] @@ -65,6 +69,6 @@ languages = [] languagesByExtension :: String -> [String] languagesByExtension _ = [] -highlightHtml :: Attr -> String -> Either String Html -highlightHtml _ _ = Left "Pandoc was not compiled with support for highlighting" +highlightHtml :: Bool -> Attr -> String -> Either String Html +highlightHtml _ _ _ = Left "Pandoc was not compiled with support for highlighting" #endif diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 3678fc22a..9ce064f91 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -64,21 +64,27 @@ module Text.Pandoc.Parsing ( (>>~), QuoteContext (..), NoteTable, KeyTable, - Key (..), + Key, + toKey, + fromKey, lookupKeySrc, - refsMatch ) + smartPunctuation, + macro, + applyMacros' ) where import Text.Pandoc.Definition +import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.ParserCombinators.Parsec import Text.Pandoc.CharacterReferences ( characterReference ) -import Data.Char ( toLower, toUpper, ord, isAscii ) +import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit ) import Data.List ( intercalate, transpose ) import Network.URI ( parseURI, URI (..), isAllowedInURI ) -import Control.Monad ( join, liftM ) +import Control.Monad ( join, liftM, guard ) import Text.Pandoc.Shared import qualified Data.Map as M +import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions) -- | Like >>, but returns the operation on the left. -- (Suggested by Tillmann Rendel on Haskell-cafe list.) @@ -114,7 +120,7 @@ oneOfStrings listOfStrings = choice $ map (try . string) listOfStrings -- | Parses a space or tab. spaceChar :: CharParser st Char -spaceChar = char ' ' <|> char '\t' +spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Skips zero or more spaces or tabs. skipSpaces :: GenParser Char st () @@ -169,7 +175,8 @@ lineClump = blanklines charsInBalanced :: Char -> Char -> GenParser Char st String charsInBalanced open close = try $ do char open - raw <- many $ (many1 (noneOf [open, close, '\n'])) + raw <- many $ (many1 (satisfy $ \c -> + c /= open && c /= close && c /= '\n')) <|> (do res <- charsInBalanced open close return $ [open] ++ res ++ [close]) <|> try (string "\n" >>~ notFollowedBy' blanklines) @@ -180,7 +187,7 @@ charsInBalanced open close = try $ do charsInBalanced' :: Char -> Char -> GenParser Char st String charsInBalanced' open close = try $ do char open - raw <- many $ (many1 (noneOf [open, close])) + raw <- many $ (many1 (satisfy $ \c -> c /= open && c /= close)) <|> (do res <- charsInBalanced' open close return $ [open] ++ res ++ [close]) char close @@ -201,7 +208,7 @@ romanNumeral upperCase = do let romanDigits = if upperCase then uppercaseRomanDigits else lowercaseRomanDigits - lookAhead $ oneOf romanDigits + lookAhead $ oneOf romanDigits let [one, five, ten, fifty, hundred, fivehundred, thousand] = map char romanDigits thousands <- many thousand >>= (return . (1000 *) . length) @@ -227,7 +234,8 @@ romanNumeral upperCase = do -- Parsers for email addresses and URIs emailChar :: GenParser Char st Char -emailChar = alphaNum <|> oneOf "-+_." +emailChar = alphaNum <|> + satisfy (\c -> c == '-' || c == '+' || c == '_' || c == '.') domainChar :: GenParser Char st Char domainChar = alphaNum <|> char '-' @@ -283,7 +291,7 @@ nullBlock :: GenParser Char st Block nullBlock = anyChar >> return Null -- | Fail if reader is in strict markdown syntax mode. -failIfStrict :: GenParser Char ParserState () +failIfStrict :: GenParser a ParserState () failIfStrict = do state <- getState if stateStrict state then fail "strict mode" else return () @@ -327,7 +335,7 @@ decimal = do exampleNum :: GenParser Char ParserState (ListNumberStyle, Int) exampleNum = do char '@' - lab <- many (alphaNum <|> oneOf "_-") + lab <- many (alphaNum <|> satisfy (\c -> c == '_' || c == '-')) st <- getState let num = stateNextExample st let newlabels = if null lab @@ -450,8 +458,9 @@ widthsFromIndices :: Int -- Number of columns on terminal -> [Int] -- Indices -> [Double] -- Fractional relative sizes of columns widthsFromIndices _ [] = [] -widthsFromIndices numColumns indices = - let lengths' = zipWith (-) indices (0:indices) +widthsFromIndices numColumns' indices = + let numColumns = max numColumns' (if null indices then 0 else last indices) + lengths' = zipWith (-) indices (0:indices) lengths = reverse $ case reverse lengths' of [] -> [] @@ -481,8 +490,8 @@ gridTableWith block tableCaption headless = tableWith (gridTableHeader headless block) (gridTableRow block) (gridTableSep '-') gridTableFooter tableCaption gridTableSplitLine :: [Int] -> String -> [String] -gridTableSplitLine indices line = - map removeFinalBar $ tail $ splitByIndices (init indices) line +gridTableSplitLine indices line = map removeFinalBar $ tail $ + splitByIndices (init indices) $ removeTrailingSpace line gridPart :: Char -> GenParser Char st (Int, Int) gridPart ch = do @@ -494,8 +503,8 @@ gridDashedLines :: Char -> GenParser Char st [(Int,Int)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) >>~ blankline removeFinalBar :: String -> String -removeFinalBar = reverse . dropWhile (=='|') . dropWhile (`elem` " \t") . - reverse +removeFinalBar = + reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse -- | Separator between rows of grid table. gridTableSep :: Char -> GenParser Char ParserState Char @@ -532,7 +541,7 @@ gridTableRawLine :: [Int] -> GenParser Char ParserState [String] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline - return (gridTableSplitLine indices $ removeTrailingSpace line) + return (gridTableSplitLine indices line) -- | Parse row of grid table. gridTableRow :: GenParser Char ParserState Block @@ -562,9 +571,9 @@ gridTableFooter = blanklines --- -- | Parse a string with a given parser and state. -readWith :: GenParser Char ParserState a -- ^ parser +readWith :: GenParser t ParserState a -- ^ parser -> ParserState -- ^ initial state - -> String -- ^ input string + -> [t] -- ^ input -> a readWith parser state input = case runParser parser state "source" input of @@ -583,11 +592,8 @@ data ParserState = ParserState { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? stateParserContext :: ParserContext, -- ^ Inside list? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? - stateSanitizeHTML :: Bool, -- ^ Sanitize HTML? stateKeys :: KeyTable, -- ^ List of reference keys -#ifdef _CITEPROC stateCitations :: [String], -- ^ List of available citations -#endif stateNotes :: NoteTable, -- ^ List of notes stateTabStop :: Int, -- ^ Tab stop stateStandalone :: Bool, -- ^ Parse bibliographic info? @@ -602,7 +608,9 @@ data ParserState = ParserState stateIndentedCodeClasses :: [String], -- ^ Classes to use for indented code blocks stateNextExample :: Int, -- ^ Number of next example stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers - stateHasChapters :: Bool -- ^ True if \chapter encountered + stateHasChapters :: Bool, -- ^ True if \chapter encountered + stateApplyMacros :: Bool, -- ^ Apply LaTeX macros? + stateMacros :: [Macro] -- ^ List of macros defined so far } deriving Show @@ -611,11 +619,8 @@ defaultParserState = ParserState { stateParseRaw = False, stateParserContext = NullState, stateQuoteContext = NoQuote, - stateSanitizeHTML = False, stateKeys = M.empty, -#ifdef _CITEPROC stateCitations = [], -#endif stateNotes = [], stateTabStop = 4, stateStandalone = False, @@ -630,7 +635,9 @@ defaultParserState = stateIndentedCodeClasses = [], stateNextExample = 1, stateExamples = M.empty, - stateHasChapters = False } + stateHasChapters = False, + stateApplyMacros = True, + stateMacros = []} data HeaderType = SingleHeader Char -- ^ Single line of characters underneath @@ -650,13 +657,20 @@ data QuoteContext type NoteTable = [(String, String)] -newtype Key = Key [Inline] deriving (Show, Read) +newtype Key = Key [Inline] deriving (Show, Read, Eq, Ord) -instance Eq Key where - Key a == Key b = refsMatch a b +toKey :: [Inline] -> Key +toKey = Key . bottomUp lowercase + where lowercase :: Inline -> Inline + lowercase (Str xs) = Str (map toLower xs) + lowercase (Math t xs) = Math t (map toLower xs) + lowercase (Code attr xs) = Code attr (map toLower xs) + lowercase (RawInline f xs) = RawInline f (map toLower xs) + lowercase LineBreak = Space + lowercase x = x -instance Ord Key where - compare (Key a) (Key b) = if a == b then EQ else compare a b +fromKey :: Key -> [Inline] +fromKey (Key xs) = xs type KeyTable = M.Map Key Target @@ -668,33 +682,130 @@ lookupKeySrc table key = case M.lookup key table of Nothing -> Nothing Just src -> Just src --- | Returns @True@ if keys match (case insensitive). -refsMatch :: [Inline] -> [Inline] -> Bool -refsMatch ((Str x):restx) ((Str y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Emph x):restx) ((Emph y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strong x):restx) ((Strong y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Strikeout x):restx) ((Strikeout y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Superscript x):restx) ((Superscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Subscript x):restx) ((Subscript y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((SmallCaps x):restx) ((SmallCaps y):resty) = - refsMatch x y && refsMatch restx resty -refsMatch ((Quoted t x):restx) ((Quoted u y):resty) = - t == u && refsMatch x y && refsMatch restx resty -refsMatch ((Code x):restx) ((Code y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((Math t x):restx) ((Math u y):resty) = - ((map toLower x) == (map toLower y)) && t == u && refsMatch restx resty -refsMatch ((TeX x):restx) ((TeX y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch ((HtmlInline x):restx) ((HtmlInline y):resty) = - ((map toLower x) == (map toLower y)) && refsMatch restx resty -refsMatch (x:restx) (y:resty) = (x == y) && refsMatch restx resty -refsMatch [] x = null x -refsMatch x [] = null x +-- | Fail unless we're in "smart typography" mode. +failUnlessSmart :: GenParser tok ParserState () +failUnlessSmart = getState >>= guard . stateSmart + +smartPunctuation :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +smartPunctuation inlineParser = do + failUnlessSmart + choice [ quoted inlineParser, apostrophe, dash, ellipses ] + +apostrophe :: GenParser Char ParserState Inline +apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe + +quoted :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser + +withQuoteContext :: QuoteContext + -> (GenParser Char ParserState Inline) + -> GenParser Char ParserState Inline +withQuoteContext context parser = do + oldState <- getState + let oldQuoteContext = stateQuoteContext oldState + setState oldState { stateQuoteContext = context } + result <- parser + newState <- getState + setState newState { stateQuoteContext = oldQuoteContext } + return result + +singleQuoted :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +singleQuoted inlineParser = try $ do + singleQuoteStart + withQuoteContext InSingleQuote $ many1Till inlineParser singleQuoteEnd >>= + return . Quoted SingleQuote . normalizeSpaces + +doubleQuoted :: GenParser Char ParserState Inline + -> GenParser Char ParserState Inline +doubleQuoted inlineParser = try $ do + doubleQuoteStart + withQuoteContext InDoubleQuote $ do + contents <- manyTill inlineParser doubleQuoteEnd + return . Quoted DoubleQuote . normalizeSpaces $ contents + +failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () +failIfInQuoteContext context = do + st <- getState + if stateQuoteContext st == context + then fail "already inside quotes" + else return () + +charOrRef :: [Char] -> GenParser Char st Char +charOrRef cs = + oneOf cs <|> try (do c <- characterReference + guard (c `elem` cs) + return c) + +singleQuoteStart :: GenParser Char ParserState () +singleQuoteStart = do + failIfInQuoteContext InSingleQuote + try $ do charOrRef "'\8216" + notFollowedBy (oneOf ")!],.;:-? \t\n") + notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> + satisfy (not . isAlphaNum))) + -- possess/contraction + return () + +singleQuoteEnd :: GenParser Char st () +singleQuoteEnd = try $ do + charOrRef "'\8217" + notFollowedBy alphaNum + +doubleQuoteStart :: GenParser Char ParserState () +doubleQuoteStart = do + failIfInQuoteContext InDoubleQuote + try $ do charOrRef "\"\8220" + notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) + +doubleQuoteEnd :: GenParser Char st () +doubleQuoteEnd = do + charOrRef "\"\8221" + return () + +ellipses :: GenParser Char st Inline +ellipses = do + try (charOrRef "…") <|> try (string "..." >> return '…') + return Ellipses + +dash :: GenParser Char st Inline +dash = enDash <|> emDash + +enDash :: GenParser Char st Inline +enDash = do + try (charOrRef "–") <|> + try (char '-' >> lookAhead (satisfy isDigit) >> return '–') + return EnDash + +emDash :: GenParser Char st Inline +emDash = do + try (charOrRef "—") <|> (try $ string "--" >> optional (char '-') >> return '—') + return EmDash + +-- +-- Macros +-- + +-- | Parse a \newcommand or \renewcommand macro definition. +macro :: GenParser Char ParserState Block +macro = do + getState >>= guard . stateApplyMacros + inp <- getInput + case parseMacroDefinitions inp of + ([], _) -> pzero + (ms, rest) -> do count (length inp - length rest) anyChar + updateState $ \st -> + st { stateMacros = ms ++ stateMacros st } + return Null + +-- | Apply current macros to string. +applyMacros' :: String -> GenParser Char ParserState String +applyMacros' target = do + apply <- liftM stateApplyMacros getState + if apply + then do macros <- liftM stateMacros getState + return $ applyMacros macros target + else return target diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs new file mode 100644 index 000000000..54d65af6f --- /dev/null +++ b/src/Text/Pandoc/Pretty.hs @@ -0,0 +1,429 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{- +Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111(-1)307 USA +-} + +{- | + Module : Text.Pandoc.Pretty + Copyright : Copyright (C) 2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +A prettyprinting library for the production of text documents, +including wrapped text, indentated blocks, and tables. +-} + +module Text.Pandoc.Pretty ( + Doc + , render + , cr + , blankline + , space + , text + , char + , prefixed + , flush + , nest + , hang + , nowrap + , offset + , height + , lblock + , cblock + , rblock + , (<>) + , (<+>) + , ($$) + , ($+$) + , isEmpty + , empty + , cat + , hcat + , hsep + , vcat + , vsep + , inside + , braces + , brackets + , parens + , quotes + , doubleQuotes + ) + +where +import Data.DList (DList, fromList, toList, cons, singleton) +import Data.List (intercalate) +import Data.Monoid +import Data.String +import Control.Monad.State +import Data.Char (isSpace) + +data Monoid a => + RenderState a = RenderState{ + output :: [a] -- ^ In reverse order + , prefix :: String + , usePrefix :: Bool + , lineLength :: Maybe Int -- ^ 'Nothing' means no wrapping + , column :: Int + , newlines :: Int -- ^ Number of preceding newlines + } + +type DocState a = State (RenderState a) () + +data D = Text Int String + | Block Int [String] + | Prefixed String Doc + | Flush Doc + | BreakingSpace + | CarriageReturn + | NewLine + | BlankLine + deriving (Show) + +newtype Doc = Doc { unDoc :: DList D } + deriving (Monoid) + +instance Show Doc where + show = render Nothing + +instance IsString Doc where + fromString = text + +-- | True if the document is empty. +isEmpty :: Doc -> Bool +isEmpty = null . toList . unDoc + +-- | The empty document. +empty :: Doc +empty = mempty + +-- | @a <> b@ is the result of concatenating @a@ with @b@. +(<>) :: Doc -> Doc -> Doc +(<>) = mappend + +-- | Concatenate a list of 'Doc's. +cat :: [Doc] -> Doc +cat = mconcat + +-- | Same as 'cat'. +hcat :: [Doc] -> Doc +hcat = mconcat + +-- | Concatenate a list of 'Doc's, putting breakable spaces +-- between them. +(<+>) :: Doc -> Doc -> Doc +(<+>) x y = if isEmpty x + then y + else if isEmpty y + then x + else x <> space <> y + +-- | Same as 'cat', but putting breakable spaces between the +-- 'Doc's. +hsep :: [Doc] -> Doc +hsep = foldr (<+>) empty + +-- | @a $$ b@ puts @a@ above @b@. +($$) :: Doc -> Doc -> Doc +($$) x y = if isEmpty x + then y + else if isEmpty y + then x + else x <> cr <> y + +-- | @a $$ b@ puts @a@ above @b@, with a blank line between. +($+$) :: Doc -> Doc -> Doc +($+$) x y = if isEmpty x + then y + else if isEmpty y + then x + else x <> blankline <> y + +-- | List version of '$$'. +vcat :: [Doc] -> Doc +vcat = foldr ($$) empty + +-- | List version of '$+$'. +vsep :: [Doc] -> Doc +vsep = foldr ($+$) empty + +outp :: (IsString a, Monoid a) + => Int -> String -> DocState a +outp off s | off <= 0 = do + st' <- get + let rawpref = prefix st' + when (column st' == 0 && usePrefix st' && not (null rawpref)) $ do + let pref = reverse $ dropWhile isSpace $ reverse rawpref + modify $ \st -> st{ output = fromString pref : output st + , column = column st + length pref } + when (off < 0) $ do + modify $ \st -> st { output = fromString s : output st + , column = 0 + , newlines = newlines st + 1 } +outp off s = do + st' <- get + let pref = prefix st' + when (column st' == 0 && usePrefix st' && not (null pref)) $ do + modify $ \st -> st{ output = fromString pref : output st + , column = column st + length pref } + modify $ \st -> st{ output = fromString s : output st + , column = column st + off + , newlines = 0 } + +-- | Renders a 'Doc'. @render (Just n)@ will use +-- a line length of @n@ to reflow text on breakable spaces. +-- @render Nothing@ will not reflow text. +render :: (Monoid a, IsString a) + => Maybe Int -> Doc -> a +render linelen doc = fromString . mconcat . reverse . output $ + execState (renderDoc doc) startingState + where startingState = RenderState{ + output = mempty + , prefix = "" + , usePrefix = True + , lineLength = linelen + , column = 0 + , newlines = 2 } + +renderDoc :: (IsString a, Monoid a) + => Doc -> DocState a +renderDoc = renderList . toList . unDoc + +renderList :: (IsString a, Monoid a) + => [D] -> DocState a +renderList [] = return () +renderList (Text off s : xs) = do + outp off s + renderList xs + +renderList (Prefixed pref d : xs) = do + st <- get + let oldPref = prefix st + put st{ prefix = prefix st ++ pref } + renderDoc d + modify $ \s -> s{ prefix = oldPref } + renderList xs + +renderList (Flush d : xs) = do + st <- get + let oldUsePrefix = usePrefix st + put st{ usePrefix = False } + renderDoc d + modify $ \s -> s{ usePrefix = oldUsePrefix } + renderList xs + +renderList (BlankLine : xs) = do + st <- get + case output st of + _ | newlines st > 1 || null xs -> return () + _ | column st == 0 -> do + outp (-1) "\n" + _ -> do + outp (-1) "\n" + outp (-1) "\n" + renderList xs + +renderList (CarriageReturn : xs) = do + st <- get + if newlines st > 0 || null xs + then renderList xs + else do + outp (-1) "\n" + renderList xs + +renderList (NewLine : xs) = do + outp (-1) "\n" + renderList xs + +renderList (BreakingSpace : CarriageReturn : xs) = renderList (CarriageReturn:xs) +renderList (BreakingSpace : NewLine : xs) = renderList (NewLine:xs) +renderList (BreakingSpace : BlankLine : xs) = renderList (BlankLine:xs) +renderList (BreakingSpace : BreakingSpace : xs) = renderList (BreakingSpace:xs) +renderList (BreakingSpace : xs) = do + let isText (Text _ _) = True + isText (Block _ _) = True + isText _ = False + let isBreakingSpace BreakingSpace = True + isBreakingSpace _ = False + let xs' = dropWhile isBreakingSpace xs + let next = takeWhile isText xs' + st <- get + let off = sum $ map offsetOf next + case lineLength st of + Just l | column st + 1 + off > l -> do + outp (-1) "\n" + renderList xs' + _ -> do + outp 1 " " + renderList xs' + +renderList (b1@Block{} : b2@Block{} : xs) = + renderList (mergeBlocks False b1 b2 : xs) + +renderList (b1@Block{} : BreakingSpace : b2@Block{} : xs) = + renderList (mergeBlocks True b1 b2 : xs) + +renderList (Block width lns : xs) = do + st <- get + let oldPref = prefix st + case column st - length oldPref of + n | n > 0 -> modify $ \s -> s{ prefix = oldPref ++ replicate n ' ' } + _ -> return () + renderDoc $ blockToDoc width lns + modify $ \s -> s{ prefix = oldPref } + renderList xs + +mergeBlocks :: Bool -> D -> D -> D +mergeBlocks addSpace (Block w1 lns1) (Block w2 lns2) = + Block (w1 + w2 + if addSpace then 1 else 0) $ + zipWith (\l1 l2 -> pad w1 l1 ++ l2) (lns1 ++ empties) (map sp lns2 ++ empties) + where empties = replicate (abs $ length lns1 - length lns2) "" + pad n s = s ++ replicate (n - length s) ' ' + sp "" = "" + sp xs = if addSpace then (' ' : xs) else xs +mergeBlocks _ _ _ = error "mergeBlocks tried on non-Block!" + +blockToDoc :: Int -> [String] -> Doc +blockToDoc _ lns = text $ intercalate "\n" lns + +offsetOf :: D -> Int +offsetOf (Text o _) = o +offsetOf (Block w _) = w +offsetOf BreakingSpace = 1 +offsetOf _ = 0 + +-- | A literal string. +text :: String -> Doc +text = Doc . toChunks + where toChunks :: String -> DList D + toChunks [] = mempty + toChunks s = case break (=='\n') s of + ([], _:ys) -> NewLine `cons` toChunks ys + (xs, _:ys) -> Text (length xs) xs `cons` + NewLine `cons` toChunks ys + (xs, []) -> singleton $ Text (length xs) xs + +-- | A character. +char :: Char -> Doc +char c = text [c] + +-- | A breaking (reflowable) space. +space :: Doc +space = Doc $ singleton BreakingSpace + +-- | A carriage return. Does nothing if we're at the beginning of +-- a line; otherwise inserts a newline. +cr :: Doc +cr = Doc $ singleton CarriageReturn + +-- | Inserts a blank line unless one exists already. +-- (@blankline <> blankline@ has the same effect as @blankline@. +-- If you want multiple blank lines, use @text "\\n\\n"@. +blankline :: Doc +blankline = Doc $ singleton BlankLine + +-- | Uses the specified string as a prefix for every line of +-- the inside document (except the first, if not at the beginning +-- of the line). +prefixed :: String -> Doc -> Doc +prefixed pref doc = Doc $ singleton $ Prefixed pref doc + +-- | Makes a 'Doc' flush against the left margin. +flush :: Doc -> Doc +flush doc = Doc $ singleton $ Flush doc + +-- | Indents a 'Doc' by the specified number of spaces. +nest :: Int -> Doc -> Doc +nest ind = prefixed (replicate ind ' ') + +-- | A hanging indent. @hang ind start doc@ prints @start@, +-- then @doc@, leaving an indent of @ind@ spaces on every +-- line but the first. +hang :: Int -> Doc -> Doc -> Doc +hang ind start doc = start <> nest ind doc + +-- | Makes a 'Doc' non-reflowable. +nowrap :: Doc -> Doc +nowrap doc = Doc $ fromList $ map replaceSpace $ toList $ unDoc doc + where replaceSpace BreakingSpace = Text 1 " " + replaceSpace x = x + +-- | Returns the width of a 'Doc'. +offset :: Doc -> Int +offset d = case map length . lines . render Nothing $ d of + [] -> 0 + os -> maximum os + +block :: (String -> String) -> Int -> Doc -> Doc +block filler width = Doc . singleton . Block width . + map filler . chop width . render (Just width) + +-- | @lblock n d@ is a block of width @n@ characters, with +-- text derived from @d@ and aligned to the left. +lblock :: Int -> Doc -> Doc +lblock = block id + +-- | Like 'lblock' but aligned to the right. +rblock :: Int -> Doc -> Doc +rblock w = block (\s -> replicate (w - length s) ' ' ++ s) w + +-- | Like 'lblock' but aligned centered. +cblock :: Int -> Doc -> Doc +cblock w = block (\s -> replicate ((w - length s) `div` 2) ' ' ++ s) w + +-- | Returns the height of a block or other 'Doc'. +height :: Doc -> Int +height = length . lines . render Nothing + +chop :: Int -> String -> [String] +chop _ [] = [] +chop n cs = case break (=='\n') cs of + (xs, ys) -> if len <= n + then case ys of + [] -> [xs] + (_:[]) -> [xs, ""] + (_:zs) -> xs : chop n zs + else take n xs : chop n (drop n xs ++ ys) + where len = length xs + +-- | Encloses a 'Doc' inside a start and end 'Doc'. +inside :: Doc -> Doc -> Doc -> Doc +inside start end contents = + start <> contents <> end + +-- | Puts a 'Doc' in curly braces. +braces :: Doc -> Doc +braces = inside (char '{') (char '}') + +-- | Puts a 'Doc' in square brackets. +brackets :: Doc -> Doc +brackets = inside (char '[') (char ']') + +-- | Puts a 'Doc' in parentheses. +parens :: Doc -> Doc +parens = inside (char '(') (char ')') + +-- | Wraps a 'Doc' in single quotes. +quotes :: Doc -> Doc +quotes = inside (char '\'') (char '\'') + +-- | Wraps a 'Doc' in double quotes. +doubleQuotes :: Doc -> Doc +doubleQuotes = inside (char '"') (char '"') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f47309d3f..18e3113d3 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -27,43 +27,397 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of HTML to 'Pandoc' document. -} -module Text.Pandoc.Readers.HTML ( - readHtml, - rawHtmlInline, - rawHtmlBlock, - anyHtmlBlockTag, - anyHtmlInlineTag, - anyHtmlTag, - anyHtmlEndTag, - htmlEndTag, - extractTagType, - htmlBlockElement, - htmlComment, - unsanitaryURI +module Text.Pandoc.Readers.HTML ( readHtml + , htmlTag + , htmlInBalanced + , isInlineTag + , isBlockTag + , isTextTag + , isCommentTag ) where import Text.ParserCombinators.Parsec +import Text.ParserCombinators.Parsec.Pos +import Text.HTML.TagSoup +import Text.HTML.TagSoup.Match import Text.Pandoc.Definition +import Text.Pandoc.Builder (text, toList) import Text.Pandoc.Shared import Text.Pandoc.Parsing -import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) -import Data.Maybe ( fromMaybe ) -import Data.List ( isPrefixOf, isSuffixOf, intercalate ) -import Data.Char ( toLower, isAlphaNum ) -import Network.URI ( parseURIReference, URI (..) ) -import Control.Monad ( liftM, when ) +import Data.Maybe ( fromMaybe, isJust ) +import Data.List ( intercalate ) +import Data.Char ( isSpace, isDigit ) +import Control.Monad ( liftM, guard ) -- | Convert HTML-formatted string to 'Pandoc' document. readHtml :: ParserState -- ^ Parser state -> String -- ^ String to parse (assumes @'\n'@ line endings) -> Pandoc -readHtml = readWith parseHtml +readHtml st inp = Pandoc meta blocks + where blocks = readWith parseBody st rest + tags = canonicalizeTags $ + parseTagsOptions parseOptions{ optTagPosition = True } inp + hasHeader = any (~== TagOpen "head" []) tags + (meta, rest) = if hasHeader + then parseHeader tags + else (Meta [] [] [], tags) + +type TagParser = GenParser (Tag String) ParserState + +-- TODO - fix this - not every header has a title tag +parseHeader :: [Tag String] -> (Meta, [Tag String]) +parseHeader tags = (Meta{docTitle = tit'', docAuthors = [], docDate = []}, rest) + where (tit,_) = break (~== TagClose "title") $ drop 1 $ + dropWhile (\t -> not $ t ~== TagOpen "title" []) tags + tit' = concatMap fromTagText $ filter isTagText tit + tit'' = normalizeSpaces $ toList $ text tit' + rest = drop 1 $ dropWhile (\t -> not $ t ~== TagClose "head" || + t ~== TagOpen "body" []) tags + +parseBody :: TagParser [Block] +parseBody = liftM concat $ manyTill block eof + +block :: TagParser [Block] +block = choice + [ pPara + , pHeader + , pBlockQuote + , pCodeBlock + , pList + , pHrule + , pSimpleTable + , pPlain + , pRawHtmlBlock + ] + +renderTags' :: [Tag String] -> String +renderTags' = renderTagsOptions + renderOptions{ optMinimize = (`elem` ["hr","br","img"]) } + +pList :: TagParser [Block] +pList = pBulletList <|> pOrderedList <|> pDefinitionList + +pBulletList :: TagParser [Block] +pBulletList = try $ do + pSatisfy (~== TagOpen "ul" []) + let nonItem = pSatisfy (\t -> + not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && + not (t ~== TagClose "ul")) + -- note: if they have an <ol> or <ul> not in scope of a <li>, + -- treat it as a list item, though it's not valid xhtml... + skipMany nonItem + items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ul") + return [BulletList items] + +pOrderedList :: TagParser [Block] +pOrderedList = try $ do + TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) + st <- getState + let (start, style) = if stateStrict st + then (1, DefaultStyle) + else (sta', sty') + where sta = fromMaybe "1" $ + lookup "start" attribs + sta' = if all isDigit sta + then read sta + else 1 + sty = fromMaybe (fromMaybe "" $ + lookup "style" attribs) $ + lookup "class" attribs + sty' = case sty of + "lower-roman" -> LowerRoman + "upper-roman" -> UpperRoman + "lower-alpha" -> LowerAlpha + "upper-alpha" -> UpperAlpha + "decimal" -> Decimal + _ -> DefaultStyle + let nonItem = pSatisfy (\t -> + not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && + not (t ~== TagClose "ol")) + -- note: if they have an <ol> or <ul> not in scope of a <li>, + -- treat it as a list item, though it's not valid xhtml... + skipMany nonItem + items <- manyTill (pInTags "li" block >>~ skipMany nonItem) (pCloses "ol") + return [OrderedList (start, style, DefaultDelim) items] + +pDefinitionList :: TagParser [Block] +pDefinitionList = try $ do + pSatisfy (~== TagOpen "dl" []) + items <- manyTill pDefListItem (pCloses "dl") + return [DefinitionList items] + +pDefListItem :: TagParser ([Inline],[[Block]]) +pDefListItem = try $ do + let nonItem = pSatisfy (\t -> not (t ~== TagOpen "dt" []) && + not (t ~== TagOpen "dd" []) && not (t ~== TagClose "dl")) + terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline) + defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block) + skipMany nonItem + let term = intercalate [LineBreak] terms + return (term, defs) + +pRawTag :: TagParser String +pRawTag = do + tag <- pAnyTag + let ignorable x = x `elem` ["html","head","body"] + if tagOpen ignorable (const True) tag || tagClose ignorable tag + then return [] + else return $ renderTags' [tag] + +pRawHtmlBlock :: TagParser [Block] +pRawHtmlBlock = do + raw <- pHtmlBlock "script" <|> pHtmlBlock "style" <|> pRawTag + state <- getState + if stateParseRaw state && not (null raw) + then return [RawBlock "html" raw] + else return [] + +pHtmlBlock :: String -> TagParser String +pHtmlBlock t = try $ do + open <- pSatisfy (~== TagOpen t []) + contents <- manyTill pAnyTag (pSatisfy (~== TagClose t)) + return $ renderTags' $ [open] ++ contents ++ [TagClose t] + +pHeader :: TagParser [Block] +pHeader = try $ do + TagOpen tagtype attr <- pSatisfy $ + tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) + (const True) + let bodyTitle = TagOpen tagtype attr ~== TagOpen "h1" [("class","title")] + let level = read (drop 1 tagtype) + contents <- liftM concat $ manyTill inline (pCloses tagtype <|> eof) + return $ if bodyTitle + then [] -- skip a representation of the title in the body + else [Header level $ normalizeSpaces contents] + +pHrule :: TagParser [Block] +pHrule = do + pSelfClosing (=="hr") (const True) + return [HorizontalRule] + +pSimpleTable :: TagParser [Block] +pSimpleTable = try $ do + TagOpen _ _ <- pSatisfy (~== TagOpen "table" []) + skipMany pBlank + head' <- option [] $ pInTags "th" pTd + rows <- many1 $ try $ + skipMany pBlank >> pInTags "tr" pTd + skipMany pBlank + TagClose _ <- pSatisfy (~== TagClose "table") + let cols = maximum $ map length rows + let aligns = replicate cols AlignLeft + let widths = replicate cols 0 + return [Table [] aligns widths head' rows] + +pTd :: TagParser [TableCell] +pTd = try $ do + skipMany pBlank + res <- pInTags "td" pPlain + skipMany pBlank + return [res] + +pBlockQuote :: TagParser [Block] +pBlockQuote = do + contents <- pInTags "blockquote" block + return [BlockQuote contents] + +pPlain :: TagParser [Block] +pPlain = do + contents <- liftM (normalizeSpaces . concat) $ many1 inline + if null contents + then return [] + else return [Plain contents] + +pPara :: TagParser [Block] +pPara = do + contents <- pInTags "p" inline + return [Para $ normalizeSpaces contents] + +pCodeBlock :: TagParser [Block] +pCodeBlock = try $ do + TagOpen _ attr <- pSatisfy (~== TagOpen "pre" []) + contents <- manyTill pAnyTag (pCloses "pre" <|> eof) + let rawText = concatMap fromTagText $ filter isTagText contents + -- drop leading newline if any + let result' = case rawText of + '\n':xs -> xs + _ -> rawText + -- drop trailing newline if any + let result = case reverse result' of + '\n':_ -> init result' + _ -> result' + let attribsId = fromMaybe "" $ lookup "id" attr + let attribsClasses = words $ fromMaybe "" $ lookup "class" attr + let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr + st <- getState + let attribs = if stateStrict st + then ("",[],[]) + else (attribsId, attribsClasses, attribsKV) + return [CodeBlock attribs result] + +inline :: TagParser [Inline] +inline = choice + [ pTagText + , pEmph + , pStrong + , pSuperscript + , pSubscript + , pStrikeout + , pLineBreak + , pLink + , pImage + , pCode + , pRawHtmlInline + ] + +pLocation :: TagParser () +pLocation = do + (TagPosition r c) <- pSat isTagPosition + setPosition $ newPos "input" r c + +pSat :: (Tag String -> Bool) -> TagParser (Tag String) +pSat f = do + pos <- getPosition + token show (const pos) (\x -> if f x then Just x else Nothing) + +pSatisfy :: (Tag String -> Bool) -> TagParser (Tag String) +pSatisfy f = try $ optional pLocation >> pSat f + +pAnyTag :: TagParser (Tag String) +pAnyTag = pSatisfy (const True) + +pSelfClosing :: (String -> Bool) -> ([Attribute String] -> Bool) + -> TagParser (Tag String) +pSelfClosing f g = do + open <- pSatisfy (tagOpen f g) + optional $ pSatisfy (tagClose f) + return open + +pEmph :: TagParser [Inline] +pEmph = pInlinesInTags "em" Emph <|> pInlinesInTags "i" Emph + +pStrong :: TagParser [Inline] +pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong + +pSuperscript :: TagParser [Inline] +pSuperscript = failIfStrict >> pInlinesInTags "sup" Superscript + +pSubscript :: TagParser [Inline] +pSubscript = failIfStrict >> pInlinesInTags "sub" Subscript + +pStrikeout :: TagParser [Inline] +pStrikeout = do + failIfStrict + pInlinesInTags "s" Strikeout <|> + pInlinesInTags "strike" Strikeout <|> + pInlinesInTags "del" Strikeout <|> + try (do pSatisfy (~== TagOpen "span" [("class","strikeout")]) + contents <- liftM concat $ manyTill inline (pCloses "span") + return [Strikeout contents]) + +pLineBreak :: TagParser [Inline] +pLineBreak = do + pSelfClosing (=="br") (const True) + return [LineBreak] + +pLink :: TagParser [Inline] +pLink = try $ do + tag <- pSatisfy (tagOpenLit "a" (isJust . lookup "href")) + let url = fromAttrib "href" tag + let title = fromAttrib "title" tag + lab <- liftM concat $ manyTill inline (pCloses "a") + return [Link (normalizeSpaces lab) (escapeURI url, title)] + +pImage :: TagParser [Inline] +pImage = do + tag <- pSelfClosing (=="img") (isJust . lookup "src") + let url = fromAttrib "src" tag + let title = fromAttrib "title" tag + let alt = fromAttrib "alt" tag + return [Image (toList $ text alt) (escapeURI url, title)] + +pCode :: TagParser [Inline] +pCode = try $ do + (TagOpen open attr) <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) + result <- manyTill pAnyTag (pCloses open) + let ident = fromMaybe "" $ lookup "id" attr + let classes = words $ fromMaybe [] $ lookup "class" attr + let rest = filter (\(x,_) -> x /= "id" && x /= "class") attr + return [Code (ident,classes,rest) + $ intercalate " " $ lines $ innerText result] + +pRawHtmlInline :: TagParser [Inline] +pRawHtmlInline = do + result <- pSatisfy (tagComment (const True)) <|> pSatisfy isInlineTag + state <- getState + if stateParseRaw state + then return [RawInline "html" $ renderTags' [result]] + else return [] + +pInlinesInTags :: String -> ([Inline] -> Inline) + -> TagParser [Inline] +pInlinesInTags tagtype f = do + contents <- pInTags tagtype inline + return [f contents] + +pInTags :: String -> TagParser [a] + -> TagParser [a] +pInTags tagtype parser = try $ do + pSatisfy (~== TagOpen tagtype []) + liftM concat $ manyTill parser (pCloses tagtype <|> eof) + +pCloses :: String -> TagParser () +pCloses tagtype = try $ do + t <- lookAhead $ pSatisfy $ \tag -> isTagClose tag || isTagOpen tag + case t of + (TagClose t') | t' == tagtype -> pAnyTag >> return () + (TagOpen t' _) | t' `closes` tagtype -> return () + (TagClose "ul") | tagtype == "li" -> return () + (TagClose "ol") | tagtype == "li" -> return () + (TagClose "dl") | tagtype == "li" -> return () + _ -> pzero + +pTagText :: TagParser [Inline] +pTagText = try $ do + (TagText str) <- pSatisfy isTagText + st <- getState + case runParser (many pTagContents) st "text" str of + Left _ -> fail $ "Could not parse `" ++ str ++ "'" + Right result -> return result + +pBlank :: TagParser () +pBlank = try $ do + (TagText str) <- pSatisfy isTagText + guard $ all isSpace str + +pTagContents :: GenParser Char ParserState Inline +pTagContents = pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol + +pStr :: GenParser Char ParserState Inline +pStr = liftM Str $ many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) + +isSpecial :: Char -> Bool +isSpecial '"' = True +isSpecial '\'' = True +isSpecial '.' = True +isSpecial '-' = True +isSpecial '\8216' = True +isSpecial '\8217' = True +isSpecial '\8220' = True +isSpecial '\8221' = True +isSpecial _ = False + +pSymbol :: GenParser Char ParserState Inline +pSymbol = satisfy isSpecial >>= return . Str . (:[]) + +pSpace :: GenParser Char ParserState Inline +pSpace = many1 (satisfy isSpace) >> return Space -- -- Constants -- -eitherBlockOrInline :: [[Char]] +eitherBlockOrInline :: [String] eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", "map", "area", "object"] @@ -76,57 +430,41 @@ inlineHtmlTags = ["a", "abbr", "acronym", "b", "basefont", "bdo", "big", "textarea", "tt", "u", "var"] -} -blockHtmlTags :: [[Char]] +blockHtmlTags :: [String] blockHtmlTags = ["address", "blockquote", "body", "center", "dir", "div", "dl", "fieldset", "form", "h1", "h2", "h3", "h4", - "h5", "h6", "head", "hr", "html", "isindex", "menu", "noframes", - "noscript", "ol", "p", "pre", "table", "ul", "dd", + "h5", "h6", "head", "hr", "html", "isindex", "menu", + "noframes", "noscript", "ol", "p", "pre", "table", "ul", "dd", "dt", "frameset", "li", "tbody", "td", "tfoot", "th", "thead", "tr", "script", "style"] -sanitaryTags :: [[Char]] -sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big", - "blockquote", "br", "button", "caption", "center", - "cite", "code", "col", "colgroup", "dd", "del", "dfn", - "dir", "div", "dl", "dt", "em", "fieldset", "font", - "form", "h1", "h2", "h3", "h4", "h5", "h6", "hr", - "i", "img", "input", "ins", "kbd", "label", "legend", - "li", "map", "menu", "ol", "optgroup", "option", "p", - "pre", "q", "s", "samp", "select", "small", "span", - "strike", "strong", "sub", "sup", "table", "tbody", - "td", "textarea", "tfoot", "th", "thead", "tr", "tt", - "u", "ul", "var"] - -sanitaryAttributes :: [[Char]] -sanitaryAttributes = ["abbr", "accept", "accept-charset", - "accesskey", "action", "align", "alt", "axis", - "border", "cellpadding", "cellspacing", "char", - "charoff", "charset", "checked", "cite", "class", - "clear", "cols", "colspan", "color", "compact", - "coords", "datetime", "dir", "disabled", - "enctype", "for", "frame", "headers", "height", - "href", "hreflang", "hspace", "id", "ismap", - "label", "lang", "longdesc", "maxlength", "media", - "method", "multiple", "name", "nohref", "noshade", - "nowrap", "prompt", "readonly", "rel", "rev", - "rows", "rowspan", "rules", "scope", "selected", - "shape", "size", "span", "src", "start", - "summary", "tabindex", "target", "title", "type", - "usemap", "valign", "value", "vspace", "width"] +isInlineTag :: Tag String -> Bool +isInlineTag t = tagOpen (`notElem` blockHtmlTags) (const True) t || + tagClose (`notElem` blockHtmlTags) t || + tagComment (const True) t + +isBlockTag :: Tag String -> Bool +isBlockTag t = tagOpen (`elem` blocktags) (const True) t || + tagClose (`elem` blocktags) t || + tagComment (const True) t + where blocktags = blockHtmlTags ++ eitherBlockOrInline + +isTextTag :: Tag String -> Bool +isTextTag = tagText (const True) + +isCommentTag :: Tag String -> Bool +isCommentTag = tagComment (const True) -- taken from HXT and extended closes :: String -> String -> Bool -"EOF" `closes` _ = True _ `closes` "body" = False _ `closes` "html" = False "a" `closes` "a" = True "li" `closes` "li" = True "th" `closes` t | t `elem` ["th","td"] = True -"td" `closes` t | t `elem` ["th","td"] = True "tr" `closes` t | t `elem` ["th","td","tr"] = True "dt" `closes` t | t `elem` ["dt","dd"] = True -"dd" `closes` t | t `elem` ["dt","dd"] = True "hr" `closes` "p" = True "p" `closes` "p" = True "meta" `closes` "meta" = True @@ -148,627 +486,34 @@ t1 `closes` t2 | t2 `notElem` (blockHtmlTags ++ eitherBlockOrInline) = True _ `closes` _ = False --- --- HTML utility functions --- - --- | Returns @True@ if sanitization is specified and the specified tag is --- not on the sanitized tag list. -unsanitaryTag :: [Char] - -> GenParser tok ParserState Bool -unsanitaryTag tag = do - st <- getState - return $ stateSanitizeHTML st && tag `notElem` sanitaryTags - --- | returns @True@ if sanitization is specified and the specified attribute --- is not on the sanitized attribute list. -unsanitaryAttribute :: ([Char], String, t) - -> GenParser tok ParserState Bool -unsanitaryAttribute (attr, val, _) = do - st <- getState - return $ stateSanitizeHTML st && - (attr `notElem` sanitaryAttributes || - (attr `elem` ["href","src"] && unsanitaryURI val)) - --- | Returns @True@ if the specified URI is potentially a security risk. -unsanitaryURI :: String -> Bool -unsanitaryURI u = - let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:", - "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:", - "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:", - "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:", - "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:", - "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:", - "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:", - "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:", - "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:", - "snews:", "webcal:", "ymsgr:"] - in case parseURIReference (escapeURI u) of - Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes - Nothing -> True - --- | Read blocks until end tag. -blocksTilEnd :: String -> GenParser Char ParserState [Block] -blocksTilEnd tag = do - blocks <- manyTill (block >>~ spaces) (htmlEndTag tag) - return $ filter (/= Null) blocks - --- | Read inlines until end tag. -inlinesTilEnd :: String -> GenParser Char ParserState [Inline] -inlinesTilEnd tag = manyTill inline (htmlEndTag tag) - --- | Parse blocks between open and close tag. -blocksIn :: String -> GenParser Char ParserState [Block] -blocksIn tag = try $ htmlOpenTag tag >> spaces >> blocksTilEnd tag - --- | Parse inlines between open and close tag. -inlinesIn :: String -> GenParser Char ParserState [Inline] -inlinesIn tag = try $ htmlOpenTag tag >> spaces >> inlinesTilEnd tag - --- | Extract type from a tag: e.g. @br@ from @\<br\>@ -extractTagType :: String -> String -extractTagType ('<':rest) = - let isSpaceOrSlash c = c `elem` "/ \n\t" in - map toLower $ takeWhile isAlphaNum $ dropWhile isSpaceOrSlash rest -extractTagType _ = "" - --- Parse any HTML tag (opening or self-closing) and return tag type -anyOpener :: GenParser Char ParserState [Char] -anyOpener = try $ do - char '<' - spaces - tag <- many1 alphaNum - skipMany htmlAttribute - spaces - option "" (string "/") - spaces - char '>' - return $ map toLower tag - --- | Parse any HTML tag (opening or self-closing) and return text of tag -anyHtmlTag :: GenParser Char ParserState [Char] -anyHtmlTag = try $ do - char '<' - spaces - tag <- many1 alphaNum - attribs <- many htmlAttribute - spaces - ender <- option "" (string "/") - let ender' = if null ender then "" else " /" - spaces - char '>' - let result = "<" ++ tag ++ - concatMap (\(_, _, raw) -> (' ':raw)) attribs ++ ender' ++ ">" - unsanitary <- unsanitaryTag tag - if unsanitary - then return $ "<!-- unsafe HTML removed -->" - else return result - -anyHtmlEndTag :: GenParser Char ParserState [Char] -anyHtmlEndTag = try $ do - char '<' - spaces - char '/' - spaces - tag <- many1 alphaNum - spaces - char '>' - let result = "</" ++ tag ++ ">" - unsanitary <- unsanitaryTag tag - if unsanitary - then return $ "<!-- unsafe HTML removed -->" - else return result - -htmlTag :: Bool - -> String - -> GenParser Char ParserState (String, [(String, String)]) -htmlTag selfClosing tag = try $ do - char '<' - spaces - stringAnyCase tag - attribs <- many htmlAttribute - spaces - -- note: we want to handle both HTML and XHTML, - -- so we don't require the / - when selfClosing $ optional $ char '/' >> spaces - char '>' - return (tag, (map (\(name, content, _) -> (name, content)) attribs)) - -htmlOpenTag :: String - -> GenParser Char ParserState (String, [(String, String)]) -htmlOpenTag = htmlTag False - -htmlCloseTag :: String - -> GenParser Char ParserState (String, [(String, String)]) -htmlCloseTag = htmlTag False . ('/':) - -htmlSelfClosingTag :: String - -> GenParser Char ParserState (String, [(String, String)]) -htmlSelfClosingTag = htmlTag True - --- parses a quoted html attribute value -quoted :: Char -> GenParser Char st (String, String) -quoted quoteChar = do - result <- between (char quoteChar) (char quoteChar) - (many (noneOf [quoteChar])) - return (result, [quoteChar]) - -nullAttribute :: ([Char], [Char], [Char]) -nullAttribute = ("", "", "") - -htmlAttribute :: GenParser Char ParserState ([Char], [Char], [Char]) -htmlAttribute = do - attr <- htmlRegularAttribute <|> htmlMinimizedAttribute - unsanitary <- unsanitaryAttribute attr - if unsanitary - then return nullAttribute - else return attr - --- minimized boolean attribute -htmlMinimizedAttribute :: GenParser Char st ([Char], [Char], [Char]) -htmlMinimizedAttribute = try $ do - many1 space - name <- many1 (choice [letter, oneOf ".-_:"]) - return (name, name, name) - -htmlRegularAttribute :: GenParser Char st ([Char], [Char], [Char]) -htmlRegularAttribute = try $ do - many1 space - name <- many1 (choice [letter, oneOf ".-_:"]) - spaces - char '=' - spaces - (content, quoteStr) <- choice [ (quoted '\''), - (quoted '"'), - (do - a <- many (noneOf " \t\n\r\"'<>") - return (a,"")) ] - return (name, content, - (name ++ "=" ++ quoteStr ++ content ++ quoteStr)) - --- | Parse an end tag of type 'tag' -htmlEndTag :: [Char] -> GenParser Char ParserState [Char] -htmlEndTag tag = try $ do - closedByNext <- lookAhead $ option False $ liftM (`closes` tag) $ - anyOpener <|> (eof >> return "EOF") - if closedByNext - then return "" - else do char '<' - spaces - char '/' - spaces - stringAnyCase tag - spaces - char '>' - return $ "</" ++ tag ++ ">" - --- | Returns @True@ if the tag is (or can be) a block tag. -isBlock :: String -> Bool -isBlock tag = (extractTagType tag) `elem` (blockHtmlTags ++ eitherBlockOrInline) - -anyHtmlBlockTag :: GenParser Char ParserState [Char] -anyHtmlBlockTag = try $ do - tag <- anyHtmlTag <|> anyHtmlEndTag - if isBlock tag then return tag else fail "not a block tag" - -anyHtmlInlineTag :: GenParser Char ParserState [Char] -anyHtmlInlineTag = try $ do - tag <- anyHtmlTag <|> anyHtmlEndTag - if not (isBlock tag) then return tag else fail "not an inline tag" - --- | Parses material between script tags. --- Scripts must be treated differently, because they can contain '<>' etc. -htmlScript :: GenParser Char ParserState [Char] -htmlScript = try $ do - lookAhead $ htmlOpenTag "script" - open <- anyHtmlTag - rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script") - st <- getState - if stateSanitizeHTML st && not ("script" `elem` sanitaryTags) - then return "<!-- unsafe HTML removed -->" - else return $ open ++ rest ++ "</script>" - -scriptChunk :: GenParser Char ParserState [Char] -scriptChunk = jsComment <|> jsString <|> jsChars - where jsComment = jsEndlineComment <|> jsMultilineComment - jsString = jsSingleQuoteString <|> jsDoubleQuoteString - jsChars = many1 (noneOf "<\"'*/") <|> count 1 anyChar - jsEndlineComment = try $ do - string "//" - res <- manyTill anyChar newline - return ("//" ++ res) - jsMultilineComment = try $ do - string "/*" - res <- manyTill anyChar (try $ string "*/") - return ("/*" ++ res ++ "*/") - jsSingleQuoteString = stringwith '\'' - jsDoubleQuoteString = stringwith '"' - charWithEsc escapable = try $ - (try $ char '\\' >> oneOf ('\\':escapable) >>= \x -> return ['\\',x]) - <|> count 1 anyChar - stringwith c = try $ do - char c - res <- liftM concat $ manyTill (charWithEsc [c]) (char c) - return (c : (res ++ [c])) - --- | Parses material between style tags. --- Style tags must be treated differently, because they can contain CSS -htmlStyle :: GenParser Char ParserState [Char] -htmlStyle = try $ do - lookAhead $ htmlOpenTag "style" - open <- anyHtmlTag - rest <- manyTill anyChar (htmlEndTag "style") - st <- getState - if stateSanitizeHTML st && not ("style" `elem` sanitaryTags) - then return "<!-- unsafe HTML removed -->" - else return $ open ++ rest ++ "</style>" - -htmlBlockElement :: GenParser Char ParserState [Char] -htmlBlockElement = choice [ htmlScript, htmlStyle, htmlComment, xmlDec, definition ] - -rawHtmlBlock :: GenParser Char ParserState Block -rawHtmlBlock = try $ do - body <- htmlBlockElement <|> rawVerbatimBlock <|> anyHtmlBlockTag - state <- getState - if stateParseRaw state then return (RawHtml body) else return Null - --- This is a block whose contents should be passed through verbatim, not interpreted. -rawVerbatimBlock :: GenParser Char ParserState [Char] -rawVerbatimBlock = try $ do - start <- anyHtmlBlockTag - let tagtype = extractTagType start - if tagtype `elem` ["pre"] - then do - contents <- many (notFollowedBy' (htmlEndTag tagtype) >> anyChar) - end <- htmlEndTag tagtype - return $ start ++ contents ++ end - else fail "Not a verbatim block" - --- We don't want to parse </body> or </html> as raw HTML, since these --- are handled in parseHtml. -rawHtmlBlock' :: GenParser Char ParserState Block -rawHtmlBlock' = do notFollowedBy' (htmlCloseTag "body" <|> - htmlCloseTag "html") - rawHtmlBlock - --- | Parses an HTML comment. -htmlComment :: GenParser Char st [Char] -htmlComment = try $ do - string "<!--" - comment <- many $ noneOf "-" - <|> try (char '-' >>~ notFollowedBy (try (char '-' >> char '>'))) - string "-->" - return $ "<!--" ++ comment ++ "-->" - --- --- parsing documents --- - -xmlDec :: GenParser Char st [Char] -xmlDec = try $ do - string "<?" - rest <- manyTill anyChar (char '>') - return $ "<?" ++ rest ++ ">" - -definition :: GenParser Char st [Char] -definition = try $ do - string "<!" - rest <- manyTill anyChar (char '>') - return $ "<!" ++ rest ++ ">" - -nonTitleNonHead :: GenParser Char ParserState Char -nonTitleNonHead = try $ do - notFollowedBy $ (htmlOpenTag "title" >> return ' ') <|> - (htmlEndTag "head" >> return ' ') - (rawHtmlBlock >> return ' ') <|> anyChar - -parseTitle :: GenParser Char ParserState [Inline] -parseTitle = try $ do - (tag, _) <- htmlOpenTag "title" - contents <- inlinesTilEnd tag - spaces - return contents - --- parse header and return meta-information (for now, just title) -parseHead :: GenParser Char ParserState Meta -parseHead = try $ do - htmlOpenTag "head" - spaces - skipMany nonTitleNonHead - contents <- option [] parseTitle - skipMany nonTitleNonHead - htmlEndTag "head" - return $ Meta contents [] [] - --- h1 class="title" representation of title in body -bodyTitle :: GenParser Char ParserState [Inline] -bodyTitle = try $ do - (_, attribs) <- htmlOpenTag "h1" - case (extractAttribute "class" attribs) of - Just "title" -> return "" - _ -> fail "not title" - inlinesTilEnd "h1" - -endOfDoc :: GenParser Char ParserState () -endOfDoc = try $ do - spaces - optional (htmlEndTag "body") - spaces - optional (htmlEndTag "html" >> many anyChar) -- ignore stuff after </html> - eof - -parseHtml :: GenParser Char ParserState Pandoc -parseHtml = do - sepEndBy (choice [xmlDec, definition, htmlComment]) spaces - spaces - optional $ htmlOpenTag "html" - spaces - meta <- option (Meta [] [] []) parseHead - spaces - optional $ htmlOpenTag "body" - spaces - optional bodyTitle -- skip title in body, because it's represented in meta - blocks <- parseBlocks - endOfDoc - return $ Pandoc meta blocks - --- --- parsing blocks --- - -parseBlocks :: GenParser Char ParserState [Block] -parseBlocks = spaces >> sepEndBy block spaces >>= (return . filter (/= Null)) - -block :: GenParser Char ParserState Block -block = choice [ codeBlock - , header - , hrule - , list - , blockQuote - , para - , plain - , rawHtmlBlock' - , notFollowedBy' endOfDoc >> char '<' >> return Null - ] <?> "block" - --- --- header blocks --- - -header :: GenParser Char ParserState Block -header = choice (map headerLevel (enumFromTo 1 5)) <?> "header" - -headerLevel :: Int -> GenParser Char ParserState Block -headerLevel n = try $ do - let level = "h" ++ show n - htmlOpenTag level - contents <- inlinesTilEnd level - return $ Header n (normalizeSpaces contents) - --- --- hrule block --- - -hrule :: GenParser Char ParserState Block -hrule = try $ do - (_, attribs) <- htmlSelfClosingTag "hr" - state <- getState - if not (null attribs) && stateParseRaw state - then unexpected "attributes in hr" -- parse as raw in this case - else return HorizontalRule - --- --- code blocks --- - --- Note: HTML tags in code blocks (e.g. for syntax highlighting) are --- skipped, because they are not portable to output formats other than HTML. -codeBlock :: GenParser Char ParserState Block -codeBlock = try $ do - htmlOpenTag "pre" - result <- manyTill - (many1 (satisfy (/= '<')) <|> - ((anyHtmlTag <|> anyHtmlEndTag) >> return "")) - (htmlEndTag "pre") - let result' = concat result - -- drop leading newline if any - let result'' = if "\n" `isPrefixOf` result' - then drop 1 result' - else result' - -- drop trailing newline if any - let result''' = if "\n" `isSuffixOf` result'' - then init result'' - else result'' - return $ CodeBlock ("",[],[]) $ decodeCharacterReferences result''' - --- --- block quotes --- - -blockQuote :: GenParser Char ParserState Block -blockQuote = try $ htmlOpenTag "blockquote" >> spaces >> - blocksTilEnd "blockquote" >>= (return . BlockQuote) - --- --- list blocks --- - -list :: GenParser Char ParserState Block -list = choice [ bulletList, orderedList, definitionList ] <?> "list" - -orderedList :: GenParser Char ParserState Block -orderedList = try $ do - (_, attribs) <- htmlOpenTag "ol" - (start, style) <- option (1, DefaultStyle) $ - do failIfStrict - let sta = fromMaybe "1" $ - lookup "start" attribs - let sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - let sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> DefaultStyle - return (read sta, sty') - spaces - -- note: if they have an <ol> or <ul> not in scope of a <li>, - -- treat it as a list item, though it's not valid xhtml... - items <- sepEndBy1 (blocksIn "li" <|> liftM (:[]) list) spaces - htmlEndTag "ol" - return $ OrderedList (start, style, DefaultDelim) items - -bulletList :: GenParser Char ParserState Block -bulletList = try $ do - htmlOpenTag "ul" - spaces - -- note: if they have an <ol> or <ul> not in scope of a <li>, - -- treat it as a list item, though it's not valid xhtml... - items <- sepEndBy1 (blocksIn "li" <|> liftM (:[]) list) spaces - htmlEndTag "ul" - return $ BulletList items - -definitionList :: GenParser Char ParserState Block -definitionList = try $ do - failIfStrict -- def lists not part of standard markdown - htmlOpenTag "dl" - spaces - items <- sepEndBy1 definitionListItem spaces - htmlEndTag "dl" - return $ DefinitionList items - -definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) -definitionListItem = try $ do - terms <- sepEndBy1 (inlinesIn "dt") spaces - defs <- sepEndBy1 (blocksIn "dd") spaces - let term = intercalate [LineBreak] terms - return (term, defs) - --- --- paragraph block --- - -para :: GenParser Char ParserState Block -para = try $ htmlOpenTag "p" >> inlinesTilEnd "p" >>= - return . Para . normalizeSpaces - --- --- plain block --- - -plain :: GenParser Char ParserState Block -plain = many1 inline >>= return . Plain . normalizeSpaces - --- --- inline --- - -inline :: GenParser Char ParserState Inline -inline = choice [ charRef - , strong - , emph - , superscript - , subscript - , strikeout - , spanStrikeout - , code - , str - , linebreak - , whitespace - , link - , image - , rawHtmlInline - , char '&' >> return (Str "&") -- common HTML error - ] <?> "inline" - -code :: GenParser Char ParserState Inline -code = try $ do - result <- (htmlOpenTag "code" >> manyTill anyChar (htmlEndTag "code")) - <|> (htmlOpenTag "tt" >> manyTill anyChar (htmlEndTag "tt")) - -- remove internal line breaks, leading and trailing space, - -- and decode character references - return $ Code $ decodeCharacterReferences $ removeLeadingTrailingSpace $ - intercalate " " $ lines result - -rawHtmlInline :: GenParser Char ParserState Inline -rawHtmlInline = do - result <- anyHtmlInlineTag <|> htmlComment - state <- getState - if stateParseRaw state then return (HtmlInline result) else return (Str "") - -betweenTags :: [Char] -> GenParser Char ParserState [Inline] -betweenTags tag = try $ htmlOpenTag tag >> inlinesTilEnd tag >>= - return . normalizeSpaces - -emph :: GenParser Char ParserState Inline -emph = (betweenTags "em" <|> betweenTags "i") >>= return . Emph - -strong :: GenParser Char ParserState Inline -strong = (betweenTags "b" <|> betweenTags "strong") >>= return . Strong - -superscript :: GenParser Char ParserState Inline -superscript = failIfStrict >> betweenTags "sup" >>= return . Superscript - -subscript :: GenParser Char ParserState Inline -subscript = failIfStrict >> betweenTags "sub" >>= return . Subscript - -strikeout :: GenParser Char ParserState Inline -strikeout = failIfStrict >> (betweenTags "s" <|> betweenTags "strike") >>= - return . Strikeout - -spanStrikeout :: GenParser Char ParserState Inline -spanStrikeout = try $ do - failIfStrict -- strict markdown has no strikeout, so treat as raw HTML - (_, attributes) <- htmlOpenTag "span" - result <- case (extractAttribute "class" attributes) of - Just "strikeout" -> inlinesTilEnd "span" - _ -> fail "not a strikeout" - return $ Strikeout result - -whitespace :: GenParser Char st Inline -whitespace = many1 space >> return Space - --- hard line break -linebreak :: GenParser Char ParserState Inline -linebreak = htmlSelfClosingTag "br" >> optional newline >> return LineBreak - -str :: GenParser Char st Inline -str = many1 (noneOf "< \t\n&") >>= return . Str - --- --- links and images --- - --- extract contents of attribute (attribute names are case-insensitive) -extractAttribute :: [Char] -> [([Char], String)] -> Maybe String -extractAttribute _ [] = Nothing -extractAttribute name ((attrName, contents):rest) = - let name' = map toLower name - attrName' = map toLower attrName - in if attrName' == name' - then Just (decodeCharacterReferences contents) - else extractAttribute name rest - -link :: GenParser Char ParserState Inline -link = try $ do - (_, attributes) <- htmlOpenTag "a" - url <- case (extractAttribute "href" attributes) of - Just url -> return url - Nothing -> fail "no href" - let title = fromMaybe "" $ extractAttribute "title" attributes - lab <- inlinesTilEnd "a" - return $ Link (normalizeSpaces lab) (escapeURI url, title) - -image :: GenParser Char ParserState Inline -image = try $ do - (_, attributes) <- htmlSelfClosingTag "img" - url <- case (extractAttribute "src" attributes) of - Just url -> return url - Nothing -> fail "no src" - let title = fromMaybe "" $ extractAttribute "title" attributes - let alt = fromMaybe "" (extractAttribute "alt" attributes) - return $ Image [Str alt] (escapeURI url, title) - +--- parsers for use in markdown, textile readers + +-- | Matches a stretch of HTML in balanced tags. +htmlInBalanced :: (Tag String -> Bool) -> GenParser Char ParserState String +htmlInBalanced f = try $ do + (TagOpen t _, tag) <- htmlTag f + guard $ '/' `notElem` tag -- not a self-closing tag + let nonTagChunk = many1 $ satisfy (/= '<') + let stopper = htmlTag (~== TagClose t) + let anytag = liftM snd $ htmlTag (const True) + contents <- many $ notFollowedBy' stopper >> + (nonTagChunk <|> htmlInBalanced (const True) <|> anytag) + endtag <- liftM snd stopper + return $ tag ++ concat contents ++ endtag + +-- | Matches a tag meeting a certain condition. +htmlTag :: (Tag String -> Bool) -> GenParser Char ParserState (Tag String, String) +htmlTag f = try $ do + lookAhead (char '<') + (next : _) <- getInput >>= return . canonicalizeTags . parseTags + guard $ f next + -- advance the parser + case next of + TagComment s -> do + count (length s + 4) anyChar + skipMany (satisfy (/='>')) + char '>' + return (next, "<!--" ++ s ++ "-->") + _ -> do + rendered <- manyTill anyChar (char '>') + return (next, rendered ++ ">") diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 406809dfc..dca745b56 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,9 +38,9 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe ) -import Data.Char ( chr ) -import Data.List ( isPrefixOf, isSuffixOf ) -import Control.Monad ( when ) +import Data.Char ( chr, toUpper ) +import Data.List ( intercalate, isPrefixOf, isSuffixOf ) +import Control.Monad -- | Parse LaTeX from string and return 'Pandoc' document. readLaTeX :: ParserState -- ^ Parser state, including options for parser @@ -50,7 +50,7 @@ readLaTeX = readWith parseLaTeX -- characters with special meaning specialChars :: [Char] -specialChars = "\\`$%^&_~#{}\n \t|<>'\"-" +specialChars = "\\`$%^&_~#{}[]\n \t|<>'\"-" -- -- utility functions @@ -64,7 +64,7 @@ bracketedText openB closeB = do -- | Returns an option or argument of a LaTeX command. optOrArg :: GenParser Char st [Char] -optOrArg = bracketedText '{' '}' <|> bracketedText '[' ']' +optOrArg = try $ spaces >> (bracketedText '{' '}' <|> bracketedText '[' ']') -- | True if the string begins with '{'. isArg :: [Char] -> Bool @@ -86,14 +86,22 @@ command = do begin :: [Char] -> GenParser Char st [Char] begin name = try $ do - string $ "\\begin{" ++ name ++ "}" + string "\\begin" + spaces + char '{' + string name + char '}' optional commandArgs spaces return name end :: [Char] -> GenParser Char st [Char] end name = try $ do - string $ "\\end{" ++ name ++ "}" + string "\\end" + spaces + char '{' + string name + char '}' return name -- | Returns a list of block elements containing the contents of an @@ -103,7 +111,9 @@ environment name = try $ begin name >> spaces >> manyTill block (end name) >>~ s anyEnvironment :: GenParser Char ParserState Block anyEnvironment = try $ do - string "\\begin{" + string "\\begin" + spaces + char '{' name <- many letter star <- option "" (string "*") -- some environments have starred variants char '}' @@ -119,22 +129,17 @@ anyEnvironment = try $ do -- | Process LaTeX preamble, extracting metadata. processLaTeXPreamble :: GenParser Char ParserState () -processLaTeXPreamble = try $ manyTill - (choice [bibliographic, comment, unknownCommand, nullBlock]) - (try (string "\\begin{document}")) >> - spaces +processLaTeXPreamble = do + try $ string "\\documentclass" + skipMany $ bibliographic <|> macro <|> commentBlock <|> skipChar -- | Parse LaTeX and return 'Pandoc'. parseLaTeX :: GenParser Char ParserState Pandoc parseLaTeX = do - optional processLaTeXPreamble -- preamble might not be present (fragment) - spaces - blocks <- parseBlocks spaces - optional $ try (string "\\end{document}" >> many anyChar) - -- might not be present (fragment) - spaces - eof + skipMany $ comment >> spaces + blocks <- try (processLaTeXPreamble >> environment "document") + <|> (many block >>~ (spaces >> eof)) state <- getState let blocks' = filter (/= Null) blocks let title' = stateTitle state @@ -155,13 +160,16 @@ block = choice [ hrule , header , list , blockQuote - , comment + , simpleTable + , commentBlock + , macro , bibliographic , para , itemBlock , unknownEnvironment , ignore - , unknownCommand ] <?> "block" + , unknownCommand + ] <?> "block" -- -- header blocks @@ -208,20 +216,77 @@ hrule :: GenParser Char st Block hrule = oneOfStrings [ "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n", "\\newpage" ] >> spaces >> return HorizontalRule +-- tables + +simpleTable :: GenParser Char ParserState Block +simpleTable = try $ do + string "\\begin" + spaces + string "{tabular}" + spaces + aligns <- parseAligns + let cols = length aligns + optional hline + header' <- option [] $ parseTableHeader cols + rows <- many (parseTableRow cols >>~ optional hline) + spaces + end "tabular" + spaces + let header'' = if null header' + then replicate cols [] + else header' + return $ Table [] aligns (replicate cols 0) header'' rows + +hline :: GenParser Char st () +hline = try $ spaces >> string "\\hline" >> return () + +parseAligns :: GenParser Char ParserState [Alignment] +parseAligns = try $ do + char '{' + optional $ char '|' + let cAlign = char 'c' >> return AlignCenter + let lAlign = char 'l' >> return AlignLeft + let rAlign = char 'r' >> return AlignRight + let alignChar = cAlign <|> lAlign <|> rAlign + aligns' <- sepEndBy alignChar (optional $ char '|') + char '}' + spaces + return aligns' + +parseTableHeader :: Int -- ^ number of columns + -> GenParser Char ParserState [TableCell] +parseTableHeader cols = try $ do + cells' <- parseTableRow cols + hline + return cells' + +parseTableRow :: Int -- ^ number of columns + -> GenParser Char ParserState [TableCell] +parseTableRow cols = try $ do + let tableCellInline = notFollowedBy (char '&' <|> + (try $ char '\\' >> char '\\')) >> inline + cells' <- sepBy (spaces >> liftM ((:[]) . Plain . normalizeSpaces) + (many tableCellInline)) (char '&') + guard $ length cells' == cols + spaces + (try $ string "\\\\" >> spaces) <|> + (lookAhead (end "tabular") >> return ()) + return cells' + -- -- code blocks -- codeBlock :: GenParser Char ParserState Block -codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> lhsCodeBlock +codeBlock = codeBlockWith "verbatim" <|> codeBlockWith "Verbatim" <|> codeBlockWith "lstlisting" <|> lhsCodeBlock -- Note: Verbatim is from fancyvrb. codeBlockWith :: String -> GenParser Char st Block codeBlockWith env = try $ do - string ("\\begin{" ++ env ++ "}") -- don't use begin function because it - -- gobbles whitespace - optional blanklines -- we want to gobble blank lines, but not - -- leading space + string "\\begin" + spaces -- don't use begin function because it + string $ "{" ++ env ++ "}" -- gobbles whitespace; we want to gobble + optional blanklines -- blank lines, but not leading space contents <- manyTill anyChar (try (string $ "\\end{" ++ env ++ "}")) spaces let classes = if env == "code" then ["haskell"] else [] @@ -265,7 +330,10 @@ listItem = try $ do orderedList :: GenParser Char ParserState Block orderedList = try $ do - string "\\begin{enumerate}" + string "\\begin" + spaces + string "{enumerate}" + spaces (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) $ try $ do failIfStrict char '[' @@ -293,7 +361,6 @@ orderedList = try $ do bulletList :: GenParser Char ParserState Block bulletList = try $ do begin "itemize" - spaces items <- many listItem end "itemize" spaces @@ -302,7 +369,6 @@ bulletList = try $ do definitionList :: GenParser Char ParserState Block definitionList = try $ do begin "description" - spaces items <- many listItem end "description" spaces @@ -342,7 +408,7 @@ authors :: GenParser Char ParserState Block authors = try $ do string "\\author{" raw <- many1 (notFollowedBy (char '}') >> inline) - let authors' = map normalizeSpaces $ splitBy LineBreak raw + let authors' = map normalizeSpaces $ splitBy (== LineBreak) raw char '}' spaces updateState (\s -> s { stateAuthors = authors' }) @@ -382,13 +448,15 @@ rawLaTeXEnvironment :: GenParser Char st Block rawLaTeXEnvironment = do contents <- rawLaTeXEnvironment' spaces - return $ Para [TeX contents] + return $ RawBlock "latex" contents -- | Parse any LaTeX environment and return a string containing -- the whole literal environment as raw TeX. rawLaTeXEnvironment' :: GenParser Char st String rawLaTeXEnvironment' = try $ do - string "\\begin{" + string "\\begin" + spaces + char '{' name <- many1 letter star <- option "" (string "*") -- for starred variants let name' = name ++ star @@ -418,31 +486,49 @@ ignore = try $ do spaces return Null +demacro :: (String, String, [String]) -> GenParser Char ParserState Inline +demacro (n,st,args) = try $ do + let raw = "\\" ++ n ++ st ++ concat args + s' <- applyMacros' raw + if raw == s' + then return $ RawInline "latex" raw + else do + inp <- getInput + setInput $ s' ++ inp + return $ Str "" + unknownCommand :: GenParser Char ParserState Block unknownCommand = try $ do - notFollowedBy' $ choice $ map end ["itemize", "enumerate", "description", - "document"] + spaces + notFollowedBy' $ oneOfStrings ["\\begin","\\end","\\item"] state <- getState when (stateParserContext state == ListItemState) $ notFollowedBy' (string "\\item") if stateParseRaw state - then do - (name, star, args) <- command - spaces - return $ Plain [TeX ("\\" ++ name ++ star ++ concat args)] + then command >>= demacro >>= return . Plain . (:[]) else do (name, _, args) <- command spaces - if name `elem` commandsToIgnore - then return Null - else return $ Plain [Str $ concat args] + unless (name `elem` commandsToIgnore) $ do + -- put arguments back in input to be parsed + inp <- getInput + setInput $ intercalate " " args ++ inp + return Null commandsToIgnore :: [String] -commandsToIgnore = ["special","pdfannot","pdfstringdef"] +commandsToIgnore = ["special","pdfannot","pdfstringdef", "index","bibliography"] + +skipChar :: GenParser Char ParserState Block +skipChar = do + satisfy (/='\\') <|> + (notFollowedBy' (try $ + string "\\begin" >> spaces >> string "{document}") >> + anyChar) + spaces + return Null --- latex comment -comment :: GenParser Char st Block -comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return Null +commentBlock :: GenParser Char st Block +commentBlock = many1 (comment >> spaces) >> return Null -- -- inline @@ -464,8 +550,6 @@ inline = choice [ str , strikeout , superscript , subscript - , ref - , lab , code , url , link @@ -474,12 +558,20 @@ inline = choice [ str , linebreak , accentedChar , nonbreakingSpace + , cite , specialChar + , ensureMath , rawLaTeXInline' , escapedChar , unescapedChar + , comment ] <?> "inline" + +-- latex comment +comment :: GenParser Char st Inline +comment = try $ char '%' >> manyTill anyChar newline >> spaces >> return (Str "") + accentedChar :: GenParser Char st Inline accentedChar = normalAccentedChar <|> specialAccentedChar @@ -512,7 +604,7 @@ accentTable = ('u', [('`', 249), ('\'', 250), ('^', 251), ('"', 252)]) ] specialAccentedChar :: GenParser Char st Inline -specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, +specialAccentedChar = choice [ ccedil, aring, iuml, szlig, aelig, lslash, oslash, pound, euro, copyright, sect ] ccedil :: GenParser Char st Inline @@ -543,6 +635,13 @@ oslash = try $ do let num = if letter' == 'o' then 248 else 216 return $ Str [chr num] +lslash :: GenParser Char st Inline +lslash = try $ do + cmd <- oneOfStrings ["{\\L}","{\\l}","\\L ","\\l "] + return $ if 'l' `elem` cmd + then Str "\x142" + else Str "\x141" + aelig :: GenParser Char st Inline aelig = try $ do char '\\' @@ -569,7 +668,7 @@ escapedChar = do -- nonescaped special characters unescapedChar :: GenParser Char st Inline -unescapedChar = oneOf "`$^&_#{}|<>" >>= return . (\c -> Str [c]) +unescapedChar = oneOf "`$^&_#{}[]|<>" >>= return . (\c -> Str [c]) specialChar :: GenParser Char st Inline specialChar = choice [ spacer, interwordSpace, @@ -604,27 +703,34 @@ doubleQuote :: GenParser Char st Inline doubleQuote = char '"' >> return (Str "\"") code :: GenParser Char ParserState Inline -code = code1 <|> code2 <|> lhsInlineCode +code = code1 <|> code2 <|> code3 <|> lhsInlineCode code1 :: GenParser Char st Inline code1 = try $ do string "\\verb" marker <- anyChar result <- manyTill anyChar (char marker) - return $ Code $ removeLeadingTrailingSpace result + return $ Code nullAttr $ removeLeadingTrailingSpace result code2 :: GenParser Char st Inline code2 = try $ do string "\\texttt{" result <- manyTill (noneOf "\\\n~$%^&{}") (char '}') - return $ Code result + return $ Code nullAttr result + +code3 :: GenParser Char st Inline +code3 = try $ do + string "\\lstinline" + marker <- anyChar + result <- manyTill anyChar (char marker) + return $ Code nullAttr $ removeLeadingTrailingSpace result lhsInlineCode :: GenParser Char ParserState Inline lhsInlineCode = try $ do failUnlessLHS char '|' result <- manyTill (noneOf "|\n") (char '|') - return $ Code result + return $ Code ("",["haskell"],[]) result emph :: GenParser Char ParserState Inline emph = try $ oneOfStrings [ "\\emph{", "\\textit{" ] >> @@ -683,15 +789,6 @@ emDash = try (string "---") >> return EmDash hyphen :: GenParser Char st Inline hyphen = char '-' >> return (Str "-") -lab :: GenParser Char st Inline -lab = try $ do - string "\\label{" - result <- manyTill anyChar (char '}') - return $ Str $ "(" ++ result ++ ")" - -ref :: GenParser Char st Inline -ref = try (string "\\ref{") >> manyTill anyChar (char '}') >>= return . Str - strong :: GenParser Char ParserState Inline strong = try (string "\\textbf{") >> manyTill inline (char '}') >>= return . Strong @@ -714,13 +811,13 @@ endline :: GenParser Char st Inline endline = try $ newline >> notFollowedBy blankline >> return Space -- math -math :: GenParser Char st Inline -math = (math3 >>= return . Math DisplayMath) - <|> (math1 >>= return . Math InlineMath) - <|> (math2 >>= return . Math InlineMath) - <|> (math4 >>= return . Math DisplayMath) - <|> (math5 >>= return . Math DisplayMath) - <|> (math6 >>= return . Math DisplayMath) +math :: GenParser Char ParserState Inline +math = (math3 >>= applyMacros' >>= return . Math DisplayMath) + <|> (math1 >>= applyMacros' >>= return . Math InlineMath) + <|> (math2 >>= applyMacros' >>= return . Math InlineMath) + <|> (math4 >>= applyMacros' >>= return . Math DisplayMath) + <|> (math5 >>= applyMacros' >>= return . Math DisplayMath) + <|> (math6 >>= applyMacros' >>= return . Math DisplayMath) <?> "math" math1 :: GenParser Char st String @@ -737,7 +834,6 @@ math4 = try $ do name <- begin "displaymath" <|> begin "equation" <|> begin "equation*" <|> begin "gather" <|> begin "gather*" <|> begin "gathered" <|> begin "multline" <|> begin "multline*" - spaces manyTill anyChar (end name) math5 :: GenParser Char st String @@ -748,10 +844,15 @@ math6 = try $ do name <- begin "eqnarray" <|> begin "eqnarray*" <|> begin "align" <|> begin "align*" <|> begin "alignat" <|> begin "alignat*" <|> begin "split" <|> begin "aligned" <|> begin "alignedat" - spaces res <- manyTill anyChar (end name) return $ filter (/= '&') res -- remove alignment codes +ensureMath :: GenParser Char st Inline +ensureMath = try $ do + (n, _, args) <- command + guard $ n == "ensuremath" && not (null args) + return $ Math InlineMath $ tail $ init $ head args + -- -- links and images -- @@ -760,7 +861,7 @@ url :: GenParser Char ParserState Inline url = try $ do string "\\url" url' <- charsInBalanced '{' '}' - return $ Link [Code url'] (escapeURI url', "") + return $ Link [Code ("",["url"],[]) url'] (escapeURI url', "") link :: GenParser Char ParserState Inline link = try $ do @@ -793,6 +894,103 @@ footnote = try $ do setInput rest return $ Note blocks +-- | citations +cite :: GenParser Char ParserState Inline +cite = simpleCite <|> complexNatbibCites + +simpleCiteArgs :: GenParser Char ParserState [Citation] +simpleCiteArgs = try $ do + first <- optionMaybe $ (char '[') >> manyTill inline (char ']') + second <- optionMaybe $ (char '[') >> manyTill inline (char ']') + char '{' + keys <- many1Till citationLabel (char '}') + let (pre, suf) = case (first , second ) of + (Just s , Nothing) -> ([], s ) + (Just s , Just t ) -> (s , t ) + _ -> ([], []) + conv k = Citation { citationId = k + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationHash = 0 + , citationNoteNum = 0 + } + return $ addPrefix pre $ addSuffix suf $ map conv keys + + +simpleCite :: GenParser Char ParserState Inline +simpleCite = try $ do + char '\\' + let biblatex = [a ++ "cite" | a <- ["auto", "foot", "paren", "super", ""]] + ++ ["footcitetext"] + normal = ["cite" ++ a ++ b | a <- ["al", ""], b <- ["p", "p*", ""]] + ++ biblatex + supress = ["citeyearpar", "citeyear", "autocite*", "cite*", "parencite*"] + intext = ["textcite"] ++ ["cite" ++ a ++ b | a <- ["al", ""], b <- ["t", "t*"]] + mintext = ["textcites"] + mnormal = map (++ "s") biblatex + cmdend = notFollowedBy (letter <|> char '*') + capit [] = [] + capit (x:xs) = toUpper x : xs + addUpper xs = xs ++ map capit xs + toparser l t = try $ oneOfStrings (addUpper l) >> cmdend >> return t + (mode, multi) <- toparser normal (NormalCitation, False) + <|> toparser supress (SuppressAuthor, False) + <|> toparser intext (AuthorInText , False) + <|> toparser mnormal (NormalCitation, True ) + <|> toparser mintext (AuthorInText , True ) + cits <- if multi then + many1 simpleCiteArgs + else + simpleCiteArgs >>= \c -> return [c] + let (c:cs) = concat cits + cits' = case mode of + AuthorInText -> c {citationMode = mode} : cs + _ -> map (\a -> a {citationMode = mode}) (c:cs) + return $ Cite cits' [] + +complexNatbibCites :: GenParser Char ParserState Inline +complexNatbibCites = complexNatbibTextual <|> complexNatbibParenthetical + +complexNatbibTextual :: GenParser Char ParserState Inline +complexNatbibTextual = try $ do + string "\\citeauthor{" + manyTill (noneOf "}") (char '}') + skipSpaces + Cite (c:cs) _ <- complexNatbibParenthetical + return $ Cite (c {citationMode = AuthorInText} : cs) [] + + +complexNatbibParenthetical :: GenParser Char ParserState Inline +complexNatbibParenthetical = try $ do + string "\\citetext{" + cits <- many1Till parseOne (char '}') + return $ Cite (concat cits) [] + where + parseOne = do + skipSpaces + pref <- many (notFollowedBy (oneOf "\\}") >> inline) + (Cite cites _) <- simpleCite + suff <- many (notFollowedBy (oneOf "\\};") >> inline) + skipSpaces + optional $ char ';' + return $ addPrefix pref $ addSuffix suff $ cites + +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _ = [] + +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] +addSuffix _ _ = [] + +citationLabel :: GenParser Char ParserState String +citationLabel = do + res <- many1 $ noneOf ",}" + optional $ char ',' + return $ removeLeadingTrailingSpace res + -- | Parse any LaTeX inline command and return it in a raw TeX inline element. rawLaTeXInline' :: GenParser Char ParserState Inline rawLaTeXInline' = do @@ -805,12 +1003,11 @@ rawLaTeXInline :: GenParser Char ParserState Inline rawLaTeXInline = try $ do state <- getState if stateParseRaw state - then do - (name, star, args) <- command - return $ TeX ("\\" ++ name ++ star ++ concat args) + then command >>= demacro else do - (name, _, args) <- command - spaces - if name `elem` commandsToIgnore - then return $ Str "" - else return $ Str (concat args) + (name,st,args) <- command + x <- demacro (name,st,args) + unless (x == Str "" || name `elem` commandsToIgnore) $ do + inp <- getInput + setInput $ intercalate " " args ++ inp + return $ Str "" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 33fb3d8e6..58d2158bf 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -27,26 +27,25 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of markdown-formatted plain text to 'Pandoc' document. -} -module Text.Pandoc.Readers.Markdown ( - readMarkdown - ) where +module Text.Pandoc.Readers.Markdown ( readMarkdown ) where -import Data.List ( transpose, isSuffixOf, sortBy, findIndex, intercalate ) +import Data.List ( transpose, sortBy, findIndex, intercalate ) import qualified Data.Map as M import Data.Ord ( comparing ) import Data.Char ( isAlphaNum ) import Data.Maybe import Text.Pandoc.Definition +import Text.Pandoc.Generic import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXEnvironment' ) -import Text.Pandoc.Readers.HTML ( rawHtmlBlock, anyHtmlBlockTag, - anyHtmlInlineTag, anyHtmlTag, - anyHtmlEndTag, htmlEndTag, extractTagType, - htmlBlockElement, htmlComment, unsanitaryURI ) +import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag, + isTextTag, isCommentTag ) import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.ParserCombinators.Parsec -import Control.Monad (when, liftM, unless) +import Control.Monad (when, liftM, guard) +import Text.HTML.TagSoup +import Text.HTML.TagSoup.Match (tagOpen) -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ParserState -- ^ Parser state, including options for parser @@ -58,18 +57,26 @@ readMarkdown state s = (readWith parseMarkdown) state (s ++ "\n\n") -- Constants and data structure definitions -- -bulletListMarkers :: [Char] -bulletListMarkers = "*+-" +isBulletListMarker :: Char -> Bool +isBulletListMarker '*' = True +isBulletListMarker '+' = True +isBulletListMarker '-' = True +isBulletListMarker _ = False -hruleChars :: [Char] -hruleChars = "*-_" +isHruleChar :: Char -> Bool +isHruleChar '*' = True +isHruleChar '-' = True +isHruleChar '_' = True +isHruleChar _ = False setextHChars :: [Char] setextHChars = "=-" --- treat these as potentially non-text when parsing inline: -specialChars :: [Char] -specialChars = "\\[]*_~`<>$!^-.&@'\";" +isBlank :: Char -> Bool +isBlank ' ' = True +isBlank '\t' = True +isBlank '\n' = True +isBlank _ = False -- -- auxiliary functions @@ -106,12 +113,6 @@ failUnlessBeginningOfLine = do pos <- getPosition if sourceColumn pos == 1 then return () else fail "not beginning of line" --- | Fail unless we're in "smart typography" mode. -failUnlessSmart :: GenParser tok ParserState () -failUnlessSmart = do - state <- getState - if stateSmart state then return () else pzero - -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. inlinesInBalancedBrackets :: GenParser Char ParserState Inline @@ -119,7 +120,7 @@ inlinesInBalancedBrackets :: GenParser Char ParserState Inline inlinesInBalancedBrackets parser = try $ do char '[' result <- manyTill ( (do lookAhead $ try $ do (Str res) <- parser - unless (res == "[") pzero + guard (res == "[") bal <- inlinesInBalancedBrackets parser return $ [Str "["] ++ bal ++ [Str "]"]) <|> (count 1 parser)) @@ -143,7 +144,8 @@ authorsLine :: GenParser Char ParserState [[Inline]] authorsLine = try $ do char '%' skipSpaces - authors <- sepEndBy (many (notFollowedBy (oneOf ";\n") >> inline)) + authors <- sepEndBy (many (notFollowedBy (satisfy $ \c -> + c == ';' || c == '\n') >> inline)) (char ';' <|> try (newline >> notFollowedBy blankline >> spaceChar)) newline @@ -196,7 +198,7 @@ parseMarkdown = do handleExampleRef z = z if M.null examples then return doc - else return $ processWith handleExampleRef doc + else return $ bottomUp handleExampleRef doc -- -- initial pass for references and notes @@ -209,16 +211,24 @@ referenceKey = try $ do lab <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let sourceURL excludes = many $ - optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' referenceTitle >> char ' ')) - src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" + let nl = char '\n' >> notFollowedBy blankline >> return ' ' + let sourceURL = liftM unwords $ many $ try $ do + notFollowedBy' referenceTitle + skipMany spaceChar + optional nl + skipMany spaceChar + notFollowedBy' reference + many1 (satisfy $ not . isBlank) + let betweenAngles = try $ char '<' >> + manyTill (noneOf ">\n" <|> nl) (char '>') + src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle blanklines endPos <- getPosition let target = (escapeURI $ removeTrailingSpace src, tit) st <- getState let oldkeys = stateKeys st - updateState $ \s -> s { stateKeys = M.insert (Key lab) target oldkeys } + updateState $ \s -> s { stateKeys = M.insert (toKey lab) target oldkeys } -- return blanks so line count isn't affected return $ replicate (sourceLine endPos - sourceLine startPos) '\n' @@ -232,12 +242,12 @@ referenceTitle = try $ do return $ decodeCharacterReferences tit noteMarker :: GenParser Char ParserState [Char] -noteMarker = skipNonindentSpaces >> string "[^" >> manyTill (noneOf " \t\n") (char ']') +noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') rawLine :: GenParser Char ParserState [Char] rawLine = do notFollowedBy blankline - notFollowedBy' noteMarker + notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker contents <- many1 nonEndline end <- option "" (newline >> optional indentSpaces >> return "\n") return $ contents ++ end @@ -248,6 +258,7 @@ rawLines = many1 rawLine >>= return . concat noteBlock :: GenParser Char ParserState [Char] noteBlock = try $ do startPos <- getPosition + skipNonindentSpaces ref <- noteMarker char ':' optional blankline @@ -284,6 +295,7 @@ block = do , plain , nullBlock ] else [ codeBlockDelimited + , macro , header , table , codeBlockIndented @@ -293,6 +305,7 @@ block = do , bulletList , orderedList , definitionList + , rawTeXBlock , para , rawHtmlBlocks , plain @@ -318,6 +331,9 @@ atxClosing = try $ skipMany (char '#') >> blanklines setextHeader :: GenParser Char ParserState Block setextHeader = try $ do + -- This lookahead prevents us from wasting time parsing Inlines + -- unless necessary -- it gives a significant performance boost. + lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline text <- many1Till inline newline underlineChar <- oneOf setextHChars many (char underlineChar) @@ -332,7 +348,7 @@ setextHeader = try $ do hrule :: GenParser Char st Block hrule = try $ do skipSpaces - start <- oneOf hruleChars + start <- satisfy isHruleChar count 2 (skipSpaces >> char start) skipMany (spaceChar <|> char start) newline @@ -371,6 +387,7 @@ attributes = try $ do attribute :: GenParser Char st ([Char], [[Char]], [([Char], [Char])]) attribute = identifierAttr <|> classAttr <|> keyValAttr + identifier :: GenParser Char st [Char] identifier = do first <- letter @@ -394,7 +411,7 @@ keyValAttr = try $ do key <- identifier char '=' char '"' - val <- manyTill (noneOf "\n") (char '"') + val <- manyTill (satisfy (/='\n')) (char '"') return ("",[],[(key,val)]) codeBlockDelimited :: GenParser Char st Block @@ -489,7 +506,7 @@ bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces notFollowedBy' hrule -- because hrules start out just like lists - oneOf bulletListMarkers + satisfy isBulletListMarker spaceChar skipSpaces @@ -524,7 +541,7 @@ listLine = try $ do notFollowedBy' (do indentSpaces many (spaceChar) listStart) - chunks <- manyTill (htmlComment <|> count 1 anyChar) newline + chunks <- manyTill (liftM snd (htmlTag isCommentTag) <|> count 1 anyChar) newline return $ concat chunks ++ "\n" -- parse raw text for one list item, excluding start marker and continuations @@ -644,23 +661,21 @@ definitionList = do -- isHtmlOrBlank :: Inline -> Bool -isHtmlOrBlank (HtmlInline _) = True -isHtmlOrBlank (Space) = True -isHtmlOrBlank (LineBreak) = True -isHtmlOrBlank _ = False +isHtmlOrBlank (RawInline "html" _) = True +isHtmlOrBlank (Space) = True +isHtmlOrBlank (LineBreak) = True +isHtmlOrBlank _ = False para :: GenParser Char ParserState Block para = try $ do - result <- many1 inline - if all isHtmlOrBlank result - then fail "treat as raw HTML" - else return () - newline - blanklines <|> do st <- getState - if stateStrict st - then lookAhead (blockQuote <|> header) >> return "" - else pzero - return $ Para $ normalizeSpaces result + result <- liftM normalizeSpaces $ many1 inline + guard $ not . all isHtmlOrBlank $ result + option (Plain result) $ try $ do + newline + blanklines <|> + (getState >>= guard . stateStrict >> + lookAhead (blockQuote <|> header) >> return "") + return $ Para result plain :: GenParser Char ParserState Block plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces @@ -670,7 +685,7 @@ plain = many1 inline >>~ spaces >>= return . Plain . normalizeSpaces -- htmlElement :: GenParser Char ParserState [Char] -htmlElement = strictHtmlBlock <|> htmlBlockElement <?> "html element" +htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) htmlBlock :: GenParser Char ParserState Block htmlBlock = try $ do @@ -678,27 +693,33 @@ htmlBlock = try $ do first <- htmlElement finalSpace <- many spaceChar finalNewlines <- many newline - return $ RawHtml $ first ++ finalSpace ++ finalNewlines - --- True if tag is self-closing -isSelfClosing :: [Char] -> Bool -isSelfClosing tag = - isSuffixOf "/>" $ filter (not . (`elem` " \n\t")) tag + return $ RawBlock "html" $ first ++ finalSpace ++ finalNewlines strictHtmlBlock :: GenParser Char ParserState [Char] -strictHtmlBlock = try $ do - tag <- anyHtmlBlockTag - let tag' = extractTagType tag - if isSelfClosing tag || tag' == "hr" - then return tag - else do contents <- many (notFollowedBy' (htmlEndTag tag') >> - (htmlElement <|> (count 1 anyChar))) - end <- htmlEndTag tag' - return $ tag ++ concat contents ++ end +strictHtmlBlock = do + failUnlessBeginningOfLine + htmlInBalanced (not . isInlineTag) + +rawVerbatimBlock :: GenParser Char ParserState String +rawVerbatimBlock = try $ do + (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> + t == "pre" || t == "style" || t == "script") + (const True)) + contents <- manyTill anyChar (htmlTag (~== TagClose tag)) + return $ open ++ contents ++ renderTags [TagClose tag] + +rawTeXBlock :: GenParser Char ParserState Block +rawTeXBlock = do + failIfStrict + result <- liftM (RawBlock "latex") rawLaTeXEnvironment' + <|> liftM (RawBlock "context") rawConTeXtEnvironment' + spaces + return result rawHtmlBlocks :: GenParser Char ParserState Block rawHtmlBlocks = do - htmlBlocks <- many1 $ do (RawHtml blk) <- rawHtmlBlock + htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|> + liftM snd (htmlTag isBlockTag) sps <- do sp1 <- many spaceChar sp2 <- option "" (blankline >> return "\n") sp3 <- many spaceChar @@ -710,7 +731,7 @@ rawHtmlBlocks = do return $ blk ++ sps let combined = concat htmlBlocks let combined' = if last combined == '\n' then init combined else combined - return $ RawHtml combined' + return $ RawBlock "html" combined' -- -- Tables @@ -848,10 +869,11 @@ alignType :: [String] -> Alignment alignType [] _ = AlignDefault alignType strLst len = - let s = head $ sortBy (comparing length) $ - map removeTrailingSpace strLst - leftSpace = if null s then False else (s !! 0) `elem` " \t" - rightSpace = length s < len || (s !! (len - 1)) `elem` " \t" + let nonempties = filter (not . null) $ map removeTrailingSpace strLst + (leftSpace, rightSpace) = + case sortBy (comparing length) nonempties of + (x:_) -> (head x `elem` " \t", length x < len) + [] -> (False, False) in case (leftSpace, rightSpace) of (True, False) -> AlignRight (False, True) -> AlignLeft @@ -875,31 +897,29 @@ inline :: GenParser Char ParserState Inline inline = choice inlineParsers <?> "inline" inlineParsers :: [GenParser Char ParserState Inline] -inlineParsers = [ str - , smartPunctuation - , whitespace +inlineParsers = [ whitespace + , str , endline , code - , charRef , (fourOrMore '*' <|> fourOrMore '_') , strong , emph , note - , inlineNote , link -#ifdef _CITEPROC - , inlineCitation -#endif + , cite , image , math , strikeout , superscript , subscript + , inlineNote -- after superscript because of ^[link](/foo)^ , autoLink - , rawHtmlInline' + , rawHtmlInline , rawLaTeXInline' , escapedChar , exampleRef + , smartPunctuation inline + , charRef , symbol , ltSign ] @@ -913,12 +933,12 @@ failIfLink (Link _ _) = pzero failIfLink elt = return elt escapedChar :: GenParser Char ParserState Inline -escapedChar = do +escapedChar = try $ do char '\\' state <- getState - result <- option '\\' $ if stateStrict state - then oneOf "\\`*_{}[]()>#+-.!~" - else satisfy (not . isAlphaNum) + result <- if stateStrict state + then oneOf "\\`*_{}[]()>#+-.!~" + else satisfy (not . isAlphaNum) return $ case result of ' ' -> Str "\160" -- "\ " is a nonbreaking space '\n' -> LineBreak -- "\[newline]" is a linebreak @@ -932,9 +952,6 @@ ltSign = do else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html return $ Str ['<'] -specialCharsMinusLt :: [Char] -specialCharsMinusLt = filter (/= '<') specialChars - exampleRef :: GenParser Char ParserState Inline exampleRef = try $ do char '@' @@ -945,7 +962,11 @@ exampleRef = try $ do symbol :: GenParser Char ParserState Inline symbol = do - result <- oneOf specialCharsMinusLt + result <- noneOf "<\\\n\t " + <|> try (do lookAhead $ char '\\' + notFollowedBy' $ rawLaTeXEnvironment' + <|> rawConTeXtEnvironment' + char '\\') return $ Str [result] -- parses inline code, between n `s and n `s @@ -957,7 +978,8 @@ code = try $ do (char '\n' >> notFollowedBy' blankline >> return " ")) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) - return $ Code $ removeLeadingTrailingSpace $ concat result + attr <- option ([],[],[]) (try $ optional whitespace >> attributes) + return $ Code attr $ removeLeadingTrailingSpace $ concat result mathWord :: GenParser Char st [Char] mathWord = liftM concat $ many1 mathChunk @@ -966,11 +988,11 @@ mathChunk :: GenParser Char st [Char] mathChunk = do char '\\' c <- anyChar return ['\\',c] - <|> many1 (noneOf " \t\n\\$") + <|> many1 (satisfy $ \c -> not (isBlank c || c == '\\' || c == '$')) math :: GenParser Char ParserState Inline -math = (mathDisplay >>= return . Math DisplayMath) - <|> (mathInline >>= return . Math InlineMath) +math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath) + <|> (mathInline >>= applyMacros' >>= return . Math InlineMath) mathDisplay :: GenParser Char ParserState String mathDisplay = try $ do @@ -1019,85 +1041,6 @@ subscript = failIfStrict >> enclosed (char '~') (char '~') (notFollowedBy spaceChar >> inline) >>= -- may not contain Space return . Subscript -smartPunctuation :: GenParser Char ParserState Inline -smartPunctuation = failUnlessSmart >> - choice [ quoted, apostrophe, dash, ellipses ] - -apostrophe :: GenParser Char ParserState Inline -apostrophe = (char '\'' <|> char '\8217') >> return Apostrophe - -quoted :: GenParser Char ParserState Inline -quoted = doubleQuoted <|> singleQuoted - -withQuoteContext :: QuoteContext - -> (GenParser Char ParserState Inline) - -> GenParser Char ParserState Inline -withQuoteContext context parser = do - oldState <- getState - let oldQuoteContext = stateQuoteContext oldState - setState oldState { stateQuoteContext = context } - result <- parser - newState <- getState - setState newState { stateQuoteContext = oldQuoteContext } - return result - -singleQuoted :: GenParser Char ParserState Inline -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ many1Till inline singleQuoteEnd >>= - return . Quoted SingleQuote . normalizeSpaces - -doubleQuoted :: GenParser Char ParserState Inline -doubleQuoted = try $ do - doubleQuoteStart - withQuoteContext InDoubleQuote $ many1Till inline doubleQuoteEnd >>= - return . Quoted DoubleQuote . normalizeSpaces - -failIfInQuoteContext :: QuoteContext -> GenParser tok ParserState () -failIfInQuoteContext context = do - st <- getState - if stateQuoteContext st == context - then fail "already inside quotes" - else return () - -singleQuoteStart :: GenParser Char ParserState Char -singleQuoteStart = do - failIfInQuoteContext InSingleQuote - try $ do char '\'' - notFollowedBy (oneOf ")!],.;:-? \t\n") - notFollowedBy (try (oneOfStrings ["s","t","m","ve","ll","re"] >> - satisfy (not . isAlphaNum))) - -- possess/contraction - return '\'' - -singleQuoteEnd :: GenParser Char st Char -singleQuoteEnd = try $ do - char '\'' - notFollowedBy alphaNum - return '\'' - -doubleQuoteStart :: GenParser Char ParserState Char -doubleQuoteStart = do - failIfInQuoteContext InDoubleQuote - try $ do char '"' - notFollowedBy (oneOf " \t\n") - return '"' - -doubleQuoteEnd :: GenParser Char st Char -doubleQuoteEnd = char '"' - -ellipses :: GenParser Char st Inline -ellipses = oneOfStrings ["...", " . . . ", ". . .", " . . ."] >> return Ellipses - -dash :: GenParser Char st Inline -dash = enDash <|> emDash - -enDash :: GenParser Char st Inline -enDash = try $ char '-' >> notFollowedBy (noneOf "0123456789") >> return EnDash - -emDash :: GenParser Char st Inline -emDash = oneOfStrings ["---", "--"] >> return EmDash - whitespace :: GenParser Char ParserState Inline whitespace = spaceChar >> ( (spaceChar >> skipMany spaceChar >> option Space (endline >> return LineBreak)) @@ -1106,20 +1049,19 @@ whitespace = spaceChar >> nonEndline :: GenParser Char st Char nonEndline = satisfy (/='\n') -strChar :: GenParser Char st Char -strChar = noneOf (specialChars ++ " \t\n") - str :: GenParser Char ParserState Inline str = do - result <- many1 strChar + a <- alphaNum + as <- many $ alphaNum <|> (try $ char '_' >>~ lookAhead alphaNum) + let result = a:as state <- getState let spacesToNbr = map (\c -> if c == ' ' then '\160' else c) if stateSmart state then case likelyAbbrev result of [] -> return $ Str result xs -> choice (map (\x -> - try (string x >> char ' ' >> - notFollowedBy spaceChar >> + try (string x >> oneOf " \n" >> + lookAhead alphaNum >> return (Str $ result ++ spacesToNbr x ++ "\160"))) xs) <|> (return $ Str result) else return $ Str result @@ -1142,15 +1084,13 @@ endline = try $ do newline notFollowedBy blankline st <- getState - if stateStrict st - then do notFollowedBy emailBlockQuoteStart - notFollowedBy (char '#') -- atx header - else return () + when (stateStrict st) $ do + notFollowedBy emailBlockQuoteStart + notFollowedBy (char '#') -- atx header -- parse potential list-starts differently if in a list: - if stateParserContext st == ListItemState - then notFollowedBy' (bulletListStart <|> - (anyOrderedListStart >> return ())) - else return () + when (stateParserContext st == ListItemState) $ do + notFollowedBy' bulletListStart + notFollowedBy' anyOrderedListStart return Space -- @@ -1175,9 +1115,16 @@ source = source' :: GenParser Char st (String, [Char]) source' = do skipSpaces - let sourceURL excludes = many $ - optional (char '\\') >> (noneOf (' ':excludes) <|> (notFollowedBy' linkTitle >> char ' ')) - src <- try (char '<' >> sourceURL ">\t\n" >>~ char '>') <|> sourceURL "\t\n" + let nl = char '\n' >>~ notFollowedBy blankline + let sourceURL = liftM unwords $ many $ try $ do + notFollowedBy' linkTitle + skipMany spaceChar + optional nl + skipMany spaceChar + many1 (satisfy $ not . isBlank) + let betweenAngles = try $ char '<' >> + manyTill (noneOf ">\n" <|> nl) (char '>') + src <- try betweenAngles <|> sourceURL tit <- option "" linkTitle skipSpaces eof @@ -1196,10 +1143,7 @@ link :: GenParser Char ParserState Inline link = try $ do lab <- reference (src, tit) <- source <|> referenceLink lab - sanitize <- getState >>= return . stateSanitizeHTML - if sanitize && unsanitaryURI src - then fail "Unsanitary URI" - else return $ Link lab (src, tit) + return $ Link lab (src, tit) -- a link like [this][ref] or [this][] or [this] referenceLink :: [Inline] @@ -1209,7 +1153,7 @@ referenceLink lab = do optional (newline >> skipSpaces) >> reference)) let ref' = if null ref then lab else ref state <- getState - case lookupKeySrc (stateKeys state) (Key ref') of + case lookupKeySrc (stateKeys state) (toKey ref') of Nothing -> fail "no corresponding key" Just target -> return target @@ -1219,12 +1163,9 @@ autoLink = try $ do (orig, src) <- uri <|> emailAddress char '>' st <- getState - let sanitize = stateSanitizeHTML st - if sanitize && unsanitaryURI src - then fail "Unsanitary URI" - else return $ if stateStrict st - then Link [Str orig] (src, "") - else Link [Code orig] (src, "") + return $ if stateStrict st + then Link [Str orig] (src, "") + else Link [Code ("",["url"],[]) orig] (src, "") image :: GenParser Char ParserState Inline image = try $ do @@ -1250,11 +1191,13 @@ inlineNote = try $ do return $ Note [Para contents] rawLaTeXInline' :: GenParser Char ParserState Inline -rawLaTeXInline' = do +rawLaTeXInline' = try $ do failIfStrict - (rawConTeXtEnvironment' >>= return . TeX) - <|> (rawLaTeXEnvironment' >>= return . TeX) - <|> rawLaTeXInline + lookAhead $ char '\\' + notFollowedBy' $ rawLaTeXEnvironment' + <|> rawConTeXtEnvironment' + RawInline _ s <- rawLaTeXInline + return $ RawInline "tex" s -- "tex" because it might be context or latex rawConTeXtEnvironment' :: GenParser Char st String rawConTeXtEnvironment' = try $ do @@ -1272,46 +1215,98 @@ inBrackets parser = do char ']' return $ "[" ++ contents ++ "]" -rawHtmlInline' :: GenParser Char ParserState Inline -rawHtmlInline' = do +rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline = do st <- getState - result <- if stateStrict st - then choice [htmlBlockElement, anyHtmlTag, anyHtmlEndTag] - else anyHtmlInlineTag - return $ HtmlInline result - -#ifdef _CITEPROC -inlineCitation :: GenParser Char ParserState Inline -inlineCitation = try $ do + (_,result) <- if stateStrict st + then htmlTag (not . isTextTag) + else htmlTag isInlineTag + return $ RawInline "html" result + +-- Citations + +cite :: GenParser Char ParserState Inline +cite = do failIfStrict - cit <- citeMarker - let citations = readWith parseCitation defaultParserState cit - mr <- mapM chkCit citations - if catMaybes mr /= [] - then return $ Cite citations [] - else fail "no citation found" - -chkCit :: Target -> GenParser Char ParserState (Maybe Target) -chkCit t = do + citations <- textualCite <|> normalCite + return $ Cite citations [] + +spnl :: GenParser Char st () +spnl = try $ do + skipSpaces + optional newline + skipSpaces + notFollowedBy (char '\n') + +textualCite :: GenParser Char ParserState [Citation] +textualCite = try $ do + (_, key) <- citeKey + let first = Citation{ citationId = key + , citationPrefix = [] + , citationSuffix = [] + , citationMode = AuthorInText + , citationNoteNum = 0 + , citationHash = 0 + } + rest <- option [] $ try $ spnl >> normalCite + if null rest + then option [first] $ bareloc first + else return $ first : rest + +bareloc :: Citation -> GenParser Char ParserState [Citation] +bareloc c = try $ do + spnl + char '[' + suff <- suffix + rest <- option [] $ try $ char ';' >> citeList + spnl + char ']' + return $ c{ citationSuffix = suff } : rest + +normalCite :: GenParser Char ParserState [Citation] +normalCite = try $ do + char '[' + spnl + citations <- citeList + spnl + char ']' + return citations + +citeKey :: GenParser Char ParserState (Bool, String) +citeKey = try $ do + suppress_author <- option False (char '-' >> return True) + char '@' + first <- letter + rest <- many $ (noneOf ",;]@ \t\n") + let key = first:rest st <- getState - case lookupKeySrc (stateKeys st) (Key [Str $ fst t]) of - Just _ -> fail "This is a link" - Nothing -> if elem (fst t) $ stateCitations st - then return $ Just t - else return $ Nothing - -citeMarker :: GenParser Char ParserState String -citeMarker = char '[' >> manyTill ( noneOf "\n" <|> (newline >>~ notFollowedBy blankline) ) (char ']') - -parseCitation :: GenParser Char ParserState [(String,String)] -parseCitation = try $ sepBy (parseLabel) (oneOf ";") - -parseLabel :: GenParser Char ParserState (String,String) -parseLabel = try $ do - res <- sepBy (skipSpaces >> optional newline >> skipSpaces >> many1 (noneOf "@;")) (oneOf "@") - case res of - [lab,loc] -> return (lab, loc) - [lab] -> return (lab, "" ) - _ -> return ("" , "" ) - -#endif + guard $ key `elem` stateCitations st + return (suppress_author, key) + +suffix :: GenParser Char ParserState [Inline] +suffix = try $ do + spnl + liftM normalizeSpaces $ many $ notFollowedBy (oneOf ";]") >> inline + +prefix :: GenParser Char ParserState [Inline] +prefix = liftM normalizeSpaces $ + manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) + +citeList :: GenParser Char ParserState [Citation] +citeList = sepBy1 citation (try $ char ';' >> spnl) + +citation :: GenParser Char ParserState Citation +citation = try $ do + pref <- prefix + (suppress_author, key) <- citeKey + suff <- suffix + return $ Citation{ citationId = key + , citationPrefix = pref + , citationSuffix = suff + , citationMode = if suppress_author + then SuppressAuthor + else NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs new file mode 100644 index 000000000..2c6fcc6e6 --- /dev/null +++ b/src/Text/Pandoc/Readers/Native.hs @@ -0,0 +1,81 @@ +{- +Copyright (C) 2011 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Native + Copyright : Copyright (C) 2011 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of a string representation of a pandoc type (@Pandoc@, +@[Block]@, @Block@, @[Inline]@, or @Inline@) to a @Pandoc@ document. +-} +module Text.Pandoc.Readers.Native ( readNative ) where + +import Text.Pandoc.Definition + +nullMeta :: Meta +nullMeta = Meta{ docTitle = [] + , docAuthors = [] + , docDate = [] + } + +-- | Read native formatted text and return a Pandoc document. +-- The input may be a full pandoc document, a block list, a block, +-- an inline list, or an inline. Thus, for example, +-- +-- > Str "hi" +-- +-- will be treated as if it were +-- +-- > Pandoc (Meta [] [] []) [Plain [Str "hi"]] +-- +readNative :: String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readNative s = + case reads s of + (d,_):_ -> d + [] -> Pandoc nullMeta $ readBlocks s + +readBlocks :: String -> [Block] +readBlocks s = + case reads s of + (d,_):_ -> d + [] -> [readBlock s] + +readBlock :: String -> Block +readBlock s = + case reads s of + (d,_):_ -> d + [] -> Plain $ readInlines s + +readInlines :: String -> [Inline] +readInlines s = + case reads s of + (d,_):_ -> d + [] -> [readInline s] + +readInline :: String -> Inline +readInline s = + case reads s of + (d,_):_ -> d + [] -> error "Cannot parse document" + diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 13afe5053..32fae5ee7 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -34,10 +34,11 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.ParserCombinators.Parsec -import Control.Monad ( when, unless ) -import Data.List ( findIndex, intercalate, transpose, sort ) +import Control.Monad ( when ) +import Data.List ( findIndex, intercalate, transpose, sort, deleteFirstsBy ) import qualified Data.Map as M import Text.Printf ( printf ) +import Data.Maybe ( catMaybes ) -- | Parse reStructuredText string and return Pandoc document. readRST :: ParserState -- ^ Parser state, including options for parser @@ -57,7 +58,7 @@ underlineChars = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\\`|*_<>$:[-" +specialChars = "\\`|*_<>$:[-.\"'\8216\8217\8220\8221" -- -- parsing documents @@ -90,12 +91,17 @@ titleTransform blocks = (blocks, []) parseRST :: GenParser Char ParserState Pandoc parseRST = do + optional blanklines -- skip blank lines at beginning of file startPos <- getPosition - -- go through once just to get list of reference keys + -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- manyTill (referenceKey <|> lineClump) eof >>= return . concat + docMinusKeys <- manyTill (referenceKey <|> noteBlock <|> lineClump) eof >>= + return . concat setInput docMinusKeys setPosition startPos + st' <- getState + let reversedNotes = stateNotes st' + updateState $ \s -> s { stateNotes = reverse reversedNotes } -- now parse it for real... blocks <- parseBlocks let blocks' = filter (/= Null) blocks @@ -117,10 +123,9 @@ parseBlocks = manyTill block eof block :: GenParser Char ParserState Block block = choice [ codeBlock - , rawHtmlBlock - , rawLaTeXBlock - , fieldList + , rawBlock , blockQuote + , fieldList , imageBlock , customCodeBlock , unknownDirective @@ -138,46 +143,54 @@ block = choice [ codeBlock -- field list -- -fieldListItem :: String -> GenParser Char st ([Char], [Char]) -fieldListItem indent = try $ do +rawFieldListItem :: String -> GenParser Char ParserState (String, String) +rawFieldListItem indent = try $ do string indent char ':' - name <- many1 alphaNum + name <- many1 $ alphaNum <|> spaceChar string ": " skipSpaces first <- manyTill anyChar newline - rest <- option "" $ try $ lookAhead (string indent >> oneOf " \t") >> - indentedBlock - return (name, intercalate " " (first:(lines rest))) + rest <- option "" $ try $ do lookAhead (string indent >> spaceChar) + indentedBlock + let raw = first ++ "\n" ++ rest ++ "\n" + return (name, raw) + +fieldListItem :: String + -> GenParser Char ParserState (Maybe ([Inline], [[Block]])) +fieldListItem indent = try $ do + (name, raw) <- rawFieldListItem indent + let term = [Str name] + contents <- parseFromString (many block) raw + case (name, contents) of + ("Author", x) -> do + updateState $ \st -> + st{ stateAuthors = stateAuthors st ++ [extractContents x] } + return Nothing + ("Authors", [BulletList auths]) -> do + updateState $ \st -> st{ stateAuthors = map extractContents auths } + return Nothing + ("Date", x) -> do + updateState $ \st -> st{ stateDate = extractContents x } + return Nothing + ("Title", x) -> do + updateState $ \st -> st{ stateTitle = extractContents x } + return Nothing + _ -> return $ Just (term, [contents]) + +extractContents :: [Block] -> [Inline] +extractContents [Plain auth] = auth +extractContents [Para auth] = auth +extractContents _ = [] fieldList :: GenParser Char ParserState Block fieldList = try $ do - indent <- lookAhead $ many (oneOf " \t") + indent <- lookAhead $ many spaceChar items <- many1 $ fieldListItem indent blanklines - let authors = case lookup "Authors" items of - Just auth -> [auth] - Nothing -> map snd (filter (\(x,_) -> x == "Author") items) - unless (null authors) $ do - authors' <- mapM (parseFromString (many inline)) authors - updateState $ \st -> st {stateAuthors = map normalizeSpaces authors'} - case (lookup "Date" items) of - Just dat -> do - dat' <- parseFromString (many inline) dat - updateState $ \st -> st{ stateDate = normalizeSpaces dat' } - Nothing -> return () - case (lookup "Title" items) of - Just tit -> parseFromString (many inline) tit >>= - \t -> updateState $ \st -> st {stateTitle = t} - Nothing -> return () - let remaining = filter (\(x,_) -> (x /= "Authors") && (x /= "Author") && - (x /= "Date") && (x /= "Title")) items - if null remaining - then return Null - else do terms <- mapM (return . (:[]) . Str . fst) remaining - defs <- mapM (parseFromString (many block) . snd) - remaining - return $ DefinitionList $ zip terms $ map (:[]) defs + if null items + then return Null + else return $ DefinitionList $ catMaybes items -- -- line block @@ -186,7 +199,7 @@ fieldList = try $ do lineBlockLine :: GenParser Char ParserState [Inline] lineBlockLine = try $ do string "| " - white <- many (oneOf " \t") + white <- many spaceChar line <- many $ (notFollowedBy newline >> inline) <|> (try $ endline >>~ char ' ') optional endline return $ normalizeSpaces $ (if null white then [] else [Str white]) ++ line @@ -231,15 +244,16 @@ plain = many1 inline >>= return . Plain . normalizeSpaces -- image block -- -imageBlock :: GenParser Char st Block +imageBlock :: GenParser Char ParserState Block imageBlock = try $ do string ".. image:: " src <- manyTill anyChar newline - fields <- option [] $ do indent <- lookAhead $ many (oneOf " /t") - many1 $ fieldListItem indent + fields <- try $ do indent <- lookAhead $ many (oneOf " /t") + many $ rawFieldListItem indent optional blanklines case lookup "alt" fields of - Just alt -> return $ Plain [Image [Str alt] (src, alt)] + Just alt -> return $ Plain [Image [Str $ removeTrailingSpace alt] + (src, "")] Nothing -> return $ Plain [Image [Str "image"] (src, "")] -- -- header blocks @@ -314,20 +328,19 @@ hrule = try $ do indentedLine :: String -> GenParser Char st [Char] indentedLine indents = try $ do string indents - result <- manyTill anyChar newline - return $ result ++ "\n" + manyTill anyChar newline -- two or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. indentedBlock :: GenParser Char st [Char] -indentedBlock = do - indents <- lookAhead $ many1 (oneOf " \t") +indentedBlock = try $ do + indents <- lookAhead $ many1 spaceChar lns <- many $ choice $ [ indentedLine indents, try $ do b <- blanklines l <- indentedLine indents return (b ++ l) ] - optional blanklines - return $ concat lns + optional blanklines + return $ unlines lns codeBlock :: GenParser Char st Block codeBlock = try $ do @@ -365,23 +378,16 @@ birdTrackLine = do manyTill anyChar newline -- --- raw html +-- raw html/latex/etc -- -rawHtmlBlock :: GenParser Char st Block -rawHtmlBlock = try $ string ".. raw:: html" >> blanklines >> - indentedBlock >>= return . RawHtml - --- --- raw latex --- - -rawLaTeXBlock :: GenParser Char st Block -rawLaTeXBlock = try $ do - string ".. raw:: latex" +rawBlock :: GenParser Char st Block +rawBlock = try $ do + string ".. raw:: " + lang <- many1 (letter <|> digit) blanklines result <- indentedBlock - return $ Para [(TeX result)] + return $ RawBlock lang result -- -- block quotes @@ -408,7 +414,7 @@ definitionListItem = try $ do term <- many1Till inline endline raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString parseBlocks $ raw ++ "\n\n" + contents <- parseFromString parseBlocks $ raw ++ "\n" return (normalizeSpaces term, [contents]) definitionList :: GenParser Char ParserState Block @@ -505,9 +511,35 @@ unknownDirective = try $ do string ".." notFollowedBy (noneOf " \t\n") manyTill anyChar newline - many $ blanklines <|> (oneOf " \t" >> manyTill anyChar newline) + many $ blanklines <|> (spaceChar >> manyTill anyChar newline) return Null +--- +--- note block +--- + +noteBlock :: GenParser Char ParserState [Char] +noteBlock = try $ do + startPos <- getPosition + string ".." + spaceChar >> skipMany spaceChar + ref <- noteMarker + spaceChar >> skipMany spaceChar + first <- anyLine + blanks <- option "" blanklines + rest <- option "" indentedBlock + endPos <- getPosition + let raw = first ++ "\n" ++ blanks ++ rest ++ "\n" + let newnote = (ref, raw) + st <- getState + let oldnotes = stateNotes st + updateState $ \s -> s { stateNotes = newnote : oldnotes } + -- return blanks so line count isn't affected + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + +noteMarker :: GenParser Char ParserState [Char] +noteMarker = char '[' >> (many1 digit <|> count 1 (oneOf "#*")) >>~ char ']' + -- -- reference key -- @@ -565,14 +597,14 @@ imageKey = try $ do skipSpaces string "image::" src <- targetURI - return (Key (normalizeSpaces ref), (src, "")) + return (toKey (normalizeSpaces ref), (src, "")) anonymousKey :: GenParser Char st (Key, Target) anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition - return (Key [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, "")) + return (toKey [Str $ "_" ++ printf "%09d" (sourceLine pos)], (src, "")) regularKey :: GenParser Char ParserState (Key, Target) regularKey = try $ do @@ -580,7 +612,7 @@ regularKey = try $ do ref <- referenceName char ':' src <- targetURI - return (Key (normalizeSpaces ref), (src, "")) + return (toKey (normalizeSpaces ref), (src, "")) -- -- tables @@ -679,17 +711,19 @@ table = gridTable False <|> simpleTable False <|> -- inline :: GenParser Char ParserState Inline -inline = choice [ link +inline = choice [ whitespace + , link , str - , whitespace , endline , strong , emph , code , image - , hyphens , superscript , subscript + , note + , smartPunctuation inline + , hyphens , escapedChar , symbol ] <?> "inline" @@ -713,7 +747,8 @@ code :: GenParser Char ParserState Inline code = try $ do string "``" result <- manyTill anyChar (try (string "``")) - return $ Code $ removeLeadingTrailingSpace $ intercalate " " $ lines result + return $ Code nullAttr + $ removeLeadingTrailingSpace $ intercalate " " $ lines result emph :: GenParser Char ParserState Inline emph = enclosed (char '*') (char '*') inline >>= @@ -779,9 +814,10 @@ referenceLink = try $ do label' <- (quotedReferenceName <|> simpleReferenceName) >>~ char '_' state <- getState let keyTable = stateKeys state - let isAnonKey (Key [Str ('_':_)]) = True - isAnonKey _ = False - key <- option (Key label') $ + let isAnonKey x = case fromKey x of + [Str ('_':_)] -> True + _ -> False + key <- option (toKey label') $ do char '_' let anonKeys = sort $ filter isAnonKey $ M.keys keyTable if null anonKeys @@ -814,7 +850,24 @@ image = try $ do ref <- manyTill inline (char '|') state <- getState let keyTable = stateKeys state - (src,tit) <- case lookupKeySrc keyTable (Key ref) of + (src,tit) <- case lookupKeySrc keyTable (toKey ref) of Nothing -> fail "no corresponding key" Just target -> return target return $ Image (normalizeSpaces ref) (src, tit) + +note :: GenParser Char ParserState Inline +note = try $ do + ref <- noteMarker + char '_' + state <- getState + let notes = stateNotes state + case lookup ref notes of + Nothing -> fail "note not found" + Just raw -> do + contents <- parseFromString parseBlocks raw + when (ref == "*" || ref == "#") $ do -- auto-numbered + -- delete the note so the next auto-numbered note + -- doesn't get the same contents: + let newnotes = deleteFirstsBy (==) notes [(ref,raw)] + updateState $ \st -> st{ stateNotes = newnotes } + return $ Note contents diff --git a/src/Text/Pandoc/Readers/TeXMath.hs b/src/Text/Pandoc/Readers/TeXMath.hs index b0c6e86d4..b9a46e8ff 100644 --- a/src/Text/Pandoc/Readers/TeXMath.hs +++ b/src/Text/Pandoc/Readers/TeXMath.hs @@ -27,12 +27,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of TeX math to a list of 'Pandoc' inline elements. -} -module Text.Pandoc.Readers.TeXMath ( - readTeXMath - ) where +module Text.Pandoc.Readers.TeXMath ( readTeXMath ) where -import Text.ParserCombinators.Parsec import Text.Pandoc.Definition +import Text.TeXMath.Types import Text.TeXMath.Parser -- | Converts a raw TeX math formula to a list of 'Pandoc' inlines. @@ -40,16 +38,17 @@ import Text.TeXMath.Parser -- can't be converted. readTeXMath :: String -- ^ String to parse (assumes @'\n'@ line endings) -> [Inline] -readTeXMath inp = case readTeXMath' inp of - Nothing -> [Str ("$" ++ inp ++ "$")] - Just res -> res +readTeXMath inp = case texMathToPandoc inp of + Left _ -> [Str ("$" ++ inp ++ "$")] + Right res -> res --- | Like 'readTeXMath', but without the default. -readTeXMath' :: String -- ^ String to parse (assumes @'\n'@ line endings) - -> Maybe [Inline] -readTeXMath' inp = case parse formula "formula" inp of - Left _ -> Just [Str inp] - Right exps -> expsToInlines exps +texMathToPandoc :: String -> Either String [Inline] +texMathToPandoc inp = inp `seq` + case parseFormula inp of + Left err -> Left err + Right exps -> case expsToInlines exps of + Nothing -> Left "Formula too complex for [Inline]" + Just r -> Right r expsToInlines :: [Exp] -> Maybe [Inline] expsToInlines xs = do @@ -89,6 +88,26 @@ expToInlines (ESubsup x y z) = do expToInlines (EDown x y) = expToInlines (ESub x y) expToInlines (EUp x y) = expToInlines (ESuper x y) expToInlines (EDownup x y z) = expToInlines (ESubsup x y z) -expToInlines (EText _ x) = Just [Emph [Str x]] +expToInlines (EText "normal" x) = Just [Str x] +expToInlines (EText "bold" x) = Just [Strong [Str x]] +expToInlines (EText "monospace" x) = Just [Code nullAttr x] +expToInlines (EText "italic" x) = Just [Emph [Str x]] +expToInlines (EText _ x) = Just [Str x] +expToInlines (EOver (EGrouped [EIdentifier [c]]) (ESymbol Accent [accent])) = + case accent of + '\x203E' -> Just [Emph [Str [c,'\x0304']]] -- bar + '\x00B4' -> Just [Emph [Str [c,'\x0301']]] -- acute + '\x0060' -> Just [Emph [Str [c,'\x0300']]] -- grave + '\x02D8' -> Just [Emph [Str [c,'\x0306']]] -- breve + '\x02C7' -> Just [Emph [Str [c,'\x030C']]] -- check + '.' -> Just [Emph [Str [c,'\x0307']]] -- dot + '\x00B0' -> Just [Emph [Str [c,'\x030A']]] -- ring + '\x20D7' -> Just [Emph [Str [c,'\x20D7']]] -- arrow right + '\x20D6' -> Just [Emph [Str [c,'\x20D6']]] -- arrow left + '\x005E' -> Just [Emph [Str [c,'\x0302']]] -- hat + '\x0302' -> Just [Emph [Str [c,'\x0302']]] -- hat + '~' -> Just [Emph [Str [c,'\x0303']]] -- tilde + _ -> Nothing expToInlines _ = Nothing + diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs new file mode 100644 index 000000000..19357b343 --- /dev/null +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -0,0 +1,523 @@ +{- +Copyright (C) 2010 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@' + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Textile + Copyright : Copyright (C) 2010-2011 Paul Rivier and John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : Paul Rivier <paul*rivier#demotera*com> + Stability : alpha + Portability : portable + +Conversion from Textile to 'Pandoc' document, based on the spec +available at http://redcloth.org/textile. + +Implemented and parsed: + - Paragraphs + - Code blocks + - Lists + - blockquote + - Inlines : strong, emph, cite, code, deleted, superscript, + subscript, links + - footnotes + +Implemented but discarded: + - HTML-specific and CSS-specific attributes + +Left to be implemented: + - dimension sign + - all caps + - continued blocks (ex bq..) + +TODO : refactor common patterns across readers : + - autolink + - smartPunctuation + - more ... + +-} + + +module Text.Pandoc.Readers.Textile ( readTextile) where + +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Parsing +import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) +import Text.ParserCombinators.Parsec +import Text.HTML.TagSoup.Match +import Data.Char ( digitToInt, isLetter ) +import Control.Monad ( guard, liftM ) + +-- | Parse a Textile text and return a Pandoc document. +readTextile :: ParserState -- ^ Parser state, including options for parser + -> String -- ^ String to parse (assuming @'\n'@ line endings) + -> Pandoc +readTextile state s = (readWith parseTextile) state (s ++ "\n\n") + + +-- +-- Constants and data structure definitions +-- + +-- | Special chars border strings parsing +specialChars :: [Char] +specialChars = "\\[]<>*#_@~-+^&,.;:!?|\"'%()" + +-- | Generate a Pandoc ADT from a textile document +parseTextile :: GenParser Char ParserState Pandoc +parseTextile = do + -- textile allows raw HTML and does smart punctuation by default + updateState (\state -> state { stateParseRaw = True, stateSmart = True }) + many blankline + startPos <- getPosition + -- go through once just to get list of reference keys and notes + -- docMinusKeys is the raw document with blanks where the keys/notes were... + let firstPassParser = noteBlock <|> lineClump + manyTill firstPassParser eof >>= setInput . concat + setPosition startPos + st' <- getState + let reversedNotes = stateNotes st' + updateState $ \s -> s { stateNotes = reverse reversedNotes } + -- now parse it for real... + blocks <- parseBlocks + return $ Pandoc (Meta [] [] []) blocks -- FIXME + +noteMarker :: GenParser Char ParserState [Char] +noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') + +noteBlock :: GenParser Char ParserState [Char] +noteBlock = try $ do + startPos <- getPosition + ref <- noteMarker + optional blankline + contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock) + endPos <- getPosition + let newnote = (ref, contents ++ "\n") + st <- getState + let oldnotes = stateNotes st + updateState $ \s -> s { stateNotes = newnote : oldnotes } + -- return blanks so line count isn't affected + return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + +-- | Parse document blocks +parseBlocks :: GenParser Char ParserState [Block] +parseBlocks = manyTill block eof + +-- | Block parsers list tried in definition order +blockParsers :: [GenParser Char ParserState Block] +blockParsers = [ codeBlock + , header + , blockQuote + , hrule + , anyList + , rawHtmlBlock + , maybeExplicitBlock "table" table + , maybeExplicitBlock "p" para + , nullBlock ] + +-- | Any block in the order of definition of blockParsers +block :: GenParser Char ParserState Block +block = choice blockParsers <?> "block" + +codeBlock :: GenParser Char ParserState Block +codeBlock = codeBlockBc <|> codeBlockPre + +codeBlockBc :: GenParser Char ParserState Block +codeBlockBc = try $ do + string "bc. " + contents <- manyTill anyLine blanklines + return $ CodeBlock ("",[],[]) $ unlines contents + +-- | Code Blocks in Textile are between <pre> and </pre> +codeBlockPre :: GenParser Char ParserState Block +codeBlockPre = try $ do + htmlTag (tagOpen (=="pre") null) + result' <- manyTill anyChar (try $ htmlTag (tagClose (=="pre")) >> blockBreak) + -- drop leading newline if any + let result'' = case result' of + '\n':xs -> xs + _ -> result' + -- drop trailing newline if any + let result''' = case reverse result'' of + '\n':_ -> init result'' + _ -> result'' + return $ CodeBlock ("",[],[]) result''' + +-- | Header of the form "hN. content" with N in 1..6 +header :: GenParser Char ParserState Block +header = try $ do + char 'h' + level <- oneOf "123456" >>= return . digitToInt + optional attributes + char '.' + whitespace + name <- manyTill inline blockBreak + return $ Header level (normalizeSpaces name) + +-- | Blockquote of the form "bq. content" +blockQuote :: GenParser Char ParserState Block +blockQuote = try $ do + string "bq" + optional attributes + char '.' + whitespace + para >>= return . BlockQuote . (:[]) + +-- Horizontal rule + +hrule :: GenParser Char st Block +hrule = try $ do + skipSpaces + start <- oneOf "-*" + count 2 (skipSpaces >> char start) + skipMany (spaceChar <|> char start) + newline + optional blanklines + return HorizontalRule + +-- Lists handling + +-- | Can be a bullet list or an ordered list. This implementation is +-- strict in the nesting, sublist must start at exactly "parent depth +-- plus one" +anyList :: GenParser Char ParserState Block +anyList = try $ do + l <- anyListAtDepth 1 + blanklines + return l + +-- | This allow one type of list to be nested into an other type, +-- provided correct nesting +anyListAtDepth :: Int -> GenParser Char ParserState Block +anyListAtDepth depth = choice [ bulletListAtDepth depth, + orderedListAtDepth depth, + definitionList ] + +-- | Bullet List of given depth, depth being the number of leading '*' +bulletListAtDepth :: Int -> GenParser Char ParserState Block +bulletListAtDepth depth = try $ do + items <- many1 (bulletListItemAtDepth depth) + return (BulletList items) + +-- | Bullet List Item of given depth, depth being the number of +-- leading '*' +bulletListItemAtDepth :: Int -> GenParser Char ParserState [Block] +bulletListItemAtDepth depth = try $ do + count depth (char '*') + optional attributes + whitespace + p <- inlines >>= return . Plain + sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[])) + return (p:sublist) + +-- | Ordered List of given depth, depth being the number of +-- leading '#' +orderedListAtDepth :: Int -> GenParser Char ParserState Block +orderedListAtDepth depth = try $ do + items <- many1 (orderedListItemAtDepth depth) + return (OrderedList (1, DefaultStyle, DefaultDelim) items) + +-- | Ordered List Item of given depth, depth being the number of +-- leading '#' +orderedListItemAtDepth :: Int -> GenParser Char ParserState [Block] +orderedListItemAtDepth depth = try $ do + count depth (char '#') + optional attributes + whitespace + p <- inlines >>= return . Plain + sublist <- option [] (anyListAtDepth (depth + 1) >>= return . (:[])) + return (p:sublist) + +-- | A definition list is a set of consecutive definition items +definitionList :: GenParser Char ParserState Block +definitionList = try $ do + items <- many1 definitionListItem + return $ DefinitionList items + +-- | A definition list item in textile begins with '- ', followed by +-- the term defined, then spaces and ":=". The definition follows, on +-- the same single line, or spaned on multiple line, after a line +-- break. +definitionListItem :: GenParser Char ParserState ([Inline], [[Block]]) +definitionListItem = try $ do + string "- " + term <- many1Till inline (try (whitespace >> string ":=")) + def <- inlineDef <|> multilineDef + return (term, def) + where inlineDef :: GenParser Char ParserState [[Block]] + inlineDef = liftM (\d -> [[Plain d]]) $ try (whitespace >> inlines) + multilineDef :: GenParser Char ParserState [[Block]] + multilineDef = try $ do + optional whitespace >> newline + s <- many1Till anyChar (try (string "=:" >> newline)) + -- this ++ "\n\n" does not look very good + ds <- parseFromString parseBlocks (s ++ "\n\n") + return [ds] + +-- | This terminates a block such as a paragraph. Because of raw html +-- blocks support, we have to lookAhead for a rawHtmlBlock. +blockBreak :: GenParser Char ParserState () +blockBreak = try (newline >> blanklines >> return ()) <|> + (lookAhead rawHtmlBlock >> return ()) + +-- | A raw Html Block, optionally followed by blanklines +rawHtmlBlock :: GenParser Char ParserState Block +rawHtmlBlock = try $ do + (_,b) <- htmlTag isBlockTag + optional blanklines + return $ RawBlock "html" b + +-- | In textile, paragraphs are separated by blank lines. +para :: GenParser Char ParserState Block +para = try $ do + content <- manyTill inline blockBreak + return $ Para $ normalizeSpaces content + + +-- Tables + +-- | A table cell spans until a pipe | +tableCell :: GenParser Char ParserState TableCell +tableCell = do + c <- many1 (noneOf "|\n") + content <- parseFromString (many1 inline) c + return $ [ Plain $ normalizeSpaces content ] + +-- | A table row is made of many table cells +tableRow :: GenParser Char ParserState [TableCell] +tableRow = try $ do + char '|' + cells <- endBy1 tableCell (char '|') + newline + return cells + +-- | Many table rows +tableRows :: GenParser Char ParserState [[TableCell]] +tableRows = many1 tableRow + +-- | Table headers are made of cells separated by a tag "|_." +tableHeaders :: GenParser Char ParserState [TableCell] +tableHeaders = try $ do + let separator = (try $ string "|_.") + separator + headers <- sepBy1 tableCell separator + char '|' + newline + return headers + +-- | A table with an optional header. Current implementation can +-- handle tables with and without header, but will parse cells +-- alignment attributes as content. +table :: GenParser Char ParserState Block +table = try $ do + headers <- option [] tableHeaders + rows <- tableRows + blanklines + let nbOfCols = max (length headers) (length $ head rows) + return $ Table [] + (replicate nbOfCols AlignDefault) + (replicate nbOfCols 0.0) + headers + rows + + +-- | Blocks like 'p' and 'table' do not need explicit block tag. +-- However, they can be used to set HTML/CSS attributes when needed. +maybeExplicitBlock :: String -- ^ block tag name + -> GenParser Char ParserState Block -- ^ implicit block + -> GenParser Char ParserState Block +maybeExplicitBlock name blk = try $ do + optional $ try $ string name >> optional attributes >> char '.' >> + ((try whitespace) <|> endline) + blk + + + +---------- +-- Inlines +---------- + + +-- | Any inline element +inline :: GenParser Char ParserState Inline +inline = choice inlineParsers <?> "inline" + +-- | List of consecutive inlines before a newline +inlines :: GenParser Char ParserState [Inline] +inlines = manyTill inline newline + +-- | Inline parsers tried in order +inlineParsers :: [GenParser Char ParserState Inline] +inlineParsers = [ autoLink + , str + , whitespace + , endline + , code + , htmlSpan + , rawHtmlInline + , note + , simpleInline (string "??") (Cite []) + , simpleInline (string "**") Strong + , simpleInline (string "__") Emph + , simpleInline (char '*') Strong + , simpleInline (char '_') Emph + , simpleInline (char '-') Strikeout + , simpleInline (char '^') Superscript + , simpleInline (char '~') Subscript + , link + , image + , mark + , smartPunctuation inline + , symbol + ] + +-- | Trademark, registered, copyright +mark :: GenParser Char st Inline +mark = try $ char '(' >> (try tm <|> try reg <|> copy) + +reg :: GenParser Char st Inline +reg = do + oneOf "Rr" + char ')' + return $ Str "\174" + +tm :: GenParser Char st Inline +tm = do + oneOf "Tt" + oneOf "Mm" + char ')' + return $ Str "\8482" + +copy :: GenParser Char st Inline +copy = do + oneOf "Cc" + char ')' + return $ Str "\169" + +note :: GenParser Char ParserState Inline +note = try $ do + char '[' + ref <- many1 digit + char ']' + state <- getState + let notes = stateNotes state + case lookup ref notes of + Nothing -> fail "note not found" + Just raw -> liftM Note $ parseFromString parseBlocks raw + +-- | Any string +str :: GenParser Char ParserState Inline +str = do + xs <- many1 (noneOf (specialChars ++ "\t\n ")) + optional $ try $ do + lookAhead (char '(') + notFollowedBy' mark + getInput >>= setInput . (' ':) -- add space before acronym explanation + -- parse a following hyphen if followed by a letter + -- (this prevents unwanted interpretation as starting a strikeout section) + result <- option xs $ try $ do + char '-' + next <- lookAhead letter + guard $ isLetter (last xs) || isLetter next + return $ xs ++ "-" + return $ Str result + +-- | Textile allows HTML span infos, we discard them +htmlSpan :: GenParser Char ParserState Inline +htmlSpan = try $ do + char '%' + _ <- attributes + content <- manyTill anyChar (char '%') + return $ Str content + +-- | Some number of space chars +whitespace :: GenParser Char ParserState Inline +whitespace = many1 spaceChar >> return Space <?> "whitespace" + +-- | In Textile, an isolated endline character is a line break +endline :: GenParser Char ParserState Inline +endline = try $ do + newline >> notFollowedBy blankline + return LineBreak + +rawHtmlInline :: GenParser Char ParserState Inline +rawHtmlInline = liftM (RawInline "html" . snd) + $ htmlTag isInlineTag + +-- | Textile standard link syntax is "label":target +link :: GenParser Char ParserState Inline +link = try $ do + name <- surrounded (char '"') inline + char ':' + url <- manyTill (anyChar) (lookAhead $ (space <|> try (oneOf ".;," >> (space <|> newline)))) + return $ Link name (url, "") + +-- | Detect plain links to http or email. +autoLink :: GenParser Char ParserState Inline +autoLink = do + (orig, src) <- (try uri <|> try emailAddress) + return $ Link [Str orig] (src, "") + +-- | image embedding +image :: GenParser Char ParserState Inline +image = try $ do + char '!' >> notFollowedBy space + src <- manyTill anyChar (lookAhead $ oneOf "!(") + alt <- option "" (try $ (char '(' >> manyTill anyChar (char ')'))) + char '!' + return $ Image [Str alt] (src, alt) + +-- | Any special symbol defined in specialChars +symbol :: GenParser Char ParserState Inline +symbol = do + result <- oneOf specialChars + return $ Str [result] + +-- | Inline code +code :: GenParser Char ParserState Inline +code = code1 <|> code2 + +code1 :: GenParser Char ParserState Inline +code1 = surrounded (char '@') anyChar >>= return . Code nullAttr + +code2 :: GenParser Char ParserState Inline +code2 = do + htmlTag (tagOpen (=="tt") null) + result' <- manyTill anyChar (try $ htmlTag $ tagClose (=="tt")) + return $ Code nullAttr result' + +-- | Html / CSS attributes +attributes :: GenParser Char ParserState String +attributes = choice [ enclosed (char '(') (char ')') anyChar, + enclosed (char '{') (char '}') anyChar, + enclosed (char '[') (char ']') anyChar] + +-- | Parses material surrounded by a parser. +surrounded :: GenParser Char st t -- ^ surrounding parser + -> GenParser Char st a -- ^ content parser (to be used repeatedly) + -> GenParser Char st [a] +surrounded border = enclosed border border + +-- | Inlines are most of the time of the same form +simpleInline :: GenParser Char ParserState t -- ^ surrounding parser + -> ([Inline] -> Inline) -- ^ Inline constructor + -> GenParser Char ParserState Inline -- ^ content parser (to be used repeatedly) +simpleInline border construct = surrounded border (inlineWithAttribute) >>= + return . construct . normalizeSpaces + where inlineWithAttribute = (try $ optional attributes) >> inline diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 633708046..b1d5de63f 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -46,17 +46,11 @@ module Text.Pandoc.Shared ( escapeURI, unescapeURI, tabFilter, - -- * Prettyprinting - wrapped, - wrapIfNeeded, - wrappedTeX, - wrapTeXIfNeeded, - BlockWrapper (..), - wrappedBlocksToDoc, - hang', -- * Pandoc block and inline list processing orderedListMarkers, normalizeSpaces, + normalize, + stringify, compactify, Element (..), hierarchicalize, @@ -65,19 +59,20 @@ module Text.Pandoc.Shared ( headerShift, -- * Writer options HTMLMathMethod (..), + CiteMethod (..), ObfuscationMethod (..), HTMLSlideVariant (..), WriterOptions (..), defaultWriterOptions, -- * File handling inDirectory, + findDataFile, readDataFile ) where import Text.Pandoc.Definition +import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (readFile) -import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest ) -import qualified Text.PrettyPrint.HughesPJ as PP import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii, isLetter, isDigit ) import Data.List ( find, isPrefixOf, intercalate ) @@ -94,12 +89,12 @@ import Paths_pandoc (getDataFileName) -- -- | Split list by groups of one or more sep. -splitBy :: (Eq a) => a -> [a] -> [[a]] +splitBy :: (a -> Bool) -> [a] -> [[a]] splitBy _ [] = [] -splitBy sep lst = - let (first, rest) = break (== sep) lst - rest' = dropWhile (== sep) rest - in first:(splitBy sep rest') +splitBy isSep lst = + let (first, rest) = break isSep lst + rest' = dropWhile isSep rest + in first:(splitBy isSep rest') -- | Split list into chunks divided at specified indices. splitByIndices :: [Int] -> [a] -> [[a]] @@ -218,83 +213,6 @@ tabFilter tabStop = in go tabStop -- --- Prettyprinting --- - --- | Wrap inlines to line length. -wrapped :: Monad m => ([Inline] -> m Doc) -> [Inline] -> m Doc -wrapped listWriter sect = (mapM listWriter $ splitBy Space sect) >>= - return . fsep - --- | Wrap inlines if the text wrap option is selected. -wrapIfNeeded :: Monad m => WriterOptions -> ([Inline] -> m Doc) -> - [Inline] -> m Doc -wrapIfNeeded opts = if writerWrapText opts - then wrapped - else ($) - --- auxiliary function for wrappedTeX -isNote :: Inline -> Bool -isNote (Note _) = True -isNote _ = False - --- | Wrap inlines to line length, treating footnotes in a way that --- makes sense in LaTeX and ConTeXt. -wrappedTeX :: Monad m - => Bool - -> ([Inline] -> m Doc) - -> [Inline] - -> m Doc -wrappedTeX includePercent listWriter sect = do - let (firstpart, rest) = break isNote sect - firstpartWrapped <- wrapped listWriter firstpart - if null rest - then return firstpartWrapped - else do let (note:rest') = rest - let (rest1, rest2) = break (== Space) rest' - -- rest1 is whatever comes between the note and a Space. - -- if the note is followed directly by a Space, rest1 is null. - -- rest1 is printed after the note but before the line break, - -- to avoid spurious blank space the note and immediately - -- following punctuation. - rest1Out <- if null rest1 - then return empty - else listWriter rest1 - rest2Wrapped <- if null rest2 - then return empty - else wrappedTeX includePercent listWriter (tail rest2) - noteText <- listWriter [note] - return $ (firstpartWrapped <> if includePercent then PP.char '%' else empty) $$ - (noteText <> rest1Out) $$ - rest2Wrapped - --- | Wrap inlines if the text wrap option is selected, specialized --- for LaTeX and ConTeXt. -wrapTeXIfNeeded :: Monad m - => WriterOptions - -> Bool - -> ([Inline] -> m Doc) - -> [Inline] - -> m Doc -wrapTeXIfNeeded opts includePercent = if writerWrapText opts - then wrappedTeX includePercent - else ($) - --- | Indicates whether block should be surrounded by blank lines (@Pad@) or not (@Reg@). -data BlockWrapper = Pad Doc | Reg Doc - --- | Converts a list of wrapped blocks to a Doc, with appropriate spaces around blocks. -wrappedBlocksToDoc :: [BlockWrapper] -> Doc -wrappedBlocksToDoc = foldr addBlock empty - where addBlock (Pad d) accum | isEmpty accum = d - addBlock (Pad d) accum = d $$ text "" $$ accum - addBlock (Reg d) accum = d $$ accum - --- | A version of hang that works like the version in pretty-1.0.0.0 -hang' :: Doc -> Int -> Doc -> Doc -hang' d1 n d2 = d1 $$ (nest n d2) - --- -- Pandoc block and inline list processing -- @@ -324,20 +242,96 @@ orderedListMarkers (start, numstyle, numdelim) = -- @Space@ elements, collapse double @Space@s into singles, and -- remove empty Str elements. normalizeSpaces :: [Inline] -> [Inline] -normalizeSpaces [] = [] -normalizeSpaces list = - let removeDoubles [] = [] - removeDoubles (Space:Space:rest) = removeDoubles (Space:rest) - removeDoubles (Space:(Str ""):Space:rest) = removeDoubles (Space:rest) - removeDoubles ((Str ""):rest) = removeDoubles rest - removeDoubles (x:rest) = x:(removeDoubles rest) - removeLeading (Space:xs) = removeLeading xs - removeLeading x = x - removeTrailing [] = [] - removeTrailing lst = if (last lst == Space) - then init lst - else lst - in removeLeading $ removeTrailing $ removeDoubles list +normalizeSpaces = cleanup . dropWhile isSpaceOrEmpty + where isSpaceOrEmpty Space = True + isSpaceOrEmpty (Str "") = True + isSpaceOrEmpty _ = False + cleanup [] = [] + cleanup (Space:rest) = let rest' = dropWhile isSpaceOrEmpty rest + in case rest' of + [] -> [] + _ -> Space : cleanup rest' + cleanup ((Str ""):rest) = cleanup rest + cleanup (x:rest) = x : cleanup rest + +-- | Normalize @Pandoc@ document, consolidating doubled 'Space's, +-- combining adjacent 'Str's and 'Emph's, remove 'Null's and +-- empty elements, etc. +normalize :: (Eq a, Data a) => a -> a +normalize = topDown removeEmptyBlocks . + topDown consolidateInlines . + bottomUp removeEmptyInlines + +removeEmptyBlocks :: [Block] -> [Block] +removeEmptyBlocks (Null : xs) = removeEmptyBlocks xs +removeEmptyBlocks (BulletList [] : xs) = removeEmptyBlocks xs +removeEmptyBlocks (OrderedList _ [] : xs) = removeEmptyBlocks xs +removeEmptyBlocks (DefinitionList [] : xs) = removeEmptyBlocks xs +removeEmptyBlocks (RawBlock _ [] : xs) = removeEmptyBlocks xs +removeEmptyBlocks (x:xs) = x : removeEmptyBlocks xs +removeEmptyBlocks [] = [] + +removeEmptyInlines :: [Inline] -> [Inline] +removeEmptyInlines (Emph [] : zs) = removeEmptyInlines zs +removeEmptyInlines (Strong [] : zs) = removeEmptyInlines zs +removeEmptyInlines (Subscript [] : zs) = removeEmptyInlines zs +removeEmptyInlines (Superscript [] : zs) = removeEmptyInlines zs +removeEmptyInlines (SmallCaps [] : zs) = removeEmptyInlines zs +removeEmptyInlines (Strikeout [] : zs) = removeEmptyInlines zs +removeEmptyInlines (RawInline _ [] : zs) = removeEmptyInlines zs +removeEmptyInlines (Code _ [] : zs) = removeEmptyInlines zs +removeEmptyInlines (Str "" : zs) = removeEmptyInlines zs +removeEmptyInlines (x : xs) = x : removeEmptyInlines xs +removeEmptyInlines [] = [] + +consolidateInlines :: [Inline] -> [Inline] +consolidateInlines (Str x : ys) = + case concat (x : map fromStr strs) of + "" -> consolidateInlines rest + n -> Str n : consolidateInlines rest + where + (strs, rest) = span isStr ys + isStr (Str _) = True + isStr _ = False + fromStr (Str z) = z + fromStr _ = error "consolidateInlines - fromStr - not a Str" +consolidateInlines (Space : ys) = Space : rest + where isSpace Space = True + isSpace _ = False + rest = consolidateInlines $ dropWhile isSpace ys +consolidateInlines (Emph xs : Emph ys : zs) = consolidateInlines $ + Emph (xs ++ ys) : zs +consolidateInlines (Strong xs : Strong ys : zs) = consolidateInlines $ + Strong (xs ++ ys) : zs +consolidateInlines (Subscript xs : Subscript ys : zs) = consolidateInlines $ + Subscript (xs ++ ys) : zs +consolidateInlines (Superscript xs : Superscript ys : zs) = consolidateInlines $ + Superscript (xs ++ ys) : zs +consolidateInlines (SmallCaps xs : SmallCaps ys : zs) = consolidateInlines $ + SmallCaps (xs ++ ys) : zs +consolidateInlines (Strikeout xs : Strikeout ys : zs) = consolidateInlines $ + Strikeout (xs ++ ys) : zs +consolidateInlines (RawInline f x : RawInline f' y : zs) | f == f' = + consolidateInlines $ RawInline f (x ++ y) : zs +consolidateInlines (Code a1 x : Code a2 y : zs) | a1 == a2 = + consolidateInlines $ Code a1 (x ++ y) : zs +consolidateInlines (x : xs) = x : consolidateInlines xs +consolidateInlines [] = [] + +-- | Convert list of inlines to a string with formatting removed. +stringify :: [Inline] -> String +stringify = queryWith go + where go :: Inline -> [Char] + go Space = " " + go (Str x) = x + go (Code _ x) = x + go (Math _ x) = x + go EmDash = "--" + go EnDash = "-" + go Apostrophe = "'" + go Ellipses = "..." + go LineBreak = " " + go _ = "" -- | Change final list item from @Para@ to @Plain@ if the list contains -- no other @Para@ blocks. @@ -370,32 +364,12 @@ data Element = Blk Block -- letters, digits, and the characters _-. inlineListToIdentifier :: [Inline] -> String inlineListToIdentifier = - dropWhile (not . isAlpha) . intercalate "-" . words . map toLower . - filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") . - concatMap extractText - where extractText x = case x of - Str s -> s - Emph lst -> concatMap extractText lst - Strikeout lst -> concatMap extractText lst - Superscript lst -> concatMap extractText lst - SmallCaps lst -> concatMap extractText lst - Subscript lst -> concatMap extractText lst - Strong lst -> concatMap extractText lst - Quoted _ lst -> concatMap extractText lst - Cite _ lst -> concatMap extractText lst - Code s -> s - Space -> " " - EmDash -> "---" - EnDash -> "--" - Apostrophe -> "" - Ellipses -> "..." - LineBreak -> " " - Math _ s -> s - TeX _ -> "" - HtmlInline _ -> "" - Link lst _ -> concatMap extractText lst - Image lst _ -> concatMap extractText lst - Note _ -> "" + dropWhile (not . isAlpha) . intercalate "-" . words . + map (nbspToSp . toLower) . + filter (\c -> isLetter c || isDigit c || c `elem` "_-. ") . + stringify + where nbspToSp '\160' = ' ' + nbspToSp x = x -- | Convert list of Pandoc blocks into (hierarchical) list of Elements hierarchicalize :: [Block] -> [Element] @@ -444,7 +418,7 @@ isHeaderBlock _ = False -- | Shift header levels up or down. headerShift :: Int -> Pandoc -> Pandoc -headerShift n = processWith shift +headerShift n = bottomUp shift where shift :: Block -> Block shift (Header level inner) = Header (level + n) inner shift x = x @@ -459,8 +433,14 @@ data HTMLMathMethod = PlainMath | GladTeX | WebTeX String -- url of TeX->image script. | MathML (Maybe String) -- url of MathMLinHTML.js + | MathJax String -- url of MathJax.js deriving (Show, Read, Eq) +data CiteMethod = Citeproc -- use citeproc to render them + | Natbib -- output natbib cite commands + | Biblatex -- output biblatex cite commands + deriving (Show, Read, Eq) + -- | Methods for obfuscating email addresses in HTML. data ObfuscationMethod = NoObfuscation | ReferenceObfuscation @@ -491,11 +471,17 @@ data WriterOptions = WriterOptions , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst , writerWrapText :: Bool -- ^ Wrap text to line length + , writerColumns :: Int -- ^ Characters in a line (for text wrapping) , writerLiterateHaskell :: Bool -- ^ Write as literate haskell , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails , writerIdentifierPrefix :: String -- ^ Prefix for section & note ids in HTML , writerSourceDirectory :: FilePath -- ^ Directory path of 1st source file , writerUserDataDir :: Maybe FilePath -- ^ Path of user data directory + , writerCiteMethod :: CiteMethod -- ^ How to print cites + , writerBiblioFiles :: [FilePath] -- ^ Biblio files to use for citations + , writerHtml5 :: Bool -- ^ Produce HTML5 + , writerChapters :: Bool -- ^ Use "chapter" for top-level sects + , writerListings :: Bool -- ^ Use listings package for code } deriving Show -- | Default writer options. @@ -517,11 +503,17 @@ defaultWriterOptions = , writerStrictMarkdown = False , writerReferenceLinks = False , writerWrapText = True + , writerColumns = 72 , writerLiterateHaskell = False , writerEmailObfuscation = JavascriptObfuscation , writerIdentifierPrefix = "" , writerSourceDirectory = "." , writerUserDataDir = Nothing + , writerCiteMethod = Citeproc + , writerBiblioFiles = [] + , writerHtml5 = False + , writerChapters = False + , writerListings = False } -- @@ -537,11 +529,17 @@ inDirectory path action = do setCurrentDirectory oldDir return result +-- | Get file path for data file, either from specified user data directory, +-- or, if not found there, from Cabal data directory. +findDataFile :: Maybe FilePath -> FilePath -> IO FilePath +findDataFile Nothing f = getDataFileName f +findDataFile (Just u) f = do + ex <- doesFileExist (u </> f) + if ex + then return (u </> f) + else getDataFileName f + -- | Read file from specified user data directory or, if not found there, from -- Cabal data directory. readDataFile :: Maybe FilePath -> FilePath -> IO String -readDataFile userDir fname = - case userDir of - Nothing -> getDataFileName fname >>= UTF8.readFile - Just u -> catch (UTF8.readFile $ u </> fname) - (\_ -> getDataFileName fname >>= UTF8.readFile) +readDataFile userDir fname = findDataFile userDir fname >>= UTF8.readFile diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index c8ddc3abf..b03e8c73f 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -72,7 +72,6 @@ import Text.ParserCombinators.Parsec import Control.Monad (liftM, when, forM) import System.FilePath import Data.List (intercalate, intersperse) -import Text.PrettyPrint (text, Doc) import Text.XHtml (primHtml, Html) import Data.ByteString.Lazy.UTF8 (ByteString, fromString) import Text.Pandoc.Shared (readDataFile) @@ -112,9 +111,6 @@ instance TemplateTarget ByteString where instance TemplateTarget Html where toTarget = primHtml -instance TemplateTarget Doc where - toTarget = text - -- | Renders a template renderTemplate :: TemplateTarget a => [(String,String)] -- ^ Assoc. list of values for variables diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 96d6e6218..a77f92cdc 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -25,7 +25,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Stability : alpha Portability : portable -UTF-8 aware string IO functions that will work with GHC 6.10 or 6.12. +UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7. -} module Text.Pandoc.UTF8 ( readFile , writeFile @@ -34,15 +34,54 @@ module Text.Pandoc.UTF8 ( readFile , putStrLn , hPutStr , hPutStrLn + , hGetContents ) where + +#if MIN_VERSION_base(4,2,0) + +import System.IO hiding (readFile, writeFile, getContents, + putStr, putStrLn, hPutStr, hPutStrLn, hGetContents) +import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn ) +import qualified System.IO as IO + +readFile :: FilePath -> IO String +readFile f = do + h <- openFile f ReadMode + hGetContents h + +writeFile :: FilePath -> String -> IO () +writeFile f s = withFile f WriteMode $ \h -> hPutStr h s + +getContents :: IO String +getContents = hGetContents stdin + +putStr :: String -> IO () +putStr s = hPutStr stdout s + +putStrLn :: String -> IO () +putStrLn s = hPutStrLn stdout s + +hPutStr :: Handle -> String -> IO () +hPutStr h s = hSetEncoding h utf8 >> IO.hPutStr h s + +hPutStrLn :: Handle -> String -> IO () +hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s + +hGetContents :: Handle -> IO String +hGetContents h = hSetEncoding h utf8_bom >> IO.hGetContents h + +#else + import qualified Data.ByteString as B +import Codec.Binary.UTF8.String (encodeString) import Data.ByteString.UTF8 (toString, fromString) import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn) import System.IO (Handle) import Control.Monad (liftM) + bom :: B.ByteString bom = B.pack [0xEF, 0xBB, 0xBF] @@ -51,14 +90,17 @@ stripBOM s | bom `B.isPrefixOf` s = B.drop 3 s stripBOM s = s readFile :: FilePath -> IO String -readFile = liftM (toString . stripBOM) . B.readFile +readFile = liftM (toString . stripBOM) . B.readFile . encodeString writeFile :: FilePath -> String -> IO () -writeFile f = B.writeFile f . fromString +writeFile f = B.writeFile (encodeString f) . fromString getContents :: IO String getContents = liftM (toString . stripBOM) B.getContents +hGetContents :: Handle -> IO String +hGetContents h = liftM (toString . stripBOM) (B.hGetContents h) + putStr :: String -> IO () putStr = B.putStr . fromString @@ -70,3 +112,5 @@ hPutStr h = B.hPutStr h . fromString hPutStrLn :: Handle -> String -> IO () hPutStrLn h s = hPutStr h (s ++ "\n") + +#endif diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 395bc2d30..0f6e00a3b 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2007-2010 John MacFarlane <jgm@berkeley.edu> @@ -31,9 +32,9 @@ module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Printf ( printf ) -import Data.List ( isSuffixOf, intercalate, intersperse ) +import Data.List ( intercalate ) import Control.Monad.State -import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Text.Pandoc.Pretty import Text.Pandoc.Templates ( renderTemplate ) data WriterState = @@ -56,15 +57,18 @@ writeConTeXt options document = pandocToConTeXt :: WriterOptions -> Pandoc -> State WriterState String pandocToConTeXt options (Pandoc (Meta title authors date) blocks) = do + let colwidth = if writerWrapText options + then Just $ writerColumns options + else Nothing titletext <- if null title then return "" - else liftM render $ inlineListToConTeXt title - authorstext <- mapM (liftM render . inlineListToConTeXt) authors + else liftM (render colwidth) $ inlineListToConTeXt title + authorstext <- mapM (liftM (render colwidth) . inlineListToConTeXt) authors datetext <- if null date then return "" - else liftM render $ inlineListToConTeXt date - body <- blockListToConTeXt blocks - let main = render $ body $$ text "" + else liftM (render colwidth) $ inlineListToConTeXt date + body <- blockListToConTeXt blocks + let main = render colwidth $ body let context = writerVariables options ++ [ ("toc", if writerTableOfContents options then "yes" else "") , ("body", main) @@ -92,6 +96,8 @@ escapeCharForConTeXt ch = '#' -> "\\#" '<' -> "\\letterless{}" '>' -> "\\lettermore{}" + '[' -> "{[}" + ']' -> "{]}" '_' -> "\\letterunderscore{}" '\160' -> "~" x -> [x] @@ -102,32 +108,27 @@ stringToConTeXt = concatMap escapeCharForConTeXt -- | Convert Pandoc block element to ConTeXt. blockToConTeXt :: Block - -> State WriterState BlockWrapper -blockToConTeXt Null = return $ Reg empty -blockToConTeXt (Plain lst) = do - st <- get - let options = stOptions st - contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst - return $ Reg contents + -> State WriterState Doc +blockToConTeXt Null = return empty +blockToConTeXt (Plain lst) = inlineListToConTeXt lst blockToConTeXt (Para [Image txt (src,_)]) = do capt <- inlineListToConTeXt txt - return $ Pad $ text "\\placefigure[here,nonumber]{" <> capt <> - text "}{\\externalfigure[" <> text src <> text "]}" + return $ blankline $$ "\\placefigure[here,nonumber]" <> braces capt <> + braces ("\\externalfigure" <> brackets (text src)) <> blankline blockToConTeXt (Para lst) = do - st <- get - let options = stOptions st - contents <- wrapTeXIfNeeded options False inlineListToConTeXt lst - return $ Pad contents + contents <- inlineListToConTeXt lst + return $ contents <> blankline blockToConTeXt (BlockQuote lst) = do contents <- blockListToConTeXt lst - return $ Pad $ text "\\startblockquote" $$ contents $$ text "\\stopblockquote" -blockToConTeXt (CodeBlock _ str) = - return $ Reg $ text $ "\\starttyping\n" ++ str ++ "\n\\stoptyping\n" - -- \n because \stoptyping can't have anything after it, inc. } -blockToConTeXt (RawHtml _) = return $ Reg empty -blockToConTeXt (BulletList lst) = do + return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline +blockToConTeXt (CodeBlock _ str) = + return $ "\\starttyping" <> cr <> flush (text str) <> cr <> "\\stoptyping" $$ blankline + -- blankline because \stoptyping can't have anything after it, inc. '}' +blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline +blockToConTeXt (RawBlock _ _ ) = return empty +blockToConTeXt (BulletList lst) = do contents <- mapM listItemToConTeXt lst - return $ Pad $ text "\\startitemize" $$ vcat contents $$ text "\\stopitemize" + return $ "\\startitemize" $$ vcat contents $$ text "\\stopitemize" <> blankline blockToConTeXt (OrderedList (start, style', delim) lst) = do st <- get let level = stOrderedListLevel st @@ -159,20 +160,23 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do LowerAlpha -> "[a]" UpperAlpha -> "[A]" let specs = style'' ++ specs2 - return $ Pad $ text ("\\startitemize" ++ specs) $$ vcat contents $$ - text "\\stopitemize" + return $ "\\startitemize" <> text specs $$ vcat contents $$ + "\\stopitemize" <> blankline blockToConTeXt (DefinitionList lst) = - mapM defListItemToConTeXt lst >>= return . Pad . wrappedBlocksToDoc -blockToConTeXt HorizontalRule = return $ Pad $ text "\\thinrule" + liftM vcat $ mapM defListItemToConTeXt lst +blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline blockToConTeXt (Header level lst) = do contents <- inlineListToConTeXt lst st <- get let opts = stOptions st let base = if writerNumberSections opts then "section" else "subject" - return $ Pad $ if level >= 1 && level <= 5 - then char '\\' <> text (concat (replicate (level - 1) "sub")) <> - text base <> char '{' <> contents <> char '}' - else contents + let level' = if writerChapters opts then level - 1 else level + return $ if level' >= 1 && level' <= 5 + then char '\\' <> text (concat (replicate (level' - 1) "sub")) <> + text base <> char '{' <> contents <> char '}' <> blankline + else if level' == 0 + then "\\chapter{" <> contents <> "}" + else contents <> blankline blockToConTeXt (Table caption aligns widths heads rows) = do let colDescriptor colWidth alignment = (case alignment of AlignLeft -> 'l' @@ -186,81 +190,87 @@ blockToConTeXt (Table caption aligns widths heads rows) = do zipWith colDescriptor widths aligns) headers <- if all null heads then return empty - else liftM ($$ text "\\HL") $ tableRowToConTeXt heads + else liftM ($$ "\\HL") $ tableRowToConTeXt heads captionText <- inlineListToConTeXt caption let captionText' = if null caption then text "none" else captionText rows' <- mapM tableRowToConTeXt rows - return $ Pad $ text "\\placetable[here]{" <> captionText' <> char '}' $$ - text "\\starttable[" <> text colDescriptors <> char ']' $$ - text "\\HL" $$ headers $$ - vcat rows' $$ text "\\HL\n\\stoptable" + return $ "\\placetable[here]" <> braces captionText' $$ + "\\starttable" <> brackets (text colDescriptors) $$ + "\\HL" $$ headers $$ + vcat rows' $$ "\\HL" $$ "\\stoptable" <> blankline tableRowToConTeXt :: [[Block]] -> State WriterState Doc tableRowToConTeXt cols = do cols' <- mapM blockListToConTeXt cols - return $ (vcat (map (text "\\NC " <>) cols')) $$ - text "\\NC\\AR" + return $ (vcat (map ("\\NC " <>) cols')) $$ "\\NC\\AR" listItemToConTeXt :: [Block] -> State WriterState Doc listItemToConTeXt list = blockListToConTeXt list >>= - return . (text "\\item" $$) . (nest 2) + return . ("\\item" $$) . (nest 2) -defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState BlockWrapper +defListItemToConTeXt :: ([Inline], [[Block]]) -> State WriterState Doc defListItemToConTeXt (term, defs) = do term' <- inlineListToConTeXt term - def' <- liftM (vcat . intersperse (text "")) $ mapM blockListToConTeXt defs - return $ Pad $ text "\\startdescr{" <> term' <> char '}' $$ def' $$ text "\\stopdescr" + def' <- liftM vsep $ mapM blockListToConTeXt defs + return $ "\\startdescr" <> braces term' $$ nest 2 def' $$ + "\\stopdescr" <> blankline -- | Convert list of block elements to ConTeXt. blockListToConTeXt :: [Block] -> State WriterState Doc -blockListToConTeXt lst = mapM blockToConTeXt lst >>= return . wrappedBlocksToDoc +blockListToConTeXt lst = liftM vcat $ mapM blockToConTeXt lst -- | Convert list of inline elements to ConTeXt. inlineListToConTeXt :: [Inline] -- ^ Inlines to convert -> State WriterState Doc -inlineListToConTeXt lst = mapM inlineToConTeXt lst >>= return . hcat +inlineListToConTeXt lst = liftM hcat $ mapM inlineToConTeXt lst -- | Convert inline element to ConTeXt inlineToConTeXt :: Inline -- ^ Inline to convert -> State WriterState Doc inlineToConTeXt (Emph lst) = do contents <- inlineListToConTeXt lst - return $ text "{\\em " <> contents <> char '}' + return $ braces $ "\\em " <> contents inlineToConTeXt (Strong lst) = do contents <- inlineListToConTeXt lst - return $ text "{\\bf " <> contents <> char '}' + return $ braces $ "\\bf " <> contents inlineToConTeXt (Strikeout lst) = do contents <- inlineListToConTeXt lst - return $ text "\\overstrikes{" <> contents <> char '}' + return $ "\\overstrikes" <> braces contents inlineToConTeXt (Superscript lst) = do contents <- inlineListToConTeXt lst - return $ text "\\high{" <> contents <> char '}' + return $ "\\high" <> braces contents inlineToConTeXt (Subscript lst) = do contents <- inlineListToConTeXt lst - return $ text "\\low{" <> contents <> char '}' + return $ "\\low" <> braces contents inlineToConTeXt (SmallCaps lst) = do contents <- inlineListToConTeXt lst - return $ text "{\\sc " <> contents <> char '}' -inlineToConTeXt (Code str) = return $ text $ "\\type{" ++ str ++ "}" + return $ braces $ "\\sc " <> contents +inlineToConTeXt (Code _ str) | not ('{' `elem` str || '}' `elem` str) = + return $ "\\type" <> braces (text str) +inlineToConTeXt (Code _ str) = + return $ "\\mono" <> braces (text $ stringToConTeXt str) inlineToConTeXt (Quoted SingleQuote lst) = do contents <- inlineListToConTeXt lst - return $ text "\\quote{" <> contents <> char '}' + return $ "\\quote" <> braces contents inlineToConTeXt (Quoted DoubleQuote lst) = do contents <- inlineListToConTeXt lst - return $ text "\\quotation{" <> contents <> char '}' + return $ "\\quotation" <> braces contents inlineToConTeXt (Cite _ lst) = inlineListToConTeXt lst inlineToConTeXt Apostrophe = return $ char '\'' -inlineToConTeXt EmDash = return $ text "---" -inlineToConTeXt EnDash = return $ text "--" -inlineToConTeXt Ellipses = return $ text "\\ldots{}" +inlineToConTeXt EmDash = return "---" +inlineToConTeXt EnDash = return "--" +inlineToConTeXt Ellipses = return "\\ldots{}" inlineToConTeXt (Str str) = return $ text $ stringToConTeXt str -inlineToConTeXt (Math InlineMath str) = return $ char '$' <> text str <> char '$' -inlineToConTeXt (Math DisplayMath str) = return $ text "\\startformula " <> text str <> text " \\stopformula" -inlineToConTeXt (TeX str) = return $ text str -inlineToConTeXt (HtmlInline _) = return empty -inlineToConTeXt (LineBreak) = return $ text "\\crlf\n" -inlineToConTeXt Space = return $ char ' ' -inlineToConTeXt (Link [Code str] (src, tit)) = -- since ConTeXt has its own +inlineToConTeXt (Math InlineMath str) = + return $ char '$' <> text str <> char '$' +inlineToConTeXt (Math DisplayMath str) = + return $ text "\\startformula " <> text str <> text " \\stopformula" +inlineToConTeXt (RawInline "context" str) = return $ text str +inlineToConTeXt (RawInline "tex" str) = return $ text str +inlineToConTeXt (RawInline _ _) = return empty +inlineToConTeXt (LineBreak) = return $ text "\\crlf" <> cr +inlineToConTeXt Space = return space +inlineToConTeXt (Link [Code _ str] (src, tit)) = -- since ConTeXt has its own inlineToConTeXt (Link [Str str] (src, tit)) -- way of printing links... inlineToConTeXt (Link txt (src, _)) = do st <- get @@ -268,15 +278,12 @@ inlineToConTeXt (Link txt (src, _)) = do put $ st {stNextRef = next + 1} let ref = show next label <- inlineListToConTeXt txt - return $ text "\\useURL[" <> text ref <> text "][" <> text src <> - text "][][" <> label <> text "]\\from[" <> text ref <> char ']' + return $ "\\useURL" <> brackets (text ref) <> brackets (text src) <> + brackets empty <> brackets label <> + "\\from" <> brackets (text ref) inlineToConTeXt (Image _ (src, _)) = do - return $ text "{\\externalfigure[" <> text src <> text "]}" + return $ braces $ "\\externalfigure" <> brackets (text src) inlineToConTeXt (Note contents) = do contents' <- blockListToConTeXt contents - let rawnote = stripTrailingNewlines $ render contents' - -- note: a \n before } is needed when note ends with a \stoptyping - let optNewline = "\\stoptyping" `isSuffixOf` rawnote - return $ text "\\footnote{" <> - text rawnote <> (if optNewline then char '\n' else empty) <> char '}' - + return $ text "\\footnote{" <> + nest 2 contents' <> char '}' diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 5223259eb..9d09d46e3 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -33,15 +33,15 @@ import Text.Pandoc.XML import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath -import Data.List ( isPrefixOf, intercalate ) +import Data.List ( isPrefixOf, intercalate, isSuffixOf ) import Data.Char ( toLower ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) import Text.Pandoc.Highlighting (languages, languagesByExtension) +import Text.Pandoc.Pretty -- | Convert list of authors to a docbook <author> section authorToDocbook :: WriterOptions -> [Inline] -> Doc authorToDocbook opts name' = - let name = render $ inlinesToDocbook opts name' + let name = render Nothing $ inlinesToDocbook opts name' in if ',' `elem` name then -- last name first let (lastname, rest) = break (==',') name @@ -61,16 +61,24 @@ authorToDocbook opts name' = -- | Convert Pandoc document to string in Docbook format. writeDocbook :: WriterOptions -> Pandoc -> String writeDocbook opts (Pandoc (Meta tit auths dat) blocks) = - let title = wrap opts tit + let title = inlinesToDocbook opts tit authors = map (authorToDocbook opts) auths date = inlinesToDocbook opts dat elements = hierarchicalize blocks - main = render $ vcat (map (elementToDocbook opts) elements) + colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + render' = render colwidth + opts' = if "</book>" `isSuffixOf` + (removeTrailingSpace $ writerTemplate opts) + then opts{ writerChapters = True } + else opts + main = render' $ vcat (map (elementToDocbook opts') elements) context = writerVariables opts ++ [ ("body", main) - , ("title", render title) - , ("date", render date) ] ++ - [ ("author", render a) | a <- authors ] + , ("title", render' title) + , ("date", render' date) ] ++ + [ ("author", render' a) | a <- authors ] in if writerStandalone opts then renderTemplate context $ writerTemplate opts else main @@ -83,9 +91,12 @@ elementToDocbook opts (Sec _ _num id' title elements) = let elements' = if null elements then [Blk (Para [])] else elements - in inTags True "section" [("id",id')] $ - inTagsSimple "title" (wrap opts title) $$ - vcat (map (elementToDocbook opts) elements') + tag = if writerChapters opts + then "chapter" + else "section" + in inTags True tag [("id",id')] $ + inTagsSimple "title" (inlinesToDocbook opts title) $$ + vcat (map (elementToDocbook opts{ writerChapters = False }) elements') -- | Convert a list of Pandoc blocks to Docbook. blocksToDocbook :: WriterOptions -> [Block] -> Doc @@ -123,7 +134,7 @@ listItemToDocbook opts item = blockToDocbook :: WriterOptions -> Block -> Doc blockToDocbook _ Null = empty blockToDocbook _ (Header _ _) = empty -- should not occur after hierarchicalize -blockToDocbook opts (Plain lst) = wrap opts lst +blockToDocbook opts (Plain lst) = inlinesToDocbook opts lst blockToDocbook opts (Para [Image txt (src,_)]) = let capt = inlinesToDocbook opts txt in inTagsIndented "figure" $ @@ -132,12 +143,13 @@ blockToDocbook opts (Para [Image txt (src,_)]) = (inTagsIndented "imageobject" (selfClosingTag "imagedata" [("fileref",src)])) $$ inTagsSimple "textobject" (inTagsSimple "phrase" capt)) -blockToDocbook opts (Para lst) = inTagsIndented "para" $ wrap opts lst +blockToDocbook opts (Para lst) = + inTagsIndented "para" $ inlinesToDocbook opts lst blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" $ blocksToDocbook opts blocks blockToDocbook _ (CodeBlock (_,classes,_) str) = - text ("<screen" ++ lang ++ ">\n") <> - text (escapeStringForXML str) <> text "\n</screen>" + text ("<screen" ++ lang ++ ">") <> cr <> + flush (text (escapeStringForXML str) <> cr <> text "</screen>") where lang = if null langs then "" else " language=\"" ++ escapeStringForXML (head langs) ++ @@ -167,7 +179,10 @@ blockToDocbook opts (OrderedList (start, numstyle, _) (first:rest)) = in inTags True "orderedlist" attribs items blockToDocbook opts (DefinitionList lst) = inTagsIndented "variablelist" $ deflistItemsToDocbook opts lst -blockToDocbook _ (RawHtml str) = text str -- raw XML block +blockToDocbook _ (RawBlock "docbook" str) = text str -- raw XML block +-- we allow html for compatibility with earlier versions of pandoc +blockToDocbook _ (RawBlock "html" str) = text str -- raw XML block +blockToDocbook _ (RawBlock _ _) = empty blockToDocbook _ HorizontalRule = empty -- not semantic blockToDocbook opts (Table caption aligns widths headers rows) = let alignStrings = map alignmentToString aligns @@ -214,12 +229,6 @@ tableItemToDocbook opts tag align item = let attrib = [("align", align)] in inTags True tag attrib $ vcat $ map (blockToDocbook opts) item --- | Take list of inline elements and return wrapped doc. -wrap :: WriterOptions -> [Inline] -> Doc -wrap opts lst = if writerWrapText opts - then fsep $ map (inlinesToDocbook opts) (splitBy Space lst) - else inlinesToDocbook opts lst - -- | Convert a list of inline elements to Docbook. inlinesToDocbook :: WriterOptions -> [Inline] -> Doc inlinesToDocbook opts lst = hcat $ map (inlineToDocbook opts) lst @@ -249,22 +258,21 @@ inlineToDocbook _ Apostrophe = char '\'' inlineToDocbook _ Ellipses = text "…" inlineToDocbook _ EmDash = text "—" inlineToDocbook _ EnDash = text "–" -inlineToDocbook _ (Code str) = +inlineToDocbook _ (Code _ str) = inTagsSimple "literal" $ text (escapeStringForXML str) inlineToDocbook opts (Math _ str) = inlinesToDocbook opts $ readTeXMath str -inlineToDocbook _ (TeX _) = empty -inlineToDocbook _ (HtmlInline _) = empty -inlineToDocbook _ LineBreak = text $ "<literallayout></literallayout>" -inlineToDocbook _ Space = char ' ' +inlineToDocbook _ (RawInline _ _) = empty +inlineToDocbook _ LineBreak = inTagsSimple "literallayout" empty +inlineToDocbook _ Space = space inlineToDocbook opts (Link txt (src, _)) = if isPrefixOf "mailto:" src then let src' = drop 7 src emailLink = inTagsSimple "email" $ text $ escapeStringForXML $ src' - in if txt == [Code src'] - then emailLink - else inlinesToDocbook opts txt <+> char '(' <> emailLink <> - char ')' + in case txt of + [Code _ s] | s == src' -> emailLink + _ -> inlinesToDocbook opts txt <+> + char '(' <> emailLink <> char ')' else (if isPrefixOf "#" src then inTags False "link" [("linkend", drop 1 src)] else inTags False "ulink" [("url", src)]) $ @@ -275,6 +283,6 @@ inlineToDocbook _ (Image _ (src, tit)) = else inTagsIndented "objectinfo" $ inTagsIndented "title" (text $ escapeStringForXML tit) in inTagsIndented "inlinemediaobject" $ inTagsIndented "imageobject" $ - titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] + titleDoc $$ selfClosingTag "imagedata" [("fileref", src)] inlineToDocbook opts (Note contents) = inTagsIndented "footnote" $ blocksToDocbook opts contents diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index deaa2fe33..33b8aa76a 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -39,6 +39,7 @@ import Codec.Archive.Zip import System.Time import Text.Pandoc.Shared hiding ( Element ) import Text.Pandoc.Definition +import Text.Pandoc.Generic import Control.Monad (liftM) import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID @@ -69,7 +70,7 @@ writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do -- handle pictures picsRef <- newIORef [] - Pandoc _ blocks <- liftM (processWith transformBlock) $ processWithM + Pandoc _ blocks <- liftM (bottomUp transformBlock) $ bottomUpM (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc pics <- readIORef picsRef let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e -> @@ -232,13 +233,13 @@ transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do mathml ++ "</ops:case><ops:default>" ++ fallback ++ "</ops:default>" ++ "</ops:switch>" result = if "<math" `isPrefixOf` mathml then inOps else mathml - return $ HtmlInline result : xs -transformInlines _ _ _ (HtmlInline _ : xs) = return $ Str "" : xs + return $ RawInline "html" result : xs +transformInlines _ _ _ (RawInline _ _ : xs) = return $ Str "" : xs transformInlines _ _ _ (Link lab (_,_) : xs) = return $ lab ++ xs transformInlines _ _ _ xs = return xs transformBlock :: Block -> Block -transformBlock (RawHtml _) = Null +transformBlock (RawBlock _ _) = Null transformBlock x = x (!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index d2a400c5c..ef14b6809 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -105,8 +105,8 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do toc <- if writerTableOfContents opts then tableOfContents opts sects else return Nothing - let startSlide = RawHtml "<div class=\"slide\">\n" - endSlide = RawHtml "</div>\n" + let startSlide = RawBlock "html" "<div class=\"slide\">\n" + endSlide = RawBlock "html" "</div>\n" let cutUp (HorizontalRule : Header 1 ys : xs) = cutUp (Header 1 ys : xs) cutUp (HorizontalRule : xs) = [endSlide, startSlide] ++ cutUp xs cutUp (Header 1 ys : xs) = [endSlide, startSlide] ++ @@ -134,6 +134,8 @@ pandocToHtml opts (Pandoc (Meta title' authors' date') blocks) = do MathML (Just url) -> script ! [src url, thetype "text/javascript"] $ noHtml + MathJax url -> + script ! [src url, thetype "text/javascript"] $ noHtml JsMath (Just url) -> script ! [src url, thetype "text/javascript"] $ noHtml @@ -168,6 +170,7 @@ inTemplate opts tit auths date toc body' newvars = , ("pagetitle", topTitle') , ("title", renderHtmlFragment tit) , ("date", date') ] ++ + [ ("html5","true") | writerHtml5 opts ] ++ (case toc of Just t -> [ ("toc", renderHtmlFragment t)] Nothing -> []) ++ @@ -187,7 +190,12 @@ tableOfContents opts sects = do let tocList = catMaybes contents return $ if null tocList then Nothing - else Just $ thediv ! [prefixedId opts' "TOC"] $ unordList tocList + else Just $ + if writerHtml5 opts + then tag "nav" ! [prefixedId opts' "TOC"] $ + unordList tocList + else thediv ! [prefixedId opts' "TOC"] $ + unordList tocList -- | Convert section number to string showSecNum :: [Int] -> String @@ -224,7 +232,10 @@ elementToHtml opts (Sec level num id' title' elements) = do return $ if slides -- S5 gets confused by the extra divs around sections then toHtmlFromList stuff else if writerSectionDivs opts - then thediv ! [prefixedId opts id'] << stuff + then if writerHtml5 opts + then tag "section" ! [prefixedId opts id'] + << stuff + else thediv ! [prefixedId opts id'] << stuff else toHtmlFromList stuff -- | Convert list of Note blocks to a footnote <div>. @@ -287,6 +298,12 @@ obfuscateChar char = obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar . decodeCharacterReferences +attrsToHtml :: WriterOptions -> Attr -> [HtmlAttr] +attrsToHtml opts (id',classes',keyvals) = + [theclass (unwords classes') | not (null classes')] ++ + [prefixedId opts id' | not (null id')] ++ + map (\(x,y) -> strAttr x y) keyvals + -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> State WriterState Html blockToHtml _ Null = return $ noHtml @@ -294,22 +311,24 @@ blockToHtml opts (Plain lst) = inlineListToHtml opts lst blockToHtml opts (Para [Image txt (s,tit)]) = do img <- inlineToHtml opts (Image txt (s,tit)) capt <- inlineListToHtml opts txt - return $ thediv ! [theclass "figure"] << - [img, paragraph ! [theclass "caption"] << capt] + return $ if writerHtml5 opts + then tag "figure" << + [img, tag "figcaption" << capt] + else thediv ! [theclass "figure"] << + [img, paragraph ! [theclass "caption"] << capt] blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) -blockToHtml _ (RawHtml str) = return $ primHtml str +blockToHtml _ (RawBlock "html" str) = return $ primHtml str +blockToHtml _ (RawBlock _ _) = return noHtml blockToHtml _ (HorizontalRule) = return $ hr blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do let classes' = if writerLiterateHaskell opts then classes else filter (/= "literate") classes - case highlightHtml (id',classes',keyvals) rawCode of + case highlightHtml False (id',classes',keyvals) rawCode of Left _ -> -- change leading newlines into <br /> tags, because some -- browsers ignore leading newlines in pre blocks let (leadingBreaks, rawCode') = span (=='\n') rawCode - attrs = [theclass (unwords classes') | not (null classes')] ++ - [prefixedId opts id' | not (null id')] ++ - map (\(x,y) -> strAttr x y) keyvals + attrs = attrsToHtml opts (id', classes', keyvals) addBird = if "literate" `elem` classes' then unlines . map ("> " ++) . lines else unlines . lines @@ -366,7 +385,17 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do then [start startnum] else []) ++ (if numstyle /= DefaultStyle - then [thestyle $ "list-style-type: " ++ numstyle' ++ ";"] + then if writerHtml5 opts + then [strAttr "type" $ + case numstyle of + Decimal -> "1" + LowerAlpha -> "a" + UpperAlpha -> "A" + LowerRoman -> "i" + UpperRoman -> "I" + _ -> "1"] + else [thestyle $ "list-style-type: " ++ + numstyle'] else []) return $ ordList ! attribs $ contents blockToHtml opts (DefinitionList lst) = do @@ -379,28 +408,30 @@ blockToHtml opts (DefinitionList lst) = do else [] return $ dlist ! attribs << concat contents blockToHtml opts (Table capt aligns widths headers rows') = do - let alignStrings = map alignmentToString aligns captionDoc <- if null capt then return noHtml else inlineListToHtml opts capt >>= return . caption let percent w = show (truncate (100*w) :: Integer) ++ "%" + let widthAttrs w = if writerHtml5 opts + then [thestyle $ "width: " ++ percent w] + else [width $ percent w] let coltags = if all (== 0.0) widths then noHtml else concatHtml $ map - (\w -> col ! [width $ percent w] $ noHtml) widths + (\w -> col ! (widthAttrs w) $ noHtml) widths head' <- if all null headers then return noHtml - else liftM (thead <<) $ tableRowToHtml opts alignStrings 0 headers + else liftM (thead <<) $ tableRowToHtml opts aligns 0 headers body' <- liftM (tbody <<) $ - zipWithM (tableRowToHtml opts alignStrings) [1..] rows' + zipWithM (tableRowToHtml opts aligns) [1..] rows' return $ table $ captionDoc +++ coltags +++ head' +++ body' tableRowToHtml :: WriterOptions - -> [String] + -> [Alignment] -> Int -> [[Block]] -> State WriterState Html -tableRowToHtml opts alignStrings rownum cols' = do +tableRowToHtml opts aligns rownum cols' = do let mkcell = if rownum == 0 then th else td let rowclass = case rownum of 0 -> "header" @@ -408,7 +439,7 @@ tableRowToHtml opts alignStrings rownum cols' = do _ -> "even" cols'' <- sequence $ zipWith (\alignment item -> tableItemToHtml opts mkcell alignment item) - alignStrings cols' + aligns cols' return $ tr ! [theclass rowclass] $ toHtmlFromList cols'' alignmentToString :: Alignment -> [Char] @@ -420,12 +451,15 @@ alignmentToString alignment = case alignment of tableItemToHtml :: WriterOptions -> (Html -> Html) - -> [Char] + -> Alignment -> [Block] -> State WriterState Html tableItemToHtml opts tag' align' item = do contents <- blockListToHtml opts item - return $ tag' ! [align align'] $ contents + let alignAttrs = if writerHtml5 opts + then [thestyle $ "align: " ++ alignmentToString align'] + else [align $ alignmentToString align'] + return $ tag' ! alignAttrs $ contents blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html blockListToHtml opts lst = @@ -449,7 +483,11 @@ inlineToHtml opts inline = (Apostrophe) -> return $ stringToHtml "’" (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize (Strong lst) -> inlineListToHtml opts lst >>= return . strong - (Code str) -> return $ thecode << str + (Code attr str) -> case highlightHtml True attr str of + Left _ -> return + $ thecode ! (attrsToHtml opts attr) + $ stringToHtml str + Right h -> return h (Strikeout lst) -> inlineListToHtml opts lst >>= return . (thespan ! [thestyle "text-decoration: line-through;"]) (SmallCaps lst) -> inlineListToHtml opts lst >>= @@ -464,8 +502,7 @@ inlineToHtml opts inline = stringToHtml "”") in do contents <- inlineListToHtml opts lst return $ leftQuote +++ contents +++ rightQuote - (Math t str) -> - modify (\st -> st {stMath = True}) >> + (Math t str) -> modify (\st -> st {stMath = True}) >> (case writerHTMLMathMethod opts of LaTeXMathML _ -> -- putting LaTeXMathML in container with class "LaTeX" prevents @@ -487,7 +524,9 @@ inlineToHtml opts inline = InlineMath -> m DisplayMath -> br +++ m +++ br GladTeX -> - return $ primHtml $ "<EQ>" ++ str ++ "</EQ>" + return $ case t of + InlineMath -> primHtml $ "<EQ ENV=\"math\">" ++ str ++ "</EQ>" + DisplayMath -> primHtml $ "<EQ ENV=\"displaymath\">" ++ str ++ "</EQ>" MathML _ -> do let dt = if t == InlineMath then DisplayInline @@ -500,18 +539,23 @@ inlineToHtml opts inline = Left _ -> inlineListToHtml opts (readTeXMath str) >>= return . (thespan ! [theclass "math"]) + MathJax _ -> return $ primHtml $ + case t of + InlineMath -> "\\(" ++ str ++ "\\)" + DisplayMath -> "\\[" ++ str ++ "\\]" PlainMath -> do x <- inlineListToHtml opts (readTeXMath str) let m = thespan ! [theclass "math"] $ x return $ case t of InlineMath -> m DisplayMath -> br +++ m +++ br ) - (TeX str) -> case writerHTMLMathMethod opts of - LaTeXMathML _ -> do modify (\st -> st {stMath = True}) - return $ primHtml str - _ -> return noHtml - (HtmlInline str) -> return $ primHtml str - (Link [Code str] (s,_)) | "mailto:" `isPrefixOf` s -> + (RawInline "latex" str) -> case writerHTMLMathMethod opts of + LaTeXMathML _ -> do modify (\st -> st {stMath = True}) + return $ primHtml str + _ -> return noHtml + (RawInline "html" str) -> return $ primHtml str + (RawInline _ _) -> return noHtml + (Link [Code _ str] (s,_)) | "mailto:" `isPrefixOf` s -> return $ obfuscateLink opts str s (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt @@ -551,7 +595,7 @@ blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html blockListToNote opts ref blocks = -- If last block is Para or Plain, include the backlink at the end of -- that block. Otherwise, insert a new Plain block with the backlink. - let backlink = [HtmlInline $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++ + let backlink = [RawInline "html" $ " <a href=\"#" ++ writerIdentifierPrefix opts ++ "fnref" ++ ref ++ "\" class=\"footnoteBackLink\"" ++ " title=\"Jump back to footnote " ++ ref ++ "\">↩</a>"] blocks' = if null blocks diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 720c00ac8..28a1e7174 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -29,13 +30,15 @@ Conversion of 'Pandoc' format into LaTeX. -} module Text.Pandoc.Writers.LaTeX ( writeLaTeX ) where import Text.Pandoc.Definition +import Text.Pandoc.Generic import Text.Pandoc.Shared import Text.Pandoc.Templates import Text.Printf ( printf ) -import Data.List ( (\\), isSuffixOf, isPrefixOf, intersperse ) -import Data.Char ( toLower ) +import Data.List ( (\\), isSuffixOf, isPrefixOf, intercalate, intersperse ) +import Data.Char ( toLower, isPunctuation ) import Control.Monad.State -import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Text.Pandoc.Pretty +import System.FilePath (dropExtension) data WriterState = WriterState { stInNote :: Bool -- @True@ if we're in a note @@ -60,7 +63,7 @@ writeLaTeX options document = stVerbInNote = False, stEnumerate = False, stTable = False, stStrikeout = False, stSubscript = False, stUrl = False, stGraphics = False, - stLHS = False, stBook = False } + stLHS = False, stBook = writerChapters options } pandocToLaTeX :: WriterOptions -> Pandoc -> State WriterState String pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do @@ -70,13 +73,34 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do "{report}" `isSuffixOf` x) when (any usesBookClass (lines template)) $ modify $ \s -> s{stBook = True} - titletext <- liftM render $ inlineListToLaTeX title - authorsText <- mapM (liftM render . inlineListToLaTeX) authors - dateText <- liftM render $ inlineListToLaTeX date - body <- blockListToLaTeX blocks - let main = render body + opts <- liftM stOptions get + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + titletext <- liftM (render colwidth) $ inlineListToLaTeX title + authorsText <- mapM (liftM (render colwidth) . inlineListToLaTeX) authors + dateText <- liftM (render colwidth) $ inlineListToLaTeX date + let (blocks', lastHeader) = if writerCiteMethod options == Citeproc then + (blocks, []) + else case last blocks of + Header 1 il -> (init blocks, il) + _ -> (blocks, []) + body <- blockListToLaTeX blocks' + biblioTitle <- liftM (render colwidth) $ inlineListToLaTeX lastHeader + let main = render colwidth body st <- get - let context = writerVariables options ++ + let biblioFiles = intercalate "," $ map dropExtension $ writerBiblioFiles options + citecontext = case writerCiteMethod options of + Natbib -> [ ("biblio-files", biblioFiles) + , ("biblio-title", biblioTitle) + , ("natbib", "yes") + ] + Biblatex -> [ ("biblio-files", biblioFiles) + , ("biblio-title", biblioTitle) + , ("biblatex", "yes") + ] + _ -> [] + context = writerVariables options ++ [ ("toc", if writerTableOfContents options then "yes" else "") , ("body", main) , ("title", titletext) @@ -91,7 +115,10 @@ pandocToLaTeX options (Pandoc (Meta title authors date) blocks) = do [ ("url", "yes") | stUrl st ] ++ [ ("numbersections", "yes") | writerNumberSections options ] ++ [ ("lhs", "yes") | stLHS st ] ++ - [ ("graphics", "yes") | stGraphics st ] + [ ("graphics", "yes") | stGraphics st ] ++ + [ ("book-class", "yes") | stBook st] ++ + [ ("listings", "yes") | writerListings options ] ++ + citecontext return $ if writerStandalone options then renderTemplate context template else main @@ -107,7 +134,13 @@ stringToLaTeX = escapeStringUsing latexEscapes , ('|', "\\textbar{}") , ('<', "\\textless{}") , ('>', "\\textgreater{}") + , ('[', "{[}") -- to avoid interpretation as + , (']', "{]}") -- optional arguments , ('\160', "~") + , ('\x2018', "`") + , ('\x2019', "'") + , ('\x201C', "``") + , ('\x201D', "''") ] -- | Puts contents into LaTeX command. @@ -118,49 +151,73 @@ inCmd cmd contents = char '\\' <> text cmd <> braces contents -- (because it's illegal to have verbatim inside some command arguments) deVerb :: [Inline] -> [Inline] deVerb [] = [] -deVerb ((Code str):rest) = - (TeX $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest) +deVerb ((Code _ str):rest) = + (RawInline "latex" $ "\\texttt{" ++ stringToLaTeX str ++ "}"):(deVerb rest) deVerb (other:rest) = other:(deVerb rest) -- | Convert Pandoc block element to LaTeX. blockToLaTeX :: Block -- ^ Block to convert -> State WriterState Doc blockToLaTeX Null = return empty -blockToLaTeX (Plain lst) = do - st <- get - let opts = stOptions st - wrapTeXIfNeeded opts True inlineListToLaTeX lst +blockToLaTeX (Plain lst) = inlineListToLaTeX lst blockToLaTeX (Para [Image txt (src,tit)]) = do capt <- inlineListToLaTeX txt img <- inlineToLaTeX (Image txt (src,tit)) - return $ text "\\begin{figure}[htb]" $$ text "\\centering" $$ img $$ - (text "\\caption{" <> capt <> char '}') $$ text "\\end{figure}\n" + return $ "\\begin{figure}[htb]" $$ "\\centering" $$ img $$ + ("\\caption{" <> capt <> char '}') $$ "\\end{figure}" $$ blankline blockToLaTeX (Para lst) = do - st <- get - let opts = stOptions st - result <- wrapTeXIfNeeded opts True inlineListToLaTeX lst - return $ result <> char '\n' + result <- inlineListToLaTeX lst + return $ result <> blankline blockToLaTeX (BlockQuote lst) = do contents <- blockListToLaTeX lst - return $ text "\\begin{quote}" $$ contents $$ text "\\end{quote}" -blockToLaTeX (CodeBlock (_,classes,_) str) = do + return $ "\\begin{quote}" $$ contents $$ "\\end{quote}" +blockToLaTeX (CodeBlock (_,classes,keyvalAttr) str) = do st <- get env <- if writerLiterateHaskell (stOptions st) && "haskell" `elem` classes && "literate" `elem` classes then do modify $ \s -> s{ stLHS = True } return "code" - else if stInNote st - then do - modify $ \s -> s{ stVerbInNote = True } - return "Verbatim" - else return "verbatim" - return $ text ("\\begin{" ++ env ++ "}\n") <> text str <> - text ("\n\\end{" ++ env ++ "}") -blockToLaTeX (RawHtml _) = return empty + else if writerListings (stOptions st) + then return "lstlisting" + else if stInNote st + then do + modify $ \s -> s{ stVerbInNote = True } + return "Verbatim" + else return "verbatim" + let params = if writerListings (stOptions st) + then take 1 + [ "language=" ++ lang | lang <- classes + , lang `elem` ["ABAP","IDL","Plasm","ACSL","inform" + ,"POV","Ada","Java","Prolog","Algol" + ,"JVMIS","Promela","Ant","ksh","Python" + ,"Assembler","Lisp","R","Awk","Logo" + ,"Reduce","bash","make","Rexx","Basic" + ,"Mathematica","RSL","C","Matlab","Ruby" + ,"C++","Mercury","S","Caml","MetaPost" + ,"SAS","Clean","Miranda","Scilab","Cobol" + ,"Mizar","sh","Comal","ML","SHELXL","csh" + ,"Modula-2","Simula","Delphi","MuPAD" + ,"SQL","Eiffel","NASTRAN","tcl","Elan" + ,"Oberon-2","TeX","erlang","OCL" + ,"VBScript","Euphoria","Octave","Verilog" + ,"Fortran","Oz","VHDL","GCL","Pascal" + ,"VRML","Gnuplot","Perl","XML","Haskell" + ,"PHP","XSLT","HTML","PL/I"] + ] ++ + [ key ++ "=" ++ attr | (key,attr) <- keyvalAttr ] + else [] + printParams + | null params = empty + | otherwise = "[" <> hsep (intersperse "," (map text params)) <> + "]" + return $ "\\begin{" <> text env <> "}" <> printParams $$ flush (text str) $$ + "\\end{" <> text env <> "}" $$ cr -- final cr needed because of footnotes +blockToLaTeX (RawBlock "latex" x) = return $ text x <> blankline +blockToLaTeX (RawBlock _ _) = return empty blockToLaTeX (BulletList lst) = do items <- mapM listItemToLaTeX lst - return $ text "\\begin{itemize}" $$ vcat items $$ text "\\end{itemize}" + return $ "\\begin{itemize}" $$ vcat items $$ "\\end{itemize}" blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do st <- get let oldlevel = stOLLevel st @@ -179,20 +236,19 @@ blockToLaTeX (OrderedList (start, numstyle, numdelim) lst) = do map toLower (toRomanNumeral oldlevel) ++ "}{" ++ show (start - 1) ++ "}" else empty - return $ text "\\begin{enumerate}" <> exemplar $$ resetcounter $$ - vcat items $$ text "\\end{enumerate}" + return $ "\\begin{enumerate}" <> exemplar $$ resetcounter $$ + vcat items $$ "\\end{enumerate}" blockToLaTeX (DefinitionList lst) = do items <- mapM defListItemToLaTeX lst - return $ text "\\begin{description}" $$ vcat items $$ - text "\\end{description}" -blockToLaTeX HorizontalRule = return $ text $ - "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n" + return $ "\\begin{description}" $$ vcat items $$ "\\end{description}" +blockToLaTeX HorizontalRule = return $ + "\\begin{center}\\rule{3in}{0.4pt}\\end{center}" $$ blankline blockToLaTeX (Header level lst) = do let lst' = deVerb lst txt <- inlineListToLaTeX lst' let noNote (Note _) = Str "" noNote x = x - let lstNoNotes = processWith noNote lst' + let lstNoNotes = bottomUp noNote lst' -- footnotes in sections don't work unless you specify an optional -- argument: \section[mysec]{mysec\footnote{blah}} optional <- if lstNoNotes == lst' @@ -202,30 +258,31 @@ blockToLaTeX (Header level lst) = do return $ char '[' <> res <> char ']' let stuffing = optional <> char '{' <> txt <> char '}' book <- liftM stBook get - return $ case (book, level) of - (True, 1) -> text "\\chapter" <> stuffing <> char '\n' - (True, 2) -> text "\\section" <> stuffing <> char '\n' - (True, 3) -> text "\\subsection" <> stuffing <> char '\n' - (True, 4) -> text "\\subsubsection" <> stuffing <> char '\n' - (False, 1) -> text "\\section" <> stuffing <> char '\n' - (False, 2) -> text "\\subsection" <> stuffing <> char '\n' - (False, 3) -> text "\\subsubsection" <> stuffing <> char '\n' - _ -> txt <> char '\n' + let level' = if book then level - 1 else level + let headerWith x y = text x <> y $$ blankline + return $ case level' of + 0 -> headerWith "\\chapter" stuffing + 1 -> headerWith "\\section" stuffing + 2 -> headerWith "\\subsection" stuffing + 3 -> headerWith "\\subsubsection" stuffing + 4 -> headerWith "\\paragraph" stuffing + 5 -> headerWith "\\subparagraph" stuffing + _ -> txt $$ blankline blockToLaTeX (Table caption aligns widths heads rows) = do headers <- if all null heads then return empty - else liftM ($$ text "\\hline") $ tableRowToLaTeX heads + else liftM ($$ "\\hline") $ (tableRowToLaTeX widths) heads captionText <- inlineListToLaTeX caption - rows' <- mapM tableRowToLaTeX rows + rows' <- mapM (tableRowToLaTeX widths) rows let colDescriptors = concat $ zipWith toColDescriptor widths aligns let tableBody = text ("\\begin{tabular}{" ++ colDescriptors ++ "}") $$ - headers $$ vcat rows' $$ text "\\end{tabular}" - let centered txt = text "\\begin{center}" $$ txt $$ text "\\end{center}" + headers $$ vcat rows' $$ "\\end{tabular}" + let centered txt = "\\begin{center}" $$ txt $$ "\\end{center}" modify $ \s -> s{ stTable = True } return $ if isEmpty captionText - then centered tableBody <> char '\n' - else text "\\begin{table}[h]" $$ centered tableBody $$ - inCmd "caption" captionText $$ text "\\end{table}\n" + then centered tableBody $$ blankline + else "\\begin{table}[h]" $$ centered tableBody $$ + inCmd "caption" captionText $$ "\\end{table}" $$ blankline toColDescriptor :: Double -> Alignment -> String toColDescriptor 0 align = @@ -240,16 +297,19 @@ toColDescriptor width align = ">{\\PBS" ++ AlignRight -> "\\raggedleft" AlignCenter -> "\\centering" AlignDefault -> "\\raggedright") ++ - "\\hspace{0pt}}p{" ++ printf "%.2f" width ++ - "\\columnwidth}" + "\\hspace{0pt}}p{" ++ printf "%.2f" width ++ "\\columnwidth}" blockListToLaTeX :: [Block] -> State WriterState Doc blockListToLaTeX lst = mapM blockToLaTeX lst >>= return . vcat -tableRowToLaTeX :: [[Block]] -> State WriterState Doc -tableRowToLaTeX cols = mapM blockListToLaTeX cols >>= - return . ($$ text "\\\\") . foldl (\row item -> row $$ - (if isEmpty row then text "" else text " & ") <> item) empty +tableRowToLaTeX :: [Double] -> [[Block]] -> State WriterState Doc +tableRowToLaTeX widths cols = do + renderedCells <- mapM blockListToLaTeX cols + let toCell 0 c = c + toCell w c = "\\parbox{" <> text (printf "%.2f" w) <> + "\\columnwidth}{" <> c <> cr <> "}" + let cells = zipWith toCell widths renderedCells + return $ (hcat $ intersperse (" & ") cells) <> "\\\\" listItemToLaTeX :: [Block] -> State WriterState Doc listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . @@ -258,8 +318,8 @@ listItemToLaTeX lst = blockListToLaTeX lst >>= return . (text "\\item" $$) . defListItemToLaTeX :: ([Inline], [[Block]]) -> State WriterState Doc defListItemToLaTeX (term, defs) = do term' <- inlineListToLaTeX $ deVerb term - def' <- liftM (vcat . intersperse (text "")) $ mapM blockListToLaTeX defs - return $ text "\\item[" <> term' <> text "]" $$ def' + def' <- liftM vsep $ mapM blockListToLaTeX defs + return $ "\\item" <> brackets term' $$ def' -- | Convert list of inline elements to LaTeX. inlineListToLaTeX :: [Inline] -- ^ Inlines to convert @@ -292,60 +352,161 @@ inlineToLaTeX (Subscript lst) = do return $ inCmd "textsubscr" contents inlineToLaTeX (SmallCaps lst) = inlineListToLaTeX (deVerb lst) >>= return . inCmd "textsc" -inlineToLaTeX (Cite _ lst) = - inlineListToLaTeX lst -inlineToLaTeX (Code str) = do +inlineToLaTeX (Cite cits lst) = do + st <- get + let opts = stOptions st + case writerCiteMethod opts of + Natbib -> citationsToNatbib cits + Biblatex -> citationsToBiblatex cits + _ -> inlineListToLaTeX lst + +inlineToLaTeX (Code _ str) = do st <- get when (stInNote st) $ modify $ \s -> s{ stVerbInNote = True } let chr = ((enumFromTo '!' '~') \\ str) !! 0 - return $ text $ "\\verb" ++ [chr] ++ str ++ [chr] + if writerListings (stOptions st) + then return $ text $ "\\lstinline" ++ [chr] ++ str ++ [chr] + else return $ text $ "\\verb" ++ [chr] ++ str ++ [chr] inlineToLaTeX (Quoted SingleQuote lst) = do contents <- inlineListToLaTeX lst let s1 = if (not (null lst)) && (isQuoted (head lst)) - then text "\\," - else empty + then "\\," + else empty let s2 = if (not (null lst)) && (isQuoted (last lst)) - then text "\\," + then "\\," else empty return $ char '`' <> s1 <> contents <> s2 <> char '\'' inlineToLaTeX (Quoted DoubleQuote lst) = do contents <- inlineListToLaTeX lst let s1 = if (not (null lst)) && (isQuoted (head lst)) - then text "\\," - else empty + then "\\," + else empty let s2 = if (not (null lst)) && (isQuoted (last lst)) - then text "\\," + then "\\," else empty - return $ text "``" <> s1 <> contents <> s2 <> text "''" + return $ "``" <> s1 <> contents <> s2 <> "''" inlineToLaTeX Apostrophe = return $ char '\'' -inlineToLaTeX EmDash = return $ text "---" -inlineToLaTeX EnDash = return $ text "--" -inlineToLaTeX Ellipses = return $ text "\\ldots{}" +inlineToLaTeX EmDash = return "---" +inlineToLaTeX EnDash = return "--" +inlineToLaTeX Ellipses = return "\\ldots{}" inlineToLaTeX (Str str) = return $ text $ stringToLaTeX str inlineToLaTeX (Math InlineMath str) = return $ char '$' <> text str <> char '$' -inlineToLaTeX (Math DisplayMath str) = return $ text "\\[" <> text str <> text "\\]" -inlineToLaTeX (TeX str) = return $ text str -inlineToLaTeX (HtmlInline _) = return empty -inlineToLaTeX (LineBreak) = return $ text "\\\\" -inlineToLaTeX Space = return $ char ' ' +inlineToLaTeX (Math DisplayMath str) = return $ "\\[" <> text str <> "\\]" +inlineToLaTeX (RawInline "latex" str) = return $ text str +inlineToLaTeX (RawInline "tex" str) = return $ text str +inlineToLaTeX (RawInline _ _) = return empty +inlineToLaTeX (LineBreak) = return "\\\\" +inlineToLaTeX Space = return space inlineToLaTeX (Link txt (src, _)) = case txt of - [Code x] | x == src -> -- autolink + [Code _ x] | x == src -> -- autolink do modify $ \s -> s{ stUrl = True } return $ text $ "\\url{" ++ x ++ "}" _ -> do contents <- inlineListToLaTeX $ deVerb txt - return $ text ("\\href{" ++ src ++ "}{") <> contents <> - char '}' + return $ text ("\\href{" ++ stringToLaTeX src ++ "}{") <> + contents <> char '}' inlineToLaTeX (Image _ (source, _)) = do modify $ \s -> s{ stGraphics = True } - return $ text $ "\\includegraphics{" ++ source ++ "}" + return $ "\\includegraphics" <> braces (text source) inlineToLaTeX (Note contents) = do - st <- get - put (st {stInNote = True}) + modify (\s -> s{stInNote = True}) contents' <- blockListToLaTeX contents modify (\s -> s {stInNote = False}) - let rawnote = stripTrailingNewlines $ render contents' -- note: a \n before } is needed when note ends with a Verbatim environment - let optNewline = "\\end{Verbatim}" `isSuffixOf` rawnote - return $ text "\\footnote{" <> - text rawnote <> (if optNewline then char '\n' else empty) <> char '}' + return $ "\\footnote" <> braces (nest 2 contents') + + +citationsToNatbib :: [Citation] -> State WriterState Doc +citationsToNatbib (one:[]) + = citeCommand c p s k + where + Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } + = one + c = case m of + AuthorInText -> "citet" + SuppressAuthor -> "citeyearpar" + NormalCitation -> "citep" + +citationsToNatbib cits + | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits + = citeCommand "citep" p s ks + where + noPrefix = and . map (null . citationPrefix) + noSuffix = and . map (null . citationSuffix) + ismode m = and . map (((==) m) . citationMode) + p = citationPrefix $ head $ cits + s = citationSuffix $ last $ cits + ks = intercalate ", " $ map citationId cits + +citationsToNatbib (c:cs) | citationMode c == AuthorInText = do + author <- citeCommand "citeauthor" [] [] (citationId c) + cits <- citationsToNatbib (c { citationMode = SuppressAuthor } : cs) + return $ author <+> cits + +citationsToNatbib cits = do + cits' <- mapM convertOne cits + return $ text "\\citetext{" <> foldl combineTwo empty cits' <> text "}" + where + combineTwo a b | isEmpty a = b + | otherwise = a <> text "; " <> b + convertOne Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } + = case m of + AuthorInText -> citeCommand "citealt" p s k + SuppressAuthor -> citeCommand "citeyear" p s k + NormalCitation -> citeCommand "citealp" p s k + +citeCommand :: String -> [Inline] -> [Inline] -> String -> State WriterState Doc +citeCommand c p s k = do + args <- citeArguments p s k + return $ text ("\\" ++ c) <> args + +citeArguments :: [Inline] -> [Inline] -> String -> State WriterState Doc +citeArguments p s k = do + let s' = case s of + (Str (x:[]) : r) | isPunctuation x -> dropWhile (== Space) r + (Str (x:xs) : r) | isPunctuation x -> Str xs : r + _ -> s + pdoc <- inlineListToLaTeX p + sdoc <- inlineListToLaTeX s' + let optargs = case (isEmpty pdoc, isEmpty sdoc) of + (True, True ) -> empty + (True, False) -> brackets sdoc + (_ , _ ) -> brackets pdoc <> brackets sdoc + return $ optargs <> braces (text k) + +citationsToBiblatex :: [Citation] -> State WriterState Doc +citationsToBiblatex (one:[]) + = citeCommand cmd p s k + where + Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } = one + cmd = case m of + SuppressAuthor -> "autocite*" + AuthorInText -> "textcite" + NormalCitation -> "autocite" + +citationsToBiblatex (c:cs) = do + args <- mapM convertOne (c:cs) + return $ text cmd <> foldl (<>) empty args + where + cmd = case citationMode c of + AuthorInText -> "\\textcites" + _ -> "\\autocites" + convertOne Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + } + = citeArguments p s k + +citationsToBiblatex _ = return empty diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index a46a18893..78b9274d6 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Readers.TeXMath import Text.Printf ( printf ) import Data.List ( isPrefixOf, intersperse, intercalate ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Text.Pandoc.Pretty import Control.Monad.State type Notes = [[Block]] @@ -52,27 +52,31 @@ pandocToMan opts (Pandoc (Meta title authors date) blocks) = do titleText <- inlineListToMan opts title authors' <- mapM (inlineListToMan opts) authors date' <- inlineListToMan opts date - let (cmdName, rest) = break (== ' ') $ render titleText + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + let render' = render colwidth + let (cmdName, rest) = break (== ' ') $ render' titleText let (title', section) = case reverse cmdName of (')':d:'(':xs) | d `elem` ['0'..'9'] -> (text (reverse xs), char d) - xs -> (text (reverse xs), doubleQuotes empty) + xs -> (text (reverse xs), doubleQuotes empty) let description = hsep $ map (doubleQuotes . text . removeLeadingTrailingSpace) $ - splitBy '|' rest + splitBy (== '|') rest body <- blockListToMan opts blocks notes <- liftM stNotes get notes' <- notesToMan opts (reverse notes) - let main = render $ body $$ notes' $$ text "" + let main = render' $ body $$ notes' $$ text "" hasTables <- liftM stHasTables get let context = writerVariables opts ++ [ ("body", main) - , ("title", render title') - , ("section", render section) - , ("date", render date') - , ("description", render description) ] ++ + , ("title", render' title') + , ("section", render' section) + , ("date", render' date') + , ("description", render' description) ] ++ [ ("has-tables", "yes") | hasTables ] ++ - [ ("author", render a) | a <- authors' ] + [ ("author", render' a) | a <- authors' ] if writerStandalone opts then return $ renderTemplate context $ writerTemplate opts else return main @@ -89,7 +93,7 @@ notesToMan opts notes = noteToMan :: WriterOptions -> Int -> [Block] -> State WriterState Doc noteToMan opts num note = do contents <- blockListToMan opts note - let marker = text "\n.SS [" <> text (show num) <> char ']' + let marker = cr <> text ".SS " <> brackets (text (show num)) return $ marker $$ contents -- | Association list of characters to escape. @@ -136,14 +140,14 @@ blockToMan :: WriterOptions -- ^ Options -> State WriterState Doc blockToMan _ Null = return empty blockToMan opts (Plain inlines) = - liftM vcat $ mapM (wrapIfNeeded opts (inlineListToMan opts)) $ - splitSentences inlines + liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines blockToMan opts (Para inlines) = do - contents <- liftM vcat $ mapM (wrapIfNeeded opts (inlineListToMan opts)) $ + contents <- liftM vcat $ mapM (inlineListToMan opts) $ splitSentences inlines return $ text ".PP" $$ contents -blockToMan _ (RawHtml _) = return empty -blockToMan _ HorizontalRule = return $ text $ ".PP\n * * * * *" +blockToMan _ (RawBlock "man" str) = return $ text str +blockToMan _ (RawBlock _ _) = return empty +blockToMan _ HorizontalRule = return $ text ".PP" $$ text " * * * * *" blockToMan opts (Header level inlines) = do contents <- inlineListToMan opts inlines let heading = case level of @@ -256,7 +260,7 @@ definitionListItemToMan opts (label, defs) = do mapM (\item -> blockToMan opts item) rest first' <- blockToMan opts first return $ first' $$ text ".RS" $$ rest' $$ text ".RE" - return $ text ".TP\n.B " <> labelText $+$ contents + return $ text ".TP" $$ text ".B " <> labelText $$ contents -- | Convert list of Pandoc block elements to man. blockListToMan :: WriterOptions -- ^ Options @@ -303,23 +307,25 @@ inlineToMan _ EmDash = return $ text "\\[em]" inlineToMan _ EnDash = return $ text "\\[en]" inlineToMan _ Apostrophe = return $ char '\'' inlineToMan _ Ellipses = return $ text "\\&..." -inlineToMan _ (Code str) = +inlineToMan _ (Code _ str) = return $ text $ "\\f[C]" ++ escapeCode str ++ "\\f[]" inlineToMan _ (Str str) = return $ text $ escapeString str inlineToMan opts (Math InlineMath str) = inlineListToMan opts $ readTeXMath str inlineToMan opts (Math DisplayMath str) = do contents <- inlineListToMan opts $ readTeXMath str - return $ text ".RS" $$ contents $$ text ".RE" -inlineToMan _ (TeX _) = return empty -inlineToMan _ (HtmlInline _) = return empty -inlineToMan _ (LineBreak) = return $ text "\n.PD 0\n.P\n.PD\n" -inlineToMan _ Space = return $ char ' ' + return $ cr <> text ".RS" $$ contents $$ text ".RE" +inlineToMan _ (RawInline "man" str) = return $ text str +inlineToMan _ (RawInline _ _) = return empty +inlineToMan _ (LineBreak) = return $ + cr <> text ".PD 0" $$ text ".P" $$ text ".PD" <> cr +inlineToMan _ Space = return space inlineToMan opts (Link txt (src, _)) = do linktext <- inlineListToMan opts txt let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src - return $ if txt == [Code srcSuffix] - then char '<' <> text srcSuffix <> char '>' - else linktext <> text " (" <> text src <> char ')' + return $ case txt of + [Code _ s] + | s == srcSuffix -> char '<' <> text srcSuffix <> char '>' + _ -> linktext <> text " (" <> text src <> char ')' inlineToMan opts (Image alternate (source, tit)) = do let txt = if (null alternate) || (alternate == [Str ""]) || (alternate == [Str source]) -- to prevent autolinks diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 1b612006b..5e12c4aca 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -31,13 +32,13 @@ Markdown: <http://daringfireball.net/projects/markdown/> -} module Text.Pandoc.Writers.Markdown (writeMarkdown, writePlain) where import Text.Pandoc.Definition +import Text.Pandoc.Generic import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Shared -import Text.Pandoc.Parsing -import Text.Pandoc.Blocks +import Text.Pandoc.Parsing hiding (blankline) import Text.ParserCombinators.Parsec ( runParser, GenParser ) import Data.List ( group, isPrefixOf, find, intersperse, transpose ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Text.Pandoc.Pretty import Control.Monad.State type Notes = [[Block]] @@ -57,28 +58,28 @@ writeMarkdown opts document = -- pictures, or inline formatting). writePlain :: WriterOptions -> Pandoc -> String writePlain opts document = - evalState (pandocToMarkdown opts document') WriterState{ stNotes = [] - , stRefs = [] - , stPlain = True } + evalState (pandocToMarkdown opts{writerStrictMarkdown = True} + document') WriterState{ stNotes = [] + , stRefs = [] + , stPlain = True } where document' = plainify document plainify :: Pandoc -> Pandoc -plainify = processWith go - where go :: [Inline] -> [Inline] - go (Emph xs : ys) = go xs ++ go ys - go (Strong xs : ys) = go xs ++ go ys - go (Strikeout xs : ys) = go xs ++ go ys - go (Superscript xs : ys) = go xs ++ go ys - go (Subscript xs : ys) = go xs ++ go ys - go (SmallCaps xs : ys) = go xs ++ go ys - go (Code s : ys) = Str s : go ys - go (Math _ s : ys) = Str s : go ys - go (TeX _ : ys) = Str "" : go ys - go (HtmlInline _ : ys) = Str "" : go ys - go (Link xs _ : ys) = go xs ++ go ys - go (Image _ _ : ys) = go ys - go (x : ys) = x : go ys - go [] = [] +plainify = bottomUp go + where go :: Inline -> Inline + go (Emph xs) = SmallCaps xs + go (Strong xs) = SmallCaps xs + go (Strikeout xs) = SmallCaps xs + go (Superscript xs) = SmallCaps xs + go (Subscript xs) = SmallCaps xs + go (SmallCaps xs) = SmallCaps xs + go (Code _ s) = Str s + go (Math _ s) = Str s + go (RawInline _ _) = Str "" + go (Link xs _) = SmallCaps xs + go (Image xs _) = SmallCaps $ [Str "["] ++ xs ++ [Str "]"] + go (Cite _ cits) = SmallCaps cits + go x = x -- | Return markdown representation of document. pandocToMarkdown :: WriterOptions -> Pandoc -> State WriterState String @@ -96,15 +97,20 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do notes' <- notesToMarkdown opts (reverse $ stNotes st) st' <- get -- note that the notes may contain refs refs' <- refsToMarkdown opts (reverse $ stRefs st') - let main = render $ foldl ($+$) empty $ [body, notes', refs'] + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + let main = render colwidth $ body <> + (if isEmpty notes' then empty else blankline <> notes') <> + (if isEmpty refs' then empty else blankline <> refs') let context = writerVariables opts ++ - [ ("toc", render toc) + [ ("toc", render colwidth toc) , ("body", main) - , ("title", render title') - , ("date", render date') + , ("title", render colwidth title') + , ("date", render colwidth date') ] ++ [ ("titleblock", "yes") | titleblock ] ++ - [ ("author", render a) | a <- authors' ] + [ ("author", render colwidth a) | a <- authors' ] if writerStandalone opts then return $ renderTemplate context $ writerTemplate opts else return main @@ -112,29 +118,36 @@ pandocToMarkdown opts (Pandoc (Meta title authors date) blocks) = do -- | Return markdown representation of reference key table. refsToMarkdown :: WriterOptions -> Refs -> State WriterState Doc refsToMarkdown opts refs = mapM (keyToMarkdown opts) refs >>= return . vcat - + -- | Return markdown representation of a reference key. keyToMarkdown :: WriterOptions -> ([Inline], (String, String)) -> State WriterState Doc keyToMarkdown opts (label, (src, tit)) = do label' <- inlineListToMarkdown opts label - let tit' = if null tit then empty else text $ " \"" ++ tit ++ "\"" - return $ text " " <> char '[' <> label' <> char ']' <> text ": " <> - text src <> tit' + let tit' = if null tit + then empty + else space <> "\"" <> text tit <> "\"" + return $ nest 2 $ hang 2 + ("[" <> label' <> "]:" <> space) (text src <> tit') -- | Return markdown representation of notes. notesToMarkdown :: WriterOptions -> [[Block]] -> State WriterState Doc notesToMarkdown opts notes = - mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>= - return . vcat + mapM (\(num, note) -> noteToMarkdown opts num note) (zip [1..] notes) >>= + return . vsep -- | Return markdown representation of a note. noteToMarkdown :: WriterOptions -> Int -> [Block] -> State WriterState Doc noteToMarkdown opts num blocks = do contents <- blockListToMarkdown opts blocks - let marker = text "[^" <> text (show num) <> text "]:" - return $ hang' marker (writerTabStop opts) contents + let num' = text $ show num + let marker = text "[^" <> num' <> text "]:" + let markerSize = 4 + offset num' + let spacer = case writerTabStop opts - markerSize of + n | n > 0 -> text $ replicate n ' ' + _ -> text " " + return $ hang (writerTabStop opts) (marker <> spacer) contents -- | Escape special characters for Markdown. escapeString :: String -> String @@ -158,6 +171,22 @@ elementToListItem (Sec _ _ _ headerText subsecs) = [Plain headerText] ++ then [] else [BulletList $ map elementToListItem subsecs] +attrsToMarkdown :: Attr -> Doc +attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] + where attribId = case attribs of + ([],_,_) -> empty + (i,_,_) -> "#" <> text i + attribClasses = case attribs of + (_,[],_) -> empty + (_,cs,_) -> hsep $ + map (text . ('.':)) + cs + attribKeys = case attribs of + (_,_,[]) -> empty + (_,_,ks) -> hsep $ + map (\(k,v) -> text k + <> "=\"" <> text v <> "\"") ks + -- | Ordered list start parser for use in Para below. olMarker :: GenParser Char ParserState Char olMarker = do (start, style', delim) <- anyOrderedListMarker @@ -169,134 +198,139 @@ olMarker = do (start, style', delim) <- anyOrderedListMarker -- | True if string begins with an ordered list marker beginsWithOrderedListMarker :: String -> Bool -beginsWithOrderedListMarker str = - case runParser olMarker defaultParserState "para start" str of - Left _ -> False +beginsWithOrderedListMarker str = + case runParser olMarker defaultParserState "para start" (take 10 str) of + Left _ -> False Right _ -> True -wrappedMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc -wrappedMarkdown opts inlines = do - let chunks = splitBy LineBreak inlines - let chunks' = if null chunks - then [] - else (map (++ [Str " "]) $ init chunks) ++ [last chunks] - lns <- mapM (wrapIfNeeded opts (inlineListToMarkdown opts)) chunks' - return $ vcat lns - -- | Convert Pandoc block element to markdown. blockToMarkdown :: WriterOptions -- ^ Options -> Block -- ^ Block element -> State WriterState Doc blockToMarkdown _ Null = return empty -blockToMarkdown opts (Plain inlines) = - wrappedMarkdown opts inlines +blockToMarkdown opts (Plain inlines) = do + contents <- inlineListToMarkdown opts inlines + return $ contents <> cr blockToMarkdown opts (Para inlines) = do - contents <- wrappedMarkdown opts inlines + contents <- inlineListToMarkdown opts inlines -- escape if para starts with ordered list marker - let esc = if (not (writerStrictMarkdown opts)) && - beginsWithOrderedListMarker (render contents) - then char '\\' - else empty - return $ esc <> contents <> text "\n" -blockToMarkdown _ (RawHtml str) = do st <- get - if stPlain st - then return empty - else return $ text str -blockToMarkdown _ HorizontalRule = return $ text "\n* * * * *\n" + let esc = if (not (writerStrictMarkdown opts)) && + not (stPlain st) && + beginsWithOrderedListMarker (render Nothing contents) + then text "\\" + else empty + return $ esc <> contents <> blankline +blockToMarkdown _ (RawBlock f str) + | f == "html" || f == "latex" || f == "tex" || f == "markdown" = do + st <- get + if stPlain st + then return empty + else return $ text str <> text "\n" +blockToMarkdown _ (RawBlock _ _) = return empty +blockToMarkdown _ HorizontalRule = + return $ blankline <> text "* * * * *" <> blankline blockToMarkdown opts (Header level inlines) = do contents <- inlineListToMarkdown opts inlines st <- get -- use setext style headers if in literate haskell mode. -- ghc interprets '#' characters in column 1 as line number specifiers. if writerLiterateHaskell opts || stPlain st - then let len = length $ render contents - in return $ contents <> text "\n" <> - case level of - 1 -> text $ replicate len '=' ++ "\n" - 2 -> text $ replicate len '-' ++ "\n" - _ -> empty - else return $ text ((replicate level '#') ++ " ") <> contents <> text "\n" -blockToMarkdown opts (CodeBlock (_,classes,_) str) | "haskell" `elem` classes && - "literate" `elem` classes && - writerLiterateHaskell opts = - return $ (vcat $ map (text "> " <>) $ map text (lines str)) <> text "\n" -blockToMarkdown opts (CodeBlock _ str) = return $ - (nest (writerTabStop opts) $ vcat $ map text (lines str)) <> text "\n" + then let len = offset contents + in return $ contents <> cr <> + (case level of + 1 -> text $ replicate len '=' + 2 -> text $ replicate len '-' + _ -> empty) <> blankline + else return $ + text ((replicate level '#') ++ " ") <> contents <> blankline +blockToMarkdown opts (CodeBlock (_,classes,_) str) + | "haskell" `elem` classes && "literate" `elem` classes && + writerLiterateHaskell opts = + return $ prefixed "> " (text str) <> blankline +blockToMarkdown opts (CodeBlock attribs str) = return $ + if writerStrictMarkdown opts || attribs == nullAttr + then nest (writerTabStop opts) (text str) <> blankline + else -- use delimited code block + flush (tildes <> space <> attrs <> cr <> text str <> + cr <> tildes) <> blankline + where tildes = text "~~~~" + attrs = attrsToMarkdown attribs blockToMarkdown opts (BlockQuote blocks) = do st <- get -- if we're writing literate haskell, put a space before the bird tracks -- so they won't be interpreted as lhs... let leader = if writerLiterateHaskell opts - then text . (" > " ++) + then " > " else if stPlain st - then text . (" " ++) - else text . ("> " ++) + then " " + else "> " contents <- blockListToMarkdown opts blocks - return $ (vcat $ map leader $ lines $ render contents) <> - text "\n" + return $ (prefixed leader contents) <> blankline blockToMarkdown opts (Table caption aligns widths headers rows) = do caption' <- inlineListToMarkdown opts caption let caption'' = if null caption then empty - else text "" $+$ (text ": " <> caption') + else blankline <> ": " <> caption' <> blankline headers' <- mapM (blockListToMarkdown opts) headers let alignHeader alignment = case alignment of - AlignLeft -> leftAlignBlock - AlignCenter -> centerAlignBlock - AlignRight -> rightAlignBlock - AlignDefault -> leftAlignBlock + AlignLeft -> lblock + AlignCenter -> cblock + AlignRight -> rblock + AlignDefault -> lblock rawRows <- mapM (mapM (blockListToMarkdown opts)) rows let isSimple = all (==0) widths - let numChars = maximum . map (length . render) + let numChars = maximum . map offset let widthsInChars = if isSimple then map ((+2) . numChars) $ transpose (headers' : rawRows) - else map (floor . (78 *)) widths - let makeRow = hsepBlocks . (zipWith alignHeader aligns) . - (zipWith docToBlock widthsInChars) + else map (floor . (fromIntegral (writerColumns opts) *)) widths + let makeRow = hcat . intersperse (lblock 1 (text " ")) . + (zipWith3 alignHeader aligns widthsInChars) let rows' = map makeRow rawRows let head' = makeRow headers' - let maxRowHeight = maximum $ map heightOfBlock (head':rows') - let underline = hsep $ - map (\width -> text $ replicate width '-') widthsInChars + let maxRowHeight = maximum $ map height (head':rows') + let underline = cat $ intersperse (text " ") $ + map (\width -> text (replicate width '-')) widthsInChars let border = if maxRowHeight > 1 - then text $ replicate (sum widthsInChars + (length widthsInChars - 1)) '-' + then text (replicate (sum widthsInChars + + length widthsInChars - 1) '-') else if all null headers then underline else empty let head'' = if all null headers then empty - else border $+$ blockToDoc head' - let spacer = if maxRowHeight > 1 - then text "" - else empty - let body = vcat $ intersperse spacer $ map blockToDoc rows' + else border <> cr <> head' + let body = if maxRowHeight > 1 + then vsep rows' + else vcat rows' let bottom = if all null headers then underline else border - return $ (nest 2 $ head'' $+$ underline $+$ body $+$ - bottom $+$ caption'') <> text "\n" + return $ nest 2 $ head'' $$ underline $$ body $$ + bottom $$ blankline $$ caption'' $$ blankline blockToMarkdown opts (BulletList items) = do contents <- mapM (bulletListItemToMarkdown opts) items - return $ (vcat contents) <> text "\n" + return $ cat contents <> blankline blockToMarkdown opts (OrderedList attribs items) = do let markers = orderedListMarkers attribs let markers' = map (\m -> if length m < 3 then m ++ replicate (3 - length m) ' ' - else m) markers + else m) markers contents <- mapM (\(item, num) -> orderedListItemToMarkdown opts item num) $ - zip markers' items - return $ (vcat contents) <> text "\n" + zip markers' items + return $ cat contents <> blankline blockToMarkdown opts (DefinitionList items) = do contents <- mapM (definitionListItemToMarkdown opts) items - return $ (vcat contents) <> text "\n" + return $ cat contents <> blankline -- | Convert bullet list item (list of blocks) to markdown. bulletListItemToMarkdown :: WriterOptions -> [Block] -> State WriterState Doc bulletListItemToMarkdown opts items = do contents <- blockListToMarkdown opts items - return $ hang' (text "- ") (writerTabStop opts) contents + let sps = replicate (writerTabStop opts - 2) ' ' + let start = text ('-' : ' ' : sps) + return $ hang (writerTabStop opts) start $ contents <> cr -- | Convert ordered list item (a list of blocks) to markdown. orderedListItemToMarkdown :: WriterOptions -- ^ options @@ -305,8 +339,11 @@ orderedListItemToMarkdown :: WriterOptions -- ^ options -> State WriterState Doc orderedListItemToMarkdown opts marker items = do contents <- blockListToMarkdown opts items - return $ hsep [nest (min (3 - length marker) 0) (text marker), - nest (writerTabStop opts) contents] + let sps = case length marker - writerTabStop opts of + n | n > 0 -> text $ replicate n ' ' + _ -> text " " + let start = text marker <> sps + return $ hang (writerTabStop opts) start $ contents <> cr -- | Convert definition list item (label, list of blocks) to markdown. definitionListItemToMarkdown :: WriterOptions @@ -316,17 +353,20 @@ definitionListItemToMarkdown opts (label, defs) = do labelText <- inlineListToMarkdown opts label let tabStop = writerTabStop opts st <- get - let leader = if stPlain st then empty else text " ~" - contents <- liftM vcat $ - mapM (liftM ((leader $$) . nest tabStop . vcat) . mapM (blockToMarkdown opts)) defs - return $ labelText $+$ contents + let leader = if stPlain st then " " else " ~" + let sps = case writerTabStop opts - 3 of + n | n > 0 -> text $ replicate n ' ' + _ -> text " " + defs' <- mapM (mapM (blockToMarkdown opts)) defs + let contents = vcat $ map (\d -> hang tabStop (leader <> sps) $ vcat d <> cr) defs' + return $ labelText <> cr <> contents <> cr -- | Convert list of Pandoc block elements to markdown. blockListToMarkdown :: WriterOptions -- ^ Options -> [Block] -- ^ List of block elements -> State WriterState Doc blockListToMarkdown opts blocks = - mapM (blockToMarkdown opts) blocks >>= return . vcat + mapM (blockToMarkdown opts) blocks >>= return . cat -- | Get reference for target; if none exists, create unique one and return. -- Prefer label if possible; otherwise, generate a unique key. @@ -349,86 +389,132 @@ getReference label (src, tit) = do -- | Convert list of Pandoc inline elements to markdown. inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc inlineListToMarkdown opts lst = - mapM (inlineToMarkdown opts) lst >>= return . hcat + mapM (inlineToMarkdown opts) lst >>= return . cat + +escapeSpaces :: Inline -> Inline +escapeSpaces (Str s) = Str $ substitute " " "\\ " s +escapeSpaces Space = Str "\\ " +escapeSpaces x = x -- | Convert Pandoc inline element to markdown. inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc inlineToMarkdown opts (Emph lst) = do contents <- inlineListToMarkdown opts lst - return $ char '*' <> contents <> char '*' + return $ "*" <> contents <> "*" inlineToMarkdown opts (Strong lst) = do contents <- inlineListToMarkdown opts lst - return $ text "**" <> contents <> text "**" + return $ "**" <> contents <> "**" inlineToMarkdown opts (Strikeout lst) = do contents <- inlineListToMarkdown opts lst - return $ text "~~" <> contents <> text "~~" + return $ "~~" <> contents <> "~~" inlineToMarkdown opts (Superscript lst) = do - contents <- inlineListToMarkdown opts lst - let contents' = text $ substitute " " "\\ " $ render contents - return $ char '^' <> contents' <> char '^' + let lst' = bottomUp escapeSpaces lst + contents <- inlineListToMarkdown opts lst' + return $ "^" <> contents <> "^" inlineToMarkdown opts (Subscript lst) = do - contents <- inlineListToMarkdown opts lst - let contents' = text $ substitute " " "\\ " $ render contents - return $ char '~' <> contents' <> char '~' + let lst' = bottomUp escapeSpaces lst + contents <- inlineListToMarkdown opts lst' + return $ "~" <> contents <> "~" inlineToMarkdown opts (SmallCaps lst) = inlineListToMarkdown opts lst inlineToMarkdown opts (Quoted SingleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ char '‘' <> contents <> char '’' + return $ "‘" <> contents <> "’" inlineToMarkdown opts (Quoted DoubleQuote lst) = do contents <- inlineListToMarkdown opts lst - return $ char '“' <> contents <> char '”' -inlineToMarkdown _ EmDash = return $ char '\8212' -inlineToMarkdown _ EnDash = return $ char '\8211' -inlineToMarkdown _ Apostrophe = return $ char '\8217' -inlineToMarkdown _ Ellipses = return $ char '\8230' -inlineToMarkdown _ (Code str) = + return $ "“" <> contents <> "”" +inlineToMarkdown _ EmDash = return "\8212" +inlineToMarkdown _ EnDash = return "\8211" +inlineToMarkdown _ Apostrophe = return "\8217" +inlineToMarkdown _ Ellipses = return "\8230" +inlineToMarkdown opts (Code attr str) = let tickGroups = filter (\s -> '`' `elem` s) $ group str longest = if null tickGroups then 0 else maximum $ map length tickGroups marker = replicate (longest + 1) '`' - spacer = if (longest == 0) then "" else " " in - return $ text (marker ++ spacer ++ str ++ spacer ++ marker) + spacer = if (longest == 0) then "" else " " + attrs = if writerStrictMarkdown opts || attr == nullAttr + then empty + else attrsToMarkdown attr + in return $ text (marker ++ spacer ++ str ++ spacer ++ marker) <> attrs inlineToMarkdown _ (Str str) = do st <- get if stPlain st then return $ text str else return $ text $ escapeString str -inlineToMarkdown _ (Math InlineMath str) = return $ char '$' <> text str <> char '$' -inlineToMarkdown _ (Math DisplayMath str) = return $ text "$$" <> text str <> text "$$" -inlineToMarkdown _ (TeX str) = return $ text str -inlineToMarkdown _ (HtmlInline str) = return $ text str -inlineToMarkdown _ (LineBreak) = return $ text " \n" -inlineToMarkdown _ Space = return $ char ' ' -inlineToMarkdown opts (Cite _ cits) = inlineListToMarkdown opts cits +inlineToMarkdown _ (Math InlineMath str) = + return $ "$" <> text str <> "$" +inlineToMarkdown _ (Math DisplayMath str) = + return $ "$$" <> text str <> "$$" +inlineToMarkdown _ (RawInline f str) + | f == "html" || f == "latex" || f == "tex" || f == "markdown" = + return $ text str +inlineToMarkdown _ (RawInline _ _) = return empty +inlineToMarkdown opts (LineBreak) = return $ + if writerStrictMarkdown opts + then " " <> cr + else "\\" <> cr +inlineToMarkdown _ Space = return space +inlineToMarkdown opts (Cite (c:cs) lst) + | writerCiteMethod opts == Citeproc = inlineListToMarkdown opts lst + | citationMode c == AuthorInText = do + suffs <- inlineListToMarkdown opts $ citationSuffix c + rest <- mapM convertOne cs + let inbr = suffs <+> joincits rest + br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' + return $ text ("@" ++ citationId c) <+> br + | otherwise = do + cits <- mapM convertOne (c:cs) + return $ text "[" <> joincits cits <> text "]" + where + joincits = hcat . intersperse (text "; ") . filter (not . isEmpty) + convertOne Citation { citationId = k + , citationPrefix = pinlines + , citationSuffix = sinlines + , citationMode = m } + = do + pdoc <- inlineListToMarkdown opts pinlines + sdoc <- inlineListToMarkdown opts sinlines + let k' = text (modekey m ++ "@" ++ k) + r = case sinlines of + Str (y:_):_ | y `elem` ",;]@" -> k' <> sdoc + _ -> k' <+> sdoc + return $ pdoc <+> r + modekey SuppressAuthor = "-" + modekey _ = "" +inlineToMarkdown _ (Cite _ _) = return $ text "" inlineToMarkdown opts (Link txt (src', tit)) = do linktext <- inlineListToMarkdown opts txt - let linktitle = if null tit then empty else text $ " \"" ++ tit ++ "\"" + let linktitle = if null tit + then empty + else text $ " \"" ++ tit ++ "\"" let src = unescapeURI src' let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src let useRefLinks = writerReferenceLinks opts - let useAuto = null tit && txt == [Code srcSuffix] + let useAuto = case (tit,txt) of + ("", [Code _ s]) | s == srcSuffix -> True + _ -> False ref <- if useRefLinks then getReference txt (src, tit) else return [] reftext <- inlineListToMarkdown opts ref return $ if useAuto - then char '<' <> text srcSuffix <> char '>' + then "<" <> text srcSuffix <> ">" else if useRefLinks - then let first = char '[' <> linktext <> char ']' + then let first = "[" <> linktext <> "]" second = if txt == ref - then text "[]" - else char '[' <> reftext <> char ']' + then "[]" + else "[" <> reftext <> "]" in first <> second - else char '[' <> linktext <> char ']' <> - char '(' <> text src <> linktitle <> char ')' + else "[" <> linktext <> "](" <> + text src <> linktitle <> ")" inlineToMarkdown opts (Image alternate (source, tit)) = do let txt = if (null alternate) || (alternate == [Str ""]) || (alternate == [Str source]) -- to prevent autolinks then [Str "image"] else alternate - linkPart <- inlineToMarkdown opts (Link txt (unescapeURI source, tit)) - return $ char '!' <> linkPart + linkPart <- inlineToMarkdown opts (Link txt (source, tit)) + return $ "!" <> linkPart inlineToMarkdown _ (Note contents) = do modify (\st -> st{ stNotes = contents : stNotes st }) st <- get let ref = show $ (length $ stNotes st) - return $ text "[^" <> text ref <> char ']' + return $ "[^" <> text ref <> "]" diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index e8cb33caf..a7c7fc482 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -96,7 +96,9 @@ blockToMediaWiki opts (Para inlines) = do then "<p>" ++ contents ++ "</p>" else contents ++ if null listLevel then "\n" else "" -blockToMediaWiki _ (RawHtml str) = return str +blockToMediaWiki _ (RawBlock "mediawiki" str) = return str +blockToMediaWiki _ (RawBlock "html" str) = return str +blockToMediaWiki _ (RawBlock _ _) = return "" blockToMediaWiki _ HorizontalRule = return "\n-----\n" @@ -360,7 +362,7 @@ inlineToMediaWiki _ Apostrophe = return "’" inlineToMediaWiki _ Ellipses = return "…" -inlineToMediaWiki _ (Code str) = +inlineToMediaWiki _ (Code _ str) = return $ "<tt>" ++ (escapeString str) ++ "</tt>" inlineToMediaWiki _ (Str str) = return $ escapeString str @@ -368,9 +370,9 @@ inlineToMediaWiki _ (Str str) = return $ escapeString str inlineToMediaWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>" -- note: str should NOT be escaped -inlineToMediaWiki _ (TeX _) = return "" - -inlineToMediaWiki _ (HtmlInline str) = return str +inlineToMediaWiki _ (RawInline "mediawiki" str) = return str +inlineToMediaWiki _ (RawInline "html" str) = return str +inlineToMediaWiki _ (RawInline _ _) = return "" inlineToMediaWiki _ (LineBreak) = return "<br />\n" @@ -378,12 +380,12 @@ inlineToMediaWiki _ Space = return " " inlineToMediaWiki opts (Link txt (src, _)) = do label <- inlineListToMediaWiki opts txt - if txt == [Code src] -- autolink - then return src - else if isURI src - then return $ "[" ++ src ++ " " ++ label ++ "]" - else return $ "[[" ++ src' ++ "|" ++ label ++ "]]" - where src' = case src of + case txt of + [Code _ s] | s == src -> return src + _ -> if isURI src + then return $ "[" ++ src ++ " " ++ label ++ "]" + else return $ "[[" ++ src' ++ "|" ++ label ++ "]]" + where src' = case src of '/':xs -> xs -- with leading / it's a _ -> src -- link to a help page inlineToMediaWiki opts (Image alt (source, tit)) = do diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 3b5ea7481..d2b56cd17 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -25,62 +26,53 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Stability : alpha Portability : portable -Utility functions and definitions used by the various Pandoc modules. +Conversion of a 'Pandoc' document to a string representation. + +Note: If @writerStandalone@ is @False@, only the document body +is represented; otherwise, the full 'Pandoc' document, including the +metadata. -} module Text.Pandoc.Writers.Native ( writeNative ) where -import Text.Pandoc.Shared ( WriterOptions ) -import Data.List ( intercalate ) +import Text.Pandoc.Shared ( WriterOptions(..) ) +import Data.List ( intersperse ) import Text.Pandoc.Definition +import Text.Pandoc.Pretty --- | Indent string as a block. -indentBy :: Int -- ^ Number of spaces to indent the block - -> Int -- ^ Number of spaces (rel to block) to indent first line - -> String -- ^ Contents of block to indent - -> String -indentBy _ _ [] = "" -indentBy num first str = - let (firstLine:restLines) = lines str - firstLineIndent = num + first - in (replicate firstLineIndent ' ') ++ firstLine ++ "\n" ++ - (intercalate "\n" $ map ((replicate num ' ') ++ ) restLines) - --- | Prettyprint list of Pandoc blocks elements. -prettyBlockList :: Int -- ^ Number of spaces to indent list of blocks - -> [Block] -- ^ List of blocks - -> String -prettyBlockList indent [] = indentBy indent 0 "[]" -prettyBlockList indent blocks = indentBy indent (-2) $ "[ " ++ - (intercalate "\n, " (map prettyBlock blocks)) ++ " ]" +prettyList :: [Doc] -> Doc +prettyList ds = + "[" <> (cat $ intersperse (cr <> ",") $ map (nest 1) ds) <> "]" -- | Prettyprint Pandoc block element. -prettyBlock :: Block -> String -prettyBlock (BlockQuote blocks) = "BlockQuote\n " ++ - (prettyBlockList 2 blocks) +prettyBlock :: Block -> Doc +prettyBlock (BlockQuote blocks) = + "BlockQuote" $$ prettyList (map prettyBlock blocks) prettyBlock (OrderedList attribs blockLists) = - "OrderedList " ++ show attribs ++ "\n" ++ indentBy 2 0 ("[ " ++ - (intercalate ", " $ map (\blocks -> prettyBlockList 2 blocks) - blockLists)) ++ " ]" -prettyBlock (BulletList blockLists) = "BulletList\n" ++ - indentBy 2 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) blockLists))) ++ " ]" -prettyBlock (DefinitionList items) = "DefinitionList\n" ++ - indentBy 2 0 ("[ " ++ (intercalate "\n, " - (map (\(term, defs) -> "(" ++ show term ++ ",\n" ++ - indentBy 3 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) defs)) ++ "]") ++ - ")") items))) ++ " ]" + "OrderedList" <> space <> text (show attribs) $$ + (prettyList $ map (prettyList . map prettyBlock) blockLists) +prettyBlock (BulletList blockLists) = + "BulletList" $$ + (prettyList $ map (prettyList . map prettyBlock) blockLists) +prettyBlock (DefinitionList items) = "DefinitionList" $$ + (prettyList $ map deflistitem items) + where deflistitem (term, defs) = "(" <> text (show term) <> "," <> cr <> + nest 1 (prettyList $ map (prettyList . map prettyBlock) defs) <> ")" prettyBlock (Table caption aligns widths header rows) = - "Table " ++ show caption ++ " " ++ show aligns ++ " " ++ - show widths ++ "\n" ++ prettyRow header ++ " [\n" ++ - (intercalate ",\n" (map prettyRow rows)) ++ " ]" - where prettyRow cols = indentBy 2 0 ("[ " ++ (intercalate ", " - (map (\blocks -> prettyBlockList 2 blocks) - cols))) ++ " ]" -prettyBlock block = show block + "Table " <> text (show caption) <> " " <> text (show aligns) <> " " <> + text (show widths) $$ + prettyRow header $$ + prettyList (map prettyRow rows) + where prettyRow cols = prettyList (map (prettyList . map prettyBlock) cols) +prettyBlock block = text $ show block -- | Prettyprint Pandoc document. writeNative :: WriterOptions -> Pandoc -> String -writeNative _ (Pandoc meta blocks) = "Pandoc " ++ "(" ++ show meta ++ - ")\n" ++ (prettyBlockList 0 blocks) ++ "\n" - +writeNative opts (Pandoc meta blocks) = + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + withHead = if writerStandalone opts + then \bs -> text ("Pandoc " ++ "(" ++ show meta ++ ")") $$ + bs $$ cr + else id + in render colwidth $ withHead $ prettyList $ map prettyBlock blocks diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 5aa0fd310..cf1be8755 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -37,6 +37,7 @@ import System.Time import Paths_pandoc ( getDataFileName ) import Text.Pandoc.Shared ( WriterOptions(..) ) import Text.Pandoc.Definition +import Text.Pandoc.Generic import Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) import System.Directory import Control.Monad (liftM) @@ -63,8 +64,8 @@ writeODT mbRefOdt opts doc = do -- handle pictures picEntriesRef <- newIORef ([] :: [Entry]) let sourceDir = writerSourceDirectory opts - doc' <- processWithM (transformPic sourceDir picEntriesRef) doc - let newContents = writeOpenDocument opts doc' + doc' <- bottomUpM (transformPic sourceDir picEntriesRef) doc + let newContents = writeOpenDocument opts{writerWrapText = False} doc' (TOD epochtime _) <- getClockTime let contentEntry = toEntry "content.xml" epochtime $ fromString newContents picEntries <- readIORef picEntriesRef diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 4e3979c07..b9444aac7 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Shared import Text.Pandoc.XML import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Readers.TeXMath -import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Text.Pandoc.Pretty import Text.Printf ( printf ) import Control.Applicative ( (<$>) ) import Control.Arrow ( (***), (>>>) ) @@ -112,7 +112,9 @@ setInDefinitionList :: Bool -> State WriterState () setInDefinitionList b = modify $ \s -> s { stInDefinition = b } inParagraphTags :: Doc -> Doc -inParagraphTags = inTags False "text:p" [("text:style-name", "Text_20_body")] +inParagraphTags d | isEmpty d = empty +inParagraphTags d = + inTags False "text:p" [("text:style-name", "Text_20_body")] d inParagraphTagsWithStyle :: String -> Doc -> Doc inParagraphTagsWithStyle sty = inTags False "text:p" [("text:style-name", sty)] @@ -167,7 +169,11 @@ writeOpenDocument opts (Pandoc (Meta title authors date) blocks) = date'' <- inlinesToOpenDocument opts date doc'' <- blocksToOpenDocument opts blocks return (doc'', title'', authors'', date'') - body' = render doc + colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + render' = render colwidth + body' = render' doc styles = stTableStyles s ++ stParaStyles s ++ stTextStyles s listStyle (n,l) = inTags True "text:list-style" [("style:name", "L" ++ show n)] (vcat l) @@ -176,10 +182,10 @@ writeOpenDocument opts (Pandoc (Meta title authors date) blocks) = reverse $ styles ++ listStyles context = writerVariables opts ++ [ ("body", body') - , ("automatic-styles", render automaticStyles) - , ("title", render title') - , ("date", render date') ] ++ - [ ("author", render a) | a <- authors' ] + , ("automatic-styles", render' automaticStyles) + , ("title", render' title') + , ("date", render' date') ] ++ + [ ("author", render' a) | a <- authors' ] in if writerStandalone opts then renderTemplate context $ writerTemplate opts else body' @@ -273,7 +279,7 @@ blockToOpenDocument o bs | Header i b <- bs = inHeaderTags i <$> inlinesToOpenDocument o b | BlockQuote b <- bs = mkBlockQuote b | CodeBlock _ s <- bs = preformatted s - | RawHtml _ <- bs = return empty + | RawBlock _ _ <- bs = return empty | DefinitionList b <- bs = defList b | BulletList b <- bs = bulletListToOpenDocument o b | OrderedList a b <- bs = orderedList a b @@ -286,7 +292,7 @@ blockToOpenDocument o bs r <- vcat <$> mapM (deflistItemToOpenDocument o) b setInDefinitionList False return r - preformatted s = vcat <$> mapM (inPreformattedTags . escapeStringForXML) (lines s) + preformatted s = (flush . vcat) <$> mapM (inPreformattedTags . escapeStringForXML) (lines s) mkBlockQuote b = do increaseIndent i <- paraStyle "Quotations" [] inBlockQuote o i (map plainToPara b) @@ -346,7 +352,7 @@ inlineToOpenDocument o ils | EmDash <- ils = inTextStyle $ text "—" | EnDash <- ils = inTextStyle $ text "–" | Apostrophe <- ils = inTextStyle $ text "’" - | Space <- ils = inTextStyle $ char ' ' + | Space <- ils = inTextStyle space | LineBreak <- ils = return $ selfClosingTag "text:line-break" [] | Str s <- ils = inTextStyle $ handleSpaces $ escapeStringForXML s | Emph l <- ils = withTextStyle Italic $ inlinesToOpenDocument o l @@ -356,11 +362,12 @@ inlineToOpenDocument o ils | Subscript l <- ils = withTextStyle Sub $ inlinesToOpenDocument o l | SmallCaps l <- ils = withTextStyle SmallC $ inlinesToOpenDocument o l | Quoted t l <- ils = inQuotes t <$> inlinesToOpenDocument o l - | Code s <- ils = preformatted s + | Code _ s <- ils = preformatted s | Math _ s <- ils = inlinesToOpenDocument o (readTeXMath s) | Cite _ l <- ils = inlinesToOpenDocument o l - | TeX s <- ils = preformatted s - | HtmlInline s <- ils = preformatted s + | RawInline "opendocument" s <- ils = preformatted s + | RawInline "html" s <- ils = preformatted s -- for backwards compat. + | RawInline _ _ <- ils = return empty | Link l (s,t) <- ils = mkLink s t <$> inlinesToOpenDocument o l | Image _ (s,_) <- ils = return $ mkImg s | Note l <- ils = mkNote l diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs new file mode 100644 index 000000000..f7f314428 --- /dev/null +++ b/src/Text/Pandoc/Writers/Org.hs @@ -0,0 +1,284 @@ +{-# LANGUAGE OverloadedStrings #-} +{- +Copyright (C) 2006-2010 Puneeth Chaganti <punchagan@gmail.com> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Org + Copyright : Copyright (C) 2010 Puneeth Chaganti + License : GNU GPL, version 2 or above + + Maintainer : Puneeth Chaganti <punchagan@gmail.com> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to Emacs Org-Mode. + +Org-Mode: <http://orgmode.org> +-} +module Text.Pandoc.Writers.Org ( writeOrg) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Pretty +import Text.Pandoc.Templates (renderTemplate) +import Data.List ( intersect, intersperse, transpose ) +import Control.Monad.State +import Control.Applicative ( (<$>) ) + +data WriterState = + WriterState { stNotes :: [[Block]] + , stLinks :: Bool + , stImages :: Bool + , stHasMath :: Bool + , stOptions :: WriterOptions + } + +-- | Convert Pandoc to Org. +writeOrg :: WriterOptions -> Pandoc -> String +writeOrg opts document = + let st = WriterState { stNotes = [], stLinks = False, + stImages = False, stHasMath = False, + stOptions = opts } + in evalState (pandocToOrg document) st + +-- | Return Org representation of document. +pandocToOrg :: Pandoc -> State WriterState String +pandocToOrg (Pandoc (Meta tit auth dat) blocks) = do + opts <- liftM stOptions get + title <- titleToOrg tit + authors <- mapM inlineListToOrg auth + date <- inlineListToOrg dat + body <- blockListToOrg blocks + notes <- liftM (reverse . stNotes) get >>= notesToOrg + -- note that the notes may contain refs, so we do them first + hasMath <- liftM stHasMath get + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + let main = render colwidth $ foldl ($+$) empty $ [body, notes] + let context = writerVariables opts ++ + [ ("body", main) + , ("title", render Nothing title) + , ("date", render Nothing date) ] ++ + [ ("math", "yes") | hasMath ] ++ + [ ("author", render Nothing a) | a <- authors ] + if writerStandalone opts + then return $ renderTemplate context $ writerTemplate opts + else return main + +-- | Return Org representation of notes. +notesToOrg :: [[Block]] -> State WriterState Doc +notesToOrg notes = + mapM (\(num, note) -> noteToOrg num note) (zip [1..] notes) >>= + return . vsep + +-- | Return Org representation of a note. +noteToOrg :: Int -> [Block] -> State WriterState Doc +noteToOrg num note = do + contents <- blockListToOrg note + let marker = "[" ++ show num ++ "] " + return $ hang (length marker) (text marker) contents + +-- | Escape special characters for Org. +escapeString :: String -> String +escapeString = escapeStringUsing (backslashEscapes "^_") + +titleToOrg :: [Inline] -> State WriterState Doc +titleToOrg [] = return empty +titleToOrg lst = do + contents <- inlineListToOrg lst + return $ "#+TITLE: " <> contents + +-- | Convert Pandoc block element to Org. +blockToOrg :: Block -- ^ Block element + -> State WriterState Doc +blockToOrg Null = return empty +blockToOrg (Plain inlines) = inlineListToOrg inlines +blockToOrg (Para [Image txt (src,tit)]) = do + capt <- inlineListToOrg txt + img <- inlineToOrg (Image txt (src,tit)) + return $ "#+CAPTION: " <> capt <> blankline <> img +blockToOrg (Para inlines) = do + contents <- inlineListToOrg inlines + return $ contents <> blankline +blockToOrg (RawBlock "html" str) = + return $ blankline $$ "#+BEGIN_HTML" $$ + nest 2 (text str) $$ "#+END_HTML" $$ blankline +blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" = + return $ text str +blockToOrg (RawBlock _ _) = return empty +blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline +blockToOrg (Header level inlines) = do + contents <- inlineListToOrg inlines + let headerStr = text $ if level > 999 then " " else replicate level '*' + return $ headerStr <> " " <> contents <> blankline +blockToOrg (CodeBlock (_,classes,_) str) = do + opts <- stOptions <$> get + let tabstop = writerTabStop opts + let at = classes `intersect` ["asymptote", "C", "clojure", "css", "ditaa", + "dot", "emacs-lisp", "gnuplot", "haskell", "js", "latex", + "ledger", "lisp", "matlab", "mscgen", "ocaml", "octave", + "oz", "perl", "plantuml", "python", "R", "ruby", "sass", + "scheme", "screen", "sh", "sql", "sqlite"] + let (beg, end) = if null at + then ("#+BEGIN_EXAMPLE", "#+END_EXAMPLE") + else ("#+BEGIN_SRC" ++ head at, "#+END_SRC") + return $ text beg $$ nest tabstop (text str) $$ text end $$ blankline +blockToOrg (BlockQuote blocks) = do + contents <- blockListToOrg blocks + return $ blankline $$ "#+BEGIN_QUOTE" $$ + nest 2 contents $$ "#+END_QUOTE" $$ blankline +blockToOrg (Table caption' _ _ headers rows) = do + caption'' <- inlineListToOrg caption' + let caption = if null caption' + then empty + else ("#+CAPTION: " <> caption'') + headers' <- mapM blockListToOrg headers + rawRows <- mapM (mapM blockListToOrg) rows + let numChars = maximum . map offset + -- FIXME: width is not being used. + let widthsInChars = + map ((+2) . numChars) $ transpose (headers' : rawRows) + -- FIXME: Org doesn't allow blocks with height more than 1. + let hpipeBlocks blocks = hcat [beg, middle, end] + where h = maximum (map height blocks) + sep' = lblock 3 $ vcat (map text $ replicate h " | ") + beg = lblock 2 $ vcat (map text $ replicate h "| ") + end = lblock 2 $ vcat (map text $ replicate h " |") + middle = hcat $ intersperse sep' blocks + let makeRow = hpipeBlocks . zipWith lblock widthsInChars + let head' = makeRow headers' + rows' <- mapM (\row -> do cols <- mapM blockListToOrg row + return $ makeRow cols) rows + let border ch = char '|' <> char ch <> + (hcat $ intersperse (char ch <> char '+' <> char ch) $ + map (\l -> text $ replicate l ch) widthsInChars) <> + char ch <> char '|' + let body = vcat rows' + let head'' = if all null headers + then empty + else head' $$ border '-' + return $ head'' $$ body $$ caption $$ blankline +blockToOrg (BulletList items) = do + contents <- mapM bulletListItemToOrg items + -- ensure that sublists have preceding blank line + return $ blankline $+$ vcat contents $$ blankline +blockToOrg (OrderedList (start, _, delim) items) = do + let delim' = case delim of + TwoParens -> OneParen + x -> x + let markers = take (length items) $ orderedListMarkers + (start, Decimal, delim') + let maxMarkerLength = maximum $ map length markers + let markers' = map (\m -> let s = maxMarkerLength - length m + in m ++ replicate s ' ') markers + contents <- mapM (\(item, num) -> orderedListItemToOrg item num) $ + zip markers' items + -- ensure that sublists have preceding blank line + return $ blankline $$ vcat contents $$ blankline +blockToOrg (DefinitionList items) = do + contents <- mapM definitionListItemToOrg items + return $ vcat contents $$ blankline + +-- | Convert bullet list item (list of blocks) to Org. +bulletListItemToOrg :: [Block] -> State WriterState Doc +bulletListItemToOrg items = do + contents <- blockListToOrg items + return $ hang 3 "- " (contents <> cr) + +-- | Convert ordered list item (a list of blocks) to Org. +orderedListItemToOrg :: String -- ^ marker for list item + -> [Block] -- ^ list item (list of blocks) + -> State WriterState Doc +orderedListItemToOrg marker items = do + contents <- blockListToOrg items + return $ hang (length marker + 1) (text marker <> space) (contents <> cr) + +-- | Convert defintion list item (label, list of blocks) to Org. +definitionListItemToOrg :: ([Inline], [[Block]]) -> State WriterState Doc +definitionListItemToOrg (label, defs) = do + label' <- inlineListToOrg label + contents <- liftM vcat $ mapM blockListToOrg defs + return $ hang 3 "- " $ label' <> " :: " <> (contents <> cr) + +-- | Convert list of Pandoc block elements to Org. +blockListToOrg :: [Block] -- ^ List of block elements + -> State WriterState Doc +blockListToOrg blocks = mapM blockToOrg blocks >>= return . vcat + +-- | Convert list of Pandoc inline elements to Org. +inlineListToOrg :: [Inline] -> State WriterState Doc +inlineListToOrg lst = mapM inlineToOrg lst >>= return . hcat + +-- | Convert Pandoc inline element to Org. +inlineToOrg :: Inline -> State WriterState Doc +inlineToOrg (Emph lst) = do + contents <- inlineListToOrg lst + return $ "/" <> contents <> "/" +inlineToOrg (Strong lst) = do + contents <- inlineListToOrg lst + return $ "*" <> contents <> "*" +inlineToOrg (Strikeout lst) = do + contents <- inlineListToOrg lst + return $ "+" <> contents <> "+" +inlineToOrg (Superscript lst) = do + contents <- inlineListToOrg lst + return $ "^{" <> contents <> "}" +inlineToOrg (Subscript lst) = do + contents <- inlineListToOrg lst + return $ "_{" <> contents <> "}" +inlineToOrg (SmallCaps lst) = inlineListToOrg lst +inlineToOrg (Quoted SingleQuote lst) = do + contents <- inlineListToOrg lst + return $ "'" <> contents <> "'" +inlineToOrg (Quoted DoubleQuote lst) = do + contents <- inlineListToOrg lst + return $ "\"" <> contents <> "\"" +inlineToOrg (Cite _ lst) = inlineListToOrg lst +inlineToOrg EmDash = return "---" +inlineToOrg EnDash = return "--" +inlineToOrg Apostrophe = return "'" +inlineToOrg Ellipses = return "..." +inlineToOrg (Code _ str) = return $ "=" <> text str <> "=" +inlineToOrg (Str str) = return $ text $ escapeString str +inlineToOrg (Math t str) = do + modify $ \st -> st{ stHasMath = True } + return $ if t == InlineMath + then "$" <> text str <> "$" + else "$$" <> text str <> "$$" +inlineToOrg (RawInline f str) | f == "tex" || f == "latex" = return $ text str +inlineToOrg (RawInline _ _) = return empty +inlineToOrg (LineBreak) = return cr -- there's no line break in Org +inlineToOrg Space = return space +inlineToOrg (Link txt (src, _)) = do + case txt of + [Code _ x] | x == src -> -- autolink + do modify $ \s -> s{ stLinks = True } + return $ "[[" <> text x <> "]]" + _ -> do contents <- inlineListToOrg txt + modify $ \s -> s{ stLinks = True } + return $ "[[" <> text src <> "][" <> contents <> "]]" +inlineToOrg (Image _ (source', _)) = do + let source = unescapeURI source' + modify $ \s -> s{ stImages = True } + return $ "[[" <> text source <> "]]" +inlineToOrg (Note contents) = do + -- add to notes in state + notes <- get >>= (return . stNotes) + modify $ \st -> st { stNotes = contents:notes } + let ref = show $ (length notes) + 1 + return $ " [" <> text ref <> "]" diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index e79f97b33..d4adaa929 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu> @@ -32,10 +33,9 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html> module Text.Pandoc.Writers.RST ( writeRST) where import Text.Pandoc.Definition import Text.Pandoc.Shared -import Text.Pandoc.Blocks import Text.Pandoc.Templates (renderTemplate) -import Data.List ( isPrefixOf, isSuffixOf, intersperse, transpose ) -import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Data.List ( isPrefixOf, intersperse, transpose ) +import Text.Pandoc.Pretty import Control.Monad.State import Control.Applicative ( (<$>) ) @@ -70,13 +70,16 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do refs <- liftM (reverse . stLinks) get >>= refsToRST pics <- liftM (reverse . stImages) get >>= pictRefsToRST hasMath <- liftM stHasMath get - let main = render $ foldl ($+$) empty $ [body, notes, refs, pics] + let colwidth = if writerWrapText opts + then Just $ writerColumns opts + else Nothing + let main = render colwidth $ foldl ($+$) empty $ [body, notes, refs, pics] let context = writerVariables opts ++ [ ("body", main) - , ("title", render title) - , ("date", render date) ] ++ + , ("title", render Nothing title) + , ("date", render colwidth date) ] ++ [ ("math", "yes") | hasMath ] ++ - [ ("author", render a) | a <- authors ] + [ ("author", render colwidth a) | a <- authors ] if writerStandalone opts then return $ renderTemplate context $ writerTemplate opts else return main @@ -84,49 +87,40 @@ pandocToRST (Pandoc (Meta tit auth dat) blocks) = do -- | Return RST representation of reference key table. refsToRST :: Refs -> State WriterState Doc refsToRST refs = mapM keyToRST refs >>= return . vcat - + -- | Return RST representation of a reference key. keyToRST :: ([Inline], (String, String)) -> State WriterState Doc keyToRST (label, (src, _)) = do label' <- inlineListToRST label - let label'' = if ':' `elem` (render label') + let label'' = if ':' `elem` (render Nothing label') then char '`' <> label' <> char '`' else label' - return $ text ".. _" <> label'' <> text ": " <> text src + return $ ".. _" <> label'' <> ": " <> text src -- | Return RST representation of notes. notesToRST :: [[Block]] -> State WriterState Doc notesToRST notes = - mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>= - return . vcat + mapM (\(num, note) -> noteToRST num note) (zip [1..] notes) >>= + return . vsep -- | Return RST representation of a note. noteToRST :: Int -> [Block] -> State WriterState Doc noteToRST num note = do contents <- blockListToRST note - let marker = text ".. [" <> text (show num) <> text "]" + let marker = ".. [" <> text (show num) <> "]" return $ marker $$ nest 3 contents -- | Return RST representation of picture reference table. pictRefsToRST :: Refs -> State WriterState Doc pictRefsToRST refs = mapM pictToRST refs >>= return . vcat - + -- | Return RST representation of a picture substitution reference. pictToRST :: ([Inline], (String, String)) -> State WriterState Doc pictToRST (label, (src, _)) = do label' <- inlineListToRST label - return $ text ".. " <> char '|' <> label' <> char '|' <> text " image:: " <> - text src - --- | Take list of inline elements and return wrapped doc. -wrappedRST :: WriterOptions -> [Inline] -> State WriterState Doc -wrappedRST opts inlines = do - lineBreakDoc <- inlineToRST LineBreak - chunks <- mapM (wrapIfNeeded opts inlineListToRST) - (splitBy LineBreak inlines) - return $ vcat $ intersperse lineBreakDoc chunks + return $ ".. |" <> label' <> "| image:: " <> text src -- | Escape special characters for RST. escapeString :: String -> String @@ -136,69 +130,66 @@ titleToRST :: [Inline] -> State WriterState Doc titleToRST [] = return empty titleToRST lst = do contents <- inlineListToRST lst - let titleLength = length $ render contents + let titleLength = length $ (render Nothing contents :: String) let border = text (replicate titleLength '=') - return $ border $+$ contents $+$ border + return $ border $$ contents $$ border -- | Convert Pandoc block element to RST. blockToRST :: Block -- ^ Block element -> State WriterState Doc blockToRST Null = return empty -blockToRST (Plain inlines) = do - opts <- get >>= (return . stOptions) - wrappedRST opts inlines +blockToRST (Plain inlines) = inlineListToRST inlines blockToRST (Para [Image txt (src,tit)]) = do capt <- inlineListToRST txt - let fig = text "figure:: " <> text src - let align = text ":align: center" - let alt = text ":alt: " <> if null tit then capt else text tit - return $ (text ".. " <> (fig $$ align $$ alt $$ text "" $$ capt)) $$ text "" + let fig = "figure:: " <> text src + let align = ":align: center" + let alt = ":alt: " <> if null tit then capt else text tit + return $ hang 3 ".. " $ fig $$ align $$ alt $+$ capt $$ blankline blockToRST (Para inlines) = do - opts <- get >>= (return . stOptions) - contents <- wrappedRST opts inlines - return $ contents <> text "\n" -blockToRST (RawHtml str) = - let str' = if "\n" `isSuffixOf` str then str ++ "\n" else str ++ "\n\n" in - return $ (text "\n.. raw:: html\n") $$ (nest 3 $ vcat $ map text (lines str')) -blockToRST HorizontalRule = return $ text "--------------\n" + contents <- inlineListToRST inlines + return $ contents <> blankline +blockToRST (RawBlock f str) = + return $ blankline <> ".. raw:: " <> text f $+$ + (nest 3 $ text str) $$ blankline +blockToRST HorizontalRule = + return $ blankline $$ "--------------" $$ blankline blockToRST (Header level inlines) = do contents <- inlineListToRST inlines - let headerLength = length $ render contents let headerChar = if level > 5 then ' ' else "=-~^'" !! (level - 1) - let border = text $ replicate headerLength headerChar - return $ contents $+$ border <> text "\n" + let border = text $ replicate (offset contents) headerChar + return $ contents $$ border $$ blankline blockToRST (CodeBlock (_,classes,_) str) = do opts <- stOptions <$> get let tabstop = writerTabStop opts if "haskell" `elem` classes && "literate" `elem` classes && writerLiterateHaskell opts - then return $ (vcat $ map (text "> " <>) $ map text (lines str)) <> text "\n" - else return $ (text "::\n") $+$ - (nest tabstop $ vcat $ map text (lines str)) <> text "\n" + then return $ prefixed "> " (text str) $$ blankline + else return $ "::" $+$ nest tabstop (text str) $$ blankline blockToRST (BlockQuote blocks) = do tabstop <- get >>= (return . writerTabStop . stOptions) contents <- blockListToRST blocks - return $ (nest tabstop contents) <> text "\n" + return $ (nest tabstop contents) <> blankline blockToRST (Table caption _ widths headers rows) = do caption' <- inlineListToRST caption let caption'' = if null caption then empty - else text "" $+$ (text "Table: " <> caption') + else blankline <> text "Table: " <> caption' headers' <- mapM blockListToRST headers rawRows <- mapM (mapM blockListToRST) rows let isSimple = all (==0) widths && all (all (\bs -> length bs == 1)) rows - let numChars = maximum . map (length . render) + let numChars = maximum . map offset + opts <- get >>= return . stOptions let widthsInChars = if isSimple then map ((+2) . numChars) $ transpose (headers' : rawRows) - else map (floor . (78 *)) widths - let hpipeBlocks blocks = hcatBlocks [beg, middle, end] - where height = maximum (map heightOfBlock blocks) - sep' = TextBlock 3 height (replicate height " | ") - beg = TextBlock 2 height (replicate height "| ") - end = TextBlock 2 height (replicate height " |") - middle = hcatBlocks $ intersperse sep' blocks - let makeRow = hpipeBlocks . zipWith docToBlock widthsInChars + else map (floor . (fromIntegral (writerColumns opts) *)) widths + let hpipeBlocks blocks = hcat [beg, middle, end] + where h = maximum (map height blocks) + sep' = lblock 3 $ vcat (map text $ replicate h " | ") + beg = lblock 2 $ vcat (map text $ replicate h "| ") + end = lblock 2 $ vcat (map text $ replicate h " |") + middle = hcat $ intersperse sep' blocks + let makeRow = hpipeBlocks . zipWith lblock widthsInChars let head' = makeRow headers' rows' <- mapM (\row -> do cols <- mapM blockListToRST row return $ makeRow cols) rows @@ -206,15 +197,15 @@ blockToRST (Table caption _ widths headers rows) = do (hcat $ intersperse (char ch <> char '+' <> char ch) $ map (\l -> text $ replicate l ch) widthsInChars) <> char ch <> char '+' - let body = vcat $ intersperse (border '-') $ map blockToDoc rows' + let body = vcat $ intersperse (border '-') rows' let head'' = if all null headers then empty - else blockToDoc head' $+$ border '=' - return $ border '-' $+$ head'' $+$ body $+$ border '-' $$ caption'' $$ text "" + else head' $$ border '=' + return $ border '-' $$ head'' $$ body $$ border '-' $$ caption'' $$ blankline blockToRST (BulletList items) = do contents <- mapM bulletListItemToRST items -- ensure that sublists have preceding blank line - return $ text "" $+$ vcat contents <> text "\n" + return $ blankline $$ vcat contents $$ blankline blockToRST (OrderedList (start, style', delim) items) = do let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim then take (length items) $ repeat "#." @@ -224,18 +215,19 @@ blockToRST (OrderedList (start, style', delim) items) = do let markers' = map (\m -> let s = maxMarkerLength - length m in m ++ replicate s ' ') markers contents <- mapM (\(item, num) -> orderedListItemToRST item num) $ - zip markers' items + zip markers' items -- ensure that sublists have preceding blank line - return $ text "" $+$ vcat contents <> text "\n" + return $ blankline $$ vcat contents $$ blankline blockToRST (DefinitionList items) = do contents <- mapM definitionListItemToRST items - return $ (vcat contents) <> text "\n" + -- ensure that sublists have preceding blank line + return $ blankline $$ vcat contents $$ blankline -- | Convert bullet list item (list of blocks) to RST. bulletListItemToRST :: [Block] -> State WriterState Doc bulletListItemToRST items = do contents <- blockListToRST items - return $ (text "- ") <> contents + return $ hang 3 "- " $ contents <> cr -- | Convert ordered list item (a list of blocks) to RST. orderedListItemToRST :: String -- ^ marker for list item @@ -243,7 +235,8 @@ orderedListItemToRST :: String -- ^ marker for list item -> State WriterState Doc orderedListItemToRST marker items = do contents <- blockListToRST items - return $ (text marker <> char ' ') <> contents + let marker' = marker ++ " " + return $ hang (length marker') (text marker') $ contents <> cr -- | Convert defintion list item (label, list of blocks) to RST. definitionListItemToRST :: ([Inline], [[Block]]) -> State WriterState Doc @@ -251,7 +244,7 @@ definitionListItemToRST (label, defs) = do label' <- inlineListToRST label contents <- liftM vcat $ mapM blockListToRST defs tabstop <- get >>= (return . writerTabStop . stOptions) - return $ label' $+$ nest tabstop contents + return $ label' $$ nest tabstop (contents <> cr) -- | Convert list of Pandoc block elements to RST. blockListToRST :: [Block] -- ^ List of block elements @@ -266,65 +259,63 @@ inlineListToRST lst = mapM inlineToRST lst >>= return . hcat inlineToRST :: Inline -> State WriterState Doc inlineToRST (Emph lst) = do contents <- inlineListToRST lst - return $ char '*' <> contents <> char '*' + return $ "*" <> contents <> "*" inlineToRST (Strong lst) = do contents <- inlineListToRST lst - return $ text "**" <> contents <> text "**" + return $ "**" <> contents <> "**" inlineToRST (Strikeout lst) = do contents <- inlineListToRST lst - return $ text "[STRIKEOUT:" <> contents <> char ']' + return $ "[STRIKEOUT:" <> contents <> "]" inlineToRST (Superscript lst) = do contents <- inlineListToRST lst - return $ text "\\ :sup:`" <> contents <> text "`\\ " + return $ "\\ :sup:`" <> contents <> "`\\ " inlineToRST (Subscript lst) = do contents <- inlineListToRST lst - return $ text "\\ :sub:`" <> contents <> text "`\\ " + return $ "\\ :sub:`" <> contents <> "`\\ " inlineToRST (SmallCaps lst) = inlineListToRST lst inlineToRST (Quoted SingleQuote lst) = do contents <- inlineListToRST lst - return $ char '‘' <> contents <> char '’' + return $ "‘" <> contents <> "’" inlineToRST (Quoted DoubleQuote lst) = do contents <- inlineListToRST lst - return $ char '“' <> contents <> char '”' + return $ "“" <> contents <> "”" inlineToRST (Cite _ lst) = inlineListToRST lst inlineToRST EmDash = return $ char '\8212' inlineToRST EnDash = return $ char '\8211' inlineToRST Apostrophe = return $ char '\8217' inlineToRST Ellipses = return $ char '\8230' -inlineToRST (Code str) = return $ text $ "``" ++ str ++ "``" +inlineToRST (Code _ str) = return $ "``" <> text str <> "``" inlineToRST (Str str) = return $ text $ escapeString str inlineToRST (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath - then text $ ":math:`$" ++ str ++ "$`" - else text $ ":math:`$$" ++ str ++ "$$`" -inlineToRST (TeX _) = return empty -inlineToRST (HtmlInline _) = return empty -inlineToRST (LineBreak) = do - return $ empty -- there's no line break in RST -inlineToRST Space = return $ char ' ' -inlineToRST (Link [Code str] (src, _)) | src == str || - src == "mailto:" ++ str = do + then ":math:`$" <> text str <> "$`" + else ":math:`$$" <> text str <> "$$`" +inlineToRST (RawInline _ _) = return empty +inlineToRST (LineBreak) = return cr -- there's no line break in RST +inlineToRST Space = return space +inlineToRST (Link [Code _ str] (src, _)) | src == str || + src == "mailto:" ++ str = do let srcSuffix = if isPrefixOf "mailto:" src then drop 7 src else src return $ text $ unescapeURI srcSuffix inlineToRST (Link txt (src', tit)) = do let src = unescapeURI src' - useReferenceLinks <- get >>= (return . writerReferenceLinks . stOptions) + useReferenceLinks <- get >>= return . writerReferenceLinks . stOptions linktext <- inlineListToRST $ normalizeSpaces txt if useReferenceLinks - then do refs <- get >>= (return . stLinks) + then do refs <- get >>= return . stLinks let refs' = if (txt, (src, tit)) `elem` refs then refs else (txt, (src, tit)):refs modify $ \st -> st { stLinks = refs' } - return $ char '`' <> linktext <> text "`_" - else return $ char '`' <> linktext <> text " <" <> text src <> text ">`_" + return $ "`" <> linktext <> "`_" + else return $ "`" <> linktext <> " <" <> text src <> ">`_" inlineToRST (Image alternate (source', tit)) = do let source = unescapeURI source' - pics <- get >>= (return . stImages) + pics <- get >>= return . stImages let labelsUsed = map fst pics - let txt = if null alternate || alternate == [Str ""] || + let txt = if null alternate || alternate == [Str ""] || alternate `elem` labelsUsed then [Str $ "image" ++ show (length pics)] else alternate @@ -333,10 +324,10 @@ inlineToRST (Image alternate (source', tit)) = do else (txt, (source, tit)):pics modify $ \st -> st { stImages = pics' } label <- inlineListToRST txt - return $ char '|' <> label <> char '|' + return $ "|" <> label <> "|" inlineToRST (Note contents) = do -- add to notes in state - notes <- get >>= (return . stNotes) + notes <- get >>= return . stNotes modify $ \st -> st { stNotes = contents:notes } let ref = show $ (length notes) + 1 - return $ text " [" <> text ref <> text "]_" + return $ " [" <> text ref <> "]_" diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index ae71e1307..605e4162b 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -27,13 +27,34 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Conversion of 'Pandoc' documents to RTF (rich text format). -} -module Text.Pandoc.Writers.RTF ( writeRTF ) where +module Text.Pandoc.Writers.RTF ( writeRTF, rtfEmbedImage ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Templates (renderTemplate) import Data.List ( isSuffixOf, intercalate ) -import Data.Char ( ord, isDigit ) +import Data.Char ( ord, isDigit, toLower ) +import System.FilePath ( takeExtension ) +import qualified Data.ByteString as B +import Text.Printf ( printf ) + +-- | Convert Image inlines into a raw RTF embedded image, read from a file. +-- If file not found or filetype not jpeg or png, leave the inline unchanged. +rtfEmbedImage :: Inline -> IO Inline +rtfEmbedImage x@(Image _ (src,_)) + | map toLower (takeExtension src) `elem` [".jpg",".jpeg",".png"] = do + imgdata <- catch (B.readFile src) (\_ -> return B.empty) + let bytes = map (printf "%02x") $ B.unpack imgdata + let filetype = case map toLower (takeExtension src) of + ".jpg" -> "\\jpegblip" + ".jpeg" -> "\\jpegblip" + ".png" -> "\\pngblip" + _ -> error "Unknown file type" + let raw = "{\\pict" ++ filetype ++ " " ++ concat bytes ++ "}" + return $ if B.null imgdata + then x + else RawInline "rtf" raw +rtfEmbedImage x = return x -- | Convert Pandoc to a string in rich text format. writeRTF :: WriterOptions -> Pandoc -> String @@ -159,7 +180,8 @@ blockToRTF indent alignment (BlockQuote lst) = concatMap (blockToRTF (indent + indentIncrement) alignment) lst blockToRTF indent _ (CodeBlock _ str) = rtfPar indent 0 AlignLeft ("\\f1 " ++ (codeStringToRTF str)) -blockToRTF _ _ (RawHtml _) = "" +blockToRTF _ _ (RawBlock "rtf" str) = str +blockToRTF _ _ (RawBlock _ _) = "" blockToRTF indent alignment (BulletList lst) = spaceAtEnd $ concatMap (listItemToRTF alignment indent (bulletMarker indent)) lst blockToRTF indent alignment (OrderedList attribs lst) = spaceAtEnd $ concat $ @@ -264,12 +286,12 @@ inlineToRTF Apostrophe = "\\u8217'" inlineToRTF Ellipses = "\\u8230?" inlineToRTF EmDash = "\\u8212-" inlineToRTF EnDash = "\\u8211-" -inlineToRTF (Code str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" +inlineToRTF (Code _ str) = "{\\f1 " ++ (codeStringToRTF str) ++ "}" inlineToRTF (Str str) = stringToRTF str inlineToRTF (Math _ str) = inlineListToRTF $ readTeXMath str inlineToRTF (Cite _ lst) = inlineListToRTF lst -inlineToRTF (TeX _) = "" -inlineToRTF (HtmlInline _) = "" +inlineToRTF (RawInline "rtf" str) = str +inlineToRTF (RawInline _ _) = "" inlineToRTF (LineBreak) = "\\line " inlineToRTF Space = " " inlineToRTF (Link text (src, _)) = diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 65e053827..c8638cdd7 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -31,13 +31,12 @@ module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Text.Pandoc.Definition import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) -import Text.Pandoc.Readers.TeXMath import Text.Printf ( printf ) -import Data.List ( isSuffixOf, transpose, maximumBy ) +import Data.List ( transpose, maximumBy ) import Data.Ord ( comparing ) import Data.Char ( chr, ord ) import Control.Monad.State -import Text.PrettyPrint.HughesPJ hiding ( Str ) +import Text.Pandoc.Pretty data WriterState = WriterState { stStrikeout :: Bool -- document contains strikeout @@ -69,17 +68,20 @@ pandocToTexinfo options (Pandoc (Meta title authors date) blocks) = do let titlePage = not $ all null $ title : date : authors main <- blockListToTexinfo blocks st <- get - let body = render main + let colwidth = if writerWrapText options + then Just $ writerColumns options + else Nothing + let body = render colwidth main let context = writerVariables options ++ [ ("body", body) - , ("title", render titleText) - , ("date", render dateText) ] ++ + , ("title", render colwidth titleText) + , ("date", render colwidth dateText) ] ++ [ ("toc", "yes") | writerTableOfContents options ] ++ [ ("titlepage", "yes") | titlePage ] ++ [ ("subscript", "yes") | stSubscript st ] ++ [ ("superscript", "yes") | stSuperscript st ] ++ [ ("strikeout", "yes") | stStrikeout st ] ++ - [ ("author", render a) | a <- authorsText ] + [ ("author", render colwidth a) | a <- authorsText ] if writerStandalone options then return $ renderTemplate context $ writerTemplate options else return body @@ -124,22 +126,25 @@ blockToTexinfo (BlockQuote lst) = do blockToTexinfo (CodeBlock _ str) = do return $ text "@verbatim" $$ - vcat (map text (lines str)) $$ - text "@end verbatim\n" + flush (text str) $$ + text "@end verbatim" <> blankline -blockToTexinfo (RawHtml _) = return empty +blockToTexinfo (RawBlock "texinfo" str) = return $ text str +blockToTexinfo (RawBlock "latex" str) = + return $ text "@tex" $$ text str $$ text "@end tex" +blockToTexinfo (RawBlock _ _) = return empty blockToTexinfo (BulletList lst) = do items <- mapM listItemToTexinfo lst return $ text "@itemize" $$ vcat items $$ - text "@end itemize\n" + text "@end itemize" <> blankline blockToTexinfo (OrderedList (start, numstyle, _) lst) = do items <- mapM listItemToTexinfo lst return $ text "@enumerate " <> exemplar $$ vcat items $$ - text "@end enumerate\n" + text "@end enumerate" <> blankline where exemplar = case numstyle of DefaultStyle -> decimal @@ -159,7 +164,7 @@ blockToTexinfo (DefinitionList lst) = do items <- mapM defListItemToTexinfo lst return $ text "@table @asis" $$ vcat items $$ - text "@end table\n" + text "@end table" <> blankline blockToTexinfo HorizontalRule = -- XXX can't get the equivalent from LaTeX.hs to work @@ -175,13 +180,13 @@ blockToTexinfo (Header 0 lst) = do then return $ text "Top" else inlineListToTexinfo lst return $ text "@node Top" $$ - text "@top " <> txt <> char '\n' + text "@top " <> txt <> blankline blockToTexinfo (Header level lst) = do node <- inlineListForNode lst txt <- inlineListToTexinfo lst return $ if (level > 0) && (level <= 4) - then text "\n@node " <> node <> char '\n' <> + then blankline <> text "@node " <> node <> cr <> text (seccmd level) <> txt else txt where @@ -200,18 +205,18 @@ blockToTexinfo (Table caption aligns widths heads rows) = do colDescriptors <- if all (== 0) widths then do -- use longest entry instead of column widths - cols <- mapM (mapM (liftM (render . hcat) . mapM blockToTexinfo)) $ - transpose $ heads : rows + cols <- mapM (mapM (liftM (render Nothing . hcat) . mapM blockToTexinfo)) $ + transpose $ heads : rows return $ concatMap ((\x -> "{"++x++"} ") . maximumBy (comparing length)) cols else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths let tableBody = text ("@multitable " ++ colDescriptors) $$ headers $$ vcat rowsText $$ - text "@end multitable" + text "@end multitable" return $ if isEmpty captionText - then tableBody <> char '\n' + then tableBody <> blankline else text "@float" $$ - tableBody $$ + tableBody $$ inCmd "caption" captionText $$ text "@end float" @@ -253,7 +258,7 @@ alignedBlock _ col = blockListToTexinfo col -- | Convert Pandoc block elements to Texinfo. blockListToTexinfo :: [Block] -> State WriterState Doc -blockListToTexinfo [] = return $ empty +blockListToTexinfo [] = return empty blockListToTexinfo (x:xs) = do x' <- blockToTexinfo x case x of @@ -276,7 +281,7 @@ blockListToTexinfo (x:xs) = do xs' <- blockListToTexinfo xs case xs of ((CodeBlock _ _):_) -> return $ x' $$ xs' - _ -> return $ x' $$ text "" $$ xs' + _ -> return $ x' $+$ xs' _ -> do xs' <- blockListToTexinfo xs return $ x' $$ xs' @@ -307,15 +312,23 @@ makeMenuLine _ = error "makeMenuLine called with non-Header block" listItemToTexinfo :: [Block] -> State WriterState Doc -listItemToTexinfo lst = blockListToTexinfo lst >>= - return . (text "@item" $$) +listItemToTexinfo lst = do + contents <- blockListToTexinfo lst + let spacer = case reverse lst of + (Para{}:_) -> blankline + _ -> empty + return $ text "@item" $$ contents <> spacer defListItemToTexinfo :: ([Inline], [[Block]]) -> State WriterState Doc defListItemToTexinfo (term, defs) = do term' <- inlineListToTexinfo term - def' <- liftM vcat $ mapM blockListToTexinfo defs - return $ text "@item " <> term' <> text "\n" $$ def' + let defToTexinfo bs = do d <- blockListToTexinfo bs + case reverse bs of + (Para{}:_) -> return $ d <> blankline + _ -> return d + defs' <- mapM defToTexinfo defs + return $ text "@item " <> term' $+$ vcat defs' -- | Convert list of inline elements to Texinfo. inlineListToTexinfo :: [Inline] -- ^ Inlines to convert @@ -325,31 +338,7 @@ inlineListToTexinfo lst = mapM inlineToTexinfo lst >>= return . hcat -- | Convert list of inline elements to Texinfo acceptable for a node name. inlineListForNode :: [Inline] -- ^ Inlines to convert -> State WriterState Doc -inlineListForNode lst = mapM inlineForNode lst >>= return . hcat - -inlineForNode :: Inline -> State WriterState Doc -inlineForNode (Str str) = return $ text $ filter (not.disallowedInNode) str -inlineForNode (Emph lst) = inlineListForNode lst -inlineForNode (Strong lst) = inlineListForNode lst -inlineForNode (Strikeout lst) = inlineListForNode lst -inlineForNode (Superscript lst) = inlineListForNode lst -inlineForNode (Subscript lst) = inlineListForNode lst -inlineForNode (SmallCaps lst) = inlineListForNode lst -inlineForNode (Quoted _ lst) = inlineListForNode lst -inlineForNode (Cite _ lst) = inlineListForNode lst -inlineForNode (Code str) = inlineForNode (Str str) -inlineForNode Space = return $ char ' ' -inlineForNode EmDash = return $ text "---" -inlineForNode EnDash = return $ text "--" -inlineForNode Apostrophe = return $ char '\'' -inlineForNode Ellipses = return $ text "..." -inlineForNode LineBreak = return empty -inlineForNode (Math _ str) = inlineListForNode $ readTeXMath str -inlineForNode (TeX _) = return empty -inlineForNode (HtmlInline _) = return empty -inlineForNode (Link lst _) = inlineListForNode lst -inlineForNode (Image lst _) = inlineListForNode lst -inlineForNode (Note _) = return empty +inlineListForNode = return . text . filter (not . disallowedInNode) . stringify -- periods, commas, colons, and parentheses are disallowed in node names disallowedInNode :: Char -> Bool @@ -383,7 +372,7 @@ inlineToTexinfo (Subscript lst) = do inlineToTexinfo (SmallCaps lst) = inlineListToTexinfo lst >>= return . inCmd "sc" -inlineToTexinfo (Code str) = do +inlineToTexinfo (Code _ str) = do return $ text $ "@code{" ++ stringToTexinfo str ++ "}" inlineToTexinfo (Quoted SingleQuote lst) = do @@ -402,14 +391,16 @@ inlineToTexinfo EnDash = return $ text "--" inlineToTexinfo Ellipses = return $ text "@dots{}" inlineToTexinfo (Str str) = return $ text (stringToTexinfo str) inlineToTexinfo (Math _ str) = return $ inCmd "math" $ text str -inlineToTexinfo (TeX str) = return $ text "@tex" $$ text str $$ text "@end tex" -inlineToTexinfo (HtmlInline _) = return empty +inlineToTexinfo (RawInline f str) | f == "latex" || f == "tex" = + return $ text "@tex" $$ text str $$ text "@end tex" +inlineToTexinfo (RawInline "texinfo" str) = return $ text str +inlineToTexinfo (RawInline _ _) = return empty inlineToTexinfo (LineBreak) = return $ text "@*" inlineToTexinfo Space = return $ char ' ' inlineToTexinfo (Link txt (src, _)) = do case txt of - [Code x] | x == src -> -- autolink + [Code _ x] | x == src -> -- autolink do return $ text $ "@url{" ++ x ++ "}" _ -> do contents <- inlineListToTexinfo txt let src1 = stringToTexinfo src @@ -429,9 +420,4 @@ inlineToTexinfo (Image alternate (source, _)) = do inlineToTexinfo (Note contents) = do contents' <- blockListToTexinfo contents - let rawnote = stripTrailingNewlines $ render contents' - let optNewline = "@end verbatim" `isSuffixOf` rawnote - return $ text "@footnote{" <> - text rawnote <> - (if optNewline then char '\n' else empty) <> - char '}' + return $ text "@footnote" <> braces contents' diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs new file mode 100644 index 000000000..6614ec28e --- /dev/null +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -0,0 +1,422 @@ +{- +Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Writers.Textile + Copyright : Copyright (C) 2010 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to Textile markup. + +Textile: <http://thresholdstate.com/articles/4312/the-textile-reference-manual> +-} +module Text.Pandoc.Writers.Textile ( writeTextile ) where +import Text.Pandoc.Definition +import Text.Pandoc.Shared +import Text.Pandoc.Templates (renderTemplate) +import Text.Pandoc.XML ( escapeStringForXML ) +import Data.List ( intercalate ) +import Control.Monad.State +import Data.Char ( isSpace ) + +data WriterState = WriterState { + stNotes :: [String] -- Footnotes + , stListLevel :: [Char] -- String at beginning of list items, e.g. "**" + , stUseTags :: Bool -- True if we should use HTML tags because we're in a complex list + } + +-- | Convert Pandoc to Textile. +writeTextile :: WriterOptions -> Pandoc -> String +writeTextile opts document = + evalState (pandocToTextile opts document) + (WriterState { stNotes = [], stListLevel = [], stUseTags = False }) + +-- | Return Textile representation of document. +pandocToTextile :: WriterOptions -> Pandoc -> State WriterState String +pandocToTextile opts (Pandoc _ blocks) = do + body <- blockListToTextile opts blocks + notes <- liftM (unlines . reverse . stNotes) get + let main = body ++ if null notes then "" else ("\n\n" ++ notes) + let context = writerVariables opts ++ [ ("body", main) ] + if writerStandalone opts + then return $ renderTemplate context $ writerTemplate opts + else return main + +withUseTags :: State WriterState a -> State WriterState a +withUseTags action = do + oldUseTags <- liftM stUseTags get + modify $ \s -> s { stUseTags = True } + result <- action + modify $ \s -> s { stUseTags = oldUseTags } + return result + +-- | Escape one character as needed for Textile. +escapeCharForTextile :: Char -> String +escapeCharForTextile x = case x of + '&' -> "&" + '<' -> "<" + '>' -> ">" + '"' -> """ + '*' -> "*" + '_' -> "_" + '@' -> "@" + '|' -> "|" + c -> [c] + +-- | Escape string as needed for Textile. +escapeStringForTextile :: String -> String +escapeStringForTextile = concatMap escapeCharForTextile + +-- | Convert Pandoc block element to Textile. +blockToTextile :: WriterOptions -- ^ Options + -> Block -- ^ Block element + -> State WriterState String + +blockToTextile _ Null = return "" + +blockToTextile opts (Plain inlines) = + inlineListToTextile opts inlines + +blockToTextile opts (Para [Image txt (src,tit)]) = do + capt <- blockToTextile opts (Para txt) + im <- inlineToTextile opts (Image txt (src,tit)) + return $ im ++ "\n" ++ capt + +blockToTextile opts (Para inlines) = do + useTags <- liftM stUseTags get + listLevel <- liftM stListLevel get + contents <- inlineListToTextile opts inlines + return $ if useTags + then "<p>" ++ contents ++ "</p>" + else contents ++ if null listLevel then "\n" else "" + +blockToTextile _ (RawBlock f str) = + if f == "html" || f == "textile" + then return str + else return "" + +blockToTextile _ HorizontalRule = return "<hr />\n" + +blockToTextile opts (Header level inlines) = do + contents <- inlineListToTextile opts inlines + let prefix = 'h' : (show level ++ ". ") + return $ prefix ++ contents ++ "\n" + +blockToTextile _ (CodeBlock (_,classes,_) str) | any (all isSpace) (lines str) = + return $ "<pre" ++ classes' ++ ">\n" ++ escapeStringForXML str ++ + "\n</pre>\n" + where classes' = if null classes + then "" + else " class=\"" ++ unwords classes ++ "\"" + +blockToTextile _ (CodeBlock (_,classes,_) str) = + return $ "bc" ++ classes' ++ ". " ++ str ++ "\n\n" + where classes' = if null classes + then "" + else "(" ++ unwords classes ++ ")" + +blockToTextile opts (BlockQuote bs@[Para _]) = do + contents <- blockListToTextile opts bs + return $ "bq. " ++ contents ++ "\n\n" + +blockToTextile opts (BlockQuote blocks) = do + contents <- blockListToTextile opts blocks + return $ "<blockquote>\n\n" ++ contents ++ "\n</blockquote>\n" + +blockToTextile opts (Table [] aligns widths headers rows') | + all (==0) widths && all (`elem` [AlignLeft,AlignDefault]) aligns = do + hs <- mapM (liftM (("_. " ++) . stripTrailingNewlines) . blockListToTextile opts) headers + let cellsToRow cells = "|" ++ intercalate "|" cells ++ "|" + let header = if all null headers then "" else cellsToRow hs + let rowToCells = mapM (liftM stripTrailingNewlines . blockListToTextile opts) + bs <- mapM rowToCells rows' + let body = unlines $ map cellsToRow bs + return $ header ++ "\n" ++ body ++ "\n" + +blockToTextile opts (Table capt aligns widths headers rows') = do + let alignStrings = map alignmentToString aligns + captionDoc <- if null capt + then return "" + else do + c <- inlineListToTextile opts capt + return $ "<caption>" ++ c ++ "</caption>\n" + let percent w = show (truncate (100*w) :: Integer) ++ "%" + let coltags = if all (== 0.0) widths + then "" + else unlines $ map + (\w -> "<col width=\"" ++ percent w ++ "\" />") widths + head' <- if all null headers + then return "" + else do + hs <- tableRowToTextile opts alignStrings 0 headers + return $ "<thead>\n" ++ hs ++ "\n</thead>\n" + body' <- zipWithM (tableRowToTextile opts alignStrings) [1..] rows' + return $ "<table>\n" ++ captionDoc ++ coltags ++ head' ++ + "<tbody>\n" ++ unlines body' ++ "</tbody>\n</table>\n" + +blockToTextile opts x@(BulletList items) = do + oldUseTags <- liftM stUseTags get + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + contents <- withUseTags $ mapM (listItemToTextile opts) items + return $ "<ul>\n" ++ vcat contents ++ "\n</ul>\n" + else do + modify $ \s -> s { stListLevel = stListLevel s ++ "*" } + level <- get >>= return . length . stListLevel + contents <- mapM (listItemToTextile opts) items + modify $ \s -> s { stListLevel = init (stListLevel s) } + return $ vcat contents ++ (if level > 1 then "" else "\n") + +blockToTextile opts x@(OrderedList attribs items) = do + oldUseTags <- liftM stUseTags get + let useTags = oldUseTags || not (isSimpleList x) + if useTags + then do + contents <- withUseTags $ mapM (listItemToTextile opts) items + return $ "<ol" ++ listAttribsToString attribs ++ ">\n" ++ vcat contents ++ + "\n</ol>\n" + else do + modify $ \s -> s { stListLevel = stListLevel s ++ "#" } + level <- get >>= return . length . stListLevel + contents <- mapM (listItemToTextile opts) items + modify $ \s -> s { stListLevel = init (stListLevel s) } + return $ vcat contents ++ (if level > 1 then "" else "\n") + +blockToTextile opts (DefinitionList items) = do + contents <- withUseTags $ mapM (definitionListItemToTextile opts) items + return $ "<dl>\n" ++ vcat contents ++ "\n</dl>\n" + +-- Auxiliary functions for lists: + +-- | Convert ordered list attributes to HTML attribute string +listAttribsToString :: ListAttributes -> String +listAttribsToString (startnum, numstyle, _) = + let numstyle' = camelCaseToHyphenated $ show numstyle + in (if startnum /= 1 + then " start=\"" ++ show startnum ++ "\"" + else "") ++ + (if numstyle /= DefaultStyle + then " style=\"list-style-type: " ++ numstyle' ++ ";\"" + else "") + +-- | Convert bullet or ordered list item (list of blocks) to Textile. +listItemToTextile :: WriterOptions -> [Block] -> State WriterState String +listItemToTextile opts items = do + contents <- blockListToTextile opts items + useTags <- get >>= return . stUseTags + if useTags + then return $ "<li>" ++ contents ++ "</li>" + else do + marker <- get >>= return . stListLevel + return $ marker ++ " " ++ contents + +-- | Convert definition list item (label, list of blocks) to Textile. +definitionListItemToTextile :: WriterOptions + -> ([Inline],[[Block]]) + -> State WriterState String +definitionListItemToTextile opts (label, items) = do + labelText <- inlineListToTextile opts label + contents <- mapM (blockListToTextile opts) items + return $ "<dt>" ++ labelText ++ "</dt>\n" ++ + (intercalate "\n" $ map (\d -> "<dd>" ++ d ++ "</dd>") contents) + +-- | True if the list can be handled by simple wiki markup, False if HTML tags will be needed. +isSimpleList :: Block -> Bool +isSimpleList x = + case x of + BulletList items -> all isSimpleListItem items + OrderedList (num, sty, _) items -> all isSimpleListItem items && + num == 1 && sty `elem` [DefaultStyle, Decimal] + _ -> False + +-- | True if list item can be handled with the simple wiki syntax. False if +-- HTML tags will be needed. +isSimpleListItem :: [Block] -> Bool +isSimpleListItem [] = True +isSimpleListItem [x] = + case x of + Plain _ -> True + Para _ -> True + BulletList _ -> isSimpleList x + OrderedList _ _ -> isSimpleList x + _ -> False +isSimpleListItem [x, y] | isPlainOrPara x = + case y of + BulletList _ -> isSimpleList y + OrderedList _ _ -> isSimpleList y + _ -> False +isSimpleListItem _ = False + +isPlainOrPara :: Block -> Bool +isPlainOrPara (Plain _) = True +isPlainOrPara (Para _) = True +isPlainOrPara _ = False + +-- | Concatenates strings with line breaks between them. +vcat :: [String] -> String +vcat = intercalate "\n" + +-- Auxiliary functions for tables. (TODO: these are common to HTML, MediaWiki, +-- and Textile writers, and should be abstracted out.) + +tableRowToTextile :: WriterOptions + -> [String] + -> Int + -> [[Block]] + -> State WriterState String +tableRowToTextile opts alignStrings rownum cols' = do + let celltype = if rownum == 0 then "th" else "td" + let rowclass = case rownum of + 0 -> "header" + x | x `rem` 2 == 1 -> "odd" + _ -> "even" + cols'' <- sequence $ zipWith + (\alignment item -> tableItemToTextile opts celltype alignment item) + alignStrings cols' + return $ "<tr class=\"" ++ rowclass ++ "\">\n" ++ unlines cols'' ++ "</tr>" + +alignmentToString :: Alignment -> [Char] +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" + +tableItemToTextile :: WriterOptions + -> String + -> String + -> [Block] + -> State WriterState String +tableItemToTextile opts celltype align' item = do + let mkcell x = "<" ++ celltype ++ " align=\"" ++ align' ++ "\">" ++ + x ++ "</" ++ celltype ++ ">" + contents <- blockListToTextile opts item + return $ mkcell contents + +-- | Convert list of Pandoc block elements to Textile. +blockListToTextile :: WriterOptions -- ^ Options + -> [Block] -- ^ List of block elements + -> State WriterState String +blockListToTextile opts blocks = + mapM (blockToTextile opts) blocks >>= return . vcat + +-- | Convert list of Pandoc inline elements to Textile. +inlineListToTextile :: WriterOptions -> [Inline] -> State WriterState String +inlineListToTextile opts lst = + mapM (inlineToTextile opts) lst >>= return . concat + +-- | Convert Pandoc inline element to Textile. +inlineToTextile :: WriterOptions -> Inline -> State WriterState String + +inlineToTextile opts (Emph lst) = do + contents <- inlineListToTextile opts lst + return $ if '_' `elem` contents + then "<em>" ++ contents ++ "</em>" + else "_" ++ contents ++ "_" + +inlineToTextile opts (Strong lst) = do + contents <- inlineListToTextile opts lst + return $ if '*' `elem` contents + then "<strong>" ++ contents ++ "</strong>" + else "*" ++ contents ++ "*" + +inlineToTextile opts (Strikeout lst) = do + contents <- inlineListToTextile opts lst + return $ if '-' `elem` contents + then "<del>" ++ contents ++ "</del>" + else "-" ++ contents ++ "-" + +inlineToTextile opts (Superscript lst) = do + contents <- inlineListToTextile opts lst + return $ if '^' `elem` contents + then "<sup>" ++ contents ++ "</sup>" + else "[^" ++ contents ++ "^]" + +inlineToTextile opts (Subscript lst) = do + contents <- inlineListToTextile opts lst + return $ if '~' `elem` contents + then "<sub>" ++ contents ++ "</sub>" + else "[~" ++ contents ++ "~]" + +inlineToTextile opts (SmallCaps lst) = inlineListToTextile opts lst + +inlineToTextile opts (Quoted SingleQuote lst) = do + contents <- inlineListToTextile opts lst + return $ "'" ++ contents ++ "'" + +inlineToTextile opts (Quoted DoubleQuote lst) = do + contents <- inlineListToTextile opts lst + return $ "\"" ++ contents ++ "\"" + +inlineToTextile opts (Cite _ lst) = inlineListToTextile opts lst + +inlineToTextile _ EmDash = return " -- " + +inlineToTextile _ EnDash = return " - " + +inlineToTextile _ Apostrophe = return "'" + +inlineToTextile _ Ellipses = return "..." + +inlineToTextile _ (Code _ str) = + return $ if '@' `elem` str + then "<tt>" ++ escapeStringForXML str ++ "</tt>" + else "@" ++ str ++ "@" + +inlineToTextile _ (Str str) = return $ escapeStringForTextile str + +inlineToTextile _ (Math _ str) = + return $ "<span class=\"math\">" ++ escapeStringForXML str ++ "</math>" + +inlineToTextile _ (RawInline f str) = + if f == "html" || f == "textile" + then return str + else return "" + +inlineToTextile _ (LineBreak) = return "\n" + +inlineToTextile _ Space = return " " + +inlineToTextile opts (Link txt (src, _)) = do + label <- case txt of + [Code _ s] -> return s + _ -> inlineListToTextile opts txt + return $ "\"" ++ label ++ "\":" ++ src + +inlineToTextile opts (Image alt (source, tit)) = do + alt' <- inlineListToTextile opts alt + let txt = if null tit + then if null alt' + then "" + else "(" ++ alt' ++ ")" + else "(" ++ tit ++ ")" + return $ "!" ++ source ++ txt ++ "!" + +inlineToTextile opts (Note contents) = do + curNotes <- liftM stNotes get + let newnum = length curNotes + 1 + contents' <- blockListToTextile opts contents + let thisnote = "fn" ++ show newnum ++ ". " ++ contents' ++ "\n" + modify $ \s -> s { stNotes = thisnote : curNotes } + return $ "[" ++ show newnum ++ "]" + -- note - may not work for notes with multiple blocks diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index 0c48b48df..e21525018 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -34,7 +34,8 @@ module Text.Pandoc.XML ( stripTags, selfClosingTag, inTagsSimple, inTagsIndented ) where -import Text.PrettyPrint.HughesPJ + +import Text.Pandoc.Pretty -- | Remove everything between <...> stripTags :: String -> String @@ -55,23 +56,15 @@ escapeCharForXML x = case x of '"' -> """ c -> [c] --- | True if the character needs to be escaped. -needsEscaping :: Char -> Bool -needsEscaping c = c `elem` "&<>\"" - -- | Escape string as needed for XML. Entity references are not preserved. escapeStringForXML :: String -> String -escapeStringForXML "" = "" -escapeStringForXML str = - case break needsEscaping str of - (okay, "") -> okay - (okay, (c:cs)) -> okay ++ escapeCharForXML c ++ escapeStringForXML cs +escapeStringForXML = concatMap escapeCharForXML -- | Return a text object with a string of formatted XML attributes. attributeList :: [(String, String)] -> Doc -attributeList = text . concatMap - (\(a, b) -> " " ++ escapeStringForXML a ++ "=\"" ++ - escapeStringForXML b ++ "\"") +attributeList = hcat . map + (\(a, b) -> text (' ' : escapeStringForXML a ++ "=\"" ++ + escapeStringForXML b ++ "\"")) -- | Put the supplied contents between start and end tags of tagType, -- with specified attributes and (if specified) indentation. |