summaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc.hs3
-rw-r--r--src/Text/Pandoc/MIME.hs2
-rw-r--r--src/Text/Pandoc/Pretty.hs2
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs33
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs170
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs4
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs2
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs11
-rw-r--r--src/Text/Pandoc/Readers/Org.hs49
-rw-r--r--src/Text/Pandoc/Readers/RST.hs20
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs526
-rw-r--r--src/Text/Pandoc/Writers/DokuWiki.hs6
-rw-r--r--src/Text/Pandoc/Writers/EPUB.hs24
-rw-r--r--src/Text/Pandoc/Writers/ICML.hs2
-rw-r--r--src/Text/Pandoc/Writers/ODT.hs10
-rw-r--r--src/Text/Pandoc/Writers/RST.hs13
16 files changed, 756 insertions, 121 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index fd849316b..1f22122ac 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -77,6 +77,7 @@ module Text.Pandoc
, readHaddock
, readNative
, readJSON
+ , readTWiki
, readTxt2Tags
, readTxt2TagsNoMacros
, readEPUB
@@ -133,6 +134,7 @@ import Text.Pandoc.Readers.HTML
import Text.Pandoc.Readers.Textile
import Text.Pandoc.Readers.Native
import Text.Pandoc.Readers.Haddock
+import Text.Pandoc.Readers.TWiki
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Readers.Txt2Tags
import Text.Pandoc.Readers.EPUB
@@ -233,6 +235,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s)
,("html" , mkStringReader readHtml)
,("latex" , mkStringReader readLaTeX)
,("haddock" , mkStringReader readHaddock)
+ ,("twiki" , mkStringReader readTWiki)
,("docx" , mkBSReader readDocx)
,("t2t" , mkStringReader readTxt2TagsNoMacros)
,("epub" , mkBSReader readEPUB)
diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs
index 75b4ff0d2..2fdba93e0 100644
--- a/src/Text/Pandoc/MIME.hs
+++ b/src/Text/Pandoc/MIME.hs
@@ -328,7 +328,7 @@ mimeTypesList = -- List borrowed from happstack-server.
,("oth","application/vnd.oasis.opendocument.text-web")
,("otp","application/vnd.oasis.opendocument.presentation-template")
,("ots","application/vnd.oasis.opendocument.spreadsheet-template")
- ,("otf","application/x-font-opentype")
+ ,("otf","application/vnd.ms-opentype")
,("ott","application/vnd.oasis.opendocument.text-template")
,("oza","application/x-oz-application")
,("p","text/x-pascal")
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs
index 9ee7fe94a..2f2656086 100644
--- a/src/Text/Pandoc/Pretty.hs
+++ b/src/Text/Pandoc/Pretty.hs
@@ -534,4 +534,4 @@ charWidth c =
-- | Get real length of string, taking into account combining and double-wide
-- characters.
realLength :: String -> Int
-realLength = sum . map charWidth
+realLength = foldr (\a b -> charWidth a + b) 0
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 4b5fbfdfc..64eb0322f 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -84,8 +84,7 @@ import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Reducible
import Text.Pandoc.Shared
import Text.Pandoc.MediaBag (insertMedia, MediaBag)
-import Data.Maybe (isJust)
-import Data.List (delete, stripPrefix, (\\), intersect, isPrefixOf)
+import Data.List (delete, (\\), intersect)
import Data.Monoid
import Text.TeXMath (writeTeX)
import Data.Default (Default)
@@ -197,19 +196,9 @@ fixAuthors mv = mv
codeStyles :: [String]
codeStyles = ["VerbatimChar"]
-blockQuoteDivs :: [String]
-blockQuoteDivs = ["Quote", "BlockQuote", "BlockQuotation"]
-
codeDivs :: [String]
codeDivs = ["SourceCode"]
-
--- For the moment, we have English, Danish, German, and French. This
--- is fairly ad-hoc, and there might be a more systematic way to do
--- it, but it's better than nothing.
-headerPrefixes :: [String]
-headerPrefixes = ["Heading", "Overskrift", "berschrift", "Titre"]
-
runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s) = text s
runElemToInlines (LnBrk) = linebreak
@@ -434,9 +423,9 @@ parStyleToTransform pPr
let pPr' = pPr { pStyle = cs, indentation = Nothing}
in
(divWith ("", [c], [])) . (parStyleToTransform pPr')
- | (c:cs) <- pStyle pPr
- , c `elem` blockQuoteDivs =
- let pPr' = pPr { pStyle = cs \\ blockQuoteDivs }
+ | (_:cs) <- pStyle pPr
+ , Just True <- pBlockQuote pPr =
+ let pPr' = pPr { pStyle = cs }
in
blockQuote . (parStyleToTransform pPr')
| (_:cs) <- pStyle pPr =
@@ -467,12 +456,11 @@ bodyPartToBlocks (Paragraph pPr parparts)
$ parStyleToTransform pPr
$ codeBlock
$ concatMap parPartToString parparts
- | (c : cs) <- filter (isJust . isHeaderClass) $ pStyle pPr
- , Just (prefix, n) <- isHeaderClass c = do
+ | Just (style, n) <- pHeading pPr = do
ils <- local (\s-> s{docxInHeaderBlock=True}) $
(concatReduce <$> mapM parPartToInlines parparts)
makeHeaderAnchor $
- headerWith ("", delete (prefix ++ show n) cs, []) n ils
+ headerWith ("", delete style (pStyle pPr), []) n ils
| otherwise = do
ils <- concatReduce <$> mapM parPartToInlines parparts >>=
(return . fromList . trimLineBreaks . normalizeSpaces . toList)
@@ -559,12 +547,3 @@ docxToOutput :: ReaderOptions -> Docx -> (Meta, [Block], MediaBag)
docxToOutput opts (Docx (Document _ body)) =
let dEnv = def { docxOptions = opts} in
evalDocxContext (bodyToOutput body) dEnv def
-
-isHeaderClass :: String -> Maybe (String, Int)
-isHeaderClass s | (pref:_) <- filter (\h -> isPrefixOf h s) headerPrefixes
- , Just s' <- stripPrefix pref s =
- case reads s' :: [(Int, String)] of
- [] -> Nothing
- ((n, "") : []) -> Just (pref, n)
- _ -> Nothing
-isHeaderClass _ = Nothing
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 2945a1eda..5fd6b7a81 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE PatternGuards, ViewPatterns #-}
+{-# LANGUAGE PatternGuards, ViewPatterns, FlexibleInstances #-}
{-
Copyright (C) 2014 Jesse Rosenthal <jrosenthal@jhu.edu>
@@ -65,7 +65,7 @@ import Text.Pandoc.Compat.Except
import Text.TeXMath.Readers.OMML (readOMML)
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
import Text.TeXMath (Exp)
-import Data.Char (readLitChar, ord, chr)
+import Data.Char (readLitChar, ord, chr, isDigit)
data ReaderEnv = ReaderEnv { envNotes :: Notes
, envNumbering :: Numbering
@@ -73,6 +73,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
, envMedia :: Media
, envFont :: Maybe Font
, envCharStyles :: CharStyleMap
+ , envParStyles :: ParStyleMap
}
deriving Show
@@ -122,8 +123,12 @@ type Media = [(FilePath, B.ByteString)]
type CharStyle = (String, RunStyle)
+type ParStyle = (String, ParStyleData)
+
type CharStyleMap = M.Map String RunStyle
+type ParStyleMap = M.Map String ParStyleData
+
data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
deriving Show
@@ -152,6 +157,8 @@ data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
data ParagraphStyle = ParagraphStyle { pStyle :: [String]
, indentation :: Maybe ParIndentation
, dropCap :: Bool
+ , pHeading :: Maybe (String, Int)
+ , pBlockQuote :: Maybe Bool
}
deriving Show
@@ -159,6 +166,8 @@ defaultParagraphStyle :: ParagraphStyle
defaultParagraphStyle = ParagraphStyle { pStyle = []
, indentation = Nothing
, dropCap = False
+ , pHeading = Nothing
+ , pBlockQuote = Nothing
}
@@ -213,6 +222,11 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
, rStyle :: Maybe CharStyle}
deriving Show
+data ParStyleData = ParStyleData { headingLev :: Maybe (String, Int)
+ , isBlockQuote :: Maybe Bool
+ , psStyle :: Maybe ParStyle}
+ deriving Show
+
defaultRunStyle :: RunStyle
defaultRunStyle = RunStyle { isBold = Nothing
, isItalic = Nothing
@@ -242,8 +256,8 @@ archiveToDocx archive = do
numbering = archiveToNumbering archive
rels = archiveToRelationships archive
media = archiveToMedia archive
- styles = archiveToStyles archive
- rEnv = ReaderEnv notes numbering rels media Nothing styles
+ (styles, parstyles) = archiveToStyles archive
+ rEnv = ReaderEnv notes numbering rels media Nothing styles parstyles
doc <- runD (archiveToDocument archive) rEnv
return $ Docx doc
@@ -263,47 +277,69 @@ elemToBody ns element | isElem ns "w" "body" element =
(\bps -> return $ Body bps)
elemToBody _ _ = throwError WrongElem
-archiveToStyles :: Archive -> CharStyleMap
+archiveToStyles :: Archive -> (CharStyleMap, ParStyleMap)
archiveToStyles zf =
let stylesElem = findEntryByPath "word/styles.xml" zf >>=
(parseXMLDoc . UTF8.toStringLazy . fromEntry)
in
case stylesElem of
- Nothing -> M.empty
+ Nothing -> (M.empty, M.empty)
Just styElem ->
let namespaces = mapMaybe attrToNSPair (elAttribs styElem)
in
- M.fromList $ buildBasedOnList namespaces styElem Nothing
+ ( M.fromList $ buildBasedOnList namespaces styElem
+ (Nothing :: Maybe CharStyle),
+ M.fromList $ buildBasedOnList namespaces styElem
+ (Nothing :: Maybe ParStyle) )
-isBasedOnStyle :: NameSpaces -> Element -> Maybe CharStyle -> Bool
+isBasedOnStyle :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> Bool
isBasedOnStyle ns element parentStyle
| isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just styleType <- findAttr (elemName ns "w" "type") element
+ , styleType == cStyleType parentStyle
, Just basedOnVal <- findChild (elemName ns "w" "basedOn") element >>=
findAttr (elemName ns "w" "val")
- , Just (parentId, _) <- parentStyle = (basedOnVal == parentId)
+ , Just ps <- parentStyle = (basedOnVal == getStyleId ps)
| isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just styleType <- findAttr (elemName ns "w" "type") element
+ , styleType == cStyleType parentStyle
, Nothing <- findChild (elemName ns "w" "basedOn") element
, Nothing <- parentStyle = True
| otherwise = False
-elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle
-elemToCharStyle ns element parentStyle
- | isElem ns "w" "style" element
- , Just "character" <- findAttr (elemName ns "w" "type") element
- , Just styleId <- findAttr (elemName ns "w" "styleId") element =
- Just (styleId, elemToRunStyle ns element parentStyle)
- | otherwise = Nothing
-
-getStyleChildren :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle]
+class ElemToStyle a where
+ cStyleType :: Maybe a -> String
+ elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
+ getStyleId :: a -> String
+
+instance ElemToStyle CharStyle where
+ cStyleType _ = "character"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "character" <- findAttr (elemName ns "w" "type") element
+ , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ Just (styleId, elemToRunStyle ns element parentStyle)
+ | otherwise = Nothing
+ getStyleId s = fst s
+
+instance ElemToStyle ParStyle where
+ cStyleType _ = "paragraph"
+ elemToStyle ns element parentStyle
+ | isElem ns "w" "style" element
+ , Just "paragraph" <- findAttr (elemName ns "w" "type") element
+ , Just styleId <- findAttr (elemName ns "w" "styleId") element =
+ Just (styleId, elemToParStyleData ns element parentStyle)
+ | otherwise = Nothing
+ getStyleId s = fst s
+
+getStyleChildren :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
getStyleChildren ns element parentStyle
| isElem ns "w" "styles" element =
- mapMaybe (\e -> elemToCharStyle ns e parentStyle) $
+ mapMaybe (\e -> elemToStyle ns e parentStyle) $
filterChildren (\e' -> isBasedOnStyle ns e' parentStyle) element
| otherwise = []
-buildBasedOnList :: NameSpaces -> Element -> Maybe CharStyle -> [CharStyle]
+buildBasedOnList :: (ElemToStyle a) => NameSpaces -> Element -> Maybe a -> [a]
buildBasedOnList ns element rootStyle =
case (getStyleChildren ns element rootStyle) of
[] -> []
@@ -543,7 +579,8 @@ elemToBodyPart ns element
elemToBodyPart ns element
| isElem ns "w" "p" element
, Just (numId, lvl) <- elemToNumInfo ns element = do
- let parstyle = elemToParagraphStyle ns element
+ sty <- asks envParStyles
+ let parstyle = elemToParagraphStyle ns element sty
parparts <- mapD (elemToParPart ns) (elChildren element)
num <- asks envNumbering
case lookupLevel numId lvl num of
@@ -551,7 +588,8 @@ elemToBodyPart ns element
Nothing -> throwError WrongElem
elemToBodyPart ns element
| isElem ns "w" "p" element = do
- let parstyle = elemToParagraphStyle ns element
+ sty <- asks envParStyles
+ let parstyle = elemToParagraphStyle ns element sty
parparts <- mapD (elemToParPart ns) (elChildren element)
return $ Paragraph parstyle parparts
elemToBodyPart ns element
@@ -625,17 +663,20 @@ elemToParPart ns element
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttr (elemName ns "w" "anchor") element = do
+ , Just relId <- findAttr (elemName ns "r" "id") element = do
runs <- mapD (elemToRun ns) (elChildren element)
- return $ InternalHyperLink anchor runs
+ rels <- asks envRelationships
+ case lookupRelationship relId rels of
+ Just target -> do
+ case findAttr (elemName ns "w" "anchor") element of
+ Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
+ Nothing -> return $ ExternalHyperLink target runs
+ Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttr (elemName ns "r" "id") element = do
+ , Just anchor <- findAttr (elemName ns "w" "anchor") element = do
runs <- mapD (elemToRun ns) (elChildren element)
- rels <- asks envRelationships
- return $ case lookupRelationship relId rels of
- Just target -> ExternalHyperLink target runs
- Nothing -> ExternalHyperLink "" runs
+ return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "m" "oMath" element =
(eitherToD $ readOMML $ showElement element) >>= (return . PlainOMath)
@@ -684,14 +725,30 @@ elemToRun ns element
return $ Run runStyle runElems
elemToRun _ _ = throwError WrongElem
-elemToParagraphStyle :: NameSpaces -> Element -> ParagraphStyle
-elemToParagraphStyle ns element
+getParentStyleValue :: (ParStyleData -> Maybe a) -> ParStyleData -> Maybe a
+getParentStyleValue field style
+ | Just value <- field style = Just value
+ | Just parentStyle <- psStyle style
+ = getParentStyleValue field (snd parentStyle)
+getParentStyleValue _ _ = Nothing
+
+getParStyleField :: (ParStyleData -> Maybe a) -> ParStyleMap -> [String] ->
+ Maybe a
+getParStyleField field stylemap styles
+ | x <- mapMaybe (\x -> M.lookup x stylemap) styles
+ , (y:_) <- mapMaybe (getParentStyleValue field) x
+ = Just y
+getParStyleField _ _ _ = Nothing
+
+elemToParagraphStyle :: NameSpaces -> Element -> ParStyleMap -> ParagraphStyle
+elemToParagraphStyle ns element sty
| Just pPr <- findChild (elemName ns "w" "pPr") element =
- ParagraphStyle
- {pStyle =
+ let style =
mapMaybe
(findAttr (elemName ns "w" "val"))
(findChildren (elemName ns "w" "pStyle") pPr)
+ in ParagraphStyle
+ {pStyle = style
, indentation =
findChild (elemName ns "w" "ind") pPr >>=
elemToParIndentation ns
@@ -703,8 +760,10 @@ elemToParagraphStyle ns element
Just "none" -> False
Just _ -> True
Nothing -> False
+ , pHeading = getParStyleField headingLev sty style
+ , pBlockQuote = getParStyleField isBlockQuote sty style
}
-elemToParagraphStyle _ _ = defaultParagraphStyle
+elemToParagraphStyle _ _ _ = defaultParagraphStyle
checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
checkOnOff ns rPr tag
@@ -758,6 +817,45 @@ elemToRunStyle ns element parentStyle
}
elemToRunStyle _ _ _ = defaultRunStyle
+isNumericNotNull :: String -> Bool
+isNumericNotNull str = (str /= []) && (all isDigit str)
+
+getHeaderLevel :: NameSpaces -> Element -> Maybe (String,Int)
+getHeaderLevel ns element
+ | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ , Just index <- stripPrefix "Heading" styleId
+ , isNumericNotNull index = Just (styleId, read index)
+ | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ , Just index <- findChild (elemName ns "w" "name") element >>=
+ findAttr (elemName ns "w" "val") >>=
+ stripPrefix "heading "
+ , isNumericNotNull index = Just (styleId, read index)
+getHeaderLevel _ _ = Nothing
+
+blockQuoteStyleIds :: [String]
+blockQuoteStyleIds = ["Quote", "BlockQuote", "BlockQuotation"]
+
+blockQuoteStyleNames :: [String]
+blockQuoteStyleNames = ["Quote", "Block Text"]
+
+getBlockQuote :: NameSpaces -> Element -> Maybe Bool
+getBlockQuote ns element
+ | Just styleId <- findAttr (elemName ns "w" "styleId") element
+ , styleId `elem` blockQuoteStyleIds = Just True
+ | Just styleName <- findChild (elemName ns "w" "name") element >>=
+ findAttr (elemName ns "w" "val")
+ , styleName `elem` blockQuoteStyleNames = Just True
+getBlockQuote _ _ = Nothing
+
+elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> ParStyleData
+elemToParStyleData ns element parentStyle =
+ ParStyleData
+ {
+ headingLev = getHeaderLevel ns element
+ , isBlockQuote = getBlockQuote ns element
+ , psStyle = parentStyle
+ }
+
elemToRunElem :: NameSpaces -> Element -> D RunElem
elemToRunElem ns element
| isElem ns "w" "t" element
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 4e0bb375a..2a23f2a62 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -740,7 +740,7 @@ pSpace = many1 (satisfy isSpace) >> return B.space
--
eitherBlockOrInline :: [String]
-eitherBlockOrInline = ["audio", "applet", "button", "iframe",
+eitherBlockOrInline = ["audio", "applet", "button", "iframe", "embed",
"del", "ins",
"progress", "map", "area", "noscript", "script",
"object", "svg", "video", "source"]
@@ -758,7 +758,7 @@ blockHtmlTags :: [String]
blockHtmlTags = ["?xml", "!DOCTYPE", "address", "article", "aside",
"blockquote", "body", "button", "canvas",
"caption", "center", "col", "colgroup", "dd", "dir", "div",
- "dl", "dt", "embed", "fieldset", "figcaption", "figure",
+ "dl", "dt", "fieldset", "figcaption", "figure",
"footer", "form", "h1", "h2", "h3", "h4",
"h5", "h6", "head", "header", "hgroup", "hr", "html",
"isindex", "menu", "noframes", "ol", "output", "p", "pre",
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 9f51e9a8f..9420d602f 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -494,6 +494,7 @@ inlineCommands = M.fromList $
, ("citealp", citation "citealp" NormalCitation False)
, ("citealp*", citation "citealp*" NormalCitation False)
, ("autocite", citation "autocite" NormalCitation False)
+ , ("smartcite", citation "smartcite" NormalCitation False)
, ("footcite", inNote <$> citation "footcite" NormalCitation False)
, ("parencite", citation "parencite" NormalCitation False)
, ("supercite", citation "supercite" NormalCitation False)
@@ -516,6 +517,7 @@ inlineCommands = M.fromList $
, ("supercites", citation "supercites" NormalCitation True)
, ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True)
, ("Autocite", citation "Autocite" NormalCitation False)
+ , ("Smartcite", citation "Smartcite" NormalCitation False)
, ("Footcite", citation "Footcite" NormalCitation False)
, ("Parencite", citation "Parencite" NormalCitation False)
, ("Supercite", citation "Supercite" NormalCitation False)
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 7a3be8291..b8487b4e6 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -117,6 +117,12 @@ isBlank _ = False
-- auxiliary functions
--
+-- | Succeeds when we're in list context.
+inList :: MarkdownParser ()
+inList = do
+ ctx <- stateParserContext <$> getState
+ guard (ctx == ListItemState)
+
isNull :: F Inlines -> Bool
isNull ils = B.isNull $ runF ils def
@@ -926,6 +932,8 @@ para = try $ do
<|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced)
<|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header)
<|> (guardEnabled Ext_lists_without_preceding_blankline >>
+ -- Avoid creating a paragraph in a nested list.
+ notFollowedBy' inList >>
() <$ lookAhead listStart)
<|> do guardEnabled Ext_native_divs
inHtmlBlock <- stateInHtmlBlock <$> getState
@@ -1610,8 +1618,7 @@ endline = try $ do
newline
notFollowedBy blankline
-- parse potential list-starts differently if in a list:
- st <- getState
- when (stateParserContext st == ListItemState) $ notFollowedBy listStart
+ notFollowedBy (inList >> listStart)
guardDisabled Ext_lists_without_preceding_blankline <|> notFollowedBy listStart
guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 1ddfeab87..579e38a38 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -879,18 +879,18 @@ bulletListStart = bulletListStart' Nothing
bulletListStart' :: Maybe Int -> OrgParser Int
-- returns length of bulletList prefix, inclusive of marker
-bulletListStart' Nothing = do ind <- many spaceChar
+bulletListStart' Nothing = do ind <- length <$> many spaceChar
+ when (ind == 0) $ notFollowedBy (char '*')
oneOf bullets
many1 spaceChar
- return $ length ind + 1
+ return (ind + 1)
-- Unindented lists are legal, but they can't use '*' bullets
-- We return n to maintain compatibility with the generic listItem
bulletListStart' (Just n) = do count (n-1) spaceChar
- oneOf validBullets
+ when (n == 1) $ notFollowedBy (char '*')
+ oneOf bullets
many1 spaceChar
return n
- where validBullets = if n == 1 then noAsterisks else bullets
- noAsterisks = filter (/= '*') bullets
bullets :: String
bullets = "*+-"
@@ -1099,7 +1099,7 @@ linkOrImage = explicitOrImageLink
explicitOrImageLink :: OrgParser (F Inlines)
explicitOrImageLink = try $ do
char '['
- srcF <- applyCustomLinkFormat =<< linkTarget
+ srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget
title <- enclosedRaw (char '[') (char ']')
title' <- parseFromString (mconcat <$> many inline) title
char ']'
@@ -1132,6 +1132,9 @@ selfTarget = try $ char '[' *> linkTarget <* char ']'
linkTarget :: OrgParser String
linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
+possiblyEmptyLinkTarget :: OrgParser String
+possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
+
applyCustomLinkFormat :: String -> OrgParser (F String)
applyCustomLinkFormat link = do
let (linkType, rest) = break (== ':') link
@@ -1139,27 +1142,33 @@ applyCustomLinkFormat link = do
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
return $ maybe link ($ drop 1 rest) formatter
-
linkToInlinesF :: String -> Inlines -> F Inlines
-linkToInlinesF s@('#':_) = pure . B.link s ""
-linkToInlinesF s
- | isImageFilename s = const . pure $ B.image s "" ""
- | isUri s = pure . B.link s ""
- | isRelativeUrl s = pure . B.link s ""
-linkToInlinesF s = \title -> do
- anchorB <- (s `elem`) <$> asksF orgStateAnchorIds
- if anchorB
- then pure $ B.link ('#':s) "" title
- else pure $ B.emph title
-
-isRelativeUrl :: String -> Bool
-isRelativeUrl s = (':' `notElem` s) && ("./" `isPrefixOf` s)
+linkToInlinesF s =
+ case s of
+ "" -> pure . B.link "" ""
+ ('#':_) -> pure . B.link s ""
+ _ | isImageFilename s -> const . pure $ B.image s "" ""
+ _ | isUri s -> pure . B.link s ""
+ _ | isRelativeFilePath s -> pure . B.link s ""
+ _ | isAbsoluteFilePath s -> pure . B.link ("file://" ++ s) ""
+ _ -> \title -> do
+ anchorB <- (s `elem`) <$> asksF orgStateAnchorIds
+ if anchorB
+ then pure $ B.link ('#':s) "" title
+ else pure $ B.emph title
+
+isRelativeFilePath :: String -> Bool
+isRelativeFilePath s = (("./" `isPrefixOf` s) || ("../" `isPrefixOf` s)) &&
+ (':' `notElem` s)
isUri :: String -> Bool
isUri s = let (scheme, path) = break (== ':') s
in all (\c -> isAlphaNum c || c `elem` ".-") scheme
&& not (null path)
+isAbsoluteFilePath :: String -> Bool
+isAbsoluteFilePath = ('/' ==) . head
+
isImageFilename :: String -> Bool
isImageFilename filename =
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index e5eccb116..732956981 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -47,7 +47,7 @@ import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
import qualified Text.Pandoc.Builder as B
import Data.Monoid (mconcat, mempty)
import Data.Sequence (viewr, ViewR(..))
-import Data.Char (toLower, isHexDigit)
+import Data.Char (toLower, isHexDigit, isSpace)
-- | Parse reStructuredText string and return Pandoc document.
readRST :: ReaderOptions -- ^ Reader options
@@ -335,6 +335,13 @@ indentedBlock = try $ do
optional blanklines
return $ unlines lns
+quotedBlock :: Parser [Char] st [Char]
+quotedBlock = try $ do
+ quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
+ lns <- many1 $ lookAhead (char quote) >> anyLine
+ optional blanklines
+ return $ unlines lns
+
codeBlockStart :: Parser [Char] st Char
codeBlockStart = string "::" >> blankline >> blankline
@@ -342,7 +349,8 @@ codeBlock :: Parser [Char] st Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
codeBlockBody :: Parser [Char] st Blocks
-codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> indentedBlock
+codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
+ (indentedBlock <|> quotedBlock)
lhsCodeBlock :: RSTParser Blocks
lhsCodeBlock = try $ do
@@ -513,7 +521,6 @@ directive = try $ do
-- TODO: line-block, parsed-literal, table, csv-table, list-table
-- date
-- include
--- class
-- title
directive' :: RSTParser Blocks
directive' = do
@@ -594,6 +601,13 @@ directive' = do
Just t -> B.link (escapeURI $ trim t) ""
$ B.image src "" alt
Nothing -> B.image src "" alt
+ "class" -> do
+ let attrs = ("", (splitBy isSpace $ trim top), map (\(k,v) -> (k, trimr v)) fields)
+ -- directive content or the first immediately following element
+ children <- case body of
+ "" -> block
+ _ -> parseFromString parseBlocks body'
+ return $ B.divWith attrs children
_ -> return mempty
-- TODO:
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
new file mode 100644
index 000000000..c2325c0ea
--- /dev/null
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -0,0 +1,526 @@
+{-# LANGUAGE RelaxedPolyRec, FlexibleInstances, TypeSynonymInstances #-}
+-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
+{-
+ Copyright (C) 2014 Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.TWiki
+ Copyright : Copyright (C) 2014 Alexander Sulfrian
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Alexander Sulfrian <alexander.sulfrian@fu-berlin.de>
+ Stability : alpha
+ Portability : portable
+
+Conversion of twiki text to 'Pandoc' document.
+-}
+module Text.Pandoc.Readers.TWiki ( readTWiki
+ , readTWikiWithWarnings
+ ) where
+
+import Text.Pandoc.Definition
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Options
+import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
+import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
+import Data.Monoid (Monoid, mconcat, mempty)
+import Control.Applicative ((<$>), (<*), (*>), (<$))
+import Control.Monad
+import Text.Printf (printf)
+import Debug.Trace (trace)
+import Text.Pandoc.XML (fromEntities)
+import Data.Maybe (fromMaybe)
+import Text.HTML.TagSoup
+import Data.Char (isAlphaNum)
+import qualified Data.Foldable as F
+
+-- | Read twiki from an input string and return a Pandoc document.
+readTWiki :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Pandoc
+readTWiki opts s =
+ (readWith parseTWiki) def{ stateOptions = opts } (s ++ "\n\n")
+
+readTWikiWithWarnings :: ReaderOptions -- ^ Reader options
+ -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> (Pandoc, [String])
+readTWikiWithWarnings opts s =
+ (readWith parseTWikiWithWarnings) def{ stateOptions = opts } (s ++ "\n\n")
+ where parseTWikiWithWarnings = do
+ doc <- parseTWiki
+ warnings <- stateWarnings <$> getState
+ return (doc, warnings)
+
+type TWParser = Parser [Char] ParserState
+
+--
+-- utility functions
+--
+
+tryMsg :: String -> TWParser a -> TWParser a
+tryMsg msg p = try p <?> msg
+
+skip :: TWParser a -> TWParser ()
+skip parser = parser >> return ()
+
+nested :: TWParser a -> TWParser a
+nested p = do
+ nestlevel <- stateMaxNestingLevel <$> getState
+ guard $ nestlevel > 0
+ updateState $ \st -> st{ stateMaxNestingLevel = stateMaxNestingLevel st - 1 }
+ res <- p
+ updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
+ return res
+
+htmlElement :: String -> TWParser (Attr, String)
+htmlElement tag = tryMsg tag $ do
+ (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
+ content <- manyTill anyChar (endtag <|> endofinput)
+ return (htmlAttrToPandoc attr, trim content)
+ where
+ endtag = skip $ htmlTag (~== TagClose tag)
+ endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
+ trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
+
+htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc attrs = (ident, classes, keyvals)
+ where
+ ident = fromMaybe "" $ lookup "id" attrs
+ classes = maybe [] words $ lookup "class" attrs
+ keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
+
+parseHtmlContentWithAttrs :: String -> TWParser a -> TWParser (Attr, [a])
+parseHtmlContentWithAttrs tag parser = do
+ (attr, content) <- htmlElement tag
+ parsedContent <- try $ parseContent content
+ return (attr, parsedContent)
+ where
+ parseContent = parseFromString $ nested $ manyTill parser endOfContent
+ endOfContent = try $ skipMany blankline >> skipSpaces >> eof
+
+parseHtmlContent :: String -> TWParser a -> TWParser [a]
+parseHtmlContent tag p = parseHtmlContentWithAttrs tag p >>= return . snd
+
+--
+-- main parser
+--
+
+parseTWiki :: TWParser Pandoc
+parseTWiki = do
+ bs <- mconcat <$> many block
+ spaces
+ eof
+ return $ B.doc bs
+
+
+--
+-- block parsers
+--
+
+block :: TWParser B.Blocks
+block = do
+ tr <- getOption readerTrace
+ pos <- getPosition
+ res <- mempty <$ skipMany1 blankline
+ <|> blockElements
+ <|> para
+ skipMany blankline
+ when tr $
+ trace (printf "line %d: %s" (sourceLine pos)
+ (take 60 $ show $ B.toList res)) (return ())
+ return res
+
+blockElements :: TWParser B.Blocks
+blockElements = choice [ separator
+ , header
+ , verbatim
+ , literal
+ , list ""
+ , table
+ , blockQuote
+ , noautolink
+ ]
+
+separator :: TWParser B.Blocks
+separator = tryMsg "separator" $ string "---" >> newline >> return B.horizontalRule
+
+header :: TWParser B.Blocks
+header = tryMsg "header" $ do
+ string "---"
+ level <- many1 (char '+') >>= return . length
+ guard $ level <= 6
+ classes <- option [] $ string "!!" >> return ["unnumbered"]
+ skipSpaces
+ content <- B.trimInlines . mconcat <$> manyTill inline newline
+ attr <- registerHeader ("", classes, []) content
+ return $ B.headerWith attr level $ content
+
+verbatim :: TWParser B.Blocks
+verbatim = (htmlElement "verbatim" <|> htmlElement "pre")
+ >>= return . (uncurry B.codeBlockWith)
+
+literal :: TWParser B.Blocks
+literal = htmlElement "literal" >>= return . rawBlock
+ where
+ format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
+ rawBlock (attrs, content) = B.rawBlock (format attrs) content
+
+list :: String -> TWParser B.Blocks
+list prefix = choice [ bulletList prefix
+ , orderedList prefix
+ , definitionList prefix]
+
+definitionList :: String -> TWParser B.Blocks
+definitionList prefix = tryMsg "definitionList" $ do
+ indent <- lookAhead $ string prefix *> (many1 $ string " ") <* string "$ "
+ elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
+ return $ B.definitionList elements
+ where
+ parseDefinitionListItem :: String -> TWParser (B.Inlines, [B.Blocks])
+ parseDefinitionListItem indent = do
+ string (indent ++ "$ ") >> skipSpaces
+ term <- many1Till inline $ string ": "
+ line <- listItemLine indent $ string "$ "
+ return $ (mconcat term, [line])
+
+bulletList :: String -> TWParser B.Blocks
+bulletList prefix = tryMsg "bulletList" $
+ parseList prefix (char '*') (char ' ')
+
+orderedList :: String -> TWParser B.Blocks
+orderedList prefix = tryMsg "orderedList" $
+ parseList prefix (oneOf "1iIaA") (string ". ")
+
+parseList :: Show a => String -> TWParser Char -> TWParser a -> TWParser B.Blocks
+parseList prefix marker delim = do
+ (indent, style) <- lookAhead $ string prefix *> listStyle <* delim
+ blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim)
+ return $ case style of
+ '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks
+ 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks
+ 'I' -> B.orderedListWith (1, UpperRoman, DefaultDelim) blocks
+ 'a' -> B.orderedListWith (1, LowerAlpha, DefaultDelim) blocks
+ 'A' -> B.orderedListWith (1, UpperAlpha, DefaultDelim) blocks
+ _ -> B.bulletList blocks
+ where
+ listStyle = do
+ indent <- many1 $ string " "
+ style <- marker
+ return (concat indent, style)
+
+parseListItem :: Show a => String -> TWParser a -> TWParser B.Blocks
+parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker
+
+listItemLine :: Show a => String -> TWParser a -> TWParser B.Blocks
+listItemLine prefix marker = lineContent >>= parseContent >>= return . mconcat
+ where
+ lineContent = do
+ content <- anyLine
+ continuation <- optionMaybe listContinuation
+ return $ filterSpaces content ++ "\n" ++ (maybe "" (" " ++) continuation)
+ filterSpaces = reverse . dropWhile (== ' ') . reverse
+ listContinuation = notFollowedBy (string prefix >> marker) >>
+ string " " >> lineContent
+ parseContent = parseFromString $ many1 $ nestedList <|> parseInline
+ parseInline = many1Till inline (lastNewline <|> newlineBeforeNestedList) >>=
+ return . B.plain . mconcat
+ nestedList = list prefix
+ lastNewline = try $ char '\n' <* eof
+ newlineBeforeNestedList = try $ char '\n' <* lookAhead nestedList
+
+table :: TWParser B.Blocks
+table = try $ do
+ tableHead <- optionMaybe $ many1Till tableParseHeader newline >>= return . unzip
+ rows <- many1 tableParseRow
+ return $ buildTable mempty rows $ fromMaybe (align rows, columns rows) tableHead
+ where
+ buildTable caption rows (aligns, heads)
+ = B.table caption aligns heads rows
+ align rows = replicate (columCount rows) (AlignDefault, 0)
+ columns rows = replicate (columCount rows) mempty
+ columCount rows = length $ head rows
+
+tableParseHeader :: TWParser ((Alignment, Double), B.Blocks)
+tableParseHeader = try $ do
+ char '|'
+ leftSpaces <- many spaceChar >>= return . length
+ char '*'
+ content <- tableColumnContent (char '*' >> skipSpaces >> char '|')
+ char '*'
+ rightSpaces <- many spaceChar >>= return . length
+ optional tableEndOfRow
+ return (tableAlign leftSpaces rightSpaces, content)
+ where
+ tableAlign left right
+ | left >= 2 && left == right = (AlignCenter, 0)
+ | left > right = (AlignRight, 0)
+ | otherwise = (AlignLeft, 0)
+
+tableParseRow :: TWParser [B.Blocks]
+tableParseRow = many1Till tableParseColumn newline
+
+tableParseColumn :: TWParser B.Blocks
+tableParseColumn = char '|' *> skipSpaces *>
+ tableColumnContent (skipSpaces >> char '|')
+ <* skipSpaces <* optional tableEndOfRow
+
+tableEndOfRow :: TWParser Char
+tableEndOfRow = lookAhead (try $ char '|' >> char '\n') >> char '|'
+
+tableColumnContent :: Show a => TWParser a -> TWParser B.Blocks
+tableColumnContent end = manyTill content (lookAhead $ try end) >>= return . B.plain . mconcat
+ where
+ content = continuation <|> inline
+ continuation = try $ char '\\' >> newline >> return mempty
+
+blockQuote :: TWParser B.Blocks
+blockQuote = parseHtmlContent "blockquote" block >>= return . B.blockQuote . mconcat
+
+noautolink :: TWParser B.Blocks
+noautolink = do
+ (_, content) <- htmlElement "noautolink"
+ st <- getState
+ setState $ st{ stateAllowLinks = False }
+ blocks <- try $ parseContent content
+ setState $ st{ stateAllowLinks = True }
+ return $ mconcat blocks
+ where
+ parseContent = parseFromString $ many $ block
+
+para :: TWParser B.Blocks
+para = many1Till inline endOfParaElement >>= return . result . mconcat
+ where
+ endOfParaElement = lookAhead $ endOfInput <|> endOfPara <|> newBlockElement
+ endOfInput = try $ skipMany blankline >> skipSpaces >> eof
+ endOfPara = try $ blankline >> skipMany1 blankline
+ newBlockElement = try $ blankline >> skip blockElements
+ result content = if F.all (==Space) content
+ then mempty
+ else B.para $ B.trimInlines content
+
+
+--
+-- inline parsers
+--
+
+inline :: TWParser B.Inlines
+inline = choice [ whitespace
+ , br
+ , macro
+ , strong
+ , strongHtml
+ , strongAndEmph
+ , emph
+ , emphHtml
+ , boldCode
+ , smart
+ , link
+ , htmlComment
+ , code
+ , codeHtml
+ , nop
+ , autoLink
+ , str
+ , symbol
+ ] <?> "inline"
+
+whitespace :: TWParser B.Inlines
+whitespace = (lb <|> regsp) >>= return
+ where lb = try $ skipMany spaceChar >> linebreak >> return B.space
+ regsp = try $ skipMany1 spaceChar >> return B.space
+
+br :: TWParser B.Inlines
+br = try $ string "%BR%" >> return B.linebreak
+
+linebreak :: TWParser B.Inlines
+linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
+ where lastNewline = eof >> return mempty
+ innerNewline = return B.space
+
+between :: (Show b, Monoid c) => TWParser a -> TWParser b -> (TWParser b -> TWParser c) -> TWParser c
+between start end p =
+ mconcat <$> try (start >> notFollowedBy whitespace >> many1Till (p end) end)
+
+enclosed :: (Show a, Monoid b) => TWParser a -> (TWParser a -> TWParser b) -> TWParser b
+enclosed sep p = between sep (try $ sep <* endMarker) p
+ where
+ endMarker = lookAhead $ skip endSpace <|> skip (oneOf ".,!?:)|") <|> eof
+ endSpace = (spaceChar <|> newline) >> return B.space
+
+macro :: TWParser B.Inlines
+macro = macroWithParameters <|> withoutParameters
+ where
+ withoutParameters = enclosed (char '%') (\_ -> macroName) >>= return . emptySpan
+ emptySpan name = buildSpan name [] mempty
+
+macroWithParameters :: TWParser B.Inlines
+macroWithParameters = try $ do
+ char '%'
+ name <- macroName
+ (content, kvs) <- attributes
+ char '%'
+ return $ buildSpan name kvs $ B.str content
+
+buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines
+buildSpan className kvs = B.spanWith attrs
+ where
+ attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses)
+ additionalClasses = maybe [] words $ lookup "class" kvs
+ kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"]
+
+macroName :: TWParser String
+macroName = do
+ first <- letter
+ rest <- many $ alphaNum <|> char '_'
+ return (first:rest)
+
+attributes :: TWParser (String, [(String, String)])
+attributes = char '{' *> spnl *> many (attribute <* spnl) <* char '}' >>=
+ return . foldr (either mkContent mkKvs) ([], [])
+ where
+ spnl = skipMany (spaceChar <|> newline)
+ mkContent c ([], kvs) = (c, kvs)
+ mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
+ mkKvs kv (cont, rest) = (cont, (kv : rest))
+
+attribute :: TWParser (Either String (String, String))
+attribute = withKey <|> withoutKey
+ where
+ withKey = try $ do
+ key <- macroName
+ char '='
+ parseValue False >>= return . (curry Right key)
+ withoutKey = try $ parseValue True >>= return . Left
+ parseValue allowSpaces = (withQuotes <|> withoutQuotes allowSpaces) >>= return . fromEntities
+ withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
+ withoutQuotes allowSpaces
+ | allowSpaces == True = many1 $ noneOf "}"
+ | otherwise = many1 $ noneOf " }"
+
+nestedInlines :: Show a => TWParser a -> TWParser B.Inlines
+nestedInlines end = innerSpace <|> nestedInline
+ where
+ innerSpace = try $ whitespace <* (notFollowedBy end)
+ nestedInline = notFollowedBy whitespace >> nested inline
+
+strong :: TWParser B.Inlines
+strong = try $ enclosed (char '*') nestedInlines >>= return . B.strong
+
+strongHtml :: TWParser B.Inlines
+strongHtml = (parseHtmlContent "strong" inline <|> parseHtmlContent "b" inline)
+ >>= return . B.strong . mconcat
+
+strongAndEmph :: TWParser B.Inlines
+strongAndEmph = try $ enclosed (string "__") nestedInlines >>= return . B.emph . B.strong
+
+emph :: TWParser B.Inlines
+emph = try $ enclosed (char '_') nestedInlines >>= return . B.emph
+
+emphHtml :: TWParser B.Inlines
+emphHtml = (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
+ >>= return . B.emph . mconcat
+
+nestedString :: Show a => TWParser a -> TWParser String
+nestedString end = innerSpace <|> (count 1 nonspaceChar)
+ where
+ innerSpace = try $ many1 spaceChar <* notFollowedBy end
+
+boldCode :: TWParser B.Inlines
+boldCode = try $ enclosed (string "==") nestedString >>= return . B.strong . B.code . fromEntities
+
+htmlComment :: TWParser B.Inlines
+htmlComment = htmlTag isCommentTag >> return mempty
+
+code :: TWParser B.Inlines
+code = try $ enclosed (char '=') nestedString >>= return . B.code . fromEntities
+
+codeHtml :: TWParser B.Inlines
+codeHtml = do
+ (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
+ return $ B.codeWith attrs $ fromEntities content
+
+autoLink :: TWParser B.Inlines
+autoLink = try $ do
+ state <- getState
+ guard $ stateAllowLinks state
+ (text, url) <- parseLink
+ guard $ checkLink (head $ reverse url)
+ return $ makeLink (text, url)
+ where
+ parseLink = notFollowedBy nop >> (uri <|> emailAddress)
+ makeLink (text, url) = B.link url "" $ B.str text
+ checkLink c
+ | c == '/' = True
+ | otherwise = isAlphaNum c
+
+str :: TWParser B.Inlines
+str = (many1 alphaNum <|> count 1 characterReference) >>= return . B.str
+
+nop :: TWParser B.Inlines
+nop = try $ (skip exclamation <|> skip nopTag) >> followContent
+ where
+ exclamation = char '!'
+ nopTag = stringAnyCase "<nop>"
+ followContent = many1 nonspaceChar >>= return . B.str . fromEntities
+
+symbol :: TWParser B.Inlines
+symbol = count 1 nonspaceChar >>= return . B.str
+
+smart :: TWParser B.Inlines
+smart = do
+ getOption readerSmart >>= guard
+ doubleQuoted <|> singleQuoted <|>
+ choice [ apostrophe
+ , dash
+ , ellipses
+ ]
+
+singleQuoted :: TWParser B.Inlines
+singleQuoted = try $ do
+ singleQuoteStart
+ withQuoteContext InSingleQuote $
+ many1Till inline singleQuoteEnd >>=
+ (return . B.singleQuoted . B.trimInlines . mconcat)
+
+doubleQuoted :: TWParser B.Inlines
+doubleQuoted = try $ do
+ doubleQuoteStart
+ contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
+ (withQuoteContext InDoubleQuote $ doubleQuoteEnd >>
+ return (B.doubleQuoted $ B.trimInlines contents))
+ <|> (return $ (B.str "\8220") B.<> contents)
+
+link :: TWParser B.Inlines
+link = try $ do
+ st <- getState
+ guard $ stateAllowLinks st
+ setState $ st{ stateAllowLinks = False }
+ (url, title, content) <- linkText
+ setState $ st{ stateAllowLinks = True }
+ return $ B.link url title content
+
+linkText :: TWParser (String, String, B.Inlines)
+linkText = do
+ string "[["
+ url <- many1Till anyChar (char ']')
+ content <- option [B.str url] linkContent
+ char ']'
+ return (url, "", mconcat content)
+ where
+ linkContent = (char '[') >> many1Till anyChar (char ']') >>= parseLinkContent
+ parseLinkContent = parseFromString $ many1 inline
diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs
index 8c1d360aa..74418aa7e 100644
--- a/src/Text/Pandoc/Writers/DokuWiki.hs
+++ b/src/Text/Pandoc/Writers/DokuWiki.hs
@@ -178,7 +178,7 @@ blockToDokuWiki _ (CodeBlock (_,classes,_) str) = do
blockToDokuWiki opts (BlockQuote blocks) = do
contents <- blockListToDokuWiki opts blocks
if isSimpleBlockQuote blocks
- then return $ "> " ++ contents
+ then return $ unlines $ map ("> " ++) $ lines contents
else return $ "<HTML><blockquote>\n" ++ contents ++ "</blockquote></HTML>"
blockToDokuWiki opts (Table capt aligns _ headers rows) = do
@@ -352,9 +352,7 @@ isPlainOrPara (Para _) = True
isPlainOrPara _ = False
isSimpleBlockQuote :: [Block] -> Bool
-isSimpleBlockQuote [BlockQuote bs] = isSimpleBlockQuote bs
-isSimpleBlockQuote [b] = isPlainOrPara b
-isSimpleBlockQuote _ = False
+isSimpleBlockQuote bs = all isPlainOrPara bs
-- | Concatenates strings with line breaks between them.
vcat :: [String] -> String
diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs
index 53574711f..2291c7184 100644
--- a/src/Text/Pandoc/Writers/EPUB.hs
+++ b/src/Text/Pandoc/Writers/EPUB.hs
@@ -358,8 +358,9 @@ writeEPUB opts doc@(Pandoc meta _) = do
Nothing -> return ([],[])
Just img -> do
let coverImage = "media/" ++ takeFileName img
- let cpContent = renderHtml $ writeHtml opts'
- (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
+ let cpContent = renderHtml $ writeHtml
+ opts'{ writerVariables = ("coverpage","true"):vars }
+ (Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
imgContent <- B.readFile img
return ( [mkEntry "cover.xhtml" cpContent]
, [mkEntry coverImage imgContent] )
@@ -611,17 +612,14 @@ writeEPUB opts doc@(Pandoc meta _) = do
(_:_) -> [unode "ol" ! [("class","toc")] $ subs]
let navtag = if epub3 then "nav" else "div"
- let navData = UTF8.fromStringLazy $ ppTopElement $
- unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml")
- ,("xmlns:epub","http://www.idpf.org/2007/ops")] $
- [ unode "head" $
- [ unode "title" plainTitle
- , unode "link" ! [("rel","stylesheet"),("type","text/css"),("href","stylesheet.css")] $ () ]
- , unode "body" $
- unode navtag ! [("epub:type","toc") | epub3] $
- [ unode "h1" ! [("id","toc-title")] $ plainTitle
- , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]
- ]
+ let navBlocks = [RawBlock (Format "html") $ ppElement $
+ unode navtag ! [("epub:type","toc") | epub3] $
+ [ unode "h1" ! [("id","toc-title")] $ plainTitle
+ , unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]]
+ let navData = renderHtml $ writeHtml opts'
+ (Pandoc (setMeta "title"
+ (walk removeNote $ fromList $ docTitle' meta) nullMeta)
+ navBlocks)
let navEntry = mkEntry "nav.xhtml" navData
-- mimetype
diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs
index ae20efd4b..181c63df7 100644
--- a/src/Text/Pandoc/Writers/ICML.hs
+++ b/src/Text/Pandoc/Writers/ICML.hs
@@ -399,7 +399,7 @@ inlineToICML opts style (Subscript lst) = inlinesToICML opts (subscriptName:styl
inlineToICML opts style (SmallCaps lst) = inlinesToICML opts (smallCapsName:style) lst
inlineToICML opts style (Quoted SingleQuote lst) = inlinesToICML opts style $ [Str "‘"] ++ lst ++ [Str "’"]
inlineToICML opts style (Quoted DoubleQuote lst) = inlinesToICML opts style $ [Str "“"] ++ lst ++ [Str "”"]
-inlineToICML opts style (Cite _ lst) = footnoteToICML opts style [Para lst]
+inlineToICML opts style (Cite _ lst) = inlinesToICML opts style lst
inlineToICML _ style (Code _ str) = charStyle (codeName:style) $ text $ escapeStringForXML str
inlineToICML _ style Space = charStyle style space
inlineToICML _ style LineBreak = charStyle style $ text lineSeparator
diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs
index 03f8e8ba4..2a4129512 100644
--- a/src/Text/Pandoc/Writers/ODT.hs
+++ b/src/Text/Pandoc/Writers/ODT.hs
@@ -41,7 +41,7 @@ import Control.Applicative ((<$>))
import Text.Pandoc.Options ( WriterOptions(..) )
import Text.Pandoc.Shared ( stringify, readDataFile, fetchItem', warn )
import Text.Pandoc.ImageSize ( imageSize, sizeInPoints )
-import Text.Pandoc.MIME ( getMimeType )
+import Text.Pandoc.MIME ( getMimeType, extensionFromMimeType )
import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Text.Pandoc.Writers.Shared ( fixDisplayMath )
@@ -51,7 +51,7 @@ import Text.Pandoc.XML
import Text.Pandoc.Pretty
import qualified Control.Exception as E
import Data.Time.Clock.POSIX ( getPOSIXTime )
-import System.FilePath ( takeExtension, takeDirectory )
+import System.FilePath ( takeExtension, takeDirectory, (<.>))
-- | Produce an ODT file from a Pandoc document.
writeODT :: WriterOptions -- ^ Writer options
@@ -133,12 +133,14 @@ transformPicMath opts entriesRef (Image lab (src,_)) = do
Left (_ :: E.SomeException) -> do
warn $ "Could not find image `" ++ src ++ "', skipping..."
return $ Emph lab
- Right (img, _) -> do
+ Right (img, mbMimeType) -> do
let size = imageSize img
let (w,h) = fromMaybe (0,0) $ sizeInPoints `fmap` size
let tit' = show w ++ "x" ++ show h
entries <- readIORef entriesRef
- let newsrc = "Pictures/" ++ show (length entries) ++ takeExtension src
+ let extension = fromMaybe (takeExtension $ takeWhile (/='?') src)
+ (mbMimeType >>= extensionFromMimeType)
+ let newsrc = "Pictures/" ++ show (length entries) <.> extension
let toLazy = B.fromChunks . (:[])
epochtime <- floor `fmap` getPOSIXTime
let entry = toEntry newsrc epochtime $ toLazy img
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index a96670c96..5ba4c9983 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -173,7 +173,7 @@ blockToRST (Para [Image txt (src,'f':'i':'g':':':tit)]) = do
capt <- inlineListToRST txt
let fig = "figure:: " <> text src
let alt = ":alt: " <> if null tit then capt else text tit
- return $ hang 3 ".. " $ fig $$ alt $+$ capt $$ blankline
+ return $ hang 3 ".. " (fig $$ alt $+$ capt) $$ blankline
blockToRST (Para inlines)
| LineBreak `elem` inlines = do -- use line block if LineBreaks
lns <- mapM inlineListToRST $ splitBy (==LineBreak) inlines
@@ -239,8 +239,7 @@ blockToRST (Table caption _ widths headers rows) = do
middle = hcat $ intersperse sep' blocks
let makeRow = hpipeBlocks . zipWith lblock widthsInChars
let head' = makeRow headers'
- rows' <- mapM (\row -> do cols <- mapM blockListToRST row
- return $ makeRow cols) rows
+ let rows' = map makeRow rawRows
let border ch = char '+' <> char ch <>
(hcat $ intersperse (char ch <> char '+' <> char ch) $
map (\l -> text $ replicate l ch) widthsInChars) <>
@@ -253,7 +252,7 @@ blockToRST (Table caption _ widths headers rows) = do
blockToRST (BulletList items) = do
contents <- mapM bulletListItemToRST items
-- ensure that sublists have preceding blank line
- return $ blankline $$ vcat contents $$ blankline
+ return $ blankline $$ chomp (vcat contents) $$ blankline
blockToRST (OrderedList (start, style', delim) items) = do
let markers = if start == 1 && style' == DefaultStyle && delim == DefaultDelim
then take (length items) $ repeat "#."
@@ -265,11 +264,11 @@ blockToRST (OrderedList (start, style', delim) items) = do
contents <- mapM (\(item, num) -> orderedListItemToRST item num) $
zip markers' items
-- ensure that sublists have preceding blank line
- return $ blankline $$ vcat contents $$ blankline
+ return $ blankline $$ chomp (vcat contents) $$ blankline
blockToRST (DefinitionList items) = do
contents <- mapM definitionListItemToRST items
-- ensure that sublists have preceding blank line
- return $ blankline $$ vcat contents $$ blankline
+ return $ blankline $$ chomp (vcat contents) $$ blankline
-- | Convert bullet list item (list of blocks) to RST.
bulletListItemToRST :: [Block] -> State WriterState Doc
@@ -427,7 +426,7 @@ inlineToRST (Image alternate (source, tit)) = do
return $ "|" <> label <> "|"
inlineToRST (Note contents) = do
-- add to notes in state
- notes <- get >>= return . stNotes
+ notes <- gets stNotes
modify $ \st -> st { stNotes = contents:notes }
let ref = show $ (length notes) + 1
return $ " [" <> text ref <> "]_"