summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc.hs6
-rw-r--r--src/Text/Pandoc/CSS.hs35
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs5
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs9
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs82
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs1
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs4
-rw-r--r--src/Text/Pandoc/Readers/Org.hs17
-rw-r--r--src/Text/Pandoc/Readers/RST.hs20
-rw-r--r--src/Text/Pandoc/Shared.hs1
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs8
-rw-r--r--src/Text/Pandoc/Writers/RST.hs9
12 files changed, 136 insertions, 61 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index a4d963221..d7311d978 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -320,14 +320,14 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
getDefaultExtensions "markdown_github" = githubMarkdownExtensions
getDefaultExtensions "markdown" = pandocExtensions
getDefaultExtensions "plain" = plainExtensions
-getDefaultExtensions "org" = Set.fromList [Ext_citations]
+getDefaultExtensions "org" = Set.fromList [Ext_citations,
+ Ext_auto_identifiers]
getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers]
getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers,
Ext_native_divs,
Ext_native_spans]
getDefaultExtensions "html5" = getDefaultExtensions "html"
-getDefaultExtensions "epub" = Set.fromList [Ext_auto_identifiers,
- Ext_raw_html,
+getDefaultExtensions "epub" = Set.fromList [Ext_raw_html,
Ext_native_divs,
Ext_native_spans,
Ext_epub_html_exts]
diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs
new file mode 100644
index 000000000..32a5ea129
--- /dev/null
+++ b/src/Text/Pandoc/CSS.hs
@@ -0,0 +1,35 @@
+module Text.Pandoc.CSS ( foldOrElse,
+ pickStyleAttrProps
+ )
+where
+
+import Text.Pandoc.Shared (trim)
+import Text.Parsec
+import Text.Parsec.String
+import Control.Applicative ((<*))
+
+ruleParser :: Parser (String, String)
+ruleParser = do
+ p <- many1 (noneOf ":") <* char ':'
+ v <- many1 (noneOf ":;") <* char ';' <* spaces
+ return (trim p, trim v)
+
+styleAttrParser :: Parser [(String, String)]
+styleAttrParser = do
+ p <- many1 ruleParser
+ return p
+
+orElse :: Eq a => a -> a -> a -> a
+orElse v x y = if v == x then y else x
+
+foldOrElse :: Eq a => a -> [a] -> a
+foldOrElse v xs = foldr (orElse v) v xs
+
+eitherToMaybe :: Either a b -> Maybe b
+eitherToMaybe (Right x) = Just x
+eitherToMaybe _ = Nothing
+
+pickStyleAttrProps :: [String] -> String -> Maybe String
+pickStyleAttrProps lookupProps styleAttr = do
+ styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr
+ foldOrElse Nothing $ map (flip lookup styles) lookupProps
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 3cc2a4479..352b94496 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -194,7 +194,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] indexterm - A wrapper for terms to be indexed
[x] info - A wrapper for information about a component or other block. (DocBook v5)
[x] informalequation - A displayed mathematical equation without a title
-[ ] informalexample - A displayed example without a title
+[x] informalexample - A displayed example without a title
[ ] informalfigure - A untitled figure
[ ] informaltable - A table without a title
[ ] initializer - The initializer for a FieldSynopsis
@@ -611,6 +611,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags
"important","caution","note","tip","warning","qandadiv",
"question","answer","abstract","itemizedlist","orderedlist",
"variablelist","article","book","table","informaltable",
+ "informalexample",
"screen","programlisting","example","calloutlist"]
isBlockElement _ = False
@@ -766,6 +767,8 @@ parseBlock (Elem e) =
"book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e
"table" -> parseTable
"informaltable" -> parseTable
+ "informalexample" -> divWith ("", ["informalexample"], []) <$>
+ getBlocks e
"literallayout" -> codeBlockWithLang
"screen" -> codeBlockWithLang
"programlisting" -> codeBlockWithLang
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 338540533..aefc32e0e 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -181,7 +181,6 @@ getManifest archive = do
fixInternalReferences :: FilePath -> Pandoc -> Pandoc
fixInternalReferences pathToFile =
(walk $ renameImages root)
- . (walk normalisePath)
. (walk $ fixBlockIRs filename)
. (walk $ fixInlineIRs filename)
where
@@ -196,12 +195,6 @@ fixInlineIRs s (Link t ('#':url, tit)) =
Link t (addHash s url, tit)
fixInlineIRs _ v = v
-normalisePath :: Inline -> Inline
-normalisePath (Link t (url, tit)) =
- let (path, uid) = span (/= '#') url in
- Link t (takeFileName path ++ uid, tit)
-normalisePath s = s
-
prependHash :: [String] -> Inline -> Inline
prependHash ps l@(Link is (url, tit))
| or [s `isPrefixOf` url | s <- ps] =
@@ -223,7 +216,7 @@ fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEP
addHash :: String -> String -> String
addHash _ "" = ""
-addHash s ident = s ++ "#" ++ ident
+addHash s ident = takeFileName s ++ "#" ++ ident
removeEPUBAttrs :: [(String, String)] -> [(String, String)]
removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index fcba16e04..b32264d61 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -50,6 +50,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
Ext_native_divs, Ext_native_spans))
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
+import qualified Data.Map as M
import Data.Maybe ( fromMaybe, isJust)
import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf )
import Data.Char ( isDigit )
@@ -64,6 +65,7 @@ import Data.Default (Default (..), def)
import Control.Monad.Reader (Reader,ask, asks, local, runReader)
import Network.URI (isURI)
import Text.Pandoc.Error
+import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Parsec.Error
@@ -74,8 +76,9 @@ readHtml :: ReaderOptions -- ^ Reader options
-> Either PandocError Pandoc
readHtml opts inp =
mapLeft (ParseFailure . getError) . flip runReader def $
- runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing)
- "source" tags
+ runParserT parseDoc
+ (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty)
+ "source" tags
where tags = stripPrefixes . canonicalizeTags $
parseTagsOptions parseOptions{ optTagPosition = True } inp
parseDoc = do
@@ -100,7 +103,9 @@ data HTMLState =
HTMLState
{ parserState :: ParserState,
noteTable :: [(String, Blocks)],
- baseHref :: Maybe String
+ baseHref :: Maybe String,
+ identifiers :: [String],
+ headerMap :: M.Map Inlines String
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@@ -252,6 +257,22 @@ pListItem nonItem = do
let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr)
(liDiv <>) <$> pInTags "li" block <* skipMany nonItem
+parseListStyleType :: String -> ListNumberStyle
+parseListStyleType "lower-roman" = LowerRoman
+parseListStyleType "upper-roman" = UpperRoman
+parseListStyleType "lower-alpha" = LowerAlpha
+parseListStyleType "upper-alpha" = UpperAlpha
+parseListStyleType "decimal" = Decimal
+parseListStyleType _ = DefaultStyle
+
+parseTypeAttr :: String -> ListNumberStyle
+parseTypeAttr "i" = LowerRoman
+parseTypeAttr "I" = UpperRoman
+parseTypeAttr "a" = LowerAlpha
+parseTypeAttr "A" = UpperAlpha
+parseTypeAttr "1" = Decimal
+parseTypeAttr _ = DefaultStyle
+
pOrderedList :: TagParser Blocks
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
@@ -261,23 +282,19 @@ pOrderedList = try $ do
sta' = if all isDigit sta
then read sta
else 1
- sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ ->
- case lookup "type" attribs of
- Just "1" -> Decimal
- Just "I" -> UpperRoman
- Just "i" -> LowerRoman
- Just "A" -> UpperAlpha
- Just "a" -> LowerAlpha
- _ -> DefaultStyle
+
+ pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"]
+
+ typeAttr = fromMaybe "" $ lookup "type" attribs
+ classAttr = fromMaybe "" $ lookup "class" attribs
+ styleAttr = fromMaybe "" $ lookup "style" attribs
+ listStyle = fromMaybe "" $ pickListStyle styleAttr
+
+ sty' = foldOrElse DefaultStyle
+ [ parseTypeAttr typeAttr
+ , parseListStyleType classAttr
+ , parseListStyleType listStyle
+ ]
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (t ~== TagClose "ol"))
@@ -385,9 +402,10 @@ pHeader = try $ do
let ident = fromMaybe "" $ lookup "id" attr
let classes = maybe [] words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
+ attr' <- registerHeader (ident, classes, keyvals) contents
return $ if bodyTitle
then mempty -- skip a representation of the title in the body
- else B.headerWith (ident, classes, keyvals) level contents
+ else B.headerWith attr' level contents
pHrule :: TagParser Blocks
pHrule = do
@@ -622,12 +640,11 @@ pSpan = try $ do
guardEnabled Ext_native_spans
TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
contents <- pInTags "span" inline
- let attr' = mkAttr attr
- return $ case attr' of
- ("",[],[("style",s)])
- | filter (`notElem` " \t;") s == "font-variant:small-caps" ->
- B.smallcaps contents
- _ -> B.spanWith (mkAttr attr) contents
+ let isSmallCaps = fontVariant == "small-caps"
+ where styleAttr = fromMaybe "" $ lookup "style" attr
+ fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
+ let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
+ return $ tag contents
pRawHtmlInline :: TagParser Inlines
pRawHtmlInline = do
@@ -971,6 +988,14 @@ isSpace _ = False
-- Instances
+instance HasIdentifierList HTMLState where
+ extractIdentifierList = identifiers
+ updateIdentifierList f s = s{ identifiers = f (identifiers s) }
+
+instance HasHeaderMap HTMLState where
+ extractHeaderMap = headerMap
+ updateHeaderMap f s = s{ headerMap = f (headerMap s) }
+
-- This signature should be more general
-- MonadReader HTMLLocal m => HasQuoteContext st m
instance HasQuoteContext st (Reader HTMLLocal) where
@@ -980,9 +1005,6 @@ instance HasQuoteContext st (Reader HTMLLocal) where
instance HasReaderOptions HTMLState where
extractReaderOptions = extractReaderOptions . parserState
-instance Default HTMLState where
- def = HTMLState def [] Nothing
-
instance HasMeta HTMLState where
setMeta s b st = st {parserState = setMeta s b $ parserState st}
deleteMeta s st = st {parserState = deleteMeta s $ parserState st}
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 0da912ea6..16d387dc4 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -524,6 +524,7 @@ inlineCommands = M.fromList $
, ("includegraphics", skipopts *> (unescapeURL <$> braced) >>= mkImage)
, ("enquote", enquote)
, ("cite", citation "cite" AuthorInText False)
+ , ("Cite", citation "cite" AuthorInText False)
, ("citep", citation "citep" NormalCitation False)
, ("citep*", citation "citep*" NormalCitation False)
, ("citeal", citation "citeal" NormalCitation False)
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 2a5adab22..b23b44544 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -252,8 +252,8 @@ parseAttr = try $ do
skipMany spaceChar
k <- many1 letter
char '='
- char '"'
- v <- many1Till (satisfy (/='\n')) (char '"')
+ v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
+ <|> many1 nonspaceChar
return (k,v)
tableStart :: MWParser ()
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 980f63504..55ac92bcb 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -70,6 +70,14 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
+instance HasIdentifierList OrgParserState where
+ extractIdentifierList = orgStateIdentifiers
+ updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
+
+instance HasHeaderMap OrgParserState where
+ extractHeaderMap = orgStateHeaderMap
+ updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
+
parseOrg :: OrgParser Pandoc
parseOrg = do
blocks' <- parseBlocks
@@ -135,6 +143,8 @@ data OrgParserState = OrgParserState
, orgStateMeta :: Meta
, orgStateMeta' :: F Meta
, orgStateNotes' :: OrgNoteTable
+ , orgStateIdentifiers :: [String]
+ , orgStateHeaderMap :: M.Map Inlines String
}
instance Default OrgParserLocal where
@@ -174,6 +184,8 @@ defaultOrgParserState = OrgParserState
, orgStateMeta = nullMeta
, orgStateMeta' = return nullMeta
, orgStateNotes' = []
+ , orgStateIdentifiers = []
+ , orgStateHeaderMap = M.empty
}
recordAnchorId :: String -> OrgParser ()
@@ -668,7 +680,10 @@ header = try $ do
title <- manyTill inline (lookAhead headerEnd)
tags <- headerEnd
let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags
- return $ B.header level <$> inlns
+ st <- getState
+ let inlines = runF inlns st
+ attr <- registerHeader nullAttr inlines
+ return $ pure (B.headerWith attr level inlines)
where
tagToInlineF :: String -> F Inlines
tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 678eecc52..f9663b19a 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -614,20 +614,22 @@ directive' = do
return mempty
-- TODO:
--- - Silently ignores illegal fields
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: String -> [(String, String)] -> RSTParser Blocks
addNewRole roleString fields = do
(role, parentRole) <- parseFromString inheritedRole roleString
customRoles <- stateRstCustomRoles <$> getState
- let (baseRole, baseFmt, baseAttr) =
- maybe (parentRole, Nothing, nullAttr) id $
- M.lookup parentRole customRoles
+ let getBaseRole (r, f, a) roles =
+ case M.lookup r roles of
+ Just (r', f', a') -> getBaseRole (r', f', a') roles
+ Nothing -> (r, f, a)
+ (baseRole, baseFmt, baseAttr) =
+ getBaseRole (parentRole, Nothing, nullAttr) customRoles
fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
annotate :: [String] -> [String]
annotate = maybe id (:) $
- if parentRole == "code"
+ if baseRole == "code"
then lookup "language" fields
else Nothing
attr = let (ident, classes, keyValues) = baseAttr
@@ -636,12 +638,12 @@ addNewRole roleString fields = do
-- warn about syntax we ignore
flip mapM_ fields $ \(key, _) -> case key of
- "language" -> when (parentRole /= "code") $ addWarning Nothing $
+ "language" -> when (baseRole /= "code") $ addWarning Nothing $
"ignoring :language: field because the parent of role :" ++
- role ++ ": is :" ++ parentRole ++ ": not :code:"
- "format" -> when (parentRole /= "raw") $ addWarning Nothing $
+ role ++ ": is :" ++ baseRole ++ ": not :code:"
+ "format" -> when (baseRole /= "raw") $ addWarning Nothing $
"ignoring :format: field because the parent of role :" ++
- role ++ ": is :" ++ parentRole ++ ": not :raw:"
+ role ++ ": is :" ++ baseRole ++ ": not :raw:"
_ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++
": in definition of role :" ++ role ++ ": in"
when (parentRole == "raw" && countKeys "format" > 1) $
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 07a7e962c..c44133e12 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -540,6 +540,7 @@ stringify = query go . walk deNote
go (Str x) = x
go (Code _ x) = x
go (Math _ x) = x
+ go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105
go LineBreak = " "
go _ = ""
deNote (Note _) = Str ""
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index da4c78cef..0cb313e7b 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -181,8 +181,8 @@ renumIds f renumMap = map (renumId f renumMap)
-- | Certain characters are invalid in XML even if escaped.
-- See #1992
-stripInvalidChars :: Pandoc -> Pandoc
-stripInvalidChars = bottomUp (filter isValidChar)
+stripInvalidChars :: String -> String
+stripInvalidChars = filter isValidChar
-- | See XML reference
isValidChar :: Char -> Bool
@@ -208,7 +208,7 @@ writeDocx :: WriterOptions -- ^ Writer options
-> IO BL.ByteString
writeDocx opts doc@(Pandoc meta _) = do
let datadir = writerUserDataDir opts
- let doc' = stripInvalidChars . walk fixDisplayMath $ doc
+ let doc' = walk fixDisplayMath $ doc
username <- lookup "USERNAME" <$> getEnvironment
utctime <- getCurrentTime
distArchive <- getDefaultReferenceDocx Nothing
@@ -974,7 +974,7 @@ formattedString str = do
return [ mknode "w:r" [] $
props ++
[ mknode (if inDel then "w:delText" else "w:t")
- [("xml:space","preserve")] str ] ]
+ [("xml:space","preserve")] (stripInvalidChars str) ] ]
setFirstPara :: WS ()
setFirstPara = modify $ \s -> s { stFirstPara = True }
diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs
index 151d3c2ae..fae908f30 100644
--- a/src/Text/Pandoc/Writers/RST.hs
+++ b/src/Text/Pandoc/Writers/RST.hs
@@ -333,7 +333,8 @@ blockListToRST = blockListToRST' False
-- | Convert list of Pandoc inline elements to RST.
inlineListToRST :: [Inline] -> State WriterState Doc
inlineListToRST lst =
- mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat
+ mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>=
+ return . hcat
where -- remove spaces after displaymath, as they screw up indentation:
removeSpaceAfterDisplayMath (Math DisplayMath x : zs) =
Math DisplayMath x : dropWhile (==Space) zs
@@ -341,8 +342,8 @@ inlineListToRST lst =
removeSpaceAfterDisplayMath [] = []
insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed
insertBS (x:y:z:zs)
- | isComplex y && surroundComplex x z =
- x : y : RawInline "rst" "\\ " : insertBS (z:zs)
+ | isComplex y && (surroundComplex x z) =
+ x : y : insertBS (z : zs)
insertBS (x:y:zs)
| isComplex x && not (okAfterComplex y) =
x : RawInline "rst" "\\ " : insertBS (y : zs)
@@ -383,6 +384,8 @@ inlineListToRST lst =
isComplex (Image _ _) = True
isComplex (Code _ _) = True
isComplex (Math _ _) = True
+ isComplex (Cite _ (x:_)) = isComplex x
+ isComplex (Span _ (x:_)) = isComplex x
isComplex _ = False
-- | Convert Pandoc inline element to RST.