diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Text/Pandoc.hs | 6 | ||||
-rw-r--r-- | src/Text/Pandoc/CSS.hs | 35 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/EPUB.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 82 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/MediaWiki.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org.hs | 17 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 20 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 1 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Docx.hs | 8 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/RST.hs | 9 |
12 files changed, 136 insertions, 61 deletions
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index a4d963221..d7311d978 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -320,14 +320,14 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions getDefaultExtensions "markdown_github" = githubMarkdownExtensions getDefaultExtensions "markdown" = pandocExtensions getDefaultExtensions "plain" = plainExtensions -getDefaultExtensions "org" = Set.fromList [Ext_citations] +getDefaultExtensions "org" = Set.fromList [Ext_citations, + Ext_auto_identifiers] getDefaultExtensions "textile" = Set.fromList [Ext_auto_identifiers] getDefaultExtensions "html" = Set.fromList [Ext_auto_identifiers, Ext_native_divs, Ext_native_spans] getDefaultExtensions "html5" = getDefaultExtensions "html" -getDefaultExtensions "epub" = Set.fromList [Ext_auto_identifiers, - Ext_raw_html, +getDefaultExtensions "epub" = Set.fromList [Ext_raw_html, Ext_native_divs, Ext_native_spans, Ext_epub_html_exts] diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs new file mode 100644 index 000000000..32a5ea129 --- /dev/null +++ b/src/Text/Pandoc/CSS.hs @@ -0,0 +1,35 @@ +module Text.Pandoc.CSS ( foldOrElse, + pickStyleAttrProps + ) +where + +import Text.Pandoc.Shared (trim) +import Text.Parsec +import Text.Parsec.String +import Control.Applicative ((<*)) + +ruleParser :: Parser (String, String) +ruleParser = do + p <- many1 (noneOf ":") <* char ':' + v <- many1 (noneOf ":;") <* char ';' <* spaces + return (trim p, trim v) + +styleAttrParser :: Parser [(String, String)] +styleAttrParser = do + p <- many1 ruleParser + return p + +orElse :: Eq a => a -> a -> a -> a +orElse v x y = if v == x then y else x + +foldOrElse :: Eq a => a -> [a] -> a +foldOrElse v xs = foldr (orElse v) v xs + +eitherToMaybe :: Either a b -> Maybe b +eitherToMaybe (Right x) = Just x +eitherToMaybe _ = Nothing + +pickStyleAttrProps :: [String] -> String -> Maybe String +pickStyleAttrProps lookupProps styleAttr = do + styles <- eitherToMaybe $ parse styleAttrParser "" styleAttr + foldOrElse Nothing $ map (flip lookup styles) lookupProps diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 3cc2a4479..352b94496 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -194,7 +194,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] indexterm - A wrapper for terms to be indexed [x] info - A wrapper for information about a component or other block. (DocBook v5) [x] informalequation - A displayed mathematical equation without a title -[ ] informalexample - A displayed example without a title +[x] informalexample - A displayed example without a title [ ] informalfigure - A untitled figure [ ] informaltable - A table without a title [ ] initializer - The initializer for a FieldSynopsis @@ -611,6 +611,7 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags "important","caution","note","tip","warning","qandadiv", "question","answer","abstract","itemizedlist","orderedlist", "variablelist","article","book","table","informaltable", + "informalexample", "screen","programlisting","example","calloutlist"] isBlockElement _ = False @@ -766,6 +767,8 @@ parseBlock (Elem e) = "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e "table" -> parseTable "informaltable" -> parseTable + "informalexample" -> divWith ("", ["informalexample"], []) <$> + getBlocks e "literallayout" -> codeBlockWithLang "screen" -> codeBlockWithLang "programlisting" -> codeBlockWithLang diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 338540533..aefc32e0e 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -181,7 +181,6 @@ getManifest archive = do fixInternalReferences :: FilePath -> Pandoc -> Pandoc fixInternalReferences pathToFile = (walk $ renameImages root) - . (walk normalisePath) . (walk $ fixBlockIRs filename) . (walk $ fixInlineIRs filename) where @@ -196,12 +195,6 @@ fixInlineIRs s (Link t ('#':url, tit)) = Link t (addHash s url, tit) fixInlineIRs _ v = v -normalisePath :: Inline -> Inline -normalisePath (Link t (url, tit)) = - let (path, uid) = span (/= '#') url in - Link t (takeFileName path ++ uid, tit) -normalisePath s = s - prependHash :: [String] -> Inline -> Inline prependHash ps l@(Link is (url, tit)) | or [s `isPrefixOf` url | s <- ps] = @@ -223,7 +216,7 @@ fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEP addHash :: String -> String -> String addHash _ "" = "" -addHash s ident = s ++ "#" ++ ident +addHash s ident = takeFileName s ++ "#" ++ ident removeEPUBAttrs :: [(String, String)] -> [(String, String)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index fcba16e04..b32264d61 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -50,6 +50,7 @@ import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace) Ext_native_divs, Ext_native_spans)) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Walk +import qualified Data.Map as M import Data.Maybe ( fromMaybe, isJust) import Data.List ( intercalate, isInfixOf, isPrefixOf, isSuffixOf ) import Data.Char ( isDigit ) @@ -64,6 +65,7 @@ import Data.Default (Default (..), def) import Control.Monad.Reader (Reader,ask, asks, local, runReader) import Network.URI (isURI) import Text.Pandoc.Error +import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Parsec.Error @@ -74,8 +76,9 @@ readHtml :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readHtml opts inp = mapLeft (ParseFailure . getError) . flip runReader def $ - runParserT parseDoc (HTMLState def{ stateOptions = opts } [] Nothing) - "source" tags + runParserT parseDoc + (HTMLState def{ stateOptions = opts } [] Nothing [] M.empty) + "source" tags where tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } inp parseDoc = do @@ -100,7 +103,9 @@ data HTMLState = HTMLState { parserState :: ParserState, noteTable :: [(String, Blocks)], - baseHref :: Maybe String + baseHref :: Maybe String, + identifiers :: [String], + headerMap :: M.Map Inlines String } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -252,6 +257,22 @@ pListItem nonItem = do let liDiv = maybe mempty (\x -> B.divWith (x, [], []) mempty) (lookup "id" attr) (liDiv <>) <$> pInTags "li" block <* skipMany nonItem +parseListStyleType :: String -> ListNumberStyle +parseListStyleType "lower-roman" = LowerRoman +parseListStyleType "upper-roman" = UpperRoman +parseListStyleType "lower-alpha" = LowerAlpha +parseListStyleType "upper-alpha" = UpperAlpha +parseListStyleType "decimal" = Decimal +parseListStyleType _ = DefaultStyle + +parseTypeAttr :: String -> ListNumberStyle +parseTypeAttr "i" = LowerRoman +parseTypeAttr "I" = UpperRoman +parseTypeAttr "a" = LowerAlpha +parseTypeAttr "A" = UpperAlpha +parseTypeAttr "1" = Decimal +parseTypeAttr _ = DefaultStyle + pOrderedList :: TagParser Blocks pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) @@ -261,23 +282,19 @@ pOrderedList = try $ do sta' = if all isDigit sta then read sta else 1 - sty = fromMaybe (fromMaybe "" $ - lookup "style" attribs) $ - lookup "class" attribs - sty' = case sty of - "lower-roman" -> LowerRoman - "upper-roman" -> UpperRoman - "lower-alpha" -> LowerAlpha - "upper-alpha" -> UpperAlpha - "decimal" -> Decimal - _ -> - case lookup "type" attribs of - Just "1" -> Decimal - Just "I" -> UpperRoman - Just "i" -> LowerRoman - Just "A" -> UpperAlpha - Just "a" -> LowerAlpha - _ -> DefaultStyle + + pickListStyle = pickStyleAttrProps ["list-style-type", "list-style"] + + typeAttr = fromMaybe "" $ lookup "type" attribs + classAttr = fromMaybe "" $ lookup "class" attribs + styleAttr = fromMaybe "" $ lookup "style" attribs + listStyle = fromMaybe "" $ pickListStyle styleAttr + + sty' = foldOrElse DefaultStyle + [ parseTypeAttr typeAttr + , parseListStyleType classAttr + , parseListStyleType listStyle + ] let nonItem = pSatisfy (\t -> not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) && not (t ~== TagClose "ol")) @@ -385,9 +402,10 @@ pHeader = try $ do let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] + attr' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle then mempty -- skip a representation of the title in the body - else B.headerWith (ident, classes, keyvals) level contents + else B.headerWith attr' level contents pHrule :: TagParser Blocks pHrule = do @@ -622,12 +640,11 @@ pSpan = try $ do guardEnabled Ext_native_spans TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True) contents <- pInTags "span" inline - let attr' = mkAttr attr - return $ case attr' of - ("",[],[("style",s)]) - | filter (`notElem` " \t;") s == "font-variant:small-caps" -> - B.smallcaps contents - _ -> B.spanWith (mkAttr attr) contents + let isSmallCaps = fontVariant == "small-caps" + where styleAttr = fromMaybe "" $ lookup "style" attr + fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr + let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) + return $ tag contents pRawHtmlInline :: TagParser Inlines pRawHtmlInline = do @@ -971,6 +988,14 @@ isSpace _ = False -- Instances +instance HasIdentifierList HTMLState where + extractIdentifierList = identifiers + updateIdentifierList f s = s{ identifiers = f (identifiers s) } + +instance HasHeaderMap HTMLState where + extractHeaderMap = headerMap + updateHeaderMap f s = s{ headerMap = f (headerMap s) } + -- This signature should be more general -- MonadReader HTMLLocal m => HasQuoteContext st m instance HasQuoteContext st (Reader HTMLLocal) where @@ -980,9 +1005,6 @@ instance HasQuoteContext st (Reader HTMLLocal) where instance HasReaderOptions HTMLState where extractReaderOptions = extractReaderOptions . parserState -instance Default HTMLState where - def = HTMLState def [] Nothing - instance HasMeta HTMLState where setMeta s b st = st {parserState = setMeta s b $ parserState st} deleteMeta s st = st {parserState = deleteMeta s $ parserState st} diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 0da912ea6..16d387dc4 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -524,6 +524,7 @@ inlineCommands = M.fromList $ , ("includegraphics", skipopts *> (unescapeURL <$> braced) >>= mkImage) , ("enquote", enquote) , ("cite", citation "cite" AuthorInText False) + , ("Cite", citation "cite" AuthorInText False) , ("citep", citation "citep" NormalCitation False) , ("citep*", citation "citep*" NormalCitation False) , ("citeal", citation "citeal" NormalCitation False) diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 2a5adab22..b23b44544 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -252,8 +252,8 @@ parseAttr = try $ do skipMany spaceChar k <- many1 letter char '=' - char '"' - v <- many1Till (satisfy (/='\n')) (char '"') + v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) + <|> many1 nonspaceChar return (k,v) tableStart :: MWParser () diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 980f63504..55ac92bcb 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -70,6 +70,14 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext } type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) +instance HasIdentifierList OrgParserState where + extractIdentifierList = orgStateIdentifiers + updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) } + +instance HasHeaderMap OrgParserState where + extractHeaderMap = orgStateHeaderMap + updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) } + parseOrg :: OrgParser Pandoc parseOrg = do blocks' <- parseBlocks @@ -135,6 +143,8 @@ data OrgParserState = OrgParserState , orgStateMeta :: Meta , orgStateMeta' :: F Meta , orgStateNotes' :: OrgNoteTable + , orgStateIdentifiers :: [String] + , orgStateHeaderMap :: M.Map Inlines String } instance Default OrgParserLocal where @@ -174,6 +184,8 @@ defaultOrgParserState = OrgParserState , orgStateMeta = nullMeta , orgStateMeta' = return nullMeta , orgStateNotes' = [] + , orgStateIdentifiers = [] + , orgStateHeaderMap = M.empty } recordAnchorId :: String -> OrgParser () @@ -668,7 +680,10 @@ header = try $ do title <- manyTill inline (lookAhead headerEnd) tags <- headerEnd let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags - return $ B.header level <$> inlns + st <- getState + let inlines = runF inlns st + attr <- registerHeader nullAttr inlines + return $ pure (B.headerWith attr level inlines) where tagToInlineF :: String -> F Inlines tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 678eecc52..f9663b19a 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -614,20 +614,22 @@ directive' = do return mempty -- TODO: --- - Silently ignores illegal fields -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: String -> [(String, String)] -> RSTParser Blocks addNewRole roleString fields = do (role, parentRole) <- parseFromString inheritedRole roleString customRoles <- stateRstCustomRoles <$> getState - let (baseRole, baseFmt, baseAttr) = - maybe (parentRole, Nothing, nullAttr) id $ - M.lookup parentRole customRoles + let getBaseRole (r, f, a) roles = + case M.lookup r roles of + Just (r', f', a') -> getBaseRole (r', f', a') roles + Nothing -> (r, f, a) + (baseRole, baseFmt, baseAttr) = + getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt annotate :: [String] -> [String] annotate = maybe id (:) $ - if parentRole == "code" + if baseRole == "code" then lookup "language" fields else Nothing attr = let (ident, classes, keyValues) = baseAttr @@ -636,12 +638,12 @@ addNewRole roleString fields = do -- warn about syntax we ignore flip mapM_ fields $ \(key, _) -> case key of - "language" -> when (parentRole /= "code") $ addWarning Nothing $ + "language" -> when (baseRole /= "code") $ addWarning Nothing $ "ignoring :language: field because the parent of role :" ++ - role ++ ": is :" ++ parentRole ++ ": not :code:" - "format" -> when (parentRole /= "raw") $ addWarning Nothing $ + role ++ ": is :" ++ baseRole ++ ": not :code:" + "format" -> when (baseRole /= "raw") $ addWarning Nothing $ "ignoring :format: field because the parent of role :" ++ - role ++ ": is :" ++ parentRole ++ ": not :raw:" + role ++ ": is :" ++ baseRole ++ ": not :raw:" _ -> addWarning Nothing $ "ignoring unknown field :" ++ key ++ ": in definition of role :" ++ role ++ ": in" when (parentRole == "raw" && countKeys "format" > 1) $ diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 07a7e962c..c44133e12 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -540,6 +540,7 @@ stringify = query go . walk deNote go (Str x) = x go (Code _ x) = x go (Math _ x) = x + go (RawInline (Format "html") ('<':'b':'r':_)) = " " -- see #2105 go LineBreak = " " go _ = "" deNote (Note _) = Str "" diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index da4c78cef..0cb313e7b 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -181,8 +181,8 @@ renumIds f renumMap = map (renumId f renumMap) -- | Certain characters are invalid in XML even if escaped. -- See #1992 -stripInvalidChars :: Pandoc -> Pandoc -stripInvalidChars = bottomUp (filter isValidChar) +stripInvalidChars :: String -> String +stripInvalidChars = filter isValidChar -- | See XML reference isValidChar :: Char -> Bool @@ -208,7 +208,7 @@ writeDocx :: WriterOptions -- ^ Writer options -> IO BL.ByteString writeDocx opts doc@(Pandoc meta _) = do let datadir = writerUserDataDir opts - let doc' = stripInvalidChars . walk fixDisplayMath $ doc + let doc' = walk fixDisplayMath $ doc username <- lookup "USERNAME" <$> getEnvironment utctime <- getCurrentTime distArchive <- getDefaultReferenceDocx Nothing @@ -974,7 +974,7 @@ formattedString str = do return [ mknode "w:r" [] $ props ++ [ mknode (if inDel then "w:delText" else "w:t") - [("xml:space","preserve")] str ] ] + [("xml:space","preserve")] (stripInvalidChars str) ] ] setFirstPara :: WS () setFirstPara = modify $ \s -> s { stFirstPara = True } diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 151d3c2ae..fae908f30 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -333,7 +333,8 @@ blockListToRST = blockListToRST' False -- | Convert list of Pandoc inline elements to RST. inlineListToRST :: [Inline] -> State WriterState Doc inlineListToRST lst = - mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= return . hcat + mapM inlineToRST (removeSpaceAfterDisplayMath $ insertBS lst) >>= + return . hcat where -- remove spaces after displaymath, as they screw up indentation: removeSpaceAfterDisplayMath (Math DisplayMath x : zs) = Math DisplayMath x : dropWhile (==Space) zs @@ -341,8 +342,8 @@ inlineListToRST lst = removeSpaceAfterDisplayMath [] = [] insertBS :: [Inline] -> [Inline] -- insert '\ ' where needed insertBS (x:y:z:zs) - | isComplex y && surroundComplex x z = - x : y : RawInline "rst" "\\ " : insertBS (z:zs) + | isComplex y && (surroundComplex x z) = + x : y : insertBS (z : zs) insertBS (x:y:zs) | isComplex x && not (okAfterComplex y) = x : RawInline "rst" "\\ " : insertBS (y : zs) @@ -383,6 +384,8 @@ inlineListToRST lst = isComplex (Image _ _) = True isComplex (Code _ _) = True isComplex (Math _ _) = True + isComplex (Cite _ (x:_)) = isComplex x + isComplex (Span _ (x:_)) = isComplex x isComplex _ = False -- | Convert Pandoc inline element to RST. |