diff options
author | Sean Whitton <spwhitton@spwhitton.name> | 2018-04-24 10:49:04 -0700 |
---|---|---|
committer | Sean Whitton <spwhitton@spwhitton.name> | 2018-04-24 10:49:04 -0700 |
commit | a080dde1efb823e6e25e6ba0ead2afeb76012e43 (patch) | |
tree | 6165e39a24544d1387a201790541147e0f7478ab /src/Text/Pandoc/Readers/Org/Blocks.hs | |
parent | a9ae23fa15d769ab9b05f483c8511e96cc684403 (diff) | |
parent | de5ee82ed0e287ada3a5b272d8365a04fe8e9f95 (diff) |
Merge tag 'upstream/2.1.2_dfsg'
Upstream version 2.1.2~dfsg
# gpg: Signature made Tue 24 Apr 2018 10:48:48 AM MST
# gpg: using RSA key 9B917007AE030E36E4FC248B695B7AE4BF066240
# gpg: issuer "spwhitton@spwhitton.name"
# gpg: Good signature from "Sean Whitton <spwhitton@spwhitton.name>" [ultimate]
# Primary key fingerprint: 8DC2 487E 51AB DD90 B5C4 753F 0F56 D055 3B6D 411B
# Subkey fingerprint: 9B91 7007 AE03 0E36 E4FC 248B 695B 7AE4 BF06 6240
Diffstat (limited to 'src/Text/Pandoc/Readers/Org/Blocks.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 732 |
1 files changed, 299 insertions, 433 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 484d97482..fa016283c 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,8 +1,5 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE ViewPatterns #-} {- -Copyright (C) 2014-2017 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> +Copyright (C) 2014-2018 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,10 +15,11 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} - +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} {- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2017 Albert Krewinkel + Module : Text.Pandoc.Readers.Org.Blocks + Copyright : Copyright (C) 2014-2018 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> @@ -33,247 +31,61 @@ module Text.Pandoc.Readers.Org.Blocks , meta ) where -import Text.Pandoc.Readers.Org.BlockStarts -import Text.Pandoc.Readers.Org.Inlines -import Text.Pandoc.Readers.Org.Meta ( metaExport, metaKey, metaLine ) -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared - ( cleanLinkString, isImageFilename, rundocBlockClass - , toRundocAttrib, translateLang ) +import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.DocumentTree (documentTree, headlineToBlocks) +import Text.Pandoc.Readers.Org.Inlines +import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, + originalLang, translateLang) + +import Text.Pandoc.Builder (Blocks, Inlines) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Shared (compactify, compactifyDL, safeRead) + +import Control.Monad (foldM, guard, mzero, void) +import Data.Char (isSpace, toLower, toUpper) +import Data.Default (Default) +import Data.List (foldl', isPrefixOf) +import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Monoid ((<>)) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks ) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared ( compactify', compactify'DL, safeRead ) - -import Control.Monad ( foldM, guard, mzero, void ) -import Data.Char ( isSpace, toLower, toUpper) -import Data.Default ( Default ) -import Data.List ( foldl', isPrefixOf ) -import Data.Maybe ( fromMaybe, isNothing ) -import Data.Monoid ((<>)) - --- --- Org headers --- -newtype Tag = Tag { fromTag :: String } - deriving (Show, Eq) - --- | Create a tag containing the given string. -toTag :: String -> Tag -toTag = Tag - --- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } - deriving (Show, Eq, Ord) - --- | Create a property key containing the given string. Org mode keys are --- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower - --- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } - --- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue -toPropertyValue = PropertyValue - --- | Check whether the property value is non-nil (i.e. truish). -isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] - --- | Key/value pairs from a PROPERTIES drawer -type Properties = [(PropertyKey, PropertyValue)] - --- | Org mode headline (i.e. a document subtree). -data Headline = Headline - { headlineLevel :: Int - , headlineTodoMarker :: Maybe TodoMarker - , headlineText :: Inlines - , headlineTags :: [Tag] - , headlineProperties :: Properties - , headlineContents :: Blocks - , headlineChildren :: [Headline] - } - --- --- Parsing headlines and subtrees --- - --- | Read an Org mode headline and its contents (i.e. a document subtree). --- @lvl@ gives the minimum acceptable level of the tree. -headline :: Int -> OrgParser (F Headline) -headline lvl = try $ do - level <- headerStart - guard (lvl <= level) - todoKw <- optionMaybe todoKeyword - title <- trimInlinesF . mconcat <$> manyTill inline endOfTitle - tags <- option [] headerTags - newline - properties <- option mempty propertiesDrawer - contents <- blocks - children <- many (headline (level + 1)) - return $ do - title' <- title - contents' <- contents - children' <- sequence children - return $ Headline - { headlineLevel = level - , headlineTodoMarker = todoKw - , headlineText = title' - , headlineTags = tags - , headlineProperties = properties - , headlineContents = contents' - , headlineChildren = children' - } - where - endOfTitle :: OrgParser () - endOfTitle = void . lookAhead $ optional headerTags *> newline - - headerTags :: OrgParser [Tag] - headerTags = try $ - let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' - in map toTag <$> (skipSpaces *> char ':' *> many1 tag <* skipSpaces) - --- | Convert an Org mode headline (i.e. a document tree) into pandoc's Blocks -headlineToBlocks :: Headline -> OrgParser Blocks -headlineToBlocks hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - case () of - _ | any isNoExportTag headlineTags -> return mempty - _ | any isArchiveTag headlineTags -> archivedHeadlineToBlocks hdln - _ | isCommentTitle headlineText -> return mempty - _ | headlineLevel >= maxHeadlineLevels -> headlineToHeaderWithList hdln - _ | otherwise -> headlineToHeaderWithContents hdln - -isNoExportTag :: Tag -> Bool -isNoExportTag = (== toTag "noexport") - -isArchiveTag :: Tag -> Bool -isArchiveTag = (== toTag "ARCHIVE") - --- | Check if the title starts with COMMENT. --- FIXME: This accesses builder internals not intended for use in situations --- like these. Replace once keyword parsing is supported. -isCommentTitle :: Inlines -> Bool -isCommentTitle (B.toList -> (Str "COMMENT":_)) = True -isCommentTitle _ = False - -archivedHeadlineToBlocks :: Headline -> OrgParser Blocks -archivedHeadlineToBlocks hdln = do - archivedTreesOption <- getExportSetting exportArchivedTrees - case archivedTreesOption of - ArchivedTreesNoExport -> return mempty - ArchivedTreesExport -> headlineToHeaderWithContents hdln - ArchivedTreesHeadlineOnly -> headlineToHeader hdln - -headlineToHeaderWithList :: Headline -> OrgParser Blocks -headlineToHeaderWithList hdln@(Headline {..}) = do - maxHeadlineLevels <- getExportSetting exportHeadlineLevels - header <- headlineToHeader hdln - listElements <- sequence (map headlineToBlocks headlineChildren) - let listBlock = if null listElements - then mempty - else B.orderedList listElements - let headerText = if maxHeadlineLevels == headlineLevel - then header - else flattenHeader header - return $ headerText <> headlineContents <> listBlock - where - flattenHeader :: Blocks -> Blocks - flattenHeader blks = - case B.toList blks of - (Header _ _ inlns:_) -> B.para (B.fromList inlns) - _ -> mempty - -headlineToHeaderWithContents :: Headline -> OrgParser Blocks -headlineToHeaderWithContents hdln@(Headline {..}) = do - header <- headlineToHeader hdln - childrenBlocks <- mconcat <$> sequence (map headlineToBlocks headlineChildren) - return $ header <> headlineContents <> childrenBlocks - -headlineToHeader :: Headline -> OrgParser Blocks -headlineToHeader (Headline {..}) = do - exportTodoKeyword <- getExportSetting exportWithTodoKeywords - let todoText = if exportTodoKeyword - then case headlineTodoMarker of - Just kw -> todoKeywordToInlines kw <> B.space - Nothing -> mempty - else mempty - let text = tagTitle (todoText <> headlineText) headlineTags - let propAttr = propertiesToAttr headlineProperties - attr <- registerHeader propAttr headlineText - return $ B.headerWith attr headlineLevel text - -todoKeyword :: OrgParser TodoMarker -todoKeyword = try $ do - taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try $ (tdm <$ string (todoMarkerName tdm) <* spaceChar) - choice (map kwParser taskStates) - -todoKeywordToInlines :: TodoMarker -> Inlines -todoKeywordToInlines tdm = - let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm - classes = [todoState, todoText] - in B.spanWith (mempty, classes, mempty) (B.str todoText) - -propertiesToAttr :: Properties -> Attr -propertiesToAttr properties = - let - toStringPair prop = (fromKey (fst prop), fromValue (snd prop)) - customIdKey = toPropertyKey "custom_id" - classKey = toPropertyKey "class" - unnumberedKey = toPropertyKey "unnumbered" - specialProperties = [customIdKey, classKey, unnumberedKey] - id' = fromMaybe mempty . fmap fromValue . lookup customIdKey $ properties - cls = fromMaybe mempty . fmap fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) - $ properties - isUnnumbered = - fromMaybe False . fmap isNonNil . lookup unnumberedKey $ properties - in - (id', words cls ++ (if isUnnumbered then ["unnumbered"] else []), kvs') - -tagTitle :: Inlines -> [Tag] -> Inlines -tagTitle title tags = title <> (mconcat $ map tagToInline tags) - -tagToInline :: Tag -> Inlines -tagToInline t = B.spanWith ("", ["tag"], [("data-tag-name", fromTag t)]) mempty - +import qualified Text.Pandoc.Walk as Walk -- -- parsing blocks -- -- | Get a list of blocks. -blockList :: OrgParser [Block] +blockList :: PandocMonad m => OrgParser m [Block] blockList = do - initialBlocks <- blocks - headlines <- sequence <$> manyTill (headline 1) eof + headlines <- documentTree blocks inline st <- getState - headlineBlocks <- fmap mconcat . sequence . map headlineToBlocks $ runF headlines st - return . B.toList $ (runF initialBlocks st) <> headlineBlocks + headlineBlocks <- headlineToBlocks $ runF headlines st + -- ignore first headline, it's the document's title + return . drop 1 . B.toList $ headlineBlocks --- | Get the meta information safed in the state. -meta :: OrgParser Meta +-- | Get the meta information saved in the state. +meta :: Monad m => OrgParser m Meta meta = do meta' <- metaExport runF meta' <$> getState -blocks :: OrgParser (F Blocks) +blocks :: PandocMonad m => OrgParser m (F Blocks) blocks = mconcat <$> manyTill block (void (lookAhead headerStart) <|> eof) -block :: OrgParser (F Blocks) +block :: PandocMonad m => OrgParser m (F Blocks) block = choice [ mempty <$ blanklines , table , orgBlock , figure , example , genericDrawer + , include , specialLine , horizontalRule , list @@ -283,6 +95,11 @@ block = choice [ mempty <$ blanklines ] <?> "block" +-- | Parse a horizontal rule into a block element +horizontalRule :: Monad m => OrgParser m (F Blocks) +horizontalRule = return B.horizontalRule <$ try hline + + -- -- Block Attributes -- @@ -297,7 +114,7 @@ data BlockAttributes = BlockAttributes -- | Convert BlockAttributes into pandoc Attr attrFromBlockAttributes :: BlockAttributes -> Attr -attrFromBlockAttributes (BlockAttributes{..}) = +attrFromBlockAttributes BlockAttributes{..} = let ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of @@ -306,18 +123,18 @@ attrFromBlockAttributes (BlockAttributes{..}) = kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String) -stringyMetaAttribute attrCheck = try $ do +stringyMetaAttribute :: Monad m => OrgParser m (String, String) +stringyMetaAttribute = try $ do metaLineStart attrName <- map toUpper <$> many1Till nonspaceChar (char ':') - guard $ attrCheck attrName skipSpaces - attrValue <- anyLine + attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) -blockAttributes :: OrgParser BlockAttributes +blockAttributes :: PandocMonad m => OrgParser m BlockAttributes blockAttributes = try $ do - kv <- many (stringyMetaAttribute attrCheck) + kv <- many stringyMetaAttribute + guard $ all (attrCheck . fst) kv let caption = foldl' (appendValues "CAPTION") Nothing kv let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv let name = lookup "NAME" kv @@ -326,7 +143,7 @@ blockAttributes = try $ do Nothing -> return Nothing Just s -> Just <$> parseFromString inlines (s ++ "\n") kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs - return $ BlockAttributes + return BlockAttributes { blockAttrName = name , blockAttrLabel = label , blockAttrCaption = caption' @@ -334,13 +151,7 @@ blockAttributes = try $ do } where attrCheck :: String -> Bool - attrCheck attr = - case attr of - "NAME" -> True - "LABEL" -> True - "CAPTION" -> True - "ATTR_HTML" -> True - _ -> False + attrCheck x = x `elem` ["NAME", "LABEL", "CAPTION", "ATTR_HTML", "RESULTS"] appendValues :: String -> Maybe String -> (String, String) -> Maybe String appendValues attrName accValue (key, value) = @@ -350,17 +161,18 @@ blockAttributes = try $ do Just acc -> Just $ acc ++ ' ':value Nothing -> Just value -keyValues :: OrgParser [(String, String)] +-- | Parse key-value pairs for HTML attributes +keyValues :: Monad m => OrgParser m [(String, String)] keyValues = try $ manyTill ((,) <$> key <*> value) newline where - key :: OrgParser String + key :: Monad m => OrgParser m String key = try $ skipSpaces *> char ':' *> many1 nonspaceChar - value :: OrgParser String + value :: Monad m => OrgParser m String value = skipSpaces *> manyTill anyChar endOfValue - endOfValue :: OrgParser () + endOfValue :: Monad m => OrgParser m () endOfValue = lookAhead $ (() <$ try (many1 spaceChar <* key)) <|> () <$ newline @@ -371,12 +183,12 @@ keyValues = try $ -- -- | Read an org-mode block delimited by #+BEGIN_TYPE and #+END_TYPE. -orgBlock :: OrgParser (F Blocks) +orgBlock :: PandocMonad m => OrgParser m (F Blocks) orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ - case (map toLower blkType) of + case map toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) @@ -390,25 +202,25 @@ orgBlock = try $ do let (ident, classes, kv) = attrFromBlockAttributes blockAttrs in fmap $ B.divWith (ident, classes ++ [blkType], kv) where - blockHeaderStart :: OrgParser String + blockHeaderStart :: Monad m => OrgParser m String blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord lowercase :: String -> String lowercase = map toLower -rawBlockLines :: (String -> F Blocks) -> String -> OrgParser (F Blocks) -rawBlockLines f blockType = (ignHeaders *> (f <$> rawBlockContent blockType)) +rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) +rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) -parseBlockLines :: (F Blocks -> F Blocks) -> String -> OrgParser (F Blocks) -parseBlockLines f blockType = (ignHeaders *> (f <$> parsedBlockContent)) +parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) +parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where - parsedBlockContent :: OrgParser (F Blocks) + parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do raw <- rawBlockContent blockType parseFromString blocks (raw ++ "\n") -- | Read the raw string content of a block -rawBlockContent :: String -> OrgParser String +rawBlockContent :: Monad m => String -> OrgParser m String rawBlockContent blockType = try $ do blkLines <- manyTill rawLine blockEnder tabLen <- getOption readerTabStop @@ -418,18 +230,17 @@ rawBlockContent blockType = try $ do . map (tabsToSpaces tabLen . commaEscaped) $ blkLines where - rawLine :: OrgParser String + rawLine :: Monad m => OrgParser m String rawLine = try $ ("" <$ blankline) <|> anyLine - blockEnder :: OrgParser () + blockEnder :: Monad m => OrgParser m () blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) stripIndent :: [String] -> [String] stripIndent strs = map (drop (shortestIndent strs)) strs shortestIndent :: [String] -> Int - shortestIndent = foldr min maxBound - . map (length . takeWhile isSpace) + shortestIndent = foldr (min . length . takeWhile isSpace) maxBound . filter (not . null) tabsToSpaces :: Int -> String -> String @@ -437,7 +248,7 @@ rawBlockContent blockType = try $ do tabsToSpaces tabLen cs'@(c:cs) = case c of ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> (take tabLen $ repeat ' ') ++ tabsToSpaces tabLen cs + '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs _ -> cs' commaEscaped :: String -> String @@ -448,18 +259,18 @@ rawBlockContent blockType = try $ do commaEscaped cs = cs -- | Read but ignore all remaining block headers. -ignHeaders :: OrgParser () +ignHeaders :: Monad m => OrgParser m () ignHeaders = (() <$ newline) <|> (() <$ anyLine) -- | Read a block containing code intended for export in specific backends -- only. -exportBlock :: String -> OrgParser (F Blocks) +exportBlock :: Monad m => String -> OrgParser m (F Blocks) exportBlock blockType = try $ do exportType <- skipSpaces *> orgArgWord <* ignHeaders contents <- rawBlockContent blockType returnF (B.rawBlock (map toLower exportType) contents) -verseBlock :: String -> OrgParser (F Blocks) +verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) verseBlock blockType = try $ do ignHeaders content <- rawBlockContent blockType @@ -468,7 +279,7 @@ verseBlock blockType = try $ do where -- replace initial spaces with nonbreaking spaces to preserve -- indentation, parse the rest as normal inline - parseVerseLine :: String -> OrgParser (F Inlines) + parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) parseVerseLine cs = do let (initialSpaces, indentedLine) = span isSpace cs let nbspIndent = if null initialSpaces @@ -480,23 +291,20 @@ verseBlock blockType = try $ do -- | Read a code block and the associated results block if present. Which of -- boths blocks is included in the output is determined using the "exports" -- argument in the block header. -codeBlock :: BlockAttributes -> String -> OrgParser (F Blocks) +codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) content <- rawBlockContent blockType - resultsContent <- trailingResultsBlock + resultsContent <- option mempty babelResultsBlock let id' = fromMaybe mempty $ blockAttrName blockAttrs - let includeCode = exportsCode kv - let includeResults = exportsResults kv let codeBlck = B.codeBlockWith ( id', classes, kv ) content let labelledBlck = maybe (pure codeBlck) (labelDiv codeBlck) (blockAttrCaption blockAttrs) - let resultBlck = fromMaybe mempty resultsContent return $ - (if includeCode then labelledBlck else mempty) <> - (if includeResults then resultBlck else mempty) + (if exportsCode kv then labelledBlck else mempty) <> + (if exportsResults kv then resultsContent else mempty) where labelDiv :: Blocks -> F Inlines -> F Blocks labelDiv blk value = @@ -505,60 +313,97 @@ codeBlock blockAttrs blockType = do labelledBlock :: F Inlines -> F Blocks labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) -exportsCode :: [(String, String)] -> Bool -exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs - || ("rundoc-exports", "results") `elem` attrs) + exportsCode :: [(String, String)] -> Bool + exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports" -exportsResults :: [(String, String)] -> Bool -exportsResults attrs = ("rundoc-exports", "results") `elem` attrs - || ("rundoc-exports", "both") `elem` attrs + exportsResults :: [(String, String)] -> Bool + exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -trailingResultsBlock :: OrgParser (Maybe (F Blocks)) -trailingResultsBlock = optionMaybe . try $ do +-- | Parse the result of an evaluated babel code block. +babelResultsBlock :: PandocMonad m => OrgParser m (F Blocks) +babelResultsBlock = try $ do blanklines - stringAnyCase "#+RESULTS:" - blankline + resultsMarker <|> + (lookAhead . void . try $ + manyTill (metaLineStart *> anyLineNewline) resultsMarker) block + where + resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments --- TODO: We currently don't handle switches. -codeHeaderArgs :: OrgParser ([String], [(String, String)]) +codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord - _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + (switchClasses, switchKv) <- switchesAsAttributes parameters <- manyTill blockOption newline - let pandocLang = translateLang language - return $ - if hasRundocParameters parameters - then ( [ pandocLang, rundocBlockClass ] - , map toRundocAttrib (("language", language) : parameters) + return ( translateLang language : switchClasses + , originalLang language <> switchKv <> parameters ) - else ([ pandocLang ], parameters) - where - hasRundocParameters = not . null -switch :: OrgParser (Char, Maybe String) -switch = try $ simpleSwitch <|> lineNumbersSwitch +switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) +switchesAsAttributes = try $ do + switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) + return $ foldr addToAttr ([], []) switches where - simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) - lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> - (string "-l \"" *> many1Till nonspaceChar (char '"')) + addToAttr :: (Char, Maybe String, SwitchPolarity) + -> ([String], [(String, String)]) + -> ([String], [(String, String)]) + addToAttr ('n', lineNum, pol) (cls, kv) = + let kv' = case lineNum of + Just num -> ("startFrom", num):kv + Nothing -> kv + cls' = case pol of + SwitchPlus -> "continuedSourceBlock":cls + SwitchMinus -> cls + in ("numberLines":cls', kv') + addToAttr _ x = x + +-- | Whether a switch flag is specified with @+@ or @-@. +data SwitchPolarity = SwitchPlus | SwitchMinus + deriving (Show, Eq) + +-- | Parses a switch's polarity. +switchPolarity :: Monad m => OrgParser m SwitchPolarity +switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+') -blockOption :: OrgParser (String, String) +-- | Parses a source block switch option. +switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +switch = try $ lineNumberSwitch <|> labelSwitch <|> simpleSwitch + where + simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter + labelSwitch = genericSwitch 'l' $ + char '"' *> many1Till nonspaceChar (char '"') + +-- | Generic source block switch-option parser. +genericSwitch :: Monad m + => Char + -> OrgParser m String + -> OrgParser m (Char, Maybe String, SwitchPolarity) +genericSwitch c p = try $ do + polarity <- switchPolarity <* char c <* skipSpaces + arg <- optionMaybe p + return (c, arg, polarity) + +-- | Reads a line number switch option. The line number switch can be used with +-- example and source blocks. +lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +lineNumberSwitch = genericSwitch 'n' (many digit) + +blockOption :: Monad m => OrgParser m (String, String) blockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgParamValue return (argKey, paramValue) -orgParamValue :: OrgParser String +orgParamValue :: Monad m => OrgParser m String orgParamValue = try $ skipSpaces - *> notFollowedBy (char ':' ) - *> many1 nonspaceChar + *> notFollowedBy orgArgKey + *> noneOf "\n\r" `many1Till` endOfValue <* skipSpaces - -horizontalRule :: OrgParser (F Blocks) -horizontalRule = return B.horizontalRule <$ try hline + where + endOfValue = lookAhead $ try (skipSpaces <* oneOf "\n\r") + <|> try (skipSpaces1 <* orgArgKey) -- @@ -568,7 +413,7 @@ horizontalRule = return B.horizontalRule <$ try hline -- | A generic drawer which has no special meaning for org-mode. -- Whether or not this drawer is included in the output depends on the drawers -- export setting. -genericDrawer :: OrgParser (F Blocks) +genericDrawer :: PandocMonad m => OrgParser m (F Blocks) genericDrawer = try $ do name <- map toUpper <$> drawerStart content <- manyTill drawerLine (try drawerEnd) @@ -576,44 +421,25 @@ genericDrawer = try $ do -- Include drawer if it is explicitly included in or not explicitly excluded -- from the list of drawers that should be exported. PROPERTIES drawers are -- never exported. - case (exportDrawers . orgStateExportSettings $ state) of + case exportDrawers . orgStateExportSettings $ state of _ | name == "PROPERTIES" -> return mempty Left names | name `elem` names -> return mempty Right names | name `notElem` names -> return mempty - _ -> drawerDiv name <$> parseLines content + _ -> drawerDiv name <$> parseLines content where - parseLines :: [String] -> OrgParser (F Blocks) + parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) parseLines = parseFromString blocks . (++ "\n") . unlines drawerDiv :: String -> F Blocks -> F Blocks drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) -drawerLine :: OrgParser String +drawerLine :: Monad m => OrgParser m String drawerLine = anyLine -drawerEnd :: OrgParser String +drawerEnd :: Monad m => OrgParser m String drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline --- | Read a :PROPERTIES: drawer and return the key/value pairs contained --- within. -propertiesDrawer :: OrgParser Properties -propertiesDrawer = try $ do - drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" - manyTill property (try drawerEnd) - where - property :: OrgParser (PropertyKey, PropertyValue) - property = try $ (,) <$> key <*> value - - key :: OrgParser PropertyKey - key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') - - value :: OrgParser PropertyValue - value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) - -- -- Figures @@ -621,7 +447,7 @@ propertiesDrawer = try $ do -- | Figures or an image paragraph (i.e. an image on a line by itself). Only -- images with a caption attribute are interpreted as figures. -figure :: OrgParser (F Blocks) +figure :: PandocMonad m => OrgParser m (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph @@ -629,10 +455,10 @@ figure = try $ do Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) - let isFigure = not . isNothing $ blockAttrCaption figAttrs + let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where - selfTarget :: OrgParser String + selfTarget :: PandocMonad m => OrgParser m String selfTarget = try $ char '[' *> linkTarget <* char ']' imageBlock :: Bool -> BlockAttributes -> String -> F Blocks @@ -654,7 +480,7 @@ figure = try $ do else "fig:" ++ cs -- | Succeeds if looking at the end of the current paragraph -endOfParagraph :: OrgParser () +endOfParagraph :: Monad m => OrgParser m () endOfParagraph = try $ skipSpaces *> newline *> endOfBlock @@ -663,11 +489,10 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- -- | Example code marked up by a leading colon. -example :: OrgParser (F Blocks) -example = try $ do - return . return . exampleCode =<< unlines <$> many1 exampleLine +example :: Monad m => OrgParser m (F Blocks) +example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine where - exampleLine :: OrgParser String + exampleLine :: Monad m => OrgParser m String exampleLine = try $ exampleLineStart *> anyLine exampleCode :: String -> Blocks @@ -678,10 +503,59 @@ exampleCode = B.codeBlockWith ("", ["example"], []) -- Comments, Options and Metadata -- -specialLine :: OrgParser (F Blocks) +specialLine :: PandocMonad m => OrgParser m (F Blocks) specialLine = fmap return . try $ rawExportLine <|> metaLine <|> commentLine -rawExportLine :: OrgParser Blocks +-- | Include the content of a file. +include :: PandocMonad m => OrgParser m (F Blocks) +include = try $ do + metaLineStart <* stringAnyCase "include:" <* skipSpaces + filename <- includeTarget + includeArgs <- many (try $ skipSpaces *> many1 alphaNum) + params <- keyValues + blocksParser <- case includeArgs of + ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw + ["export"] -> return . returnF $ B.fromList [] + ["export", format] -> return $ pure . B.rawBlock format <$> parseRaw + ("src" : rest) -> do + let attr = case rest of + [lang] -> (mempty, [lang], mempty) + _ -> nullAttr + return $ pure . B.codeBlockWith attr <$> parseRaw + _ -> return $ return . B.fromList . blockFilter params <$> blockList + insertIncludedFileF blocksParser ["."] filename + where + includeTarget :: PandocMonad m => OrgParser m FilePath + includeTarget = do + char '"' + manyTill (noneOf "\n\r\t") (char '"') + + parseRaw :: PandocMonad m => OrgParser m String + parseRaw = many anyChar + + blockFilter :: [(String, String)] -> [Block] -> [Block] + blockFilter params blks = + let minlvl = lookup "minlevel" params + in case (minlvl >>= safeRead :: Maybe Int) of + Nothing -> blks + Just lvl -> let levels = Walk.query headerLevel blks + -- CAVE: partial function in else + curMin = if null levels then 0 else minimum levels + in Walk.walk (shiftHeader (curMin - lvl)) blks + + headerLevel :: Block -> [Int] + headerLevel (Header lvl _attr _content) = [lvl] + headerLevel _ = [] + + shiftHeader :: Int -> Block -> Block + shiftHeader shift blk = + if shift <= 0 + then blk + else case blk of + (Header lvl attr content) -> Header (lvl - shift) attr content + _ -> blk + +rawExportLine :: PandocMonad m => OrgParser m Blocks rawExportLine = try $ do metaLineStart key <- metaKey @@ -689,7 +563,7 @@ rawExportLine = try $ do then B.rawBlock key <$> anyLine else mzero -commentLine :: OrgParser Blocks +commentLine :: Monad m => OrgParser m Blocks commentLine = commentLineStart *> anyLine *> pure mempty @@ -714,12 +588,21 @@ data OrgTableRow = OrgContentRow (F [Blocks]) -- should be generated using a builder function. data OrgTable = OrgTable { orgTableColumnProperties :: [ColumnProperty] - , orgTableHeader :: [Blocks] - , orgTableRows :: [[Blocks]] + , orgTableHeader :: [Blocks] + , orgTableRows :: [[Blocks]] } -table :: OrgParser (F Blocks) -table = try $ do +table :: PandocMonad m => OrgParser m (F Blocks) +table = gridTableWith blocks True <|> orgTable + +-- | A normal org table +orgTable :: PandocMonad m => OrgParser m (F Blocks) +orgTable = try $ do + -- don't allow a table on the first line of a list item; org requires that + -- tables start at first non-space character on the line + let isFirstInListItem st = orgStateParserContext st == ListItemState && + isNothing (orgStateLastPreCharPos st) + guard =<< not . isFirstInListItem <$> getState blockAttrs <- blockAttributes lookAhead tableStart do @@ -731,7 +614,7 @@ orgToPandocTable :: OrgTable -> Inlines -> Blocks orgToPandocTable (OrgTable colProps heads lns) caption = - let totalWidth = if any (not . isNothing) (map columnRelWidth colProps) + let totalWidth = if any isJust (map columnRelWidth colProps) then Just . sum $ map (fromMaybe 1 . columnRelWidth) colProps else Nothing in B.table caption (map (convertColProp totalWidth) colProps) heads lns @@ -741,22 +624,22 @@ orgToPandocTable (OrgTable colProps heads lns) caption = let align' = fromMaybe AlignDefault $ columnAlignment colProp width' = fromMaybe 0 $ (\w t -> (fromIntegral w / fromIntegral t)) - <$> (columnRelWidth colProp) + <$> columnRelWidth colProp <*> totalWidth in (align', width') -tableRows :: OrgParser [OrgTableRow] +tableRows :: PandocMonad m => OrgParser m [OrgTableRow] tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) -tableContentRow :: OrgParser OrgTableRow +tableContentRow :: PandocMonad m => OrgParser m OrgTableRow tableContentRow = try $ OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) -tableContentCell :: OrgParser (F Blocks) +tableContentCell :: PandocMonad m => OrgParser m (F Blocks) tableContentCell = try $ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell -tableAlignRow :: OrgParser OrgTableRow +tableAlignRow :: Monad m => OrgParser m OrgTableRow tableAlignRow = try $ do tableStart colProps <- many1Till columnPropertyCell newline @@ -764,10 +647,10 @@ tableAlignRow = try $ do guard $ any (/= def) colProps return $ OrgAlignRow colProps -columnPropertyCell :: OrgParser ColumnProperty +columnPropertyCell :: Monad m => OrgParser m ColumnProperty columnPropertyCell = emptyCell <|> propCell <?> "alignment info" where - emptyCell = ColumnProperty Nothing Nothing <$ (try $ skipSpaces *> endOfCell) + emptyCell = ColumnProperty Nothing Nothing <$ try (skipSpaces *> endOfCell) propCell = try $ ColumnProperty <$> (skipSpaces *> char '<' @@ -776,18 +659,18 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info" <* char '>' <* emptyCell) -tableAlignFromChar :: OrgParser Alignment +tableAlignFromChar :: Monad m => OrgParser m Alignment tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft , char 'c' *> return AlignCenter , char 'r' *> return AlignRight ] -tableHline :: OrgParser OrgTableRow +tableHline :: Monad m => OrgParser m OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) -endOfCell :: OrgParser Char +endOfCell :: Monad m => OrgParser m Char endOfCell = try $ char '|' <|> lookAhead newline rowsToTable :: [OrgTableRow] @@ -813,45 +696,45 @@ normalizeTable (OrgTable colProps heads rows) = rowToContent :: OrgTable -> OrgTableRow -> F OrgTable -rowToContent orgTable row = +rowToContent tbl row = case row of OrgHlineRow -> return singleRowPromotedToHeader OrgAlignRow props -> return . setProperties $ props OrgContentRow cs -> appendToBody cs where singleRowPromotedToHeader :: OrgTable - singleRowPromotedToHeader = case orgTable of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - orgTable{ orgTableHeader = b , orgTableRows = [] } - _ -> orgTable + singleRowPromotedToHeader = case tbl of + OrgTable{ orgTableHeader = [], orgTableRows = [b] } -> + tbl{ orgTableHeader = b , orgTableRows = [] } + _ -> tbl setProperties :: [ColumnProperty] -> OrgTable - setProperties ps = orgTable{ orgTableColumnProperties = ps } + setProperties ps = tbl{ orgTableColumnProperties = ps } appendToBody :: F [Blocks] -> F OrgTable appendToBody frow = do newRow <- frow - let oldRows = orgTableRows orgTable + let oldRows = orgTableRows tbl -- NOTE: This is an inefficient O(n) operation. This should be changed -- if performance ever becomes a problem. - return orgTable{ orgTableRows = oldRows ++ [newRow] } + return tbl{ orgTableRows = oldRows ++ [newRow] } -- -- LaTeX fragments -- -latexFragment :: OrgParser (F Blocks) +latexFragment :: Monad m => OrgParser m (F Blocks) latexFragment = try $ do envName <- latexEnvStart content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) - return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) + returnF $ B.rawBlock "latex" (content `inLatexEnv` envName) where c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" , c , "\\end{", e, "}\n" ] -latexEnd :: String -> OrgParser () +latexEnd :: Monad m => String -> OrgParser m () latexEnd envName = try $ () <$ skipSpaces <* string ("\\end{" ++ envName ++ "}") @@ -861,74 +744,70 @@ latexEnd envName = try $ -- -- Footnote defintions -- -noteBlock :: OrgParser (F Blocks) +noteBlock :: PandocMonad m => OrgParser m (F Blocks) noteBlock = try $ do - ref <- noteMarker <* skipSpaces - content <- mconcat <$> blocksTillHeaderOrNote + ref <- noteMarker <* skipSpaces <* updateLastPreCharPos + content <- mconcat <$> many1Till block endOfFootnote addToNotesTable (ref, content) return mempty where - blocksTillHeaderOrNote = - many1Till block (eof <|> () <$ lookAhead noteMarker - <|> () <$ lookAhead headerStart) + endOfFootnote = eof + <|> () <$ lookAhead noteMarker + <|> () <$ lookAhead headerStart + <|> () <$ lookAhead (try $ blankline *> blankline) -- Paragraphs or Plain text -paraOrPlain :: OrgParser (F Blocks) +paraOrPlain :: PandocMonad m => OrgParser m (F Blocks) paraOrPlain = try $ do -- Make sure we are not looking at a headline - notFollowedBy' (char '*' *> (oneOf " *")) + notFollowedBy' headerStart ils <- inlines nl <- option False (newline *> return True) -- Read block as paragraph, except if we are in a list context and the block -- is directly followed by a list item, in which case the block is read as -- plain text. try (guard nl - *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart)) + *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart)) *> return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) + <|> return (B.plain <$> ils) -- -- list blocks -- -list :: OrgParser (F Blocks) +list :: PandocMonad m => OrgParser m (F Blocks) list = choice [ definitionList, bulletList, orderedList ] <?> "list" -definitionList :: OrgParser (F Blocks) -definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactify'DL . sequence - <$> many1 (definitionListItem $ bulletListStart' (Just n)) - -bulletList :: OrgParser (F Blocks) -bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify' . sequence - <$> many1 (listItem (bulletListStart' $ Just n)) - -orderedList :: OrgParser (F Blocks) -orderedList = fmap B.orderedList . fmap compactify' . sequence - <$> many1 (listItem orderedListStart) - -bulletListStart' :: Maybe Int -> OrgParser Int --- returns length of bulletList prefix, inclusive of marker -bulletListStart' Nothing = do ind <- length <$> many spaceChar - oneOf (bullets $ ind == 0) - skipSpaces1 - return (ind + 1) -bulletListStart' (Just n) = do count (n-1) spaceChar - oneOf (bullets $ n == 1) - many1 spaceChar - return n - --- Unindented lists are legal, but they can't use '*' bullets. --- We return n to maintain compatibility with the generic listItem. -bullets :: Bool -> String -bullets unindented = if unindented then "+-" else "*+-" - -definitionListItem :: OrgParser Int - -> OrgParser (F (Inlines, [Blocks])) -definitionListItem parseMarkerGetLength = try $ do - markerLength <- parseMarkerGetLength +definitionList :: PandocMonad m => OrgParser m (F Blocks) +definitionList = try $ do + indent <- lookAhead bulletListStart + fmap (B.definitionList . compactifyDL) . sequence + <$> many1 (definitionListItem (bulletListStart `indented` indent)) + +bulletList :: PandocMonad m => OrgParser m (F Blocks) +bulletList = try $ do + indent <- lookAhead bulletListStart + fmap (B.bulletList . compactify) . sequence + <$> many1 (listItem (bulletListStart `indented` indent)) + +indented :: Monad m => OrgParser m Int -> Int -> OrgParser m Int +indented indentedMarker minIndent = try $ do + n <- indentedMarker + guard (minIndent <= n) + return n + +orderedList :: PandocMonad m => OrgParser m (F Blocks) +orderedList = try $ do + indent <- lookAhead orderedListStart + fmap (B.orderedList . compactify) . sequence + <$> many1 (listItem (orderedListStart `indented` indent)) + +definitionListItem :: PandocMonad m + => OrgParser m Int + -> OrgParser m (F (Inlines, [Blocks])) +definitionListItem parseIndentedMarker = try $ do + markerLength <- parseIndentedMarker term <- manyTill (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) @@ -940,12 +819,12 @@ definitionListItem parseMarkerGetLength = try $ do definitionMarker = spaceChar *> string "::" <* (spaceChar <|> lookAhead newline) - --- parse raw text for one list item, excluding start marker and continuations -listItem :: OrgParser Int - -> OrgParser (F Blocks) -listItem start = try . withContext ListItemState $ do - markerLength <- try start +-- | parse raw text for one list item +listItem :: PandocMonad m + => OrgParser m Int + -> OrgParser m (F Blocks) +listItem parseIndentedMarker = try . withContext ListItemState $ do + markerLength <- try parseIndentedMarker firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) @@ -953,24 +832,11 @@ listItem start = try . withContext ListItemState $ do -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Int - -> OrgParser String -listContinuation markerLength = try $ +listContinuation :: Monad m => Int + -> OrgParser m String +listContinuation markerLength = try $ do notFollowedBy' blankline - *> (mappend <$> (concat <$> many1 listLine) - <*> many blankline) + mappend <$> (concat <$> many1 listLine) + <*> many blankline where listLine = try $ indentWith markerLength *> anyLineNewline - - -- indent by specified number of spaces (or equiv. tabs) - indentWith :: Int -> OrgParser String - indentWith num = do - tabStop <- getOption readerTabStop - if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) - , try (char '\t' >> count (num - tabStop) (char ' ')) ] - --- | Parse any line, include the final newline in the output. -anyLineNewline :: OrgParser String -anyLineNewline = (++ "\n") <$> anyLine |