From eea6d6568f99eda689b93210a22692c7f79b4bbf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 25 May 2016 22:50:51 +0200 Subject: Org reader: extract blocks parser to module Block parsing code is moved to a separate module. This is part of the Org-mode reader cleanup effort. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/Org.hs | 853 +------------------------------- src/Text/Pandoc/Readers/Org/Blocks.hs | 891 ++++++++++++++++++++++++++++++++++ 3 files changed, 901 insertions(+), 844 deletions(-) create mode 100644 src/Text/Pandoc/Readers/Org/Blocks.hs diff --git a/pandoc.cabal b/pandoc.cabal index 7286bd890..7d7250a21 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -394,6 +394,7 @@ Library Text.Pandoc.Readers.Odt.Arrows.State, Text.Pandoc.Readers.Odt.Arrows.Utils, Text.Pandoc.Readers.Org.BlockStarts, + Text.Pandoc.Readers.Org.Blocks, Text.Pandoc.Readers.Org.Inlines, Text.Pandoc.Readers.Org.ParserState, Text.Pandoc.Readers.Org.Parsing, diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 605d2220e..1042b5a21 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE FlexibleContexts #-} {- Copyright (C) 2014-2016 Albert Krewinkel @@ -28,27 +27,15 @@ Conversion of org-mode formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Org ( readOrg ) where -import Text.Pandoc.Readers.Org.BlockStarts -import Text.Pandoc.Readers.Org.Inlines -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing +import Text.Pandoc.Readers.Org.Blocks ( blockList, meta ) +import Text.Pandoc.Readers.Org.Parsing ( OrgParser, readWithM ) +import Text.Pandoc.Readers.Org.ParserState ( OrgParserState (..) ) -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks ) import Text.Pandoc.Definition -import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared ( compactify', compactify'DL ) -import Control.Arrow ( first ) -import Control.Monad ( foldM, guard, mzero ) import Control.Monad.Reader ( runReader ) -import Data.Char ( toLower, toUpper) -import Data.List ( foldl', intersperse, isPrefixOf ) -import qualified Data.Map as M -import Data.Maybe ( fromMaybe, isNothing ) -import Network.HTTP ( urlEncode ) -- | Parse org-mode string and return a Pandoc document. @@ -57,106 +44,17 @@ readOrg :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") --- --- Export Settings --- -exportSetting :: OrgParser () -exportSetting = choice - [ booleanSetting "^" setExportSubSuperscripts - , ignoredSetting "'" - , ignoredSetting "*" - , ignoredSetting "-" - , ignoredSetting ":" - , ignoredSetting "<" - , ignoredSetting "\\n" - , ignoredSetting "arch" - , ignoredSetting "author" - , ignoredSetting "c" - , ignoredSetting "creator" - , complementableListSetting "d" setExportDrawers - , ignoredSetting "date" - , ignoredSetting "e" - , ignoredSetting "email" - , ignoredSetting "f" - , ignoredSetting "H" - , ignoredSetting "inline" - , ignoredSetting "num" - , ignoredSetting "p" - , ignoredSetting "pri" - , ignoredSetting "prop" - , ignoredSetting "stat" - , ignoredSetting "tags" - , ignoredSetting "tasks" - , ignoredSetting "tex" - , ignoredSetting "timestamp" - , ignoredSetting "title" - , ignoredSetting "toc" - , ignoredSetting "todo" - , ignoredSetting "|" - ] "export setting" - -booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser () -booleanSetting settingIdentifier setter = try $ do - string settingIdentifier - char ':' - value <- elispBoolean - updateState $ modifyExportSettings setter value - --- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are --- interpreted as true. -elispBoolean :: OrgParser Bool -elispBoolean = try $ do - value <- many1 nonspaceChar - return $ case map toLower value of - "nil" -> False - "{}" -> False - "()" -> False - _ -> True - --- | A list or a complement list (i.e. a list starting with `not`). -complementableListSetting :: String - -> ExportSettingSetter (Either [String] [String]) - -> OrgParser () -complementableListSetting settingIdentifier setter = try $ do - _ <- string settingIdentifier <* char ':' - value <- choice [ Left <$> complementStringList - , Right <$> stringList - , (\b -> if b then Left [] else Right []) <$> elispBoolean - ] - updateState $ modifyExportSettings setter value - where - -- Read a plain list of strings. - stringList :: OrgParser [String] - stringList = try $ - char '(' - *> sepBy elispString spaces - <* char ')' - - -- Read an emacs lisp list specifying a complement set. - complementStringList :: OrgParser [String] - complementStringList = try $ - string "(not " - *> sepBy elispString spaces - <* char ')' - - elispString :: OrgParser String - elispString = try $ - char '"' - *> manyTill alphaNum (char '"') - -ignoredSetting :: String -> OrgParser () -ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) - -- -- Parser -- parseOrg :: OrgParser Pandoc parseOrg = do - blocks' <- parseBlocks - st <- getState - let meta = runF (orgStateMeta' st) st - let removeUnwantedBlocks = dropCommentTrees . filter (/= Null) - return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st) + blocks' <- blockList + meta' <- meta + return . Pandoc meta' $ removeUnwantedBlocks blocks' + where + removeUnwantedBlocks :: [Block] -> [Block] + removeUnwantedBlocks = dropCommentTrees . filter (/= Null) -- | Drop COMMENT headers and the document tree below those headers. dropCommentTrees :: [Block] -> [Block] @@ -191,736 +89,3 @@ isHeaderLevelLowerEq n blk = case blk of (Header level _ _) -> n >= level _ -> False - - --- --- parsing blocks --- - -parseBlocks :: OrgParser (F Blocks) -parseBlocks = mconcat <$> manyTill block eof - -block :: OrgParser (F Blocks) -block = choice [ mempty <$ blanklines - , table - , orgBlock - , figure - , example - , genericDrawer - , specialLine - , header - , horizontalRule - , list - , latexFragment - , noteBlock - , paraOrPlain - ] "block" - - --- --- Block Attributes --- - --- | Attributes that may be added to figures (like a name or caption). -data BlockAttributes = BlockAttributes - { blockAttrName :: Maybe String - , blockAttrCaption :: Maybe (F Inlines) - , blockAttrKeyValues :: [(String, String)] - } - -stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String) -stringyMetaAttribute attrCheck = try $ do - metaLineStart - attrName <- map toUpper <$> many1Till nonspaceChar (char ':') - guard $ attrCheck attrName - skipSpaces - attrValue <- anyLine - return (attrName, attrValue) - -blockAttributes :: OrgParser BlockAttributes -blockAttributes = try $ do - kv <- many (stringyMetaAttribute attrCheck) - let caption = foldl' (appendValues "CAPTION") Nothing kv - let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv - let name = lookup "NAME" kv - caption' <- maybe (return Nothing) - (fmap Just . parseFromString parseInlines) - caption - kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs - return $ BlockAttributes - { blockAttrName = name - , blockAttrCaption = caption' - , blockAttrKeyValues = kvAttrs' - } - where - attrCheck :: String -> Bool - attrCheck attr = - case attr of - "NAME" -> True - "CAPTION" -> True - "ATTR_HTML" -> True - _ -> False - - appendValues :: String -> Maybe String -> (String, String) -> Maybe String - appendValues attrName accValue (key, value) = - if key /= attrName - then accValue - else case accValue of - Just acc -> Just $ acc ++ ' ':value - Nothing -> Just value - -keyValues :: OrgParser [(String, String)] -keyValues = try $ - manyTill ((,) <$> key <*> value) newline - where - key :: OrgParser String - key = try $ skipSpaces *> char ':' *> many1 nonspaceChar - - value :: OrgParser String - value = skipSpaces *> manyTill anyChar endOfValue - - endOfValue :: OrgParser () - endOfValue = - lookAhead $ (() <$ try (many1 spaceChar <* key)) - <|> () <$ newline - - --- --- Org Blocks (#+BEGIN_... / #+END_...) --- - -type BlockProperties = (Int, String) -- (Indentation, Block-Type) - -updateIndent :: BlockProperties -> Int -> BlockProperties -updateIndent (_, blkType) indent = (indent, blkType) - -orgBlock :: OrgParser (F Blocks) -orgBlock = try $ do - blockAttrs <- blockAttributes - blockProp@(_, blkType) <- blockHeaderStart - ($ blockProp) $ - case blkType of - "comment" -> withRaw' (const mempty) - "html" -> withRaw' (return . (B.rawBlock blkType)) - "latex" -> withRaw' (return . (B.rawBlock blkType)) - "ascii" -> withRaw' (return . (B.rawBlock blkType)) - "example" -> withRaw' (return . exampleCode) - "quote" -> withParsed (fmap B.blockQuote) - "verse" -> verseBlock - "src" -> codeBlock blockAttrs - _ -> withParsed (fmap $ divWithClass blkType) - -blockHeaderStart :: OrgParser (Int, String) -blockHeaderStart = try $ (,) <$> indentation <*> blockType - where - blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) - -indentation :: OrgParser Int -indentation = try $ do - tabStop <- getOption readerTabStop - s <- many spaceChar - return $ spaceLength tabStop s - -spaceLength :: Int -> String -> Int -spaceLength tabStop s = (sum . map charLen) s - where - charLen ' ' = 1 - charLen '\t' = tabStop - charLen _ = 0 - -withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) -withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) - -withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) -withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) - -ignHeaders :: OrgParser () -ignHeaders = (() <$ newline) <|> (() <$ anyLine) - -divWithClass :: String -> Blocks -> Blocks -divWithClass cls = B.divWith ("", [cls], []) - -verseBlock :: BlockProperties -> OrgParser (F Blocks) -verseBlock blkProp = try $ do - ignHeaders - content <- rawBlockContent blkProp - fmap B.para . mconcat . intersperse (pure B.linebreak) - <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content) - -exportsCode :: [(String, String)] -> Bool -exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs - || ("rundoc-exports", "results") `elem` attrs) - -exportsResults :: [(String, String)] -> Bool -exportsResults attrs = ("rundoc-exports", "results") `elem` attrs - || ("rundoc-exports", "both") `elem` attrs - -followingResultsBlock :: OrgParser (Maybe (F Blocks)) -followingResultsBlock = - optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:" - *> blankline - *> block) - -codeBlock :: BlockAttributes -> BlockProperties -> OrgParser (F Blocks) -codeBlock blockAttrs blkProp = do - skipSpaces - (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) - leadingIndent <- lookAhead indentation - content <- rawBlockContent (updateIndent blkProp leadingIndent) - resultsContent <- followingResultsBlock - let id' = fromMaybe mempty $ blockAttrName blockAttrs - let includeCode = exportsCode kv - let includeResults = exportsResults kv - let codeBlck = B.codeBlockWith ( id', classes, kv ) content - let labelledBlck = maybe (pure codeBlck) - (labelDiv codeBlck) - (blockAttrCaption blockAttrs) - let resultBlck = fromMaybe mempty resultsContent - return $ (if includeCode then labelledBlck else mempty) - <> (if includeResults then resultBlck else mempty) - where - labelDiv blk value = - B.divWith nullAttr <$> (mappend <$> labelledBlock value - <*> pure blk) - labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) - -rawBlockContent :: BlockProperties -> OrgParser String -rawBlockContent (indent, blockType) = try $ - unlines . map commaEscaped <$> manyTill indentedLine blockEnder - where - indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) - blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType) - -parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) -parsedBlockContent blkProps = try $ do - raw <- rawBlockContent blkProps - parseFromString parseBlocks (raw ++ "\n") - --- indent by specified number of spaces (or equiv. tabs) -indentWith :: Int -> OrgParser String -indentWith num = do - tabStop <- getOption readerTabStop - if num < tabStop - then count num (char ' ') - else choice [ try (count num (char ' ')) - , try (char '\t' >> count (num - tabStop) (char ' ')) ] - -type SwitchOption = (Char, Maybe String) - --- | Parse code block arguments --- TODO: We currently don't handle switches. -codeHeaderArgs :: OrgParser ([String], [(String, String)]) -codeHeaderArgs = try $ do - language <- skipSpaces *> orgArgWord - _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) - parameters <- manyTill blockOption newline - let pandocLang = translateLang language - return $ - if hasRundocParameters parameters - then ( [ pandocLang, rundocBlockClass ] - , map toRundocAttrib (("language", language) : parameters) - ) - else ([ pandocLang ], parameters) - where - hasRundocParameters = not . null - toRundocAttrib = first ("rundoc-" ++) - - -switch :: OrgParser SwitchOption -switch = try $ simpleSwitch <|> lineNumbersSwitch - where - simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) - lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> - (string "-l \"" *> many1Till nonspaceChar (char '"')) - -translateLang :: String -> String -translateLang "C" = "c" -translateLang "C++" = "cpp" -translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported -translateLang "js" = "javascript" -translateLang "lisp" = "commonlisp" -translateLang "R" = "r" -translateLang "sh" = "bash" -translateLang "sqlite" = "sql" -translateLang cs = cs - --- | Prefix used for Rundoc classes and arguments. -rundocPrefix :: String -rundocPrefix = "rundoc-" - --- | The class-name used to mark rundoc blocks. -rundocBlockClass :: String -rundocBlockClass = rundocPrefix ++ "block" - -blockOption :: OrgParser (String, String) -blockOption = try $ do - argKey <- orgArgKey - paramValue <- option "yes" orgParamValue - return (argKey, paramValue) - -orgParamValue :: OrgParser String -orgParamValue = try $ - skipSpaces - *> notFollowedBy (char ':' ) - *> many1 (noneOf "\t\n\r ") - <* skipSpaces - -commaEscaped :: String -> String -commaEscaped (',':cs@('*':_)) = cs -commaEscaped (',':cs@('#':'+':_)) = cs -commaEscaped cs = cs - -example :: OrgParser (F Blocks) -example = try $ do - return . return . exampleCode =<< unlines <$> many1 exampleLine - -exampleCode :: String -> Blocks -exampleCode = B.codeBlockWith ("", ["example"], []) - -exampleLine :: OrgParser String -exampleLine = try $ exampleLineStart *> anyLine - -horizontalRule :: OrgParser (F Blocks) -horizontalRule = return B.horizontalRule <$ try hline - - --- --- Drawers --- - --- | A generic drawer which has no special meaning for org-mode. --- Whether or not this drawer is included in the output depends on the drawers --- export setting. -genericDrawer :: OrgParser (F Blocks) -genericDrawer = try $ do - name <- map toUpper <$> drawerStart - content <- manyTill drawerLine (try drawerEnd) - state <- getState - -- Include drawer if it is explicitly included in or not explicitly excluded - -- from the list of drawers that should be exported. PROPERTIES drawers are - -- never exported. - case (exportDrawers . orgStateExportSettings $ state) of - _ | name == "PROPERTIES" -> return mempty - Left names | name `elem` names -> return mempty - Right names | name `notElem` names -> return mempty - _ -> drawerDiv name <$> parseLines content - where - parseLines :: [String] -> OrgParser (F Blocks) - parseLines = parseFromString parseBlocks . (++ "\n") . unlines - - drawerDiv :: String -> F Blocks -> F Blocks - drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) - -drawerLine :: OrgParser String -drawerLine = anyLine - -drawerEnd :: OrgParser String -drawerEnd = try $ - skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline - --- | Read a :PROPERTIES: drawer and return the key/value pairs contained --- within. -propertiesDrawer :: OrgParser [(String, String)] -propertiesDrawer = try $ do - drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" - manyTill property (try drawerEnd) - where - property :: OrgParser (String, String) - property = try $ (,) <$> key <*> value - - key :: OrgParser String - key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') - - value :: OrgParser String - value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) - -keyValuesToAttr :: [(String, String)] -> Attr -keyValuesToAttr kvs = - let - lowerKvs = map (\(k, v) -> (map toLower k, v)) kvs - id' = fromMaybe mempty . lookup "custom_id" $ lowerKvs - cls = fromMaybe mempty . lookup "class" $ lowerKvs - kvs' = filter (flip notElem ["custom_id", "class"] . fst) lowerKvs - in - (id', words cls, kvs') - - --- --- Figures --- - --- | Figures (Image on a line by itself, preceded by name and/or caption) -figure :: OrgParser (F Blocks) -figure = try $ do - figAttrs <- blockAttributes - src <- skipSpaces *> selfTarget <* skipSpaces <* newline - guard . not . isNothing . blockAttrCaption $ figAttrs - guard (isImageFilename src) - let figName = fromMaybe mempty $ blockAttrName figAttrs - let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs - let figKeyVals = blockAttrKeyValues figAttrs - let attr = (mempty, mempty, figKeyVals) - return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption) - where - withFigPrefix :: String -> String - withFigPrefix cs = - if "fig:" `isPrefixOf` cs - then cs - else "fig:" ++ cs - - selfTarget :: OrgParser String - selfTarget = try $ char '[' *> linkTarget <* char ']' - - --- --- Comments, Options and Metadata --- - -addLinkFormat :: String - -> (String -> String) - -> OrgParser () -addLinkFormat key formatter = updateState $ \s -> - let fs = orgStateLinkFormatters s - in s{ orgStateLinkFormatters = M.insert key formatter fs } - -specialLine :: OrgParser (F Blocks) -specialLine = fmap return . try $ metaLine <|> commentLine - --- The order, in which blocks are tried, makes sure that we're not looking at --- the beginning of a block, so we don't need to check for it -metaLine :: OrgParser Blocks -metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) - -commentLine :: OrgParser Blocks -commentLine = commentLineStart *> anyLine *> pure mempty - -declarationLine :: OrgParser () -declarationLine = try $ do - key <- metaKey - inlinesF <- metaInlines - updateState $ \st -> - let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta - in st { orgStateMeta' = orgStateMeta' st <> meta' } - return () - -metaInlines :: OrgParser (F MetaValue) -metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline - -metaKey :: OrgParser String -metaKey = map toLower <$> many1 (noneOf ": \n\r") - <* char ':' - <* skipSpaces - -optionLine :: OrgParser () -optionLine = try $ do - key <- metaKey - case key of - "link" -> parseLinkFormat >>= uncurry addLinkFormat - "options" -> () <$ sepBy spaces exportSetting - _ -> mzero - -parseLinkFormat :: OrgParser ((String, String -> String)) -parseLinkFormat = try $ do - linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces - linkSubst <- parseFormat - return (linkType, linkSubst) - --- | An ad-hoc, single-argument-only implementation of a printf-style format --- parser. -parseFormat :: OrgParser (String -> String) -parseFormat = try $ do - replacePlain <|> replaceUrl <|> justAppend - where - -- inefficient, but who cares - replacePlain = try $ (\x -> concat . flip intersperse x) - <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) - <$> sequence [tillSpecifier 'h', rest] - justAppend = try $ (++) <$> rest - - rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") - tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) - --- --- Headers --- - --- | Headers -header :: OrgParser (F Blocks) -header = try $ do - level <- headerStart - title <- manyTill inline (lookAhead $ optional headerTags <* newline) - tags <- option [] headerTags - newline - propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer) - inlines <- runF (tagTitle title tags) <$> getState - attr <- registerHeader propAttr inlines - return $ pure (B.headerWith attr level inlines) - where - tagTitle :: [F Inlines] -> [String] -> F Inlines - tagTitle title tags = trimInlinesF . mconcat $ title <> map tagToInlineF tags - - tagToInlineF :: String -> F Inlines - tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty - - headerTags :: OrgParser [String] - headerTags = try $ - let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' - in skipSpaces - *> char ':' - *> many1 tag - <* skipSpaces - - --- --- Tables --- - -data OrgTableRow = OrgContentRow (F [Blocks]) - | OrgAlignRow [Alignment] - | OrgHlineRow - --- OrgTable is strongly related to the pandoc table ADT. Using the same --- (i.e. pandoc-global) ADT would mean that the reader would break if the --- global structure was to be changed, which would be bad. The final table --- should be generated using a builder function. Column widths aren't --- implemented yet, so they are not tracked here. -data OrgTable = OrgTable - { orgTableAlignments :: [Alignment] - , orgTableHeader :: [Blocks] - , orgTableRows :: [[Blocks]] - } - -table :: OrgParser (F Blocks) -table = try $ do - blockAttrs <- blockAttributes - lookAhead tableStart - do - rows <- tableRows - let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs - return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows - -orgToPandocTable :: OrgTable - -> Inlines - -> Blocks -orgToPandocTable (OrgTable aligns heads lns) caption = - B.table caption (zip aligns $ repeat 0) heads lns - -tableRows :: OrgParser [OrgTableRow] -tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) - -tableContentRow :: OrgParser OrgTableRow -tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) - -tableContentCell :: OrgParser (F Blocks) -tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell - -tableAlignRow :: OrgParser OrgTableRow -tableAlignRow = try $ do - tableStart - cells <- many1Till tableAlignCell newline - -- Empty rows are regular (i.e. content) rows, not alignment rows. - guard $ any (/= AlignDefault) cells - return $ OrgAlignRow cells - -tableAlignCell :: OrgParser Alignment -tableAlignCell = - choice [ try $ emptyCell *> return AlignDefault - , try $ skipSpaces - *> char '<' - *> tableAlignFromChar - <* many digit - <* char '>' - <* emptyCell - ] "alignment info" - where emptyCell = try $ skipSpaces *> endOfCell - -tableAlignFromChar :: OrgParser Alignment -tableAlignFromChar = try $ - choice [ char 'l' *> return AlignLeft - , char 'c' *> return AlignCenter - , char 'r' *> return AlignRight - ] - -tableHline :: OrgParser OrgTableRow -tableHline = try $ - OrgHlineRow <$ (tableStart *> char '-' *> anyLine) - -endOfCell :: OrgParser Char -endOfCell = try $ char '|' <|> lookAhead newline - -rowsToTable :: [OrgTableRow] - -> F OrgTable -rowsToTable = foldM rowToContent emptyTable - where emptyTable = OrgTable mempty mempty mempty - -normalizeTable :: OrgTable -> OrgTable -normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows - where - refRow = if heads /= mempty - then heads - else if rows == mempty then mempty else head rows - cols = length refRow - fillColumns base padding = take cols $ base ++ repeat padding - aligns' = fillColumns aligns AlignDefault - --- One or more horizontal rules after the first content line mark the previous --- line as a header. All other horizontal lines are discarded. -rowToContent :: OrgTable - -> OrgTableRow - -> F OrgTable -rowToContent orgTable row = - case row of - OrgHlineRow -> return singleRowPromotedToHeader - OrgAlignRow as -> return . setAligns $ as - OrgContentRow cs -> appendToBody cs - where - singleRowPromotedToHeader :: OrgTable - singleRowPromotedToHeader = case orgTable of - OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> - orgTable{ orgTableHeader = b , orgTableRows = [] } - _ -> orgTable - - setAligns :: [Alignment] -> OrgTable - setAligns aligns = orgTable{ orgTableAlignments = aligns } - - appendToBody :: F [Blocks] -> F OrgTable - appendToBody frow = do - newRow <- frow - let oldRows = orgTableRows orgTable - -- NOTE: This is an inefficient O(n) operation. This should be changed - -- if performance ever becomes a problem. - return orgTable{ orgTableRows = oldRows ++ [newRow] } - - --- --- LaTeX fragments --- -latexFragment :: OrgParser (F Blocks) -latexFragment = try $ do - envName <- latexEnvStart - content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) - return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) - where - c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" - , c - , "\\end{", e, "}\n" - ] - -latexEnd :: String -> OrgParser () -latexEnd envName = try $ - () <$ skipSpaces - <* string ("\\end{" ++ envName ++ "}") - <* blankline - - --- --- Footnote defintions --- -noteBlock :: OrgParser (F Blocks) -noteBlock = try $ do - ref <- noteMarker <* skipSpaces - content <- mconcat <$> blocksTillHeaderOrNote - addToNotesTable (ref, content) - return mempty - where - blocksTillHeaderOrNote = - many1Till block (eof <|> () <$ lookAhead noteMarker - <|> () <$ lookAhead headerStart) - --- Paragraphs or Plain text -paraOrPlain :: OrgParser (F Blocks) -paraOrPlain = try $ do - ils <- parseInlines - nl <- option False (newline *> return True) - -- Read block as paragraph, except if we are in a list context and the block - -- is directly followed by a list item, in which case the block is read as - -- plain text. - try (guard nl - *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart)) - *> return (B.para <$> ils)) - <|> (return (B.plain <$> ils)) - -inlinesTillNewline :: OrgParser (F Inlines) -inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline - - --- --- list blocks --- - -list :: OrgParser (F Blocks) -list = choice [ definitionList, bulletList, orderedList ] "list" - -definitionList :: OrgParser (F Blocks) -definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.definitionList . fmap compactify'DL . sequence - <$> many1 (definitionListItem $ bulletListStart' (Just n)) - -bulletList :: OrgParser (F Blocks) -bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) - fmap B.bulletList . fmap compactify' . sequence - <$> many1 (listItem (bulletListStart' $ Just n)) - -orderedList :: OrgParser (F Blocks) -orderedList = fmap B.orderedList . fmap compactify' . sequence - <$> many1 (listItem orderedListStart) - -bulletListStart' :: Maybe Int -> OrgParser Int --- returns length of bulletList prefix, inclusive of marker -bulletListStart' Nothing = do ind <- length <$> many spaceChar - oneOf (bullets $ ind == 0) - skipSpaces1 - return (ind + 1) -bulletListStart' (Just n) = do count (n-1) spaceChar - oneOf (bullets $ n == 1) - many1 spaceChar - return n - --- Unindented lists are legal, but they can't use '*' bullets. --- We return n to maintain compatibility with the generic listItem. -bullets :: Bool -> String -bullets unindented = if unindented then "+-" else "*+-" - -definitionListItem :: OrgParser Int - -> OrgParser (F (Inlines, [Blocks])) -definitionListItem parseMarkerGetLength = try $ do - markerLength <- parseMarkerGetLength - term <- manyTill (noneOf "\n\r") (try definitionMarker) - line1 <- anyLineNewline - blank <- option "" ("\n" <$ blankline) - cont <- concat <$> many (listContinuation markerLength) - term' <- parseFromString parseInlines term - contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont - return $ (,) <$> term' <*> fmap (:[]) contents' - where - definitionMarker = - spaceChar *> string "::" <* (spaceChar <|> lookAhead newline) - - --- parse raw text for one list item, excluding start marker and continuations -listItem :: OrgParser Int - -> OrgParser (F Blocks) -listItem start = try . withContext ListItemState $ do - markerLength <- try start - firstLine <- anyLineNewline - blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString parseBlocks $ firstLine ++ blank ++ rest - --- continuation of a list item - indented and separated by blankline or endline. --- Note: nested lists are parsed as continuations. -listContinuation :: Int - -> OrgParser String -listContinuation markerLength = try $ - notFollowedBy' blankline - *> (mappend <$> (concat <$> many1 listLine) - <*> many blankline) - where listLine = try $ indentWith markerLength *> anyLineNewline - --- | Parse any line, include the final newline in the output. -anyLineNewline :: OrgParser String -anyLineNewline = (++ "\n") <$> anyLine diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs new file mode 100644 index 000000000..b374acfe2 --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -0,0 +1,891 @@ +{-# LANGUAGE FlexibleContexts #-} +{- +Copyright (C) 2014-2016 Albert Krewinkel + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org.Options + Copyright : Copyright (C) 2014-2016 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel + +Parsers for Org-mode block elements. +-} +module Text.Pandoc.Readers.Org.Blocks + ( blockList + , meta + ) where + +import Text.Pandoc.Readers.Org.BlockStarts +import Text.Pandoc.Readers.Org.Inlines +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing + +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder ( Inlines, Blocks ) +import Text.Pandoc.Definition +import Text.Pandoc.Compat.Monoid ((<>)) +import Text.Pandoc.Options +import Text.Pandoc.Shared ( compactify', compactify'DL ) + +import Control.Arrow ( first ) +import Control.Monad ( foldM, guard, mzero ) +import Data.Char ( toLower, toUpper) +import Data.List ( foldl', intersperse, isPrefixOf ) +import qualified Data.Map as M +import Data.Maybe ( fromMaybe, isNothing ) +import Network.HTTP ( urlEncode ) + + +-- +-- parsing blocks +-- + +-- | Get a list of blocks. +blockList :: OrgParser [Block] +blockList = do + blocks' <- blocks + st <- getState + return . B.toList $ runF blocks' st + +-- | Get the meta information safed in the state. +meta :: OrgParser Meta +meta = do + st <- getState + return $ runF (orgStateMeta' st) st + +blocks :: OrgParser (F Blocks) +blocks = mconcat <$> manyTill block eof + +block :: OrgParser (F Blocks) +block = choice [ mempty <$ blanklines + , table + , orgBlock + , figure + , example + , genericDrawer + , specialLine + , header + , horizontalRule + , list + , latexFragment + , noteBlock + , paraOrPlain + ] "block" + + +-- +-- Block Attributes +-- + +-- | Attributes that may be added to figures (like a name or caption). +data BlockAttributes = BlockAttributes + { blockAttrName :: Maybe String + , blockAttrCaption :: Maybe (F Inlines) + , blockAttrKeyValues :: [(String, String)] + } + +stringyMetaAttribute :: (String -> Bool) -> OrgParser (String, String) +stringyMetaAttribute attrCheck = try $ do + metaLineStart + attrName <- map toUpper <$> many1Till nonspaceChar (char ':') + guard $ attrCheck attrName + skipSpaces + attrValue <- anyLine + return (attrName, attrValue) + +blockAttributes :: OrgParser BlockAttributes +blockAttributes = try $ do + kv <- many (stringyMetaAttribute attrCheck) + let caption = foldl' (appendValues "CAPTION") Nothing kv + let kvAttrs = foldl' (appendValues "ATTR_HTML") Nothing kv + let name = lookup "NAME" kv + caption' <- maybe (return Nothing) + (fmap Just . parseFromString parseInlines) + caption + kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs + return $ BlockAttributes + { blockAttrName = name + , blockAttrCaption = caption' + , blockAttrKeyValues = kvAttrs' + } + where + attrCheck :: String -> Bool + attrCheck attr = + case attr of + "NAME" -> True + "CAPTION" -> True + "ATTR_HTML" -> True + _ -> False + + appendValues :: String -> Maybe String -> (String, String) -> Maybe String + appendValues attrName accValue (key, value) = + if key /= attrName + then accValue + else case accValue of + Just acc -> Just $ acc ++ ' ':value + Nothing -> Just value + +keyValues :: OrgParser [(String, String)] +keyValues = try $ + manyTill ((,) <$> key <*> value) newline + where + key :: OrgParser String + key = try $ skipSpaces *> char ':' *> many1 nonspaceChar + + value :: OrgParser String + value = skipSpaces *> manyTill anyChar endOfValue + + endOfValue :: OrgParser () + endOfValue = + lookAhead $ (() <$ try (many1 spaceChar <* key)) + <|> () <$ newline + + +-- +-- Org Blocks (#+BEGIN_... / #+END_...) +-- + +type BlockProperties = (Int, String) -- (Indentation, Block-Type) + +updateIndent :: BlockProperties -> Int -> BlockProperties +updateIndent (_, blkType) indent = (indent, blkType) + +orgBlock :: OrgParser (F Blocks) +orgBlock = try $ do + blockAttrs <- blockAttributes + blockProp@(_, blkType) <- blockHeaderStart + ($ blockProp) $ + case blkType of + "comment" -> withRaw' (const mempty) + "html" -> withRaw' (return . (B.rawBlock blkType)) + "latex" -> withRaw' (return . (B.rawBlock blkType)) + "ascii" -> withRaw' (return . (B.rawBlock blkType)) + "example" -> withRaw' (return . exampleCode) + "quote" -> withParsed (fmap B.blockQuote) + "verse" -> verseBlock + "src" -> codeBlock blockAttrs + _ -> withParsed (fmap $ divWithClass blkType) + +blockHeaderStart :: OrgParser (Int, String) +blockHeaderStart = try $ (,) <$> indentation <*> blockType + where + blockType = map toLower <$> (stringAnyCase "#+begin_" *> orgArgWord) + +indentation :: OrgParser Int +indentation = try $ do + tabStop <- getOption readerTabStop + s <- many spaceChar + return $ spaceLength tabStop s + +spaceLength :: Int -> String -> Int +spaceLength tabStop s = (sum . map charLen) s + where + charLen ' ' = 1 + charLen '\t' = tabStop + charLen _ = 0 + +withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp)) + +withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks) +withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp)) + +ignHeaders :: OrgParser () +ignHeaders = (() <$ newline) <|> (() <$ anyLine) + +divWithClass :: String -> Blocks -> Blocks +divWithClass cls = B.divWith ("", [cls], []) + +verseBlock :: BlockProperties -> OrgParser (F Blocks) +verseBlock blkProp = try $ do + ignHeaders + content <- rawBlockContent blkProp + fmap B.para . mconcat . intersperse (pure B.linebreak) + <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content) + +exportsCode :: [(String, String)] -> Bool +exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs + || ("rundoc-exports", "results") `elem` attrs) + +exportsResults :: [(String, String)] -> Bool +exportsResults attrs = ("rundoc-exports", "results") `elem` attrs + || ("rundoc-exports", "both") `elem` attrs + +followingResultsBlock :: OrgParser (Maybe (F Blocks)) +followingResultsBlock = + optionMaybe (try $ blanklines *> stringAnyCase "#+RESULTS:" + *> blankline + *> block) + +codeBlock :: BlockAttributes -> BlockProperties -> OrgParser (F Blocks) +codeBlock blockAttrs blkProp = do + skipSpaces + (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) + leadingIndent <- lookAhead indentation + content <- rawBlockContent (updateIndent blkProp leadingIndent) + resultsContent <- followingResultsBlock + let id' = fromMaybe mempty $ blockAttrName blockAttrs + let includeCode = exportsCode kv + let includeResults = exportsResults kv + let codeBlck = B.codeBlockWith ( id', classes, kv ) content + let labelledBlck = maybe (pure codeBlck) + (labelDiv codeBlck) + (blockAttrCaption blockAttrs) + let resultBlck = fromMaybe mempty resultsContent + return $ (if includeCode then labelledBlck else mempty) + <> (if includeResults then resultBlck else mempty) + where + labelDiv blk value = + B.divWith nullAttr <$> (mappend <$> labelledBlock value + <*> pure blk) + labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) + +rawBlockContent :: BlockProperties -> OrgParser String +rawBlockContent (indent, blockType) = try $ + unlines . map commaEscaped <$> manyTill indentedLine blockEnder + where + indentedLine = try $ ("" <$ blankline) <|> (indentWith indent *> anyLine) + blockEnder = try $ skipSpaces *> stringAnyCase ("#+end_" <> blockType) + +parsedBlockContent :: BlockProperties -> OrgParser (F Blocks) +parsedBlockContent blkProps = try $ do + raw <- rawBlockContent blkProps + parseFromString blocks (raw ++ "\n") + +-- indent by specified number of spaces (or equiv. tabs) +indentWith :: Int -> OrgParser String +indentWith num = do + tabStop <- getOption readerTabStop + if num < tabStop + then count num (char ' ') + else choice [ try (count num (char ' ')) + , try (char '\t' >> count (num - tabStop) (char ' ')) ] + +type SwitchOption = (Char, Maybe String) + +-- | Parse code block arguments +-- TODO: We currently don't handle switches. +codeHeaderArgs :: OrgParser ([String], [(String, String)]) +codeHeaderArgs = try $ do + language <- skipSpaces *> orgArgWord + _ <- skipSpaces *> (try $ switch `sepBy` (many1 spaceChar)) + parameters <- manyTill blockOption newline + let pandocLang = translateLang language + return $ + if hasRundocParameters parameters + then ( [ pandocLang, rundocBlockClass ] + , map toRundocAttrib (("language", language) : parameters) + ) + else ([ pandocLang ], parameters) + where + hasRundocParameters = not . null + toRundocAttrib = first ("rundoc-" ++) + + +switch :: OrgParser SwitchOption +switch = try $ simpleSwitch <|> lineNumbersSwitch + where + simpleSwitch = (\c -> (c, Nothing)) <$> (oneOf "-+" *> letter) + lineNumbersSwitch = (\ls -> ('l', Just ls)) <$> + (string "-l \"" *> many1Till nonspaceChar (char '"')) + +translateLang :: String -> String +translateLang "C" = "c" +translateLang "C++" = "cpp" +translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported +translateLang "js" = "javascript" +translateLang "lisp" = "commonlisp" +translateLang "R" = "r" +translateLang "sh" = "bash" +translateLang "sqlite" = "sql" +translateLang cs = cs + +-- | Prefix used for Rundoc classes and arguments. +rundocPrefix :: String +rundocPrefix = "rundoc-" + +-- | The class-name used to mark rundoc blocks. +rundocBlockClass :: String +rundocBlockClass = rundocPrefix ++ "block" + +blockOption :: OrgParser (String, String) +blockOption = try $ do + argKey <- orgArgKey + paramValue <- option "yes" orgParamValue + return (argKey, paramValue) + +orgParamValue :: OrgParser String +orgParamValue = try $ + skipSpaces + *> notFollowedBy (char ':' ) + *> many1 (noneOf "\t\n\r ") + <* skipSpaces + +commaEscaped :: String -> String +commaEscaped (',':cs@('*':_)) = cs +commaEscaped (',':cs@('#':'+':_)) = cs +commaEscaped cs = cs + +example :: OrgParser (F Blocks) +example = try $ do + return . return . exampleCode =<< unlines <$> many1 exampleLine + +exampleCode :: String -> Blocks +exampleCode = B.codeBlockWith ("", ["example"], []) + +exampleLine :: OrgParser String +exampleLine = try $ exampleLineStart *> anyLine + +horizontalRule :: OrgParser (F Blocks) +horizontalRule = return B.horizontalRule <$ try hline + + +-- +-- Drawers +-- + +-- | A generic drawer which has no special meaning for org-mode. +-- Whether or not this drawer is included in the output depends on the drawers +-- export setting. +genericDrawer :: OrgParser (F Blocks) +genericDrawer = try $ do + name <- map toUpper <$> drawerStart + content <- manyTill drawerLine (try drawerEnd) + state <- getState + -- Include drawer if it is explicitly included in or not explicitly excluded + -- from the list of drawers that should be exported. PROPERTIES drawers are + -- never exported. + case (exportDrawers . orgStateExportSettings $ state) of + _ | name == "PROPERTIES" -> return mempty + Left names | name `elem` names -> return mempty + Right names | name `notElem` names -> return mempty + _ -> drawerDiv name <$> parseLines content + where + parseLines :: [String] -> OrgParser (F Blocks) + parseLines = parseFromString blocks . (++ "\n") . unlines + + drawerDiv :: String -> F Blocks -> F Blocks + drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) + +drawerLine :: OrgParser String +drawerLine = anyLine + +drawerEnd :: OrgParser String +drawerEnd = try $ + skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline + +-- | Read a :PROPERTIES: drawer and return the key/value pairs contained +-- within. +propertiesDrawer :: OrgParser [(String, String)] +propertiesDrawer = try $ do + drawerType <- drawerStart + guard $ map toUpper drawerType == "PROPERTIES" + manyTill property (try drawerEnd) + where + property :: OrgParser (String, String) + property = try $ (,) <$> key <*> value + + key :: OrgParser String + key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') + + value :: OrgParser String + value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) + +keyValuesToAttr :: [(String, String)] -> Attr +keyValuesToAttr kvs = + let + lowerKvs = map (\(k, v) -> (map toLower k, v)) kvs + id' = fromMaybe mempty . lookup "custom_id" $ lowerKvs + cls = fromMaybe mempty . lookup "class" $ lowerKvs + kvs' = filter (flip notElem ["custom_id", "class"] . fst) lowerKvs + in + (id', words cls, kvs') + + +-- +-- Figures +-- + +-- | Figures (Image on a line by itself, preceded by name and/or caption) +figure :: OrgParser (F Blocks) +figure = try $ do + figAttrs <- blockAttributes + src <- skipSpaces *> selfTarget <* skipSpaces <* newline + guard . not . isNothing . blockAttrCaption $ figAttrs + guard (isImageFilename src) + let figName = fromMaybe mempty $ blockAttrName figAttrs + let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs + let figKeyVals = blockAttrKeyValues figAttrs + let attr = (mempty, mempty, figKeyVals) + return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption) + where + withFigPrefix :: String -> String + withFigPrefix cs = + if "fig:" `isPrefixOf` cs + then cs + else "fig:" ++ cs + + selfTarget :: OrgParser String + selfTarget = try $ char '[' *> linkTarget <* char ']' + + +-- +-- Comments, Options and Metadata +-- + +addLinkFormat :: String + -> (String -> String) + -> OrgParser () +addLinkFormat key formatter = updateState $ \s -> + let fs = orgStateLinkFormatters s + in s{ orgStateLinkFormatters = M.insert key formatter fs } + +specialLine :: OrgParser (F Blocks) +specialLine = fmap return . try $ metaLine <|> commentLine + +-- The order, in which blocks are tried, makes sure that we're not looking at +-- the beginning of a block, so we don't need to check for it +metaLine :: OrgParser Blocks +metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) + +commentLine :: OrgParser Blocks +commentLine = commentLineStart *> anyLine *> pure mempty + +declarationLine :: OrgParser () +declarationLine = try $ do + key <- metaKey + inlinesF <- metaInlines + updateState $ \st -> + let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta + in st { orgStateMeta' = orgStateMeta' st <> meta' } + return () + +metaInlines :: OrgParser (F MetaValue) +metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline + +metaKey :: OrgParser String +metaKey = map toLower <$> many1 (noneOf ": \n\r") + <* char ':' + <* skipSpaces + +optionLine :: OrgParser () +optionLine = try $ do + key <- metaKey + case key of + "link" -> parseLinkFormat >>= uncurry addLinkFormat + "options" -> () <$ sepBy spaces exportSetting + _ -> mzero + +-- +-- Export Settings +-- + +-- | Read and process org-mode specific export options. +exportSetting :: OrgParser () +exportSetting = choice + [ booleanSetting "^" setExportSubSuperscripts + , ignoredSetting "'" + , ignoredSetting "*" + , ignoredSetting "-" + , ignoredSetting ":" + , ignoredSetting "<" + , ignoredSetting "\\n" + , ignoredSetting "arch" + , ignoredSetting "author" + , ignoredSetting "c" + , ignoredSetting "creator" + , complementableListSetting "d" setExportDrawers + , ignoredSetting "date" + , ignoredSetting "e" + , ignoredSetting "email" + , ignoredSetting "f" + , ignoredSetting "H" + , ignoredSetting "inline" + , ignoredSetting "num" + , ignoredSetting "p" + , ignoredSetting "pri" + , ignoredSetting "prop" + , ignoredSetting "stat" + , ignoredSetting "tags" + , ignoredSetting "tasks" + , ignoredSetting "tex" + , ignoredSetting "timestamp" + , ignoredSetting "title" + , ignoredSetting "toc" + , ignoredSetting "todo" + , ignoredSetting "|" + ] "export setting" + +booleanSetting :: String -> ExportSettingSetter Bool -> OrgParser () +booleanSetting settingIdentifier setter = try $ do + string settingIdentifier + char ':' + value <- elispBoolean + updateState $ modifyExportSettings setter value + +-- | Read an elisp boolean. Only NIL is treated as false, non-NIL values are +-- interpreted as true. +elispBoolean :: OrgParser Bool +elispBoolean = try $ do + value <- many1 nonspaceChar + return $ case map toLower value of + "nil" -> False + "{}" -> False + "()" -> False + _ -> True + +-- | A list or a complement list (i.e. a list starting with `not`). +complementableListSetting :: String + -> ExportSettingSetter (Either [String] [String]) + -> OrgParser () +complementableListSetting settingIdentifier setter = try $ do + _ <- string settingIdentifier <* char ':' + value <- choice [ Left <$> complementStringList + , Right <$> stringList + , (\b -> if b then Left [] else Right []) <$> elispBoolean + ] + updateState $ modifyExportSettings setter value + where + -- Read a plain list of strings. + stringList :: OrgParser [String] + stringList = try $ + char '(' + *> sepBy elispString spaces + <* char ')' + + -- Read an emacs lisp list specifying a complement set. + complementStringList :: OrgParser [String] + complementStringList = try $ + string "(not " + *> sepBy elispString spaces + <* char ')' + + elispString :: OrgParser String + elispString = try $ + char '"' + *> manyTill alphaNum (char '"') + +ignoredSetting :: String -> OrgParser () +ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) + + +parseLinkFormat :: OrgParser ((String, String -> String)) +parseLinkFormat = try $ do + linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkSubst <- parseFormat + return (linkType, linkSubst) + +-- | An ad-hoc, single-argument-only implementation of a printf-style format +-- parser. +parseFormat :: OrgParser (String -> String) +parseFormat = try $ do + replacePlain <|> replaceUrl <|> justAppend + where + -- inefficient, but who cares + replacePlain = try $ (\x -> concat . flip intersperse x) + <$> sequence [tillSpecifier 's', rest] + replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + <$> sequence [tillSpecifier 'h', rest] + justAppend = try $ (++) <$> rest + + rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + +-- +-- Headers +-- + +-- | Headers +header :: OrgParser (F Blocks) +header = try $ do + level <- headerStart + title <- manyTill inline (lookAhead $ optional headerTags <* newline) + tags <- option [] headerTags + newline + propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer) + inlines <- runF (tagTitle title tags) <$> getState + attr <- registerHeader propAttr inlines + return $ pure (B.headerWith attr level inlines) + where + tagTitle :: [F Inlines] -> [String] -> F Inlines + tagTitle title tags = trimInlinesF . mconcat $ title <> map tagToInlineF tags + + tagToInlineF :: String -> F Inlines + tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty + + headerTags :: OrgParser [String] + headerTags = try $ + let tag = many1 (alphaNum <|> oneOf "@%#_") <* char ':' + in skipSpaces + *> char ':' + *> many1 tag + <* skipSpaces + + +-- +-- Tables +-- + +data OrgTableRow = OrgContentRow (F [Blocks]) + | OrgAlignRow [Alignment] + | OrgHlineRow + +-- OrgTable is strongly related to the pandoc table ADT. Using the same +-- (i.e. pandoc-global) ADT would mean that the reader would break if the +-- global structure was to be changed, which would be bad. The final table +-- should be generated using a builder function. Column widths aren't +-- implemented yet, so they are not tracked here. +data OrgTable = OrgTable + { orgTableAlignments :: [Alignment] + , orgTableHeader :: [Blocks] + , orgTableRows :: [[Blocks]] + } + +table :: OrgParser (F Blocks) +table = try $ do + blockAttrs <- blockAttributes + lookAhead tableStart + do + rows <- tableRows + let caption = fromMaybe (return mempty) $ blockAttrCaption blockAttrs + return $ (<$> caption) . orgToPandocTable . normalizeTable =<< rowsToTable rows + +orgToPandocTable :: OrgTable + -> Inlines + -> Blocks +orgToPandocTable (OrgTable aligns heads lns) caption = + B.table caption (zip aligns $ repeat 0) heads lns + +tableRows :: OrgParser [OrgTableRow] +tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) + +tableContentRow :: OrgParser OrgTableRow +tableContentRow = try $ + OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) + +tableContentCell :: OrgParser (F Blocks) +tableContentCell = try $ + fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell + +tableAlignRow :: OrgParser OrgTableRow +tableAlignRow = try $ do + tableStart + cells <- many1Till tableAlignCell newline + -- Empty rows are regular (i.e. content) rows, not alignment rows. + guard $ any (/= AlignDefault) cells + return $ OrgAlignRow cells + +tableAlignCell :: OrgParser Alignment +tableAlignCell = + choice [ try $ emptyCell *> return AlignDefault + , try $ skipSpaces + *> char '<' + *> tableAlignFromChar + <* many digit + <* char '>' + <* emptyCell + ] "alignment info" + where emptyCell = try $ skipSpaces *> endOfCell + +tableAlignFromChar :: OrgParser Alignment +tableAlignFromChar = try $ + choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] + +tableHline :: OrgParser OrgTableRow +tableHline = try $ + OrgHlineRow <$ (tableStart *> char '-' *> anyLine) + +endOfCell :: OrgParser Char +endOfCell = try $ char '|' <|> lookAhead newline + +rowsToTable :: [OrgTableRow] + -> F OrgTable +rowsToTable = foldM rowToContent emptyTable + where emptyTable = OrgTable mempty mempty mempty + +normalizeTable :: OrgTable -> OrgTable +normalizeTable (OrgTable aligns heads rows) = OrgTable aligns' heads rows + where + refRow = if heads /= mempty + then heads + else if rows == mempty then mempty else head rows + cols = length refRow + fillColumns base padding = take cols $ base ++ repeat padding + aligns' = fillColumns aligns AlignDefault + +-- One or more horizontal rules after the first content line mark the previous +-- line as a header. All other horizontal lines are discarded. +rowToContent :: OrgTable + -> OrgTableRow + -> F OrgTable +rowToContent orgTable row = + case row of + OrgHlineRow -> return singleRowPromotedToHeader + OrgAlignRow as -> return . setAligns $ as + OrgContentRow cs -> appendToBody cs + where + singleRowPromotedToHeader :: OrgTable + singleRowPromotedToHeader = case orgTable of + OrgTable{ orgTableHeader = [], orgTableRows = b:[] } -> + orgTable{ orgTableHeader = b , orgTableRows = [] } + _ -> orgTable + + setAligns :: [Alignment] -> OrgTable + setAligns aligns = orgTable{ orgTableAlignments = aligns } + + appendToBody :: F [Blocks] -> F OrgTable + appendToBody frow = do + newRow <- frow + let oldRows = orgTableRows orgTable + -- NOTE: This is an inefficient O(n) operation. This should be changed + -- if performance ever becomes a problem. + return orgTable{ orgTableRows = oldRows ++ [newRow] } + + +-- +-- LaTeX fragments +-- +latexFragment :: OrgParser (F Blocks) +latexFragment = try $ do + envName <- latexEnvStart + content <- mconcat <$> manyTill anyLineNewline (latexEnd envName) + return . return $ B.rawBlock "latex" (content `inLatexEnv` envName) + where + c `inLatexEnv` e = mconcat [ "\\begin{", e, "}\n" + , c + , "\\end{", e, "}\n" + ] + +latexEnd :: String -> OrgParser () +latexEnd envName = try $ + () <$ skipSpaces + <* string ("\\end{" ++ envName ++ "}") + <* blankline + + +-- +-- Footnote defintions +-- +noteBlock :: OrgParser (F Blocks) +noteBlock = try $ do + ref <- noteMarker <* skipSpaces + content <- mconcat <$> blocksTillHeaderOrNote + addToNotesTable (ref, content) + return mempty + where + blocksTillHeaderOrNote = + many1Till block (eof <|> () <$ lookAhead noteMarker + <|> () <$ lookAhead headerStart) + +-- Paragraphs or Plain text +paraOrPlain :: OrgParser (F Blocks) +paraOrPlain = try $ do + ils <- parseInlines + nl <- option False (newline *> return True) + -- Read block as paragraph, except if we are in a list context and the block + -- is directly followed by a list item, in which case the block is read as + -- plain text. + try (guard nl + *> notFollowedBy (inList *> (() <$ orderedListStart <|> bulletListStart)) + *> return (B.para <$> ils)) + <|> (return (B.plain <$> ils)) + +inlinesTillNewline :: OrgParser (F Inlines) +inlinesTillNewline = trimInlinesF . mconcat <$> manyTill inline newline + + +-- +-- list blocks +-- + +list :: OrgParser (F Blocks) +list = choice [ definitionList, bulletList, orderedList ] "list" + +definitionList :: OrgParser (F Blocks) +definitionList = try $ do n <- lookAhead (bulletListStart' Nothing) + fmap B.definitionList . fmap compactify'DL . sequence + <$> many1 (definitionListItem $ bulletListStart' (Just n)) + +bulletList :: OrgParser (F Blocks) +bulletList = try $ do n <- lookAhead (bulletListStart' Nothing) + fmap B.bulletList . fmap compactify' . sequence + <$> many1 (listItem (bulletListStart' $ Just n)) + +orderedList :: OrgParser (F Blocks) +orderedList = fmap B.orderedList . fmap compactify' . sequence + <$> many1 (listItem orderedListStart) + +bulletListStart' :: Maybe Int -> OrgParser Int +-- returns length of bulletList prefix, inclusive of marker +bulletListStart' Nothing = do ind <- length <$> many spaceChar + oneOf (bullets $ ind == 0) + skipSpaces1 + return (ind + 1) +bulletListStart' (Just n) = do count (n-1) spaceChar + oneOf (bullets $ n == 1) + many1 spaceChar + return n + +-- Unindented lists are legal, but they can't use '*' bullets. +-- We return n to maintain compatibility with the generic listItem. +bullets :: Bool -> String +bullets unindented = if unindented then "+-" else "*+-" + +definitionListItem :: OrgParser Int + -> OrgParser (F (Inlines, [Blocks])) +definitionListItem parseMarkerGetLength = try $ do + markerLength <- parseMarkerGetLength + term <- manyTill (noneOf "\n\r") (try definitionMarker) + line1 <- anyLineNewline + blank <- option "" ("\n" <$ blankline) + cont <- concat <$> many (listContinuation markerLength) + term' <- parseFromString parseInlines term + contents' <- parseFromString blocks $ line1 ++ blank ++ cont + return $ (,) <$> term' <*> fmap (:[]) contents' + where + definitionMarker = + spaceChar *> string "::" <* (spaceChar <|> lookAhead newline) + + +-- parse raw text for one list item, excluding start marker and continuations +listItem :: OrgParser Int + -> OrgParser (F Blocks) +listItem start = try . withContext ListItemState $ do + markerLength <- try start + firstLine <- anyLineNewline + blank <- option "" ("\n" <$ blankline) + rest <- concat <$> many (listContinuation markerLength) + parseFromString blocks $ firstLine ++ blank ++ rest + +-- continuation of a list item - indented and separated by blankline or endline. +-- Note: nested lists are parsed as continuations. +listContinuation :: Int + -> OrgParser String +listContinuation markerLength = try $ + notFollowedBy' blankline + *> (mappend <$> (concat <$> many1 listLine) + <*> many blankline) + where listLine = try $ indentWith markerLength *> anyLineNewline + +-- | Parse any line, include the final newline in the output. +anyLineNewline :: OrgParser String +anyLineNewline = (++ "\n") <$> anyLine -- cgit v1.2.3