summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org.hs
diff options
context:
space:
mode:
authorAlbert Krewinkel <tarleb@moltkeplatz.de>2014-05-08 17:01:58 +0200
committerAlbert Krewinkel <tarleb@moltkeplatz.de>2014-05-09 10:53:08 +0200
commit7760504bb26f215e7d0c57da843f1f1dcc8c1186 (patch)
treecfe8395a96593e9b33393c957b198deae715c751 /src/Text/Pandoc/Readers/Org.hs
parent8afbd7e66499c4c9a24b225d3af01d353e0876ca (diff)
Org reader: refactor #+BEGIN..#+END block parsing code
Diffstat (limited to 'src/Text/Pandoc/Readers/Org.hs')
-rw-r--r--src/Text/Pandoc/Readers/Org.hs122
1 files changed, 80 insertions, 42 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index dba61dfe0..9df8ce0b3 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -50,7 +50,7 @@ import Data.Char (isAlphaNum, toLower)
import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
-import Data.Maybe (listToMaybe, fromMaybe, isJust)
+import Data.Maybe (fromMaybe, isJust)
import Data.Monoid (Monoid, mconcat, mempty, mappend)
import Network.HTTP (urlEncode)
@@ -162,7 +162,8 @@ popInlineCharStack = updateState $ \s ->
s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s }
surroundingEmphasisChar :: OrgParser [Char]
-surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
+surroundingEmphasisChar =
+ take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
startEmphasisNewlinesCounting :: Int -> OrgParser ()
startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
@@ -170,7 +171,7 @@ startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
decEmphasisNewlinesCount :: OrgParser ()
decEmphasisNewlinesCount = updateState $ \s ->
- s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
+ s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
newlinesCountWithinLimits :: OrgParser Bool
newlinesCountWithinLimits = do
@@ -296,41 +297,60 @@ lookupInlinesAttr attr = try $ do
-- Org Blocks (#+BEGIN_... / #+END_...)
--
+type BlockProperties = (Int, String) -- (Indentation, Block-Type)
+
orgBlock :: OrgParser (F Blocks)
orgBlock = try $ do
- (indent, blockType, args) <- blockHeader
- content <- rawBlockContent indent blockType
- contentBlocks <- parseFromString parseBlocks (content ++ "\n")
- let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
- case blockType of
- "comment" -> return mempty
- "html" -> returnF $ B.rawBlock "html" content
- "latex" -> returnF $ B.rawBlock "latex" content
- "ascii" -> returnF $ B.rawBlock "ascii" content
- "example" -> returnF $ exampleCode content
- "quote" -> return $ B.blockQuote <$> contentBlocks
- "verse" -> parseVerse content
- "src" -> codeBlockWithAttr classArgs content
- _ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks
+ blockProp@(_, blkType) <- blockHeaderStart
+ ($ blockProp) $
+ case blkType of
+ "comment" -> withRaw' (const mempty)
+ "html" -> withRaw' (return . (B.rawBlock blkType))
+ "latex" -> withRaw' (return . (B.rawBlock blkType))
+ "ascii" -> withRaw' (return . (B.rawBlock blkType))
+ "example" -> withRaw' (return . exampleCode)
+ "quote" -> withParsed (fmap B.blockQuote)
+ "verse" -> verseBlock
+ "src" -> codeBlock
+ _ -> withParsed (fmap $ divWithClass blkType)
+
+blockHeaderStart :: OrgParser (Int, String)
+blockHeaderStart = try $ (,) <$> indent <*> blockType
where
- parseVerse :: String -> OrgParser (F Blocks)
- parseVerse cs =
- fmap B.para . mconcat . intersperse (pure B.linebreak)
- <$> mapM (parseFromString parseInlines) (lines cs)
-
-blockHeader :: OrgParser (Int, String, [String])
-blockHeader = (,,) <$> blockIndent
- <*> blockType
- <*> (skipSpaces *> blockArgs)
- where blockIndent = length <$> many spaceChar
- blockType = map toLower <$> (stringAnyCase "#+begin_" *> many letter)
- blockArgs = manyTill (many nonspaceChar <* skipSpaces) newline
-
-codeBlockWithAttr :: [String] -> String -> OrgParser (F Blocks)
-codeBlockWithAttr classArgs content = do
- identifier <- fromMaybe "" <$> lookupBlockAttribute "name"
- caption <- lookupInlinesAttr "caption"
- let codeBlck = B.codeBlockWith (identifier, classArgs, []) content
+ indent = length <$> many spaceChar
+ blockType = map toLower <$> (stringAnyCase "#+begin_" *> many orgArgWordChar)
+
+withRaw' :: (String -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withRaw' f blockProp = (ignHeaders *> (f <$> rawBlockContent blockProp))
+
+withParsed :: (F Blocks -> F Blocks) -> BlockProperties -> OrgParser (F Blocks)
+withParsed f blockProp = (ignHeaders *> (f <$> parsedBlockContent blockProp))
+
+ignHeaders :: OrgParser ()
+ignHeaders = (() <$ newline) <|> (() <$ anyLine)
+
+divWithClass :: String -> Blocks -> Blocks
+divWithClass cls = B.divWith ("", [cls], [])
+
+verseBlock :: BlockProperties -> OrgParser (F Blocks)
+verseBlock blkProp = try $ do
+ ignHeaders
+ content <- rawBlockContent blkProp
+ fmap B.para . mconcat . intersperse (pure B.linebreak)
+ <$> mapM (parseFromString parseInlines) (lines content)
+
+codeBlock :: BlockProperties -> OrgParser (F Blocks)
+codeBlock blkProp = do
+ skipSpaces
+ language <- optionMaybe orgArgWord
+ (classes, kv) <- codeHeaderArgs
+ id' <- fromMaybe "" <$> lookupBlockAttribute "name"
+ caption <- lookupInlinesAttr "caption"
+ content <- rawBlockContent blkProp
+ let attr = ( id'
+ , maybe id (\l -> (l:)) language $ classes
+ , kv )
+ let codeBlck = B.codeBlockWith attr content
return $ maybe (pure codeBlck) (labelDiv codeBlck) caption
where
labelDiv blk value =
@@ -338,14 +358,21 @@ codeBlockWithAttr classArgs content = do
<*> pure blk)
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
-rawBlockContent :: Int -> String -> OrgParser String
-rawBlockContent indent blockType =
+rawBlockContent :: BlockProperties -> OrgParser String
+rawBlockContent (indent, blockType) = try $
unlines . map commaEscaped <$> manyTill indentedLine blockEnder
where
- indentedLine = try $ choice [ blankline *> pure "\n"
- , indentWith indent *> anyLine
- ]
- blockEnder = try $ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+ indentedLine = try $
+ choice [ blankline *> pure "\n"
+ , indentWith indent *> anyLine
+ ]
+ blockEnder = try $
+ indentWith indent *> stringAnyCase ("#+end_" <> blockType)
+
+parsedBlockContent :: BlockProperties -> OrgParser (F Blocks)
+parsedBlockContent blkProps = try $ do
+ raw <- rawBlockContent blkProps
+ parseFromString parseBlocks (raw ++ "\n")
-- indent by specified number of spaces (or equiv. tabs)
indentWith :: Int -> OrgParser String
@@ -356,6 +383,13 @@ indentWith num = do
else choice [ try (count num (char ' '))
, try (char '\t' >> count (num - tabStop) (char ' ')) ]
+orgArgWord :: OrgParser String
+orgArgWord = many1 orgArgWordChar
+
+codeHeaderArgs :: OrgParser ([String], [(String, String)])
+codeHeaderArgs =
+ (\x -> (x, [])) <$> manyTill (many nonspaceChar <* skipSpaces) newline
+
translateLang :: String -> String
translateLang "C" = "c"
translateLang "C++" = "cpp"
@@ -1002,9 +1036,13 @@ inlineCodeBlock = try $ do
returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
where enclosedByPair s e p = char s *> many1Till p (char e)
+-- | Prefix used for Rundoc classes and arguments.
+rundocPrefix :: String
+rundocPrefix = "rundoc-"
+
-- | The class-name used to mark rundoc blocks.
rundocBlockClass :: String
-rundocBlockClass = "rundoc-block"
+rundocBlockClass = rundocPrefix ++ "block"
blockOption :: OrgParser (String, String)
blockOption = try $ (,) <$> orgArgKey <*> orgArgValue