summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Org.hs')
-rw-r--r--src/Text/Pandoc/Readers/Org.hs102
1 files changed, 79 insertions, 23 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 980f63504..3be47cfd4 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -2,7 +2,7 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, FlexibleInstances #-}
{-
-Copyright (C) 2014-2015 Albert Krewinkel <tarleb@moltkeplatz.de>
+Copyright (C) 2014-2015 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -21,19 +21,20 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Org
- Copyright : Copyright (C) 2014 Albert Krewinkel
+ Copyright : Copyright (C) 2014-2015 Albert Krewinkel
License : GNU GPL, version 2 or above
- Maintainer : Albert Krewinkel <tarleb@moltkeplatz.de>
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
Conversion of org-mode formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Org ( readOrg ) where
import qualified Text.Pandoc.Builder as B
-import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), (<>)
- , trimInlines )
+import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
+ trimInlines )
import Text.Pandoc.Definition
+import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
@@ -45,8 +46,6 @@ import Text.Pandoc.Shared (compactify', compactify'DL)
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-import Control.Applicative ( Applicative, pure
- , (<$>), (<$), (<*>), (<*), (*>) )
import Control.Arrow (first)
import Control.Monad (foldM, guard, liftM, liftM2, mplus, mzero, when)
import Control.Monad.Reader (Reader, runReader, ask, asks, local)
@@ -55,7 +54,6 @@ import Data.Default
import Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust)
-import Data.Monoid (Monoid, mconcat, mempty, mappend)
import Network.HTTP (urlEncode)
import Text.Pandoc.Error
@@ -70,6 +68,14 @@ data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
+instance HasIdentifierList OrgParserState where
+ extractIdentifierList = orgStateIdentifiers
+ updateIdentifierList f s = s{ orgStateIdentifiers = f (orgStateIdentifiers s) }
+
+instance HasHeaderMap OrgParserState where
+ extractHeaderMap = orgStateHeaderMap
+ updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
+
parseOrg :: OrgParser Pandoc
parseOrg = do
blocks' <- parseBlocks
@@ -135,6 +141,9 @@ data OrgParserState = OrgParserState
, orgStateMeta :: Meta
, orgStateMeta' :: F Meta
, orgStateNotes' :: OrgNoteTable
+ , orgStateParserContext :: ParserContext
+ , orgStateIdentifiers :: [String]
+ , orgStateHeaderMap :: M.Map Inlines String
}
instance Default OrgParserLocal where
@@ -174,6 +183,9 @@ defaultOrgParserState = OrgParserState
, orgStateMeta = nullMeta
, orgStateMeta' = return nullMeta
, orgStateNotes' = []
+ , orgStateParserContext = NullState
+ , orgStateIdentifiers = []
+ , orgStateHeaderMap = M.empty
}
recordAnchorId :: String -> OrgParser ()
@@ -282,6 +294,23 @@ blanklines =
<* updateLastPreCharPos
<* updateLastForbiddenCharPos
+-- | Succeeds when we're in list context.
+inList :: OrgParser ()
+inList = do
+ ctx <- orgStateParserContext <$> getState
+ guard (ctx == ListItemState)
+
+-- | Parse in different context
+withContext :: ParserContext -- ^ New parser context
+ -> OrgParser a -- ^ Parser to run in that context
+ -> OrgParser a
+withContext context parser = do
+ oldContext <- orgStateParserContext <$> getState
+ updateState $ \s -> s{ orgStateParserContext = context }
+ result <- parser
+ updateState $ \s -> s{ orgStateParserContext = oldContext }
+ return result
+
--
-- parsing blocks
--
@@ -397,7 +426,7 @@ verseBlock blkProp = try $ do
ignHeaders
content <- rawBlockContent blkProp
fmap B.para . mconcat . intersperse (pure B.linebreak)
- <$> mapM (parseFromString parseInlines) (lines content)
+ <$> mapM (parseFromString parseInlines) (map (++ "\n") . lines $ content)
exportsCode :: [(String, String)] -> Bool
exportsCode attrs = not (("rundoc-exports", "none") `elem` attrs
@@ -504,10 +533,16 @@ rundocBlockClass :: String
rundocBlockClass = rundocPrefix ++ "block"
blockOption :: OrgParser (String, String)
-blockOption = try $ (,) <$> orgArgKey <*> orgParamValue
+blockOption = try $ do
+ argKey <- orgArgKey
+ paramValue <- option "yes" orgParamValue
+ return (argKey, paramValue)
inlineBlockOption :: OrgParser (String, String)
-inlineBlockOption = try $ (,) <$> orgArgKey <*> orgInlineParamValue
+inlineBlockOption = try $ do
+ argKey <- orgArgKey
+ paramValue <- option "yes" orgInlineParamValue
+ return (argKey, paramValue)
orgArgKey :: OrgParser String
orgArgKey = try $
@@ -516,11 +551,17 @@ orgArgKey = try $
orgParamValue :: OrgParser String
orgParamValue = try $
- skipSpaces *> many1 (noneOf "\t\n\r ") <* skipSpaces
+ skipSpaces
+ *> notFollowedBy (char ':' )
+ *> many1 (noneOf "\t\n\r ")
+ <* skipSpaces
orgInlineParamValue :: OrgParser String
orgInlineParamValue = try $
- skipSpaces *> many1 (noneOf "\t\n\r ]") <* skipSpaces
+ skipSpaces
+ *> notFollowedBy (char ':')
+ *> many1 (noneOf "\t\n\r ]")
+ <* skipSpaces
orgArgWordChar :: OrgParser Char
orgArgWordChar = alphaNum <|> oneOf "-_"
@@ -668,7 +709,10 @@ header = try $ do
title <- manyTill inline (lookAhead headerEnd)
tags <- headerEnd
let inlns = trimInlinesF . mconcat $ title <> map tagToInlineF tags
- return $ B.header level <$> inlns
+ st <- getState
+ let inlines = runF inlns st
+ attr <- registerHeader nullAttr inlines
+ return $ pure (B.headerWith attr level inlines)
where
tagToInlineF :: String -> F Inlines
tagToInlineF t = return $ B.spanWith ("", ["tag"], [("data-tag-name", t)]) mempty
@@ -687,7 +731,7 @@ headerTags = try $
headerStart :: OrgParser Int
headerStart = try $
- (length <$> many1 (char '*')) <* many1 (char ' ')
+ (length <$> many1 (char '*')) <* many1 (char ' ') <* updateLastPreCharPos
-- Don't use (or need) the reader wrapper here, we want hline to be
@@ -879,9 +923,13 @@ noteBlock = try $ do
paraOrPlain :: OrgParser (F Blocks)
paraOrPlain = try $ do
ils <- parseInlines
- nl <- option False (newline >> return True)
- try (guard nl >> notFollowedBy (orderedListStart <|> bulletListStart) >>
- return (B.para <$> ils))
+ nl <- option False (newline *> return True)
+ -- Read block as paragraph, except if we are in a list context and the block
+ -- is directly followed by a list item, in which case the block is read as
+ -- plain text.
+ try (guard nl
+ *> notFollowedBy (inList *> (orderedListStart <|> bulletListStart))
+ *> return (B.para <$> ils))
<|> (return (B.plain <$> ils))
inlinesTillNewline :: OrgParser (F Inlines)
@@ -946,19 +994,22 @@ definitionListItem :: OrgParser Int
-> OrgParser (F (Inlines, [Blocks]))
definitionListItem parseMarkerGetLength = try $ do
markerLength <- parseMarkerGetLength
- term <- manyTill (noneOf "\n\r") (try $ string "::")
+ term <- manyTill (noneOf "\n\r") (try definitionMarker)
line1 <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
cont <- concat <$> many (listContinuation markerLength)
term' <- parseFromString parseInlines term
contents' <- parseFromString parseBlocks $ line1 ++ blank ++ cont
return $ (,) <$> term' <*> fmap (:[]) contents'
+ where
+ definitionMarker =
+ spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline)
-- parse raw text for one list item, excluding start marker and continuations
listItem :: OrgParser Int
-> OrgParser (F Blocks)
-listItem start = try $ do
+listItem start = try . withContext ListItemState $ do
markerLength <- try start
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
@@ -1537,8 +1588,11 @@ smart :: OrgParser (F Inlines)
smart = do
getOption readerSmart >>= guard
doubleQuoted <|> singleQuoted <|>
- choice (map (return <$>) [orgApostrophe, dash, ellipses])
- where orgApostrophe =
+ choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses])
+ where
+ orgDash = dash <* updatePositions '-'
+ orgEllipses = ellipses <* updatePositions '.'
+ orgApostrophe =
(char '\'' <|> char '\8217') <* updateLastPreCharPos
<* updateLastForbiddenCharPos
*> return (B.str "\x2019")
@@ -1546,9 +1600,10 @@ smart = do
singleQuoted :: OrgParser (F Inlines)
singleQuoted = try $ do
singleQuoteStart
+ updatePositions '\''
withQuoteContext InSingleQuote $
fmap B.singleQuoted . trimInlinesF . mconcat <$>
- many1Till inline singleQuoteEnd
+ many1Till inline (singleQuoteEnd <* updatePositions '\'')
-- doubleQuoted will handle regular double-quoted sections, as well
-- as dialogues with an open double-quote without a close double-quote
@@ -1556,6 +1611,7 @@ singleQuoted = try $ do
doubleQuoted :: OrgParser (F Inlines)
doubleQuoted = try $ do
doubleQuoteStart
+ updatePositions '"'
contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline)
(withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return
(fmap B.doubleQuoted . trimInlinesF $ contents))