summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2016-05-22 16:33:31 +0200
committerAlbert Krewinkel <albert@zeitkraut.de>2016-05-25 22:53:55 +0200
commita340c7249f8e19d36ee4a68663b4c97e0893292b (patch)
tree56c1d4e5255d0e1a6e3c33415d7d3ca7d216f2a9
parentcc937eea2fbd8a7bb07672bfed3b924de8573646 (diff)
Org reader: extract parsing function to module
The Org-mode reader uses many functions defined in the `Text.Pandoc.Parsing` utility module. Some of the functions are overwritten with versions adapted to Org-mode idiosyncrasies. These special functions, as well as the normal Pandoc versions, are combined in a single module to increase the ease of use. This leads to decoupling of Org-mode and Pandoc and hence to slightly cleaner code. The downside is code-bloat due to repeated import/export statements.
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc/Readers/Org.hs86
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs7
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs182
4 files changed, 198 insertions, 78 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index 24da05faa..61b5043ba 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -394,6 +394,7 @@ Library
Text.Pandoc.Readers.Odt.Arrows.State,
Text.Pandoc.Readers.Odt.Arrows.Utils,
Text.Pandoc.Readers.Org.ParserState,
+ Text.Pandoc.Readers.Org.Parsing,
Text.Pandoc.Writers.Shared,
Text.Pandoc.Asciify,
Text.Pandoc.MIME,
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 621e7107f..fd811c078 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -29,27 +29,23 @@ Conversion of org-mode formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Org ( readOrg ) where
+import Text.Pandoc.Readers.Org.ParserState
+import Text.Pandoc.Readers.Org.Parsing
+
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines, Blocks )
import Text.Pandoc.Definition
import Text.Pandoc.Compat.Monoid ((<>))
import Text.Pandoc.Error
import Text.Pandoc.Options
-import qualified Text.Pandoc.Parsing as P
-import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
- , anyLine, blanklines, newline
- , orderedListMarker
- , parseFromString
- )
import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
-import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Shared (compactify', compactify'DL)
import Text.TeXMath (readTeX, writePandoc, DisplayType(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Control.Arrow (first)
import Control.Monad (foldM, guard, mplus, mzero, when)
-import Control.Monad.Reader ( Reader, runReader )
+import Control.Monad.Reader ( runReader )
import Data.Char (isAlphaNum, isSpace, toLower, toUpper)
import Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf )
import qualified Data.Map as M
@@ -63,9 +59,6 @@ readOrg :: ReaderOptions -- ^ Reader options
-> Either PandocError Pandoc
readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
--- | The parser used to read org files.
-type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
-
--
-- Functions acting on the parser state
--
@@ -73,14 +66,6 @@ recordAnchorId :: String -> OrgParser ()
recordAnchorId i = updateState $ \s ->
s{ orgStateAnchorIds = i : (orgStateAnchorIds s) }
-updateLastForbiddenCharPos :: OrgParser ()
-updateLastForbiddenCharPos = getPosition >>= \p ->
- updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
-
-updateLastPreCharPos :: OrgParser ()
-updateLastPreCharPos = getPosition >>= \p ->
- updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
-
pushToInlineCharStack :: Char -> OrgParser ()
pushToInlineCharStack c = updateState $ \s ->
s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s }
@@ -259,57 +244,6 @@ isHeaderLevelLowerEq n blk =
--
--- Adaptions and specializations of parsing utilities
---
-
--- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
--- of the state saved and restored.
-parseFromString :: OrgParser a -> String -> OrgParser a
-parseFromString parser str' = do
- oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
- updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
- result <- P.parseFromString parser str'
- updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
- return result
-
--- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
-newline :: OrgParser Char
-newline =
- P.newline
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
--- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: OrgParser [Char]
-blanklines =
- P.blanklines
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
-anyLine :: OrgParser String
-anyLine =
- P.anyLine
- <* updateLastPreCharPos
- <* updateLastForbiddenCharPos
-
--- | Succeeds when we're in list context.
-inList :: OrgParser ()
-inList = do
- ctx <- orgStateParserContext <$> getState
- guard (ctx == ListItemState)
-
--- | Parse in different context
-withContext :: ParserContext -- ^ New parser context
- -> OrgParser a -- ^ Parser to run in that context
- -> OrgParser a
-withContext context parser = do
- oldContext <- orgStateParserContext <$> getState
- updateState $ \s -> s{ orgStateParserContext = context }
- result <- parser
- updateState $ \s -> s{ orgStateParserContext = oldContext }
- return result
-
---
-- parsing blocks
--
@@ -398,7 +332,7 @@ keyValues = try $
endOfValue :: OrgParser ()
endOfValue =
lookAhead $ (() <$ try (many1 spaceChar <* key))
- <|> () <$ P.newline
+ <|> () <$ newline
--
@@ -675,7 +609,7 @@ propertiesDrawer = try $ do
key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
value :: OrgParser String
- value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> P.newline)
+ value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
keyValuesToAttr :: [(String, String)] -> Attr
keyValuesToAttr kvs =
@@ -696,7 +630,7 @@ keyValuesToAttr kvs =
figure :: OrgParser (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
- src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline
+ src <- skipSpaces *> selfTarget <* skipSpaces <* newline
guard . not . isNothing . blockAttrCaption $ figAttrs
guard (isImageFilename src)
let figName = fromMaybe mempty $ blockAttrName figAttrs
@@ -785,7 +719,7 @@ parseFormat = try $ do
header :: OrgParser (F Blocks)
header = try $ do
level <- headerStart
- title <- manyTill inline (lookAhead $ optional headerTags <* P.newline)
+ title <- manyTill inline (lookAhead $ optional headerTags <* newline)
tags <- option [] headerTags
newline
propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer)
@@ -1083,7 +1017,7 @@ definitionListItem parseMarkerGetLength = try $ do
return $ (,) <$> term' <*> fmap (:[]) contents'
where
definitionMarker =
- spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline)
+ spaceChar *> string "::" <* (spaceChar <|> lookAhead newline)
-- parse raw text for one list item, excluding start marker and continuations
@@ -1559,7 +1493,7 @@ many1TillNOrLessNewlines n p end = try $
nMoreLines k cs = try $ (final k cs <|> rest k cs)
>>= uncurry nMoreLines
final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
- rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
+ rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
finalLine = try $ manyTill p end
minus1 k = k - 1
oneOrMore cs = guard (not $ null cs) *> return cs
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index 6a902cd46..e648a883e 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -29,9 +29,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Define the Org-mode parser state.
-}
module Text.Pandoc.Readers.Org.ParserState
- ( OrgParserState(..)
- , OrgParserLocal(..)
+ ( OrgParserState (..)
+ , OrgParserLocal (..)
, OrgNoteRecord
+ , HasReaderOptions (..)
+ , HasQuoteContext (..)
, F(..)
, askF
, asksF
@@ -184,6 +186,7 @@ modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParse
modifyExportSettings setter val state =
state { orgStateExportSettings = setter val . orgStateExportSettings $ state }
+
--
-- Parser state reader
--
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
new file mode 100644
index 000000000..efe2ae25f
--- /dev/null
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -0,0 +1,182 @@
+{-
+Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.Readers.Org.Options
+ Copyright : Copyright (C) 2014-2016 Albert Krewinkel
+ License : GNU GPL, version 2 or above
+
+ Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de>
+
+Org-mode parsing utilities.
+
+Most functions are simply re-exports from @Text.Pandoc.Parsing@, some
+functions are adapted to Org-mode specific functionality.
+-}
+module Text.Pandoc.Readers.Org.Parsing
+ ( OrgParser
+ , anyLine
+ , blanklines
+ , newline
+ , parseFromString
+ , inList
+ , withContext
+ , updateLastForbiddenCharPos
+ , updateLastPreCharPos
+ -- * Re-exports from Text.Pandoc.Parser
+ , ParserContext (..)
+ , many1Till
+ , notFollowedBy'
+ , spaceChar
+ , nonspaceChar
+ , skipSpaces
+ , blankline
+ , enclosed
+ , stringAnyCase
+ , charsInBalanced
+ , uri
+ , withRaw
+ , readWithM
+ , guardEnabled
+ , updateLastStrPos
+ , notAfterString
+ , ParserState (..)
+ , registerHeader
+ , QuoteContext (..)
+ , singleQuoteStart
+ , singleQuoteEnd
+ , doubleQuoteStart
+ , doubleQuoteEnd
+ , dash
+ , ellipses
+ , citeKey
+ -- * Re-exports from Text.Pandoc.Parsec
+ , runParser
+ , getInput
+ , char
+ , letter
+ , digit
+ , alphaNum
+ , skipMany1
+ , spaces
+ , anyChar
+ , string
+ , count
+ , eof
+ , noneOf
+ , oneOf
+ , lookAhead
+ , notFollowedBy
+ , many
+ , many1
+ , manyTill
+ , (<|>)
+ , (<?>)
+ , choice
+ , try
+ , sepBy
+ , sepBy1
+ , option
+ , optional
+ , optionMaybe
+ , getState
+ , updateState
+ , SourcePos
+ , getPosition
+ ) where
+
+import Text.Pandoc.Readers.Org.ParserState
+
+import qualified Text.Pandoc.Parsing as P
+import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline
+ , parseFromString )
+
+import Control.Monad ( guard )
+import Control.Monad.Reader ( Reader )
+
+-- | The parser used to read org files.
+type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal)
+
+--
+-- Adaptions and specializations of parsing utilities
+--
+
+-- | Parse any line of text
+anyLine :: OrgParser String
+anyLine =
+ P.anyLine
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
+-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
+-- of the state saved and restored.
+parseFromString :: OrgParser a -> String -> OrgParser a
+parseFromString parser str' = do
+ oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
+ updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
+ result <- P.parseFromString parser str'
+ updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+ return result
+
+-- | Like @Text.Parsec.Char.newline@, but causes additional state changes.
+newline :: OrgParser Char
+newline =
+ P.newline
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
+-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
+blanklines :: OrgParser [Char]
+blanklines =
+ P.blanklines
+ <* updateLastPreCharPos
+ <* updateLastForbiddenCharPos
+
+-- | Succeeds when we're in list context.
+inList :: OrgParser ()
+inList = do
+ ctx <- orgStateParserContext <$> getState
+ guard (ctx == ListItemState)
+
+-- | Parse in different context
+withContext :: ParserContext -- ^ New parser context
+ -> OrgParser a -- ^ Parser to run in that context
+ -> OrgParser a
+withContext context parser = do
+ oldContext <- orgStateParserContext <$> getState
+ updateState $ \s -> s{ orgStateParserContext = context }
+ result <- parser
+ updateState $ \s -> s{ orgStateParserContext = oldContext }
+ return result
+
+--
+-- Parser state update functions
+--
+
+-- | Set the current position as the last position at which a forbidden char
+-- was found (i.e. a character which is not allowed at the inner border of
+-- markup).
+updateLastForbiddenCharPos :: OrgParser ()
+updateLastForbiddenCharPos = getPosition >>= \p ->
+ updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
+
+-- | Set the current parser position as the position at which a character was
+-- seen which allows inline markup to follow.
+updateLastPreCharPos :: OrgParser ()
+updateLastPreCharPos = getPosition >>= \p ->
+ updateState $ \s -> s{ orgStateLastPreCharPos = Just p}