From a340c7249f8e19d36ee4a68663b4c97e0893292b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Sun, 22 May 2016 16:33:31 +0200 Subject: Org reader: extract parsing function to module The Org-mode reader uses many functions defined in the `Text.Pandoc.Parsing` utility module. Some of the functions are overwritten with versions adapted to Org-mode idiosyncrasies. These special functions, as well as the normal Pandoc versions, are combined in a single module to increase the ease of use. This leads to decoupling of Org-mode and Pandoc and hence to slightly cleaner code. The downside is code-bloat due to repeated import/export statements. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/Org.hs | 86 ++------------ src/Text/Pandoc/Readers/Org/ParserState.hs | 7 +- src/Text/Pandoc/Readers/Org/Parsing.hs | 182 +++++++++++++++++++++++++++++ 4 files changed, 198 insertions(+), 78 deletions(-) create mode 100644 src/Text/Pandoc/Readers/Org/Parsing.hs diff --git a/pandoc.cabal b/pandoc.cabal index 24da05faa..61b5043ba 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -394,6 +394,7 @@ Library Text.Pandoc.Readers.Odt.Arrows.State, Text.Pandoc.Readers.Odt.Arrows.Utils, Text.Pandoc.Readers.Org.ParserState, + Text.Pandoc.Readers.Org.Parsing, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 621e7107f..fd811c078 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -29,27 +29,23 @@ Conversion of org-mode formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Org ( readOrg ) where +import Text.Pandoc.Readers.Org.ParserState +import Text.Pandoc.Readers.Org.Parsing + import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder ( Inlines, Blocks ) import Text.Pandoc.Definition import Text.Pandoc.Compat.Monoid ((<>)) import Text.Pandoc.Error import Text.Pandoc.Options -import qualified Text.Pandoc.Parsing as P -import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF - , anyLine, blanklines, newline - , orderedListMarker - , parseFromString - ) import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) -import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Shared (compactify', compactify'DL) import Text.TeXMath (readTeX, writePandoc, DisplayType(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Control.Arrow (first) import Control.Monad (foldM, guard, mplus, mzero, when) -import Control.Monad.Reader ( Reader, runReader ) +import Control.Monad.Reader ( runReader ) import Data.Char (isAlphaNum, isSpace, toLower, toUpper) import Data.List ( foldl', intersperse, isPrefixOf, isSuffixOf ) import qualified Data.Map as M @@ -63,9 +59,6 @@ readOrg :: ReaderOptions -- ^ Reader options -> Either PandocError Pandoc readOrg opts s = flip runReader def $ readWithM parseOrg def{ orgStateOptions = opts } (s ++ "\n\n") --- | The parser used to read org files. -type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) - -- -- Functions acting on the parser state -- @@ -73,14 +66,6 @@ recordAnchorId :: String -> OrgParser () recordAnchorId i = updateState $ \s -> s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } -updateLastForbiddenCharPos :: OrgParser () -updateLastForbiddenCharPos = getPosition >>= \p -> - updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} - -updateLastPreCharPos :: OrgParser () -updateLastPreCharPos = getPosition >>= \p -> - updateState $ \s -> s{ orgStateLastPreCharPos = Just p} - pushToInlineCharStack :: Char -> OrgParser () pushToInlineCharStack c = updateState $ \s -> s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } @@ -258,57 +243,6 @@ isHeaderLevelLowerEq n blk = _ -> False --- --- Adaptions and specializations of parsing utilities --- - --- The version Text.Pandoc.Parsing cannot be used, as we need additional parts --- of the state saved and restored. -parseFromString :: OrgParser a -> String -> OrgParser a -parseFromString parser str' = do - oldLastPreCharPos <- orgStateLastPreCharPos <$> getState - updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } - result <- P.parseFromString parser str' - updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } - return result - --- | Like @Text.Parsec.Char.newline@, but causes additional state changes. -newline :: OrgParser Char -newline = - P.newline - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - --- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: OrgParser [Char] -blanklines = - P.blanklines - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - -anyLine :: OrgParser String -anyLine = - P.anyLine - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - --- | Succeeds when we're in list context. -inList :: OrgParser () -inList = do - ctx <- orgStateParserContext <$> getState - guard (ctx == ListItemState) - --- | Parse in different context -withContext :: ParserContext -- ^ New parser context - -> OrgParser a -- ^ Parser to run in that context - -> OrgParser a -withContext context parser = do - oldContext <- orgStateParserContext <$> getState - updateState $ \s -> s{ orgStateParserContext = context } - result <- parser - updateState $ \s -> s{ orgStateParserContext = oldContext } - return result - -- -- parsing blocks -- @@ -398,7 +332,7 @@ keyValues = try $ endOfValue :: OrgParser () endOfValue = lookAhead $ (() <$ try (many1 spaceChar <* key)) - <|> () <$ P.newline + <|> () <$ newline -- @@ -675,7 +609,7 @@ propertiesDrawer = try $ do key = try $ skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') value :: OrgParser String - value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> P.newline) + value = try $ skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) keyValuesToAttr :: [(String, String)] -> Attr keyValuesToAttr kvs = @@ -696,7 +630,7 @@ keyValuesToAttr kvs = figure :: OrgParser (F Blocks) figure = try $ do figAttrs <- blockAttributes - src <- skipSpaces *> selfTarget <* skipSpaces <* P.newline + src <- skipSpaces *> selfTarget <* skipSpaces <* newline guard . not . isNothing . blockAttrCaption $ figAttrs guard (isImageFilename src) let figName = fromMaybe mempty $ blockAttrName figAttrs @@ -785,7 +719,7 @@ parseFormat = try $ do header :: OrgParser (F Blocks) header = try $ do level <- headerStart - title <- manyTill inline (lookAhead $ optional headerTags <* P.newline) + title <- manyTill inline (lookAhead $ optional headerTags <* newline) tags <- option [] headerTags newline propAttr <- option nullAttr (keyValuesToAttr <$> propertiesDrawer) @@ -1083,7 +1017,7 @@ definitionListItem parseMarkerGetLength = try $ do return $ (,) <$> term' <*> fmap (:[]) contents' where definitionMarker = - spaceChar *> string "::" <* (spaceChar <|> lookAhead P.newline) + spaceChar *> string "::" <* (spaceChar <|> lookAhead newline) -- parse raw text for one list item, excluding start marker and continuations @@ -1559,7 +1493,7 @@ many1TillNOrLessNewlines n p end = try $ nMoreLines k cs = try $ (final k cs <|> rest k cs) >>= uncurry nMoreLines final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine - rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline) + rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) finalLine = try $ manyTill p end minus1 k = k - 1 oneOrMore cs = guard (not $ null cs) *> return cs diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 6a902cd46..e648a883e 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -29,9 +29,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Define the Org-mode parser state. -} module Text.Pandoc.Readers.Org.ParserState - ( OrgParserState(..) - , OrgParserLocal(..) + ( OrgParserState (..) + , OrgParserLocal (..) , OrgNoteRecord + , HasReaderOptions (..) + , HasQuoteContext (..) , F(..) , askF , asksF @@ -184,6 +186,7 @@ modifyExportSettings :: ExportSettingSetter a -> a -> OrgParserState -> OrgParse modifyExportSettings setter val state = state { orgStateExportSettings = setter val . orgStateExportSettings $ state } + -- -- Parser state reader -- diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs new file mode 100644 index 000000000..efe2ae25f --- /dev/null +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -0,0 +1,182 @@ +{- +Copyright (C) 2014-2016 Albert Krewinkel + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Org.Options + Copyright : Copyright (C) 2014-2016 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel + +Org-mode parsing utilities. + +Most functions are simply re-exports from @Text.Pandoc.Parsing@, some +functions are adapted to Org-mode specific functionality. +-} +module Text.Pandoc.Readers.Org.Parsing + ( OrgParser + , anyLine + , blanklines + , newline + , parseFromString + , inList + , withContext + , updateLastForbiddenCharPos + , updateLastPreCharPos + -- * Re-exports from Text.Pandoc.Parser + , ParserContext (..) + , many1Till + , notFollowedBy' + , spaceChar + , nonspaceChar + , skipSpaces + , blankline + , enclosed + , stringAnyCase + , charsInBalanced + , uri + , withRaw + , readWithM + , guardEnabled + , updateLastStrPos + , notAfterString + , ParserState (..) + , registerHeader + , QuoteContext (..) + , singleQuoteStart + , singleQuoteEnd + , doubleQuoteStart + , doubleQuoteEnd + , dash + , ellipses + , citeKey + -- * Re-exports from Text.Pandoc.Parsec + , runParser + , getInput + , char + , letter + , digit + , alphaNum + , skipMany1 + , spaces + , anyChar + , string + , count + , eof + , noneOf + , oneOf + , lookAhead + , notFollowedBy + , many + , many1 + , manyTill + , (<|>) + , () + , choice + , try + , sepBy + , sepBy1 + , option + , optional + , optionMaybe + , getState + , updateState + , SourcePos + , getPosition + ) where + +import Text.Pandoc.Readers.Org.ParserState + +import qualified Text.Pandoc.Parsing as P +import Text.Pandoc.Parsing hiding ( anyLine, blanklines, newline + , parseFromString ) + +import Control.Monad ( guard ) +import Control.Monad.Reader ( Reader ) + +-- | The parser used to read org files. +type OrgParser = ParserT [Char] OrgParserState (Reader OrgParserLocal) + +-- +-- Adaptions and specializations of parsing utilities +-- + +-- | Parse any line of text +anyLine :: OrgParser String +anyLine = + P.anyLine + <* updateLastPreCharPos + <* updateLastForbiddenCharPos + +-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts +-- of the state saved and restored. +parseFromString :: OrgParser a -> String -> OrgParser a +parseFromString parser str' = do + oldLastPreCharPos <- orgStateLastPreCharPos <$> getState + updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } + result <- P.parseFromString parser str' + updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos } + return result + +-- | Like @Text.Parsec.Char.newline@, but causes additional state changes. +newline :: OrgParser Char +newline = + P.newline + <* updateLastPreCharPos + <* updateLastForbiddenCharPos + +-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. +blanklines :: OrgParser [Char] +blanklines = + P.blanklines + <* updateLastPreCharPos + <* updateLastForbiddenCharPos + +-- | Succeeds when we're in list context. +inList :: OrgParser () +inList = do + ctx <- orgStateParserContext <$> getState + guard (ctx == ListItemState) + +-- | Parse in different context +withContext :: ParserContext -- ^ New parser context + -> OrgParser a -- ^ Parser to run in that context + -> OrgParser a +withContext context parser = do + oldContext <- orgStateParserContext <$> getState + updateState $ \s -> s{ orgStateParserContext = context } + result <- parser + updateState $ \s -> s{ orgStateParserContext = oldContext } + return result + +-- +-- Parser state update functions +-- + +-- | Set the current position as the last position at which a forbidden char +-- was found (i.e. a character which is not allowed at the inner border of +-- markup). +updateLastForbiddenCharPos :: OrgParser () +updateLastForbiddenCharPos = getPosition >>= \p -> + updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p} + +-- | Set the current parser position as the position at which a character was +-- seen which allows inline markup to follow. +updateLastPreCharPos :: OrgParser () +updateLastPreCharPos = getPosition >>= \p -> + updateState $ \s -> s{ orgStateLastPreCharPos = Just p} -- cgit v1.2.3