summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-06-10 18:26:44 +0200
committerJohn MacFarlane <jgm@berkeley.edu>2017-06-10 18:26:44 +0200
commit94b3dacb4ea7e5e99ab62286b13877b92f9391b3 (patch)
tree4e1d9b98ebb2246c8a543163e980a927d25b0c34 /src/Text/Pandoc/Readers
parentd6822157e75432e09210350e3b58eec3998dca76 (diff)
Changed all readers to take Text instead of String.
Readers: Renamed StringReader -> TextReader. Updated tests. API change.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs6
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs8
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs4
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs6
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs5
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs5
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs5
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs5
-rw-r--r--src/Text/Pandoc/Readers/Native.hs21
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs10
-rw-r--r--src/Text/Pandoc/Readers/Org.hs7
-rw-r--r--src/Text/Pandoc/Readers/RST.hs7
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs7
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs7
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs7
15 files changed, 68 insertions, 42 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index e98ee066e..3c62f8db5 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -34,15 +34,15 @@ where
import CMark
import Data.List (groupBy)
-import Data.Text (pack, unpack)
+import Data.Text (Text, unpack)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Options
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
-readCommonMark :: PandocMonad m => ReaderOptions -> String -> m Pandoc
+readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readCommonMark opts s = return $
- nodeToPandoc $ commonmarkToNode opts' $ pack s
+ nodeToPandoc $ commonmarkToNode opts' s
where opts' = if extensionEnabled Ext_smart (readerExtensions opts)
then [optNormalize, optSmart]
else [optNormalize]
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index bef256a93..bd3c7c356 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -16,6 +16,8 @@ import Text.TeXMath (readMathML, writeTeX)
import Data.Default
import Data.Foldable (asum)
import Text.Pandoc.Class (PandocMonad)
+import Data.Text (Text)
+import qualified Data.Text as T
{-
@@ -522,11 +524,11 @@ instance Default DBState where
, dbContent = [] }
-readDocBook :: PandocMonad m => ReaderOptions -> String -> m Pandoc
+readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readDocBook _ inp = do
- let tree = normalizeTree . parseXML . handleInstructions $ inp
+ let tree = normalizeTree . parseXML . handleInstructions $ T.unpack inp
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree
- return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
+ return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
-- We treat <?asciidoc-br?> specially (issue #1236), converting it
-- to <br/>, since xml-light doesn't parse the instruction correctly.
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index db58e9654..c0d8029dc 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -13,6 +13,8 @@ import Control.DeepSeq (NFData, deepseq)
import Control.Monad (guard, liftM)
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BL (ByteString)
+import qualified Data.Text.Lazy.Encoding as TL
+import qualified Data.Text.Lazy as TL
import Data.List (isInfixOf, isPrefixOf)
import qualified Data.Map as M (Map, elems, fromList, lookup)
import Data.Maybe (fromMaybe, mapMaybe)
@@ -73,7 +75,7 @@ archiveToEPUB os archive = do
mimeToReader "application/xhtml+xml" (unEscapeString -> root)
(unEscapeString -> path) = do
fname <- findEntryByPathE (root </> path) archive
- html <- readHtml os' . UTF8.toStringLazy $ fromEntry fname
+ html <- readHtml os' . TL.toStrict . TL.decodeUtf8 $ fromEntry fname
return $ fixInternalReferences path html
mimeToReader s _ (unEscapeString -> path)
| s `elem` imageMimes = return $ imageToPandoc path
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index c1bdb4d09..3bccf89fb 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -59,6 +59,7 @@ import Control.Monad ( guard, mzero, void, unless )
import Control.Arrow ((***))
import Control.Applicative ( (<|>) )
import Data.Monoid (First (..))
+import Data.Text (Text, unpack)
import Text.TeXMath (readMathML, writeTeX)
import Data.Default (Default (..), def)
import Control.Monad.Reader (ask, asks, local, ReaderT, runReaderT, lift)
@@ -74,11 +75,12 @@ import Control.Monad.Except (throwError)
-- | Convert HTML-formatted string to 'Pandoc' document.
readHtml :: PandocMonad m
=> ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assumes @'\n'@ line endings)
+ -> Text -- ^ String to parse (assumes @'\n'@ line endings)
-> m Pandoc
readHtml opts inp = do
let tags = stripPrefixes . canonicalizeTags $
- parseTagsOptions parseOptions{ optTagPosition = True } inp
+ parseTagsOptions parseOptions{ optTagPosition = True }
+ (unpack inp)
parseDoc = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 28caa528e..b22b71b96 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -16,6 +16,7 @@ module Text.Pandoc.Readers.Haddock
import Control.Monad.Except (throwError)
import Data.List (intersperse, stripPrefix)
+import Data.Text (Text, unpack)
import Data.Maybe (fromMaybe)
import Data.Monoid ((<>))
import Documentation.Haddock.Parser
@@ -32,9 +33,9 @@ import Text.Pandoc.Shared (splitBy, trim)
-- | Parse Haddock markup and return a 'Pandoc' document.
readHaddock :: PandocMonad m
=> ReaderOptions
- -> String
+ -> Text
-> m Pandoc
-readHaddock opts s = case readHaddockEither opts s of
+readHaddock opts s = case readHaddockEither opts (unpack s) of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index b65ae15ad..796d2789e 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -39,6 +39,7 @@ import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (chr, isAlphaNum, isLetter, ord)
+import Data.Text (Text, unpack)
import Data.List (intercalate, isPrefixOf)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, maybeToList)
@@ -59,10 +60,10 @@ import Text.Pandoc.Walk
-- | Parse LaTeX from string and return 'Pandoc' document.
readLaTeX :: PandocMonad m
=> ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assumes @'\n'@ line endings)
+ -> Text -- ^ String to parse (assumes @'\n'@ line endings)
-> m Pandoc
readLaTeX opts ltx = do
- parsed <- readWithM parseLaTeX def{ stateOptions = opts } ltx
+ parsed <- readWithM parseLaTeX def{ stateOptions = opts } (unpack ltx)
case parsed of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 5694c4354..5e966a17e 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -70,10 +70,11 @@ type MarkdownParser m = ParserT [Char] ParserState m
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: PandocMonad m
=> ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readMarkdown opts s = do
- parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } (s ++ "\n\n")
+ parsed <- (readWithM parseMarkdown) def{ stateOptions = opts }
+ (T.unpack s ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 3f6142f00..a3ff60c14 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -41,6 +41,7 @@ module Text.Pandoc.Readers.MediaWiki ( readMediaWiki ) where
import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isDigit, isSpace)
+import Data.Text (Text, unpack)
import qualified Data.Foldable as F
import Data.List (intercalate, intersperse, isPrefixOf)
import qualified Data.Map as M
@@ -64,7 +65,7 @@ import Text.Pandoc.XML (fromEntities)
-- | Read mediawiki from an input string and return a Pandoc document.
readMediaWiki :: PandocMonad m
=> ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readMediaWiki opts s = do
parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts
@@ -76,7 +77,7 @@ readMediaWiki opts s = do
, mwLogMessages = []
, mwInTT = False
}
- (s ++ "\n")
+ (unpack s ++ "\n")
case parsed of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 8f42a45de..abc2ed38a 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -37,6 +37,7 @@ import Text.Pandoc.Shared (safeRead)
import Control.Monad.Except (throwError)
import Text.Pandoc.Class
import Text.Pandoc.Error
+import Data.Text (Text, unpack)
-- | Read native formatted text and return a Pandoc document.
-- The input may be a full pandoc document, a block list, a block,
@@ -50,22 +51,22 @@ import Text.Pandoc.Error
--
readNative :: PandocMonad m
=> ReaderOptions
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readNative _ s =
- case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
+ case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of
Right doc -> return doc
Left _ -> throwError $ PandocParseError "couldn't read native"
-readBlocks :: String -> Either PandocError [Block]
-readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
+readBlocks :: Text -> Either PandocError [Block]
+readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s))
-readBlock :: String -> Either PandocError Block
-readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s)
+readBlock :: Text -> Either PandocError Block
+readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s))
-readInlines :: String -> Either PandocError [Inline]
-readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s)
+readInlines :: Text -> Either PandocError [Inline]
+readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s))
-readInline :: String -> Either PandocError Inline
-readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ s) Right (safeRead s)
+readInline :: Text -> Either PandocError Inline
+readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s))
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index cf1c8f479..591d7590e 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -2,6 +2,7 @@
module Text.Pandoc.Readers.OPML ( readOPML ) where
import Control.Monad.State
import Data.Char (toUpper)
+import Data.Text (Text, unpack, pack)
import Data.Default
import Data.Generics
import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -28,9 +29,10 @@ instance Default OPMLState where
, opmlDocDate = mempty
}
-readOPML :: PandocMonad m => ReaderOptions -> String -> m Pandoc
+readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readOPML _ inp = do
- (bs, st') <- flip runStateT def (mapM parseBlock $ normalizeTree $ parseXML inp)
+ (bs, st') <- flip runStateT def
+ (mapM parseBlock $ normalizeTree $ parseXML (unpack inp))
return $
setTitle (opmlDocTitle st') $
setAuthors (opmlDocAuthors st') $
@@ -69,10 +71,10 @@ asHtml :: PandocMonad m => String -> OPML m Inlines
asHtml s =
(\(Pandoc _ bs) -> case bs of
[Plain ils] -> fromList ils
- _ -> mempty) <$> (lift $ readHtml def s)
+ _ -> mempty) <$> (lift $ readHtml def (pack s))
asMarkdown :: PandocMonad m => String -> OPML m Blocks
-asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def s)
+asMarkdown s = (\(Pandoc _ bs) -> fromList bs) <$> (lift $ readMarkdown def (pack s))
getBlocks :: PandocMonad m => Element -> OPML m Blocks
getBlocks e = mconcat <$> (mapM parseBlock $ elContent e)
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 2b29bcfda..5e0d67d10 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -40,15 +40,18 @@ import Text.Pandoc.Parsing (reportLogMessages)
import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
+import Data.Text (Text)
+import qualified Data.Text as T
-- | Parse org-mode string and return a Pandoc document.
readOrg :: PandocMonad m
=> ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readOrg opts s = do
parsed <- flip runReaderT def $
- readWithM parseOrg (optionsToParserState opts) (s ++ "\n\n")
+ readWithM parseOrg (optionsToParserState opts)
+ (T.unpack s ++ "\n\n")
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "problem parsing org"
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index b242d6428..fb5f6f2d4 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -53,6 +53,8 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Shared
import Text.Printf (printf)
+import Data.Text (Text)
+import qualified Data.Text as T
-- TODO:
-- [ ] .. parsed-literal
@@ -62,10 +64,11 @@ import Text.Printf (printf)
-- | Parse reStructuredText string and return Pandoc document.
readRST :: PandocMonad m
=> ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readRST opts s = do
- parsed <- (readWithM parseRST) def{ stateOptions = opts } (s ++ "\n\n")
+ parsed <- (readWithM parseRST) def{ stateOptions = opts }
+ (T.unpack s ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index fcb95fc35..9e544c4ac 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -49,14 +49,17 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, macro, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
import Text.Pandoc.XML (fromEntities)
+import Data.Text (Text)
+import qualified Data.Text as T
-- | Read twiki from an input string and return a Pandoc document.
readTWiki :: PandocMonad m
=> ReaderOptions
- -> String
+ -> Text
-> m Pandoc
readTWiki opts s = do
- res <- readWithM parseTWiki def{ stateOptions = opts } (s ++ "\n\n")
+ res <- readWithM parseTWiki def{ stateOptions = opts }
+ (T.unpack s ++ "\n\n")
case res of
Left e -> throwError e
Right d -> return d
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 0b964dd63..1669e3e51 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -70,14 +70,17 @@ import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared (trim)
+import Data.Text (Text)
+import qualified Data.Text as T
-- | Parse a Textile text and return a Pandoc document.
readTextile :: PandocMonad m
=> ReaderOptions -- ^ Reader options
- -> String -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readTextile opts s = do
- parsed <- readWithM parseTextile def{ stateOptions = opts } (s ++ "\n\n")
+ parsed <- readWithM parseTextile def{ stateOptions = opts }
+ (T.unpack s ++ "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index d8791869d..260bb7fff 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -45,7 +45,8 @@ import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI)
import Control.Monad (guard, void, when)
import Control.Monad.Reader (Reader, asks, runReader)
import Data.Default
-
+import Data.Text (Text)
+import qualified Data.Text as T
import Control.Monad.Except (catchError, throwError)
import Data.Time.Format (formatTime)
import Text.Pandoc.Class (PandocMonad)
@@ -90,11 +91,11 @@ getT2TMeta = do
-- | Read Txt2Tags from an input string returning a Pandoc document
readTxt2Tags :: PandocMonad m
=> ReaderOptions
- -> String
+ -> Text
-> m Pandoc
readTxt2Tags opts s = do
meta <- getT2TMeta
- let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n")
+ let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (T.unpack s ++ "\n\n")
case parsed of
Right result -> return $ result
Left e -> throwError e