diff options
Diffstat (limited to 'src/Text/Pandoc/Readers.hs')
-rw-r--r-- | src/Text/Pandoc/Readers.hs | 160 |
1 files changed, 160 insertions, 0 deletions
diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs new file mode 100644 index 000000000..b9374ba06 --- /dev/null +++ b/src/Text/Pandoc/Readers.hs @@ -0,0 +1,160 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- +Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers + Copyright : Copyright (C) 2006-2018 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +This helper module exports the readers. + +Note: all of the readers assume that the input text has @'\n'@ +line endings. So if you get your input text from a web form, +you should remove @'\r'@ characters using @filter (/='\r')@. + +-} + +module Text.Pandoc.Readers + ( + -- * Readers: converting /to/ Pandoc format + Reader (..) + , readers + , readDocx + , readOdt + , readMarkdown + , readCommonMark + , readCreole + , readMediaWiki + , readVimwiki + , readRST + , readOrg + , readLaTeX + , readHtml + , readJATS + , readTextile + , readDocBook + , readOPML + , readHaddock + , readNative + , readJSON + , readTWiki + , readTikiWiki + , readTxt2Tags + , readEPUB + , readMuse + -- * Miscellaneous + , getReader + , getDefaultExtensions + ) where + +import Control.Monad.Except (throwError) +import Data.Aeson +import qualified Data.ByteString.Lazy as BL +import Data.List (intercalate) +import Data.Text (Text) +import Text.Pandoc.Class +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Extensions +import Text.Pandoc.Options +import Text.Pandoc.Readers.CommonMark +import Text.Pandoc.Readers.Creole +import Text.Pandoc.Readers.DocBook +import Text.Pandoc.Readers.Docx +import Text.Pandoc.Readers.EPUB +import Text.Pandoc.Readers.Haddock +import Text.Pandoc.Readers.HTML (readHtml) +import Text.Pandoc.Readers.JATS (readJATS) +import Text.Pandoc.Readers.LaTeX +import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Readers.MediaWiki +import Text.Pandoc.Readers.Muse +import Text.Pandoc.Readers.Native +import Text.Pandoc.Readers.Odt +import Text.Pandoc.Readers.OPML +import Text.Pandoc.Readers.Org +import Text.Pandoc.Readers.RST +import Text.Pandoc.Readers.Textile +import Text.Pandoc.Readers.TikiWiki +import Text.Pandoc.Readers.TWiki +import Text.Pandoc.Readers.Txt2Tags +import Text.Pandoc.Readers.Vimwiki +import Text.Pandoc.Shared (mapLeft) +import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Parsec.Error + +data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc) + | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc) + +-- | Association list of formats and readers. +readers :: PandocMonad m => [(String, Reader m)] +readers = [ ("native" , TextReader readNative) + ,("json" , TextReader $ \o s -> + case readJSON o s of + Right doc -> return doc + Left _ -> throwError $ PandocParseError "JSON parse error") + ,("markdown" , TextReader readMarkdown) + ,("markdown_strict" , TextReader readMarkdown) + ,("markdown_phpextra" , TextReader readMarkdown) + ,("markdown_github" , TextReader readMarkdown) + ,("markdown_mmd", TextReader readMarkdown) + ,("commonmark" , TextReader readCommonMark) + ,("creole" , TextReader readCreole) + ,("gfm" , TextReader readCommonMark) + ,("rst" , TextReader readRST) + ,("mediawiki" , TextReader readMediaWiki) + ,("vimwiki" , TextReader readVimwiki) + ,("docbook" , TextReader readDocBook) + ,("opml" , TextReader readOPML) + ,("org" , TextReader readOrg) + ,("textile" , TextReader readTextile) -- TODO : textile+lhs + ,("html" , TextReader readHtml) + ,("jats" , TextReader readJATS) + ,("latex" , TextReader readLaTeX) + ,("haddock" , TextReader readHaddock) + ,("twiki" , TextReader readTWiki) + ,("tikiwiki" , TextReader readTikiWiki) + ,("docx" , ByteStringReader readDocx) + ,("odt" , ByteStringReader readOdt) + ,("t2t" , TextReader readTxt2Tags) + ,("epub" , ByteStringReader readEPUB) + ,("muse" , TextReader readMuse) + ] + +-- | Retrieve reader, extensions based on formatSpec (format+extensions). +getReader :: PandocMonad m => String -> Either String (Reader m, Extensions) +getReader s = + case parseFormatSpec s of + Left e -> Left $ intercalate "\n" [m | Message m <- errorMessages e] + Right (readerName, setExts) -> + case lookup readerName readers of + Nothing -> Left $ "Unknown reader: " ++ readerName + Just r -> Right (r, setExts $ + getDefaultExtensions readerName) + +-- | Read pandoc document from JSON format. +readJSON :: ReaderOptions -> Text -> Either PandocError Pandoc +readJSON _ = + mapLeft PandocParseError . eitherDecode' . BL.fromStrict . UTF8.fromText |