diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Txt2Tags.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Txt2Tags.hs | 159 |
1 files changed, 83 insertions, 76 deletions
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 0aafc83c7..f4dda7a11 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE ViewPatterns #-} {- Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com> @@ -29,39 +28,39 @@ Conversion of txt2tags formatted plain text to 'Pandoc' document. module Text.Pandoc.Readers.Txt2Tags ( readTxt2Tags , getT2TMeta , T2TMeta (..) - , readTxt2TagsNoMacros) + ) where -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines, Blocks, trimInlines ) -import Data.Monoid ((<>)) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Shared (escapeURI,compactify', compactify'DL) -import Text.Pandoc.Parsing hiding (space, spaces, uri, macro) +import Control.Monad (guard, void, when) +import Control.Monad.Except (catchError, throwError) +import Control.Monad.Reader (Reader, asks, runReader) import Data.Char (toLower) -import Data.List (transpose, intersperse, intercalate) -import Data.Maybe (fromMaybe) ---import Network.URI (isURI) -- Not sure whether to use this function -import Control.Monad (void, guard, when) import Data.Default -import Control.Monad.Reader (Reader, runReader, asks) -import Text.Pandoc.Error - -import Data.Time.LocalTime (getZonedTime) -import System.Directory(getModificationTime) +import Data.List (intercalate, transpose) +import Data.Maybe (fromMaybe) +import Data.Monoid ((<>)) +import Data.Text (Text) +import qualified Data.Text as T import Data.Time.Format (formatTime) +import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Class (PandocMonad) +import qualified Text.Pandoc.Class as P import Text.Pandoc.Compat.Time (defaultTimeLocale) -import System.IO.Error (catchIOError) +import Text.Pandoc.Definition +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (space, spaces, uri) +import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI, + underlineSpan) type T2T = ParserT String ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file data T2TMeta = T2TMeta { - date :: String -- ^ Current date - , mtime :: String -- ^ Last modification time of infile - , infile :: FilePath -- ^ Input file + date :: String -- ^ Current date + , mtime :: String -- ^ Last modification time of infile + , infile :: FilePath -- ^ Input file , outfile :: FilePath -- ^ Output file } deriving Show @@ -69,26 +68,38 @@ instance Default T2TMeta where def = T2TMeta "" "" "" "" -- | Get the meta information required by Txt2Tags macros -getT2TMeta :: [FilePath] -> FilePath -> IO T2TMeta -getT2TMeta inps out = do - curDate <- formatTime defaultTimeLocale "%F" <$> getZonedTime +getT2TMeta :: PandocMonad m => m T2TMeta +getT2TMeta = do + inps <- P.getInputFiles + outp <- fromMaybe "" <$> P.getOutputFile + curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime let getModTime = fmap (formatTime defaultTimeLocale "%T") . - getModificationTime + P.getModificationTime curMtime <- case inps of - [] -> formatTime defaultTimeLocale "%T" <$> getZonedTime - _ -> catchIOError + [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime + _ -> catchError (maximum <$> mapM getModTime inps) (const (return "")) - return $ T2TMeta curDate curMtime (intercalate ", " inps) out + return $ T2TMeta curDate curMtime (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: T2TMeta -> ReaderOptions -> String -> Either PandocError Pandoc -readTxt2Tags t opts s = flip runReader t $ readWithM parseT2T (def {stateOptions = opts}) (s ++ "\n\n") +readTxt2Tags :: PandocMonad m + => ReaderOptions + -> Text + -> m Pandoc +readTxt2Tags opts s = do + meta <- getT2TMeta + let parsed = flip runReader meta $ + readWithM parseT2T (def {stateOptions = opts}) $ + T.unpack (crFilter s) ++ "\n\n" + case parsed of + Right result -> return result + Left e -> throwError e -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document -readTxt2TagsNoMacros :: ReaderOptions -> String -> Either PandocError Pandoc -readTxt2TagsNoMacros = readTxt2Tags def +-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc +-- readTxt2TagsNoMacros = readTxt2Tags parseT2T :: T2T Pandoc parseT2T = do @@ -137,7 +148,7 @@ setting = do string "%!" keyword <- ignoreSpacesCap (many1 alphaNum) char ':' - value <- ignoreSpacesCap (manyTill anyChar (newline)) + value <- ignoreSpacesCap (manyTill anyChar newline) return (keyword, value) -- Blocks @@ -146,7 +157,7 @@ parseBlocks :: T2T Blocks parseBlocks = mconcat <$> manyTill block eof block :: T2T Blocks -block = do +block = choice [ mempty <$ blanklines , quote @@ -184,7 +195,7 @@ para = try $ do listStart = try bulletListStart <|> orderedListStart commentBlock :: T2T Blocks -commentBlock = try (blockMarkupArea (anyLine) (const mempty) "%%%") <|> comment +commentBlock = try (blockMarkupArea anyLine (const mempty) "%%%") <|> comment -- Seperator and Strong line treated the same hrule :: T2T Blocks @@ -198,7 +209,7 @@ quote :: T2T Blocks quote = try $ do lookAhead tab rawQuote <- many1 (tab *> optional spaces *> anyLine) - contents <- parseFromString parseBlocks (intercalate "\n" rawQuote ++ "\n\n") + contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n") return $ B.blockQuote contents commentLine :: T2T Inlines @@ -210,16 +221,16 @@ list :: T2T Blocks list = choice [bulletList, orderedList, definitionList] bulletList :: T2T Blocks -bulletList = B.bulletList . compactify' +bulletList = B.bulletList . compactify <$> many1 (listItem bulletListStart parseBlocks) orderedList :: T2T Blocks -orderedList = B.orderedList . compactify' +orderedList = B.orderedList . compactify <$> many1 (listItem orderedListStart parseBlocks) definitionList :: T2T Blocks -definitionList = try $ do - B.definitionList . compactify'DL <$> +definitionList = try $ + B.definitionList . compactifyDL <$> many1 (listItem definitionListStart definitionListEnd) definitionListEnd :: T2T (Inlines, [Blocks]) @@ -250,7 +261,7 @@ listItem start end = try $ do firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- concat <$> many (listContinuation markerLength) - parseFromString end $ firstLine ++ blank ++ rest + parseFromString' end $ firstLine ++ blank ++ rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. @@ -262,12 +273,6 @@ listContinuation markerLength = try $ <*> many blankline) where listLine = try $ indentWith markerLength *> anyLineNewline -anyLineNewline :: T2T String -anyLineNewline = (++ "\n") <$> anyLine - -indentWith :: Int -> T2T String -indentWith n = count n space - -- Table table :: T2T Blocks @@ -276,17 +281,17 @@ table = try $ do rows <- many1 (many commentLine *> tableRow) let columns = transpose rows let ncolumns = length columns - let aligns = map (foldr1 findAlign) (map (map fst) columns) + let aligns = map (foldr1 findAlign . map fst) columns let rows' = map (map snd) rows let size = maximum (map length rows') let rowsPadded = map (pad size) rows' - let headerPadded = if (not (null tableHeader)) then pad size tableHeader else mempty + let headerPadded = if null tableHeader then mempty else pad size tableHeader return $ B.table mempty (zip aligns (replicate ncolumns 0.0)) headerPadded rowsPadded pad :: (Monoid a) => Int -> [a] -> [a] -pad n xs = xs ++ (replicate (n - length xs) mempty) +pad n xs = xs ++ replicate (n - length xs) mempty findAlign :: Alignment -> Alignment -> Alignment @@ -309,7 +314,7 @@ genericRow start = try $ do tableCell :: T2T (Alignment, Blocks) tableCell = try $ do leftSpaces <- length <$> lookAhead (many1 space) -- Case of empty cell means we must lookAhead - content <- (manyTill inline (try $ lookAhead (cellEnd))) + content <- manyTill inline (try $ lookAhead cellEnd) rightSpaces <- length <$> many space let align = case compare leftSpaces rightSpaces of @@ -317,9 +322,9 @@ tableCell = try $ do EQ -> AlignCenter GT -> AlignRight endOfCell - return $ (align, B.plain (B.trimInlines $ mconcat content)) + return (align, B.plain (B.trimInlines $ mconcat content)) where - cellEnd = (void newline <|> (many1 space *> endOfCell)) + cellEnd = void newline <|> (many1 space *> endOfCell) endOfCell :: T2T () endOfCell = try (skipMany1 $ char '|') <|> ( () <$ lookAhead newline) @@ -342,10 +347,10 @@ taggedBlock = do genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s -blockMarkupArea :: Monoid a => (T2T a) -> (a -> Blocks) -> String -> T2T Blocks -blockMarkupArea p f s = try $ (do +blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +blockMarkupArea p f s = try (do string s *> blankline - f . mconcat <$> (manyTill p (eof <|> void (string s *> blankline)))) + f . mconcat <$> manyTill p (eof <|> void (string s *> blankline))) blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks blockMarkupLine p f s = try (f <$> (string s *> space *> p)) @@ -363,7 +368,7 @@ parseInlines :: T2T Inlines parseInlines = trimInlines . mconcat <$> many1 inline inline :: T2T Inlines -inline = do +inline = choice [ endline , macro @@ -385,16 +390,16 @@ inline = do ] bold :: T2T Inlines -bold = inlineMarkup inline B.strong '*' (B.str) +bold = inlineMarkup inline B.strong '*' B.str underline :: T2T Inlines -underline = inlineMarkup inline B.emph '_' (B.str) +underline = inlineMarkup inline underlineSpan '_' B.str strike :: T2T Inlines -strike = inlineMarkup inline B.strikeout '-' (B.str) +strike = inlineMarkup inline B.strikeout '-' B.str italic :: T2T Inlines -italic = inlineMarkup inline B.emph '/' (B.str) +italic = inlineMarkup inline B.emph '/' B.str code :: T2T Inlines code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id @@ -413,7 +418,7 @@ tagged = do -- Glued meaning that markup must be tight to content -- Markup can't pass newlines inlineMarkup :: Monoid a - => (T2T a) -- Content parser + => T2T a -- Content parser -> (a -> Inlines) -- Constructor -> Char -- Fence -> (String -> a) -- Special Case to handle ****** @@ -425,20 +430,24 @@ inlineMarkup p f c special = try $ do when (l == 2) (void $ notFollowedBy space) -- We must make sure that there is no space before the start of the -- closing tags - body <- optionMaybe (try $ manyTill (noneOf "\n\r") $ + body <- optionMaybe (try $ manyTill (noneOf "\n\r") (try $ lookAhead (noneOf " " >> string [c,c] ))) case body of Just middle -> do lastChar <- anyChar end <- many1 (char c) - let parser inp = parseFromString (mconcat <$> many p) inp - let start' = special (drop 2 start) + let parser inp = parseFromString' (mconcat <$> many p) inp + let start' = case drop 2 start of + "" -> mempty + xs -> special xs body' <- parser (middle ++ [lastChar]) - let end' = special (drop 2 end) + let end' = case drop 2 end of + "" -> mempty + xs -> special xs return $ f (start' <> body' <> end') Nothing -> do -- Either bad or case such as ***** guard (l >= 5) - let body' = (replicate (l - 4) c) + let body' = replicate (l - 4) c return $ f (special body') link :: T2T Inlines @@ -453,8 +462,8 @@ titleLink = try $ do guard (length tokens >= 2) char ']' let link' = last tokens - guard (length link' > 0) - let tit = concat (intersperse " " (init tokens)) + guard $ not $ null link' + let tit = unwords (init tokens) return $ B.link link' "" (B.text tit) -- Link with image @@ -479,7 +488,7 @@ macro = try $ do -- raw URLs in text are automatically linked url :: T2T Inlines url = try $ do - (rawUrl, escapedUrl) <- (try uri <|> emailAddress) + (rawUrl, escapedUrl) <- try uri <|> emailAddress return $ B.link rawUrl "" (B.str escapedUrl) uri :: T2T (String, String) @@ -520,8 +529,7 @@ image = try $ do -- List taken from txt2tags source let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"] char '[' - path <- manyTill (noneOf "\n\t\r ") (try $ lookAhead (oneOfStrings extensions)) - ext <- oneOfStrings extensions + (path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions) char ']' return $ B.image (path ++ ext) "" mempty @@ -550,11 +558,10 @@ endline = try $ do notFollowedBy quote notFollowedBy list notFollowedBy table - return $ B.softbreak + return B.softbreak str :: T2T Inlines -str = try $ do - B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") whitespace :: T2T Inlines whitespace = try $ B.space <$ spaceChar |