From a6f2b960844caa81d9c8dd4d18f94c3de50bdb49 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 15 Aug 2012 09:42:16 -0700 Subject: Moved renderTags' from HTML reader & SelfContained to Shared. Improved removal of markdown="1" attribute in Markdown reader. --- src/Text/Pandoc/Readers/HTML.hs | 14 +------------- src/Text/Pandoc/Readers/Markdown.hs | 12 +++++++++--- src/Text/Pandoc/SelfContained.hs | 13 +------------ src/Text/Pandoc/Shared.hs | 22 ++++++++++++++++++++++ 4 files changed, 33 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 33846286d..e5c310ffc 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -45,7 +45,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) -import Data.Char ( isDigit, toLower ) +import Data.Char ( isDigit ) import Control.Monad ( liftM, guard, when, mzero ) isSpace :: Char -> Bool @@ -95,18 +95,6 @@ block = choice , pRawHtmlBlock ] --- repeated in SelfContained -- consolidate eventually -renderTags' :: [Tag String] -> String -renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } - pList :: TagParser [Block] pList = pBulletList <|> pOrderedList <|> pDefinitionList diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index fe9be439e..b0925ac68 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -776,11 +776,10 @@ rawHtmlBlocks = do if "markdown" `notElem` map fst as then mzero - else return $ substitute - " markdown=\"1\"" "" raw + else return $ + stripMarkdownAttribute raw | otherwise -> return raw _ -> return raw ) - -- TODO remove markdown="1" attribute from raw 
tags sps <- do sp1 <- many spaceChar sp2 <- option "" (blankline >> return "\n") sp3 <- many spaceChar @@ -793,6 +792,13 @@ rawHtmlBlocks = do let combined = concat htmlBlocks return $ if last combined == '\n' then init combined else combined +-- remove markdown="1" attribute +stripMarkdownAttribute :: String -> String +stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s + where filterAttrib (TagOpen t as) = TagOpen t + [(k,v) | (k,v) <- as, k /= "markdown"] + filterAttrib x = x + -- -- Tables -- diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 675b8366e..7a21f6f3a 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -42,7 +42,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>)) import Data.Char (toLower, isAscii, isAlphaNum) import Codec.Compression.GZip as Gzip import qualified Data.ByteString.Lazy as L -import Text.Pandoc.Shared (findDataFile) +import Text.Pandoc.Shared (findDataFile, renderTags') import Text.Pandoc.MIME (getMimeType) import System.Directory (doesFileExist) @@ -163,14 +163,3 @@ makeSelfContained userdata inp = do out' <- mapM (convertTag userdata) tags return $ renderTags' out' --- repeated from HTML reader: -renderTags' :: [Tag String] -> String -renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index ad28b7c23..d86f9a390 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -59,6 +59,8 @@ module Text.Pandoc.Shared ( uniqueIdent, isHeaderBlock, headerShift, + -- * TagSoup HTML handling + renderTags', -- * File handling inDirectory, findDataFile, @@ -89,6 +91,8 @@ import Text.Pandoc.Pretty (charWidth) import System.Locale (defaultTimeLocale) 
import Data.Time import System.IO (stderr) +import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..), + renderOptions) -- -- List processing @@ -448,6 +452,22 @@ headerShift n = bottomUp shift shift (Header level inner) = Header (level + n) inner shift x = x +-- +-- TagSoup HTML handling +-- + +-- | Render HTML tags. +renderTags' :: [Tag String] -> String +renderTags' = renderTagsOptions + renderOptions{ optMinimize = \x -> + let y = map toLower x + in y == "hr" || y == "br" || + y == "img" || y == "meta" || + y == "link" + , optRawTag = \x -> + let y = map toLower x + in y == "script" || y == "style" } + -- -- File handling -- @@ -501,3 +521,5 @@ safeRead s = case reads s of (d,x):_ | all isSpace x -> return d _ -> fail $ "Could not read `" ++ s ++ "'" + + -- cgit v1.2.3