summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2012-08-15 09:42:16 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2012-08-15 09:42:16 -0700
commita6f2b960844caa81d9c8dd4d18f94c3de50bdb49 (patch)
tree870db6b2655d372fddffdd87f5d8f848e43812cd /src
parent3745706fa2b2ada40126e59d3fa354f3f72efee9 (diff)
Moved renderTags' from HTML reader & SelfContained to Shared.
Improved removal of markdown="1" attribute in Markdow reader.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs14
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs12
-rw-r--r--src/Text/Pandoc/SelfContained.hs13
-rw-r--r--src/Text/Pandoc/Shared.hs22
4 files changed, 33 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 33846286d..e5c310ffc 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -45,7 +45,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Data.Maybe ( fromMaybe, isJust )
import Data.List ( intercalate )
-import Data.Char ( isDigit, toLower )
+import Data.Char ( isDigit )
import Control.Monad ( liftM, guard, when, mzero )
isSpace :: Char -> Bool
@@ -95,18 +95,6 @@ block = choice
, pRawHtmlBlock
]
--- repeated in SelfContained -- consolidate eventually
-renderTags' :: [Tag String] -> String
-renderTags' = renderTagsOptions
- renderOptions{ optMinimize = \x ->
- let y = map toLower x
- in y == "hr" || y == "br" ||
- y == "img" || y == "meta" ||
- y == "link"
- , optRawTag = \x ->
- let y = map toLower x
- in y == "script" || y == "style" }
-
pList :: TagParser [Block]
pList = pBulletList <|> pOrderedList <|> pDefinitionList
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index fe9be439e..b0925ac68 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -776,11 +776,10 @@ rawHtmlBlocks = do
if "markdown" `notElem`
map fst as
then mzero
- else return $ substitute
- " markdown=\"1\"" "" raw
+ else return $
+ stripMarkdownAttribute raw
| otherwise -> return raw
_ -> return raw )
- -- TODO remove markdown="1" attribute from raw tags
sps <- do sp1 <- many spaceChar
sp2 <- option "" (blankline >> return "\n")
sp3 <- many spaceChar
@@ -793,6 +792,13 @@ rawHtmlBlocks = do
let combined = concat htmlBlocks
return $ if last combined == '\n' then init combined else combined
+-- remove markdown="1" attribute
+stripMarkdownAttribute :: String -> String
+stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
+ where filterAttrib (TagOpen t as) = TagOpen t
+ [(k,v) | (k,v) <- as, k /= "markdown"]
+ filterAttrib x = x
+
--
-- Tables
--
diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs
index 675b8366e..7a21f6f3a 100644
--- a/src/Text/Pandoc/SelfContained.hs
+++ b/src/Text/Pandoc/SelfContained.hs
@@ -42,7 +42,7 @@ import System.FilePath (takeExtension, dropExtension, takeDirectory, (</>))
import Data.Char (toLower, isAscii, isAlphaNum)
import Codec.Compression.GZip as Gzip
import qualified Data.ByteString.Lazy as L
-import Text.Pandoc.Shared (findDataFile)
+import Text.Pandoc.Shared (findDataFile, renderTags')
import Text.Pandoc.MIME (getMimeType)
import System.Directory (doesFileExist)
@@ -163,14 +163,3 @@ makeSelfContained userdata inp = do
out' <- mapM (convertTag userdata) tags
return $ renderTags' out'
--- repeated from HTML reader:
-renderTags' :: [Tag String] -> String
-renderTags' = renderTagsOptions
- renderOptions{ optMinimize = \x ->
- let y = map toLower x
- in y == "hr" || y == "br" ||
- y == "img" || y == "meta" ||
- y == "link"
- , optRawTag = \x ->
- let y = map toLower x
- in y == "script" || y == "style" }
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index ad28b7c23..d86f9a390 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -59,6 +59,8 @@ module Text.Pandoc.Shared (
uniqueIdent,
isHeaderBlock,
headerShift,
+ -- * TagSoup HTML handling
+ renderTags',
-- * File handling
inDirectory,
findDataFile,
@@ -89,6 +91,8 @@ import Text.Pandoc.Pretty (charWidth)
import System.Locale (defaultTimeLocale)
import Data.Time
import System.IO (stderr)
+import Text.HTML.TagSoup (renderTagsOptions, RenderOptions(..), Tag(..),
+ renderOptions)
--
-- List processing
@@ -449,6 +453,22 @@ headerShift n = bottomUp shift
shift x = x
--
+-- TagSoup HTML handling
+--
+
+-- | Render HTML tags.
+renderTags' :: [Tag String] -> String
+renderTags' = renderTagsOptions
+ renderOptions{ optMinimize = \x ->
+ let y = map toLower x
+ in y == "hr" || y == "br" ||
+ y == "img" || y == "meta" ||
+ y == "link"
+ , optRawTag = \x ->
+ let y = map toLower x
+ in y == "script" || y == "style" }
+
+--
-- File handling
--
@@ -501,3 +521,5 @@ safeRead s = case reads s of
(d,x):_
| all isSpace x -> return d
_ -> fail $ "Could not read `" ++ s ++ "'"
+
+