summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2010-12-22 19:20:27 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2010-12-22 19:20:27 -0800
commitc08ca6fa6d58c6a52c93b126d1a704b8202f9a36 (patch)
tree894046c25c30218b5c808a1b2f6251953fdb07dd
parent4bfe140ed1cc7a07d762e4920e7365427d0b9618 (diff)
HTML reader: Simplified parsing of <script> sections.
I had previously assumed that we needed to ignore </script> occurring in a string literal or JavaScript comment. It turns out, though, that browsers aren't that smart.
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs25
1 files changed, 1 insertions, 24 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 1bbb11e62..c25a73418 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -288,32 +288,9 @@ htmlScript :: GenParser Char ParserState [Char]
htmlScript = try $ do
lookAhead $ htmlOpenTag "script"
open <- anyHtmlTag
- rest <- liftM concat $ manyTill scriptChunk (htmlEndTag "script")
+ rest <- manyTill anyChar (htmlEndTag "script")
return $ open ++ rest ++ "</script>"
-scriptChunk :: GenParser Char ParserState [Char]
-scriptChunk = jsComment <|> jsString <|> jsChars
- where jsComment = jsEndlineComment <|> jsMultilineComment
- jsString = jsSingleQuoteString <|> jsDoubleQuoteString
- jsChars = many1 (noneOf "<\"'*/") <|> count 1 anyChar
- jsEndlineComment = try $ do
- string "//"
- res <- manyTill anyChar newline
- return ("//" ++ res)
- jsMultilineComment = try $ do
- string "/*"
- res <- manyTill anyChar (try $ string "*/")
- return ("/*" ++ res ++ "*/")
- jsSingleQuoteString = stringwith '\''
- jsDoubleQuoteString = stringwith '"'
- charWithEsc escapable = try $
- (try $ char '\\' >> oneOf ('\\':escapable) >>= \x -> return ['\\',x])
- <|> count 1 anyChar
- stringwith c = try $ do
- char c
- res <- liftM concat $ manyTill (charWithEsc [c]) (char c)
- return (c : (res ++ [c]))
-
-- | Parses material between style tags.
-- Style tags must be treated differently, because they can contain CSS
htmlStyle :: GenParser Char ParserState [Char]