summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2015-07-10 10:28:39 -0700
committer John MacFarlane <jgm@berkeley.edu> 2015-07-10 10:33:27 -0700
commit 99fe8594d94573b8ba8ec1d1e47b57444de4e4cb (patch)
tree aceaded36d6dd830095ef5b759373b8292e9abad
parent 5f730ee804be35d9ff52adcb929e8b8d7422bfea (diff)
Avoid parsing partial URLs as HTML tags.
Closes #2277.
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 9
-rw-r--r-- tests/Tests/Readers/Markdown.hs 3
2 files changed, 11 insertions, 1 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index f2f97dbc4..361d64361 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -911,8 +911,15 @@ htmlTag :: Monad m
htmlTag f = try $ do
lookAhead (char '<')
inp <- getInput
- let (next : _) = canonicalizeTags $ parseTags inp
+ let hasTagWarning (TagWarning _:_) = True
+ hasTagWarning _ = False
+ let (next : rest) = canonicalizeTags $ parseTagsOptions
+ parseOptions{ optTagWarning = True } inp
guard $ f next
+ -- we get a TagWarning on things like
+ -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>
+ -- which should NOT be parsed as an HTML tag, see #2277
+ guard $ not $ hasTagWarning rest
case next of
TagComment s
| "<!--" `isPrefixOf` inp -> do
diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs
index ed79f9e3d..e08ac1607 100644
--- a/tests/Tests/Readers/Markdown.hs
+++ b/tests/Tests/Readers/Markdown.hs
@@ -195,6 +195,9 @@ tests = [ testGroup "inline code"
[ "with unicode dash following" =:
"<http://foo.bar>\8212" =?> para (autolink "http://foo.bar" <>
str "\8212")
+ , "a partial URL (#2277)" =:
+ "<www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>" =?>
+ para (text "<www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66>")
]
, testGroup "Headers"
[ "blank line before header" =: