summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2015-07-14 10:20:09 -0700
committer John MacFarlane <jgm@berkeley.edu> 2015-07-14 10:24:39 -0700
commit 9cdfd4f6491cbf9e3002e88be8be9ebeb48ba2bb (patch)
tree d666e66bb87a2932d164f05d5d297629073d006f /src/Text/Pandoc
parent dce6a7388a5f546ebbfd192febcb1753b743b377 (diff)
Improved bare autolink detection.
Previously we disallowed `-` at the end of an autolink, and disallowed the combination `=-`. This commit liberalizes the rules for allowing punctuation in a bare URI. Added test cases. One potential drawback is that you can no longer put a bare URI between em dashes, as in: `this uri---http://example.com---is an example`. But in this respect we now match GitHub's treatment of bare URIs. Closes #2299.
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- src/Text/Pandoc/Parsing.hs 5
1 files changed, 2 insertions, 3 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 82e7e2c33..5dc991be2 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -448,13 +448,12 @@ uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
uri = try $ do
scheme <- uriScheme
char ':'
- -- We allow punctuation except at the end, since
+ -- We allow sentence punctuation except at the end, since
-- we don't want the trailing '.' in 'http://google.com.' We want to allow
-- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation)
-- as a URL, while NOT picking up the closing paren in
-- (http://wikipedia.org). So we include balanced parens in the URL.
- let isWordChar c = isAlphaNum c || c == '_' || c == '/' || c == '+' ||
- not (isAscii c)
+ let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-"
let wordChar = satisfy isWordChar
let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit)
let entity = () <$ characterReference