From 1e8a25ad69db8fa372ac4ddbb5ac05ffb00ed052 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 11 Oct 2015 17:06:26 -0700 Subject: Percent-encode more special characters in URLs. HTML, LaTeX writers adjusted. The special characters are '<','>','|','"','{','}','[',']','^', '`'. Closes #1640, #2377. --- src/Text/Pandoc/Shared.hs | 7 +++++-- src/Text/Pandoc/Writers/LaTeX.hs | 10 +++++----- tests/Tests/Readers/Markdown.hs | 6 ++++-- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 1fe3db5f7..1e06aa383 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -288,9 +288,12 @@ toRomanNumeral x = _ | x >= 1 -> "I" ++ toRomanNumeral (x - 1) _ -> "" --- | Escape whitespace in URI. +-- | Escape whitespace and some punctuation characters in URI. escapeURI :: String -> String -escapeURI = escapeURIString (not . isSpace) +escapeURI = escapeURIString (not . needsEscaping) + where needsEscaping c = isSpace c || c `elem` + ['<','>','|','"','{','}','[',']','^', '`'] + -- | Convert tabs to spaces and filter out DOS line endings. -- Tabs will be preserved if tab stop is set to 0. diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index f424d8d4a..0caa80795 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -242,7 +242,7 @@ stringToLaTeX ctx (x:xs) = do '^' -> "\\^{}" ++ rest '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows | otherwise -> "\\textbackslash{}" ++ rest - '|' -> "\\textbar{}" ++ rest + '|' | not isUrl -> "\\textbar{}" ++ rest '<' -> "\\textless{}" ++ rest '>' -> "\\textgreater{}" ++ rest '[' -> "{[}" ++ rest -- to avoid interpretation as @@ -848,17 +848,17 @@ inlineToLaTeX (Link txt (src, _)) = case txt of [Str x] | escapeURI x == src -> -- autolink do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) return $ text $ "\\url{" ++ src' ++ "}" [Str x] | Just rest <- stripPrefix "mailto:" src, escapeURI x == rest -> -- email autolink do modify $ \s -> s{ stUrl = True } - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) contents <- inlineListToLaTeX txt return $ "\\href" <> braces (text src') <> braces ("\\nolinkurl" <> braces contents) _ -> do contents <- inlineListToLaTeX txt - src' <- stringToLaTeX URLString src + src' <- stringToLaTeX URLString (escapeURI src) return $ text ("\\href{" ++ src' ++ "}{") <> contents <> char '}' inlineToLaTeX (Image _ (source, _)) = do @@ -866,7 +866,7 @@ inlineToLaTeX (Image _ (source, _)) = do let source' = if isURI source then source else unEscapeString source - source'' <- stringToLaTeX URLString source' + source'' <- stringToLaTeX URLString (escapeURI source') inHeading <- gets stInHeading return $ (if inHeading then "\\protect\\includegraphics" else "\\includegraphics") diff --git a/tests/Tests/Readers/Markdown.hs b/tests/Tests/Readers/Markdown.hs index 5b39ae7e2..c24d2293f 100644 --- a/tests/Tests/Readers/Markdown.hs +++ b/tests/Tests/Readers/Markdown.hs @@ -66,9 +66,11 @@ bareLinkTests = , ("http://en.wikipedia.org/wiki/Sprite_(computer_graphics)", autolink "http://en.wikipedia.org/wiki/Sprite_(computer_graphics)") , ("http://en.wikipedia.org/wiki/Sprite_[computer_graphics]", - autolink "http://en.wikipedia.org/wiki/Sprite_[computer_graphics]") + link "http://en.wikipedia.org/wiki/Sprite_%5Bcomputer_graphics%5D" "" + (str "http://en.wikipedia.org/wiki/Sprite_[computer_graphics]")) , ("http://en.wikipedia.org/wiki/Sprite_{computer_graphics}", - autolink "http://en.wikipedia.org/wiki/Sprite_{computer_graphics}") + link "http://en.wikipedia.org/wiki/Sprite_%7Bcomputer_graphics%7D" "" + (str "http://en.wikipedia.org/wiki/Sprite_{computer_graphics}")) , ("http://example.com/Notification_Center-GitHub-20101108-140050.jpg", autolink "http://example.com/Notification_Center-GitHub-20101108-140050.jpg") , ("https://github.com/github/hubot/blob/master/scripts/cream.js#L20-20", -- cgit v1.2.3