summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Main.hs65
-rw-r--r--README8
-rw-r--r--Text/Pandoc/Shared.hs73
-rw-r--r--Text/Pandoc/Writers/HTML.hs16
-rw-r--r--man/man1/pandoc.1.md8
5 files changed, 110 insertions, 60 deletions
diff --git a/Main.hs b/Main.hs
index 26057ff58..ffd71c635 100644
--- a/Main.hs
+++ b/Main.hs
@@ -32,7 +32,7 @@ writers.
module Main where
import Text.Pandoc
import Text.Pandoc.ODT
-import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy )
+import Text.Pandoc.Shared ( HTMLMathMethod (..), splitBy, ObfuscationMethod (..) )
import Text.Pandoc.Highlighting ( languages )
import System.Environment ( getArgs, getProgName, getEnvironment )
import System.Exit ( exitWith, ExitCode (..) )
@@ -156,6 +156,7 @@ data Opt = Opt
, optWrapText :: Bool -- ^ Wrap text
, optSanitizeHTML :: Bool -- ^ Sanitize HTML
, optPlugins :: [Pandoc -> IO Pandoc] -- ^ Plugins to apply
+ , optEmailObfuscation :: ObfuscationMethod
#ifdef _CITEPROC
, optBiblioFile :: String
, optBiblioFormat :: String
@@ -191,6 +192,7 @@ defaultOpts = Opt
, optWrapText = True
, optSanitizeHTML = False
, optPlugins = []
+ , optEmailObfuscation = JavascriptObfuscation
#ifdef _CITEPROC
, optBiblioFile = []
, optBiblioFormat = []
@@ -301,6 +303,19 @@ options =
(\opt -> return opt { optSanitizeHTML = True }))
"" -- "Sanitize HTML"
+ , Option "" ["email-obfuscation"]
+ (ReqArg
+ (\arg opt -> do
+ method <- case arg of
+ "references" -> return ReferenceObfuscation
+ "javascript" -> return JavascriptObfuscation
+ "none" -> return NoObfuscation
+ _ -> hPutStrLn stderr ("Error: Unknown obfuscation method: " ++ arg) >>
+ exitWith (ExitFailure 6)
+ return opt { optEmailObfuscation = method })
+ "none|javascript|references")
+ "" -- "Method for obfuscating email in HTML"
+
, Option "" ["toc", "table-of-contents"]
(NoArg
(\opt -> return opt { optTableOfContents = True }))
@@ -533,10 +548,11 @@ main = do
, optWrapText = wrap
, optSanitizeHTML = sanitize
, optPlugins = plugins
+ , optEmailObfuscation = obfuscationMethod
#ifdef _CITEPROC
- , optBiblioFile = biblioFile
- , optBiblioFormat = biblioFormat
- , optCslFile = cslFile
+ , optBiblioFile = biblioFile
+ , optBiblioFormat = biblioFormat
+ , optCslFile = cslFile
#endif
} = opts
@@ -614,25 +630,28 @@ main = do
let header = (if (customHeader == "DEFAULT")
then defaultHeader
else customHeader) ++ csslink ++ includeHeader
- let writerOptions = WriterOptions { writerStandalone = standalone',
- writerHeader = header,
- writerTitlePrefix = titlePrefix,
- writerTabStop = tabStop,
- writerTableOfContents = toc &&
- (not strict) &&
- writerName' /= "s5",
- writerHTMLMathMethod = mathMethod,
- writerS5 = (writerName' == "s5"),
- writerIgnoreNotes = False,
- writerIncremental = incremental,
- writerNumberSections = numberSections,
- writerIncludeBefore = includeBefore,
- writerIncludeAfter = includeAfter,
- writerStrictMarkdown = strict,
- writerReferenceLinks = referenceLinks,
- writerWrapText = wrap,
- writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' ||
- lhsExtension [outputFile] }
+ let writerOptions = WriterOptions { writerStandalone = standalone',
+ writerHeader = header,
+ writerTitlePrefix = titlePrefix,
+ writerTabStop = tabStop,
+ writerTableOfContents = toc &&
+ (not strict) &&
+ writerName' /= "s5",
+ writerHTMLMathMethod = mathMethod,
+ writerS5 = (writerName' == "s5"),
+ writerIgnoreNotes = False,
+ writerIncremental = incremental,
+ writerNumberSections = numberSections,
+ writerIncludeBefore = includeBefore,
+ writerIncludeAfter = includeAfter,
+ writerStrictMarkdown = strict,
+ writerReferenceLinks = referenceLinks,
+ writerWrapText = wrap,
+ writerLiterateHaskell = "+lhs" `isSuffixOf` writerName' ||
+ lhsExtension [outputFile],
+ writerEmailObfuscation = if strict
+ then ReferenceObfuscation
+ else obfuscationMethod }
if isNonTextOutput writerName' && outputFile == "-"
then do hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to stdout.\n" ++
diff --git a/README b/README
index 4f43b0fc2..d2bc5ce65 100644
--- a/README
+++ b/README
@@ -383,6 +383,14 @@ For further documentation, see the `pandoc(1)` man page.
are omitted. URIs in links and images are also checked against a
whitelist of URI schemes.
+`--email-obfuscation`*=none|javascript|references*
+: specifies a method for obfuscating `mailto:` links in HTML documents.
+ *none* leaves `mailto:` links as they are. *javascript* obfuscates
+ them using javascript. *references* obfuscates them by printing their
+ letters as decimal or hexadecimal character references. If `--strict`
+ is specified, *references* is used regardless of the presence
+ of this option.
+
`--dump-args`
: is intended to make it easier to create wrapper scripts that use
Pandoc. It causes Pandoc to dump information about the arguments
diff --git a/Text/Pandoc/Shared.hs b/Text/Pandoc/Shared.hs
index c607a4e5b..6854e5ae6 100644
--- a/Text/Pandoc/Shared.hs
+++ b/Text/Pandoc/Shared.hs
@@ -99,6 +99,7 @@ module Text.Pandoc.Shared (
isHeaderBlock,
-- * Writer options
HTMLMathMethod (..),
+ ObfuscationMethod (..),
WriterOptions (..),
defaultWriterOptions,
-- * File handling
@@ -889,45 +890,53 @@ data HTMLMathMethod = PlainMath
| MimeTeX String -- url of mimetex.cgi
deriving (Show, Read, Eq)
+-- | Methods for obfuscating email addresses in HTML.
+data ObfuscationMethod = NoObfuscation
+ | ReferenceObfuscation
+ | JavascriptObfuscation
+ deriving (Show, Read, Eq)
+
-- | Options for writers
data WriterOptions = WriterOptions
- { writerStandalone :: Bool -- ^ Include header and footer
- , writerHeader :: String -- ^ Header for the document
- , writerTitlePrefix :: String -- ^ Prefix for HTML titles
- , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
- , writerTableOfContents :: Bool -- ^ Include table of contents
- , writerS5 :: Bool -- ^ We're writing S5
- , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
- , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
- , writerIncremental :: Bool -- ^ Incremental S5 lists
- , writerNumberSections :: Bool -- ^ Number sections in LaTeX
- , writerIncludeBefore :: String -- ^ String to include before the body
- , writerIncludeAfter :: String -- ^ String to include after the body
- , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
- , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
- , writerWrapText :: Bool -- ^ Wrap text to line length
- , writerLiterateHaskell :: Bool -- ^ Write as literate haskell
+ { writerStandalone :: Bool -- ^ Include header and footer
+ , writerHeader :: String -- ^ Header for the document
+ , writerTitlePrefix :: String -- ^ Prefix for HTML titles
+ , writerTabStop :: Int -- ^ Tabstop for conversion btw spaces and tabs
+ , writerTableOfContents :: Bool -- ^ Include table of contents
+ , writerS5 :: Bool -- ^ We're writing S5
+ , writerHTMLMathMethod :: HTMLMathMethod -- ^ How to print math in HTML
+ , writerIgnoreNotes :: Bool -- ^ Ignore footnotes (used in making toc)
+ , writerIncremental :: Bool -- ^ Incremental S5 lists
+ , writerNumberSections :: Bool -- ^ Number sections in LaTeX
+ , writerIncludeBefore :: String -- ^ String to include before the body
+ , writerIncludeAfter :: String -- ^ String to include after the body
+ , writerStrictMarkdown :: Bool -- ^ Use strict markdown syntax
+ , writerReferenceLinks :: Bool -- ^ Use reference links in writing markdown, rst
+ , writerWrapText :: Bool -- ^ Wrap text to line length
+ , writerLiterateHaskell :: Bool -- ^ Write as literate haskell
+ , writerEmailObfuscation :: ObfuscationMethod -- ^ How to obfuscate emails
} deriving Show
-- | Default writer options.
defaultWriterOptions :: WriterOptions
defaultWriterOptions =
- WriterOptions { writerStandalone = False
- , writerHeader = ""
- , writerTitlePrefix = ""
- , writerTabStop = 4
- , writerTableOfContents = False
- , writerS5 = False
- , writerHTMLMathMethod = PlainMath
- , writerIgnoreNotes = False
- , writerIncremental = False
- , writerNumberSections = False
- , writerIncludeBefore = ""
- , writerIncludeAfter = ""
- , writerStrictMarkdown = False
- , writerReferenceLinks = False
- , writerWrapText = True
- , writerLiterateHaskell = False
+ WriterOptions { writerStandalone = False
+ , writerHeader = ""
+ , writerTitlePrefix = ""
+ , writerTabStop = 4
+ , writerTableOfContents = False
+ , writerS5 = False
+ , writerHTMLMathMethod = PlainMath
+ , writerIgnoreNotes = False
+ , writerIncremental = False
+ , writerNumberSections = False
+ , writerIncludeBefore = ""
+ , writerIncludeAfter = ""
+ , writerStrictMarkdown = False
+ , writerReferenceLinks = False
+ , writerWrapText = True
+ , writerLiterateHaskell = False
+ , writerEmailObfuscation = JavascriptObfuscation
}
--
diff --git a/Text/Pandoc/Writers/HTML.hs b/Text/Pandoc/Writers/HTML.hs
index 087fff623..fb7320e92 100644
--- a/Text/Pandoc/Writers/HTML.hs
+++ b/Text/Pandoc/Writers/HTML.hs
@@ -193,10 +193,13 @@ parseMailto ('m':'a':'i':'l':'t':'o':':':addr) =
in Just (name', domain)
parseMailto _ = Nothing
--- | Obfuscate a "mailto:" link using Javascript.
+-- | Obfuscate a "mailto:" link.
obfuscateLink :: WriterOptions -> String -> String -> Html
+obfuscateLink opts txt s | writerEmailObfuscation opts == NoObfuscation =
+ anchor ! [href s] << txt
obfuscateLink opts txt s =
- let s' = map toLower s
+ let meth = writerEmailObfuscation opts
+ s' = map toLower s
in case parseMailto s' of
(Just (name', domain)) ->
let domain' = substitute "." " dot " domain
@@ -206,17 +209,20 @@ obfuscateLink opts txt s =
then ("'<code>'+e+'</code>'", name' ++ " at " ++ domain')
else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++
domain' ++ ")")
- in if writerStrictMarkdown opts
- then -- need to use primHtml or &'s are escaped to &amp; in URL
+ in case meth of
+ ReferenceObfuscation ->
+ -- need to use primHtml or &'s are escaped to &amp; in URL
primHtml $ "<a href=\"" ++ (obfuscateString s')
++ "\">" ++ (obfuscateString txt) ++ "</a>"
- else (script ! [thetype "text/javascript"] $
+ JavascriptObfuscation ->
+ (script ! [thetype "text/javascript"] $
primHtml ("\n<!--\nh='" ++
obfuscateString domain ++ "';a='" ++ at' ++ "';n='" ++
obfuscateString name' ++ "';e=n+a+h;\n" ++
"document.write('<a h'+'ref'+'=\"ma'+'ilto'+':'+e+'\">'+" ++
linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++
noscript (primHtml $ obfuscateString altText)
+ _ -> error $ "Unknown obfuscation method: " ++ show meth
_ -> anchor ! [href s] $ primHtml txt -- malformed email
-- | Obfuscate character as entity.
diff --git a/man/man1/pandoc.1.md b/man/man1/pandoc.1.md
index a418e608b..467a62b0a 100644
--- a/man/man1/pandoc.1.md
+++ b/man/man1/pandoc.1.md
@@ -146,6 +146,14 @@ to Pandoc. Or use `html2markdown`(1), a wrapper around `pandoc`.
are omitted. URIs in links and images are also checked against a
whitelist of URI schemes.
+\--email-obfuscation=*none|javascript|references*
+: Specify a method for obfuscating `mailto:` links in HTML documents.
+ *none* leaves `mailto:` links as they are. *javascript* obfuscates
+ them using javascript. *references* obfuscates them by printing their
+ letters as decimal or hexadecimal character references.
+ If `--strict` is specified, *references* is used regardless of the
+ presence of this option.
+
\--toc, \--table-of-contents
: Include an automatically generated table of contents (HTML, markdown,
RTF) or an instruction to create one (LaTeX, reStructuredText).