summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-03-03 18:19:31 +0000
committerfiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b>2007-03-03 18:19:31 +0000
commit31c030e3a55c3bc9d4a606e9658dd30a9aa1c938 (patch)
tree74d8041eb6ca26258a74164e29363342fad133bf
parentf99cedd236f06544b5937d5d362ada3059a5ca4b (diff)
Added --inline-links option to force links in HTML to be parsed
as inline links, rather than reference links. (Addresses Issue #4.) git-svn-id: https://pandoc.googlecode.com/svn/trunk@554 788f1e2b-df1e-0410-8736-df70ead52e1b
-rw-r--r--README3
-rw-r--r--man/man1/html2markdown.12
-rw-r--r--man/man1/pandoc.13
-rw-r--r--src/Main.hs21
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs9
-rw-r--r--src/Text/Pandoc/Shared.hs2
6 files changed, 31 insertions, 9 deletions
diff --git a/README b/README
index 8abdad6a9..4faa7502e 100644
--- a/README
+++ b/README
@@ -241,6 +241,9 @@ preserved, rather than converted to spaces (the default).
`--strict` specifies that strict markdown syntax is to be used, without
pandoc's usual extensions and variants (described below).
+`--inline-links` causes links in HTML to be parsed as inline links, rather
+than reference links.
+
`-R` or `--parse-raw` causes the HTML and LaTeX readers to parse HTML
codes and LaTeX environments that it can't translate as raw HTML or
LaTeX. Raw HTML can be printed in markdown, reStructuredText, HTML,
diff --git a/man/man1/html2markdown.1 b/man/man1/html2markdown.1
index 78c27808e..3ffda7a97 100644
--- a/man/man1/html2markdown.1
+++ b/man/man1/html2markdown.1
@@ -32,6 +32,8 @@ Write output to \fIFILE\fR instead of STDOUT.
.B \-\-strict
Use strict markdown syntax, with no extensions or variants.
.TP
+.B \-\-inline\-links
+Parse links in HTML as inline links, rather than reference links.
.TP
.B \-R, \-\-parse-raw
Parse untranslatable HTML codes as raw HTML.
diff --git a/man/man1/pandoc.1 b/man/man1/pandoc.1
index 8c3c62e15..8a0012a3d 100644
--- a/man/man1/pandoc.1
+++ b/man/man1/pandoc.1
@@ -105,6 +105,9 @@ Specify tab stop (default is 4).
.B \-\-strict
Use strict markdown syntax, with no extensions or variants.
.TP
+.B \-\-inline\-links
+Parse links in HTML as inline links, rather than reference links.
+.TP
.B \-R, \-\-parse-raw
Parse untranslatable HTML codes and LaTeX environments as raw HTML
or LaTeX, instead of ignoring them.
diff --git a/src/Main.hs b/src/Main.hs
index 275a89f0d..0bb246fa5 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -118,6 +118,7 @@ data Opt = Opt
, optDumpArgs :: Bool -- ^ Output command-line arguments
, optIgnoreArgs :: Bool -- ^ Ignore command-line arguments
, optStrict :: Bool -- ^ Use strict markdown syntax
+ , optInlineLinks :: Bool -- ^ Use inline links in parsing HTML
}
-- | Defaults for command-line options.
@@ -143,6 +144,7 @@ defaultOpts = Opt
, optDumpArgs = False
, optIgnoreArgs = False
, optStrict = False
+ , optInlineLinks = False
}
-- | A list of functions, each transforming the options data structure
@@ -188,6 +190,11 @@ options =
(\opt -> return opt { optStrict = True } ))
"" -- "Use strict markdown syntax with no extensions"
+ , Option "" ["inline-links"]
+ (NoArg
+ (\opt -> return opt { optInlineLinks = True } ))
+ "" -- "Use inline links in parsing HTML"
+
, Option "R" ["parse-raw"]
(NoArg
(\opt -> return opt { optParseRaw = True }))
@@ -398,6 +405,7 @@ main = do
, optDumpArgs = dumpArgs
, optIgnoreArgs = ignoreArgs
, optStrict = strict
+ , optInlineLinks = inlineLinks
} = opts
if dumpArgs
@@ -440,12 +448,13 @@ main = do
let removeCRs str = filter (/= '\r') str -- remove DOS-style line endings
let filter = tabFilter . addBlank . removeCRs
let startParserState =
- defaultParserState { stateParseRaw = parseRaw,
- stateTabStop = tabStop,
- stateStandalone = standalone && (not strict),
- stateSmart = smart || writerName' == "latex",
- stateColumns = columns,
- stateStrict = strict }
+ defaultParserState { stateParseRaw = parseRaw,
+ stateTabStop = tabStop,
+ stateStandalone = standalone && (not strict),
+ stateSmart = smart || writerName' == "latex",
+ stateColumns = columns,
+ stateStrict = strict,
+ stateInlineLinks = inlineLinks }
let csslink = if (css == "")
then ""
else "<link rel=\"stylesheet\" href=\"" ++ css ++
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 3fcb33698..8ebf02c33 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -449,15 +449,18 @@ extractAttribute name ((attrName, contents):rest) =
then Just (decodeEntities contents)
else extractAttribute name rest
-link = try (do
+link = try $ do
(tag, attributes) <- htmlTag "a"
url <- case (extractAttribute "href" attributes) of
Just url -> do {return url}
Nothing -> fail "no href"
let title = fromMaybe "" (extractAttribute "title" attributes)
label <- inlinesTilEnd "a"
- ref <- generateReference url title
- return (Link (normalizeSpaces label) ref))
+ state <- getState
+ ref <- if stateInlineLinks state
+ then return (Src url title)
+ else generateReference url title
+ return $ Link (normalizeSpaces label) ref
image = try (do
(tag, attributes) <- htmlTag "img"
diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs
index 87718030f..55e093ee7 100644
--- a/src/Text/Pandoc/Shared.hs
+++ b/src/Text/Pandoc/Shared.hs
@@ -133,6 +133,7 @@ data ParserState = ParserState
stateSmart :: Bool, -- ^ Use smart typography
stateColumns :: Int, -- ^ Number of columns in
-- terminal (used for tables)
+ stateInlineLinks :: Bool, -- ^ Parse html links as inline
stateHeaderTable :: [HeaderType] -- ^ List of header types used,
-- in what order (rst only)
}
@@ -155,6 +156,7 @@ defaultParserState =
stateStrict = False,
stateSmart = False,
stateColumns = 80,
+ stateInlineLinks = False,
stateHeaderTable = [] }
-- | Indent string as a block.