From 50f0cfcc1a96a418b5da9539f80499758ac207c7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 13 Nov 2016 22:41:11 +0100 Subject: HTML reader: only treat "a" element as link if it has href. Otherwise treat as span. Closes #3226. --- src/Text/Pandoc/Readers/HTML.hs | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 10a2976e5..e2fc97fbf 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -599,19 +599,31 @@ pLineBreak = do pSelfClosing (=="br") (const True) return B.linebreak +-- Unlike fromAttrib from tagsoup, this distinguishes +-- between a missing attribute and an attribute with empty content. +maybeFromAttrib :: String -> Tag String -> Maybe String +maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs +maybeFromAttrib _ _ = Nothing + pLink :: TagParser Inlines pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) - mbBaseHref <- baseHref <$> getState - let url' = fromAttrib "href" tag - let url = case (parseURIReference url', mbBaseHref) of - (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) - _ -> url' let title = fromAttrib "title" tag - let uid = fromAttrib "id" tag + -- take id from id attribute if present, otherwise name + let uid = maybe (fromAttrib "name" tag) id $ maybeFromAttrib "id" tag let cls = words $ fromAttrib "class" tag lab <- trimInlines . mconcat <$> manyTill inline (pCloses "a") - return $ B.linkWith (uid, cls, []) (escapeURI url) title lab + -- check for href; if href, then a link, otherwise a span + case maybeFromAttrib "href" tag of + Nothing -> + return $ B.spanWith (uid, cls, []) lab + Just url' -> do + mbBaseHref <- baseHref <$> getState + let url = case (parseURIReference url', mbBaseHref) of + (Just rel, Just bs) -> + show (rel `nonStrictRelativeTo` bs) + _ -> url' + return $ B.linkWith (uid, cls, []) (escapeURI url) title lab pImage :: TagParser Inlines pImage = do -- cgit v1.2.3