summary refs log tree commit diff
path: root/src/Text/Pandoc/Readers/HTML.hs
diff options
context:
space:
mode:
author bucklereed <horridimpfoobarbaz@chammy.info> 2017-08-09 17:10:12 +0100
committer John MacFarlane <jgm@berkeley.edu> 2017-08-09 09:10:12 -0700
commit db55f7c1b243cbc82c70276c7dfb9c0403e369b0 (patch)
tree 0a4621dc208124353217f4f46cf3e78ac731c28f /src/Text/Pandoc/Readers/HTML.hs
parent cfa597fc2ac2a6dceb0b3387a2ee885ec75bc7d1 (diff)
HTML reader: parse <main> like <div role=main>. (#3791)
* HTML reader: parse <main> like <div role=main>.
* <main> closes <p> and behaves like a block element generally
Diffstat (limited to 'src/Text/Pandoc/Readers/HTML.hs')
-rw-r--r-- src/Text/Pandoc/Readers/HTML.hs 18
1 files changed, 11 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 3a0d6eb14..7b9ab38fd 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -54,7 +54,7 @@ import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
import qualified Data.Map as M
import Data.Foldable ( for_ )
-import Data.Maybe ( fromMaybe, isJust)
+import Data.Maybe ( fromMaybe, isJust, isNothing )
import Data.List ( intercalate, isPrefixOf )
import Data.Char ( isDigit, isLetter, isAlphaNum )
import Control.Monad ( guard, mzero, void, unless )
@@ -377,6 +377,7 @@ pDiv = try $ do
guardEnabled Ext_native_divs
let isDivLike "div" = True
isDivLike "section" = True
+ isDivLike "main" = True
isDivLike _ = False
TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True)
let attr = toStringAttr attr'
@@ -385,7 +386,10 @@ pDiv = try $ do
let classes' = if tag == "section"
then "section":classes
else classes
- return $ B.divWith (ident, classes', kvs) contents
+ kvs' = if tag == "main" && isNothing (lookup "role" kvs)
+ then ("role", "main"):kvs
+ else kvs
+ return $ B.divWith (ident, classes', kvs') contents
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do
@@ -940,7 +944,7 @@ blockHtmlTags = Set.fromList
"dir", "div", "dl", "dt", "fieldset", "figcaption", "figure",
"footer", "form", "h1", "h2", "h3", "h4",
"h5", "h6", "head", "header", "hgroup", "hr", "html",
- "isindex", "menu", "noframes", "ol", "output", "p", "pre",
+ "isindex", "main", "menu", "noframes", "ol", "output", "p", "pre",
"section", "table", "tbody", "textarea",
"thead", "tfoot", "ul", "dd",
"dt", "frameset", "li", "tbody", "td", "tfoot",
@@ -1022,10 +1026,10 @@ _ `closes` "html" = False
"optgroup" `closes` "optgroup" = True
"optgroup" `closes` "option" = True
"option" `closes` "option" = True
--- http://www.w3.org/TR/html-markup/p.html
+-- https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
x `closes` "p" | x `elem` ["address", "article", "aside", "blockquote",
"dir", "div", "dl", "fieldset", "footer", "form", "h1", "h2", "h3", "h4",
- "h5", "h6", "header", "hr", "menu", "nav", "ol", "p", "pre", "section",
+ "h5", "h6", "header", "hr", "main", "menu", "nav", "ol", "p", "pre", "section",
"table", "ul"] = True
"meta" `closes` "meta" = True
"form" `closes` "form" = True
@@ -1038,8 +1042,8 @@ t `closes` "select" | t /= "option" = True
"tfoot" `closes` t | t `elem` ["thead","colgroup"] = True
"tbody" `closes` t | t `elem` ["tbody","tfoot","thead","colgroup"] = True
t `closes` t2 |
- t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","p"] &&
- t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div"
+ t `elem` ["h1","h2","h3","h4","h5","h6","dl","ol","ul","table","div","main","p"] &&
+ t2 `elem` ["h1","h2","h3","h4","h5","h6","p" ] = True -- not "div" or "main"
t1 `closes` t2 |
t1 `Set.member` blockTags &&
t2 `Set.notMember` blockTags &&