summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2016-11-26 22:28:28 +0100
committerJohn MacFarlane <jgm@berkeley.edu>2016-11-26 22:28:28 +0100
commit5222572033e12948de2786122532f3c589145fe1 (patch)
treeeacda0eeba588d36d4011f23fb6732f2c5ad2709
parent7b4a12a532d02d9ea0069d73f118537f045bd42f (diff)
HTML reader: improved table parsing.
We now check explicitly for non-1 rowspan or colspan attributes, and fail when we encounter them. Previously we checked that each row had the same number of cells, but that could be true even with rowspans/colspans. And there are cases where it isn't true in tables that we can handle fine -- e.g. when a tr element is empty. So now we just pad rows with empty cells when needed. Closes #3027.
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs35
1 files changed, 24 insertions, 11 deletions
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index e2fc97fbf..d4360e521 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -435,17 +435,20 @@ pTable = try $ do
rowsLs <- many pTBody
rows' <- pOptInTag "tfoot" $ many pTr
TagClose _ <- pSatisfy (~== TagClose "table")
- let rows = (concat rowsLs) ++ rows'
+ let rows'' = (concat rowsLs) ++ rows'
-- fail on empty table
- guard $ not $ null head' && null rows
+ guard $ not $ null head' && null rows''
let isSinglePlain x = case B.toList x of
[] -> True
[Plain _] -> True
_ -> False
- let isSimple = all isSinglePlain $ concat (head':rows)
- let cols = length $ if null head' then head rows else head'
- -- fail if there are colspans or rowspans
- guard $ all (\r -> length r == cols) rows
+ let isSimple = all isSinglePlain $ concat (head':rows'')
+ let cols = length $ if null head' then head rows'' else head'
+ -- add empty cells to short rows
+ let addEmpties r = case length r - cols of
+ n | n > 1 -> r ++ replicate n []
+ | otherwise -> r
+ let rows = addEmpties rows''
let aligns = replicate cols AlignDefault
let widths = if null widths'
then if isSimple
@@ -471,10 +474,17 @@ pColgroup = try $ do
skipMany pBlank
manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank
+noColOrRowSpans :: Tag String -> Bool
+noColOrRowSpans t = isNullOrOne "colspan" && isNullOrOne "rowspan"
+ where isNullOrOne x = case fromAttrib x t of
+ "" -> True
+ "1" -> True
+ _ -> False
+
pCell :: String -> TagParser [Blocks]
pCell celltype = try $ do
skipMany pBlank
- res <- pInTags celltype block
+ res <- pInTags' celltype noColOrRowSpans block
skipMany pBlank
return [res]
@@ -695,10 +705,13 @@ pInlinesInTags :: String -> (Inlines -> Inlines)
-> TagParser Inlines
pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline
-pInTags :: (Monoid a) => String -> TagParser a
- -> TagParser a
-pInTags tagtype parser = try $ do
- pSatisfy (~== TagOpen tagtype [])
+pInTags :: (Monoid a) => String -> TagParser a -> TagParser a
+pInTags tagtype parser = pInTags' tagtype (const True) parser
+
+pInTags' :: (Monoid a) => String -> (Tag String -> Bool) -> TagParser a
+ -> TagParser a
+pInTags' tagtype tagtest parser = try $ do
+ pSatisfy (\t -> t ~== TagOpen tagtype [] && tagtest t)
mconcat <$> manyTill parser (pCloses tagtype <|> eof)
-- parses p, preceeded by an optional opening tag