summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2016-07-18 22:40:45 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2016-07-18 22:40:45 -0700
commit3490932d21d72a7708963794aeffc925ecda74f4 (patch)
tree04734ae5954d14f7db4bcc6c36d01bd36679bef8 /src/Text/Pandoc
parentd9880fe3408baecd8ff7ae7a74291feea6f984e7 (diff)
Textile reader: improved table parsing.
We now handle cell and row attributes, mostly by skipping them. However, alignments are now handled properly. Since in pandoc alignment is per-column, not per-cell, we try to devine column alignments from cell alignments. Table captions are also now parsed, and textile indicators for thead and tfoot no longer cause parse failure. (However, a row designated as tfoot will just be a regular row in pandoc.)
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs84
1 files changed, 62 insertions, 22 deletions
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index 7c53ef28d..d9cdfd66f 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -62,7 +62,7 @@ import Text.Pandoc.Shared (trim)
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
import Text.HTML.TagSoup.Match
-import Data.List ( intercalate )
+import Data.List ( intercalate, transpose )
import Data.Char ( digitToInt, isUpper )
import Control.Monad ( guard, liftM, when )
import Text.Pandoc.Compat.Monoid ((<>))
@@ -134,7 +134,7 @@ blockParsers = [ codeBlock
, anyList
, rawHtmlBlock
, rawLaTeXBlock'
- , maybeExplicitBlock "table" table
+ , table
, maybeExplicitBlock "p" para
, mempty <$ blanklines
]
@@ -328,38 +328,78 @@ para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
+toAlignment :: Char -> Alignment
+toAlignment '<' = AlignLeft
+toAlignment '>' = AlignRight
+toAlignment '=' = AlignCenter
+toAlignment _ = AlignDefault
+
+cellAttributes :: Parser [Char] ParserState (Bool, Alignment)
+cellAttributes = try $ do
+ isHeader <- option False (True <$ char '_')
+ -- we just ignore colspan and rowspan markers:
+ optional $ try $ oneOf "/\\" >> many1 digit
+ -- we pay attention to alignments:
+ alignment <- option AlignDefault $ toAlignment <$> oneOf "<>="
+ -- ignore other attributes for now:
+ _ <- attributes
+ char '.'
+ return (isHeader, alignment)
+
-- | A table cell spans until a pipe |
-tableCell :: Bool -> Parser [Char] ParserState Blocks
-tableCell headerCell = try $ do
+tableCell :: Parser [Char] ParserState ((Bool, Alignment), Blocks)
+tableCell = try $ do
char '|'
- when headerCell $ () <$ string "_."
+ (isHeader, alignment) <- option (False, AlignDefault) $ cellAttributes
notFollowedBy blankline
raw <- trim <$>
many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline))
content <- mconcat <$> parseFromString (many inline) raw
- return $ B.plain content
+ return ((isHeader, alignment), B.plain content)
-- | A table row is made of many table cells
-tableRow :: Parser [Char] ParserState [Blocks]
-tableRow = many1 (tableCell False) <* char '|' <* newline
-
-tableHeader :: Parser [Char] ParserState [Blocks]
-tableHeader = many1 (tableCell True) <* char '|' <* newline
-
--- | A table with an optional header. Current implementation can
--- handle tables with and without header, but will parse cells
--- alignment attributes as content.
+tableRow :: Parser [Char] ParserState [((Bool, Alignment), Blocks)]
+tableRow = try $ do
+ -- skip optional row attributes
+ optional $ try $ do
+ _ <- attributes
+ char '.'
+ many1 spaceChar
+ many1 tableCell <* char '|' <* blankline
+
+-- | A table with an optional header.
table :: Parser [Char] ParserState Blocks
table = try $ do
- headers <- option mempty $ tableHeader
- rows <- many1 tableRow
+ -- ignore table attributes
+ caption <- option mempty $ try $ do
+ string "table"
+ _ <- attributes
+ char '.'
+ rawcapt <- trim <$> anyLine
+ parseFromString (mconcat <$> many inline) rawcapt
+ rawrows <- many1 $ (skipMany ignorableRow) >> tableRow
+ skipMany ignorableRow
blanklines
+ let (headers, rows) = case rawrows of
+ (toprow:rest) | any (fst . fst) toprow ->
+ (toprow, rest)
+ _ -> (mempty, rawrows)
let nbOfCols = max (length headers) (length $ head rows)
- return $ B.table mempty
- (zip (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0))
- headers
- rows
-
+ let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows)
+ return $ B.table caption
+ (zip aligns (replicate nbOfCols 0.0))
+ (map snd headers)
+ (map (map snd) rows)
+
+-- | Ignore markers for cols, thead, tfoot.
+ignorableRow :: Parser [Char] ParserState ()
+ignorableRow = try $ do
+ char '|'
+ oneOf ":^-~"
+ _ <- attributes
+ char '.'
+ _ <- anyLine
+ return ()
-- | Blocks like 'p' and 'table' do not need explicit block tag.
-- However, they can be used to set HTML/CSS attributes when needed.