From 2d825603c684d6c7af6adb08f26ed34a078a5afe Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 4 May 2016 15:33:18 +0200 Subject: Org reader: fix handling of empty table cells, rows This fixes Org mode parsing of some corner cases regarding empty cells and rows. Empty cells weren't parsed correctly, e.g. `|||` should be two empty cells, but would be parsed as a single cell containing a pipe character. Empty rows where parsed as alignment rows and dropped from the output. This fixes #2616. --- src/Text/Pandoc/Readers/Org.hs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index e41dd5dd8..493e94d2e 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -35,6 +35,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..), trimInlines ) import Text.Pandoc.Definition import Text.Pandoc.Compat.Monoid ((<>)) +import Text.Pandoc.Error import Text.Pandoc.Options import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF @@ -57,8 +58,6 @@ import qualified Data.Set as Set import Data.Maybe (fromMaybe, isJust) import Network.HTTP (urlEncode) -import Text.Pandoc.Error - -- | Parse org-mode string and return a Pandoc document. readOrg :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) @@ -807,18 +806,19 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow) tableContentRow :: OrgParser OrgTableRow tableContentRow = try $ - OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline) + OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline) tableContentCell :: OrgParser (F Blocks) tableContentCell = try $ - fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell - -endOfCell :: OrgParser Char -endOfCell = try $ char '|' <|> lookAhead newline + fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell tableAlignRow :: OrgParser OrgTableRow -tableAlignRow = try $ - OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline) +tableAlignRow = try $ do + tableStart + cells <- many1Till tableAlignCell newline + -- Empty rows are regular (i.e. content) rows, not alignment rows. + guard $ any (/= AlignDefault) cells + return $ OrgAlignRow cells tableAlignCell :: OrgParser Alignment tableAlignCell = @@ -833,15 +833,19 @@ tableAlignCell = where emptyCell = try $ skipSpaces *> endOfCell tableAlignFromChar :: OrgParser Alignment -tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft - , char 'c' *> return AlignCenter - , char 'r' *> return AlignRight - ] +tableAlignFromChar = try $ + choice [ char 'l' *> return AlignLeft + , char 'c' *> return AlignCenter + , char 'r' *> return AlignRight + ] tableHline :: OrgParser OrgTableRow tableHline = try $ OrgHlineRow <$ (tableStart *> char '-' *> anyLine) +endOfCell :: OrgParser Char +endOfCell = try $ char '|' <|> lookAhead newline + rowsToTable :: [OrgTableRow] -> F OrgTable rowsToTable = foldM rowToContent emptyTable -- cgit v1.2.3