summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlbert Krewinkel <albert@zeitkraut.de>2016-05-04 15:33:18 +0200
committerAlbert Krewinkel <albert@zeitkraut.de>2016-05-04 16:02:03 +0200
commit2d825603c684d6c7af6adb08f26ed34a078a5afe (patch)
tree3b00c25b4a035b7ea3e585e18816e13647e18a3f
parenta51e4e82156d8d6f0d3dc616c4f38c70b25be616 (diff)
Org reader: fix handling of empty table cells, rows
This fixes Org mode parsing of some corner cases regarding empty cells and rows. Empty cells weren't parsed correctly, e.g. `|||` should be two empty cells, but would be parsed as a single cell containing a pipe character. Empty rows where parsed as alignment rows and dropped from the output. This fixes #2616.
-rw-r--r--src/Text/Pandoc/Readers/Org.hs30
-rw-r--r--tests/Tests/Readers/Org.hs13
2 files changed, 29 insertions, 14 deletions
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index e41dd5dd8..493e94d2e 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -35,6 +35,7 @@ import Text.Pandoc.Builder ( Inlines, Blocks, HasMeta(..),
trimInlines )
import Text.Pandoc.Definition
import Text.Pandoc.Compat.Monoid ((<>))
+import Text.Pandoc.Error
import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
@@ -57,8 +58,6 @@ import qualified Data.Set as Set
import Data.Maybe (fromMaybe, isJust)
import Network.HTTP (urlEncode)
-import Text.Pandoc.Error
-
-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
-> String -- ^ String to parse (assuming @'\n'@ line endings)
@@ -807,18 +806,19 @@ tableRows = try $ many (tableAlignRow <|> tableHline <|> tableContentRow)
tableContentRow :: OrgParser OrgTableRow
tableContentRow = try $
- OrgContentRow . sequence <$> (tableStart *> manyTill tableContentCell newline)
+ OrgContentRow . sequence <$> (tableStart *> many1Till tableContentCell newline)
tableContentCell :: OrgParser (F Blocks)
tableContentCell = try $
- fmap B.plain . trimInlinesF . mconcat <$> many1Till inline endOfCell
-
-endOfCell :: OrgParser Char
-endOfCell = try $ char '|' <|> lookAhead newline
+ fmap B.plain . trimInlinesF . mconcat <$> manyTill inline endOfCell
tableAlignRow :: OrgParser OrgTableRow
-tableAlignRow = try $
- OrgAlignRow <$> (tableStart *> manyTill tableAlignCell newline)
+tableAlignRow = try $ do
+ tableStart
+ cells <- many1Till tableAlignCell newline
+ -- Empty rows are regular (i.e. content) rows, not alignment rows.
+ guard $ any (/= AlignDefault) cells
+ return $ OrgAlignRow cells
tableAlignCell :: OrgParser Alignment
tableAlignCell =
@@ -833,15 +833,19 @@ tableAlignCell =
where emptyCell = try $ skipSpaces *> endOfCell
tableAlignFromChar :: OrgParser Alignment
-tableAlignFromChar = try $ choice [ char 'l' *> return AlignLeft
- , char 'c' *> return AlignCenter
- , char 'r' *> return AlignRight
- ]
+tableAlignFromChar = try $
+ choice [ char 'l' *> return AlignLeft
+ , char 'c' *> return AlignCenter
+ , char 'r' *> return AlignRight
+ ]
tableHline :: OrgParser OrgTableRow
tableHline = try $
OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
+endOfCell :: OrgParser Char
+endOfCell = try $ char '|' <|> lookAhead newline
+
rowsToTable :: [OrgTableRow]
-> F OrgTable
rowsToTable = foldM rowToContent emptyTable
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 98658482c..81684d8ef 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -941,7 +941,7 @@ tests =
, "Empty table" =:
"||" =?>
- simpleTable' 1 mempty mempty
+ simpleTable' 1 mempty [[mempty]]
, "Glider Table" =:
unlines [ "| 1 | 0 | 0 |"
@@ -996,6 +996,17 @@ tests =
, [ plain "dynamic", plain "Lisp" ]
]
+ , "Table with empty cells" =:
+ "|||c|" =?>
+ simpleTable' 3 mempty [[mempty, mempty, plain "c"]]
+
+ , "Table with empty rows" =:
+ unlines [ "| first |"
+ , "| |"
+ , "| third |"
+ ] =?>
+ simpleTable' 1 mempty [[plain "first"], [mempty], [plain "third"]]
+
, "Table with alignment row" =:
unlines [ "| Numbers | Text | More |"
, "| <c> | <r> | |"