RST reader: Basic support for csv-table directive.

* Added Text.Pandoc.CSV, simple CSV parser. * Options still not supported, and we need tests. See #3533.
author: John MacFarlane <jgm@berkeley.edu> 2017-08-10 11:12:41 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2017-08-10 11:12:41 -0700
commit: a5790dd30893cf7143eb64a46fb137caf131a624 (patch)
tree: a095b1b283ab3fa6f7d7f46577555df5290fa3cd /src/Text/Pandoc
parent: e9eaf8421567b2d54b415b642ec1077d79907a10 (diff)
2 files changed, 154 insertions, 0 deletions
diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs
new file mode 100644
index 000000000..15492ac52
--- /dev/null
+++ b/src/Text/Pandoc/CSV.hs
@@ -0,0 +1,102 @@
+{-
+Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+-}
+
+{- |
+   Module      : Text.Pandoc.CSV
+   Copyright   : Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
+   License     : GNU GPL, version 2 or above
+   Maintainer  : John MacFarlane <jgm@berkeley.edu>
+   Stability   : alpha
+   Portability : portable
+
+Simple CSV parser.
+-}
+
+module Text.Pandoc.CSV (
+  CSVOptions,
+  defaultCSVOptions,
+  parseCSV,
+  ParseError
+) where
+
+import Text.Parsec
+import Text.Parsec.Text (Parser)
+import Text.Parsec.Error (ParseError)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Control.Monad (void)
+
+data CSVOptions = CSVOptions{
+    csvDelim     :: Char
+  , csvQuote     :: Char
+  , csvKeepSpace :: Bool -- treat whitespace following delim as significant
+  , csvEscape    :: Maybe Char -- default is to double up quote
+} deriving (Read, Show)
+
+defaultCSVOptions :: CSVOptions
+defaultCSVOptions = CSVOptions{
+    csvDelim = ','
+  , csvQuote = '"'
+  , csvKeepSpace = False
+  , csvEscape = Nothing }
+
+parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
+parseCSV opts t = parse (pCSV opts) "csv" t
+
+pCSV :: CSVOptions -> Parser [[Text]]
+pCSV opts =
+  (pCSVRow opts `sepEndBy` endline) <* (spaces *> eof)
+
+pCSVRow :: CSVOptions -> Parser [Text]
+pCSVRow opts = notFollowedBy blank >> pCSVCell opts `sepBy` pCSVDelim opts
+
+blank :: Parser ()
+blank = try $ spaces >> (() <$ endline <|> eof)
+
+pCSVCell :: CSVOptions -> Parser Text
+pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts
+
+pCSVQuotedCell :: CSVOptions -> Parser Text
+pCSVQuotedCell opts = do
+  char (csvQuote opts)
+  res <- many (satisfy (\c -> c /= csvQuote opts) <|> escaped opts)
+  char (csvQuote opts)
+  return $ T.pack res
+
+escaped :: CSVOptions -> Parser Char
+escaped opts = do
+  case csvEscape opts of
+       Nothing -> try $ char (csvQuote opts) >> char (csvQuote opts)
+       Just c  -> try $ char c >> noneOf "\r\n"
+
+pCSVUnquotedCell :: CSVOptions -> Parser Text
+pCSVUnquotedCell opts = T.pack <$>
+  many (satisfy $ \c -> c /= csvDelim opts && c /= '\r' && c /= '\n')
+
+pCSVDelim :: CSVOptions -> Parser ()
+pCSVDelim opts = do
+  char (csvDelim opts)
+  if csvKeepSpace opts
+     then return ()
+     else skipMany (oneOf " \t")
+
+endline :: Parser ()
+endline = do
+  optional (void $ char '\r')
+  void $ char '\n'
+
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 3b1eee010..6cf8dbae4 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -45,6 +45,7 @@ import Text.Pandoc.Builder (fromList, setMeta)
 import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad, readFileFromDirs)
+import Text.Pandoc.CSV (CSVOptions(..), defaultCSVOptions, parseCSV)
 import Text.Pandoc.Definition
 import Text.Pandoc.Error
 import Text.Pandoc.ImageSize (lengthToDim, scaleDimension)
@@ -56,6 +57,8 @@ import Text.Printf (printf)
 import Data.Text (Text)
 import qualified Data.Text as T
 
+import Debug.Trace
+
 -- TODO:
 -- [ ] .. parsed-literal
 -- [ ] .. csv-table
@@ -688,6 +691,7 @@ directive' = do
   case label of
         "table" -> tableDirective top fields body'
         "list-table" -> listTableDirective top fields body'
+        "csv-table" -> csvTableDirective top fields body'
         "line-block" -> lineBlockDirective body'
         "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
         "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
@@ -820,6 +824,54 @@ listTableDirective top fields body = do
           takeCells _ = []
           normWidths ws = map (/ max 1 (sum ws)) ws
 
+  -- TODO
+  -- [ ] delim:
+  -- [ ] quote:
+  -- [ ] keepspace:
+  -- [ ] escape:
+  -- [ ] widths:
+  -- [ ] header-rows:
+  -- [ ] header:
+  -- [ ] url:
+  -- [ ] file:
+  -- [ ] encoding:
+csvTableDirective :: PandocMonad m
+                   => String -> [(String, String)] -> String
+                   -> RSTParser m Blocks
+csvTableDirective top fields rawcsv = do
+  let res = parseCSV defaultCSVOptions (T.pack rawcsv)
+  case res of
+       Left e  -> do
+         throwError $ PandocParsecError "csv table" e
+       Right rows -> do
+         return $ B.rawBlock "rst" $ show rows
+{-
+  bs <- parseFromString' parseBlocks body
+  title <- parseFromString' (trimInlines . mconcat <$> many inline) top
+  let rows = takeRows $ B.toList bs
+      headerRowsNum = fromMaybe (0 :: Int) $
+         lookup "header-rows" fields >>= safeRead
+      (headerRow,bodyRows,numOfCols) = case rows of
+        x:xs -> if headerRowsNum > 0
+                   then (x, xs, length x)
+                   else ([], rows, length x)
+        _ -> ([],[],0)
+      widths = case trim <$> lookup "widths" fields of
+        Just "auto" -> replicate numOfCols 0
+        Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $
+                           splitBy (`elem` (" ," :: String)) specs
+        _ -> replicate numOfCols 0
+  return $ B.table title
+             (zip (replicate numOfCols AlignDefault) widths)
+             headerRow
+             bodyRows
+    where takeRows [BulletList rows] = map takeCells rows
+          takeRows _ = []
+          takeCells [BulletList cells] = map B.fromList cells
+          takeCells _ = []
+          normWidths ws = map (/ max 1 (sum ws)) ws
+-}
+
 -- TODO:
 --  - Only supports :format: fields with a single format for :raw: roles,
 --    change Text.Pandoc.Definition.Format to fix
author	John MacFarlane <jgm@berkeley.edu>	2017-08-10 11:12:41 -0700
committer	John MacFarlane <jgm@berkeley.edu>	2017-08-10 11:12:41 -0700
commit	a5790dd30893cf7143eb64a46fb137caf131a624 (patch)
tree	a095b1b283ab3fa6f7d7f46577555df5290fa3cd /src/Text/Pandoc
parent	e9eaf8421567b2d54b415b642ec1077d79907a10 (diff)