summary | refs | log | tree | commit | diff
path: root/src/Text/Pandoc/CSV.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/CSV.hs')
-rw-r--r-- src/Text/Pandoc/CSV.hs 102
1 files changed, 102 insertions, 0 deletions
diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs
new file mode 100644
index 000000000..3415ae88f
--- /dev/null
+++ b/src/Text/Pandoc/CSV.hs
@@ -0,0 +1,102 @@
+{-
+Copyright (C) 2017–2018 John MacFarlane <jgm@berkeley.edu>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+-}
+
+{- |
+ Module : Text.Pandoc.CSV
+ Copyright : Copyright (C) 2017–2018 John MacFarlane <jgm@berkeley.edu>
+ License : GNU GPL, version 2 or above
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Simple CSV parser.
+-}
+
+module Text.Pandoc.CSV (
+ CSVOptions(..),
+ defaultCSVOptions,
+ parseCSV,
+ ParseError
+) where
+
+import Control.Monad (void)
+import Data.Text (Text)
+import qualified Data.Text as T
+import Text.Parsec
+import Text.Parsec.Text (Parser)
+
+-- | Options controlling how CSV input is split into cells.
+data CSVOptions = CSVOptions
+  { csvDelim     :: Char        -- ^ character that separates cells in a row
+  , csvQuote     :: Char        -- ^ character that opens and closes a quoted cell
+  , csvKeepSpace :: Bool        -- ^ treat whitespace following delim as significant
+  , csvEscape    :: Maybe Char  -- ^ escape character used inside quoted cells;
+                                --   default is to double up quote
+  } deriving (Read, Show)
+
+-- | Default settings: comma as delimiter, double quote as quote character,
+-- spaces and tabs after a delimiter are not kept, and no escape character
+-- (a quote inside a quoted cell is written by doubling it).
+defaultCSVOptions :: CSVOptions
+defaultCSVOptions =
+  CSVOptions
+    { csvDelim     = ','
+    , csvQuote     = '"'
+    , csvKeepSpace = False
+    , csvEscape    = Nothing
+    }
+
+-- | Parse CSV text into a list of rows, each row being a list of cells.
+-- Malformed input yields a Parsec 'ParseError' whose source name is @csv@.
+parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
+parseCSV opts = parse (pCSV opts) "csv"
+
+-- | Parser for a whole document: rows separated (and optionally terminated)
+-- by line endings, followed by nothing but whitespace up to end of input.
+pCSV :: CSVOptions -> Parser [[Text]]
+pCSV opts = do
+  rows <- sepEndBy (pCSVRow opts) endline
+  -- Anything left over must be whitespace only.
+  spaces
+  eof
+  return rows
+
+-- | Parser for one row: cells separated by the configured delimiter.
+-- A row is not started where 'blank' matches, so trailing whitespace at the
+-- end of the input does not produce an extra row.
+pCSVRow :: CSVOptions -> Parser [Text]
+pCSVRow opts = do
+  notFollowedBy blank
+  sepBy (pCSVCell opts) (pCSVDelim opts)
+
+-- | Matches optional whitespace followed by a line ending or end of input,
+-- backtracking completely on failure.
+--
+-- NOTE(review): 'spaces' skips every whitespace character, line breaks
+-- included, so the 'endline' alternative looks unreachable and this
+-- effectively succeeds only when nothing but whitespace remains -- confirm
+-- before relying on it to detect blank lines in the middle of the input.
+blank :: Parser ()
+blank = try (spaces *> (void endline <|> eof))
+
+-- | Parser for a single cell: the quoted form is tried first, then the
+-- unquoted form.
+pCSVCell :: CSVOptions -> Parser Text
+pCSVCell opts = quoted <|> unquoted
+  where
+    quoted   = pCSVQuotedCell opts
+    unquoted = pCSVUnquotedCell opts
+
+-- | Parser for a cell wrapped in the configured quote character.
+-- The content is any run of ordinary characters (neither the quote nor the
+-- escape character) and escape sequences recognised by 'escaped'; the
+-- surrounding quotes are not part of the result.
+pCSVQuotedCell :: CSVOptions -> Parser Text
+pCSVQuotedCell opts =
+  T.pack <$> between quoteMark quoteMark (many (ordinary <|> escaped opts))
+  where
+    quoteMark = char (csvQuote opts)
+    -- A character that needs no special treatment inside quotes.
+    ordinary  = satisfy $ \c ->
+      c /= csvQuote opts && Just c /= csvEscape opts
+
+-- | Parser for one escaped character inside a quoted cell.
+--
+-- Without an escape character configured, this is a doubled quote, which
+-- yields a single quote character. With one configured, it is the escape
+-- character followed by any character other than CR or LF, which is
+-- returned as is. Either way the parser backtracks on failure.
+escaped :: CSVOptions -> Parser Char
+escaped opts = try $ maybe doubled escapedBy (csvEscape opts)
+  where
+    quote       = char (csvQuote opts)
+    doubled     = quote >> quote
+    escapedBy e = char e >> noneOf "\r\n"
+
+-- | Parser for an unquoted cell: the (possibly empty) run of characters up
+-- to the next delimiter, CR, LF or quote character.
+pCSVUnquotedCell :: CSVOptions -> Parser Text
+pCSVUnquotedCell opts = T.pack <$> many (noneOf terminators)
+  where
+    terminators = [csvDelim opts, '\r', '\n', csvQuote opts]
+
+-- | Parser for one cell delimiter. Unless 'csvKeepSpace' is set, spaces and
+-- tabs directly after the delimiter are skipped as well.
+pCSVDelim :: CSVOptions -> Parser ()
+pCSVDelim opts = char (csvDelim opts) >> trailing
+  where
+    trailing
+      | csvKeepSpace opts = return ()
+      | otherwise         = skipMany (oneOf " \t")
+
+-- | Parser for a line ending: either LF or CR LF.
+--
+-- The CR LF alternative is wrapped in 'try' so that a carriage return which
+-- is not followed by a line feed leaves the input untouched. Without that,
+-- the CR would be consumed before the failure, which turns the failure into
+-- a hard error for callers such as 'sepEndBy' and rejects input that merely
+-- ends in a stray CR.
+endline :: Parser ()
+endline = void (char '\n') <|> void (try (char '\r' *> char '\n'))