From 7272735b3d413a644fd9ab01eeae8ae9cd5a925b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 23 Sep 2012 22:12:21 -0700 Subject: More intelligent handling of text encodings. Previously, UTF-8 was enforced for both input and output. The new system: * For input, UTF-8 is tried first; if an error is raised, the locale encoding is tried. * For output, the locale encoding is always used. --- src/Text/Pandoc/UTF8.hs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'src/Text/Pandoc/UTF8.hs') diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index aa3a9da04..45664892a 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -25,7 +25,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Stability : alpha Portability : portable -UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7. +UTF-8 aware string IO functions that will work with GHC 6.12 or 7. +The reading functions first attempt to read UTF-8; if an encoding +error is encountered, the locale encoding is used instead. This +should work well in practice because text in other encodings +is usually not valid UTF-8. 
-} module Text.Pandoc.UTF8 ( readFile , writeFile @@ -45,10 +49,11 @@ where #else import Codec.Binary.UTF8.String (encodeString, decodeString) #endif - +import Control.Exception (catch, throwIO) +import GHC.IO.Exception (IOException(..), IOErrorType(..)) import System.IO hiding (readFile, writeFile, getContents, putStr, putStrLn, hPutStr, hPutStrLn, hGetContents) -import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn ) +import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn, catch ) import qualified System.IO as IO readFile :: FilePath -> IO String @@ -75,7 +80,14 @@ hPutStrLn :: Handle -> String -> IO () hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s hGetContents :: Handle -> IO String -hGetContents h = hSetEncoding h utf8_bom >> IO.hGetContents h +hGetContents h = do + hSetEncoding h utf8_bom + catch (IO.hGetContents h) $ \e -> + case ioe_type e of + InvalidArgument -> do + hSetEncoding h localeEncoding + IO.hGetContents h + _ -> throwIO e encodePath :: FilePath -> FilePath decodeArg :: String -> String -- cgit v1.2.3