summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/UTF8.hs
diff options
context:
space:
mode:
author John MacFarlane <fiddlosopher@gmail.com> 2012-09-23 22:12:21 -0700
committer John MacFarlane <fiddlosopher@gmail.com> 2012-09-23 22:12:21 -0700
commit 7272735b3d413a644fd9ab01eeae8ae9cd5a925b (patch)
tree bb0fa978cee6702d0bb3300f234b1f08d2071c5f /src/Text/Pandoc/UTF8.hs
parent 31107741f0ee69d444e5f9ed2d8272583024e10c (diff)
More intelligent handling of text encodings.
Previously, UTF-8 was enforced for both input and output. The new system:
* For input, UTF-8 is tried first; if an error is raised, the locale encoding is tried.
* For output, the locale encoding is always used.
Diffstat (limited to 'src/Text/Pandoc/UTF8.hs')
-rw-r--r-- src/Text/Pandoc/UTF8.hs 20
1 files changed, 16 insertions, 4 deletions
diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs
index aa3a9da04..45664892a 100644
--- a/src/Text/Pandoc/UTF8.hs
+++ b/src/Text/Pandoc/UTF8.hs
@@ -25,7 +25,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Stability : alpha
Portability : portable
-UTF-8 aware string IO functions that will work with GHC 6.10, 6.12, or 7.
+UTF-8 aware string IO functions that will work with GHC 6.12 or 7.
+The reading functions first attempt to read UTF-8; if an encoding
+error is encountered, the locale encoding is used instead. This
+should work well in practice because text in other encodings
+is usually not valid UTF-8.
-}
module Text.Pandoc.UTF8 ( readFile
, writeFile
@@ -45,10 +49,11 @@ where
#else
import Codec.Binary.UTF8.String (encodeString, decodeString)
#endif
-
+import Control.Exception (catch, throwIO)
+import GHC.IO.Exception (IOException(..), IOErrorType(..))
import System.IO hiding (readFile, writeFile, getContents,
putStr, putStrLn, hPutStr, hPutStrLn, hGetContents)
-import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn )
+import Prelude hiding (readFile, writeFile, getContents, putStr, putStrLn, catch )
import qualified System.IO as IO
readFile :: FilePath -> IO String
@@ -75,7 +80,14 @@ hPutStrLn :: Handle -> String -> IO ()
hPutStrLn h s = hSetEncoding h utf8 >> IO.hPutStrLn h s
hGetContents :: Handle -> IO String
-hGetContents h = hSetEncoding h utf8_bom >> IO.hGetContents h
+hGetContents h = do
+ hSetEncoding h utf8_bom
+ catch (IO.hGetContents h) $ \e ->
+ case ioe_type e of
+ InvalidArgument -> do
+ hSetEncoding h localeEncoding
+ IO.hGetContents h
+ _ -> throwIO e
encodePath :: FilePath -> FilePath
decodeArg :: String -> String