summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG2
-rw-r--r--Command/Info.hs25
-rw-r--r--Command/MetaData.hs8
-rw-r--r--Command/Whereis.hs3
-rw-r--r--Key.hs6
-rw-r--r--Messages/JSON.hs43
-rw-r--r--Remote.hs16
-rw-r--r--Remote/Tahoe.hs2
-rw-r--r--Test.hs4
-rw-r--r--Types/Messages.hs2
-rw-r--r--Types/MetaData.hs12
-rw-r--r--Utility/Aeson.hs86
-rw-r--r--doc/bugs/json_should_be_utf8_regardless_of_locale.mdwn25
-rw-r--r--git-annex.cabal1
14 files changed, 173 insertions, 62 deletions
diff --git a/CHANGELOG b/CHANGELOG
index b9701f3546..126a80dc69 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,6 +7,8 @@ git-annex (6.20180410) UNRELEASED; urgency=medium
Note that it's still allowed to move the content of a file
from one repository to another when numcopies is not satisfied, as long
as the move does not result in there being fewer copies.
+ * Fix mangling of --json output of utf-8 characters when not
+ running in a utf-8 locale.
-- Joey Hess <id@joeyh.name> Mon, 09 Apr 2018 14:03:28 -0400
diff --git a/Command/Info.hs b/Command/Info.hs
index 6ec6a1f71e..cc9c1b5fe0 100644
--- a/Command/Info.hs
+++ b/Command/Info.hs
@@ -11,9 +11,8 @@ module Command.Info where
import "mtl" Control.Monad.State.Strict
import qualified Data.Map.Strict as M
-import qualified Data.Text as T
+import qualified Data.Vector as V
import Data.Ord
-import Data.Aeson hiding (json)
import Command
import qualified Git
@@ -34,6 +33,7 @@ import Config
import Git.Config (boolConfig)
import qualified Git.LsTree as LsTree
import Utility.Percentage
+import Utility.Aeson hiding (json)
import Types.Transfer
import Logs.Transfer
import Types.Key
@@ -283,7 +283,7 @@ simpleStat desc getval = stat desc $ json id getval
nostat :: Stat
nostat = return Nothing
-json :: ToJSON j => (j -> String) -> StatState j -> String -> StatState String
+json :: ToJSON' j => (j -> String) -> StatState j -> String -> StatState String
json fmt a desc = do
j <- a
lift $ maybeShowJSON $ JSONChunk [(desc, j)]
@@ -422,7 +422,7 @@ transfer_list :: Stat
transfer_list = stat desc $ nojson $ lift $ do
uuidmap <- Remote.remoteMap id
ts <- getTransfers
- maybeShowJSON $ JSONChunk [(desc, map (uncurry jsonify) ts)]
+ maybeShowJSON $ JSONChunk [(desc, V.fromList $ map (uncurry jsonify) ts)]
return $ if null ts
then "none"
else multiLine $
@@ -438,11 +438,11 @@ transfer_list = stat desc $ nojson $ lift $ do
, maybe (fromUUID $ transferUUID t) Remote.name $
M.lookup (transferUUID t) uuidmap
]
- jsonify t i = object $ map (\(k, v) -> (T.pack k, v)) $
- [ ("transfer", toJSON (formatDirection (transferDirection t)))
- , ("key", toJSON (key2file (transferKey t)))
- , ("file", toJSON afile)
- , ("remote", toJSON (fromUUID (transferUUID t)))
+ jsonify t i = object $ map (\(k, v) -> (packString k, v)) $
+ [ ("transfer", toJSON' (formatDirection (transferDirection t)))
+ , ("key", toJSON' (transferKey t))
+ , ("file", toJSON' afile)
+ , ("remote", toJSON' (fromUUID (transferUUID t)))
]
where
AssociatedFile afile = associatedFile i
@@ -476,10 +476,13 @@ numcopies_stats :: Stat
numcopies_stats = stat "numcopies stats" $ json fmt $
calc <$> (maybe M.empty numCopiesVarianceMap <$> cachedNumCopiesStats)
where
- calc = map (\(variance, count) -> (show variance, count))
+ calc = V.fromList
+ . map (\(variance, count) -> (show variance, count))
. sortBy (flip (comparing fst))
. M.toList
- fmt = multiLine . map (\(variance, count) -> "numcopies " ++ variance ++ ": " ++ show count)
+ fmt = multiLine
+ . map (\(variance, count) -> "numcopies " ++ variance ++ ": " ++ show count)
+ . V.toList
reposizes_stats :: Stat
reposizes_stats = stat desc $ nojson $ do
diff --git a/Command/MetaData.hs b/Command/MetaData.hs
index ef3f1da9a5..282b7fda05 100644
--- a/Command/MetaData.hs
+++ b/Command/MetaData.hs
@@ -14,12 +14,12 @@ import Logs.MetaData
import Annex.WorkTree
import Messages.JSON (JSONActionItem(..))
import Types.Messages
+import Utility.Aeson
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.ByteString.Lazy.UTF8 as BU
-import Data.Aeson
import Control.Concurrent
cmd :: Command
@@ -115,7 +115,7 @@ perform c o k = case getSet o of
cleanup :: Key -> CommandCleanup
cleanup k = do
m <- getCurrentMetaData k
- let Object o = toJSON (MetaDataFields m)
+ let Object o = toJSON' (MetaDataFields m)
maybeShowJSON $ AesonObject o
showLongNote $ unlines $ concatMap showmeta $
map unwrapmeta (fromMetaData m)
@@ -129,8 +129,8 @@ cleanup k = do
newtype MetaDataFields = MetaDataFields MetaData
deriving (Show)
-instance ToJSON MetaDataFields where
- toJSON (MetaDataFields m) = object [ (fieldsField, toJSON m) ]
+instance ToJSON' MetaDataFields where
+ toJSON' (MetaDataFields m) = object [ (fieldsField, toJSON' m) ]
instance FromJSON MetaDataFields where
parseJSON (Object v) = do
diff --git a/Command/Whereis.hs b/Command/Whereis.hs
index fddb2b5c6e..b14e231c17 100644
--- a/Command/Whereis.hs
+++ b/Command/Whereis.hs
@@ -15,6 +15,7 @@ import Remote.Web (getWebUrls)
import Annex.UUID
import qualified Data.Map as M
+import qualified Data.Vector as V
cmd :: Command
cmd = noCommit $ withGlobalOptions [jsonOptions, annexedMatchingOptions] $
@@ -77,7 +78,7 @@ perform remotemap key = do
untrustedheader = "The following untrusted locations may also have copies:\n"
ppwhereis h ls urls = do
descm <- uuidDescriptions
- let urlvals = map (\(u, us) -> (u, Just us)) $
+ let urlvals = map (\(u, us) -> (u, Just (V.fromList us))) $
filter (\(u,_) -> u `elem` ls) urls
prettyPrintUUIDsWith (Just "urls") h descm (const Nothing) urlvals
diff --git a/Key.hs b/Key.hs
index 44e9acea4e..ade012a4ba 100644
--- a/Key.hs
+++ b/Key.hs
@@ -22,7 +22,6 @@ module Key (
prop_isomorphic_key_decode
) where
-import Data.Aeson
import Data.Char
import qualified Data.Text as T
@@ -30,6 +29,7 @@ import Common
import Types.Key
import Utility.QuickCheck
import Utility.Bloom
+import Utility.Aeson
import qualified Utility.SimpleProtocol as Proto
stubKey :: Key
@@ -155,8 +155,8 @@ instance Hashable Key where
hashIO32 = hashIO32 . key2file
hashIO64 = hashIO64 . key2file
-instance ToJSON Key where
- toJSON = toJSON . key2file
+instance ToJSON' Key where
+ toJSON' = toJSON' . key2file
instance FromJSON Key where
parseJSON (String t) = maybe mempty pure $ file2key $ T.unpack t
diff --git a/Messages/JSON.hs b/Messages/JSON.hs
index 897eb8cbfe..e63a263e3f 100644
--- a/Messages/JSON.hs
+++ b/Messages/JSON.hs
@@ -26,12 +26,10 @@ module Messages.JSON (
JSONActionItem(..),
) where
-import Data.Aeson
import Control.Applicative
import qualified Data.Map as M
-import qualified Data.Text as T
import qualified Data.Vector as V
-import qualified Data.ByteString.Lazy as B
+import qualified Data.ByteString.Lazy as L
import qualified Data.HashMap.Strict as HM
import System.IO
import System.IO.Unsafe (unsafePerformIO)
@@ -44,6 +42,7 @@ import Types.Messages
import Key
import Utility.Metered
import Utility.Percentage
+import Utility.Aeson
-- A global lock to avoid concurrent threads emitting json at the same time.
{-# NOINLINE emitLock #-}
@@ -53,7 +52,7 @@ emitLock = unsafePerformIO $ newMVar ()
emit :: Object -> IO ()
emit o = do
takeMVar emitLock
- B.hPut stdout (encode o)
+ L.hPut stdout (encode o)
putStr "\n"
putMVar emitLock ()
@@ -67,7 +66,7 @@ none = id
start :: String -> Maybe FilePath -> Maybe Key -> JSONBuilder
start command file key _ = Just (o, False)
where
- Object o = toJSON $ JSONActionItem
+ Object o = toJSON' $ JSONActionItem
{ itemCommand = Just command
, itemKey = key
, itemFile = file
@@ -75,7 +74,7 @@ start command file key _ = Just (o, False)
}
end :: Bool -> JSONBuilder
-end b (Just (o, _)) = Just (HM.insert "success" (toJSON b) o, True)
+end b (Just (o, _)) = Just (HM.insert "success" (toJSON' b) o, True)
end _ Nothing = Nothing
finalize :: JSONOptions -> Object -> Object
@@ -91,32 +90,32 @@ addErrorMessage msg o =
where
combinearray (Array new) (Array old) = Array (old <> new)
combinearray new _old = new
- v = Array $ V.fromList $ map (String . T.pack) msg
+ v = Array $ V.fromList $ map (String . packString) msg
note :: String -> JSONBuilder
note _ Nothing = Nothing
-note s (Just (o, e)) = Just (HM.insertWith combinelines "note" (toJSON s) o, e)
+note s (Just (o, e)) = Just (HM.insertWith combinelines "note" (toJSON' s) o, e)
where
combinelines (String new) (String old) =
- String (old <> T.pack "\n" <> new)
+ String (old <> "\n" <> new)
combinelines new _old = new
info :: String -> JSONBuilder
info s _ = Just (o, True)
where
- Object o = object ["info" .= toJSON s]
+ Object o = object ["info" .= toJSON' s]
data JSONChunk v where
AesonObject :: Object -> JSONChunk Object
- JSONChunk :: ToJSON v => [(String, v)] -> JSONChunk [(String, v)]
+ JSONChunk :: ToJSON' v => [(String, v)] -> JSONChunk [(String, v)]
add :: JSONChunk v -> JSONBuilder
add v (Just (o, e)) = Just (HM.union o' o, e)
where
Object o' = case v of
AesonObject ao -> Object ao
- JSONChunk l -> object (map mkPair l)
- mkPair (s, d) = (T.pack s, toJSON d)
+ JSONChunk l -> object $ map mkPair l
+ mkPair (s, d) = (packString s, toJSON' d)
add _ Nothing = Nothing
complete :: JSONChunk v -> JSONBuilder
@@ -145,8 +144,8 @@ data DualDisp = DualDisp
, dispJson :: String
}
-instance ToJSON DualDisp where
- toJSON = toJSON . dispJson
+instance ToJSON' DualDisp where
+ toJSON' = toJSON' . dispJson
instance Show DualDisp where
show = dispNormal
@@ -156,10 +155,10 @@ instance Show DualDisp where
-- serialization of Map, which uses "[key, value]".
data ObjectMap a = ObjectMap { fromObjectMap :: M.Map String a }
-instance ToJSON a => ToJSON (ObjectMap a) where
- toJSON (ObjectMap m) = object $ map go $ M.toList m
+instance ToJSON' a => ToJSON' (ObjectMap a) where
+ toJSON' (ObjectMap m) = object $ map go $ M.toList m
where
- go (k, v) = (T.pack k, toJSON v)
+ go (k, v) = (packString k, toJSON' v)
-- An item that a git-annex command acts on, and displays a JSON object about.
data JSONActionItem a = JSONActionItem
@@ -170,13 +169,13 @@ data JSONActionItem a = JSONActionItem
}
deriving (Show)
-instance ToJSON (JSONActionItem a) where
- toJSON i = object $ catMaybes
+instance ToJSON' (JSONActionItem a) where
+ toJSON' i = object $ catMaybes
[ Just $ "command" .= itemCommand i
, case itemKey i of
Nothing -> Nothing
- Just k -> Just $ "key" .= toJSON k
- , Just $ "file" .= itemFile i
+ Just k -> Just $ "key" .= toJSON' k
+ , Just $ "file" .= toJSON' (itemFile i)
-- itemAdded is not included; must be added later by 'add'
]
diff --git a/Remote.hs b/Remote.hs
index 8d826712c1..29f59a7bf8 100644
--- a/Remote.hs
+++ b/Remote.hs
@@ -57,9 +57,8 @@ module Remote (
) where
import Data.Ord
-import Data.Aeson
import qualified Data.Map as M
-import qualified Data.Text as T
+import qualified Data.Vector as V
import Annex.Common
import Types.Remote
@@ -74,6 +73,7 @@ import Config
import Config.DynamicConfig
import Git.Types (RemoteName)
import qualified Git
+import Utility.Aeson
{- Map from UUIDs of Remotes to a calculated value. -}
remoteMap :: (Remote -> v) -> Annex (M.Map UUID v)
@@ -197,7 +197,7 @@ prettyPrintUUIDsDescs header descm uuids =
{- An optional field can be included in the list of UUIDs. -}
prettyPrintUUIDsWith
- :: ToJSON v
+ :: ToJSON' v
=> Maybe String
-> String
-> M.Map UUID RemoteName
@@ -206,7 +206,7 @@ prettyPrintUUIDsWith
-> Annex String
prettyPrintUUIDsWith optfield header descm showval uuidvals = do
hereu <- getUUID
- maybeShowJSON $ JSONChunk [(header, map (jsonify hereu) uuidvals)]
+ maybeShowJSON $ JSONChunk [(header, V.fromList $ map (jsonify hereu) uuidvals)]
return $ unwords $ map (\u -> "\t" ++ prettify hereu u ++ "\n") uuidvals
where
finddescription u = M.findWithDefault "" u descm
@@ -224,11 +224,11 @@ prettyPrintUUIDsWith optfield header descm showval uuidvals = do
Nothing -> s
Just val -> val ++ ": " ++ s
jsonify hereu (u, optval) = object $ catMaybes
- [ Just (T.pack "uuid", toJSON $ fromUUID u)
- , Just (T.pack "description", toJSON $ finddescription u)
- , Just (T.pack "here", toJSON $ hereu == u)
+ [ Just (packString "uuid", toJSON' $ fromUUID u)
+ , Just (packString "description", toJSON' $ finddescription u)
+ , Just (packString "here", toJSON' $ hereu == u)
, case (optfield, optval) of
- (Just field, Just val) -> Just (T.pack field, toJSON val)
+ (Just field, Just val) -> Just (packString field, toJSON' val)
_ -> Nothing
]
diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs
index 0091f27ba3..6423fefdb1 100644
--- a/Remote/Tahoe.hs
+++ b/Remote/Tahoe.hs
@@ -23,7 +23,7 @@
module Remote.Tahoe (remote) where
import qualified Data.Map as M
-import Data.Aeson
+import Utility.Aeson
import Data.ByteString.Lazy.UTF8 (fromString)
import Control.Concurrent.STM
diff --git a/Test.hs b/Test.hs
index b0f4186563..9bd43bbe69 100644
--- a/Test.hs
+++ b/Test.hs
@@ -21,7 +21,6 @@ import Test.Tasty.Ingredients.Rerun
import Options.Applicative (switch, long, help, internal)
import qualified Data.Map as M
-import qualified Data.Aeson
import qualified Data.ByteString.Lazy.UTF8 as BU8
import System.Environment
@@ -83,6 +82,7 @@ import qualified Utility.HumanTime
import qualified Utility.Base64
import qualified Utility.Tmp.Dir
import qualified Utility.FileSystemEncoding
+import qualified Utility.Aeson
#ifndef mingw32_HOST_OS
import qualified Remote.Helper.Encryptable
import qualified Types.Crypto
@@ -971,7 +971,7 @@ test_merge = intmpclonerepo $
test_info :: Assertion
test_info = intmpclonerepo $ do
json <- BU8.fromString <$> git_annex_output "info" ["--json"]
- case Data.Aeson.eitherDecode json :: Either String Data.Aeson.Value of
+ case Utility.Aeson.eitherDecode json :: Either String Utility.Aeson.Value of
Right _ -> return ()
Left e -> assertFailure e
diff --git a/Types/Messages.hs b/Types/Messages.hs
index d45174bb71..8ca60651f6 100644
--- a/Types/Messages.hs
+++ b/Types/Messages.hs
@@ -9,7 +9,7 @@
module Types.Messages where
-import qualified Data.Aeson as Aeson
+import qualified Utility.Aeson as Aeson
import Control.Concurrent
#ifdef WITH_CONCURRENTOUTPUT
diff --git a/Types/MetaData.hs b/Types/MetaData.hs
index 279aacbb84..e05a8f72ec 100644
--- a/Types/MetaData.hs
+++ b/Types/MetaData.hs
@@ -43,6 +43,7 @@ module Types.MetaData (
import Common
import Utility.Base64
import Utility.QuickCheck
+import Utility.Aeson
import qualified Data.Text as T
import qualified Data.Set as S
@@ -50,15 +51,14 @@ import qualified Data.Map as M
import qualified Data.HashMap.Strict as HM
import Data.Char
import qualified Data.CaseInsensitive as CI
-import Data.Aeson
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
deriving (Show, Eq, Ord)
-instance ToJSON MetaData where
- toJSON (MetaData m) = object $ map go (M.toList m)
+instance ToJSON' MetaData where
+ toJSON' (MetaData m) = object $ map go (M.toList m)
where
- go (MetaField f, s) = (T.pack (CI.original f), toJSON s)
+ go (MetaField f, s) = (packString (CI.original f), toJSON' s)
instance FromJSON MetaData where
parseJSON (Object o) = do
@@ -82,8 +82,8 @@ newtype MetaField = MetaField (CI.CI String)
data MetaValue = MetaValue CurrentlySet String
deriving (Read, Show)
-instance ToJSON MetaValue where
- toJSON (MetaValue _ v) = toJSON v
+instance ToJSON' MetaValue where
+ toJSON' (MetaValue _ v) = toJSON' v
instance FromJSON MetaValue where
parseJSON (String v) = return $ MetaValue (CurrentlySet True) (T.unpack v)
diff --git a/Utility/Aeson.hs b/Utility/Aeson.hs
new file mode 100644
index 0000000000..7147e516bb
--- /dev/null
+++ b/Utility/Aeson.hs
@@ -0,0 +1,86 @@
+{- GHC File system encoding support for Aeson.
+ -
+ - Import instead of Data.Aeson
+ -
+ - Copyright 2018 Joey Hess <id@joeyh.name>
+ -
+ - License: BSD-2-clause
+ -}
+
+{-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-}
+
+module Utility.Aeson (
+ module X,
+ ToJSON'(..),
+ encode,
+ packString,
+) where
+
+import Data.Aeson as X hiding (ToJSON, toJSON, encode)
+import Data.Aeson hiding (encode)
+import qualified Data.Aeson
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import qualified Data.ByteString.Lazy as L
+import qualified Data.ByteString as S
+import qualified Data.Set
+import qualified Data.Vector
+import Prelude
+
+import Utility.FileSystemEncoding
+
+-- | Use this instead of Data.Aeson.encode to make sure that the
+-- below String instance is used.
+encode :: ToJSON' a => a -> L.ByteString
+encode = Data.Aeson.encode . toJSON'
+
+-- | Aeson has an unfortunate ToJSON instance for Char and [Char]
+-- which does not support Strings containing UTF8 characters
+-- encoded using the filesystem encoding when run in a non-utf8 locale.
+--
+-- Since we can't replace that with a instance that does the right
+-- thing, instead here's a new class that handles String right.
+class ToJSON' a where
+ toJSON' :: a -> Value
+
+instance ToJSON' String where
+ toJSON' = toJSON . packString
+
+-- | Pack a String to Text, correctly handling the filesystem encoding.
+--
+-- Use this instead of Data.Text.pack.
+--
+-- Note that if the string contains invalid UTF8 characters not using
+-- the FileSystemEncoding, this is the same as Data.Text.pack.
+packString :: String -> T.Text
+packString s = case T.decodeUtf8' (S.concat $ L.toChunks $ encodeBS s) of
+ Right t -> t
+ Left _ -> T.pack s
+
+-- | An instance for lists cannot be included as it would overlap with
+-- the String instance. Instead, you can use a Vector.
+instance ToJSON' s => ToJSON' (Data.Vector.Vector s) where
+ toJSON' = toJSON . map toJSON' . Data.Vector.toList
+
+-- Aeson generates the same JSON for a Set as for a list.
+instance ToJSON' s => ToJSON' (Data.Set.Set s) where
+ toJSON' = toJSON . map toJSON' . Data.Set.toList
+
+instance (ToJSON' a, ToJSON a) => ToJSON' (Maybe a) where
+ toJSON' (Just a) = toJSON (Just (toJSON' a))
+ toJSON' v@Nothing = toJSON v
+
+instance (ToJSON' a, ToJSON a, ToJSON' b, ToJSON b) => ToJSON' (a, b) where
+ toJSON' (a, b) = toJSON ((toJSON' a, toJSON' b))
+
+instance ToJSON' Bool where
+ toJSON' = toJSON
+
+instance ToJSON' Integer where
+ toJSON' = toJSON
+
+instance ToJSON' Object where
+ toJSON' = toJSON
+
+instance ToJSON' Value where
+ toJSON' = toJSON
diff --git a/doc/bugs/json_should_be_utf8_regardless_of_locale.mdwn b/doc/bugs/json_should_be_utf8_regardless_of_locale.mdwn
index 66d0de2244..ac7275a77a 100644
--- a/doc/bugs/json_should_be_utf8_regardless_of_locale.mdwn
+++ b/doc/bugs/json_should_be_utf8_regardless_of_locale.mdwn
@@ -2,6 +2,25 @@ json is defined as always utf-8. However, when LANG=C,
git-annex --json currently outputs "file":"���������"
instead of "file":"äöü東" for that utf-8 filename. --[[Joey]]
-(Note that git-annex can operate on non-utf8 filenames; it's not defined
-what the json contains then, which might or might not be considered a bug
-but this is not about that.)
+This can also affect keys when they contain some non-utf8 from eg the
+extension. And metadata keys and values can contain non-utf8 and also get
+converted to json with similar results.
+
+Note that git-annex can operate on non-utf8 filenames and keys;
+it's not defined what the json contains then, and it currently contains
+similar garbage.
+
+This happens because aeson's instance of ToJSON for Char uses
+Text.singleton, and Text does not handle ghc's filesystem encoding
+for String. Instead it defaults to `\65533` for each byte encoded with the
+filesystem encoding.
+
+So, git-annex will need to convert filenames and keys and anything else
+that might use the filesystem encoding to Text itself in some
+way that does respect the filesystem encoding. Ie, use encodeBS to convert
+it to a ByteString and then Data.Text.Encoding.decodeUtf8.
+
+> [[done]] that. --[[Joey]]
+
+What about git-annex commands that take json as input,
+when run in a non-utf8 locale? Tested that, it is handled ok. --[[Joey]]
diff --git a/git-annex.cabal b/git-annex.cabal
index 4aa5bfcb8e..d2b2a06ffc 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -999,6 +999,7 @@ Executable git-annex
Upgrade.V3
Upgrade.V4
Upgrade.V5
+ Utility.Aeson
Utility.Applicative
Utility.AuthToken
Utility.Base64