summaryrefslogtreecommitdiff
path: root/Backend/Hash.hs
blob: 9ac00f8cd3142cf9b84fd14284a3605ca747a0ec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
{- git-annex hashing backends
 -
 - Copyright 2011-2020 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE OverloadedStrings #-}

module Backend.Hash (
	backends,
	testKeyBackend,
	keyHash,
) where

import Annex.Common
import qualified Annex
import Types.Key
import Types.Backend
import Types.KeySource
import Utility.Hash
import Utility.Metered

import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
import Data.Char
import Data.Word
import Control.DeepSeq
import Control.Exception (evaluate)

data Hash
	= MD5Hash
	| SHA1Hash
	| SHA2Hash HashSize
	| SHA3Hash HashSize
	| SkeinHash HashSize
	| Blake2bHash HashSize
	| Blake2bpHash HashSize
	| Blake2sHash HashSize
	| Blake2spHash HashSize

{- Order is slightly significant; want SHA256 first, and more general
 - sizes earlier. -}
hashes :: [Hash]
hashes = concat 
	[ map (SHA2Hash . HashSize) [256, 512, 224, 384]
	, map (SHA3Hash . HashSize) [256, 512, 224, 384]
	, map (SkeinHash . HashSize) [256, 512]
	, map (Blake2bHash . HashSize) [256, 512, 160, 224, 384]
	, map (Blake2bpHash . HashSize) [512]
	, map (Blake2sHash . HashSize) [256, 160, 224]
	, map (Blake2spHash . HashSize) [256, 224]
	, [SHA1Hash]
	, [MD5Hash]
	]

{- The SHA256E backend is the default, so genBackendE comes first. -}
backends :: [Backend]
backends = concatMap (\h -> [genBackendE h, genBackend h]) hashes

genBackend :: Hash -> Backend
genBackend hash = Backend
	{ backendVariety = hashKeyVariety hash (HasExt False)
	, getKey = keyValue hash
	, verifyKeyContent = Just $ checkKeyChecksum hash
	, canUpgradeKey = Just needsUpgrade
	, fastMigrate = Just trivialMigrate
	, isStableKey = const True
	}

genBackendE :: Hash -> Backend
genBackendE hash = (genBackend hash)
	{ backendVariety = hashKeyVariety hash (HasExt True)
	, getKey = keyValueE hash
	}

hashKeyVariety :: Hash -> HasExt -> KeyVariety
hashKeyVariety MD5Hash he = MD5Key he
hashKeyVariety SHA1Hash he = SHA1Key he
hashKeyVariety (SHA2Hash size) he = SHA2Key size he
hashKeyVariety (SHA3Hash size) he = SHA3Key size he
hashKeyVariety (SkeinHash size) he = SKEINKey size he
hashKeyVariety (Blake2bHash size) he = Blake2bKey size he
hashKeyVariety (Blake2bpHash size) he = Blake2bpKey size he
hashKeyVariety (Blake2sHash size) he = Blake2sKey size he
hashKeyVariety (Blake2spHash size) he = Blake2spKey size he

{- A key is a hash of its contents. -}
keyValue :: Hash -> KeySource -> MeterUpdate -> Annex (Maybe Key)
keyValue hash source meterupdate = do
	let file = fromRawFilePath (contentLocation source)
	filesize <- liftIO $ getFileSize file
	s <- hashFile hash file meterupdate
	return $ Just $ mkKey $ \k -> k
		{ keyName = encodeBS s
		, keyVariety = hashKeyVariety hash (HasExt False)
		, keySize = Just filesize
		}

{- Extension preserving keys. -}
keyValueE :: Hash -> KeySource -> MeterUpdate -> Annex (Maybe Key)
keyValueE hash source meterupdate =
	keyValue hash source meterupdate >>= maybe (return Nothing) addE
  where
	addE k = do
		maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig
		let ext = selectExtension maxlen (keyFilename source)
		return $ Just $ alterKey k $ \d -> d
			{ keyName = keyName d <> ext
			, keyVariety = hashKeyVariety hash (HasExt True)
			}

selectExtension :: Maybe Int -> RawFilePath -> S.ByteString
selectExtension maxlen f
	| null es = ""
	| otherwise = S.intercalate "." ("":es)
  where
	es = filter (not . S.null) $ reverse $
		take 2 $ filter (S.all validInExtension) $
		takeWhile shortenough $
		reverse $ S.split (fromIntegral (ord '.')) (P.takeExtensions f)
	shortenough e = S.length e <= fromMaybe maxExtensionLen maxlen

maxExtensionLen :: Int
maxExtensionLen = 4 -- long enough for "jpeg"

{- A key's checksum is checked during fsck when it's content is present
 - except for in fast mode. -}
checkKeyChecksum :: Hash -> Key -> FilePath -> Annex Bool
checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do
	fast <- Annex.getState Annex.fast
	exists <- liftIO $ doesFileExist file
	case (exists, fast) of
		(True, False) -> do
			showAction "checksum"
			check <$> hashFile hash file nullMeterUpdate
		_ -> return True
  where
	expected = decodeBS (keyHash key)
	check s
		| s == expected = True
		{- A bug caused checksums to be prefixed with \ in some
		 - cases; still accept these as legal now that the bug has been
		 - fixed. -}
		| '\\' : s == expected = True
		| otherwise = False

	hwfault e = do
		warning $ "hardware fault: " ++ show e
		return False

keyHash :: Key -> S.ByteString
keyHash = fst . splitKeyNameExtension

validInExtension :: Word8 -> Bool
validInExtension c
	| isAlphaNum (chr (fromIntegral c)) = True
	| fromIntegral c == ord '.' = True
	| c <= 127 = False -- other ascii: spaces, punctuation, control chars
	| otherwise = True -- utf8 is allowed, also other encodings

{- Upgrade keys that have the \ prefix on their hash due to a bug, or
 - that contain non-alphanumeric characters in their extension.
 -
 - Also, for a while migrate from eg SHA256E to SHA256 resulted in a SHA256
 - key that contained an extension inside its keyName. Upgrade those
 - keys, removing the extension.
 -}
needsUpgrade :: Key -> Bool
needsUpgrade key = or
	[ "\\" `S8.isPrefixOf` keyHash key
	, S.any (not . validInExtension) (snd $ splitKeyNameExtension key)
	, not (hasExt (fromKey keyVariety key)) && keyHash key /= fromKey keyName key
	]

trivialMigrate :: Key -> Backend -> AssociatedFile -> Annex (Maybe Key)
trivialMigrate oldkey newbackend afile = trivialMigrate' oldkey newbackend afile
	<$> (annexMaxExtensionLength <$> Annex.getGitConfig)

trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Key
trivialMigrate' oldkey newbackend afile maxextlen
	{- Fast migration from hashE to hash backend. -}
	| migratable && hasExt oldvariety = Just $ alterKey oldkey $ \d -> d
		{ keyName = keyHash oldkey
		, keyVariety = newvariety
		}
	{- Fast migration from hash to hashE backend. -}
	| migratable && hasExt newvariety = case afile of
		AssociatedFile Nothing -> Nothing
		AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d
			{ keyName = keyHash oldkey 
				<> selectExtension maxextlen file
			, keyVariety = newvariety
			}
	{- Upgrade to fix bad previous migration that created a
	 - non-extension preserving key, with an extension
	 - in its keyName. -}
	| newvariety == oldvariety && not (hasExt oldvariety) &&
		keyHash oldkey /= fromKey keyName oldkey = 
			Just $ alterKey oldkey $ \d -> d
				{ keyName = keyHash oldkey
				}
	| otherwise = Nothing
  where
	migratable = oldvariety /= newvariety 
		&& sameExceptExt oldvariety newvariety
	oldvariety = fromKey keyVariety oldkey
	newvariety = backendVariety newbackend

hashFile :: Hash -> FilePath -> MeterUpdate -> Annex String
hashFile hash file meterupdate = 
	liftIO $ withMeteredFile file meterupdate $ \b -> do
		let h = hasher b
		-- Force full evaluation of hash so whole file is read
		-- before returning.
		evaluate (rnf h)
		return h
  where
	hasher = case hash of
		MD5Hash -> md5Hasher
		SHA1Hash -> sha1Hasher
		SHA2Hash hashsize -> sha2Hasher hashsize
		SHA3Hash hashsize -> sha3Hasher hashsize
		SkeinHash hashsize -> skeinHasher hashsize
		Blake2bHash hashsize -> blake2bHasher hashsize
		Blake2bpHash hashsize -> blake2bpHasher hashsize
		Blake2sHash hashsize -> blake2sHasher hashsize
		Blake2spHash hashsize -> blake2spHasher hashsize

sha2Hasher :: HashSize -> (L.ByteString -> String)
sha2Hasher (HashSize hashsize)
	| hashsize == 256 = use sha2_256
	| hashsize == 224 = use sha2_224
	| hashsize == 384 = use sha2_384
	| hashsize == 512 = use sha2_512
	| otherwise = error $ "unsupported SHA size " ++ show hashsize
  where
	use hasher = show . hasher

sha3Hasher :: HashSize -> (L.ByteString -> String)
sha3Hasher (HashSize hashsize)
	| hashsize == 256 = show . sha3_256
	| hashsize == 224 = show . sha3_224
	| hashsize == 384 = show . sha3_384
	| hashsize == 512 = show . sha3_512
	| otherwise = error $ "unsupported SHA3 size " ++ show hashsize

skeinHasher :: HashSize -> (L.ByteString -> String)
skeinHasher (HashSize hashsize)
	| hashsize == 256 = show . skein256
	| hashsize == 512 = show . skein512
	| otherwise = error $ "unsupported SKEIN size " ++ show hashsize

blake2bHasher :: HashSize -> (L.ByteString -> String)
blake2bHasher (HashSize hashsize)
	| hashsize == 256 = show . blake2b_256
	| hashsize == 512 = show . blake2b_512
	| hashsize == 160 = show . blake2b_160
	| hashsize == 224 = show . blake2b_224
	| hashsize == 384 = show . blake2b_384
	| otherwise = error $ "unsupported BLAKE2B size " ++ show hashsize

blake2bpHasher :: HashSize -> (L.ByteString -> String)
blake2bpHasher (HashSize hashsize)
	| hashsize == 512 = show . blake2bp_512
	| otherwise = error $ "unsupported BLAKE2BP size " ++ show hashsize

blake2sHasher :: HashSize -> (L.ByteString -> String)
blake2sHasher (HashSize hashsize)
	| hashsize == 256 = show . blake2s_256
	| hashsize == 160 = show . blake2s_160
	| hashsize == 224 = show . blake2s_224
	| otherwise = error $ "unsupported BLAKE2S size " ++ show hashsize

blake2spHasher :: HashSize -> (L.ByteString -> String)
blake2spHasher (HashSize hashsize)
	| hashsize == 256 = show . blake2sp_256
	| hashsize == 224 = show . blake2sp_224
	| otherwise = error $ "unsupported BLAKE2SP size " ++ show hashsize

sha1Hasher :: L.ByteString -> String
sha1Hasher = show . sha1

md5Hasher :: L.ByteString -> String
md5Hasher = show . md5

{- A varient of the SHA256E backend, for testing that needs special keys
 - that cannot collide with legitimate keys in the repository.
 -
 - This is accomplished by appending a special extension to the key,
 - that is not one that selectExtension would select (due to being too
 - long).
 -}
testKeyBackend :: Backend
testKeyBackend = 
	let b = genBackendE (SHA2Hash (HashSize 256))
	in b { getKey = \ks p -> (fmap addE) <$> getKey b ks p } 
  where
	addE k = alterKey k $ \d -> d
		{ keyName = keyName d <> longext
		}
	longext = ".this-is-a-test-key"