summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2019-01-14 16:33:20 -0400
committerJoey Hess <joeyh@joeyh.name>2019-01-14 16:37:28 -0400
commit4536c93bb2ecf114ab711beac33fa358facd6985 (patch)
treef14d3b7d9224d4979be87c5e8ba52983da359a02
parent918868915c2859d2f8d68afaa222d92143ec64f2 (diff)
cache the serialization of a Key
This will speed up the common case where a Key is deserialized from disk, but is then serialized to build eg, the path to the annex object. It means that every place a Key has any of its fields changed, the cache has to be dropped. I've grepped and found them all. But, it would be better to avoid that gotcha somehow..
-rw-r--r--Annex/Export.hs1
-rw-r--r--Backend.hs5
-rw-r--r--Backend/Hash.hs8
-rw-r--r--CHANGELOG2
-rw-r--r--Command/AddUrl.hs5
-rw-r--r--Key.hs13
-rw-r--r--Remote/Helper/Chunked.hs5
-rw-r--r--Types/Key.hs1
-rw-r--r--Upgrade/V1.hs1
9 files changed, 34 insertions, 7 deletions
diff --git a/Annex/Export.hs b/Annex/Export.hs
index 47a6a75249..2cc110cb83 100644
--- a/Annex/Export.hs
+++ b/Annex/Export.hs
@@ -42,6 +42,7 @@ exportKey sha = mk <$> catKey sha
, keyMtime = Nothing
, keyChunkSize = Nothing
, keyChunkNum = Nothing
+ , keySerialization = Nothing
}
exportTree :: Remote.RemoteConfig -> Bool
diff --git a/Backend.hs b/Backend.hs
index 2932253aec..5b7b82e40c 100644
--- a/Backend.hs
+++ b/Backend.hs
@@ -58,7 +58,10 @@ genKey source preferredbackend = do
Just k -> Just (makesane k, b)
where
-- keyNames should not contain newline characters.
- makesane k = k { keyName = S8.map fixbadchar (keyName k) }
+ makesane k = k
+ { keyName = S8.map fixbadchar (keyName k)
+ , keySerialization = Nothing
+ }
fixbadchar c
| c == '\n' = '_'
| otherwise = c
diff --git a/Backend/Hash.hs b/Backend/Hash.hs
index b8977301b3..6c42af19c3 100644
--- a/Backend/Hash.hs
+++ b/Backend/Hash.hs
@@ -181,6 +181,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
| migratable && hasExt oldvariety = Just $ oldkey
{ keyName = keyHash oldkey
, keyVariety = newvariety
+ , keySerialization = Nothing
}
{- Fast migration from hash to hashE backend. -}
| migratable && hasExt newvariety = case afile of
@@ -189,6 +190,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
{ keyName = keyHash oldkey
<> encodeBS (selectExtension maxextlen file)
, keyVariety = newvariety
+ , keySerialization = Nothing
}
{- Upgrade to fix bad previous migration that created a
- non-extension preserving key, with an extension
@@ -196,6 +198,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
| newvariety == oldvariety && not (hasExt oldvariety) &&
keyHash oldkey /= keyName oldkey = Just $ oldkey
{ keyName = keyHash oldkey
+ , keySerialization = Nothing
}
| otherwise = Nothing
where
@@ -288,5 +291,8 @@ testKeyBackend =
let b = genBackendE (SHA2Hash (HashSize 256))
in b { getKey = (fmap addE) <$$> getKey b }
where
- addE k = k { keyName = keyName k <> longext }
+ addE k = k
+ { keyName = keyName k <> longext
+ , keySerialization = Nothing
+ }
longext = ".this-is-a-test-key"
diff --git a/CHANGELOG b/CHANGELOG
index e44ee49d0d..b185bd513f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -14,7 +14,7 @@ git-annex (7.20181212) UNRELEASED; urgency=medium
* importfeed: Better error message when downloading the feed fails.
* Some optimisations, including a 10x faster timestamp parser,
a 7x faster key parser, and improved parsing and serialization of
- git-annex branch data.
+ git-annex branch data. Some commands will run up to 15% faster.
* Stricter parser for keys doesn't allow doubled fields or out of order fields.
* The benchmark command, which only had some old benchmarking of the sqlite
databases before, now allows benchmarking any other git-annex commands.
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index be008f63aa..cff1166fdb 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -377,7 +377,10 @@ finishDownloadWith tmp u url file = do
{- Adds the url size to the Key. -}
addSizeUrlKey :: Url.UrlInfo -> Key -> Key
-addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo }
+addSizeUrlKey urlinfo key = key
+ { keySize = Url.urlSize urlinfo
+ , keySerialization = Nothing
+ }
{- Adds worktree file to the repository. -}
addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()
diff --git a/Key.hs b/Key.hs
index 16956ee5ea..eaa179d9f4 100644
--- a/Key.hs
+++ b/Key.hs
@@ -51,6 +51,7 @@ stubKey = Key
, keyMtime = Nothing
, keyChunkSize = Nothing
, keyChunkNum = Nothing
+ , keySerialization = Nothing
}
-- Gets the parent of a chunk key.
@@ -58,6 +59,7 @@ nonChunkKey :: Key -> Key
nonChunkKey k = k
{ keyChunkSize = Nothing
, keyChunkNum = Nothing
+ , keySerialization = Nothing
}
-- Where a chunk key is offset within its parent.
@@ -97,7 +99,9 @@ serializeKey :: Key -> String
serializeKey = decodeBL' . serializeKey'
serializeKey' :: Key -> L.ByteString
-serializeKey' = toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty . buildKey
+serializeKey' k = case keySerialization k of
+ Nothing -> toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty (buildKey k)
+ Just b -> L.fromStrict b
{- This is a strict parser for security reasons; a key
- can contain only 4 fields, which all consist only of numbers.
@@ -127,6 +131,7 @@ keyParser = do
, keyMtime = m
, keyChunkSize = cs
, keyChunkNum = cn
+ , keySerialization = Nothing
}
else fail "invalid keyName"
where
@@ -140,7 +145,10 @@ deserializeKey :: String -> Maybe Key
deserializeKey = deserializeKey' . encodeBS'
deserializeKey' :: S.ByteString -> Maybe Key
-deserializeKey' b = eitherToMaybe $ A.parseOnly keyParser b
+deserializeKey' b = either
+ (const Nothing)
+ (\k -> Just $ k { keySerialization = Just b })
+ (A.parseOnly keyParser b)
{- This splits any extension out of the keyName, returning the
- keyName minus extension, and the extension (including leading dot).
@@ -177,6 +185,7 @@ instance Arbitrary Key where
<*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative
<*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
<*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
+ <*> pure Nothing
instance Hashable Key where
hashIO32 = hashIO32 . serializeKey'
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
index f3c69c38dd..595a4c4d63 100644
--- a/Remote/Helper/Chunked.hs
+++ b/Remote/Helper/Chunked.hs
@@ -68,7 +68,10 @@ chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream
chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..]
where
mk chunknum = sizedk { keyChunkNum = Just chunknum }
- sizedk = basek { keyChunkSize = Just (toInteger chunksize) }
+ sizedk = basek
+ { keyChunkSize = Just (toInteger chunksize)
+ , keySerialization = Nothing
+ }
nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream)
nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)
diff --git a/Types/Key.hs b/Types/Key.hs
index df0e042606..97d548ff78 100644
--- a/Types/Key.hs
+++ b/Types/Key.hs
@@ -23,6 +23,7 @@ data Key = Key
, keyMtime :: Maybe EpochTime
, keyChunkSize :: Maybe Integer
, keyChunkNum :: Maybe Integer
+ , keySerialization :: Maybe S.ByteString -- ^ cached serialization
} deriving (Eq, Ord, Read, Show)
{- A filename may be associated with a Key. -}
diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs
index c0dafbb842..e58e5818f7 100644
--- a/Upgrade/V1.hs
+++ b/Upgrade/V1.hs
@@ -150,6 +150,7 @@ readKey1 v
, keyVariety = parseKeyVariety (encodeBS b)
, keySize = s
, keyMtime = t
+ , keySerialization = Nothing
}
where
bits = splitc ':' v