summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2019-02-20 15:36:09 -0400
committerJoey Hess <joeyh@joeyh.name>2019-02-20 15:40:07 -0400
commite8bfc3640b30e395a694d0125b874a2cce109345 (patch)
treed79a780bf88d5d96552d66e405ee8ffb7b5de879
parent0442842622867ec741d2d2802ae2c3bb812c2c59 (diff)
storing ContentIdentifier in the git-annex branch
-rw-r--r--COPYRIGHT2
-rw-r--r--Logs.hs16
-rw-r--r--Logs/ContentIdentifier/Pure.hs57
-rw-r--r--doc/internals.mdwn12
-rw-r--r--git-annex.cabal1
5 files changed, 84 insertions, 4 deletions
diff --git a/COPYRIGHT b/COPYRIGHT
index ceffc4d717..40c7e02d79 100644
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -10,7 +10,7 @@ Copyright: © 2012-2017 Joey Hess <id@joeyh.name>
© 2014 Sören Brunk
License: AGPL-3+
-Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
+Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
Copyright: © 2011-2019 Joey Hess <id@joeyh.name>
License: AGPL-3+
diff --git a/Logs.hs b/Logs.hs
index 63d64efadd..2bced05608 100644
--- a/Logs.hs
+++ b/Logs.hs
@@ -1,6 +1,6 @@
{- git-annex log file names
-
- - Copyright 2013-2018 Joey Hess <id@joeyh.name>
+ - Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -25,7 +25,7 @@ data LogVariety
getLogVariety :: FilePath -> Maybe LogVariety
getLogVariety f
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
- | isRemoteStateLog f = Just NewUUIDBasedLog
+ | isRemoteStateLog f || isRemoteContentIdentifierLog f = Just NewUUIDBasedLog
| isChunkLog f = ChunkLog <$> chunkLogFileKey f
| isRemoteMetaDataLog f = Just RemoteMetaDataLog
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
@@ -54,7 +54,7 @@ presenceLogs f =
, locationLogFileKey f
]
-{- Logs that are neither UUID based nor presence logs. -}
+{- Top-level logs that are neither UUID based nor presence logs. -}
otherLogs :: [FilePath]
otherLogs =
[ numcopiesLog
@@ -197,3 +197,13 @@ remoteMetaDataLogExt = ".log.rmet"
isRemoteMetaDataLog :: FilePath -> Bool
isRemoteMetaDataLog path = remoteMetaDataLogExt `isSuffixOf` path
+
+{- Where in the branch the remote content identifier log of a key lives. -}
+remoteContentIdentifierLogFile :: GitConfig -> Key -> FilePath
+remoteContentIdentifierLogFile config key = branchHashDir config key </> (keyFile key ++ remoteContentIdentifierExt)
+
+remoteContentIdentifierExt :: String
+remoteContentIdentifierExt = ".log.cid"
+
+isRemoteContentIdentifierLog :: FilePath -> Bool
+isRemoteContentIdentifierLog = isSuffixOf remoteContentIdentifierExt
diff --git a/Logs/ContentIdentifier/Pure.hs b/Logs/ContentIdentifier/Pure.hs
new file mode 100644
index 0000000000..536b60fa47
--- /dev/null
+++ b/Logs/ContentIdentifier/Pure.hs
@@ -0,0 +1,57 @@
+{- Remote content identifier logs, pure operations.
+ -
+ - Copyright 2019 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Logs.ContentIdentifier.Pure
+ ( ContentIdentifierLog
+ , parseLog
+ , buildLog
+ ) where
+
+import Annex.Common
+import Logs.MapLog
+import Data.Int
+import Types.Remote (ContentIdentifier(..))
+import Utility.Base64
+
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Char8 as S8
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Attoparsec.ByteString.Lazy as A
+import qualified Data.Attoparsec.ByteString.Char8 as A8
+import Data.ByteString.Builder
+
+type ContentIdentifierLog = MapLog UUID [ContentIdentifier]
+
+{- Builds the log, separating content identifiers with spaces. One that has
+ - ' ', '\r' or '\n' in it, or starts with "!", is written as "!" plus base64. -}
+buildLog :: ContentIdentifierLog -> Builder
+buildLog = buildMapLog buildUUID buildcids
+  where
+	buildcids [] = mempty
+	buildcids (c:cs) = buildcid c <> foldMap ((charUtf8 ' ' <>) . buildcid) cs
+	buildcid (ContentIdentifier c)
+		| "!" `S8.isPrefixOf` c || S8.any (`elem` [' ', '\r', '\n']) c = charUtf8 '!' <> byteString (toB64' c)
+		| otherwise = byteString c
+
+{- Parses a remote content identifier log.
+ - Each value is a list of content identifiers separated by single spaces.
+ - One that starts with "!" has the rest of it base64 decoded, and is left
+ - out of the result when that decoding fails. -}
+parseLog :: L.ByteString -> ContentIdentifierLog
+parseLog = parseMapLog (toUUID <$> A.takeByteString) (catMaybes <$> cids)
+  where
+	cids = do
+		c <- decode <$> A8.takeWhile1 (/= ' ')
+		atend <- A8.atEnd
+		if atend
+			then return [c]
+			else (c :) <$> (A8.char ' ' *> cids)
+	decode b
+		| "!" `S8.isPrefixOf` b = ContentIdentifier <$> fromB64Maybe' (S.drop 1 b)
+		| otherwise = Just (ContentIdentifier b)
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index bf7c3c48cb..9027ba289c 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -281,6 +281,18 @@ For example:
1287290776.765152s 26339d22-446b-11e0-9101-002170d25c55:x +1
1291237510.141453s 26339d22-446b-11e0-9101-002170d25c55:x -1 26339d22-446b-11e0-9101-002170d25c55:x +2
+## `aaa/bbb/*.log.cid`
+
+These log files store per-remote content identifiers for keys.
+A given key may have any number of content identifiers.
+
+The format is a timestamp, followed by the uuid of the remote,
+followed by the content identifiers, separated by spaces. If a content
+identifier contains a space, \r or \n, or begins with "!", it will be
+base64 encoded. Base64 encoded values are indicated by prefixing them with "!".
+
+ 1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 5248916 5250378
+
## `aaa/bbb/*.log.cnk`
These log files are used when objects are stored in chunked form on
diff --git a/git-annex.cabal b/git-annex.cabal
index 67c3ae0502..c6000938b3 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -869,6 +869,7 @@ Executable git-annex
Logs.Chunk
Logs.Chunk.Pure
Logs.Config
+ Logs.ContentIdentifier.Pure
Logs.Difference
Logs.Difference.Pure
Logs.Export