diff options
-rw-r--r-- | COPYRIGHT | 2 | ||||
-rw-r--r-- | Logs.hs | 16 | ||||
-rw-r--r-- | Logs/ContentIdentifier/Pure.hs | 57 | ||||
-rw-r--r-- | doc/internals.mdwn | 12 | ||||
-rw-r--r-- | git-annex.cabal | 1 |
5 files changed, 84 insertions, 4 deletions
@@ -10,7 +10,7 @@ Copyright: © 2012-2017 Joey Hess <id@joeyh.name> © 2014 Sören Brunk License: AGPL-3+ -Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs +Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs Copyright: © 2011-2019 Joey Hess <id@joeyh.name> License: AGPL-3+ @@ -1,6 +1,6 @@ {- git-annex log file names - - - Copyright 2013-2018 Joey Hess <id@joeyh.name> + - Copyright 2013-2019 Joey Hess <id@joeyh.name> - - Licensed under the GNU GPL version 3 or higher. -} @@ -25,7 +25,7 @@ data LogVariety getLogVariety :: FilePath -> Maybe LogVariety getLogVariety f | f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog - | isRemoteStateLog f = Just NewUUIDBasedLog + | isRemoteStateLog f || isRemoteContentIdentifierLog f = Just NewUUIDBasedLog | isChunkLog f = ChunkLog <$> chunkLogFileKey f | isRemoteMetaDataLog f = Just RemoteMetaDataLog | isMetaDataLog f || f `elem` otherLogs = Just OtherLog @@ -54,7 +54,7 @@ presenceLogs f = , locationLogFileKey f ] -{- Logs that are neither UUID based nor presence logs. -} +{- Top-level logs that are neither UUID based nor presence logs. -} otherLogs :: [FilePath] otherLogs = [ numcopiesLog @@ -197,3 +197,13 @@ remoteMetaDataLogExt = ".log.rmet" isRemoteMetaDataLog :: FilePath -> Bool isRemoteMetaDataLog path = remoteMetaDataLogExt `isSuffixOf` path + +{- The filename of the remote content identifier log for a given key. 
-} +remoteContentIdentifierLogFile :: GitConfig -> Key -> FilePath +remoteContentIdentifierLogFile config key = branchHashDir config key </> keyFile key ++ remoteContentIdentifierExt + +remoteContentIdentifierExt :: String +remoteContentIdentifierExt = ".log.cid" + +isRemoteContentIdentifierLog :: FilePath -> Bool +isRemoteContentIdentifierLog path = remoteContentIdentifierExt `isSuffixOf` path diff --git a/Logs/ContentIdentifier/Pure.hs b/Logs/ContentIdentifier/Pure.hs new file mode 100644 index 0000000000..536b60fa47 --- /dev/null +++ b/Logs/ContentIdentifier/Pure.hs @@ -0,0 +1,57 @@ +{- Remote content identifier logs, pure operations. + - + - Copyright 2019 Joey Hess <id@joeyh.name> + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Logs.ContentIdentifier.Pure + ( ContentIdentifierLog + , parseLog + , buildLog + ) where + +import Annex.Common +import Logs.MapLog +import Data.Int +import Types.Remote (ContentIdentifier(..)) +import Utility.Base64 + +import qualified Data.ByteString as S +import qualified Data.ByteString.Char8 as S8 +import qualified Data.ByteString.Lazy as L +import qualified Data.Attoparsec.ByteString.Lazy as A +import qualified Data.Attoparsec.ByteString.Char8 as A8 +import Data.ByteString.Builder + +type ContentIdentifierLog = MapLog UUID [ContentIdentifier] + +buildLog :: ContentIdentifierLog -> Builder +buildLog = buildMapLog buildUUID valuebuilder + where + valuebuilder [] = mempty + valuebuilder [c] = buildcid c + valuebuilder (c:cs) = buildcid c <> charUtf8 ' ' <> valuebuilder cs + buildcid (ContentIdentifier c) + | S8.any (`elem` [' ', '\r', '\n']) c || "!" `S8.isPrefixOf` c = + charUtf8 '!' <> byteString (toB64' c) + | otherwise = byteString c + +parseLog :: L.ByteString -> ContentIdentifierLog +parseLog = parseMapLog + (toUUID <$> A.takeByteString) + (reverse . catMaybes <$> valueparser []) + where + valueparser l = do + b <- A8.takeWhile1 (/= ' ') + let cid = if "!" 
`S8.isPrefixOf` b + then ContentIdentifier <$> fromB64Maybe' (S.drop 1 b) + else Just $ ContentIdentifier b + ifM A8.atEnd + ( return (cid:l) + , do + _ <- A8.char ' ' + valueparser (cid:l) + ) diff --git a/doc/internals.mdwn b/doc/internals.mdwn index bf7c3c48cb..9027ba289c 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -281,6 +281,18 @@ For example: 1287290776.765152s 26339d22-446b-11e0-9101-002170d25c55:x +1 1291237510.141453s 26339d22-446b-11e0-9101-002170d25c55:x -1 26339d22-446b-11e0-9101-002170d25c55:x +2 +## `aaa/bbb/*.log.cid` + +These log files store per-remote content identifiers for keys. +A given key may have any number of content identifiers. + +The format is a timestamp, followed by the uuid of the remote, +followed by the content identifiers. If a content identifier +contains any whitespace (including \r or \n), it will be base64 +encoded. Base64 encoded values are indicated by prefixing them with "!". + + 1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 5248916 5250378 + ## `aaa/bbb/*.log.cnk` These log files are used when objects are stored in chunked form on diff --git a/git-annex.cabal b/git-annex.cabal index 67c3ae0502..c6000938b3 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -869,6 +869,7 @@ Executable git-annex Logs.Chunk Logs.Chunk.Pure Logs.Config + Logs.ContentIdentifier.Pure Logs.Difference Logs.Difference.Pure Logs.Export |