From 664cc987e806800ef46794f356003321170c1060 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 7 Dec 2015 15:22:01 -0400 Subject: support pointer files Backend.lookupFile is changed to always fall back to catKey when operating on a file that's not a symlink. catKey is changed to understand pointer files, as well as annex symlinks. Before, catKey needed a file mode witness, to be sure it was looking at a symlink. That was complicated stuff. Now, it doesn't actually care if a file in git is a symlink or not; in either case asking git for the content of the file will get the pointer to the key. This does mean that git-annex will treat a link foo -> WORM--bar as a git-annex file, and also treats a regular file containing annex/objects/WORM--bar as a git-annex file. Calling catKey could make git-annex commands need to do more work than before. This would especially be the case if a repo contained many regular files, and only a few annexed files, as now git-annex will need to ask git about the contents of the regular files. --- Annex/CatFile.hs | 79 +++++++++++++++++++++++--------------------------------- 1 file changed, 32 insertions(+), 47 deletions(-) (limited to 'Annex/CatFile.hs') diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 1791498446..47ea86a31f 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -1,6 +1,6 @@ {- git cat-file interface, with handle automatically stored in the Annex monad - - - Copyright 2011-2013 Joey Hess + - Copyright 2011-2015 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -14,9 +14,10 @@ module Annex.CatFile ( catFileHandle, catFileStop, catKey, + parsePointer, catKeyFile, catKeyFileHEAD, - catLink, + catSymLinkTarget, ) where import qualified Data.ByteString.Lazy as L @@ -29,8 +30,8 @@ import qualified Git.CatFile import qualified Annex import Git.Types import Git.FilePath -import Git.FileMode import qualified Git.Ref +import Types.Key catFile :: Git.Branch -> FilePath -> Annex L.ByteString catFile branch file = do @@ -80,52 +81,36 @@ catFileStop = do (s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s) liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m) -{- From the Sha or Ref of a symlink back to the key. - - - - Requires a mode witness, to guarantee that the file is a symlink. - -} -catKey :: Ref -> FileMode -> Annex (Maybe Key) -catKey = catKey' True +{- From ref to a symlink or a pointer file, get the key. -} +catKey :: Ref -> Annex (Maybe Key) +catKey ref = do + o <- catObject ref + if L.length o > maxsz + then return Nothing -- too big + else do + let l = decodeBS o + let l' = fromInternalGitPath l + return $ if isLinkToAnnex l' + then fileKey $ takeFileName l' + else parsePointer l + where + -- Want to avoid buffering really big files in git into memory. + -- 8192 bytes is plenty for a pointer to a key. + -- Pad some more to allow for any pointer files that might have + -- lines after the key explaining what the file is used for. + maxsz = 81920 -catKey' :: Bool -> Sha -> FileMode -> Annex (Maybe Key) -catKey' modeguaranteed sha mode - | isSymLink mode = do - l <- catLink modeguaranteed sha - return $ if isLinkToAnnex l - then fileKey $ takeFileName l - else Nothing - | otherwise = return Nothing +{- Only look at the first line of a pointer file. -} +parsePointer :: String -> Maybe Key +parsePointer s = headMaybe (lines s) >>= file2key {- Gets a symlink target. -} -catLink :: Bool -> Sha -> Annex String -catLink modeguaranteed sha = fromInternalGitPath . decodeBS <$> get - where - -- If the mode is not guaranteed to be correct, avoid - -- buffering the whole file content, which might be large. - -- 8192 is enough if it really is a symlink. - get - | modeguaranteed = catObject sha - | otherwise = L.take 8192 <$> catObject sha - -{- Looks up the key corresponding to the Ref using the running cat-file. - - - - Currently this always has to look in HEAD, because cat-file --batch - - does not offer a way to specify that we want to look up a tree object - - in the index. So if the index has a file staged not as a symlink, - - and it is a symlink in head, the wrong mode is gotten. - - Also, we have to assume the file is a symlink if it's not yet committed - - to HEAD. For these reasons, modeguaranteed is not set. - -} -catKeyChecked :: Bool -> Ref -> Annex (Maybe Key) -catKeyChecked needhead ref@(Ref r) = - catKey' False ref =<< findmode <$> catTree treeref +catSymLinkTarget :: Sha -> Annex String +catSymLinkTarget sha = fromInternalGitPath . decodeBS <$> get where - pathparts = split "/" r - dir = intercalate "/" $ take (length pathparts - 1) pathparts - file = fromMaybe "" $ lastMaybe pathparts - treeref = Ref $ if needhead then "HEAD" ++ dir ++ "/" else dir ++ "/" - findmode = fromMaybe symLinkMode . headMaybe . - map snd . filter (\p -> fst p == file) + -- Avoid buffering the whole file content, which might be large. + -- 8192 is enough if it really is a symlink or pointer file. + get = L.take 8192 <$> catObject sha {- From a file in the repository back to the key. - @@ -151,8 +136,8 @@ catKeyChecked needhead ref@(Ref r) = catKeyFile :: FilePath -> Annex (Maybe Key) catKeyFile f = ifM (Annex.getState Annex.daemon) ( catKeyFileHEAD f - , catKeyChecked True $ Git.Ref.fileRef f + , catKey $ Git.Ref.fileRef f ) catKeyFileHEAD :: FilePath -> Annex (Maybe Key) -catKeyFileHEAD f = catKeyChecked False $ Git.Ref.fileFromRef Git.Ref.headRef f +catKeyFileHEAD f = catKey $ Git.Ref.fileFromRef Git.Ref.headRef f -- cgit v1.2.3