summaryrefslogtreecommitdiff
path: root/Annex/CatFile.hs
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2015-12-07 15:22:01 -0400
committerJoey Hess <joeyh@joeyh.name>2015-12-07 15:35:36 -0400
commit664cc987e806800ef46794f356003321170c1060 (patch)
treea352081605262c79f6bbd80ccd752fcd20252e2c /Annex/CatFile.hs
parent2cbcb4f1a8e131ec532ce379d8fcdd63f7faa23e (diff)
support pointer files
Backend.lookupFile is changed to always fall back to catKey when operating on a file that's not a symlink. catKey is changed to understand pointer files, as well as annex symlinks. Before, catKey needed a file mode witness, to be sure it was looking at a symlink. That was complicated stuff. Now, it doesn't actually care if a file in git is a symlink or not; in either case asking git for the content of the file will get the pointer to the key. This does mean that git-annex will treat a link foo -> WORM--bar as a git-annex file, and also treats a regular file containing annex/objects/WORM--bar as a git-annex file. Calling catKey could make git-annex commands need to do more work than before. This would especially be the case if a repo contained many regular files, and only a few annexed files, as now git-annex will need to ask git about the contents of the regular files.
Diffstat (limited to 'Annex/CatFile.hs')
-rw-r--r--Annex/CatFile.hs79
1 files changed, 32 insertions, 47 deletions
diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs
index 1791498446..47ea86a31f 100644
--- a/Annex/CatFile.hs
+++ b/Annex/CatFile.hs
@@ -1,6 +1,6 @@
{- git cat-file interface, with handle automatically stored in the Annex monad
-
- - Copyright 2011-2013 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -14,9 +14,10 @@ module Annex.CatFile (
catFileHandle,
catFileStop,
catKey,
+ parsePointer,
catKeyFile,
catKeyFileHEAD,
- catLink,
+ catSymLinkTarget,
) where
import qualified Data.ByteString.Lazy as L
@@ -29,8 +30,8 @@ import qualified Git.CatFile
import qualified Annex
import Git.Types
import Git.FilePath
-import Git.FileMode
import qualified Git.Ref
+import Types.Key
catFile :: Git.Branch -> FilePath -> Annex L.ByteString
catFile branch file = do
@@ -80,52 +81,36 @@ catFileStop = do
(s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s)
liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m)
-{- From the Sha or Ref of a symlink back to the key.
- -
- - Requires a mode witness, to guarantee that the file is a symlink.
- -}
-catKey :: Ref -> FileMode -> Annex (Maybe Key)
-catKey = catKey' True
+{- From ref to a symlink or a pointer file, get the key. -}
+catKey :: Ref -> Annex (Maybe Key)
+catKey ref = do
+ o <- catObject ref
+ if L.length o > maxsz
+ then return Nothing -- too big
+ else do
+ let l = decodeBS o
+ let l' = fromInternalGitPath l
+ return $ if isLinkToAnnex l'
+ then fileKey $ takeFileName l'
+ else parsePointer l
+ where
+ -- Want to avoid buffering really big files in git into memory.
+ -- 8192 bytes is plenty for a pointer to a key.
+ -- Pad some more to allow for any pointer files that might have
+ -- lines after the key explaining what the file is used for.
+ maxsz = 81920
-catKey' :: Bool -> Sha -> FileMode -> Annex (Maybe Key)
-catKey' modeguaranteed sha mode
- | isSymLink mode = do
- l <- catLink modeguaranteed sha
- return $ if isLinkToAnnex l
- then fileKey $ takeFileName l
- else Nothing
- | otherwise = return Nothing
+{- Only look at the first line of a pointer file. -}
+parsePointer :: String -> Maybe Key
+parsePointer s = headMaybe (lines s) >>= file2key
{- Gets a symlink target. -}
-catLink :: Bool -> Sha -> Annex String
-catLink modeguaranteed sha = fromInternalGitPath . decodeBS <$> get
- where
- -- If the mode is not guaranteed to be correct, avoid
- -- buffering the whole file content, which might be large.
- -- 8192 is enough if it really is a symlink.
- get
- | modeguaranteed = catObject sha
- | otherwise = L.take 8192 <$> catObject sha
-
-{- Looks up the key corresponding to the Ref using the running cat-file.
- -
- - Currently this always has to look in HEAD, because cat-file --batch
- - does not offer a way to specify that we want to look up a tree object
- - in the index. So if the index has a file staged not as a symlink,
- - and it is a symlink in head, the wrong mode is gotten.
- - Also, we have to assume the file is a symlink if it's not yet committed
- - to HEAD. For these reasons, modeguaranteed is not set.
- -}
-catKeyChecked :: Bool -> Ref -> Annex (Maybe Key)
-catKeyChecked needhead ref@(Ref r) =
- catKey' False ref =<< findmode <$> catTree treeref
+catSymLinkTarget :: Sha -> Annex String
+catSymLinkTarget sha = fromInternalGitPath . decodeBS <$> get
where
- pathparts = split "/" r
- dir = intercalate "/" $ take (length pathparts - 1) pathparts
- file = fromMaybe "" $ lastMaybe pathparts
- treeref = Ref $ if needhead then "HEAD" ++ dir ++ "/" else dir ++ "/"
- findmode = fromMaybe symLinkMode . headMaybe .
- map snd . filter (\p -> fst p == file)
+ -- Avoid buffering the whole file content, which might be large.
+ -- 8192 is enough if it really is a symlink or pointer file.
+ get = L.take 8192 <$> catObject sha
{- From a file in the repository back to the key.
-
@@ -151,8 +136,8 @@ catKeyChecked needhead ref@(Ref r) =
catKeyFile :: FilePath -> Annex (Maybe Key)
catKeyFile f = ifM (Annex.getState Annex.daemon)
( catKeyFileHEAD f
- , catKeyChecked True $ Git.Ref.fileRef f
+ , catKey $ Git.Ref.fileRef f
)
catKeyFileHEAD :: FilePath -> Annex (Maybe Key)
-catKeyFileHEAD f = catKeyChecked False $ Git.Ref.fileFromRef Git.Ref.headRef f
+catKeyFileHEAD f = catKey $ Git.Ref.fileFromRef Git.Ref.headRef f