summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2019-12-11 14:12:22 -0400
committerJoey Hess <joeyh@joeyh.name>2019-12-11 15:25:07 -0400
commitc19211774f509f2b0334073b599295538871a00f (patch)
tree315e2b66d847f6a7141fefbe549e82f4586be691
parentbdec7fed9cae4fe5c3cbdf0ee2ee6ae8530bbc19 (diff)
use filepath-bytestring for annex object manipulations
git-annex find is now RawFilePath end to end, no string conversions. So is git-annex get when it does not need to get anything. So this is a major milestone on optimisation. Benchmarks indicate around 30% speedup in both commands. Probably many other performance improvements. All or nearly all places where a file is statted use RawFilePath now.
-rw-r--r--Annex/AutoMerge.hs5
-rw-r--r--Annex/Branch.hs2
-rw-r--r--Annex/Content.hs66
-rw-r--r--Annex/Content/PointerFile.hs7
-rw-r--r--Annex/DirHashes.hs32
-rw-r--r--Annex/Ingest.hs13
-rw-r--r--Annex/InodeSentinal.hs4
-rw-r--r--Annex/Journal.hs30
-rw-r--r--Annex/Link.hs8
-rw-r--r--Annex/Locations.hs64
-rw-r--r--Annex/WorkTree.hs7
-rw-r--r--Assistant/Threads/Committer.hs2
-rw-r--r--Assistant/Upgrade.hs2
-rw-r--r--CHANGELOG12
-rw-r--r--CmdLine/Seek.hs2
-rw-r--r--Command/ContentLocation.hs7
-rw-r--r--Command/DiffDriver.hs3
-rw-r--r--Command/Find.hs4
-rw-r--r--Command/Fix.hs15
-rw-r--r--Command/Fsck.hs18
-rw-r--r--Command/Import.hs2
-rw-r--r--Command/Lock.hs24
-rw-r--r--Command/Migrate.hs2
-rw-r--r--Command/Multicast.hs3
-rw-r--r--Command/ReKey.hs6
-rw-r--r--Command/Smudge.hs8
-rw-r--r--Command/TestRemote.hs6
-rw-r--r--Command/Unannex.hs2
-rw-r--r--Command/Uninit.hs3
-rw-r--r--Command/Unused.hs2
-rw-r--r--Database/Keys.hs12
-rw-r--r--Limit.hs6
-rw-r--r--Logs.hs25
-rw-r--r--P2P/Annex.hs2
-rw-r--r--Remote/Adb.hs2
-rw-r--r--Remote/Directory.hs5
-rw-r--r--Remote/External.hs4
-rw-r--r--Remote/GCrypt.hs3
-rw-r--r--Remote/Git.hs9
-rw-r--r--Remote/Hook.hs3
-rw-r--r--Remote/Rsync.hs4
-rw-r--r--Remote/Rsync/RsyncUrl.hs11
-rw-r--r--Remote/WebDAV/DavLocation.hs4
-rw-r--r--Test.hs3
-rw-r--r--Upgrade/V1.hs4
-rw-r--r--Upgrade/V5.hs2
-rw-r--r--Upgrade/V5/Direct.hs4
-rw-r--r--Utility/InodeCache.hs19
-rw-r--r--Utility/MD5.hs5
-rw-r--r--Utility/RawFilePath.hs9
-rw-r--r--doc/todo/optimize_by_converting_String_to_ByteString.mdwn20
-rw-r--r--doc/todo/optimize_by_converting_String_to_ByteString/comment_3_5cad0557a1409703f8c71078f0785309._comment40
-rw-r--r--stack.yaml1
53 files changed, 324 insertions, 234 deletions
diff --git a/Annex/AutoMerge.hs b/Annex/AutoMerge.hs
index d558c94c60..c2990eabf2 100644
--- a/Annex/AutoMerge.hs
+++ b/Annex/AutoMerge.hs
@@ -334,10 +334,9 @@ inodeMap :: Annex ([RawFilePath], IO Bool) -> Annex InodeMap
inodeMap getfiles = do
(fs, cleanup) <- getfiles
fsis <- forM fs $ \f -> do
- let f' = fromRawFilePath f
- mi <- withTSDelta (liftIO . genInodeCache f')
+ mi <- withTSDelta (liftIO . genInodeCache f)
return $ case mi of
Nothing -> Nothing
- Just i -> Just (inodeCacheToKey Strongly i, f')
+ Just i -> Just (inodeCacheToKey Strongly i, fromRawFilePath f)
void $ liftIO cleanup
return $ M.fromList $ catMaybes fsis
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 10fa59abc4..6934e62bab 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -482,7 +482,7 @@ stageJournal jl commitindex = withIndex $ withOtherTmp $ \tmpdir -> do
sha <- Git.HashObject.hashFile h path
hPutStrLn jlogh file
streamer $ Git.UpdateIndex.updateIndexLine
- sha TreeFile (asTopFilePath $ toRawFilePath $ fileJournal file)
+ sha TreeFile (asTopFilePath $ fileJournal $ toRawFilePath file)
genstream dir h jh jlogh streamer
-- Clean up the staged files, as listed in the temp log file.
-- The temp file is used to avoid needing to buffer all the
diff --git a/Annex/Content.hs b/Annex/Content.hs
index c109e3f1f8..74dd17886e 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -89,17 +89,18 @@ import Annex.Content.LowLevel
import Annex.Content.PointerFile
import Annex.Concurrent
import Types.WorkerPool
+import qualified Utility.RawFilePath as R
{- Checks if a given key's content is currently present. -}
inAnnex :: Key -> Annex Bool
-inAnnex key = inAnnexCheck key $ liftIO . doesFileExist
+inAnnex key = inAnnexCheck key $ liftIO . R.doesPathExist
{- Runs an arbitrary check on a key's content. -}
-inAnnexCheck :: Key -> (FilePath -> Annex Bool) -> Annex Bool
+inAnnexCheck :: Key -> (RawFilePath -> Annex Bool) -> Annex Bool
inAnnexCheck key check = inAnnex' id False check key
{- inAnnex that performs an arbitrary check of the key's content. -}
-inAnnex' :: (a -> Bool) -> a -> (FilePath -> Annex a) -> Key -> Annex a
+inAnnex' :: (a -> Bool) -> a -> (RawFilePath -> Annex a) -> Key -> Annex a
inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do
r <- check loc
if isgood r
@@ -120,12 +121,15 @@ inAnnex' isgood bad check key = withObjectLoc key $ \loc -> do
{- Like inAnnex, checks if the object file for a key exists,
- but there are no guarantees it has the right content. -}
objectFileExists :: Key -> Annex Bool
-objectFileExists key = calcRepo (gitAnnexLocation key) >>= liftIO . doesFileExist
+objectFileExists key =
+ calcRepo (gitAnnexLocation key)
+ >>= liftIO . R.doesPathExist
{- A safer check; the key's content must not only be present, but
- is not in the process of being removed. -}
inAnnexSafe :: Key -> Annex (Maybe Bool)
-inAnnexSafe key = inAnnex' (fromMaybe True) (Just False) go key
+inAnnexSafe key =
+ inAnnex' (fromMaybe True) (Just False) (go . fromRawFilePath) key
where
is_locked = Nothing
is_unlocked = Just True
@@ -246,7 +250,7 @@ winLocker _ _ Nothing = return Nothing
lockContentUsing :: ContentLocker -> Key -> Annex a -> Annex a
lockContentUsing locker key a = do
- contentfile <- calcRepo $ gitAnnexLocation key
+ contentfile <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
lockfile <- contentLockFile key
bracket
(lock contentfile lockfile)
@@ -474,18 +478,20 @@ moveAnnex key src = ifM (checkSecureHashes key)
, return False
)
where
- storeobject dest = ifM (liftIO $ doesFileExist dest)
+ storeobject dest = ifM (liftIO $ R.doesPathExist dest)
( alreadyhave
- , modifyContent dest $ do
+ , modifyContent dest' $ do
freezeContent src
- liftIO $ moveFile src dest
+ liftIO $ moveFile src dest'
g <- Annex.gitRepo
fs <- map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
unless (null fs) $ do
- ics <- mapM (populatePointerFile (Restage True) key (toRawFilePath dest)) fs
+ ics <- mapM (populatePointerFile (Restage True) key dest) fs
Database.Keys.storeInodeCaches' key [dest] (catMaybes ics)
)
+ where
+ dest' = fromRawFilePath dest
alreadyhave = liftIO $ removeFile src
checkSecureHashes :: Key -> Annex Bool
@@ -505,7 +511,7 @@ data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
linkToAnnex :: Key -> FilePath -> Maybe InodeCache -> Annex LinkAnnexResult
linkToAnnex key src srcic = ifM (checkSecureHashes key)
( do
- dest <- calcRepo (gitAnnexLocation key)
+ dest <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
modifyContent dest $ linkAnnex To key src srcic dest Nothing
, return LinkAnnexFailed
)
@@ -515,7 +521,7 @@ linkFromAnnex :: Key -> FilePath -> Maybe FileMode -> Annex LinkAnnexResult
linkFromAnnex key dest destmode = do
src <- calcRepo (gitAnnexLocation key)
srcic <- withTSDelta (liftIO . genInodeCache src)
- linkAnnex From key src srcic dest destmode
+ linkAnnex From key (fromRawFilePath src) srcic dest destmode
data FromTo = From | To
@@ -534,7 +540,7 @@ data FromTo = From | To
linkAnnex :: FromTo -> Key -> FilePath -> Maybe InodeCache -> FilePath -> Maybe FileMode -> Annex LinkAnnexResult
linkAnnex _ _ _ Nothing _ _ = return LinkAnnexFailed
linkAnnex fromto key src (Just srcic) dest destmode =
- withTSDelta (liftIO . genInodeCache dest) >>= \case
+ withTSDelta (liftIO . genInodeCache dest') >>= \case
Just destic -> do
cs <- Database.Keys.getInodeCaches key
if null cs
@@ -551,12 +557,13 @@ linkAnnex fromto key src (Just srcic) dest destmode =
Linked -> noop
checksrcunchanged
where
+ dest' = toRawFilePath dest
failed = do
Database.Keys.addInodeCaches key [srcic]
return LinkAnnexFailed
- checksrcunchanged = withTSDelta (liftIO . genInodeCache src) >>= \case
+ checksrcunchanged = withTSDelta (liftIO . genInodeCache (toRawFilePath src)) >>= \case
Just srcic' | compareStrong srcic srcic' -> do
- destic <- withTSDelta (liftIO . genInodeCache dest)
+ destic <- withTSDelta (liftIO . genInodeCache dest')
Database.Keys.addInodeCaches key $
catMaybes [destic, Just srcic]
return LinkAnnexOk
@@ -567,7 +574,7 @@ linkAnnex fromto key src (Just srcic) dest destmode =
{- Removes the annex object file for a key. Lowlevel. -}
unlinkAnnex :: Key -> Annex ()
unlinkAnnex key = do
- obj <- calcRepo $ gitAnnexLocation key
+ obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
modifyContent obj $ do
secureErase obj
liftIO $ nukeFile obj
@@ -616,15 +623,15 @@ prepSendAnnex key = withObjectLoc key $ \f -> do
else pure cache
return $ if null cache'
then Nothing
- else Just (f, sameInodeCache f cache')
+ else Just (fromRawFilePath f, sameInodeCache f cache')
{- Performs an action, passing it the location to use for a key's content. -}
-withObjectLoc :: Key -> (FilePath -> Annex a) -> Annex a
+withObjectLoc :: Key -> (RawFilePath -> Annex a) -> Annex a
withObjectLoc key a = a =<< calcRepo (gitAnnexLocation key)
cleanObjectLoc :: Key -> Annex () -> Annex ()
cleanObjectLoc key cleaner = do
- file <- calcRepo $ gitAnnexLocation key
+ file <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
void $ tryIO $ thawContentDir file
cleaner
liftIO $ removeparents file (3 :: Int)
@@ -640,22 +647,23 @@ cleanObjectLoc key cleaner = do
removeAnnex :: ContentRemovalLock -> Annex ()
removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file ->
cleanObjectLoc key $ do
- secureErase file
- liftIO $ nukeFile file
+ let file' = fromRawFilePath file
+ secureErase file'
+ liftIO $ nukeFile file'
g <- Annex.gitRepo
- mapM_ (\f -> void $ tryIO $ resetpointer $ fromRawFilePath $ fromTopFilePath f g)
+ mapM_ (\f -> void $ tryIO $ resetpointer $ fromTopFilePath f g)
=<< Database.Keys.getAssociatedFiles key
Database.Keys.removeInodeCaches key
where
-- Check associated pointer file for modifications, and reset if
-- it's unmodified.
resetpointer file = ifM (isUnmodified key file)
- ( depopulatePointerFile key (toRawFilePath file)
+ ( depopulatePointerFile key file
-- Modified file, so leave it alone.
-- If it was a hard link to the annex object,
-- that object might have been frozen as part of the
-- removal process, so thaw it.
- , void $ tryIO $ thawContent file
+ , void $ tryIO $ thawContent $ fromRawFilePath file
)
{- Check if a file contains the unmodified content of the key.
@@ -663,12 +671,12 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key $ \file ->
- The expensive way to tell is to do a verification of its content.
- The cheaper way is to see if the InodeCache for the key matches the
- file. -}
-isUnmodified :: Key -> FilePath -> Annex Bool
+isUnmodified :: Key -> RawFilePath -> Annex Bool
isUnmodified key f = go =<< geti
where
go Nothing = return False
go (Just fc) = isUnmodifiedCheap' key fc <||> expensivecheck fc
- expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key f)
+ expensivecheck fc = ifM (verifyKeyContent RetrievalAllKeysSecure AlwaysVerify UnVerified key (fromRawFilePath f))
( do
-- The file could have been modified while it was
-- being verified. Detect that.
@@ -691,7 +699,7 @@ isUnmodified key f = go =<< geti
- this may report a false positive when repeated edits are made to a file
- within a small time window (eg 1 second).
-}
-isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
+isUnmodifiedCheap :: Key -> RawFilePath -> Annex Bool
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
=<< withTSDelta (liftIO . genInodeCache f)
@@ -703,7 +711,7 @@ isUnmodifiedCheap' key fc =
- returns the file it was moved to. -}
moveBad :: Key -> Annex FilePath
moveBad key = do
- src <- calcRepo $ gitAnnexLocation key
+ src <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
bad <- fromRepo gitAnnexBadDir
let dest = bad </> takeFileName src
createAnnexDirectory (parentDir dest)
@@ -791,7 +799,7 @@ preseedTmp key file = go =<< inAnnex key
copy = ifM (liftIO $ doesFileExist file)
( return True
, do
- s <- calcRepo $ gitAnnexLocation key
+ s <- fromRawFilePath <$> (calcRepo $ gitAnnexLocation key)
liftIO $ ifM (doesFileExist s)
( copyFileExternal CopyTimeStamps s file
, return False
diff --git a/Annex/Content/PointerFile.hs b/Annex/Content/PointerFile.hs
index 59825a9d70..997f731ca6 100644
--- a/Annex/Content/PointerFile.hs
+++ b/Annex/Content/PointerFile.hs
@@ -38,10 +38,11 @@ populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f)
destmode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus f'
liftIO $ nukeFile f'
(ic, populated) <- replaceFile f' $ \tmp -> do
+ let tmp' = toRawFilePath tmp
ok <- linkOrCopy k (fromRawFilePath obj) tmp destmode >>= \case
Just _ -> thawContent tmp >> return True
- Nothing -> liftIO (writePointerFile (toRawFilePath tmp) k destmode) >> return False
- ic <- withTSDelta (liftIO . genInodeCache tmp)
+ Nothing -> liftIO (writePointerFile tmp' k destmode) >> return False
+ ic <- withTSDelta (liftIO . genInodeCache tmp')
return (ic, ok)
maybe noop (restagePointerFile restage f) ic
if populated
@@ -68,5 +69,5 @@ depopulatePointerFile key file = do
(\t -> touch tmp t False)
(fmap modificationTimeHiRes st)
#endif
- withTSDelta (liftIO . genInodeCache tmp)
+ withTSDelta (liftIO . genInodeCache (toRawFilePath tmp))
maybe noop (restagePointerFile (Restage True) file) ic
diff --git a/Annex/DirHashes.hs b/Annex/DirHashes.hs
index 1fb0073826..237345feb1 100644
--- a/Annex/DirHashes.hs
+++ b/Annex/DirHashes.hs
@@ -1,6 +1,6 @@
{- git-annex file locations
-
- - Copyright 2010-2017 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -19,7 +19,10 @@ module Annex.DirHashes (
import Data.Default
import Data.Bits
-import qualified Data.ByteArray
+import qualified Data.ByteArray as BA
+import qualified Data.ByteArray.Encoding as BA
+import qualified Data.ByteString as S
+import qualified System.FilePath.ByteString as P
import Common
import Key
@@ -28,7 +31,7 @@ import Types.Difference
import Utility.Hash
import Utility.MD5
-type Hasher = Key -> FilePath
+type Hasher = Key -> RawFilePath
-- Number of hash levels to use. 2 is the default.
newtype HashLevels = HashLevels Int
@@ -47,7 +50,7 @@ configHashLevels d config
| hasDifference d (annexDifferences config) = HashLevels 1
| otherwise = def
-branchHashDir :: GitConfig -> Key -> String
+branchHashDir :: GitConfig -> Key -> S.ByteString
branchHashDir = hashDirLower . branchHashLevels
{- Two different directory hashes may be used. The mixed case hash
@@ -60,19 +63,26 @@ branchHashDir = hashDirLower . branchHashLevels
dirHashes :: [HashLevels -> Hasher]
dirHashes = [hashDirLower, hashDirMixed]
-hashDirs :: HashLevels -> Int -> String -> FilePath
-hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
-hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
+hashDirs :: HashLevels -> Int -> S.ByteString -> RawFilePath
+hashDirs (HashLevels 1) sz s = P.addTrailingPathSeparator $ S.take sz s
+hashDirs _ sz s = P.addTrailingPathSeparator $ h P.</> t
+ where
+ (h, t) = S.splitAt sz s
hashDirLower :: HashLevels -> Hasher
-hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5s $ serializeKey' $ nonChunkKey k
+hashDirLower n k = hashDirs n 3 $ S.pack $ take 6 $ conv $
+ md5s $ serializeKey' $ nonChunkKey k
+ where
+ conv v = BA.unpack $
+ (BA.convertToBase BA.Base16 v :: BA.Bytes)
{- This was originally using Data.Hash.MD5 from MissingH. This new version
- is faster, but ugly as it has to replicate the 4 Word32's that produced. -}
hashDirMixed :: HashLevels -> Hasher
-hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $
- encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $
- Utility.Hash.md5s $ serializeKey' $ nonChunkKey k
+hashDirMixed n k = hashDirs n 2 $ S.pack $ take 4 $
+ concatMap display_32bits_as_dir $
+ encodeWord32 $ map fromIntegral $ BA.unpack $
+ Utility.Hash.md5s $ serializeKey' $ nonChunkKey k
where
encodeWord32 (b1:b2:b3:b4:rest) =
(shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1)
diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs
index 85a4d38122..e1b22c7b8a 100644
--- a/Annex/Ingest.hs
+++ b/Annex/Ingest.hs
@@ -92,7 +92,7 @@ lockDown' cfg file = tryIO $ ifM crippledFileSystem
nohardlink = withTSDelta $ liftIO . nohardlink'
nohardlink' delta = do
- cache <- genInodeCache file delta
+ cache <- genInodeCache (toRawFilePath file) delta
return $ LockedDown cfg $ KeySource
{ keyFilename = file
, contentLocation = file
@@ -112,7 +112,7 @@ lockDown' cfg file = tryIO $ ifM crippledFileSystem
withhardlink' delta tmpfile = do
createLink file tmpfile
- cache <- genInodeCache tmpfile delta
+ cache <- genInodeCache (toRawFilePath tmpfile) delta
return $ LockedDown cfg $ KeySource
{ keyFilename = file
, contentLocation = tmpfile
@@ -209,7 +209,7 @@ finishIngestUnlocked' key source restage = do
{- Copy to any other locations using the same key. -}
populateAssociatedFiles :: Key -> KeySource -> Restage -> Annex ()
populateAssociatedFiles key source restage = do
- obj <- toRawFilePath <$> calcRepo (gitAnnexLocation key)
+ obj <- calcRepo (gitAnnexLocation key)
g <- Annex.gitRepo
ingestedf <- flip fromTopFilePath g
<$> inRepo (toTopFilePath (toRawFilePath (keyFilename source)))
@@ -235,8 +235,7 @@ cleanOldKeys file newkey = do
unlessM (isUnmodified key =<< calcRepo (gitAnnexLocation key)) $ do
caches <- Database.Keys.getInodeCaches key
unlinkAnnex key
- fs <- map fromRawFilePath
- . filter (/= ingestedf)
+ fs <- filter (/= ingestedf)
. map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
filterM (`sameInodeCache` caches) fs >>= \case
@@ -245,7 +244,7 @@ cleanOldKeys file newkey = do
-- so no need for any recovery.
(f:_) -> do
ic <- withTSDelta (liftIO . genInodeCache f)
- void $ linkToAnnex key f ic
+ void $ linkToAnnex key (fromRawFilePath f) ic
_ -> logStatus key InfoMissing
{- On error, put the file back so it doesn't seem to have vanished.
@@ -256,7 +255,7 @@ restoreFile file key e = do
liftIO $ nukeFile file
-- The key could be used by other files too, so leave the
-- content in the annex, and make a copy back to the file.
- obj <- calcRepo $ gitAnnexLocation key
+ obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
unlessM (liftIO $ copyFileExternal CopyTimeStamps obj file) $
warning $ "Unable to restore content of " ++ file ++ "; it should be located in " ++ obj
thawContent file
diff --git a/Annex/InodeSentinal.hs b/Annex/InodeSentinal.hs
index 0f5c7ca606..0dae0d6cac 100644
--- a/Annex/InodeSentinal.hs
+++ b/Annex/InodeSentinal.hs
@@ -29,7 +29,7 @@ compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
{- Checks if one of the provided old InodeCache matches the current
- version of a file. -}
-sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool
+sameInodeCache :: RawFilePath -> [InodeCache] -> Annex Bool
sameInodeCache _ [] = return False
sameInodeCache file old = go =<< withTSDelta (liftIO . genInodeCache file)
where
@@ -78,7 +78,7 @@ createInodeSentinalFile :: Bool -> Annex ()
createInodeSentinalFile evenwithobjects =
unlessM (alreadyexists <||> hasobjects) $ do
s <- annexSentinalFile
- createAnnexDirectory (parentDir (sentinalFile s))
+ createAnnexDirectory (parentDir (fromRawFilePath (sentinalFile s)))
liftIO $ writeSentinalFile s
where
alreadyexists = liftIO. sentinalFileExists =<< annexSentinalFile
diff --git a/Annex/Journal.hs b/Annex/Journal.hs
index e7e624f354..937e183e22 100644
--- a/Annex/Journal.hs
+++ b/Annex/Journal.hs
@@ -20,7 +20,9 @@ import Utility.Directory.Stream
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S
+import qualified System.FilePath.ByteString as P
import Data.ByteString.Builder
+import Data.Char
class Journalable t where
writeJournalHandle :: Handle -> t -> IO ()
@@ -48,7 +50,7 @@ setJournalFile :: Journalable content => JournalLocked -> RawFilePath -> content
setJournalFile _jl file content = withOtherTmp $ \tmp -> do
createAnnexDirectory =<< fromRepo gitAnnexJournalDir
-- journal file is written atomically
- jfile <- fromRepo $ journalFile $ fromRawFilePath file
+ jfile <- fromRawFilePath <$> fromRepo (journalFile file)
let tmpfile = tmp </> takeFileName jfile
liftIO $ do
withFile tmpfile WriteMode $ \h -> writeJournalHandle h content
@@ -71,7 +73,7 @@ getJournalFile _jl = getJournalFileStale
-}
getJournalFileStale :: RawFilePath -> Annex (Maybe L.ByteString)
getJournalFileStale file = inRepo $ \g -> catchMaybeIO $
- L.fromStrict <$> S.readFile (journalFile (fromRawFilePath file) g)
+ L.fromStrict <$> S.readFile (fromRawFilePath $ journalFile file g)
{- List of existing journal files, but without locking, may miss new ones
- just being added, or may have false positives if the journal is staged
@@ -81,7 +83,8 @@ getJournalledFilesStale = do
g <- gitRepo
fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents $ gitAnnexJournalDir g
- return $ filter (`notElem` [".", ".."]) $ map fileJournal fs
+ return $ filter (`notElem` [".", ".."]) $
+ map (fromRawFilePath . fileJournal . toRawFilePath) fs
withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
withJournalHandle a = do
@@ -98,23 +101,28 @@ journalDirty = do
{- Produces a filename to use in the journal for a file on the branch.
-
+ - The input filename is assumed to not contain any '_' character,
+ - since path separators are replaced with that.
+ -
- The journal typically won't have a lot of files in it, so the hashing
- used in the branch is not necessary, and all the files are put directly
- in the journal directory.
-}
-journalFile :: FilePath -> Git.Repo -> FilePath
-journalFile file repo = gitAnnexJournalDir repo </> concatMap mangle file
+journalFile :: RawFilePath -> Git.Repo -> RawFilePath
+journalFile file repo = gitAnnexJournalDir' repo P.</> S.map mangle file
where
mangle c
- | c == pathSeparator = "_"
- | c == '_' = "__"
- | otherwise = [c]
+ | c == P.pathSeparator = fromIntegral (ord '_')
+ | otherwise = c
{- Converts a journal file (relative to the journal dir) back to the
- filename on the branch. -}
-fileJournal :: FilePath -> FilePath
-fileJournal = replace [pathSeparator, pathSeparator] "_" .
- replace "_" [pathSeparator]
+fileJournal :: RawFilePath -> RawFilePath
+fileJournal = S.map unmangle
+ where
+ unmangle c
+ | c == fromIntegral (ord '_') = P.pathSeparator
+ | otherwise = c
{- Sentinal value, only produced by lockJournal; required
- as a parameter by things that need to ensure the journal is
diff --git a/Annex/Link.hs b/Annex/Link.hs
index fe9e1d52d7..ede132a5b9 100644
--- a/Annex/Link.hs
+++ b/Annex/Link.hs
@@ -39,6 +39,7 @@ import qualified Utility.RawFilePath as R
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
+import qualified System.FilePath.ByteString as P
type LinkTarget = String
@@ -182,7 +183,7 @@ restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> do
absf <- liftIO $ absPath $ fromRawFilePath f
Annex.Queue.addInternalAction runner [(absf, isunmodified tsd)]
where
- isunmodified tsd = genInodeCache' f tsd >>= return . \case
+ isunmodified tsd = genInodeCache f tsd >>= return . \case
Nothing -> False
Just new -> compareStrong orig new
@@ -301,8 +302,7 @@ isLinkToAnnex s = p `S.isInfixOf` s
|| p' `S.isInfixOf` s
#endif
where
- sp = (pathSeparator:objectDir)
- p = toRawFilePath sp
+ p = P.pathSeparator `S.cons` objectDir'
#ifdef mingw32_HOST_OS
- p' = toRawFilePath (toInternalGitPath sp)
+ p' = toInternalGitPath p
#endif
diff --git a/Annex/Locations.hs b/Annex/Locations.hs
index 3c49099094..36858a72bb 100644
--- a/Annex/Locations.hs
+++ b/Annex/Locations.hs
@@ -16,6 +16,7 @@ module Annex.Locations (
keyPath,
annexDir,
objectDir,
+ objectDir',
gitAnnexLocation,
gitAnnexLocationDepth,
gitAnnexLink,
@@ -62,6 +63,7 @@ module Annex.Locations (
gitAnnexFeedState,
gitAnnexMergeDir,
gitAnnexJournalDir,
+ gitAnnexJournalDir',
gitAnnexJournalLock,
gitAnnexGitQueueLock,
gitAnnexPreCommitLock,
@@ -105,6 +107,7 @@ import qualified Git.Types as Git
import Git.FilePath
import Annex.DirHashes
import Annex.Fixup
+import qualified Utility.RawFilePath as R
{- Conventions:
-
@@ -124,21 +127,27 @@ import Annex.Fixup
annexDir :: FilePath
annexDir = addTrailingPathSeparator "annex"
+annexDir' :: RawFilePath
+annexDir' = P.addTrailingPathSeparator "annex"
+
{- The directory git annex uses for locally available object content,
- relative to the .git directory -}
objectDir :: FilePath
objectDir = addTrailingPathSeparator $ annexDir </> "objects"
+objectDir' :: RawFilePath
+objectDir' = P.addTrailingPathSeparator $ annexDir' P.</> "objects"
+
{- Annexed file's possible locations relative to the .git directory.
- There are two different possibilities, using different hashes.
-
- Also, some repositories have a Difference in hash directory depth.
-}
-annexLocations :: GitConfig -> Key -> [FilePath]
+annexLocations :: GitConfig -> Key -> [RawFilePath]
annexLocations config key = map (annexLocation config key) dirHashes
-annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> FilePath
-annexLocation config key hasher = objectDir </> keyPath key (hasher $ objectHashLevels config)
+annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
+annexLocation config key hasher = objectDir' P.</> keyPath key (hasher $ objectHashLevels config)
{- Number of subdirectories from the gitAnnexObjectDir
- to the gitAnnexLocation. -}
@@ -158,14 +167,14 @@ gitAnnexLocationDepth config = hashlevels + 1
- This does not take direct mode into account, so in direct mode it is not
- the actual location of the file's content.
-}
-gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO FilePath
+gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLocation key r config = gitAnnexLocation' key r config
(annexCrippledFileSystem config)
(coreSymlinks config)
- doesFileExist
- (fromRawFilePath (Git.localGitDir r))
+ R.doesPathExist
+ (Git.localGitDir r)
-gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (FilePath -> IO Bool) -> FilePath -> IO FilePath
+gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath
gitAnnexLocation' key r config crippled symlinkssupported checker gitdir
{- Bare repositories default to hashDirLower for new
- content, as it's more portable. But check all locations. -}
@@ -187,7 +196,7 @@ gitAnnexLocation' key r config crippled symlinkssupported checker gitdir
only = return . inrepo . annexLocation config key
checkall = check $ map inrepo $ annexLocations config key
- inrepo d = gitdir </> d
+ inrepo d = gitdir P.</> d
check locs@(l:_) = fromMaybe l <$> firstM checker locs
check [] = error "internal"
@@ -199,16 +208,17 @@ gitAnnexLink file key r config = do
let gitdir = getgitdir currdir
loc <- gitAnnexLocation' key r config False False (\_ -> return True) gitdir
fromRawFilePath . toInternalGitPath . toRawFilePath
- <$> relPathDirToFile (parentDir absfile) loc
+ <$> relPathDirToFile (parentDir absfile) (fromRawFilePath loc)
where
getgitdir currdir
{- This special case is for git submodules on filesystems not
- supporting symlinks; generate link target that will
- work portably. -}
| not (coreSymlinks config) && needsSubmoduleFixup r =
- absNormPathUnix currdir $ fromRawFilePath $
- Git.repoPath r P.</> ".git"
- | otherwise = fromRawFilePath $ Git.localGitDir r
+ toRawFilePath $
+ absNormPathUnix currdir $ fromRawFilePath $
+ Git.repoPath r P.</> ".git"
+ | otherwise = Git.localGitDir r
absNormPathUnix d p = fromRawFilePath $ toInternalGitPath $ toRawFilePath $
absPathFrom
(fromRawFilePath $ toInternalGitPath $ toRawFilePath d)
@@ -232,33 +242,36 @@ gitAnnexLinkCanonical file key r config = gitAnnexLink file key r' config'
gitAnnexContentLock :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexContentLock key r config = do
loc <- gitAnnexLocation key r config
- return $ loc ++ ".lck"
+ return $ fromRawFilePath loc ++ ".lck"
{- File that maps from a key to the file(s) in the git repository.
- Used in direct mode. -}
gitAnnexMapping :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexMapping key r config = do
loc <- gitAnnexLocation key r config
- return $ loc ++ ".map"
+ return $ fromRawFilePath loc ++ ".map"
{- File that caches information about a key's content, used to determine
- if a file has changed.
- Used in direct mode. -}
gitAnnexInodeCache :: Key -> Git.Repo -> GitConfig -> IO FilePath
-gitAnnexInodeCache key r config = do
+gitAnnexInodeCache key r config = do
loc <- gitAnnexLocation key r config
- return $ loc ++ ".cache"
+ return $ fromRawFilePath loc ++ ".cache"
-gitAnnexInodeSentinal :: Git.Repo -> FilePath
-gitAnnexInodeSentinal r = gitAnnexDir r </> "sentinal"
+gitAnnexInodeSentinal :: Git.Repo -> RawFilePath
+gitAnnexInodeSentinal r = gitAnnexDir' r P.</> "sentinal"
-gitAnnexInodeSentinalCache :: Git.Repo -> FilePath
-gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r ++ ".cache"
+gitAnnexInodeSentinalCache :: Git.Repo -> RawFilePath
+gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r <> ".cache"
{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> FilePath
gitAnnexDir r = addTrailingPathSeparator $ fromRawFilePath (Git.localGitDir r) </> annexDir
+gitAnnexDir' :: Git.Repo -> RawFilePath
+gitAnnexDir' r = P.addTrailingPathSeparator $ Git.localGitDir r P.</> annexDir'
+
{- The part of the annex directory where file contents are stored. -}
gitAnnexObjectDir :: Git.Repo -> FilePath
gitAnnexObjectDir r = addTrailingPathSeparator $ fromRawFilePath (Git.localGitDir r) </> objectDir
@@ -428,6 +441,9 @@ gitAnnexTransferDir r = addTrailingPathSeparator $ gitAnnexDir r </> "transfer"
gitAnnexJournalDir :: Git.Repo -> FilePath
gitAnnexJournalDir r = addTrailingPathSeparator $ gitAnnexDir r </> "journal"
+gitAnnexJournalDir' :: Git.Repo -> RawFilePath
+gitAnnexJournalDir' r = P.addTrailingPathSeparator $ gitAnnexDir' r P.</> "journal"
+
{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> FilePath
gitAnnexJournalLock r = gitAnnexDir r </> "journal.lck"
@@ -609,10 +625,10 @@ fileKey' = deserializeKey' . S8.intercalate "/" . map go . S8.split '%'
- The file is put in a directory with the same name, this allows
- write-protecting the directory to avoid accidental deletion of the file.
-}
-keyPath :: Key -> Hasher -> FilePath
-keyPath key hasher = hasher key </> f </> f
+keyPath :: Key -> Hasher -> RawFilePath
+keyPath key hasher = hasher key P.</> f P.</> f
where
- f = keyFile key
+ f = keyFile' key
{- All possibile locations to store a key in a special remote
- using different directory hashes.
@@ -620,5 +636,5 @@ keyPath key hasher = hasher key </> f </> f
- This is compatible with the annexLocations, for interoperability between
- special remotes and git-annex repos.
-}
-keyPaths :: Key -> [FilePath]
+keyPaths :: Key -> [RawFilePath]
keyPaths key = map (\h -> keyPath key (h def)) dirHashes
diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs
index 1b2c11061e..bca75be864 100644
--- a/Annex/WorkTree.hs
+++ b/Annex/WorkTree.hs
@@ -101,13 +101,14 @@ scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $
Just k' | k' == k -> do
destmode <- liftIO $ catchMaybeIO $
fileMode <$> R.getFileStatus f
- ic <- replaceFile (fromRawFilePath f) $ \tmp ->
+ ic <- replaceFile (fromRawFilePath f) $ \tmp -> do
+ let tmp' = toRawFilePath tmp
linkFromAnnex k tmp destmode >>= \case
LinkAnnexOk ->
- withTSDelta (liftIO . genInodeCache tmp)
+ withTSDelta (liftIO . genInodeCache tmp')
LinkAnnexNoop -> return Nothing
LinkAnnexFailed -> liftIO $ do
- writePointerFile (toRawFilePath tmp) k destmode
+ writePointerFile tmp' k destmode
return Nothing
maybe noop (restagePointerFile (Restage True) f) ic
_ -> noop
diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs
index 5ed49166bb..53d72b6454 100644
--- a/Assistant/Threads/Committer.hs
+++ b/Assistant/Threads/Committer.hs
@@ -308,7 +308,7 @@ handleAdds lockdowndir havelsof delayadd cs = returnWhen (null incomplete) $ do
if M.null m
then forM toadd (add cfg)
else forM toadd $ \c -> do
- mcache <- liftIO $ genInodeCache (changeFile c) delta
+ mcache <- liftIO $ genInodeCache (toRawFilePath (changeFile c)) delta
case mcache of
Nothing -> add cfg c
Just cache ->
diff --git a/Assistant/Upgrade.hs b/Assistant/Upgrade.hs
index 0ea52f3158..a8a6778abe 100644
--- a/Assistant/Upgrade.hs
+++ b/Assistant/Upgrade.hs
@@ -113,7 +113,7 @@ distributionDownloadComplete d dest cleanup t
| transferDirection t == Download = do
debug ["finished downloading git-annex distribution"]
maybe (failedupgrade "bad download") go
- =<< liftAnnex (withObjectLoc k fsckit)
+ =<< liftAnnex (withObjectLoc k (fsckit . fromRawFilePath))
| otherwise = cleanup
where
k = mkKey $ const $ distributionKey d
diff --git a/CHANGELOG b/CHANGELOG
index 66ae7e8bdc..a3c748ce93 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,13 +1,9 @@
git-annex (7.20191115) UNRELEASED; urgency=medium
- * Sped up many git-annex commands that operate on many files, by
- using ByteStrings. Some commands like find got up to 60% faster.
- * Sped up many git-annex commands that operate on many files, by
- avoiding reserialization of keys.
- find got 7% faster; whereis 3% faster; and git-annex get when
- all files are already present got 5% faster
- * Sped up many git-annex commands that query the git-annex branch.
- In particular whereis got 1.5% faster.
+ * Optimised processing of many files, especially by commands like find
+ and whereis that only report on the state of the repository. Commands
+ like get also sped up in cases where they have to check a lot of
+ files but only transfer a few files. Speedups range from 30-100%.
* Stop displaying rsync progress, and use git-annex's own progress display
for local-to-local repo transfers.
* git-lfs: The url provided to initremote/enableremote will now be
diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs
index 1811698f00..0ffa1cbfb6 100644
--- a/CmdLine/Seek.hs
+++ b/CmdLine/Seek.hs
@@ -131,7 +131,7 @@ withUnmodifiedUnlockedPointers a l = seekActions $
isUnmodifiedUnlocked :: RawFilePath -> Annex Bool
isUnmodifiedUnlocked f = catKeyFile f >>= \case
Nothing -> return False
- Just k -> sameInodeCache (fromRawFilePath f) =<< Database.Keys.getInodeCaches k
+ Just k -> sameInodeCache f =<< Database.Keys.getInodeCaches k
{- Finds files that may be modified. -}
withFilesMaybeModified :: (RawFilePath -> CommandSeek) -> [WorkTreeItem] -> CommandSeek
diff --git a/Command/ContentLocation.hs b/Command/ContentLocation.hs
index 9576f86044..ef2e467bb5 100644
--- a/Command/ContentLocation.hs
+++ b/Command/ContentLocation.hs
@@ -9,6 +9,9 @@ module Command.ContentLocation where
import Command
import Annex.Content
+import qualified Utility.RawFilePath as R
+
+import qualified Data.ByteString.Char8 as B8
cmd :: Command
cmd = noCommit $ noMessages $
@@ -20,10 +23,10 @@ cmd = noCommit $ noMessages $
run :: () -> String -> Annex Bool
run _ p = do
let k = fromMaybe (giveup "bad key") $ deserializeKey p
- maybe (return False) (\f -> liftIO (putStrLn f) >> return True)
+ maybe (return False) (\f -> liftIO (B8.putStrLn f) >> return True)
=<< inAnnex' (pure True) Nothing check k
where
- check f = ifM (liftIO (doesFileExist f))
+ check f = ifM (liftIO (R.doesPathExist f))
( return (Just f)
, return Nothing
)
diff --git a/Command/DiffDriver.hs b/Command/DiffDriver.hs
index ecc05ca093..e0cef22234 100644
--- a/Command/DiffDriver.hs
+++ b/Command/DiffDriver.hs
@@ -90,7 +90,8 @@ fixupReq req@(Req {}) =
v <- getAnnexLinkTarget' (toRawFilePath (getfile r)) False
case parseLinkTargetOrPointer =<< v of
Nothing -> return r
- Just k -> withObjectLoc k (pure . setfile r)
+ Just k -> withObjectLoc k $
+ pure . setfile r . fromRawFilePath
_ -> return r
externalDiffer :: String -> [String] -> Differ
diff --git a/Command/Find.hs b/Command/Find.hs
index 4e71ac845a..eba431c92c 100644
--- a/Command/Find.hs
+++ b/Command/Find.hs
@@ -93,8 +93,8 @@ keyVars key =
, ("bytesize", size show)
, ("humansize", size $ roughSize storageUnits True)
, ("keyname", decodeBS $ fromKey keyName key)
- , ("hashdirlower", hashDirLower def key)
- , ("hashdirmixed", hashDirMixed def key)
+ , ("hashdirlower", fromRawFilePath $ hashDirLower def key)
+ , ("hashdirmixed", fromRawFilePath $ hashDirMixed def key)
, ("mtime", whenavail show $ fromKey keyMtime key)
]
where
diff --git a/Command/Fix.hs b/Command/Fix.hs
index 52e076f30b..e26d184092 100644
--- a/Command/Fix.hs
+++ b/Command/Fix.hs
@@ -53,11 +53,11 @@ start fixwhat file key = do
where
fixby = starting "fix" (mkActionItem (key, file))
fixthin = do
- obj <- calcRepo $ gitAnnexLocation key
- stopUnless (isUnmodified key (fromRawFilePath file) <&&> isUnmodified key obj) $ do
+ obj <- calcRepo (gitAnnexLocation key)
+ stopUnless (isUnmodified key file <&&> isUnmodified key obj) $ do
thin <- annexThin <$> Annex.getGitConfig
fs <- liftIO $ catchMaybeIO $ R.getFileStatus file
- os <- liftIO $ catchMaybeIO $ getFileStatus obj
+ os <- liftIO $ catchMaybeIO $ R.getFileStatus obj
case (linkCount <$> fs, linkCount <$> os, thin) of
(Just 1, Just 1, True) ->
fixby $ makeHardLink file key
@@ -65,15 +65,16 @@ start fixwhat file key = do
fixby $ breakHardLink file key obj
_ -> stop
-breakHardLink :: RawFilePath -> Key -> FilePath -> CommandPerform
+breakHardLink :: RawFilePath -> Key -> RawFilePath -> CommandPerform
breakHardLink file key obj = do
replaceFile (fromRawFilePath file) $ \tmp -> do
mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file
- unlessM (checkedCopyFile key obj tmp mode) $
+ let obj' = fromRawFilePath obj
+ unlessM (checkedCopyFile key obj' tmp mode) $
error "unable to break hard link"
thawContent tmp
- modifyContent obj $ freezeContent obj
- Database.Keys.storeInodeCaches key [fromRawFilePath file]
+ modifyContent obj' $ freezeContent obj'
+ Database.Keys.storeInodeCaches key [file]
next $ return True
makeHardLink :: RawFilePath -> Key -> CommandPerform
diff --git a/Command/Fsck.hs b/Command/Fsck.hs
index a55b882c09..3010a6ce37 100644
--- a/Command/Fsck.hs
+++ b/Command/Fsck.hs
@@ -223,7 +223,7 @@ fixLink key file = do
- in this repository only. -}
verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool
verifyLocationLog key keystatus ai = do
- obj <- calcRepo $ gitAnnexLocation key
+ obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
present <- if isKeyUnlockedThin keystatus
then liftIO (doesFileExist obj)
else inAnnex key
@@ -332,11 +332,11 @@ verifyWorkTree key file = do
ifM (annexThin <$> Annex.getGitConfig)
( void $ linkFromAnnex key tmp mode
, do
- obj <- calcRepo $ gitAnnexLocation key
+ obj <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
void $ checkedCopyFile key obj tmp mode
thawContent tmp
)
- Database.Keys.storeInodeCaches key [fromRawFilePath file]
+ Database.Keys.storeInodeCaches key [file]
_ -> return ()
return True
@@ -349,8 +349,8 @@ checkKeySize :: Key -> KeyStatus -> ActionItem -> Annex Bool
checkKeySize _ KeyUnlockedThin _ = return True
checkKeySize key _ ai = do
file <- calcRepo $ gitAnnexLocation key
- ifM (liftIO $ doesFileExist file)
- ( checkKeySizeOr badContent key file ai
+ ifM (liftIO $ R.doesPathExist file)
+ ( checkKeySizeOr badContent key (fromRawFilePath file) ai
, return True
)
@@ -417,10 +417,10 @@ checkKeyUpgrade _ _ _ (AssociatedFile Nothing) =
-}
checkBackend :: Backend -> Key -> KeyStatus -> AssociatedFile -> Annex Bool
checkBackend backend key keystatus afile = do
- content <- calcRepo $ gitAnnexLocation key
+ content <- calcRepo (gitAnnexLocation key)
ifM (pure (isKeyUnlockedThin keystatus) <&&> (not <$> isUnmodified key content))
( nocheck
- , checkBackendOr badContent backend key content ai
+ , checkBackendOr badContent backend key (fromRawFilePath content) ai
)
where
nocheck = return True
@@ -670,8 +670,8 @@ isKeyUnlockedThin KeyMissing = False
getKeyStatus :: Key -> Annex KeyStatus
getKeyStatus key = catchDefaultIO KeyMissing $ do
afs <- not . null <$> Database.Keys.getAssociatedFiles key
- obj <- calcRepo $ gitAnnexLocation key
- multilink <- ((> 1) . linkCount <$> liftIO (getFileStatus obj))
+ obj <- calcRepo (gitAnnexLocation key)
+ multilink <- ((> 1) . linkCount <$> liftIO (R.getFileStatus obj))
return $ if multilink && afs
then KeyUnlockedThin
else KeyPresent
diff --git a/Command/Import.hs b/Command/Import.hs
index 615fe5db1c..7e8ea18642 100644
--- a/Command/Import.hs
+++ b/Command/Import.hs
@@ -181,7 +181,7 @@ startLocal largematcher mode (srcfile, destfile) =
-- weakly the same as the origianlly locked down file's
-- inode cache. (Since the file may have been copied,
-- its inodes may not be the same.)
- newcache <- withTSDelta $ liftIO . genInodeCache destfile
+ newcache <- withTSDelta $ liftIO . genInodeCache (toRawFilePath destfile)
let unchanged = case (newcache, inodeCache (keySource ld)) of
(_, Nothing) -> True
(Just newc, Just c) | compareWeak c newc -> True
diff --git a/Command/Lock.hs b/Command/Lock.hs
index e0ca6e4594..6e8a7f4ffb 100644
--- a/Command/Lock.hs
+++ b/Command/Lock.hs
@@ -20,6 +20,7 @@ import qualified Database.Keys
import Annex.Ingest
import Logs.Location
import Git.FilePath
+import qualified Utility.RawFilePath as R
cmd :: Command
cmd = withGlobalOptions [jsonOptions, annexedMatchingOptions] $
@@ -43,7 +44,7 @@ startNew file key = ifM (isJust <$> isAnnexLink file)
| key' == key = cont
| otherwise = errorModified
go Nothing =
- ifM (isUnmodified key (fromRawFilePath file))
+ ifM (isUnmodified key file)
( cont
, ifM (Annex.getState Annex.force)
( cont
@@ -56,37 +57,38 @@ performNew :: RawFilePath -> Key -> CommandPerform
performNew file key = do
lockdown =<< calcRepo (gitAnnexLocation key)
addLink (fromRawFilePath file) key
- =<< withTSDelta (liftIO . genInodeCache' file)
+ =<< withTSDelta (liftIO . genInodeCache file)
next $ cleanupNew file key
where
lockdown obj = do
ifM (isUnmodified key obj)
( breakhardlink obj
- , repopulate obj
+ , repopulate (fromRawFilePath obj)
)
- whenM (liftIO $ doesFileExist obj) $
- freezeContent obj
+ whenM (liftIO $ R.doesPathExist obj) $
+ freezeContent $ fromRawFilePath obj
-- It's ok if the file is hard linked to obj, but if some other
-- associated file is, we need to break that link to lock down obj.
- breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (getFileStatus obj)) $ do
- mfc <- withTSDelta (liftIO . genInodeCache' file)
+ breakhardlink obj = whenM (catchBoolIO $ (> 1) . linkCount <$> liftIO (R.getFileStatus obj)) $ do
+ mfc <- withTSDelta (liftIO . genInodeCache file)
unlessM (sameInodeCache obj (maybeToList mfc)) $ do
- modifyContent obj $ replaceFile obj $ \tmp -> do
- unlessM (checkedCopyFile key obj tmp Nothing) $
+ let obj' = fromRawFilePath obj
+ modifyContent obj' $ replaceFile obj' $ \tmp -> do
+ unlessM (checkedCopyFile key obj' tmp Nothing) $
giveup "unable to lock file"
Database.Keys.storeInodeCaches key [obj]
-- Try to repopulate obj from an unmodified associated file.
repopulate obj = modifyContent obj $ do
g <- Annex.gitRepo
- fs <- map fromRawFilePath . map (`fromTopFilePath` g)
+ fs <- map (`fromTopFilePath` g)
<$> Database.Keys.getAssociatedFiles key
mfile <- firstM (isUnmodified key) fs
liftIO $ nukeFile obj
case mfile of
Just unmodified ->
- unlessM (checkedCopyFile key unmodified obj Nothing)
+ unlessM (checkedCopyFile key (fromRawFilePath unmodified) obj Nothing)
lostcontent
Nothing -> lostcontent
diff --git a/Command/Migrate.hs b/Command/Migrate.hs
index 0f964bb749..2feb879aa5 100644
--- a/Command/Migrate.hs
+++ b/Command/Migrate.hs
@@ -86,7 +86,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbacken
content <- calcRepo $ gitAnnexLocation oldkey
let source = KeySource
{ keyFilename = fromRawFilePath file
- , contentLocation = content
+ , contentLocation = fromRawFilePath content
, inodeCache = Nothing
}
v <- genKey source nullMeterUpdate (Just newbackend)
diff --git a/Command/Multicast.hs b/Command/Multicast.hs
index 6c6d2c418b..fcb36800d4 100644
--- a/Command/Multicast.hs
+++ b/Command/Multicast.hs
@@ -137,7 +137,8 @@ send ups fs = do
mk <- lookupFile f
case mk of
Nothing -> noop
- Just k -> withObjectLoc k (addlist f)
+ Just k -> withObjectLoc k $
+ addlist f . fromRawFilePath
liftIO $ hClose h
serverkey <- uftpKey
diff --git a/Command/ReKey.hs b/Command/ReKey.hs
index a67d876df7..52984928bd 100644
--- a/Command/ReKey.hs
+++ b/Command/ReKey.hs
@@ -83,12 +83,12 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
- unlocked file, which would leave the new key unlocked
- and vulnerable to corruption. -}
( getViaTmpFromDisk RetrievalAllKeysSecure DefaultVerify newkey $ \tmp -> unVerified $ do
- oldobj <- calcRepo (gitAnnexLocation oldkey)
+ oldobj <- fromRawFilePath <$> calcRepo (gitAnnexLocation oldkey)
isJust <$> linkOrCopy' (return True) newkey oldobj tmp Nothing
, do
{- The file being rekeyed is itself an unlocked file; if
- it's hard linked to the old key, that link must be broken. -}
- oldobj <- calcRepo (gitAnnexLocation oldkey)
+ oldobj <- fromRawFilePath <$> calcRepo (gitAnnexLocation oldkey)
v <- tryNonAsync $ do
st <- liftIO $ R.getFileStatus file
when (linkCount st > 1) $ do
@@ -97,7 +97,7 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
unlessM (checkedCopyFile oldkey oldobj tmp Nothing) $
error "can't lock old key"
thawContent tmp
- ic <- withTSDelta (liftIO . genInodeCache' file)
+ ic <- withTSDelta (liftIO . genInodeCache file)
case v of
Left e -> do
warning (show e)
diff --git a/Command/Smudge.hs b/Command/Smudge.hs
index 9b5e57ede1..d8f6c08454 100644
--- a/Command/Smudge.hs
+++ b/Command/Smudge.hs
@@ -108,7 +108,7 @@ clean file = do
-- annexed and is unmodified.
case oldkey of
Nothing -> doingest oldkey
- Just ko -> ifM (isUnmodifiedCheap ko file)
+ Just ko -> ifM (isUnmodifiedCheap ko (toRawFilePath file))
( liftIO $ emitPointer ko
, doingest oldkey
)
@@ -174,7 +174,7 @@ shouldAnnex file moldkey = ifM (annexGitAddToAnnex <$> Annex.getGitConfig)
Just _ -> return True
Nothing -> checkknowninode
- checkknowninode = withTSDelta (liftIO . genInodeCache file) >>= \case
+ checkknowninode = withTSDelta (liftIO . genInodeCache (toRawFilePath file)) >>= \case
Nothing -> pure False
Just ic -> Database.Keys.isInodeKnown ic =<< sentinalStatus
@@ -191,7 +191,7 @@ emitPointer = S.putStr . formatPointer
getMoveRaceRecovery :: Key -> RawFilePath -> Annex ()
getMoveRaceRecovery k file = void $ tryNonAsync $
whenM (inAnnex k) $ do
- obj <- toRawFilePath <$> calcRepo (gitAnnexLocation k)
+ obj <- calcRepo (gitAnnexLocation k)
-- Cannot restage because git add is running and has
-- the index locked.
populatePointerFile (Restage False) k obj file >>= \case
@@ -207,7 +207,7 @@ updateSmudged :: Restage -> Annex ()
updateSmudged restage = streamSmudged $ \k topf -> do
f <- fromRepo (fromTopFilePath topf)
whenM (inAnnex k) $ do
- obj <- toRawFilePath <$> calcRepo (gitAnnexLocation k)
+ obj <- calcRepo (gitAnnexLocation k)
unlessM (isJust <$> populatePointerFile restage k obj f) $
liftIO (isPointerFile f) >>= \case
Just k' | k' == k -> toplevelWarning False $
diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs
index 292697a781..bf8c24cd5d 100644
--- a/Command/TestRemote.hs
+++ b/Command/TestRemote.hs
@@ -168,7 +168,7 @@ test st r k = catMaybes
get
, Just $ check "fsck downloaded object" fsck
, Just $ check "retrieveKeyFile resume from 33%" $ do
- loc <- Annex.calcRepo (gitAnnexLocation k)
+ loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
tmp <- prepTmp k
partial <- liftIO $ bracket (openBinaryFile loc ReadMode) hClose $ \h -> do
sz <- hFileSize h
@@ -184,7 +184,7 @@ test st r k = catMaybes
get
, Just $ check "fsck downloaded object" fsck
, Just $ check "retrieveKeyFile resume from end" $ do
- loc <- Annex.calcRepo (gitAnnexLocation k)
+ loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
tmp <- prepTmp k
void $ liftIO $ copyFileExternal CopyAllMetaData loc tmp
lockContentForRemoval k removeAnnex
@@ -240,7 +240,7 @@ testExportTree st (Just _) ea k1 k2 =
check desc a = testCase desc $
Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed"
storeexport k = do
- loc <- Annex.calcRepo (gitAnnexLocation k)
+ loc <- fromRawFilePath <$> Annex.calcRepo (gitAnnexLocation k)
Remote.storeExport ea loc k testexportlocation nullMeterUpdate
retrieveexport k = withTmpFile "exported" $ \tmp h -> do
liftIO $ hClose h
diff --git a/Command/Unannex.hs b/Command/Unannex.hs
index 356ff1d946..d63f9a6b4f 100644
--- a/Command/Unannex.hs
+++ b/Command/Unannex.hs
@@ -46,7 +46,7 @@ perform file key = do
cleanup :: RawFilePath -> Key -> CommandCleanup
cleanup file key = do
Database.Keys.removeAssociatedFile key =<< inRepo (toTopFilePath file)
- src <- calcRepo $ gitAnnexLocation key
+ src <- fromRawFilePath <$> calcRepo (gitAnnexLocation key)
ifM (Annex.getState Annex.fast)
( do
-- Only make a hard link if the annexed file does not
diff --git a/Command/Uninit.hs b/Command/Uninit.hs
index 6c62694543..29278a6c4e 100644
--- a/Command/Uninit.hs
+++ b/Command/Uninit.hs
@@ -17,6 +17,7 @@ import qualified Database.Keys
import Annex.Content
import Annex.Init
import Utility.FileMode
+import qualified Utility.RawFilePath as R
cmd :: Command
cmd = addCheck check $
@@ -117,5 +118,5 @@ removeUnannexed = go []
, go (k:c) ks
)
enoughlinks f = catchBoolIO $ do
- s <- getFileStatus f
+ s <- R.getFileStatus f
return $ linkCount s > 1
diff --git a/Command/Unused.hs b/Command/Unused.hs
index 7f49440e6b..78400db7e1 100644
--- a/Command/Unused.hs
+++ b/Command/Unused.hs
@@ -283,7 +283,7 @@ associatedFilesFilter = filterM go
checkunmodified _ [] = return True
checkunmodified cs (f:fs) = do
relf <- fromRepo $ fromTopFilePath f
- ifM (sameInodeCache (fromRawFilePath relf) cs)
+ ifM (sameInodeCache relf cs)
( return False
, checkunmodified cs fs
)
diff --git a/Database/Keys.hs b/Database/Keys.hs
index b04dff02be..48d51caf4e 100644
--- a/Database/Keys.hs
+++ b/Database/Keys.hs
@@ -169,13 +169,13 @@ removeAssociatedFile :: Key -> TopFilePath -> Annex ()
removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile (toIKey k)
{- Stats the files, and stores their InodeCaches. -}
-storeInodeCaches :: Key -> [FilePath] -> Annex ()
+storeInodeCaches :: Key -> [RawFilePath] -> Annex ()
storeInodeCaches k fs = storeInodeCaches' k fs []
-storeInodeCaches' :: Key -> [FilePath] -> [InodeCache] -> Annex ()
+storeInodeCaches' :: Key -> [RawFilePath] -> [InodeCache] -> Annex ()
storeInodeCaches' k fs ics = withTSDelta $ \d ->
addInodeCaches k . (++ ics) . catMaybes
- =<< liftIO (mapM (`genInodeCache` d) fs)
+ =<< liftIO (mapM (\f -> genInodeCache f d) fs)
addInodeCaches :: Key -> [InodeCache] -> Annex ()
addInodeCaches k is = runWriterIO $ SQL.addInodeCaches (toIKey k) is
@@ -223,7 +223,7 @@ reconcileStaged :: H.DbQueue -> Annex ()
reconcileStaged qh = do
gitindex <- inRepo currentIndexFile
indexcache <- fromRepo gitAnnexKeysDbIndexCache
- withTSDelta (liftIO . genInodeCache gitindex) >>= \case
+ withTSDelta (liftIO . genInodeCache (toRawFilePath gitindex)) >>= \case
Just cur ->
liftIO (maybe Nothing readInodeCache <$> catchMaybeIO (readFile indexcache)) >>= \case
Nothing -> go cur indexcache
@@ -295,10 +295,10 @@ reconcileStaged qh = do
keyloc <- calcRepo (gitAnnexLocation key)
keypopulated <- sameInodeCache keyloc caches
p <- fromRepo $ fromTopFilePath file
- filepopulated <- sameInodeCache (fromRawFilePath p) caches
+ filepopulated <- sameInodeCache p caches
case (keypopulated, filepopulated) of
(True, False) ->
- populatePointerFile (Restage True) key (toRawFilePath keyloc) p >>= \case
+ populatePointerFile (Restage True) key keyloc p >>= \case
Nothing -> return ()
Just ic -> liftIO $
SQL.addInodeCaches ikey [ic] (SQL.WriteHandle qh)
diff --git a/Limit.hs b/Limit.hs
index 9e8ece2d11..2069822711 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -33,6 +33,7 @@ import Git.Types (RefDate(..))
import Utility.Glob
import Utility.HumanTime
import Utility.DataUnits
+import qualified Utility.RawFilePath as R
import Data.Time.Clock.POSIX
import qualified Data.Set as S
@@ -117,7 +118,8 @@ addMagicLimit limitname querymagic selectprovidedinfo glob = do
-- When the file is an annex symlink, get magic of the
-- object file.
Nothing -> isAnnexLink (toRawFilePath f) >>= \case
- Just k -> withObjectLoc k $ querymagic magic
+ Just k -> withObjectLoc k $
+ querymagic magic . fromRawFilePath
Nothing -> querymagic magic f
matchMagic :: String -> (Magic -> FilePath -> Annex (Maybe String)) -> (ProvidedInfo -> OptInfo String) -> Maybe Magic -> MkLimit Annex
@@ -363,7 +365,7 @@ addAccessedWithin duration = do
where
check now k = inAnnexCheck k $ \f ->
liftIO $ catchDefaultIO False $ do
- s <- getFileStatus f
+ s <- R.getFileStatus f
let accessed = realToFrac (accessTime s)
let delta = now - accessed
return $ delta <= secs
diff --git a/Logs.hs b/Logs.hs
index d612aa8d56..18a045b452 100644
--- a/Logs.hs
+++ b/Logs.hs
@@ -13,6 +13,7 @@ import Annex.Common
import Annex.DirHashes
import qualified Data.ByteString as S
+import qualified System.FilePath.ByteString as P
{- There are several varieties of log file formats. -}
data LogVariety
@@ -117,19 +118,19 @@ exportLog = "export.log"
{- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> RawFilePath
-locationLogFile config key = toRawFilePath $
- branchHashDir config key </> keyFile key ++ ".log"
+locationLogFile config key =
+ branchHashDir config key P.</> keyFile' key <> ".log"
{- The filename of the url log for a given key. -}
urlLogFile :: GitConfig -> Key -> RawFilePath
-urlLogFile config key = toRawFilePath $
- branchHashDir config key </> keyFile key ++ decodeBS' urlLogExt
+urlLogFile config key =
+ branchHashDir config key P.</> keyFile' key <> urlLogExt
{- Old versions stored the urls elsewhere. -}
oldurlLogs :: GitConfig -> Key -> [RawFilePath]
-oldurlLogs config key = map toRawFilePath
- [ "remote/web" </> hdir </> serializeKey key ++ ".log"
- , "remote/web" </> hdir </> keyFile key ++ ".log"
+oldurlLogs config key =
+ [ "remote/web" P.</> hdir P.</> serializeKey' key <> ".log"
+ , "remote/web" P.</> hdir P.</> keyFile' key <> ".log"
]
where
hdir = branchHashDir config key
@@ -144,7 +145,7 @@ isUrlLog file = urlLogExt `S.isSuffixOf` file
{- The filename of the remote state log for a given key. -}
remoteStateLogFile :: GitConfig -> Key -> RawFilePath
remoteStateLogFile config key =
- toRawFilePath (branchHashDir config key </> keyFile key)
+ (branchHashDir config key P.</> keyFile' key)
<> remoteStateLogExt
remoteStateLogExt :: S.ByteString
@@ -156,7 +157,7 @@ isRemoteStateLog path = remoteStateLogExt `S.isSuffixOf` path
{- The filename of the chunk log for a given key. -}
chunkLogFile :: GitConfig -> Key -> RawFilePath
chunkLogFile config key =
- toRawFilePath (branchHashDir config key </> keyFile key)
+ (branchHashDir config key P.</> keyFile' key)
<> chunkLogExt
chunkLogExt :: S.ByteString
@@ -168,7 +169,7 @@ isChunkLog path = chunkLogExt `S.isSuffixOf` path
{- The filename of the metadata log for a given key. -}
metaDataLogFile :: GitConfig -> Key -> RawFilePath
metaDataLogFile config key =
- toRawFilePath (branchHashDir config key </> keyFile key)
+ (branchHashDir config key P.</> keyFile' key)
<> metaDataLogExt
metaDataLogExt :: S.ByteString
@@ -180,7 +181,7 @@ isMetaDataLog path = metaDataLogExt `S.isSuffixOf` path
{- The filename of the remote metadata log for a given key. -}
remoteMetaDataLogFile :: GitConfig -> Key -> RawFilePath
remoteMetaDataLogFile config key =
- toRawFilePath (branchHashDir config key </> keyFile key)
+ (branchHashDir config key P.</> keyFile' key)
<> remoteMetaDataLogExt
remoteMetaDataLogExt :: S.ByteString
@@ -192,7 +193,7 @@ isRemoteMetaDataLog path = remoteMetaDataLogExt `S.isSuffixOf` path
{- The filename of the remote content identifier log for a given key. -}
remoteContentIdentifierLogFile :: GitConfig -> Key -> RawFilePath
remoteContentIdentifierLogFile config key =
- toRawFilePath (branchHashDir config key </> keyFile key)
+ (branchHashDir config key P.</> keyFile' key)
<> remoteContentIdentifierExt
remoteContentIdentifierExt :: S.ByteString
diff --git a/P2P/Annex.hs b/P2P/Annex.hs
index dd84668bf8..bcdde75cd1 100644
--- a/P2P/Annex.hs
+++ b/P2P/Annex.hs
@@ -47,7 +47,7 @@ runLocal runst runner a = case a of
size <- liftIO $ catchDefaultIO 0 $ getFileSize f
runner (next (Len size))
ContentSize k next -> do
- let getsize = liftIO . catchMaybeIO . getFileSize
+ let getsize = liftIO . catchMaybeIO . getFileSize . fromRawFilePath
size <- inAnnex' isJust Nothing getsize k
runner (next (Len <$> size))
ReadContent k af o sender next -> do
diff --git a/Remote/Adb.hs b/Remote/Adb.hs
index 03e3819cff..e7e8fae3b9 100644
--- a/Remote/Adb.hs
+++ b/Remote/Adb.hs
@@ -212,7 +212,7 @@ androidHashDir :: AndroidPath -> Key -> AndroidPath
androidHashDir adir k = AndroidPath $
fromAndroidPath adir ++ "/" ++ hdir
where
- hdir = replace [pathSeparator] "/" (hashDirLower def k)
+ hdir = replace [pathSeparator] "/" (fromRawFilePath (hashDirLower def k))
storeExportM :: AndroidSerial -> AndroidPath -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
storeExportM serial adir src _k loc _p = store' serial dest src
diff --git a/Remote/Directory.hs b/Remote/Directory.hs
index 0387474f9a..933ccd23ce 100644
--- a/Remote/Directory.hs
+++ b/Remote/Directory.hs
@@ -127,7 +127,7 @@ directorySetup _ mu _ c gc = do
- We try more than one since we used to write to different hash
- directories. -}
locations :: FilePath -> Key -> [FilePath]
-locations d k = map (d </>) (keyPaths k)
+locations d k = map (\f -> d </> fromRawFilePath f) (keyPaths k)
{- Returns the location off a Key in the directory. If the key is
- present, returns the location that is actually used, otherwise
@@ -139,7 +139,8 @@ getLocation d k = do
{- Directory where the file(s) for a key are stored. -}
storeDir :: FilePath -> Key -> FilePath
-storeDir d k = addTrailingPathSeparator $ d </> hashDirLower def k </> keyFile k
+storeDir d k = addTrailingPathSeparator $
+ d </> fromRawFilePath (hashDirLower def k) </> keyFile k
{- Check if there is enough free disk space in the remote's directory to
- store the key. Note that the unencrypted key size is checked. -}
diff --git a/Remote/External.hs b/Remote/External.hs
index 2b5c99457a..4c4c156848 100644
--- a/Remote/External.hs
+++ b/Remote/External.hs
@@ -383,9 +383,9 @@ handleRequest' st external req mp responsehandler
handleRemoteRequest (PROGRESS bytesprocessed) =
maybe noop (\a -> liftIO $ a bytesprocessed) mp
handleRemoteRequest (DIRHASH k) =
- send $ VALUE $ hashDirMixed def k
+ send $ VALUE $ fromRawFilePath $ hashDirMixed def k
handleRemoteRequest (DIRHASH_LOWER k) =
- send $ VALUE $ hashDirLower def k
+ send $ VALUE $ fromRawFilePath $ hashDirLower def k
handleRemoteRequest (SETCONFIG setting value) =
liftIO $ atomically $ modifyTVar' (externalConfig st) $
M.insert setting value
diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs
index 4682637eaf..c3a3f31348 100644
--- a/Remote/GCrypt.hs
+++ b/Remote/GCrypt.hs
@@ -422,7 +422,8 @@ checkKey' repo r rsyncopts k
{- Annexed objects are hashed using lower-case directories for max
- portability. -}
gCryptLocation :: Git.Repo -> Key -> FilePath
-gCryptLocation repo key = Git.repoLocation repo </> objectDir </> keyPath key (hashDirLower def)
+gCryptLocation repo key = Git.repoLocation repo </> objectDir
+ </> fromRawFilePath (keyPath key (hashDirLower def))
data AccessMethod = AccessDirect | AccessShell
diff --git a/Remote/Git.hs b/Remote/Git.hs
index 459cd80d65..b6dd02ae5f 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -61,6 +61,7 @@ import Creds
import Types.NumCopies
import Annex.Action
import Messages.Progress
+import qualified Utility.RawFilePath as R
#ifndef mingw32_HOST_OS
import Utility.FileMode
@@ -393,9 +394,9 @@ keyUrls gc repo r key = map tourl locs'
| remoteAnnexBare remoteconfig == Just False = reverse (annexLocations gc key)
| otherwise = annexLocations gc key
#ifndef mingw32_HOST_OS
- locs' = locs
+ locs' = map fromRawFilePath locs
#else
- locs' = map (replace "\\" "/") locs
+ locs' = map (replace "\\" "/" . fromRawFilePath) locs
#endif
remoteconfig = gitconfig r
@@ -599,9 +600,9 @@ copyFromRemoteCheap' repo r st key af file
| not $ Git.repoIsUrl repo = guardUsable repo (return False) $ do
gc <- getGitConfigFromState st
loc <- liftIO $ gitAnnexLocation key repo gc
- liftIO $ ifM (doesFileExist loc)
+ liftIO $ ifM (R.doesPathExist loc)
( do
- absloc <- absPath loc
+ absloc <- absPath (fromRawFilePath loc)
catchBoolIO $ do
createSymbolicLink absloc file
return True
diff --git a/Remote/Hook.hs b/Remote/Hook.hs
index f0a67d808e..897e73cc1f 100644
--- a/Remote/Hook.hs
+++ b/Remote/Hook.hs
@@ -104,7 +104,8 @@ hookEnv action k f = Just <$> mergeenv (fileenv f ++ keyenv)
]
fileenv Nothing = []
fileenv (Just file) = [envvar "FILE" file]
- hashbits = map takeDirectory $ splitPath $ hashDirMixed def k
+ hashbits = map takeDirectory $ splitPath $
+ fromRawFilePath $ hashDirMixed def k
lookupHook :: HookName -> Action -> Annex (Maybe String)
lookupHook hookname action = do
diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs
index 566f95bab6..f171b69e60 100644
--- a/Remote/Rsync.hs
+++ b/Remote/Rsync.hs
@@ -183,7 +183,7 @@ rsyncSetup _ mu _ c gc = do
store :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool
store o k src meterupdate = storeGeneric o meterupdate basedest populatedest
where
- basedest = Prelude.head (keyPaths k)
+ basedest = fromRawFilePath $ Prelude.head (keyPaths k)
populatedest dest = liftIO $ if canrename
then do
rename src dest
@@ -222,7 +222,7 @@ remove :: RsyncOpts -> Remover
remove o k = removeGeneric o includes
where
includes = concatMap use dirHashes
- use h = let dir = h def k in
+ use h = let dir = fromRawFilePath (h def k) in
[ parentDir dir
, dir
-- match content directory and anything in it
diff --git a/Remote/Rsync/RsyncUrl.hs b/Remote/Rsync/RsyncUrl.hs
index 4c2f10843c..2b0dbc1966 100644
--- a/Remote/Rsync/RsyncUrl.hs
+++ b/Remote/Rsync/RsyncUrl.hs
@@ -13,13 +13,14 @@ import Types
import Annex.Locations
import Utility.Rsync
import Utility.SafeCommand
-
-import Data.Default
-import System.FilePath.Posix
+import Utility.FileSystemEncoding
+import Annex.DirHashes
#ifdef mingw32_HOST_OS
import Utility.Split
#endif
-import Annex.DirHashes
+
+import Data.Default
+import System.FilePath.Posix
type RsyncUrl = String
@@ -42,7 +43,7 @@ mkRsyncUrl o f = rsyncUrl o </> rsyncEscape o f
rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl]
rsyncUrls o k = map use dirHashes
where
- use h = rsyncUrl o </> hash h </> rsyncEscape o (f </> f)
+ use h = rsyncUrl o </> fromRawFilePath (hash h) </> rsyncEscape o (f </> f)
f = keyFile k
#ifndef mingw32_HOST_OS
hash h = h def k
diff --git a/Remote/WebDAV/DavLocation.hs b/Remote/WebDAV/DavLocation.hs
index 4464ed2d36..3893533a22 100644
--- a/Remote/WebDAV/DavLocation.hs
+++ b/Remote/WebDAV/DavLocation.hs
@@ -39,9 +39,9 @@ keyDir :: Key -> DavLocation
keyDir k = addTrailingPathSeparator $ hashdir </> keyFile k
where
#ifndef mingw32_HOST_OS
- hashdir = hashDirLower def k
+ hashdir = fromRawFilePath $ hashDirLower def k
#else
- hashdir = replace "\\" "/" (hashDirLower def k)
+ hashdir = replace "\\" "/" (fromRawFilePath $ hashDirLower def k)
#endif
keyLocation :: Key -> DavLocation
diff --git a/Test.hs b/Test.hs
index 4752ff07e2..7bcfdd3560 100644
--- a/Test.hs
+++ b/Test.hs
@@ -1638,7 +1638,8 @@ test_crypto = do
checkFile mvariant filename =
Utility.Gpg.checkEncryptionFile gpgcmd filename $
if mvariant == Just Types.Crypto.PubKey then ks else Nothing
- serializeKeys cipher = Annex.Locations.keyPaths .
+ serializeKeys cipher = map fromRawFilePath .
+ Annex.Locations.keyPaths .
Crypto.encryptKey Types.Crypto.HmacSha1 cipher
#else
test_crypto = putStrLn "gpg testing not implemented on Windows"
diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs
index bad1183dfd..e311044664 100644
--- a/Upgrade/V1.hs
+++ b/Upgrade/V1.hs
@@ -236,9 +236,9 @@ logFile1 repo key = Upgrade.V2.gitStateDir repo ++ keyFile1 key ++ ".log"
logFile2 :: Key -> Git.Repo -> String
logFile2 = logFile' (hashDirLower def)
-logFile' :: (Key -> FilePath) -> Key -> Git.Repo -> String
+logFile' :: (Key -> RawFilePath) -> Key -> Git.Repo -> String
logFile' hasher key repo =
- gitStateDir repo ++ hasher key ++ keyFile key ++ ".log"
+ gitStateDir repo ++ fromRawFilePath (hasher key) ++ keyFile key ++ ".log"
stateDir :: FilePath
stateDir = addTrailingPathSeparator ".git-annex"
diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs
index 7cbdd04e65..a8a84283b3 100644
--- a/Upgrade/V5.hs
+++ b/Upgrade/V5.hs
@@ -135,7 +135,7 @@ upgradeDirectWorkTree = do
-- is just not populated with it. Since the work tree
-- file is recorded as an associated file, things will
-- still work that way, it's just not ideal.
- ic <- withTSDelta (liftIO . genInodeCache f)
+ ic <- withTSDelta (liftIO . genInodeCache (toRawFilePath f))
void $ Content.linkToAnnex k f ic
, unlessM (Content.inAnnex k) $ do
-- Worktree file was deleted or modified;
diff --git a/Upgrade/V5/Direct.hs b/Upgrade/V5/Direct.hs
index baf7dae9a0..600efc616d 100644
--- a/Upgrade/V5/Direct.hs
+++ b/Upgrade/V5/Direct.hs
@@ -107,7 +107,9 @@ removeAssociatedFiles key = do
- expected mtime and inode.
-}
goodContent :: Key -> FilePath -> Annex Bool
-goodContent key file = sameInodeCache file =<< recordedInodeCache key
+goodContent key file =
+ sameInodeCache (toRawFilePath file)
+ =<< recordedInodeCache key
{- Gets the recorded inode cache for a key.
-
diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs
index a918e7bd08..d14d1f9d15 100644
--- a/Utility/InodeCache.hs
+++ b/Utility/InodeCache.hs
@@ -22,7 +22,6 @@ module Utility.InodeCache (
readInodeCache,
showInodeCache,
genInodeCache,
- genInodeCache',
toInodeCache,
likeInodeCacheWeak,
@@ -182,12 +181,8 @@ readInodeCache s = case words s of
return $ InodeCache $ InodeCachePrim i sz (MTimeHighRes t)
_ -> Nothing
-genInodeCache :: FilePath -> TSDelta -> IO (Maybe InodeCache)
+genInodeCache :: RawFilePath -> TSDelta -> IO (Maybe InodeCache)
genInodeCache f delta = catchDefaultIO Nothing $
- toInodeCache delta f =<< getFileStatus f
-
-genInodeCache' :: RawFilePath -> TSDelta -> IO (Maybe InodeCache)
-genInodeCache' f delta = catchDefaultIO Nothing $
toInodeCache delta (fromRawFilePath f) =<< R.getFileStatus f
toInodeCache :: TSDelta -> FilePath -> FileStatus -> IO (Maybe InodeCache)
@@ -208,8 +203,8 @@ toInodeCache (TSDelta getdelta) f s
- Its InodeCache at the time of its creation is written to the cache file,
- so changes can later be detected. -}
data SentinalFile = SentinalFile
- { sentinalFile :: FilePath
- , sentinalCacheFile :: FilePath
+ { sentinalFile :: RawFilePath
+ , sentinalCacheFile :: RawFilePath
}
deriving (Show)
@@ -226,8 +221,8 @@ noTSDelta = TSDelta (pure 0)
writeSentinalFile :: SentinalFile -> IO ()
writeSentinalFile s = do
- writeFile (sentinalFile s) ""
- maybe noop (writeFile (sentinalCacheFile s) . showInodeCache)
+ writeFile (fromRawFilePath (sentinalFile s)) ""
+ maybe noop (writeFile (fromRawFilePath (sentinalCacheFile s)) . showInodeCache)
=<< genInodeCache (sentinalFile s) noTSDelta
data SentinalStatus = SentinalStatus
@@ -256,7 +251,7 @@ checkSentinalFile s = do
Just new -> return $ calc old new
where
loadoldcache = catchDefaultIO Nothing $
- readInodeCache <$> readFile (sentinalCacheFile s)
+ readInodeCache <$> readFile (fromRawFilePath (sentinalCacheFile s))
gennewcache = genInodeCache (sentinalFile s) noTSDelta
calc (InodeCache (InodeCachePrim oldinode oldsize oldmtime)) (InodeCache (InodeCachePrim newinode newsize newmtime)) =
SentinalStatus (not unchanged) tsdelta
@@ -281,7 +276,7 @@ checkSentinalFile s = do
dummy = SentinalStatus True noTSDelta
sentinalFileExists :: SentinalFile -> IO Bool
-sentinalFileExists s = allM doesFileExist [sentinalCacheFile s, sentinalFile s]
+sentinalFileExists s = allM R.doesPathExist [sentinalCacheFile s, sentinalFile s]
instance Arbitrary InodeCache where
arbitrary =
diff --git a/Utility/MD5.hs b/Utility/MD5.hs
index d0475bf480..aabb5d724b 100644
--- a/Utility/MD5.hs
+++ b/Utility/MD5.hs
@@ -8,13 +8,14 @@ module Utility.MD5 where
import Data.Bits
import Data.Word
+import Data.Char
-display_32bits_as_dir :: Word32 -> String
+display_32bits_as_dir :: Word32 -> [Word8]
display_32bits_as_dir w = trim $ swap_pairs cs
where
-- Need 32 characters to use. To avoid inaverdently making
-- a real word, use letters that appear less frequently.
- chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
+ chars = map (fromIntegral . ord) (['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF")
cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
getc n = chars !! fromIntegral n
swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
diff --git a/Utility/RawFilePath.hs b/Utility/RawFilePath.hs
index a62ba65e51..426f5633a3 100644
--- a/Utility/RawFilePath.hs
+++ b/Utility/RawFilePath.hs
@@ -19,14 +19,20 @@ module Utility.RawFilePath (
readSymbolicLink,
getFileStatus,
getSymbolicLinkStatus,
+ doesPathExist,
) where
#ifndef mingw32_HOST_OS
import Utility.FileSystemEncoding (RawFilePath)
import System.Posix.Files.ByteString
+
+doesPathExist :: RawFilePath -> IO Bool
+doesPathExist = fileExist
+
#else
import qualified Data.ByteString as B
import qualified System.PosixCompat as P
+import qualified System.Directory as D
import Utility.FileSystemEncoding
readSymbolicLink :: RawFilePath -> IO RawFilePath
@@ -37,4 +43,7 @@ getFileStatus = P.getFileStatus . fromRawFilePath
getSymbolicLinkStatus :: RawFilePath -> IO FileStatus
getSymbolicLinkStatus = P.getSymbolicLinkStatus . fromRawFilePath
+
+doesPathExist :: RawFilePath -> IO Bool
+doesPathExist = D.doesPathExist . fromRawFilePath
#endif
diff --git a/doc/todo/optimize_by_converting_String_to_ByteString.mdwn b/doc/todo/optimize_by_converting_String_to_ByteString.mdwn
index 7ac7efe382..4a6d2b6dcd 100644
--- a/doc/todo/optimize_by_converting_String_to_ByteString.mdwn
+++ b/doc/todo/optimize_by_converting_String_to_ByteString.mdwn
@@ -11,26 +11,12 @@ than find so the improvement is not as large.
The `bs` branch is in a mergeable state now, but still needs work:
+* Profile various commands and look for hot spots.
+
* Eliminate all the fromRawFilePath, toRawFilePath, encodeBS,
decodeBS conversions. Or at least most of them. There are likely
- quite a few places where a value is converted back and forth several times.
-
- As a first step, profile and look for the hot spots. Known hot spots:
+ some places where a value is converted back and forth several times.
- * keyFile uses fromRawFilePath and that adds around 3% overhead in `git-annex find`.
- Converting it to a RawFilePath needs a version of `</>` for RawFilePaths.
- * getJournalFileStale uses fromRawFilePath, and adds 3-5% overhead in
- `git-annex whereis`. Converting it to RawFilePath needs a version
- of `</>` for RawFilePaths. It also needs a ByteString.readFile
- for RawFilePath.
-
-* System.FilePath is not available for RawFilePath, and many of the
- conversions are to get a FilePath in order to use that library.
-
- It should be entirely straightforward to make a version of System.FilePath
- that can operate on RawFilePath, except possibly there could be some
- complications due to Windows.
-
* Use versions of IO actions like getFileStatus that take a RawFilePath,
avoiding a conversion. Note that these are only available on unix, not
windows, so a compatability shim will be needed.
diff --git a/doc/todo/optimize_by_converting_String_to_ByteString/comment_3_5cad0557a1409703f8c71078f0785309._comment b/doc/todo/optimize_by_converting_String_to_ByteString/comment_3_5cad0557a1409703f8c71078f0785309._comment
new file mode 100644
index 0000000000..c888f617c0
--- /dev/null
+++ b/doc/todo/optimize_by_converting_String_to_ByteString/comment_3_5cad0557a1409703f8c71078f0785309._comment
@@ -0,0 +1,40 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2019-12-11T18:16:13Z"
+ content="""
+Updated profiling. git-annex find is now ByteString end-to-end!
+Note the massive reduction in alloc, and improved runtime.
+
+ Wed Dec 11 14:41 2019 Time and Allocation Profiling Report (Final)
+
+ git-annex +RTS -p -RTS find
+
+ total time = 1.51 secs (1515 ticks @ 1000 us, 1 processor)
+ total alloc = 608,475,328 bytes (excludes profiling overheads)
+
+ COST CENTRE MODULE SRC %time %alloc
+
+ keyFile' Annex.Locations Annex/Locations.hs:(590,1)-(600,30) 8.2 16.6
+ >>=.\.succ' Data.Attoparsec.Internal.Types Data/Attoparsec/Internal/Types.hs:146:13-76 4.7 0.7
+ getAnnexLinkTarget'.probesymlink Annex.Link Annex/Link.hs:79:9-46 4.2 7.6
+ >>=.\ Data.Attoparsec.Internal.Types Data/Attoparsec/Internal/Types.hs:(146,9)-(147,44) 3.9 2.3
+ parseLinkTarget Annex.Link Annex/Link.hs:(255,1)-(263,25) 3.9 11.8
+ doesPathExist Utility.RawFilePath Utility/RawFilePath.hs:30:1-25 3.4 0.6
+ keyFile'.esc Annex.Locations Annex/Locations.hs:(596,9)-(600,30) 3.2 14.7
+ fileKey' Annex.Locations Annex/Locations.hs:(609,1)-(619,41) 3.0 4.7
+ parseLinkTargetOrPointer Annex.Link Annex/Link.hs:(240,1)-(244,25) 2.8 0.2
+ hashUpdates.\.\.\ Crypto.Hash Crypto/Hash.hs:85:48-99 2.5 0.1
+ combineAlways System.FilePath.Posix.ByteString System/FilePath/Posix/../Internal.hs:(698,1)-(704,67) 2.0 3.3
+ getState Annex Annex.hs:(251,1)-(254,27) 2.0 1.1
+ withPtr.makeTrampoline Basement.Block.Base Basement/Block/Base.hs:(401,5)-(404,31) 1.9 1.7
+ withMutablePtrHint Basement.Block.Base Basement/Block/Base.hs:(468,1)-(482,50) 1.8 1.2
+ parseKeyVariety Types.Key Types/Key.hs:(323,1)-(371,42) 1.8 0.0
+ fileKey'.go Annex.Locations Annex/Locations.hs:611:9-55 1.7 2.2
+ isLinkToAnnex Annex.Link Annex/Link.hs:(299,1)-(305,47) 1.7 1.0
+ hashDirMixed Annex.DirHashes Annex/DirHashes.hs:(82,1)-(90,27) 1.7 1.3
+ primitive Basement.Monad Basement/Monad.hs:72:5-18 1.6 0.1
+ withPtr Basement.Block.Base Basement/Block/Base.hs:(395,1)-(404,31) 1.5 1.6
+ mkKeySerialization Types.Key Types/Key.hs:(115,1)-(117,22) 1.1 2.8
+ decimal.step Data.Attoparsec.ByteString.Char8 Data/Attoparsec/ByteString/Char8.hs:448:9-49 0.8 1.2
+"""]]
diff --git a/stack.yaml b/stack.yaml
index d97bf2f263..dde1d76583 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -24,6 +24,7 @@ extra-deps:
- sandi-0.5
- http-client-0.5.14
- silently-1.2.5.1
+- filepath-bytestring-1.4.2.1.0
explicit-setup-deps:
git-annex: true
resolver: lts-13.29