summaryrefslogtreecommitdiff
path: root/Logs.hs
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2019-11-26 15:27:22 -0400
committerJoey Hess <joeyh@joeyh.name>2019-11-26 16:01:58 -0400
commit067aabdd4899997f10c78388273f28cccf777b66 (patch)
tree5c9258eaf5ec8d737676d53ac5b8eed3fd959a4e /Logs.hs
parent6a97ff6b3a5ffacfe889abd6c23325a0f4f1dfc9 (diff)
wip RawFilePath 2x git-annex find speedup
Finally builds (oh the agoncy of making it build), but still very unmergable, only Command.Find is included and lots of stuff is badly hacked to make it compile. Benchmarking vs master, this git-annex find is significantly faster! Specifically: num files old new speedup 48500 4.77 3.73 28% 12500 1.36 1.02 66% 20 0.075 0.074 0% (so startup time is unchanged) That's without really finishing the optimization. Things still to do: * Eliminate all the fromRawFilePath, toRawFilePath, encodeBS, decodeBS conversions. * Use versions of IO actions like getFileStatus that take a RawFilePath. * Eliminate some Data.ByteString.Lazy.toStrict, which is a slow copy. * Use ByteString for parsing git config to speed up startup. It's likely several of those will speed up git-annex find further. And other commands will certianly benefit even more.
Diffstat (limited to 'Logs.hs')
-rw-r--r--Logs.hs137
1 files changed, 76 insertions, 61 deletions
diff --git a/Logs.hs b/Logs.hs
index e7b15be3c6..d612aa8d56 100644
--- a/Logs.hs
+++ b/Logs.hs
@@ -5,11 +5,15 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
+{-# LANGUAGE OverloadedStrings #-}
+
module Logs where
import Annex.Common
import Annex.DirHashes
+import qualified Data.ByteString as S
+
{- There are several varieties of log file formats. -}
data LogVariety
= OldUUIDBasedLog
@@ -22,7 +26,7 @@ data LogVariety
{- Converts a path from the git-annex branch into one of the varieties
- of logs used by git-annex, if it's a known path. -}
-getLogVariety :: FilePath -> Maybe LogVariety
+getLogVariety :: RawFilePath -> Maybe LogVariety
getLogVariety f
| f `elem` topLevelOldUUIDBasedLogs = Just OldUUIDBasedLog
| f `elem` topLevelNewUUIDBasedLogs = Just NewUUIDBasedLog
@@ -34,7 +38,7 @@ getLogVariety f
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
{- All the old-format uuid-based logs stored in the top of the git-annex branch. -}
-topLevelOldUUIDBasedLogs :: [FilePath]
+topLevelOldUUIDBasedLogs :: [RawFilePath]
topLevelOldUUIDBasedLogs =
[ uuidLog
, remoteLog
@@ -49,161 +53,172 @@ topLevelOldUUIDBasedLogs =
]
{- All the new-format uuid-based logs stored in the top of the git-annex branch. -}
-topLevelNewUUIDBasedLogs :: [FilePath]
+topLevelNewUUIDBasedLogs :: [RawFilePath]
topLevelNewUUIDBasedLogs =
[ exportLog
]
{- All the ways to get a key from a presence log file -}
-presenceLogs :: FilePath -> [Maybe Key]
+presenceLogs :: RawFilePath -> [Maybe Key]
presenceLogs f =
[ urlLogFileKey f
, locationLogFileKey f
]
{- Top-level logs that are neither UUID based nor presence logs. -}
-otherLogs :: [FilePath]
+otherLogs :: [RawFilePath]
otherLogs =
[ numcopiesLog
, groupPreferredContentLog
]
-uuidLog :: FilePath
+uuidLog :: RawFilePath
uuidLog = "uuid.log"
-numcopiesLog :: FilePath
+numcopiesLog :: RawFilePath
numcopiesLog = "numcopies.log"
-configLog :: FilePath
+configLog :: RawFilePath
configLog = "config.log"
-remoteLog :: FilePath
+remoteLog :: RawFilePath
remoteLog = "remote.log"
-trustLog :: FilePath
+trustLog :: RawFilePath
trustLog = "trust.log"
-groupLog :: FilePath
+groupLog :: RawFilePath
groupLog = "group.log"
-preferredContentLog :: FilePath
+preferredContentLog :: RawFilePath
preferredContentLog = "preferred-content.log"
-requiredContentLog :: FilePath
+requiredContentLog :: RawFilePath
requiredContentLog = "required-content.log"
-groupPreferredContentLog :: FilePath
+groupPreferredContentLog :: RawFilePath
groupPreferredContentLog = "group-preferred-content.log"
-scheduleLog :: FilePath
+scheduleLog :: RawFilePath
scheduleLog = "schedule.log"
-activityLog :: FilePath
+activityLog :: RawFilePath
activityLog = "activity.log"
-differenceLog :: FilePath
+differenceLog :: RawFilePath
differenceLog = "difference.log"
-multicastLog :: FilePath
+multicastLog :: RawFilePath
multicastLog = "multicast.log"
-exportLog :: FilePath
+exportLog :: RawFilePath
exportLog = "export.log"
{- The pathname of the location log file for a given key. -}
-locationLogFile :: GitConfig -> Key -> String
-locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"
+locationLogFile :: GitConfig -> Key -> RawFilePath
+locationLogFile config key = toRawFilePath $
+ branchHashDir config key </> keyFile key ++ ".log"
{- The filename of the url log for a given key. -}
-urlLogFile :: GitConfig -> Key -> FilePath
-urlLogFile config key = branchHashDir config key </> keyFile key ++ urlLogExt
+urlLogFile :: GitConfig -> Key -> RawFilePath
+urlLogFile config key = toRawFilePath $
+ branchHashDir config key </> keyFile key ++ decodeBS' urlLogExt
{- Old versions stored the urls elsewhere. -}
-oldurlLogs :: GitConfig -> Key -> [FilePath]
-oldurlLogs config key =
+oldurlLogs :: GitConfig -> Key -> [RawFilePath]
+oldurlLogs config key = map toRawFilePath
[ "remote/web" </> hdir </> serializeKey key ++ ".log"
, "remote/web" </> hdir </> keyFile key ++ ".log"
]
where
hdir = branchHashDir config key
-urlLogExt :: String
+urlLogExt :: S.ByteString
urlLogExt = ".log.web"
{- Does not work on oldurllogs. -}
-isUrlLog :: FilePath -> Bool
-isUrlLog file = urlLogExt `isSuffixOf` file
+isUrlLog :: RawFilePath -> Bool
+isUrlLog file = urlLogExt `S.isSuffixOf` file
{- The filename of the remote state log for a given key. -}
-remoteStateLogFile :: GitConfig -> Key -> FilePath
-remoteStateLogFile config key = branchHashDir config key
- </> keyFile key ++ remoteStateLogExt
+remoteStateLogFile :: GitConfig -> Key -> RawFilePath
+remoteStateLogFile config key =
+ toRawFilePath (branchHashDir config key </> keyFile key)
+ <> remoteStateLogExt
-remoteStateLogExt :: String
+remoteStateLogExt :: S.ByteString
remoteStateLogExt = ".log.rmt"
-isRemoteStateLog :: FilePath -> Bool
-isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path
+isRemoteStateLog :: RawFilePath -> Bool
+isRemoteStateLog path = remoteStateLogExt `S.isSuffixOf` path
{- The filename of the chunk log for a given key. -}
-chunkLogFile :: GitConfig -> Key -> FilePath
-chunkLogFile config key = branchHashDir config key </> keyFile key ++ chunkLogExt
+chunkLogFile :: GitConfig -> Key -> RawFilePath
+chunkLogFile config key =
+ toRawFilePath (branchHashDir config key </> keyFile key)
+ <> chunkLogExt
-chunkLogExt :: String
+chunkLogExt :: S.ByteString
chunkLogExt = ".log.cnk"
-isChunkLog :: FilePath -> Bool
-isChunkLog path = chunkLogExt `isSuffixOf` path
+isChunkLog :: RawFilePath -> Bool
+isChunkLog path = chunkLogExt `S.isSuffixOf` path
{- The filename of the metadata log for a given key. -}
-metaDataLogFile :: GitConfig -> Key -> FilePath
-metaDataLogFile config key = branchHashDir config key </> keyFile key ++ metaDataLogExt
+metaDataLogFile :: GitConfig -> Key -> RawFilePath
+metaDataLogFile config key =
+ toRawFilePath (branchHashDir config key </> keyFile key)
+ <> metaDataLogExt
-metaDataLogExt :: String
+metaDataLogExt :: S.ByteString
metaDataLogExt = ".log.met"
-isMetaDataLog :: FilePath -> Bool
-isMetaDataLog path = metaDataLogExt `isSuffixOf` path
+isMetaDataLog :: RawFilePath -> Bool
+isMetaDataLog path = metaDataLogExt `S.isSuffixOf` path
{- The filename of the remote metadata log for a given key. -}
-remoteMetaDataLogFile :: GitConfig -> Key -> FilePath
-remoteMetaDataLogFile config key = branchHashDir config key </> keyFile key ++ remoteMetaDataLogExt
+remoteMetaDataLogFile :: GitConfig -> Key -> RawFilePath
+remoteMetaDataLogFile config key =
+ toRawFilePath (branchHashDir config key </> keyFile key)
+ <> remoteMetaDataLogExt
-remoteMetaDataLogExt :: String
+remoteMetaDataLogExt :: S.ByteString
remoteMetaDataLogExt = ".log.rmet"
-isRemoteMetaDataLog :: FilePath -> Bool
-isRemoteMetaDataLog path = remoteMetaDataLogExt `isSuffixOf` path
+isRemoteMetaDataLog :: RawFilePath -> Bool
+isRemoteMetaDataLog path = remoteMetaDataLogExt `S.isSuffixOf` path
{- The filename of the remote content identifier log for a given key. -}
-remoteContentIdentifierLogFile :: GitConfig -> Key -> FilePath
-remoteContentIdentifierLogFile config key = branchHashDir config key </> keyFile key ++ remoteContentIdentifierExt
+remoteContentIdentifierLogFile :: GitConfig -> Key -> RawFilePath
+remoteContentIdentifierLogFile config key =
+ toRawFilePath (branchHashDir config key </> keyFile key)
+ <> remoteContentIdentifierExt
-remoteContentIdentifierExt :: String
+remoteContentIdentifierExt :: S.ByteString
remoteContentIdentifierExt = ".log.cid"
-isRemoteContentIdentifierLog :: FilePath -> Bool
-isRemoteContentIdentifierLog path = remoteContentIdentifierExt `isSuffixOf` path
+isRemoteContentIdentifierLog :: RawFilePath -> Bool
+isRemoteContentIdentifierLog path = remoteContentIdentifierExt `S.isSuffixOf` path
{- From an extension and a log filename, get the key that it's a log for. -}
-extLogFileKey :: String -> FilePath -> Maybe Key
+extLogFileKey :: S.ByteString -> RawFilePath -> Maybe Key
extLogFileKey expectedext path
- | ext == expectedext = fileKey base
+ | encodeBS' ext == expectedext = fileKey base
| otherwise = Nothing
where
- file = takeFileName path
+ file = takeFileName (fromRawFilePath path)
(base, ext) = splitAt (length file - extlen) file
- extlen = length expectedext
+ extlen = S.length expectedext
{- Converts a url log file into a key.
- (Does not work on oldurlLogs.) -}
-urlLogFileKey :: FilePath -> Maybe Key
+urlLogFileKey :: RawFilePath -> Maybe Key
urlLogFileKey = extLogFileKey urlLogExt
{- Converts a pathname into a key if it's a location log. -}
-locationLogFileKey :: FilePath -> Maybe Key
+locationLogFileKey :: RawFilePath -> Maybe Key
locationLogFileKey path
-- Want only xx/yy/foo.log, not .log files in other places.
- | length (splitDirectories path) /= 3 = Nothing
+ | length (splitDirectories (fromRawFilePath path)) /= 3 = Nothing
| otherwise = extLogFileKey ".log" path