diff options
-rw-r--r-- | Annex/FileMatcher.hs | 1 | ||||
-rw-r--r-- | CHANGELOG | 3 | ||||
-rw-r--r-- | Command/Sync.hs | 35 | ||||
-rw-r--r-- | Limit.hs | 22 | ||||
-rw-r--r-- | Limit/Wanted.hs | 2 | ||||
-rw-r--r-- | Logs/PreferredContent.hs | 13 | ||||
-rw-r--r-- | Types/FileMatcher.hs | 5 | ||||
-rw-r--r-- | doc/todo/skip_first_pass_in_git_annex_sync.mdwn | 2 | ||||
-rw-r--r-- | doc/todo/skip_first_pass_in_git_annex_sync/comment_2_9a93d321f955c3cdac5b8cbcd452f42e._comment | 15 |
9 files changed, 86 insertions, 12 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index f2eef0f59c..73e6706480 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -267,6 +267,7 @@ call :: Either String (FileMatcher Annex) -> ParseResult (MatchFiles Annex) call (Right sub) = Right $ Operation $ MatchFiles { matchAction = \notpresent mi -> matchMrun sub $ \o -> matchAction o notpresent mi + , matchNeedsFileName = any matchNeedsFileName sub , matchNeedsFileContent = any matchNeedsFileContent sub } call (Left err) = Left err @@ -18,6 +18,9 @@ git-annex (8.20200909) UNRELEASED; urgency=medium message, with some hints for the user for what to do. * Improve --debug output to show pid of processes that are started and stopped. + * sync --all: Sped up seeking to around twice as fast, by avoiding a + pass over the worktree files when preferred content expressions of the + local repo and remotes don't use include=/exclude=. -- Joey Hess <id@joeyh.name> Mon, 14 Sep 2020 18:34:37 -0400 diff --git a/Command/Sync.hs b/Command/Sync.hs index 5b50daf0b2..df3faa8772 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -57,6 +57,7 @@ import Annex.Drop import Annex.UUID import Logs.UUID import Logs.Export +import Logs.PreferredContent import Annex.AutoMerge import Annex.AdjustedBranch import Annex.Ssh @@ -65,6 +66,7 @@ import Annex.UpdateInstead import Annex.Export import Annex.TaggedPush import Annex.CurrentBranch +import Types.FileMatcher import qualified Database.Export as Export import Utility.Bloom import Utility.OptParse @@ -633,9 +635,11 @@ newer remote b = do - (Or, when in an ajusted branch where some files are hidden, at files in - the original branch.) - - - With --all, makes a second pass over all keys. - - This ensures that preferred content expressions that match on - - filenames work, even when in --all mode. 
+ - With --all, when preferred content expressions look at filenames, + - makes a first pass over the files in the work tree so those preferred + - content expressions will match. The second pass is over all keys, + - and only preferred content expressions that don't look at filenames + - will match. - - Returns true if any file transfers were made. - @@ -646,7 +650,12 @@ seekSyncContent _ [] _ = return False seekSyncContent o rs currbranch = do mvar <- liftIO newEmptyMVar bloom <- case keyOptions o of - Just WantAllKeys -> Just <$> genBloomFilter (seekworktree mvar (WorkTreeItems [])) + Just WantAllKeys -> ifM preferredcontentmatchesfilenames + ( Just <$> genBloomFilter (seekworktree mvar (WorkTreeItems [])) + , do + liftIO $ print "skipped first pass" + pure Nothing + ) _ -> case currbranch of (Just origbranch, Just adj) | adjustmentHidesFiles adj -> do l <- workTreeItems' (AllowHidden True) ww (contentOfOption o) @@ -692,6 +701,12 @@ seekSyncContent o rs currbranch = do void $ liftIO $ tryPutMVar mvar () next $ return True + preferredcontentmatchesfilenames = + preferredcontentmatchesfilenames' Nothing + <||> anyM (preferredcontentmatchesfilenames' . Just . Remote.uuid) rs + preferredcontentmatchesfilenames' = + introspectPreferredRequiredContent matchNeedsFileName + {- If it's preferred content, and we don't have it, get it from one of the - listed remotes (preferring the cheaper earlier ones). - @@ -717,11 +732,13 @@ syncFile ebloom rs af k = do u <- getUUID let locs' = concat [if inhere || got then [u] else [], putrs, locs] - -- A bloom filter is populated with all the keys in the first pass. - -- On the second pass, avoid dropping keys that were seen in the - -- first pass, which would happen otherwise when preferred content - -- matches on the filename, which is not available in the second - -- pass. + -- To handle --all, a bloom filter is populated with all the keys + -- of files in the working tree in the first pass. 
On the second + -- pass, avoid dropping keys that were seen in the first pass, which + -- would happen otherwise when preferred content matches on the + -- filename, which is not available in the second pass. + -- (When the preferred content expressions do not match on + -- filenames, the first pass is skipped for speed.) -- -- When there's a false positive in the bloom filter, the result -- is keeping a key that preferred content doesn't really want. @@ -1,6 +1,6 @@ {- user-specified limits on files to act on - - - Copyright 2011-2019 Joey Hess <id@joeyh.name> + - Copyright 2011-2020 Joey Hess <id@joeyh.name> - - Licensed under the GNU AGPL version 3 or higher. -} @@ -88,6 +88,7 @@ addInclude = addLimit . limitInclude limitInclude :: MkLimit Annex limitInclude glob = Right $ MatchFiles { matchAction = const $ matchGlobFile glob + , matchNeedsFileName = True , matchNeedsFileContent = False } @@ -98,6 +99,7 @@ addExclude = addLimit . limitExclude limitExclude :: MkLimit Annex limitExclude glob = Right $ MatchFiles { matchAction = const $ not <$$> matchGlobFile glob + , matchNeedsFileName = True , matchNeedsFileContent = False } @@ -136,6 +138,7 @@ matchMagic :: String -> (Magic -> FilePath -> Annex (Maybe String)) -> (Provided matchMagic _limitname querymagic selectprovidedinfo (Just magic) glob = Right $ MatchFiles { matchAction = const go + , matchNeedsFileName = False , matchNeedsFileContent = True } where @@ -152,12 +155,14 @@ matchMagic limitname _ _ Nothing _ = addUnlocked :: Annex () addUnlocked = addLimit $ Right $ MatchFiles { matchAction = const $ matchLockStatus False + , matchNeedsFileName = True , matchNeedsFileContent = False } addLocked :: Annex () addLocked = addLimit $ Right $ MatchFiles { matchAction = const $ matchLockStatus True + , matchNeedsFileName = True , matchNeedsFileContent = False } @@ -184,6 +189,7 @@ addIn s = do (name, date) = separate (== '@') s use a = Right $ MatchFiles { matchAction = checkKey . 
a + , matchNeedsFileName = False , matchNeedsFileContent = False } inuuid u notpresent key @@ -211,6 +217,7 @@ limitPresent u = MatchFiles else do us <- Remote.keyLocations key return $ maybe False (`elem` us) u + , matchNeedsFileName = False , matchNeedsFileContent = False } @@ -218,6 +225,7 @@ limitPresent u = MatchFiles limitInDir :: FilePath -> MatchFiles Annex limitInDir dir = MatchFiles { matchAction = const go + , matchNeedsFileName = True , matchNeedsFileContent = False } where @@ -247,6 +255,7 @@ limitCopies want = case splitc ':' want of Just n -> Right $ MatchFiles { matchAction = \notpresent -> checkKey $ go' n good notpresent + , matchNeedsFileName = False , matchNeedsFileContent = False } go' n good notpresent key = do @@ -268,6 +277,7 @@ limitLackingCopies approx want = case readish want of Just needed -> Right $ MatchFiles { matchAction = \notpresent mi -> flip checkKey mi $ go mi needed notpresent + , matchNeedsFileName = False , matchNeedsFileContent = False } Nothing -> Left "bad value for number of lacking copies" @@ -293,6 +303,7 @@ limitLackingCopies approx want = case readish want of limitUnused :: MatchFiles Annex limitUnused = MatchFiles { matchAction = go + , matchNeedsFileName = False , matchNeedsFileContent = False } where @@ -306,6 +317,7 @@ limitUnused = MatchFiles limitAnything :: MatchFiles Annex limitAnything = MatchFiles { matchAction = \_ _ -> return True + , matchNeedsFileName = False , matchNeedsFileContent = False } @@ -313,6 +325,7 @@ limitAnything = MatchFiles limitNothing :: MatchFiles Annex limitNothing = MatchFiles { matchAction = \_ _ -> return False + , matchNeedsFileName = False , matchNeedsFileContent = False } @@ -332,6 +345,7 @@ limitInAllGroup getgroupmap groupname = Right $ MatchFiles else if not (S.null (S.intersection want notpresent)) then return False else checkKey (check want) mi + , matchNeedsFileName = False , matchNeedsFileContent = False } where @@ -346,6 +360,7 @@ addInBackend = addLimit . 
limitInBackend limitInBackend :: MkLimit Annex limitInBackend name = Right $ MatchFiles { matchAction = const $ checkKey check + , matchNeedsFileName = False , matchNeedsFileContent = False } where @@ -359,6 +374,7 @@ addSecureHash = addLimit $ Right limitSecureHash limitSecureHash :: MatchFiles Annex limitSecureHash = MatchFiles { matchAction = const $ checkKey isCryptographicallySecure + , matchNeedsFileName = False , matchNeedsFileContent = False } @@ -374,6 +390,7 @@ limitSize lb vs s = case readSize dataUnits s of Nothing -> Left "bad size" Just sz -> Right $ MatchFiles { matchAction = go sz + , matchNeedsFileName = False , matchNeedsFileContent = False } where @@ -399,6 +416,7 @@ limitMetaData s = case parseMetaDataMatcher s of Left e -> Left e Right (f, matching) -> Right $ MatchFiles { matchAction = const $ checkKey (check f matching) + , matchNeedsFileName = False , matchNeedsFileContent = False } where @@ -419,6 +437,7 @@ addTimeLimit duration = do shutdown True liftIO $ exitWith $ ExitFailure 101 else return True + , matchNeedsFileName = False , matchNeedsFileContent = False } @@ -427,6 +446,7 @@ addAccessedWithin duration = do now <- liftIO getPOSIXTime addLimit $ Right $ MatchFiles { matchAction = const $ checkKey $ check now + , matchNeedsFileName = False , matchNeedsFileContent = False } where diff --git a/Limit/Wanted.hs b/Limit/Wanted.hs index 2276a3a4ff..552f0c2e5d 100644 --- a/Limit/Wanted.hs +++ b/Limit/Wanted.hs @@ -15,12 +15,14 @@ import Types.FileMatcher addWantGet :: Annex () addWantGet = addLimit $ Right $ MatchFiles { matchAction = const $ checkWant $ wantGet False Nothing + , matchNeedsFileName = False , matchNeedsFileContent = False } addWantDrop :: Annex () addWantDrop = addLimit $ Right $ MatchFiles { matchAction = const $ checkWant $ wantDrop False Nothing Nothing + , matchNeedsFileName = False , matchNeedsFileContent = False } diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs index 7b8366f855..adb0189ec3 100644 --- 
a/Logs/PreferredContent.hs +++ b/Logs/PreferredContent.hs @@ -1,6 +1,6 @@ {- git-annex preferred content matcher configuration - - - Copyright 2012-2019 Joey Hess <id@joeyh.name> + - Copyright 2012-2020 Joey Hess <id@joeyh.name> - - Licensed under the GNU AGPL version 3 or higher. -} @@ -21,6 +21,7 @@ module Logs.PreferredContent ( defaultStandardGroup, preferredRequiredMapsLoad, preferredRequiredMapsLoad', + introspectPreferredRequiredContent, prop_standardGroups_parse, ) where @@ -61,6 +62,16 @@ checkMap getmap mu notpresent mkey afile d = do Nothing -> return d Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d) +{- Checks if the preferred or required content for the specified repository + - (or the current repository if none is specified) contains any terms + - that meet the condition. -} +introspectPreferredRequiredContent :: (MatchFiles Annex -> Bool) -> Maybe UUID -> Annex Bool +introspectPreferredRequiredContent c mu = do + u <- maybe getUUID return mu + check u preferredContentMap <||> check u requiredContentMap + where + check u mk = mk >>= return . maybe False (any c) . M.lookup u + preferredContentMap :: Annex (FileMatcherMap Annex) preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad preferredContentTokens) return =<< Annex.getState Annex.preferredcontentmap diff --git a/Types/FileMatcher.hs b/Types/FileMatcher.hs index 8b5558f0e0..8a56a67ac7 100644 --- a/Types/FileMatcher.hs +++ b/Types/FileMatcher.hs @@ -57,8 +57,11 @@ type AssumeNotPresent = S.Set UUID data MatchFiles a = MatchFiles { matchAction :: AssumeNotPresent -> MatchInfo -> a Bool + , matchNeedsFileName :: Bool + -- ^ does the matchAction need a filename in order to match? , matchNeedsFileContent :: Bool - -- ^ does the matchAction need the file content to be present? + -- ^ does the matchAction need the file content to be present in + -- order to succeed? 
} type FileMatcher a = Matcher (MatchFiles a) diff --git a/doc/todo/skip_first_pass_in_git_annex_sync.mdwn b/doc/todo/skip_first_pass_in_git_annex_sync.mdwn index e9e2a0eba8..a70b0df14d 100644 --- a/doc/todo/skip_first_pass_in_git_annex_sync.mdwn +++ b/doc/todo/skip_first_pass_in_git_annex_sync.mdwn @@ -19,3 +19,5 @@ and it led to a 2x speedup (with warm cache): This repo has 25641 keys and all of them are in the worktree too. + +> [[done]]! --[[Joey]] diff --git a/doc/todo/skip_first_pass_in_git_annex_sync/comment_2_9a93d321f955c3cdac5b8cbcd452f42e._comment b/doc/todo/skip_first_pass_in_git_annex_sync/comment_2_9a93d321f955c3cdac5b8cbcd452f42e._comment new file mode 100644 index 0000000000..6802de5e07 --- /dev/null +++ b/doc/todo/skip_first_pass_in_git_annex_sync/comment_2_9a93d321f955c3cdac5b8cbcd452f42e._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2020-09-24T19:04:32Z" + content=""" +One side effect of this optimisation is that, while sync --all used to +tell the filenames it was getting or dropping, when operating on files +in the working tree, when the optimisation is enabled it will only +display the keys. So, its behavior in 2 different repos might seem +inconsistent to a user, who doesn't know about all these gory 2 pass details. + +I think, if that became a problem, the best fix would be to only display +the keys, and never the worktree filenames, even when running the first +pass. But I'll wait and see if that needs to be done, I suppose. +"""]] |