summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Annex/FileMatcher.hs | 1
-rw-r--r-- CHANGELOG | 3
-rw-r--r-- Command/Sync.hs | 35
-rw-r--r-- Limit.hs | 22
-rw-r--r-- Limit/Wanted.hs | 2
-rw-r--r-- Logs/PreferredContent.hs | 13
-rw-r--r-- Types/FileMatcher.hs | 5
-rw-r--r-- doc/todo/skip_first_pass_in_git_annex_sync.mdwn | 2
-rw-r--r-- doc/todo/skip_first_pass_in_git_annex_sync/comment_2_9a93d321f955c3cdac5b8cbcd452f42e._comment | 15
9 files changed, 86 insertions, 12 deletions
diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs
index f2eef0f59c..73e6706480 100644
--- a/Annex/FileMatcher.hs
+++ b/Annex/FileMatcher.hs
@@ -267,6 +267,7 @@ call :: Either String (FileMatcher Annex) -> ParseResult (MatchFiles Annex)
call (Right sub) = Right $ Operation $ MatchFiles
{ matchAction = \notpresent mi ->
matchMrun sub $ \o -> matchAction o notpresent mi
+ , matchNeedsFileName = any matchNeedsFileName sub
, matchNeedsFileContent = any matchNeedsFileContent sub
}
call (Left err) = Left err
diff --git a/CHANGELOG b/CHANGELOG
index 4fdb0f9618..171a3b190e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -18,6 +18,9 @@ git-annex (8.20200909) UNRELEASED; urgency=medium
message, with some hints for the user for what to do.
* Improve --debug output to show pid of processes that are started and
stopped.
+ * sync --all: Sped up seeking to around twice as fast, by avoiding a
+ pass over the worktree files when preferred content expressions of the
+ local repo and remotes don't use include=/exclude=.
-- Joey Hess <id@joeyh.name> Mon, 14 Sep 2020 18:34:37 -0400
diff --git a/Command/Sync.hs b/Command/Sync.hs
index 5b50daf0b2..df3faa8772 100644
--- a/Command/Sync.hs
+++ b/Command/Sync.hs
@@ -57,6 +57,7 @@ import Annex.Drop
import Annex.UUID
import Logs.UUID
import Logs.Export
+import Logs.PreferredContent
import Annex.AutoMerge
import Annex.AdjustedBranch
import Annex.Ssh
@@ -65,6 +66,7 @@ import Annex.UpdateInstead
import Annex.Export
import Annex.TaggedPush
import Annex.CurrentBranch
+import Types.FileMatcher
import qualified Database.Export as Export
import Utility.Bloom
import Utility.OptParse
@@ -633,9 +635,11 @@ newer remote b = do
- (Or, when in an adjusted branch where some files are hidden, at files in
- the original branch.)
-
- - With --all, makes a second pass over all keys.
- - This ensures that preferred content expressions that match on
- - filenames work, even when in --all mode.
+ - With --all, when preferred content expressions look at filenames,
+ - makes a first pass over the files in the work tree so those preferred
+ - content expressions will match. The second pass is over all keys,
+ - and only preferred content expressions that don't look at filenames
+ - will match.
-
- Returns true if any file transfers were made.
-
@@ -646,7 +650,12 @@ seekSyncContent _ [] _ = return False
seekSyncContent o rs currbranch = do
mvar <- liftIO newEmptyMVar
bloom <- case keyOptions o of
- Just WantAllKeys -> Just <$> genBloomFilter (seekworktree mvar (WorkTreeItems []))
+ Just WantAllKeys -> ifM preferredcontentmatchesfilenames
+ ( Just <$> genBloomFilter (seekworktree mvar (WorkTreeItems []))
+ , do
+ liftIO $ print "skipped first pass"
+ pure Nothing
+ )
_ -> case currbranch of
(Just origbranch, Just adj) | adjustmentHidesFiles adj -> do
l <- workTreeItems' (AllowHidden True) ww (contentOfOption o)
@@ -692,6 +701,12 @@ seekSyncContent o rs currbranch = do
void $ liftIO $ tryPutMVar mvar ()
next $ return True
+ preferredcontentmatchesfilenames =
+ preferredcontentmatchesfilenames' Nothing
+ <||> anyM (preferredcontentmatchesfilenames' . Just . Remote.uuid) rs
+ preferredcontentmatchesfilenames' =
+ introspectPreferredRequiredContent matchNeedsFileName
+
{- If it's preferred content, and we don't have it, get it from one of the
- listed remotes (preferring the cheaper earlier ones).
-
@@ -717,11 +732,13 @@ syncFile ebloom rs af k = do
u <- getUUID
let locs' = concat [if inhere || got then [u] else [], putrs, locs]
- -- A bloom filter is populated with all the keys in the first pass.
- -- On the second pass, avoid dropping keys that were seen in the
- -- first pass, which would happen otherwise when preferred content
- -- matches on the filename, which is not available in the second
- -- pass.
+ -- To handle --all, a bloom filter is populated with all the keys
+ -- of files in the working tree in the first pass. On the second
+ -- pass, avoid dropping keys that were seen in the first pass, which
+ -- would happen otherwise when preferred content matches on the
+ -- filename, which is not available in the second pass.
+ -- (When the preferred content expressions do not match on
+ -- filenames, the first pass is skipped for speed.)
--
-- When there's a false positive in the bloom filter, the result
-- is keeping a key that preferred content doesn't really want.
diff --git a/Limit.hs b/Limit.hs
index a5c9ada0aa..2caff89f71 100644
--- a/Limit.hs
+++ b/Limit.hs
@@ -1,6 +1,6 @@
{- user-specified limits on files to act on
-
- - Copyright 2011-2019 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -88,6 +88,7 @@ addInclude = addLimit . limitInclude
limitInclude :: MkLimit Annex
limitInclude glob = Right $ MatchFiles
{ matchAction = const $ matchGlobFile glob
+ , matchNeedsFileName = True
, matchNeedsFileContent = False
}
@@ -98,6 +99,7 @@ addExclude = addLimit . limitExclude
limitExclude :: MkLimit Annex
limitExclude glob = Right $ MatchFiles
{ matchAction = const $ not <$$> matchGlobFile glob
+ , matchNeedsFileName = True
, matchNeedsFileContent = False
}
@@ -136,6 +138,7 @@ matchMagic :: String -> (Magic -> FilePath -> Annex (Maybe String)) -> (Provided
matchMagic _limitname querymagic selectprovidedinfo (Just magic) glob =
Right $ MatchFiles
{ matchAction = const go
+ , matchNeedsFileName = False
, matchNeedsFileContent = True
}
where
@@ -152,12 +155,14 @@ matchMagic limitname _ _ Nothing _ =
addUnlocked :: Annex ()
addUnlocked = addLimit $ Right $ MatchFiles
{ matchAction = const $ matchLockStatus False
+ , matchNeedsFileName = True
, matchNeedsFileContent = False
}
addLocked :: Annex ()
addLocked = addLimit $ Right $ MatchFiles
{ matchAction = const $ matchLockStatus True
+ , matchNeedsFileName = True
, matchNeedsFileContent = False
}
@@ -184,6 +189,7 @@ addIn s = do
(name, date) = separate (== '@') s
use a = Right $ MatchFiles
{ matchAction = checkKey . a
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
inuuid u notpresent key
@@ -211,6 +217,7 @@ limitPresent u = MatchFiles
else do
us <- Remote.keyLocations key
return $ maybe False (`elem` us) u
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
@@ -218,6 +225,7 @@ limitPresent u = MatchFiles
limitInDir :: FilePath -> MatchFiles Annex
limitInDir dir = MatchFiles
{ matchAction = const go
+ , matchNeedsFileName = True
, matchNeedsFileContent = False
}
where
@@ -247,6 +255,7 @@ limitCopies want = case splitc ':' want of
Just n -> Right $ MatchFiles
{ matchAction = \notpresent -> checkKey $
go' n good notpresent
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
go' n good notpresent key = do
@@ -268,6 +277,7 @@ limitLackingCopies approx want = case readish want of
Just needed -> Right $ MatchFiles
{ matchAction = \notpresent mi -> flip checkKey mi $
go mi needed notpresent
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
Nothing -> Left "bad value for number of lacking copies"
@@ -293,6 +303,7 @@ limitLackingCopies approx want = case readish want of
limitUnused :: MatchFiles Annex
limitUnused = MatchFiles
{ matchAction = go
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
where
@@ -306,6 +317,7 @@ limitUnused = MatchFiles
limitAnything :: MatchFiles Annex
limitAnything = MatchFiles
{ matchAction = \_ _ -> return True
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
@@ -313,6 +325,7 @@ limitAnything = MatchFiles
limitNothing :: MatchFiles Annex
limitNothing = MatchFiles
{ matchAction = \_ _ -> return False
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
@@ -332,6 +345,7 @@ limitInAllGroup getgroupmap groupname = Right $ MatchFiles
else if not (S.null (S.intersection want notpresent))
then return False
else checkKey (check want) mi
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
where
@@ -346,6 +360,7 @@ addInBackend = addLimit . limitInBackend
limitInBackend :: MkLimit Annex
limitInBackend name = Right $ MatchFiles
{ matchAction = const $ checkKey check
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
where
@@ -359,6 +374,7 @@ addSecureHash = addLimit $ Right limitSecureHash
limitSecureHash :: MatchFiles Annex
limitSecureHash = MatchFiles
{ matchAction = const $ checkKey isCryptographicallySecure
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
@@ -374,6 +390,7 @@ limitSize lb vs s = case readSize dataUnits s of
Nothing -> Left "bad size"
Just sz -> Right $ MatchFiles
{ matchAction = go sz
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
where
@@ -399,6 +416,7 @@ limitMetaData s = case parseMetaDataMatcher s of
Left e -> Left e
Right (f, matching) -> Right $ MatchFiles
{ matchAction = const $ checkKey (check f matching)
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
where
@@ -419,6 +437,7 @@ addTimeLimit duration = do
shutdown True
liftIO $ exitWith $ ExitFailure 101
else return True
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
@@ -427,6 +446,7 @@ addAccessedWithin duration = do
now <- liftIO getPOSIXTime
addLimit $ Right $ MatchFiles
{ matchAction = const $ checkKey $ check now
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
where
diff --git a/Limit/Wanted.hs b/Limit/Wanted.hs
index 2276a3a4ff..552f0c2e5d 100644
--- a/Limit/Wanted.hs
+++ b/Limit/Wanted.hs
@@ -15,12 +15,14 @@ import Types.FileMatcher
addWantGet :: Annex ()
addWantGet = addLimit $ Right $ MatchFiles
{ matchAction = const $ checkWant $ wantGet False Nothing
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
addWantDrop :: Annex ()
addWantDrop = addLimit $ Right $ MatchFiles
{ matchAction = const $ checkWant $ wantDrop False Nothing Nothing
+ , matchNeedsFileName = False
, matchNeedsFileContent = False
}
diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs
index 7b8366f855..adb0189ec3 100644
--- a/Logs/PreferredContent.hs
+++ b/Logs/PreferredContent.hs
@@ -1,6 +1,6 @@
{- git-annex preferred content matcher configuration
-
- - Copyright 2012-2019 Joey Hess <id@joeyh.name>
+ - Copyright 2012-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -21,6 +21,7 @@ module Logs.PreferredContent (
defaultStandardGroup,
preferredRequiredMapsLoad,
preferredRequiredMapsLoad',
+ introspectPreferredRequiredContent,
prop_standardGroups_parse,
) where
@@ -61,6 +62,16 @@ checkMap getmap mu notpresent mkey afile d = do
Nothing -> return d
Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d)
+{- Checks if the preferred or required content for the specified repository
+ - (or the current repository if none is specified) contains any terms
+ - that meet the condition. -}
+introspectPreferredRequiredContent :: (MatchFiles Annex -> Bool) -> Maybe UUID -> Annex Bool
+introspectPreferredRequiredContent c mu = do
+ u <- maybe getUUID return mu
+ check u preferredContentMap <||> check u requiredContentMap
+ where
+ check u mk = mk >>= return . maybe False (any c) . M.lookup u
+
preferredContentMap :: Annex (FileMatcherMap Annex)
preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad preferredContentTokens) return
=<< Annex.getState Annex.preferredcontentmap
diff --git a/Types/FileMatcher.hs b/Types/FileMatcher.hs
index 8b5558f0e0..8a56a67ac7 100644
--- a/Types/FileMatcher.hs
+++ b/Types/FileMatcher.hs
@@ -57,8 +57,11 @@ type AssumeNotPresent = S.Set UUID
data MatchFiles a = MatchFiles
{ matchAction :: AssumeNotPresent -> MatchInfo -> a Bool
+ , matchNeedsFileName :: Bool
+ -- ^ does the matchAction need a filename in order to match?
, matchNeedsFileContent :: Bool
- -- ^ does the matchAction need the file content to be present?
+ -- ^ does the matchAction need the file content to be present in
+ -- order to succeed?
}
type FileMatcher a = Matcher (MatchFiles a)
diff --git a/doc/todo/skip_first_pass_in_git_annex_sync.mdwn b/doc/todo/skip_first_pass_in_git_annex_sync.mdwn
index e9e2a0eba8..a70b0df14d 100644
--- a/doc/todo/skip_first_pass_in_git_annex_sync.mdwn
+++ b/doc/todo/skip_first_pass_in_git_annex_sync.mdwn
@@ -19,3 +19,5 @@ and it led to a 2x speedup (with warm cache):
This repo has 25641 keys and all of them are in the worktree too.
+
+> [[done]]! --[[Joey]]
diff --git a/doc/todo/skip_first_pass_in_git_annex_sync/comment_2_9a93d321f955c3cdac5b8cbcd452f42e._comment b/doc/todo/skip_first_pass_in_git_annex_sync/comment_2_9a93d321f955c3cdac5b8cbcd452f42e._comment
new file mode 100644
index 0000000000..6802de5e07
--- /dev/null
+++ b/doc/todo/skip_first_pass_in_git_annex_sync/comment_2_9a93d321f955c3cdac5b8cbcd452f42e._comment
@@ -0,0 +1,15 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2020-09-24T19:04:32Z"
+ content="""
+One side effect of this optimisation is that, while sync --all used to
+tell the filenames it was getting or dropping, when operating on files
+in the working tree, when the optimisation is enabled it will only
+display the keys. So, its behavior in 2 different repos might seem
+inconsistent to a user, who doesn't know about all these gory 2 pass details.
+
+I think, if that became a problem, the best fix would be to only display
+the keys, and never the worktree filenames, even when running the first
+pass. But I'll wait and see if that needs to be done, I suppose.
+"""]]