diff options
author | Joey Hess <joeyh@joeyh.name> | 2020-07-14 14:35:26 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2020-07-14 14:35:26 -0400 |
commit | 7b2d23655655b3ea7dab3821b9b7192bf969c8db (patch) | |
tree | c4ff38c40fe90912c12965f749b9b70a0473de0c /Logs | |
parent | 535cdc8d48b103a000c8f847dd4e8b85a3fbe7e3 (diff) |
importfeed: stream metadata for 5% speedup
On top of the 10% speedup from streaming url logs.
Diffstat (limited to 'Logs')
-rw-r--r-- | Logs/MetaData.hs | 14 | ||||
-rw-r--r-- | Logs/Web.hs | 21 |
2 files changed, 21 insertions, 14 deletions
diff --git a/Logs/MetaData.hs b/Logs/MetaData.hs index ea1462c61f..6206ec2218 100644 --- a/Logs/MetaData.hs +++ b/Logs/MetaData.hs @@ -19,13 +19,14 @@ - after the other remote redundantly set foo +x, it was unset, - and so foo currently has no value. - - - Copyright 2014-2019 Joey Hess <id@joeyh.name> + - Copyright 2014-2020 Joey Hess <id@joeyh.name> - - Licensed under the GNU AGPL version 3 or higher. -} module Logs.MetaData ( getCurrentMetaData, + parseCurrentMetaData, getCurrentRemoteMetaData, addMetaData, addRemoteMetaData, @@ -47,6 +48,7 @@ import Logs.MetaData.Pure import qualified Data.Set as S import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L {- Go through the log from oldest to newest, and combine it all - into a single MetaData representing the current state. @@ -60,9 +62,13 @@ getCurrentMetaData = getCurrentMetaData' metaDataLogFile getCurrentMetaData' :: (GitConfig -> Key -> RawFilePath) -> Key -> Annex MetaData getCurrentMetaData' getlogfile k = do config <- Annex.getGitConfig - ls <- S.toAscList <$> readLog (getlogfile config k) - let loggedmeta = logToCurrentMetaData ls - return $ currentMetaData $ unionMetaData loggedmeta + parseCurrentMetaData <$> Annex.Branch.get (getlogfile config k) + +parseCurrentMetaData :: L.ByteString -> MetaData +parseCurrentMetaData content = + let ls = S.toAscList $ parseLog content + loggedmeta = logToCurrentMetaData ls + in currentMetaData $ unionMetaData loggedmeta (lastchanged ls loggedmeta) where lastchanged [] _ = emptyMetaData diff --git a/Logs/Web.hs b/Logs/Web.hs index a73f18186b..7a7b529937 100644 --- a/Logs/Web.hs +++ b/Logs/Web.hs @@ -13,7 +13,7 @@ module Logs.Web ( getUrlsWithPrefix, setUrlPresent, setUrlMissing, - knownUrls, + withKnownUrls, Downloader(..), getDownloader, setDownloader, @@ -87,8 +87,8 @@ setUrlMissing key url = do _ -> True {- Finds all known urls. -} -knownUrls :: Annex [(Key, URLString)] -knownUrls = do +withKnownUrls :: (Annex (Maybe (Key, [URLString])) -> Annex a) -> Annex a +withKnownUrls a = do {- Ensure any journalled changes are committed to the git-annex - branch, since we're going to look at its tree. -} _ <- Annex.Branch.update @@ -98,15 +98,16 @@ knownUrls = do Annex.Branch.fullname g <- Annex.gitRepo let want = urlLogFileKey . getTopFilePath . Git.LsTree.file - catObjectStreamLsTree l want g (go []) + catObjectStreamLsTree l want g (\reader -> a (go reader)) `finally` void (liftIO cleanup) where - go c reader = liftIO reader >>= \case - Just (k, Just content) -> - let !c' = zip (repeat k) (geturls content) ++ c - in go c' reader - Just (_, Nothing) -> go c reader - Nothing -> return c + go reader = liftIO reader >>= \case + Just (k, Just content) -> + case geturls content of + [] -> go reader + us -> return (Just (k, us)) + Just (_, Nothing) -> go reader + Nothing -> return Nothing geturls = map (decodeBS . fromLogInfo) . getLog |