summaryrefslogtreecommitdiff
path: root/Logs
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2020-07-14 14:35:26 -0400
committerJoey Hess <joeyh@joeyh.name>2020-07-14 14:35:26 -0400
commit7b2d23655655b3ea7dab3821b9b7192bf969c8db (patch)
treec4ff38c40fe90912c12965f749b9b70a0473de0c /Logs
parent535cdc8d48b103a000c8f847dd4e8b85a3fbe7e3 (diff)
importfeed: stream metadata for 5% speedup
On top of the 10% speedup from streaming url logs.
Diffstat (limited to 'Logs')
-rw-r--r--Logs/MetaData.hs14
-rw-r--r--Logs/Web.hs21
2 files changed, 21 insertions, 14 deletions
diff --git a/Logs/MetaData.hs b/Logs/MetaData.hs
index ea1462c61f..6206ec2218 100644
--- a/Logs/MetaData.hs
+++ b/Logs/MetaData.hs
@@ -19,13 +19,14 @@
- after the other remote redundantly set foo +x, it was unset,
- and so foo currently has no value.
-
- - Copyright 2014-2019 Joey Hess <id@joeyh.name>
+ - Copyright 2014-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Logs.MetaData (
getCurrentMetaData,
+ parseCurrentMetaData,
getCurrentRemoteMetaData,
addMetaData,
addRemoteMetaData,
@@ -47,6 +48,7 @@ import Logs.MetaData.Pure
import qualified Data.Set as S
import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as L
{- Go through the log from oldest to newest, and combine it all
- into a single MetaData representing the current state.
@@ -60,9 +62,13 @@ getCurrentMetaData = getCurrentMetaData' metaDataLogFile
getCurrentMetaData' :: (GitConfig -> Key -> RawFilePath) -> Key -> Annex MetaData
getCurrentMetaData' getlogfile k = do
config <- Annex.getGitConfig
- ls <- S.toAscList <$> readLog (getlogfile config k)
- let loggedmeta = logToCurrentMetaData ls
- return $ currentMetaData $ unionMetaData loggedmeta
+ parseCurrentMetaData <$> Annex.Branch.get (getlogfile config k)
+
+parseCurrentMetaData :: L.ByteString -> MetaData
+parseCurrentMetaData content =
+ let ls = S.toAscList $ parseLog content
+ loggedmeta = logToCurrentMetaData ls
+ in currentMetaData $ unionMetaData loggedmeta
(lastchanged ls loggedmeta)
where
lastchanged [] _ = emptyMetaData
diff --git a/Logs/Web.hs b/Logs/Web.hs
index a73f18186b..7a7b529937 100644
--- a/Logs/Web.hs
+++ b/Logs/Web.hs
@@ -13,7 +13,7 @@ module Logs.Web (
getUrlsWithPrefix,
setUrlPresent,
setUrlMissing,
- knownUrls,
+ withKnownUrls,
Downloader(..),
getDownloader,
setDownloader,
@@ -87,8 +87,8 @@ setUrlMissing key url = do
_ -> True
{- Finds all known urls. -}
-knownUrls :: Annex [(Key, URLString)]
-knownUrls = do
+withKnownUrls :: (Annex (Maybe (Key, [URLString])) -> Annex a) -> Annex a
+withKnownUrls a = do
{- Ensure any journalled changes are committed to the git-annex
- branch, since we're going to look at its tree. -}
_ <- Annex.Branch.update
@@ -98,15 +98,16 @@ knownUrls = do
Annex.Branch.fullname
g <- Annex.gitRepo
let want = urlLogFileKey . getTopFilePath . Git.LsTree.file
- catObjectStreamLsTree l want g (go [])
+ catObjectStreamLsTree l want g (\reader -> a (go reader))
`finally` void (liftIO cleanup)
where
- go c reader = liftIO reader >>= \case
- Just (k, Just content) ->
- let !c' = zip (repeat k) (geturls content) ++ c
- in go c' reader
- Just (_, Nothing) -> go c reader
- Nothing -> return c
+ go reader = liftIO reader >>= \case
+ Just (k, Just content) ->
+ case geturls content of
+ [] -> go reader
+ us -> return (Just (k, us))
+ Just (_, Nothing) -> go reader
+ Nothing -> return Nothing
geturls = map (decodeBS . fromLogInfo) . getLog