summaryrefslogtreecommitdiff
path: root/Logs
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2019-01-10 13:23:42 -0400
committerJoey Hess <joeyh@joeyh.name>2019-01-10 13:30:36 -0400
commit66603d6f75a7a10187f9450111f3bdb489bc59e4 (patch)
tree7550f5229a1c583b3c25225d8ade85af9209f9d6 /Logs
parent7e54c215b49c498161ed4beacc5dcf78e7660009 (diff)
attoparsec parsers for all new-format uuid-based logs
There should be some speed gains here, especially for chunk and remote state logs, which are queried once per key. Now only old-format uuid-based logs still need to be converted to attoparsec.
Diffstat (limited to 'Logs')
-rw-r--r--Logs/Chunk.hs4
-rw-r--r--Logs/Chunk/Pure.hs26
-rw-r--r--Logs/Config.hs12
-rw-r--r--Logs/Export.hs37
-rw-r--r--Logs/MapLog.hs33
-rw-r--r--Logs/PreferredContent/Raw.hs6
-rw-r--r--Logs/RemoteState.hs9
-rw-r--r--Logs/Schedule.hs4
-rw-r--r--Logs/UUIDBased.hs6
9 files changed, 84 insertions, 53 deletions
diff --git a/Logs/Chunk.hs b/Logs/Chunk.hs
index 11b49b6209..cbcb747bd4 100644
--- a/Logs/Chunk.hs
+++ b/Logs/Chunk.hs
@@ -38,7 +38,7 @@ chunksStored u k chunkmethod chunkcount = do
c <- liftIO currentVectorClock
config <- Annex.getGitConfig
Annex.Branch.change (chunkLogFile config k) $
- buildLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog . decodeBL
+ buildLog . changeMapLog c (u, chunkmethod) chunkcount . parseLog
chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex ()
chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0
@@ -46,7 +46,7 @@ chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0
getCurrentChunks :: UUID -> Key -> Annex [(ChunkMethod, ChunkCount)]
getCurrentChunks u k = do
config <- Annex.getGitConfig
- select . parseLog . decodeBL <$> Annex.Branch.get (chunkLogFile config k)
+ select . parseLog <$> Annex.Branch.get (chunkLogFile config k)
where
select = filter (\(_m, ct) -> ct > 0)
. map (\((_ku, m), l) -> (m, value l))
diff --git a/Logs/Chunk/Pure.hs b/Logs/Chunk/Pure.hs
index fa3788ca8d..73e27b5c91 100644
--- a/Logs/Chunk/Pure.hs
+++ b/Logs/Chunk/Pure.hs
@@ -19,6 +19,9 @@ import Logs.MapLog
import Data.Int
import qualified Data.ByteString as S
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Attoparsec.ByteString.Lazy as A
+import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
-- Currently chunks are all fixed size, but other chunking methods
@@ -33,20 +36,13 @@ type ChunkCount = Integer
type ChunkLog = MapLog (UUID, ChunkMethod) ChunkCount
-parseChunkMethod :: String -> ChunkMethod
-parseChunkMethod s = maybe (UnknownChunks $ encodeBS s) FixedSizeChunks (readish s)
-
buildChunkMethod :: ChunkMethod -> Builder
buildChunkMethod (FixedSizeChunks sz) = int64Dec sz
buildChunkMethod (UnknownChunks s) = byteString s
-parseLog :: String -> ChunkLog
-parseLog = parseMapLog fieldparser valueparser
- where
- fieldparser s =
- let (u,m) = separate (== ':') s
- in Just (toUUID u, parseChunkMethod m)
- valueparser = readish
+chunkMethodParser :: A.Parser ChunkMethod
+chunkMethodParser =
+ (FixedSizeChunks <$> A8.decimal) <|> (UnknownChunks <$> A.takeByteString)
buildLog :: ChunkLog -> Builder
buildLog = buildMapLog fieldbuilder valuebuilder
@@ -54,3 +50,13 @@ buildLog = buildMapLog fieldbuilder valuebuilder
fieldbuilder (u, m) = buildUUID u <> sep <> buildChunkMethod m
valuebuilder = integerDec
sep = charUtf8 ':'
+
+parseLog :: L.ByteString -> ChunkLog
+parseLog = parseMapLog fieldparser valueparser
+ where
+ fieldparser = (,)
+ <$> (toUUID <$> A8.takeTill (== ':'))
+ <* A8.char ':'
+ <*> chunkMethodParser
+ <* A.endOfInput
+ valueparser = A8.decimal
diff --git a/Logs/Config.hs b/Logs/Config.hs
index 767cee48ac..8773c432ae 100644
--- a/Logs/Config.hs
+++ b/Logs/Config.hs
@@ -20,6 +20,8 @@ import Logs.MapLog
import qualified Annex.Branch
import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
type ConfigName = String
@@ -35,7 +37,7 @@ setGlobalConfig' :: ConfigName -> ConfigValue -> Annex ()
setGlobalConfig' name new = do
c <- liftIO currentVectorClock
Annex.Branch.change configLog $
- buildGlobalConfig . changeMapLog c name new . parseGlobalConfig . decodeBL
+ buildGlobalConfig . changeMapLog c name new . parseGlobalConfig
unsetGlobalConfig :: ConfigName -> Annex ()
unsetGlobalConfig name = do
@@ -53,9 +55,11 @@ buildGlobalConfig = buildMapLog fieldbuilder valuebuilder
fieldbuilder = byteString . encodeBS
valuebuilder = byteString . encodeBS
-parseGlobalConfig :: String -> MapLog ConfigName ConfigValue
-parseGlobalConfig = parseMapLog Just Just
+parseGlobalConfig :: L.ByteString -> MapLog ConfigName ConfigValue
+parseGlobalConfig = parseMapLog string string
+ where
+ string = decodeBS <$> A.takeByteString
loadGlobalConfig :: Annex (M.Map ConfigName ConfigValue)
-loadGlobalConfig = M.filter (not . null) . simpleMap . parseGlobalConfig . decodeBL
+loadGlobalConfig = M.filter (not . null) . simpleMap . parseGlobalConfig
<$> Annex.Branch.get configLog
diff --git a/Logs/Export.hs b/Logs/Export.hs
index ac0208bcf5..57fa0f565f 100644
--- a/Logs/Export.hs
+++ b/Logs/Export.hs
@@ -18,6 +18,9 @@ import Logs
import Logs.MapLog
import Annex.UUID
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Attoparsec.ByteString.Lazy as A
+import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
data Exported = Exported
@@ -30,7 +33,7 @@ data ExportParticipants = ExportParticipants
{ exportFrom :: UUID
, exportTo :: UUID
}
- deriving (Eq, Ord)
+ deriving (Eq, Ord, Show)
data ExportChange = ExportChange
{ oldTreeish :: [Git.Ref]
@@ -44,7 +47,6 @@ data ExportChange = ExportChange
getExport :: UUID -> Annex [Exported]
getExport remoteuuid = nub . mapMaybe get . M.toList . simpleMap
. parseExportLog
- . decodeBL
<$> Annex.Branch.get exportLog
where
get (ep, exported)
@@ -74,7 +76,7 @@ recordExport remoteuuid ec = do
buildExportLog
. changeMapLog c ep exported
. M.mapWithKey (updateothers c u)
- . parseExportLog . decodeBL
+ . parseExportLog
where
updateothers c u ep le@(LogEntry _ exported@(Exported { exportedTreeish = t }))
| u == exportFrom ep || remoteuuid /= exportTo ep || t `notElem` oldTreeish ec = le
@@ -92,17 +94,16 @@ recordExportBeginning remoteuuid newtree = do
old <- fromMaybe (Exported emptyTree [])
. M.lookup ep . simpleMap
. parseExportLog
- . decodeBL
<$> Annex.Branch.get exportLog
let new = old { incompleteExportedTreeish = nub (newtree:incompleteExportedTreeish old) }
Annex.Branch.change exportLog $
buildExportLog
. changeMapLog c ep new
- . parseExportLog . decodeBL
+ . parseExportLog
Annex.Branch.graftTreeish newtree (asTopFilePath "export.tree")
-parseExportLog :: String -> MapLog ExportParticipants Exported
-parseExportLog = parseMapLog parseExportParticipants parseExported
+parseExportLog :: L.ByteString -> MapLog ExportParticipants Exported
+parseExportLog = parseMapLog exportParticipantsParser exportedParser
buildExportLog :: MapLog ExportParticipants Exported -> Builder
buildExportLog = buildMapLog buildExportParticipants buildExported
@@ -113,14 +114,11 @@ buildExportParticipants ep =
where
sep = charUtf8 ':'
-parseExportParticipants :: String -> Maybe ExportParticipants
-parseExportParticipants s = case separate (== ':') s of
- ("",_) -> Nothing
- (_,"") -> Nothing
- (f,t) -> Just $ ExportParticipants
- { exportFrom = toUUID f
- , exportTo = toUUID t
- }
+exportParticipantsParser :: A.Parser ExportParticipants
+exportParticipantsParser = ExportParticipants
+ <$> (toUUID <$> A8.takeWhile1 (/= ':'))
+ <* A8.char ':'
+ <*> (toUUID <$> A8.takeWhile1 (const True))
buildExported :: Exported -> Builder
buildExported exported = go (exportedTreeish exported : incompleteExportedTreeish exported)
@@ -129,7 +127,8 @@ buildExported exported = go (exportedTreeish exported : incompleteExportedTreeis
go (r:rs) = rref r <> mconcat [ charUtf8 ' ' <> rref r' | r' <- rs ]
rref r = byteString (encodeBS' (Git.fromRef r))
-parseExported :: String -> Maybe Exported
-parseExported s = case words s of
- (et:it) -> Just $ Exported (Git.Ref et) (map Git.Ref it)
- _ -> Nothing
+exportedParser :: A.Parser Exported
+exportedParser = Exported <$> refparser <*> many refparser
+ where
+ refparser = (Git.Ref . decodeBS <$> A8.takeWhile1 (/= ' ') )
+ <* ((const () <$> A8.char ' ') <|> A.endOfInput)
diff --git a/Logs/MapLog.hs b/Logs/MapLog.hs
index c0018f79b7..72307b2fb0 100644
--- a/Logs/MapLog.hs
+++ b/Logs/MapLog.hs
@@ -4,6 +4,8 @@
-
- A line of the log will look like: "timestamp field value"
-
+ - The field names cannot contain whitespace.
+ -
- Copyright 2014, 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
@@ -19,13 +21,16 @@ import Common
import Annex.VectorClock
import Logs.Line
+import qualified Data.ByteString.Lazy as L
import qualified Data.Map.Strict as M
+import qualified Data.Attoparsec.ByteString.Lazy as A
+import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
data LogEntry v = LogEntry
{ changed :: VectorClock
, value :: v
- } deriving (Eq)
+ } deriving (Eq, Show)
type MapLog f v = M.Map f (LogEntry v)
@@ -39,17 +44,23 @@ buildMapLog fieldbuilder valuebuilder = mconcat . map genline . M.toList
sp = charUtf8 ' '
nl = charUtf8 '\n'
-parseMapLog :: Ord f => (String -> Maybe f) -> (String -> Maybe v) -> String -> MapLog f v
-parseMapLog fieldparser valueparser = M.fromListWith best . mapMaybe parse . splitLines
+parseMapLog :: Ord f => A.Parser f -> A.Parser v -> L.ByteString -> MapLog f v
+parseMapLog fieldparser valueparser = fromMaybe M.empty . A.maybeResult
+ . A.parse (mapLogParser fieldparser valueparser)
+
+mapLogParser :: Ord f => A.Parser f -> A.Parser v -> A.Parser (MapLog f v)
+mapLogParser fieldparser valueparser = M.fromListWith best <$> parseLogLines go
where
- parse line = do
- let (sc, rest) = splitword line
- (sf, sv) = splitword rest
- c <- parseVectorClock sc
- f <- fieldparser sf
- v <- valueparser sv
- Just (f, LogEntry c v)
- splitword = separate (== ' ')
+ go = do
+ c <- vectorClockParser
+ _ <- A8.char ' '
+ w <- A8.takeTill (== ' ')
+ f <- either fail return $
+ A.parseOnly (fieldparser <* A.endOfInput) w
+ _ <- A8.char ' '
+ v <- valueparser
+ A.endOfInput
+ return (f, LogEntry c v)
changeMapLog :: Ord f => VectorClock -> f -> v -> MapLog f v -> MapLog f v
changeMapLog c f v = M.insert f $ LogEntry c v
diff --git a/Logs/PreferredContent/Raw.hs b/Logs/PreferredContent/Raw.hs
index b095428461..2d572f9dcc 100644
--- a/Logs/PreferredContent/Raw.hs
+++ b/Logs/PreferredContent/Raw.hs
@@ -18,6 +18,7 @@ import Types.Group
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
+import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
{- Changes the preferred content configuration of a remote. -}
@@ -51,7 +52,10 @@ groupPreferredContentSet g val = do
Annex.changeState $ \s -> s { Annex.preferredcontentmap = Nothing }
parseGroupPreferredContent :: L.ByteString -> MapLog Group String
-parseGroupPreferredContent = parseMapLog (Just . toGroup) Just . decodeBL
+parseGroupPreferredContent = parseMapLog parsegroup parsestring
+ where
+ parsegroup = Group <$> A.takeByteString
+ parsestring = decodeBS <$> A.takeByteString
buildGroupPreferredContent :: MapLog Group PreferredContentExpression -> Builder
buildGroupPreferredContent = buildMapLog buildgroup buildexpr
diff --git a/Logs/RemoteState.hs b/Logs/RemoteState.hs
index 5b856b713c..ec3f5ae392 100644
--- a/Logs/RemoteState.hs
+++ b/Logs/RemoteState.hs
@@ -17,6 +17,8 @@ import qualified Annex.Branch
import qualified Annex
import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
type RemoteState = String
@@ -26,7 +28,7 @@ setRemoteState u k s = do
c <- liftIO currentVectorClock
config <- Annex.getGitConfig
Annex.Branch.change (remoteStateLogFile config k) $
- buildRemoteState . changeLog c u s . parseLogNew Just . decodeBL
+ buildRemoteState . changeLog c u s . parseRemoteState
buildRemoteState :: Log RemoteState -> Builder
buildRemoteState = buildLogNew (byteString . encodeBS)
@@ -34,7 +36,10 @@ buildRemoteState = buildLogNew (byteString . encodeBS)
getRemoteState :: UUID -> Key -> Annex (Maybe RemoteState)
getRemoteState u k = do
config <- Annex.getGitConfig
- extract . parseLogNew Just . decodeBL
+ extract . parseRemoteState
<$> Annex.Branch.get (remoteStateLogFile config k)
where
extract m = value <$> M.lookup u m
+
+parseRemoteState :: L.ByteString -> Log RemoteState
+parseRemoteState = parseLogNew (decodeBS <$> A.takeByteString)
diff --git a/Logs/Schedule.hs b/Logs/Schedule.hs
index b8eb94f8b1..e43af52a85 100644
--- a/Logs/Schedule.hs
+++ b/Logs/Schedule.hs
@@ -42,10 +42,10 @@ scheduleSet NoUUID _ = error "unknown UUID; cannot modify"
scheduleMap :: Annex (M.Map UUID [ScheduledActivity])
scheduleMap = simpleMap
- . parseLogWithUUID parser . decodeBL
+ . parseLog parser . decodeBL
<$> Annex.Branch.get scheduleLog
where
- parser _uuid = eitherToMaybe . parseScheduledActivities
+ parser = eitherToMaybe . parseScheduledActivities
scheduleGet :: UUID -> Annex (S.Set ScheduledActivity)
scheduleGet u = do
diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs
index e371844921..f8bd1f7253 100644
--- a/Logs/UUIDBased.hs
+++ b/Logs/UUIDBased.hs
@@ -39,6 +39,8 @@ import Annex.VectorClock
import Logs.MapLog
import Logs.Line
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
type Log v = MapLog UUID v
@@ -84,8 +86,8 @@ parseLogWithUUID parser = M.fromListWith best . mapMaybe parse . splitLines
buildLogNew :: (v -> Builder) -> Log v -> Builder
buildLogNew = buildMapLog buildUUID
-parseLogNew :: (String -> Maybe v) -> String -> Log v
-parseLogNew = parseMapLog (Just . toUUID)
+parseLogNew :: A.Parser v -> L.ByteString -> Log v
+parseLogNew = parseMapLog (toUUID <$> A.takeByteString)
changeLog :: VectorClock -> UUID -> v -> Log v -> Log v
changeLog = changeMapLog