summaryrefslogtreecommitdiff
path: root/Logs
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2019-01-10 14:39:36 -0400
committerJoey Hess <joeyh@joeyh.name>2019-01-10 16:34:20 -0400
commit591e4b145fef5049fef2a45b66f47557b617def8 (patch)
treeb443afbbc0f12e2171d3868d4623603f884c3263 /Logs
parent66603d6f75a7a10187f9450111f3bdb489bc59e4 (diff)
convert old uuid-based log parsers to attoparsec
This preserves the workaround for the old bug that caused NoUUID items to be stored in the log, prefixing log lines with " ". It's now handled implicitly, by using takeWhile1 (/= ' ') to get the uuid. There is a behavior change from the old parser, which split the value into words and then recombined it. That meant that "foo bar" and "foo\tbar" came out as "foo bar". That behavior was not documented, and seems surprising; it meant that after a git-annex describe here "foo bar", you wouldn't get that same string back out when git-annex displayed repo descriptions. Otoh, some other parsers relied on the old behavior, and the attoparsec rewrites had to deal with the issue themselves... For group.log, there are some edge cases around the user providing a group name with a leading or trailing space. The old parser would ignore such excess whitespace. The new parser does too, because the alternative is to refuse to parse something like " group1 group2 " due to excess whitespace, which would be even more confusing behavior. The only git-annex branch log file that is not converted to attoparsec and bytestring-builder now is transitions.log.
Diffstat (limited to 'Logs')
-rw-r--r--Logs/Activity.hs31
-rw-r--r--Logs/Difference.hs12
-rw-r--r--Logs/Difference/Pure.hs7
-rw-r--r--Logs/Group.hs18
-rw-r--r--Logs/Multicast.hs17
-rw-r--r--Logs/PreferredContent.hs6
-rw-r--r--Logs/PreferredContent/Raw.hs14
-rw-r--r--Logs/Remote.hs13
-rw-r--r--Logs/Schedule.hs14
-rw-r--r--Logs/Trust/Basic.hs8
-rw-r--r--Logs/Trust/Pure.hs55
-rw-r--r--Logs/UUID.hs12
-rw-r--r--Logs/UUIDBased.hs50
13 files changed, 153 insertions, 104 deletions
diff --git a/Logs/Activity.hs b/Logs/Activity.hs
index d66f313626..f49257e181 100644
--- a/Logs/Activity.hs
+++ b/Logs/Activity.hs
@@ -1,6 +1,6 @@
{- git-annex activity log
-
- - Copyright 2015 Joey Hess <id@joeyh.name>
+ - Copyright 2015-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -17,23 +17,36 @@ import qualified Annex.Branch
import Logs
import Logs.UUIDBased
+import qualified Data.ByteString as S
+import qualified Data.Attoparsec.ByteString as A
import Data.ByteString.Builder
-data Activity = Fsck
+data Activity
+ = Fsck
deriving (Eq, Read, Show, Enum, Bounded)
recordActivity :: Activity -> UUID -> Annex ()
recordActivity act uuid = do
c <- liftIO currentVectorClock
Annex.Branch.change activityLog $
- buildLog (byteString . encodeBS . show)
- . changeLog c uuid act
- . parseLog readish . decodeBL
+ buildLog buildActivity
+ . changeLog c uuid (Right act)
+ . parseLog parseActivity
lastActivities :: Maybe Activity -> Annex (Log Activity)
-lastActivities wantact = parseLog onlywanted . decodeBL <$> Annex.Branch.get activityLog
+lastActivities wantact = parseLog (onlywanted =<< parseActivity)
+ <$> Annex.Branch.get activityLog
where
- onlywanted s = case readish s of
- Just a | wanted a -> Just a
- _ -> Nothing
+ onlywanted (Right a) | wanted a = pure a
+ onlywanted _ = fail "unwanted activity"
wanted a = maybe True (a ==) wantact
+
+buildActivity :: Either S.ByteString Activity -> Builder
+buildActivity (Right a) = byteString $ encodeBS $ show a
+buildActivity (Left b) = byteString b
+
+-- Allow for unknown activities to be added later by preserving them.
+parseActivity :: A.Parser (Either S.ByteString Activity)
+parseActivity = go <$> A.takeByteString
+ where
+ go b = maybe (Left b) Right $ readish $ decodeBS b
diff --git a/Logs/Difference.hs b/Logs/Difference.hs
index 56a7ef862b..e59a485f5b 100644
--- a/Logs/Difference.hs
+++ b/Logs/Difference.hs
@@ -13,6 +13,7 @@ module Logs.Difference (
) where
import qualified Data.Map as M
+import qualified Data.Attoparsec.ByteString as A
import Data.ByteString.Builder
import Annex.Common
@@ -26,17 +27,16 @@ recordDifferences :: Differences -> UUID -> Annex ()
recordDifferences ds@(Differences {}) uuid = do
c <- liftIO currentVectorClock
Annex.Branch.change differenceLog $
- buildLog (byteString . encodeBS)
- . changeLog c uuid (showDifferences ds)
- . parseLog Just . decodeBL
+ buildLog byteString
+ . changeLog c uuid (encodeBS $ showDifferences ds)
+ . parseLog A.takeByteString
recordDifferences UnknownDifferences _ = return ()
-- Map of UUIDs that have Differences recorded.
-- If a new version of git-annex has added a Difference this version
-- doesn't know about, it will contain UnknownDifferences.
recordedDifferences :: Annex (M.Map UUID Differences)
-recordedDifferences = parseDifferencesLog . decodeBL <$> Annex.Branch.get differenceLog
+recordedDifferences = parseDifferencesLog <$> Annex.Branch.get differenceLog
recordedDifferencesFor :: UUID -> Annex Differences
-recordedDifferencesFor u = fromMaybe mempty . M.lookup u
- <$> recordedDifferences
+recordedDifferencesFor u = fromMaybe mempty . M.lookup u <$> recordedDifferences
diff --git a/Logs/Difference/Pure.hs b/Logs/Difference/Pure.hs
index 78a11d71f1..8c8c484257 100644
--- a/Logs/Difference/Pure.hs
+++ b/Logs/Difference/Pure.hs
@@ -11,13 +11,16 @@ module Logs.Difference.Pure (
) where
import qualified Data.Map as M
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Attoparsec.ByteString as A
import Annex.Common
import Types.Difference
import Logs.UUIDBased
-parseDifferencesLog :: String -> (M.Map UUID Differences)
-parseDifferencesLog = simpleMap . parseLog (Just . readDifferences)
+parseDifferencesLog :: L.ByteString -> (M.Map UUID Differences)
+parseDifferencesLog = simpleMap
+ . parseLog (readDifferences . decodeBS <$> A.takeByteString)
-- The sum of all recorded differences, across all UUIDs.
allDifferences :: M.Map UUID Differences -> Differences
diff --git a/Logs/Group.hs b/Logs/Group.hs
index 1dcc84247e..d216f591df 100644
--- a/Logs/Group.hs
+++ b/Logs/Group.hs
@@ -16,9 +16,10 @@ module Logs.Group (
inUnwantedGroup
) where
-import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import qualified Data.Set as S
+import qualified Data.Attoparsec.ByteString as A
+import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
import Annex.Common
@@ -39,7 +40,7 @@ groupChange uuid@(UUID _) modifier = do
curr <- lookupGroups uuid
c <- liftIO currentVectorClock
Annex.Branch.change groupLog $
- buildLog buildGroup . changeLog c uuid (modifier curr) . parseGroup
+ buildLog buildGroup . changeLog c uuid (modifier curr) . parseLog parseGroup
-- The changed group invalidates the preferred content cache.
Annex.changeState $ \s -> s
@@ -55,8 +56,15 @@ buildGroup = go . S.toList
go (g:gs) = bld g <> mconcat [ charUtf8 ' ' <> bld g' | g' <- gs ]
bld (Group g) = byteString g
-parseGroup :: L.ByteString -> Log (S.Set Group)
-parseGroup = parseLog (Just . S.fromList . map toGroup . words) . decodeBL
+parseGroup :: A.Parser (S.Set Group)
+parseGroup = S.fromList <$> go []
+ where
+ go l = (A.endOfInput *> pure l)
+ <|> ((getgroup <* A8.char ' ') >>= go . (:l))
+ <|> ((:l) <$> getgroup)
+ -- allow extra writespace before or after a group name
+ <|> (A8.char ' ' >>= const (go l))
+ getgroup = Group <$> A8.takeWhile1 (/= ' ')
groupSet :: UUID -> S.Set Group -> Annex ()
groupSet u g = groupChange u (const g)
@@ -68,7 +76,7 @@ groupMap = maybe groupMapLoad return =<< Annex.getState Annex.groupmap
{- Loads the map, updating the cache. -}
groupMapLoad :: Annex GroupMap
groupMapLoad = do
- m <- makeGroupMap . simpleMap . parseGroup <$> Annex.Branch.get groupLog
+ m <- makeGroupMap . simpleMap . parseLog parseGroup <$> Annex.Branch.get groupLog
Annex.changeState $ \s -> s { Annex.groupmap = Just m }
return m
diff --git a/Logs/Multicast.hs b/Logs/Multicast.hs
index 1227de748a..41ebe3f20d 100644
--- a/Logs/Multicast.hs
+++ b/Logs/Multicast.hs
@@ -1,6 +1,6 @@
{- git-annex multicast fingerprint log
-
- - Copyright 2017 Joey Hess <id@joeyh.name>
+ - Copyright 2017, 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -17,6 +17,7 @@ import Logs
import Logs.UUIDBased
import qualified Data.Map as M
+import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
newtype Fingerprint = Fingerprint String
@@ -26,9 +27,17 @@ recordFingerprint :: Fingerprint -> UUID -> Annex ()
recordFingerprint fp uuid = do
c <- liftIO currentVectorClock
Annex.Branch.change multicastLog $
- buildLog (byteString . encodeBS . show)
+ buildLog buildFindgerPrint
. changeLog c uuid fp
- . parseLog readish . decodeBL
+ . parseLog fingerprintParser
knownFingerPrints :: Annex (M.Map UUID Fingerprint)
-knownFingerPrints = simpleMap . parseLog readish . decodeBL <$> Annex.Branch.get activityLog
+knownFingerPrints = simpleMap . parseLog fingerprintParser
+ <$> Annex.Branch.get activityLog
+
+fingerprintParser :: A.Parser Fingerprint
+fingerprintParser = maybe (fail "fingerprint parse failed") pure
+ . readish . decodeBS =<< A.takeByteString
+
+buildFindgerPrint :: Fingerprint -> Builder
+buildFindgerPrint = byteString . encodeBS . show
diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs
index 2cfcc57506..8229bf36ec 100644
--- a/Logs/PreferredContent.hs
+++ b/Logs/PreferredContent.hs
@@ -1,6 +1,6 @@
{- git-annex preferred content matcher configuration
-
- - Copyright 2012-2014 Joey Hess <id@joeyh.name>
+ - Copyright 2012-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@@ -26,6 +26,7 @@ module Logs.PreferredContent (
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Either
+import qualified Data.Attoparsec.ByteString.Lazy as A
import Annex.Common
import Logs.PreferredContent.Raw
@@ -73,8 +74,7 @@ preferredRequiredMapsLoad = do
groupmap <- groupMap
configmap <- readRemoteLog
let genmap l gm = simpleMap
- . parseLogWithUUID ((Just .) . makeMatcher groupmap configmap gm)
- . decodeBL
+ . parseLogWithUUID (\u -> makeMatcher groupmap configmap gm u . decodeBS <$> A.takeByteString)
<$> Annex.Branch.get l
pc <- genmap preferredContentLog =<< groupPreferredContentMapRaw
rc <- genmap requiredContentLog M.empty
diff --git a/Logs/PreferredContent/Raw.hs b/Logs/PreferredContent/Raw.hs
index 2d572f9dcc..81fb49d44a 100644
--- a/Logs/PreferredContent/Raw.hs
+++ b/Logs/PreferredContent/Raw.hs
@@ -32,9 +32,9 @@ setLog :: FilePath -> UUID -> PreferredContentExpression -> Annex ()
setLog logfile uuid@(UUID _) val = do
c <- liftIO currentVectorClock
Annex.Branch.change logfile $
- buildLog (byteString . encodeBS)
+ buildLog buildPreferredContentExpression
. changeLog c uuid val
- . parseLog Just . decodeBL
+ . parseLog parsePreferredContentExpression
Annex.changeState $ \s -> s
{ Annex.preferredcontentmap = Nothing
, Annex.requiredcontentmap = Nothing
@@ -63,12 +63,18 @@ buildGroupPreferredContent = buildMapLog buildgroup buildexpr
buildgroup (Group g) = byteString g
buildexpr = byteString . encodeBS
+parsePreferredContentExpression :: A.Parser PreferredContentExpression
+parsePreferredContentExpression = decodeBS <$> A.takeByteString
+
+buildPreferredContentExpression :: PreferredContentExpression -> Builder
+buildPreferredContentExpression = byteString . encodeBS
+
preferredContentMapRaw :: Annex (M.Map UUID PreferredContentExpression)
-preferredContentMapRaw = simpleMap . parseLog Just . decodeBL
+preferredContentMapRaw = simpleMap . parseLog parsePreferredContentExpression
<$> Annex.Branch.get preferredContentLog
requiredContentMapRaw :: Annex (M.Map UUID PreferredContentExpression)
-requiredContentMapRaw = simpleMap . parseLog Just . decodeBL
+requiredContentMapRaw = simpleMap . parseLog parsePreferredContentExpression
<$> Annex.Branch.get requiredContentLog
groupPreferredContentMapRaw :: Annex (M.Map Group PreferredContentExpression)
diff --git a/Logs/Remote.hs b/Logs/Remote.hs
index a7bc80d285..aac7c273b9 100644
--- a/Logs/Remote.hs
+++ b/Logs/Remote.hs
@@ -12,7 +12,6 @@ module Logs.Remote (
keyValToConfig,
configToKeyVal,
showConfig,
- parseConfig,
prop_isomorphic_configEscape,
prop_parse_show_Config,
@@ -26,6 +25,7 @@ import Logs.UUIDBased
import qualified Data.Map as M
import Data.Char
+import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
{- Adds or updates a remote's config in the log. -}
@@ -35,14 +35,15 @@ configSet u cfg = do
Annex.Branch.change remoteLog $
buildLog (byteString . encodeBS . showConfig)
. changeLog c u cfg
- . parseLog parseConfig . decodeBL
+ . parseLog remoteConfigParser
{- Map of remotes by uuid containing key/value config maps. -}
readRemoteLog :: Annex (M.Map UUID RemoteConfig)
-readRemoteLog = simpleMap . parseLog parseConfig . decodeBL <$> Annex.Branch.get remoteLog
+readRemoteLog = simpleMap . parseLog remoteConfigParser
+ <$> Annex.Branch.get remoteLog
-parseConfig :: String -> Maybe RemoteConfig
-parseConfig = Just . keyValToConfig . words
+remoteConfigParser :: A.Parser RemoteConfig
+remoteConfigParser = keyValToConfig . words . decodeBS <$> A.takeByteString
showConfig :: RemoteConfig -> String
showConfig = unwords . configToKeyVal
@@ -93,7 +94,7 @@ prop_parse_show_Config :: RemoteConfig -> Bool
prop_parse_show_Config c
-- whitespace and '=' are not supported in keys
| any (\k -> any isSpace k || elem '=' k) (M.keys c) = True
- | otherwise = parseConfig (showConfig c) ~~ Just c
+ | otherwise = A.parseOnly remoteConfigParser (encodeBS $ showConfig c) ~~ Right c
where
normalize v = sort . M.toList <$> v
a ~~ b = normalize a == normalize b
diff --git a/Logs/Schedule.hs b/Logs/Schedule.hs
index e43af52a85..be235a361f 100644
--- a/Logs/Schedule.hs
+++ b/Logs/Schedule.hs
@@ -20,6 +20,7 @@ module Logs.Schedule (
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Time.LocalTime
+import qualified Data.Attoparsec.ByteString.Lazy as A
import Data.ByteString.Builder
import Annex.Common
@@ -33,19 +34,18 @@ scheduleSet :: UUID -> [ScheduledActivity] -> Annex ()
scheduleSet uuid@(UUID _) activities = do
c <- liftIO currentVectorClock
Annex.Branch.change scheduleLog $
- buildLog (byteString . encodeBS)
- . changeLog c uuid val
- . parseLog Just . decodeBL
+ buildLog byteString
+ . changeLog c uuid (encodeBS val)
+ . parseLog A.takeByteString
where
val = fromScheduledActivities activities
scheduleSet NoUUID _ = error "unknown UUID; cannot modify"
scheduleMap :: Annex (M.Map UUID [ScheduledActivity])
-scheduleMap = simpleMap
- . parseLog parser . decodeBL
- <$> Annex.Branch.get scheduleLog
+scheduleMap = simpleMap . parseLog parser <$> Annex.Branch.get scheduleLog
where
- parser = eitherToMaybe . parseScheduledActivities
+ parser = either fail pure . parseScheduledActivities . decodeBS
+ =<< A.takeByteString
scheduleGet :: UUID -> Annex (S.Set ScheduledActivity)
scheduleGet u = do
diff --git a/Logs/Trust/Basic.hs b/Logs/Trust/Basic.hs
index 711b526b41..8917c6838f 100644
--- a/Logs/Trust/Basic.hs
+++ b/Logs/Trust/Basic.hs
@@ -19,20 +19,18 @@ import Logs
import Logs.UUIDBased
import Logs.Trust.Pure as X
-import Data.ByteString.Builder
-
{- Changes the trust level for a uuid in the trustLog. -}
trustSet :: UUID -> TrustLevel -> Annex ()
trustSet uuid@(UUID _) level = do
c <- liftIO currentVectorClock
Annex.Branch.change trustLog $
- buildLog (byteString . encodeBS . showTrustLog) .
+ buildLog buildTrustLevel .
changeLog c uuid level .
- parseLog (Just . parseTrustLog) . decodeBL
+ parseLog trustLevelParser
Annex.changeState $ \s -> s { Annex.trustmap = Nothing }
trustSet NoUUID _ = error "unknown UUID; cannot modify"
{- Does not include forcetrust or git config values, just those from the
- log file. -}
trustMapRaw :: Annex TrustMap
-trustMapRaw = calcTrustMap . decodeBL <$> Annex.Branch.get trustLog
+trustMapRaw = calcTrustMap <$> Annex.Branch.get trustLog
diff --git a/Logs/Trust/Pure.hs b/Logs/Trust/Pure.hs
index 74b7fd38cb..96f7a33aa6 100644
--- a/Logs/Trust/Pure.hs
+++ b/Logs/Trust/Pure.hs
@@ -1,36 +1,49 @@
{- git-annex trust log, pure operations
-
- - Copyright 2010-2013 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
+{-# LANGUAGE OverloadedStrings #-}
+
module Logs.Trust.Pure where
import Annex.Common
import Types.TrustLevel
import Logs.UUIDBased
-calcTrustMap :: String -> TrustMap
-calcTrustMap = simpleMap . parseLog (Just . parseTrustLog)
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Attoparsec.ByteString.Lazy as A
+import qualified Data.Attoparsec.ByteString.Char8 as A8
+import Data.ByteString.Builder
+
+calcTrustMap :: L.ByteString -> TrustMap
+calcTrustMap = simpleMap . parseLog trustLevelParser
-{- The trust.log used to only list trusted repos, without a field for the
- - trust status, which is why this defaults to Trusted. -}
-parseTrustLog :: String -> TrustLevel
-parseTrustLog s = maybe Trusted parse $ headMaybe $ words s
+trustLevelParser :: A.Parser TrustLevel
+trustLevelParser = (totrust <$> A8.anyChar <* A.endOfInput)
+ -- The trust log used to only list trusted repos, without a
+ -- value for the trust status
+ <|> (const Trusted <$> A.endOfInput)
where
- parse "1" = Trusted
- parse "0" = UnTrusted
- parse "X" = DeadTrusted
- parse _ = SemiTrusted
-
-showTrustLog :: TrustLevel -> String
-showTrustLog Trusted = "1"
-showTrustLog UnTrusted = "0"
-showTrustLog DeadTrusted = "X"
-showTrustLog SemiTrusted = "?"
-
-prop_parse_show_TrustLog :: Bool
-prop_parse_show_TrustLog = all check [minBound .. maxBound]
+ totrust '1' = Trusted
+ totrust '0' = UnTrusted
+ totrust 'X' = DeadTrusted
+ -- Allow for future expansion by treating unknown trust levels as
+ -- semitrusted.
+ totrust _ = SemiTrusted
+
+buildTrustLevel :: TrustLevel -> Builder
+buildTrustLevel Trusted = byteString "1"
+buildTrustLevel UnTrusted = byteString "0"
+buildTrustLevel DeadTrusted = byteString "X"
+buildTrustLevel SemiTrusted = byteString "?"
+
+prop_parse_build_TrustLevelLog :: Bool
+prop_parse_build_TrustLevelLog = all check [minBound .. maxBound]
where
- check l = parseTrustLog (showTrustLog l) == l
+ check l =
+ let v = A.parseOnly trustLevelParser $ L.toStrict $
+ toLazyByteString $ buildTrustLevel l
+ in v == Right l
diff --git a/Logs/UUID.hs b/Logs/UUID.hs
index 227f69784c..f2d5c0f077 100644
--- a/Logs/UUID.hs
+++ b/Logs/UUID.hs
@@ -24,15 +24,15 @@ import Logs.UUIDBased
import qualified Annex.UUID
import qualified Data.Map.Strict as M
+import qualified Data.ByteString.Lazy as L
+import qualified Data.Attoparsec.ByteString.Lazy as A
{- Records a description for a uuid in the log. -}
describeUUID :: UUID -> UUIDDesc -> Annex ()
describeUUID uuid desc = do
c <- liftIO currentVectorClock
Annex.Branch.change uuidLog $
- buildLog buildUUIDDesc
- . changeLog c uuid desc
- . parseLog (Just . UUIDDesc . encodeBS) . decodeBL
+ buildLog buildUUIDDesc . changeLog c uuid desc . parseUUIDLog
{- The map is cached for speed. -}
uuidDescMap :: Annex UUIDDescMap
@@ -44,11 +44,13 @@ uuidDescMap = maybe uuidDescMapLoad return =<< Annex.getState Annex.uuiddescmap
- it may not have been described and otherwise would not appear. -}
uuidDescMapLoad :: Annex UUIDDescMap
uuidDescMapLoad = do
- m <- (simpleMap . parseLog (Just . UUIDDesc . encodeBS)) . decodeBL
- <$> Annex.Branch.get uuidLog
+ m <- simpleMap . parseUUIDLog <$> Annex.Branch.get uuidLog
u <- Annex.UUID.getUUID
let m' = M.insertWith preferold u mempty m
Annex.changeState $ \s -> s { Annex.uuiddescmap = Just m' }
return m'
where
preferold = flip const
+
+parseUUIDLog :: L.ByteString -> Log UUIDDesc
+parseUUIDLog = parseLog (UUIDDesc <$> A.takeByteString)
diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs
index f8bd1f7253..921aa504b7 100644
--- a/Logs/UUIDBased.hs
+++ b/Logs/UUIDBased.hs
@@ -14,7 +14,7 @@
- Licensed under the GNU GPL version 3 or higher.
-}
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, TupleSections #-}
module Logs.UUIDBased (
Log,
@@ -39,9 +39,12 @@ import Annex.VectorClock
import Logs.MapLog
import Logs.Line
+import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString.Lazy as A
+import qualified Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
+import qualified Data.DList as D
type Log v = MapLog UUID v
@@ -56,32 +59,28 @@ buildLog builder = mconcat . map genline . M.toList
sp = charUtf8 ' '
nl = charUtf8 '\n'
-parseLog :: (String -> Maybe a) -> String -> Log a
+parseLog :: A.Parser a -> L.ByteString -> Log a
parseLog = parseLogWithUUID . const
-parseLogWithUUID :: (UUID -> String -> Maybe a) -> String -> Log a
-parseLogWithUUID parser = M.fromListWith best . mapMaybe parse . splitLines
+parseLogWithUUID :: (UUID -> A.Parser a) -> L.ByteString -> Log a
+parseLogWithUUID parser = fromMaybe M.empty . A.maybeResult
+ . A.parse (logParser parser)
+
+logParser :: (UUID -> A.Parser a) -> A.Parser (Log a)
+logParser parser = M.fromListWith best <$> parseLogLines go
where
- parse line
- -- This is a workaround for a bug that caused
- -- NoUUID items to be stored in the log.
- -- It can be removed at any time; is just here to clean
- -- up logs where that happened temporarily.
- | " " `isPrefixOf` line = Nothing
- | null ws = Nothing
- | otherwise = parser u (unwords info) >>= makepair
- where
- makepair v = Just (u, LogEntry ts v)
- ws = words line
- u = toUUID $ Prelude.head ws
- t = Prelude.last ws
- ts
- | tskey `isPrefixOf` t = fromMaybe Unknown $
- parseVectorClock $ drop 1 $ dropWhile (/= '=') t
- | otherwise = Unknown
- info
- | ts == Unknown = drop 1 ws
- | otherwise = drop 1 $ beginning ws
+ go = do
+ u <- toUUID <$> A8.takeWhile1 (/= ' ')
+ (dl, ts) <- accumval D.empty
+ v <- either fail return $ A.parseOnly (parser u <* A.endOfInput)
+ (S.intercalate " " $ D.toList dl)
+ return (u, LogEntry ts v)
+ accumval dl =
+ ((dl,) <$> parsetimestamp)
+ <|> (A8.char ' ' *> (A8.takeWhile (/= ' ')) >>= accumval . D.snoc dl)
+ parsetimestamp =
+ (A8.string " timestamp=" *> vectorClockParser <* A.endOfInput)
+ <|> (const Unknown <$> A.endOfInput)
buildLogNew :: (v -> Builder) -> Log v -> Builder
buildLogNew = buildMapLog buildUUID
@@ -94,6 +93,3 @@ changeLog = changeMapLog
addLog :: UUID -> LogEntry v -> Log v -> Log v
addLog = addMapLog
-
-tskey :: String
-tskey = "timestamp="