Diffstat (limited to 'cmd/restic/lock.go')
-rw-r--r-- | cmd/restic/lock.go | 152
1 file changed, 134 insertions(+), 18 deletions(-)
diff --git a/cmd/restic/lock.go b/cmd/restic/lock.go
index f39a08db6..11c1ed8f5 100644
--- a/cmd/restic/lock.go
+++ b/cmd/restic/lock.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"context"
+	"fmt"
 	"sync"
 	"time"
 
@@ -11,6 +12,7 @@ import (
 )
 
 type lockContext struct {
+	lock      *restic.Lock
 	cancel    context.CancelFunc
 	refreshWG sync.WaitGroup
 }
@@ -21,17 +23,29 @@ var globalLocks struct {
 	sync.Once
 }
 
-func lockRepo(ctx context.Context, repo restic.Repository) (*restic.Lock, context.Context, error) {
-	return lockRepository(ctx, repo, false)
+func lockRepo(ctx context.Context, repo restic.Repository, retryLock time.Duration, json bool) (*restic.Lock, context.Context, error) {
+	return lockRepository(ctx, repo, false, retryLock, json)
 }
 
-func lockRepoExclusive(ctx context.Context, repo restic.Repository) (*restic.Lock, context.Context, error) {
-	return lockRepository(ctx, repo, true)
+func lockRepoExclusive(ctx context.Context, repo restic.Repository, retryLock time.Duration, json bool) (*restic.Lock, context.Context, error) {
+	return lockRepository(ctx, repo, true, retryLock, json)
+}
+
+var (
+	retrySleepStart = 5 * time.Second
+	retrySleepMax   = 60 * time.Second
+)
+
+func minDuration(a, b time.Duration) time.Duration {
+	if a <= b {
+		return a
+	}
+	return b
 }
 
 // lockRepository wraps the ctx such that it is cancelled when the repository is unlocked
 // cancelling the original context also stops the lock refresh
-func lockRepository(ctx context.Context, repo restic.Repository, exclusive bool) (*restic.Lock, context.Context, error) {
+func lockRepository(ctx context.Context, repo restic.Repository, exclusive bool, retryLock time.Duration, json bool) (*restic.Lock, context.Context, error) {
 	// make sure that a repository is unlocked properly and after cancel() was
 	// called by the cleanup handler in global.go
 	globalLocks.Do(func() {
@@ -43,26 +57,65 @@ func lockRepository(ctx context.Context, repo restic.Repository, exclusive bool)
 		lockFn = restic.NewExclusiveLock
 	}
 
-	lock, err := lockFn(ctx, repo)
+	var lock *restic.Lock
+	var err error
+
+	retrySleep := minDuration(retrySleepStart, retryLock)
+	retryMessagePrinted := false
+	retryTimeout := time.After(retryLock)
+
+retryLoop:
+	for {
+		lock, err = lockFn(ctx, repo)
+		if err != nil && restic.IsAlreadyLocked(err) {
+
+			if !retryMessagePrinted {
+				if !json {
+					Verbosef("repo already locked, waiting up to %s for the lock\n", retryLock)
+				}
+				retryMessagePrinted = true
+			}
+
+			debug.Log("repo already locked, retrying in %v", retrySleep)
+			retrySleepCh := time.After(retrySleep)
+
+			select {
+			case <-ctx.Done():
+				return nil, ctx, ctx.Err()
+			case <-retryTimeout:
+				debug.Log("repo already locked, timeout expired")
+				// Last lock attempt
+				lock, err = lockFn(ctx, repo)
+				break retryLoop
+			case <-retrySleepCh:
+				retrySleep = minDuration(retrySleep*2, retrySleepMax)
+			}
+		} else {
+			// anything else, either a successful lock or another error
+			break retryLoop
+		}
+	}
 	if restic.IsInvalidLock(err) {
 		return nil, ctx, errors.Fatalf("%v\n\nthe `unlock --remove-all` command can be used to remove invalid locks. Make sure that no other restic process is accessing the repository when running the command", err)
 	}
 	if err != nil {
-		return nil, ctx, errors.Fatalf("unable to create lock in backend: %v", err)
+		return nil, ctx, fmt.Errorf("unable to create lock in backend: %w", err)
 	}
 	debug.Log("create lock %p (exclusive %v)", lock, exclusive)
 
 	ctx, cancel := context.WithCancel(ctx)
 	lockInfo := &lockContext{
+		lock:   lock,
 		cancel: cancel,
 	}
 	lockInfo.refreshWG.Add(2)
 	refreshChan := make(chan struct{})
+	forceRefreshChan := make(chan refreshLockRequest)
 
 	globalLocks.Lock()
 	globalLocks.locks[lock] = lockInfo
-	go refreshLocks(ctx, lock, lockInfo, refreshChan)
-	go monitorLockRefresh(ctx, lock, lockInfo, refreshChan)
+	go refreshLocks(ctx, repo.Backend(), lockInfo, refreshChan, forceRefreshChan)
+	go monitorLockRefresh(ctx, lockInfo, refreshChan, forceRefreshChan)
 	globalLocks.Unlock()
 
 	return lock, ctx, err
@@ -74,8 +127,13 @@ var refreshInterval = 5 * time.Minute
 // the difference allows to compensate for a small time drift between clients.
 var refreshabilityTimeout = restic.StaleLockTimeout - refreshInterval*3/2
 
-func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext, refreshed chan<- struct{}) {
+type refreshLockRequest struct {
+	result chan bool
+}
+
+func refreshLocks(ctx context.Context, backend restic.Backend, lockInfo *lockContext, refreshed chan<- struct{}, forceRefresh <-chan refreshLockRequest) {
 	debug.Log("start")
+	lock := lockInfo.lock
 	ticker := time.NewTicker(refreshInterval)
 	lastRefresh := lock.Time
 
@@ -99,6 +157,22 @@ func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext,
 		case <-ctx.Done():
 			debug.Log("terminate")
 			return
+
+		case req := <-forceRefresh:
+			debug.Log("trying to refresh stale lock")
+			// keep on going if our current lock still exists
+			success := tryRefreshStaleLock(ctx, backend, lock, lockInfo.cancel)
+			// inform refresh goroutine about forced refresh
+			select {
+			case <-ctx.Done():
+			case req.result <- success:
+			}
+
+			if success {
+				// update lock refresh time
+				lastRefresh = lock.Time
+			}
+
 		case <-ticker.C:
 			if time.Since(lastRefresh) > refreshabilityTimeout {
 				// the lock is too old, wait until the expiry monitor cancels the context
@@ -111,7 +185,7 @@ func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext,
 				Warnf("unable to refresh lock: %v\n", err)
 			} else {
 				lastRefresh = lock.Time
-				// inform monitor gorountine about successful refresh
+				// inform monitor goroutine about successful refresh
 				select {
 				case <-ctx.Done():
 				case refreshed <- struct{}{}:
@@ -121,7 +195,7 @@ func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext,
 			}
 		}
 	}
-func monitorLockRefresh(ctx context.Context, lock *restic.Lock, lockInfo *lockContext, refreshed <-chan struct{}) {
+func monitorLockRefresh(ctx context.Context, lockInfo *lockContext, refreshed <-chan struct{}, forceRefresh chan<- refreshLockRequest) {
 	// time.Now() might use a monotonic timer which is paused during standby
 	// convert to unix time to ensure we compare real time values
 	lastRefresh := time.Now().UnixNano()
@@ -133,24 +207,47 @@ func monitorLockRefresh(ctx context.Context, lock *restic.Lock, lockInfo *lockCo
 	// timers are paused during standby, which is a problem as the refresh timeout
 	// _must_ expire if the host was too long in standby. Thus fall back to periodic checks
 	// https://github.com/golang/go/issues/35012
-	timer := time.NewTimer(pollDuration)
+	ticker := time.NewTicker(pollDuration)
 	defer func() {
-		timer.Stop()
+		ticker.Stop()
 		lockInfo.cancel()
 		lockInfo.refreshWG.Done()
 	}()
 
+	var refreshStaleLockResult chan bool
+
 	for {
 		select {
 		case <-ctx.Done():
 			debug.Log("terminate expiry monitoring")
 			return
 		case <-refreshed:
+			if refreshStaleLockResult != nil {
+				// ignore delayed refresh notifications while the stale lock is refreshed
+				continue
+			}
 			lastRefresh = time.Now().UnixNano()
-		case <-timer.C:
-			if time.Now().UnixNano()-lastRefresh < refreshabilityTimeout.Nanoseconds() {
-				// restart timer
-				timer.Reset(pollDuration)
+		case <-ticker.C:
+			if time.Now().UnixNano()-lastRefresh < refreshabilityTimeout.Nanoseconds() || refreshStaleLockResult != nil {
+				continue
+			}
+
+			debug.Log("trying to refreshStaleLock")
+			// keep on going if our current lock still exists
+			refreshReq := refreshLockRequest{
+				result: make(chan bool),
+			}
+			refreshStaleLockResult = refreshReq.result
+
+			// inform refresh goroutine about forced refresh
+			select {
+			case <-ctx.Done():
+			case forceRefresh <- refreshReq:
+			}
+		case success := <-refreshStaleLockResult:
+			if success {
+				lastRefresh = time.Now().UnixNano()
+				refreshStaleLockResult = nil
 				continue
 			}
 
@@ -160,6 +257,25 @@ func monitorLockRefresh(ctx context.Context, lock *restic.Lock, lockInfo *lockCo
 	}
 }
 
+func tryRefreshStaleLock(ctx context.Context, backend restic.Backend, lock *restic.Lock, cancel context.CancelFunc) bool {
+	freeze := restic.AsBackend[restic.FreezeBackend](backend)
+	if freeze != nil {
+		debug.Log("freezing backend")
+		freeze.Freeze()
+		defer freeze.Unfreeze()
+	}
+
+	err := lock.RefreshStaleLock(ctx)
+	if err != nil {
+		Warnf("failed to refresh stale lock: %v\n", err)
+		// cancel context while the backend is still frozen to prevent accidental modifications
+		cancel()
+		return false
+	}
+
+	return true
+}
+
 func unlockRepo(lock *restic.Lock) {
 	if lock == nil {
 		return
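Note on the change above: the heart of it is the retryLoop added to lockRepository, a capped exponential backoff with an overall deadline. The standalone Go sketch below distills that pattern; it is not part of the commit, and tryLock and errAlreadyLocked are hypothetical stand-ins for restic's lockFn and the restic.IsAlreadyLocked check.

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// errAlreadyLocked is a stand-in for the error detected by restic.IsAlreadyLocked.
var errAlreadyLocked = errors.New("repository is already locked")

// tryLock is a placeholder for restic's lockFn (restic.NewLock or
// restic.NewExclusiveLock); here it always fails so the retry path runs.
func tryLock(ctx context.Context) error {
	return errAlreadyLocked
}

// minDuration mirrors the helper added in the diff above.
func minDuration(a, b time.Duration) time.Duration {
	if a <= b {
		return a
	}
	return b
}

// lockWithRetry retries tryLock with a sleep that doubles after every failed
// attempt (capped at sleepMax) until the overall retryLock budget expires.
func lockWithRetry(ctx context.Context, retryLock time.Duration) error {
	const (
		sleepStart = 5 * time.Second
		sleepMax   = 60 * time.Second
	)
	sleep := minDuration(sleepStart, retryLock)
	// the deadline channel is created once, before the loop, so the
	// per-attempt sleeps can never stretch the total wait beyond retryLock
	deadline := time.After(retryLock)

	for {
		err := tryLock(ctx)
		if !errors.Is(err, errAlreadyLocked) {
			return err // success, or an error that retrying cannot fix
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-deadline:
			return tryLock(ctx) // budget exhausted: one last attempt
		case <-time.After(sleep):
			sleep = minDuration(sleep*2, sleepMax) // exponential backoff, capped
		}
	}
}

func main() {
	err := lockWithRetry(context.Background(), 20*time.Second)
	fmt.Println("lock result:", err)
}

With retryLock set to 20 seconds, the sketch attempts the lock at roughly t=0s, t=5s, and t=15s, then makes one final attempt when the deadline fires at t=20s. This mirrors the retryLoop in lockRepository above, including the final lock attempt on timeout, which guarantees at least one retry even when retryLock is shorter than the first sleep.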