Diffstat (limited to 'cmd/restic/lock.go')
-rw-r--r--  cmd/restic/lock.go  152
1 file changed, 134 insertions(+), 18 deletions(-)
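
The retry loop added by this diff waits with exponential backoff: the sleep between lock attempts starts at retrySleepStart (5 seconds), doubles after each failed attempt, is capped at retrySleepMax (60 seconds), and the total wait is bounded by the caller-supplied retryLock duration (with one final attempt when that budget expires). The following is a minimal, self-contained sketch of that schedule only; the 5-minute retryLock value and the printed output are illustrative and not part of the commit.

package main

import (
	"fmt"
	"time"
)

// minDuration mirrors the helper introduced in this diff.
func minDuration(a, b time.Duration) time.Duration {
	if a <= b {
		return a
	}
	return b
}

func main() {
	// Hypothetical --retry-lock budget; in restic this comes from the caller.
	retryLock := 5 * time.Minute

	// Approximate the retry schedule: start at 5s, double after each failed
	// attempt, cap at 60s, stop once the retryLock budget is used up.
	sleep := minDuration(5*time.Second, retryLock)
	for elapsed := time.Duration(0); elapsed < retryLock; {
		fmt.Printf("lock attempt at t=%v failed, sleeping %v\n", elapsed, sleep)
		elapsed += sleep
		sleep = minDuration(sleep*2, 60*time.Second)
	}
}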
diff --git a/cmd/restic/lock.go b/cmd/restic/lock.go
index f39a08db6..11c1ed8f5 100644
--- a/cmd/restic/lock.go
+++ b/cmd/restic/lock.go
@@ -2,6 +2,7 @@ package main
import (
"context"
+ "fmt"
"sync"
"time"
@@ -11,6 +12,7 @@ import (
)
type lockContext struct {
+ lock *restic.Lock
cancel context.CancelFunc
refreshWG sync.WaitGroup
}
@@ -21,17 +23,29 @@ var globalLocks struct {
sync.Once
}
-func lockRepo(ctx context.Context, repo restic.Repository) (*restic.Lock, context.Context, error) {
- return lockRepository(ctx, repo, false)
+func lockRepo(ctx context.Context, repo restic.Repository, retryLock time.Duration, json bool) (*restic.Lock, context.Context, error) {
+ return lockRepository(ctx, repo, false, retryLock, json)
}
-func lockRepoExclusive(ctx context.Context, repo restic.Repository) (*restic.Lock, context.Context, error) {
- return lockRepository(ctx, repo, true)
+func lockRepoExclusive(ctx context.Context, repo restic.Repository, retryLock time.Duration, json bool) (*restic.Lock, context.Context, error) {
+ return lockRepository(ctx, repo, true, retryLock, json)
+}
+
+var (
+ retrySleepStart = 5 * time.Second
+ retrySleepMax = 60 * time.Second
+)
+
+func minDuration(a, b time.Duration) time.Duration {
+ if a <= b {
+ return a
+ }
+ return b
}
// lockRepository wraps the ctx such that it is cancelled when the repository is unlocked
// cancelling the original context also stops the lock refresh
-func lockRepository(ctx context.Context, repo restic.Repository, exclusive bool) (*restic.Lock, context.Context, error) {
+func lockRepository(ctx context.Context, repo restic.Repository, exclusive bool, retryLock time.Duration, json bool) (*restic.Lock, context.Context, error) {
// make sure that a repository is unlocked properly and after cancel() was
// called by the cleanup handler in global.go
globalLocks.Do(func() {
@@ -43,26 +57,65 @@ func lockRepository(ctx context.Context, repo restic.Repository, exclusive bool)
lockFn = restic.NewExclusiveLock
}
- lock, err := lockFn(ctx, repo)
+ var lock *restic.Lock
+ var err error
+
+ retrySleep := minDuration(retrySleepStart, retryLock)
+ retryMessagePrinted := false
+ retryTimeout := time.After(retryLock)
+
+retryLoop:
+ for {
+ lock, err = lockFn(ctx, repo)
+ if err != nil && restic.IsAlreadyLocked(err) {
+
+ if !retryMessagePrinted {
+ if !json {
+ Verbosef("repo already locked, waiting up to %s for the lock\n", retryLock)
+ }
+ retryMessagePrinted = true
+ }
+
+ debug.Log("repo already locked, retrying in %v", retrySleep)
+ retrySleepCh := time.After(retrySleep)
+
+ select {
+ case <-ctx.Done():
+ return nil, ctx, ctx.Err()
+ case <-retryTimeout:
+ debug.Log("repo already locked, timeout expired")
+ // Last lock attempt
+ lock, err = lockFn(ctx, repo)
+ break retryLoop
+ case <-retrySleepCh:
+ retrySleep = minDuration(retrySleep*2, retrySleepMax)
+ }
+ } else {
+ // anything else, either a successful lock or another error
+ break retryLoop
+ }
+ }
if restic.IsInvalidLock(err) {
return nil, ctx, errors.Fatalf("%v\n\nthe `unlock --remove-all` command can be used to remove invalid locks. Make sure that no other restic process is accessing the repository when running the command", err)
}
if err != nil {
- return nil, ctx, errors.Fatalf("unable to create lock in backend: %v", err)
+ return nil, ctx, fmt.Errorf("unable to create lock in backend: %w", err)
}
debug.Log("create lock %p (exclusive %v)", lock, exclusive)
ctx, cancel := context.WithCancel(ctx)
lockInfo := &lockContext{
+ lock: lock,
cancel: cancel,
}
lockInfo.refreshWG.Add(2)
refreshChan := make(chan struct{})
+ forceRefreshChan := make(chan refreshLockRequest)
globalLocks.Lock()
globalLocks.locks[lock] = lockInfo
- go refreshLocks(ctx, lock, lockInfo, refreshChan)
- go monitorLockRefresh(ctx, lock, lockInfo, refreshChan)
+ go refreshLocks(ctx, repo.Backend(), lockInfo, refreshChan, forceRefreshChan)
+ go monitorLockRefresh(ctx, lockInfo, refreshChan, forceRefreshChan)
globalLocks.Unlock()
return lock, ctx, err
@@ -74,8 +127,13 @@ var refreshInterval = 5 * time.Minute
// the difference allows to compensate for a small time drift between clients.
var refreshabilityTimeout = restic.StaleLockTimeout - refreshInterval*3/2
-func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext, refreshed chan<- struct{}) {
+type refreshLockRequest struct {
+ result chan bool
+}
+
+func refreshLocks(ctx context.Context, backend restic.Backend, lockInfo *lockContext, refreshed chan<- struct{}, forceRefresh <-chan refreshLockRequest) {
debug.Log("start")
+ lock := lockInfo.lock
ticker := time.NewTicker(refreshInterval)
lastRefresh := lock.Time
@@ -99,6 +157,22 @@ func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext,
case <-ctx.Done():
debug.Log("terminate")
return
+
+ case req := <-forceRefresh:
+ debug.Log("trying to refresh stale lock")
+ // keep on going if our current lock still exists
+ success := tryRefreshStaleLock(ctx, backend, lock, lockInfo.cancel)
+ // inform refresh goroutine about forced refresh
+ select {
+ case <-ctx.Done():
+ case req.result <- success:
+ }
+
+ if success {
+ // update lock refresh time
+ lastRefresh = lock.Time
+ }
+
case <-ticker.C:
if time.Since(lastRefresh) > refreshabilityTimeout {
// the lock is too old, wait until the expiry monitor cancels the context
@@ -111,7 +185,7 @@ func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext,
Warnf("unable to refresh lock: %v\n", err)
} else {
lastRefresh = lock.Time
- // inform monitor gorountine about successful refresh
+ // inform monitor goroutine about successful refresh
select {
case <-ctx.Done():
case refreshed <- struct{}{}:
@@ -121,7 +195,7 @@ func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext,
}
}
-func monitorLockRefresh(ctx context.Context, lock *restic.Lock, lockInfo *lockContext, refreshed <-chan struct{}) {
+func monitorLockRefresh(ctx context.Context, lockInfo *lockContext, refreshed <-chan struct{}, forceRefresh chan<- refreshLockRequest) {
// time.Now() might use a monotonic timer which is paused during standby
// convert to unix time to ensure we compare real time values
lastRefresh := time.Now().UnixNano()
@@ -133,24 +207,47 @@ func monitorLockRefresh(ctx context.Context, lock *restic.Lock, lockInfo *lockCo
// timers are paused during standby, which is a problem as the refresh timeout
// _must_ expire if the host was too long in standby. Thus fall back to periodic checks
// https://github.com/golang/go/issues/35012
- timer := time.NewTimer(pollDuration)
+ ticker := time.NewTicker(pollDuration)
defer func() {
- timer.Stop()
+ ticker.Stop()
lockInfo.cancel()
lockInfo.refreshWG.Done()
}()
+ var refreshStaleLockResult chan bool
+
for {
select {
case <-ctx.Done():
debug.Log("terminate expiry monitoring")
return
case <-refreshed:
+ if refreshStaleLockResult != nil {
+ // ignore delayed refresh notifications while the stale lock is refreshed
+ continue
+ }
lastRefresh = time.Now().UnixNano()
- case <-timer.C:
- if time.Now().UnixNano()-lastRefresh < refreshabilityTimeout.Nanoseconds() {
- // restart timer
- timer.Reset(pollDuration)
+ case <-ticker.C:
+ if time.Now().UnixNano()-lastRefresh < refreshabilityTimeout.Nanoseconds() || refreshStaleLockResult != nil {
+ continue
+ }
+
+ debug.Log("trying to refreshStaleLock")
+ // keep on going if our current lock still exists
+ refreshReq := refreshLockRequest{
+ result: make(chan bool),
+ }
+ refreshStaleLockResult = refreshReq.result
+
+ // inform refresh goroutine about forced refresh
+ select {
+ case <-ctx.Done():
+ case forceRefresh <- refreshReq:
+ }
+ case success := <-refreshStaleLockResult:
+ if success {
+ lastRefresh = time.Now().UnixNano()
+ refreshStaleLockResult = nil
continue
}
@@ -160,6 +257,25 @@ func monitorLockRefresh(ctx context.Context, lock *restic.Lock, lockInfo *lockCo
}
}
+func tryRefreshStaleLock(ctx context.Context, backend restic.Backend, lock *restic.Lock, cancel context.CancelFunc) bool {
+ freeze := restic.AsBackend[restic.FreezeBackend](backend)
+ if freeze != nil {
+ debug.Log("freezing backend")
+ freeze.Freeze()
+ defer freeze.Unfreeze()
+ }
+
+ err := lock.RefreshStaleLock(ctx)
+ if err != nil {
+ Warnf("failed to refresh stale lock: %v\n", err)
+ // cancel context while the backend is still frozen to prevent accidental modifications
+ cancel()
+ return false
+ }
+
+ return true
+}
+
func unlockRepo(lock *restic.Lock) {
if lock == nil {
return