Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions src/storage/protocols/tus/s3-locker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ export class S3Locker implements Locker {
return false
}

async renewLock(id: string): Promise<boolean> {
async renewLock(id: string, checkLocked: () => boolean): Promise<boolean> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any specific reason why we need to pass a function a not check directly this.isLocked?

Copy link
Contributor Author

@itslenny itslenny Dec 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is in renewLock() wich is a part of S3Locker, but isLocked is part of S3Lock. So when S3Lock calls this.locker.renewLock() it passes in a function to allow S3Locker to check the lock state of S3Lock.

Alternatives would be

  • move the lock states to S3Locker with a map<string, boolean> (id -> isLocked) and share it that way
  • pass in the whole S3Lock instance (instead of checkLocked) and expose isLocked from S3Lock (currently private) so the locker can check the lock directly

const lockKey = this.getLockKey(id)

try {
Expand All @@ -139,6 +139,12 @@ export class S3Locker implements Locker {
const body = await response.Body.transformToString()
const currentLock: LockMetadata = JSON.parse(body)

// Check if lock is still held before updating
// This prevents writing the lock back to S3 if unlock() was called during GetObject
if (!checkLocked()) {
return false
}

// Update expiration time
const updatedLock: LockMetadata = {
...currentLock,
Expand Down Expand Up @@ -402,7 +408,7 @@ export class S3Lock implements Lock {
}

try {
const renewed = await this.locker.renewLock(this.id)
const renewed = await this.locker.renewLock(this.id, () => this.isLocked)
if (!renewed) {
this.locker
.getLogger()
Expand Down
71 changes: 66 additions & 5 deletions src/test/s3-locker.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ describe('S3Locker', () => {
for (const { lock } of allLocks) {
try {
await lock.unlock()
} catch (error) {
} catch {
// Ignore cleanup errors
}
}
Expand All @@ -92,7 +92,7 @@ describe('S3Locker', () => {
for (const { lock } of allLocks) {
try {
await lock.unlock()
} catch (error) {
} catch {
// Ignore cleanup errors
}
}
Expand All @@ -106,7 +106,7 @@ describe('S3Locker', () => {
if (s3Client && typeof s3Client.destroy === 'function') {
s3Client.destroy()
}
} catch (error) {
} catch {
// Ignore cleanup errors
}
})
Expand Down Expand Up @@ -135,7 +135,7 @@ describe('S3Locker', () => {
})
)
}
} catch (error) {
} catch {
// Ignore cleanup errors
}
}
Expand Down Expand Up @@ -404,6 +404,67 @@ describe('S3Locker', () => {
await lock1.unlock()
})

test('should not create zombie locks with rapid lock cycling and contention', async () => {
// This test reproduces the zombie lock race condition:
// 1. Lock1 acquired, starts renewal timer (fires at T=1000)
// 2. Lock2 tries to acquire same ID (retries/waits)
// 3. Lock2 aborted, Lock1 unlocked at T=150
// 4. Renewal timer fires at T=1000 with in-flight renewLock()
// 5. renewLock() GET happens, then unlock() DELETE happens
// 6. WITHOUT fix: renewLock() PUT happens AFTER DELETE, creating zombie
// 7. Next iteration tries to acquire but finds zombie, times out

const lockId = 'contention-race-test'
const cancelReq = jest.fn()

for (let i = 0; i < 5; i++) {
const lock1 = locker.newLock(lockId)
const lock2 = locker.newLock(lockId)

const abortController1 = new AbortController()
const abortController2 = new AbortController()

// Acquire first lock
// This can throw LockTimeout if a zombie lock exists from previous iteration
const start = Date.now()
try {
await lock1.lock(abortController1.signal, cancelReq)
} catch (error: any) {
const lockDuration = Date.now() - start
throw new Error(
`Lock acquisition failed on iteration ${i} after ${lockDuration}ms with error: ${error.message}. `
)
}
const lockDuration = Date.now() - start

// Lock should be acquired quickly (< 500ms)
// Longer times indicate retries due to contention or zombie locks
if (lockDuration > 500) {
await lock1.unlock()
throw new Error(
`Lock acquisition took ${lockDuration}ms on iteration ${i}, indicating potential zombie lock or excessive retries`
)
}

// Start second lock attempt (creates contention)
const lock2Promise = lock2.lock(abortController2.signal, cancelReq)

// Abort the second lock after a short delay
setTimeout(() => abortController2.abort(), 100)

// Handle lock2
try {
await lock2Promise
await lock2.unlock()
} catch {
// Expected - lock was aborted
}

// Unlock first lock
await lock1.unlock()
}
}, 10000)

test('should handle unlock without lock', async () => {
const lock = locker.newLock('test-lock-1')

Expand Down Expand Up @@ -594,7 +655,7 @@ describe('S3Locker', () => {
await lock2.lock(abortController2.signal, cancelReq)
// If it succeeds unexpectedly, unlock it
await lock2.unlock()
} catch (error) {
} catch {
// Expected behavior - lock contention
secondLockFailed = true
}
Expand Down
Loading