diff --git a/packages/core-backend/CHANGELOG.md b/packages/core-backend/CHANGELOG.md index 3ffe46464fe..caccc3d8f7c 100644 --- a/packages/core-backend/CHANGELOG.md +++ b/packages/core-backend/CHANGELOG.md @@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Improve WebSocket connection lifecycle tracing in `BackendWebSocketService` ([#7101](https://github.com/MetaMask/core/pull/7101)) + - WebSocket connection duration is now properly reflected in trace span duration instead of only in custom data + - Trace all disconnections (both manual and unexpected) to provide complete connection lifecycle visibility in traces + - Omit `connectionDuration_ms` from disconnection traces when connection never established (onClose without onOpen) +- Update `BackendWebSocketService` default exponential backoff options for reconnection ([#7101](https://github.com/MetaMask/core/pull/7101)) + - Increase default `reconnectDelay` from 500 milliseconds to 10 seconds + - Increase default `maxReconnectDelay` from 30 seconds to 60 seconds +- Simplify WebSocket disconnection code in `BackendWebSocketService` ([#7101](https://github.com/MetaMask/core/pull/7101)) + - Centralize all disconnection logic in `ws.onclose` handler for single source of truth + - Centralize all state changes within `#establishConnection` method - state transitions only occur in `onopen` (CONNECTING → CONNECTED) and `onclose` (any state → DISCONNECTED) + - Add `MANUAL_DISCONNECT_CODE` (4999) and `MANUAL_DISCONNECT_REASON` constants to distinguish manual from unexpected disconnects + +### Removed + +- Remove `BackendWebSocketService Channel Message` trace as it provided no useful performance insights ([#7101](https://github.com/MetaMask/core/pull/7101)) + ## [4.0.0] ### Changed diff --git a/packages/core-backend/README.md b/packages/core-backend/README.md index ce7d6ab7999..b3c052cb1d8 100644 --- a/packages/core-backend/README.md +++ b/packages/core-backend/README.md @@ -311,7 +311,7 @@ sequenceDiagram #### Key Flow Characteristics 1. **Initial Setup**: BackendWebSocketService establishes connection, then AccountActivityService subscribes to selected account. Backend automatically sends a system notification with all chains that are currently up. AccountActivityService tracks these chains internally and notifies TokenBalancesController, which increases polling interval to 5 min -2. **Chain Status Tracking**: AccountActivityService maintains an internal set of chains that are 'up' based on system notifications. On disconnect/error, it marks all tracked chains as 'down' before clearing the set +2. **Chain Status Tracking**: AccountActivityService maintains an internal set of chains that are 'up' based on system notifications. On disconnect, it marks all tracked chains as 'down' before clearing the set 3. **System Notifications**: Backend automatically sends chain status updates (up/down) upon subscription and when status changes. AccountActivityService forwards these to TokenBalancesController, which adjusts polling intervals (up: 5min, down: 30s + immediate fetch) 4. **User Account Changes**: When users switch accounts, AccountActivityService unsubscribes from old account and subscribes to new account. Backend sends fresh system notification with current chain status for the new account 5. **Connection Resilience**: On reconnection, AccountActivityService resubscribes to selected account and receives fresh chain status via system notification. Automatic reconnection with exponential backoff diff --git a/packages/core-backend/src/AccountActivityService.test.ts b/packages/core-backend/src/AccountActivityService.test.ts index 00e83d7a9d1..2472e133ab1 100644 --- a/packages/core-backend/src/AccountActivityService.test.ts +++ b/packages/core-backend/src/AccountActivityService.test.ts @@ -686,11 +686,11 @@ describe('AccountActivityService', () => { timestamp: 1760344704595, }); - // Publish WebSocket ERROR state event - should flush tracked chains as down + // Publish WebSocket DISCONNECTED state event - should flush tracked chains as down rootMessenger.publish( 'BackendWebSocketService:connectionStateChanged', { - state: WebSocketState.ERROR, + state: WebSocketState.DISCONNECTED, url: 'ws://test', reconnectAttempts: 2, timeout: 10000, @@ -701,7 +701,7 @@ describe('AccountActivityService', () => { ); await completeAsyncOperations(100); - // Verify that the ERROR state triggered the status change for tracked chains + // Verify that the DISCONNECTED state triggered the status change for tracked chains expect(statusChangedEventListener).toHaveBeenCalledWith({ chainIds: ['eip155:1', 'eip155:137', 'eip155:56'], status: 'down', @@ -720,11 +720,11 @@ describe('AccountActivityService', () => { mocks.getSelectedAccount.mockReturnValue(null); - // Publish WebSocket ERROR state event without any tracked chains + // Publish WebSocket DISCONNECTED state event without any tracked chains rootMessenger.publish( 'BackendWebSocketService:connectionStateChanged', { - state: WebSocketState.ERROR, + state: WebSocketState.DISCONNECTED, url: 'ws://test', reconnectAttempts: 2, timeout: 10000, diff --git a/packages/core-backend/src/AccountActivityService.ts b/packages/core-backend/src/AccountActivityService.ts index 0044865e783..506245d6f93 100644 --- a/packages/core-backend/src/AccountActivityService.ts +++ b/packages/core-backend/src/AccountActivityService.ts @@ -451,6 +451,15 @@ export class AccountActivityService { status: data.status, timestamp, }); + + log( + `WebSocket status change - Published tracked chains as ${data.status}`, + { + count: data.chainIds.length, + chains: data.chainIds, + status: data.status, + }, + ); } /** @@ -467,11 +476,8 @@ export class AccountActivityService { // WebSocket connected - resubscribe to selected account // The system notification will automatically provide the list of chains that are up await this.#subscribeToSelectedAccount(); - } else if ( - state === WebSocketState.DISCONNECTED || - state === WebSocketState.ERROR - ) { - // On disconnect/error, flush all tracked chains as down + } else if (state === WebSocketState.DISCONNECTED) { + // On disconnect, flush all tracked chains as down const chainsToMarkDown = Array.from(this.#chainsUp); if (chainsToMarkDown.length > 0) { @@ -481,13 +487,10 @@ export class AccountActivityService { timestamp: Date.now(), }); - log( - 'WebSocket error/disconnection - Published tracked chains as down', - { - count: chainsToMarkDown.length, - chains: chainsToMarkDown, - }, - ); + log('WebSocket disconnection - Published tracked chains as down', { + count: chainsToMarkDown.length, + chains: chainsToMarkDown, + }); // Clear the tracking set since all chains are now down this.#chainsUp.clear(); diff --git a/packages/core-backend/src/BackendWebSocketService.test.ts b/packages/core-backend/src/BackendWebSocketService.test.ts index 55e34688c40..e3196e18e92 100644 --- a/packages/core-backend/src/BackendWebSocketService.test.ts +++ b/packages/core-backend/src/BackendWebSocketService.test.ts @@ -102,8 +102,13 @@ class MockWebSocket extends EventTarget { MockWebSocket.instanceCount += 1; this.url = url; // TypeScript has issues with jest.spyOn on WebSocket methods, so using direct assignment + // Store reference to simulateClose for use in close() + const simulateCloseFn = this.simulateClose.bind(this); // eslint-disable-next-line jest/prefer-spy-on - this.close = jest.fn().mockImplementation(); + this.close = jest.fn().mockImplementation((code = 1000, reason = '') => { + // When close() is called, trigger the close event to simulate real WebSocket behavior + simulateCloseFn(code, reason); + }); // eslint-disable-next-line jest/prefer-spy-on this.send = jest.fn().mockImplementation((data: string) => { this._lastSentMessage = data; @@ -506,8 +511,8 @@ describe('BackendWebSocketService', () => { async ({ service }) => { expect(service.getConnectionInfo().url).toBe('ws://test.example.com'); expect(service.getConnectionInfo().timeout).toBe(10000); - expect(service.getConnectionInfo().reconnectDelay).toBe(500); - expect(service.getConnectionInfo().maxReconnectDelay).toBe(5000); + expect(service.getConnectionInfo().reconnectDelay).toBe(10000); + expect(service.getConnectionInfo().maxReconnectDelay).toBe(60000); expect(service.getConnectionInfo().requestTimeout).toBe(30000); expect(service).toBeInstanceOf(BackendWebSocketService); }, @@ -877,14 +882,19 @@ describe('BackendWebSocketService', () => { mockWebSocketOptions: { autoConnect: false }, }, async ({ service, completeAsyncOperations }) => { + // Mock Math.random to make Cockatiel's jitter deterministic + jest.spyOn(Math, 'random').mockReturnValue(0); + // eslint-disable-next-line @typescript-eslint/no-floating-promises service.connect(); // Advance time past the timeout - await completeAsyncOperations(150); + await completeAsyncOperations(110); - // Should have transitioned to ERROR state after timeout - expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); + // Should have transitioned to DISCONNECTED state after timeout + expect(service.getConnectionInfo().state).toBe( + WebSocketState.DISCONNECTED, + ); }, ); }); @@ -939,13 +949,51 @@ describe('BackendWebSocketService', () => { mockWs.simulateClose(1006, 'Connection failed'); await completeAsyncOperations(0); - // Should schedule reconnect and be in ERROR state - expect(service.getConnectionInfo().state).toBe(WebSocketState.ERROR); + // Should schedule reconnect and be in DISCONNECTED state + expect(service.getConnectionInfo().state).toBe( + WebSocketState.DISCONNECTED, + ); }, ); }); - it('should clear connection timeout in handleClose when timeout occurs then close fires', async () => { + it('should resolve connection promise when manual disconnect occurs during CONNECTING phase', async () => { + await withService( + { mockWebSocketOptions: { autoConnect: false } }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Start connection (don't await it) + const connectPromise = service.connect(); + await completeAsyncOperations(0); + + // Get the WebSocket instance and verify CONNECTING state + const mockWs = getMockWebSocket(); + expect(service.getConnectionInfo().state).toBe( + WebSocketState.CONNECTING, + ); + + // Simulate a manual disconnect by closing with the manual disconnect code + mockWs.simulateClose(4999, 'Internal: Manual disconnect'); + await completeAsyncOperations(0); + + // The connection promise should resolve (not reject) because it was a manual disconnect + expect(await connectPromise).toBeUndefined(); + + // Should be in DISCONNECTED state + expect(service.getConnectionInfo().state).toBe( + WebSocketState.DISCONNECTED, + ); + + // Verify no reconnection was scheduled (attempts should remain 0) + expect(service.getConnectionInfo().reconnectAttempts).toBe(0); + + // Advance time to ensure no delayed reconnection attempt + await completeAsyncOperations(5000); + expect(service.getConnectionInfo().reconnectAttempts).toBe(0); + }, + ); + }); + + it('should clear connection timeout when timeout occurs then close fires', async () => { await withService( { options: { timeout: 100 }, @@ -965,24 +1013,23 @@ describe('BackendWebSocketService', () => { WebSocketState.CONNECTING, ); - // Let timeout fire (closes WebSocket and sets state to ERROR) - await completeAsyncOperations(150); + // Let timeout fire (closes WebSocket and sets state to DISCONNECTED) + // Advance time past timeout but before reconnect would fire + await completeAsyncOperations(100); - // State should be ERROR or DISCONNECTED after timeout - const stateAfterTimeout = service.getConnectionInfo().state; - expect([WebSocketState.ERROR, WebSocketState.DISCONNECTED]).toContain( - stateAfterTimeout, + // State should be DISCONNECTED after timeout + expect(service.getConnectionInfo().state).toBe( + WebSocketState.DISCONNECTED, ); // Now manually trigger close event - // Since state is ERROR (not CONNECTING), onclose will call handleClose - // which will clear connectionTimeout + // Since state is DISCONNECTED, onclose will return early due to idempotency guard mockWs.simulateClose(1006, 'Close after timeout'); await completeAsyncOperations(0); - // State should still be ERROR or DISCONNECTED - expect([WebSocketState.ERROR, WebSocketState.DISCONNECTED]).toContain( - service.getConnectionInfo().state, + // State should still be DISCONNECTED + expect(service.getConnectionInfo().state).toBe( + WebSocketState.DISCONNECTED, ); }, ); @@ -1072,10 +1119,13 @@ describe('BackendWebSocketService', () => { await completeAsyncOperations(10); const mockWs = getMockWebSocket(); - mockWs.simulateError(); + // Simulate connection failure (error is always followed by close in real WebSocket) + mockWs.simulateClose(1006, 'Connection failed'); await completeAsyncOperations(0); const attemptsBefore = service.getConnectionInfo().reconnectAttempts; + // Should be 1 after the failure + expect(attemptsBefore).toBe(1); // Try to force reconnection while timer is already scheduled await service.forceReconnection(); @@ -1087,6 +1137,134 @@ describe('BackendWebSocketService', () => { }, ); }); + + it('should clear reconnect timer when feature is disabled', async () => { + let isEnabled = true; + await withService( + { + options: { + isEnabled: () => isEnabled, + }, + mockWebSocketOptions: { autoConnect: false }, + }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Mock Math.random to make Cockatiel's jitter deterministic + jest.spyOn(Math, 'random').mockReturnValue(0); + + // Trigger a connection failure to schedule a reconnect + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + await completeAsyncOperations(10); + + const mockWs = getMockWebSocket(); + // Simulate connection failure to trigger reconnect timer + mockWs.simulateClose(1006, 'Connection failed'); + await completeAsyncOperations(0); + + // Verify reconnect timer is scheduled + expect(service.getConnectionInfo().reconnectAttempts).toBe(1); + + // Now disable the feature + isEnabled = false; + + // Try to connect again with feature disabled + await service.connect(); + + // Reconnect attempts should be reset to 0 (timers cleared) + expect(service.getConnectionInfo().reconnectAttempts).toBe(0); + + // Advance time to ensure the old reconnect timer doesn't fire + await completeAsyncOperations(10000); + + // Should still be 0, confirming timer was cleared + expect(service.getConnectionInfo().reconnectAttempts).toBe(0); + }, + ); + }); + + it('should include connectionDuration_ms in trace when connection was established', async () => { + const mockTraceFn = jest.fn((_request, fn) => fn?.()); + await withService( + { + options: { traceFn: mockTraceFn }, + }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Connect and let it establish + await service.connect(); + await completeAsyncOperations(10); + + expect(service.getConnectionInfo().state).toBe( + WebSocketState.CONNECTED, + ); + + // Clear previous trace calls to focus on disconnection trace + mockTraceFn.mockClear(); + + // Trigger unexpected close after connection was established + const mockWs = getMockWebSocket(); + mockWs.simulateClose(1006, 'Abnormal closure'); + await completeAsyncOperations(10); + + // Find the Disconnection trace call + const disconnectionTrace = mockTraceFn.mock.calls.find( + (call) => call[0]?.name === 'BackendWebSocketService Disconnection', + ); + + expect(disconnectionTrace).toBeDefined(); + expect(disconnectionTrace?.[0]?.data).toHaveProperty( + 'connectionDuration_ms', + ); + expect( + disconnectionTrace?.[0]?.data?.connectionDuration_ms, + ).toBeGreaterThan(0); + }, + ); + }); + + it('should omit connectionDuration_ms in trace when connection never established', async () => { + const mockTraceFn = jest.fn((_request, fn) => fn?.()); + await withService( + { + options: { traceFn: mockTraceFn }, + mockWebSocketOptions: { autoConnect: false }, + }, + async ({ service, getMockWebSocket, completeAsyncOperations }) => { + // Start connecting + // eslint-disable-next-line @typescript-eslint/no-floating-promises + service.connect(); + await completeAsyncOperations(0); + + expect(service.getConnectionInfo().state).toBe( + WebSocketState.CONNECTING, + ); + + // Manually disconnect before connection establishes + // This will set state to DISCONNECTING and close the WebSocket + service.disconnect(); + await completeAsyncOperations(0); + + // Clear trace calls from connection attempt + mockTraceFn.mockClear(); + + // Simulate the close event after disconnect (state is now DISCONNECTING, not CONNECTING) + // This will trigger #handleClose with connectedAt = 0 + const mockWs = getMockWebSocket(); + mockWs.simulateClose(1000, 'Normal closure'); + await completeAsyncOperations(10); + + // Find the Disconnection trace call + const disconnectionTrace = mockTraceFn.mock.calls.find( + (call) => call[0]?.name === 'BackendWebSocketService Disconnection', + ); + + // Trace should exist but should NOT have connectionDuration_ms + expect(disconnectionTrace).toBeDefined(); + expect(disconnectionTrace?.[0]?.data).not.toHaveProperty( + 'connectionDuration_ms', + ); + }, + ); + }); }); // ===================================================== @@ -1437,7 +1615,9 @@ describe('BackendWebSocketService', () => { service.disconnect(); - await expect(requestPromise).rejects.toThrow('WebSocket disconnected'); + await expect(requestPromise).rejects.toThrow( + 'WebSocket connection closed: 4999 Internal: Manual disconnect', + ); }); }); @@ -1840,7 +2020,9 @@ describe('BackendWebSocketService', () => { service.destroy(); // Pending request should be rejected - await expect(requestPromise).rejects.toThrow('Service cleanup'); + await expect(requestPromise).rejects.toThrow( + 'WebSocket connection closed: 4999 Internal: Manual disconnect', + ); // Verify service is in disconnected state expect(service.getConnectionInfo().state).toBe( diff --git a/packages/core-backend/src/BackendWebSocketService.ts b/packages/core-backend/src/BackendWebSocketService.ts index 70962d161bc..428d4603110 100644 --- a/packages/core-backend/src/BackendWebSocketService.ts +++ b/packages/core-backend/src/BackendWebSocketService.ts @@ -16,6 +16,12 @@ const SERVICE_NAME = 'BackendWebSocketService' as const; const log = createModuleLogger(projectLogger, SERVICE_NAME); +// WebSocket close codes and reasons for internal operations +const MANUAL_DISCONNECT_CODE = 4999 as const; +const MANUAL_DISCONNECT_REASON = 'Internal: Manual disconnect' as const; +const FORCE_RECONNECT_CODE = 4998 as const; +const FORCE_RECONNECT_REASON = 'Internal: Force reconnect' as const; + const MESSENGER_EXPOSED_METHODS = [ 'connect', 'disconnect', @@ -89,9 +95,7 @@ export function getCloseReason(code: number): string { export enum WebSocketState { CONNECTING = 'connecting', CONNECTED = 'connected', - DISCONNECTING = 'disconnecting', DISCONNECTED = 'disconnected', - ERROR = 'error', } /** @@ -119,10 +123,10 @@ export type BackendWebSocketServiceOptions = { /** Connection timeout in milliseconds (default: 10000) */ timeout?: number; - /** Initial reconnection delay in milliseconds (default: 500) */ + /** Initial reconnection delay in milliseconds (default: 10000) */ reconnectDelay?: number; - /** Maximum reconnection delay in milliseconds (default: 5000) */ + /** Maximum reconnection delay in milliseconds (default: 60000) */ maxReconnectDelay?: number; /** Request timeout in milliseconds (default: 30000) */ @@ -315,9 +319,6 @@ export class BackendWebSocketService { #connectedAt: number = 0; - // Track manual disconnects to prevent automatic reconnection - #manualDisconnect = false; - // Simplified subscription storage (single flat map) // Key: subscription ID string (e.g., 'sub_abc123def456') // Value: WebSocketSubscription object with channels, callback and metadata @@ -352,8 +353,8 @@ export class BackendWebSocketService { this.#options = { url: options.url, timeout: options.timeout ?? 10000, - reconnectDelay: options.reconnectDelay ?? 500, - maxReconnectDelay: options.maxReconnectDelay ?? 5000, + reconnectDelay: options.reconnectDelay ?? 10000, + maxReconnectDelay: options.maxReconnectDelay ?? 60000, requestTimeout: options.requestTimeout ?? 30000, }; @@ -425,9 +426,6 @@ export class BackendWebSocketService { * @returns Promise that resolves when connection is established */ async connect(): Promise { - // Reset manual disconnect flag when explicitly connecting - this.#manualDisconnect = false; - // Priority 1: Check if feature is enabled via callback (feature flag check) // If feature is disabled, stop all connection attempts if (this.#isEnabled && !this.#isEnabled()) { @@ -472,17 +470,8 @@ export class BackendWebSocketService { throw error; } - this.#setState(WebSocketState.CONNECTING); - // Establish the actual WebSocket connection - try { - await this.#establishConnection(bearerToken); - } catch (error) { - const errorMessage = getErrorMessage(error); - log('Connection attempt failed', { errorMessage, error }); - this.#setState(WebSocketState.ERROR); - throw error; - } + await this.#establishConnection(bearerToken); })(); try { @@ -501,29 +490,12 @@ export class BackendWebSocketService { * Closes WebSocket connection */ disconnect(): void { - if ( - this.#state === WebSocketState.DISCONNECTED || - this.#state === WebSocketState.DISCONNECTING - ) { + if (this.#state === WebSocketState.DISCONNECTED || !this.#ws) { return; } - // Mark this as a manual disconnect to prevent automatic reconnection - this.#manualDisconnect = true; - - this.#setState(WebSocketState.DISCONNECTING); - this.#clearTimers(); - this.#clearPendingRequests(new Error('WebSocket disconnected')); - - // Clear any pending connection promise - this.#connectionPromise = null; - - // Reset reconnect attempts on manual disconnect - this.#reconnectAttempts = 0; - - if (this.#ws) { - this.#ws.close(1000, 'Normal closure'); - } + // Close WebSocket with manual disconnect code and reason + this.#ws.close(MANUAL_DISCONNECT_CODE, MANUAL_DISCONNECT_REASON); log('WebSocket manually disconnected'); } @@ -545,19 +517,17 @@ export class BackendWebSocketService { * @returns Promise that resolves when disconnection is complete (reconnection is scheduled) */ async forceReconnection(): Promise { - // If a reconnect is already scheduled, don't force another one - if (this.#reconnectTimer) { - log('Reconnect already scheduled, skipping force reconnection'); + if (this.#state === WebSocketState.DISCONNECTED || !this.#ws) { + log('WebSocket already disconnected, scheduling reconnect'); + this.#scheduleReconnect(); return; } log('Forcing WebSocket reconnection to clean up subscription state'); - // Perform controlled disconnect - this.disconnect(); - - // Schedule reconnection with exponential backoff - this.#scheduleReconnect(); + // This ensures ws.onclose will schedule a reconnect (not treat it as manual disconnect) + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion + this.#ws!.close(FORCE_RECONNECT_CODE, FORCE_RECONNECT_REASON); } /** @@ -834,21 +804,15 @@ export class BackendWebSocketService { * Called when service is being destroyed or app is terminating */ destroy(): void { + // Always clear timers first to prevent reconnection attempts after destruction + // This handles the case where destroy() is called while DISCONNECTED with a pending reconnect timer this.#clearTimers(); - this.#clearSubscriptions(); - // Clear any pending connection promise - this.#connectionPromise = null; - - // Clear all pending requests - this.#clearPendingRequests(new Error('Service cleanup')); - - if (this.#ws && this.#ws.readyState === WebSocket.OPEN) { - this.#ws.close(1000, 'Service cleanup'); - } + // Reset reconnect attempts to prevent any future reconnection logic + this.#reconnectAttempts = 0; - // Set state to disconnected immediately - this.#setState(WebSocketState.DISCONNECTED); + // Disconnect the WebSocket if connected (will be no-op if already disconnected) + this.disconnect(); } /** @@ -992,46 +956,36 @@ export class BackendWebSocketService { */ async #establishConnection(bearerToken: string): Promise { const wsUrl = this.#buildAuthenticatedUrl(bearerToken); - const connectionStartTime = Date.now(); - return new Promise((resolve, reject) => { - const ws = new WebSocket(wsUrl); - this.#connectionTimeout = setTimeout(() => { - log('WebSocket connection timeout - forcing close', { - timeout: this.#options.timeout, - }); - ws.close(); - reject( - new Error( - `Failed to connect to WebSocket: Connection timeout after ${this.#options.timeout}ms`, - ), - ); - }, this.#options.timeout); - - ws.onopen = () => { - if (this.#connectionTimeout) { - clearTimeout(this.#connectionTimeout); - this.#connectionTimeout = null; - } + // Transition to CONNECTING state before creating WebSocket + this.#setState(WebSocketState.CONNECTING); + return this.#trace( + { + name: `${SERVICE_NAME} Connection`, + data: { + reconnectAttempt: this.#reconnectAttempts, + }, + tags: { + service: SERVICE_NAME, + }, + }, + () => { + return new Promise((resolve, reject) => { + const ws = new WebSocket(wsUrl); + this.#connectionTimeout = setTimeout(() => { + log('WebSocket connection timeout - forcing close', { + timeout: this.#options.timeout, + }); + // Close the WebSocket - onclose will handle rejection and state change + ws.close(); + }, this.#options.timeout); + + ws.onopen = () => { + if (this.#connectionTimeout) { + clearTimeout(this.#connectionTimeout); + this.#connectionTimeout = null; + } - // Calculate connection latency - const connectionLatency = Date.now() - connectionStartTime; - - // Trace successful connection with latency - // Promise result intentionally not awaited - // eslint-disable-next-line @typescript-eslint/no-floating-promises - this.#trace( - { - name: `${SERVICE_NAME} Connection`, - data: { - reconnectAttempt: this.#reconnectAttempts, - latency_ms: connectionLatency, - }, - tags: { - service: SERVICE_NAME, - }, - }, - () => { this.#ws = ws; this.#setState(WebSocketState.CONNECTED); this.#connectedAt = Date.now(); @@ -1047,52 +1001,101 @@ export class BackendWebSocketService { }, 10000); resolve(); - }, - ); - }; - - ws.onerror = (event: Event) => { - log('WebSocket onerror event triggered', { event }); - if (this.#connectionTimeout) { - clearTimeout(this.#connectionTimeout); - this.#connectionTimeout = null; - } - const error = new Error(`WebSocket connection error to ${wsUrl}`); - reject(error); - }; - - ws.onclose = (event: CloseEvent) => { - log('WebSocket onclose event triggered', { - code: event.code, - reason: event.reason, - wasClean: event.wasClean, + }; + + ws.onclose = (event: CloseEvent) => { + log('WebSocket onclose event triggered', { + code: event.code, + reason: event.reason || getCloseReason(event.code), + wasClean: event.wasClean, + }); + + // Guard against duplicate close events + if (this.#state === WebSocketState.DISCONNECTED) { + return; + } + + // Detect if this is a manual disconnect or service cleanup based on close code + const isManualDisconnect = + event.code === MANUAL_DISCONNECT_CODE && + event.reason === MANUAL_DISCONNECT_REASON; + + // If connection hasn't been established yet, handle the connection promise + if (this.#state === WebSocketState.CONNECTING) { + if (isManualDisconnect) { + // Manual disconnect during connection - resolve to prevent reconnection + resolve(); + } else { + // Failed connection attempt - reject to trigger reconnection + reject( + new Error( + `WebSocket connection closed during connection: ${event.code} ${event.reason}`, + ), + ); + } + } + + // Calculate connection duration before we clear state (only if we were connected) + const connectionDuration_ms = + this.#connectedAt > 0 ? Date.now() - this.#connectedAt : 0; + + // Clear all timers + this.#clearTimers(); + + // Clear WebSocket reference to allow garbage collection + this.#ws = undefined; + + // Clear connection tracking + this.#connectionPromise = null; + this.#connectedAt = 0; + + this.#clearPendingRequests( + new Error( + `WebSocket connection closed: ${event.code} ${event.reason || getCloseReason(event.code)}`, + ), + ); + this.#clearSubscriptions(); + + // Update state to disconnected + this.#setState(WebSocketState.DISCONNECTED); + + // Check if this was a manual disconnect + if (isManualDisconnect) { + // Manual disconnect - reset attempts and don't reconnect + this.#reconnectAttempts = 0; + } else { + // Unexpected disconnect - schedule reconnection + this.#scheduleReconnect(); + } + + // eslint-disable-next-line @typescript-eslint/no-floating-promises + this.#trace({ + name: `${SERVICE_NAME} Disconnection`, + data: { + code: event.code, + reason: event.reason || getCloseReason(event.code), + wasClean: event.wasClean, + reconnectAttempts: this.#reconnectAttempts, + ...(connectionDuration_ms > 0 && { connectionDuration_ms }), + }, + tags: { + service: SERVICE_NAME, + }, + }); + }; + + // Set up message handler immediately - no need to wait for connection + ws.onmessage = (event: MessageEvent) => { + try { + const message = this.#parseMessage(event.data); + this.#handleMessage(message); + } catch { + // Silently ignore invalid JSON messages + } + }; }); - if (this.#state === WebSocketState.CONNECTING) { - // Handle connection-phase close events - if (this.#connectionTimeout) { - clearTimeout(this.#connectionTimeout); - this.#connectionTimeout = null; - } - reject( - new Error( - `WebSocket connection closed during connection: ${event.code} ${event.reason}`, - ), - ); - } else { - this.#handleClose(event); - } - }; - - // Set up message handler immediately - no need to wait for connection - ws.onmessage = (event: MessageEvent) => { - try { - const message = this.#parseMessage(event.data); - this.#handleMessage(message); - } catch { - // Silently ignore invalid JSON messages - } - }; - }); + }, + ); } // ============================================================================= @@ -1203,29 +1206,7 @@ export class BackendWebSocketService { return; } - // Calculate notification latency: time from server sent to client received - const receivedAt = Date.now(); - const latency = receivedAt - message.timestamp; - - // Trace channel message processing with latency data - // Promise result intentionally not awaited - // eslint-disable-next-line @typescript-eslint/no-floating-promises - this.#trace( - { - name: `${SERVICE_NAME} Channel Message`, - data: { - latency_ms: latency, - event: message.event, - }, - tags: { - service: SERVICE_NAME, - }, - }, - () => { - // Direct lookup for exact channel match - this.#channelCallbacks.get(message.channel)?.callback(message); - }, - ); + this.#channelCallbacks.get(message.channel)?.callback(message); } /** @@ -1289,67 +1270,6 @@ export class BackendWebSocketService { // 5. EVENT HANDLERS (PRIVATE) // ============================================================================= - /** - * Handles WebSocket close events (mobile optimized) - * - * @param event - The WebSocket close event - */ - #handleClose(event: CloseEvent): void { - // Calculate connection duration before we clear state - const connectionDuration = Date.now() - this.#connectedAt; - - if (this.#connectionTimeout) { - clearTimeout(this.#connectionTimeout); - this.#connectionTimeout = null; - } - if (this.#stableConnectionTimer) { - clearTimeout(this.#stableConnectionTimer); - this.#stableConnectionTimer = null; - } - - this.#connectedAt = 0; - - // Clear any pending connection promise - this.#connectionPromise = null; - - // Clear subscriptions and pending requests on any disconnect - // This ensures clean state for reconnection - this.#clearPendingRequests(new Error('WebSocket connection closed')); - this.#clearSubscriptions(); - - // Update state to disconnected - this.#setState(WebSocketState.DISCONNECTED); - - // Check if this was a manual disconnect - if (this.#manualDisconnect) { - // Manual disconnect - don't reconnect - return; - } - - // Trace unexpected disconnect with details - // Promise result intentionally not awaited - // eslint-disable-next-line @typescript-eslint/no-floating-promises - this.#trace( - { - name: `${SERVICE_NAME} Disconnect`, - data: { - code: event.code, - reason: event.reason || getCloseReason(event.code), - connectionDuration_ms: connectionDuration, - }, - tags: { - service: SERVICE_NAME, - disconnect_type: 'unexpected', - }, - }, - () => { - // Empty trace callback - just measuring the event - }, - ); - - this.#scheduleReconnect(); - } - /** * Handles WebSocket errors * @@ -1374,7 +1294,7 @@ export class BackendWebSocketService { * * Call this from: * - connect() catch block (on connection failure) - * - #handleClose() (on unexpected disconnect) + * - ws.onclose handler (on unexpected disconnect) * * For user-initiated actions (sign in, unlock), call connect() directly instead. * @@ -1431,9 +1351,6 @@ export class BackendWebSocketService { }).next(); } - /** - * Clears all active timers - */ #clearTimers(): void { if (this.#reconnectTimer) { clearTimeout(this.#reconnectTimer);