diff --git a/src/attachments-streaming/attachments-streaming-pool.test.ts b/src/attachments-streaming/attachments-streaming-pool.test.ts index e36314c..4357f9c 100644 --- a/src/attachments-streaming/attachments-streaming-pool.test.ts +++ b/src/attachments-streaming/attachments-streaming-pool.test.ts @@ -5,13 +5,7 @@ import { ProcessAttachmentReturnType, } from '../types'; import { AttachmentsStreamingPool } from './attachments-streaming-pool'; -import { - runWithSdkLogContext, - runWithUserLogContext, - getSdkLogContextValue, -} from '../logger/logger.context'; -// Mock types interface TestState { attachments: { completed: boolean }; } @@ -83,7 +77,6 @@ describe(AttachmentsStreamingPool.name, () => { stream: mockStream, }); - expect(pool).toBeDefined(); expect(pool['adapter']).toBe(mockAdapter); expect(pool['attachments']).toEqual(mockAttachments); expect(pool['batchSize']).toBe(10); @@ -656,13 +649,13 @@ describe(AttachmentsStreamingPool.name, () => { }); it('should process user stream callback correctly while maintaining context isolation', async () => { - let userCallbackContextId: string | undefined; let userCallbackExecuted = false; // Mock stream to capture context info const mockStreamFn: ExternalSystemAttachmentStreamingFunction = jest .fn() .mockImplementation(async () => { + await Promise.resolve(); userCallbackExecuted = true; // Record that the callback executed return { diff --git a/src/common/event-type-translation.test.ts b/src/common/event-type-translation.test.ts new file mode 100644 index 0000000..27a621c --- /dev/null +++ b/src/common/event-type-translation.test.ts @@ -0,0 +1,200 @@ +import { EventType, ExtractorEventType } from '../types/extraction'; +import { LoaderEventType } from '../types/loading'; +import { + translateExtractorEventType, + translateIncomingEventType, + translateLoaderEventType, + translateOutgoingEventType, +} from './event-type-translation'; + +describe(translateIncomingEventType.name, () => { + it.each([ + [ + 
EventType.ExtractionExternalSyncUnitsStart, + EventType.StartExtractingExternalSyncUnits, + ], + [EventType.ExtractionMetadataStart, EventType.StartExtractingMetadata], + [EventType.ExtractionDataStart, EventType.StartExtractingData], + [EventType.ExtractionDataContinue, EventType.ContinueExtractingData], + [EventType.ExtractionDataDelete, EventType.StartDeletingExtractorState], + [ + EventType.ExtractionAttachmentsStart, + EventType.StartExtractingAttachments, + ], + [ + EventType.ExtractionAttachmentsContinue, + EventType.ContinueExtractingAttachments, + ], + [ + EventType.ExtractionAttachmentsDelete, + EventType.StartDeletingExtractorAttachmentsState, + ], + ])('maps legacy extraction event %s to %s', (legacy, modern) => { + expect(translateIncomingEventType(legacy)).toBe(modern); + }); + + it.each([ + [EventType.StartExtractingExternalSyncUnits], + [EventType.StartExtractingMetadata], + [EventType.StartExtractingData], + [EventType.ContinueExtractingData], + [EventType.StartDeletingExtractorState], + [EventType.StartExtractingAttachments], + [EventType.ContinueExtractingAttachments], + [EventType.StartDeletingExtractorAttachmentsState], + [EventType.StartLoadingData], + [EventType.ContinueLoadingData], + [EventType.StartLoadingAttachments], + [EventType.ContinueLoadingAttachments], + [EventType.StartDeletingLoaderState], + [EventType.StartDeletingLoaderAttachmentState], + [EventType.UnknownEventType], + ])('is a no-op for already-modern event type %s', (eventType) => { + expect(translateIncomingEventType(eventType)).toBe(eventType); + }); + + it('warns and returns the input verbatim for an unrecognised event type', () => { + const warnSpy = jest.spyOn(console, 'warn').mockImplementation(() => {}); + + const result = translateIncomingEventType('NONSENSE_EVENT' as EventType); + + expect(result).toBe('NONSENSE_EVENT'); + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('NONSENSE_EVENT') + ); + + warnSpy.mockRestore(); + }); +}); + 
+describe(translateExtractorEventType.name, () => { + it.each([ + [ + ExtractorEventType.ExtractionExternalSyncUnitsDone, + ExtractorEventType.ExternalSyncUnitExtractionDone, + ], + [ + ExtractorEventType.ExtractionExternalSyncUnitsError, + ExtractorEventType.ExternalSyncUnitExtractionError, + ], + [ + ExtractorEventType.ExtractionMetadataDone, + ExtractorEventType.MetadataExtractionDone, + ], + [ + ExtractorEventType.ExtractionMetadataError, + ExtractorEventType.MetadataExtractionError, + ], + [ + ExtractorEventType.ExtractionDataProgress, + ExtractorEventType.DataExtractionProgress, + ], + [ + ExtractorEventType.ExtractionDataDelay, + ExtractorEventType.DataExtractionDelayed, + ], + [ + ExtractorEventType.ExtractionDataDone, + ExtractorEventType.DataExtractionDone, + ], + [ + ExtractorEventType.ExtractionDataError, + ExtractorEventType.DataExtractionError, + ], + [ + ExtractorEventType.ExtractionDataDeleteDone, + ExtractorEventType.ExtractorStateDeletionDone, + ], + [ + ExtractorEventType.ExtractionDataDeleteError, + ExtractorEventType.ExtractorStateDeletionError, + ], + [ + ExtractorEventType.ExtractionAttachmentsProgress, + ExtractorEventType.AttachmentExtractionProgress, + ], + [ + ExtractorEventType.ExtractionAttachmentsDelay, + ExtractorEventType.AttachmentExtractionDelayed, + ], + [ + ExtractorEventType.ExtractionAttachmentsDone, + ExtractorEventType.AttachmentExtractionDone, + ], + [ + ExtractorEventType.ExtractionAttachmentsError, + ExtractorEventType.AttachmentExtractionError, + ], + [ + ExtractorEventType.ExtractionAttachmentsDeleteDone, + ExtractorEventType.ExtractorAttachmentsStateDeletionDone, + ], + [ + ExtractorEventType.ExtractionAttachmentsDeleteError, + ExtractorEventType.ExtractorAttachmentsStateDeletionError, + ], + ])('maps legacy extractor event %s to %s', (legacy, modern) => { + expect(translateExtractorEventType(legacy)).toBe(modern); + }); + + it.each([ + [ExtractorEventType.DataExtractionDone], + 
[ExtractorEventType.DataExtractionProgress], + [ExtractorEventType.AttachmentExtractionDone], + [ExtractorEventType.MetadataExtractionDone], + [ExtractorEventType.UnknownEventType], + ])('is a no-op for already-modern extractor event %s', (eventType) => { + expect(translateExtractorEventType(eventType)).toBe(eventType); + }); +}); + +describe(translateLoaderEventType.name, () => { + it.each([ + [LoaderEventType.DataLoadingDelay, LoaderEventType.DataLoadingDelayed], + [ + LoaderEventType.AttachmentsLoadingProgress, + LoaderEventType.AttachmentLoadingProgress, + ], + [ + LoaderEventType.AttachmentsLoadingDelayed, + LoaderEventType.AttachmentLoadingDelayed, + ], + [ + LoaderEventType.AttachmentsLoadingDone, + LoaderEventType.AttachmentLoadingDone, + ], + [ + LoaderEventType.AttachmentsLoadingError, + LoaderEventType.AttachmentLoadingError, + ], + ])('maps legacy loader event %s to %s', (legacy, modern) => { + expect(translateLoaderEventType(legacy)).toBe(modern); + }); + + it.each([ + [LoaderEventType.DataLoadingDone], + [LoaderEventType.DataLoadingProgress], + [LoaderEventType.AttachmentLoadingDone], + ])('is a no-op for already-modern loader event %s', (eventType) => { + expect(translateLoaderEventType(eventType)).toBe(eventType); + }); +}); + +describe(translateOutgoingEventType.name, () => { + it('routes extractor events through translateExtractorEventType', () => { + expect( + translateOutgoingEventType(ExtractorEventType.ExtractionDataDone) + ).toBe(ExtractorEventType.DataExtractionDone); + }); + + it('routes loader events through translateLoaderEventType', () => { + expect( + translateOutgoingEventType(LoaderEventType.AttachmentsLoadingDone) + ).toBe(LoaderEventType.AttachmentLoadingDone); + }); + + it('passes through unknown event types unchanged', () => { + const unknown = 'SOME_UNKNOWN_EVENT' as ExtractorEventType; + expect(translateOutgoingEventType(unknown)).toBe(unknown); + }); +}); diff --git a/src/common/time-value-resolver.test.ts 
b/src/common/time-value-resolver.test.ts index 6570c0f..f84a3da 100644 --- a/src/common/time-value-resolver.test.ts +++ b/src/common/time-value-resolver.test.ts @@ -213,7 +213,6 @@ describe('time-value-resolver', () => { ); const after = new Date().toISOString(); - expect(result).toBeDefined(); expect(result! >= before).toBe(true); expect(result! <= after).toBe(true); }); diff --git a/src/multithreading/create-worker.test.ts b/src/multithreading/create-worker.test.ts index d11af2b..0c9eb07 100644 --- a/src/multithreading/create-worker.test.ts +++ b/src/multithreading/create-worker.test.ts @@ -7,18 +7,22 @@ import { createWorker } from './create-worker'; describe(createWorker.name, () => { it('should create a Worker instance when valid parameters are provided', async () => { + // Arrange const workerPath = __dirname + '../tests/dummy-worker.ts'; + const event = createMockEvent(mockServer.baseUrl, { + payload: { event_type: EventType.ExtractionExternalSyncUnitsStart }, + }); + // Act const worker = isMainThread ? 
await createWorker({ - event: createMockEvent(mockServer.baseUrl, { - payload: { event_type: EventType.ExtractionExternalSyncUnitsStart }, - }), + event, initialState: {}, workerPath, }) : null; + // Assert expect(worker).not.toBeNull(); expect(worker).toBeInstanceOf(Worker); @@ -28,44 +32,51 @@ describe(createWorker.name, () => { }); it('should throw error when not in main thread', async () => { + // Arrange const originalIsMainThread = isMainThread; // eslint-disable-next-line @typescript-eslint/no-explicit-any (isMainThread as any) = false; const workerPath = __dirname + '../tests/dummy-worker.ts'; + const event = createMockEvent(mockServer.baseUrl, { + payload: { event_type: EventType.ExtractionExternalSyncUnitsStart }, + }); + // Act & Assert await expect( createWorker({ - event: createMockEvent(mockServer.baseUrl, { - payload: { event_type: EventType.ExtractionExternalSyncUnitsStart }, - }), + event, initialState: {}, workerPath, }) ).rejects.toThrow('Worker threads can not start more worker threads.'); - // Restore original value // eslint-disable-next-line @typescript-eslint/no-explicit-any (isMainThread as any) = originalIsMainThread; }); it('[edge] should handle worker creation with minimal valid data', async () => { + // Arrange const workerPath = __dirname + '../tests/dummy-worker.ts'; + const event = createMockEvent(mockServer.baseUrl, { + payload: { event_type: EventType.ExtractionExternalSyncUnitsStart }, + }); if (isMainThread) { + // Act const worker = await createWorker({ - event: createMockEvent(mockServer.baseUrl, { - payload: { event_type: EventType.ExtractionExternalSyncUnitsStart }, - }), + event, initialState: {}, workerPath, }); + // Assert expect(worker).toBeInstanceOf(Worker); await worker.terminate(); } }); it('[edge] should handle worker creation with complex initial state', async () => { + // Arrange const workerPath = __dirname + '../tests/dummy-worker.ts'; const complexState = { nested: { @@ -73,33 +84,40 @@ 
describe(createWorker.name, () => { config: { enabled: true }, }, }; + const event = createMockEvent(mockServer.baseUrl, { + payload: { event_type: EventType.ExtractionDataStart }, + }); if (isMainThread) { + // Act const worker = await createWorker({ - event: createMockEvent(mockServer.baseUrl, { - payload: { event_type: EventType.ExtractionDataStart }, - }), + event, initialState: complexState, workerPath, }); + // Assert expect(worker).toBeInstanceOf(Worker); await worker.terminate(); } }); it('[edge] should handle different event types', async () => { + // Arrange const workerPath = __dirname + '../tests/dummy-worker.ts'; + const event = createMockEvent(mockServer.baseUrl, { + payload: { event_type: EventType.ExtractionMetadataStart }, + }); if (isMainThread) { + // Act const worker = await createWorker({ - event: createMockEvent(mockServer.baseUrl, { - payload: { event_type: EventType.ExtractionMetadataStart }, - }), + event, initialState: {}, workerPath, }); + // Assert expect(worker).toBeInstanceOf(Worker); await worker.terminate(); } diff --git a/src/multithreading/process-task.test.ts b/src/multithreading/process-task.test.ts new file mode 100644 index 0000000..18d7e27 --- /dev/null +++ b/src/multithreading/process-task.test.ts @@ -0,0 +1,173 @@ +import { EventType } from '../types/extraction'; +import { WorkerMessageSubject } from '../types/workers'; + +// These tests cover logic that is NOT exercised by the end-to-end integration +// tests under src/tests/timeout-handling/: +// - translation of legacy wire event types into the new enum (mutates event in place) +// - the hasWorkerEmitted guard that prevents onTimeout from firing after a +// successful emit (integration tests only exercise the positive case) +// - the error branch that posts WorkerMessageFailed and exits(1) +// +// Tests for the happy path, timeout-signal-reaches-worker behavior, and main-thread +// early return were removed — they either duplicate what the integration suite +// already 
exercises or assert mocked behavior with little signal. + +const mockParentPortPostMessage = jest.fn(); +const mockParentPortOn = jest.fn(); + +let mockIsMainThread = false; + +jest.mock('node:worker_threads', () => ({ + get isMainThread() { + return mockIsMainThread; + }, + get parentPort() { + return { + postMessage: mockParentPortPostMessage, + on: mockParentPortOn, + }; + }, + get workerData() { + return (global as Record<string, unknown>).__workerData__ ?? {}; + }, +})); + +jest.mock('../common/event-type-translation', () => ({ + translateIncomingEventType: jest.fn((t: string) => t), +})); + +jest.mock('../logger/logger', () => ({ + Logger: jest.fn().mockImplementation(() => ({ + log: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + info: jest.fn(), + logFn: jest.fn(), + })), + serializeError: jest.fn((e: unknown) => String(e)), +})); + +jest.mock('../logger/logger.context', () => ({ + runWithSdkLogContext: jest.fn((fn: () => unknown) => fn()), + runWithUserLogContext: jest.fn((fn: () => unknown) => fn()), +})); + +jest.mock('../state/state', () => ({ + createAdapterState: jest.fn(), +})); + +jest.mock('./worker-adapter/worker-adapter', () => ({ + WorkerAdapter: jest.fn().mockImplementation(() => ({ + isTimeout: false, + hasWorkerEmitted: false, + })), +})); + +import { processTask } from './process-task'; +import { translateIncomingEventType } from '../common/event-type-translation'; +import { createAdapterState } from '../state/state'; +import { WorkerAdapter } from './worker-adapter/worker-adapter'; +import { createMockEvent } from '../common/test-utils'; + +function setWorkerData(data: Record<string, unknown>) { + (global as Record<string, unknown>).__workerData__ = data; +} + +function makeEvent(eventType = EventType.StartExtractingData) { + return createMockEvent('http://localhost:0', { + payload: { event_type: eventType }, + }); +} + +// Flush the microtask queue enough to let the async IIFE inside processTask run.
+const flush = async () => new Promise((r) => setTimeout(r, 0)); + +describe(processTask.name, () => { + let processExitSpy: jest.SpyInstance; + + beforeEach(() => { + jest.clearAllMocks(); + mockIsMainThread = false; + + processExitSpy = jest + .spyOn(process, 'exit') + .mockImplementation((() => {}) as () => never); + + (createAdapterState as jest.Mock).mockResolvedValue({}); + }); + + afterEach(() => { + processExitSpy.mockRestore(); + }); + + it('should translate incoming event type before passing to task', async () => { + // Arrange + const event = makeEvent(EventType.StartExtractingData); + setWorkerData({ event, initialState: {}, options: {} }); + (translateIncomingEventType as jest.Mock).mockReturnValue( + EventType.StartExtractingMetadata + ); + const task = jest.fn().mockResolvedValue(undefined); + const onTimeout = jest.fn().mockResolvedValue(undefined); + + // Act + processTask({ task, onTimeout }); + await flush(); + + // Assert + expect(translateIncomingEventType).toHaveBeenCalledWith( + EventType.StartExtractingData + ); + // The event is mutated in place — downstream code (including task) sees the + // translated type, not the original wire type. + expect(event.payload.event_type).toBe(EventType.StartExtractingMetadata); + }); + + it('should NOT call onTimeout when the worker already emitted before timeout check', async () => { + // Arrange + const event = makeEvent(); + setWorkerData({ event, initialState: {}, options: {} }); + // Both flags true: a timeout arrived but the worker had already emitted — + // onTimeout must be skipped. This is the guard the integration suite cannot + // target cleanly because it requires a precise race between emit and timeout. 
+ const mockAdapter = { isTimeout: true, hasWorkerEmitted: true }; + (WorkerAdapter as jest.Mock).mockImplementation(() => mockAdapter); + const task = jest.fn().mockResolvedValue(undefined); + const onTimeout = jest.fn().mockResolvedValue(undefined); + + // Act + processTask({ task, onTimeout }); + await flush(); + + // Assert + expect(onTimeout).not.toHaveBeenCalled(); + expect(processExitSpy).toHaveBeenCalledWith(0); + }); + + it('should post WorkerMessageFailed with the error message and exit(1) when task throws', async () => { + // Arrange + const event = makeEvent(); + setWorkerData({ event, initialState: {}, options: {} }); + const mockAdapter = { isTimeout: false, hasWorkerEmitted: false }; + (WorkerAdapter as jest.Mock).mockImplementation(() => mockAdapter); + const taskError = new Error('task boom'); + const task = jest.fn().mockRejectedValue(taskError); + const onTimeout = jest.fn().mockResolvedValue(undefined); + + // Act + processTask({ task, onTimeout }); + await flush(); + + // Assert + expect(mockParentPortPostMessage).toHaveBeenCalledWith( + expect.objectContaining({ + subject: WorkerMessageSubject.WorkerMessageFailed, + payload: expect.objectContaining({ + message: expect.stringContaining('task boom'), + }), + }) + ); + expect(processExitSpy).toHaveBeenCalledWith(1); + expect(onTimeout).not.toHaveBeenCalled(); + }); +}); diff --git a/src/multithreading/spawn/spawn.test.ts b/src/multithreading/spawn/spawn.test.ts new file mode 100644 index 0000000..fd56094 --- /dev/null +++ b/src/multithreading/spawn/spawn.test.ts @@ -0,0 +1,250 @@ +import { EventEmitter } from 'events'; +import { EventType } from '../../types/extraction'; +import { WorkerEvent, WorkerMessageSubject } from '../../types/workers'; +import { createMockEvent } from '../../common/test-utils'; + +// --------------------------------------------------------------------------- +// Mocks +// --------------------------------------------------------------------------- + 
+jest.mock('../create-worker', () => ({ + createWorker: jest.fn(), +})); + +jest.mock('../../common/control-protocol', () => ({ + emit: jest.fn().mockResolvedValue({}), +})); + +jest.mock('../../logger/logger', () => ({ + Logger: jest.fn().mockImplementation(() => ({ + log: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + info: jest.fn(), + logFn: jest.fn(), + })), + serializeError: jest.fn((e: unknown) => String(e)), +})); + +jest.mock('../../common/helpers', () => ({ + getLibraryVersion: jest.fn().mockReturnValue('1.0.0-test'), + getMemoryUsage: jest.fn().mockReturnValue({ + formattedMessage: 'Memory: RSS 100/512MB (19.53%) [...]', + rssUsedMB: '100.00', + rssUsedPercent: '19.53%', + heapUsedPercent: '30.00%', + externalMB: '10.00', + arrayBuffersMB: '5.00', + }), + sleep: jest.fn(), + truncateFilename: jest.fn((f: string) => f), + truncateMessage: jest.fn((m: string) => m), +})); + +// --------------------------------------------------------------------------- +// Imports after mocks +// --------------------------------------------------------------------------- +import { spawn, Spawn } from './spawn'; +import { createWorker } from '../create-worker'; +import { emit } from '../../common/control-protocol'; +import { getMemoryUsage } from '../../common/helpers'; + +// --------------------------------------------------------------------------- +// Factory for a fake worker (EventEmitter with postMessage + terminate) +// --------------------------------------------------------------------------- +function makeWorker() { + const w = new EventEmitter() as EventEmitter & { + postMessage: jest.Mock; + terminate: jest.Mock; + }; + w.postMessage = jest.fn(); + w.terminate = jest.fn().mockResolvedValue(0); + return w; +} + +// --------------------------------------------------------------------------- +// Helper: instantiate Spawn directly, injecting a mock logger via console swap +// --------------------------------------------------------------------------- +function 
buildSpawn(overrides: { + worker: ReturnType<typeof makeWorker>; + options?: Record<string, unknown>; + resolve?: () => void; +}): Spawn { + const event = createMockEvent('http://localhost:0', { + payload: { event_type: EventType.StartExtractingData }, + }); + const mockLogger = { + log: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + info: jest.fn(), + logFn: jest.fn(), + }; + const originalConsole = console; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (global as any).console = mockLogger; + const s = new Spawn({ + event, + worker: overrides.worker as never, + options: overrides.options as never, + resolve: overrides.resolve ?? jest.fn(), + originalConsole: originalConsole as Console, + }); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (global as any).console = originalConsole; + return s; +} + +// --------------------------------------------------------------------------- +// spawn() factory tests +// --------------------------------------------------------------------------- + +describe('spawn() factory', () => { + beforeEach(() => { + jest.clearAllMocks(); + jest.useFakeTimers({ legacyFakeTimers: true }); + }); + + afterEach(() => { + jest.useRealTimers(); + }); + + it('should emit a no-script event and NOT spawn a worker for an unknown event type', async () => { + const event = createMockEvent('http://localhost:0', { + payload: { event_type: EventType.UnknownEventType }, + }); + + await spawn({ event, initialState: {} }); + + // No worker process should be started + expect(createWorker).not.toHaveBeenCalled(); + // The platform should still receive a terminal event (so the run doesn't hang) + expect(emit).toHaveBeenCalledWith( + expect.objectContaining({ event, eventType: expect.any(String) }) + ); + }); + + it('should reject the returned promise when createWorker throws', async () => { + (createWorker as jest.Mock).mockRejectedValue(new Error('worker boom')); + + const event = createMockEvent('http://localhost:0', { + payload: { event_type:
EventType.StartExtractingData }, + }); + + await expect( + spawn({ event, initialState: {}, workerPath: '/fake/path.js' }) + ).rejects.toThrow('worker boom'); + }); +}); + +// --------------------------------------------------------------------------- +// Spawn class — lifecycle tests +// --------------------------------------------------------------------------- + +describe('Spawn class', () => { + let worker: ReturnType<typeof makeWorker>; + let resolveMock: jest.Mock; + + beforeEach(() => { + jest.clearAllMocks(); + jest.useFakeTimers({ legacyFakeTimers: true }); + worker = makeWorker(); + resolveMock = jest.fn(); + }); + + afterEach(() => { + jest.useRealTimers(); + }); + + // ------------------------------------------------------------------------- + // WorkerMessageFailed captured and propagated in error emit + // ------------------------------------------------------------------------- + it('should include the WorkerMessageFailed reason in the error event emitted to the platform', async () => { + buildSpawn({ worker, resolve: resolveMock }); + + worker.emit(WorkerEvent.WorkerMessage, { + subject: WorkerMessageSubject.WorkerMessageFailed, + payload: { message: 'connector exploded' }, + }); + worker.emit(WorkerEvent.WorkerExit, 1); + + await Promise.resolve(); + await Promise.resolve(); + + // The platform receives an error event whose message contains the reason + // sent by the worker — this is what operators see in the run log.
+ expect(emit).toHaveBeenCalledWith( + expect.objectContaining({ + data: expect.objectContaining({ + error: expect.objectContaining({ + message: expect.stringContaining('connector exploded'), + }), + }), + }) + ); + expect(resolveMock).toHaveBeenCalled(); + }); + + it('should emit an error event when the worker exits without ever emitting', async () => { + buildSpawn({ worker, resolve: resolveMock }); + + worker.emit(WorkerEvent.WorkerExit, 1); + + await Promise.resolve(); + await Promise.resolve(); + + expect(emit).toHaveBeenCalled(); + expect(resolveMock).toHaveBeenCalled(); + }); + + // Soft-timeout and hard-timeout timer behavior is covered end-to-end by + // src/tests/timeout-handling/ (real workers, real timers). Unit tests with + // fake timers only re-asserted the mocked setTimeout call and gave no signal. + + // ------------------------------------------------------------------------- + // Memory monitoring — error clears the interval + // ------------------------------------------------------------------------- + it('should clear the memory monitoring interval when getMemoryUsage throws to prevent repeated crashes', async () => { + (getMemoryUsage as jest.Mock).mockImplementation(() => { + throw new Error('OOM'); + }); + + const clearIntervalSpy = jest.spyOn(global, 'clearInterval'); + + buildSpawn({ worker, resolve: resolveMock }); + + jest.advanceTimersByTime(30_001); // MEMORY_LOG_INTERVAL = 30 s + await Promise.resolve(); + + expect(clearIntervalSpy).toHaveBeenCalled(); + }); + + // ------------------------------------------------------------------------- + // Soft-timeout race: worker emits AFTER softTimeoutSent — no double-emit + // ------------------------------------------------------------------------- + it('should NOT emit an error when the worker successfully emits just after receiving the soft-timeout signal', async () => { + buildSpawn({ worker, resolve: resolveMock }); + + // Trigger soft timeout — sends WorkerMessageExit to the worker + 
jest.advanceTimersByTime(600_001); + await Promise.resolve(); + + // Worker responds: emits its event successfully, then exits normally + worker.emit(WorkerEvent.WorkerMessage, { + subject: WorkerMessageSubject.WorkerMessageEmitted, + }); + worker.emit(WorkerEvent.WorkerExit, 0); + + // The exit handler defers via setImmediate when softTimeoutSent=true + jest.runAllImmediates(); + await Promise.resolve(); + await Promise.resolve(); + + // No error should reach the platform — the worker completed its job + const errorEmits = (emit as jest.Mock).mock.calls.filter( + (call) => call[0]?.data?.error + ); + expect(errorEmits).toHaveLength(0); + expect(resolveMock).toHaveBeenCalled(); + }); +}); diff --git a/src/multithreading/worker-adapter/worker-adapter.emit.test.ts b/src/multithreading/worker-adapter/worker-adapter.emit.test.ts new file mode 100644 index 0000000..3a4118d --- /dev/null +++ b/src/multithreading/worker-adapter/worker-adapter.emit.test.ts @@ -0,0 +1,602 @@ +import { UNBOUNDED_DATE_TIME_VALUE } from '../../common/constants'; +import { State } from '../../state/state'; +import { mockServer } from '../../tests/jest.setup'; +import { createMockEvent } from '../../common/test-utils'; +import { + AdapterState, + AirdropEvent, + Artifact, + EventType, + ExtractorEventType, + LoaderEventType, +} from '../../types'; +import { ActionType, LoaderReport } from '../../types/loading'; +import { WorkerAdapter } from './worker-adapter'; + +/* eslint-disable @typescript-eslint/no-require-imports */ + +jest.mock('../../common/control-protocol', () => ({ + emit: jest.fn().mockResolvedValue({}), +})); + +jest.mock('../../mappers/mappers'); +jest.mock('../../uploader/uploader'); +jest.mock('../../repo/repo'); +jest.mock('node:worker_threads', () => ({ + parentPort: { postMessage: jest.fn() }, +})); +jest.mock('../../attachments-streaming/attachments-streaming-pool', () => ({ + AttachmentsStreamingPool: jest.fn().mockImplementation(() => ({ + streamAll: 
jest.fn().mockResolvedValue(undefined), + })), +})); + +interface TestState { + attachments: { completed: boolean }; +} + +function makeAdapter(eventType: EventType = EventType.StartExtractingData): { + adapter: WorkerAdapter<TestState>; + event: AirdropEvent; + adapterState: State<TestState>; +} { + const event = createMockEvent(mockServer.baseUrl, { + payload: { event_type: eventType }, + }); + const initialState: AdapterState<TestState> = { + attachments: { completed: false }, + lastSyncStarted: '', + lastSuccessfulSyncStarted: '', + snapInVersionId: '', + toDevRev: { + attachmentsMetadata: { + artifactIds: [], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }, + }; + const adapterState = new State({ event, initialState }); + const adapter = new WorkerAdapter({ event, adapterState }); + return { adapter, event, adapterState }; +} + +describe(`${WorkerAdapter.name}.emit`, () => { + let adapter: WorkerAdapter<TestState>; + let counter: { counter: number }; + let mockPostMessage: jest.Mock; + + beforeEach(() => { + jest.clearAllMocks(); + ({ adapter } = makeAdapter()); + + counter = { counter: 0 }; + const workerThreads = require('node:worker_threads'); + mockPostMessage = jest.fn().mockImplementation(() => { + counter.counter += 1; + }); + if (workerThreads.parentPort) { + jest + .spyOn(workerThreads.parentPort, 'postMessage') + .mockImplementation(mockPostMessage); + } else { + workerThreads.parentPort = { postMessage: mockPostMessage }; + } + }); + + afterEach(() => { + jest.restoreAllMocks(); + }); + + it('should emit only one event when multiple events of same type are sent', async () => { + // Arrange + adapter['adapterState'].postState = jest.fn().mockResolvedValue(undefined); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + + // Act + await adapter.emit(ExtractorEventType.MetadataExtractionError, { + reports: [], + processed_files: [], + }); + await adapter.emit(ExtractorEventType.MetadataExtractionError, { + reports: [], + processed_files: [], + }); + + //
Assert + expect(counter.counter).toBe(1); + }); + + it('should emit only once even when a different event type follows', async () => { + // Arrange + adapter['adapterState'].postState = jest.fn().mockResolvedValue(undefined); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + + // Act + await adapter.emit(ExtractorEventType.MetadataExtractionError, { + reports: [], + processed_files: [], + }); + await adapter.emit(ExtractorEventType.MetadataExtractionError, { + reports: [], + processed_files: [], + }); + await adapter.emit(ExtractorEventType.MetadataExtractionError, { + reports: [], + processed_files: [], + }); + + // Assert + expect(counter.counter).toBe(1); + }); + + it('should correctly emit one event even if postState errors', async () => { + // Arrange + adapter['adapterState'].postState = jest + .fn() + .mockRejectedValue(new Error('postState error')); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + + // Act + await adapter.emit(ExtractorEventType.MetadataExtractionError, { + reports: [], + processed_files: [], + }); + + // Assert + expect(counter.counter).toBe(1); + }); + + it('should correctly emit one event even if uploadAllRepos errors', async () => { + // Arrange + adapter['adapterState'].postState = jest.fn().mockResolvedValue(undefined); + adapter.uploadAllRepos = jest + .fn() + .mockRejectedValue(new Error('uploadAllRepos error')); + + // Act + await adapter.emit(ExtractorEventType.MetadataExtractionError, { + reports: [], + processed_files: [], + }); + + // Assert + expect(counter.counter).toBe(1); + }); + + it('should include artifacts in data for extraction events', async () => { + // Arrange + const { emit: mockEmit } = require('../../common/control-protocol'); + adapter['adapterState'].postState = jest.fn().mockResolvedValue(undefined); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + adapter['_artifacts'] = [ + { id: 'art-1', item_count: 10, item_type: 'issues' }, + ] as Artifact[]; + + 
// Act + await adapter.emit(ExtractorEventType.DataExtractionDone); + + // Assert + expect(mockEmit).toHaveBeenCalledWith( + expect.objectContaining({ + data: expect.objectContaining({ + artifacts: expect.arrayContaining([ + expect.objectContaining({ id: 'art-1' }), + ]), + }), + }) + ); + const callData = mockEmit.mock.calls[0][0].data; + expect(callData).not.toHaveProperty('reports'); + expect(callData).not.toHaveProperty('processed_files'); + }); + + it('should include reports and processed_files in data for loader events', async () => { + // Arrange + const { emit: mockEmit } = require('../../common/control-protocol'); + adapter['adapterState'].postState = jest.fn().mockResolvedValue(undefined); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + adapter['loaderReports'] = [ + { item_type: 'tasks', [ActionType.CREATED]: 5 }, + ] as LoaderReport[]; + adapter['_processedFiles'] = ['file-1', 'file-2']; + + // Act + await adapter.emit(LoaderEventType.DataLoadingDone); + + // Assert + expect(mockEmit).toHaveBeenCalledWith( + expect.objectContaining({ + data: expect.objectContaining({ + reports: expect.arrayContaining([ + expect.objectContaining({ item_type: 'tasks' }), + ]), + processed_files: ['file-1', 'file-2'], + }), + }) + ); + const callData = mockEmit.mock.calls[0][0].data; + expect(callData).not.toHaveProperty('artifacts'); + }); + + it('should not include artifacts, reports, or processed_files for unknown event types', async () => { + // Arrange + const { emit: mockEmit } = require('../../common/control-protocol'); + adapter['adapterState'].postState = jest.fn().mockResolvedValue(undefined); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + adapter['_artifacts'] = [ + { id: 'art-1', item_count: 10, item_type: 'issues' }, + ] as Artifact[]; + adapter['loaderReports'] = [ + { item_type: 'tasks', [ActionType.CREATED]: 5 }, + ] as LoaderReport[]; + adapter['_processedFiles'] = ['file-1']; + + // Act + await 
adapter.emit('SOME_UNKNOWN_EVENT' as ExtractorEventType); + + // Assert + const callData = mockEmit.mock.calls[0][0].data; + expect(callData).not.toHaveProperty('artifacts'); + expect(callData).not.toHaveProperty('reports'); + expect(callData).not.toHaveProperty('processed_files'); + }); + + it('should include artifacts for all ExtractorEventType values', async () => { + // Arrange + const { emit: mockEmit } = require('../../common/control-protocol'); + const extractorEvents = [ + ExtractorEventType.DataExtractionDone, + ExtractorEventType.DataExtractionProgress, + ExtractorEventType.DataExtractionError, + ExtractorEventType.AttachmentExtractionDone, + ExtractorEventType.AttachmentExtractionProgress, + ]; + + for (const eventType of extractorEvents) { + jest.clearAllMocks(); + adapter.hasWorkerEmitted = false; + adapter['adapterState'].postState = jest + .fn() + .mockResolvedValue(undefined); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + + // Act + await adapter.emit(eventType); + + // Assert + const callData = mockEmit.mock.calls[0]?.[0]?.data; + expect(callData).toHaveProperty('artifacts'); + expect(callData).not.toHaveProperty('reports'); + } + }); + + it('should include reports and processed_files for all LoaderEventType values', async () => { + // Arrange + const { emit: mockEmit } = require('../../common/control-protocol'); + const loaderEvents = [ + LoaderEventType.DataLoadingDone, + LoaderEventType.DataLoadingProgress, + LoaderEventType.DataLoadingError, + LoaderEventType.AttachmentLoadingDone, + LoaderEventType.AttachmentLoadingProgress, + ]; + + for (const eventType of loaderEvents) { + jest.clearAllMocks(); + adapter.hasWorkerEmitted = false; + adapter['adapterState'].postState = jest + .fn() + .mockResolvedValue(undefined); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + + // Act + await adapter.emit(eventType); + + // Assert + const callData = mockEmit.mock.calls[0]?.[0]?.data; + 
expect(callData).toHaveProperty('reports'); + expect(callData).toHaveProperty('processed_files'); + expect(callData).not.toHaveProperty('artifacts'); + } + }); + + it('should truncate a long error message, preserving the original prefix', async () => { + // Arrange + adapter['adapterState'].postState = jest.fn().mockResolvedValue(undefined); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + const longMessage = 'E'.repeat(20_000); + + // Act + await adapter.emit(ExtractorEventType.DataExtractionError, { + error: { message: longMessage }, + }); + + // Assert + const { emit: mockEmit } = require('../../common/control-protocol'); + const emittedMessage = mockEmit.mock.calls[0][0].data?.error + ?.message as string; + expect(emittedMessage.length).toBeLessThan(longMessage.length); + expect(emittedMessage.startsWith('E'.repeat(100))).toBe(true); + }); +}); + +describe(`${WorkerAdapter.name}.emit — ExternalSyncUnitExtractionDone legacy path`, () => { + it('should upload ESUs via a repo and strip external_sync_units from the emitted payload', async () => { + // Arrange + const { adapter } = makeAdapter(EventType.StartExtractingExternalSyncUnits); + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + adapter['adapterState'].postState = jest.fn().mockResolvedValue(undefined); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + const pushMock = jest.fn().mockResolvedValue(undefined); + jest.spyOn(adapter, 'initializeRepos'); + jest.spyOn(adapter, 'getRepo').mockReturnValue({ push: pushMock } as never); + const esus = [{ id: 'esu-1' }, { id: 'esu-2' }] as never; + + // Act + await adapter.emit(ExtractorEventType.ExternalSyncUnitExtractionDone, { + external_sync_units: esus, + }); + + // Assert + expect(pushMock).toHaveBeenCalledWith(esus); + // external_sync_units must NOT appear in the payload sent to the platform + // (it would be too large for SQS — that is the entire 
reason this path exists). + const { emit: mockEmit } = require('../../common/control-protocol'); + const emittedData = mockEmit.mock.calls[0][0].data as Record< + string, + unknown + >; + expect(emittedData).not.toHaveProperty('external_sync_units'); + }); +}); + +describe('WorkerAdapter — workersOldest / workersNewest boundary updates', () => { + let adapter: WorkerAdapter; + let mockPostMessage: jest.Mock; + + beforeEach(() => { + jest.clearAllMocks(); + ({ adapter } = makeAdapter()); + + const workerThreads = require('node:worker_threads'); + mockPostMessage = jest.fn(); + if (workerThreads.parentPort) { + jest + .spyOn(workerThreads.parentPort, 'postMessage') + .mockImplementation(mockPostMessage); + } else { + workerThreads.parentPort = { postMessage: mockPostMessage }; + } + + adapter['adapterState'].postState = jest.fn().mockResolvedValue(undefined); + adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); + }); + + afterEach(() => { + jest.restoreAllMocks(); + }); + + async function emitDone( + adapterInstance: WorkerAdapter, + extractionStart: string | undefined, + extractionEnd: string | undefined + ) { + adapterInstance.event.payload.event_context.extract_from = extractionStart; + adapterInstance.event.payload.event_context.extract_to = extractionEnd; + // Reset the emit guard so we can emit multiple times within one test. 
+ adapterInstance['hasWorkerEmitted'] = false; + + await adapterInstance.emit(ExtractorEventType.AttachmentExtractionDone, { + reports: [], + processed_files: [], + }); + } + + describe('initial import with UNBOUNDED start', () => { + it('should set workersOldest to UNBOUNDED_DATE_TIME_VALUE and workersNewest to extraction end', async () => { + await emitDone( + adapter, + UNBOUNDED_DATE_TIME_VALUE, + '2025-06-01T00:00:00.000Z' + ); + + expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); + expect(adapter.state.workersNewest).toBe('2025-06-01T00:00:00.000Z'); + }); + }); + + describe('reconciliation after UNBOUNDED initial import', () => { + it('should NOT overwrite workersOldest when reconciliation start is later than sentinel', async () => { + await emitDone( + adapter, + UNBOUNDED_DATE_TIME_VALUE, + '2025-06-01T00:00:00.000Z' + ); + + await emitDone( + adapter, + '2025-01-01T00:00:00.000Z', + '2025-03-01T00:00:00.000Z' + ); + + expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); + expect(adapter.state.workersNewest).toBe('2025-06-01T00:00:00.000Z'); + }); + + it('should NOT overwrite workersOldest even when reconciliation start is very early', async () => { + await emitDone( + adapter, + UNBOUNDED_DATE_TIME_VALUE, + '2025-06-01T00:00:00.000Z' + ); + + await emitDone( + adapter, + '1980-01-01T00:00:00.000Z', + '1990-01-01T00:00:00.000Z' + ); + + expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); + expect(adapter.state.workersNewest).toBe('2025-06-01T00:00:00.000Z'); + }); + }); + + describe('forward sync after UNBOUNDED initial import', () => { + it('should expand workersNewest forward while preserving workersOldest', async () => { + await emitDone( + adapter, + UNBOUNDED_DATE_TIME_VALUE, + '2025-06-01T00:00:00.000Z' + ); + + await emitDone( + adapter, + '2025-06-01T00:00:00.000Z', + '2025-07-01T00:00:00.000Z' + ); + + expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); + 
expect(adapter.state.workersNewest).toBe('2025-07-01T00:00:00.000Z'); + }); + }); + + describe('reconciliation with end beyond current newest', () => { + it('should expand workersNewest when reconciliation end is later', async () => { + await emitDone( + adapter, + UNBOUNDED_DATE_TIME_VALUE, + '2025-06-01T00:00:00.000Z' + ); + + await emitDone( + adapter, + '2024-01-01T00:00:00.000Z', + '2025-08-01T00:00:00.000Z' + ); + + expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); + expect(adapter.state.workersNewest).toBe('2025-08-01T00:00:00.000Z'); + }); + }); + + describe('first sync with absolute dates (no UNBOUNDED)', () => { + it('should set both boundaries from the extraction range', async () => { + await emitDone( + adapter, + '2025-01-01T00:00:00.000Z', + '2025-03-01T00:00:00.000Z' + ); + + expect(adapter.state.workersOldest).toBe('2025-01-01T00:00:00.000Z'); + expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); + }); + }); + + describe('reconciliation after absolute initial sync', () => { + it('should expand workersOldest backward when reconciliation start is earlier', async () => { + await emitDone( + adapter, + '2025-01-01T00:00:00.000Z', + '2025-03-01T00:00:00.000Z' + ); + + await emitDone( + adapter, + '2024-06-01T00:00:00.000Z', + '2025-02-01T00:00:00.000Z' + ); + + expect(adapter.state.workersOldest).toBe('2024-06-01T00:00:00.000Z'); + expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); + }); + + it('should NOT change boundaries when reconciliation is within existing range', async () => { + await emitDone( + adapter, + '2025-01-01T00:00:00.000Z', + '2025-03-01T00:00:00.000Z' + ); + + await emitDone( + adapter, + '2025-01-15T00:00:00.000Z', + '2025-02-15T00:00:00.000Z' + ); + + expect(adapter.state.workersOldest).toBe('2025-01-01T00:00:00.000Z'); + expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); + }); + + it('should expand both boundaries when reconciliation exceeds both', async () 
=> { + await emitDone( + adapter, + '2025-01-01T00:00:00.000Z', + '2025-03-01T00:00:00.000Z' + ); + + await emitDone( + adapter, + '2024-06-01T00:00:00.000Z', + '2025-09-01T00:00:00.000Z' + ); + + expect(adapter.state.workersOldest).toBe('2024-06-01T00:00:00.000Z'); + expect(adapter.state.workersNewest).toBe('2025-09-01T00:00:00.000Z'); + }); + }); + + describe('multiple forward syncs', () => { + it('should progressively expand workersNewest while preserving workersOldest', async () => { + await emitDone( + adapter, + UNBOUNDED_DATE_TIME_VALUE, + '2025-06-01T00:00:00.000Z' + ); + + await emitDone( + adapter, + '2025-06-01T00:00:00.000Z', + '2025-07-01T00:00:00.000Z' + ); + expect(adapter.state.workersNewest).toBe('2025-07-01T00:00:00.000Z'); + + await emitDone( + adapter, + '2025-07-01T00:00:00.000Z', + '2025-08-01T00:00:00.000Z' + ); + expect(adapter.state.workersNewest).toBe('2025-08-01T00:00:00.000Z'); + + expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); + }); + }); + + describe('non-AttachmentExtractionDone events should NOT update boundaries', () => { + it.each([ + ['DataExtractionDone', ExtractorEventType.DataExtractionDone], + ['DataExtractionProgress', ExtractorEventType.DataExtractionProgress], + ['MetadataExtractionError', ExtractorEventType.MetadataExtractionError], + [ + 'AttachmentExtractionError', + ExtractorEventType.AttachmentExtractionError, + ], + ])('should not update boundaries on %s', async (_label, eventType) => { + adapter.state.workersOldest = '2025-01-01T00:00:00.000Z'; + adapter.state.workersNewest = '2025-03-01T00:00:00.000Z'; + adapter.event.payload.event_context.extract_from = + '2024-01-01T00:00:00.000Z'; + adapter.event.payload.event_context.extract_to = + '2025-12-01T00:00:00.000Z'; + + await adapter.emit(eventType, { + reports: [], + processed_files: [], + }); + + expect(adapter.state.workersOldest).toBe('2025-01-01T00:00:00.000Z'); + expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); + }); + 
}); +}); diff --git a/src/multithreading/worker-adapter/worker-adapter.extraction.test.ts b/src/multithreading/worker-adapter/worker-adapter.extraction.test.ts new file mode 100644 index 0000000..a88f8d7 --- /dev/null +++ b/src/multithreading/worker-adapter/worker-adapter.extraction.test.ts @@ -0,0 +1,900 @@ +import { AttachmentsStreamingPool } from '../../attachments-streaming/attachments-streaming-pool'; +import { State } from '../../state/state'; +import { mockServer } from '../../tests/jest.setup'; +import { createMockEvent } from '../../common/test-utils'; +import { + AdapterState, + AirdropEvent, + Artifact, + EventType, + ExtractorEventType, +} from '../../types'; +import { WorkerAdapter } from './worker-adapter'; + +/* eslint-disable @typescript-eslint/no-require-imports */ + +jest.mock('../../common/control-protocol', () => ({ + emit: jest.fn().mockResolvedValue({}), +})); + +jest.mock('../../mappers/mappers'); +jest.mock('../../uploader/uploader'); +jest.mock('../../repo/repo'); +jest.mock('node:worker_threads', () => ({ + parentPort: { postMessage: jest.fn() }, +})); +jest.mock('../../attachments-streaming/attachments-streaming-pool', () => ({ + AttachmentsStreamingPool: jest.fn().mockImplementation(() => ({ + streamAll: jest.fn().mockResolvedValue(undefined), + })), +})); + +interface TestState { + attachments: { completed: boolean }; +} + +function makeAdapter(eventType: EventType = EventType.StartExtractingData): { + adapter: WorkerAdapter; + event: AirdropEvent; + adapterState: State; +} { + const event = createMockEvent(mockServer.baseUrl, { + payload: { event_type: eventType }, + }); + const initialState: AdapterState = { + attachments: { completed: false }, + lastSyncStarted: '', + lastSuccessfulSyncStarted: '', + snapInVersionId: '', + toDevRev: { + attachmentsMetadata: { + artifactIds: [], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }, + }; + const adapterState = new State({ event, initialState }); + const adapter = new 
WorkerAdapter({ event, adapterState }); + return { adapter, event, adapterState }; +} + +describe(`${WorkerAdapter.name}.streamAttachments`, () => { + let adapter: WorkerAdapter; + + beforeEach(() => { + jest.clearAllMocks(); + ({ adapter } = makeAdapter()); + }); + + it('should process all artifact batches successfully', async () => { + const mockStream = jest.fn(); + + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: ['artifact1', 'artifact2'], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }; + + adapter['uploader'].getAttachmentsFromArtifactId = jest + .fn() + .mockResolvedValueOnce({ + attachments: [ + { + url: 'http://example.com/file1.pdf', + id: 'attachment1', + file_name: 'file1.pdf', + parent_id: 'parent1', + }, + { + url: 'http://example.com/file2.pdf', + id: 'attachment2', + file_name: 'file2.pdf', + parent_id: 'parent2', + }, + ], + }) + .mockResolvedValueOnce({ + attachments: [ + { + url: 'http://example.com/file3.pdf', + id: 'attachment3', + file_name: 'file3.pdf', + parent_id: 'parent3', + }, + ], + }); + + adapter.initializeRepos = jest.fn(); + + const result = await adapter.streamAttachments({ + stream: mockStream, + }); + + expect(adapter.initializeRepos).toHaveBeenCalledWith([ + { itemType: 'ssor_attachment' }, + ]); + expect(adapter.initializeRepos).toHaveBeenCalledTimes(1); + expect( + adapter['uploader'].getAttachmentsFromArtifactId + ).toHaveBeenCalledTimes(2); + + expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toEqual([]); + expect(adapter.state.toDevRev.attachmentsMetadata.lastProcessed).toBe(0); + expect(result).toBeUndefined(); + }); + + it('[edge] should handle invalid batch size by using 1 instead', async () => { + const mockStream = jest.fn(); + + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: ['artifact1'], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }; + + adapter['uploader'].getAttachmentsFromArtifactId = jest + .fn() + 
.mockResolvedValue({ + attachments: [ + { + url: 'http://example.com/file1.pdf', + id: 'attachment1', + file_name: 'file1.pdf', + parent_id: 'parent1', + }, + ], + }); + + adapter.initializeRepos = jest.fn(); + + const result = await adapter.streamAttachments({ + stream: mockStream, + batchSize: 0, + }); + + expect(result).toBeUndefined(); + }); + + it('[edge] should cap batch size to 50 when batchSize is greater than 50', async () => { + const mockStream = jest.fn(); + + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: ['artifact1'], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }; + + adapter['uploader'].getAttachmentsFromArtifactId = jest + .fn() + .mockResolvedValue({ + attachments: [ + { + url: 'http://example.com/file1.pdf', + id: 'attachment1', + file_name: 'file1.pdf', + parent_id: 'parent1', + }, + ], + }); + + adapter.initializeRepos = jest.fn(); + + const result = await adapter.streamAttachments({ + stream: mockStream, + batchSize: 100, + }); + + expect(result).toBeUndefined(); + }); + + it('[edge] should handle empty attachments metadata artifact IDs', async () => { + const mockStream = jest.fn(); + + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: [], + lastProcessed: 0, + }, + }; + + const result = await adapter.streamAttachments({ + stream: mockStream, + }); + + expect(result).toBeUndefined(); + }); + + it('[edge] should handle errors when getting attachments', async () => { + const mockStream = jest.fn(); + + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: ['artifact1'], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }; + + const mockError = new Error('Failed to get attachments'); + adapter['uploader'].getAttachmentsFromArtifactId = jest + .fn() + .mockResolvedValue({ + error: mockError, + }); + + adapter.initializeRepos = jest.fn(); + + const result = await adapter.streamAttachments({ + stream: mockStream, + }); + + expect(result).toEqual({ + error: 
mockError, + }); + }); + + it('[edge] should handle empty attachments array from artifact', async () => { + const mockStream = jest.fn(); + + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: ['artifact1'], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }; + + adapter['uploader'].getAttachmentsFromArtifactId = jest + .fn() + .mockResolvedValue({ + attachments: [], + }); + + adapter.initializeRepos = jest.fn(); + + const result = await adapter.streamAttachments({ + stream: mockStream, + }); + + expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toEqual([]); + expect(result).toBeUndefined(); + }); + + it('should use custom processors when provided', async () => { + const mockStream = jest.fn(); + const mockReducer = jest.fn().mockReturnValue(['custom-reduced']); + const mockIterator = jest.fn().mockResolvedValue({}); + + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: ['artifact1'], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }; + + adapter['uploader'].getAttachmentsFromArtifactId = jest + .fn() + .mockResolvedValue({ + attachments: [{ id: 'attachment1' }], + }); + + adapter.initializeRepos = jest.fn(); + + const result = await adapter.streamAttachments({ + stream: mockStream, + processors: { + reducer: mockReducer, + iterator: mockIterator, + }, + }); + + expect(mockReducer).toHaveBeenCalledWith({ + attachments: [{ id: 'attachment1' }], + adapter: adapter, + batchSize: 1, + }); + expect(mockIterator).toHaveBeenCalledWith({ + reducedAttachments: ['custom-reduced'], + adapter: adapter, + stream: mockStream, + }); + expect(result).toBeUndefined(); + }); + + it('should handle rate limiting from iterator', async () => { + const mockStream = jest.fn(); + + (AttachmentsStreamingPool as jest.Mock).mockImplementationOnce(() => ({ + streamAll: jest.fn().mockResolvedValue({ delay: 30 }), + })); + + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: ['artifact1'], + 
lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }; + + adapter['uploader'].getAttachmentsFromArtifactId = jest + .fn() + .mockResolvedValue({ + attachments: [{ id: 'attachment1' }], + }); + + adapter.initializeRepos = jest.fn(); + + const result = await adapter.streamAttachments({ + stream: mockStream, + }); + + expect(result).toEqual({ delay: 30 }); + expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toEqual([ + 'artifact1', + ]); + }); + + it('should handle error from iterator', async () => { + const mockStream = jest.fn(); + + (AttachmentsStreamingPool as jest.Mock).mockImplementationOnce(() => ({ + streamAll: jest.fn().mockResolvedValue({ + error: 'Mock error', + }), + })); + + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: ['artifact1'], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }; + + adapter['uploader'].getAttachmentsFromArtifactId = jest + .fn() + .mockResolvedValue({ + attachments: [{ id: 'attachment1' }], + }); + + adapter.initializeRepos = jest.fn(); + + const result = await adapter.streamAttachments({ + stream: mockStream, + }); + + expect(result).toEqual({ error: 'Mock error' }); + expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toEqual([ + 'artifact1', + ]); + }); + + it('should emit progress event and exit process on timeout, preserving state for resumption', async () => { + const mockStream = jest.fn(); + + const exitSpy = jest + .spyOn(process, 'exit') + .mockImplementation(() => undefined as never); + + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: ['artifact1', 'artifact2', 'artifact3'], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }; + + adapter['uploader'].getAttachmentsFromArtifactId = jest + .fn() + .mockResolvedValue({ + attachments: [ + { + url: 'http://example.com/file1.pdf', + id: 'attachment1', + file_name: 'file1.pdf', + parent_id: 'parent1', + }, + ], + }); + + (AttachmentsStreamingPool as 
jest.Mock).mockImplementationOnce(() => ({ + streamAll: jest.fn().mockImplementation(() => { + adapter.isTimeout = true; + return {}; + }), + })); + + adapter.initializeRepos = jest.fn(); + + const emitSpy = jest.spyOn(adapter, 'emit').mockResolvedValue(); + + await adapter.streamAttachments({ + stream: mockStream, + }); + + expect(emitSpy).toHaveBeenCalledWith( + ExtractorEventType.AttachmentExtractionProgress + ); + expect(exitSpy).toHaveBeenCalledWith(0); + expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toEqual([ + 'artifact1', + 'artifact2', + 'artifact3', + ]); + expect( + adapter['uploader'].getAttachmentsFromArtifactId + ).toHaveBeenCalledTimes(1); + + exitSpy.mockRestore(); + }); + + it('should reset lastProcessed and attachment IDs list after processing all artifacts', async () => { + const mockStream = jest.fn(); + adapter.state.toDevRev = { + attachmentsMetadata: { + artifactIds: ['artifact1'], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }; + adapter['uploader'].getAttachmentsFromArtifactId = jest + .fn() + .mockResolvedValueOnce({ + attachments: [ + { + url: 'http://example.com/file1.pdf', + id: 'attachment1', + file_name: 'file1.pdf', + parent_id: 'parent1', + }, + { + url: 'http://example.com/file2.pdf', + id: 'attachment2', + file_name: 'file2.pdf', + parent_id: 'parent2', + }, + { + url: 'http://example.com/file3.pdf', + id: 'attachment3', + file_name: 'file3.pdf', + parent_id: 'parent3', + }, + ], + }); + + adapter.processAttachment = jest.fn().mockResolvedValue(null); + + await adapter.streamAttachments({ + stream: mockStream, + }); + + expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toHaveLength( + 0 + ); + expect(adapter.state.toDevRev.attachmentsMetadata.lastProcessed).toBe(0); + }); +}); + +describe(`${WorkerAdapter.name}.processAttachment`, () => { + let adapter: WorkerAdapter; + + beforeEach(() => { + jest.clearAllMocks(); + ({ adapter } = 
makeAdapter(EventType.StartExtractingAttachments));
+    jest.spyOn(console, 'log').mockImplementation(() => {});
+    jest.spyOn(console, 'warn').mockImplementation(() => {});
+    jest.spyOn(console, 'error').mockImplementation(() => {});
+  });
+
+  afterEach(() => {
+    jest.restoreAllMocks();
+  });
+
+  const createMockHttpStream = (headers: Record<string, string> = {}) => ({
+    headers,
+    data: { destroy: jest.fn() },
+  });
+
+  const makeAttachment = (overrides = {}) => ({
+    id: 'att-1',
+    url: 'https://example.com/file.pdf',
+    file_name: 'file.pdf',
+    parent_id: 'parent-1',
+    content_type: 'application/pdf',
+    ...overrides,
+  });
+
+  function setupUploaderHappyPath() {
+    adapter['uploader'].getArtifactUploadUrl = jest.fn().mockResolvedValue({
+      response: {
+        artifact_id: 'art_1',
+        upload_url: 'https://upload',
+        form_data: [],
+      },
+    });
+    adapter['uploader'].streamArtifact = jest
+      .fn()
+      .mockResolvedValue({ response: {} });
+    adapter['uploader'].confirmArtifactUpload = jest
+      .fn()
+      .mockResolvedValue({ response: {} });
+
+    const pushMock = jest.fn().mockResolvedValue(undefined);
+    adapter.getRepo = jest.fn().mockReturnValue({ push: pushMock });
+    return pushMock;
+  }
+
+  // ---- content-type resolution (existing tests) ----
+  it('should use attachment.content_type when provided, ignoring HTTP header', async () => {
+    setupUploaderHappyPath();
+    const mockStream = jest.fn().mockResolvedValue({
+      httpStream: createMockHttpStream({
+        'content-type': 'text/plain',
+        'content-length': '100',
+      }),
+    });
+
+    await adapter.processAttachment(
+      makeAttachment({ content_type: 'application/pdf' }) as never,
+      mockStream
+    );
+
+    expect(adapter['uploader'].getArtifactUploadUrl).toHaveBeenCalledWith(
+      'file.pdf',
+      'application/pdf',
+      100
+    );
+  });
+
+  it('should use HTTP header content-type when attachment.content_type is not set', async () => {
+    setupUploaderHappyPath();
+    const mockStream = jest.fn().mockResolvedValue({
+      httpStream: createMockHttpStream({
+        'content-type': 
'image/jpeg', + 'content-length': '200', + }), + }); + + const attachment = { + id: 'att-2', + url: 'https://example.com/photo.jpg', + file_name: 'photo.jpg', + parent_id: 'parent-2', + }; + + await adapter.processAttachment(attachment as never, mockStream); + + expect(adapter['uploader'].getArtifactUploadUrl).toHaveBeenCalledWith( + 'photo.jpg', + 'image/jpeg', + 200 + ); + }); + + it('should fall back to application/octet-stream when neither content_type nor HTTP header is set', async () => { + setupUploaderHappyPath(); + const mockStream = jest.fn().mockResolvedValue({ + httpStream: createMockHttpStream({}), + }); + + const attachment = { + id: 'att-3', + url: 'https://example.com/file.bin', + file_name: 'file.bin', + parent_id: 'parent-3', + }; + + await adapter.processAttachment(attachment as never, mockStream); + + expect(adapter['uploader'].getArtifactUploadUrl).toHaveBeenCalledWith( + 'file.bin', + 'application/octet-stream', + undefined + ); + }); + + // ---- error paths (ported from coverage file) ---- + it('should return the stream error directly when the stream function returns an error', async () => { + const stream = jest + .fn() + .mockResolvedValue({ error: new Error('stream failed') }); + const result = await adapter.processAttachment( + makeAttachment() as never, + stream + ); + expect(result?.error).toBeDefined(); + }); + + it('should propagate a rate-limit delay from the stream function', async () => { + const stream = jest.fn().mockResolvedValue({ delay: 5 }); + const result = await adapter.processAttachment( + makeAttachment() as never, + stream + ); + expect(result?.delay).toBe(5); + }); + + it('should return an error containing the attachment ID when getArtifactUploadUrl fails', async () => { + const stream = jest + .fn() + .mockResolvedValue({ httpStream: createMockHttpStream() }); + adapter['uploader'].getArtifactUploadUrl = jest + .fn() + .mockResolvedValue({ error: new Error('upload url failed') }); + + const result = await 
adapter.processAttachment( + makeAttachment() as never, + stream + ); + expect(result?.error?.message).toContain('att-1'); + expect(result?.error?.message).toContain('preparing artifact'); + }); + + it('should return an error when streamArtifact fails', async () => { + const stream = jest + .fn() + .mockResolvedValue({ httpStream: createMockHttpStream() }); + adapter['uploader'].getArtifactUploadUrl = jest.fn().mockResolvedValue({ + response: { + artifact_id: 'art-1', + upload_url: 'https://upload', + form_data: [], + }, + }); + adapter['uploader'].streamArtifact = jest + .fn() + .mockResolvedValue({ error: new Error('stream failed') }); + + const result = await adapter.processAttachment( + makeAttachment() as never, + stream + ); + expect(result?.error?.message).toContain('streaming to artifact'); + }); + + it('should return an error when confirmArtifactUpload fails', async () => { + const stream = jest + .fn() + .mockResolvedValue({ httpStream: createMockHttpStream() }); + adapter['uploader'].getArtifactUploadUrl = jest.fn().mockResolvedValue({ + response: { + artifact_id: 'art-1', + upload_url: 'https://upload', + form_data: [], + }, + }); + adapter['uploader'].streamArtifact = jest + .fn() + .mockResolvedValue({ response: {} }); + adapter['uploader'].confirmArtifactUpload = jest + .fn() + .mockResolvedValue({ error: new Error('confirm failed') }); + + const result = await adapter.processAttachment( + makeAttachment() as never, + stream + ); + expect(result?.error?.message).toContain('confirming upload'); + }); + + it.each([ + { inline: true, expected: true }, + { inline: false, expected: false }, + ])( + 'should set inline=$expected on the ssorAttachment when attachment.inline=$inline', + async ({ inline, expected }) => { + const pushMock = setupUploaderHappyPath(); + const stream = jest + .fn() + .mockResolvedValue({ httpStream: createMockHttpStream() }); + + await adapter.processAttachment( + makeAttachment({ inline }) as never, + stream + ); + + const 
ssorItem = pushMock.mock.calls[0][0][0] as Record<string, unknown>;
+      expect(ssorItem.inline).toBe(expected);
+    }
+  );
+
+  it('should return a descriptive error when the stream function returns no httpStream', async () => {
+    const stream = jest.fn().mockResolvedValue({ httpStream: null });
+    const result = await adapter.processAttachment(
+      makeAttachment() as never,
+      stream
+    );
+    expect(result?.error?.message).toContain(
+      'Error while opening attachment stream'
+    );
+  });
+});
+
+describe(`${WorkerAdapter.name}.initializeRepos — event size threshold`, () => {
+  it('should set isTimeout=true once the cumulative artifact payload exceeds EVENT_SIZE_THRESHOLD_BYTES', () => {
+    const { adapter } = makeAdapter();
+    jest.spyOn(console, 'log').mockImplementation(() => {});
+
+    let capturedOnUpload: ((artifact: Artifact) => void) | undefined;
+    const { Repo } = require('../../repo/repo');
+    (Repo as jest.Mock).mockImplementationOnce(
+      (opts: { onUpload: (a: Artifact) => void }) => {
+        capturedOnUpload = opts.onUpload;
+        return { itemType: 'issues', upload: jest.fn(), uploadedArtifacts: [] };
+      }
+    );
+
+    adapter.initializeRepos([{ itemType: 'issues' }]);
+    expect(capturedOnUpload).toBeDefined();
+
+    capturedOnUpload!({
+      id: 'artifact-x',
+      item_count: 1,
+      item_type: 'x'.repeat(200_000),
+    });
+
+    expect(adapter.isTimeout).toBe(true);
+  });
+});
+
+describe(`${WorkerAdapter.name}.getRepo`, () => {
+  it('should return undefined and log an error when the requested repo was never initialised', () => {
+    const { adapter } = makeAdapter();
+    const consoleSpy = jest
+      .spyOn(console, 'error')
+      .mockImplementation(() => {});
+
+    const result = adapter.getRepo('non-existent-type');
+
+    expect(result).toBeUndefined();
+    expect(consoleSpy).toHaveBeenCalledWith(
+      expect.stringContaining('non-existent-type')
+    );
+    consoleSpy.mockRestore();
+  });
+});
+
+describe(`${WorkerAdapter.name}.destroyHttpStream`, () => {
+  let adapter: WorkerAdapter;
+
+  beforeEach(() => {
+    ({ adapter } = 
makeAdapter()); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + }); + + afterEach(() => { + jest.restoreAllMocks(); + }); + + it.each([ + { + label: 'calls destroy() when available', + data: { destroy: jest.fn(), close: jest.fn() }, + expectDestroy: true, + expectClose: false, + }, + { + label: 'calls close() when destroy is not present', + data: { close: jest.fn() }, + expectDestroy: false, + expectClose: true, + }, + { + label: 'does not throw when neither method is present', + data: {}, + expectDestroy: false, + expectClose: false, + }, + { + label: 'does not throw when data is null', + data: null, + expectDestroy: false, + expectClose: false, + }, + ])('$label', ({ data, expectDestroy, expectClose }) => { + const httpStream = { data } as never; + expect(() => adapter['destroyHttpStream'](httpStream)).not.toThrow(); + + if (expectDestroy) { + expect((data as { destroy: jest.Mock }).destroy).toHaveBeenCalled(); + } + if (expectClose) { + expect((data as { close: jest.Mock }).close).toHaveBeenCalled(); + } + }); + + it('should warn without re-throwing when destroy() itself throws', () => { + const warnSpy = jest.spyOn(console, 'warn').mockImplementation(() => {}); + const httpStream = { + data: { + destroy: () => { + throw new Error('stream error'); + }, + }, + }; + expect(() => + adapter['destroyHttpStream'](httpStream as never) + ).not.toThrow(); + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('Error while destroying HTTP stream'), + expect.any(Error) + ); + }); +}); + +describe(`${WorkerAdapter.name} — extractionScope`, () => { + it('should return empty object by default', () => { + const { adapter } = makeAdapter(); + expect(adapter.extractionScope).toEqual({}); + }); + + it('should return extraction scope from adapter state', () => { + const { adapter, adapterState } = makeAdapter(); + const extractionScope = { + tasks: { extract: true }, + users: { extract: false }, + }; + + ( + adapterState as unknown as { + _extractionScope: 
Record; + } + )._extractionScope = extractionScope; + + expect(adapter.extractionScope).toEqual(extractionScope); + }); +}); + +describe(`${WorkerAdapter.name} — shouldExtract`, () => { + it('should return true when extraction scope is empty', () => { + const { adapter } = makeAdapter(); + expect(adapter.shouldExtract('tasks')).toBe(true); + expect(adapter.shouldExtract('users')).toBe(true); + }); + + it('should return true when item type is not in scope', () => { + const { adapter, adapterState } = makeAdapter(); + ( + adapterState as unknown as { + _extractionScope: Record; + } + )._extractionScope = { + tasks: { extract: true }, + }; + expect(adapter.shouldExtract('users')).toBe(true); + }); + + it('should return true when item type has extract: true', () => { + const { adapter, adapterState } = makeAdapter(); + ( + adapterState as unknown as { + _extractionScope: Record; + } + )._extractionScope = { + tasks: { extract: true }, + }; + expect(adapter.shouldExtract('tasks')).toBe(true); + }); + + it('should return false when item type has extract: false', () => { + const { adapter, adapterState } = makeAdapter(); + ( + adapterState as unknown as { + _extractionScope: Record; + } + )._extractionScope = { + tasks: { extract: false }, + users: { extract: true }, + }; + expect(adapter.shouldExtract('tasks')).toBe(false); + expect(adapter.shouldExtract('users')).toBe(true); + }); +}); diff --git a/src/multithreading/worker-adapter/worker-adapter.loading.test.ts b/src/multithreading/worker-adapter/worker-adapter.loading.test.ts new file mode 100644 index 0000000..cb99c83 --- /dev/null +++ b/src/multithreading/worker-adapter/worker-adapter.loading.test.ts @@ -0,0 +1,741 @@ +import { State } from '../../state/state'; +import { mockServer } from '../../tests/jest.setup'; +import { createMockEvent } from '../../common/test-utils'; +import { + AdapterState, + AirdropEvent, + EventType, + LoaderEventType, +} from '../../types'; +import { + ActionType, + 
ExternalSystemAttachment, + ExternalSystemItem, +} from '../../types/loading'; +import { WorkerAdapter } from './worker-adapter'; + +jest.mock('../../common/control-protocol', () => ({ + emit: jest.fn().mockResolvedValue({}), +})); + +jest.mock('../../mappers/mappers'); +jest.mock('../../uploader/uploader'); +jest.mock('../../repo/repo'); +jest.mock('node:worker_threads', () => ({ + parentPort: { postMessage: jest.fn() }, +})); +jest.mock('../../attachments-streaming/attachments-streaming-pool', () => ({ + AttachmentsStreamingPool: jest.fn().mockImplementation(() => ({ + streamAll: jest.fn().mockResolvedValue(undefined), + })), +})); + +interface TestState { + attachments: { completed: boolean }; +} + +function makeAdapter(eventType: EventType): { + adapter: WorkerAdapter; + event: AirdropEvent; + adapterState: State; +} { + const event = createMockEvent(mockServer.baseUrl, { + payload: { event_type: eventType }, + }); + const initialState: AdapterState = { + attachments: { completed: false }, + lastSyncStarted: '', + lastSuccessfulSyncStarted: '', + snapInVersionId: '', + toDevRev: { + attachmentsMetadata: { + artifactIds: [], + lastProcessed: 0, + lastProcessedAttachmentsIdsList: [], + }, + }, + }; + const adapterState = new State({ event, initialState }); + const adapter = new WorkerAdapter({ event, adapterState }); + return { adapter, event, adapterState }; +} + +function makeLoaderItem(devrevId = 'dev-1'): ExternalSystemItem { + return { + id: { devrev: devrevId, external: 'ext-1' }, + created_date: '', + modified_date: '', + data: {}, + }; +} + +function setupLoaderFile( + adapter: WorkerAdapter, + items: ExternalSystemItem[], + itemType = 'tasks' +) { + adapter['adapterState'].state.fromDevRev = { + filesToLoad: [ + { + id: 'artifact-1', + file_name: 'file.json', + itemType, + count: items.length, + lineToProcess: 0, + completed: false, + }, + ], + }; + adapter['uploader'].getJsonObjectByArtifactId = jest + .fn() + .mockResolvedValue({ response: items }); +} 
+ +describe(`${WorkerAdapter.name}.loadItemTypes — timeout and unexpected errors`, () => { + let adapter: WorkerAdapter; + let exitSpy: jest.SpyInstance; + let emitSpy: jest.SpyInstance; + + beforeEach(() => { + jest.clearAllMocks(); + ({ adapter } = makeAdapter(EventType.ContinueLoadingData)); + exitSpy = jest + .spyOn(process, 'exit') + .mockImplementation(() => undefined as never); + emitSpy = jest.spyOn(adapter, 'emit').mockResolvedValue(); + }); + + afterEach(() => { + exitSpy.mockRestore(); + jest.restoreAllMocks(); + }); + + it('should emit DataLoadingProgress and exit on timeout', async () => { + // Arrange + const items = [makeLoaderItem('dev-1'), makeLoaderItem('dev-2')]; + setupLoaderFile(adapter, items); + adapter.isTimeout = true; + const itemTypesToLoad = [ + { itemType: 'tasks', create: jest.fn(), update: jest.fn() }, + ]; + + // Act + await adapter.loadItemTypes({ itemTypesToLoad }); + + // Assert + expect(emitSpy).toHaveBeenCalledWith(LoaderEventType.DataLoadingProgress); + expect(exitSpy).toHaveBeenCalledWith(0); + }); + + it('should emit DataLoadingProgress mid-loop when timeout arrives between items', async () => { + // Arrange + const items = [ + makeLoaderItem('dev-1'), + makeLoaderItem('dev-2'), + makeLoaderItem('dev-3'), + ]; + setupLoaderFile(adapter, items); + exitSpy.mockRestore(); + exitSpy = jest.spyOn(process, 'exit').mockImplementation((() => { + throw new Error('process.exit'); + }) as never); + let loadItemCallCount = 0; + // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/require-await + jest.spyOn(adapter as any, 'loadItem').mockImplementation(async () => { + loadItemCallCount++; + if (loadItemCallCount === 1) { + adapter.isTimeout = true; + } + return { report: { item_type: 'tasks', updated: 1 } }; + }); + const itemTypesToLoad = [ + { itemType: 'tasks', create: jest.fn(), update: jest.fn() }, + ]; + + // Act & Assert + await expect(adapter.loadItemTypes({ itemTypesToLoad })).rejects.toThrow( + 
'process.exit' + ); + expect(loadItemCallCount).toBe(1); + expect(emitSpy).toHaveBeenCalledWith(LoaderEventType.DataLoadingProgress); + }); + + it('should emit DataLoadingError and exit(1) on unexpected error', async () => { + // Arrange + adapter['adapterState'].state.fromDevRev = { + filesToLoad: [ + { + id: 'artifact-1', + file_name: 'file1.json', + itemType: 'tasks', + count: 1, + lineToProcess: 0, + completed: false, + }, + ], + }; + adapter['uploader'].getJsonObjectByArtifactId = jest + .fn() + .mockRejectedValue(new Error('Unexpected network failure')); + const itemTypesToLoad = [ + { itemType: 'tasks', create: jest.fn(), update: jest.fn() }, + ]; + + // Act + await adapter.loadItemTypes({ itemTypesToLoad }); + + // Assert + expect(emitSpy).toHaveBeenCalledWith( + LoaderEventType.DataLoadingError, + expect.objectContaining({ + error: expect.objectContaining({ + message: expect.stringContaining('Error during data loading'), + }), + }) + ); + expect(exitSpy).toHaveBeenCalledWith(1); + }); +}); + +describe(`${WorkerAdapter.name}.loadItemTypes — loadItem branch coverage via public API`, () => { + let adapter: WorkerAdapter; + let emitSpy: jest.SpyInstance; + let exitSpy: jest.SpyInstance; + + const itemTypesToLoad = [ + { itemType: 'tasks', create: jest.fn(), update: jest.fn() }, + ]; + + beforeEach(() => { + jest.clearAllMocks(); + ({ adapter } = makeAdapter(EventType.ContinueLoadingData)); + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + emitSpy = jest.spyOn(adapter, 'emit').mockResolvedValue(); + exitSpy = jest + .spyOn(process, 'exit') + .mockImplementation(() => undefined as never); + + itemTypesToLoad[0].create = jest.fn(); + itemTypesToLoad[0].update = jest.fn(); + }); + + afterEach(() => { + exitSpy.mockRestore(); + jest.restoreAllMocks(); + }); + + it('should accumulate an UPDATED report when the connector updates the item 
and the mapper sync succeeds', async () => { + // Arrange + setupLoaderFile(adapter, [makeLoaderItem('dev-1')]); + adapter['_mappers'].getByTargetId = jest.fn().mockResolvedValue({ + data: { sync_mapper_record: { id: 'smr-1' } }, + }); + adapter['_mappers'].update = jest.fn().mockResolvedValue({ data: {} }); + itemTypesToLoad[0].update = jest + .fn() + .mockResolvedValue({ id: 'ext-updated-1' }); + + // Act + const { reports } = await adapter.loadItemTypes({ itemTypesToLoad }); + + // Assert + expect(reports).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + item_type: 'tasks', + [ActionType.UPDATED]: 1, + }), + ]) + ); + expect(emitSpy).not.toHaveBeenCalled(); + }); + + it('should fall back to create and accumulate a CREATED report when the mapper record does not exist (404)', async () => { + // Arrange + setupLoaderFile(adapter, [makeLoaderItem('dev-2')]); + const axiosError = { isAxiosError: true, response: { status: 404 } }; + adapter['_mappers'].getByTargetId = jest.fn().mockRejectedValue(axiosError); + adapter['_mappers'].create = jest.fn().mockResolvedValue({ data: {} }); + itemTypesToLoad[0].create = jest + .fn() + .mockResolvedValue({ id: 'new-ext-id' }); + + // Act + const { reports } = await adapter.loadItemTypes({ itemTypesToLoad }); + + // Assert + expect(reports).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + item_type: 'tasks', + [ActionType.CREATED]: 1, + }), + ]) + ); + expect(emitSpy).not.toHaveBeenCalled(); + }); + + it('should emit DataLoadingDelayed and stop processing when the connector signals a rate-limit delay', async () => { + // Arrange + setupLoaderFile(adapter, [makeLoaderItem('dev-3')]); + adapter['_mappers'].getByTargetId = jest.fn().mockResolvedValue({ + data: { sync_mapper_record: { id: 'smr-1' } }, + }); + itemTypesToLoad[0].update = jest.fn().mockResolvedValue({ delay: 15 }); + + // Act + await adapter.loadItemTypes({ itemTypesToLoad }); + + // Assert + expect(emitSpy).toHaveBeenCalledWith( + 
LoaderEventType.DataLoadingDelayed, + expect.objectContaining({ delay: 15 }) + ); + }); + + it('should count the item as FAILED when the update succeeds but the mapper sync throws', async () => { + // Arrange + setupLoaderFile(adapter, [makeLoaderItem('dev-4')]); + adapter['_mappers'].getByTargetId = jest.fn().mockResolvedValue({ + data: { sync_mapper_record: { id: 'smr-1' } }, + }); + adapter['_mappers'].update = jest + .fn() + .mockRejectedValue(new Error('mapper down')); + itemTypesToLoad[0].update = jest.fn().mockResolvedValue({ id: 'ext-id' }); + + // Act + const { reports } = await adapter.loadItemTypes({ itemTypesToLoad }); + + // Assert + expect(emitSpy).not.toHaveBeenCalled(); + expect(reports).toBeDefined(); + }); + + it('should not emit for a non-404 Axios error from the mapper (recorded as item-level error)', async () => { + // Arrange + setupLoaderFile(adapter, [makeLoaderItem('dev-5')]); + const axiosError = { + isAxiosError: true, + message: 'internal server error', + response: { status: 500 }, + }; + adapter['_mappers'].getByTargetId = jest.fn().mockRejectedValue(axiosError); + + // Act + await adapter.loadItemTypes({ itemTypesToLoad }); + + // Assert + expect(emitSpy).not.toHaveBeenCalled(); + }); + + it('should handle a null sync_mapper_record gracefully and continue loading', async () => { + // Arrange + setupLoaderFile(adapter, [makeLoaderItem('dev-6')]); + adapter['_mappers'].getByTargetId = jest + .fn() + .mockResolvedValue({ data: null }); + + // Act + const { reports } = await adapter.loadItemTypes({ itemTypesToLoad }); + + // Assert + expect(emitSpy).not.toHaveBeenCalled(); + expect(reports).toBeDefined(); + }); +}); + +describe(`${WorkerAdapter.name}.loadItemTypes — additional branches`, () => { + let adapter: WorkerAdapter; + let emitSpy: jest.SpyInstance; + + beforeEach(() => { + jest.clearAllMocks(); + ({ adapter } = makeAdapter(EventType.ContinueLoadingData)); + jest.spyOn(console, 'log').mockImplementation(() => {}); + 
jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + emitSpy = jest.spyOn(adapter, 'emit').mockResolvedValue(); + }); + + afterEach(() => { + jest.restoreAllMocks(); + }); + + it('should return immediately with empty reports when filesToLoad is empty', async () => { + // Arrange + adapter['adapterState'].state.fromDevRev = { filesToLoad: [] }; + + // Act + const result = await adapter.loadItemTypes({ + itemTypesToLoad: [ + { itemType: 'tasks', create: jest.fn(), update: jest.fn() }, + ], + }); + + // Assert + expect(result.reports).toEqual([]); + expect(emitSpy).not.toHaveBeenCalled(); + }); + + it('should emit DataLoadingError when a file references an item type not in itemTypesToLoad', async () => { + // Arrange + adapter['adapterState'].state.fromDevRev = { + filesToLoad: [ + { + id: 'art-1', + file_name: 'file.json', + itemType: 'unknown-type', + count: 1, + lineToProcess: 0, + completed: false, + }, + ], + }; + adapter['uploader'].getJsonObjectByArtifactId = jest + .fn() + .mockResolvedValue({ response: [makeLoaderItem()] }); + + // Act + await adapter.loadItemTypes({ + itemTypesToLoad: [ + { itemType: 'tasks', create: jest.fn(), update: jest.fn() }, + ], + }); + + // Assert + expect(emitSpy).toHaveBeenCalledWith( + LoaderEventType.DataLoadingError, + expect.objectContaining({ + error: expect.objectContaining({ + message: expect.stringContaining('unknown-type'), + }), + }) + ); + }); +}); + +describe(`${WorkerAdapter.name}.loadAttachments — timeout, transformer errors, unexpected errors`, () => { + let adapter: WorkerAdapter; + let exitSpy: jest.SpyInstance; + let emitSpy: jest.SpyInstance; + + function setupFilesToLoad( + a: WorkerAdapter, + items: ExternalSystemAttachment[] + ) { + a['adapterState'].state.fromDevRev = { + filesToLoad: [ + { + id: 'artifact-1', + file_name: 'attachments.json', + itemType: 'attachment', + count: items.length, + lineToProcess: 0, + completed: false, + }, + ], + 
}; + + a['uploader'].getJsonObjectByArtifactId = jest + .fn() + .mockResolvedValue({ response: items }); + } + + beforeEach(() => { + jest.clearAllMocks(); + ({ adapter } = makeAdapter(EventType.ContinueLoadingAttachments)); + exitSpy = jest + .spyOn(process, 'exit') + .mockImplementation(() => undefined as never); + emitSpy = jest.spyOn(adapter, 'emit').mockResolvedValue(); + }); + + afterEach(() => { + exitSpy.mockRestore(); + jest.restoreAllMocks(); + }); + + it('should emit AttachmentLoadingProgress and exit on timeout', async () => { + // Arrange + const items = [ + { + reference_id: 'ref-1', + parent_type: 'task', + parent_reference_id: 'parent-1', + file_name: 'file.pdf', + file_type: 'application/pdf', + file_size: 100, + url: 'https://example.com/file.pdf', + valid_until: '', + created_by_id: 'user-1', + created_date: '', + modified_by_id: 'user-1', + modified_date: '', + }, + ] as ExternalSystemAttachment[]; + setupFilesToLoad(adapter, items); + adapter.isTimeout = true; + + // Act + await adapter.loadAttachments({ + create: jest.fn(), + }); + + // Assert + expect(emitSpy).toHaveBeenCalledWith( + LoaderEventType.AttachmentLoadingProgress + ); + expect(exitSpy).toHaveBeenCalledWith(0); + }); + + it('should emit AttachmentLoadingError on transformer file error', async () => { + // Arrange + adapter['adapterState'].state.fromDevRev = { + filesToLoad: [ + { + id: 'bad-artifact', + file_name: 'attachments.json', + itemType: 'attachment', + count: 1, + lineToProcess: 0, + completed: false, + }, + ], + }; + adapter['uploader'].getJsonObjectByArtifactId = jest + .fn() + .mockResolvedValue({ + response: null, + error: new Error('Artifact not found'), + }); + + // Act + await adapter.loadAttachments({ + create: jest.fn(), + }); + + // Assert + expect(emitSpy).toHaveBeenCalledWith( + LoaderEventType.AttachmentLoadingError, + expect.objectContaining({ + error: expect.objectContaining({ + message: expect.stringContaining('Transformer file not found'), + }), + }) + ); 
+ }); + + it('should emit AttachmentLoadingError and exit(1) on unexpected error', async () => { + // Arrange + const items = [ + { + reference_id: 'ref-1', + parent_type: 'task', + parent_reference_id: 'parent-1', + file_name: 'file.pdf', + file_type: 'application/pdf', + file_size: 100, + url: 'https://example.com/file.pdf', + valid_until: '', + created_by_id: 'user-1', + created_date: '', + modified_by_id: 'user-1', + modified_date: '', + }, + ] as ExternalSystemAttachment[]; + setupFilesToLoad(adapter, items); + const mockCreate = jest + .fn() + .mockRejectedValue(new Error('Unexpected API failure')); + + // Act + await adapter.loadAttachments({ create: mockCreate }); + + // Assert + expect(emitSpy).toHaveBeenCalledWith( + LoaderEventType.AttachmentLoadingError, + expect.objectContaining({ + error: expect.objectContaining({ + message: expect.stringContaining('Error during attachment loading'), + }), + }) + ); + expect(exitSpy).toHaveBeenCalledWith(1); + }); +}); + +describe(`${WorkerAdapter.name}.loadAttachments — additional branches`, () => { + let adapter: WorkerAdapter; + let emitSpy: jest.SpyInstance; + + beforeEach(() => { + jest.clearAllMocks(); + ({ adapter } = makeAdapter(EventType.ContinueLoadingAttachments)); + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + emitSpy = jest.spyOn(adapter, 'emit').mockResolvedValue(); + }); + + afterEach(() => { + jest.restoreAllMocks(); + }); + + it('should return immediately with empty reports when fromDevRev is not set', async () => { + // Arrange + adapter['adapterState'].state.fromDevRev = undefined; + + // Act + const result = await adapter.loadAttachments({ create: jest.fn() }); + + // Assert + expect(result.reports).toEqual([]); + expect(emitSpy).not.toHaveBeenCalled(); + }); + + it('should emit AttachmentLoadingDelayed and stop the loop when the connector signals a rate-limit 
delay', async () => { + // Arrange + adapter['adapterState'].state.fromDevRev = { + filesToLoad: [ + { + id: 'art-1', + file_name: 'attachments.json', + itemType: 'attachment', + count: 1, + lineToProcess: 0, + completed: false, + }, + ], + }; + adapter['uploader'].getJsonObjectByArtifactId = jest + .fn() + .mockResolvedValue({ + response: [ + { + reference_id: 'ref-1', + parent_type: 'task', + parent_reference_id: 'parent-1', + file_name: 'file.pdf', + file_type: 'application/pdf', + file_size: 100, + url: 'https://example.com/file.pdf', + valid_until: '', + created_by_id: 'user-1', + created_date: '', + modified_by_id: 'user-1', + modified_date: '', + }, + ], + }); + jest + // eslint-disable-next-line @typescript-eslint/no-explicit-any + .spyOn(adapter as any, 'loadAttachment') + .mockResolvedValue({ rateLimit: { delay: 20 } } as never); + + // Act + await adapter.loadAttachments({ create: jest.fn() }); + + // Assert + expect(emitSpy).toHaveBeenCalledWith( + LoaderEventType.AttachmentLoadingDelayed, + expect.objectContaining({ delay: 20 }) + ); + }); +}); + +describe(`${WorkerAdapter.name}.loadAttachment`, () => { + let adapter: WorkerAdapter; + + beforeEach(() => { + jest.clearAllMocks(); + ({ adapter } = makeAdapter(EventType.ContinueLoadingAttachments)); + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + }); + + afterEach(() => { + jest.restoreAllMocks(); + }); + + function makeAttachment(): ExternalSystemAttachment { + return { + reference_id: 'ref-1', + parent_type: 'task', + parent_reference_id: 'parent-1', + file_name: 'file.pdf', + file_type: 'application/pdf', + file_size: 100, + url: 'https://example.com/file.pdf', + valid_until: '', + created_by_id: 'user-1', + created_date: '', + modified_by_id: 'user-1', + modified_date: '', + } as ExternalSystemAttachment; + } + + it('should return a CREATED report when create succeeds', 
async () => { + // Arrange + adapter['_mappers'].create = jest.fn().mockResolvedValue({ data: {} }); + const create = jest.fn().mockResolvedValue({ id: 'att-ext-1' }); + + // Act + const result = await adapter['loadAttachment']({ + item: makeAttachment(), + create, + }); + + // Assert + expect(result.report?.item_type).toBe('attachment'); + expect(result.report?.[ActionType.CREATED]).toBe(1); + }); + + it('should still return CREATED even when mapper create fails — attachment loading is resilient', async () => { + // Arrange + adapter['_mappers'].create = jest + .fn() + .mockRejectedValue(new Error('mapper failed')); + const create = jest.fn().mockResolvedValue({ id: 'att-ext-1' }); + + // Act + const result = await adapter['loadAttachment']({ + item: makeAttachment(), + create, + }); + + // Assert + expect(result.report?.[ActionType.CREATED]).toBe(1); + }); + + it('should propagate rate-limit delay when the connector signals one', async () => { + // Arrange + const create = jest.fn().mockResolvedValue({ delay: 30 }); + + // Act + const result = await adapter['loadAttachment']({ + item: makeAttachment(), + create, + }); + + // Assert + expect(result.rateLimit?.delay).toBe(30); + }); + + it('should return a FAILED report when create returns neither id nor delay', async () => { + // Arrange + const create = jest.fn().mockResolvedValue({ id: null, delay: null }); + + // Act + const result = await adapter['loadAttachment']({ + item: makeAttachment(), + create, + }); + + // Assert + expect(result.report?.[ActionType.FAILED]).toBe(1); + }); +}); diff --git a/src/multithreading/worker-adapter/worker-adapter.serialization.test.ts b/src/multithreading/worker-adapter/worker-adapter.serialization.test.ts new file mode 100644 index 0000000..bf488fd --- /dev/null +++ b/src/multithreading/worker-adapter/worker-adapter.serialization.test.ts @@ -0,0 +1,83 @@ +import { jsonl } from 'js-jsonl'; + +// Pin the serialization contract for items that reach the uploader. 
+// +// The SDK uploads items via Uploader.upload(), which calls jsonl.stringify() on +// the input. That means user `create`/`update` callbacks and normalizers can +// silently produce inputs that fail or lose information at the wire boundary: +// +// - Circular references throw "Converting circular structure to JSON" +// - BigInt values throw "Do not know how to serialize a BigInt" +// - Date objects are converted to ISO strings (information loss: no Date on +// the other side, just a string) +// - undefined fields are dropped (null fields are preserved) +// +// These tests exist to catch regressions in the serialization layer (e.g., +// silently switching to a different serializer that masks BigInt or mangles +// Dates) before they reach production. + +describe('serialization boundary for items uploaded via jsonl', () => { + it('throws when an item contains a circular reference', () => { + // Arrange + const item: Record = { id: 'a' }; + item.self = item; + + // Act & Assert + expect(() => jsonl.stringify([item])).toThrow(/circular/i); + }); + + it('throws when an item contains a BigInt field', () => { + // Arrange + const item = { id: 'a', counter: BigInt(1) }; + + // Act & Assert + expect(() => jsonl.stringify([item])).toThrow(/BigInt/i); + }); + + it('serializes Date instances to ISO strings (information loss — consumer receives a string)', () => { + // Arrange + const item = { + id: 'a', + created: new Date('2025-01-01T00:00:00.000Z'), + }; + + // Act + const output = jsonl.stringify([item]); + const parsed = JSON.parse(output) as Record; + + // Assert + expect(parsed.created).toBe('2025-01-01T00:00:00.000Z'); + expect(typeof parsed.created).toBe('string'); + }); + + it('drops undefined fields but preserves null fields', () => { + // Arrange + const item = { + id: 'a', + present: null, + missing: undefined, + }; + + // Act + const output = jsonl.stringify([item]); + const parsed = JSON.parse(output) as Record; + + // Assert + expect(parsed).toEqual({ id: 
'a', present: null }); + expect(parsed).not.toHaveProperty('missing'); + }); + + it('emits one newline-terminated line per item (jsonl format)', () => { + // Arrange + const items = [{ id: 'a' }, { id: 'b' }, { id: 'c' }]; + + // Act + const output = jsonl.stringify(items); + const lines = output.split('\n').filter((l) => l.length > 0); + + // Assert + expect(lines).toHaveLength(3); + expect(JSON.parse(lines[0])).toEqual({ id: 'a' }); + expect(JSON.parse(lines[2])).toEqual({ id: 'c' }); + }); +}); diff --git a/src/multithreading/worker-adapter/worker-adapter.test.ts b/src/multithreading/worker-adapter/worker-adapter.test.ts deleted file mode 100644 index 0148a38..0000000 --- a/src/multithreading/worker-adapter/worker-adapter.test.ts +++ /dev/null @@ -1,1619 +0,0 @@ -import { AttachmentsStreamingPool } from '../../attachments-streaming/attachments-streaming-pool'; -import { UNBOUNDED_DATE_TIME_VALUE } from '../../common/constants'; -import { State } from '../../state/state'; -import { mockServer } from '../../tests/jest.setup'; -import { createMockEvent } from '../../common/test-utils'; -import { - AdapterState, - AirdropEvent, - Artifact, - EventType, - ExtractorEventType, - LoaderEventType, -} from '../../types'; -import { - ActionType, - ExternalSystemAttachment, - ExternalSystemItem, - LoaderReport, -} from '../../types/loading'; -import { WorkerAdapter } from './worker-adapter'; - -/* eslint-disable @typescript-eslint/no-require-imports */ - -// Mock dependencies -jest.mock('../../common/control-protocol', () => ({ - emit: jest.fn().mockResolvedValue({}), -})); - -// const mockPostState = jest.spyOn(State.prototype, 'postState').mockResolvedValue(); // Mock to resolve void -// const mockFetchState = jest.spyOn(State.prototype, 'fetchState').mockResolvedValue({}); // Mock to resolve a default state - -jest.mock('../../mappers/mappers'); -jest.mock('../../uploader/uploader'); -// jest.mock('../../state/state'); -jest.mock('../../repo/repo'); 
-jest.mock('node:worker_threads', () => ({ - parentPort: { - postMessage: jest.fn(), - }, -})); -jest.mock('../../attachments-streaming/attachments-streaming-pool', () => { - return { - AttachmentsStreamingPool: jest.fn().mockImplementation(() => { - return { - streamAll: jest.fn().mockResolvedValue(undefined), - }; - }), - }; -}); - -describe(WorkerAdapter.name, () => { - interface TestState { - attachments: { completed: boolean }; - } - - let adapter: WorkerAdapter; - let mockEvent: AirdropEvent; - let mockAdapterState: State; - - beforeEach(() => { - // Reset all mocks - jest.clearAllMocks(); - - // Create mock objects - mockEvent = createMockEvent(mockServer.baseUrl, { - payload: { event_type: EventType.StartExtractingData }, - }); - - const initialState: AdapterState = { - attachments: { completed: false }, - lastSyncStarted: '', - lastSuccessfulSyncStarted: '', - snapInVersionId: '', - toDevRev: { - attachmentsMetadata: { - artifactIds: [], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }, - }; - - mockAdapterState = new State({ - event: mockEvent, - initialState: initialState, - }); - - // Create the adapter instance - adapter = new WorkerAdapter({ - event: mockEvent, - adapterState: mockAdapterState, - }); - }); - - describe(WorkerAdapter.prototype.streamAttachments.name, () => { - it('should process all artifact batches successfully', async () => { - const mockStream = jest.fn(); - - // Set up adapter state with artifact IDs - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: ['artifact1', 'artifact2'], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }; - - // Mock getting attachments from each artifact - adapter['uploader'].getAttachmentsFromArtifactId = jest - .fn() - .mockResolvedValueOnce({ - attachments: [ - { - url: 'http://example.com/file1.pdf', - id: 'attachment1', - file_name: 'file1.pdf', - parent_id: 'parent1', - }, - { - url: 'http://example.com/file2.pdf', - id: 'attachment2', - file_name: 
'file2.pdf', - parent_id: 'parent2', - }, - ], - }) - .mockResolvedValueOnce({ - attachments: [ - { - url: 'http://example.com/file3.pdf', - id: 'attachment3', - file_name: 'file3.pdf', - parent_id: 'parent3', - }, - ], - }); - - // Mock the initializeRepos method - adapter.initializeRepos = jest.fn(); - - const result = await adapter.streamAttachments({ - stream: mockStream, - }); - - expect(adapter.initializeRepos).toHaveBeenCalledWith([ - { itemType: 'ssor_attachment' }, - ]); - expect(adapter.initializeRepos).toHaveBeenCalledTimes(1); - expect( - adapter['uploader'].getAttachmentsFromArtifactId - ).toHaveBeenCalledTimes(2); - - // Verify state was updated correctly - expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toEqual( - [] - ); - expect(adapter.state.toDevRev.attachmentsMetadata.lastProcessed).toBe(0); - expect(result).toBeUndefined(); - }); - - it('[edge] should handle invalid batch size by using 1 instead', async () => { - const mockStream = jest.fn(); - - // Set up adapter state with artifact IDs - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: ['artifact1'], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }; - - // Mock getting attachments - adapter['uploader'].getAttachmentsFromArtifactId = jest - .fn() - .mockResolvedValue({ - attachments: [ - { - url: 'http://example.com/file1.pdf', - id: 'attachment1', - file_name: 'file1.pdf', - parent_id: 'parent1', - }, - ], - }); - - adapter.initializeRepos = jest.fn(); - - const result = await adapter.streamAttachments({ - stream: mockStream, - batchSize: 0, - }); - - expect(result).toBeUndefined(); - }); - - it('[edge] should cap batch size to 50 when batchSize is greater than 50', async () => { - const mockStream = jest.fn(); - - // Set up adapter state with artifact IDs - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: ['artifact1'], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }; - - // Mock getting attachments - 
adapter['uploader'].getAttachmentsFromArtifactId = jest - .fn() - .mockResolvedValue({ - attachments: [ - { - url: 'http://example.com/file1.pdf', - id: 'attachment1', - file_name: 'file1.pdf', - parent_id: 'parent1', - }, - ], - }); - - // Mock the required methods - adapter.initializeRepos = jest.fn(); - - const result = await adapter.streamAttachments({ - stream: mockStream, - batchSize: 100, // Set batch size greater than 50 - }); - - expect(result).toBeUndefined(); - }); - - it('[edge] should handle empty attachments metadata artifact IDs', async () => { - const mockStream = jest.fn(); - - // Set up adapter state with no artifact IDs - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: [], - lastProcessed: 0, - }, - }; - - const result = await adapter.streamAttachments({ - stream: mockStream, - }); - - expect(result).toBeUndefined(); - }); - - it('[edge] should handle errors when getting attachments', async () => { - const mockStream = jest.fn(); - - // Set up adapter state with artifact IDs - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: ['artifact1'], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }; - - // Mock error when getting attachments - const mockError = new Error('Failed to get attachments'); - adapter['uploader'].getAttachmentsFromArtifactId = jest - .fn() - .mockResolvedValue({ - error: mockError, - }); - - // Mock methods - adapter.initializeRepos = jest.fn(); - - const result = await adapter.streamAttachments({ - stream: mockStream, - }); - - expect(result).toEqual({ - error: mockError, - }); - }); - - it('[edge] should handle empty attachments array from artifact', async () => { - const mockStream = jest.fn(); - - // Set up adapter state with artifact IDs - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: ['artifact1'], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }; - - // Mock getting empty attachments - 
adapter['uploader'].getAttachmentsFromArtifactId = jest - .fn() - .mockResolvedValue({ - attachments: [], - }); - - // Mock methods - adapter.initializeRepos = jest.fn(); - - const result = await adapter.streamAttachments({ - stream: mockStream, - }); - - expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toEqual( - [] - ); - expect(result).toBeUndefined(); - }); - - it('should use custom processors when provided', async () => { - const mockStream = jest.fn(); - const mockReducer = jest.fn().mockReturnValue(['custom-reduced']); - const mockIterator = jest.fn().mockResolvedValue({}); - - // Set up adapter state with artifact IDs - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: ['artifact1'], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }; - - // Mock getting attachments - adapter['uploader'].getAttachmentsFromArtifactId = jest - .fn() - .mockResolvedValue({ - attachments: [{ id: 'attachment1' }], - }); - - // Mock methods - adapter.initializeRepos = jest.fn(); - - const result = await adapter.streamAttachments({ - stream: mockStream, - processors: { - reducer: mockReducer, - iterator: mockIterator, - }, - }); - - expect(mockReducer).toHaveBeenCalledWith({ - attachments: [{ id: 'attachment1' }], - adapter: adapter, - batchSize: 1, - }); - expect(mockIterator).toHaveBeenCalledWith({ - reducedAttachments: ['custom-reduced'], - adapter: adapter, - stream: mockStream, - }); - expect(result).toBeUndefined(); - }); - - it('should handle rate limiting from iterator', async () => { - const mockStream = jest.fn(); - - (AttachmentsStreamingPool as jest.Mock).mockImplementationOnce(() => { - return { - // Return an object with a `streamAll` method that resolves to your desired value. 
- streamAll: jest.fn().mockResolvedValue({ delay: 30 }), - }; - }); - - // Set up adapter state with artifact IDs - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: ['artifact1'], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }; - - // Mock getting attachments - adapter['uploader'].getAttachmentsFromArtifactId = jest - .fn() - .mockResolvedValue({ - attachments: [{ id: 'attachment1' }], - }); - - // Mock methods - adapter.initializeRepos = jest.fn(); - - const result = await adapter.streamAttachments({ - stream: mockStream, - }); - - expect(result).toEqual({ - delay: 30, - }); - // The artifactIds array should remain unchanged - expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toEqual([ - 'artifact1', - ]); - }); - - it('should handle error from iterator', async () => { - const mockStream = jest.fn(); - - (AttachmentsStreamingPool as jest.Mock).mockImplementationOnce(() => { - return { - // Return an object with a `streamAll` method that resolves to your desired value. 
- streamAll: jest.fn().mockResolvedValue({ - error: 'Mock error', - }), - }; - }); - - // Set up adapter state with artifact IDs - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: ['artifact1'], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }; - - // Mock getting attachments - adapter['uploader'].getAttachmentsFromArtifactId = jest - .fn() - .mockResolvedValue({ - attachments: [{ id: 'attachment1' }], - }); - - // Mock methods - adapter.initializeRepos = jest.fn(); - - const result = await adapter.streamAttachments({ - stream: mockStream, - }); - - expect(result).toEqual({ - error: 'Mock error', - }); - // The artifactIds array should remain unchanged - expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toEqual([ - 'artifact1', - ]); - }); - - it('should emit progress event and exit process on timeout, preserving state for resumption', async () => { - const mockStream = jest.fn(); - - // Mock process.exit to prevent it from killing the test runner - const exitSpy = jest - .spyOn(process, 'exit') - .mockImplementation(() => undefined as never); - - // Set up adapter state with multiple artifact IDs - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: ['artifact1', 'artifact2', 'artifact3'], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }; - - // Mock getting attachments for each artifact - adapter['uploader'].getAttachmentsFromArtifactId = jest - .fn() - .mockResolvedValue({ - attachments: [ - { - url: 'http://example.com/file1.pdf', - id: 'attachment1', - file_name: 'file1.pdf', - parent_id: 'parent1', - }, - ], - }); - - // Mock the pool to simulate timeout happening during the first artifact - (AttachmentsStreamingPool as jest.Mock).mockImplementationOnce(() => { - return { - streamAll: jest.fn().mockImplementation(() => { - adapter.isTimeout = true; - return {}; - }), - }; - }); - - adapter.initializeRepos = jest.fn(); - - // Mock emit to verify it's called with progress event 
- const emitSpy = jest.spyOn(adapter, 'emit').mockResolvedValue(); - - await adapter.streamAttachments({ - stream: mockStream, - }); - - // Should have emitted progress event - expect(emitSpy).toHaveBeenCalledWith( - ExtractorEventType.AttachmentExtractionProgress - ); - - // Should have called process.exit(0) - expect(exitSpy).toHaveBeenCalledWith(0); - - // The current artifact should NOT be removed from the list - expect(adapter.state.toDevRev.attachmentsMetadata.artifactIds).toEqual([ - 'artifact1', - 'artifact2', - 'artifact3', - ]); - - // Only the first artifact should have been fetched - expect( - adapter['uploader'].getAttachmentsFromArtifactId - ).toHaveBeenCalledTimes(1); - - exitSpy.mockRestore(); - }); - - it('should reset lastProcessed and attachment IDs list after processing all artifacts', async () => { - const mockStream = jest.fn(); - adapter.state.toDevRev = { - attachmentsMetadata: { - artifactIds: ['artifact1'], - lastProcessed: 0, - lastProcessedAttachmentsIdsList: [], - }, - }; - adapter['uploader'].getAttachmentsFromArtifactId = jest - .fn() - .mockResolvedValueOnce({ - attachments: [ - { - url: 'http://example.com/file1.pdf', - id: 'attachment1', - file_name: 'file1.pdf', - parent_id: 'parent1', - }, - { - url: 'http://example.com/file2.pdf', - id: 'attachment2', - file_name: 'file2.pdf', - parent_id: 'parent2', - }, - { - url: 'http://example.com/file3.pdf', - id: 'attachment3', - file_name: 'file3.pdf', - parent_id: 'parent3', - }, - ], - }); - - adapter.processAttachment = jest.fn().mockResolvedValue(null); - - await adapter.streamAttachments({ - stream: mockStream, - }); - - expect( - adapter.state.toDevRev.attachmentsMetadata.artifactIds - ).toHaveLength(0); - expect(adapter.state.toDevRev.attachmentsMetadata.lastProcessed).toBe(0); - }); - }); - - describe(WorkerAdapter.prototype.processAttachment.name, () => { - const createMockHttpStream = (headers: Record = {}) => ({ - headers, - data: { destroy: jest.fn() }, - }); - - 
beforeEach(() => { - adapter.initializeRepos([{ itemType: 'ssor_attachment' }]); - - const mockRepo = { push: jest.fn().mockResolvedValue(undefined) }; - adapter.getRepo = jest.fn().mockReturnValue(mockRepo); - }); - - it('should use attachment.content_type when provided, ignoring HTTP header', async () => { - const mockStream = jest.fn().mockResolvedValue({ - httpStream: createMockHttpStream({ - 'content-type': 'text/plain', - 'content-length': '100', - }), - }); - - adapter['uploader'].getArtifactUploadUrl = jest.fn().mockResolvedValue({ - response: { - artifact_id: 'art_1', - upload_url: 'https://upload', - form_data: [], - }, - }); - adapter['uploader'].streamArtifact = jest - .fn() - .mockResolvedValue({ response: {} }); - adapter['uploader'].confirmArtifactUpload = jest - .fn() - .mockResolvedValue({ response: {} }); - - const attachment = { - id: 'att-1', - url: 'https://example.com/file.pdf', - file_name: 'file.pdf', - parent_id: 'parent-1', - content_type: 'application/pdf', - }; - - await adapter.processAttachment(attachment, mockStream); - - expect(adapter['uploader'].getArtifactUploadUrl).toHaveBeenCalledWith( - 'file.pdf', - 'application/pdf', - 100 - ); - }); - - it('should use HTTP header content-type when attachment.content_type is not set', async () => { - const mockStream = jest.fn().mockResolvedValue({ - httpStream: createMockHttpStream({ - 'content-type': 'image/jpeg', - 'content-length': '200', - }), - }); - - adapter['uploader'].getArtifactUploadUrl = jest.fn().mockResolvedValue({ - response: { - artifact_id: 'art_2', - upload_url: 'https://upload', - form_data: [], - }, - }); - adapter['uploader'].streamArtifact = jest - .fn() - .mockResolvedValue({ response: {} }); - adapter['uploader'].confirmArtifactUpload = jest - .fn() - .mockResolvedValue({ response: {} }); - - const attachment = { - id: 'att-2', - url: 'https://example.com/photo.jpg', - file_name: 'photo.jpg', - parent_id: 'parent-2', - }; - - await 
adapter.processAttachment(attachment, mockStream); - - expect(adapter['uploader'].getArtifactUploadUrl).toHaveBeenCalledWith( - 'photo.jpg', - 'image/jpeg', - 200 - ); - }); - - it('should fall back to application/octet-stream when neither content_type nor HTTP header is set', async () => { - const mockStream = jest.fn().mockResolvedValue({ - httpStream: createMockHttpStream({}), - }); - - adapter['uploader'].getArtifactUploadUrl = jest.fn().mockResolvedValue({ - response: { - artifact_id: 'art_3', - upload_url: 'https://upload', - form_data: [], - }, - }); - adapter['uploader'].streamArtifact = jest - .fn() - .mockResolvedValue({ response: {} }); - adapter['uploader'].confirmArtifactUpload = jest - .fn() - .mockResolvedValue({ response: {} }); - - const attachment = { - id: 'att-3', - url: 'https://example.com/file.bin', - file_name: 'file.bin', - parent_id: 'parent-3', - }; - - await adapter.processAttachment(attachment, mockStream); - - expect(adapter['uploader'].getArtifactUploadUrl).toHaveBeenCalledWith( - 'file.bin', - 'application/octet-stream', - undefined - ); - }); - }); - - describe(WorkerAdapter.prototype.emit.name, () => { - let counter: { counter: number }; - let mockPostMessage: jest.Mock; - - beforeEach(() => { - counter = { counter: 0 }; - - // Import the worker_threads module and spy on parentPort.postMessage - const workerThreads = require('node:worker_threads'); - mockPostMessage = jest.fn().mockImplementation(() => { - counter.counter += 1; - }); - - // Spy on the parentPort.postMessage method - if (workerThreads.parentPort) { - jest - .spyOn(workerThreads.parentPort, 'postMessage') - .mockImplementation(mockPostMessage); - } else { - // If parentPort is null (not in worker context), create a mock - workerThreads.parentPort = { - postMessage: mockPostMessage, - }; - } - }); - - afterEach(() => { - // Restore all mocks - jest.restoreAllMocks(); - }); - - it('should emit only one event when multiple events of same type are sent', async () => { - 
adapter['adapterState'].postState = jest - .fn() - .mockResolvedValue(undefined); - adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); - - await adapter.emit(ExtractorEventType.MetadataExtractionError, { - reports: [], - processed_files: [], - }); - await adapter.emit(ExtractorEventType.MetadataExtractionError, { - reports: [], - processed_files: [], - }); - - expect(counter.counter).toBe(1); - }); - - it('should emit event when different event type is sent after previous events', async () => { - adapter['adapterState'].postState = jest - .fn() - .mockResolvedValue(undefined); - adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); - - await adapter.emit(ExtractorEventType.MetadataExtractionError, { - reports: [], - processed_files: [], - }); - await adapter.emit(ExtractorEventType.MetadataExtractionError, { - reports: [], - processed_files: [], - }); - await adapter.emit(ExtractorEventType.MetadataExtractionError, { - reports: [], - processed_files: [], - }); - - expect(counter.counter).toBe(1); - }); - - it('should correctly emit one event even if postState errors', async () => { - adapter['adapterState'].postState = jest - .fn() - .mockRejectedValue(new Error('postState error')); - adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); - - await adapter.emit(ExtractorEventType.MetadataExtractionError, { - reports: [], - processed_files: [], - }); - expect(counter.counter).toBe(1); - }); - - it('should correctly emit one event even if uploadAllRepos errors', async () => { - adapter['adapterState'].postState = jest - .fn() - .mockResolvedValue(undefined); - adapter.uploadAllRepos = jest - .fn() - .mockRejectedValue(new Error('uploadAllRepos error')); - - await adapter.emit(ExtractorEventType.MetadataExtractionError, { - reports: [], - processed_files: [], - }); - expect(counter.counter).toBe(1); - }); - - it('should include artifacts in data for extraction events', async () => { - const { emit: mockEmit } = 
require('../../common/control-protocol'); - adapter['adapterState'].postState = jest - .fn() - .mockResolvedValue(undefined); - adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); - adapter['_artifacts'] = [ - { id: 'art-1', item_count: 10, item_type: 'issues' }, - ] as Artifact[]; - - await adapter.emit(ExtractorEventType.DataExtractionDone); - - expect(mockEmit).toHaveBeenCalledWith( - expect.objectContaining({ - data: expect.objectContaining({ - artifacts: expect.arrayContaining([ - expect.objectContaining({ id: 'art-1' }), - ]), - }), - }) - ); - // Should not include loader-specific fields - const callData = mockEmit.mock.calls[0][0].data; - expect(callData).not.toHaveProperty('reports'); - expect(callData).not.toHaveProperty('processed_files'); - }); - - it('should include reports and processed_files in data for loader events', async () => { - const { emit: mockEmit } = require('../../common/control-protocol'); - adapter['adapterState'].postState = jest - .fn() - .mockResolvedValue(undefined); - adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); - adapter['loaderReports'] = [ - { item_type: 'tasks', [ActionType.CREATED]: 5 }, - ] as LoaderReport[]; - adapter['_processedFiles'] = ['file-1', 'file-2']; - - await adapter.emit(LoaderEventType.DataLoadingDone); - - expect(mockEmit).toHaveBeenCalledWith( - expect.objectContaining({ - data: expect.objectContaining({ - reports: expect.arrayContaining([ - expect.objectContaining({ item_type: 'tasks' }), - ]), - processed_files: ['file-1', 'file-2'], - }), - }) - ); - // Should not include extraction-specific fields - const callData = mockEmit.mock.calls[0][0].data; - expect(callData).not.toHaveProperty('artifacts'); - }); - - it('should not include artifacts, reports, or processed_files for unknown event types', async () => { - const { emit: mockEmit } = require('../../common/control-protocol'); - adapter['adapterState'].postState = jest - .fn() - .mockResolvedValue(undefined); - 
adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); - adapter['_artifacts'] = [ - { id: 'art-1', item_count: 10, item_type: 'issues' }, - ] as Artifact[]; - adapter['loaderReports'] = [ - { item_type: 'tasks', [ActionType.CREATED]: 5 }, - ] as LoaderReport[]; - adapter['_processedFiles'] = ['file-1']; - - await adapter.emit('SOME_UNKNOWN_EVENT' as ExtractorEventType); - - const callData = mockEmit.mock.calls[0][0].data; - expect(callData).not.toHaveProperty('artifacts'); - expect(callData).not.toHaveProperty('reports'); - expect(callData).not.toHaveProperty('processed_files'); - }); - - it('should include artifacts for all ExtractorEventType values', async () => { - const { emit: mockEmit } = require('../../common/control-protocol'); - - const extractorEvents = [ - ExtractorEventType.DataExtractionDone, - ExtractorEventType.DataExtractionProgress, - ExtractorEventType.DataExtractionError, - ExtractorEventType.AttachmentExtractionDone, - ExtractorEventType.AttachmentExtractionProgress, - ]; - - for (const eventType of extractorEvents) { - jest.clearAllMocks(); - adapter.hasWorkerEmitted = false; - adapter['adapterState'].postState = jest - .fn() - .mockResolvedValue(undefined); - adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); - - await adapter.emit(eventType); - - const callData = mockEmit.mock.calls[0]?.[0]?.data; - expect(callData).toHaveProperty('artifacts'); - expect(callData).not.toHaveProperty('reports'); - } - }); - - it('should include reports and processed_files for all LoaderEventType values', async () => { - const { emit: mockEmit } = require('../../common/control-protocol'); - - const loaderEvents = [ - LoaderEventType.DataLoadingDone, - LoaderEventType.DataLoadingProgress, - LoaderEventType.DataLoadingError, - LoaderEventType.AttachmentLoadingDone, - LoaderEventType.AttachmentLoadingProgress, - ]; - - for (const eventType of loaderEvents) { - jest.clearAllMocks(); - adapter.hasWorkerEmitted = false; - 
adapter['adapterState'].postState = jest - .fn() - .mockResolvedValue(undefined); - adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); - - await adapter.emit(eventType); - - const callData = mockEmit.mock.calls[0]?.[0]?.data; - expect(callData).toHaveProperty('reports'); - expect(callData).toHaveProperty('processed_files'); - expect(callData).not.toHaveProperty('artifacts'); - } - }); - }); - - describe('workersOldest / workersNewest boundary updates', () => { - let mockPostMessage: jest.Mock; - - beforeEach(() => { - const workerThreads = require('node:worker_threads'); - mockPostMessage = jest.fn(); - if (workerThreads.parentPort) { - jest - .spyOn(workerThreads.parentPort, 'postMessage') - .mockImplementation(mockPostMessage); - } else { - workerThreads.parentPort = { postMessage: mockPostMessage }; - } - - adapter['adapterState'].postState = jest - .fn() - .mockResolvedValue(undefined); - adapter.uploadAllRepos = jest.fn().mockResolvedValue(undefined); - }); - - afterEach(() => { - jest.restoreAllMocks(); - }); - - /** - * Helper: sets extract_from and extract_to on the event context, - * resets the emit guard so the adapter can emit again, then emits - * AttachmentExtractionDone. 
- */ - async function emitDone( - adapterInstance: WorkerAdapter<{ attachments: { completed: boolean } }>, - extractionStart: string | undefined, - extractionEnd: string | undefined - ) { - adapterInstance.event.payload.event_context.extract_from = - extractionStart; - adapterInstance.event.payload.event_context.extract_to = extractionEnd; - // Reset the emit guard so we can emit multiple times in a single test - adapterInstance['hasWorkerEmitted'] = false; - - await adapterInstance.emit(ExtractorEventType.AttachmentExtractionDone, { - reports: [], - processed_files: [], - }); - } - - describe('initial import with UNBOUNDED start', () => { - it('should set workersOldest to UNBOUNDED_DATE_TIME_VALUE and workersNewest to extraction end', async () => { - await emitDone( - adapter, - UNBOUNDED_DATE_TIME_VALUE, - '2025-06-01T00:00:00.000Z' - ); - - expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); - expect(adapter.state.workersNewest).toBe('2025-06-01T00:00:00.000Z'); - }); - }); - - describe('reconciliation after UNBOUNDED initial import', () => { - it('should NOT overwrite workersOldest when reconciliation start is later than sentinel', async () => { - // Initial import: UNBOUNDED start, NOW end - await emitDone( - adapter, - UNBOUNDED_DATE_TIME_VALUE, - '2025-06-01T00:00:00.000Z' - ); - - // Reconciliation: absolute dates within the range - await emitDone( - adapter, - '2025-01-01T00:00:00.000Z', - '2025-03-01T00:00:00.000Z' - ); - - expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); - expect(adapter.state.workersNewest).toBe('2025-06-01T00:00:00.000Z'); - }); - - it('should NOT overwrite workersOldest even when reconciliation start is very early', async () => { - // Initial import: UNBOUNDED start, NOW end - await emitDone( - adapter, - UNBOUNDED_DATE_TIME_VALUE, - '2025-06-01T00:00:00.000Z' - ); - - // Reconciliation with a very old start date — still later than epoch - await emitDone( - adapter, - '1980-01-01T00:00:00.000Z', - 
'1990-01-01T00:00:00.000Z' - ); - - expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); - expect(adapter.state.workersNewest).toBe('2025-06-01T00:00:00.000Z'); - }); - }); - - describe('forward sync after UNBOUNDED initial import', () => { - it('should expand workersNewest forward while preserving workersOldest', async () => { - // Initial import - await emitDone( - adapter, - UNBOUNDED_DATE_TIME_VALUE, - '2025-06-01T00:00:00.000Z' - ); - - // Forward sync: from workersNewest to now - await emitDone( - adapter, - '2025-06-01T00:00:00.000Z', - '2025-07-01T00:00:00.000Z' - ); - - expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); - expect(adapter.state.workersNewest).toBe('2025-07-01T00:00:00.000Z'); - }); - }); - - describe('reconciliation with end beyond current newest', () => { - it('should expand workersNewest when reconciliation end is later', async () => { - // Initial import - await emitDone( - adapter, - UNBOUNDED_DATE_TIME_VALUE, - '2025-06-01T00:00:00.000Z' - ); - - // Reconciliation with end beyond current newest - await emitDone( - adapter, - '2024-01-01T00:00:00.000Z', - '2025-08-01T00:00:00.000Z' - ); - - expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); - expect(adapter.state.workersNewest).toBe('2025-08-01T00:00:00.000Z'); - }); - }); - - describe('first sync with absolute dates (no UNBOUNDED)', () => { - it('should set both boundaries from the extraction range', async () => { - await emitDone( - adapter, - '2025-01-01T00:00:00.000Z', - '2025-03-01T00:00:00.000Z' - ); - - expect(adapter.state.workersOldest).toBe('2025-01-01T00:00:00.000Z'); - expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); - }); - }); - - describe('reconciliation after absolute initial sync', () => { - it('should expand workersOldest backward when reconciliation start is earlier', async () => { - // Initial sync with absolute dates - await emitDone( - adapter, - '2025-01-01T00:00:00.000Z', - 
'2025-03-01T00:00:00.000Z' - ); - - // Reconciliation with earlier start - await emitDone( - adapter, - '2024-06-01T00:00:00.000Z', - '2025-02-01T00:00:00.000Z' - ); - - expect(adapter.state.workersOldest).toBe('2024-06-01T00:00:00.000Z'); - expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); - }); - - it('should NOT change boundaries when reconciliation is within existing range', async () => { - // Initial sync - await emitDone( - adapter, - '2025-01-01T00:00:00.000Z', - '2025-03-01T00:00:00.000Z' - ); - - // Reconciliation entirely within existing range - await emitDone( - adapter, - '2025-01-15T00:00:00.000Z', - '2025-02-15T00:00:00.000Z' - ); - - expect(adapter.state.workersOldest).toBe('2025-01-01T00:00:00.000Z'); - expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); - }); - - it('should expand both boundaries when reconciliation exceeds both', async () => { - // Initial sync - await emitDone( - adapter, - '2025-01-01T00:00:00.000Z', - '2025-03-01T00:00:00.000Z' - ); - - // Reconciliation exceeding both ends - await emitDone( - adapter, - '2024-06-01T00:00:00.000Z', - '2025-09-01T00:00:00.000Z' - ); - - expect(adapter.state.workersOldest).toBe('2024-06-01T00:00:00.000Z'); - expect(adapter.state.workersNewest).toBe('2025-09-01T00:00:00.000Z'); - }); - }); - - describe('multiple forward syncs', () => { - it('should progressively expand workersNewest while preserving workersOldest', async () => { - // Initial import - await emitDone( - adapter, - UNBOUNDED_DATE_TIME_VALUE, - '2025-06-01T00:00:00.000Z' - ); - - // First forward sync - await emitDone( - adapter, - '2025-06-01T00:00:00.000Z', - '2025-07-01T00:00:00.000Z' - ); - expect(adapter.state.workersNewest).toBe('2025-07-01T00:00:00.000Z'); - - // Second forward sync - await emitDone( - adapter, - '2025-07-01T00:00:00.000Z', - '2025-08-01T00:00:00.000Z' - ); - expect(adapter.state.workersNewest).toBe('2025-08-01T00:00:00.000Z'); - - // workersOldest should remain the 
sentinel throughout - expect(adapter.state.workersOldest).toBe(UNBOUNDED_DATE_TIME_VALUE); - }); - }); - - describe('non-AttachmentExtractionDone events should NOT update boundaries', () => { - it('should not update boundaries on DataExtractionDone', async () => { - adapter.state.workersOldest = '2025-01-01T00:00:00.000Z'; - adapter.state.workersNewest = '2025-03-01T00:00:00.000Z'; - adapter.event.payload.event_context.extract_from = - '2024-01-01T00:00:00.000Z'; - adapter.event.payload.event_context.extract_to = - '2025-12-01T00:00:00.000Z'; - - await adapter.emit(ExtractorEventType.DataExtractionDone, { - reports: [], - processed_files: [], - }); - - expect(adapter.state.workersOldest).toBe('2025-01-01T00:00:00.000Z'); - expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); - }); - - it('should not update boundaries on DataExtractionProgress', async () => { - adapter.state.workersOldest = '2025-01-01T00:00:00.000Z'; - adapter.state.workersNewest = '2025-03-01T00:00:00.000Z'; - adapter.event.payload.event_context.extract_from = - '2024-01-01T00:00:00.000Z'; - adapter.event.payload.event_context.extract_to = - '2025-12-01T00:00:00.000Z'; - - await adapter.emit(ExtractorEventType.DataExtractionProgress, { - reports: [], - processed_files: [], - }); - - expect(adapter.state.workersOldest).toBe('2025-01-01T00:00:00.000Z'); - expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); - }); - - it('should not update boundaries on MetadataExtractionError', async () => { - adapter.state.workersOldest = '2025-01-01T00:00:00.000Z'; - adapter.state.workersNewest = '2025-03-01T00:00:00.000Z'; - adapter.event.payload.event_context.extract_from = - '2024-01-01T00:00:00.000Z'; - adapter.event.payload.event_context.extract_to = - '2025-12-01T00:00:00.000Z'; - - await adapter.emit(ExtractorEventType.MetadataExtractionError, { - reports: [], - processed_files: [], - }); - - expect(adapter.state.workersOldest).toBe('2025-01-01T00:00:00.000Z'); - 
expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); - }); - - it('should not update boundaries on AttachmentExtractionError', async () => { - adapter.state.workersOldest = '2025-01-01T00:00:00.000Z'; - adapter.state.workersNewest = '2025-03-01T00:00:00.000Z'; - adapter.event.payload.event_context.extract_from = - '2024-01-01T00:00:00.000Z'; - adapter.event.payload.event_context.extract_to = - '2025-12-01T00:00:00.000Z'; - - await adapter.emit(ExtractorEventType.AttachmentExtractionError, { - reports: [], - processed_files: [], - }); - - expect(adapter.state.workersOldest).toBe('2025-01-01T00:00:00.000Z'); - expect(adapter.state.workersNewest).toBe('2025-03-01T00:00:00.000Z'); - }); - }); - }); - - describe('extractionScope', () => { - it('should return empty object by default', () => { - expect(adapter.extractionScope).toEqual({}); - }); - - it('should return extraction scope from adapter state', () => { - const extractionScope = { - tasks: { extract: true }, - users: { extract: false }, - }; - - // Simulate what State.init() does when parsing objects from API - ( - mockAdapterState as unknown as { - _extractionScope: Record; - } - )._extractionScope = extractionScope; - - expect(adapter.extractionScope).toEqual({ - tasks: { extract: true }, - users: { extract: false }, - }); - }); - }); - - describe('shouldExtract', () => { - it('should return true when extraction scope is empty', () => { - expect(adapter.shouldExtract('tasks')).toBe(true); - expect(adapter.shouldExtract('users')).toBe(true); - }); - - it('should return true when item type is not in scope', () => { - ( - mockAdapterState as unknown as { - _extractionScope: Record; - } - )._extractionScope = { - tasks: { extract: true }, - }; - expect(adapter.shouldExtract('users')).toBe(true); - }); - - it('should return true when item type has extract: true', () => { - ( - mockAdapterState as unknown as { - _extractionScope: Record; - } - )._extractionScope = { - tasks: { extract: true }, - }; - 
expect(adapter.shouldExtract('tasks')).toBe(true); - }); - - it('should return false when item type has extract: false', () => { - ( - mockAdapterState as unknown as { - _extractionScope: Record; - } - )._extractionScope = { - tasks: { extract: false }, - users: { extract: true }, - }; - expect(adapter.shouldExtract('tasks')).toBe(false); - expect(adapter.shouldExtract('users')).toBe(true); - }); - }); - - describe(WorkerAdapter.prototype.loadItemTypes.name, () => { - let exitSpy: jest.SpyInstance; - let emitSpy: jest.SpyInstance; - - beforeEach(() => { - exitSpy = jest - .spyOn(process, 'exit') - .mockImplementation(() => undefined as never); - emitSpy = jest.spyOn(adapter, 'emit').mockResolvedValue(); - - // Set event type to loading continuation (not StartLoadingData) so we can - // set fromDevRev state directly without mocking getLoaderBatches - mockEvent.payload.event_type = - EventType.ContinueLoadingData as unknown as EventType; - }); - - afterEach(() => { - exitSpy.mockRestore(); - }); - - function setupFilesToLoad(items: ExternalSystemItem[]) { - adapter['adapterState'].state.fromDevRev = { - filesToLoad: [ - { - id: 'artifact-1', - file_name: 'file1.json', - itemType: 'tasks', - count: items.length, - lineToProcess: 0, - completed: false, - }, - ], - }; - - adapter['uploader'].getJsonObjectByArtifactId = jest - .fn() - .mockResolvedValue({ response: items }); - } - - it('should emit DataLoadingProgress and exit on timeout', async () => { - const items: ExternalSystemItem[] = [ - { - id: { devrev: 'dev-1', external: 'ext-1' }, - created_date: '', - modified_date: '', - data: {}, - }, - { - id: { devrev: 'dev-2', external: 'ext-2' }, - created_date: '', - modified_date: '', - data: {}, - }, - ]; - setupFilesToLoad(items); - - // Set timeout before calling loadItemTypes - adapter.isTimeout = true; - - const itemTypesToLoad = [ - { - itemType: 'tasks', - create: jest.fn(), - update: jest.fn(), - }, - ]; - - await adapter.loadItemTypes({ itemTypesToLoad }); - 
- expect(emitSpy).toHaveBeenCalledWith(LoaderEventType.DataLoadingProgress); - expect(exitSpy).toHaveBeenCalledWith(0); - }); - - it('should emit DataLoadingProgress mid-loop when timeout arrives between items', async () => { - const items: ExternalSystemItem[] = [ - { - id: { devrev: 'dev-1', external: 'ext-1' }, - created_date: '', - modified_date: '', - data: {}, - }, - { - id: { devrev: 'dev-2', external: 'ext-2' }, - created_date: '', - modified_date: '', - data: {}, - }, - { - id: { devrev: 'dev-3', external: 'ext-3' }, - created_date: '', - modified_date: '', - data: {}, - }, - ]; - setupFilesToLoad(items); - - // Mock process.exit to throw so it stops execution like a real exit would - exitSpy.mockRestore(); - exitSpy = jest.spyOn(process, 'exit').mockImplementation((() => { - throw new Error(`process.exit`); - }) as never); - - let loadItemCallCount = 0; - // Mock loadItem to set timeout after the first call - // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/require-await - jest.spyOn(adapter as any, 'loadItem').mockImplementation(async () => { - loadItemCallCount++; - if (loadItemCallCount === 1) { - adapter.isTimeout = true; - } - return { report: { item_type: 'tasks', updated: 1 } }; - }); - - const itemTypesToLoad = [ - { - itemType: 'tasks', - create: jest.fn(), - update: jest.fn(), - }, - ]; - - // process.exit throws, so this will throw - await expect(adapter.loadItemTypes({ itemTypesToLoad })).rejects.toThrow( - 'process.exit' - ); - - // First item processed, then timeout detected on second iteration - expect(loadItemCallCount).toBe(1); - expect(emitSpy).toHaveBeenCalledWith(LoaderEventType.DataLoadingProgress); - }); - - it('should emit DataLoadingError and exit(1) on unexpected error', async () => { - adapter['adapterState'].state.fromDevRev = { - filesToLoad: [ - { - id: 'artifact-1', - file_name: 'file1.json', - itemType: 'tasks', - count: 1, - lineToProcess: 0, - completed: false, - }, - ], - }; - - // Make 
getJsonObjectByArtifactId throw (not return error — throw) - adapter['uploader'].getJsonObjectByArtifactId = jest - .fn() - .mockRejectedValue(new Error('Unexpected network failure')); - - const itemTypesToLoad = [ - { - itemType: 'tasks', - create: jest.fn(), - update: jest.fn(), - }, - ]; - - await adapter.loadItemTypes({ itemTypesToLoad }); - - expect(emitSpy).toHaveBeenCalledWith( - LoaderEventType.DataLoadingError, - expect.objectContaining({ - error: expect.objectContaining({ - message: expect.stringContaining('Error during data loading'), - }), - }) - ); - expect(exitSpy).toHaveBeenCalledWith(1); - }); - }); - - describe(WorkerAdapter.prototype.loadAttachments.name, () => { - let exitSpy: jest.SpyInstance; - let emitSpy: jest.SpyInstance; - - beforeEach(() => { - exitSpy = jest - .spyOn(process, 'exit') - .mockImplementation(() => undefined as never); - emitSpy = jest.spyOn(adapter, 'emit').mockResolvedValue(); - - // Set event type to continuation so we can set fromDevRev state directly - mockEvent.payload.event_type = - EventType.ContinueLoadingAttachments as unknown as EventType; - }); - - afterEach(() => { - exitSpy.mockRestore(); - }); - - function setupFilesToLoad(items: ExternalSystemAttachment[]) { - adapter['adapterState'].state.fromDevRev = { - filesToLoad: [ - { - id: 'artifact-1', - file_name: 'attachments.json', - itemType: 'attachment', - count: items.length, - lineToProcess: 0, - completed: false, - }, - ], - }; - - adapter['uploader'].getJsonObjectByArtifactId = jest - .fn() - .mockResolvedValue({ response: items }); - } - - it('should emit AttachmentLoadingProgress and exit on timeout', async () => { - const items = [ - { - reference_id: 'ref-1', - parent_type: 'task', - parent_reference_id: 'parent-1', - file_name: 'file.pdf', - file_type: 'application/pdf', - file_size: 100, - url: 'https://example.com/file.pdf', - valid_until: '', - created_by_id: 'user-1', - created_date: '', - modified_by_id: 'user-1', - modified_date: '', - }, - ] as 
ExternalSystemAttachment[]; - setupFilesToLoad(items); - - adapter.isTimeout = true; - - await adapter.loadAttachments({ - create: jest.fn(), - }); - - expect(emitSpy).toHaveBeenCalledWith( - LoaderEventType.AttachmentLoadingProgress - ); - expect(exitSpy).toHaveBeenCalledWith(0); - }); - - it('should emit AttachmentLoadingError on transformer file error', async () => { - adapter['adapterState'].state.fromDevRev = { - filesToLoad: [ - { - id: 'bad-artifact', - file_name: 'attachments.json', - itemType: 'attachment', - count: 1, - lineToProcess: 0, - completed: false, - }, - ], - }; - - adapter['uploader'].getJsonObjectByArtifactId = jest - .fn() - .mockResolvedValue({ - response: null, - error: new Error('Artifact not found'), - }); - - await adapter.loadAttachments({ - create: jest.fn(), - }); - - expect(emitSpy).toHaveBeenCalledWith( - LoaderEventType.AttachmentLoadingError, - expect.objectContaining({ - error: expect.objectContaining({ - message: expect.stringContaining('Transformer file not found'), - }), - }) - ); - }); - - it('should emit AttachmentLoadingError and exit(1) on unexpected error', async () => { - const items = [ - { - reference_id: 'ref-1', - parent_type: 'task', - parent_reference_id: 'parent-1', - file_name: 'file.pdf', - file_type: 'application/pdf', - file_size: 100, - url: 'https://example.com/file.pdf', - valid_until: '', - created_by_id: 'user-1', - created_date: '', - modified_by_id: 'user-1', - modified_date: '', - }, - ] as ExternalSystemAttachment[]; - setupFilesToLoad(items); - - // Make the create function throw - const mockCreate = jest - .fn() - .mockRejectedValue(new Error('Unexpected API failure')); - - await adapter.loadAttachments({ create: mockCreate }); - - expect(emitSpy).toHaveBeenCalledWith( - LoaderEventType.AttachmentLoadingError, - expect.objectContaining({ - error: expect.objectContaining({ - message: expect.stringContaining('Error during attachment loading'), - }), - }) - ); - expect(exitSpy).toHaveBeenCalledWith(1); 
- }); - }); -}); diff --git a/src/repo/repo.test.ts b/src/repo/repo.test.ts index 4991697..b1d93c7 100644 --- a/src/repo/repo.test.ts +++ b/src/repo/repo.test.ts @@ -31,15 +31,20 @@ describe(Repo.name, () => { }); it('should normalize and push items when array contains items', async () => { + // Arrange const items = createItems(10); + + // Act await repo.push(items); - expect(normalize).toHaveBeenCalledTimes(10); + // Assert + expect(normalize).toHaveBeenCalledTimes(10); const normalizedItems = items.map((item) => normalizeItem(item)); expect(repo.getItems()).toEqual(normalizedItems); }); it('should not normalize items when normalize function is not provided', async () => { + // Arrange repo = new Repo({ event: createMockEvent(mockServer.baseUrl, { payload: { event_type: EventType.ExtractionDataStart }, @@ -48,18 +53,25 @@ describe(Repo.name, () => { onUpload: jest.fn(), options: {}, }); - const items = createItems(10); + + // Act await repo.push(items); + + // Assert expect(normalize).not.toHaveBeenCalled(); }); it('[edge] should not push items when items array is empty', async () => { + // Act await repo.push([]); + + // Assert expect(repo.getItems()).toEqual([]); }); it('should not normalize items when item type is external_domain_metadata', async () => { + // Arrange repo = new Repo({ event: createMockEvent(mockServer.baseUrl, { payload: { event_type: EventType.ExtractionDataStart }, @@ -69,14 +81,17 @@ describe(Repo.name, () => { onUpload: jest.fn(), options: {}, }); - const items = createItems(10); + + // Act await repo.push(items); + // Assert expect(normalize).not.toHaveBeenCalled(); }); it('should not normalize items when item type is ssor_attachment', async () => { + // Arrange repo = new Repo({ event: createMockEvent(mockServer.baseUrl, { payload: { event_type: EventType.ExtractionDataStart }, @@ -86,41 +101,58 @@ describe(Repo.name, () => { onUpload: jest.fn(), options: {}, }); - const items = createItems(10); + + // Act await repo.push(items); + // 
Assert expect(normalize).not.toHaveBeenCalled(); }); it('should leave 5 items in the items array after pushing 2005 items with batch size of 2000', async () => { + // Arrange const items = createItems(2005); + + // Act await repo.push(items); + // Assert expect(repo.getItems().length).toBe(5); }); it('should normalize all items when pushing 4005 items with batch size of 2000', async () => { + // Arrange const items = createItems(4005); + + // Act await repo.push(items); + // Assert expect(normalize).toHaveBeenCalledTimes(4005); }); it('should upload 2 batches when pushing 4005 items with batch size of 2000', async () => { + // Arrange const uploadSpy = jest.spyOn(repo, 'upload'); - const items = createItems(4005); + + // Act await repo.push(items); + // Assert expect(uploadSpy).toHaveBeenCalledTimes(2); uploadSpy.mockRestore(); }); it('should leave 5 items in array after pushing 4005 items with batch size of 2000', async () => { + // Arrange const items = createItems(4005); + + // Act await repo.push(items); + // Assert expect(repo.getItems().length).toBe(5); }); @@ -140,39 +172,59 @@ describe(Repo.name, () => { }); it('should empty the items array after pushing 50 items with batch size of 50', async () => { + // Arrange const items = createItems(50); + + // Act await repo.push(items); + + // Assert expect(repo.getItems()).toEqual([]); }); it('should leave 5 items in the items array after pushing 205 items with batch size of 50', async () => { + // Arrange const items = createItems(205); + + // Act await repo.push(items); + // Assert expect(repo.getItems().length).toBe(5); }); it('should normalize all items when pushing 205 items with batch size of 50', async () => { + // Arrange const items = createItems(205); + + // Act await repo.push(items); + // Assert expect(normalize).toHaveBeenCalledTimes(205); }); it('should upload 4 batches when pushing 205 items with batch size of 50', async () => { + // Arrange const uploadSpy = jest.spyOn(repo, 'upload'); - const items 
= createItems(205); + + // Act await repo.push(items); + // Assert expect(uploadSpy).toHaveBeenCalledTimes(4); uploadSpy.mockRestore(); }); it('should leave 5 items in array after pushing 205 items with batch size of 50', async () => { + // Arrange const items = createItems(205); + + // Act await repo.push(items); + // Assert expect(repo.getItems().length).toBe(5); }); }); diff --git a/src/state/state.extract-window.test.ts b/src/state/state.extract-window.test.ts new file mode 100644 index 0000000..3f71304 --- /dev/null +++ b/src/state/state.extract-window.test.ts @@ -0,0 +1,193 @@ +import { mockServer } from '../tests/jest.setup'; +import { createMockEvent } from '../common/test-utils'; +import { EventType, TimeValueType } from '../types/extraction'; +import { State, createAdapterState } from './state'; + +describe(State.name, () => { + let fetchStateSpy: jest.SpyInstance; + let processExitSpy: jest.SpyInstance; + + beforeEach(() => { + jest.clearAllMocks(); + jest.restoreAllMocks(); + + fetchStateSpy = jest.spyOn(State.prototype, 'fetchState'); + processExitSpy = jest.spyOn(process, 'exit').mockImplementation(() => { + throw new Error('process.exit called'); + }); + }); + + describe('Enhanced Control Protocol - extraction window validation', () => { + it('should exit the process if extract_from >= extract_to', async () => { + // Arrange: start is after end (inverted window) + const event = createMockEvent(mockServer.baseUrl, { + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + type: TimeValueType.ABSOLUTE_TIME, + value: '2025-06-01T00:00:00Z', + }, + extraction_end_time: { + type: TimeValueType.ABSOLUTE_TIME, + value: '2024-01-01T00:00:00Z', + }, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 
'error').mockImplementation(() => {}); + + // Act & Assert + await expect( + createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }) + ).rejects.toThrow('process.exit called'); + expect(processExitSpy).toHaveBeenCalledWith(1); + }); + + it('should exit the process if extract_from equals extract_to', async () => { + // Arrange: start equals end (zero-width window) + const event = createMockEvent(mockServer.baseUrl, { + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + type: TimeValueType.ABSOLUTE_TIME, + value: '2024-06-01T00:00:00Z', + }, + extraction_end_time: { + type: TimeValueType.ABSOLUTE_TIME, + value: '2024-06-01T00:00:00Z', + }, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + + // Act & Assert + await expect( + createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }) + ).rejects.toThrow('process.exit called'); + expect(processExitSpy).toHaveBeenCalledWith(1); + }); + + it('should not exit when extract_from < extract_to', async () => { + // Arrange: valid window + const event = createMockEvent(mockServer.baseUrl, { + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + type: TimeValueType.ABSOLUTE_TIME, + value: '2024-01-01T00:00:00Z', + }, + extraction_end_time: { + type: TimeValueType.ABSOLUTE_TIME, + value: '2025-06-01T00:00:00Z', + }, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + await createAdapterState({ + event, + initialState: {}, + 
initialDomainMapping: {}, + }); + + // Assert: process.exit should NOT have been called + expect(processExitSpy).not.toHaveBeenCalled(); + }); + + it('should not validate when only extract_from is set', async () => { + // Arrange: only start, no end + const event = createMockEvent(mockServer.baseUrl, { + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + type: TimeValueType.ABSOLUTE_TIME, + value: '2024-01-01T00:00:00Z', + }, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert: process.exit should NOT have been called + expect(processExitSpy).not.toHaveBeenCalled(); + }); + + it('should not exit when extract_from is UNBOUNDED and extract_to is a real timestamp', async () => { + // Arrange: UNBOUNDED start (epoch) with a real ABSOLUTE end timestamp + const event = createMockEvent(mockServer.baseUrl, { + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + type: TimeValueType.UNBOUNDED, + }, + extraction_end_time: { + type: TimeValueType.ABSOLUTE_TIME, + value: '2025-06-01T00:00:00Z', + }, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert: process.exit should NOT have been called + expect(processExitSpy).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/src/state/state.pending-boundaries.test.ts b/src/state/state.pending-boundaries.test.ts new file mode 
100644 index 0000000..a6f812a --- /dev/null +++ b/src/state/state.pending-boundaries.test.ts @@ -0,0 +1,231 @@ +import { mockServer } from '../tests/jest.setup'; +import { createMockEvent } from '../common/test-utils'; +import { EventType, TimeValueType } from '../types/extraction'; +import { State, createAdapterState } from './state'; + +/* eslint-disable @typescript-eslint/no-require-imports */ + +describe(State.name, () => { + let postStateSpy: jest.SpyInstance; + let fetchStateSpy: jest.SpyInstance; + let installInitialDomainMappingSpy: jest.SpyInstance; + + beforeEach(() => { + jest.clearAllMocks(); + jest.restoreAllMocks(); + + postStateSpy = jest.spyOn(State.prototype, 'postState'); + fetchStateSpy = jest.spyOn(State.prototype, 'fetchState'); + installInitialDomainMappingSpy = jest.spyOn( + require('../common/install-initial-domain-mapping'), + 'installInitialDomainMapping' + ); + jest.spyOn(process, 'exit').mockImplementation(() => { + throw new Error('process.exit called'); + }); + }); + + describe('Pending extraction boundaries (pendingWorkersOldest/pendingWorkersNewest)', () => { + const FIXED_NOW = '2026-03-26T10:00:00.000Z'; + + beforeEach(() => { + jest.useFakeTimers(); + jest.setSystemTime(new Date(FIXED_NOW)); + }); + + afterEach(() => { + jest.useRealTimers(); + }); + + it('should store resolved values in pendingWorkersOldest/pendingWorkersNewest on StartExtractingMetadata', async () => { + // Arrange + const event = createMockEvent(mockServer.baseUrl, { + context: { + snap_in_version_id: '', + }, + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + type: TimeValueType.UNBOUNDED, + }, + extraction_end_time: { + type: TimeValueType.CURRENT_TIME, + }, + }, + }, + }); + + fetchStateSpy.mockRejectedValue({ + isAxiosError: true, + response: { status: 404 }, + }); + installInitialDomainMappingSpy.mockResolvedValue({ success: true }); + postStateSpy.mockResolvedValue({ success: true }); + 
jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + const state = await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert + expect(state.state.pendingWorkersOldest).toBe('1970-01-01T00:00:00.000Z'); + expect(state.state.pendingWorkersNewest).toBe(FIXED_NOW); + expect(event.payload.event_context.extract_from).toBe( + '1970-01-01T00:00:00.000Z' + ); + expect(event.payload.event_context.extract_to).toBe(FIXED_NOW); + }); + + it('should overwrite pending values on a retry (new StartExtractingMetadata after failure)', async () => { + // Arrange: state has stale pending values from a previous failed attempt + const staleOldest = '2026-03-25T08:00:00.000Z'; + const staleNewest = '2026-03-25T09:00:00.000Z'; + + const event = createMockEvent(mockServer.baseUrl, { + context: { + snap_in_version_id: 'test_snap_in_version_id', + }, + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + type: TimeValueType.UNBOUNDED, + }, + extraction_end_time: { + type: TimeValueType.CURRENT_TIME, + }, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + pendingWorkersOldest: staleOldest, + pendingWorkersNewest: staleNewest, + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + const state = await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert: pending values are overwritten with fresh resolution, not stale values + expect(state.state.pendingWorkersOldest).toBe('1970-01-01T00:00:00.000Z'); + expect(state.state.pendingWorkersNewest).toBe(FIXED_NOW); + expect(state.state.pendingWorkersNewest).not.toBe(staleNewest); + }); + + it('should reuse pending values from state on ContinueExtractingData instead of re-resolving', async () => { + // Arrange: state has pending values from a prior 
StartExtractingMetadata phase + const pendingOldest = '1970-01-01T00:00:00.000Z'; + const pendingNewest = '2026-03-26T08:00:00.000Z'; // Earlier than FIXED_NOW + + const event = createMockEvent(mockServer.baseUrl, { + context: { + snap_in_version_id: 'test_snap_in_version_id', + }, + payload: { + event_type: EventType.ContinueExtractingData, + event_context: { + // Platform still sends TimeValue objects, but they should be ignored + extraction_start_time: { + type: TimeValueType.CURRENT_TIME, + }, + extraction_end_time: { + type: TimeValueType.CURRENT_TIME, + }, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + pendingWorkersOldest: pendingOldest, + pendingWorkersNewest: pendingNewest, + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + const state = await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert: uses cached pending values, NOT new Date() resolution + expect(event.payload.event_context.extract_from).toBe(pendingOldest); + expect(event.payload.event_context.extract_to).toBe(pendingNewest); + // Pending values in state remain unchanged + expect(state.state.pendingWorkersOldest).toBe(pendingOldest); + expect(state.state.pendingWorkersNewest).toBe(pendingNewest); + }); + + it('should not set extract_from/extract_to on ContinueExtractingData if no pending values exist', async () => { + // Arrange: state has no pending values (e.g. 
old state from before this feature) + const event = createMockEvent(mockServer.baseUrl, { + context: { + snap_in_version_id: 'test_snap_in_version_id', + }, + payload: { event_type: EventType.ContinueExtractingData }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert: no extraction timestamps are set + expect(event.payload.event_context.extract_from).toBeUndefined(); + expect(event.payload.event_context.extract_to).toBeUndefined(); + }); + + it('should reuse pending values on StartExtractingAttachments', async () => { + // Arrange: state has pending values from the StartExtractingMetadata phase + const pendingOldest = '1970-01-01T00:00:00.000Z'; + const pendingNewest = '2026-03-26T08:00:00.000Z'; + + const event = createMockEvent(mockServer.baseUrl, { + context: { + snap_in_version_id: 'test_snap_in_version_id', + }, + payload: { event_type: EventType.StartExtractingAttachments }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + pendingWorkersOldest: pendingOldest, + pendingWorkersNewest: pendingNewest, + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert: pending values are reused + expect(event.payload.event_context.extract_from).toBe(pendingOldest); + expect(event.payload.event_context.extract_to).toBe(pendingNewest); + }); + }); +}); diff --git a/src/state/state.post-state.test.ts b/src/state/state.post-state.test.ts new file mode 100644 index 0000000..2459031 --- /dev/null +++ b/src/state/state.post-state.test.ts @@ -0,0 +1,100 @@ 
+import { mockServer } from '../tests/jest.setup'; +import { createMockEvent } from '../common/test-utils'; +import { EventType } from '../types/extraction'; +import { State, createAdapterState } from './state'; + +/* eslint-disable @typescript-eslint/no-require-imports */ + +describe(State.name, () => { + let postStateSpy: jest.SpyInstance; + let fetchStateSpy: jest.SpyInstance; + let processExitSpy: jest.SpyInstance; + + beforeEach(() => { + jest.clearAllMocks(); + jest.restoreAllMocks(); + + postStateSpy = jest.spyOn(State.prototype, 'postState'); + fetchStateSpy = jest.spyOn(State.prototype, 'fetchState'); + processExitSpy = jest.spyOn(process, 'exit').mockImplementation(() => { + throw new Error('process.exit called'); + }); + }); + + describe('State.postState', () => { + it('should POST the stringified state with Authorization header to the update endpoint', async () => { + // Arrange + const event = createMockEvent(mockServer.baseUrl, { + context: { + snap_in_version_id: '1.0.0', + secrets: { service_account_token: 'test_token' }, + }, + payload: { event_type: EventType.StartExtractingData }, + }); + const stateToPost = { snapInVersionId: '1.0.0', foo: 'bar' }; + fetchStateSpy.mockResolvedValue({ + state: JSON.stringify({ snapInVersionId: '1.0.0' }), + }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + postStateSpy.mockRestore(); + + const adapterState = await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + await adapterState.postState(stateToPost as never); + + // The mock server records all incoming requests — inspect what was sent + const requests = mockServer.getRequests( + 'POST', + '/worker_data_url.update' + ); + expect(requests).toHaveLength(1); + + const body = requests[0].body as { state: string }; + // Body must contain the stringified state, preserving the original fields + expect(typeof body.state).toBe('string'); + const parsed = JSON.parse(body.state) as Record<string, unknown>; + 
expect(parsed.foo).toBe('bar'); + expect(parsed.snapInVersionId).toBe('1.0.0'); + }); + + it('should exit(1) when postState HTTP request fails', async () => { + // Arrange + const event = createMockEvent(mockServer.baseUrl, { + context: { snap_in_version_id: '1.0.0' }, + payload: { event_type: EventType.StartExtractingData }, + }); + fetchStateSpy.mockResolvedValue({ + state: JSON.stringify({ snapInVersionId: '1.0.0' }), + }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + + postStateSpy.mockRestore(); + + const adapterState = await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Mock axiosClient.post directly to bypass the retry backoff + + const axiosClientModule = require('../http/axios-client-internal'); + const axiosPostSpy = jest + .spyOn(axiosClientModule.axiosClient, 'post') + .mockRejectedValue(new Error('network error')); + + await expect(adapterState.postState()).rejects.toThrow( + 'process.exit called' + ); + expect(processExitSpy).toHaveBeenCalledWith(1); + + axiosPostSpy.mockRestore(); + }); + }); +}); diff --git a/src/state/state.test.ts b/src/state/state.test.ts index 6d07d41..d43f3a2 100644 --- a/src/state/state.test.ts +++ b/src/state/state.test.ts @@ -4,7 +4,7 @@ import { } from '../common/constants'; import { mockServer } from '../tests/jest.setup'; import { createMockEvent } from '../common/test-utils'; -import { EventType, TimeValue, TimeValueType } from '../types/extraction'; +import { EventType } from '../types/extraction'; import { State, createAdapterState } from './state'; import { extractionSdkState } from './state.interfaces'; @@ -300,574 +300,6 @@ describe(State.name, () => { } ); - describe('Enhanced Control Protocol - TimeValue resolution failures', () => { - it('should exit the process if extraction_start_time resolution fails', async () => { - // Arrange: WORKERS_NEWEST type but state has no workersNewest - const 
event = createMockEvent(mockServer.baseUrl, { - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - type: TimeValueType.WORKERS_NEWEST, - }, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - workers_oldest: '', - workers_newest: '', - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - jest.spyOn(console, 'error').mockImplementation(() => {}); - - // Act & Assert - await expect( - createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }) - ).rejects.toThrow('process.exit called'); - expect(processExitSpy).toHaveBeenCalledWith(1); - }); - - it('should exit the process if extraction_end_time resolution fails', async () => { - // Arrange: WORKERS_NEWEST type but state has no workersNewest - const event = createMockEvent(mockServer.baseUrl, { - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - type: TimeValueType.UNBOUNDED, - }, - extraction_end_time: { - type: TimeValueType.WORKERS_NEWEST, - }, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - workers_oldest: '', - workers_newest: '', - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - jest.spyOn(console, 'error').mockImplementation(() => {}); - - // Act & Assert - await expect( - createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }) - ).rejects.toThrow('process.exit called'); - expect(processExitSpy).toHaveBeenCalledWith(1); - }); - }); - - describe('Backwards compatibility - missing TimeValue type', () => { - it('should skip resolution when extraction_start_time has no type', async () => { - // Arrange: platform sends extraction_start_time without a type field (old platform 
version) - const event = createMockEvent(mockServer.baseUrl, { - context: { - snap_in_version_id: 'test_snap_in_version_id', - }, - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: {} as unknown as TimeValue, - extraction_end_time: { - type: TimeValueType.ABSOLUTE_TIME, - value: '2025-06-01T00:00:00Z', - }, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - const state = await createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert: should not crash, extract_from is not set, extract_to is resolved - expect(processExitSpy).not.toHaveBeenCalled(); - expect(event.payload.event_context.extract_from).toBeUndefined(); - expect(event.payload.event_context.extract_to).toBe( - '2025-06-01T00:00:00.000Z' - ); - expect(state.state.pendingWorkersNewest).toBe('2025-06-01T00:00:00.000Z'); - }); - - it('should skip resolution when extraction_end_time has no type', async () => { - // Arrange: platform sends extraction_end_time without a type field - const event = createMockEvent(mockServer.baseUrl, { - context: { - snap_in_version_id: 'test_snap_in_version_id', - }, - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - type: TimeValueType.ABSOLUTE_TIME, - value: '2024-01-01T00:00:00Z', - }, - extraction_end_time: {} as unknown as TimeValue, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - await createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert: should not crash, extract_to is not set, 
extract_from is resolved - expect(processExitSpy).not.toHaveBeenCalled(); - expect(event.payload.event_context.extract_from).toBe( - '2024-01-01T00:00:00.000Z' - ); - expect(event.payload.event_context.extract_to).toBeUndefined(); - }); - - it('should skip resolution when both extraction times have no type', async () => { - // Arrange: platform sends both time values without type fields - const event = createMockEvent(mockServer.baseUrl, { - context: { - snap_in_version_id: 'test_snap_in_version_id', - }, - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - value: 'some-value', - } as unknown as TimeValue, - extraction_end_time: { - value: 'some-value', - } as unknown as TimeValue, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - await createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert: should not crash, neither extraction time is resolved - expect(processExitSpy).not.toHaveBeenCalled(); - expect(event.payload.event_context.extract_from).toBeUndefined(); - expect(event.payload.event_context.extract_to).toBeUndefined(); - }); - }); - - describe('Enhanced Control Protocol - extraction window validation', () => { - it('should exit the process if extract_from >= extract_to', async () => { - // Arrange: start is after end (inverted window) - const event = createMockEvent(mockServer.baseUrl, { - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - type: TimeValueType.ABSOLUTE_TIME, - value: '2025-06-01T00:00:00Z', - }, - extraction_end_time: { - type: TimeValueType.ABSOLUTE_TIME, - value: '2024-01-01T00:00:00Z', - }, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 
'test_snap_in_version_id', - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - jest.spyOn(console, 'error').mockImplementation(() => {}); - - // Act & Assert - await expect( - createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }) - ).rejects.toThrow('process.exit called'); - expect(processExitSpy).toHaveBeenCalledWith(1); - }); - - it('should exit the process if extract_from equals extract_to', async () => { - // Arrange: start equals end (zero-width window) - const event = createMockEvent(mockServer.baseUrl, { - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - type: TimeValueType.ABSOLUTE_TIME, - value: '2024-06-01T00:00:00Z', - }, - extraction_end_time: { - type: TimeValueType.ABSOLUTE_TIME, - value: '2024-06-01T00:00:00Z', - }, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - jest.spyOn(console, 'error').mockImplementation(() => {}); - - // Act & Assert - await expect( - createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }) - ).rejects.toThrow('process.exit called'); - expect(processExitSpy).toHaveBeenCalledWith(1); - }); - - it('should not exit when extract_from < extract_to', async () => { - // Arrange: valid window - const event = createMockEvent(mockServer.baseUrl, { - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - type: TimeValueType.ABSOLUTE_TIME, - value: '2024-01-01T00:00:00Z', - }, - extraction_end_time: { - type: TimeValueType.ABSOLUTE_TIME, - value: '2025-06-01T00:00:00Z', - }, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - }); - 
fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - await createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert: process.exit should NOT have been called - expect(processExitSpy).not.toHaveBeenCalled(); - }); - - it('should not validate when only extract_from is set', async () => { - // Arrange: only start, no end - const event = createMockEvent(mockServer.baseUrl, { - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - type: TimeValueType.ABSOLUTE_TIME, - value: '2024-01-01T00:00:00Z', - }, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - await createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert: process.exit should NOT have been called - expect(processExitSpy).not.toHaveBeenCalled(); - }); - - it('should not exit when extract_from is UNBOUNDED and extract_to is a real timestamp', async () => { - // Arrange: UNBOUNDED start (epoch) with a real ABSOLUTE end timestamp - const event = createMockEvent(mockServer.baseUrl, { - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - type: TimeValueType.UNBOUNDED, - }, - extraction_end_time: { - type: TimeValueType.ABSOLUTE_TIME, - value: '2025-06-01T00:00:00Z', - }, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - await createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert: process.exit should NOT have been 
called - expect(processExitSpy).not.toHaveBeenCalled(); - }); - }); - - describe('Pending extraction boundaries (pendingWorkersOldest/pendingWorkersNewest)', () => { - const FIXED_NOW = '2026-03-26T10:00:00.000Z'; - - beforeEach(() => { - jest.useFakeTimers(); - jest.setSystemTime(new Date(FIXED_NOW)); - }); - - afterEach(() => { - jest.useRealTimers(); - }); - - it('should store resolved values in pendingWorkersOldest/pendingWorkersNewest on StartExtractingMetadata', async () => { - // Arrange - const event = createMockEvent(mockServer.baseUrl, { - context: { - snap_in_version_id: '', - }, - payload: { - event_type: EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - type: TimeValueType.UNBOUNDED, - }, - extraction_end_time: { - type: TimeValueType.CURRENT_TIME, - }, - }, - }, - }); - - fetchStateSpy.mockRejectedValue({ - isAxiosError: true, - response: { status: 404 }, - }); - installInitialDomainMappingSpy.mockResolvedValue({ success: true }); - postStateSpy.mockResolvedValue({ success: true }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - const state = await createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert - expect(state.state.pendingWorkersOldest).toBe('1970-01-01T00:00:00.000Z'); - expect(state.state.pendingWorkersNewest).toBe(FIXED_NOW); - expect(event.payload.event_context.extract_from).toBe( - '1970-01-01T00:00:00.000Z' - ); - expect(event.payload.event_context.extract_to).toBe(FIXED_NOW); - }); - - it('should overwrite pending values on a retry (new StartExtractingMetadata after failure)', async () => { - // Arrange: state has stale pending values from a previous failed attempt - const staleOldest = '2026-03-25T08:00:00.000Z'; - const staleNewest = '2026-03-25T09:00:00.000Z'; - - const event = createMockEvent(mockServer.baseUrl, { - context: { - snap_in_version_id: 'test_snap_in_version_id', - }, - payload: { - event_type: 
EventType.StartExtractingMetadata, - event_context: { - extraction_start_time: { - type: TimeValueType.UNBOUNDED, - }, - extraction_end_time: { - type: TimeValueType.CURRENT_TIME, - }, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - pendingWorkersOldest: staleOldest, - pendingWorkersNewest: staleNewest, - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - const state = await createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert: pending values are overwritten with fresh resolution, not stale values - expect(state.state.pendingWorkersOldest).toBe('1970-01-01T00:00:00.000Z'); - expect(state.state.pendingWorkersNewest).toBe(FIXED_NOW); - expect(state.state.pendingWorkersNewest).not.toBe(staleNewest); - }); - - it('should reuse pending values from state on ContinueExtractingData instead of re-resolving', async () => { - // Arrange: state has pending values from a prior StartExtractingMetadata phase - const pendingOldest = '1970-01-01T00:00:00.000Z'; - const pendingNewest = '2026-03-26T08:00:00.000Z'; // Earlier than FIXED_NOW - - const event = createMockEvent(mockServer.baseUrl, { - context: { - snap_in_version_id: 'test_snap_in_version_id', - }, - payload: { - event_type: EventType.ContinueExtractingData, - event_context: { - // Platform still sends TimeValue objects, but they should be ignored - extraction_start_time: { - type: TimeValueType.CURRENT_TIME, - }, - extraction_end_time: { - type: TimeValueType.CURRENT_TIME, - }, - }, - }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - pendingWorkersOldest: pendingOldest, - pendingWorkersNewest: pendingNewest, - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - const state = await 
createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert: uses cached pending values, NOT new Date() resolution - expect(event.payload.event_context.extract_from).toBe(pendingOldest); - expect(event.payload.event_context.extract_to).toBe(pendingNewest); - // Pending values in state remain unchanged - expect(state.state.pendingWorkersOldest).toBe(pendingOldest); - expect(state.state.pendingWorkersNewest).toBe(pendingNewest); - }); - - it('should not set extract_from/extract_to on ContinueExtractingData if no pending values exist', async () => { - // Arrange: state has no pending values (e.g. old state from before this feature) - const event = createMockEvent(mockServer.baseUrl, { - context: { - snap_in_version_id: 'test_snap_in_version_id', - }, - payload: { event_type: EventType.ContinueExtractingData }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - await createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert: no extraction timestamps are set - expect(event.payload.event_context.extract_from).toBeUndefined(); - expect(event.payload.event_context.extract_to).toBeUndefined(); - }); - - it('should reuse pending values on StartExtractingAttachments', async () => { - // Arrange: state has pending values from the StartExtractingMetadata phase - const pendingOldest = '1970-01-01T00:00:00.000Z'; - const pendingNewest = '2026-03-26T08:00:00.000Z'; - - const event = createMockEvent(mockServer.baseUrl, { - context: { - snap_in_version_id: 'test_snap_in_version_id', - }, - payload: { event_type: EventType.StartExtractingAttachments }, - }); - - const stringifiedState = JSON.stringify({ - snapInVersionId: 'test_snap_in_version_id', - pendingWorkersOldest: pendingOldest, - pendingWorkersNewest: 
pendingNewest, - }); - fetchStateSpy.mockResolvedValue({ state: stringifiedState }); - jest.spyOn(console, 'log').mockImplementation(() => {}); - - // Act - await createAdapterState({ - event, - initialState: {}, - initialDomainMapping: {}, - }); - - // Assert: pending values are reused - expect(event.payload.event_context.extract_from).toBe(pendingOldest); - expect(event.payload.event_context.extract_to).toBe(pendingNewest); - }); - }); - it('should populate extractionScope from API response', async () => { // Arrange const event = createMockEvent(mockServer.baseUrl, { @@ -942,4 +374,32 @@ describe(State.name, () => { // Assert expect(result.extractionScope).toEqual({}); }); + + it('should warn but continue when objects field contains invalid JSON', async () => { + // Arrange + const event = createMockEvent(mockServer.baseUrl, { + context: { snap_in_version_id: '1.0.0' }, + payload: { event_type: EventType.StartExtractingData }, + }); + fetchStateSpy.mockResolvedValue({ + state: JSON.stringify({ snapInVersionId: '1.0.0' }), + objects: 'NOT_VALID_JSON', + }); + const warnSpy = jest.spyOn(console, 'warn').mockImplementation(() => {}); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + const result = await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert: should not crash, extractionScope is empty (default) + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining('Failed to parse extractionScope') + ); + expect(result.extractionScope).toEqual({}); + expect(processExitSpy).not.toHaveBeenCalled(); + }); }); diff --git a/src/state/state.time-value-resolution.test.ts b/src/state/state.time-value-resolution.test.ts new file mode 100644 index 0000000..a451d78 --- /dev/null +++ b/src/state/state.time-value-resolution.test.ts @@ -0,0 +1,209 @@ +import { mockServer } from '../tests/jest.setup'; +import { createMockEvent } from '../common/test-utils'; +import { EventType, TimeValue, TimeValueType } 
from '../types/extraction'; +import { State, createAdapterState } from './state'; + +describe(State.name, () => { + let fetchStateSpy: jest.SpyInstance; + let processExitSpy: jest.SpyInstance; + + beforeEach(() => { + jest.clearAllMocks(); + jest.restoreAllMocks(); + + fetchStateSpy = jest.spyOn(State.prototype, 'fetchState'); + processExitSpy = jest.spyOn(process, 'exit').mockImplementation(() => { + throw new Error('process.exit called'); + }); + }); + + describe('Enhanced Control Protocol - TimeValue resolution failures', () => { + it('should exit the process if extraction_start_time resolution fails', async () => { + // Arrange: WORKERS_NEWEST type but state has no workersNewest + const event = createMockEvent(mockServer.baseUrl, { + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + type: TimeValueType.WORKERS_NEWEST, + }, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + workers_oldest: '', + workers_newest: '', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + + // Act & Assert + await expect( + createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }) + ).rejects.toThrow('process.exit called'); + expect(processExitSpy).toHaveBeenCalledWith(1); + }); + + it('should exit the process if extraction_end_time resolution fails', async () => { + // Arrange: WORKERS_NEWEST type but state has no workersNewest + const event = createMockEvent(mockServer.baseUrl, { + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + type: TimeValueType.UNBOUNDED, + }, + extraction_end_time: { + type: TimeValueType.WORKERS_NEWEST, + }, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + 
workers_oldest: '', + workers_newest: '', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + jest.spyOn(console, 'error').mockImplementation(() => {}); + + // Act & Assert + await expect( + createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }) + ).rejects.toThrow('process.exit called'); + expect(processExitSpy).toHaveBeenCalledWith(1); + }); + }); + + describe('Backwards compatibility - missing TimeValue type', () => { + it('should skip resolution when extraction_start_time has no type', async () => { + // Arrange: platform sends extraction_start_time without a type field (old platform version) + const event = createMockEvent(mockServer.baseUrl, { + context: { + snap_in_version_id: 'test_snap_in_version_id', + }, + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: {} as unknown as TimeValue, + extraction_end_time: { + type: TimeValueType.ABSOLUTE_TIME, + value: '2025-06-01T00:00:00Z', + }, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + const state = await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert: should not crash, extract_from is not set, extract_to is resolved + expect(processExitSpy).not.toHaveBeenCalled(); + expect(event.payload.event_context.extract_from).toBeUndefined(); + expect(event.payload.event_context.extract_to).toBe( + '2025-06-01T00:00:00.000Z' + ); + expect(state.state.pendingWorkersNewest).toBe('2025-06-01T00:00:00.000Z'); + }); + + it('should skip resolution when extraction_end_time has no type', async () => { + // Arrange: platform sends extraction_end_time without a type field + const event = createMockEvent(mockServer.baseUrl, { + 
context: { + snap_in_version_id: 'test_snap_in_version_id', + }, + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + type: TimeValueType.ABSOLUTE_TIME, + value: '2024-01-01T00:00:00Z', + }, + extraction_end_time: {} as unknown as TimeValue, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert: should not crash, extract_to is not set, extract_from is resolved + expect(processExitSpy).not.toHaveBeenCalled(); + expect(event.payload.event_context.extract_from).toBe( + '2024-01-01T00:00:00.000Z' + ); + expect(event.payload.event_context.extract_to).toBeUndefined(); + }); + + it('should skip resolution when both extraction times have no type', async () => { + // Arrange: platform sends both time values without type fields + const event = createMockEvent(mockServer.baseUrl, { + context: { + snap_in_version_id: 'test_snap_in_version_id', + }, + payload: { + event_type: EventType.StartExtractingMetadata, + event_context: { + extraction_start_time: { + value: 'some-value', + } as unknown as TimeValue, + extraction_end_time: { + value: 'some-value', + } as unknown as TimeValue, + }, + }, + }); + + const stringifiedState = JSON.stringify({ + snapInVersionId: 'test_snap_in_version_id', + }); + fetchStateSpy.mockResolvedValue({ state: stringifiedState }); + jest.spyOn(console, 'log').mockImplementation(() => {}); + + // Act + await createAdapterState({ + event, + initialState: {}, + initialDomainMapping: {}, + }); + + // Assert: should not crash, neither extraction time is resolved + expect(processExitSpy).not.toHaveBeenCalled(); + expect(event.payload.event_context.extract_from).toBeUndefined(); + 
expect(event.payload.event_context.extract_to).toBeUndefined(); + }); + }); +}); diff --git a/src/types/extraction.test.ts b/src/types/extraction.test.ts index d1f2d70..bdd76fb 100644 --- a/src/types/extraction.test.ts +++ b/src/types/extraction.test.ts @@ -7,23 +7,16 @@ import { TimeValueType, } from './extraction'; -// Test the EventContext interface and related extraction types describe('ExtractionTypes', () => { const baseEvent = createMockEvent(mockServer.baseUrl, { payload: { event_type: EventType.StartExtractingData }, }); - it('should create event context without optional fields', () => { - const event = { ...baseEvent }; - - // If this compiles, the test passes - expect(event).toBeDefined(); - expect(event.payload.event_context).toBeDefined(); - }); - it('should create event context with all optional fields', () => { + // Arrange const event = { ...baseEvent }; + // Act event.payload.event_context = { ...baseEvent.payload.event_context, extract_from: '2024-01-01T00:00:00Z', @@ -32,7 +25,7 @@ describe('ExtractionTypes', () => { reset_extract_from: true, } as EventContext; - expect(event).toBeDefined(); + // Assert expect(event.payload.event_context.extract_from).toBe( '2024-01-01T00:00:00Z' ); @@ -44,44 +37,54 @@ describe('ExtractionTypes', () => { }); it('should create event context with partial optional fields', () => { + // Arrange const event = { ...baseEvent }; + // Act event.payload.event_context = { ...baseEvent.payload.event_context, extract_from: '2024-01-01T00:00:00Z', } as EventContext; - expect(event).toBeDefined(); + // Assert expect(event.payload.event_context.extract_from).toBe( '2024-01-01T00:00:00Z' ); }); it('should handle different InitialSyncScope values', () => { + // Arrange const event = { ...baseEvent }; + // Act event.payload.event_context = { ...baseEvent.payload.event_context, initial_sync_scope: InitialSyncScope.FULL_HISTORY, } as EventContext; + // Assert expect(event.payload.event_context.initial_sync_scope).toBe( 
InitialSyncScope.FULL_HISTORY ); }); it('[edge] should handle null event context gracefully', () => { + // Arrange const event = { ...baseEvent }; + // Act // eslint-disable-next-line @typescript-eslint/no-explicit-any event.payload.event_context = null as any; + // Assert expect(event.payload.event_context).toBeNull(); }); it('[edge] should handle undefined optional fields', () => { + // Arrange const event = { ...baseEvent }; + // Act event.payload.event_context = { ...baseEvent.payload.event_context, extract_from: undefined, @@ -90,6 +93,7 @@ describe('ExtractionTypes', () => { reset_extract_from: undefined, } as EventContext; + // Assert expect(event.payload.event_context.extract_from).toBeUndefined(); expect(event.payload.event_context.extract_to).toBeUndefined(); expect(event.payload.event_context.initial_sync_scope).toBeUndefined(); @@ -97,6 +101,7 @@ describe('ExtractionTypes', () => { }); it('[edge] should handle explicit boolean values for reset_extract_from', () => { + // Arrange & Act const eventWithTrue = createMockEvent(mockServer.baseUrl, { payload: { event_type: EventType.StartExtractingData, @@ -105,7 +110,6 @@ describe('ExtractionTypes', () => { }, }, }); - const eventWithFalse = createMockEvent(mockServer.baseUrl, { payload: { event_type: EventType.StartExtractingData, @@ -115,6 +119,7 @@ describe('ExtractionTypes', () => { }, }); + // Assert expect(eventWithTrue.payload.event_context.reset_extract_from).toBe(true); expect(eventWithFalse.payload.event_context.reset_extract_from).toBe(false); expect(typeof eventWithTrue.payload.event_context.reset_extract_from).toBe( @@ -127,6 +132,7 @@ describe('ExtractionTypes', () => { describe('TimeValueType enum', () => { it('should have all expected values', () => { + // Assert expect(TimeValueType.WORKERS_OLDEST).toBe('workers_oldest'); expect(TimeValueType.WORKERS_OLDEST_MINUS_WINDOW).toBe( 'workers_oldest_minus_window' @@ -141,7 +147,10 @@ describe('ExtractionTypes', () => { }); it('should have exactly 
seven values', () => { + // Act const values = Object.values(TimeValueType); + + // Assert expect(values.length).toBe(7); }); }); diff --git a/src/uploader/uploader.test.ts b/src/uploader/uploader.test.ts index 680c4cb..41e9777 100644 --- a/src/uploader/uploader.test.ts +++ b/src/uploader/uploader.test.ts @@ -141,7 +141,6 @@ describe(Uploader.name, () => { const result = await uploader.upload(itemType, fetchedObjects); // Assert - expect(result.error).toBeDefined(); expect(result.error).toHaveProperty('message'); expect(result.artifact).toBeUndefined(); }); @@ -162,7 +161,6 @@ describe(Uploader.name, () => { const result = await uploader.upload(itemType, fetchedObjects); // Assert - expect(result.error).toBeDefined(); expect(result.error).toHaveProperty('message'); expect(result.artifact).toBeUndefined(); }); @@ -183,7 +181,6 @@ describe(Uploader.name, () => { const result = await uploader.upload(itemType, fetchedObjects); // Assert - expect(result.error).toBeDefined(); expect(result.error).toHaveProperty('message'); expect(result.artifact).toBeUndefined(); }); @@ -198,7 +195,6 @@ describe(Uploader.name, () => { const result = await uploader.upload(itemType, fetchedObjects); // Assert - expect(result.error).toBeDefined(); expect(result.error).toHaveProperty('message'); expect(result.artifact).toBeUndefined(); }); @@ -374,7 +370,6 @@ describe(Uploader.name, () => { // Assert expect(result.response).toBeUndefined(); - expect(result.error).toBeDefined(); expect(result.error).toHaveProperty('message'); }); }); @@ -699,7 +694,6 @@ describe(Uploader.name, () => { }); // Assert - expect(result.error).toBeDefined(); expect(result.error).toHaveProperty('message'); expect(result.attachments).toBeUndefined(); }); @@ -716,7 +710,6 @@ describe(Uploader.name, () => { }); // Assert - expect(result.error).toBeDefined(); expect(result.error).toHaveProperty('message'); expect(result.attachments).toBeUndefined(); }); @@ -735,7 +728,6 @@ describe(Uploader.name, () => { }); // Assert - 
expect(result.error).toBeDefined(); expect(result.error).toHaveProperty('message'); expect(result.attachments).toBeUndefined(); }); @@ -755,7 +747,6 @@ describe(Uploader.name, () => { }); // Assert - expect(result.error).toBeDefined(); expect(result.error).toHaveProperty('message'); expect(result.attachments).toBeUndefined(); });