From 15083ff33b8fd2b3f391be746cd57bcce212a9e7 Mon Sep 17 00:00:00 2001
From: mattiaformenti
Date: Fri, 14 Nov 2025 10:39:18 +0400
Subject: [PATCH 1/8] Add partial results persistence feature

- Add RUNNING status to JobStatus enum
- Add ThreadLocal in BulkScanWorker to pass ScanJobDescription to scan implementations
- Add upsertPartialScanResult() method to IPersistenceProvider interface
- Implement upsertPartialScanResult() in MongoPersistenceProvider
- Update DummyPersistenceProvider test implementation
- Update BulkScanWorkerManager to pass ScanJobDescription to worker.handle()

This enables real-time persistence of intermediate scan results to MongoDB
as probes complete during TLS scans.
---
 .../rub/nds/crawler/constant/JobStatus.java   |  2 +
 .../rub/nds/crawler/core/BulkScanWorker.java  | 30 ++++++++++++---
 .../crawler/core/BulkScanWorkerManager.java   |  2 +-
 .../persistence/IPersistenceProvider.java     | 11 ++++++
 .../persistence/MongoPersistenceProvider.java | 37 +++++++++++++++++++
 .../dummy/DummyPersistenceProvider.java       |  8 ++++
 6 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/src/main/java/de/rub/nds/crawler/constant/JobStatus.java b/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
index 99c521b..03765fd 100644
--- a/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
+++ b/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
@@ -15,6 +15,8 @@ public enum JobStatus {
 
     /** Job is waiting to be executed. */
     TO_BE_EXECUTED(false),
+    /** Job is currently being executed. Partial results may be available in DB. */
+    RUNNING(false),
     /** The domain was not resolvable. An empty result was written to DB. */
     UNRESOLVABLE(true),
     /** An uncaught exception occurred while resolving the host. */
diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
index 11831cc..37ed4c2 100644
--- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
+++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
@@ -9,6 +9,7 @@
 package de.rub.nds.crawler.core;
 
 import de.rub.nds.crawler.data.ScanConfig;
+import de.rub.nds.crawler.data.ScanJobDescription;
 import de.rub.nds.crawler.data.ScanTarget;
 import de.rub.nds.crawler.util.CanceallableThreadPoolExecutor;
 import de.rub.nds.scanner.core.execution.NamedThreadFactory;
@@ -41,6 +42,10 @@ public abstract class BulkScanWorker<T extends ScanConfig> {
     /** The scan configuration for this worker */
     protected final T scanConfig;
 
+    // ThreadLocal to pass ScanJobDescription to scan() implementations
+    private static final ThreadLocal<ScanJobDescription> currentJobDescription =
+            new ThreadLocal<>();
+
     /**
      * Calls the inner scan function and may handle cleanup. This is needed to wrap the scanner into
      * a future object such that we can handle timeouts properly.
@@ -75,22 +80,37 @@ protected BulkScanWorker(String bulkScanId, T scanConfig, int parallelScanThread
      * initialize itself. In this case it will also clean up itself if all jobs are done.
      *
      * @param scanTarget The target to scan.
+     * @param jobDescription The job description for this scan.
      * @return A future that resolves to the scan result once the scan is done.
      */
-    public Future<Document> handle(ScanTarget scanTarget) {
+    public Future<Document> handle(ScanTarget scanTarget, ScanJobDescription jobDescription) {
         // if we initialized ourself, we also clean up ourself
         shouldCleanupSelf.weakCompareAndSetAcquire(false, init());
         activeJobs.incrementAndGet();
         return timeoutExecutor.submit(
                 () -> {
-                    Document result = scan(scanTarget);
-                    if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) {
-                        cleanup();
+                    try {
+                        currentJobDescription.set(jobDescription);
+                        Document result = scan(scanTarget);
+                        if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) {
+                            cleanup();
+                        }
+                        return result;
+                    } finally {
+                        currentJobDescription.remove();
                     }
-                    return result;
                 });
     }
 
+    /**
+     * Get the ScanJobDescription for the current scan. Only valid when called from within scan().
+     *
+     * @return The current ScanJobDescription, or null if not in a scan context
+     */
+    protected ScanJobDescription getCurrentJobDescription() {
+        return currentJobDescription.get();
+    }
+
     /**
      * Scans a target and returns the result as a Document. This is the core scanning functionality
      * that must be implemented by subclasses.
diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
index 3e78782..482543c 100644
--- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
+++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
@@ -148,6 +148,6 @@ public Future<Document> handle(
                         bulkScanInfo.getScanConfig(),
                         parallelConnectionThreads,
                         parallelScanThreads);
-        return worker.handle(scanJobDescription.getScanTarget());
+        return worker.handle(scanJobDescription.getScanTarget(), scanJobDescription);
     }
 }
diff --git a/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java b/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java
index 30d2ffb..1619ff6 100644
--- a/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java
+++ b/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java
@@ -27,6 +27,17 @@ public interface IPersistenceProvider {
      */
     void insertScanResult(ScanResult scanResult, ScanJobDescription job);
 
+    /**
+     * Upsert a partial scan result into the database. This method updates an existing document with
+     * the same ID, or inserts a new one if it doesn't exist. Used during scanning to persist
+     * intermediate results as probes complete. The document is identified by the job's UUID and
+     * will be overwritten when insertScanResult() is called with the final result.
+     *
+     * @param partialResult The partial scan result to upsert (contains intermediate data).
+     * @param job The job that is being executed.
+     */
+    void upsertPartialScanResult(ScanResult partialResult, ScanJobDescription job);
+
     /**
      * Insert a bulk scan into the database. This is used to store metadata about the bulk scan.
      * This adds an ID to the bulk scan.
diff --git a/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java b/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java index a1278c1..6308a67 100644 --- a/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java +++ b/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java @@ -275,6 +275,43 @@ public void insertScanResult(ScanResult scanResult, ScanJobDescription scanJobDe } } + @Override + public void upsertPartialScanResult( + ScanResult partialResult, ScanJobDescription scanJobDescription) { + LOGGER.debug( + "Upserting partial scan result for job ID: {} with status: {}", + scanJobDescription.getId(), + partialResult.getResultStatus()); + + // Set the ID to match the job's UUID (same document as final result will use) + partialResult.setId(scanJobDescription.getId().toString()); + + try { + var collection = + resultCollectionCache.getUnchecked( + Pair.of( + scanJobDescription.getDbName(), + scanJobDescription.getCollectionName())); + + // Use replaceOne with upsert option to update existing or insert new + com.mongodb.client.model.ReplaceOptions replaceOptions = + new com.mongodb.client.model.ReplaceOptions().upsert(true); + + org.bson.Document filter = new org.bson.Document("_id", partialResult.getId()); + + collection.replaceOne(filter, partialResult, replaceOptions); + + LOGGER.debug( + "Upserted partial result for job ID: {} to collection: {}.{}", + scanJobDescription.getId(), + scanJobDescription.getDbName(), + scanJobDescription.getCollectionName()); + } catch (Exception e) { + LOGGER.warn("Exception while upserting partial result to MongoDB (non-fatal): ", e); + // Don't throw - partial result persistence should not fail the scan + } + } + @Override public List getScanResultsByTarget( String dbName, String collectionName, String target) { diff --git a/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java b/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java index 501b3d4..10ac1f8 100644 --- a/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java +++ b/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java @@ -25,6 +25,14 @@ public void insertScanResult(ScanResult scanResult, ScanJobDescription job) { results.add(scanResult); } + @Override + public void upsertPartialScanResult(ScanResult partialResult, ScanJobDescription job) { + // Remove existing result with same ID if present + results.removeIf(r -> r.getId() != null && r.getId().equals(partialResult.getId())); + // Add the partial result + results.add(partialResult); + } + @Override public void insertBulkScan(BulkScan bulkScan) { bulkScans.add(bulkScan); From c5ee54f7c6abe527b4d2ad4fb29f25d284a7c9e3 Mon Sep 17 00:00:00 2001 From: mattiaformenti Date: Wed, 19 Nov 2025 14:25:36 +0400 Subject: [PATCH 2/8] Removed unused code --- .../persistence/IPersistenceProvider.java | 11 ------ .../persistence/MongoPersistenceProvider.java | 37 ------------------- .../dummy/DummyPersistenceProvider.java | 8 ---- 3 files changed, 56 deletions(-) diff --git a/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java b/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java index 1619ff6..30d2ffb 100644 --- a/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java +++ b/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java @@ -27,17 +27,6 @@ public interface IPersistenceProvider { */ void insertScanResult(ScanResult scanResult, 
ScanJobDescription job); - /** - * Upsert a partial scan result into the database. This method updates an existing document with - * the same ID, or inserts a new one if it doesn't exist. Used during scanning to persist - * intermediate results as probes complete. The document is identified by the job's UUID and - * will be overwritten when insertScanResult() is called with the final result. - * - * @param partialResult The partial scan result to upsert (contains intermediate data). - * @param job The job that is being executed. - */ - void upsertPartialScanResult(ScanResult partialResult, ScanJobDescription job); - /** * Insert a bulk scan into the database. This is used to store metadata about the bulk scan. * This adds an ID to the bulk scan. diff --git a/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java b/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java index 6308a67..a1278c1 100644 --- a/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java +++ b/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java @@ -275,43 +275,6 @@ public void insertScanResult(ScanResult scanResult, ScanJobDescription scanJobDe } } - @Override - public void upsertPartialScanResult( - ScanResult partialResult, ScanJobDescription scanJobDescription) { - LOGGER.debug( - "Upserting partial scan result for job ID: {} with status: {}", - scanJobDescription.getId(), - partialResult.getResultStatus()); - - // Set the ID to match the job's UUID (same document as final result will use) - partialResult.setId(scanJobDescription.getId().toString()); - - try { - var collection = - resultCollectionCache.getUnchecked( - Pair.of( - scanJobDescription.getDbName(), - scanJobDescription.getCollectionName())); - - // Use replaceOne with upsert option to update existing or insert new - com.mongodb.client.model.ReplaceOptions replaceOptions = - new com.mongodb.client.model.ReplaceOptions().upsert(true); - - org.bson.Document filter = new org.bson.Document("_id", partialResult.getId()); - - collection.replaceOne(filter, partialResult, replaceOptions); - - LOGGER.debug( - "Upserted partial result for job ID: {} to collection: {}.{}", - scanJobDescription.getId(), - scanJobDescription.getDbName(), - scanJobDescription.getCollectionName()); - } catch (Exception e) { - LOGGER.warn("Exception while upserting partial result to MongoDB (non-fatal): ", e); - // Don't throw - partial result persistence should not fail the scan - } - } - @Override public List getScanResultsByTarget( String dbName, String collectionName, String target) { diff --git a/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java b/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java index 10ac1f8..501b3d4 100644 --- a/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java +++ b/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java @@ -25,14 +25,6 @@ public void insertScanResult(ScanResult scanResult, ScanJobDescription job) { results.add(scanResult); } - @Override - public void upsertPartialScanResult(ScanResult partialResult, ScanJobDescription job) { - // Remove existing result with same ID if present - results.removeIf(r -> r.getId() != null && r.getId().equals(partialResult.getId())); - // Add the partial result - results.add(partialResult); - } - @Override public void insertBulkScan(BulkScan bulkScan) { bulkScans.add(bulkScan); From ca7fa873720563c28d5e44e03671ee0701aaf6c5 Mon Sep 17 00:00:00 2001 From: mattiaformenti 
Date: Wed, 19 Nov 2025 18:17:26 +0400
Subject: [PATCH 3/8] Add BulkScanWorker unit tests

---
 .../nds/crawler/core/BulkScanWorkerTest.java  | 323 ++++++++++++++++++
 1 file changed, 323 insertions(+)
 create mode 100644 src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java

diff --git a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java
new file mode 100644
index 0000000..f98fa3a
--- /dev/null
+++ b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java
@@ -0,0 +1,323 @@
+/*
+ * TLS-Crawler - A TLS scanning tool to perform large scale scans with the TLS-Scanner
+ *
+ * Copyright 2018-2023 Ruhr University Bochum, Paderborn University, and Hackmanit GmbH
+ *
+ * Licensed under Apache License, Version 2.0
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ */
+package de.rub.nds.crawler.core;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import de.rub.nds.crawler.constant.JobStatus;
+import de.rub.nds.crawler.data.BulkScan;
+import de.rub.nds.crawler.data.ScanConfig;
+import de.rub.nds.crawler.data.ScanJobDescription;
+import de.rub.nds.crawler.data.ScanTarget;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Future;
+import org.bson.Document;
+import org.junit.jupiter.api.Test;
+
+class BulkScanWorkerTest {
+
+    // Test implementation of ScanConfig
+    static class TestScanConfig extends ScanConfig implements Serializable {
+        public TestScanConfig() {
+            super(de.rub.nds.scanner.core.config.ScannerDetail.NORMAL, 0, 60);
+        }
+
+        @Override
+        public BulkScanWorker<TestScanConfig> createWorker(
+                String bulkScanID, int parallelConnectionThreads, int parallelScanThreads) {
+            return new TestBulkScanWorker(bulkScanID, this, parallelScanThreads);
+        }
+    }
+
+    // Test implementation of BulkScanWorker
+    static class TestBulkScanWorker extends BulkScanWorker<TestScanConfig> {
+        private boolean initCalled = false;
+        private boolean cleanupCalled = false;
+        private ScanJobDescription capturedJobDescription = null;
+
+        TestBulkScanWorker(String bulkScanId, TestScanConfig scanConfig, int parallelScanThreads) {
+            super(bulkScanId, scanConfig, parallelScanThreads);
+        }
+
+        @Override
+        public Document scan(ScanTarget scanTarget) {
+            // Capture the job description during scan
+            capturedJobDescription = getCurrentJobDescription();
+
+            Document result = new Document();
+            result.put("target", scanTarget.getHostname());
+            result.put("hasJobDescription", capturedJobDescription != null);
+            if (capturedJobDescription != null) {
+                result.put("jobId", capturedJobDescription.getId().toString());
+            }
+            return result;
+        }
+
+        @Override
+        protected void initInternal() {
+            initCalled = true;
+        }
+
+        @Override
+        protected void cleanupInternal() {
+            cleanupCalled = true;
+        }
+
+        public boolean isInitCalled() {
+            return initCalled;
+        }
+
+        public boolean isCleanupCalled() {
+            return cleanupCalled;
+        }
+
+        public ScanJobDescription getCapturedJobDescription() {
+            return capturedJobDescription;
+        }
+    }
+
+    @Test
+    void testGetCurrentJobDescriptionReturnsNullOutsideScanContext() {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        // getCurrentJobDescription() is protected, so we can't call it directly from test
+        // But we can verify through the scan() method that it returns null when not in context
+        assertNull(
+                worker.getCapturedJobDescription(),
+                "Job description should be null before any scan");
+    }
+
+    @Test
+    void testGetCurrentJobDescriptionReturnsCorrectJobInScanContext() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        ScanTarget target = new ScanTarget();
+        target.setHostname("example.com");
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        // Execute the scan
+        Future<Document> future = worker.handle(target, jobDescription);
+        Document result = future.get();
+
+        // Verify the job description was available during scan
+        assertTrue(
+                result.getBoolean("hasJobDescription"),
+                "Job description should be available in scan context");
+        assertEquals(jobDescription.getId().toString(), result.getString("jobId"));
+
+        // Verify the captured job description matches
+        assertNotNull(worker.getCapturedJobDescription());
+        assertEquals(jobDescription.getId(), worker.getCapturedJobDescription().getId());
+        assertEquals(target, worker.getCapturedJobDescription().getScanTarget());
+    }
+
+    @Test
+    void testThreadLocalIsCleanedUpAfterScan() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        ScanTarget target = new ScanTarget();
+        target.setHostname("example.com");
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        // Execute the scan
+        Future<Document> future = worker.handle(target, jobDescription);
+        future.get(); // Wait for completion
+
+        // After scan completes, the ThreadLocal should be cleaned up
+        // We can verify this by running another scan and checking it gets the new job description
+        ScanTarget newTarget = new ScanTarget();
+        newTarget.setHostname("example2.com");
+        newTarget.setPort(443);
+
+        ScanJobDescription newJobDescription =
+                new ScanJobDescription(newTarget, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        Future<Document> future2 = worker.handle(newTarget, newJobDescription);
+        Document result2 = future2.get();
+
+        // The second scan should have the second job description, not the first
+        assertEquals(newJobDescription.getId().toString(), result2.getString("jobId"));
+        assertEquals(newJobDescription.getId(), worker.getCapturedJobDescription().getId());
+    }
+
+    @Test
+    void testMultipleConcurrentScansHaveSeparateContexts() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 2);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        // Create multiple job descriptions
+        List<ScanJobDescription> jobDescriptions = new ArrayList<>();
+        List<Future<Document>> futures = new ArrayList<>();
+
+        for (int i = 0; i < 5; i++) {
+            ScanTarget target = new ScanTarget();
+            target.setHostname("example" + i + ".com");
+            target.setPort(443);
+
+            ScanJobDescription jobDescription =
+                    new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+            jobDescriptions.add(jobDescription);
+
+            futures.add(worker.handle(target, jobDescription));
+        }
+
+        // Wait for all scans to complete 
and verify each got the correct job description + for (int i = 0; i < 5; i++) { + Document result = futures.get(i).get(); + assertTrue(result.getBoolean("hasJobDescription")); + assertEquals( + jobDescriptions.get(i).getId().toString(), + result.getString("jobId"), + "Scan " + i + " should have its own job description"); + } + } + + @Test + void testInitializationIsCalledOnFirstHandle() throws Exception { + TestScanConfig config = new TestScanConfig(); + TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1); + + assertFalse(worker.isInitCalled(), "Init should not be called before first handle"); + + ScanTarget target = new ScanTarget(); + target.setHostname("example.com"); + target.setPort(443); + + BulkScan bulkScan = + new BulkScan( + BulkScanWorkerTest.class, + BulkScanWorkerTest.class, + "test-db", + config, + System.currentTimeMillis(), + false, + null); + + ScanJobDescription jobDescription = + new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED); + + Future future = worker.handle(target, jobDescription); + future.get(); + + assertTrue(worker.isInitCalled(), "Init should be called on first handle"); + } + + @Test + void testCleanupIsCalledWhenAllJobsComplete() throws Exception { + TestScanConfig config = new TestScanConfig(); + TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1); + + ScanTarget target = new ScanTarget(); + target.setHostname("example.com"); + target.setPort(443); + + BulkScan bulkScan = + new BulkScan( + BulkScanWorkerTest.class, + BulkScanWorkerTest.class, + "test-db", + config, + System.currentTimeMillis(), + false, + null); + + ScanJobDescription jobDescription = + new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED); + + Future future = worker.handle(target, jobDescription); + future.get(); + + // Give cleanup a moment to execute (it runs after job completion) + Thread.sleep(100); + + assertTrue(worker.isCleanupCalled(), "Cleanup should be called when all jobs complete"); + } + + @Test + void testManualInitPreventsSelfCleanup() throws Exception { + TestScanConfig config = new TestScanConfig(); + TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1); + + // Call init manually + worker.init(); + assertTrue(worker.isInitCalled(), "Init should be called"); + + ScanTarget target = new ScanTarget(); + target.setHostname("example.com"); + target.setPort(443); + + BulkScan bulkScan = + new BulkScan( + BulkScanWorkerTest.class, + BulkScanWorkerTest.class, + "test-db", + config, + System.currentTimeMillis(), + false, + null); + + ScanJobDescription jobDescription = + new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED); + + Future future = worker.handle(target, jobDescription); + future.get(); + + // Give cleanup a moment (if it were to execute) + Thread.sleep(100); + + assertFalse( + worker.isCleanupCalled(), + "Cleanup should NOT be called when init was manual (shouldCleanupSelf = false)"); + + // Cleanup should only be called when we explicitly call it + worker.cleanup(); + assertTrue(worker.isCleanupCalled(), "Cleanup should be called when explicitly called"); + } +} From fccfe08d8ed80feb950ab7f127a8910078e62937 Mon Sep 17 00:00:00 2001 From: mattiaformenti Date: Thu, 20 Nov 2025 04:26:56 +0400 Subject: [PATCH 4/8] changed BulkScanWorker.handle() to take only scanJobDescription as an input. 
scanTarget can be accessed from scanJobDescription, and the UUID in scanJobDescription is needed to write partial results to MongoDB during a scan --- .../de/rub/nds/crawler/core/BulkScanWorker.java | 5 ++--- .../nds/crawler/core/BulkScanWorkerManager.java | 2 +- .../rub/nds/crawler/core/BulkScanWorkerTest.java | 14 +++++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java index 37ed4c2..74f9fab 100644 --- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java +++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java @@ -79,11 +79,10 @@ protected BulkScanWorker(String bulkScanId, T scanConfig, int parallelScanThread * Handles a scan target by submitting it to the executor. If init was not called, it will * initialize itself. In this case it will also clean up itself if all jobs are done. * - * @param scanTarget The target to scan. * @param jobDescription The job description for this scan. * @return A future that resolves to the scan result once the scan is done. */ - public Future handle(ScanTarget scanTarget, ScanJobDescription jobDescription) { + public Future handle(ScanJobDescription jobDescription) { // if we initialized ourself, we also clean up ourself shouldCleanupSelf.weakCompareAndSetAcquire(false, init()); activeJobs.incrementAndGet(); @@ -91,7 +90,7 @@ public Future handle(ScanTarget scanTarget, ScanJobDescription jobDesc () -> { try { currentJobDescription.set(jobDescription); - Document result = scan(scanTarget); + Document result = scan(jobDescription.getScanTarget()); if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) { cleanup(); } diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java index 482543c..7f80cd9 100644 --- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java +++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java @@ -148,6 +148,6 @@ public Future handle( bulkScanInfo.getScanConfig(), parallelConnectionThreads, parallelScanThreads); - return worker.handle(scanJobDescription.getScanTarget(), scanJobDescription); + return worker.handle(scanJobDescription); } } diff --git a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java index f98fa3a..3d71093 100644 --- a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java +++ b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java @@ -119,7 +119,7 @@ void testGetCurrentJobDescriptionReturnsCorrectJobInScanContext() throws Excepti new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED); // Execute the scan - Future future = worker.handle(target, jobDescription); + Future future = worker.handle(jobDescription); Document result = future.get(); // Verify the job description was available during scan @@ -157,7 +157,7 @@ void testThreadLocalIsCleanedUpAfterScan() throws Exception { new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED); // Execute the scan - Future future = worker.handle(target, jobDescription); + Future future = worker.handle(jobDescription); future.get(); // Wait for completion // After scan completes, the ThreadLocal should be cleaned up @@ -169,7 +169,7 @@ void testThreadLocalIsCleanedUpAfterScan() throws Exception { ScanJobDescription newJobDescription = new ScanJobDescription(newTarget, bulkScan, 
JobStatus.TO_BE_EXECUTED); - Future future2 = worker.handle(newTarget, newJobDescription); + Future future2 = worker.handle(newJobDescription); Document result2 = future2.get(); // The second scan should have the second job description, not the first @@ -205,7 +205,7 @@ void testMultipleConcurrentScansHaveSeparateContexts() throws Exception { new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED); jobDescriptions.add(jobDescription); - futures.add(worker.handle(target, jobDescription)); + futures.add(worker.handle(jobDescription)); } // Wait for all scans to complete and verify each got the correct job description @@ -243,7 +243,7 @@ void testInitializationIsCalledOnFirstHandle() throws Exception { ScanJobDescription jobDescription = new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED); - Future future = worker.handle(target, jobDescription); + Future future = worker.handle(jobDescription); future.get(); assertTrue(worker.isInitCalled(), "Init should be called on first handle"); @@ -271,7 +271,7 @@ void testCleanupIsCalledWhenAllJobsComplete() throws Exception { ScanJobDescription jobDescription = new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED); - Future future = worker.handle(target, jobDescription); + Future future = worker.handle(jobDescription); future.get(); // Give cleanup a moment to execute (it runs after job completion) @@ -306,7 +306,7 @@ void testManualInitPreventsSelfCleanup() throws Exception { ScanJobDescription jobDescription = new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED); - Future future = worker.handle(target, jobDescription); + Future future = worker.handle(jobDescription); future.get(); // Give cleanup a moment (if it were to execute) From 1f63b37763789e31549237393393f76248806311 Mon Sep 17 00:00:00 2001 From: mattiaformenti Date: Mon, 24 Nov 2025 09:44:29 +0400 Subject: [PATCH 5/8] Modify BulkScanWorkerTest to simulate MongoDB operations using DummyPersistenceProvider --- .../nds/crawler/core/BulkScanWorkerTest.java | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java index 3d71093..be783c0 100644 --- a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java +++ b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java @@ -14,7 +14,9 @@ import de.rub.nds.crawler.data.BulkScan; import de.rub.nds.crawler.data.ScanConfig; import de.rub.nds.crawler.data.ScanJobDescription; +import de.rub.nds.crawler.data.ScanResult; import de.rub.nds.crawler.data.ScanTarget; +import de.rub.nds.crawler.dummy.DummyPersistenceProvider; import java.io.Serializable; import java.util.ArrayList; import java.util.List; @@ -132,6 +134,49 @@ void testGetCurrentJobDescriptionReturnsCorrectJobInScanContext() throws Excepti assertNotNull(worker.getCapturedJobDescription()); assertEquals(jobDescription.getId(), worker.getCapturedJobDescription().getId()); assertEquals(target, worker.getCapturedJobDescription().getScanTarget()); + + // Simulate the partial results persistence flow + DummyPersistenceProvider persistenceProvider = new DummyPersistenceProvider(); + + // Update job status to SUCCESS (required by ScanResult constructor) + jobDescription.setStatus(JobStatus.SUCCESS); + + // Create ScanResult from the scan result Document and job description + ScanResult scanResult = new ScanResult(jobDescription, result); + + // Verify ScanResult has the correct scanJobDescriptionId + 
assertEquals( + jobDescription.getId().toString(), + scanResult.getScanJobDescriptionId(), + "ScanResult should use job description UUID as scanJobDescriptionId"); + + // Simulate persisting to MongoDB + persistenceProvider.insertScanResult(scanResult, jobDescription); + + // Simulate retrieving from MongoDB by scanJobDescriptionId + ScanResult retrievedResult = + persistenceProvider.getScanResultByScanJobDescriptionId( + "test-db", "test-collection", jobDescription.getId().toString()); + + // Verify the retrieved result matches + assertNotNull( + retrievedResult, "Should be able to retrieve ScanResult by job description ID"); + assertEquals( + jobDescription.getId().toString(), + retrievedResult.getScanJobDescriptionId(), + "Retrieved result should have matching scanJobDescriptionId"); + assertEquals( + scanResult.getBulkScan(), + retrievedResult.getBulkScan(), + "Retrieved result should have matching bulk scan ID"); + assertEquals( + scanResult.getScanTarget(), + retrievedResult.getScanTarget(), + "Retrieved result should have matching scan target"); + assertEquals( + scanResult.getResult(), + retrievedResult.getResult(), + "Retrieved result should have matching result document"); } @Test From 60b44eb46fefa1cfcca8c0a7f5828a30d4cd90ab Mon Sep 17 00:00:00 2001 From: mattiaformenti Date: Wed, 26 Nov 2025 11:29:35 +0400 Subject: [PATCH 6/8] Replace example.com with example.invalid --- .../rub/nds/crawler/core/BulkScanWorkerTest.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java index be783c0..227fd4b 100644 --- a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java +++ b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java @@ -104,7 +104,7 @@ void testGetCurrentJobDescriptionReturnsCorrectJobInScanContext() throws Excepti TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1); ScanTarget target = new ScanTarget(); - target.setHostname("example.com"); + target.setHostname("example.invalid"); target.setPort(443); BulkScan bulkScan = @@ -185,7 +185,7 @@ void testThreadLocalIsCleanedUpAfterScan() throws Exception { TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1); ScanTarget target = new ScanTarget(); - target.setHostname("example.com"); + target.setHostname("example.invalid"); target.setPort(443); BulkScan bulkScan = @@ -208,7 +208,7 @@ void testThreadLocalIsCleanedUpAfterScan() throws Exception { // After scan completes, the ThreadLocal should be cleaned up // We can verify this by running another scan and checking it gets the new job description ScanTarget newTarget = new ScanTarget(); - newTarget.setHostname("example2.com"); + newTarget.setHostname("example2.invalid"); newTarget.setPort(443); ScanJobDescription newJobDescription = @@ -243,7 +243,7 @@ void testMultipleConcurrentScansHaveSeparateContexts() throws Exception { for (int i = 0; i < 5; i++) { ScanTarget target = new ScanTarget(); - target.setHostname("example" + i + ".com"); + target.setHostname("example" + i + ".invalid"); target.setPort(443); ScanJobDescription jobDescription = @@ -272,7 +272,7 @@ void testInitializationIsCalledOnFirstHandle() throws Exception { assertFalse(worker.isInitCalled(), "Init should not be called before first handle"); ScanTarget target = new ScanTarget(); - target.setHostname("example.com"); + target.setHostname("example.invalid"); target.setPort(443); BulkScan bulkScan = @@ 
-300,7 +300,7 @@ void testCleanupIsCalledWhenAllJobsComplete() throws Exception { TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1); ScanTarget target = new ScanTarget(); - target.setHostname("example.com"); + target.setHostname("example.invalid"); target.setPort(443); BulkScan bulkScan = @@ -335,7 +335,7 @@ void testManualInitPreventsSelfCleanup() throws Exception { assertTrue(worker.isInitCalled(), "Init should be called"); ScanTarget target = new ScanTarget(); - target.setHostname("example.com"); + target.setHostname("example.invalid"); target.setPort(443); BulkScan bulkScan = From 2fc13dd27a66ae90fd9ce3db3c6e6aefce17a9f6 Mon Sep 17 00:00:00 2001 From: mattiaformenti Date: Wed, 26 Nov 2025 11:40:02 +0400 Subject: [PATCH 7/8] Fix BulkScanWorkerTest --- .../java/de/rub/nds/crawler/core/BulkScanWorkerTest.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java index 227fd4b..b0b8383 100644 --- a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java +++ b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java @@ -52,13 +52,14 @@ static class TestBulkScanWorker extends BulkScanWorker { @Override public Document scan(ScanTarget scanTarget) { // Capture the job description during scan - capturedJobDescription = getCurrentJobDescription(); + ScanJobDescription localJobDescription = getCurrentJobDescription(); + capturedJobDescription = localJobDescription; Document result = new Document(); result.put("target", scanTarget.getHostname()); - result.put("hasJobDescription", capturedJobDescription != null); - if (capturedJobDescription != null) { - result.put("jobId", capturedJobDescription.getId().toString()); + result.put("hasJobDescription", localJobDescription != null); + if (localJobDescription != null) { + result.put("jobId", localJobDescription.getId().toString()); } return result; } From 4f437a818c067b6f546d8a72d56fa06eb5a46579 Mon Sep 17 00:00:00 2001 From: mattiaformenti Date: Wed, 26 Nov 2025 12:05:20 +0400 Subject: [PATCH 8/8] Update BulkScanWorkerTest to use IPs Replace all hostname usages with TEST-NET-1 IPs (RFC 5737) --- .../rub/nds/crawler/core/BulkScanWorkerTest.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java index b0b8383..52a1a13 100644 --- a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java +++ b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java @@ -56,7 +56,7 @@ public Document scan(ScanTarget scanTarget) { capturedJobDescription = localJobDescription; Document result = new Document(); - result.put("target", scanTarget.getHostname()); + result.put("target", scanTarget.getIp()); result.put("hasJobDescription", localJobDescription != null); if (localJobDescription != null) { result.put("jobId", localJobDescription.getId().toString()); @@ -105,7 +105,7 @@ void testGetCurrentJobDescriptionReturnsCorrectJobInScanContext() throws Excepti TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1); ScanTarget target = new ScanTarget(); - target.setHostname("example.invalid"); + target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737) target.setPort(443); BulkScan bulkScan = @@ -186,7 +186,7 @@ void testThreadLocalIsCleanedUpAfterScan() throws Exception { TestBulkScanWorker worker = new 
TestBulkScanWorker("test-bulk-id", config, 1); ScanTarget target = new ScanTarget(); - target.setHostname("example.invalid"); + target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737) target.setPort(443); BulkScan bulkScan = @@ -209,7 +209,7 @@ void testThreadLocalIsCleanedUpAfterScan() throws Exception { // After scan completes, the ThreadLocal should be cleaned up // We can verify this by running another scan and checking it gets the new job description ScanTarget newTarget = new ScanTarget(); - newTarget.setHostname("example2.invalid"); + newTarget.setIp("192.0.2.2"); // TEST-NET-1 (RFC 5737) newTarget.setPort(443); ScanJobDescription newJobDescription = @@ -244,7 +244,7 @@ void testMultipleConcurrentScansHaveSeparateContexts() throws Exception { for (int i = 0; i < 5; i++) { ScanTarget target = new ScanTarget(); - target.setHostname("example" + i + ".invalid"); + target.setIp("192.0.2." + (i + 1)); // TEST-NET-1 (RFC 5737) target.setPort(443); ScanJobDescription jobDescription = @@ -273,7 +273,7 @@ void testInitializationIsCalledOnFirstHandle() throws Exception { assertFalse(worker.isInitCalled(), "Init should not be called before first handle"); ScanTarget target = new ScanTarget(); - target.setHostname("example.invalid"); + target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737) target.setPort(443); BulkScan bulkScan = @@ -301,7 +301,7 @@ void testCleanupIsCalledWhenAllJobsComplete() throws Exception { TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1); ScanTarget target = new ScanTarget(); - target.setHostname("example.invalid"); + target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737) target.setPort(443); BulkScan bulkScan = @@ -336,7 +336,7 @@ void testManualInitPreventsSelfCleanup() throws Exception { assertTrue(worker.isInitCalled(), "Init should be called"); ScanTarget target = new ScanTarget(); - target.setHostname("example.invalid"); + target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737) target.setPort(443); BulkScan bulkScan =