diff --git a/src/main/java/de/rub/nds/crawler/constant/JobStatus.java b/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
index 99c521b..03765fd 100644
--- a/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
+++ b/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
@@ -15,6 +15,8 @@ public enum JobStatus {
     /** Job is waiting to be executed. */
     TO_BE_EXECUTED(false),
+    /** Job is currently being executed. Partial results may be available in DB. */
+    RUNNING(false),
     /** The domain was not resolvable. An empty result was written to DB. */
     UNRESOLVABLE(true),
     /** An uncaught exception occurred while resolving the host. */
diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
index 11831cc..74f9fab 100644
--- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
+++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
@@ -9,6 +9,7 @@
 package de.rub.nds.crawler.core;
 
 import de.rub.nds.crawler.data.ScanConfig;
+import de.rub.nds.crawler.data.ScanJobDescription;
 import de.rub.nds.crawler.data.ScanTarget;
 import de.rub.nds.crawler.util.CanceallableThreadPoolExecutor;
 import de.rub.nds.scanner.core.execution.NamedThreadFactory;
@@ -41,6 +42,10 @@ public abstract class BulkScanWorker<T extends ScanConfig> {
     /** The scan configuration for this worker */
     protected final T scanConfig;
 
+    // ThreadLocal to pass ScanJobDescription to scan() implementations
+    private static final ThreadLocal<ScanJobDescription> currentJobDescription =
+            new ThreadLocal<>();
+
     /**
      * Calls the inner scan function and may handle cleanup. This is needed to wrap the scanner into
      * a future object such that we can handle timeouts properly.
@@ -74,23 +79,37 @@ protected BulkScanWorker(String bulkScanId, T scanConfig, int parallelScanThread
      * Handles a scan target by submitting it to the executor. If init was not called, it will
      * initialize itself. In this case it will also clean up itself if all jobs are done.
      *
-     * @param scanTarget The target to scan.
+     * @param jobDescription The job description for this scan.
      * @return A future that resolves to the scan result once the scan is done.
      */
-    public Future<Document> handle(ScanTarget scanTarget) {
+    public Future<Document> handle(ScanJobDescription jobDescription) {
         // if we initialized ourself, we also clean up ourself
         shouldCleanupSelf.weakCompareAndSetAcquire(false, init());
         activeJobs.incrementAndGet();
         return timeoutExecutor.submit(
                 () -> {
-                    Document result = scan(scanTarget);
-                    if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) {
-                        cleanup();
+                    try {
+                        currentJobDescription.set(jobDescription);
+                        Document result = scan(jobDescription.getScanTarget());
+                        if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) {
+                            cleanup();
+                        }
+                        return result;
+                    } finally {
+                        currentJobDescription.remove();
                     }
-                    return result;
                 });
     }
 
+    /**
+     * Get the ScanJobDescription for the current scan. Only valid when called from within scan().
+     *
+     * @return The current ScanJobDescription, or null if not in a scan context
+     */
+    protected ScanJobDescription getCurrentJobDescription() {
+        return currentJobDescription.get();
+    }
+
     /**
      * Scans a target and returns the result as a Document. This is the core scanning functionality
      * that must be implemented by subclasses.
diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
index 3e78782..7f80cd9 100644
--- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
+++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
@@ -148,6 +148,6 @@ public Future<Document> handle(
                         bulkScanInfo.getScanConfig(),
                         parallelConnectionThreads,
                         parallelScanThreads);
-        return worker.handle(scanJobDescription.getScanTarget());
+        return worker.handle(scanJobDescription);
     }
 }
diff --git a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java
new file mode 100644
index 0000000..3d71093
--- /dev/null
+++ b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java
@@ -0,0 +1,323 @@
+/*
+ * TLS-Crawler - A TLS scanning tool to perform large scale scans with the TLS-Scanner
+ *
+ * Copyright 2018-2023 Ruhr University Bochum, Paderborn University, and Hackmanit GmbH
+ *
+ * Licensed under Apache License, Version 2.0
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ */
+package de.rub.nds.crawler.core;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import de.rub.nds.crawler.constant.JobStatus;
+import de.rub.nds.crawler.data.BulkScan;
+import de.rub.nds.crawler.data.ScanConfig;
+import de.rub.nds.crawler.data.ScanJobDescription;
+import de.rub.nds.crawler.data.ScanTarget;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Future;
+import org.bson.Document;
+import org.junit.jupiter.api.Test;
+
+class BulkScanWorkerTest {
+
+    // Test implementation of ScanConfig
+    static class TestScanConfig extends ScanConfig implements Serializable {
+        public TestScanConfig() {
+            super(de.rub.nds.scanner.core.config.ScannerDetail.NORMAL, 0, 60);
+        }
+
+        @Override
+        public BulkScanWorker<TestScanConfig> createWorker(
+                String bulkScanID, int parallelConnectionThreads, int parallelScanThreads) {
+            return new TestBulkScanWorker(bulkScanID, this, parallelScanThreads);
+        }
+    }
+
+    // Test implementation of BulkScanWorker
+    static class TestBulkScanWorker extends BulkScanWorker<TestScanConfig> {
+        private boolean initCalled = false;
+        private boolean cleanupCalled = false;
+        private ScanJobDescription capturedJobDescription = null;
+
+        TestBulkScanWorker(String bulkScanId, TestScanConfig scanConfig, int parallelScanThreads) {
+            super(bulkScanId, scanConfig, parallelScanThreads);
+        }
+
+        @Override
+        public Document scan(ScanTarget scanTarget) {
+            // Capture the job description during scan
+            capturedJobDescription = getCurrentJobDescription();
+
+            Document result = new Document();
+            result.put("target", scanTarget.getHostname());
+            result.put("hasJobDescription", capturedJobDescription != null);
+            if (capturedJobDescription != null) {
+                result.put("jobId", capturedJobDescription.getId().toString());
+            }
+            return result;
+        }
+
+        @Override
+        protected void initInternal() {
+            initCalled = true;
+        }
+
+        @Override
+        protected void cleanupInternal() {
+            cleanupCalled = true;
+        }
+
+        public boolean isInitCalled() {
+            return initCalled;
+        }
+
+        public boolean isCleanupCalled() {
+            return cleanupCalled;
+        }
+
+        public ScanJobDescription getCapturedJobDescription() {
+            return capturedJobDescription;
+        }
+    }
+
+    @Test
+    void testGetCurrentJobDescriptionReturnsNullOutsideScanContext() {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        // getCurrentJobDescription() is protected, so we can't call it directly from test
+        // But we can verify through the scan() method that it returns null when not in context
+        assertNull(
+                worker.getCapturedJobDescription(),
+                "Job description should be null before any scan");
+    }
+
+    @Test
+    void testGetCurrentJobDescriptionReturnsCorrectJobInScanContext() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        ScanTarget target = new ScanTarget();
+        target.setHostname("example.com");
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        // Execute the scan
+        Future<Document> future = worker.handle(jobDescription);
+        Document result = future.get();
+
+        // Verify the job description was available during scan
+        assertTrue(
+                result.getBoolean("hasJobDescription"),
+                "Job description should be available in scan context");
+        assertEquals(jobDescription.getId().toString(), result.getString("jobId"));
+
+        // Verify the captured job description matches
+        assertNotNull(worker.getCapturedJobDescription());
+        assertEquals(jobDescription.getId(), worker.getCapturedJobDescription().getId());
+        assertEquals(target, worker.getCapturedJobDescription().getScanTarget());
+    }
+
+    @Test
+    void testThreadLocalIsCleanedUpAfterScan() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        ScanTarget target = new ScanTarget();
+        target.setHostname("example.com");
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        // Execute the scan
+        Future<Document> future = worker.handle(jobDescription);
+        future.get(); // Wait for completion
+
+        // After scan completes, the ThreadLocal should be cleaned up
+        // We can verify this by running another scan and checking it gets the new job description
+        ScanTarget newTarget = new ScanTarget();
+        newTarget.setHostname("example2.com");
+        newTarget.setPort(443);
+
+        ScanJobDescription newJobDescription =
+                new ScanJobDescription(newTarget, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        Future<Document> future2 = worker.handle(newJobDescription);
+        Document result2 = future2.get();
+
+        // The second scan should have the second job description, not the first
+        assertEquals(newJobDescription.getId().toString(), result2.getString("jobId"));
+        assertEquals(newJobDescription.getId(), worker.getCapturedJobDescription().getId());
+    }
+
+    @Test
+    void testMultipleConcurrentScansHaveSeparateContexts() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 2);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        // Create multiple job descriptions
+        List<ScanJobDescription> jobDescriptions = new ArrayList<>();
+        List<Future<Document>> futures = new ArrayList<>();
+
+        for (int i = 0; i < 5; i++) {
+            ScanTarget target = new ScanTarget();
+            target.setHostname("example" + i + ".com");
+            target.setPort(443);
+
+            ScanJobDescription jobDescription =
+                    new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+            jobDescriptions.add(jobDescription);
+
+            futures.add(worker.handle(jobDescription));
+        }
+
+        // Wait for all scans to complete and verify each got the correct job description
+        for (int i = 0; i < 5; i++) {
+            Document result = futures.get(i).get();
+            assertTrue(result.getBoolean("hasJobDescription"));
+            assertEquals(
+                    jobDescriptions.get(i).getId().toString(),
+                    result.getString("jobId"),
+                    "Scan " + i + " should have its own job description");
+        }
+    }
+
+    @Test
+    void testInitializationIsCalledOnFirstHandle() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        assertFalse(worker.isInitCalled(), "Init should not be called before first handle");
+
+        ScanTarget target = new ScanTarget();
+        target.setHostname("example.com");
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        Future<Document> future = worker.handle(jobDescription);
+        future.get();
+
+        assertTrue(worker.isInitCalled(), "Init should be called on first handle");
+    }
+
+    @Test
+    void testCleanupIsCalledWhenAllJobsComplete() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        ScanTarget target = new ScanTarget();
+        target.setHostname("example.com");
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        Future<Document> future = worker.handle(jobDescription);
+        future.get();
+
+        // Give cleanup a moment to execute (it runs after job completion)
+        Thread.sleep(100);
+
+        assertTrue(worker.isCleanupCalled(), "Cleanup should be called when all jobs complete");
+    }
+
+    @Test
+    void testManualInitPreventsSelfCleanup() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        // Call init manually
+        worker.init();
+        assertTrue(worker.isInitCalled(), "Init should be called");
+
+        ScanTarget target = new ScanTarget();
+        target.setHostname("example.com");
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        Future<Document> future = worker.handle(jobDescription);
+        future.get();
+
+        // Give cleanup a moment (if it were to execute)
+        Thread.sleep(100);
+
+        assertFalse(
+                worker.isCleanupCalled(),
+                "Cleanup should NOT be called when init was manual (shouldCleanupSelf = false)");
+
+        // Cleanup should only be called when we explicitly call it
+        worker.cleanup();
+        assertTrue(worker.isCleanupCalled(), "Cleanup should be called when explicitly called");
+    }
+}
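
Usage sketch (not part of this patch): the example below shows how a downstream worker might combine the new getCurrentJobDescription() accessor with the RUNNING status to persist an intermediate result document. PartialResultWorker and PartialResultStore are hypothetical, illustrative names; only BulkScanWorker, ScanJobDescription, ScanTarget, JobStatus, and org.bson.Document come from this change set.

// Hypothetical example; class and interface names are illustrative only.
import de.rub.nds.crawler.constant.JobStatus;
import de.rub.nds.crawler.core.BulkScanWorker;
import de.rub.nds.crawler.data.ScanConfig;
import de.rub.nds.crawler.data.ScanJobDescription;
import de.rub.nds.crawler.data.ScanTarget;
import org.bson.Document;

public class PartialResultWorker extends BulkScanWorker<ScanConfig> {

    /** Hypothetical sink for intermediate results, e.g. backed by the result DB. */
    public interface PartialResultStore {
        void store(String jobId, JobStatus status, Document partial);
    }

    private final PartialResultStore store;

    public PartialResultWorker(
            String bulkScanId,
            ScanConfig scanConfig,
            int parallelScanThreads,
            PartialResultStore store) {
        super(bulkScanId, scanConfig, parallelScanThreads);
        this.store = store;
    }

    @Override
    public Document scan(ScanTarget scanTarget) {
        // Available here because handle() sets the ThreadLocal before invoking scan()
        ScanJobDescription job = getCurrentJobDescription();

        Document partial = new Document("target", scanTarget.getHostname());
        if (job != null) {
            // Persist an intermediate snapshot while the job is still RUNNING
            store.store(job.getId().toString(), JobStatus.RUNNING, partial);
        }

        // ... run the actual scanner and fill in the final result here ...
        return partial;
    }

    @Override
    protected void initInternal() {}

    @Override
    protected void cleanupInternal() {}
}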