Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/main/java/de/rub/nds/crawler/constant/JobStatus.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
public enum JobStatus {
/** Job is waiting to be executed. */
TO_BE_EXECUTED(false),
/** Job is currently being executed. Partial results may be available in DB. */
RUNNING(false),
/** The domain was not resolvable. An empty result was written to DB. */
UNRESOLVABLE(true),
/** An uncaught exception occurred while resolving the host. */
Expand Down
31 changes: 25 additions & 6 deletions src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package de.rub.nds.crawler.core;

import de.rub.nds.crawler.data.ScanConfig;
import de.rub.nds.crawler.data.ScanJobDescription;
import de.rub.nds.crawler.data.ScanTarget;
import de.rub.nds.crawler.util.CanceallableThreadPoolExecutor;
import de.rub.nds.scanner.core.execution.NamedThreadFactory;
Expand Down Expand Up @@ -41,6 +42,10 @@ public abstract class BulkScanWorker<T extends ScanConfig> {
/** The scan configuration for this worker */
protected final T scanConfig;

// Per-thread holder used to hand the ScanJobDescription of the job being processed to scan()
// implementations without changing the scan(ScanTarget) signature. handle() sets the value on
// the executing thread right before calling scan() and removes it again in a finally block, so
// get() returns null on any thread that is not currently inside such a task.
private static final ThreadLocal<ScanJobDescription> currentJobDescription =
new ThreadLocal<>();

/**
* Calls the inner scan function and may handle cleanup. This is needed to wrap the scanner into
* a future object such that we can handle timeouts properly.
Expand Down Expand Up @@ -74,23 +79,37 @@ protected BulkScanWorker(String bulkScanId, T scanConfig, int parallelScanThread
* Handles a scan target by submitting it to the executor. If init was not called, it will
* initialize itself. In this case it will also clean up itself if all jobs are done.
*
* @param scanTarget The target to scan.
* @param jobDescription The job description for this scan.
* @return A future that resolves to the scan result once the scan is done.
*/
public Future<Document> handle(ScanTarget scanTarget) {
public Future<Document> handle(ScanJobDescription jobDescription) {
// if we initialized ourself, we also clean up ourself
shouldCleanupSelf.weakCompareAndSetAcquire(false, init());
activeJobs.incrementAndGet();
return timeoutExecutor.submit(
() -> {
Document result = scan(scanTarget);
if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) {
cleanup();
try {
currentJobDescription.set(jobDescription);
Document result = scan(jobDescription.getScanTarget());
if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) {
cleanup();
}
return result;
} finally {
currentJobDescription.remove();
}
return result;
});
}

/**
 * Returns the job description bound to the calling thread.
 *
 * <p>The value is only populated while a scan task submitted through this worker is running on
 * the calling thread; it is intended to be read from within scan().
 *
 * @return the ScanJobDescription of the scan running on this thread, or null if the calling
 *     thread is not currently executing a scan
 */
protected ScanJobDescription getCurrentJobDescription() {
    final ScanJobDescription boundDescription = currentJobDescription.get();
    return boundDescription;
}

/**
* Scans a target and returns the result as a Document. This is the core scanning functionality
* that must be implemented by subclasses.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,6 @@ public Future<Document> handle(
bulkScanInfo.getScanConfig(),
parallelConnectionThreads,
parallelScanThreads);
return worker.handle(scanJobDescription.getScanTarget());
return worker.handle(scanJobDescription);
}
}
Loading