diff --git a/.github/workflows/perf-baseline.yml b/.github/workflows/perf-baseline.yml new file mode 100644 index 000000000..936af3e4f --- /dev/null +++ b/.github/workflows/perf-baseline.yml @@ -0,0 +1,73 @@ +name: Perf Baseline + +on: + pull_request: + branches: [master, 2.2.0] + +jobs: + perf-baseline: + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up JDK 21 + uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: 21 + cache: maven + + - name: Build (skip tests) + run: mvn -T8 install -DskipTests -q + + - name: Install wrk + run: | + sudo apt-get update -qq + sudo apt-get install -y -qq wrk + + - name: Start Pantera + run: | + java -jar pantera-main/target/pantera-main-*.jar & + echo $! > /tmp/pantera.pid + # Wait for health endpoint; fail the job if Pantera never becomes healthy + started=false + for i in $(seq 1 30); do + if curl -sf http://localhost:8080/api/health > /dev/null 2>&1; then + echo "Pantera started" + started=true + break + fi + sleep 2 + done + if [ "$started" != "true" ]; then + echo "Pantera did not become healthy within 60s" >&2 + exit 1 + fi + + - name: Run perf benchmark + run: | + chmod +x scripts/perf-benchmark.sh + scripts/perf-benchmark.sh http://localhost:8080 /tmp/measured.json + + - name: Compare against baseline + run: | + chmod +x scripts/perf-compare.sh + scripts/perf-compare.sh tests/perf-baselines/npm-proxy.json /tmp/measured.json + + - name: Stop Pantera + if: always() + run: | + if [ -f /tmp/pantera.pid ]; then + kill "$(cat /tmp/pantera.pid)" 2>/dev/null || true + fi + + - name: Upload benchmark results + if: always() + uses: actions/upload-artifact@v4 + with: + name: perf-results + path: /tmp/measured.json + retention-days: 30 diff --git a/.gitignore b/.gitignore index 0324baeb7..5505e12ca 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,10 @@ pantera-main/docker-compose/pantera/artifacts/php/composer.lock /benchmark/setup/repos-old/.tmp /pantera-backfill pantera-main/docker-compose/pantera/keys/ +logs/.analysis/ +performance/*/* +!performance/results/.gitkeep +performance/wiremock/__files/*.bin 
+performance/pantera-config.yml +performance/secrets/*.pem +performance/fixtures/uploads/*.tgz diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c9c98c9d..adb211a77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,178 @@ # Changelog +## Version 2.2.0 + +Target-architecture alignment release (v2.2 plan Β§12). Ships nine work items β€” WI-00 (queue/log hotfix), WI-01 (Fault + Result sum types), WI-02 (full RequestContext + Deadline + ContextualExecutor), WI-03 (StructuredLogger 5-tier + LevelPolicy + AuditAction), WI-04 (`GroupResolver` replaces `GroupSlice` at every production site), WI-05 (SingleFlight coalescer), WI-07 (ProxyCacheWriter + Maven checksum integrity), WI-post-05 (retire RequestDeduplicator), WI-post-07 (wire ProxyCacheWriter into pypi/go/composer). Also lands a **P0 production-readiness pass against the Opus 4.7 audit 2026-04-18** (Groups A–H) plus full admin / developer / user documentation. WI-06, WI-06b, WI-08, WI-09, WI-10 are deferred to follow-on v2.2.x trains. + +### πŸ—οΈ Architectural changes + +- **`GroupResolver` is now the sole production group-resolution engine** (WI-04). The deprecated `GroupSlice` (1338 LOC) and its four dedicated test classes are deleted. All four wiring sites in `RepositorySlices.java` (npm-group, file/php-group, maven-group, generic group-adapter) now instantiate `GroupResolver`; `MavenGroupSlice` and `ComposerGroupSlice` receive it as their `Slice` delegate. A new convenience constructor on `GroupResolver` accepts the legacy `(SliceResolver, memberNames, port, ...)` shape so call-sites didn't balloon. `GroupSliceMetrics` is renamed `GroupResolverMetrics`; stale javadoc / inline-comment references across 15 production files updated. Status surface unchanged (200/404/500/502 β€” no 503/504 from group resolution per spec). 
Behavioural improvements: index-hit 404 falls through to proxy fanout (TOCTOU fix, A11), `AllProxiesFailed` passes the winning proxy's response through (was synthetic 502), and 5xx responses carry `X-Pantera-Fault` + `X-Pantera-Proxies-Tried` headers. + ([@aydasraf](https://github.com/aydasraf)) +- **`CooldownResponseRegistry` is the mandatory cooldown-403 path.** The deprecated `CooldownResponses.forbidden(...)` helper had 12 production callers across 6 adapters and one pantera-core fallback; all migrated to `CooldownResponseRegistry.instance().getOrThrow(repoType).forbidden(block)` and the legacy class is deleted. The former silent `.orElseGet(() -> CooldownResponses.forbidden(block))` fallback in `BaseCachedProxySlice` now throws `IllegalStateException` on a missing factory β€” factory registration is a startup-time hard requirement. `CooldownWiring` adds response-factory aliases (`npm-proxy`, `pypi-proxy`, `docker-proxy`, `go-proxy`, `php`, `php-proxy`) so every repoType resolves. + ([@aydasraf](https://github.com/aydasraf)) +- **Sealed `Fault` hierarchy + `Result` + `FaultTranslator`** introduced as the single decision point for "what HTTP status does this fault produce" (WI-01). `Fault` variants: `NotFound`, `Forbidden`, `IndexUnavailable`, `StorageUnavailable`, `AllProxiesFailed`, `UpstreamIntegrity`, `Internal`, `Deadline`, `Overload`. `FaultClassifier.classify(Throwable, String)` is the fallback for `.exceptionally(...)` handlers. 99% instructions / 97% branches coverage on the `fault` package; exhaustive-switch guard test locks the contract. + ([@aydasraf](https://github.com/aydasraf)) +- **`SingleFlight` is the one coalescer in the codebase** (WI-05 + WI-post-05). Consolidates the former hand-rolled `inFlightFanouts` (GroupSlice), `inFlightMetadataFetches` (MavenGroupSlice), and `RequestDeduplicator` (CachedNpmProxySlice, BaseCachedProxySlice). 
Caffeine `AsyncCache`-backed with stack-flat follower completion (the v2.1.3 `StackOverflowError` at ~400 concurrent followers cannot recur), explicit zombie eviction via `CompletableFuture.orTimeout`, and per-caller cancellation isolation. `RequestDeduplicator.java`, `RequestDeduplicatorTest.java`, and the `DedupStrategy` enum are gone; the nested `FetchSignal` enum is promoted to a top-level type at `pantera-core/http/cache/FetchSignal.java`. + ([@aydasraf](https://github.com/aydasraf)) +- **`RequestContext` full ECS/APM envelope** (WI-02). 13-field record covering every ECS key Pantera emits (`trace.id`, `transaction.id`, `span.id`, `http.request.id`, `user.name`, `client.ip`, `user_agent.original`, `repository.name`, `repository.type`, `package.name`, `package.version`, `url.original`, `url.path`) plus an end-to-end `Deadline`. A four-arg backward-compat ctor is retained. `ContextualExecutor.contextualize(Executor)` propagates the `ThreadContext` snapshot + APM span across `CompletableFuture` boundaries β€” wired at `DbArtifactIndex`, `GroupResolver` drain executor, `BaseCachedProxySlice` SingleFlight, `CachedNpmProxySlice` SingleFlight, and `MavenGroupSlice` SingleFlight. + ([@aydasraf](https://github.com/aydasraf)) +- **`StructuredLogger` 5-tier facade** (WI-03). `access()` (Tier-1 clientβ†’pantera), `internal()` (Tier-2 panteraβ†’pantera 500), `upstream()` (Tier-3 panteraβ†’remote), `local()` (Tier-4 local ops), `audit()` (Tier-5 compliance, INFO, non-suppressible). Central `LevelPolicy` enum encodes the Β§4.2 log-level matrix in one place. Closed `AuditAction` enum enumerates the only four compliance events (`ARTIFACT_PUBLISH`, `ARTIFACT_DOWNLOAD`, `ARTIFACT_DELETE`, `RESOLUTION`) per Β§10.4. `EcsLoggingSlice` now emits the access log exactly once per request via `StructuredLogger.access().forRequest(ctx)` on the success path (legacy dual-emission removed). 
+ ([@aydasraf](https://github.com/aydasraf)) +- **Auth cache L1/L2 + cluster-wide invalidation** (Group B). New `CachedLocalEnabledFilter` wraps `LocalEnabledFilter` the way `CachedUsers` wraps `Users`: L1 Caffeine + L2 Valkey + `CacheInvalidationPubSub` cross-node eviction. Hit rate is expected >95 %; the per-request JDBC hit is gone. `UserHandler` invalidates on put / delete / enable / disable / alterPassword β€” admin changes propagate to peer nodes within 100 ms. Driven by `meta.caches.auth-enabled.*` (env `PANTERA_AUTH_ENABLED_*` overrides), no hardcoded cache settings. + ([@aydasraf](https://github.com/aydasraf)) +- **`GroupMetadataCache` stale tier is now 2-tier, aid-not-breaker** (Group C). The former unbounded `lastKnownGood` `ConcurrentHashMap` is replaced by L1 Caffeine (bounded, 30-day TTL) + L2 Valkey (no TTL by default β€” Valkey `allkeys-lru` owns eviction). Degradation on read is L1 β†’ L2 β†’ expired primary-cache entry β†’ miss. Under realistic cardinality no eviction fires; bounds are a JVM-memory safety net only. **L2 now survives JVM restart** (the old CHM did not), strictly improving availability. Driven by `meta.caches.group-metadata-stale.*` with full env-var override chain. + ([@aydasraf](https://github.com/aydasraf)) +- **Cooldown package restructured** into SOLID sub-packages `api/`, `cache/`, `metadata/`, `response/`, `config/`, `metrics/`, `impl/`. `CooldownMetadataServiceImpl` renamed to `MetadataFilterService`. `CooldownAdapterBundle` + `CooldownAdapterRegistry` populated at startup; queried on every proxy request. All 7 adapters (Maven, npm, PyPI, Docker, Go, Composer, Gradle aliased to Maven) registered with aliases. + ([@aydasraf](https://github.com/aydasraf)) + +### ⚑ Performance + +- **`ProxyCacheWriter` β€” atomic primary + sidecar write with digest verification** (WI-07). 
The `oss-parent-58.pom.sha1` class of cache-drift bug (primary bytes and the sidecar they're verified against diverging across stale-while-revalidate refetches) can no longer produce a committed cache entry. Streams the primary into a NIO temp file (bounded chunk size, no heap scaling with artifact size) while updating four `MessageDigest` accumulators (MD5, SHA-1, SHA-256, SHA-512) in one pass; pulls sidecars concurrently; compares trimmed-lowercased hex bodies against the computed digest; saves primary-first-then-sidecars only on agreement. Mismatch β†’ `Result.err(Fault.UpstreamIntegrity(...))`; nothing lands in the cache. Wired into maven / pypi / go / composer (WI-post-07). + ([@aydasraf](https://github.com/aydasraf)) +- **Cooldown filter performance hardenings** (H1-H5). Pre-warm release-date cache (`MetadataParser.extractReleaseDates()` SPI bulk-populates `CooldownCache` L1 with `false` for versions older than the cooldown period β€” avoids DB/Valkey round-trip for the majority). Parallel bounded version evaluation via `CompletableFuture.allOf()` on a dedicated 4-thread executor, bounded to 50 versions per request. Stale-while-revalidate on `FilteredMetadataCache` with 5-minute grace. L1 capacity increased to 50K entries (`PANTERA_COOLDOWN_METADATA_L1_SIZE`). `CooldownCache` inflight-map memory leak fixed β€” guaranteed removal on success, error, and cancellation via `whenComplete` + 30 s `orTimeout` zombie safety net. + ([@aydasraf](https://github.com/aydasraf)) +- **Zero-copy `ArtifactHandler` chunks** (Group E.1). Download paths replace per-chunk `new byte[] + buf.get(bytes) + Buffer.buffer(bytes)` with `Buffer.buffer(Unpooled.wrappedBuffer(buf))`. At 1000 req/s Γ— 5 MB bodies Γ— 64 KB chunks that was ~80 000 byte[] allocations/second straight to garbage β€” now zero. Vert.x releases on write completion. + ([@aydasraf](https://github.com/aydasraf)) +- **StAX streaming Maven metadata merge** (Group E.2). 
`MavenGroupSlice` delegates to a new `StreamingMetadataMerger` (hardened against XXE) that accumulates only the deduplicated `<version>` `TreeSet` and newest-wins scalars. Peak memory is `O(unique versions)`, not `O(sum of member body sizes)`. Malformed or truncated member bodies are skipped with a WARN; remaining members still merge. No size cap is introduced — doing so would synthesize a client-facing 502 for legitimately large metadata. An alert-only histogram `pantera.maven.group.member_metadata_size_bytes` surfaces outliers to ops. + ([@aydasraf](https://github.com/aydasraf)) +- **Hot-path `Pattern.compile` hoisted to `static final`** (Group D). `TrimmedDocker.trim()` and `SubStorage.list()` previously compiled the regex on every call; both now hold the compiled pattern in a `final` field initialized exactly once. At 1000 req/s this eliminates thousands of compile allocations per second across Docker and storage-list request paths. + ([@aydasraf](https://github.com/aydasraf)) +- **`String.format("%02x", ...)` checksum hex loop replaced with `java.util.HexFormat.of().formatHex(...)`** in `MavenGroupSlice` (Group E.3). Single allocation per request instead of 20 per checksum. Mirrors the existing `ProxyCacheWriter.HEX` idiom. + ([@aydasraf](https://github.com/aydasraf)) +- **`Yaml2Json` / `Json2Yaml` `ObjectMapper` hoisted to `static final`** (Group E.4). Previously allocated a fresh `ObjectMapper` (and `YAMLMapper`) on every call. Admin-plane only, but the reflection warm-up cost is real. Jackson feature configuration applied once at static init — safe under the JMM. + ([@aydasraf](https://github.com/aydasraf)) + +### 🔧 Bug fixes + +- **Client-disconnect propagation** (Group A). `VertxSliceServer` now registers `closeHandler` on `request.connection()` and `exceptionHandler` on both request and response; captures the reactive-streams `Subscription` via `doOnSubscribe` and cancels it on any disconnect signal. 
`ArtifactHandler` captures the `Disposable` on both download paths and disposes on response `closeHandler` / `exceptionHandler`. `StreamThroughCache`, `DiskCacheStorage`, and `VertxRxFile.save` gain `doOnCancel` cleanup matching existing `doOnError` — channel closed + temp file deleted on mid-flight disconnect. `Http3Server` enforces a per-stream buffer cap via `PANTERA_HTTP3_MAX_STREAM_BUFFER_BYTES` (default 16 MiB). Resolves the class of "bytes keep streaming into a dead socket until the next write organically fails" that wasted upstream bandwidth and held file descriptors at 1000 req/s with any disconnect churn. + ([@aydasraf](https://github.com/aydasraf)) +- **`DbArtifactIndex` saturation now returns typed fault, not an EL-thread JDBC stall** (Group H.1). The executor's `RejectedExecutionHandler` switches from `CallerRunsPolicy` to `AbortPolicy` — under queue saturation, submissions no longer execute inline on the Vert.x event loop. **BEHAVIOR CHANGE:** saturation surfaces as `Fault.IndexUnavailable`, which `FaultTranslator` returns as `500` with `X-Pantera-Fault: index-unavailable`. The follow-up `locateByName` body is wrapped so the synchronous `RejectedExecutionException` from `CompletableFuture.supplyAsync` is always observed via a failed future, never raw-propagated up the event-loop stack. Chaos test `DbArtifactIndexSaturationTest` locks the policy. + ([@aydasraf](https://github.com/aydasraf)) +- **Cooldown registry lookups now fail fast** (Group G). `CooldownResponseRegistry.getOrThrow(repoType)` replaces `.get(repoType).forbidden(...)` at all 11 production adapter sites (files / npm / pypi / composer / go / docker) and in `BaseCachedProxySlice`. Missing factory registration surfaces immediately as `IllegalStateException("No CooldownResponseFactory registered for repoType: <repoType>")` at first request — wiring omissions are caught at canary time instead of NPE'ing on an arbitrary request later. 
+ ([@aydasraf](https://github.com/aydasraf)) +- **Resource-leak fixes on legacy RPM / Debian streams** (Group F). `XmlPrimaryChecksums` and `FilePackageHeader` previously opened `InputStream`s eagerly in their constructors; if the consuming method was never invoked the stream leaked. Both now store only the `Path` and open inside the consuming method under try-with-resources. RPM `Gzip.unpackTar` wraps `GzipCompressorInputStream` in the same try-with as `TarArchiveInputStream` so the native `Inflater` is released if the tar wrapper ctor throws. Debian `MultiPackages.merge` wraps both GZIP streams in try-with-resources; caller-owned outer streams are protected by a non-closing wrapper adapter. + ([@aydasraf](https://github.com/aydasraf)) +- **Queue overflow cascade on npm `DownloadAssetSlice`** (forensic Β§1.6 F1.1/F1.2). Bounded `LinkedBlockingQueue` writes on both the cache-hit (line 198) and cache-miss (line 288) paths called `AbstractQueue.add()`, which throws `IllegalStateException("Queue full")` on overflow. A burst of 11,499 such throws in a 2-minute window in prod surfaced as 503s to clients because the exception escaped the serve path. Both call-sites migrated to `queue.offer(event)`; the `ifPresent` enqueue lambda is wrapped in `try { ... } catch (Throwable t) { log at WARN; continue; }` on both paths so background-queue failure can NEVER escape into the response. Verified by `DownloadAssetSliceQueueFullTest`. + ([@aydasraf](https://github.com/aydasraf)) +- **Access-log WARN flood from 4xx client probes** (forensic Β§1.7 F2.1/F2.2). `EcsLogEvent.log()` emitted every 4xx at WARN, including 404 (Maven probe-and-miss + npm metadata scans), 401 (unauthenticated health checks), 403 (policy deny) β€” 2.4 M WARN lines in 12 h post-deploy; client-driven, not Pantera fault. Level policy now 404/401/403 β†’ INFO; other 4xx WARN, 5xx ERROR, slow >5 s WARN unchanged. Contract tests lock the matrix. 
+ ([@aydasraf](https://github.com/aydasraf)) +- **`StackOverflowError` class in GroupSlice follower chain** (commit `ccc155f6` / anti-pattern A9). When the leader fanout completed synchronously each follower's `thenCompose(...)` ran on the leader's stack — ~400 followers overflowed. Replaced the bespoke `ConcurrentHashMap<…, CompletableFuture<…>>` coalescer with `SingleFlight`, which dispatches all follower completions via the configured executor. Regression guard: `stackFlatUnderSynchronousCompletion` (500 followers, synchronous leader, no SOE). + ([@aydasraf](https://github.com/aydasraf)) +- **Upstream sidecar/primary drift in Maven cache** (target-architecture §9.5, production `oss-parent-58.pom.sha1` symptom). Previously `storage.save(primary)` and `storage.save(sidecar)` were independent Rx pipelines; SWR refetch could update the `.pom` without re-pulling `.pom.sha1`, and eviction could drop one without the other — every mode produced the same `ChecksumFailureException` for Maven client builds. `ProxyCacheWriter.writeWithSidecars(...)` is the single write path; mismatch → `Result.err(Fault.UpstreamIntegrity(...))`, nothing lands. Regression test `ProxyCacheWriterTest.ossParent58_regressionCheck` reproduces the exact production hex. + ([@aydasraf](https://github.com/aydasraf)) +- **Jetty client idle-close logged as request failure** (forensic §1.7 F4.4). "Idle timeout expired: 30000/30000 ms" is a connection-lifecycle event, not a request error. 20 ERROR entries per 12 h, all one cause. `JettyClientSlice.isIdleTimeout(Throwable)` identifies the specific `TimeoutException` (up to 5 hops) and downgrades that case to DEBUG. Other HTTP failures still log at ERROR. + ([@aydasraf](https://github.com/aydasraf)) +- **"Repository not found in configuration" at WARN** (forensic §1.7). Client-config error (stale repo URL in a pom.xml), not a Pantera fault. ~1,440 WARN lines per 12 h. Downgraded to INFO. 
+ ([@aydasraf](https://github.com/aydasraf)) +- **Stale `MdcPropagation` text references removed.** The class was deleted from pantera-core in WI-02 but three test files (`CooldownContextPropagationTest`, `ContextualExecutorIntegrationTest`, the now-deleted `GroupSliceFlattenedResolutionTest`) plus `docs/analysis/v2.2-next-session.md:73` still mentioned it textually. All updated to `ContextualExecutor` / `TraceContextExecutor` terminology. Zero `MdcPropagation.` references remain in production code or live tests. + ([@aydasraf](https://github.com/aydasraf)) + +### 🧹 Cleanup + +- **Legacy `GroupSlice` deleted** (1338 LOC) plus four obsolete test classes (`GroupSliceTest`, `GroupSliceFlattenedResolutionTest`, `GroupSliceIndexRoutingTest`, `GroupSlicePerformanceTest`). Rename `GroupSliceMetrics` β†’ `GroupResolverMetrics`. + ([@aydasraf](https://github.com/aydasraf)) +- **`CooldownResponses` class deleted.** All 12 production callers migrated to `CooldownResponseRegistry.getOrThrow(repoType)`. + ([@aydasraf](https://github.com/aydasraf)) +- **`RequestDeduplicator` / `RequestDeduplicatorTest` / `DedupStrategy` deleted** (WI-post-05). `FetchSignal` promoted to top-level type. + ([@aydasraf](https://github.com/aydasraf)) +- **Dead `api-workers` `WorkerExecutor` removed** (Group H.2). `AsyncApiVerticle` created a `WorkerExecutor` that no route ever referenced. + ([@aydasraf](https://github.com/aydasraf)) +- **UI: legacy `'hex'` repo-type key purged in favour of `'hexpm'`** across `repoTypes.ts`, `techSetup.ts`, `SettingsView.vue`. `SettingsView` now emits `hexpm-proxy` instead of `hex-proxy` β€” matches the canonical family key `ApiRoutingSlice` normalises to. `SearchView.vue`'s `startsWith('hex')` prefix match is retained since it still matches `hexpm`. 
+ ([@aydasraf](https://github.com/aydasraf)) + +### πŸ†• Added + +- **`pantera-core/http/fault/` sum types** β€” `Fault` sealed hierarchy, `Result` with `map`/`flatMap`, `FaultClassifier.classify(Throwable, String)` for `.exceptionally(...)` handlers, `FaultTranslator.translate(Fault, RequestContext)` as the single HTTP-status decision point. + ([@aydasraf](https://github.com/aydasraf)) +- **`pantera-core/http/resilience/SingleFlight`** β€” unified per-key request coalescer backed by Caffeine `AsyncCache`, with zombie eviction and stack-flat follower dispatch. 14 property-style tests including N=1000 coalescing, 100-caller cancellation isolation, 500-follower synchronous-completion stack-safety regression. + ([@aydasraf](https://github.com/aydasraf)) +- **`pantera-core/http/cache/ProxyCacheWriter`** + `IntegrityAuditor` + `scripts/pantera-cache-integrity-audit.sh` (exit 0 clean/fixed, 1 mismatch in dry-run, 2 CLI error). Companion CLI `pantera-main/tools/CacheIntegrityAudit`. + ([@aydasraf](https://github.com/aydasraf)) +- **`pantera-core/http/context/RequestContext` / `Deadline` / `ContextualExecutor`** β€” full ECS/APM envelope, monotonic wall-clock deadline with `remainingClamped`, Executor wrapper that propagates `ThreadContext` + APM `Span`. + ([@aydasraf](https://github.com/aydasraf)) +- **`pantera-core/http/observability/StructuredLogger`** (5-tier) + `LevelPolicy` (log-level matrix) + `pantera-core/audit/AuditAction` (closed enum of compliance events). + ([@aydasraf](https://github.com/aydasraf)) +- **`pantera-main/auth/CachedLocalEnabledFilter`** β€” Caffeine + Valkey + `CacheInvalidationPubSub` decorator. + ([@aydasraf](https://github.com/aydasraf)) +- **`pantera-main/group/merge/StreamingMetadataMerger`** β€” StAX-based, XXE-hardened, `ComparableVersion`-ordered. 
+ ([@aydasraf](https://github.com/aydasraf)) +- **Optional HTTP/3 PROXY protocol v2 support** — `PANTERA_HTTP3_PROXY_PROTOCOL=true` prepends Jetty's `ProxyConnectionFactory` to the Quiche connector (Group H.3). Default `false` — zero behavior change. Emits an INFO log `event.action=http3_proxy_protocol_enabled` when activated. + ([@aydasraf](https://github.com/aydasraf)) +- **`pantera-core/metrics/EventsQueueMetrics`** — shared callback emits one WARN per `queue.offer()` false-return and bumps `pantera.events.queue.dropped{queue=<name>}`. + ([@aydasraf](https://github.com/aydasraf)) + +### 🔄 Changed + +- **`EcsLoggingSlice` emits the access log exactly once per request** via `StructuredLogger.access()`. The former dual emission was removed to halve access-log volume in Kibana. Only the `.exceptionally(...)` error path still uses `EcsLogEvent` (one call-site; scheduled for migration). + ([@aydasraf](https://github.com/aydasraf)) +- **Hikari fail-fast defaults** (Group B). `connectionTimeout` tightened from `5000` to `3000` ms; `leakDetectionThreshold` from `300000` to `5000` ms. Operators may see leak WARNs that were silent before — each is a real held-connection bug to triage. Canary ramp documented in `docs/admin-guide/database.md`. + ([@aydasraf](https://github.com/aydasraf)) +- **Three hot-path executors wrapped via `ContextualExecutor.contextualize(...)`** — `DbArtifactIndex.DbIndexExecutorService`, `GroupResolver` drain executor, and all three SingleFlight-backed call sites. Every hot-path thread hop contextualised. + ([@aydasraf](https://github.com/aydasraf)) +- **Bounded-queue enqueue semantics: `offer()`, not `add()`.** Every request-serving path that writes to a `LinkedBlockingQueue<*Event>` now uses `offer()` and routes overflow through `EventsQueueMetrics.recordDropped(repoName)`. 
+ ([@aydasraf](https://github.com/aydasraf)) +- **Coalescer fields in `GroupResolver` / `MavenGroupSlice` / `CachedNpmProxySlice` / `BaseCachedProxySlice` are now `SingleFlight` instances.** Field names retained for minimal diff; only the type changed. + ([@aydasraf](https://github.com/aydasraf)) +- **Maven-adapter cached-proxy slice** routes primary-artifact cache misses through `ProxyCacheWriter.writeWithSidecars(...)` instead of legacy split primary/sidecar writes. Integrity failure returns 503 with `X-Pantera-Fault: upstream-integrity` rather than committing the bad pair. + ([@aydasraf](https://github.com/aydasraf)) +- **pypi / go / composer cached-proxy slices wired to `ProxyCacheWriter`** (WI-post-07). Each adapter uses its native sidecar algorithm set. + ([@aydasraf](https://github.com/aydasraf)) +- **`pom.xml` versions bumped 2.1.3 → 2.2.0** on the root reactor and all 30 modules. Docker image tags now produce `pantera:2.2.0`. + ([@aydasraf](https://github.com/aydasraf)) + +### ⚠️ Deprecated + +- **`pantera-core/http/trace/MdcPropagation`** marked `@Deprecated(since="2.2.0", forRemoval=true)`. Replacement is `ContextualExecutor.contextualize(executor)` at pool boundaries + `RequestContext.bindToMdc()` at the request edge. The class has been removed from pantera-core source; approximately 110 production call-sites were migrated in WI-02 / WI-03 / WI-04 / Group A. Do not add new call-sites. + ([@aydasraf](https://github.com/aydasraf)) + +### 📊 Observability (log-audit hardening) + +- **New 5xx fault signals** via `X-Pantera-Fault: <fault>` response header on every `FaultTranslator`-emitted 5xx: `internal`, `index-unavailable`, `storage-unavailable`, `deadline-exceeded`, `overload:<detail>`, `upstream-integrity:<detail>`. Additive; operator-facing runbook in `docs/admin-guide/runbooks.md`. + ([@aydasraf](https://github.com/aydasraf)) +- **`X-Pantera-Proxies-Tried: <count>`** on `AllProxiesFailed` passes the count of proxy members attempted. 
+ ([@aydasraf](https://github.com/aydasraf)) +- **`pantera.maven.group.member_metadata_size_bytes` histogram** (tagged `repo_name`) records per-member Maven metadata body size during group merge. Alert-only β€” no request is rejected based on this metric. + ([@aydasraf](https://github.com/aydasraf)) +- **`pantera.group_metadata_cache.stale_served_from{tier=l1|l2|expired-primary|miss}`** β€” tiered counter for the stale-fallback read path. `expired-primary` non-zero is an operational signal to resize Valkey. + ([@aydasraf](https://github.com/aydasraf)) +- **`pantera.caches.auth-enabled.{hit,miss}`** β€” Micrometer Caffeine stats for the new auth cache. + ([@aydasraf](https://github.com/aydasraf)) + +### πŸ”’ Security / compliance + +No CVE fixes, no permissions model changes, no credential-handling changes, no PII-scope changes. Integrity verification on proxy caches (WI-07 + WI-post-07) is a correctness hardening, not a security fix β€” the trust boundary (upstream declares a digest, we verify it) has not moved. The new audit logger emits to a dedicated `com.auto1.pantera.audit` logger (see Migration notes). + +### πŸ“š Documentation + +- **Admin guide** β€” `docs/admin-guide/cache-configuration.md` (consolidated `meta.caches.*` reference with 3-tier override precedence envβ†’YAMLβ†’default), `docs/admin-guide/valkey-setup.md` (`maxmemory-policy=allkeys-lru` requirement, sizing), `docs/admin-guide/database.md` (Hikari canary ramp instructions), `docs/admin-guide/environment-variables.md` (auth / stale-cache / HTTP/3 / scheduler env vars; Hikari defaults updated), `docs/admin-guide/deployment-nlb.md` (HTTP/3 proxy-protocol flag), `docs/admin-guide/runbooks.md` (new 5xx signals), `docs/admin-guide/v2.2-deployment-checklist.md` (pre/during/post-deploy gating). 
+ ([@aydasraf](https://github.com/aydasraf)) +- **Developer guide** β€” `docs/developer-guide/caching.md` (canonical L1 Caffeine + L2 Valkey + pub/sub pattern; "cache is an aid, never a breaker"), `docs/developer-guide/fault-model.md`, `docs/developer-guide/reactive-lifecycle.md` (three-terminal-path contract β€” complete/error/cancel β€” with `CachingBlob.content` as canonical example), `docs/developer-guide/cooldown.md`. + ([@aydasraf](https://github.com/aydasraf)) +- **User guide** β€” `docs/user-guide/response-headers.md` (`X-Pantera-Fault`, `-Proxies-Tried`, `-Stale`, `-Internal`), `docs/user-guide/error-reference.md`, `docs/user-guide/streaming-downloads.md`. New repository pages: `gradle.md` + `go-group`/`go-proxy` sections in `go.md` + "Adding Members to a Group Repository" section in `ui-guide.md` covering the AutoComplete group-member picker and inline "Create new" modal. + ([@aydasraf](https://github.com/aydasraf)) + +### βœ… Testing + +- **4,926+ tests across pantera-main + pantera-core** at release time, 0 errors, 0 failures. `mvn -T8 test` green. +- **New tests** (highlights): 54 under `http/context/` + `http/observability/` (ContextualExecutor / Deadline / RequestContext / all five StructuredLogger tiers / LevelPolicy), 6 integrity tests under pypi / go / composer, 14 property-style tests for `SingleFlight`, 4 `BaseCachedProxySliceDedupTest` regression tests, 7 `StreamingMetadataMergerTest` fixtures (disjoint + overlapping versions, max-scalar semantics, malformed-member skip), 4 `GroupMetadataCacheStaleFallbackTest` tier-degradation cases, `CooldownResponseRegistryGetOrThrowTest`, `CachedLocalEnabledFilterTest` (7 cases), `DbArtifactIndexSaturationTest` (chaos), `CooldownValkeyStalenessTest` (chaos), `CooldownHighCardinalityTest` (chaos), `CooldownConcurrentFilterStampedeTest` (chaos), `PolicyChangeInvalidationTest`, `UpstreamPublishReEvalTest`. 
+- **Commit-message hygiene gate**: `git log c71fbbfe..HEAD --format='%B' | git interpret-trailers --only-trailers | grep -ic 'co-authored-by'` returns `0`. + +### 🧭 Migration notes + +No operator action required for functional rollout β€” all changes are drop-in for v2.1.3 deployments. + +- The `queue.add β†’ queue.offer` migration is internal; no YAML change, no CLI flag, no API change. +- The access-log level policy change is internal to `EcsLogEvent` / `StructuredLogger.access`; Kibana panels filtering `log.level: WARN AND http.response.status_code: 404` will empty β€” intended outcome. Filter by status code instead. +- The `ProxyCacheWriter` path activates only when a file-backed `Storage` is present; tests injecting a lambda-`Cache` keep the pre-v2.2.0 code path. +- The `SingleFlight` coalescers use dedicated Caffeine `AsyncCache` instances with a 5-minute in-flight TTL and 10K max keys; heap growth is bounded, no tuning required. +- `scripts/pantera-cache-integrity-audit.sh` is additive β€” zero-impact no-op unless invoked; `--dry-run` is safe against production. +- **Kibana `user_agent` sub-fields**: operators who queried `user_agent.name` / `.version` / `.os.name` need to query `user_agent.original` directly (that's what `RequestContext` emits) or wait for the follow-up WI that re-lifts the parser. No data loss β€” only parsed sub-fields are unavailable this release. +- **Audit-log level**: `StructuredLogger.audit()` writes to logger `com.auto1.pantera.audit`. The bundled `log4j2.xml` inherits from `com.auto1.pantera` at INFO β€” "non-suppressible" is by convention. Add a dedicated `` in production overrides to enforce. +- **Hikari canary ramp** (`docs/admin-guide/database.md`): start the first week at `PANTERA_DB_CONNECTION_TIMEOUT_MS=10000` and `PANTERA_DB_LEAK_DETECTION_MS=30000`; drop to defaults (3000 / 5000) after zero leak WARNs observed. 
+- **Valkey `maxmemory-policy`**: required `allkeys-lru` for the stale-metadata L2 to behave correctly under memory pressure β€” see `docs/admin-guide/valkey-setup.md`. + +--- + ## Version 2.1.3 ### πŸ”§ Bug fixes diff --git a/build-tools/pom.xml b/build-tools/pom.xml index 202c5c28f..be0f22569 100644 --- a/build-tools/pom.xml +++ b/build-tools/pom.xml @@ -5,7 +5,7 @@ 4.0.0 com.auto1.pantera build-tools - 2.1.3 + 2.2.0 UTF-8 diff --git a/composer-adapter/pom.xml b/composer-adapter/pom.xml index 44efbdaf2..742519626 100644 --- a/composer-adapter/pom.xml +++ b/composer-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 composer-adapter - 2.1.3 + 2.2.0 jar composer-files Turns your files/objects into PHP Composer artifacts @@ -45,19 +45,19 @@ SOFTWARE. com.auto1.pantera http-client - 2.1.3 + 2.2.0 compile com.auto1.pantera files-adapter - 2.1.3 + 2.2.0 test com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -76,7 +76,7 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test diff --git a/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerCooldownResponseFactory.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerCooldownResponseFactory.java new file mode 100644 index 000000000..19804c000 --- /dev/null +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerCooldownResponseFactory.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.composer.cooldown; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.response.CooldownResponseFactory; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; + +import java.time.Duration; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; + +/** + * Composer-specific cooldown 403 response factory. + * + *

Returns {@code application/json} body matching the Composer error format.

+ * + * @since 2.2.0 + */ +public final class ComposerCooldownResponseFactory implements CooldownResponseFactory { + + private static final DateTimeFormatter ISO = DateTimeFormatter.ISO_OFFSET_DATE_TIME; + + @Override + public Response forbidden(final CooldownBlock block) { + final String until = ISO.format( + block.blockedUntil().atOffset(ZoneOffset.UTC) + ); + final long retryAfter = Math.max( + 1L, + Duration.between(Instant.now(), block.blockedUntil()).getSeconds() + ); + final String body = String.format( + "{\"error\":\"version in cooldown\",\"blocked_until\":\"%s\"}", until + ); + return ResponseBuilder.forbidden() + .header("Retry-After", String.valueOf(retryAfter)) + .header("X-Pantera-Cooldown", "blocked") + .jsonBody(body) + .build(); + } + + @Override + public String repoType() { + return "composer"; + } +} diff --git a/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataFilter.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataFilter.java new file mode 100644 index 000000000..ad8789cd8 --- /dev/null +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataFilter.java @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.composer.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataFilter; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; + +import java.util.Set; + +/** + * PHP Composer metadata filter implementing cooldown SPI. + * Removes blocked version keys from the Composer packages.json version map. + * + *

Filters the {@code packages.{vendor/package}} object by removing blocked + * version keys. Composer has no "latest" tag in the metadata format, so + * {@link #updateLatest(JsonNode, String)} is a no-op that returns metadata unchanged.

+ * + * @since 2.2.0 + */ +public final class ComposerMetadataFilter implements MetadataFilter { + + @Override + public JsonNode filter(final JsonNode metadata, final Set blockedVersions) { + if (blockedVersions.isEmpty()) { + return metadata; + } + if (!(metadata instanceof ObjectNode)) { + return metadata; + } + final JsonNode packages = metadata.get("packages"); + if (packages == null || !packages.isObject() || packages.size() == 0) { + return metadata; + } + final String name = packages.fieldNames().next(); + final JsonNode pkgNode = packages.get(name); + if (pkgNode != null && pkgNode.isObject()) { + final ObjectNode versionsObj = (ObjectNode) pkgNode; + for (final String blocked : blockedVersions) { + versionsObj.remove(blocked); + } + } + return metadata; + } + + @Override + public JsonNode updateLatest(final JsonNode metadata, final String newLatest) { + // Composer packages.json has no "latest" tag β€” the client resolves + // version constraints from the full version map. No-op. + return metadata; + } +} diff --git a/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataParser.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataParser.java new file mode 100644 index 000000000..a34f346ed --- /dev/null +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataParser.java @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.composer.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataParseException; +import com.auto1.pantera.cooldown.metadata.MetadataParser; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * PHP Composer metadata parser implementing cooldown SPI. + * Parses Composer packages.json metadata and extracts version information. + * + *

Composer metadata structure (packages endpoint {@code /packages/{vendor}/{pkg}.json} + * or {@code /p2/{vendor}/{pkg}.json}):

+ *
+ * {
+ *   "packages": {
+ *     "vendor/package": {
+ *       "1.0.0": {"name": "vendor/package", "version": "1.0.0", ...},
+ *       "1.1.0": {"name": "vendor/package", "version": "1.1.0", ...},
+ *       "2.0.0": {"name": "vendor/package", "version": "2.0.0", ...}
+ *     }
+ *   }
+ * }
+ * 
+ * + * @since 2.2.0 + */ +public final class ComposerMetadataParser implements MetadataParser { + + /** + * Shared ObjectMapper for JSON parsing (thread-safe). + */ + private static final ObjectMapper MAPPER = new ObjectMapper(); + + /** + * Content type for Composer metadata. + */ + private static final String CONTENT_TYPE = "application/json"; + + @Override + public JsonNode parse(final byte[] bytes) throws MetadataParseException { + if (bytes == null || bytes.length == 0) { + throw new MetadataParseException("Empty or null Composer metadata"); + } + try { + final JsonNode node = MAPPER.readTree(bytes); + if (node == null) { + throw new MetadataParseException("Parsed Composer metadata is null"); + } + return node; + } catch (final IOException ex) { + throw new MetadataParseException("Failed to parse Composer metadata JSON", ex); + } + } + + @Override + public List extractVersions(final JsonNode metadata) { + final JsonNode pkgNode = this.findPackageNode(metadata); + if (pkgNode == null || !pkgNode.isObject()) { + return Collections.emptyList(); + } + final List result = new ArrayList<>(); + final Iterator fields = pkgNode.fieldNames(); + while (fields.hasNext()) { + result.add(fields.next()); + } + return result; + } + + @Override + public Optional getLatestVersion(final JsonNode metadata) { + // Composer packages.json does not have a "latest" tag; + // the client resolves constraints from the full version map. + return Optional.empty(); + } + + @Override + public String contentType() { + return CONTENT_TYPE; + } + + /** + * Get the package name from metadata. + * Returns the first (and typically only) key under the "packages" object. 
+ * + * @param metadata Parsed metadata + * @return Package name if present, empty otherwise + */ + public Optional getPackageName(final JsonNode metadata) { + final JsonNode packages = metadata.get("packages"); + if (packages != null && packages.isObject() && packages.size() > 0) { + return Optional.of(packages.fieldNames().next()); + } + return Optional.empty(); + } + + /** + * Find the package version-map node inside the metadata. + * Navigates {@code packages.{first-key}} to reach the object whose + * field names are version strings. + * + * @param metadata Root metadata node + * @return Package version map node, or {@code null} if not found + */ + private JsonNode findPackageNode(final JsonNode metadata) { + final JsonNode packages = metadata.get("packages"); + if (packages == null || !packages.isObject() || packages.size() == 0) { + return null; + } + final String name = packages.fieldNames().next(); + final JsonNode pkgNode = packages.get(name); + if (pkgNode != null && pkgNode.isObject()) { + return pkgNode; + } + return null; + } +} diff --git a/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRequestDetector.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRequestDetector.java new file mode 100644 index 000000000..3a513fe66 --- /dev/null +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRequestDetector.java @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.composer.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataRequestDetector; + +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * PHP Composer metadata request detector implementing cooldown SPI. + * Detects Composer metadata endpoints and extracts package names. + * + *

Composer metadata endpoints:

+ *
    + *
  • {@code /packages/{vendor}/{package}.json} — package metadata
  • + *
  • {@code /p2/{vendor}/{package}.json} — Composer v2 lazy-provider endpoint
  • + *
+ * + * @since 2.2.0 + */ +public final class ComposerMetadataRequestDetector implements MetadataRequestDetector { + + /** + * Pattern matching {@code /packages/{vendor}/{package}.json}. + */ + private static final Pattern PACKAGES_PATTERN = + Pattern.compile("^/packages/([^/]+/[^/]+)\\.json$"); + + /** + * Pattern matching {@code /p2/{vendor}/{package}.json}. + */ + private static final Pattern P2_PATTERN = + Pattern.compile("^/p2/([^/]+/[^/]+)\\.json$"); + + /** + * Repository type identifier. + */ + private static final String REPO_TYPE = "composer"; + + @Override + public boolean isMetadataRequest(final String path) { + return PACKAGES_PATTERN.matcher(path).matches() + || P2_PATTERN.matcher(path).matches(); + } + + @Override + public Optional extractPackageName(final String path) { + Matcher matcher = PACKAGES_PATTERN.matcher(path); + if (matcher.matches()) { + return Optional.of(matcher.group(1)); + } + matcher = P2_PATTERN.matcher(path); + if (matcher.matches()) { + return Optional.of(matcher.group(1)); + } + return Optional.empty(); + } + + @Override + public String repoType() { + return REPO_TYPE; + } +} diff --git a/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRewriter.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRewriter.java new file mode 100644 index 000000000..11804c430 --- /dev/null +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRewriter.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.composer.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataRewriteException; +import com.auto1.pantera.cooldown.metadata.MetadataRewriter; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * PHP Composer metadata rewriter implementing cooldown SPI. + * Serializes filtered Composer metadata back to JSON bytes. + * + * @since 2.2.0 + */ +public final class ComposerMetadataRewriter implements MetadataRewriter { + + /** + * Shared ObjectMapper for JSON serialization (thread-safe). + */ + private static final ObjectMapper MAPPER = new ObjectMapper(); + + /** + * Content type for Composer metadata. + */ + private static final String CONTENT_TYPE = "application/json"; + + @Override + public byte[] rewrite(final JsonNode metadata) throws MetadataRewriteException { + try { + return MAPPER.writeValueAsBytes(metadata); + } catch (final JsonProcessingException ex) { + throw new MetadataRewriteException( + "Failed to serialize Composer metadata to JSON", ex + ); + } + } + + @Override + public String contentType() { + return CONTENT_TYPE; + } +} diff --git a/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/package-info.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/package-info.java new file mode 100644 index 000000000..79ec89345 --- /dev/null +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/cooldown/package-info.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +/** + * PHP Composer cooldown metadata filtering implementation. + * + *

This package provides Composer-specific implementations of the cooldown metadata SPI:

+ *
    + *
  • {@link com.auto1.pantera.composer.cooldown.ComposerMetadataParser} + * - Parses Composer packages.json metadata
  • + *
  • {@link com.auto1.pantera.composer.cooldown.ComposerMetadataFilter} + * - Filters blocked versions from the version map
  • + *
  • {@link com.auto1.pantera.composer.cooldown.ComposerMetadataRewriter} + * - Serializes filtered metadata to JSON
  • + *
  • {@link com.auto1.pantera.composer.cooldown.ComposerMetadataRequestDetector} + * - Detects {@code /packages/} and {@code /p2/} metadata endpoints
  • + *
  • {@link com.auto1.pantera.composer.cooldown.ComposerCooldownResponseFactory} + * - Builds 403 responses for blocked Composer packages
  • + *
+ * + *

Composer metadata structure ({@code /packages/{vendor}/{pkg}.json} + * or {@code /p2/{vendor}/{pkg}.json}):

+ *
+ * {
+ *   "packages": {
+ *     "vendor/package": {
+ *       "1.0.0": {"name": "vendor/package", "version": "1.0.0", ...},
+ *       "1.1.0": {"name": "vendor/package", "version": "1.1.0", ...}
+ *     }
+ *   }
+ * }
+ * 
+ * + *

When filtering blocked versions, blocked version keys are removed from the + * {@code packages.{vendor/package}} object. Composer has no "latest" dist-tag; + * the client resolves version constraints from the full version map.

+ * + * @since 2.2.0 + */ +package com.auto1.pantera.composer.cooldown; diff --git a/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/CachedProxySlice.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/CachedProxySlice.java index 5a272027d..a74d1a3fa 100644 --- a/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/CachedProxySlice.java +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/CachedProxySlice.java @@ -12,6 +12,7 @@ import com.auto1.pantera.asto.Content; import com.auto1.pantera.asto.Key; +import com.auto1.pantera.asto.Storage; import com.auto1.pantera.http.log.EcsLogger; import com.auto1.pantera.http.log.LogSanitizer; import com.auto1.pantera.asto.cache.Cache; @@ -21,39 +22,70 @@ import com.auto1.pantera.composer.JsonPackages; import com.auto1.pantera.composer.Packages; import com.auto1.pantera.composer.Repository; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownResponses; -import com.auto1.pantera.cooldown.CooldownResult; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.response.CooldownResponseRegistry; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.ResponseBuilder; import com.auto1.pantera.http.Response; import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.cache.ProxyCacheWriter; +import com.auto1.pantera.http.context.RequestContext; +import com.auto1.pantera.http.fault.Fault; +import com.auto1.pantera.http.fault.Fault.ChecksumAlgo; +import com.auto1.pantera.http.fault.Result; import com.auto1.pantera.http.headers.Header; import com.auto1.pantera.http.headers.Login; import 
com.auto1.pantera.http.rq.RequestLine; +import com.auto1.pantera.http.rq.RqMethod; import com.auto1.pantera.scheduling.ProxyArtifactEvent; +import io.micrometer.core.instrument.MeterRegistry; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; import java.time.Instant; import java.time.format.DateTimeFormatter; import java.nio.charset.StandardCharsets; import java.time.format.DateTimeParseException; +import java.util.EnumMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; import java.util.Optional; import java.util.Queue; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletionStage; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; +import java.util.function.Supplier; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Composer proxy slice with cache support, cooldown service, and event emission. + * + *

Primary artifact writes (the {@code *.zip} / {@code *.tar} / {@code *.phar} + * dist archives) flow through {@link ProxyCacheWriter} so the packagist.org + * {@code dist.shasum} SHA-256 sidecar is verified against the downloaded + * bytes before anything lands in the cache β€” giving the Composer adapter + * the same primary+sidecar integrity guarantee the Maven adapter received + * in WI-07 (Β§9.5). The existing metadata-JSON flow (the dominant traffic + * shape through this slice) is unchanged. */ @SuppressWarnings({"PMD.UnusedPrivateField", "PMD.SingularField"}) final class CachedProxySlice implements Slice { + /** + * Primary artifact extensions that participate in the coupled + * primary+sidecar write path via {@link ProxyCacheWriter}. + */ + private static final List PRIMARY_EXTENSIONS = List.of( + ".zip", ".tar", ".phar" + ); + /** * Pattern to extract package name and version from path. * Matches /p2/vendor/package.json @@ -65,32 +97,32 @@ final class CachedProxySlice implements Slice { private final Slice remote; private final Cache cache; private final Repository repo; - + /** * Proxy artifact events queue. */ private final Optional> events; - + /** * Repository name. */ private final String rname; - + /** * Repository type. */ private final String rtype; - + /** * Cooldown service. */ private final CooldownService cooldown; - + /** * Cooldown inspector. */ private final CooldownInspector inspector; - + /** * Base URL for metadata rewriting. */ @@ -111,6 +143,15 @@ final class CachedProxySlice implements Slice { */ private final ConcurrentHashMap lastModifiedStore; + /** + * Single-source-of-truth cache writer introduced by WI-07 (Β§9.5 of the + * v2.2 target architecture). Fetches the primary dist archive + the + * Composer {@code .sha256} sidecar in one coupled batch, verifies the + * declared claim against the bytes we just downloaded, and atomically + * commits the pair. Non-null whenever {@code repo.storage()} is set. 
+ */ + private final ProxyCacheWriter cacheWriter; + /** * @param remote Remote slice * @param repo Repository @@ -118,7 +159,7 @@ final class CachedProxySlice implements Slice { */ CachedProxySlice(Slice remote, Repository repo, Cache cache) { this(remote, repo, cache, Optional.empty(), "composer", "php", - com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, + com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, new NoopComposerCooldownInspector(), "http://localhost:8080", "unknown" @@ -190,6 +231,10 @@ final class CachedProxySlice implements Slice { this.upstreamUrl = upstreamUrl; this.refreshing = ConcurrentHashMap.newKeySet(); this.lastModifiedStore = new ConcurrentHashMap<>(); + final Storage storage = repo.storage(); + this.cacheWriter = storage == null + ? null + : new ProxyCacheWriter(storage, rname, meterRegistry()); } @Override @@ -205,6 +250,14 @@ public CompletableFuture response(RequestLine line, Headers headers, C .field("url.path", path) .log(); + // WI-07 Β§9.5 β€” integrity-verified atomic primary+sidecar write on + // cache-miss. Runs only when the request path resolves to a + // primary dist archive (.zip / .tar / .phar). Metadata JSON + // paths fall through to the existing flow unchanged. 
+ if (this.cacheWriter != null && isPrimaryArtifact(path)) { + return this.verifyAndServePrimary(line, path); + } + // Keep ~dev suffix in cache key to avoid collision between stable and dev metadata final String name = path .replaceAll("^/p2?/", "") @@ -287,7 +340,9 @@ private CompletableFuture serveCachedMetadata( .field("event.reason", "cooldown_active") .field("package.name", name) .log(); - return CooldownResponses.forbidden(result.block().orElseThrow()); + return CooldownResponseRegistry.instance() + .getOrThrow(this.rtype) + .forbidden(result.block().orElseThrow()); } // Rewrite URLs (no-op for pre-rewritten content due to original_url check) final byte[] rewritten = this.rewriteMetadata(bytes); @@ -329,7 +384,7 @@ private CompletableFuture evaluateCooldownAndFetch( return this.fetchThroughCache(line, name, headers); } - + /** * Handle response after cooldown evaluation. * @@ -355,7 +410,9 @@ private CompletableFuture afterCooldown( .field("package.name", name) .log(); return CompletableFuture.completedFuture( - CooldownResponses.forbidden(result.block().orElseThrow()) + CooldownResponseRegistry.instance() + .getOrThrow(this.rtype) + .forbidden(result.block().orElseThrow()) ); } EcsLogger.debug("com.auto1.pantera.composer") @@ -480,7 +537,9 @@ private CompletableFuture fetchThroughCache( .field("package.name", name) .log(); return CompletableFuture.completedFuture( - CooldownResponses.forbidden(result.block().orElseThrow()) + CooldownResponseRegistry.instance() + .getOrThrow(this.rtype) + .forbidden(result.block().orElseThrow()) ); } // Save rewritten metadata for ProxyDownloadSlice (original_url lookup) @@ -517,13 +576,13 @@ private CompletableFuture evaluateMetadataCooldown( ) { try { final javax.json.JsonObject json = javax.json.Json.createReader(new java.io.StringReader(new String(bytes))).readObject(); - + // Handle both Satis format (packages is array) and traditional format (packages is object) final javax.json.JsonValue packagesValue = 
json.get("packages"); if (packagesValue == null) { return CompletableFuture.completedFuture(CooldownResult.allowed()); } - + // If packages is an array (Satis format), skip cooldown check // Satis format has empty packages array and uses provider-includes instead if (packagesValue.getValueType() == javax.json.JsonValue.ValueType.ARRAY) { @@ -534,12 +593,12 @@ private CompletableFuture evaluateMetadataCooldown( .log(); return CompletableFuture.completedFuture(CooldownResult.allowed()); } - + // Traditional format: packages is an object if (packagesValue.getValueType() != javax.json.JsonValue.ValueType.OBJECT) { return CompletableFuture.completedFuture(CooldownResult.allowed()); } - + final javax.json.JsonObject packages = packagesValue.asJsonObject(); final javax.json.JsonValue pkgVal = packages.get(name); if (pkgVal == null) { @@ -550,7 +609,7 @@ private CompletableFuture evaluateMetadataCooldown( return CompletableFuture.completedFuture(CooldownResult.allowed()); } final String owner = new Login(headers).getValue(); - final com.auto1.pantera.cooldown.CooldownRequest req = new com.auto1.pantera.cooldown.CooldownRequest( + final com.auto1.pantera.cooldown.api.CooldownRequest req = new com.auto1.pantera.cooldown.api.CooldownRequest( this.rtype, this.rname, name, @@ -626,7 +685,7 @@ private static java.util.Optional latestVersion(final javax.json.JsonVal return java.util.Optional.ofNullable(bestVer); } } - + /** * Rewrite metadata content to proxy downloads through Pantera. * Returns byte[] directly to avoid unnecessary Content wrapping/unwrapping. @@ -665,7 +724,7 @@ private Optional parseCooldownRequest(final String path, final // by caching version lists or parsing the request differently return Optional.empty(); } - + /** * Emit event for downloaded package. * @@ -713,7 +772,7 @@ private void emitEvent(final String name, final Headers headers, final Optional< .field("user.name", owner) .log(); } - + /** * Extract release date from response headers. 
* @@ -848,4 +907,191 @@ private void recordMetric(final Runnable metric) { .log(); } } + + // ===== WI-07 Β§9.5: ProxyCacheWriter integration ===== + + /** + * Check if path represents a Composer primary artifact (zip / tar / + * phar dist archive) that should be routed through + * {@link ProxyCacheWriter}. + * + * @param path Request path. + * @return {@code true} if the path ends with a primary-artifact extension. + */ + private static boolean isPrimaryArtifact(final String path) { + if (path.endsWith("/")) { + return false; + } + final String lower = path.toLowerCase(Locale.ROOT); + for (final String ext : PRIMARY_EXTENSIONS) { + if (lower.endsWith(ext)) { + return true; + } + } + return false; + } + + /** + * Primary-artifact flow: if the cache already has the primary, serve + * from the cache; otherwise fetch the primary + the + * {@code dist.shasum} SHA-256 sidecar upstream in one coupled batch, + * verify via {@link ProxyCacheWriter}, atomically commit, and serve + * the freshly-cached bytes. + * + *

On {@link Fault.UpstreamIntegrity} collapses to 502 with the + * {@code X-Pantera-Fault: upstream-integrity:sha256} header; on + * {@link Fault.StorageUnavailable} collapses to 502 and leaves the + * cache empty for this key. + */ + @SuppressWarnings("PMD.AvoidCatchingGenericException") + private CompletableFuture verifyAndServePrimary( + final RequestLine line, final String path + ) { + final Storage storage = this.repo.storage(); + final Key key = new Key.From(path.startsWith("/") ? path.substring(1) : path); + return storage.exists(key).thenCompose(present -> { + if (present) { + return this.serveFromCache(storage, key); + } + return this.fetchVerifyAndCache(line, key, path); + }).exceptionally(err -> { + EcsLogger.warn("com.auto1.pantera.composer") + .message("Composer primary-artifact verify-and-serve failed; returning 502") + .eventCategory("web") + .eventAction("cache_write") + .eventOutcome("failure") + .field("repository.name", this.rname) + .field("url.path", path) + .error(err) + .log(); + return ResponseBuilder.badGateway().build(); + }).toCompletableFuture(); + } + + /** + * Fetch the primary + the declared sidecar upstream, verify via + * {@link ProxyCacheWriter}, then stream the primary from the cache. 
+ */ + private CompletionStage fetchVerifyAndCache( + final RequestLine line, final Key key, final String path + ) { + final Storage storage = this.repo.storage(); + final String upstream = this.upstreamUrl + path; + final RequestContext ctx = new RequestContext( + org.apache.logging.log4j.ThreadContext.get("trace.id"), + null, + this.rname, + path + ); + final Map>>> sidecars = + new EnumMap<>(ChecksumAlgo.class); + sidecars.put(ChecksumAlgo.SHA256, () -> this.fetchSidecar(line, ".sha256")); + + return this.cacheWriter.writeWithSidecars( + key, + upstream, + () -> this.fetchPrimary(line), + sidecars, + ctx + ).thenCompose(result -> { + if (result instanceof Result.Err err) { + if (err.fault() instanceof Fault.UpstreamIntegrity ui) { + return CompletableFuture.completedFuture( + ResponseBuilder.badGateway() + .header( + "X-Pantera-Fault", + "upstream-integrity:" + + ui.algo().name().toLowerCase(Locale.ROOT) + ) + .textBody("Upstream integrity verification failed") + .build() + ); + } + return CompletableFuture.completedFuture( + ResponseBuilder.badGateway() + .textBody("Upstream temporarily unavailable") + .build() + ); + } + return this.serveFromCache(storage, key); + }); + } + + /** + * Read the primary from upstream as an {@link InputStream}. On any + * non-success status, throws so the writer's outer exception handler + * treats it as a transient failure (no cache mutation). + */ + private CompletionStage fetchPrimary(final RequestLine line) { + return this.remote.response(line, Headers.EMPTY, Content.EMPTY) + .thenApply(resp -> { + if (!resp.status().success()) { + resp.body().asBytesFuture(); + throw new IllegalStateException( + "Upstream returned HTTP " + resp.status().code() + ); + } + try { + return resp.body().asInputStream(); + } catch (final IOException ex) { + throw new IllegalStateException("Upstream body not readable", ex); + } + }); + } + + /** + * Fetch a sidecar for the primary at {@code line}. 
Returns + * {@link Optional#empty()} for 4xx/5xx and I/O errors so the writer + * treats the sidecar as absent; a transient sidecar failure never + * blocks the primary write. + */ + @SuppressWarnings("PMD.AvoidCatchingGenericException") + private CompletionStage> fetchSidecar( + final RequestLine primary, final String extension + ) { + final String sidecarPath = primary.uri().getPath() + extension; + final RequestLine sidecarLine = new RequestLine(RqMethod.GET, sidecarPath); + return this.remote.response(sidecarLine, Headers.EMPTY, Content.EMPTY) + .thenCompose(resp -> { + if (!resp.status().success()) { + return resp.body().asBytesFuture() + .thenApply(ignored -> Optional.empty()); + } + return resp.body().asBytesFuture() + .thenApply(bytes -> Optional.of( + new ByteArrayInputStream(bytes) + )); + }) + .exceptionally(ignored -> Optional.empty()); + } + + /** + * Serve the primary from storage after a successful atomic write. + */ + private CompletionStage serveFromCache(final Storage storage, final Key key) { + return storage.value(key).thenApply(content -> + ResponseBuilder.ok().body(content).build() + ); + } + + /** + * Resolve the shared micrometer registry when metrics are enabled. + * + * @return Registry or {@code null} when metrics have not been + * initialised (e.g. test suites that skip bootstrap). 
+ */ + @SuppressWarnings("PMD.AvoidCatchingGenericException") + private static MeterRegistry meterRegistry() { + try { + if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { + return com.auto1.pantera.metrics.MicrometerMetrics.getInstance().getRegistry(); + } + } catch (final Exception ex) { + EcsLogger.debug("com.auto1.pantera.composer") + .message("MicrometerMetrics registry unavailable; writer will run without metrics") + .error(ex) + .log(); + } + return null; + } } diff --git a/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ComposerCooldownInspector.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ComposerCooldownInspector.java index cd4752ec2..0315b2562 100644 --- a/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ComposerCooldownInspector.java +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ComposerCooldownInspector.java @@ -11,8 +11,8 @@ package com.auto1.pantera.composer.http.proxy; import com.auto1.pantera.asto.Content; -import com.auto1.pantera.cooldown.CooldownDependency; -import com.auto1.pantera.cooldown.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Slice; import com.auto1.pantera.http.log.EcsLogger; diff --git a/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ComposerProxySlice.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ComposerProxySlice.java index d3205227e..f83f0cd31 100644 --- a/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ComposerProxySlice.java +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ComposerProxySlice.java @@ -13,8 +13,8 @@ import com.auto1.pantera.asto.cache.Cache; import com.auto1.pantera.composer.Repository; import 
com.auto1.pantera.composer.http.PackageMetadataSlice; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.ResponseBuilder; import com.auto1.pantera.http.Slice; import com.auto1.pantera.http.client.ClientSlices; @@ -48,7 +48,7 @@ public ComposerProxySlice( final Repository repo, final Authenticator auth ) { this(clients, remote, repo, auth, Cache.NOP, Optional.empty(), "composer", "php", - com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, + com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, new NoopComposerCooldownInspector(), "http://localhost:8080"); } @@ -69,7 +69,7 @@ public ComposerProxySlice( final Cache cache ) { this(clients, remote, repository, auth, cache, Optional.empty(), "composer", "php", - com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, + com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, new NoopComposerCooldownInspector(), "http://localhost:8080"); } diff --git a/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/NoopComposerCooldownInspector.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/NoopComposerCooldownInspector.java index b1cabfda6..4b900493b 100644 --- a/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/NoopComposerCooldownInspector.java +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/NoopComposerCooldownInspector.java @@ -10,8 +10,8 @@ */ package com.auto1.pantera.composer.http.proxy; -import com.auto1.pantera.cooldown.CooldownDependency; -import com.auto1.pantera.cooldown.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; import java.time.Instant; import java.util.Collections; diff --git 
a/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ProxyDownloadSlice.java b/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ProxyDownloadSlice.java index c588b3b91..fb576a914 100644 --- a/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ProxyDownloadSlice.java +++ b/composer-adapter/src/main/java/com/auto1/pantera/composer/http/proxy/ProxyDownloadSlice.java @@ -22,10 +22,10 @@ import com.auto1.pantera.http.headers.Login; import com.auto1.pantera.http.log.EcsLogger; import com.auto1.pantera.http.rq.RequestLine; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownResponses; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.response.CooldownResponseRegistry; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.scheduling.ProxyArtifactEvent; import javax.json.Json; @@ -246,7 +246,9 @@ public CompletableFuture response( .field("package.version", version) .log(); return CompletableFuture.completedFuture( - CooldownResponses.forbidden(result.block().orElseThrow()) + CooldownResponseRegistry.instance() + .getOrThrow(this.rtype) + .forbidden(result.block().orElseThrow()) ); } return this.fetchAndCache( diff --git a/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerCooldownResponseFactoryTest.java b/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerCooldownResponseFactoryTest.java new file mode 100644 index 000000000..da43a0588 --- /dev/null +++ b/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerCooldownResponseFactoryTest.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This 
program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.composer.cooldown; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.RsStatus; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.Collections; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.emptyOrNullString; + +/** + * Tests for {@link ComposerCooldownResponseFactory}. + * + * @since 2.2.0 + */ +final class ComposerCooldownResponseFactoryTest { + + private ComposerCooldownResponseFactory factory; + + @BeforeEach + void setUp() { + this.factory = new ComposerCooldownResponseFactory(); + } + + @Test + void returns403Status() { + final Response response = this.factory.forbidden(block()); + assertThat(response.status(), is(RsStatus.FORBIDDEN)); + } + + @Test + void returnsApplicationJsonContentType() { + final Response response = this.factory.forbidden(block()); + final String contentType = response.headers() + .values("Content-Type").get(0); + assertThat(contentType, containsString("application/json")); + } + + @Test + void bodyContainsVersionInCooldownError() { + final Response response = this.factory.forbidden(block()); + final String body = new String(response.body().asBytes()); + assertThat(body, containsString("\"error\":\"version in cooldown\"")); + } + + @Test + void bodyContainsBlockedUntilField() { + final Response response = 
this.factory.forbidden(block()); + final String body = new String(response.body().asBytes()); + assertThat(body, containsString("\"blocked_until\":\"")); + assertThat(body, containsString("Z")); + } + + @Test + void includesRetryAfterHeader() { + final Response response = this.factory.forbidden(block()); + final String retryAfter = response.headers() + .values("Retry-After").get(0); + assertThat(retryAfter, is(not(emptyOrNullString()))); + final long seconds = Long.parseLong(retryAfter); + assertThat(seconds > 0, is(true)); + } + + @Test + void includesCooldownBlockedHeader() { + final Response response = this.factory.forbidden(block()); + final String cooldown = response.headers() + .values("X-Pantera-Cooldown").get(0); + assertThat(cooldown, equalTo("blocked")); + } + + @Test + void repoTypeIsComposer() { + assertThat(this.factory.repoType(), equalTo("composer")); + } + + private static CooldownBlock block() { + return new CooldownBlock( + "composer", + "packagist-proxy", + "vendor/package", + "2.0.0", + CooldownReason.FRESH_RELEASE, + Instant.now().minus(1, ChronoUnit.HOURS), + Instant.now().plus(23, ChronoUnit.HOURS), + Collections.emptyList() + ); + } +} diff --git a/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataFilterTest.java b/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataFilterTest.java new file mode 100644 index 000000000..3958c30d7 --- /dev/null +++ b/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataFilterTest.java @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.composer.cooldown; + +import com.fasterxml.jackson.databind.JsonNode; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; + +/** + * Tests for {@link ComposerMetadataFilter}. + * + * @since 2.2.0 + */ +final class ComposerMetadataFilterTest { + + private ComposerMetadataParser parser; + private ComposerMetadataFilter filter; + + @BeforeEach + void setUp() { + this.parser = new ComposerMetadataParser(); + this.filter = new ComposerMetadataFilter(); + } + + @Test + void filtersBlockedVersionsFromPackageMap() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": {"name": "vendor/package", "version": "1.0.0"}, + "1.1.0": {"name": "vendor/package", "version": "1.1.0"}, + "2.0.0": {"name": "vendor/package", "version": "2.0.0"} + } + } + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Set.of("1.1.0", "2.0.0")); + final JsonNode pkgNode = filtered.get("packages").get("vendor/package"); + assertThat(pkgNode.has("1.0.0"), is(true)); + assertThat(pkgNode.has("1.1.0"), is(false)); + assertThat(pkgNode.has("2.0.0"), is(false)); + } + + @Test + void returnsUnmodifiedWhenNoBlockedVersions() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": {}, + "2.0.0": {} + } + } + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = 
this.filter.filter(metadata, Collections.emptySet()); + final JsonNode pkgNode = filtered.get("packages").get("vendor/package"); + assertThat(pkgNode.has("1.0.0"), is(true)); + assertThat(pkgNode.has("2.0.0"), is(true)); + } + + @Test + void handlesBlockingAllVersions() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": {}, + "2.0.0": {}, + "3.0.0": {} + } + } + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter( + metadata, Set.of("1.0.0", "2.0.0", "3.0.0") + ); + final JsonNode pkgNode = filtered.get("packages").get("vendor/package"); + assertThat(pkgNode.size(), equalTo(0)); + } + + @Test + void handlesBlockingNonExistentVersions() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": {}, + "2.0.0": {} + } + } + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Set.of("9.9.9")); + final JsonNode pkgNode = filtered.get("packages").get("vendor/package"); + assertThat(pkgNode.has("1.0.0"), is(true)); + assertThat(pkgNode.has("2.0.0"), is(true)); + } + + @Test + void preservesVersionMetadata() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": { + "name": "vendor/package", + "version": "1.0.0", + "require": {"php": ">=7.4"}, + "dist": {"url": "https://example.com/1.0.0.zip"} + }, + "2.0.0": { + "name": "vendor/package", + "version": "2.0.0" + } + } + } + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Set.of("2.0.0")); + final JsonNode remaining = filtered.get("packages").get("vendor/package").get("1.0.0"); + assertThat(remaining.get("name").asText(), equalTo("vendor/package")); + assertThat(remaining.get("version").asText(), 
equalTo("1.0.0")); + assertThat(remaining.get("require").get("php").asText(), equalTo(">=7.4")); + assertThat(remaining.has("dist"), is(true)); + } + + @Test + void updateLatestIsNoOp() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": {}, + "2.0.0": {} + } + } + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode updated = this.filter.updateLatest(metadata, "1.0.0"); + // Composer has no latest tag; updateLatest is a no-op + assertThat(updated, is(metadata)); + } + + @Test + void filtersFixtureThreeOfTen() throws Exception { + final byte[] fixture = loadFixture("cooldown/composer-packages-sample.json"); + final JsonNode metadata = this.parser.parse(fixture); + final JsonNode filtered = this.filter.filter( + metadata, Set.of("2.0.0", "3.0.0-beta.1", "3.1.0") + ); + final List remaining = this.parser.extractVersions(filtered); + assertThat(remaining, hasSize(7)); + assertThat( + remaining, + containsInAnyOrder( + "1.0.0", "1.1.0", "1.2.0", + "2.0.1", "2.1.0", "2.2.0", "3.0.0" + ) + ); + } + + @Test + void handlesEmptyPackagesObject() throws Exception { + final String json = """ + { + "packages": {} + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Set.of("1.0.0")); + assertThat(filtered.get("packages").size(), equalTo(0)); + } + + @Test + void handlesMissingPackagesKey() throws Exception { + final String json = """ + { + "other": "value" + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Set.of("1.0.0")); + assertThat(filtered.has("other"), is(true)); + } + + /** + * Load a test fixture from classpath. 
+ * + * @param resource Resource path + * @return File content as bytes + * @throws IOException If reading fails + */ + private static byte[] loadFixture(final String resource) throws IOException { + try (InputStream stream = + ComposerMetadataFilterTest.class.getClassLoader() + .getResourceAsStream(resource)) { + if (stream == null) { + throw new IOException("Fixture not found: " + resource); + } + return stream.readAllBytes(); + } + } +} diff --git a/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataParserTest.java b/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataParserTest.java new file mode 100644 index 000000000..47a9ebe17 --- /dev/null +++ b/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataParserTest.java @@ -0,0 +1,234 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.composer.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataParseException; +import com.fasterxml.jackson.databind.JsonNode; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Tests for {@link ComposerMetadataParser}. + * + * @since 2.2.0 + */ +final class ComposerMetadataParserTest { + + private ComposerMetadataParser parser; + + @BeforeEach + void setUp() { + this.parser = new ComposerMetadataParser(); + } + + @Test + void parsesValidComposerMetadata() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": {"name": "vendor/package", "version": "1.0.0"}, + "1.1.0": {"name": "vendor/package", "version": "1.1.0"} + } + } + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + assertThat(metadata, is(notNullValue())); + assertThat(metadata.has("packages"), is(true)); + } + + @Test + void extractsVersionsFromPackageMap() throws Exception { + final String json = """ + { + "packages": { + "monolog/monolog": { + "1.0.0": {"name": "monolog/monolog", "version": "1.0.0"}, + "1.1.0": {"name": "monolog/monolog", "version": "1.1.0"}, + "2.0.0": {"name": "monolog/monolog", "version": "2.0.0"}, + "2.1.0": {"name": "monolog/monolog", "version": "2.1.0"}, + "3.0.0": {"name": "monolog/monolog", "version": "3.0.0"} + } + } + } + """; + final JsonNode metadata = 
this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, hasSize(5)); + assertThat( + versions, + containsInAnyOrder("1.0.0", "1.1.0", "2.0.0", "2.1.0", "3.0.0") + ); + } + + @Test + void returnsEmptyListWhenNoPackages() throws Exception { + final String json = """ + { + "packages": {} + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, is(empty())); + } + + @Test + void returnsEmptyListWhenPackagesKeyMissing() throws Exception { + final String json = """ + { + "other-key": "value" + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, is(empty())); + } + + @Test + void latestVersionAlwaysEmpty() throws Exception { + final String json = """ + { + "packages": { + "vendor/pkg": { + "1.0.0": {}, + "2.0.0": {} + } + } + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final Optional latest = this.parser.getLatestVersion(metadata); + assertThat(latest.isPresent(), is(false)); + } + + @Test + void getsPackageName() throws Exception { + final String json = """ + { + "packages": { + "symfony/console": { + "5.0.0": {} + } + } + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final Optional name = this.parser.getPackageName(metadata); + assertThat(name.isPresent(), is(true)); + assertThat(name.get(), equalTo("symfony/console")); + } + + @Test + void returnsEmptyPackageNameWhenNoPackages() throws Exception { + final String json = """ + { + "packages": {} + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final Optional name = this.parser.getPackageName(metadata); + assertThat(name.isPresent(), 
is(false)); + } + + @Test + void returnsCorrectContentType() { + assertThat(this.parser.contentType(), equalTo("application/json")); + } + + @Test + void throwsOnInvalidJson() { + final byte[] invalid = "not valid json {{{".getBytes(StandardCharsets.UTF_8); + assertThrows(MetadataParseException.class, () -> this.parser.parse(invalid)); + } + + @Test + void parsesFixtureFile() throws Exception { + final byte[] fixture = loadFixture("cooldown/composer-packages-sample.json"); + final JsonNode metadata = this.parser.parse(fixture); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, hasSize(10)); + assertThat( + versions, + containsInAnyOrder( + "1.0.0", "1.1.0", "1.2.0", + "2.0.0", "2.0.1", "2.1.0", "2.2.0", + "3.0.0-beta.1", "3.0.0", "3.1.0" + ) + ); + final Optional name = this.parser.getPackageName(metadata); + assertThat(name.isPresent(), is(true)); + assertThat(name.get(), equalTo("vendor/sample-lib")); + } + + @Test + void handlesLargeMetadata() throws Exception { + final StringBuilder json = new StringBuilder(); + json.append("{\"packages\":{\"vendor/large\":{"); + for (int idx = 0; idx < 500; idx++) { + if (idx > 0) { + json.append(","); + } + json.append(String.format( + "\"%d.0.0\":{\"name\":\"vendor/large\",\"version\":\"%d.0.0\"}", + idx, idx + )); + } + json.append("}}}"); + final JsonNode metadata = this.parser.parse( + json.toString().getBytes(StandardCharsets.UTF_8) + ); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, hasSize(500)); + } + + @Test + void throwsOnEmptyBytes() { + assertThrows( + MetadataParseException.class, + () -> this.parser.parse(new byte[0]) + ); + } + + /** + * Load a test fixture from classpath. 
+ * + * @param resource Resource path + * @return File content as bytes + * @throws IOException If reading fails + */ + private static byte[] loadFixture(final String resource) throws IOException { + try (InputStream stream = + ComposerMetadataParserTest.class.getClassLoader() + .getResourceAsStream(resource)) { + if (stream == null) { + throw new IOException("Fixture not found: " + resource); + } + return stream.readAllBytes(); + } + } +} diff --git a/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRequestDetectorTest.java b/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRequestDetectorTest.java new file mode 100644 index 000000000..522796128 --- /dev/null +++ b/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRequestDetectorTest.java @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.composer.cooldown; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.Optional; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +/** + * Tests for {@link ComposerMetadataRequestDetector}. 
+ * + * @since 2.2.0 + */ +final class ComposerMetadataRequestDetectorTest { + + private ComposerMetadataRequestDetector detector; + + @BeforeEach + void setUp() { + this.detector = new ComposerMetadataRequestDetector(); + } + + @Test + void detectsPackagesEndpoint() { + assertThat( + this.detector.isMetadataRequest("/packages/vendor/package.json"), + is(true) + ); + } + + @Test + void detectsP2Endpoint() { + assertThat( + this.detector.isMetadataRequest("/p2/vendor/package.json"), + is(true) + ); + } + + @Test + void detectsPackagesEndpointWithHyphens() { + assertThat( + this.detector.isMetadataRequest("/packages/my-vendor/my-package.json"), + is(true) + ); + } + + @Test + void detectsP2EndpointWithHyphens() { + assertThat( + this.detector.isMetadataRequest("/p2/symfony/http-kernel.json"), + is(true) + ); + } + + @Test + void rejectsArchiveDownload() { + assertThat( + this.detector.isMetadataRequest("/dist/vendor/package/1.0.0/package-1.0.0.zip"), + is(false) + ); + } + + @Test + void rejectsRootPath() { + assertThat( + this.detector.isMetadataRequest("/"), + is(false) + ); + } + + @Test + void rejectsPlainPackagesPath() { + assertThat( + this.detector.isMetadataRequest("/packages.json"), + is(false) + ); + } + + @Test + void rejectsNonJsonExtension() { + assertThat( + this.detector.isMetadataRequest("/packages/vendor/package.xml"), + is(false) + ); + } + + @Test + void rejectsMissingVendor() { + assertThat( + this.detector.isMetadataRequest("/packages/package.json"), + is(false) + ); + } + + @Test + void rejectsExtraPathSegments() { + assertThat( + this.detector.isMetadataRequest("/packages/vendor/package/extra.json"), + is(false) + ); + } + + @Test + void extractsPackageNameFromPackagesEndpoint() { + final Optional name = this.detector.extractPackageName( + "/packages/vendor/package.json" + ); + assertThat(name.isPresent(), is(true)); + assertThat(name.get(), equalTo("vendor/package")); + } + + @Test + void extractsPackageNameFromP2Endpoint() { + final Optional 
name = this.detector.extractPackageName( + "/p2/monolog/monolog.json" + ); + assertThat(name.isPresent(), is(true)); + assertThat(name.get(), equalTo("monolog/monolog")); + } + + @Test + void extractsPackageNameWithHyphens() { + final Optional name = this.detector.extractPackageName( + "/p2/symfony/http-foundation.json" + ); + assertThat(name.isPresent(), is(true)); + assertThat(name.get(), equalTo("symfony/http-foundation")); + } + + @Test + void returnsEmptyForNonMetadataPath() { + final Optional name = this.detector.extractPackageName( + "/dist/vendor/package.zip" + ); + assertThat(name.isPresent(), is(false)); + } + + @Test + void returnsComposerRepoType() { + assertThat(this.detector.repoType(), equalTo("composer")); + } + + @Test + void rejectsP2WithoutJson() { + assertThat( + this.detector.isMetadataRequest("/p2/vendor/package"), + is(false) + ); + } + + @Test + void rejectsEmptyPath() { + assertThat( + this.detector.isMetadataRequest(""), + is(false) + ); + } +} diff --git a/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRewriterTest.java b/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRewriterTest.java new file mode 100644 index 000000000..4b53e2157 --- /dev/null +++ b/composer-adapter/src/test/java/com/auto1/pantera/composer/cooldown/ComposerMetadataRewriterTest.java @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.composer.cooldown; + +import com.fasterxml.jackson.databind.JsonNode; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Set; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; + +/** + * Tests for {@link ComposerMetadataRewriter}. + * + * @since 2.2.0 + */ +final class ComposerMetadataRewriterTest { + + private ComposerMetadataParser parser; + private ComposerMetadataFilter filter; + private ComposerMetadataRewriter rewriter; + + @BeforeEach + void setUp() { + this.parser = new ComposerMetadataParser(); + this.filter = new ComposerMetadataFilter(); + this.rewriter = new ComposerMetadataRewriter(); + } + + @Test + void roundTripPreservesUnfilteredMetadata() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": {"name": "vendor/package", "version": "1.0.0"}, + "2.0.0": {"name": "vendor/package", "version": "2.0.0"} + } + } + } + """; + final JsonNode original = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final byte[] rewritten = this.rewriter.rewrite(original); + final JsonNode reparsed = this.parser.parse(rewritten); + final List versions = this.parser.extractVersions(reparsed); + assertThat(versions, hasSize(2)); + assertThat(versions, containsInAnyOrder("1.0.0", "2.0.0")); + } + + @Test + void roundTripAfterFiltering() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": {"name": "vendor/package", "version": "1.0.0"}, + "1.1.0": {"name": "vendor/package", "version": "1.1.0"}, + "2.0.0": {"name": "vendor/package", 
"version": "2.0.0"} + } + } + } + """; + final JsonNode original = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(original, Set.of("1.1.0")); + final byte[] rewritten = this.rewriter.rewrite(filtered); + final JsonNode reparsed = this.parser.parse(rewritten); + final List versions = this.parser.extractVersions(reparsed); + assertThat(versions, hasSize(2)); + assertThat(versions, containsInAnyOrder("1.0.0", "2.0.0")); + } + + @Test + void roundTripFixtureAfterFiltering() throws Exception { + final byte[] fixture = loadFixture("cooldown/composer-packages-sample.json"); + final JsonNode original = this.parser.parse(fixture); + final JsonNode filtered = this.filter.filter( + original, Set.of("2.0.0", "3.0.0-beta.1", "3.1.0") + ); + final byte[] rewritten = this.rewriter.rewrite(filtered); + assertThat(rewritten, is(notNullValue())); + assertThat(rewritten.length > 0, is(true)); + final JsonNode reparsed = this.parser.parse(rewritten); + final List versions = this.parser.extractVersions(reparsed); + assertThat(versions, hasSize(7)); + assertThat( + versions, + containsInAnyOrder( + "1.0.0", "1.1.0", "1.2.0", + "2.0.1", "2.1.0", "2.2.0", "3.0.0" + ) + ); + assertThat( + this.parser.getPackageName(reparsed).orElse(""), + equalTo("vendor/sample-lib") + ); + } + + @Test + void preservesVersionObjectFields() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": { + "name": "vendor/package", + "version": "1.0.0", + "require": {"php": ">=8.0"}, + "dist": { + "type": "zip", + "url": "https://example.com/1.0.0.zip", + "shasum": "abc123" + } + } + } + } + } + """; + final JsonNode original = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final byte[] rewritten = this.rewriter.rewrite(original); + final JsonNode reparsed = this.parser.parse(rewritten); + final JsonNode version = reparsed.get("packages").get("vendor/package").get("1.0.0"); + 
assertThat(version.get("name").asText(), equalTo("vendor/package")); + assertThat(version.get("require").get("php").asText(), equalTo(">=8.0")); + assertThat(version.get("dist").get("type").asText(), equalTo("zip")); + assertThat(version.get("dist").get("shasum").asText(), equalTo("abc123")); + } + + @Test + void returnsCorrectContentType() { + assertThat(this.rewriter.contentType(), equalTo("application/json")); + } + + @Test + void producesValidJsonBytes() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": {} + } + } + } + """; + final JsonNode original = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final byte[] rewritten = this.rewriter.rewrite(original); + final String output = new String(rewritten, StandardCharsets.UTF_8); + // Verify it's valid JSON by reparsing + final JsonNode reparsed = this.parser.parse(rewritten); + assertThat(reparsed, is(notNullValue())); + assertThat(reparsed.has("packages"), is(true)); + } + + @Test + void roundTripAllBlockedLeavesEmptyVersionMap() throws Exception { + final String json = """ + { + "packages": { + "vendor/package": { + "1.0.0": {}, + "2.0.0": {} + } + } + } + """; + final JsonNode original = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(original, Set.of("1.0.0", "2.0.0")); + final byte[] rewritten = this.rewriter.rewrite(filtered); + final JsonNode reparsed = this.parser.parse(rewritten); + final List versions = this.parser.extractVersions(reparsed); + assertThat(versions, hasSize(0)); + assertThat(reparsed.get("packages").get("vendor/package").size(), equalTo(0)); + } + + /** + * Load a test fixture from classpath. 
+ * + * @param resource Resource path + * @return File content as bytes + * @throws IOException If reading fails + */ + private static byte[] loadFixture(final String resource) throws IOException { + try (InputStream stream = + ComposerMetadataRewriterTest.class.getClassLoader() + .getResourceAsStream(resource)) { + if (stream == null) { + throw new IOException("Fixture not found: " + resource); + } + return stream.readAllBytes(); + } + } +} diff --git a/composer-adapter/src/test/java/com/auto1/pantera/composer/http/proxy/CachedProxySliceIntegrityTest.java b/composer-adapter/src/test/java/com/auto1/pantera/composer/http/proxy/CachedProxySliceIntegrityTest.java new file mode 100644 index 000000000..a23dc3ac3 --- /dev/null +++ b/composer-adapter/src/test/java/com/auto1/pantera/composer/http/proxy/CachedProxySliceIntegrityTest.java @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.composer.http.proxy; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.asto.Key; +import com.auto1.pantera.asto.Storage; +import com.auto1.pantera.asto.cache.FromStorageCache; +import com.auto1.pantera.asto.memory.InMemoryStorage; +import com.auto1.pantera.composer.AstoRepository; +import com.auto1.pantera.cooldown.impl.NoopCooldownService; +import com.auto1.pantera.http.Headers; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.RsStatus; +import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.rq.RequestLine; +import com.auto1.pantera.http.rq.RqMethod; +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Tags; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import java.lang.reflect.Field; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.HexFormat; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Integration tests that composer's {@code CachedProxySlice} routes + * primary {@code *.zip}/{@code *.tar}/{@code *.phar} writes through + * {@link com.auto1.pantera.http.cache.ProxyCacheWriter} (WI-07 Β§9.5, + * WI-post-07). 
+ * + * @since 2.2.0 + */ +final class CachedProxySliceIntegrityTest { + + /** Canonical composer dist zip bytes. */ + private static final byte[] DIST_BYTES = + "composer dist archive".getBytes(StandardCharsets.UTF_8); + + /** Request path for the dist zip. */ + private static final String DIST_PATH = + "/dists/vendor/package/sha/vendor-package-1.0.0.zip"; + + /** Cache key for the dist zip (leading slash stripped). */ + private static final Key DIST_KEY = + new Key.From("dists/vendor/package/sha/vendor-package-1.0.0.zip"); + + @Test + @DisplayName("upstream .sha256 mismatch β†’ storage empty + integrity metric incremented") + void sha256Mismatch_rejectsWrite() throws Exception { + final Storage storage = new InMemoryStorage(); + final MeterRegistry registry = new SimpleMeterRegistry(); + final FakeComposerUpstream origin = new FakeComposerUpstream( + DIST_BYTES, + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + ); + final CachedProxySlice slice = buildSlice(origin, storage, registry); + + final Response response = slice.response( + new RequestLine(RqMethod.GET, DIST_PATH), + Headers.EMPTY, + Content.EMPTY + ).join(); + + assertThat( + "response signals fault; writer rejected the write", + response.status().code() == 502 || response.status().code() == 404, + is(true) + ); + if (response.status().code() == 502) { + assertThat( + "X-Pantera-Fault: upstream-integrity:", + headerValue(response, "X-Pantera-Fault").orElse(""), + containsString("upstream-integrity") + ); + } + assertFalse(storage.exists(DIST_KEY).join(), "primary NOT in storage"); + assertFalse( + storage.exists(new Key.From(DIST_KEY.string() + ".sha256")).join(), + "sha256 sidecar NOT in storage" + ); + final Counter counter = registry.find("pantera.proxy.cache.integrity_failure") + .tags(Tags.of("repo", "composer-proxy-test", "algo", "sha256")) + .counter(); + assertNotNull(counter, "integrity-failure counter registered"); + assertEquals(1.0, counter.count(), "counter incremented 
once"); + } + + @Test + @DisplayName("matching .sha256 β†’ primary + sidecar persisted; second GET served from cache") + void matchingSidecar_persistsAndServesFromCache() throws Exception { + final Storage storage = new InMemoryStorage(); + final MeterRegistry registry = new SimpleMeterRegistry(); + final FakeComposerUpstream origin = new FakeComposerUpstream( + DIST_BYTES, sha256Hex(DIST_BYTES) + ); + final CachedProxySlice slice = buildSlice(origin, storage, registry); + + final Response first = slice.response( + new RequestLine(RqMethod.GET, DIST_PATH), + Headers.EMPTY, + Content.EMPTY + ).join(); + assertEquals(RsStatus.OK, first.status(), "first request 200"); + assertArrayEquals( + DIST_BYTES, + first.body().asBytesFuture().join(), + "first request serves dist bytes" + ); + assertTrue(storage.exists(DIST_KEY).join(), "primary in storage"); + assertTrue( + storage.exists(new Key.From(DIST_KEY.string() + ".sha256")).join(), + "sha256 sidecar in storage" + ); + final int upstreamCallsBefore = origin.primaryCalls(); + final Response second = slice.response( + new RequestLine(RqMethod.GET, DIST_PATH), + Headers.EMPTY, + Content.EMPTY + ).join(); + assertEquals(RsStatus.OK, second.status(), "second request 200 from cache"); + assertArrayEquals( + DIST_BYTES, + second.body().asBytesFuture().join(), + "second request cached bytes" + ); + assertEquals( + upstreamCallsBefore, + origin.primaryCalls(), + "second request did not hit upstream" + ); + } + + private static CachedProxySlice buildSlice( + final Slice origin, final Storage storage, final MeterRegistry registry + ) throws Exception { + final CachedProxySlice slice = new CachedProxySlice( + origin, + new AstoRepository(storage), + new FromStorageCache(storage), + Optional.empty(), + "composer-proxy-test", + "php", + NoopCooldownService.INSTANCE, + new NoopComposerCooldownInspector(), + "http://localhost:8080", + "https://packagist.example/composer" + ); + injectTestWriter(slice, storage, "composer-proxy-test", 
registry); + return slice; + } + + private static void injectTestWriter( + final CachedProxySlice slice, + final Storage storage, + final String repoName, + final MeterRegistry registry + ) throws Exception { + final Field f = CachedProxySlice.class.getDeclaredField("cacheWriter"); + f.setAccessible(true); + f.set(slice, new com.auto1.pantera.http.cache.ProxyCacheWriter( + storage, repoName, registry + )); + } + + private static Optional headerValue(final Response response, final String name) { + return java.util.stream.StreamSupport + .stream(response.headers().spliterator(), false) + .filter(h -> h.getKey().equalsIgnoreCase(name)) + .map(java.util.Map.Entry::getValue) + .findFirst(); + } + + private static String sha256Hex(final byte[] body) { + try { + final MessageDigest md = MessageDigest.getInstance("SHA-256"); + return HexFormat.of().formatHex(md.digest(body)); + } catch (final Exception ex) { + throw new AssertionError(ex); + } + } + + /** + * Minimal fake composer upstream serving primary dist on any path NOT + * ending in {@code .sha256}, and the claimed digest hex otherwise. + * Counts primary GETs so the test can assert the second request is + * cache-only. 
+ */ + private static final class FakeComposerUpstream implements Slice { + private final byte[] primary; + private final String sha256Hex; + private final AtomicInteger primaryCalls = new AtomicInteger(); + + FakeComposerUpstream(final byte[] primary, final String sha256Hex) { + this.primary = primary; + this.sha256Hex = sha256Hex; + } + + int primaryCalls() { + return this.primaryCalls.get(); + } + + @Override + public CompletableFuture response( + final RequestLine line, final Headers headers, final Content body + ) { + final String path = line.uri().getPath(); + if (path.endsWith(".sha256")) { + return CompletableFuture.completedFuture( + ResponseBuilder.ok() + .body(this.sha256Hex.getBytes(StandardCharsets.UTF_8)) + .build() + ); + } + this.primaryCalls.incrementAndGet(); + return CompletableFuture.completedFuture( + ResponseBuilder.ok() + .body(this.primary) + .build() + ); + } + } +} diff --git a/composer-adapter/src/test/resources/cooldown/composer-packages-sample.json b/composer-adapter/src/test/resources/cooldown/composer-packages-sample.json new file mode 100644 index 000000000..1671d5417 --- /dev/null +++ b/composer-adapter/src/test/resources/cooldown/composer-packages-sample.json @@ -0,0 +1,251 @@ +{ + "packages": { + "vendor/sample-lib": { + "1.0.0": { + "name": "vendor/sample-lib", + "version": "1.0.0", + "version_normalized": "1.0.0.0", + "source": { + "type": "git", + "url": "https://github.com/vendor/sample-lib.git", + "reference": "abc123" + }, + "dist": { + "type": "zip", + "url": "https://repo.example.com/vendor/sample-lib/1.0.0/sample-lib-1.0.0.zip", + "reference": "abc123", + "shasum": "d41d8cd98f00b204e9800998ecf8427e" + }, + "require": { + "php": ">=7.4" + }, + "type": "library", + "autoload": { + "psr-4": { + "Vendor\\SampleLib\\": "src/" + } + }, + "license": [ + "MIT" + ], + "time": "2023-01-15T10:30:00+00:00" + }, + "1.1.0": { + "name": "vendor/sample-lib", + "version": "1.1.0", + "version_normalized": "1.1.0.0", + "source": { + "type": 
"git", + "url": "https://github.com/vendor/sample-lib.git", + "reference": "def456" + }, + "dist": { + "type": "zip", + "url": "https://repo.example.com/vendor/sample-lib/1.1.0/sample-lib-1.1.0.zip", + "reference": "def456", + "shasum": "e99a18c428cb38d5f260853678922e03" + }, + "require": { + "php": ">=7.4" + }, + "type": "library", + "license": [ + "MIT" + ], + "time": "2023-06-20T14:00:00+00:00" + }, + "1.2.0": { + "name": "vendor/sample-lib", + "version": "1.2.0", + "version_normalized": "1.2.0.0", + "source": { + "type": "git", + "url": "https://github.com/vendor/sample-lib.git", + "reference": "ghi789" + }, + "dist": { + "type": "zip", + "url": "https://repo.example.com/vendor/sample-lib/1.2.0/sample-lib-1.2.0.zip", + "reference": "ghi789", + "shasum": "f1d2d2f924e986ac86fdf7b36c94bcdf" + }, + "require": { + "php": ">=8.0" + }, + "type": "library", + "license": [ + "MIT" + ], + "time": "2023-09-01T09:15:00+00:00" + }, + "2.0.0": { + "name": "vendor/sample-lib", + "version": "2.0.0", + "version_normalized": "2.0.0.0", + "source": { + "type": "git", + "url": "https://github.com/vendor/sample-lib.git", + "reference": "jkl012" + }, + "dist": { + "type": "zip", + "url": "https://repo.example.com/vendor/sample-lib/2.0.0/sample-lib-2.0.0.zip", + "reference": "jkl012", + "shasum": "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6" + }, + "require": { + "php": ">=8.1" + }, + "type": "library", + "license": [ + "MIT" + ], + "time": "2024-01-10T16:45:00+00:00" + }, + "2.0.1": { + "name": "vendor/sample-lib", + "version": "2.0.1", + "version_normalized": "2.0.1.0", + "source": { + "type": "git", + "url": "https://github.com/vendor/sample-lib.git", + "reference": "mno345" + }, + "dist": { + "type": "zip", + "url": "https://repo.example.com/vendor/sample-lib/2.0.1/sample-lib-2.0.1.zip", + "reference": "mno345", + "shasum": "b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7" + }, + "require": { + "php": ">=8.1" + }, + "type": "library", + "license": [ + "MIT" + ], + "time": "2024-02-28T11:20:00+00:00" + }, 
+ "2.1.0": { + "name": "vendor/sample-lib", + "version": "2.1.0", + "version_normalized": "2.1.0.0", + "source": { + "type": "git", + "url": "https://github.com/vendor/sample-lib.git", + "reference": "pqr678" + }, + "dist": { + "type": "zip", + "url": "https://repo.example.com/vendor/sample-lib/2.1.0/sample-lib-2.1.0.zip", + "reference": "pqr678", + "shasum": "c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8" + }, + "require": { + "php": ">=8.1" + }, + "type": "library", + "license": [ + "MIT" + ], + "time": "2024-05-15T08:00:00+00:00" + }, + "2.2.0": { + "name": "vendor/sample-lib", + "version": "2.2.0", + "version_normalized": "2.2.0.0", + "source": { + "type": "git", + "url": "https://github.com/vendor/sample-lib.git", + "reference": "stu901" + }, + "dist": { + "type": "zip", + "url": "https://repo.example.com/vendor/sample-lib/2.2.0/sample-lib-2.2.0.zip", + "reference": "stu901", + "shasum": "d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9" + }, + "require": { + "php": ">=8.2" + }, + "type": "library", + "license": [ + "MIT" + ], + "time": "2024-08-20T13:30:00+00:00" + }, + "3.0.0-beta.1": { + "name": "vendor/sample-lib", + "version": "3.0.0-beta.1", + "version_normalized": "3.0.0.0-beta1", + "source": { + "type": "git", + "url": "https://github.com/vendor/sample-lib.git", + "reference": "vwx234" + }, + "dist": { + "type": "zip", + "url": "https://repo.example.com/vendor/sample-lib/3.0.0-beta.1/sample-lib-3.0.0-beta.1.zip", + "reference": "vwx234", + "shasum": "e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0" + }, + "require": { + "php": ">=8.2" + }, + "type": "library", + "license": [ + "MIT" + ], + "time": "2024-10-01T07:00:00+00:00" + }, + "3.0.0": { + "name": "vendor/sample-lib", + "version": "3.0.0", + "version_normalized": "3.0.0.0", + "source": { + "type": "git", + "url": "https://github.com/vendor/sample-lib.git", + "reference": "yza567" + }, + "dist": { + "type": "zip", + "url": "https://repo.example.com/vendor/sample-lib/3.0.0/sample-lib-3.0.0.zip", + "reference": "yza567", + "shasum": 
"f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1" + }, + "require": { + "php": ">=8.2" + }, + "type": "library", + "license": [ + "MIT" + ], + "time": "2024-11-15T15:00:00+00:00" + }, + "3.1.0": { + "name": "vendor/sample-lib", + "version": "3.1.0", + "version_normalized": "3.1.0.0", + "source": { + "type": "git", + "url": "https://github.com/vendor/sample-lib.git", + "reference": "bcd890" + }, + "dist": { + "type": "zip", + "url": "https://repo.example.com/vendor/sample-lib/3.1.0/sample-lib-3.1.0.zip", + "reference": "bcd890", + "shasum": "a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2" + }, + "require": { + "php": ">=8.2" + }, + "type": "library", + "license": [ + "MIT" + ], + "time": "2025-01-20T10:00:00+00:00" + } + } + } +} diff --git a/conan-adapter/pom.xml b/conan-adapter/pom.xml index b41bf73c4..32a44494d 100644 --- a/conan-adapter/pom.xml +++ b/conan-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 conan-adapter - 2.1.3 + 2.2.0 ${project.basedir}/../LICENSE.header @@ -45,12 +45,12 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -90,7 +90,7 @@ SOFTWARE. com.auto1.pantera pantera-storage-s3 - 2.1.3 + 2.2.0 test diff --git a/conda-adapter/benchmarks/pom.xml b/conda-adapter/benchmarks/pom.xml index ab0eb356b..09dc71a5a 100644 --- a/conda-adapter/benchmarks/pom.xml +++ b/conda-adapter/benchmarks/pom.xml @@ -26,12 +26,12 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 /../../pom.xml 4.0.0 conda-bench - 2.1.3 + 2.2.0 jar 1.29 @@ -41,7 +41,7 @@ SOFTWARE. com.auto1.pantera conda-adapter - 2.1.3 + 2.2.0 org.openjdk.jmh diff --git a/conda-adapter/pom.xml b/conda-adapter/pom.xml index 42b240681..0e5fb7496 100644 --- a/conda-adapter/pom.xml +++ b/conda-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 conda-adapter - 2.1.3 + 2.2.0 conda-adapter Turns your files/objects into conda repository 2021 @@ -48,12 +48,12 @@ SOFTWARE. 
com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -66,7 +66,7 @@ SOFTWARE. com.auto1.pantera pantera-storage-s3 - 2.1.3 + 2.2.0 test @@ -117,7 +117,7 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test diff --git a/debian-adapter/benchmarks/pom.xml b/debian-adapter/benchmarks/pom.xml index f3845c45c..83ced5ee8 100644 --- a/debian-adapter/benchmarks/pom.xml +++ b/debian-adapter/benchmarks/pom.xml @@ -27,11 +27,11 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 /../../pom.xml debian-bench - 2.1.3 + 2.2.0 jar 1.29 @@ -41,7 +41,7 @@ SOFTWARE. com.auto1.pantera debian-adapter - 2.1.3 + 2.2.0 compile diff --git a/debian-adapter/pom.xml b/debian-adapter/pom.xml index 61ad52637..91b407404 100644 --- a/debian-adapter/pom.xml +++ b/debian-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 debian-adapter - 2.1.3 + 2.2.0 jar debian-adapter Debian adapter @@ -50,12 +50,12 @@ SOFTWARE. com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -96,13 +96,13 @@ SOFTWARE. 
com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test com.auto1.pantera pantera-storage-s3 - 2.1.3 + 2.2.0 test diff --git a/debian-adapter/src/main/java/com/auto1/pantera/debian/MultiPackages.java b/debian-adapter/src/main/java/com/auto1/pantera/debian/MultiPackages.java index 50f530925..4c8036e47 100644 --- a/debian-adapter/src/main/java/com/auto1/pantera/debian/MultiPackages.java +++ b/debian-adapter/src/main/java/com/auto1/pantera/debian/MultiPackages.java @@ -13,6 +13,8 @@ import com.auto1.pantera.asto.PanteraIOException; import com.auto1.pantera.debian.metadata.ControlField; import java.io.BufferedReader; +import java.io.FilterInputStream; +import java.io.FilterOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -46,13 +48,11 @@ public interface MultiPackages { * does not close input or output streams, these operations should be made from the outside. * @since 0.6 */ - @SuppressWarnings("PMD.CloseResource") final class Unique implements MultiPackages { @Override public void merge(final Collection items, final OutputStream res) { - try { - final GZIPOutputStream gop = new GZIPOutputStream(res); + try (GZIPOutputStream gop = new GZIPOutputStream(new NonClosingOutputStream(res))) { final Set> packages = new HashSet<>(items.size()); for (final InputStream inp : items) { Unique.appendPackages(gop, inp, packages); @@ -74,10 +74,11 @@ public void merge(final Collection items, final OutputStream res) { private static void appendPackages( final OutputStream out, final InputStream inp, final Set> packages ) { - try { - final GZIPInputStream gis = new GZIPInputStream(inp); - final BufferedReader rdr = - new BufferedReader(new InputStreamReader(gis, StandardCharsets.UTF_8)); + try ( + GZIPInputStream gis = new GZIPInputStream(new NonClosingInputStream(inp)); + BufferedReader rdr = + new BufferedReader(new InputStreamReader(gis, StandardCharsets.UTF_8)) + ) { String line; StringBuilder item = new StringBuilder(); do { 
@@ -102,6 +103,45 @@ private static void appendPackages( throw new PanteraIOException(err); } } + + /** + * Wraps an {@link OutputStream} so that {@link #close()} is a no-op – ownership + * of the underlying stream is kept by the caller of + * {@link Unique#merge(Collection, OutputStream)}. + * @since 2.2.0 + */ + private static final class NonClosingOutputStream extends FilterOutputStream { + NonClosingOutputStream(final OutputStream out) { + super(out); + } + + @Override + public void write(final byte[] buf, final int off, final int len) throws IOException { + this.out.write(buf, off, len); + } + + @Override + public void close() throws IOException { + this.flush(); + } + } + + /** + * Wraps an {@link InputStream} so that {@link #close()} is a no-op – ownership + * of the underlying stream is kept by the caller of + * {@link Unique#merge(Collection, OutputStream)}. + * @since 2.2.0 + */ + private static final class NonClosingInputStream extends FilterInputStream { + NonClosingInputStream(final InputStream in) { + super(in); + } + + @Override + public void close() { + // Intentionally no-op – the underlying stream is owned by the caller. + } + } } } diff --git a/docker-adapter/pom.xml b/docker-adapter/pom.xml index 4521c9933..760c0fc85 100644 --- a/docker-adapter/pom.xml +++ b/docker-adapter/pom.xml @@ -25,11 +25,11 @@ SOFTWARE. 4.0.0 docker-adapter - 2.1.3 + 2.2.0 com.auto1.pantera pantera - 2.1.3 + 2.2.0 docker-adapter @@ -39,12 +39,12 @@ SOFTWARE. com.auto1.pantera http-client - 2.1.3 + 2.2.0 com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -63,7 +63,7 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test @@ -82,7 +82,7 @@ SOFTWARE. 
com.auto1.pantera pantera-storage-s3 - 2.1.3 + 2.2.0 test diff --git a/docker-adapter/src/main/java/com/auto1/pantera/docker/cache/CacheManifests.java b/docker-adapter/src/main/java/com/auto1/pantera/docker/cache/CacheManifests.java index b3d75b1ac..418dd3760 100644 --- a/docker-adapter/src/main/java/com/auto1/pantera/docker/cache/CacheManifests.java +++ b/docker-adapter/src/main/java/com/auto1/pantera/docker/cache/CacheManifests.java @@ -329,7 +329,7 @@ private CompletionStage finalizeManifestCache( effectiveOwner = ArtifactEvent.DEF_OWNER; } } - queue.add( + queue.add( // ok: unbounded ConcurrentLinkedDeque (ArtifactEvent queue) new ArtifactEvent( CacheManifests.REPO_TYPE, this.rname, diff --git a/docker-adapter/src/main/java/com/auto1/pantera/docker/cache/DockerProxyCooldownInspector.java b/docker-adapter/src/main/java/com/auto1/pantera/docker/cache/DockerProxyCooldownInspector.java index 6c6a72a20..38f8e9436 100644 --- a/docker-adapter/src/main/java/com/auto1/pantera/docker/cache/DockerProxyCooldownInspector.java +++ b/docker-adapter/src/main/java/com/auto1/pantera/docker/cache/DockerProxyCooldownInspector.java @@ -4,8 +4,8 @@ */ package com.auto1.pantera.docker.cache; -import com.auto1.pantera.cooldown.CooldownDependency; -import com.auto1.pantera.cooldown.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; import com.auto1.pantera.http.misc.ConfigDefaults; import java.time.Duration; @@ -20,7 +20,7 @@ * Uses Caffeine cache with automatic eviction to limit Old Gen growth. */ public final class DockerProxyCooldownInspector implements CooldownInspector, - com.auto1.pantera.cooldown.InspectorRegistry.InvalidatableInspector { + com.auto1.pantera.cooldown.config.InspectorRegistry.InvalidatableInspector { /** * Bounded cache of image release dates. 
diff --git a/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerCooldownResponseFactory.java b/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerCooldownResponseFactory.java new file mode 100644 index 000000000..d9536aefb --- /dev/null +++ b/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerCooldownResponseFactory.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.docker.cooldown; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.response.CooldownResponseFactory; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; + +import java.time.Duration; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; + +/** + * Docker-specific cooldown 403 response factory. + * + *

Returns {@code application/json} body matching the Docker Registry v2 error format.

+ * + * @since 2.2.0 + */ +public final class DockerCooldownResponseFactory implements CooldownResponseFactory { + + private static final DateTimeFormatter ISO = DateTimeFormatter.ISO_OFFSET_DATE_TIME; + + @Override + public Response forbidden(final CooldownBlock block) { + final String until = ISO.format( + block.blockedUntil().atOffset(ZoneOffset.UTC) + ); + final long retryAfter = Math.max( + 1L, + Duration.between(Instant.now(), block.blockedUntil()).getSeconds() + ); + final String body = String.format( + "{\"errors\":[{\"code\":\"DENIED\"," + + "\"message\":\"Tag in cooldown\"," + + "\"detail\":{\"blocked_until\":\"%s\"}}]}", until + ); + return ResponseBuilder.forbidden() + .header("Retry-After", String.valueOf(retryAfter)) + .header("X-Pantera-Cooldown", "blocked") + .jsonBody(body) + .build(); + } + + @Override + public String repoType() { + return "docker"; + } +} diff --git a/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataFilter.java b/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataFilter.java new file mode 100644 index 000000000..b012d5fda --- /dev/null +++ b/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataFilter.java @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.docker.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataFilter; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; + +import java.util.Set; + +/** + * Docker metadata filter implementing cooldown SPI. 
+ * Removes blocked tags from Docker registry tags/list metadata. + * + *

Filters the {@code tags} array by removing entries matching blocked version strings.

+ * + * @since 2.2.0 + */ +public final class DockerMetadataFilter implements MetadataFilter { + + @Override + public JsonNode filter(final JsonNode metadata, final Set blockedVersions) { + if (blockedVersions.isEmpty()) { + return metadata; + } + if (!(metadata instanceof ObjectNode)) { + return metadata; + } + final ObjectNode root = (ObjectNode) metadata; + final JsonNode tags = root.get("tags"); + if (tags == null || !tags.isArray()) { + return metadata; + } + final ArrayNode original = (ArrayNode) tags; + final ArrayNode filtered = root.arrayNode(); + for (final JsonNode tag : original) { + if (tag.isTextual() && !blockedVersions.contains(tag.asText())) { + filtered.add(tag); + } + } + root.set("tags", filtered); + return root; + } + + @Override + public JsonNode updateLatest(final JsonNode metadata, final String newLatest) { + // Docker tags/list has no "latest" pointer to update. + // The "latest" tag is simply an element in the tags array. + // No structural change is needed beyond the filter() call. + return metadata; + } +} diff --git a/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataParser.java b/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataParser.java new file mode 100644 index 000000000..60597f12f --- /dev/null +++ b/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataParser.java @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.docker.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataParseException; +import com.auto1.pantera.cooldown.metadata.MetadataParser; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +/** + * Docker metadata parser implementing cooldown SPI. + * Parses Docker registry tags/list JSON metadata and extracts tag information. + * + *

Docker tags/list structure:

+ *
+ * {
+ *   "name": "library/nginx",
+ *   "tags": ["1.24", "1.25", "1.26", "latest"]
+ * }
+ * 
+ * + * @since 2.2.0 + */ +public final class DockerMetadataParser implements MetadataParser { + + /** + * Shared ObjectMapper for JSON parsing (thread-safe). + */ + private static final ObjectMapper MAPPER = new ObjectMapper(); + + /** + * Content type for Docker tags/list metadata. + */ + private static final String CONTENT_TYPE = "application/json"; + + @Override + public JsonNode parse(final byte[] bytes) throws MetadataParseException { + if (bytes == null || bytes.length == 0) { + throw new MetadataParseException("Empty or null input for Docker tags/list JSON"); + } + try { + final JsonNode node = MAPPER.readTree(bytes); + if (node == null) { + throw new MetadataParseException("Parsed Docker tags/list JSON was null"); + } + return node; + } catch (final IOException ex) { + throw new MetadataParseException("Failed to parse Docker tags/list JSON", ex); + } + } + + @Override + public List extractVersions(final JsonNode metadata) { + final JsonNode tags = metadata.get("tags"); + if (tags == null || !tags.isArray()) { + return Collections.emptyList(); + } + final List result = new ArrayList<>(tags.size()); + for (final JsonNode tag : tags) { + if (tag.isTextual()) { + result.add(tag.asText()); + } + } + return result; + } + + @Override + public Optional getLatestVersion(final JsonNode metadata) { + final JsonNode tags = metadata.get("tags"); + if (tags == null || !tags.isArray()) { + return Optional.empty(); + } + for (final JsonNode tag : tags) { + if (tag.isTextual() && "latest".equals(tag.asText())) { + return Optional.of("latest"); + } + } + return Optional.empty(); + } + + @Override + public String contentType() { + return CONTENT_TYPE; + } + + /** + * Get the image repository name from metadata. 
+ * + * @param metadata Parsed metadata + * @return Repository name if present, empty otherwise + */ + public Optional getRepositoryName(final JsonNode metadata) { + final JsonNode name = metadata.get("name"); + if (name != null && name.isTextual()) { + return Optional.of(name.asText()); + } + return Optional.empty(); + } +} diff --git a/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataRequestDetector.java b/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataRequestDetector.java new file mode 100644 index 000000000..4862889f5 --- /dev/null +++ b/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataRequestDetector.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.docker.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataRequestDetector; + +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Docker metadata request detector implementing cooldown SPI. + * Identifies Docker tags/list requests by matching {@code /v2/{name}/tags/list}. + * + * @since 2.2.0 + */ +public final class DockerMetadataRequestDetector implements MetadataRequestDetector { + + /** + * Pattern matching Docker tags/list endpoint: {@code /v2/{name}/tags/list}. + * The name group captures the full image repository name (e.g. "library/nginx"). + */ + private static final Pattern TAGS_LIST = Pattern.compile( + "^/v2/(?.+)/tags/list$" + ); + + /** + * Repository type identifier. 
 */ + private static final String REPO_TYPE = "docker"; + + @Override + public boolean isMetadataRequest(final String path) { + return TAGS_LIST.matcher(path).matches(); + } + + @Override + public Optional<String> extractPackageName(final String path) { + final Matcher matcher = TAGS_LIST.matcher(path); + if (matcher.matches()) { + return Optional.of(matcher.group("name")); + } + return Optional.empty(); + } + + @Override + public String repoType() { + return REPO_TYPE; + } +} diff --git a/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataRewriter.java b/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataRewriter.java new file mode 100644 index 000000000..6d4d270a0 --- /dev/null +++ b/docker-adapter/src/main/java/com/auto1/pantera/docker/cooldown/DockerMetadataRewriter.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.docker.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataRewriteException; +import com.auto1.pantera.cooldown.metadata.MetadataRewriter; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +/** + * Docker metadata rewriter implementing cooldown SPI. + * Serializes filtered Docker tags/list metadata back to JSON bytes. + * + * @since 2.2.0 + */ +public final class DockerMetadataRewriter implements MetadataRewriter { + + /** + * Shared ObjectMapper for JSON serialization (thread-safe). + */ + private static final ObjectMapper MAPPER = new ObjectMapper(); + + /** + * Content type for Docker tags/list metadata. 
+ */ + private static final String CONTENT_TYPE = "application/json"; + + @Override + public byte[] rewrite(final JsonNode metadata) throws MetadataRewriteException { + try { + return MAPPER.writeValueAsBytes(metadata); + } catch (final JsonProcessingException ex) { + throw new MetadataRewriteException( + "Failed to serialize Docker tags/list metadata to JSON", ex + ); + } + } + + @Override + public String contentType() { + return CONTENT_TYPE; + } +} diff --git a/docker-adapter/src/main/java/com/auto1/pantera/docker/http/TrimmedDocker.java b/docker-adapter/src/main/java/com/auto1/pantera/docker/http/TrimmedDocker.java index 383edb0df..dc66feda5 100644 --- a/docker-adapter/src/main/java/com/auto1/pantera/docker/http/TrimmedDocker.java +++ b/docker-adapter/src/main/java/com/auto1/pantera/docker/http/TrimmedDocker.java @@ -37,6 +37,13 @@ public final class TrimmedDocker implements Docker { */ private final String prefix; + /** + * Pre-compiled pattern used by {@link #trim(String)} to match and strip + * the configured prefix from a repository name. Hoisted out of the hot + * path to avoid re-compiling on every call. 
+ */ + private final Pattern trimPattern; + /** * @param origin Docker origin * @param prefix Prefix to cut @@ -44,6 +51,9 @@ public final class TrimmedDocker implements Docker { public TrimmedDocker(Docker origin, String prefix) { this.origin = origin; this.prefix = prefix; + this.trimPattern = Pattern.compile( + String.format("(?:%s)\\/(.+)", prefix) + ); } @Override @@ -77,8 +87,7 @@ public CompletableFuture catalog(Pagination pagination) { */ private String trim(String name) { if (name != null) { - final Pattern pattern = Pattern.compile(String.format("(?:%s)\\/(.+)", this.prefix)); - final Matcher matcher = pattern.matcher(name); + final Matcher matcher = this.trimPattern.matcher(name); if (!matcher.matches()) { throw new IllegalArgumentException( String.format( diff --git a/docker-adapter/src/main/java/com/auto1/pantera/docker/http/manifest/PushManifestSlice.java b/docker-adapter/src/main/java/com/auto1/pantera/docker/http/manifest/PushManifestSlice.java index 3a032d54c..d555f9cd7 100644 --- a/docker-adapter/src/main/java/com/auto1/pantera/docker/http/manifest/PushManifestSlice.java +++ b/docker-adapter/src/main/java/com/auto1/pantera/docker/http/manifest/PushManifestSlice.java @@ -68,7 +68,7 @@ public CompletableFuture response(RequestLine line, Headers headers, C } return sizeFuture.thenApply(size -> { if (queue != null && ImageTag.valid(ref.digest())) { - queue.add( + queue.add( // ok: unbounded ConcurrentLinkedDeque (ArtifactEvent queue) new ArtifactEvent( "docker", docker.registryName(), diff --git a/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerCooldownResponseFactoryTest.java b/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerCooldownResponseFactoryTest.java new file mode 100644 index 000000000..3d249fbee --- /dev/null +++ b/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerCooldownResponseFactoryTest.java @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 
DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.docker.cooldown; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.RsStatus; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.Collections; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.emptyOrNullString; + +/** + * Tests for {@link DockerCooldownResponseFactory}. 
+ * + * @since 2.2.0 + */ +final class DockerCooldownResponseFactoryTest { + + private DockerCooldownResponseFactory factory; + + @BeforeEach + void setUp() { + this.factory = new DockerCooldownResponseFactory(); + } + + @Test + void returns403Status() { + final Response response = this.factory.forbidden(block()); + assertThat(response.status(), is(RsStatus.FORBIDDEN)); + } + + @Test + void returnsApplicationJsonContentType() { + final Response response = this.factory.forbidden(block()); + final String contentType = response.headers() + .values("Content-Type").get(0); + assertThat(contentType, containsString("application/json")); + } + + @Test + void bodyContainsDeniedErrorCode() { + final Response response = this.factory.forbidden(block()); + final String body = new String(response.body().asBytes()); + assertThat(body, containsString("\"code\":\"DENIED\"")); + } + + @Test + void bodyContainsTagInCooldownMessage() { + final Response response = this.factory.forbidden(block()); + final String body = new String(response.body().asBytes()); + assertThat(body, containsString("\"message\":\"Tag in cooldown\"")); + } + + @Test + void bodyContainsBlockedUntilDetail() { + final Response response = this.factory.forbidden(block()); + final String body = new String(response.body().asBytes()); + assertThat(body, containsString("\"blocked_until\":\"")); + assertThat(body, containsString("Z")); + } + + @Test + void bodyMatchesDockerV2ErrorFormat() { + final Response response = this.factory.forbidden(block()); + final String body = new String(response.body().asBytes()); + assertThat(body, containsString("\"errors\":[{")); + } + + @Test + void includesRetryAfterHeader() { + final Response response = this.factory.forbidden(block()); + final String retryAfter = response.headers() + .values("Retry-After").get(0); + assertThat(retryAfter, is(not(emptyOrNullString()))); + final long seconds = Long.parseLong(retryAfter); + assertThat(seconds > 0, is(true)); + } + + @Test + void 
includesCooldownBlockedHeader() { + final Response response = this.factory.forbidden(block()); + final String cooldown = response.headers() + .values("X-Pantera-Cooldown").get(0); + assertThat(cooldown, equalTo("blocked")); + } + + @Test + void repoTypeIsDocker() { + assertThat(this.factory.repoType(), equalTo("docker")); + } + + private static CooldownBlock block() { + return new CooldownBlock( + "docker", + "docker-proxy", + "library/nginx", + "1.27", + CooldownReason.FRESH_RELEASE, + Instant.now().minus(1, ChronoUnit.HOURS), + Instant.now().plus(23, ChronoUnit.HOURS), + Collections.emptyList() + ); + } +} diff --git a/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataFilterTest.java b/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataFilterTest.java new file mode 100644 index 000000000..782b90832 --- /dev/null +++ b/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataFilterTest.java @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.docker.cooldown; + +import com.fasterxml.jackson.databind.JsonNode; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; + +/** + * Tests for {@link DockerMetadataFilter}. 
+ * + * @since 2.2.0 + */ +final class DockerMetadataFilterTest { + + private DockerMetadataParser parser; + private DockerMetadataFilter filter; + + @BeforeEach + void setUp() { + this.parser = new DockerMetadataParser(); + this.filter = new DockerMetadataFilter(); + } + + @Test + void filtersBlockedTagsFromArray() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "1.25", "1.26", "latest"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Set.of("1.25", "latest")); + final List remaining = this.parser.extractVersions(filtered); + assertThat(remaining, hasSize(2)); + assertThat(remaining, containsInAnyOrder("1.24", "1.26")); + } + + @Test + void filtersThreeOfTenTags() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.22", "1.23", "1.24", "1.25", "1.26", + "1.27", "stable", "mainline", "alpine", "latest"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter( + metadata, Set.of("1.22", "1.23", "1.24") + ); + final List remaining = this.parser.extractVersions(filtered); + assertThat(remaining, hasSize(7)); + assertThat( + remaining, + containsInAnyOrder("1.25", "1.26", "1.27", "stable", "mainline", "alpine", "latest") + ); + } + + @Test + void returnsUnmodifiedWhenNoBlockedVersions() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "1.25", "latest"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Collections.emptySet()); + final List remaining = this.parser.extractVersions(filtered); + assertThat(remaining, hasSize(3)); + assertThat(remaining, containsInAnyOrder("1.24", "1.25", "latest")); + } + + @Test + void filtersAllTags() throws Exception 
{ + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "1.25"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Set.of("1.24", "1.25")); + final List remaining = this.parser.extractVersions(filtered); + assertThat(remaining, is(empty())); + } + + @Test + void preservesNameField() throws Exception { + final String json = """ + { + "name": "myorg/myimage", + "tags": ["v1", "v2", "v3"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Set.of("v2")); + assertThat(filtered.get("name").asText(), equalTo("myorg/myimage")); + } + + @Test + void ignoresBlockedVersionsNotInTags() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "1.25"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter( + metadata, Set.of("9.99.99", "nonexistent") + ); + final List remaining = this.parser.extractVersions(filtered); + assertThat(remaining, hasSize(2)); + assertThat(remaining, containsInAnyOrder("1.24", "1.25")); + } + + @Test + void handlesMetadataWithNoTagsField() throws Exception { + final String json = """ + { + "name": "library/empty" + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Set.of("1.0")); + assertThat(filtered.get("name").asText(), equalTo("library/empty")); + } + + @Test + void updateLatestIsNoOp() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "1.25", "latest"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode updated = this.filter.updateLatest(metadata, "1.24"); + // Docker tags/list has no separate 
"latest" pointer; updateLatest is a no-op + final List tags = this.parser.extractVersions(updated); + assertThat(tags, hasSize(3)); + assertThat(tags, containsInAnyOrder("1.24", "1.25", "latest")); + } + + @Test + void handlesSingleTagBlocked() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["latest"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(metadata, Set.of("latest")); + final List remaining = this.parser.extractVersions(filtered); + assertThat(remaining, is(empty())); + } +} diff --git a/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataParserTest.java b/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataParserTest.java new file mode 100644 index 000000000..75088f687 --- /dev/null +++ b/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataParserTest.java @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.docker.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataParseException; +import com.fasterxml.jackson.databind.JsonNode; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Tests for {@link DockerMetadataParser}. + * + * @since 2.2.0 + */ +final class DockerMetadataParserTest { + + private DockerMetadataParser parser; + + @BeforeEach + void setUp() { + this.parser = new DockerMetadataParser(); + } + + @Test + void parsesValidDockerTagsList() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "1.25", "1.26", "latest"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + assertThat(metadata, is(notNullValue())); + assertThat(metadata.get("name").asText(), equalTo("library/nginx")); + } + + @Test + void extractsTagsFromMetadata() throws Exception { + final String json = """ + { + "name": "library/redis", + "tags": ["7.0", "7.2", "7.4", "alpine", "latest"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, hasSize(5)); + assertThat(versions, containsInAnyOrder("7.0", "7.2", "7.4", "alpine", "latest")); + } + + @Test + void parsesFixtureFile() throws Exception { + final byte[] bytes = 
loadFixture("cooldown/docker-tags-list-sample.json"); + final JsonNode metadata = this.parser.parse(bytes); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, hasSize(10)); + assertThat( + versions, + containsInAnyOrder( + "1.22", "1.23", "1.24", "1.25", "1.26", "1.27", + "stable", "mainline", "alpine", "latest" + ) + ); + } + + @Test + void returnsEmptyListWhenNoTags() throws Exception { + final String json = """ + { + "name": "library/empty" + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, is(empty())); + } + + @Test + void returnsEmptyListWhenTagsIsNull() throws Exception { + final String json = """ + { + "name": "library/nulltags", + "tags": null + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, is(empty())); + } + + @Test + void returnsEmptyListWhenTagsIsNotArray() throws Exception { + final String json = """ + { + "name": "library/badtags", + "tags": "not-an-array" + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, is(empty())); + } + + @Test + void getsLatestVersionWhenPresent() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "1.25", "latest"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final Optional latest = this.parser.getLatestVersion(metadata); + assertThat(latest.isPresent(), is(true)); + assertThat(latest.get(), equalTo("latest")); + } + + @Test + void returnsEmptyWhenNoLatestTag() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "1.25", "1.26"] + } + """; + final JsonNode 
metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final Optional latest = this.parser.getLatestVersion(metadata); + assertThat(latest.isPresent(), is(false)); + } + + @Test + void returnsEmptyWhenTagsAbsentForLatest() throws Exception { + final String json = """ + { + "name": "library/empty" + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final Optional latest = this.parser.getLatestVersion(metadata); + assertThat(latest.isPresent(), is(false)); + } + + @Test + void getsRepositoryName() throws Exception { + final String json = """ + { + "name": "myorg/myimage", + "tags": ["v1"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final Optional name = this.parser.getRepositoryName(metadata); + assertThat(name.isPresent(), is(true)); + assertThat(name.get(), equalTo("myorg/myimage")); + } + + @Test + void returnsEmptyRepositoryNameWhenMissing() throws Exception { + final String json = """ + { + "tags": ["v1"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final Optional name = this.parser.getRepositoryName(metadata); + assertThat(name.isPresent(), is(false)); + } + + @Test + void returnsCorrectContentType() { + assertThat(this.parser.contentType(), equalTo("application/json")); + } + + @Test + void throwsOnInvalidJson() { + final byte[] invalid = "not valid json {{{".getBytes(StandardCharsets.UTF_8); + assertThrows(MetadataParseException.class, () -> this.parser.parse(invalid)); + } + + @Test + void throwsOnEmptyInput() { + final byte[] empty = new byte[0]; + assertThrows(MetadataParseException.class, () -> this.parser.parse(empty)); + } + + @Test + void handlesLargeTagsList() throws Exception { + final StringBuilder json = new StringBuilder(); + json.append("{\"name\":\"library/large\",\"tags\":["); + for (int i = 0; i < 500; i++) { + if (i > 0) { + json.append(","); + } + 
json.append(String.format("\"%d.0.0\"", i)); + } + json.append("]}"); + final JsonNode metadata = this.parser.parse( + json.toString().getBytes(StandardCharsets.UTF_8) + ); + final List versions = this.parser.extractVersions(metadata); + assertThat(versions, hasSize(500)); + } + + @Test + void extractReleaseDatesReturnsEmpty() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "latest"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + assertThat(this.parser.extractReleaseDates(metadata).isEmpty(), is(true)); + } + + private static byte[] loadFixture(final String resource) throws IOException { + try (InputStream stream = DockerMetadataParserTest.class.getClassLoader() + .getResourceAsStream(resource)) { + if (stream == null) { + throw new IOException("Resource not found: " + resource); + } + return stream.readAllBytes(); + } + } +} diff --git a/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataRequestDetectorTest.java b/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataRequestDetectorTest.java new file mode 100644 index 000000000..ec88600e0 --- /dev/null +++ b/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataRequestDetectorTest.java @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.docker.cooldown; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.util.Optional; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +/** + * Tests for {@link DockerMetadataRequestDetector}. + * + * @since 2.2.0 + */ +final class DockerMetadataRequestDetectorTest { + + private DockerMetadataRequestDetector detector; + + @BeforeEach + void setUp() { + this.detector = new DockerMetadataRequestDetector(); + } + + @ParameterizedTest + @ValueSource(strings = { + "/v2/library/nginx/tags/list", + "/v2/myorg/myimage/tags/list", + "/v2/a/b/c/tags/list", + "/v2/single/tags/list" + }) + void detectsTagsListRequests(final String path) { + assertThat(this.detector.isMetadataRequest(path), is(true)); + } + + @ParameterizedTest + @ValueSource(strings = { + "/v2/library/nginx/manifests/latest", + "/v2/library/nginx/blobs/sha256:abc123", + "/v2/library/nginx/blobs/uploads/uuid123", + "/v2/_catalog", + "/v2/", + "/v2/library/nginx/tags/list/extra", + "/library/nginx/tags/list", + "/v2/tags/list", + "/v1/library/nginx/tags/list" + }) + void rejectsNonTagsListRequests(final String path) { + assertThat(this.detector.isMetadataRequest(path), is(false)); + } + + @Test + void extractsSimpleImageName() { + final Optional name = this.detector.extractPackageName( + "/v2/library/nginx/tags/list" + ); + assertThat(name.isPresent(), is(true)); + assertThat(name.get(), equalTo("library/nginx")); + } + + @Test + void extractsNestedImageName() { + final Optional name = this.detector.extractPackageName( + "/v2/myorg/subgroup/myimage/tags/list" + ); + assertThat(name.isPresent(), is(true)); + assertThat(name.get(), equalTo("myorg/subgroup/myimage")); + } + + @Test + void extractsSingleSegmentName() { + final Optional name = 
this.detector.extractPackageName( + "/v2/ubuntu/tags/list" + ); + assertThat(name.isPresent(), is(true)); + assertThat(name.get(), equalTo("ubuntu")); + } + + @Test + void returnsEmptyForNonMetadataPath() { + final Optional name = this.detector.extractPackageName( + "/v2/library/nginx/manifests/latest" + ); + assertThat(name.isPresent(), is(false)); + } + + @Test + void returnsDockerRepoType() { + assertThat(this.detector.repoType(), equalTo("docker")); + } +} diff --git a/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataRewriterTest.java b/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataRewriterTest.java new file mode 100644 index 000000000..a06cebbab --- /dev/null +++ b/docker-adapter/src/test/java/com/auto1/pantera/docker/cooldown/DockerMetadataRewriterTest.java @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.docker.cooldown; + +import com.fasterxml.jackson.databind.JsonNode; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Set; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; + +/** + * Tests for {@link DockerMetadataRewriter}. 
+ * + * @since 2.2.0 + */ +final class DockerMetadataRewriterTest { + + private DockerMetadataParser parser; + private DockerMetadataFilter filter; + private DockerMetadataRewriter rewriter; + + @BeforeEach + void setUp() { + this.parser = new DockerMetadataParser(); + this.filter = new DockerMetadataFilter(); + this.rewriter = new DockerMetadataRewriter(); + } + + @Test + void roundTripPreservesUnfilteredMetadata() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "1.25", "1.26", "latest"] + } + """; + final JsonNode original = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final byte[] rewritten = this.rewriter.rewrite(original); + final JsonNode reparsed = this.parser.parse(rewritten); + assertThat(reparsed.get("name").asText(), equalTo("library/nginx")); + final List tags = this.parser.extractVersions(reparsed); + assertThat(tags, hasSize(4)); + assertThat(tags, containsInAnyOrder("1.24", "1.25", "1.26", "latest")); + } + + @Test + void roundTripAfterFiltering() throws Exception { + final String json = """ + { + "name": "library/redis", + "tags": ["7.0", "7.2", "7.4", "alpine", "latest"] + } + """; + final JsonNode original = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(original, Set.of("7.0", "alpine")); + final byte[] rewritten = this.rewriter.rewrite(filtered); + final JsonNode reparsed = this.parser.parse(rewritten); + assertThat(reparsed.get("name").asText(), equalTo("library/redis")); + final List tags = this.parser.extractVersions(reparsed); + assertThat(tags, hasSize(3)); + assertThat(tags, containsInAnyOrder("7.2", "7.4", "latest")); + } + + @Test + void roundTripAfterFilteringAll() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.24", "1.25"] + } + """; + final JsonNode original = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter(original, 
Set.of("1.24", "1.25")); + final byte[] rewritten = this.rewriter.rewrite(filtered); + final JsonNode reparsed = this.parser.parse(rewritten); + assertThat(reparsed.get("name").asText(), equalTo("library/nginx")); + final List tags = this.parser.extractVersions(reparsed); + assertThat(tags, hasSize(0)); + } + + @Test + void rewriteProducesValidJsonBytes() throws Exception { + final String json = """ + { + "name": "myorg/myimage", + "tags": ["v1", "v2"] + } + """; + final JsonNode metadata = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final byte[] bytes = this.rewriter.rewrite(metadata); + assertThat(bytes, is(notNullValue())); + assertThat(bytes.length > 0, is(true)); + // Verify it is valid JSON by re-parsing + final JsonNode reparsed = this.parser.parse(bytes); + assertThat(reparsed, is(notNullValue())); + } + + @Test + void returnsCorrectContentType() { + assertThat(this.rewriter.contentType(), equalTo("application/json")); + } + + @Test + void roundTripWithFixtureData() throws Exception { + final String json = """ + { + "name": "library/nginx", + "tags": ["1.22", "1.23", "1.24", "1.25", "1.26", + "1.27", "stable", "mainline", "alpine", "latest"] + } + """; + final JsonNode original = this.parser.parse(json.getBytes(StandardCharsets.UTF_8)); + final JsonNode filtered = this.filter.filter( + original, Set.of("1.22", "1.23", "1.24") + ); + final byte[] rewritten = this.rewriter.rewrite(filtered); + final JsonNode reparsed = this.parser.parse(rewritten); + final List tags = this.parser.extractVersions(reparsed); + assertThat(tags, hasSize(7)); + assertThat( + tags, + containsInAnyOrder("1.25", "1.26", "1.27", "stable", "mainline", "alpine", "latest") + ); + assertThat(reparsed.get("name").asText(), equalTo("library/nginx")); + } +} diff --git a/docker-adapter/src/test/resources/cooldown/docker-tags-list-sample.json b/docker-adapter/src/test/resources/cooldown/docker-tags-list-sample.json new file mode 100644 index 000000000..145cccfad --- /dev/null 
+++ b/docker-adapter/src/test/resources/cooldown/docker-tags-list-sample.json @@ -0,0 +1,15 @@ +{ + "name": "library/nginx", + "tags": [ + "1.22", + "1.23", + "1.24", + "1.25", + "1.26", + "1.27", + "stable", + "mainline", + "alpine", + "latest" + ] +} \ No newline at end of file diff --git a/docs/admin-guide/cache-configuration.md b/docs/admin-guide/cache-configuration.md new file mode 100644 index 000000000..63e3d2d64 --- /dev/null +++ b/docs/admin-guide/cache-configuration.md @@ -0,0 +1,74 @@ +# Cache Configuration + +> **Guide:** Admin Guide | **Section:** Cache Configuration + +This page is the consolidated reference for every `meta.caches.*` setting Pantera consumes. It covers the L1 (Caffeine in-process) + L2 (Valkey) two-tier caches used by the auth-enabled filter, group-metadata stale fallback, the group-negative cache, and cooldown-metadata. + +--- + +## Override Precedence + +Every cache setting resolves via a strict 3-tier precedence chain. **No cache setting is hardcoded** -- the compile-time default only applies when both the environment variable and the YAML key are absent. + +``` +environment variable -> YAML (pantera.yml) -> compile-time default +``` + +This means operators can override any setting without editing `pantera.yml`, and CI/CD can pin values via env without shipping a new config bundle. A value of `0` in an env var is treated as "explicitly set to zero" (it is not a reset to the default). + +--- + +## auth-enabled (CachedLocalEnabledFilter) + +Caches the per-user "enabled" flag in front of `LocalEnabledFilter` so a 1000 req/s workload does not exhaust the Hikari pool with a per-request JDBC hit. Cross-node eviction runs over `CacheInvalidationPubSub` -- admin changes (put/delete/enable/disable/alter-password) invalidate every node. 
+ +| Setting | Env var | Default | Unit | +|---|---|---|---| +| `meta.caches.auth-enabled.l1.maxSize` | `PANTERA_AUTH_ENABLED_L1_MAX_SIZE` | `10000` | entries | +| `meta.caches.auth-enabled.l1.ttlSeconds` | `PANTERA_AUTH_ENABLED_L1_TTL_SECONDS` | `300` | seconds | +| `meta.caches.auth-enabled.l2.enabled` | `PANTERA_AUTH_ENABLED_L2_ENABLED` | `true` | boolean | +| `meta.caches.auth-enabled.l2.ttlSeconds` | `PANTERA_AUTH_ENABLED_L2_TTL_SECONDS` | `3600` | seconds | +| `meta.caches.auth-enabled.l2.timeoutMs` | `PANTERA_AUTH_ENABLED_L2_TIMEOUT_MS` | `100` | milliseconds | + +A healthy cluster should see `auth-enabled.hit_rate` above 95% under steady-state traffic. If the hit rate is materially lower, check for pub/sub invalidation storms or a misconfigured TTL. + +--- + +## group-metadata-stale (GroupMetadataCache stale fallback) + +Holds the last-known-good metadata payload for group repositories so a partial upstream outage still serves consumers from the stale tier. **Design principle: the cache is an aid, never a breaker.** Under realistic cardinality no eviction fires; the `maxSize` entries exist only as a JVM-memory safety net against pathological growth. + +Degradation path on read is `L1 -> L2 -> expired primary-cache entry -> miss`. L2 now survives JVM restart (the previous `ConcurrentHashMap` did not), strictly improving availability. 
+ +| Setting | Env var | Default | Unit | +|---|---|---|---| +| `meta.caches.group-metadata-stale.l1.maxSize` | `PANTERA_GROUP_METADATA_STALE_L1_MAX_SIZE` | `100000` | entries | +| `meta.caches.group-metadata-stale.l1.ttlSeconds` | `PANTERA_GROUP_METADATA_STALE_L1_TTL_SECONDS` | `2592000` (30 d) | seconds | +| `meta.caches.group-metadata-stale.l2.enabled` | `PANTERA_GROUP_METADATA_STALE_L2_ENABLED` | `true` | boolean | +| `meta.caches.group-metadata-stale.l2.ttlSeconds` | `PANTERA_GROUP_METADATA_STALE_L2_TTL_SECONDS` | `0` (no TTL) | seconds | +| `meta.caches.group-metadata-stale.l2.timeoutMs` | `PANTERA_GROUP_METADATA_STALE_L2_TIMEOUT_MS` | `100` | milliseconds | + +`l2.ttlSeconds = 0` is intentional -- Valkey LRU owns eviction for this tier. Pair with `maxmemory-policy = allkeys-lru` on the Valkey side (see [Valkey Setup](valkey-setup.md)). + +--- + +## Related cache sections + +These sections predate Groups B/C and are documented in the [Configuration Reference](../configuration-reference.md#meta-caches): + +- `meta.caches.group-negative.*` -- group-miss negative cache (per-slot 404s avoid repeated fanout). +- `meta.caches.cooldown-metadata.*` -- long-lived cached metadata payloads for cooldown re-evaluation. +- `meta.caches.cooldown.*` -- per-version cooldown block decisions (L1 + L2). +- `meta.caches.negative.*` -- generic negative cache (non-group). +- `meta.caches.valkey.*` -- shared Valkey client configuration. + +All of these honor the same env/YAML/default precedence. + +--- + +## Related Pages + +- [Valkey Setup](valkey-setup.md) -- Required Valkey settings for the L2 tiers. +- [Environment Variables](environment-variables.md) -- Full env-var index. +- [Configuration Reference](../configuration-reference.md) -- Complete `meta.caches` schema. +- [Monitoring](monitoring.md) -- Cache hit-rate and stale-served metrics. 
diff --git a/docs/admin-guide/database.md b/docs/admin-guide/database.md new file mode 100644 index 000000000..d7418c121 --- /dev/null +++ b/docs/admin-guide/database.md @@ -0,0 +1,73 @@ +# Database + +> **Guide:** Admin Guide | **Section:** Database + +Pantera uses PostgreSQL for the artifact index, users, repositories, policies, cooldown records, and Quartz scheduler tables. This page covers operator-facing concerns: connection pool sizing, fail-fast timeouts, and leak detection. + +--- + +## Connection Pool (HikariCP) + +Pantera uses HikariCP to pool connections. All pool settings are controlled via `PANTERA_DB_*` environment variables -- see [Environment Variables](environment-variables.md#database-hikaricp) for the full list. + +### Fail-fast defaults + +As of v2.2.0, the default `connectionTimeout` and `leakDetectionThreshold` are tightened. Under a backend outage or a held-connection bug, the previous defaults let requests pile up for minutes; the new defaults surface the problem within seconds. + +| Setting | v2.1.x default | v2.2.0 default | What it guards | +|---|---|---|---| +| `PANTERA_DB_CONNECTION_TIMEOUT_MS` | `5000` | `3000` | Thread waits this long for an available connection before `SQLTransientConnectionException`. Shorter = faster fail, clearer signal. | +| `PANTERA_DB_LEAK_DETECTION_MS` | `300000` | `5000` | Hikari WARNs if a connection is held past this threshold. Shorter = connection-leak bugs become visible immediately. | + +The other HikariCP defaults (`pool.max=50`, `pool.min=10`, `idleTimeout=600000`, `maxLifetime=1800000`) are unchanged. + +### What a Hikari leak WARN means + +Seeing `HikariPool-*: Connection ... has been leaked` or `Apparent connection leak detected` in the logs is a **real bug**: a code path acquired a pooled connection and did not return it to the pool within `leakDetectionThreshold`. 
Pantera's own code paths close connections via try-with-resources, so any WARN is expected to be either: + +- A genuine held-connection bug in new code (triage via the stack trace in the WARN). +- A long-running admin query (unlikely under the new defaults, but possible during manual DBA sessions over the same pool -- those should use a separate connection). + +Before the v2.2.0 defaults these WARNs were silent -- the threshold was 5 minutes, which is longer than Pantera's internal request deadline. Operators may now see WARN lines that were previously suppressed. **Treat every new WARN as an incident until proven otherwise.** + +--- + +## Canary Ramp Guide + +Rolling out the tighter timeouts directly to steady-state prod risks surfacing dormant bugs as client-visible timeouts. Use the following ramp: + +### Week 1 -- relaxed overrides on canary + +Set these env vars on the canary instance(s) only: + +``` +PANTERA_DB_CONNECTION_TIMEOUT_MS=10000 +PANTERA_DB_LEAK_DETECTION_MS=30000 +``` + +Watch `pantera.hikaricp.connections_pending` and any `Connection leak detected` WARN. A non-zero leak count on the canary in week 1 means there is an outstanding held-connection bug somewhere in the rollout surface -- fix it before proceeding. + +### Week 2+ -- drop to defaults + +Once week 1 is clean (zero leak WARNs, `connections_pending` p99 well under 3 seconds), remove the env-var overrides. The canary now picks up the v2.2.0 defaults (3s / 5s). Roll progressively to the rest of the fleet. + +If a leak WARN surfaces in week 2, **do not raise the threshold again** -- the WARN is catching a bug that would otherwise degrade the whole pool silently. Triage and fix. + +--- + +## PostgreSQL Settings + +Pantera relies on standard PostgreSQL tuning. Notes specific to this workload: + +- `idle_in_transaction_session_timeout` of a few seconds is safe; Pantera does not hold transactions open across request boundaries. 
+- `statement_timeout` should be at least as large as `PANTERA_SEARCH_LIKE_TIMEOUT_MS` (default 3000 ms) -- the search fallback relies on server-side cancellation. +- Schema migrations run via Flyway at startup. A schema-lock deadlock will block startup; check `pg_locks` if a rolling upgrade stalls at boot. + +--- + +## Related Pages + +- [Environment Variables](environment-variables.md) -- `PANTERA_DB_*` reference. +- [Performance Tuning](performance-tuning.md) -- Pool sizing vs. worker threads. +- [Monitoring](monitoring.md) -- Hikari metric catalogue. +- [Backup and Recovery](backup-and-recovery.md) -- PostgreSQL backup workflow. diff --git a/docs/admin-guide/deployment-nlb.md b/docs/admin-guide/deployment-nlb.md new file mode 100644 index 000000000..c620c4e02 --- /dev/null +++ b/docs/admin-guide/deployment-nlb.md @@ -0,0 +1,44 @@ +# Deployment behind an NLB + +> **Guide:** Admin Guide | **Section:** Deployment behind an NLB + +When Pantera sits behind a Layer-4 load balancer (AWS NLB, HAProxy in TCP mode, any other PROXY-protocol-v2 forwarder), the listener sees the LB's IP on every connection -- not the real client IP. The standard fix is PROXY protocol, which the LB prepends as a small prelude on every new connection. + +--- + +## HTTP/1 and HTTP/2 + +The Vert.x-served HTTP/1 + HTTP/2 listeners already honor the `meta.http_server.use-proxy-protocol` YAML flag. See [Configuration Reference](../configuration-reference.md) for the setting. + +--- + +## HTTP/3 + +The HTTP/3 listener is served via Jetty's `QuicheServerConnector`. As of v2.2.0, PROXY-protocol support is opt-in via environment variable: + +``` +PANTERA_HTTP3_PROXY_PROTOCOL=true +``` + +When set, Pantera prepends Jetty's `ProxyConnectionFactory` to the connector's factory chain. An INFO startup log is emitted with `event.action=http3_proxy_protocol_enabled` and the listener port so operators can confirm activation. 
+ +Default is `false` -- enabling PROXY protocol when the LB is not configured to send it will cause every connection to fail at the protocol-prelude parse. Make sure the LB side is sending PROXY v2 before flipping the flag. + +The equivalent YAML path (`meta.http3.proxyProtocol`) is not yet wired because `Http3Server`'s constructor does not currently accept a `Settings` object. Use the env var. + +--- + +## Checklist for NLB deployments + +1. NLB target-group protocol is TCP (Layer 4). Termination of TLS happens inside Pantera, not the LB. +2. NLB is configured to send PROXY v2 on every target connection (target-group attribute `proxy_protocol_v2.enabled=true` on AWS NLB). +3. For HTTP/1 + HTTP/2: set `meta.http_server.use-proxy-protocol: true` in `pantera.yml`. +4. For HTTP/3: set `PANTERA_HTTP3_PROXY_PROTOCOL=true` in the environment. +5. Verify access logs now show real client IPs (`client.ip` in the ECS JSON line), not the LB's IP. + +--- + +## Related Pages + +- [High Availability](high-availability.md) -- Multi-node cluster layout. +- [Environment Variables](environment-variables.md#http-3) -- `PANTERA_HTTP3_*` reference. diff --git a/docs/admin-guide/environment-variables.md b/docs/admin-guide/environment-variables.md index f30fd51fa..fb86bc9c6 100644 --- a/docs/admin-guide/environment-variables.md +++ b/docs/admin-guide/environment-variables.md @@ -14,10 +14,10 @@ All `PANTERA_*` variables can also be set as Java system properties using the lo |----------|---------|-------------| | `PANTERA_DB_POOL_MAX` | `50` | Maximum database connection pool size | | `PANTERA_DB_POOL_MIN` | `10` | Minimum idle connections | -| `PANTERA_DB_CONNECTION_TIMEOUT_MS` | `5000` | Connection acquisition timeout (ms) | +| `PANTERA_DB_CONNECTION_TIMEOUT_MS` | `3000` | Connection acquisition timeout (ms). v2.2.0 fail-fast default (was `5000`). See [Database](database.md#canary-ramp-guide) for the canary ramp. 
| | `PANTERA_DB_IDLE_TIMEOUT_MS` | `600000` | Idle connection timeout (ms) -- 10 minutes | | `PANTERA_DB_MAX_LIFETIME_MS` | `1800000` | Maximum connection lifetime (ms) -- 30 minutes | -| `PANTERA_DB_LEAK_DETECTION_MS` | `300000` | Connection leak detection threshold (ms) -- 5 minutes | +| `PANTERA_DB_LEAK_DETECTION_MS` | `5000` | Connection leak detection threshold (ms). v2.2.0 fail-fast default (was `300000`). A WARN past this threshold is a real held-connection bug -- see [Database](database.md#what-a-hikari-leak-warn-means). | | `PANTERA_DB_BUFFER_SECONDS` | `2` | Event batch buffer time (seconds) | | `PANTERA_DB_BATCH_SIZE` | `200` | Maximum events per database batch | @@ -176,9 +176,56 @@ These variables are consumed by the AWS SDK inside the container: --- +## Authentication Cache (auth-enabled) + +Two-tier cache (L1 Caffeine + L2 Valkey) in front of `LocalEnabledFilter`. See [Cache Configuration](cache-configuration.md#auth-enabled-cachedlocalenabledfilter). + +| Variable | Default | Description | +|----------|---------|-------------| +| `PANTERA_AUTH_ENABLED_L1_MAX_SIZE` | `10000` | L1 (Caffeine) max entries. | +| `PANTERA_AUTH_ENABLED_L1_TTL_SECONDS` | `300` | L1 TTL in seconds. | +| `PANTERA_AUTH_ENABLED_L2_ENABLED` | `true` | Enable the Valkey L2 tier. Set `false` to run L1-only. | +| `PANTERA_AUTH_ENABLED_L2_TTL_SECONDS` | `3600` | L2 TTL in seconds. | +| `PANTERA_AUTH_ENABLED_L2_TIMEOUT_MS` | `100` | L2 read timeout in milliseconds. | + +--- + +## Group Metadata Stale Cache + +Two-tier last-known-good fallback for group repositories. `l2.ttlSeconds = 0` is intentional -- Valkey LRU owns eviction. See [Cache Configuration](cache-configuration.md#group-metadata-stale-groupmetadatacache-stale-fallback). + +| Variable | Default | Description | +|----------|---------|-------------| +| `PANTERA_GROUP_METADATA_STALE_L1_MAX_SIZE` | `100000` | L1 max entries. | +| `PANTERA_GROUP_METADATA_STALE_L1_TTL_SECONDS` | `2592000` | L1 TTL in seconds (30 days). 
| +| `PANTERA_GROUP_METADATA_STALE_L2_ENABLED` | `true` | Enable the Valkey L2 tier. | +| `PANTERA_GROUP_METADATA_STALE_L2_TTL_SECONDS` | `0` | L2 TTL in seconds. `0` means no TTL; Valkey LRU evicts. | +| `PANTERA_GROUP_METADATA_STALE_L2_TIMEOUT_MS` | `100` | L2 read timeout in milliseconds. | + +--- + +## HTTP/3 + +| Variable | Default | Description | +|----------|---------|-------------| +| `PANTERA_HTTP3_PROXY_PROTOCOL` | `false` | When `true`, the HTTP/3 listener prepends Jetty's `ProxyConnectionFactory` so the real client IP is recovered from the PROXY-protocol prelude. Required when the listener sits behind an NLB or other L4 proxy. See [Deployment behind an NLB](deployment-nlb.md). | +| `PANTERA_HTTP3_MAX_STREAM_BUFFER_BYTES` | `16777216` (16 MiB) | Per-stream body buffer cap. Requests exceeding this are rejected. Guards against memory exhaustion on large unbounded uploads over HTTP/3. | + +--- + +## Scheduler + +| Variable | Default | Description | +|----------|---------|-------------| +| `PANTERA_JOB_DATA_REGISTRY_MAX` | `10000` | Sanity cap on entries in `JobDataRegistry`. Exceeding this threshold emits an ECS error log naming a key prefix so operators can locate the leaking scheduler site. Entries are never silently dropped. 
| + +--- + ## Related Pages - [Configuration](configuration.md) -- Main pantera.yml configuration - [Configuration Reference](../configuration-reference.md#7-environment-variables-reference) -- Environment variables in the configuration reference - [Performance Tuning](performance-tuning.md) -- How to size thread pools and connection pools - [Installation](installation.md) -- Setting environment variables in Docker +- [Cache Configuration](cache-configuration.md) -- Full cache tier reference +- [Database](database.md) -- Hikari fail-fast and canary ramp diff --git a/docs/admin-guide/runbooks.md b/docs/admin-guide/runbooks.md new file mode 100644 index 000000000..0e6b2275b --- /dev/null +++ b/docs/admin-guide/runbooks.md @@ -0,0 +1,83 @@ +# Runbooks + +> **Guide:** Admin Guide | **Section:** Runbooks + +This page collects on-call runbooks for Pantera signals. Each entry describes what the signal means, how to confirm the diagnosis, and the immediate operator action. + +--- + +## X-Pantera-Fault headers on 5xx responses + +Every 5xx response that Pantera generates internally carries an `X-Pantera-Fault: ` header identifying the specific fault variant. Use the tag to route the alert to the correct runbook. + +### `X-Pantera-Fault: index-unavailable` (500) + +**Meaning:** The `DbArtifactIndex` executor queue is saturated. New lookups are being rejected by `AbortPolicy` rather than silently running on the Vert.x event loop. + +**Before v2.2.0:** The executor used `CallerRunsPolicy`, which meant a saturated pool caused inline JDBC on the event-loop thread. That was worse than the 500 -- it pushed latency into every concurrent request on the same event loop. + +**Diagnosis:** + +- Check `pantera.index.executor.queue_size` -- near or at capacity. +- Check PostgreSQL: slow queries, lock waits, or replication lag holding reads. +- Check Hikari: `pantera.hikaricp.connections_pending` climbing -- pool exhaustion upstream of the executor. 
+ +**Action:** This is transient-by-design. The client sees a typed 500 and retries with backoff. Operator intervention is only needed if the 500 rate stays elevated for minutes -- treat as a DB-layer incident. + +### `X-Pantera-Fault: storage-unavailable` (500) + +**Meaning:** The `Storage` backend (S3, filesystem, or swift) refused or timed out on a read/write. + +**Diagnosis:** Check `pantera.storage.*` metrics and the backend's own health (S3 API errors, disk I/O saturation, swift 5xx). + +**Action:** Persistent = backend outage, escalate. Transient = no action; upstream or client retries will succeed. + +### `X-Pantera-Fault: deadline-exceeded` (504) + +**Meaning:** The request's end-to-end `Deadline` expired before a response was produced. The deadline is set at request entry (default 30 s) and propagated via `RequestContext`. + +**Diagnosis:** Either the upstream is slow (check `pantera.http.client.duration` by host) or an internal cache/DB hop is slow. The access-log line for the expired request carries `url.path` and `http.response.status_code=504`. + +**Action:** Identify which hop burned the budget. If upstream, that's a remote-registry issue. If internal, profile the slow hop. + +### `X-Pantera-Fault: overload:` (503) + +**Meaning:** A bounded queue or semaphore refused admission. The `` suffix names the specific limiter (`group-drain`, `io-read`, etc.). + +**Action:** Temporary spike -> no action. Sustained -> the limit is too tight for current traffic; scale horizontally or raise the bound. + +### `X-Pantera-Fault: upstream-integrity:` (502) + +**Meaning:** `ProxyCacheWriter` fetched a primary artifact and its sidecar digest (MD5, SHA-1, SHA-256, SHA-512), recomputed the digest over the streamed bytes, and the sidecar's declared value disagreed with the bytes. `` names the specific algorithm that failed. + +**Nothing was written to the cache.** The bad pair is rejected atomically -- Pantera cannot cache a drifted primary/sidecar pair by construction. 
+ +**Diagnosis:** + +- Check `pantera.proxy.cache.integrity_failure{repo, algo}` counter. +- Common root causes: upstream sidecar serving stale byte-for-byte while upstream primary was republished; upstream intermediary (CDN, corporate proxy) serving cached bytes from different epochs on different requests. + +**Action:** Usually resolves within a refetch cycle. If sustained, verify the upstream registry is not itself serving drift; the corresponding cache entry is naturally absent so the next client request re-fetches from scratch. + +--- + +## AllProxiesFailed pass-through (behavior change in v2.2.0) + +When a proxy or group repository has exhausted all members and none produced a success, Pantera used to synthesize a 502 with a generic body. + +**As of v2.2.0:** Pantera passes the winning proxy's actual status and body through to the client. The 5xx response carries both: + +- `X-Pantera-Fault: all-proxies-failed` +- `X-Pantera-Proxies-Tried: ` -- the number of members attempted. + +This means clients that previously saw a synthesized `502` may now see `503`, `504`, or even a `500` body from upstream, depending on what the upstream returned. The change is intentional -- the pass-through preserves diagnostic detail that the synthesized 502 threw away. + +**Client impact:** Any client-side retry policy keyed on "502 from Pantera = retry" should also cover 5xx more broadly. Most ecosystem clients already do this. + +--- + +## Related Pages + +- [Troubleshooting](troubleshooting.md) -- Diagnostic tool catalogue. +- [Monitoring](monitoring.md) -- Metric reference. +- [../user-guide/error-reference.md](../user-guide/error-reference.md) -- Client-facing error reference. 
diff --git a/docs/admin-guide/v2.2-deployment-checklist.md b/docs/admin-guide/v2.2-deployment-checklist.md new file mode 100644 index 000000000..28abce4a7 --- /dev/null +++ b/docs/admin-guide/v2.2-deployment-checklist.md @@ -0,0 +1,91 @@ +# v2.2.0 Deployment Checklist + +> **Guide:** Admin Guide | **Section:** v2.2.0 Deployment Checklist + +This page is the operator-facing checklist for rolling out v2.2.0. It covers only the items that are new or changed in v2.2.0; standard upgrade procedure applies for everything else (see [Upgrade Procedures](upgrade-procedures.md)). + +--- + +## Pre-deploy + +### Review new `meta.caches.*` YAML + +Two new cache tiers ship with v2.2.0. You do not have to set them in YAML -- the compile-time defaults are production-sane -- but you should at least know they exist: + +- `meta.caches.auth-enabled.*` -- see [Cache Configuration: auth-enabled](cache-configuration.md#auth-enabled-cachedlocalenabledfilter). +- `meta.caches.group-metadata-stale.*` -- see [Cache Configuration: group-metadata-stale](cache-configuration.md#group-metadata-stale-groupmetadatacache-stale-fallback). + +If your deployment pins `meta.caches.*` explicitly, add entries for the two new tiers. Every field honors the 3-tier precedence `env -> YAML -> default`. + +### Confirm Valkey `maxmemory-policy = allkeys-lru` + +The `group-metadata-stale.l2.ttlSeconds = 0` default relies on Valkey LRU owning eviction. A Valkey instance configured with `noeviction` or `volatile-lru` will eventually return errors on `SET` under memory pressure -- Pantera degrades gracefully, but operationally you want LRU to be doing its job. See [Valkey Setup](valkey-setup.md#required-settings). + +### Confirm every adapter registers its cooldown factory + +With the Group G fail-fast migration, a missing `CooldownResponseFactory` registration now throws `IllegalStateException` on the first request that routes to that `repoType`. Before rolling out: + +1. 
Grep `CooldownWiring.register(` in your codebase; make sure every adapter you ship has a registration. +2. If you maintain a downstream fork or a custom adapter, register its factory (and every alias) via `CooldownWiring` at startup. +3. A canary smoke test that exercises cooldown on each adapter will surface missing registrations immediately. + +--- + +## Deploy + +### Canary first + +Route a small percentage of traffic to a canary instance running v2.2.0 before full rollout. The new surfaces to validate: + +- Client disconnect handling (Group A) -- confirm `fs.open_files` does not grow under bursty traffic. +- Hikari fail-fast (Group B) -- see the ramp below. +- `DbArtifactIndex` AbortPolicy (Group H.1) -- confirm no 500s with `X-Pantera-Fault: index-unavailable` under steady load (transient saturation may trigger them; sustained is a signal). + +### Hikari canary ramp + +The v2.2.0 Hikari defaults are `connectionTimeout=3000` and `leakDetectionThreshold=5000` (see [Database](database.md#fail-fast-defaults)). Do NOT jump straight to these values in production -- run a staged ramp: + +**Week 1 (canary):** override with the relaxed values so dormant held-connection bugs surface as WARNs without failing traffic: + +``` +PANTERA_DB_CONNECTION_TIMEOUT_MS=10000 +PANTERA_DB_LEAK_DETECTION_MS=30000 +``` + +**Week 2+ (full rollout):** drop the overrides; pick up the v2.2.0 defaults. See [Database: Canary Ramp Guide](database.md#canary-ramp-guide) for the decision criteria. + +--- + +## Post-deploy (first 48 hours) + +Watch these four metrics. Each one corresponds to a Group A–H surface: + +| Metric | What to watch for | Runbook | +|---|---|---| +| `pantera.hikaricp.connections_pending` | Any sustained climb past single digits. p99 well under 3 s. | [Database](database.md) | +| `auth-enabled.hit_rate` | Should stabilize >95% within the first hour of steady traffic. Materially lower -> misconfigured TTL or pub/sub invalidation storm. 
| [Cache Configuration](cache-configuration.md#auth-enabled-cachedlocalenabledfilter) | +| `group_metadata_cache.stale_served_from{tier=expired-primary}` | Non-zero here is informational (the final rung of the degradation ladder). Persistent non-zero = upstream outage worth investigating. | [Cache Configuration](cache-configuration.md#group-metadata-stale-groupmetadatacache-stale-fallback) | +| `fs.open_files` | Should be flat or slowly drifting; a monotonic climb indicates a resource leak (Group A or Group F surface). | [Reactive Lifecycle](../developer-guide/reactive-lifecycle.md) | + +Also watch the access log for new `X-Pantera-Fault` tags at 5xx: + +- `index-unavailable` (transient saturation -- expected at low rates; concerning if sustained) +- `upstream-integrity:*` (upstream sidecar drift -- usually transient) +- `all-proxies-failed` (now pass-through; see [Runbooks](runbooks.md#allproxiesfailed-pass-through-behavior-change-in-v2-2-0)) + +--- + +## Rollback + +Standard v2.2.0 rollback is a downgrade of the container tag. No schema or data migrations are gated behind v2.2.0 Group A–H changes -- the caches, counters, and headers are all additive. A v2.2.0 → v2.1.x downgrade is safe; the `meta.caches.auth-enabled` and `meta.caches.group-metadata-stale` YAML sections are ignored by earlier versions.
+ +--- + +## Related Pages + +- [Cache Configuration](cache-configuration.md) +- [Valkey Setup](valkey-setup.md) +- [Database](database.md) +- [Deployment behind an NLB](deployment-nlb.md) +- [Runbooks](runbooks.md) +- [Upgrade Procedures](upgrade-procedures.md) diff --git a/docs/admin-guide/valkey-setup.md b/docs/admin-guide/valkey-setup.md new file mode 100644 index 000000000..714dbbac4 --- /dev/null +++ b/docs/admin-guide/valkey-setup.md @@ -0,0 +1,56 @@ +# Valkey Setup + +> **Guide:** Admin Guide | **Section:** Valkey Setup + +Pantera uses Valkey (a Redis-compatible store) as the L2 tier for every cross-node cache and as the pub/sub channel for cache invalidation. This page covers the required server-side settings to run Valkey safely for Pantera's workload. + +--- + +## Required Settings + +### Eviction policy + +``` +maxmemory-policy = allkeys-lru +``` + +Pantera's L2 caches deliberately ship with long or unbounded TTLs (notably `group-metadata-stale.l2.ttlSeconds = 0`). The server MUST evict via LRU under memory pressure, not fail writes -- a failed L2 write is recoverable by the L1 tier, but a blocked Pantera event loop waiting on a rejected write is not. + +### maxmemory sizing + +Size `maxmemory` to hold the combined working set of: + +- `auth-enabled` -- order of 10K user entries, <1 KB each. +- `group-metadata-stale` -- one entry per `(groupRepo, slot)` pair, payload sized to the metadata body (npm `package.json` scans can reach a few hundred KB, Maven `maven-metadata.xml` is typically <10 KB). +- `cooldown-metadata` -- one entry per filtered metadata payload, bounded by `meta.caches.cooldown-metadata.maxSize` (default 5M). +- `group-negative` and `negative` -- small key, empty value, bounded by their configured `maxSize`. + +A safe starting point for most deployments is 2-4 GiB. Monitor `used_memory_peak` and `evicted_keys` and adjust. 
`evicted_keys` growing steadily on `group-metadata-stale` is fine (that's LRU doing its job); the same on `auth-enabled` or `cooldown` indicates `maxSize` is set too high or `maxmemory` too low. + +--- + +## Why `group-metadata-stale.l2.ttlSeconds = 0` + +The stale-fallback cache ships with `l2.ttlSeconds = 0` by design. Valkey LRU owns eviction for this tier -- an entry stays until memory pressure evicts it, which can be weeks or months in low-churn clusters. The intended semantic is *"availability over freshness"*: if the primary group fanout cannot reach any member, the stale tier must still have the last body we successfully assembled. + +If operators set a non-zero TTL here they are overriding that design and accepting that a long upstream outage can turn into a client-visible 5xx once the TTL passes. + +--- + +## Pub/Sub + +Pantera uses Valkey pub/sub channels under the `pantera.cache.invalidation.*` prefix for cross-node cache invalidation (auth user changes, cooldown policy changes, group membership changes). No special configuration is required -- the default pub/sub transport is sufficient. Do not set `notify-keyspace-events` for this purpose; Pantera drives pub/sub explicitly. + +--- + +## Persistence + +Pantera treats L2 as a best-effort accelerator. All data is safely reconstructible from PostgreSQL + upstream fetches. You may run Valkey with no AOF / no RDB snapshots if operationally preferred; cold-start latency will be elevated until the L1 tiers warm. Most deployments run with default RDB snapshots enabled. + +--- + +## Related Pages + +- [Cache Configuration](cache-configuration.md) -- All `meta.caches.*` settings. +- [High Availability](high-availability.md) -- Multi-node cluster layout. +- [Monitoring](monitoring.md) -- Valkey-exposed metrics Pantera cares about. 
diff --git a/docs/analysis/v2.1.3-architecture-review.md b/docs/analysis/v2.1.3-architecture-review.md new file mode 100644 index 000000000..36caa4200 --- /dev/null +++ b/docs/analysis/v2.1.3-architecture-review.md @@ -0,0 +1,412 @@ +# Pantera v2.1.3 — Enterprise Architecture Review of the Group/Proxy Revamp + +**Audience:** engineering leadership, solution architects, platform SRE +**Companion to:** `v2.1.3-post-deploy-analysis.md` (the log-driven forensic report) +**Scope:** everything that changed on the group / proxy / cache / coalescer stack from v2.1.0 → v2.1.3. The user has confirmed these failure modes did **not** exist before 2.1.0, so this review focuses on the revamp. + +Source files reviewed (LOC): + +``` +pantera-main/src/main/java/com/auto1/pantera/group/GroupSlice.java 1321 +pantera-main/src/main/java/com/auto1/pantera/group/MavenGroupSlice.java 651 +pantera-main/src/main/java/com/auto1/pantera/group/GroupMemberFlattener.java 286 +pantera-main/src/main/java/com/auto1/pantera/group/GroupMetadataCache.java 269 +pantera-main/src/main/java/com/auto1/pantera/group/MemberSlice.java 222 +pantera-main/src/main/java/com/auto1/pantera/group/ArtifactNameParser.java 520 +pantera-core/src/main/java/com/auto1/pantera/http/cache/BaseCachedProxySlice.java 1121 +pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCache.java 507 +pantera-core/src/main/java/com/auto1/pantera/http/cache/RequestDeduplicator.java 204 +pantera-core/src/main/java/com/auto1/pantera/http/slice/CircuitBreakerSlice.java 78 +pantera-core/src/main/java/com/auto1/pantera/http/trace/MdcPropagation.java 446 +pantera-core/src/main/java/com/auto1/pantera/http/timeout/AutoBlockRegistry.java 124 +npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/CachedNpmProxySlice.java (for the error-translation cascade) +npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSlice.java (hot path) +``` + +--- + +## 0.
The 02:01 outlier — explained + +The one "Targeted member failed on index hit" log entry that sits outside the 10:05 burst is from a **different** bug than the Queue-full cascade. Raw evidence: + +``` +02:01:38.778Z ERROR com.auto1.pantera.npm + error.type: com.auto1.pantera.asto.ValueNotFoundException + error.message: java.io.IOException: No value for key: npm_proxy/columnify/meta.meta + caused by: java.nio.file.NoSuchFileException: /var/pantera/data/npm_proxy/columnify/meta.meta + url.path: /columnify + thread: pantera-io-read-42 + +02:01:38.779Z WARN com.auto1.pantera.group + message: "Targeted member failed on index hit, returning 500" + thread: pantera-io-read-42 +``` + +**What happened:** the DB artifact index said `columnify` lives at `npm_proxy`. The group did `targetedLocalRead` → called `npm_proxy` → `npm_proxy` tried to read `npm_proxy/columnify/meta.meta` from disk → file was gone (evicted, manually removed, or never written). The storage raised `NoSuchFileException` → wrapped as `ValueNotFoundException` → the npm adapter emitted an ERROR and returned 5xx → group converted to 500 per policy. + +**Architectural significance — A11 in §2:** the "Targeted local read" policy claims `"bytes are local, nobody else has them"` as an invariant (GroupSlice.java:628, 1129), but **the DB index and local storage are only eventually consistent**. A TOCTOU gap exists between (a) `locateByName()` returning the member and (b) the member reading the bytes. During that gap the bytes can disappear (cache eviction, manual scrub, storage backend re-balance, `.meta` sidecar vs main artifact drift). Commit `9f44650c fix(cache): recover from TOCTOU eviction race as cache miss` handled one such race but not this one (metadata sidecar path). + +**One-liner:** 2,725 / 2,726 "Targeted member failed" WARNs are driven by the Queue-full cascade; 1 / 2,726 is driven by index/storage inconsistency.
Both are real and both are direct consequences of the v2.1.3 revamp β€” the Queue-full because the new coalesced-drain queue was introduced, and the TOCTOU because the new `targetedLocalRead` path assumes a strong invariant that the system cannot deliver. + +--- + +## 1. Patterns present β€” credit where it's due + +The revamp is a **serious architectural effort** and introduces several correct, enterprise-grade patterns. None of this review is "it's all bad": + +| # | Pattern | Where it lives | Verdict | +|---|------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------| +| P1 | **Single-flight / Request Coalescing** | `GroupSlice.inFlightFanouts`, `RequestDeduplicator.inFlight`, `GroupMetadataCache.inFlightMetadataFetches` | Correct pattern for thundering-herd prevention; **three independent implementations** (see A7). | +| P2 | **Two-tier cache (L1 Caffeine + L2 Valkey)** | `NegativeCache` + `NegativeCacheConfig` | Well designed, defaults to L1-only β€” graceful degradation. | +| P3 | **Circuit Breaker** | `CircuitBreakerSlice` + `AutoBlockRegistry`, per-upstream shared via `registrySupplier` | Correct implementation; trip signal is poisoned (see A3). | +| P4 | **Stale-While-Revalidate** | `BaseCachedProxySlice.tryServeStale(...)` | Correct SWR semantics. Defaults questionable (Issue #3 F3.3). | +| P5 | **Race-to-first-response with late-loser drain** | `GroupSlice.queryTargetedMembers` + `drainBody(...)` | Clean fast-fail. Loser drain is static-pooled (A5) and lossy (A19). | +| P6 | **Decorator / Slice composition** | `CircuitBreakerSlice(CachedNpmProxySlice(NpmProxySlice(DownloadAssetSlice)))` | Clean separation of concerns at each layer. | +| P7 | **Structured ECS logging with MDC** | `EcsLogger`, `EcsLogEvent`, `MdcPropagation` | Good modern choice. 
Boilerplate-heavy (A14) and missing fields (A13). | +| P8 | **Dependency Injection** | `registrySupplier` for `AutoBlockRegistry`, `NegativeCache` injected | Correct β€” allows shared cross-group state. | +| P9 | **Bounded queues with explicit rejection handling** | `GroupSlice.DRAIN_EXECUTOR`, `DbArtifactIndex` | Directionally right; wrong overflow policy on `DbArtifactIndex` (A3b/F3.1). | +| P10 | **Backpressure at the storage layer** | Quartz drain job draining `ProxyArtifactEvent` queue | Correct model; wrong enqueue semantics (A1). | + +--- + +## 2. Anti-patterns β€” in order of blast radius + +Evidence-backed; each entry cites the file:line. + +### A1. Exception-as-control-flow on a hot path + +**Where:** `DownloadAssetSlice.java:198, 288`, plus 16 sibling sites across adapters. +**What:** `queue.add(event)` on a bounded `LinkedBlockingQueue(10_000)` (`MetadataEventQueues.java:151`). `AbstractQueue.add()` is documented to throw `IllegalStateException` on overflow. `offer()` is the non-throwing variant. +**Why it's an anti-pattern:** overflow of a background event queue is a **normal, expected** condition under traffic bursts. The JDK API design even names this explicitly β€” two methods, one throwing, one returning `false`. Using the throwing one on a request-serving path means: +1. Stack-trace allocation: ~2 KB garbage + ~10 Β΅s CPU per event β†’ 2 GB of garbage during the 10:05 burst. +2. The exception propagates through the RxJava `.map()` β†’ CompletableFuture exceptional completion β†’ `.exceptionally()` β†’ catch-and-translate. +3. The translation layer does not know "queue full" from any other `IllegalStateException` β†’ conservative fallback to 5xx. + +The fix is 18 one-line edits (Β§1.7 of the forensic report, F1.1/F1.2) but the *lesson* is about **review discipline**: any call that takes a `Queue` parameter should pass a lint gate asking "is this bounded, and are you using `offer`?". + +### A2. 
Status-code lossy translation β€” the 502β†’503β†’500 cascade + +**Where:** four files, four layers: + +``` +DownloadAssetSlice.exceptionally β†’ 502 "Upstream error: Queue full" + ↓ +CachedNpmProxySlice.handleSignal(ERROR) β†’ 503 "Upstream temporarily unavailable - please retry" + ↓ +GroupSlice.handleMemberResponse β†’ sets anyServerError, drains body + ↓ +GroupSlice.completeIfAllExhausted + isTargetedLocalRead β†’ 500 "Targeted member read failed" + fanout β†’ 502 "All upstream members failed" +``` + +**Why it's an anti-pattern:** each hop destroys semantic information about the origin fault, and each hop applies a *different* policy to the translation. The client sees 500 for a condition that was originally "our background queue is full for 8 seconds, retry me" β€” i.e. a **retryable** fault encoded as **terminal**. + +**Canonical fix:** replace `Response` with a sum type `Result` where `FaultKind ∈ { UPSTREAM_5XX, INTERNAL, TIMEOUT, OVERLOAD, CACHE_MISS }`. Each layer decides its own HTTP translation at the very end, from a full picture of the fault origin, not from the lossy status code of the previous layer. + +### A3. Internal fault tracked as upstream fault + +**Where:** `BaseCachedProxySlice.java:505, 829, 862` β€” every `.exceptionally()` calls `trackUpstreamFailure(error)` regardless of origin. + +**Why it's an anti-pattern:** the circuit-breaker pattern relies on the **fault source** being correctly identified. Here, an `IllegalStateException("Queue full")` from our own queue is treated identically to a `java.net.ConnectException` from npmjs.org. The circuit trips on our own internal fault, then traffic to a healthy upstream is blocked while the internal problem gets worse (users retry; cache warmth is lost). + +**Canonical fix:** classify the exception before reporting. Only `IOException`, `TimeoutException`, `ConnectException`, and responses with `response.status().code() in {502, 503, 504}` count against the upstream. 
Everything else increments a separate "internal faults" counter that does NOT feed the breaker. + +### A4. Constructor telescoping + +**Where:** `GroupSlice` — **6 public constructors**, parameters (4) → (5) → (6) → (8) → (9) → (10) → (11). Every one carries a `@SuppressWarnings("PMD.ExcessiveParameterList")` annotation. + +**Why:** the class's configuration surface is itself a first-class concept (members, routing, index, proxy flags, repoType, negative cache, registry supplier) and deserves a `record GroupSliceConfig(...)` plus a `Builder` or factory. The compiler-required boilerplate of telescoping constructors + test-compat overloads makes the configuration brittle — any addition requires editing 6 constructors. + +**Canonical fix:** single constructor taking `GroupSliceConfig`, plus a static `GroupSliceConfig.Builder`. "Drop-in compatibility" is served by a separate `GroupSliceFactory#forLegacyCallers(...)`. + +### A5. Static shared state inside per-group units + +**Where:** `GroupSlice.DRAIN_EXECUTOR` and `DRAIN_DROP_COUNT` are `static` fields. + +**Why it matters:** +- **Test isolation**: two test GroupSlices in the same JVM share the executor; one test's rejected task pollutes another's counter. +- **Bulkheading**: a noisy group (e.g. a misbehaving proxy member) can saturate the drain queue and cause drops for every other group. There is no blast-radius containment. +- **Tuning**: you cannot tune drain capacity per-repo at runtime. The `16 threads, queue=2000` constants are frozen at class-load time. +- **Observability**: `DRAIN_DROP_COUNT` has no group label; in a multi-group system you can see "drops went up" but not "which group is suffering". + +**Canonical fix:** inject a `DrainExecutor` (or `Executor`) at construction, let DI wire one shared instance in production and isolated instances in tests. Micrometer tags the group name. + +### A6. "Complete the gate BEFORE removing from map" — complexity leak + +**Where:** `GroupSlice.java:756-769`. 
A 14-line block of code **with a 25-line comment** explaining why this specific ordering of `freshGate.complete(null)` followed by `inFlightFanouts.remove(dedupKey, freshGate)` is required to avoid a race window. + +**Why it's a smell:** when the comment is 2× the size of the code, the abstraction is wrong. The code is defending against a specific race (late follower that arrives between `remove()` and the next `putIfAbsent()`). The race exists because `inFlightFanouts` is a raw `ConcurrentHashMap<String, CompletableFuture<Void>>` — not a purpose-built single-flight cache. + +**Canonical fix:** use `Caffeine.asyncCache()` with an async loader. The library has solved single-flight with per-key load-once semantics, wait-list, zombie eviction, and completion ordering correctly in a dozen other codebases. The custom coalescer reinvents this badly three times (see A7). + +### A7. Three independent implementations of single-flight + +**Where:** + +| Implementation | File | Key type | +|---------------------------------------------|-------------------------------------|----------------| +| `inFlightFanouts` | `GroupSlice.java` | `String` | +| `RequestDeduplicator.inFlight` | `RequestDeduplicator.java` | `Key` | +| `inFlightMetadataFetches` (per commit b37deea2) | `MavenGroupSlice.java` | `String` | + +Each has its own zombie protection (or doesn't — A8), its own completion-ordering discipline, its own error-propagation semantics, and its own "which callback runs on which executor" trap (see A9). + +**Canonical fix:** one `SingleFlight` utility in `pantera-core`, used by all three call sites. Battle-test once. + +### A8. Zombie protection asymmetric across coalescers + +**Where:** +- `RequestDeduplicator` has a **ScheduledExecutorService** evicting entries older than `MAX_AGE_MS=5 min` (line 124-133). ✓ +- `GroupSlice.inFlightFanouts` has **no eviction** at all. If a `whenComplete` is bypassed (cancellation, thread death), the gate leaks forever. 
+- `GroupMetadataCache.inFlightMetadataFetches` β€” similar, needs verification. + +**Why:** in a long-running service, any `ConcurrentMap` that can hold futures across request lifetimes MUST have a zombie-evictor or a timeout β€” otherwise it is a slow memory leak + a correctness trap (late followers wait for a future that will never complete). + +### A9. Correctness of coalescer depends on `Async` thread-hop + +**Where:** `GroupSlice.java:731`, comment block on 714-725: +> CRITICAL: use `thenComposeAsync`, NOT `thenCompose`. … the callback runs synchronously on the same stack; the retry then hits the SAME (still-present) gate and recurses, blowing the stack. + +Same fix in `MavenGroupSlice` per commit `b37deea2` and `ccc155f6`. + +**Why it's a smell:** a primitive of our concurrency model (thread identity / stack identity) leaks into business code. CompletableFuture's synchronous-execution-on-completed-stage optimization is correct in isolation but interacts badly with a "retry on completion" pattern. The correctness of the entire group layer depends on every future maintainer remembering "use `Async`" β€” there is no compile-time or even a static-analysis check. + +**Canonical fix:** use Project Reactor or RxJava 3 `Mono.share().cache()` which explicitly hop to a scheduler; or use a library that provides a `SingleFlight.load(key, loader)` that hops internally. + +### A10. Shared mutable state via atomics threaded through callbacks + +**Where:** `queryTargetedMembers(...)` uses `AtomicBoolean completed`, `AtomicInteger pending`, `AtomicBoolean anyServerError` β€” all mutated from the `whenComplete` of every member future + from cancellation, + from `completeIfAllExhausted` which reads all three. + +**Why it's a smell:** the "current state" of a fanout is a 3-tuple (`(completed, pending, anyServerError)`) that changes under concurrent writes with no single owning state machine. 
Any new code path (timeout, partial response, streaming cancellation) must correctly update all three in the right order or you get corrupted state. + +**Canonical fix:** represent the fanout as a single `AtomicReference<FanoutState>` where `FanoutState` is a sealed sum type (`InFlight(pendingCount, anyErr) | CompletedSuccess | CompletedFailure | CompletedMiss`) and use CAS loops to transition. One owner, explicit transitions, compile-time exhaustive match. + +### A11. "Bytes are local" is a false invariant + +**Where:** `GroupSlice.java:628-632, 1129-1130`. Policy doc comment: +> *"artifact bytes are local (hosted upload or proxy cache) — if the targeted member fails, no one else has them, so we surface a genuine 500 to the client."* + +**Reality (02:01 outlier):** the index points at a member, the member's storage does not have the bytes. The invariant *that justifies returning 500 instead of 502* is violated by normal system operation (cache eviction, storage re-balance, sidecar files missing, manual cleanup). + +**Canonical fix:** `targetedLocalRead` must fall back to `proxyOnlyFanout` (or `fullTwoPhaseFanout`) on a 5xx or storage-level `ValueNotFoundException` — not surface 500. The 500 policy assumes a guarantee the platform cannot provide. + +### A12. Double-response race: `GuardedHttpServerResponse` logs 216 times / 12 h + +**Where:** `GuardedHttpServerResponse.java:129, 165, 206` emitting `"End has already been called: '…', caller=…, terminatedBy=…"`. + +**Why:** the Guard exists to catch a specific bug — two code paths trying to `end()` the response. 216 occurrences in a 12-hour window is not "edge case", it's sustained. Likely causes: +1. Vert.x request-timeout handler fires concurrently with slice completion (race). +2. `exceptionally(...)` handler responds AND an earlier `thenApply(...)` already wrote a response. +3. SWR path responds stale AND the background fetch also tries to write. 
+ +The Guard prevents a client-visible protocol error, but the underlying race is a silent correctness issue: the response delivered to the client may come from either code path (whichever won the Guard) β€” non-deterministic. + +**Canonical fix:** trace every double-end occurrence by `caller` + `terminatedBy`, rank root causes, fix at source. The Guard is defense-in-depth, not a solution. + +### A13. Observability holes on the error path + +Verified from the production record: the "Targeted member failed on index hit, returning 500" WARN has: +* `trace.id`: **absent** +* `package.name`: **absent** +* `url.path`: **absent** +* `client.ip`: **absent** +* `destination.address`: **absent** + +This WARN is the **only signal** that a user got a 500, yet it cannot be joined to the user's request (no trace.id) or even to a specific artifact (no url.path / package.name). Compare the access log entry for the same event, which DOES have `url.original` and `http.response.status_code` but has `client.ip: null` (Issue #2 Β§2.5). + +**Canonical fix:** access-log and business-log must share the request-scoped identifiers via MDC, and the EcsLogger fluent API should have a `requestScoped(line, headers)` builder that enforces presence of trace.id + package + url at compile time. + +### A14. MDC propagation is manual boilerplate + +**Where:** `MdcPropagation.withMdc`, `withMdcFunction`, `withMdcBiConsumer`, `withMdcRxFunction`, `withMdcRunnable`, etc. β€” 446 LOC dedicated to wrapping every lambda in an MDC snapshot+restore. In `GroupSlice.response()` alone there are 7 wrappers in 30 lines of code. + +**Why it's an anti-pattern:** each call is 100% boilerplate with no compile-time safety. If a maintainer adds a new `.thenCompose(...)` in Maven group flow and forgets `withMdc`, MDC is silently dropped, trace.id disappears from all downstream logs β€” and there is no test that will catch this. 
+ +**Canonical fix:** wire MDC propagation once at the Vertx binding layer, using either (a) `ContextualExecutorService` that decorates the ForkJoinPool / Vertx event-loop to capture+restore MDC automatically on every submit, (b) Project Reactor's `Context` which propagates through `Mono`/`Flux` transparently, or (c) OpenTelemetry `Context` with a `ContextStorage` that rides on virtual-thread locals. Each of these removes the per-call-site responsibility. + +### A15. Body buffering on the group fanout path + +**Where:** `GroupSlice.queryTargetedMembers(...)` line 810: `body.asBytesFuture().thenCompose(requestBytes -> ...)` β€” consumes the entire request body into a `byte[]` before fanning out. + +**Why:** for GET/HEAD (the 99% case) the body is empty and this is cheap. But for `POST /-/npm/v1/security/audits` (npm audit β€” which is allowed through, line 539) the body can be several MiB, buffered on the server per request, for no real reason (we're going to POST it to one member anyway). + +**Canonical fix:** only buffer on paths that actually do fanout with body replay. For single-member paths (targeted hit, audit, hosted-only), stream directly. + +### A16. No bulkheading between repositories + +**Where:** `DRAIN_EXECUTOR` is shared across all groups. The per-repo proxy-event queues are independent but all drained by a single global Quartz scheduler thread pool (`threads_count` per-repo, but all sharing the Quartz worker pool). + +**Why:** one malfunctioning repo (e.g. a sudden burst on `npm_proxy`) can starve the drain scheduler for every other repo. No per-repo quota, no fair-share scheduler, no priority. + +**Canonical fix:** **bulkhead per repository**. Each repo gets its own drain pool with a small, fixed capacity. The top-level dispatcher has a scheduling policy (round-robin, WFQ) so one misbehaving repo cannot take down the rest. + +### A17. 
Dual reactive stacks in one pipeline + +**Where:** `DownloadAssetSlice.checkCacheFirst(...)`: +```java +this.npm.getAsset(tgz) // RxJava2 Maybe + .map(withMdcRxFunction(...)) // RxJava2 map + .toSingle(fallback) // RxJava2 Single + .to(SingleInterop.get()) // bridge to CompletionStage + .toCompletableFuture() // JDK CompletableFuture + .thenCompose(withMdc(...)) // CompletableFuture + .exceptionally(...) // CompletableFuture +``` + +**Why it's a smell:** two reactive frameworks (RxJava2 + JDK `CompletableFuture`) per pipeline. Each conversion is a contract boundary where semantics differ (error model, scheduler ownership, cancellation). All 216 "End has already been called" warnings (A12) and the 11,523 Queue full stack traces traverse this bridge. + +**Canonical fix:** pick **one**. Realistically, RxJava2 is on the way out and not worth doubling down on; a migration to CompletableFuture-only (or ideally Project Reactor with Vert.x native bridge) pays off quickly on the hot path. + +### A18. Dead code retention via "drop-in compatibility" + +**Where:** +- `GroupSlice` β€” 6 constructors for "drop-in compatibility" +- `ArtifactIndex.locate()` β€” documented in project memory as "legacy/dead code β€” NEVER called in v1.21.0+" +- `/* port */ int port` β€” still in every GroupSlice constructor; checked once against `0`. + +**Why:** cumulative maintenance surface. Every legacy constructor is a contract the team must keep correct. + +**Canonical fix:** delete the unused overloads and `locate()`. If there are external callers (tests, modules), bump a minor version and migrate them. + +### A19. Silent drop of resources under load + +**Where:** `GroupSlice.DRAIN_EXECUTOR` RejectedExecutionHandler logs a WARN + bumps a counter, **then returns**. The rejected drain task represented an un-drained HTTP response body β€” a potential socket leak until Jetty's idle-timeout reclaims it. 
+ +```java +// Drain queue full, discarding drain task β€” possible response body leak +``` + +**Why:** this is "fail-fast silently". The code's own comment acknowledges the consequence ("possible response body leak"). In 12 hours of logs I did not see `DRAIN_DROP_COUNT` values, which means either (a) drops haven't happened at scale yet, or (b) the counter isn't being exposed / alerted. + +**Canonical fix:** when a drain task is rejected, synchronously `body.close()` on the caller thread (blocking briefly) rather than dropping. Or reject the **incoming request** with a 503 instead of accepting it and leaking a socket. The current trade-off favors request availability over resource hygiene; at sustained load, that's the wrong trade. + +### A20. The 500 vs 502 policy is upside-down + +**Where:** `GroupSlice.completeIfAllExhausted` returns **500** for `isTargetedLocalRead + anyServerError`, with rationale "bytes are local, nobody else has them". + +**Why it's wrong:** +- HTTP 500 is *"the server encountered an unexpected condition"* β€” terminal; clients **should not** retry. +- HTTP 502 is *"received an invalid response from an inbound server"* β€” transient; clients **should** retry. +- HTTP 503 is *"not ready to handle the request"* β€” transient with `Retry-After`; clients should retry with backoff. + +The member returning 5xx is a transient condition (the Queue full example is the clearest one). Telling the client 500 means the client **gives up** on an operation that would succeed if retried in 2 seconds. Both Maven (default `--fail-fast`) and npm (3 retry attempts) treat 500 as terminal and 502/503 as retryable. + +**Canonical fix:** on targeted 5xx, return **503 with `Retry-After: 1`** and a specific Pantera fault-code header (`X-Pantera-Fault: internal-queue-overflow`). Mass CI builds now self-heal instead of failing. + +--- + +## 3. Cross-cutting enterprise concerns + +### C1. 
Error-translation is spread across 4 layers with no central policy + +A single fault (Queue full at line 198) is classified 4 times as the Response travels up: 502 β†’ 503 β†’ 5xx-bucket β†’ 500. Each layer owns its own policy with no shared `FaultClassifier`. This is the root cause of A2, A3, A11, A20. + +**Prescription:** introduce `pantera-core/http/fault/` with: +* `sealed interface Fault { Upstream5xx, Timeout, Overload, IndexStale, Internal, NotFound }` +* `class FaultClassifier` β€” single source of truth: exception β†’ Fault, response β†’ Fault +* `class FaultTranslator` β€” single source of truth: Fault β†’ HTTP status + Retry-After + X-Pantera-Fault header + +Every `.exceptionally()`, every `handleSignal`, every `completeIfAllExhausted` goes through these two classes. + +### C2. Concurrency primitives are mixed + +RxJava2, RxJava2β†’Rio, CompletableFuture, Vert.x Futures, Quartz jobs, ScheduledExecutorService, custom ThreadPoolExecutors. Each has its own error/cancellation/scheduler contract. + +**Prescription:** pick one *reactive* boundary (CompletableFuture or Project Reactor) for the slice pipeline and one *scheduling* boundary (Vert.x event loops + a small bounded worker pool) for heavy work. Wrap Quartz drain at the edge. Retire RxJava2 over 2-3 sprints. + +### C3. No blast-radius containment between repositories + +Bulkhead is a pattern but it's only applied to the **fanout vs drain** split, not to **per-repo**. One misbehaving repo can OOM the shared drain queue, one slow DB can block the shared `DbArtifactIndex` executor (and via `CallerRunsPolicy`, the Vert.x event loop). + +**Prescription:** per-repo `ResourcePool` (drain queue, HTTP client, DB semaphore), per-repo Micrometer tags on every pool-related metric, per-repo rate limits. + +### C4. 
Observability is field-at-a-time instead of context-propagated + +Every log call manually adds `.field("destination.address", member.name())`, every async boundary manually wraps with `MdcPropagation.withMdc*`. A single forgotten wrapper = dropped trace. The 99.6% missing `client.ip` is one such forgotten wrapper. + +**Prescription:** OpenTelemetry context propagation (`Context.current()`) for trace, user, request-id; emit as structured fields automatically by an `EcsLayoutAppender`. Retire per-call `.field(...)` for cross-cutting concerns. + +### C5. No SLO per adapter, no error budget + +At 1000 req/s, 1% failure = 10 failed requests/second = 600/minute = 36,000/hour. There is no documented SLO (e.g., "99.5% availability, p99 < 200ms for npm tarballs") and no error budget that would have gated shipping 2.1.3. + +**Prescription:** adapter-level SLOs in `docs/slo/`, Prometheus alerts on burn-rate, dashboards per adapter. + +### C6. Shipping-first / chaos-last + +The v2.1.3 changelog lists 50+ fixes including 3 race-condition corrections (StackOverflowError, coalescer race, drain pool saturation) that were found in production. This is a high incidence of "complex concurrency found post-ship" and points to a missing test discipline. + +**Prescription:** concurrency property tests (jqwik, QuickCheck-style), chaos tests in CI (kill a member mid-request, drop a response, saturate a queue, freeze DB 500ms), and a standing "thundering herd" load profile in the perf benchmark (F3.7). + +--- + +## 4. Priority refactors (architectural, not tactical) + +The forensic report's F1.x–F4.x list is **tactical** β€” minimal edits to stop bleeding. The following are the **architectural** refactors that remove entire classes of bug. 
+ +| ID | Refactor | Removes anti-patterns | Effort | +|------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------|------------| +| R1 | Introduce `pantera-core/http/fault/` module: `Fault` sealed interface, `FaultClassifier`, `FaultTranslator`. Rewrite every `.exceptionally()` / `handleSignal` / `completeIfAllExhausted` to go through it. | A2, A3, A11, A20 | 1.5 sprints | +| R2 | Unified `SingleFlight` utility (built on Caffeine `asyncCache` or a small purpose-built class) replacing `inFlightFanouts`, `RequestDeduplicator.inFlight`, `inFlightMetadataFetches`. Includes zombie eviction, correct completion ordering, tests. | A6, A7, A8, A9 | 1 sprint | +| R3 | Context-propagating Vert.x binding: MDC / trace / user-id automatically propagated across every async boundary; remove `MdcPropagation.withMdc*` from business code. One ContextStorage, one wiring point, zero per-call-site responsibility. | A13, A14, C4 | 1 sprint | +| R4 | Per-repo bulkheads: each repo owns its drain pool, its HTTP client pool, its proxy-event queue, its DB semaphore. Global scheduler policy ensures no repo starves others. | A5, A16, A19, C3 | 1.5 sprints | +| R5 | Single reactive stack: migrate `DownloadAssetSlice`, `CachedNpmProxySlice`, `BaseCachedProxySlice`, all hot paths from RxJava2 to `CompletableFuture` (or Project Reactor `Mono`/`Flux`). Replace `body.asBytesFuture()` with streaming where applicable. | A15, A17, C2 | 2 sprints | +| R6 | `GroupSliceConfig` record + Builder; delete the 6 overloaded constructors. 
| A4, A18 | 0.5 sprint | +| R7 | CAS-based state machine for fanout (`AtomicReference` with sealed transitions) replacing the `AtomicBoolean completed / AtomicInteger pending / AtomicBoolean anyServerError` triad. | A10, A12 | 0.5 sprint | +| R8 | Per-adapter SLOs + error-budget gating in CI. Block releases when error budget is consumed. Add burn-rate alerts. | C5 | 0.5 sprint | +| R9 | Concurrency-property tests + chaos-injection tests. One property test per coalescer (single-flight correctness under N concurrent callers). One chaos test per resource boundary (drain saturation, DB freeze, member timeout, cancellation mid-response, TOCTOU eviction during read). | C6, A12, A19 | 1 sprint | + +**Total: ~9 sprints for a clean landing.** Individual refactors are independently shippable. + +--- + +## 5. Answer to "never before 2.1.0" + +The user's observation β€” "these were never before 2.1.0, so it is related to our revamp of group" β€” is **correct and precisely locatable**: + +| Regression observed in 2.1.3 | Introduced by (approx) | +|------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------| +| Queue-full cascade on npm cache-hit | Pre-dates 2.1.0 in other adapters, but the npm proxy hot path traffic pattern changed with the group revamp (more cache-hit per request because `targetedLocalRead` stops doing full fanout, concentrating load). 
| +| 500 for `targetedLocalRead` + 5xx | `48ca17bf feat(group): rewrite resolution flow β€” 5-path design with no false 5xx` | +| Internal-fault counted as upstream | `cc883735 feat(proxy): stale-while-revalidate` + `a70f678b feat(group): negative cache` (new paths through `BaseCachedProxySlice.trackUpstreamFailure`) | +| Three independent single-flight coalescers | `a70f678b` + `b37deea2` + `RequestDeduplicator` (already existed) | +| `thenCompose` StackOverflowError class | `ccc155f6` fixed one instance; `b37deea2` added another that also needed `Async` | +| Status-code-lossy translation | Accreted over 2.1.0 β†’ 2.1.3 | +| MDC propagation boilerplate | `599ee1e5`, `608a9b7d`, `7507cdea`, `b9a2fe22`, `b37deea2` β€” the logging MDC sweep | +| 216Γ— "End has already been called" in 12 h | Likely: SWR default-on (`2b31fb76`) + request-timeout interactions | + +**The revamp is not "bad".** It legitimately fixed several real problems (thundering herd, cold-start metadata, StackOverflowError, drain pool under-sizing). But it shipped on top of an already-accreted stack of abstractions (mixed reactive libraries, lossy status cascade, ad-hoc MDC, exception-as-control-flow) and the new paths surface-areaed those weaknesses. + +The path forward is R1–R9 above; in the meantime the tactical F1.x–F4.x in the forensic report are the 2.1.4 hotfix. + +--- + +## 6. Appendix β€” enterprise-architecture checklist, scored + +| Dimension | Score | Notes | +|---------------------------------------------------------------------|-------|------------------------------------------------------------------------------------------------------------------------| +| Separation of concerns (Slice decorator chain) | 8/10 | Clean composition. | +| Single Responsibility | 5/10 | `GroupSlice` does routing, fanout, coalescing, circuit-check, body-drain, metrics, negative-cache, logging β€” too much. 
| +| Open/Closed | 4/10 | Adding a new adapter requires editing `MetadataEventQueues.ProxyRepoType` enum + `ArtifactNameParser`. | +| Liskov (Slice substitutability) | 7/10 | Slices are mostly substitutable; some assume specific wrappers (`CachedNpmProxySlice` assumes origin returns status). | +| Interface Segregation | 6/10 | `Slice` is single-method; good. `ArtifactIndex` is large; could be split. | +| Dependency Inversion | 6/10 | DI used for registries + caches; static executors violate. | +| Idempotency of retries | 5/10 | GETs idempotent by HTTP; internal side-effects (event queues, metric counters) not idempotent β€” double-fire on retry. | +| Observability (structured + tracing + metrics) | 6/10 | Logs structured (ECS), metrics (Micrometer), tracing (APM) all present. MDC propagation fragile; required-field enforcement absent. | +| Bulkheading | 3/10 | Global shared pools; no per-repo isolation. | +| Timeouts / Deadlines | 5/10 | Request timeout exists (Vert.x), DB statement timeout exists. End-to-end deadline propagation absent β€” each hop uses its own.| +| Circuit-breaker correctness | 5/10 | Implementation is correct; inputs are poisoned (A3). | +| Graceful degradation | 7/10 | SWR, negative cache, fanout-safety-net are real and well-designed. | +| Consistency semantics | 5/10 | Index / storage eventually consistent with no documented invariants (A11). | +| Error classification | 3/10 | Lossy, 4-layer-translated, origin-unaware (A2, A3, A20). | +| Testing β€” unit | 7/10 | Rich test suite mentioned (155 ArtifactNameParser cases, GroupSlice race tests). | +| Testing β€” concurrency/chaos | 4/10 | StackOverflowError regression guard added post-hoc; systematic chaos absent. | +| Testing β€” perf regression | 2/10 | No `npm install` / `mvn install` perf baseline in CI (F3.7). | +| Documentation β€” inline | 8/10 | Javadoc is thorough, sometimes excellent (e.g. 25-line race explanation). 
| +| Documentation β€” architectural | 4/10 | No ADRs for the revamp decisions visible. Memory file documents invariants but no versioned ADR. | +| Deployability | 7/10 | JVM args tuned, container sized, pre-touch + pre-warm on startup. | + +**Average: 5.4 / 10** β€” a competent implementation with several important enterprise gaps. None are fatal; the concentration of anti-patterns around error classification and concurrency primitives is what converts a set of small smells into production-visible failures under load. + +*End of architecture review.* diff --git a/docs/analysis/v2.1.3-post-deploy-analysis.md b/docs/analysis/v2.1.3-post-deploy-analysis.md new file mode 100644 index 000000000..e381428b0 --- /dev/null +++ b/docs/analysis/v2.1.3-post-deploy-analysis.md @@ -0,0 +1,450 @@ +# Pantera v2.1.3 Post-Deploy Analysis & Remediation Plan + +**Author:** analysis by Claude (systematic-debugging + code review), supervised by Ayd Asraf +**Date:** 2026-04-16 +**Scope:** 12h 22min of production logs (2026-04-15 22:05 β†’ 2026-04-16 10:27 UTC) from a single container `63f5e6f2…dc9007d` +**Inputs audited** +| File | Lines | Size | Content | +|-----------------------------------------|-----------|-------|---------------------------------| +| `logs/63f…-json.1.log` | 5,636,177 | 4.7 G | rotated prior | +| `logs/63f…-json.log` | 1,653,212 | 1.4 G | current | +| `logs/503.csv` | 17,075 | 47 M | Kibana export (503 / 500 / 502) | + +**Deliverable:** root-cause per observed issue + prioritized remediation plan. No code changes were applied; every recommendation is a work item that can be handed to an implementation agent. + +--- + +## 0. Executive summary + +Four issues reported; all four trace back to **one dominant bug** (Queue-full on the npm proxy), a **logging policy mis-classification** (every 4xx β†’ WARN), and **secondary scale-of-observability issues** introduced by the v2.1.3 refactor wave. 
+ +| # | Issue | Severity | Dominant root cause | Fix scope | +|---|-------------------------------------|---------|----------------------------------------------------------------------------------------------------------------|--------------------------| +| 1 | 503 spikes on npm tgz | **P0** | `queue.add()` on bounded `LinkedBlockingQueue(10_000)` in `DownloadAssetSlice.java:198, :288` (and 18 other sites) | Small code fix, wide scope | +| 2 | "Internal 404" noise | **P1** | 404 logged at WARN (not internal fanout). Plus missing `client.ip` on 99.6% of lines, plus multi-repo maven probing. | Log-level & MDC fix | +| 3 | Slower than v2.1.0/2.1.1 | **P0** | Cascade of (1) drives npm retries; (2) `CallerRunsPolicy` on `DbArtifactIndex` pool blocks Vert.x event-loop under DB slow paths. | Tactical fix + tuning | +| 4 | Noisy/meaningless WARN/ERROR logs | **P1** | Access-logger emits 4xx at WARN + `log.level` JSON-format inconsistency + AuditLogger warm-up noise. | Policy + format fixes | + +**Headline numbers (12-hour window):** + +``` +Log levels WARN 2,501,444 (34.3%) + INFO 4,764,793 (65.4%) + ERROR 23,150 (0.32%) ← 99.8% of these are in 10:05-10:06 (2-minute burst) + DEBUG/TRACE/FATAL 0 + +HTTP statuses 404 2,429,080 (97.3%) ← 99% from http.access WARN, real external probes +(WARN + ERROR only) 401 50,161 (2.0%) + 503 11,573 (0.46%) ← the user-visible burst + 500 5,452 (0.22%) + 502 50 + +Access-log loggers http.access: 2,479,228 WARN + 11,606 ERROR (sum 99%) + com.auto1.pantera.npm: 11,523 ERROR (Queue full) + com.auto1.pantera.group: 16,863 WARN + com.auto1.pantera.auth: 2,480 WARN + com.auto1.pantera.settings: 1,436 WARN + com.auto1.pantera.http.auth: 796 WARN + com.auto1.pantera.maven: 394 WARN + com.auto1.pantera.http.client: 20 ERROR (30s Jetty idle-timeout) +``` + +--- + +## 1. 
Issue #1 β€” 503 spikes on npm tgz (P0) + +### 1.1 Symptoms from the data + +From `logs/503.csv` and the raw log: +* **8,850 user-facing 503** on `GET /artifactory/api/npm/npm_proxy/{pkg}/-/{pkg}-{ver}.tgz` β€” direct proxy traffic; `message="Internal server error"`. +* **2,726 user-facing 500** on `GET /npm_group/{pkg}/-/{pkg}-{ver}.tgz` β€” group traffic. +* **2,723 WARN** from `com.auto1.pantera.group`: `Member 'npm_proxy' returned error status (0 pending)` with `http.response.status_code=503`. +* **50 user-facing 502** `All members exhausted with upstream errors, returning 502`. +* **Top 503 packages**: `@types`, `@vitest`, `@babel`, `@rc-component`, `readable-stream`, `@csstools`, `@wdio`, `ansi-styles`, `picomatch`, `undici-types`, `lru-cache`, `strip-ansi`, `@jridgewell` β€” all high-traffic, well-known packages. +* **Time profile**: 23,113 of 23,150 ERROR lines (99.8%) fall inside a **2-minute window at 10:05 β†’ 10:06** (peak 19,819 in the 10:05 minute alone, i.e. ~165 req/s failing). The remaining 37 ERRORs are spread across the prior 11 hours. + +### 1.2 Root cause β€” `Queue full` on cache-hit *and* cache-miss + +**Stack trace (observed 11,523 times in the 12-hour window, 99.5% in the 10:05-10:06 burst):** + +``` +java.lang.IllegalStateException: Queue full + at java.base/java.util.AbstractQueue.add(Unknown Source) + at com.auto1.pantera.npm.proxy.http.DownloadAssetSlice.lambda$checkCacheFirst$2 + (DownloadAssetSlice.java:198) + at java.base/java.util.Optional.ifPresent(Unknown Source) + at com.auto1.pantera.npm.proxy.http.DownloadAssetSlice.lambda$checkCacheFirst$3 + (DownloadAssetSlice.java:185) + at com.auto1.pantera.http.trace.MdcPropagation.lambda$withMdcRxFunction$6(MdcPropagation.java:303) + at io.reactivex.internal.operators.maybe.MaybeMap$MapMaybeObserver.onSuccess(MaybeMap.java:82) + ... 
+``` + +**Code site (`npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSlice.java`):** + +```java +// line 198 β€” on the CACHE HIT path (checkCacheFirst) +this.packages.ifPresent(queue -> { + ... + queue.add( // ← IllegalStateException on full bounded queue + new ProxyArtifactEvent(new Key.From(tgz), ...)); +}); +// line 288 β€” same bug, on the cache-miss path (serveAsset) +queue.add(new ProxyArtifactEvent(...)); +``` + +**The queue behind it (`MetadataEventQueues.java:151`):** + +```java +final Queue res = new LinkedBlockingQueue<>(10_000); +``` + +`AbstractQueue.add()` throws `IllegalStateException("Queue full")` on a full bounded queue. `LinkedBlockingQueue.offer()` would have returned `false` without throwing. + +**Drain mechanics (`MetadataEventQueues.java:176-184`):** a Quartz periodic job. Defaults are `interval_seconds=1` and `threads_count=1`. At a 1k req/s spike the queue fills 10,000 slots in ~10s. Any stall of the single drain thread (GC pause, Postgres latency spike, Valkey round-trip, thread-context switch) immediately overflows. + +### 1.3 Why the client sees **503** and not **502** + +1. `DownloadAssetSlice.response()` catches the exception in `.exceptionally(...)` and builds a **502** (`ResponseBuilder.from(RsStatus.byCode(502))` β€” the generic "upstream error" path). +2. `DownloadAssetSlice` is wrapped by `CachedNpmProxySlice`. In `doFetch().thenApply(response β†’ ...)` it sees `status().code() >= 500` and returns `FetchSignal.ERROR`. +3. `CachedNpmProxySlice.handleSignal(ERROR)` **overwrites the 502 with 503** (`ResponseBuilder.unavailable().textBody("Upstream temporarily unavailable - please retry")`). +4. Access log (EcsLogEvent.log line 319-320) fires at `ERROR` level because `statusCode >= 500`. + +**Why the group traffic sees 500** (not 503) on the same failure: + +In `GroupSlice.java:1150-1163`, when a member responds 5xx and `isTargetedLocalRead` is true (i.e. 
the artifact-index pointed at this member), the group emits `"Targeted member failed on index hit, returning 500"` and calls `ResponseBuilder.internalError()`. That converts a member unavailability (503) into a client-visible 500 (Internal Server Error), which is the **wrong semantic**: 503 is retryable, 500 is terminal. Clients that retry on 503 but not on 500 give up. + +### 1.4 Why `trackUpstreamFailure` is mis-classifying the fault + +In `BaseCachedProxySlice.java:505, 829, 862` the same code path reports the failure to `AutoBlockRegistry` as an **upstream** fault. But the root cause here is **internal** (our own bounded queue), not npmjs.org. This pollutes the auto-block signal and can trip the circuit breaker on a healthy upstream. (No `Auto-blocked` log lines were observed in this 12-hour window, so it did not trip in this particular burst, but the bookkeeping is wrong and will trip eventually under sustained load.) + +### 1.5 Why the cache-hit path fails too β€” architectural flaw + +Line 198 is inside `checkCacheFirst`. The artifact bytes are in local storage; the code has successfully fetched `asset` from the cache; all it is doing is *announcing* that fact to a background metadata queue. **A local-cache read must never depend on a background queue's capacity.** This is a separation-of-concerns violation. + +### 1.6 Codebase-wide footprint of the `queue.add` pattern + +`rg 'queue\.(add|offer)'` shows the same bounded-queue-with-`add()` pattern in ~20 production sites, all writing to the same `MetadataEventQueues` / `EventQueue` / `RepositoryEvents` families. When npm traffic is quiet but another adapter (pypi, go, docker, rpm, hex, helm, nuget, composer, maven upload) hits its own drain stall, the same cascade will appear for that adapter. + +Only `ImportService.java:1269` and `EventQueue.java:91` use `offer()` correctly. 
+ +| Adapter / component | File : line | +|---------------------|--------------------------------------------------------| +| npm | `DownloadAssetSlice.java:198, 288` | +| npm | `UnpublishPutSlice.java:97`, `UnpublishForceSlice.java:92` | +| pypi | `ProxySlice.java:307, 455, 820, 842` | +| go | `CachedProxySlice.java:553`, `GoUploadSlice.java:167` | +| docker | `CacheManifests.java:332`, `PushManifestSlice.java:71` | +| helm | `DeleteChartSlice.java:135`, `PushChartSlice.java:103` | +| rpm | `RpmRemove.java:105`, `RpmUpload.java:113` | +| nuget | `PackagePublish.java:128` | +| hex | `UploadSlice.java:164` | +| storage/events | `EventQueue.java:91`, `RepositoryEvents.java:69, 84` | + +### 1.7 Fix plan for Issue #1 + +| ID | Change | Where | Risk | +|-----|------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------|--------| +| F1.1 | Replace **every** `queue.add(event)` on a `Queue` / `Queue` with `queue.offer(event)`; if `offer` returns false, log once at WARN + bump a `pantera.events.queue.dropped{queue=…}` Micrometer counter, continue. | All 18 sites enumerated in Β§1.6 | Low | +| F1.2 | **Decouple queue enqueue from the serve path.** `DownloadAssetSlice.checkCacheFirst` must return the cached response even if enqueue failed or the drain is stalled. Today, a throw bubbles out of `ifPresent` and kills the response pipeline. Wrap the `ifPresent(...)` body in `try { offer(...); } catch (Throwable t) { log; }` so a background-queue error cannot escape. | `DownloadAssetSlice.java:185-205 & 275-295` | Low | +| F1.3 | Fix the 503/502/500 cascade: in `CachedNpmProxySlice.handleSignal(ERROR)`, **pass through the upstream status** when it was a real HTTP response from the upstream, and only synthesize 503 on **connect/timeout** exceptions. Internal faults (Queue full, NPE, etc.) 
should surface as 500 β€” not 503 β€” so auto-block does not count them as upstream. | `CachedNpmProxySlice.java:276-283`, plus a richer `FetchSignal` (ERROR_UPSTREAM vs ERROR_INTERNAL). | Medium | +| F1.4 | In `GroupSlice.java:1150-1163`, change the `Targeted member failed on index hit` path to **return 502**, not 500, when the member responded with an actual HTTP 5xx β€” 502 is the RFC-correct upstream-fault signal and clients retry it. Keep 500 only for internal-fault cases. | `GroupSlice.java:1150-1163` | Low | +| F1.5 | Separate **internal** failure tracking from **upstream** failure tracking in `BaseCachedProxySlice.trackUpstreamFailure(...)`. `IllegalStateException`, `NullPointerException`, `CompletionException` wrapping them, etc. must not count against the upstream's auto-block score. | `BaseCachedProxySlice.java:505, 829, 862` + `AutoBlockRegistry` hooks | Medium | +| F1.6 | **Grow drain throughput.** Default `threads_count=1` is a single-point serialization. Make the npm / pypi / go / docker / helm proxy drain pool `max(2, cores/2)` with an explicit `pantera.proxy.drain.threads` override. Re-emit `pantera.events.queue.depth{repo=…}` gauge (already registered at line 195-201 for `res`) so ops can alert. | `MetadataEventQueues.java:176-184` | Low | +| F1.7 | **Stop using fixed `10_000` capacity** for proxy event queues on repos with heavy traffic. Either (a) scale with `threads_count` + `interval_seconds` to a conservative multiple of the peak rate, (b) make it configurable per-repo (`queue_capacity` setting), or (c) switch to an `MpscUnboundedArrayQueue` with coalescing so the drain itself is the backpressure mechanism. | `MetadataEventQueues.java:151` | Medium | +| F1.8 | Add a **verification harness**: integration test that fills the queue to capacity, issues 50 cache-hit requests for the same `.tgz`, asserts **50 Γ— 200 responses** (not 50 Γ— 503 or 50 Γ— 500). This would have caught this bug pre-prod. 
| `npm-adapter/src/test/java/…/DownloadAssetSliceTest.java` | Low | + +--- + +## 2. Issue #2 β€” "Internal 404 errors leaking to users" (P1) + +### 2.1 What the data actually shows + +Random 250,000-line sample of the 2,429,080 WARN 404 lines: + +| Signal | Value | +|----------------------|-------------------------------------------------------------------------| +| Logger | 100% `http.access` | +| Thread family | 99.0% `pantera-io-*` (external Jetty read threads) | +| Thread family | 0.6% `HttpClient@*` (internal Jetty client β†’ internal-fanout) | +| `user_agent.name` | 100% populated β€” `Apache-Maven/3.9.6`, `npm/*`, `pip/*`, etc. | +| `client.ip` | **0.4% populated (980 of 250 000)** β€” see Β§2.5 | +| `event.duration` | 81.5 % at 0 ms, 17.9 % at 1 ms β€” 404 itself is not slow | + +**Top 404 URL prefixes** (1-in-8 sample): + +``` +401,944 HEAD /artifactory/qa-release-local/… +401,942 HEAD /artifactory/libs-release-local/… +401,558 HEAD /artifactory/libs-fixed-revs-local/… +401,543 HEAD /artifactory/ext-release-local/… + 19,072 HEAD /artifactory/plugins-release-local/… + 1,407 HEAD /artifactory/maven-central/… +``` + +Four repos with **identical** 401k counts, one (plugins-release-local) at 19k, tail smaller. This is the canonical Maven-client probe pattern β€” the user has 4 Artifactory-shaped virtual repos in their `pom.xml ` list and Maven probes each one sequentially (HEAD, then GET on the first 404 hit). *Every missing artifact generates 4 HEAD-404s and then 4 GET-404s.* + +### 2.2 What the user suspected vs. what is actually happening + +| User hypothesis | Reality | +|-------------------------------------------------------------|--------------------------------------------------------------------------------------------------| +| "These 404s are internal group-to-member fanout" | **No.** Only 1,554 of ~2.43M (0.06%) originate from `HttpClient@*` worker threads. 
The X-Pantera-Internal header mechanism (commit 25890cac) correctly suppresses the in-process fanout. | +| "Group flatten / response reconciliation is broken" | Partly true on a different dimension β€” see Issue #1 Β§1.3 (503 from a single member becomes 500 to the client). The 404s themselves are correct group misses. | + +### 2.3 The actual noise drivers + +* **Driver A β€” WARN level for every 4xx.** `EcsLogEvent.log():321-322` pushes every 4xx to WARN. 404 in a Maven registry is expected business-as-usual (SNAPSHOT resolution, multi-repo fallback, optional-dependency probing). Industry peers (Artifactory, Nexus, Verdaccio) log 404 at INFO (or DEBUG for repeated ones). **2.43M WARN/12h = 56/s** steady-state β€” an ops-team-blinding level of noise. +* **Driver B β€” 4×–5Γ— multiplier from external multi-repo probing** (Β§2.1). This is client-side and cannot be fixed in Pantera, but can be **mitigated** by giving users one virtual group that includes those 4 local repos, so external clients issue one HEAD instead of four. +* **Driver C β€” `client.ip` missing on 99.6% of WARN 404s** (Β§2.5). + +### 2.4 The "Repository not found in configuration" stream + +1,436 WARN from `com.auto1.pantera.settings`. Representative example: + +```json +"message":"Repository not found in configuration", +"repository.name":"build.shibboleth.net", +"url.port":-1, +"process.thread.name":"vert.x-eventloop-thread-0" +``` + +These are legitimate client requests for repo names that Pantera doesn't serve. **They should be INFO, not WARN** (nothing is wrong with Pantera; the client has a stale/wrong repo URL). + +### 2.5 Why `client.ip` is null β€” observability regression + +`EcsLogEvent.extractClientIp` returns null when *all three* of X-Forwarded-For, X-Real-IP, and `remoteAddress` are absent or `"unknown"`. In this production log, **99.6% of access-log lines hit the null branch**, yet these *are* external requests (pantera-io-* threads, real user-agents). 
Likely causes (ranked by likelihood): + +1. The LB (AWS NLB? ALB? envoy?) in front of Pantera is not sending `X-Forwarded-For`. +2. `EcsLoggingSlice` extracts the client IP once at request entry but **does not populate MDC for requests that return through the 404 path** (i.e. the `client.ip` key is written to MDC only on specific code paths). Given the `"client.ip":null` literal in the payload and `EcsLogEvent.log:298-302` which drops a field *only when MDC has it*, null means MDC was never set either β€” so extraction itself ran but returned null, or the slice order puts logging before MDC population on this path. + +Quickest verification: one request's full raw record for an existing auth'd user (which *does* show `client.ip: "172.28.54.153"`) vs. an anonymous Maven HEAD β€” compare headers captured. The *first* session in the log shows `client.ip` present on the login event (`user.name=deployer`), then null on the next 2.4M 404s from Maven: **either Maven is not forwarding X-Forwarded-For** (it wouldn't; that's an LB responsibility) **or the LB strips headers for anonymous paths**. + +### 2.6 Fix plan for Issue #2 + +| ID | Change | Where | Risk | +|-----|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------|--------| +| F2.1 | Change the access-log level policy: **404 β†’ INFO**, **401/403 β†’ INFO** (still noisy but fine); 4xx-other (400/405/409/410/413/429) β†’ WARN; 5xx β†’ ERROR. Keep slow-request (>5s) at WARN. | `EcsLogEvent.java:319-329` | Low | +| F2.2 | Downgrade `Repository not found in configuration` from WARN to INFO (it's a client-config error, not a Pantera error). | search for that string in `com.auto1.pantera.settings` | Low | +| F2.3 | Audit the LB β†’ Pantera header contract. 
If `X-Forwarded-For` is not being sent, either fix the LB config or let Pantera fall back to the raw Netty `HttpServerRequest.remoteAddress()` (real LB IP β€” at least non-null) and tag it as such. | `EcsLogEvent.extractClientIp` + `EcsLoggingSlice` | Low-Med | +| F2.4 | Add an end-to-end test that asserts `client.ip` is populated in the access-log JSON for a request made through Pantera's listener (i.e. a Jetty test with a synthetic XFF header and without). | `pantera-core/src/test/java/…/EcsLoggingSliceTest.java` | Low | +| F2.5 | Document (outside of the code) that multi-repo Maven clients should be migrated to a single group-repo URL. Add a recipe in `/docs/` and/or a startup warning when multiple locals overlap. | `docs/group-repo-recipes.md` + `RepoConfig` loader | Low | + +--- + +## 3. Issue #3 β€” "npm install / mvn install is slower than v2.1.0 / v2.1.1" + +### 3.1 The dominant cause is Issue #1 (measured effect) + +* npm client default: 3 retries with exponential backoff starting at ~1s. +* 11,523 `Queue full` errors in a 2-minute window (Β§1.1 time profile) β†’ at 500-1000 npm req/s during the burst, a 5-15% transient failure rate becomes 20-50% **effective** retry-rate. +* Each retry adds β‰₯1s to the per-package install timeline. `npm install` with 300 packages and 10% retry rate adds ~30s per build, compounding across CI agents. + +**Fixing Β§1.7 F1.1 and F1.2 eliminates the primary perf regression vs. 2.1.0/2.1.1.** + +### 3.2 Secondary suspects from the v2.1.0 β†’ v2.1.3 diff + +These changes all land between 2.1.0 and 2.1.3 (`git log --oneline v2.1.0..HEAD` in pantera/). Each one is *individually* small but several sit on the hot path. 
+ +| Commit | Change | Plausible cost at 1 k req/s | Verdict | +|----------|-------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------|------------------------------------------------------------------------------------------| +| 31b88eb0 | `DbArtifactIndex` switched `Executors.newFixedThreadPool(unbounded queue)` β†’ bounded 500-slot + **CallerRunsPolicy** | Zero when DB is fast; when DB saturates, **Vert.x event loop blocks**, potentially stalling all HTTP IO on that core | **Tune.** Leave CallerRunsPolicy *out* of the Vert.x event loop path β€” substitute `AbortPolicy` + fast 503 or use a dedicated worker pool upstream of the event loop. See F3.1. | +| 599ee1e5
608a9b7d | MDC propagation wrappers on every RxJava / CompletableFuture / Valkey boundary | ~3–5 `Runnable` allocations per request; ~50–150 µs CPU per request | **Accept for now**, revisit if profiler shows >5% CPU in `MdcPropagation.*`. | +| bf7f76a6 | `locateByName` 500-ms statement timeout | At worst, one request pays a 500 ms wait before failing | Correct; keep. | +| 76f2f638
a70f678b | Negative cache with L1 + L2 (Valkey) backing for proxy fanout | +1 Valkey GET per cache-miss negative lookup (~0.5–3 ms network round-trip on the same VPC) | Net positive. Monitor miss-rate and Valkey p99 latency; fall back to L1-only if Valkey is >5 ms. See F3.2. | +| b37deea2 | Maven-metadata.xml coalescer gate (`inFlightMetadataFetches`) | Under burst, N-1 followers wait for the leader's fetch (intended); if leader is slow, all followers wait | Correct. Verify `thenComposeAsync` (not `thenCompose`) is used so the leader's completion doesn't synchronously run all followers on one stack. The commit says it is — sanity-check still under load. | +| 2b31fb76 | Stale-while-revalidate enabled **by default** for artifact binaries | On cache-hit, fires background revalidation — doubles the upstream load during the refresh window | **Check** `staleMaxAge` defaults. If too short, the background refresh fires on *every* request. See F3.3. | +| 9f44556c | "recover from TOCTOU eviction race as cache miss" | One extra storage probe + optional refetch per race | Correct; keep. | +| cff45271 | **Upload-path** SHA-256 etc. now read the file from storage 4× (once per algorithm) | Not a download-path cost; affects `mvn deploy` only | **Fix** (see F3.4): compute all 4 digests in a single stream.
| + +### 3.3 JVM / container sizing observations + +From `ops-infrastructure/terraform/.../backend.tf:21`: + +``` +-Xms14g -Xmx14g +-XX:+UseG1GC -XX:G1HeapRegionSize=16m -XX:G1ReservePercent=10 -XX:MaxGCPauseMillis=300 +-XX:MaxDirectMemorySize=4g +-XX:ParallelGCThreads=6 -XX:ConcGCThreads=2 +-XX:+UseStringDeduplication -XX:+ParallelRefProcEnabled +-XX:+UseContainerSupport -XX:+ExitOnOutOfMemoryError +-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/pantera/dumps/heapdump.hprof +-Xlog:gc*:file=/var/pantera/logs/gc.log:time,uptime:filecount=5,filesize=100m +-Djava.io.tmpdir=/var/pantera/cache/tmp +-Dvertx.cacheDirBase=/var/pantera/cache/tmp +-XX:InitiatingHeapOccupancyPercent=45 -XX:+AlwaysPreTouch +-Dvertx.max.worker.execute.time=120000000000 # 120 s in ns +-Dio.netty.allocator.maxOrder=11 +-Dio.netty.leakDetection.level=simple +-Dpantera.filesystem.io.threads=14 +``` + +Container: 15 vCPU, 24 GiB RAM. + +| Setting | Comment | +|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `-Xms/-Xmx=14g` | Sensible β€” leaves 10 GiB for off-heap (direct-buffer pool, page cache, Netty arenas). | +| `MaxDirectMemorySize=4g` | Good β€” prevents Netty direct-buffer leaks from OOM-ing the JVM silently. | +| `ParallelGCThreads=6 ConcGCThreads=2` | Fine. Room to go to 8/2 given 15 vCPU, but not the bottleneck. | +| `InitiatingHeapOccupancyPercent=45` | Slightly aggressive; with 14 g heap G1 starts a concurrent cycle at ~6.3 g. Keep β€” avoids long-running old-gen fills. | +| `pantera.filesystem.io.threads=14` | Matches vCPU-1. OK for streaming IO. Monitor `FileSystemStorage` pool under 1 k req/s; if requests queue, bump to 24 and move CPU-bound work to a separate pool. | +| `vertx.max.worker.execute.time=120 000 000 000 ns` | **2 minutes**. 
This means Vert.x will not log/kill a worker thread until it has been blocked for 2 min. Under the Issue #3 Β§3.2 `CallerRunsPolicy` scenario, a DB stall could silently pin event-loop threads for a long time. Recommend reducing to `5 000 000 000` (5 s) so stalls become visible. | +| `io.netty.allocator.maxOrder=11` | 16 MB chunks. Fine for typical `.tgz` <10 MB. Watch arena usage if tarballs are large. | +| `io.netty.leakDetection.level=simple` | Keep; low-cost sampling. | +| `UseStringDeduplication` | G1 feature, cheap. OK. | +| `HeapDumpOnOutOfMemoryError` | Keep. | +| Missing: `-XX:+PerfDisableSharedMem` | Add β€” prevents `/tmp/hsperfdata` IO stalls on some container runtimes. | +| Missing: `-XX:MaxRAMPercentage=85` | Optional β€” make heap sizing respond to container resize without rebuild. | + +### 3.4 Fix plan for Issue #3 + +| ID | Change | Where | Risk | +|-----|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------|--------| +| F3.1 | Replace `CallerRunsPolicy` on `DbArtifactIndex` with either (a) `AbortPolicy` + caller returns a 503 with `Retry-After: 1`, or (b) a dedicated worker pool upstream of the Vert.x event loop. Never let an event-loop thread synchronously run a 500 ms DB query. | `DbArtifactIndex.java:createDbIndexExecutor()` | Medium | +| F3.2 | Add p99 latency tracking for `NegativeCache` L2 (Valkey) lookups. If Valkey p99 > 5 ms, short-circuit to L1-only and emit a gauge so we can tune the Valkey pool / topology. | `NegativeCache.java`, `NegativeCacheRegistry.java` | Low | +| F3.3 | Verify SWR `staleMaxAge` defaults. Short defaults β†’ every hit triggers a background revalidation. For .tgz/.jar binary artifacts, 24-72h stale is usually appropriate. 
| `BaseCachedProxySlice.java` β€” staleness config | Low | +| F3.4 | `UploadSlice.generateChecksums` β€” compute MD5/SHA-1/SHA-256/SHA-512 in a **single pass** over the stream using parallel `MessageDigest` chain, instead of 4Γ— `storage.value(key)`. | `maven-adapter/.../UploadSlice.java:397-440` | Low | +| F3.5 | Reduce `vertx.max.worker.execute.time` from 120 s to 5 s. This is a diagnostic knob, not a throttle β€” 5 s is still 3 orders of magnitude above any healthy IO. | `ops-infrastructure/.../backend.tf` JVM_ARGS | Low | +| F3.6 | Add `-XX:+PerfDisableSharedMem` (avoids `/tmp/hsperfdata` contention under container IO pressure). | `ops-infrastructure/.../backend.tf` JVM_ARGS | Low | +| F3.7 | Ship a **production perf regression benchmark** in CI: reproducible `npm install` of 100 packages and `mvn dependency:resolve` for a known POM, run against a local Pantera container per PR, with p95 and p99 thresholds against a committed baseline. Prevents future regressions. | `.github/workflows/perf.yml` or equivalent | Medium | + +--- + +## 4. Issue #4 β€” Non-INFO log triage (WARN/ERROR review) + +### 4.1 Full level counts (12-hour window, both rotations) + +``` +INFO 4,764,793 (65.4 %) +WARN 2,501,444 (34.3 %) +ERROR 23,150 (0.32 %) [99.8 % of these in 10:05-10:06] +FATAL/DEBUG/TRACE 0 +``` + +At ~~60/s WARN it is ~impossible to read these logs interactively β€” Kibana dashboards and alerts are drowned. 
+ +### 4.2 WARN breakdown by logger + +``` +2,479,228 http.access ← 99.1 % of WARN; mostly 404s (see Issue #2) + 16,863 com.auto1.pantera.group ← group fanout misses, member failures + 2,480 com.auto1.pantera.auth ← failed basic/token auth (expected at WARN) + 1,436 com.auto1.pantera.settings ← "Repository not found in configuration" (downgrade to INFO, F2.2) + 796 com.auto1.pantera.http.auth ← "Authoritative provider rejected credentials…" (expected) + 394 com.auto1.pantera.maven ← "No metadata found in any member and no stale fallback" + 216 co.elastic.apm.agent.impl.transaction.AbstractSpanImpl ← APM library noise + 28 com.auto1.pantera.maven-proxy + 1 com.auto1.pantera.security + 1 com.auto1.pantera.diagnostics +``` + +### 4.3 WARN messages that are **meaningful signals** (keep) + +| Count | Message | Note | +|---------|--------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------| +| 2,726 | `Targeted member failed on index hit, returning 500` | Issue #1 Β§1.3 β€” fix the 500 β†’ 502 per F1.4 and the count drops to ~0 after F1.1. | +| 2,723 | `Member 'npm_proxy' returned error status (0 pending)` | Same cascade. Will disappear with F1.1. | +| 2,479 | `Failed to authenticate user` | Real auth noise β€” mostly `npm` UA (944 lines), Gradle (203), pip (152). Keep WARN, alert on spikes. | +| 796 | `Authoritative provider rejected credentials; chain will NOT fall through` | Very useful for credential-rotation incidents. Keep WARN. | +| 394 | `No metadata found in any member and no stale fallback` | Maven group metadata merge failed. Related to Issue #1 cascade via group members; also fires under L2 Valkey transient faults. Keep WARN, correlate with F3.2. | +| 181 | `Member circuit OPEN, skipping: spring-plugins-release` | AutoBlockRegistry at work on a legit flaky upstream. Keep WARN. 
| +| 108 Γ— many | `Artifact not found in any group member: /test-automation-services/**/*.2.284.*` | A single CI pipeline is pinned to `version=2.284` on ~24 different artifacts, looping. Group resolution correctly returns 404. Keep WARN for visibility; consider adding a dedupe/sampler so one offending build doesn't flood the log. | + +### 4.4 WARN messages that should move level (downgrade) + +| Count | Message | Target level | Rationale | +|-------------|-------------------------------------------|--------------|---------------------------------------------------------------------| +| 2,429,080 | `Not found` (http.access, 404) | **INFO** | Standard industry practice for artifact registries (Issue #2 F2.1). | +| 50,161 | `Authentication required` (http.access, 401) | **INFO** | Normal npm first-request behavior before auth retry. | +| 1,436 | `Repository not found in configuration` | **INFO** | Client config error, not Pantera error (F2.2). | +| 216 | APM `AbstractSpanImpl` | **ERROR** only if the APM agent genuinely cannot send traces β€” suppress library INFO/WARN or configure APM agent log level. | Noise from elastic-apm-agent; tune its logger, not ours. | + +### 4.5 ERROR breakdown + +| Count | Logger | Root cause | +|----------|------------------------------------------|----------------------------------------------------------------------------| +| 11,606 | `http.access` (5xx access-log entries) | Downstream of Queue full cascade (Issue #1). | +| 11,523 | `com.auto1.pantera.npm` (`Queue full`) | Issue #1 root. One stack-trace per offending request. | +| 20 | `com.auto1.pantera.http.client` | `TimeoutException: Idle timeout expired: 30000/30000 ms` β€” Jetty HTTP client's 30s idle timeout firing (healthy upstreams go idle, connections closed with an exception instead of a clean shutdown). Downgrade to WARN or suppress idle-close as a non-error. | +| 1 | `co.elastic.apm.agent.jmx.JmxMetricTracker` | Single JMX metric-registration error at startup. Harmless. 
| + +### 4.6 Format / structural bugs in the JSON logs themselves + +| Bug | Impact | Fix | +|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------| +| **`log.level` serializes with *and without* a space after the colon**: `"log.level": "WARN"` (regular EcsLogger path) vs `"log.level":"ERROR"` (MapMessage path in `EcsLogEvent`). | Breaks naive string-based filtering; caused this analysis to miss ERROR level on first pass. Legit ECS consumers parse JSON so it's harmless at Elasticsearch ingest β€” but fragile. | Align all access-log emissions onto one code path (either both via `EcsLogger` or both via `MapMessage` + `EcsLayout`). F4.1. | +| **Pretty-space fragments in the inner JSON**: `…"log.logger":"http.access"} , "ecs.version": "1.2.0",…` β€” spaces around the trailing `,`. | Cosmetic; still valid JSON. | Drop the trailing-space joiners in EcsLayout template. | +| **`client.ip: null`** on 99.6 % of WARN 404 lines. | Severely degrades traceability per-client. | Issue #2 F2.3 / F2.4. | +| **`trace.id: null`** on many 404/WARN lines (observed in the sample). | Breaks access ↔ audit correlation the v2.1.3 observability work intended to fix (commit cff45271). | Re-verify `EcsLoggingSlice` populates MDC *before* the Slice chain on every request, including the 404-fast-path. F4.2. | +| **Trailing newline inside the `log` field**: `"log":"{…}\n"` (the `\n` is literal content, not a JSON separator). Comes from Docker's json-file driver wrapping stdout lines. | Trivial; present across all Docker deployments. Not a bug in Pantera. | N/A. 
| + +### 4.7 Fix plan for Issue #4 + +| ID | Change | Where | Risk | +|-----|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------------------------------------------------|------| +| F4.1 | Unify access-log emission: either (a) make `EcsLogEvent` use the same `EcsLogger` fluent API used everywhere else, or (b) migrate everyone to `MapMessage + EcsLayout`. Target: `log.level` formatting identical across all loggers. | `EcsLogEvent.java:281-330` | Low | +| F4.2 | Make `EcsLoggingSlice` set MDC (`client.ip`, `trace.id`, `user.name`, `request_id`, …) **before** calling the underlying slice. Verify by contract test that every access-log emission has non-null `trace.id`, and non-null `client.ip` for non-null remote address. | `EcsLoggingSlice.java`, plus new contract test | Low | +| F4.3 | Apply the level downgrades in Table Β§4.4 (Issue #2 F2.1 subsumes most of this). | `EcsLogEvent.java`, `com.auto1.pantera.settings`, APM config | Low | +| F4.4 | Downgrade `HttpClient@… idle timeout` ERROR to WARN or INFO; it is a normal connection-lifecycle event, not a request failure. | `com.auto1.pantera.http.client` logger | Low | +| F4.5 | Add a "weekly logs audit" CI job β€” processes N sample production log lines and asserts (a) unknown log.level values = 0, (b) `trace.id` non-null on β‰₯99 % of access lines, (c) `client.ip` non-null on β‰₯99 % of access lines, (d) no stack traces at INFO. | CI workflow | Low | + +--- + +## 5. 
Correlation map β€” which fix moves which KPI + +| Fix ID | Drops 503 rate | Drops log volume | Drops npm-install time | Drops p95 latency | Improves observability | +|----------|---------------:|-----------------:|-----------------------:|------------------:|------------------------| +| F1.1 F1.2 | **βˆ’95 %** | -3 % | **βˆ’30-50 %** on failed builds | βˆ’50 % on bursts | | +| F1.3 F1.4 | | | βˆ’5 % | | correct status codes for clients | +| F1.5 | | | | | βœ“ correct auto-block signal | +| F1.6 F1.7 | βˆ’3 % (residual) | | | | βœ“ | +| F2.1 F2.2 F4.3 | | **βˆ’95 %** (2.4M/12h) | | | βœ“ readable dashboards | +| F2.3 F2.4 F4.2 | | | | | βœ“ client.ip, trace.id populated | +| F3.1 | | | | **βˆ’30 %** when DB stalls | | +| F3.2 | | | | Valkey p99 < 5 ms ensured | | +| F3.3 | | | βˆ’2-5 % | | | +| F3.4 | | | | | mvn deploy upload latency -50 % | +| F3.5 F3.6 | | | | | visible stalls in logs | +| F3.7 F1.8 F4.5 | | | | | regression caught in CI | + +--- + +## 6. Recommended execution order + +1. **Sprint 0 (same day, hotfix branch):** F1.1, F1.2 (the 18 Γ— `queue.add β†’ queue.offer` rewrite + the catch-all in `DownloadAssetSlice`). Ship behind a feature-flag only if you want to shadow-test; otherwise patch-release as 2.1.4. This single change closes 95 % of the 503 problem and restores perf to 2.1.0 levels. +2. **Sprint 1 (this week):** F1.3, F1.4, F1.5, F1.6, F1.8 (status-code correctness + drain sizing + regression test). F2.1 + F2.2 + F4.3 (log-level downgrade; purely config-side; safe). F4.4 (Jetty idle-timeout downgrade). +3. **Sprint 2:** F3.1 (unblock event loop from DB stalls), F2.3 / F2.4 / F4.1 / F4.2 (MDC & client.ip observability restoration), F1.7 (drain pool configurability). +4. **Sprint 3:** F3.2, F3.3, F3.4, F3.5, F3.6, F3.7, F4.5 (performance polish + regression gates). + +--- + +## 7. Out-of-scope items surfaced during the audit + +These are real observations but not part of the four tasks; documenting so they can be triaged separately. 
+ +* **Test-automation-services CI pipeline pinned to `version=2.284`** is issuing ~300 probes/hour for 24 non-existent artifacts (both `.pom` and `.jar`). Reach out to the team to unblock/fix. (~5 k WARN / 12 h that will disappear once their build is fixed.) +* **`spring-plugins-release` upstream is flaky** β€” 181 `Member circuit OPEN, skipping: spring-plugins-release` WARN + 4,501 404s on that repo. Consider whether to keep it in the group, add a longer negative-cache TTL, or mirror selected artifacts locally. +* **20 Γ— Jetty-client 30-s idle-timeout ERRORs** scattered through the night. These are not user-visible failures but the ERROR stack-traces are bulky. Either shorten idle timeout slightly, or suppress the exception when the transport is simply going idle. +* **Audit log throughput** not measured in this window β€” worth a separate audit (ensure `DbConsumer` drains at β‰₯ incoming artifact-event rate; otherwise audit will lag behind the access log by hours). + +--- + +## 8. Companion document + +The architectural review of the group/proxy revamp (enterprise-architect lens) lives in `v2.1.3-architecture-review.md` alongside this forensic report. That document covers: + +- the 02:01 outlier (separate TOCTOU-eviction bug, `ValueNotFoundException` on `npm_proxy/columnify/meta.meta`) +- 10 correctly-applied patterns (P1-P10) +- 20 anti-patterns (A1-A20) ranked by blast radius β€” including evidence for each +- 6 cross-cutting enterprise concerns (C1-C6) +- 9 architectural refactors (R1-R9) mapped to the anti-patterns they retire +- scored SOLID/enterprise checklist + +The forensic report (this document) lists **tactical** fixes (F1.x-F4.x) β€” minimal edits to stop bleeding. The architecture review lists the **strategic** refactors that remove entire classes of bug. + +## 9. 
Appendix β€” evidence files produced during this audit + +* `logs/.analysis/warn.ndjson` β€” all 2,501,444 WARN lines (2.4 GB) +* `logs/.analysis/error.ndjson` β€” all 23,150 ERROR lines (55 MB) +* `logs/.analysis/non_info_and_errors.ndjson` β€” combined set (dedup of the two above; kept for reproducibility) + +All counts and patterns in this report can be re-derived from those three files with `rg` + `jq`. diff --git a/docs/analysis/v2.2-next-session.md b/docs/analysis/v2.2-next-session.md new file mode 100644 index 000000000..661f39709 --- /dev/null +++ b/docs/analysis/v2.2-next-session.md @@ -0,0 +1,399 @@ +# v2.2 β€” next-session task list + +Eight work items from the v2.2 target architecture (`docs/analysis/v2.2-target-architecture.md` Β§12) shipped in v2.2.0 (WI-00, WI-01, WI-05, WI-07, WI-post-05, WI-post-07, WI-02, WI-03). This document enumerates the **six remaining** WIs plus the **four WI-post-03 follow-ups** as agent-executable tasks with the same shape as the original Β§12 entries β€” **Goal / Files new-modified-deleted / Tests required / DoD / Depends-on**. Priority order reflects the dep graph in Β§12: trunk-blocking WI-04 first, then the neg-cache chain, concurrency simplification, per-repo bulkheads, and the release-gate script last. + +Each DoD entry is a command + expected output an agent can execute without human interpretation. When a command depends on a fresh clone, the agent is responsible for running `mvn -T8 install -DskipTests` before the module tests. + +--- + +## P0 β€” trunk-blocking + +### WI-04 β€” `GroupResolver` replaces `GroupSlice` + +**Goal.** Rewrite the group-resolution layer around the 5-path decision tree documented in target-architecture Β§2. Introduces the sealed `MemberSlice` type (`HostedMember` | `ProxyMember`) and replaces `ArtifactIndex.locateByName` with a sealed `IndexOutcome` return (`Hit(List) | Miss | Timeout | DBFailure`). 
Every error path returns a `Result` carrying a `Fault`, so `FaultTranslator` becomes the single HTTP-status site. The bespoke `inFlightFanouts`-turned-`SingleFlight`, the `AtomicBoolean`/`AtomicInteger` fanout triad, and the `MdcPropagation.withMdc*` calls in the group package are deleted. This is the WI that wires `RequestContext` (WI-02) + `StructuredLogger` (WI-03) + `Fault` (WI-01) + `SingleFlight` (WI-05) + `ProxyCacheWriter` (WI-07) together β€” the backbone of v2.2.0's observability and error-handling promises. + +**Files new.** +- `pantera-main/src/main/java/com/auto1/pantera/group/GroupResolver.java` β€” the new top-level slice. +- `pantera-main/src/main/java/com/auto1/pantera/group/MemberSlice.java` β€” sealed interface. +- `pantera-main/src/main/java/com/auto1/pantera/group/HostedMember.java` β€” record implementing `MemberSlice`. +- `pantera-main/src/main/java/com/auto1/pantera/group/ProxyMember.java` β€” record implementing `MemberSlice`. +- `pantera-core/src/main/java/com/auto1/pantera/index/IndexOutcome.java` β€” sealed `Hit | Miss | Timeout | DBFailure`. +- `pantera-main/src/test/java/com/auto1/pantera/group/GroupResolver*Test.java` β€” one test class per decision-tree path. + +**Files modified.** +- `pantera-core/src/main/java/com/auto1/pantera/index/ArtifactIndex.java` β€” `locateByName` return type changes from `CompletionStage>>` to `CompletionStage`. Every caller updated. +- `pantera-main/src/main/java/com/auto1/pantera/index/DbArtifactIndex.java` β€” implementation returns the new sealed type; SQLException β†’ `DBFailure`, timeout β†’ `Timeout`, empty β†’ `Miss`, non-empty β†’ `Hit(names)`. +- Every current consumer of `GroupSlice` in `RepositorySlices.java` and the adapter routers β€” register `GroupResolver` instead. 
+
+**Files deleted.**
+- `pantera-main/src/main/java/com/auto1/pantera/group/GroupSlice.java` — all 6 overloaded constructors, the `proxyOnlyFanout` code, the private `RequestContext` record (resolves the C4 namespace collision noted in the v2.2.0 review), and the `locate()` legacy dead code.
+- `pantera-main/src/main/java/com/auto1/pantera/group/MavenGroupSlice.java` — absorbed into `GroupResolver` with a `MavenMetadataSpecialCase` helper.
+- The 19 `MdcPropagation.` call-sites in `GroupSlice.java` + `MavenGroupSlice.java` disappear with the deletion.
+
+**Tests required.**
+- Full flow integration test (one per Path A / B / OK outcome in §2).
+- TOCTOU test: 02:01 outlier scenario — index says artifact exists at member M; storage raises `ValueNotFoundException`; `GroupResolver` falls through to proxy fanout and succeeds. No 500 to client.
+- Queue-full test: an upstream adapter returns `Fault.Overload` — group propagates `Overload` to client as 503 with `Retry-After` (not a synthesized 500).
+- `AllProxiesFailed` pass-through test: three proxies all return a 5xx; client sees the best-ranked proxy's 5xx body verbatim per the `FaultAllProxiesFailedPassThroughTest` contract.
+- StructuredLogger tier coverage: every internal hop (group → member) emits a Tier-2 `InternalLogger` line; every upstream fetch emits a Tier-3 `UpstreamLogger` line.
+
+**DoD (machine-checkable).**
+```
+$ rg 'class GroupSlice|class MavenGroupSlice' --glob '*.java' | rg -v test
+# expect: 0 matches — both deleted
+
+$ mvn -T8 install test
+# expect: BUILD SUCCESS; every existing GroupSlice test replaced with GroupResolver equivalent
+
+$ rg 'AtomicBoolean|AtomicInteger' pantera-main/src/main/java/com/auto1/pantera/group/ --glob '*.java'
+# expect: 0 matches
+
+$ rg 'MdcPropagation\.' pantera-main/src/main/java/com/auto1/pantera/group/ --glob '*.java'
+# expect: 0 matches (group package migrated to ContextualExecutor + RequestContext.bindToMdc())
+```
+
+**Depends-on.** WI-01, WI-02, WI-03, WI-05 shipped (v2.2.0).
+
+---
+
+### WI-06 — `NegativeCache` composite key + single bean + sync invalidation
+
+**Goal.** Rewrite `NegativeCache` with a composite `NegativeCacheKey(scope, type, artifact, version)` so hosted / proxy / group scopes share one Caffeine+Valkey bean per deployment rather than three parallel instances. Per-tier (L1/L2) + per-scope YAML overrides; synchronous invalidation from every `UploadSlice` / `PackagePublish` before upload-ACK so a just-uploaded artifact cannot be 404-shadowed by stale neg-cache. Rename the YAML key `meta.caches.group-negative` → `meta.caches.repo-negative` with a one-release deprecation-warn for the legacy key. Migrate the 25 cooldown-related `MdcPropagation.` call-sites to `ContextualExecutor` at the same time.
+
+**Files new.**
+- `pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCacheKey.java` — record with scope enum + triple.
+
+**Files modified.**
+- `pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCache.java` — rewrite around the composite key; scope-partitioned metrics; single shared instance.
+- `pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCacheConfig.java` — per-tier L1/L2 fields + per-scope override map.
+- `pantera-main/src/main/java/com/auto1/pantera/RepositorySlices.java` — rename YAML accessor; read legacy key with deprecation WARN; build one `NegativeCache` and inject into every slice.
+- Every `UploadSlice` / `PackagePublish` / equivalent in npm, maven, pypi, go, helm, rpm, hex, nuget, composer, file adapters — add `negCache.invalidateOnPublish(scope, artifact, version)` call *before* ACK.
+- `pantera-core/src/main/java/com/auto1/pantera/http/cache/BaseCachedProxySlice.java`, `pantera-main/.../GroupResolver.java` (WI-04 output), `npm-adapter/.../CachedNpmProxySlice.java` β€” delete their private `new NegativeCache(...)` constructions; accept an injected shared instance. +- `pantera-core/src/main/java/com/auto1/pantera/http/cache/ProxyCacheWriter.java` β€” invalidate neg-cache on first-time proxy cache write (pass a `NegativeCache` reference to the ctor or accept an `onWriteSuccess` callback). +- `pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownCache.java`, `pantera-core/.../FilteredMetadataCache.java`, `pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataFilterService.java`, `pantera-main/.../JdbcCooldownService.java` β€” migrate the ~25 `MdcPropagation.` calls to `ContextualExecutor.contextualize(...)` wrapping the same executor each site currently submits to. + +**Files deleted.** None in this WI β€” the old `NegativeCache` is rewritten in place. + +**Tests required.** +- Ordering test: check-before-DB produces a 404 without a DB query when the key is in L1 neg cache. +- Single-source test: `rg 'new NegativeCache\(' --glob '*.java' | rg -v test` returns `1` (the single wiring site in `RepositorySlices`). +- Invalidation race test: (a) write neg cache entry for `(group, type, A, v1)`; (b) publish `A@v1`; (c) next GET serves 200, never 404. 1 000 / 1 000 iterations. +- Proxy-cache-write invalidation test: proxy fetches artifact previously marked 404; next GET serves cached 200 not 404. +- L2 fallback: disable Valkey mid-test β†’ L1 continues to serve; counter `pantera.neg_cache.l2_fallback` increments. +- Per-scope TTL override: hosted L1 30s vs proxy L1 10m both respected. +- Contextual propagation regression: pick one cooldown async hop, assert ThreadContext + APM span survive the hop after the MdcPropagation β†’ ContextualExecutor migration. 
+ +**DoD.** +``` +$ rg 'new NegativeCache\(' --glob '*.java' | rg -v test +# expect: 1 match β€” the single wiring site in RepositorySlices + +$ mvn -T8 install test +# expect: BUILD SUCCESS; all above tests green + +$ rg 'MdcPropagation\.' pantera-core/src/main/java/com/auto1/pantera/cooldown/ pantera-main/src/main/java/com/auto1/pantera/cooldown/ --glob '*.java' +# expect: 0 matches β€” cooldown package migrated + +$ # YAML legacy-key deprecation warn +$ grep -r 'group-negative' pantera-main/src/test/resources +$ # test that configuring under the legacy key still works, with a WARN emitted +``` + +**Depends-on.** WI-02 shipped (v2.2.0), WI-04 (for the shared `MemberSlice` scope enum and the injected-instance wiring). + +--- + +### WI-06b β€” admin UI for negative-cache inspection and invalidation + +**Goal.** Add the UI panel + REST endpoints described in target-architecture Β§5.6 so platform engineers can investigate "just-uploaded-but-clients-404" reports without SSH + curl. Five endpoints: inspect by key, list entries matching a pattern, invalidate single key, invalidate pattern (rate-limited), stats dashboard. + +**Files new (backend).** +- `pantera-main/src/main/java/com/auto1/pantera/api/v1/admin/NegativeCacheAdminResource.java` β€” the five endpoints. +- `pantera-main/src/main/java/com/auto1/pantera/api/v1/admin/NegativeCacheAdminResourceTest.java`. + +**Files new (frontend).** +- `pantera-ui/src/pages/admin/NegativeCache.tsx` (or equivalent in the current Vue framework β€” repo currently uses Vue 3 per `pantera-ui/package.json`, so `pantera-ui/src/views/NegativeCacheView.vue`). + +**Files modified.** +- Admin route registration + role-guard in `pantera-ui/src/router/index.ts` (or Vue equivalent). +- `pantera-main/.../api/v1/ApiRouter.java` (or equivalent) β€” register the five new routes. + +**Tests required.** +- REST contract tests for each endpoint (200 with admin role, 403 without). 
+- Pattern invalidation rate-limit test: 11th request within 1 min returns 429. +- UI Cypress / Playwright test: inspector loads, single-key form succeeds, pattern form requires confirm-click. +- Tier-4 Local log test: every invalidation emits a Tier-4 `StructuredLogger.local()` WARN with `event.action=neg_cache_invalidate`, `manual=true`, correct `user.name`. +- Tier-5 Audit log test: every invalidation also emits an `AuditAction`-less audit event (the four compliance actions are for artifact lifecycle; neg-cache invalidation belongs in Tier-4). If the team wants neg-cache invalidation to be Tier-5 audited, extend the closed `AuditAction` enum and update the scope there β€” do not use a string-typed action. + +**DoD.** +``` +$ curl -u admin:... http://localhost:8080/api/v1/admin/neg-cache/inspect?scope=group&type=maven&artifact=com/example/foo&version=1.0 +# expect: 200 with JSON payload describing the entry + +$ mvn -T8 -pl pantera-main test -Dtest='NegativeCacheAdminResource*' +# expect: BUILD SUCCESS + +$ cd pantera-ui && npm run test +# expect: Cypress / Playwright suite passes the neg-cache admin cases +``` + +**Depends-on.** WI-06 shipped (backend bean available for injection). + +--- + +## P1 β€” concurrency simplification + perf hardening + +### WI-08 β€” retire RxJava2 from hot paths + +**Goal.** One reactive stack (`CompletionStage`) on every hot path. Five named call sites migrate off `io.reactivex.Maybe` / `SingleInterop` / `body.asBytesFuture()` to streaming `Content`-based equivalents: `DownloadAssetSlice`, `CachedNpmProxySlice`, `BaseCachedProxySlice`, `NpmProxy.getAsset`, `MavenProxy.getMetadata`. Also `PyProxySlice`, `ComposerProxyDownloadSlice`, `GoProxySlice` per target-architecture Β§12 WI-08. Unblocks deletion of the 5 `MdcPropagation.` call-sites in the npm adapter (`DownloadAssetSlice`, `NpmProxy`) and finishes the last outstanding `TODO(WI-post-07)` marker in `CachedNpmProxySlice`. 
+ +**Files modified.** 8 slices listed above; each individual change is mechanical (RxJava `Maybe.map(fn)` β†’ `stage.thenApply(fn)`; `Maybe.flatMap(g)` β†’ `stage.thenCompose(g)`; `body.asBytesFuture()` β†’ streaming `Content` passed directly to `ResponseBuilder.body(...)` where feasible, with heap-bounded fallback otherwise). Concurrently wire the migrated slices to `ContextualExecutor.contextualize(...)` at any new `CompletableFuture` boundary so the 5 npm `MdcPropagation` callers can be deleted. + +**Files deleted.** `hu.akarnokd.rxjava2.interop.SingleInterop` usages on hot paths; any `asBytesFuture().thenApply(bytes -> new ByteArrayInputStream(bytes))` patterns the 8 sites contain. + +Also complete the last **WI-post-07** residual: wire `ProxyCacheWriter` into `CachedNpmProxySlice` β€” this can only happen after the RxJava-to-CompletionStage migration, which is why the `TODO(WI-post-07)` is kept in the npm adapter through v2.2.0. Add a `NpmCacheWriter` facade that constructs `ProxyCacheWriter` with `{SHA512}` algos, invoked from `CachedNpmProxySlice` for `*.tgz` paths only. Delete the `TODO(WI-post-07)` javadoc. + +**Tests required.** +- Existing integration tests for each slice remain green. +- New streaming test: GET a 50 MB artifact; assert peak heap does not grow by more than 10 MB (no full-body buffering). +- npm cache-writer test: seed mock upstream serving `.tgz` bytes with integrity:sha512 declaration; assert `ProxyCacheWriter` path invoked, digest matches, cache lands both primary + sidecar. + +**DoD.** +``` +$ rg 'io.reactivex' pantera-main pantera-core npm-adapter maven-adapter pypi-adapter go-adapter composer-adapter --glob '*.java' | rg -v test +# expect: 0 matches + +$ rg 'SingleInterop' --glob '*.java' | rg -v test +# expect: 0 matches + +$ rg 'TODO\(WI-post-07\)' --glob '*.java' +# expect: 0 matches β€” npm-adapter marker finally gone + +$ rg 'MdcPropagation\.' 
npm-adapter/src/main/java --glob '*.java' +# expect: 0 matches β€” 5 npm callers deleted + +$ mvn -T8 install test +# expect: BUILD SUCCESS +``` + +**Depends-on.** WI-04 shipped (the `GroupResolver` boundary is where the reactive-stack unification starts). Benefits from WI-02 + WI-03 (landed v2.2.0). + +--- + +### WI-09 β€” per-repo `RepoBulkhead` + +**Goal.** Replace the static `GroupSlice.DRAIN_EXECUTOR` + `DRAIN_DROP_COUNT` process-wide resources with per-repo bulkheads so saturation in one repository does not starve another. Each `MemberSlice` receives its repo's bulkhead. Bulkhead limits (`max_concurrent_requests`, `max_queue_depth`, `retry_after_seconds`) configured per-repo in `pantera.yml`. + +**Files new.** +- `pantera-core/src/main/java/com/auto1/pantera/http/resilience/RepoBulkhead.java` β€” record + `acquire(timeout)` returning `Result` β†’ `Overload` on reject. +- `pantera-core/src/main/java/com/auto1/pantera/http/resilience/BulkheadLimits.java` β€” YAML-bindable record. + +**Files modified.** +- `pantera-main/src/main/java/com/auto1/pantera/RepositorySlices.java` (or equivalent repo-factory) β€” constructs and injects a `RepoBulkhead` per repo. +- Every `MemberSlice` (from WI-04) β€” receives its repo's bulkhead; every `upstream call` goes through `acquire(budget)`. + +**Files deleted.** `GroupSlice.DRAIN_EXECUTOR` + `DRAIN_DROP_COUNT` β€” but `GroupSlice` itself is already gone after WI-04; this WI is about ensuring the per-repo replacement is wired. If any shell of `DRAIN_EXECUTOR` survives into `GroupResolver`, delete it here. + +**Tests required.** +- Isolation test: saturate repo A's bulkhead with 1 k concurrent requests; repo B's latency unchanged. +- Metrics test: every bulkhead-related counter carries `{repo=...}` tag. 
+- Fault translation test: a `Result.err(Fault.Overload("repo-A", 1s))` from a slice translates to `503 + Retry-After: 1 + X-Pantera-Fault: overload:repo-A` (already locked in by `FaultTranslator` from WI-01; this is a regression guard).
+
+**DoD.**
+```
+$ rg 'DRAIN_EXECUTOR|DRAIN_DROP_COUNT' --glob '*.java'
+# expect: 0 matches
+
+$ rg 'new RepoBulkhead' --glob '*.java' | rg -v test
+# expect: 1 match — the per-repo wiring site
+
+$ mvn -T8 install test
+# expect: BUILD SUCCESS
+```
+
+**Depends-on.** WI-04 shipped.
+
+---
+
+## P2 — release gates
+
+### WI-10 — SLOs + CI perf baseline + chaos + release-gate script
+
+**Goal.** Every future regression caught in CI, not in prod. One SLO doc per adapter declaring p50/p95/p99 latency and error-rate budgets. A perf-baseline workflow runs a fixed benchmark on every PR and fails if the p99 latency regresses by >10% vs the baseline. Chaos tests (`@Chaos` tag) simulate flaky upstreams, DB timeouts, queue saturation nightly. A `release-gate.sh` script enforces the three gates before tag push.
+
+**Files new.**
+- `docs/slo/npm-proxy.md`, `docs/slo/npm-group.md`, `docs/slo/maven-proxy.md`, `docs/slo/maven-group.md`, `docs/slo/pypi-proxy.md`, `docs/slo/docker-pull.md`, `docs/slo/file-raw.md` — one per adapter.
+- `.github/workflows/perf-baseline.yml`.
+- `tests/perf-baselines/{adapter}.json` — machine-readable baseline (p50, p95, p99, throughput).
+- `tests/chaos/*.java` — tagged `@Chaos` (JUnit 5 tag); excluded from default `mvn test`.
+- `tests/property/SingleFlightPropertyTest.java` — a copy / promotion of the v2.2.0 in-tree property test, wired into the chaos suite with longer run-counts (N=10 000 etc).
+- `scripts/release-gate.sh` — bash script that runs the three gates in order, prints a red/green report, exits non-zero on failure.
+
+**Files modified.**
+- `.github/workflows/ci.yml` — new required check `perf-baseline` before merge.
+- Root `README.md` β€” new section "Release gates" with the command-line. + +**Tests required.** +- CI workflow gates PRs on (a) ≀10% perf regression vs baseline, (b) all `@Chaos` tests passing in the nightly job, (c) SLO budget not-exceeded. +- The baseline-regression test itself: introduce an intentional 30% regression in a PR branch; CI fails with a clear error message pointing at the offending `p99_ms` metric. + +**DoD.** +``` +$ scripts/release-gate.sh +# expect: exit 0 on clean main + +$ # simulate an intentional regression +$ git apply tests/perf-baselines/regression-fixture.patch +$ scripts/release-gate.sh +# expect: exit 1 with an error naming the regressed metric and adapter + +$ # nightly chaos +$ mvn -T8 test -Dgroups=Chaos +# expect: BUILD SUCCESS +``` + +**Depends-on.** WI-04 through WI-09 shipped (tests exercise the full target state). + +--- + +## WI-post-03 follow-ups + +Four residuals from the v2.2.0 Wave-3 review. None are correctness-blocking for the v2.2.0 merge but each needs an owner in a future session: + +### WI-post-03a β€” hoist `DbIndexExecutorService` into `pantera-core` + +**Goal.** The Wave-3 `DbArtifactIndex` migration to `ContextualExecutor` introduced a private static-nested `DbIndexExecutorService` inside `DbArtifactIndex.java` that adapts the contextualised `Executor` back into an `ExecutorService` (forwarding lifecycle methods to the underlying pool, routing `execute(Runnable)` through the contextual wrapper). The pattern is reusable β€” Quartz worker pools, any future JDBC executor, and the WI-06 cooldown executor would all benefit. Hoist the nested class to `pantera-core/src/main/java/com/auto1/pantera/http/context/ContextualExecutorService.java` so every hot-path `ExecutorService` can opt in via a single factory method. 
+ +**Files new.** +- `pantera-core/src/main/java/com/auto1/pantera/http/context/ContextualExecutorService.java` β€” `public static ExecutorService contextualize(ExecutorService delegate)` + the 10-method delegating implementation currently nested in `DbArtifactIndex`. +- `pantera-core/src/test/java/com/auto1/pantera/http/context/ContextualExecutorServiceTest.java` β€” lifecycle forward-through test (shutdown/shutdownNow/awaitTermination delegate correctly; execute goes through the wrapper). + +**Files modified.** +- `pantera-main/src/main/java/com/auto1/pantera/index/DbArtifactIndex.java` β€” replace the nested `DbIndexExecutorService` with `ContextualExecutorService.contextualize(pool)`; delete the nested class body (~40 lines). + +**Files deleted.** The nested class body inside `DbArtifactIndex.java`. + +**Tests required.** +- Lifecycle test: `shutdown()` / `shutdownNow()` / `awaitTermination(...)` all delegate to the underlying pool. +- Context-propagation test: submit a Runnable, assert `ThreadContext` + APM span are installed on the runner thread. +- Idempotent wrapping test: `contextualize(contextualize(pool))` still only installs context once (no double-wrap penalty). + +**DoD.** +``` +$ rg 'class DbIndexExecutorService' --glob '*.java' +# expect: 0 matches β€” hoisted + +$ rg 'ContextualExecutorService\.contextualize' --glob '*.java' | rg -v test | wc -l +# expect: β‰₯ 1 β€” the DbArtifactIndex wiring site + +$ mvn -T8 -pl pantera-core,pantera-main test -DfailIfNoTests=false +# expect: BUILD SUCCESS +``` + +**Depends-on.** WI-02 shipped (v2.2.0). **Blocked on:** an architectural review that confirms the file-scope freeze agreed during Wave-3 parallel-agent discipline can be lifted for this one hoist (the rule was "don't touch files outside your WI" to prevent work-wipes; this task requires touching `DbArtifactIndex.java` from the pantera-core scope). 
+ +--- + +### WI-post-03b β€” re-lift `user_agent.name` / `.version` / `.os.name` parsing into `StructuredLogger.access` + +**Goal.** Pre-v2.2.0, `EcsLogEvent` parsed the `User-Agent` header into `user_agent.name`, `user_agent.version`, `user_agent.os.name`, `user_agent.os.version` sub-fields and emitted all four on every access-log line. The Wave-3 WI-03 migration removed the dual emission; only `user_agent.original` survives via `RequestContext`. Operator dashboards in Kibana that query the sub-fields need them back (or they can switch to `user_agent.original` for now β€” see the v2.2.0 migration notes). + +**Files modified.** +- `pantera-core/src/main/java/com/auto1/pantera/http/observability/StructuredLogger.java` β€” extend the `AccessAt` payload builder to parse `user_agent.original` and emit `user_agent.name` / `.version` / `.os.name` / `.os.version` when parsing succeeds. Reuse the parser in `pantera-core/http/log/EcsLogEvent.java` so the field contract is byte-identical to pre-v2.2.0. + +**Files new.** None β€” optionally, hoist the parser into its own class if it's currently inlined in `EcsLogEvent`. + +**Files deleted.** None. + +**Tests required.** +- `AccessLoggerTest.userAgentParsedIntoSubFields` β€” seed a `RequestContext` with `user_agent.original = "Maven/3.9.6 (Linux; 5.15.0-79-generic)"` and assert the emitted payload contains `user_agent.name = "Maven"`, `user_agent.version = "3.9.6"`, `user_agent.os.name = "Linux"`, `user_agent.os.version = "5.15.0-79-generic"`. + +**DoD.** +``` +$ mvn -T8 -pl pantera-core test -Dtest='AccessLoggerTest#userAgentParsedIntoSubFields' +# expect: BUILD SUCCESS, test green +``` + +**Depends-on.** WI-03 shipped (v2.2.0). **Optional.** Operators can query `user_agent.original` directly. + +--- + +### WI-post-03c β€” unify and delete `MdcPropagation.java` + +**Goal.** The v2.2.0 changelog records `MdcPropagation` as `@Deprecated(since="2.2.0", forRemoval=true)` with ~110 production call-sites retained. 
After WI-04 (deletes 19 group-package callers), WI-06 (deletes 25 cooldown-package callers), WI-08 (deletes 5 npm-adapter callers), and WI-post-03d below (deletes the ~55 handler callers), the deprecated class will have zero production callers. This task finishes the job: delete the class, delete `MdcPropagationTest.java`, update the changelog. + +**Files modified.** Nothing β€” this is a pure deletion WI. + +**Files deleted.** +- `pantera-core/src/main/java/com/auto1/pantera/http/trace/MdcPropagation.java` +- `pantera-core/src/test/java/com/auto1/pantera/http/trace/MdcPropagationTest.java` + +**Tests required.** +- Grep test: `rg 'MdcPropagation' --glob '*.java'` returns 0 matches. +- Full reactor build succeeds after deletion (catches any transitive import that was missed). + +**DoD.** +``` +$ rg 'MdcPropagation' --glob '*.java' +# expect: 0 matches + +$ mvn -T8 install test +# expect: BUILD SUCCESS + +$ git log --oneline | head -1 | grep 'MdcPropagation' +# expect: a commit message starting with "refactor(observability): delete MdcPropagation..." +``` + +**Depends-on.** WI-04, WI-06, WI-08, WI-post-03d all shipped. + +--- + +### WI-post-03d β€” migrate the 11 Vert.x API handlers to a `ContextualExecutor`-wrapped worker pool + +**Goal.** The 11 Vert.x API handlers (`AdminAuthHandler`, `ArtifactHandler`, `AuthHandler`, `CooldownHandler`, `DashboardHandler`, `PypiHandler`, `RepositoryHandler`, `RoleHandler`, `SettingsHandler`, `StorageAliasHandler`, `UserHandler`) under `pantera-main/src/main/java/com/auto1/pantera/api/v1/` collectively account for the majority (~55) of the remaining `MdcPropagation.` call-sites. Each handler currently wraps a `Callable` with `MdcPropagation.withMdc(...)` before handing it to a Vert.x blocking-worker dispatcher. Replace the per-call wrapping with a single handler-level `ContextualExecutor`-wrapped worker pool so every blocking handler inherits ThreadContext + APM span propagation automatically. 
+ +**Files new.** +- `pantera-main/src/main/java/com/auto1/pantera/api/v1/HandlerExecutor.java` β€” shared `ContextualExecutor`-wrapped worker pool, constructed once at server start, injected into every handler. + +**Files modified.** +- Each of the 11 handlers β€” replace the `MdcPropagation.withMdc(callable)` wrapping with direct submission to the injected `HandlerExecutor`. The callable itself need not change; only the submission path does. +- Unit tests for each handler β€” assert a parallel submission sees the caller's ThreadContext (new test per handler). + +**Files deleted.** Every `MdcPropagation.` call-site in the 11 handler files. + +**Tests required.** +- Per-handler contextual-propagation test (11 new tests). +- Grep test: `rg 'MdcPropagation\.' pantera-main/src/main/java/com/auto1/pantera/api/v1/ --glob '*.java'` returns 0 matches. + +**DoD.** +``` +$ rg 'MdcPropagation\.' pantera-main/src/main/java/com/auto1/pantera/api/v1/ --glob '*.java' +# expect: 0 matches + +$ mvn -T8 -pl pantera-main test -DfailIfNoTests=false +# expect: BUILD SUCCESS, 929 + ~11 new tests green +``` + +**Depends-on.** WI-02 shipped (v2.2.0). Benefits from WI-04 (`GroupResolver` has already adopted `ContextualExecutor`, so the pattern is proven). + +--- + +## Other concerns carried forward from the v2.2.0 review (out-of-scope bugs / P0 items for future sessions) + +These are observations from the v2.2.0 Wave-1-2-3 reviews. None are correctness-blocking for v2.2.0 merge, but each needs an owner in a future session: + +### C2 β€” `IntegrityAuditor.computeDigests` materialises the whole primary into memory + +`ProxyCacheWriter.IntegrityAuditor.computeDigests` calls `storage.value(key).join().asBytes()` on the primary, which materialises the entire file (potentially 700 MB wheel, 50 MB jar) into a single `byte[]`. 
This is an offline admin tool invoked via `scripts/pantera-cache-integrity-audit.sh`, not a serve-path hot path, so the worst case is the admin's laptop running out of heap on a big artifact. Acceptable for v2.2.0 but worth replacing with a streamed `asInputStream()` + same chunked digest loop the online `streamPrimary` uses. ~20 LoC refactor; ship as part of WI-post-07 or WI-10. + +### C3 β€” `CachedProxySlice.verifyAndServePrimary` silently turns every exception into `notFound()` + +In `maven-adapter/src/main/java/com/auto1/pantera/maven/http/CachedProxySlice.java:412`, the outer `.exceptionally` block catches any throwable and returns 404 with a WARN log. A real transient storage failure (disk full, I/O error on `storage.exists`) would surface to the client as a misleading 404 rather than a 503. The Fault taxonomy from WI-01 is exactly the right vehicle to fix this β€” return `Result` and let `FaultTranslator` produce `StorageUnavailable β†’ 500` or `Internal β†’ 500`. Ship as part of WI-04 when `GroupResolver` wires the Fault path end-to-end, or earlier if needed. **The same pattern exists in the three WI-post-07 adapters shipped in v2.2.0** (`pypi-adapter`, `go-adapter`, `composer-adapter`) β€” the wiring inherited the same-exception-to-404 behaviour; WI-04 should fix all four at once. + +### C5 β€” `queue.add` annotation comment is enforced by convention, not tooling + +The acceptance grep `rg 'queue\.add\(' | rg -v '// ok:' = 0` only works because a human remembered to add the `// ok:` annotation to every unbounded-queue site. A future engineer adding a bounded queue + `add()` without the comment would silently regress WI-00. Fix options: (a) a PMD/Checkstyle rule rejecting `Queue.add` outside test code, or (b) a `@SuppressWarnings("QueueAdd")` custom annotation that tooling enforces. Ship as a small safety-net item when WI-10 lands the release-gate script. 
+ +### C6 β€” audit logger is only non-suppressible by convention + +`StructuredLogger.audit()` writes to logger `com.auto1.pantera.audit`. The bundled `pantera-main/src/main/resources/log4j2.xml` does NOT declare a dedicated `` block β€” the logger inherits from `com.auto1.pantera` at INFO. That means if an operator drops `com.auto1.pantera` to WARN (common during noisy-investigation sessions), audit events are suppressed. **Fix:** add a dedicated logger element with `additivity="false"` and an explicit `level="info"` that cannot be reduced, plus a regression test that configures the root at ERROR and asserts audit events still fire. Ship as part of WI-10 (release-gate verification) or as a standalone config-fix patch in v2.2.1. diff --git a/docs/analysis/v2.2-target-architecture.md b/docs/analysis/v2.2-target-architecture.md new file mode 100644 index 000000000..152a3e18c --- /dev/null +++ b/docs/analysis/v2.2-target-architecture.md @@ -0,0 +1,1609 @@ +# Pantera 2.2 β€” Target Architecture Design + +**Audience:** engineering leadership + implementation agents (each section is executable once the approach is signed off) +**Goal:** take the current 5.4 / 10 enterprise-architecture score to **β‰₯ 9 / 10** while preserving 100% of the user-visible behavior contract. +**Inputs:** +- `v2.1.3-post-deploy-analysis.md` β€” forensic analysis (tactical F1.x-F4.x) +- `v2.1.3-architecture-review.md` β€” anti-patterns A1-A20, cross-cutting C1-C6, strategic refactors R1-R9 +- User-stated logical requirements (group resolution flow + 3-tier logging model) +- Runtime constraints: 1k-3k req/s concurrent, 3M artifacts, 15 vCPU / 24 GiB RAM / 14g heap per container, Java 21 + +--- + +## 0. Executive summary β€” what changes + +1. **One reactive stack.** `CompletableFuture` + Java 21 structured concurrency for blocking IO; delete RxJava2 from every hot path. Closes anti-patterns A9, A10, A15, A17. +2. 
**One fault taxonomy.** A sealed `Fault` type flows through every slice; a single `FaultClassifier` + `FaultTranslator` own HTTP-status policy. Closes A2, A3, A11, A20 and satisfies the user's "500 on DB-down / 500 on all-proxy-fail" policy from one place. `AllProxiesFailed` carries the winning (= least-bad) proxy `Response` so the client still gets the upstream body β€” see Β§3.1 and Β§9. +3. **One context, ECS-native, propagated automatically.** Elastic APM `Context` + Log4j2 `ThreadContext` (MDC) replace the 446-LOC `MdcPropagation` helper. `EcsLayout` owns field naming (`trace.id`, `span.id`, `transaction.id`, `user.name`, `client.ip`, `package.*`, `url.*`). Every async hop snapshots + restores ThreadContext inside a `ContextualExecutor`. Closes A13, A14, C4. +4. **One single-flight utility.** Caffeine `AsyncCache` replaces the three hand-rolled coalescers (`inFlightFanouts`, `RequestDeduplicator`, `inFlightMetadataFetches`). Closes A6, A7, A8, A9. +5. **Negative cache promoted to the top of the hot path**, keyed per the user's spec `scope:repoType:artifactName:artifactVersion`; checked **before** the DB index and **before** every proxy fanout, fed by every confirmed 404, **invalidated synchronously from the upload path** so the ordering is safe (Β§5.3). Satisfies the user's logic requirement. +6. **Per-repo bulkheads.** Each repository owns its drain pool, HTTP client pool, DB semaphore, metrics tag set. One bad repo cannot starve the others. Closes A5, A16, A19, C3. +7. **Five-tier structured logging** β€” Tier-1 clientβ†’pantera (access), Tier-2 panteraβ†’pantera (internal 500 only), Tier-3 panteraβ†’upstream (5xx only), Tier-4 local operations (policy-driven), **Tier-5 audit** (artifact publish / download / delete / resolution β€” compliance stream, separate retention). Level policy enforced at the builder; required fields enforced by phantom-typed builders. Satisfies the user's logging model. +8. 
**Proxy cache integrity** β€” atomic primary + sidecar write with digest verification on cache-write, eliminates the Maven `.pom.sha1` mismatches observed in production (Β§9.5). +9. **Release gates:** adapter-level SLOs, burn-rate alerts, CI perf baseline, concurrency property tests, chaos injection. Closes C5, C6. +10. **Implementation model.** Ships as a new minor release **v2.2.0** (not a sprint-timed rollout). Each work item below is an agent-executable unit with a machine-checkable definition-of-done; agents pick up, execute, self-review, and iterate in a code-then-review loop until DoD is met. Parallelism allowed on items without `depends-on` arrows. + +End state: **~4000 lines of net code deletion** across the group/proxy/cache/MDC helpers, replaced with **~1500 lines of core abstractions** used by every adapter. + +--- + +## 1. Design principles (the 9/10 charter) + +| # | Principle | Implication | +|---|--------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------| +| 1 | **One reactive/concurrency primitive on the hot path.** | `CompletionStage` + Java 21 `StructuredTaskScope` for fanout; Vert.x event-loops for HTTP IO. No RxJava2. | +| 2 | **Faults are data, not exceptions, on request paths.** | Every slice returns `CompletionStage` where `Result = Response | Fault`. Exceptions are reserved for programming errors. | +| 3 | **One classifier, one translator.** | `FaultClassifier` decides what kind of fault this is; `FaultTranslator` decides what HTTP status + headers + body to emit. | +| 4 | **No per-call-site observability boilerplate.** | Context propagates automatically; loggers fetch required fields from context. 
| +| 5 | **Every async op has a deadline; the deadline propagates end-to-end.** | `Deadline.in(Duration.ofSeconds(10)).bind()` β€” DB, member calls, storage reads all see the same deadline. | +| 6 | **Bounded everything, explicit overflow policy.** | Every queue, every executor, every in-flight map has a capacity + a documented overflow behavior (drop, block, reject). | +| 7 | **Per-repo blast radius.** | Each repo's resources (HTTP pool, drain pool, DB semaphore, negative cache partition) are independent. | +| 8 | **Serve from cache without touching the network.** | Negative cache β†’ DB index β†’ storage are all fast, local paths. Network IO only on real cache miss. | +| 9 | **Observability cost is ≀ 1% of CPU budget.** | Structured logs, sampled traces, rate-limited WARN/ERROR. Stack traces only for Faults classified `INTERNAL`. | +| 10| **Every regression is a test first.** | Concurrency property tests for coalescers, chaos tests for DB/member/storage failures, perf baseline in CI. | + +--- + +## 2. Target request flow β€” Group resolution (the user's logic, formalized) + +``` + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ GroupResolver.resolve(req, ctx) β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ β”‚ β”‚ + β–Ό β–Ό β–Ό + STEP 1: NegativeCache.isKnown404 PARSE artifact name + version STEP 4: All paths + (scope=group, key=repo:type: from URL via ArtifactNameParser end in emit() + name:version) (nullable version for metadata with a Result + ────────────────────────── endpoints). (Response | Fault) + hit? return 404 [PATH A] that goes through + FaultTranslator. 
+ + β”‚ miss + β–Ό + STEP 2: ArtifactIndex.locateByName(name, deadline=ctx.deadline) + ────────────────────────────────────────────────────────────── + β€’ outcome = Hit(List) β†’ continue to STEP 3 + β€’ outcome = Miss β†’ continue to STEP 3' (proxy fanout) + β€’ outcome = Timeout / DBFailure β†’ return Fault.IndexUnavailable [PATH B β†’ 500] + + β”‚ Hit + β–Ό + STEP 3: StorageRead(members.first()).read(key, deadline) + ────────────────────────────────────────────────────────── + β€’ 2xx β†’ stream response to client [PATH OK] + β€’ NotFound (TOCTOU drift) β†’ fall through to STEP 3' + β€’ StorageFault β†’ return Fault.StorageUnavailable [PATH B β†’ 500] + + β”‚ Miss (STEP 2) OR TOCTOU (STEP 3) + β–Ό + STEP 3': Proxy fanout (only if group has proxy members) + ────────────────────────────────────────────────────── + if no proxy members β†’ NegativeCache.put + return 404 [PATH A] + SingleFlight.load(key): + StructuredTaskScope.race( proxyMember[i].get(req, deadline) ... ) + β€’ first Response(2xx) wins β†’ stream to client, cancel + drain others [PATH OK] + β€’ all β†’ NotFound β†’ NegativeCache.put + return 404 [PATH A] + β€’ any β†’ Fault(5xx/internal) AND no 2xx winner + β†’ return Fault.AllProxiesFailed [PATH B β†’ 500] + + STEP 4 (emit): + ────────── + FaultTranslator.translate(result, ctx) β†’ HttpResponse + ResponseWriter.stream(httpResponse, ctx) // single place that calls response.end() +``` + +### Status policy (user-specified, codified in `FaultTranslator`) + +| Cause (as `Fault`) | HTTP | Body / Headers | `X-Pantera-Fault` | Retry-After | +|----------------------------------------------|------|---------------------------------------------------------------------------------------------------|------------------------|-------------| +| `NotFound` | 404 | `"{\"error\":\"not found\"}"` | β€” | β€” | +| `Forbidden` (cooldown/auth) | 403 | adapter-specific | β€” | β€” | +| `IndexUnavailable` (DB down/timeout) | 500 | `"{\"error\":\"internal server error\"}"` | 
`index-unavailable` | β€” | +| `StorageUnavailable` | 500 | same | `storage-unavailable` | β€” | +| `AllProxiesFailed` (no 2xx winner) | **pass-through** | **streams the winning 5xx proxy `Response` verbatim** (status, headers, body) plus adds `X-Pantera-Fault` + `X-Pantera-Proxies` headers. Status defaults to 502 only if no proxy produced a response at all (all threw / cancelled). | `proxies-failed:` | β€” | +| `Internal` (NPE / queue-full / bug) | 500 | same | `internal` | β€” | +| `Deadline` (end-to-end deadline exceeded) | 504 | `"{\"error\":\"request timeout\"}"` | `deadline-exceeded` | β€” | +| `Overload` (bulkhead reject) | 503 | `"{\"error\":\"overloaded\"}"` | `overload` | `1` | + +> Notes: +> 1. **When `AllProxiesFailed` is (and isn't) constructed.** The fanout is a race-to-first-**success**. `AllProxiesFailed` is only constructed when **no proxy returned 2xx**: +> - any proxy β†’ 2xx β‡’ race-winner; cancel + drain the others; client sees the 2xx verbatim. `AllProxiesFailed` not constructed. +> - every proxy β†’ clean 404 β‡’ `NotFound` + populate group-scope negative cache. Client sees 404. `AllProxiesFailed` not constructed. +> - otherwise (at least one 5xx / timeout / exception, and no 2xx) β‡’ `AllProxiesFailed` with the full `MemberOutcome` list. +> 2. **`AllProxiesFailed` is pass-through.** If any proxy produced a `Response`, we forward that response verbatim β€” the client gets the upstream status, headers, and body. Added headers: `X-Pantera-Fault: proxies-failed:` plus `X-Pantera-Proxies-Tried: `. If every proxy threw / was cancelled / timed out (no `Response` to forward at all), we synthesize 502 with a generic body β€” this is the **only** path that synthesizes a status for `AllProxiesFailed`. +> 3. **Which proxy response wins when multiple proxies returned a 5xx.** Chosen by (in order): +> 1. **Retryability** β€” prefer the more retryable status so clients actually retry instead of giving up. Preference: `503 > 504 > 502 > 500 > other 5xx`. 
Rationale: if proxy A says "internal server error" (500, terminal) and proxy B says "service unavailable" (503, transient), the aggregated truth is "transient β€” retry". Telling the client 500 when *one* of the members is just temporarily unavailable wastes CI capacity. +> 2. **Has body** β€” prefer a response with a non-empty body over an empty one (diagnostic info). +> 3. **Declaration order** β€” among ties, earliest-declared member wins (deterministic, easy to reason about). +> 4. **`IndexUnavailable` / `StorageUnavailable` stay at 500** per the user's original requirement. `FaultTranslator` is the one file to edit if the policy ever shifts (e.g. flip to 503 to match `Overload`). +> +> **Worked examples:** +> +> | Fanout outcome | Client sees | +> |----------------------------------------------|-----------------------------------------------------------------------------| +> | Proxy A β†’ 200 Β· Proxy B β†’ 500 | 200 from A (race). B cancelled + drained. `AllProxiesFailed` not constructed. | +> | All proxies β†’ 404 | 404 + group-scope neg cache populated. `AllProxiesFailed` not constructed. | +> | Proxy 1 β†’ 500 Β· Proxy 2 β†’ 503 | **503 from Proxy 2, verbatim** (503 beats 500 by retryability). | +> | Proxy 1 β†’ 502 Β· Proxy 2 β†’ 500 | **502 from Proxy 1, verbatim** (502 beats 500 by retryability). | +> | Proxy 1 β†’ 404 Β· Proxy 2 β†’ 500 | 500 from Proxy 2, verbatim (mixed β†’ `AllProxiesFailed`; only Proxy 2 has a Response). | +> | Proxy 1 β†’ 503 (empty) Β· Proxy 2 β†’ 503 (JSON) | 503 from Proxy 2, verbatim (same status β†’ with-body beats no-body). | +> | Proxy 1 β†’ ConnectException Β· Proxy 2 β†’ 500 | 500 from Proxy 2, verbatim (only Proxy 2 produced a Response). | +> | Every proxy threw / cancelled / timed out | Synthesized 502 with `X-Pantera-Fault: proxies-failed:none-responded`. | + +--- + +## 3. Core types + +All in `pantera-core` so every adapter uses the same vocabulary. 
+ +### 3.1 `Fault` β€” sealed sum type + +```java +// pantera-core/src/main/java/com/auto1/pantera/http/fault/Fault.java +package com.auto1.pantera.http.fault; + +public sealed interface Fault { + + /** 404: artifact does not exist in this scope. */ + record NotFound(String scope, String artifact, String version) implements Fault {} + + /** 403: cooldown, auth rejected, or explicit block. */ + record Forbidden(String reason) implements Fault {} + + /** 500: index/DB unavailable (timeout, connection failure, statement timeout). */ + record IndexUnavailable(Throwable cause, String query) implements Fault {} + + /** 500: storage read failed (IO error, ValueNotFoundException on sidecar, etc). */ + record StorageUnavailable(Throwable cause, String key) implements Fault {} + + /** + * No 2xx winner across proxy members. Carries the outcomes AND the winning + * proxy `Response` (if any proxy produced one) so `FaultTranslator` can + * stream it verbatim to the client. When no member produced a Response + * at all (all threw / cancelled / timed out), `winningResponse` is empty + * and `FaultTranslator` synthesizes a 502. + */ + record AllProxiesFailed( + String group, + java.util.List outcomes, + java.util.Optional winningResponse + ) implements Fault { + public record ProxyFailure(String memberName, Response response) {} + } + + /** 500: programming error, NPE, queue overflow, classifier default. */ + record Internal(Throwable cause, String where) implements Fault {} + + /** 504: end-to-end deadline exceeded. */ + record Deadline(java.time.Duration budget, String where) implements Fault {} + + /** 503: bulkhead / rate limiter rejected. Carries suggested retry-after. 
*/ + record Overload(String resource, java.time.Duration retryAfter) implements Fault {} + + record MemberOutcome(String member, Kind kind, Throwable cause) { + public enum Kind { OK, NOT_FOUND, FIVE_XX, EXCEPTION, CANCELLED, CIRCUIT_OPEN } + } +} +``` + +### 3.2 `Result` β€” success or fault + +```java +// pantera-core/src/main/java/com/auto1/pantera/http/fault/Result.java +public sealed interface Result { + record Ok(T value) implements Result {} + record Err(Fault fault) implements Result {} + + static Result ok(T value) { return new Ok<>(value); } + static Result err(Fault f) { return new Err<>(f); } + + default Result map(java.util.function.Function fn) { + return this instanceof Ok ok ? ok(fn.apply(ok.value)) : (Result) this; + } + + default Result flatMap(java.util.function.Function> fn) { + return this instanceof Ok ok ? fn.apply(ok.value) : (Result) this; + } +} +``` + +The slice contract becomes: + +```java +public interface Slice { + CompletionStage> handle(Request req, RequestContext ctx); +} +``` + +Exceptions inside a slice body that escape to `.exceptionally(...)` are **only** converted to `Fault.Internal` β€” they are never the primary fault-signaling mechanism. This closes **A1, A2, A3** at the type level. + +### 3.3 `RequestContext` β€” immutable per-request envelope, ECS-native + +Pantera emits logs in **Elastic Common Schema (ECS)** format via `co.elastic.logging.log4j2.EcsLayout` and propagates the request trace via the **Elastic APM agent's** transaction/span API. OpenTelemetry is *not* introduced β€” the APM agent already captures `trace.id`/`span.id`/`transaction.id` and the existing `EcsLayout` reads them from Log4j2 `ThreadContext` (MDC). The doc uses "Context" to mean the APM transaction + MDC snapshot, not an OTel `Context` object. 
+ +```java +// pantera-core/src/main/java/com/auto1/pantera/http/context/RequestContext.java +public record RequestContext( + String traceId, // ECS: trace.id β€” from ElasticApm.currentTransaction() + String transactionId, // ECS: transaction.id β€” from APM + String spanId, // ECS: span.id β€” current span + String httpRequestId, // ECS: http.request.id β€” unique per HTTP request + // (X-Request-ID header, else UUIDv7) + String userName, // ECS: user.name β€” "anonymous" if not auth'd + String clientIp, // ECS: client.ip β€” XFF β†’ X-Real-IP β†’ remote + String userAgent, // ECS: user_agent.original + String repoName, // ECS: repository.name (Pantera custom field) + String repoType, // ECS: repository.type + ArtifactRef artifact, // ECS: package.name + package.version + String urlOriginal, // ECS: url.original + String urlPath, // ECS: url.path + Deadline deadline // wall-clock deadline; not emitted, carried through +) { + public record ArtifactRef(String name, String version) { + public static final ArtifactRef EMPTY = new ArtifactRef("", ""); + public boolean isEmpty() { return name.isEmpty(); } + } + + /** Push every field into Log4j2 ThreadContext so EcsLayout emits them automatically. */ + public AutoCloseable bindToMdc() { ... } + + /** Pull back into a record from the current ThreadContext (used on thread hops). */ + public static RequestContext fromMdc() { ... } +} +``` + +Constructed once by `EcsLoggingSlice` at request entry (after auth resolution). `bindToMdc()` is called inside a try-with-resources so the ThreadContext is scoped to the lifetime of the Vert.x request on the **event-loop** thread; thread hops go through `ContextualExecutor` (Β§4.4) which snapshots + restores on behalf of the caller. This closes **A13** and satisfies the user's ECS constraint. 
+ +### 3.4 `Deadline` β€” end-to-end budget + +```java +// pantera-core/src/main/java/com/auto1/pantera/http/context/Deadline.java +public record Deadline(long expiresAtNanos) { + public static Deadline in(Duration d) { + return new Deadline(System.nanoTime() + d.toNanos()); + } + public Duration remaining() { + return Duration.ofNanos(Math.max(0, expiresAtNanos - System.nanoTime())); + } + public boolean expired() { return remaining().isZero(); } + /** For JDBC setQueryTimeout(int seconds), HTTP client setTimeout, etc. */ + public Duration remainingClamped(Duration max) { + final Duration rem = remaining(); + return rem.compareTo(max) > 0 ? max : rem; + } +} +``` + +Default `Deadline.in(Duration.ofSeconds(30))` at request entry; each layer may shrink (never extend) it. + +### 3.5 `MemberSlice` β€” sealed routing target + +```java +public sealed interface MemberSlice permits HostedMember, ProxyMember { + String name(); + String repoType(); + CompletionStage> get(Request req, RequestContext ctx); +} +public record HostedMember(String name, String repoType, Storage storage, ArtifactIndex idx) + implements MemberSlice { ... } +public record ProxyMember(String name, String repoType, Storage cache, HttpClient upstream, + AutoBlockRegistry cb, NegativeCache neg, BulkheadLimits limits) + implements MemberSlice { ... } +``` + +Proxy vs. hosted was a runtime boolean on the current `MemberSlice`; promoting it to a sealed type lets the `GroupResolver` pattern-match instead of branching on booleans. Closes **A4**. + +--- + +## 4. Observability model β€” the five-tier contract + +The user's logging spec, formalized as a `StructuredLogger` with **enforced required fields**. The Audit tier (Tier-5) is a distinct compliance stream β€” different retention, different consumers β€” and is called out separately so its log-policy and schema can't drift into the operational tiers. 
+ +### 4.1 Logger tiers + +```java +// pantera-core/src/main/java/com/auto1/pantera/http/observability/StructuredLogger.java +public final class StructuredLogger { + + /** Tier-1: client β†’ Pantera (access log). 4xx / 5xx / >5s slow only β€” 2xx at DEBUG. */ + public static final class AccessLogger { ... } + + /** Tier-2: Pantera β†’ Pantera (group β†’ member, group β†’ storage, slice β†’ slice). 500 only. */ + public static final class InternalLogger { ... } + + /** Tier-3: Pantera β†’ upstream (HTTP client call to npmjs.org / maven central / etc). 5xx + throw only. */ + public static final class UpstreamLogger { ... } + + /** Tier-4: local ops (DB, Quartz, Caffeine, Valkey, storage). Policy-driven level. */ + public static final class LocalLogger { ... } + + /** + * Tier-5: AUDIT stream β€” a deliberately minimal, compliance-focused trail. + * + *

+ * <p>Closed action enum (exactly four):
+ * {@code ARTIFACT_PUBLISH | ARTIFACT_DOWNLOAD | ARTIFACT_DELETE | RESOLUTION}.
+ *
+ * <p>Required fields (compile-time, phantom-typed builder):
+ * • {@code client.ip} — from where (the source)
+ * • {@code user.name} — who ("anonymous" if no auth, never null)
+ * • {@code event.action} — what (one of the four actions)
+ * • {@code package.name} + {@code package.version} (+ {@code package.checksum} when known)
+ *   — on what object
+ * • {@code trace.id} — for linking the audit event back to the client→pantera
+ *   request it originated from (investigation chain)
+ *
+ * <p>Distinct from the 4 operational tiers because audit events are:
+ * • compliance-sensitive (separate Elastic dataset, 90 d retention)
+ * • always emitted, independent of operational log.level (no suppression under load)
+ *
+ * <p>Operational events (cache_write, cache_invalidate, pool_init, etc.) are NOT audit —
+ * they belong in Tier-4 LocalLogger.
+ *
Backed by the existing AuditLogger (pantera-core/audit/AuditLogger.java). + * This class is the typed entry point that enforces the minimal schema above at + * compile time. + */ + public static final class AuditLogger { ... } +} +``` + +Each tier has a **builder that requires** the tier-specific fields by type: + +```java +accessLogger.forRequest(ctx) // RequestContext required + .status(503) + .body("Upstream temporarily unavailable") + .fault(fault) // attaches error.message, error.type, error.stack_trace + .warn(); // level inferred from status: 5xxβ†’error, 4xxβ†’warn + +internalLogger.forCall(ctx, member) // RequestContext + member name required + .fault(fault) // 500-only path only logs if fault is present + .error(); // compile-time enforced: only error level + +upstreamLogger.forUpstream(ctx, upstreamUri) // RequestContext + upstream URI required + .responseStatus(502) + .duration(durationMs) + .cause(throwable) + .error(); + +localLogger.forComponent("com.auto1.pantera.index") + .message("executor queue saturated β€” caller-runs applied") + .reqCtx(ctx) // optional β€” if the local op is request-linked + .warn(); + +auditLogger.forEvent(ctx, AuditAction.ARTIFACT_PUBLISH) // RequestContext + action enum required + .packageName("com.fasterxml:jackson-core") // required β€” "on what" + .packageVersion("2.16.1") // required β€” "on what" + .packageChecksum("sha256:…") // optional (known on PUBLISH/DOWNLOAD, unknown on RESOLUTION) + .emit(); // always emits at INFO on the audit dataset, + // regardless of operational log-level settings +// client.ip, user.name, trace.id are taken from `ctx` β€” the builder reads the +// RequestContext and refuses to compile if any of those fields is missing. 
+``` + +The closed action enum: + +```java +// pantera-core/src/main/java/com/auto1/pantera/audit/AuditAction.java +public enum AuditAction { + ARTIFACT_PUBLISH, // upload / deploy / push of an artifact + ARTIFACT_DOWNLOAD, // successful serve of an artifact to a client + ARTIFACT_DELETE, // explicit delete via API or admin + RESOLUTION // metadata/index lookup that resolved a concrete coordinate +} +``` + +Anything else β€” cache writes, negative-cache invalidations, pool initialisations, drain-queue drops β€” **is not audit**. Those go through `LocalLogger` (Tier-4) with `LevelPolicy.LOCAL_*`. + +### 4.2 Level policy (enforced, table-driven) + +Every successful operation has a DEBUG hook. When DEBUG is enabled (e.g. `log.level=DEBUG` on `com.auto1.pantera.*` for a narrow component), the full trace of 2xx paths and internal happy-paths lands in logs β€” useful for investigation of "why did this specific request take 400 ms?". In production with INFO+ only, DEBUG events are suppressed for zero runtime cost. 
+ +```java +// pantera-core/src/main/java/com/auto1/pantera/http/observability/LevelPolicy.java +public enum LevelPolicy { + + // Tier-1 β€” client β†’ pantera + CLIENT_FACING_SUCCESS /* 2xx / 3xx */ (Level.DEBUG), + CLIENT_FACING_NOT_FOUND /* 404 */ (Level.INFO), + CLIENT_FACING_UNAUTH /* 401 / 403 */ (Level.INFO), + CLIENT_FACING_4XX_OTHER /* 4xx other */ (Level.WARN), + CLIENT_FACING_5XX /* 5xx */ (Level.ERROR), + CLIENT_FACING_SLOW /* >5s */ (Level.WARN), + + // Tier-2 β€” pantera β†’ pantera (group β†’ member, group β†’ storage, slice β†’ slice) + INTERNAL_CALL_SUCCESS /* 2xx */ (Level.DEBUG), + INTERNAL_CALL_NOT_FOUND /* 404 */ (Level.DEBUG), + INTERNAL_CALL_500 /* 500 */ (Level.ERROR), + + // Tier-3 β€” pantera β†’ upstream (HTTP client call to npmjs.org / maven central / etc) + UPSTREAM_SUCCESS /* 2xx */ (Level.DEBUG), + UPSTREAM_NOT_FOUND /* 404 */ (Level.DEBUG), + UPSTREAM_5XX /* 5xx + threw */ (Level.ERROR), + + // Tier-4 β€” local operations (DB, Quartz, Caffeine, Valkey, storage, drain, pool init) + LOCAL_CONFIG_CHANGE /* config load, pool init */ (Level.INFO), + LOCAL_OP_SUCCESS /* routine ops */ (Level.DEBUG), + LOCAL_DEGRADED /* fallback, shed, retry */ (Level.WARN), + LOCAL_FAILURE /* op failed */ (Level.ERROR), + + // Tier-5 β€” audit (always emitted regardless of operational log levels) + AUDIT_EVENT /* ARTIFACT_PUBLISH | DOWNLOAD | DELETE | RESOLUTION */ + (Level.INFO); + // … +} +``` + +This encodes the user's rules: +- **Every successful operation has a DEBUG hook** (Tier-1 2xx, Tier-2 2xx, Tier-3 2xx, Tier-4 op-success) β€” enable per-component for investigation, off in normal production. +- Client 404 β†’ INFO (not WARN) β€” fixes the 2.4M WARN/12h noise. +- Internal 500 β†’ ERROR with stack + trace.id (Tier-2). +- Upstream 5xx β†’ ERROR with stack + trace.id + destination + duration (Tier-3). +- Local ops β†’ mapped by semantic usefulness, not by library defaults. +- Audit β†’ always at INFO on the audit dataset, cannot be suppressed. 
+ +### 4.3 Required-field enforcement + +The tier builders return **phantom-typed** builders so the compiler catches missing fields: + +```java +// compile-time error: missing trace.id / url +accessLogger.forRequest(null).status(500).error(); + +// OK +accessLogger.forRequest(ctx).status(500).fault(fault).error(); +``` + +No `EcsLogger.error("…")` can be called from a request path without a `RequestContext` β€” closes **A13** and satisfies the user's "we must be able to link it to one of clientβ†’pantera request". + +### 4.4 Context propagation (retire `MdcPropagation`) + +Implemented with the **existing** stack β€” Elastic APM agent (for `trace.id`/`span.id` lifecycle) + Log4j2 `ThreadContext` (for ECS field propagation). No new library: + +```java +// pantera-core/src/main/java/com/auto1/pantera/http/context/ContextualExecutor.java +public static Executor contextualize(Executor delegate) { + return task -> { + // 1) snapshot ThreadContext (MDC) on the caller thread β€” holds the ECS fields + final java.util.Map ctx = org.apache.logging.log4j.ThreadContext.getImmutableContext(); + // 2) snapshot the current APM span so the new thread joins the same transaction + final co.elastic.apm.api.Span apmSpan = co.elastic.apm.api.ElasticApm.currentSpan(); + delegate.execute(() -> { + final java.util.Map prior = org.apache.logging.log4j.ThreadContext.getImmutableContext(); + org.apache.logging.log4j.ThreadContext.clearMap(); + org.apache.logging.log4j.ThreadContext.putAll(ctx); + try (co.elastic.apm.api.Scope s = apmSpan.activate()) { + task.run(); + } finally { + org.apache.logging.log4j.ThreadContext.clearMap(); + org.apache.logging.log4j.ThreadContext.putAll(prior); + } + }); + }; +} +``` + +Wire this once at: +- Vert.x event-loop registration (for blocking `executeBlocking` handlers) +- The drain `ThreadPoolExecutor` +- The `DbArtifactIndex` executor +- The Quartz worker thread factory +- Any `ForkJoinPool` used on the hot path (consider swapping 
`ForkJoinPool.commonPool()` for a contextual one via `CompletableFuture.defaultExecutor()`)
+
+Every `CompletableFuture.supplyAsync(..., ctxExecutor)`, every `.thenApplyAsync(..., ctxExecutor)`, every `executor.submit` now propagates ECS fields and the APM span transparently. Loggers read from `ThreadContext`. **`MdcPropagation.withMdc*` is deleted** (446 LOC gone; ~25 call-sites per hot adapter simplified). Closes **A14, C4**.
+
+---
+
+## 5. Negative cache β€” composite-key design, one source of truth
+
+### 5.0 Scope and single-source-of-truth
+
+The negative cache is **one service, shared by every repo type**:
+- **Local / hosted** repos (e.g. `libs-release-local`) β€” cache 404 for `(local, type, name, version)` combinations that the DB index confirmed absent.
+- **Proxy** repos (e.g. `npm_proxy`) β€” cache 404 for `(proxy, type, name, version)` combinations that the upstream confirmed absent.
+- **Group** repos (e.g. `npm_group`) β€” cache 404 for `(group, type, name, version)` combinations where every member (hosted + proxy) confirmed absent.
+
+Each repo type reads/writes its own scope partition, but there is **ONE `NegativeCache` bean per JVM**, backed by a shared L1 Caffeine and the shared Valkey L2. No separate caches per adapter, no parallel `NegativeCacheRegistry` + per-slice fields.
+
+Current v2.1.3 has **three** negative-cache instances drifting around: `GroupSlice.negativeCache`, `BaseCachedProxySlice.negativeCache`, `CachedNpmProxySlice.negativeCache`. WI-06's acceptance criteria include collapsing these into a single injected bean; a grep for `new NegativeCache(` after the PR must return zero production matches (only tests). 
+ +### 5.1 Key schema (per user spec) + +```java +// pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCacheKey.java +public record NegativeCacheKey( + String scope, // repo name β€” hosted/local, proxy, OR group + String repoType, // "maven" | "npm" | "pypi" | "go" | "docker" | "composer" | "helm" | "rpm" | "gem" | "debian" | "hex" | "file" + String artifactName, // "@scope/pkg", "org.springframework.cloud:spring-cloud-config", … + String artifactVersion // "2.2.2.RELEASE", "1.2.3"; "" for metadata queries +) { + public String flat() { return scope + ':' + repoType + ':' + artifactName + ':' + artifactVersion; } +} +``` + +### 5.2 Cache topology and configuration + +``` +L1 Caffeine AsyncCache + scope-partitioned bookkeeping for metrics: + pantera.neg_cache.hits{scope=...}, .misses, .size + +L2 Valkey Redis protocol, key = "neg:" + flat() + shared across all Pantera instances + pipelined GETs, MGET for batch warm + +Stampede guard Caffeine.asyncCache lookup semantics + coalesces concurrent isKnown404 calls for the same key + β†’ one L2 fetch per key per window +``` + +**Configuration model** β€” answering the user's "global vs per-tier" question: + +- **Per-tier** (L1 / L2) is the right granularity. The two tiers have different failure and pressure profiles: L1 is per-JVM memory, L2 is a shared cluster with its own eviction policy and network cost. One TTL would tilt the system one way or the other. +- **Per-scope override** is supported β€” a specific group or proxy can opt into shorter/longer TTLs via YAML if it's known-volatile (e.g. SNAPSHOT-heavy repos can set `neg_cache.l1.ttl: 30s`). 
+- **Sensible defaults** β€” fall back to these when YAML is silent: + +```yaml +# pantera-main configuration (YAML) +meta: + caches: + repo-negative: # one cache, shared by hosted / proxy / group scopes + l1: + max_size: 100_000 # entries per JVM (sized for all scopes combined) + ttl: 5m # TTL at L1 + l2: + enabled: true + max_size: 10_000_000 # entries across the Valkey cluster + ttl: 1h # TTL at L2 + valkey_host: valkey-ng.prod.auto1 + valkey_db: 3 + # optional per-scope override β€” any repo name (hosted, proxy, or group): + per_scope: + libs-snapshot-local: # hosted snapshot repo β€” volatile, shorter TTL + l1: { ttl: 30s } + l2: { ttl: 5m } + npm_proxy: # proxy β€” longer TTL, upstream is stable + l1: { ttl: 10m } + l2: { ttl: 4h } +``` + +Key renamed from `group-negative` β†’ `repo-negative` (the original name was misleading β€” the cache is NOT group-specific; it serves hosted, proxy, and group scopes uniformly). `NegativeCacheConfig` already separates L1 and L2 fields (`DEFAULT_L1_MAX_SIZE`, `DEFAULT_L1_TTL`, `DEFAULT_L2_MAX_SIZE`, `DEFAULT_L2_TTL`) β€” the rewrite retains that shape, adds the `per_scope` map, renames the YAML section. + +### 5.3 Position in the flow β€” BEFORE the DB index (and why it's safe) + +Per the user's preference ("check before database index search") β€” this is the correct choice **because the upload path owns synchronous invalidation**, so the ordering cannot serve a false 404 for a real artifact: + +``` +1. NegativeCache.isKnown404(groupScope, type, name, ver) ───── 0.05 ms p99 (L1 hit), 1-3 ms p99 (L2 hit) + ↓ hit ─► 404 immediately (PATH A) + ↓ miss +2. ArtifactIndex.locateByName(name) ─── 5-15 ms p99 (DB hit), 500 ms cap on timeout + ↓ Hit ─► targeted storage read + ↓ Miss ─► PATH 3 (proxy fanout) + ↓ Error ─► Fault.IndexUnavailable (PATH B β†’ 500) + +3. Proxy fanout + For each proxy member: + 3a. NegativeCache.isKnown404(memberScope, type, name, ver) + ↓ hit ─► skip this member + ↓ miss ─► member.get(req) + 3b. 
Race-to-first; cancel losers on winner + 3c. All members exhausted AND all responses ∈ {404, skipped} ─► populate neg-cache + at both memberScope (each contributing member) AND groupScope (short-circuit next time) + 3d. All members exhausted AND any Fault / any 5xx ─► Fault.AllProxiesFailed (PATH B β†’ 500) +``` + +**Why this ordering is safe:** + +| Concern | Mitigation | +|----------------------------------------------|-------------------------------------------------------------------------------------------------| +| "Someone uploads a new version β€” neg cache stale β€” false 404" | Upload path (`UploadSlice`, `PackagePublish`, etc.) invalidates matching keys **synchronously before acknowledging the upload**. See Β§5.5. | +| "Neg cache drifts from DB state over time" | L1 TTL 5 min bounds the window. L2 TTL 1 h bounded by cluster-wide invalidation batch on upload. | +| "DB has a correct answer we'd miss" | Neg cache only holds `repo:type:name:version` combinations we've **actively confirmed** as 404 within the TTL window. A DB answer that changes is either a new upload (invalidation fires) or an externally-mutated record (admin API β€” Β§5.5). | +| "Upload β†’ query race for very fast CI flows" | Upload invalidates neg cache **before** inserting the DB row. Read path: neg-cache miss β†’ DB hit β†’ serve. Correct by construction. | + +**Why not DB first?** + +The user legitimately asked whether DB-first is safer. Trade-off: +- DB-first is ~5–15 ms per 404 (DB statement latency at p99). Under the observed 404-heavy Maven multi-repo probe pattern (Β§2.5 of the forensic report: 400k+ 404s/repo/12 h), neg-cache-first saves **~60 cores of DB load** during peak. +- DB-first also means every 404 pays the 500 ms worst-case `locateByName` statement timeout if the DB is degraded β€” the exact condition under which we most want to shed load. +- Neg-cache-first + synchronous upload-invalidation preserves correctness while avoiding the DB-degrade amplification. 
+ +**This ordering assumes synchronous invalidation is wired.** WI-06 in the implementation plan fails acceptance if the upload-path invalidation test (Β§5.5) does not pass. + +### 5.4 Write policy + +```java +// when a final NotFound is confirmed: +onNotFound(ctx) { + negCache.put(NegativeCacheKey(ctx.repoName, ctx.repoType, ctx.artifact.name, ctx.artifact.version)); +} + +// when a member returns 404 during fanout: +onMember404(member, ctx) { + negCache.put(NegativeCacheKey(member.name, ctx.repoType, ctx.artifact.name, ctx.artifact.version)); +} + +// NEVER write on 5xx β€” 5xx is transient +// NEVER write on Timeout / Overload +``` + +### 5.5 Invalidation (the guard that makes Β§5.3 safe) + +Every event that creates a new artifact-version in Pantera MUST invalidate the negative cache for that `(scope, type, name, version)` across **every scope that could have cached it**, and MUST do so synchronously β€” i.e. the upload ACK is not sent until the invalidation has reached L1 and L2. + +**What triggers invalidation:** + +| Event | Trigger point | Scopes invalidated | +|-------------------------------|-------------------------------------------------------------|-------------------------------------------------------| +| Upload (Maven deploy, npm publish, pypi upload, docker push, helm push, …) | `UploadSlice.response` (and every adapter's publish/push slice) β€” *before ACK* | `{ repoUploadedTo } βˆͺ { every group containing that repo as member }` | +| Proxy fetch β†’ 200 (artifact landed in proxy cache for the first time) | `ProxyCacheWriter.writeWithSidecars` after atomic move | `{ proxyName } βˆͺ { every group containing that proxy }` | +| Admin API `POST /api/v1/admin/neg-cache/invalidate` | manual ops | as specified in the request | +| Admin UI (Β§5.6) | manual ops | as specified in the form | + +**Upload-path invalidation (canonical):** + +```java +// UploadSlice (and every adapter's equivalent publish/push slice) β€” at the top +// of response(), before the 
body is streamed to storage OR acknowledged:
+CompletableFuture<Void> invalidateNegCacheOnPublish(RequestContext ctx) {
+  final String name = ctx.artifact().name();
+  final String version = ctx.artifact().version();
+  final String type = ctx.repoType();
+  final String uploadedTo = ctx.repoName();
+
+  // Scopes that could serve this artifact: the repo itself + every group that
+  // includes it as a member. Resolved from the RepoConfig graph at startup and
+  // kept up-to-date by `SliceResolver.refresh()` on config change.
+  final List<String> scopes = repoGraph.scopesContainingMember(uploadedTo);
+
+  return negCache.invalidateBatch(
+      scopes.stream()
+          .map(scope -> new NegativeCacheKey(scope, type, name, version))
+          .toList()
+  ); // completes when L1 AND L2 (Valkey multi-key DEL) have both completed
+}
+```
+
+**Acceptance rules (WI-06 DoD):**
+
+1. No upload ACK is sent before `invalidateNegCacheOnPublish(...)` completes.
+2. Race-condition test: (a) write neg-cache entry for `(group, type, A, v1)`; (b) publish `A@v1` to a member of the group; (c) the very next GET via the group serves 200 β€” never 404. Must pass 1,000 / 1,000 iterations.
+3. Proxy-cache-write invalidation test: a proxy fetches an artifact that was previously in neg-cache for that proxy; next request serves the cached artifact, not the stale 404.
+4. After cutover, `pantera.neg_cache.stale_404` Kibana query (404 serves within 10 s of a successful upload for the same coordinate) returns 0 hits over a 24-h window.
+
+Upload latency grows by ~1 Valkey round-trip (~1 ms on the shared VPC). Acceptable given the correctness benefit.
+
+**TTL is a backstop, not the primary correctness mechanism.** L1 5 min, L2 1 h. If invalidation fails for any reason, TTL eventually corrects β€” but the SLO target is "zero stale 404s within 10 s of upload", which requires synchronous invalidation. 
+ +### 5.6 Admin UI β€” negative-cache inspection and invalidation + +Ops / platform engineers need to inspect and manually invalidate the negative cache when investigating reports of "I just published X but clients get 404". Rather than SSH + curl against the admin REST endpoint, add a UI page. + +**Placement:** `pantera-ui` β†’ `Admin` β†’ `Negative cache` tab. + +**Features:** + +1. **Inspector** β€” paginated list of entries in L1 for the local JVM, filterable by `scope`, `repoType`, `artifactName`, `version`. Column for `ttl_remaining` and which tier (`L1 only` / `L1+L2`). L2-only entries fetched via a small probe endpoint (`GET /api/v1/admin/neg-cache/probe?key=...`). +2. **Single-key invalidation** β€” form fields `scope`, `repoType`, `artifactName`, `version`; POST to `/api/v1/admin/neg-cache/invalidate` with the exact key. Shows "invalidated" confirmation with the count of tiers affected. +3. **Pattern invalidation** β€” fields `scope?`, `repoType?`, `artifactName?`, `version?` β€” any absent field becomes a wildcard. POSTs `/api/v1/admin/neg-cache/invalidate-pattern`. Shows affected-count. Gated behind an "I'm sure" confirm (a wildcard-all can invalidate millions of L2 entries). +4. **Stats** β€” per-scope hit/miss/size counters (read from `/api/v1/metrics` Prometheus endpoint, scoped to `pantera.neg_cache.*{scope=...}`). Shows which scopes have the highest 404 rate (useful when a CI pipeline is probing non-existent artifacts and flooding neg-cache). +5. **Audit** β€” every invalidation emits a Tier-4 Local WARN (`component=com.auto1.pantera.cache`, `event.action=neg_cache_invalidate`, `manual=true`, `user.name=`, `trace.id=`). This is NOT Tier-5 audit (cache ops are operational, see Β§4.1), but it is surfaced in a dedicated admin-actions Kibana dashboard. + +**Security:** +- Both REST endpoints require the admin role (same role as existing `POST /api/v1/admin/repositories`). +- Pattern invalidation logged at Local WARN with the admin's `user.name`. 
+- Rate-limited per admin user (10 pattern-invalidations per minute) to prevent an admin from accidentally DoS-ing L2 Valkey.
+
+**API contracts:**
+
+```
+GET /api/v1/admin/neg-cache β†’ paginated L1 dump
+GET /api/v1/admin/neg-cache/probe?key= β†’ "present" | "absent" + tiers hit
+POST /api/v1/admin/neg-cache/invalidate
+ body: { scope, repoType, artifactName, version }
+ β†’ { invalidated: { l1: 1, l2: 1 } }
+POST /api/v1/admin/neg-cache/invalidate-pattern
+ body: { scope?, repoType?, artifactName?, version? }
+ β†’ { invalidated: { l1: N, l2: M } }
+GET /api/v1/admin/neg-cache/stats β†’ per-scope counters
+```
+
+**Implementation work item:** WI-06b (scoped separately from the core WI-06 so backend and frontend can ship at different cadences).
+
+### 5.7 Safety
+
+- `NegativeCacheKey` includes version, so publishing v1.2.4 does not invalidate lookups for v1.2.3. Matches the user's spec.
+- Short TTL for L1 (5 min) bounds the inconsistency window if a key is missed on invalidation.
+- L2 Valkey unavailability β†’ degrade to L1-only silently + emit `pantera.neg_cache.l2_fallback` counter (F3.2 in the forensic report).
+
+---
+
+## 6. Concurrency model β€” pick one, wire it once
+
+### 6.1 Contract
+
+```java
+public interface Slice {
+  CompletionStage<Result<Response>> handle(Request req, RequestContext ctx);
+}
+```
+
+- One reactive type on the hot path: `CompletionStage` (JDK-native, no dependency).
+- Faults live inside `Result` β€” `.exceptionally(...)` handlers map to `Fault.Internal` and are a last line of defense only.
+- **No RxJava2 on hot paths.** `hu.akarnokd.rxjava2.interop.SingleInterop` + `io.reactivex.Maybe` usages in `DownloadAssetSlice`, `BaseCachedProxySlice`, `NpmProxy` are migrated to `CompletionStage`. 
+ +### 6.2 Thread model + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Vert.x event loops β”‚ HTTP IO (accept, parse, write). Non-blocking. β”‚ +β”‚ (2 Γ— vCPU = 30 threads)β”‚ NEVER run DB or storage reads. β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ Per-repo worker pool β”‚ Storage reads, JDBC, Valkey. One per repo, bounded β”‚ +β”‚ (4 threads, queue 100) β”‚ with AbortPolicy β†’ Fault.Overload β†’ 503 β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ Shared drain pool β”‚ 16 threads, queue 2000, AbortPolicy (A19 fix β€” β”‚ +β”‚ (per-JVM, tagged) β”‚ reject synchronously closes body on caller thread).β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ Quartz scheduler β”‚ Proxy event drain, periodic jobs. 8 threads. β”‚ +β”‚ β”‚ Does NOT share capacity with request path. β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ Virtual threads β”‚ Optional: for SHA-256 checksum compute on upload. 
β”‚ +β”‚ (unbounded, bounded by β”‚ Allows blocking digest calls on a thread each. β”‚ +β”‚ a Semaphore to 50) β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### 6.3 Fanout via `StructuredTaskScope` (Java 21) + +```java +// pantera-main/src/main/java/com/auto1/pantera/group/GroupResolver.java +private CompletionStage> proxyFanout( + List members, Request req, RequestContext ctx) { + + return CompletableFuture.supplyAsync(() -> { + try (var scope = new StructuredTaskScope.ShutdownOnSuccess>()) { + List>> forks = members.stream() + .map(m -> scope.fork(() -> + m.get(req, ctx).toCompletableFuture().join() + )) + .toList(); + scope.joinUntil(ctx.deadline().expiresAt()); + Result winner = scope.result(); // first success + // loser subtasks: scope.fork() ensures their resources are released + return winner; + } catch (StructuredTaskScope.Deadline | InterruptedException e) { + return Result.err(new Fault.Deadline(ctx.deadline().total(), "proxy-fanout")); + } catch (ExecutionException e) { + return classify(e.getCause(), ctx); + } + }, ctx.executorForRepo(ctx.repoName())); +} +``` + +`StructuredTaskScope.ShutdownOnSuccess` handles cancellation of losers natively and propagates the winner. The loser responses' bodies are drained by a registered `shutdown()` hook that streams to `/dev/null` on the shared drain pool. + +> **Note:** `StructuredTaskScope` is preview in Java 21 β€” finalized in Java 25. If production cannot rely on preview features, substitute `CompletableFuture.anyOf(members.map(m -> m.get(req, ctx)))` with explicit cancellation via `future.cancel(true)` in a `.whenComplete` hook. Migration friction: low. 
+ +### 6.4 Single-flight β€” one utility + +```java +// pantera-core/src/main/java/com/auto1/pantera/http/resilience/SingleFlight.java +public final class SingleFlight { + + private final AsyncLoadingCache cache; + + public SingleFlight(Duration inflightTtl, int maxInFlight, Executor executor) { + this.cache = Caffeine.newBuilder() + .expireAfterWrite(inflightTtl) // zombie eviction (A8) + .maximumSize(maxInFlight) + .executor(executor) + .buildAsync((k, e) -> { throw new UnsupportedOperationException(); }); + } + + /** Load-or-join: concurrent calls for the same key share one `loader.get()` invocation. */ + public CompletableFuture load(K key, Supplier> loader) { + return cache.get(key, (k, e) -> loader.get().toCompletableFuture()); + } + + public void invalidate(K key) { cache.synchronous().invalidate(key); } +} +``` + +- `GroupResolver.proxyFanout` uses `SingleFlight>` for its in-flight coalescer. +- `BaseCachedProxySlice.fetchFromUpstream` uses the same for its dedup. +- `MavenGroupSlice.fetchMetadata` uses the same. + +One utility, one test suite, one set of invariants. Closes **A6, A7, A8, A9**. + +--- + +## 7. 
Per-repo bulkheads + +```java +// pantera-core/src/main/java/com/auto1/pantera/http/resilience/RepoBulkhead.java +public final class RepoBulkhead { + private final String repo; + private final Semaphore inFlight; // max concurrent requests per repo + private final Executor workerPool; // per-repo pool for blocking ops + private final RateLimiter rateLimiter; // Failsafe or Guava + private final AutoBlockRegistry breaker; // per-upstream circuit + private final NegativeCache.Scope negCache; + private final Storage.Scope storage; + private final DrainPool.Scope drain; + + public CompletionStage> run(Supplier>> op) { + if (!inFlight.tryAcquire()) { + return CompletableFuture.completedFuture( + Result.err(new Fault.Overload(repo, Duration.ofSeconds(1)))); + } + if (!rateLimiter.tryAcquire()) { + inFlight.release(); + return CompletableFuture.completedFuture( + Result.err(new Fault.Overload(repo, Duration.ofSeconds(1)))); + } + return op.get() + .whenComplete((r, e) -> inFlight.release()); + } +} +``` + +Every repository gets exactly one `RepoBulkhead` at start-up. The `SliceResolver` injects it into the repo's `ProxyMember` / `HostedMember`. One misbehaving repo consumes only its own resources. + +Closes **A5, A16, A19, C3**. + +--- + +## 8. Group resolver β€” canonical implementation sketch + +```java +// pantera-main/src/main/java/com/auto1/pantera/group/GroupResolver.java +public final class GroupResolver implements Slice { + + private final String group; + private final String repoType; + private final List hostedMembers; + private final List proxyMembers; + private final ArtifactIndex index; + private final NegativeCache negCache; + private final SingleFlight> inFlight; + private final RepoBulkhead bulkhead; + + @Override + public CompletionStage> handle(Request req, RequestContext ctx) { + + // Parse artifact ref; for metadata endpoints, name="", version="". 
+ ArtifactRef ref = ArtifactNameParser.parse(repoType, req.path()) + .orElse(ArtifactRef.EMPTY); + RequestContext scoped = ctx.with(repoName, repoType, ref); + + return bulkhead.run(() -> resolve(req, scoped)); + } + + private CompletionStage> resolve(Request req, RequestContext ctx) { + + // 1. Negative cache (group scope) + if (!ctx.artifact().isEmpty()) { + var key = new NegativeCacheKey(group, repoType, + ctx.artifact().name(), ctx.artifact().version()); + if (negCache.isKnown404(key)) { + StructuredLogger.access.forRequest(ctx).status(404).fault(new NotFound(group, ...)).log(); + return CompletableFuture.completedFuture(Result.err(new NotFound(group, ...))); + } + } + + // 2. DB index + return index.locateByName(ctx.artifact().name(), ctx.deadline()) + .thenCompose(outcome -> switch (outcome) { + case Hit h -> readHosted(h.members(), req, ctx); + case Miss m -> proxyFanoutPath(req, ctx); + case Timeout | DBFailure f -> CompletableFuture.completedFuture( + Result.err(new IndexUnavailable(f.cause(), f.query()))); + }); + } + + private CompletionStage> readHosted( + List members, Request req, RequestContext ctx) { + + // STEP 3: try the targeted member's storage. On TOCTOU drift, fall through. 
+ return members.get(0).storage().read(req.key(), ctx.deadline()) + .thenCompose(res -> switch (res) { + case Ok ok -> CompletableFuture.completedFuture((Result) ok); + case Err err when + err.fault() instanceof NotFound -> proxyFanoutPath(req, ctx); + case Err err -> CompletableFuture.completedFuture((Result) err); + }); + } + + private CompletionStage> proxyFanoutPath(Request req, RequestContext ctx) { + if (proxyMembers.isEmpty()) { + negCache.put(new NegativeCacheKey(group, repoType, + ctx.artifact().name(), ctx.artifact().version())); + return CompletableFuture.completedFuture( + Result.err(new NotFound(group, ctx.artifact().name(), ctx.artifact().version()))); + } + var key = new NegativeCacheKey(group, repoType, + ctx.artifact().name(), ctx.artifact().version()); + return inFlight.load(key, () -> doProxyFanout(req, ctx)); + } + + private CompletionStage> doProxyFanout(Request req, RequestContext ctx) { + // Filter out members whose (memberScope) negative cache already says 404. + List candidates = proxyMembers.stream() + .filter(m -> !negCache.isKnown404(new NegativeCacheKey(m.name(), repoType, + ctx.artifact().name(), ctx.artifact().version()))) + .toList(); + if (candidates.isEmpty()) { + negCache.put(new NegativeCacheKey(group, repoType, ctx.artifact().name(), ctx.artifact().version())); + return CompletableFuture.completedFuture(Result.err(new NotFound(group, ...))); + } + // Race-to-first with structured concurrency, deadline-bounded. 
+ return RaceToFirst.race(candidates, m -> m.get(req, ctx), ctx.deadline()) + .thenApply(outcomes -> { + Optional> winner = outcomes.stream() + .filter(o -> o instanceof Ok) + .findFirst(); + if (winner.isPresent()) return winner.get(); + boolean all404 = outcomes.stream().allMatch(o -> + o instanceof Err e && e.fault() instanceof NotFound); + if (all404) { + negCache.put(new NegativeCacheKey(group, repoType, + ctx.artifact().name(), ctx.artifact().version())); + return Result.err(new NotFound(group, ctx.artifact().name(), ctx.artifact().version())); + } + return Result.err(new AllProxiesFailed(group, toMemberOutcomes(outcomes))); + }); + } +} +``` + +Every branch terminates in a `Result`. The final HTTP translation happens **exactly once**, in the top-level `FaultTranslator.translate(...)` called by the Vert.x binding. + +--- + +## 9. Error translation β€” single site + +```java +// pantera-core/src/main/java/com/auto1/pantera/http/fault/FaultTranslator.java +public final class FaultTranslator { + + public static Response translate(Fault fault, RequestContext ctx) { + return switch (fault) { + case NotFound nf -> ResponseBuilder.notFound().build(); + case Forbidden fb -> ResponseBuilder.forbidden(fb.reason()).build(); + case IndexUnavailable iu -> internalErrorWithTag("index-unavailable"); + case StorageUnavailable su -> internalErrorWithTag("storage-unavailable"); + case AllProxiesFailed apf -> passThroughProxy(apf); // ← amended: stream the proxy's own 5xx + case Internal i -> internalErrorWithTag("internal"); + case Deadline d -> ResponseBuilder.gatewayTimeout() + .header("X-Pantera-Fault", "deadline-exceeded").build(); + case Overload ov -> ResponseBuilder.serviceUnavailable() + .header("Retry-After", String.valueOf(ov.retryAfter().toSeconds())) + .header("X-Pantera-Fault", "overload:" + ov.resource()).build(); + }; + } + + /** + * Pass through the "best" proxy 5xx response verbatim. 
If no proxy produced + * a Response at all, synthesize a plain 502 β€” this is the only AllProxiesFailed + * path that invents a status code. + */ + private static Response passThroughProxy(AllProxiesFailed apf) { + if (apf.winningResponse().isPresent()) { + final var w = apf.winningResponse().get(); + return ResponseBuilder.from(w.response().status()) + .headers(w.response().headers()) + .header("X-Pantera-Fault", "proxies-failed:" + w.memberName()) + .header("X-Pantera-Proxies-Tried", String.valueOf(apf.outcomes().size())) + .body(w.response().body()) // stream the body β€” not buffered + .build(); + } + // No Response at all (everyone threw / cancelled / timed out) β€” synthesize 502. + return ResponseBuilder.badGateway() + .header("X-Pantera-Fault", "proxies-failed:none-responded") + .header("X-Pantera-Proxies-Tried", String.valueOf(apf.outcomes().size())) + .jsonBody("{\"error\":\"all upstream members failed\"}") + .build(); + } + + public static Fault classify(Throwable t, String where) { + Throwable cause = unwrap(t); + return switch (cause) { + case java.util.concurrent.TimeoutException te -> new Deadline(Duration.ZERO, where); + case java.net.ConnectException ce -> new Internal(ce, where); + case java.io.IOException ioe -> new Internal(ioe, where); + case com.auto1.pantera.asto.ValueNotFoundException vnf + -> new StorageUnavailable(vnf, vnf.getMessage()); + case java.lang.IllegalStateException ise + when "Queue full".equals(ise.getMessage()) -> new Overload("event-queue", Duration.ofSeconds(1)); + default -> new Internal(cause, where); + }; + } + + /** + * Pick the "best" proxy response to pass through when all fail. Ranking: + * 5xx with a non-empty body > 5xx with empty body > any other Response. + * Within a rank, earliest member in declaration order wins. + */ + public static java.util.Optional + pickWinningFailure(java.util.List outcomes) { + // Implementation: stable-sort by rank; first element with a Response wins. 
+ // Members whose outcome is EXCEPTION/CANCELLED/CIRCUIT_OPEN contribute no Response. + // Full logic in the production class + unit tested (WI-01 DoD). + ... + } +} +``` + +One file. One decision point for "what HTTP status comes out". One place to change the policy. + +Closes **A2, A3, A20, C1**. The `AllProxiesFailed` pass-through also honors the HTTP semantics that the upstream's *own* 5xx response body carries useful diagnostic info (e.g. npmjs.org's `{"error":"..."}` JSON), which we were previously discarding. + +--- + +## 9.5 Proxy cache integrity β€” sidecar pairing for Maven + +### Symptom observed in production + +``` +[WARNING] Could not validate integrity of download from + https://artifactory.prod.auto1.team/artifactory/releases/com/fasterxml/oss-parent/58/oss-parent-58.pom +org.eclipse.aether.transfer.ChecksumFailureException: Checksum validation failed, + expected '15ce8a2c447057a4cfffd7a1d57b80937d293e7a' (REMOTE_EXTERNAL) + but is actually '0ed9e5d9e7cad24fce51b18455e0cf5ccd2c94b6' +``` + +Maven Aether fetched `oss-parent-58.pom` and `oss-parent-58.pom.sha1` **from Pantera**; the `.sha1` sidecar content and the SHA-1 of the `.pom` bytes **disagree**. `REMOTE_EXTERNAL` means Maven pulled the expected checksum from Pantera (the remote repo) β€” so our cached pair is inconsistent. + +### Why this happens after v2.1.3 (ranked by likelihood) + +1. **SWR refetch overwrites primary without coupled sidecar refetch.** Commit `cc883735 feat(proxy): stale-while-revalidate for binary artifact cache` fires a background refetch of the `.pom` on cache-hit. The `.pom.sha1` sidecar is served by a separate slice invocation and has no refetch coupling β€” when the bg refresh lands, the primary and sidecar diverge silently. +2. **Independent concurrent fetches** of `.pom` and `.pom.sha1`. Maven fetches both in quick succession. If Pantera's cache is cold, two independent `BaseCachedProxySlice.doFetch` invocations fire. 
The two upstream GETs can pick up different bytes if the upstream registry hiccups between them; each writes its own file to storage; pair is inconsistent. +3. **Partial-write on primary.** If the upstream streams the `.pom` and the connection drops at 99%, the cached `.pom` is truncated; the `.sha1` we cached refers to the full file. Pantera has no end-to-end verify. +4. **TOCTOU eviction** similar to the 02:01 outlier (`NoSuchFileException: …meta.meta`) β€” one of the pair gets evicted, the other remains, next fetch re-populates only the evicted one from a different upstream version. +5. **Upstream content drift.** Maven Central *should not* mutate released artifacts; in practice it has been observed for small set of historical POMs. Low base rate but real. + +### Target design β€” invariant + +> **For any cached artifact A in Pantera's storage, the `.md5 / .sha1 / .sha256 / .sha512` sidecars MUST hash the exact bytes of A stored at that instant. An inconsistency must be either prevented on write or healed on detect.** + +### Implementation β€” `ProxyCacheWriter` + +A new single-source-of-truth class that wraps `Storage.save(...)` for proxy adapters: + +```java +// pantera-core/src/main/java/com/auto1/pantera/http/cache/ProxyCacheWriter.java +public final class ProxyCacheWriter { + + /** + * Fetch primary + ALL sidecar digests from upstream in one concurrent batch, + * verify, then atomically move into the cache. If any sidecar disagrees with + * the bytes we just downloaded, we fail the write and emit Tier-3 ERROR. + */ + public CompletionStage> writeWithSidecars( + final Key primaryKey, + final Supplier> fetchPrimary, + final java.util.Map>>> fetchSidecars, + final RequestContext ctx + ) { + // 1. Stream the primary into a temp file while computing ALL digests in one pass. + // Uses parallel MessageDigest accumulators fed from the same byte-buffer stream. + // No full-body buffering β€” NIO SeekableByteChannel into a temp path. + // 2. 
Fetch each sidecar (small, usually < 100 bytes). + // 3. For each sidecar present, assert hex(sidecar) == computed[algo]. + // On disagreement: write NOTHING to cache, return Err(Fault.UpstreamIntegrity). + // 4. On agreement: atomically move primary + sidecars from temp to cache + // in a single rename-or-mkdir-swap operation provided by FileStorage. + // 5. Emit Tier-4 LocalLogger event (component=com.auto1.pantera.cache, + // event.action=cache_write, outcome=success, package.checksum=sha256, + // pantera.cache.sidecars=[list]). cache_write is operational, NOT audit. + } +} +``` + +Wiring: + +- **Maven adapter** (`maven-adapter/.../CachedProxySlice`, `BaseCachedProxySlice` maven specialization): + - Replace every `storage.save(...)` on primary/sidecar with `ProxyCacheWriter.writeWithSidecars(...)`. + - `fetchSidecars` accepts the full set `{md5, sha1, sha256, sha512}`; each one is fetched only if the upstream returns 200 on HEAD (most upstreams only serve `.sha1` + `.md5`). +- **SWR coherence** β€” `BaseCachedProxySlice.refetchStale(...)` calls `writeWithSidecars(...)` as a single unit. A failed verification leaves the existing cache pair in place (serve-stale behavior unchanged); the failure increments `pantera.proxy.cache.integrity_failure{repo}` counter and emits Tier-3 ERROR. +- **Single-flight coalescing** β€” primary + sidecars coalesce under ONE `SingleFlight` key keyed on `artifactBaseName:version` (not URL path), so concurrent `.pom` and `.pom.sha1` requests for the same artifact share one `writeWithSidecars` invocation. + +### Healing stale pairs + +One-time admin tool + periodic job: + +``` +scripts/pantera-cache-integrity-audit.sh [--repo libs-release-local] [--fix] +``` + +Scans cached artifacts, recomputes digest, compares to cached sidecar. On drift: +- `--dry-run` (default): report only, emits Tier-4 WARN per offender. +- `--fix`: evict the affected pair (both primary + sidecars), let the next request repopulate via `writeWithSidecars`. 
+ +Ships as part of WI-07. + +### New `Fault` case + +```java +/** 5xx from upstream OR internal integrity failure on proxy cache write. */ +record UpstreamIntegrity( + String upstreamUri, + ChecksumAlgo algo, + String sidecarClaim, + String computed +) implements Fault {} +``` + +`FaultTranslator.translate` maps `UpstreamIntegrity` β†’ 502 with `X-Pantera-Fault: upstream-integrity:`. Client retries; a subsequent fetch will either find consistent upstream bytes or keep failing (at which point ops is paged). + +--- + +## 10. Logging discipline (codified) + +### 10.1 Log lines required per tier (the user's spec, extended with Audit) + +``` +Tier-1 (client β†’ pantera) ALWAYS for 4xx / 5xx / >5s slow. 2xx emitted only at DEBUG. + Required fields: trace.id, transaction.id, request.id, user.name, client.ip, + user_agent.*, http.request.method, http.response.status_code, + url.original, url.path, package.name, package.version, + event.duration + Stack trace: when Fault is Internal / StorageUnavailable / IndexUnavailable + Level: LevelPolicy.CLIENT_FACING_{NOT_FOUND|UNAUTH|4XX_OTHER|5XX|SLOW} + +Tier-2 (pantera β†’ pantera) ONLY when emitted fault is 500 (tier is suppressed otherwise). + Required fields: trace.id (= parent client trace.id), span.id, internal.source (group), + internal.target (member), package.name, package.version, + error.type, error.message, error.stack_trace + Level: LevelPolicy.INTERNAL_CALL_500 (ERROR) + +Tier-3 (pantera β†’ upstream) ONLY when upstream returns 5xx or throws, OR on integrity failure. + Required fields: trace.id (= client trace.id), span.id, destination.address, + destination.port, url.full, http.request.method, + http.response.status_code, event.duration, + error.type, error.message, error.stack_trace + Level: LevelPolicy.UPSTREAM_5XX (ERROR) + +Tier-4 (pantera local) Policy-driven (config changes INFO; degradation WARN; failures ERROR). 
+ Required fields: component, event.action, message; + trace.id (if request-linked, taken from ThreadContext) + Stack trace: on LevelPolicy.LOCAL_FAILURE + Level: LevelPolicy.LOCAL_{CONFIG|OP_SUCCESS|DEGRADED|FAILURE} + +Tier-5 (AUDIT) ALWAYS emitted on one of FOUR actions. Minimal schema. + Cannot be suppressed by level config. + Closed action enum: ARTIFACT_PUBLISH | ARTIFACT_DOWNLOAD | ARTIFACT_DELETE | RESOLUTION + Required fields (exactly): @timestamp β€” when (RFC 3339, milli-precision; + emitted by EcsLayout at log-record + creation time β€” asserted in the contract + test so no path can emit without it) + client.ip β€” from where + user.name β€” who ("anonymous" if unauth'd; never null) + event.action β€” what (one of the four above) + package.name β€” on what + package.version β€” on what + trace.id β€” investigation chain back to Tier-1 + Optional fields: package.checksum β€” sha256 hex, when known (PUBLISH / DOWNLOAD) + event.outcome β€” "success" | "failure" (for uploads/deletes) + Routing: event.category = "audit" + data_stream.dataset = "pantera.audit" + Retention: 90 d (vs. 14 d for operational tiers) + Level: LevelPolicy.AUDIT_EVENT (INFO, never suppressed) + + NOTE: repository.*, event.dataset, client.geo.*, etc. are NOT required by the + audit tier. If they happen to be in MDC they are emitted by EcsLayout for + free; they are not part of the compliance contract. +``` + +### 10.2 Tier-1 = the access log + +Emitted **exactly once** by the Vert.x binding in a `whenComplete` wrapped around the entire slice pipeline, reading `Result` and `RequestContext`. No slice may emit an access log line. + +### 10.3 Tier-2 / Tier-3 = emitted by the caller, not the callee + +The internal logger fires when a slice records a `Fault` that escalates to 500. The upstream logger fires when the HTTP client's `Response` is 5xx or the call threw. Both pass through the tier-specific builder that reads `Context.current()` for the parent trace.id. 
+ +### 10.4 Tier-4 = component-specific + +`LocalLogger.forComponent("com.auto1.pantera.index")` + a structured event taxonomy (`pool_init`, `queue_saturated`, `stmt_timeout`, `cache_evict`, …). Level from `LevelPolicy`. If a request context is active (via `Context.current()`), trace.id is attached automatically. + +### 10.5 What goes away + +- The 2.4M WARN/12h noise (all 404 β†’ INFO) β€” **95% log volume reduction**. +- "Not found" message at WARN β€” gone. +- "Repository not found in configuration" at WARN β€” β†’ INFO. +- "HttpClient@… idle timeout" ERROR β†’ Tier-4 DEBUG (library lifecycle). +- `log.level` JSON-format inconsistency β€” single formatter path now. +- `client.ip: null` β€” compile-time required. +- `trace.id: null` β€” compile-time required on Tier-1/2/3/5. +- Audit events with null `package.name` (the `W1` finding from commit cff45271) β€” compile-time required. + +--- + +## 11. Module / package structure + +``` +pantera-core/ +β”œβ”€β”€ http/ +β”‚ β”œβ”€β”€ fault/ NEW (Fault, Result, FaultClassifier, FaultTranslator) +β”‚ β”œβ”€β”€ context/ NEW (RequestContext, Deadline, ContextualExecutor β€” APM/ThreadContext-based) +β”‚ β”œβ”€β”€ resilience/ NEW (SingleFlight, RepoBulkhead, CircuitBreakerSlice, AutoBlockRegistry) +β”‚ β”œβ”€β”€ observability/ NEW (StructuredLogger with 5 tiers, LevelPolicy, +β”‚ β”‚ accessLogger/internalLogger/upstreamLogger/localLogger/auditLogger) +β”‚ β”œβ”€β”€ cache/ +β”‚ β”‚ β”œβ”€β”€ NegativeCache REWRITE (composite-key, L1+L2, per-tier + per-scope TTL/size) +β”‚ β”‚ β”œβ”€β”€ NegativeCacheKey NEW +β”‚ β”‚ β”œβ”€β”€ ProxyCacheWriter NEW (atomic primary+sidecar write with digest verification) +β”‚ β”‚ └── SingleFlight MOVED β†’ resilience/ +β”‚ β”œβ”€β”€ slice/ +β”‚ β”‚ β”œβ”€β”€ Slice API CHANGE: CompletionStage> +β”‚ β”‚ β”œβ”€β”€ EcsLoggingSlice REWRITE (delegates to StructuredLogger.access) +β”‚ β”‚ └── CircuitBreakerSlice MINOR (use Fault.Overload instead of synthetic 503) +β”‚ └── trace/ +β”‚ └── MdcPropagation 
DELETED (replaced by ContextualExecutor + ThreadContext) +β”‚ +β”œβ”€β”€ audit/ +β”‚ └── AuditLogger API CHANGE: accepts RequestContext + AuditAction enum; +β”‚ enforces required fields at compile time (Tier-5) +β”‚ +β”œβ”€β”€ async/ NEW +β”‚ β”œβ”€β”€ RaceToFirst (race-to-first with cancellation + drain) +β”‚ β”œβ”€β”€ StructuredFanout (wraps StructuredTaskScope for pre-Java 25) +β”‚ └── Deadlines (helpers) +β”‚ +pantera-main/ +β”œβ”€β”€ group/ +β”‚ β”œβ”€β”€ GroupResolver NEW name (replaces GroupSlice) +β”‚ β”œβ”€β”€ MemberSlice NOW sealed (HostedMember | ProxyMember) +β”‚ β”œβ”€β”€ ArtifactNameParser MINOR (return ArtifactRef instead of String) +β”‚ └── RoutingRule unchanged +β”‚ +β”œβ”€β”€ index/ +β”‚ β”œβ”€β”€ DbArtifactIndex REWRITE (AbortPolicy β†’ Fault.Overload, per-repo semaphore) +β”‚ └── ArtifactIndex API CHANGE: returns IndexOutcome sum type +β”‚ +β”œβ”€β”€ scheduling/ +β”‚ β”œβ”€β”€ MetadataEventQueues MINOR (queue.offer + DroppedCounter) +β”‚ └── DrainExecutor NEW (per-repo drain pools) +β”‚ +each *-adapter/ MIGRATE (RxJava2 β†’ CompletionStage; use Fault types) +``` + +Net: **~4 new packages**, **~3 deletions**, ~15 rewrites of existing classes. + +--- + +## 12. Implementation β€” agent-executable work items + +This ships as a new minor release **v2.2.0** (not a sprint-timed rollout). The items below are designed for a **code β†’ self-review β†’ reviewer-agent β†’ iterate** loop. Each item is: + +- **self-contained** β€” can be picked up without reading the others +- **fully specified** β€” DoD is machine-checkable (tests green, counters at expected values, contract assertions compile) +- **dependency-graphed** β€” `depends-on` lists only items whose merge is required before this one's merge + +Items without a `depends-on` arrow can proceed in parallel. Conflicts resolved by the reviewer agent on rebase. + +Notation: `WI-xx` = work item. 
A reviewer agent accepts a PR iff every `DoD` bullet is verified; otherwise returns to the author agent with the first unmet bullet as the next target.
+
+### WI-00 β€” Tactical hotfix (ships as v2.1.4, no architecture change)
+**Goal:** stop the Queue-full cascade and the 404-WARN log flood without touching the architecture.
+**Files changed (18 sites from forensic report Β§1.6):**
+- `npm-adapter/.../DownloadAssetSlice.java:198, 288` β€” `queue.add β†’ queue.offer`
+- 16 sibling sites in pypi, go, docker, helm, rpm, hex, nuget, composer adapters (exact list in forensic Β§1.6)
+- `EcsLogEvent.java:319-329` β€” change 404β†’INFO, 401β†’INFO policy
+- `com.auto1.pantera.settings` β€” "Repository not found in configuration" β†’ INFO
+- `com.auto1.pantera.http.client` β€” "Idle timeout expired" β†’ DEBUG
+- `DownloadAssetSlice.checkCacheFirst` β€” wrap every `queue.offer(...)` call inside `ifPresent(...)` in `try { ... } catch (Throwable t) { log; }` so background-queue failure cannot escape the serve path.
+**Tests that MUST pass (DoD):**
+- `DownloadAssetSliceQueueFullTest` (new): saturate the queue to 10 000 entries, then issue 50 concurrent cache-hit GETs β†’ expect 50 Γ— 200 and zero `Queue full` logs escaping.
+- Load-soak: 15 min at 1 500 req/s mixed npm/maven β†’ zero `IllegalStateException: Queue full` in logs.
+- Kibana query `log.level: WARN AND http.response.status_code: 404` returns 0 after cutover.
+**depends-on:** none.
+
+### WI-01 β€” Fault + Result sum types
+**Goal:** introduce `pantera-core/http/fault/` without behavior change; slices still work via a shim.
+**Files new:** `Fault.java`, `Result.java`, `FaultClassifier.java`, `FaultTranslator.java`, `FaultTranslatorTest.java`.
+**Files modified:** `Slice.java` gets a default method `default CompletionStage<Response> response(...) { return handle(...).thenApply(r -> FaultTranslator.translate(r, ctx)); }` β€” existing callers unchanged. `AllProxiesFailed` pass-through logic (Β§9) wired. 
+**Tests:** +- Exhaustive pattern-match test over every `Fault` subtype (one `case` per variant, a deliberately-broken case fails compilation). +- `FaultClassifier` round-trips: `ConnectException` β†’ `Internal`; `TimeoutException` β†’ `Deadline`; `ValueNotFoundException` β†’ `StorageUnavailable`; `IllegalStateException("Queue full")` β†’ `Overload`. +- `FaultTranslator.translate(AllProxiesFailed)` with a winning ProxyFailure β†’ response preserves the upstream status, headers, and body; `X-Pantera-Fault: proxies-failed:` is present. +**DoD:** all existing tests pass; new coverage β‰₯ 95% on the `fault` package. +**depends-on:** WI-00 shipped. + +### WI-02 β€” `RequestContext` + ECS/APM propagation +**Goal:** one immutable context envelope per request; propagates via `ContextualExecutor` + Log4j2 ThreadContext + APM. +**Files new:** `RequestContext.java`, `Deadline.java`, `ContextualExecutor.java`, `RequestContextTest.java`. +**Files modified:** `EcsLoggingSlice` β€” constructs the `RequestContext` at the request edge, attaches to ThreadContext, ensures APM span is active. +**Tests:** +- Contract test: after `EcsLoggingSlice`, a `CompletableFuture.supplyAsync(..., ctxExecutor)` invocation on a different thread sees all RequestContext fields in `ThreadContext.get(...)` and `ElasticApm.currentSpan().getTraceId()` equal to the caller's. +- Contract test: every access-log line emitted by `AccessLogger.forRequest(...)` has non-null `trace.id`, `transaction.id`, `client.ip` (when the LB provides XFF) β€” assertions enforced in a `CapturingAppender` harness. +**DoD:** existing `MdcPropagation` call-sites compile unchanged (they remain backed temporarily); contract tests green. +**depends-on:** WI-01. + +### WI-03 β€” `StructuredLogger` with 5 tiers, required fields enforced +**Goal:** phantom-typed builders for Tier-1 (access), Tier-2 (internal), Tier-3 (upstream), Tier-4 (local), Tier-5 (audit). 
+**Files new:** `StructuredLogger.java`, `LevelPolicy.java`, `AccessLogger.java`, `InternalLogger.java`, `UpstreamLogger.java`, `LocalLogger.java`, `AuditLogger.java` (rewrap).
+**Files modified:** every `EcsLogger.warn(...).log()` / `EcsLogger.error(...).log()` call-site β€” migrated to the appropriate tier.
+**Tests:**
+- Compile test (negative): `accessLogger.forRequest(null).status(500).error();` β†’ does not compile.
+- Contract test: 1 000 random request replays; every resulting log line passes a JSON-Schema check specific to its tier (trace.id non-null on T1/2/3/5, client.ip non-null on T1, error.stack_trace non-null on 5xx, event.action ∈ enum on T5, etc.).
+- Audit test: `artifact_publish` emitted even when operational log level is set to ERROR β€” audit is non-suppressible.
+**DoD:** tiered JSON-schema contract tests green; zero `EcsLogger` direct call-sites remain on request paths.
+**depends-on:** WI-02.
+
+### WI-04 β€” `GroupResolver` (replaces `GroupSlice`)
+**Goal:** rewrite the group layer to the flow in Β§2 with the sealed `MemberSlice` and `ArtifactIndex` that returns `IndexOutcome`.
+**Files new:** `GroupResolver.java`, `HostedMember.java`, `ProxyMember.java`, `IndexOutcome.java`.
+**Files modified:** `ArtifactIndex.locateByName` returns a `CompletionStage<IndexOutcome>` (sealed `Hit | Miss | Timeout | DBFailure`), not `Optional<…>`.
+**Files deleted:** the 6 overloaded `GroupSlice` constructors; `locate()` dead code; `AtomicBoolean/AtomicInteger/AtomicBoolean` fanout triad; `MdcPropagation.withMdc*` call-sites in the group package.
+**Tests:**
+- Full flow integration tests (one per Path A / B / OK outcome in Β§2).
+- TOCTOU test: the 02:01 outlier scenario β€” index says artifact exists at member M; storage raises `ValueNotFoundException`; `GroupResolver` falls through to proxy fanout and succeeds. No 500 to client. 
+- Queue-full test: an upstream adapter returns `Fault.Overload` β€” group propagates `Overload` to client as 503 with Retry-After (not a synthesized 500). +- `AllProxiesFailed` pass-through test: three proxies all return a 5xx; client sees the best-ranked proxy's 5xx body verbatim. +**DoD:** all above tests green; `ops-infrastructure/` smoke tests against a local `pantera-main` container see identical behavior for the 200/404 happy paths. +**depends-on:** WI-01, WI-02, WI-03. + +### WI-05 β€” `SingleFlight` utility + consolidate 3 coalescers +**Goal:** one Caffeine-AsyncCache-based utility for request coalescing; delete the 3 hand-rolled implementations. +**Files new:** `pantera-core/http/resilience/SingleFlight.java` + tests. +**Files modified:** `GroupResolver` (uses `SingleFlight` for `proxyOnlyFanout` coalescer); `CachedNpmProxySlice` (replaces `RequestDeduplicator` usage); `MavenGroupSlice` (replaces `inFlightMetadataFetches`). +**Files deleted:** `inFlightFanouts` field + coalescer code in GroupSlice; `RequestDeduplicator.java`; `inFlightMetadataFetches` field + coalescer code in MavenGroupSlice. +**Tests:** +- Property test: N = 1 000 concurrent `load(k, loader)` calls for the same key β†’ `loader` invoked exactly once; all 1 000 callers receive the same value. +- Cancellation test: 100 callers; cancel 50 mid-load; remaining 50 get value; loader ran once. +- Zombie-eviction test: loader never completes; after `MAX_AGE_MS`, entry evicted; next `load(k, ...)` invokes loader again. +- StackOverflow-regression test: 500 followers waiting on a gate; leader completes β†’ no SOE under `thenComposeAsync`. +**DoD:** three coalescer implementations deleted; all above tests green. +**depends-on:** WI-01. 
+ +### WI-06 β€” `NegativeCache` with composite key + single source of truth + synchronous upload invalidation +**Goal:** rewrite `NegativeCache` with `NegativeCacheKey`, per-tier config, per-scope override; collapse three parallel negative-cache instances into **one** injected bean used by hosted/proxy/group scopes; upload path invalidates synchronously across all scopes that could cache the artifact. +**Files new:** `NegativeCacheKey.java`. +**Files modified:** +- `NegativeCache.java` (rewrite β€” composite key, scope-partitioned metrics, shared across all adapters). +- `NegativeCacheConfig.java` (per-tier L1/L2 fields + per-scope override map). +- YAML config key renamed: `meta.caches.group-negative` β†’ `meta.caches.repo-negative` (legacy key read with a one-release deprecation warning, then removed in v2.3). +- Every `UploadSlice` / `PackagePublish` / equivalent β€” add `invalidateNegCacheOnPublish(ctx)` call *before* ACK. +- `BaseCachedProxySlice.java`, `GroupSlice.java`, `CachedNpmProxySlice.java` β€” delete their private `new NegativeCache(...)` constructions and accept an injected shared instance. +- `ProxyCacheWriter.java` (from WI-07) β€” invalidate neg-cache on first-time proxy cache write. +**Tests:** +- Ordering test: check-before-DB produces a 404 without a DB query when the key is in L1 neg cache. +- Single-source test: `rg 'new NegativeCache\(' pantera-core pantera-main npm-adapter maven-adapter pypi-adapter go-adapter docker-adapter composer-adapter helm-adapter rpm-adapter hex-adapter nuget-adapter file-adapter` returns zero production matches (test-only matches ignored). +- Invalidation race test: (a) write neg cache entry for `(group, type, A, v1)`; (b) publish `A@v1`; (c) next GET serves 200, never 404. 1 000 / 1 000 iterations. +- Proxy-cache-write invalidation test: proxy fetches artifact previously marked 404; next GET serves cached 200 not 404. 
+- L2 fallback: disable Valkey mid-test β†’ L1 continues to serve; counter `pantera.neg_cache.l2_fallback` increments. +- Per-scope TTL override: `libs-snapshot-local` L1 TTL = 30s respected (hosted); `npm_proxy` L1 TTL = 10m respected (proxy). +- 24-h staging soak: Kibana query "404 served within 10s of a successful upload for the same coordinate" returns 0 hits. +**DoD:** Maven multi-repo 404 storms (Β§2.5 of forensic) drop by β‰₯ 80% in a load-test; zero false 404s on post-upload reads; one `NegativeCache` bean visible in the DI wiring. +**depends-on:** WI-05. + +### WI-06b β€” Admin UI for negative-cache inspection and invalidation +**Goal:** add the UI panel + REST endpoints described in Β§5.6 so platform engineers can investigate "just-uploaded-but-clients-404" reports without SSH + curl. +**Files new (backend):** `NegativeCacheAdminResource.java` in `pantera-main/api/v1/admin/` β€” the five endpoints from Β§5.6. +**Files new (frontend):** `pantera-ui/src/pages/admin/NegativeCache.tsx` (or equivalent in the current UI framework) β€” inspector, single-key form, pattern form, stats dashboard. +**Files modified:** admin route registration + role-guard; `AuditLogger` wiring is **NOT** used here (invalidation is operational, not compliance β€” see Β§4.1). Tier-4 Local WARN is emitted with `user.name` of the admin. +**Tests:** +- REST contract tests for each endpoint (200 with admin role, 403 without). +- Pattern invalidation rate-limit test: 11th request within 1 min returns 429. +- UI Cypress / Playwright test: inspector loads, single-key form succeeds, pattern form requires confirm-click. +- Audit test: every invalidation emits a Tier-4 Local WARN with `event.action=neg_cache_invalidate`, `manual=true`, correct `user.name`. +**DoD:** On-call dashboard link β†’ page renders β†’ admin invalidates a known stale entry β†’ next GET serves correctly; rate-limit test green. +**depends-on:** WI-06 merged (UI calls the same `NegativeCache` bean). 
+ +### WI-07 β€” `ProxyCacheWriter` + Maven checksum-integrity fix +**Goal:** atomic primary + sidecar write; eliminate `oss-parent-58.pom.sha1` mismatches (Β§9.5). +**Files new:** `ProxyCacheWriter.java`, `ProxyCacheWriterTest.java`, `scripts/pantera-cache-integrity-audit.sh`. +**Files modified:** `maven-adapter/.../CachedProxySlice`, `BaseCachedProxySlice` maven specialization β€” replace `storage.save(...)` with `writeWithSidecars(...)`. `SWR` refetch path also invokes `writeWithSidecars`. +**Tests:** +- Verification failure: upstream serves `.pom` bytes that don't match upstream `.sha1` β†’ write rejected, no cache entry created, Tier-3 ERROR emitted with `Fault.UpstreamIntegrity`. +- Atomicity: kill the write midway β†’ neither primary nor sidecar visible in cache. +- SWR coherence: seed stale primary + sidecar pair; trigger refetch; upstream serves new pair; both updated atomically. +- Audit tool: seed an intentionally-mismatched pair; `--dry-run` reports it; `--fix` evicts it; next GET re-populates correctly. +**DoD:** `ChecksumFailureException` does not appear in Maven client logs against a fresh Pantera build for any historically-affected artifact (`com.fasterxml/oss-parent`, `org.springframework.cloud/*` metadata); audit tool green. +**depends-on:** WI-01 (for `Fault.UpstreamIntegrity`). + +### WI-08 β€” Retire RxJava2 from hot paths +**Goal:** one reactive stack (`CompletionStage`) on every hot path. +**Files modified:** `DownloadAssetSlice`, `CachedNpmProxySlice`, `BaseCachedProxySlice`, `NpmProxy.getAsset`, `MavenProxy.getMetadata`, `PyProxySlice`, `ComposerProxyDownloadSlice`, `GoProxySlice`. +**Files deleted:** `hu.akarnokd.rxjava2.interop.SingleInterop` usages on hot paths; `body.asBytesFuture()` replaced with streaming where applicable. +**Tests:** existing integration tests remain green; new streaming test: GET a 50 MB artifact, assert peak heap does not grow by more than 10 MB (no full-body buffering). 
+**DoD:** `rg 'io.reactivex' pantera-main pantera-core *-adapter --glob '*.java'` returns only test files; `rg 'SingleInterop' ...` returns empty. +**depends-on:** WI-01, WI-04. + +### WI-09 β€” `RepoBulkhead` per repository +**Goal:** per-repo isolation for drain pool, HTTP client, DB semaphore, rate limiter. +**Files new:** `RepoBulkhead.java`, `BulkheadLimits.java` (record for YAML config). +**Files modified:** `SliceResolver` constructs and injects a `RepoBulkhead` per repo; every `MemberSlice` receives its repo's bulkhead. +**Files deleted:** static `DRAIN_EXECUTOR` + `DRAIN_DROP_COUNT` in `GroupSlice`. +**Tests:** +- Isolation test: saturate repo A's bulkhead with 1 k concurrent requests; repo B's latency unchanged. +- Metrics test: every bulkhead-related counter carries `{repo=...}` tag. +**DoD:** per-repo metrics visible in a Prometheus scrape; saturation in one repo does not affect others in a multi-repo load test. +**depends-on:** WI-04. + +### WI-10 β€” Release gates (SLO + CI perf baseline + chaos) +**Goal:** every future regression caught in CI, not in prod. +**Files new:** +- `docs/slo/{npm-proxy, npm-group, maven-proxy, maven-group, pypi-proxy, docker-pull, file-raw}.md` (one per adapter) +- `.github/workflows/perf-baseline.yml` +- `tests/perf-baselines/{adapter}.json` +- `tests/chaos/*.java` (tagged `@Chaos`) +- `tests/property/SingleFlightPropertyTest.java` +- `scripts/release-gate.sh` +**Tests:** CI workflow gates PRs on (a) ≀ 10% perf regression vs baseline, (b) all `@Chaos` tests passing in the nightly job, (c) SLO budget not-exceeded. +**DoD:** workflow runs green on a clean main; intentionally-introduced regression PR fails with a clear error. +**depends-on:** WI-04 through WI-09 merged (tests exercise the full target state). 
+
+### Dependency graph
+
+```
+WI-00 ─┐
+       β”‚
+       β–Ό
+WI-01 ─┬──► WI-02 ──► WI-03 ──► WI-04 ──► WI-09 ──► WI-10
+       β”‚                          β”‚
+       β”œβ”€β”€β–Ί WI-05 ──► WI-06 ──► WI-06b
+       β”‚                          β”‚
+       β”œβ”€β”€β–Ί WI-07                 β”‚
+       β”‚                          β”‚
+       └─────────────► WI-08 β”€β”€β”€β”€β”€β”˜
+```
+
+Parallelism opportunities (independent after WI-01):
+- WI-05 (single-flight) + WI-07 (checksum integrity) can go in parallel with WI-02/03/04.
+- WI-06b (admin UI) can start as soon as WI-06 is merged (backend + frontend ship independently).
+- WI-08 (retire RxJava2) can start as soon as WI-04 is merged.
+- WI-09 (bulkheads) can start as soon as WI-04 is merged.
+
+### Per-item review-loop protocol
+
+1. **Author agent** executes the item. Runs the DoD tests locally; writes a short PR description summarising which DoD bullet was hardest to meet.
+2. **Reviewer agent** (spawned fresh, no conversation memory) reads the PR, runs the DoD checklist. If any bullet fails, comments on the PR with the first failing bullet and returns to author.
+3. **Author agent** addresses the comment and re-pushes. Goto 2.
+4. Loop terminates when reviewer agent finds all DoD bullets met. Reviewer agent merges.
+
+No time boxes; the loop terminates on correctness, not on deadline.
+
+---
+
+## 13. Testing strategy
+
+### 13.1 Unit β€” per-class contract
+
+One test class per new core class. Rules:
+- `FaultTranslator` has exhaustive pattern-match tests (one case per `Fault` subtype).
+- `SingleFlight` has a property test: N concurrent `load(k, ...)` calls result in exactly one loader invocation.
+- `NegativeCache` has TTL tests, L1/L2 degradation tests.
+- `RepoBulkhead` has reject tests, timeout tests.
+
+### 13.2 Integration β€” Group resolver end-to-end
+
+With a real Postgres (Testcontainers), a real Valkey, a synthetic upstream (WireMock):
+- Path A (negative cache hit) β†’ 404, no DB query, no member call.
+- Path A (all proxy 404) β†’ 404, neg-cache populated on exit. 
+- Path B (DB timeout) β†’ 500, `X-Pantera-Fault: index-unavailable`. +- Path B (all proxy 5xx) β†’ 500, `X-Pantera-Fault: proxies-failed`. +- Path B (all proxy timeout) β†’ 504 (via Deadline), `X-Pantera-Fault: deadline-exceeded`. +- Targeted local read + TOCTOU drift β†’ falls through to proxy fanout (no more spurious 500). + +### 13.3 Concurrency property tests + +With `jqwik` or a small custom harness: +- `SingleFlight.load` under 100 concurrent callers for the same key β†’ 1 loader invocation. +- Single-flight cancellation: cancel 50 of 100 callers mid-load β†’ remaining 50 still get the value. +- Race-to-first: 5 members, one returns 200 at 10ms, others at 100ms β†’ client gets 200 at ~10ms, losers are cancelled within 20ms. + +### 13.4 Chaos injection + +Tags: `@Chaos` on JUnit. Run in a nightly CI stage. +- `@KillMember` β€” kill one proxy member mid-request. +- `@DbStall(500ms)` β€” add 500ms artificial delay to every DB call. +- `@StorageEvict` β€” delete a random cached artifact between index-lookup and storage-read. +- `@ValkeyOffline` β€” disable L2 cache at a random time during the test. +- `@FullDrainQueue` β€” fill the drain executor to capacity. + +Every chaos test verifies: no 5xx to client that would not have happened without the chaos; no resource leak (connection count returns to baseline within 60s); no silent correctness issue (e.g. wrong artifact bytes returned). + +### 13.5 Perf baseline in CI + +`.github/workflows/perf-baseline.yml`: +- Docker-compose: Pantera + Postgres + Valkey + npmjs.org mirror (simulated). +- `npm install` of a 100-package manifest; run 5x; record p50/p95/p99. +- `mvn dependency:resolve` for a 200-artifact POM; run 5x; record p50/p95/p99. +- Compare against `perf-baselines/{adapter}.json` with thresholds (e.g., p99 ≀ 1.1 Γ— baseline). +- PR blocks on regression >10%. + +Baseline reset by maintainer with `--bless` flag. + +--- + +## 14. 
SLOs and release gates + +### 14.1 Per-adapter SLOs + +| Adapter | Availability | p50 | p95 | p99 | Error budget / 28 d | +|----------------|-------------|-------|-------|-------|---------------------| +| npm proxy | 99.9 % | 20ms | 80ms | 200ms | ~40 min | +| npm group | 99.9 % | 30ms | 120ms | 300ms | ~40 min | +| maven proxy | 99.9 % | 25ms | 100ms | 250ms | ~40 min | +| maven group | 99.9 % | 35ms | 140ms | 350ms | ~40 min | +| pypi proxy | 99.9 % | 20ms | 80ms | 200ms | ~40 min | +| docker pull | 99.9 % | 40ms | 150ms | 400ms | ~40 min | +| file / raw | 99.95% | 10ms | 40ms | 100ms | ~20 min | + +### 14.2 Burn-rate alerts + +- **Fast burn** (5 min / 1 h): consuming 14 d of error budget in 1 h β†’ page. +- **Slow burn** (6 h / 1 d): consuming 7 d of error budget in 6 h β†’ ticket. + +### 14.3 Release gate + +`scripts/release-gate.sh` reads the last 24 h of Prometheus and refuses to tag a release if any SLO is above its budget. Human override (`--force`) is logged to a release-log file. + +--- + +## 15. 
The 5.4 β†’ 9+ scorecard + +| Dimension | 2.1.3 | 2.2 target | Driven by | +|-------------------------------------------|------:|-----------:|----------------------------------------------------------------| +| Separation of concerns | 8 | 9 | GroupResolver isolated from translation/observability | +| Single Responsibility | 5 | 9 | `GroupResolver` only resolves; translation, logging, bulkhead injected | +| Open/Closed | 4 | 8 | New adapter = new `MemberSlice` impl + `ArtifactNameParser` case | +| Liskov | 7 | 9 | Sealed `MemberSlice` types; pattern-match | +| Interface Segregation | 6 | 8 | `Slice` still one method; split `ArtifactIndex` into `Locator` + `Writer` | +| Dependency Inversion | 6 | 9 | No static pools; everything DI'd | +| Idempotency of retries | 5 | 8 | Side effects (queue enqueue, metrics) idempotent per-request | +| Observability | 6 | 10 | Typed builders, OTel context, required fields enforced | +| Bulkheading | 3 | 9 | Per-repo pools, per-repo metrics, rate limits | +| Timeouts / Deadlines | 5 | 9 | End-to-end `Deadline` propagated | +| Circuit-breaker correctness | 5 | 9 | Poisoning eliminated by Fault classification | +| Graceful degradation | 7 | 9 | SWR + neg cache + fallback member retained, hardened | +| Consistency semantics | 5 | 8 | TOCTOU drift handled as cache miss (fall through) | +| Error classification | 3 | 10 | One classifier, one translator | +| Testing β€” unit | 7 | 9 | Property tests on SingleFlight, NegativeCache, FaultClassifier | +| Testing β€” concurrency/chaos | 4 | 9 | Nightly chaos tests, property tests | +| Testing β€” perf regression | 2 | 9 | CI perf baseline | +| Documentation β€” inline | 8 | 9 | Retained | +| Documentation β€” architectural | 4 | 9 | ADRs per phase, this doc + its children | +| Deployability | 7 | 9 | JVM knobs + SLO gates | + +**Weighted average: 5.4 β†’ 9.0.** Reaching 10/10 requires cultural changes (on-call rotations with error-budget authority, game days, error-budget-policy-backed feature 
freezes) beyond a codebase refactor. + +--- + +## 16. What this design explicitly does NOT do + +- **Does not rewrite the storage layer.** `asto` + file-system storage are retained. +- **Does not replace Vert.x.** Vert.x remains the HTTP server; only the slice composition contract changes. +- **Does not migrate to Postgres pooling alternatives.** HikariCP + `DbArtifactIndex` remain; only the pool's overflow policy changes. +- **Does not remove Quartz.** Quartz scheduler remains for periodic jobs; only the per-repo drain pool shape changes. +- **Does not introduce a new message bus.** Event propagation stays in-process; queues are for scheduling batches, not for cross-service messaging. +- **Does not add a new serialization format to APIs.** Public HTTP API is unchanged. + +Each of these could be revisited later on its own merits; none are on the critical path to 9/10. + +--- + +## 17. Next step + +A reviewer acceptance check (all four now folded into the v2.2.0 scope): + +1. **Request flow in Β§2** β€” matches intent (amended: `AllProxiesFailed` is pass-through; see Β§9 and the status policy table). +2. **Status policy in Β§2** β€” `IndexUnavailable` / `StorageUnavailable` / `Internal` still 500 per the product contract; `AllProxiesFailed` streams the upstream 5xx body verbatim so the client gets the diagnostic JSON it expects. +3. **Logging tiers in Β§4.1** β€” extended to 5 tiers (Tier-5 Audit is distinct; always emitted; separate dataset/retention). +4. **Negative cache** β€” TTL + size configured per-tier (L1/L2) with per-scope overrides; check-before-DB is safe because upload-path invalidation is synchronous (Β§5.5). +5. **Maven checksum failures** β€” addressed by `ProxyCacheWriter` (Β§9.5 / WI-07). +6. **Implementation cadence** β€” agent-executable work items (WI-00 … WI-10); no sprints; loop terminates on correctness, not on deadline (Β§12). + +Ready to pick up: +- **WI-00** can proceed immediately (hotfix, 2.1.4). 
+- **WI-01** unblocks everything else in the v2.2.0 train. + +*End of target architecture (revision 2, amendments 2026-04-16).* diff --git a/docs/analysis/v2.2.0-pr-description.md b/docs/analysis/v2.2.0-pr-description.md new file mode 100644 index 000000000..2ccbae87e --- /dev/null +++ b/docs/analysis/v2.2.0-pr-description.md @@ -0,0 +1,187 @@ +# v2.2.0 β€” target-architecture train + cooldown metadata filtering + +## Summary + +This PR lands the **first eight work items** of the v2.2 target architecture plus **cooldown metadata filtering** across all 7 adapters. The target-architecture items are **WI-00** (queue/log hotfix), **WI-01** (Fault + Result sum types), **WI-05** (SingleFlight coalescer), **WI-07** (ProxyCacheWriter + Maven checksum integrity), **WI-post-05** (retire `RequestDeduplicator`; promote `FetchSignal`), **WI-post-07** (wire ProxyCacheWriter into pypi/go/composer), **WI-02** (full `RequestContext` + `Deadline` + `ContextualExecutor`), and **WI-03** (`StructuredLogger` 5-tier + `LevelPolicy` + `AuditAction`). The cooldown work delivers two-layer enforcement (soft metadata filter + hard 403) for Maven, npm, PyPI, Docker, Go, Composer, and Gradle with 5 performance hardenings (H1-H5), SOLID package restructure, admin/invalidation hardening, and 250+ tests including chaos tests. Full design rationale is in `docs/cooldown-metadata-filtering.md` and the target-architecture doc; forensic evidence for every "before/after" claim is in `docs/analysis/v2.1.3-post-deploy-analysis.md`. 
+ +## Work items shipped (8) + +- [x] **WI-00** β€” queue overflow + access-log level policy (commit `4242ea94`) + - `queue.add()` β†’ `queue.offer()` across every request-serving enqueue site in npm / pypi / go / docker / helm / rpm / hex / nuget / composer / core + - `EventsQueueMetrics` shared drop-counter + single-WARN-per-drop + - 4xx access-log level policy: 404/401/403 β†’ INFO; other 4xx β†’ WARN (unchanged) + - Jetty idle-timeout β†’ DEBUG; "Repository not found" β†’ INFO + - `DownloadAssetSliceQueueFullTest`: 50 concurrent cache-hits over a saturated queue β†’ 50 Γ— 200 +- [x] **WI-01** β€” `Fault` + `Result` sum types (commit `08684bc0`) + - Sealed `Fault` hierarchy (`NotFound`, `Forbidden`, `IndexUnavailable`, `StorageUnavailable`, `AllProxiesFailed`, `UpstreamIntegrity`, `Internal`, `Deadline`, `Overload`) + - `Result` with `map` / `flatMap` + - `FaultClassifier` for `.exceptionally(...)` fallback + - `FaultTranslator` β€” single HTTP-status decision point; implements the Β§2 worked-examples table (retryability > body > declaration-order) including the `AllProxiesFailed` pass-through contract + - 40 tests; 99% instruction / 97% branch coverage on the `fault` package +- [x] **WI-05** β€” unify three coalescers into `SingleFlight` (commit `03214a9e`) + - Caffeine `AsyncCache`-backed; per-caller cancellation isolation; stack-flat follower dispatch; zombie eviction via `CompletableFuture.orTimeout` + - Migrates `GroupSlice.inFlightFanouts`, `MavenGroupSlice.inFlightMetadataFetches`, `CachedNpmProxySlice` (`RequestDeduplicator`) β€” field names retained, only the type changes + - 14 property-style tests including N=1000 coalescing, 100-caller cancellation, 500-follower synchronous-completion stack-safety +- [x] **WI-07** β€” `ProxyCacheWriter` + Maven checksum integrity (commit `c165f38f`) + - Single write-path for `primary + sidecars` with streamed NIO temp-file + four concurrent `MessageDigest` accumulators + - Atomic "primary first, sidecars after" 
commit; partial-failure rollback deletes both + - `Fault.UpstreamIntegrity` on sidecar disagreement; nothing lands in the cache + - Maven adapter wired end-to-end + - `scripts/pantera-cache-integrity-audit.sh` with `--dry-run` / `--fix` for healing pre-existing drift + - Regression test reproduces the exact production `oss-parent-58.pom.sha1` hex +- [x] **Version bump 2.1.3 β†’ 2.2.0** (commit `9b8e0055`) + - Root `pom.xml` + all 30 module poms + - `mvn install` now produces `pantera-main-2.2.0.jar` and the image tags `pantera:2.2.0` +- [x] **WI-post-05** β€” retire `RequestDeduplicator`; promote `FetchSignal` (commit `cf799266`) + - `BaseCachedProxySlice` migrated from `RequestDeduplicator.deduplicate(...)` to `SingleFlight.load(...)` + - `RequestDeduplicator.java` + `RequestDeduplicatorTest.java` + `DedupStrategy` deleted + - `FetchSignal` promoted to top-level at `pantera-core/http/cache/FetchSignal.java` + - `BaseCachedProxySliceDedupTest` β€” 4 regression tests covering coalescing, `NOT_FOUND` propagation, `ERROR` propagation, cancellation isolation +- [x] **WI-post-07** β€” wire `ProxyCacheWriter` into pypi / go / composer (commit `0629b543`) + - Each adapter's `CachedProxySlice` constructs a `ProxyCacheWriter` when a file-backed `Storage` is present + - Primary-artifact cache misses (`.whl` / `.tar.gz` for pypi; `.zip` for go; `.zip` for composer) route through the coupled primary+sidecar write path + - Adapter-native sidecar algo sets: pypi {SHA-256, MD5}; go {SHA-256}; composer {SHA-256} + - One atomicity test + one digest-mismatch test per adapter (`CachedPyProxySliceIntegrityTest`, `CachedProxySliceIntegrityTest` Γ— 2) +- [x] **WI-02** β€” full `RequestContext` + `Deadline` + `ContextualExecutor` (commit `129b0bf1`) + - `RequestContext` expanded from 4 β†’ 13 fields; 4-arg backward-compat ctor retained + - `Deadline` monotonic wall-clock deadline with `in(Duration)` / `remaining()` / `expired()` / `remainingClamped(max)` / `expiresAt()` + - 
`ContextualExecutor.contextualize(Executor)` propagates `ThreadContext` + APM span across `CompletableFuture` boundaries + - Wired at `DbArtifactIndex` (via internal `DbIndexExecutorService` adapter that forwards lifecycle), `GroupSlice.DRAIN_EXECUTOR`, `BaseCachedProxySlice` / `CachedNpmProxySlice` / `MavenGroupSlice` SingleFlights + - 20 new tests: `RequestContextTest` (14), `ContextualExecutorTest` (5), `DeadlineTest` (8), `ContextualExecutorIntegrationTest` (3) +- [x] **WI-03** β€” `StructuredLogger` 5-tier + `LevelPolicy` + `AuditAction` (commit `b8fd2bab`) + - `AccessLogger` / `InternalLogger` / `UpstreamLogger` / `LocalLogger` / `AuditLogger` β€” five tier builders, each with `Objects.requireNonNull` on required fields at entry + - `LevelPolicy` encodes the Β§4.2 log-level matrix as a single enum + - `AuditAction` closed enum: `{ARTIFACT_PUBLISH, ARTIFACT_DOWNLOAD, ARTIFACT_DELETE, RESOLUTION}` per Β§10.4 + - `EcsLoggingSlice` emits access log exactly once per request via `StructuredLogger.access()` (legacy dual emission removed) + - `MdcPropagation` marked `@Deprecated(since="2.2.0", forRemoval=true)` + - 34 new tests across `AccessLoggerTest`, `AuditLoggerTest`, `InternalLoggerTest`, `UpstreamLoggerTest`, `LocalLoggerTest`, `LevelPolicyTest` + +## Cooldown Metadata Filtering (8 phases) + +Two-layer cooldown enforcement across all 7 adapters (Maven, npm, PyPI, Docker, Go, Composer, Gradle). See `docs/cooldown-metadata-filtering.md` for full architecture. 
+ +- [x] **Phase 1** -- SOLID package restructure (`api/`, `cache/`, `metadata/`, `response/`, `config/`, `metrics/`, `impl/`) +- [x] **Phase 2** -- 5 performance hardenings: H1 (pre-warm release-date cache), H2 (parallel bounded evaluation, 4 threads), H3 (SWR on metadata cache), H4 (50K L1 capacity), H5 (inflight-map leak fix) +- [x] **Phase 3** -- Per-adapter metadata parser/filter/rewriter/detector for 7 adapters (235+ unit tests) +- [x] **Phase 4** -- Per-adapter 403 response factories with `CooldownResponseRegistry` +- [x] **Phase 5** -- Admin unblock flow hardened (sync invalidation, CooldownCache + FilteredMetadataCache both invalidated, Micrometer counters) +- [x] **Phase 6** -- `CooldownAdapterBundle` record + `CooldownAdapterRegistry` populated at startup; all 7 adapters wired +- [x] **Phase 7** -- Integration tests (MetadataFilterServiceIntegrationTest, CooldownAdapterRegistryTest) + chaos test (100-concurrent stampede dedup) +- [x] **Phase 8** -- Documentation + changelog + final verification + +## Work items deferred to v2.2.x / v2.3 (6) + +Every deferred item has an explicit entry with goal/files/tests/DoD/deps in `docs/analysis/v2.2-next-session.md`. + +- [ ] **WI-04** β€” `GroupResolver` replaces `GroupSlice`; sealed `MemberSlice` (Hosted/Proxy); `ArtifactIndex.locateByName` returns `IndexOutcome` sealed type. Backbone WI of v2.2.0 β€” wires `RequestContext` (WI-02), `StructuredLogger` (WI-03), `Fault` (WI-01), `SingleFlight` (WI-05), `ProxyCacheWriter` (WI-07) together. +- [ ] **WI-06** β€” `NegativeCache` with composite `NegativeCacheKey`, per-tier + per-scope YAML, one bean shared across hosted/proxy/group scopes, synchronous upload invalidation +- [ ] **WI-06b** β€” admin UI page + REST endpoints for neg-cache inspection and invalidation +- [ ] **WI-08** β€” retire RxJava2 from `DownloadAssetSlice` / `CachedNpmProxySlice` / `BaseCachedProxySlice` / `NpmProxy.getAsset` / `MavenProxy.getMetadata`. 
Unblocks deletion of 5 `MdcPropagation` call-sites in npm-adapter and completion of the remaining WI-post-07 wiring. +- [ ] **WI-09** β€” `RepoBulkhead` per repo; retire static `GroupSlice.DRAIN_EXECUTOR` +- [ ] **WI-10** β€” adapter SLOs, CI perf baseline, chaos tests, release-gate script + +## Test run + +All test suites pass locally at branch HEAD: + +``` +$ mvn -T8 install -DskipTests +BUILD SUCCESS (docker image pantera:2.2.0) + +$ mvn -pl pantera-core test +Tests run: 891, Failures: 0, Errors: 0, Skipped: 7 +BUILD SUCCESS + +$ mvn -pl pantera-main test -DfailIfNoTests=false +Tests run: 929, Failures: 0, Errors: 0, Skipped: 4 +BUILD SUCCESS + +$ mvn -T4 -pl npm-adapter,maven-adapter,pypi-adapter,go-adapter,composer-adapter,\ + docker-adapter,helm-adapter,rpm-adapter,hexpm-adapter,nuget-adapter \ + test -DfailIfNoTests=false +Adapter totals: + npm-adapter 191 + hexpm-adapter 19 + maven-adapter 56 (3 skipped) + rpm-adapter 252 (1 skipped) + composer-files 27 + goproxy 86 (1 skipped) + nuget-adapter 126 + pypi-adapter 334 + helm-adapter 77 + docker-adapter 444 (1 skipped) + ----------------------- + Aggregate 1 612 tests, 0 failures, 0 errors, 6 skipped +BUILD SUCCESS + +TOTAL across reactor: 3 432 tests, 0 failures, 0 errors, 17 skipped (all green) +``` + +Acceptance queries from the target-architecture doc + session brief (each matches the expected count): + +``` +# Foundation gates +$ rg 'queue\.add\(' --glob '*.java' | rg -v test | rg -v '// ok:' +# 0 matches β€” WI-00 complete + +# WI-post-05 +$ rg 'RequestDeduplicator|class DedupStrategy|RequestDeduplicator\.FetchSignal' --glob '*.java' | rg -v test +# 0 matches β€” legacy type retired +$ rg 'new FetchSignal|FetchSignal\.(SUCCESS|NOT_FOUND|ERROR)' --glob '*.java' | rg -v test | wc -l +# 11 β€” every production call-site uses the promoted top-level enum + +# WI-post-07 +$ rg 'TODO\(WI-post-07\)' --glob '*.java' | wc -l +# 1 β€” only npm-adapter retains the marker (blocked on WI-08 RxJava retirement) +$ rg 
'new ProxyCacheWriter' --glob '*.java' | rg -v test | wc -l +# 4 β€” maven + pypi + go + composer + +# WI-02 +$ ls pantera-core/src/main/java/com/auto1/pantera/http/context/ +# ContextualExecutor.java Deadline.java RequestContext.java +$ wc -l pantera-core/src/main/java/com/auto1/pantera/http/context/RequestContext.java +# 340 + +# WI-03 +$ rg 'StructuredLogger\.access\(\)' --glob '*.java' | wc -l +# 14 (1 production + 13 tests) +$ rg 'enum AuditAction' --glob '*.java' | wc -l +# 1 β€” single closed enum +$ rg 'new EcsLogEvent\(\)' pantera-core/src/main/java/com/auto1/pantera/http/slice/EcsLoggingSlice.java +# 1 match at line 193 β€” the .exceptionally() error path only (dual emission on success was removed) +$ rg 'MdcPropagation\.' --glob '*.java' | rg -v test | wc -l +# ~110 β€” documented remaining call-sites; deletion scheduled for WI-06/WI-08/Vert.x-handler follow-up + +# Commit-message hygiene +$ git log c71fbbfe..HEAD --format='%B' | git interpret-trailers --only-trailers | grep -ic 'co-authored-by' +# 0 +``` + +## Reviewer focus + +Focus areas when approving, in priority order: + +1. **`FaultTranslator` + `pickWinningFailure` policy faithfulness.** Implements the worked-examples table from target-architecture Β§2. `FaultAllProxiesFailedPassThroughTest` has one test per row. If you suspect a row is wrong, add a row-specific test that asserts the expected status / header / body shape β€” don't tweak the translator silently. +2. **SingleFlight zombie eviction.** Caffeine's `expireAfterWrite` does NOT expire pending futures in an `AsyncCache`; zombie protection lives in `orTimeout(inflightTtl)` on the wrapped loader future (see the comment in `SingleFlight.java:188-206`). The `zombieEvictedAfterTtl` test exercises the real timer, not a mock β€” a refactor that replaces `orTimeout` with anything else must keep that invariant. +3. 
**ProxyCacheWriter temp-file handling.** Every error path (stream IO failure, size read failure, save failure) funnels through `deleteQuietly(tempFile)` in `commit()` / `streamPrimary()` / `rejectIntegrity()` / the outer `.exceptionally`. A temp-file leak would be silent; if you suspect one, add an assertion on `Files.list(System.getProperty("java.io.tmpdir"))` in `ProxyCacheWriterTest` to lock the invariant. +4. **`RequestContext` / `ContextualExecutor` wiring.** The three hot-path executors (`DbArtifactIndex` via its internal `DbIndexExecutorService` adapter, `GroupSlice.DRAIN_EXECUTOR`, all three SingleFlight instances) are wrapped β€” verify the wrapping is present in every reviewer's mental model of the request flow. Every `CompletableFuture.runAsync(..., ctxExecutor)` now propagates ECS MDC + APM span; the 4-arg backward-compat `RequestContext` ctor ensures legacy callers compile unchanged. +5. **`EcsLoggingSlice` access-log single-emission.** Line 176 emits via `StructuredLogger.access().forRequest(rctx)`; the former second emission (`new EcsLogEvent(...)...log()` alongside the StructuredLogger call) was removed to halve Kibana log volume. The sole remaining `new EcsLogEvent()` call is on line 193 β€” the `.exceptionally(...)` error path β€” and is scheduled for migration by the same follow-up WI that re-lifts `user_agent.*` parsing. + +## Risks and mitigations + +Three new risks introduced by Wave 3, plus the three Wave 1-2 risks retained: + +1. **`MdcPropagation` retained as `@Deprecated` with ~110 production callers.** The class cannot be deleted until WI-06 (removes 25 cooldown-related callers), WI-08 (removes 5 npm-adapter callers blocked on RxJava retirement), and the Vert.x-handler contextualisation follow-up (removes the ~55 callers in `pantera-main/api/v1/*Handler.java`) all land. **Mitigation:** the class is stable and documented; no new call-sites are permitted (enforce via PR-review β€” there is no checkstyle gate yet). 
Scheduled for removal in v2.3.0. +2. **Rich `user_agent.name` / `.version` / `.os.name` parsing lost.** The pre-v2.2.0 `EcsLogEvent` instance emitted parsed User-Agent sub-fields on every access-log line. When the dual emission was removed, only `user_agent.original` survives via `RequestContext`. **Mitigation (operator):** Kibana dashboards that query `user_agent.name` or `user_agent.version` need to switch to `user_agent.original` or wait for the follow-up WI that re-lifts parsing into `StructuredLogger.access`. **Mitigation (code):** if an operator files a dashboard-regression ticket, that WI is ~30 LoC and can ship in a v2.2.x patch. +3. **`DbIndexExecutorService` is a localised copy of `TraceContextExecutor`-style delegation.** The adapter lives inside `DbArtifactIndex` as a private static-nested class that forwards lifecycle methods to the underlying `ExecutorService` and routes `execute(Runnable)` through `ContextualExecutor`. Hoisting it into a reusable `pantera-core/http/context/ContextualExecutorService` would share the code with Quartz pools and any future `ExecutorService` hotspot. **Mitigation:** track as `WI-post-03a` in the next-session doc; the current duplication is ~40 lines and does not block the release. +4. **SingleFlight allocates one new `CompletableFuture` per caller on top of the shared one.** Two `whenCompleteAsync` hooks per call (invalidate + forwarder) run on the executor. At 2k req/s for a single popular package this is 4k executor submissions per second β€” not a hot-path concern versus the per-request cost, but measurable. **Mitigation:** if the WI-10 perf baseline flags this, the invalidate hook can move to a single `whenComplete` on the shared future and the forwarder can become a no-copy `minimalCompletionStage`. Not needed today. +5. **ProxyCacheWriter rollback is best-effort.** If the primary save succeeds and the sidecar save fails AND the subsequent `storage.delete(primary)` also fails (e.g. 
underlying filesystem transiently read-only), the cache can end up holding a primary without a sidecar. Maven client behaviour on missing sidecar is to refetch β€” the `IntegrityAuditor` also heals this case β€” so the worst case is a transient 502 on the next GET, not a silent integrity bug. **Mitigation:** run the audit tool in `--dry-run` as a nightly cron against production caches for the first release. +6. **The npm adapter still carries one `TODO(WI-post-07)` marker.** Its `CachedNpmProxySlice` primary write path is not architecturally protected against drift until WI-post-07 is completed for npm, which requires WI-08 (RxJava2 retirement) to land first. npm is a low-drift-risk adapter (single SHA-512 sidecar, always co-located in the tarball metadata), so the residual risk is smaller than the Maven case that v2.2.0 closes. Tracked in `v2.2-next-session.md` as part of WI-08's DoD. + +## Links + +- Target architecture: `docs/analysis/v2.2-target-architecture.md` +- v2.1.3 post-deploy forensics: `docs/analysis/v2.1.3-post-deploy-analysis.md` +- v2.1.3 architecture review (20 anti-patterns, 9 refactors): `docs/analysis/v2.1.3-architecture-review.md` +- Remaining-work task list: `docs/analysis/v2.2-next-session.md` +- Changelog: `CHANGELOG-v2.2.0.md` diff --git a/docs/cooldown-metadata-filtering.md b/docs/cooldown-metadata-filtering.md new file mode 100644 index 000000000..a143c0d63 --- /dev/null +++ b/docs/cooldown-metadata-filtering.md @@ -0,0 +1,250 @@ +# Cooldown Metadata Filtering + +> **Audience:** Developers and operators. Describes the two-layer cooldown enforcement architecture introduced in v2.2.0. + +--- + +## Overview + +Pantera enforces artifact cooldown at two layers: + +1. **Soft metadata filter** -- when a client fetches package metadata (version lists, index pages), blocked versions are silently removed from the response. The client never sees blocked versions in the first place. +2. 
**Hard 403 response** -- if a client requests a specific blocked artifact by direct URL (bypassing metadata), Pantera returns a per-adapter 403 Forbidden response with structured error details. + +This two-layer design prevents build tools from resolving blocked versions while providing clear error messages when direct access is attempted. + +## Supported Adapters + +| Adapter | Metadata Format | Metadata Endpoint | Content-Type | Reuses | +|------------|--------------------------|----------------------------------|---------------------|----------| +| Maven | XML (`maven-metadata.xml`)| `.../{groupId}/{artifactId}/maven-metadata.xml` | `application/xml` | -- | +| Gradle | XML (same as Maven) | Same as Maven | `application/xml` | Maven | +| npm | JSON (packument) | `/{package}` | `application/json` | -- | +| PyPI | HTML (simple index) | `/simple/{package}/` | `text/html` | -- | +| Docker | JSON (tags list) | `/v2/{name}/tags/list` | `application/json` | -- | +| Go | Plain text (version list)| `/{module}/@v/list` | `text/plain` | -- | +| Composer | JSON (packages.json) | `/packages/{vendor}/{name}.json` or `/p2/` | `application/json` | -- | + +## Per-Adapter Metadata Filtering Behaviour + +### Maven / Gradle + +- **Parser:** DOM-parses `maven-metadata.xml`; extracts `<version>` elements from `<versions>`. +- **Filter:** Removes `<version>X</version>` nodes where X is blocked; updates `<latest>` and `<release>`. +- **Rewriter:** Serializes DOM back to XML bytes. +- **Detector:** Path ends with `maven-metadata.xml`. +- Gradle reuses Maven components (same metadata format). + +### npm + +- **Parser:** Jackson parses the JSON packument; extracts version keys from the `versions` object and release dates from the `time` object. +- **Filter:** Removes blocked version keys from `versions`, `time`, and `dist-tags` objects. +- **Rewriter:** Serializes modified JSON. +- **Detector:** Path matches the package name pattern (no file extension, no `/-/`). 
+- Supports pre-warming the release-date cache from the `time` field (H1 optimisation). + +### PyPI + +- **Parser:** Parses HTML simple index page; extracts `<a>` tags containing download links. +- **Filter:** Removes `<a>` tags for blocked versions. +- **Rewriter:** Serializes back to HTML. +- **Detector:** Path matches `/simple/`. + +### Docker + +- **Parser:** Jackson parses the `tags/list` JSON; extracts the `tags` array. +- **Filter:** Removes blocked tag strings from the `tags` array. +- **Rewriter:** Serializes JSON. +- **Detector:** Path matches `/v2/{name}/tags/list`. + +### Go + +- **Parser:** Splits plain-text response by newline; each non-empty line is a version string. +- **Filter:** Removes lines matching blocked versions. +- **Rewriter:** Joins remaining lines with newline. +- **Detector:** Path ends with `/@v/list`. + +### Composer + +- **Parser:** Jackson parses `packages.json`; extracts version keys from the `packages.{name}` map. +- **Filter:** Removes blocked version keys from the packages map. +- **Rewriter:** Serializes JSON. +- **Detector:** Path matches `/packages/` or `/p2/`. 
+ +## 403 Response Factories + +Each adapter provides a `CooldownResponseFactory` that builds format-appropriate 403 responses: + +| Adapter | Content-Type | Body Shape | +|------------|-----------------------|-------------------------------------------------| +| Maven | `text/plain` | Human-readable blocked message with unblock timestamp | +| npm | `application/json` | npm-compatible JSON error envelope | +| PyPI | `text/plain` | Human-readable message | +| Docker | `application/json` | Docker registry error spec JSON | +| Go | `text/plain` | Human-readable message with ISO-8601 timestamp | +| Composer | `application/json` | Composer-compatible JSON error | +| Gradle | `text/plain` | Reuses Maven factory | + +All responses include: +- `Retry-After` header (seconds until block expires) +- `X-Pantera-Cooldown: blocked` header + +## Performance Characteristics + +### H1: Pre-warmed Release-Date Cache + +When metadata is fetched and parsed, release dates embedded in the metadata (e.g., npm's `time` field) are extracted and used to pre-warm the `CooldownCache` L1. Versions older than the cooldown period are guaranteed allowed, so the L1 cache is populated with `false` (allowed) immediately -- avoiding a DB/Valkey round-trip on the hot path for the majority of versions. + +### H2: Parallel Bounded Version Evaluation + +Version cooldown evaluation runs in parallel on a dedicated 4-thread executor pool, bounded to a maximum of 50 versions per request. Versions are dispatched via `CompletableFuture.allOf()` for concurrent evaluation, reducing end-to-end latency for metadata with many recent versions. + +### H3: Stale-While-Revalidate (SWR) on FilteredMetadataCache + +When a cached metadata entry expires, the stale bytes are returned immediately to the caller while a background task re-evaluates the metadata. This eliminates tail latency spikes at cache expiry boundaries. The SWR grace period is 5 minutes beyond the logical TTL. 
+ +### H4: L1 Cache Capacity (50K entries) + +The `FilteredMetadataCache` L1 (Caffeine) defaults to 50,000 entries. Configurable via the `PANTERA_COOLDOWN_METADATA_L1_SIZE` environment variable. + +### H5: Inflight-Map Memory Leak Fix + +The `CooldownCache` inflight deduplication map now guarantees removal on both success and exceptional completion, preventing memory leaks when DB queries fail. A 30-second `orTimeout` safety net prevents zombie entries from lingering. + +## Architecture: Adapter Bundle Registration + +Each adapter registers a `CooldownAdapterBundle` at startup via `CooldownAdapterRegistry`: + +```java +public record CooldownAdapterBundle( + MetadataParser parser, + MetadataFilter filter, + MetadataRewriter rewriter, + MetadataRequestDetector detector, + CooldownResponseFactory responseFactory +) {} +``` + +When a metadata request arrives, the proxy layer: +1. Looks up the bundle by `repoType` +2. Uses `detector.isMetadataRequest(path)` to confirm it is a metadata request +3. Routes through `MetadataFilterService.filterMetadata(...)` with the bundle's parser/filter/rewriter +4. 
On direct artifact request for a blocked version, uses `responseFactory.forbidden(block)` for the 403 response + +## Admin Operations + +### Unblock a Specific Version + +```bash +curl -X POST "http://pantera:8086/api/v1/cooldown/unblock" \ + -H "Authorization: Bearer $TOKEN" \ + -d '{"repo_type":"npm","repo_name":"npm-proxy","package":"lodash","version":"4.18.0"}' +``` + +On unblock: +- The DB record is updated first +- `FilteredMetadataCache` L1 + L2 are invalidated for the package +- `CooldownCache` L1 + L2 are invalidated for the specific version +- All invalidation futures complete synchronously before the 200 response + +### Policy Change (Duration Update) + +When the cooldown duration is changed (e.g., 30d to 7d): +- `FilteredMetadataCache.clearAll()` is called to flush all cached filtered metadata +- Subsequent requests re-evaluate all versions against the new policy + +### Cache Invalidation + +Manual full cache invalidation: +```bash +curl -X POST "http://pantera:8086/api/v1/cooldown/invalidate" \ + -H "Authorization: Bearer $TOKEN" \ + -d '{"repo_type":"npm","repo_name":"npm-proxy"}' +``` + +## Configuration + +### Global Cooldown Duration + +Set in `pantera.yaml`: +```yaml +meta: + cooldown: + enabled: true + minimum_allowed_age: 72h # default: 72 hours +``` + +### Per-Repo-Type Override + +```yaml +meta: + cooldown: + repo_types: + npm: + enabled: true + minimum_allowed_age: 168h # 7 days for npm + maven: + enabled: true + minimum_allowed_age: 72h +``` + +### Per-Repo-Name Override (highest priority) + +```yaml +meta: + cooldown: + repos: + my-internal-npm: + enabled: false # disable for this specific repo +``` + +### L1 Cache Size + +Environment variable: +```bash +export PANTERA_COOLDOWN_METADATA_L1_SIZE=50000 # default +``` + +### FilteredMetadataCache (YAML) + +```yaml +meta: + caches: + cooldown-metadata: + ttl: 24h + maxSize: 50000 + valkey: + enabled: true + l1MaxSize: 500 + l1Ttl: 5m + l2Ttl: 24h +``` + +## Package Structure + +``` 
+pantera-core/.../cooldown/ + api/ CooldownService, CooldownInspector, CooldownRequest, CooldownResult, + CooldownBlock, CooldownReason + cache/ CooldownCache, CooldownCacheConfig, CooldownCacheMetrics + metadata/ CooldownMetadataService, MetadataParser, MetadataFilter, + MetadataRewriter, MetadataRequestDetector, MetadataFilterService, + FilteredMetadataCache, VersionComparators + response/ CooldownResponseFactory, CooldownResponseRegistry + config/ CooldownSettings, CooldownAdapterBundle, CooldownAdapterRegistry, + InspectorRegistry + metrics/ CooldownMetrics + impl/ DefaultCooldownService, CachedCooldownInspector, NoopCooldownService + +{adapter}/cooldown/ + {Adapter}MetadataParser, {Adapter}MetadataFilter, {Adapter}MetadataRewriter, + {Adapter}MetadataRequestDetector, {Adapter}CooldownResponseFactory +``` + +## Testing + +- **235+ unit tests** across all 7 adapters (parser, filter, rewriter, detector, response factory) +- **13 integration tests** (MetadataFilterServiceIntegrationTest, CooldownAdapterRegistryTest) +- **2 chaos tests** (CooldownConcurrentFilterStampedeTest: 100-concurrent stampede dedup) +- **Performance tests** (VersionEvaluationParallelTest, CooldownMetadataServicePerformanceTest) +- **SWR tests** (FilteredMetadataCacheSWRTest) diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 60f8582ee..b76116d17 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -1206,4 +1206,18 @@ jattach 1 jcmd "JFR.dump name=pantera filename=/var/pantera/logs/pantera.jfr" --- +## 17. Performance benchmarking + +Pantera ships a local Docker-based scaling benchmark under [`performance/`](../performance/README.md) that measures saturation-rps and SLO-rps at configurable CPU/RAM sizes. See the linked README for prerequisites, the run commands (`make smoke`, `make matrix`), and interpretation caveats. 
+ +Quick start: +```bash +cd performance +make setup # generate mock-upstream bodies +make smoke # one cell end-to-end (~15-20 min) +make matrix # full 6-cell matrix (~2 h) +``` + +--- + *This document covers Pantera version 2.0.0. For questions, contact the Auto1 DevOps Team.* diff --git a/docs/developer-guide/caching.md b/docs/developer-guide/caching.md new file mode 100644 index 000000000..d9d0e8ae5 --- /dev/null +++ b/docs/developer-guide/caching.md @@ -0,0 +1,109 @@ +# Caching + +> **Guide:** Developer Guide | **Section:** Caching + +Pantera's caching strategy is a canonical L1 Caffeine + L2 Valkey two-tier layout with a pub/sub invalidation channel. This page describes the pattern, the existing implementations, and the rules for adding a new cached component. + +--- + +## Canonical Pattern + +A cached component in Pantera looks like this: + +1. **L1 (Caffeine)** -- In-process, per-JVM. Microsecond reads. Bounded by `maxSize`; `expireAfterWrite` via the configured TTL. +2. **L2 (Valkey)** -- Shared across cluster nodes. Millisecond reads with a strict `timeoutMs` ceiling; on timeout the L2 is treated as a miss, not a failure. Survives JVM restart. +3. **Invalidation** -- Writes that mutate the underlying truth (DB, repo config, etc.) publish an invalidation message on `CacheInvalidationPubSub`. Every node subscribes; each subscriber evicts the keyed L1 entry and deletes the L2 entry. + +The three reference implementations to study before building a new one: + +| Class | Purpose | +|---|---| +| `com.auto1.pantera.auth.CachedUsers` | Caches the User record by username. Oldest of the three; establishes the pattern. | +| `com.auto1.pantera.auth.CachedLocalEnabledFilter` | Caches the per-user "enabled" flag in front of `LocalEnabledFilter`. Added in v2.2.0 (Group B). | +| `com.auto1.pantera.group.GroupMetadataCache` | Two-tier stale fallback for group repositories. Added in v2.2.0 (Group C). Demonstrates the "aid, not breaker" principle. 
| + +--- + +## Design Principle: Cache Is an Aid, Never a Breaker + +This principle governs every cached-path decision in Pantera. It has three consequences: + +1. **Stale fallbacks are last-resort, not required.** If L1 + L2 both miss, the primary path (DB query, upstream fetch) runs and produces a live result. No "stale-only" mode exists where a cold cache forces failure. +2. **Cache failures never become client failures.** An L2 timeout or Valkey unavailability degrades to L1-only operation; the request still completes. Instrumentation records the degradation but does not escalate it. +3. **Bounds are safety nets, not expiry mechanisms.** The `maxSize` on a cache tier exists to prevent pathological memory growth. Under realistic cardinality no eviction fires -- entries expire via TTL or invalidation. If an operator sees persistent eviction, the sizing is wrong, not the workload. + +`GroupMetadataCache` is the canonical example: its degradation ladder is `L1 -> L2 -> expired primary-cache entry -> miss`, where the final "miss" falls through to the normal live fanout. No tier being unavailable breaks the contract. + +--- + +## How to Add a New Cached Component + +The minimum checklist: + +### 1. Write the decorator + +A cached component is a decorator that wraps the underlying truth source. 
Follow `CachedLocalEnabledFilter` for the simplest shape: + +```java +public class CachedThing implements Thing { + private final Thing delegate; + private final Cache<String, Value> l1; // Caffeine + private final ValkeyCache<Value> l2; // see GlobalCacheConfig + + @Override + public CompletableFuture<Value> get(String key) { + // L1 hit + Value v = l1.getIfPresent(key); + if (v != null) return CompletableFuture.completedFuture(v); + // L2 read with timeoutMs + return l2.getAsync(key) + .thenCompose(l2hit -> { + if (l2hit != null) { l1.put(key, l2hit); return completed(l2hit); } + return delegate.get(key).thenApply(v2 -> { + l1.put(key, v2); + l2.putAsync(key, v2); + return v2; + }); + }); + } + + public void invalidate(String key) { + l1.invalidate(key); + l2.deleteAsync(key); + } +} +``` + +### 2. Wire config into `GlobalCacheConfig` + +Add a nested record for your cache's settings alongside `AuthEnabled` and `GroupMetadataStale`. Honor the **3-tier precedence**: + +``` +environment variable -> YAML (meta.caches.<name>) -> compile-time default +``` + +Never inline a literal default -- always route through `ConfigDefaults.getLong / getBoolean / getInt`. This is a hard requirement; the admin guide's "no cache setting is hardcoded" contract is enforced here. + +### 3. Subscribe to `CacheInvalidationPubSub` + +Use `CacheInvalidationPubSub.subscribe(String namespace, Consumer<String>)`. The namespace is a short stable tag (e.g. `auth.enabled`, `group.metadata`) used as the pub/sub channel suffix. The consumer receives the invalidation key and evicts both tiers. + +### 4. Publish on mutation + +Every write site that mutates the underlying truth MUST publish. For auth, that's `UserHandler.put/delete/enable/disable/alterPassword`. Missing a publish site means one node's cache goes stale relative to the others -- a silent correctness bug. + +### 5. Document in the admin guide + +Add a section to `docs/admin-guide/cache-configuration.md` with the full env-var/YAML/default table. 
This is where operators discover tunables. + +### 6. Regression test + +At minimum: (a) cache hit returns the value, (b) cache miss falls through to delegate, (c) `invalidate(key)` empties both tiers, (d) L2 timeout degrades gracefully to L1-only, (e) pub/sub receive triggers eviction. `CachedLocalEnabledFilterTest` is the closest template. + +--- + +## Related Pages + +- [Admin: Cache Configuration](../admin-guide/cache-configuration.md) -- Operator-facing reference. +- [Admin: Valkey Setup](../admin-guide/valkey-setup.md) -- L2 server-side requirements. +- [Fault Model](fault-model.md) -- How cache failures map to faults (they don't, by construction). diff --git a/docs/developer-guide/cooldown.md b/docs/developer-guide/cooldown.md new file mode 100644 index 000000000..efa27d97e --- /dev/null +++ b/docs/developer-guide/cooldown.md @@ -0,0 +1,54 @@ +# Cooldown (Developer View) + +> **Guide:** Developer Guide | **Section:** Cooldown + +This page is the contributor-facing notes on the cooldown response-factory registry. For the end-user view, see [User Guide: Cooldown](../user-guide/cooldown.md). For operator configuration, see [Admin Guide: Cooldown](../admin-guide/cooldown.md). + +--- + +## `CooldownResponseRegistry` -- use `getOrThrow` + +Every adapter that needs to emit a 403 "blocked by cooldown" response does so through `CooldownResponseRegistry`. There are two lookup methods: + +| Method | When to use | +|---|---| +| `getOrThrow(repoType)` | **Default.** Every production call site. Missing registration is a startup-time bug and should fail loudly. | +| `get(repoType)` | Only when the caller genuinely handles the missing-factory case (tests, diagnostic tools). | + +As of v2.2.0 (Group G), all 11 adapter sites across files / npm / pypi / composer / go / docker are migrated to `getOrThrow`. `BaseCachedProxySlice`'s former silent fallback (`.orElseGet(() -> CooldownResponses.forbidden(block))`) is replaced by `getOrThrow` as well -- the old fallback is deleted. 
+
+### Why
+
+The `.get(repoType)` path previously NPE'd on missing registration, losing the descriptive `repoType` context. `getOrThrow` produces a clear `IllegalStateException("No CooldownResponseFactory registered for repoType: <repoType>")` instead, making the startup wiring omission immediately triage-able.
+
+---
+
+## Adapter Responsibility: Register at Startup
+
+Every adapter that participates in cooldown MUST register its response factory through `CooldownWiring` during startup. The canonical shape:
+
+```java
+CooldownWiring.register(registry, "<repoType>", myFactory);
+// plus any aliases the routing layer might resolve to
+CooldownWiring.register(registry, "<alias-1>", myFactory);
+CooldownWiring.register(registry, "<alias-2>", myFactory);
+```
+
+Aliases matter. The v2.2.0 wiring currently ships aliases for `npm-proxy`, `pypi-proxy`, `docker-proxy`, `go-proxy`, `php`, `php-proxy` so every `repoType` string that can reach the registry at runtime resolves to a factory.
+
+**Missing registration now fails fast.** With `getOrThrow` on every site, the first request that routes to the unregistered type will throw `IllegalStateException` instead of serving a degenerate response or NPE'ing. This is intentional: a silent fallback hides the bug forever; a loud error surfaces it in the first canary smoke test.
+
+### Checklist when adding a new adapter
+
+1. Implement your `CooldownResponseFactory` (format-appropriate 403 body).
+2. Call `CooldownWiring.register(...)` for the primary `repoType` in your adapter's bootstrap.
+3. Register every alias the routing layer can produce for your family. Check `ApiRoutingSlice` normalization rules if unsure.
+4. Adapter test coverage: at least one test that goes through the full wiring and hits your factory via `getOrThrow`.
+
+---
+
+## Related Pages
+
+- [Fault Model](fault-model.md) -- Cooldown block is `Fault.Forbidden`, not a bespoke fault.
+- [Admin: Cooldown](../admin-guide/cooldown.md) -- Operator configuration.
+- [User: Cooldown](../user-guide/cooldown.md) -- End-user view. diff --git a/docs/developer-guide/fault-model.md b/docs/developer-guide/fault-model.md new file mode 100644 index 000000000..29e7c52bf --- /dev/null +++ b/docs/developer-guide/fault-model.md @@ -0,0 +1,56 @@ +# Fault Model + +> **Guide:** Developer Guide | **Section:** Fault Model + +Pantera's request pipeline distinguishes between *faults* (typed, expected, translated to a well-defined HTTP status) and *throwables* (untyped, unexpected, classified via `FaultClassifier` fallback). Every slice returning `Result` produces one or the other -- the only decision point for HTTP status mapping is `FaultTranslator`. + +--- + +## Sealed Fault Hierarchy + +The `Fault` type lives in `pantera-core/src/main/java/com/auto1/pantera/http/fault/`. It is a sealed interface -- every variant is enumerated in one file, so the compiler forces `FaultTranslator`'s switch to be exhaustive. + +| Variant | Emitted by | Client-facing status | +|---|---|---| +| `NotFound` | Slices that resolved the repo but not the artifact. | `404` | +| `Forbidden` | Auth/policy slices, cooldown block. | `403` | +| `IndexUnavailable` | `DbArtifactIndex` on executor saturation. | `500` + `X-Pantera-Fault: index-unavailable` | +| `StorageUnavailable` | Storage backend refused a read/write. | `500` + `X-Pantera-Fault: storage-unavailable` | +| `AllProxiesFailed` | Group/proxy fanout with no member success. | Pass-through of the winning proxy's response + `X-Pantera-Fault: all-proxies-failed` + `X-Pantera-Proxies-Tried: ` | +| `UpstreamIntegrity` | `ProxyCacheWriter` on primary/sidecar digest mismatch. | `502` + `X-Pantera-Fault: upstream-integrity:` | +| `Deadline` | Request `Deadline` expired before response produced. | `504` + `X-Pantera-Fault: deadline-exceeded` | +| `Overload` | Bounded queue or semaphore refused admission. | `503` + `X-Pantera-Fault: overload:` | +| `Internal` | Catch-all; classification fallback only. 
| `500` + `X-Pantera-Fault: internal` | + +The full policy table (worked examples for AllProxiesFailed pass-through, ordering between NotFound and Overload, etc.) is in `docs/analysis/v2.2-target-architecture.md` Β§9 and locked by the `FaultAllProxiesFailedPassThroughTest` + `FaultTranslatorTest` suites. + +--- + +## New Emitters (v2.2.0) + +### `DbArtifactIndex -> Fault.IndexUnavailable` + +Added in Group H.1. The index executor switched from `CallerRunsPolicy` to `AbortPolicy`. On queue saturation the `RejectedExecutionException` (whether synchronous from `supplyAsync` or asynchronous via normal completion) is mapped to `Fault.IndexUnavailable` by `GroupResolver`'s existing `.exceptionally(...)` handler. `FaultTranslator` returns a 500 with `X-Pantera-Fault: index-unavailable`. + +Behavioral note: a follow-up commit (`abee2ec9`) wrapped `CompletableFuture.supplyAsync` in a try/catch so the synchronous rejection is always observed via the returned future, not propagated up the stack. Callers on the Vert.x event loop see a failed future, never a raw exception. + +--- + +## How to Add a New Fault Variant + +1. **Add the record to `Fault.java`.** Keep the field list minimal -- whatever a translator or a classifier needs, nothing more. Example: `UpstreamIntegrity` carries the `algo` string so the header tag can include it. +2. **Extend `FaultTranslator.translate(...)`.** The switch is exhaustive -- the compiler will reject the build until you add a case. Decide: HTTP status, headers, optional body. +3. **Extend `FaultClassifier.classify(...)` if the fault has a fallback path.** Not every fault needs one; only add a classifier case if there is a `Throwable` type that should map to the new fault without an explicit emit site. Most new faults don't need this. +4. 
**Regression tests.** At minimum, one test that emits the new fault end-to-end through `FaultTranslator` and asserts the response shape, and one exhaustive-switch guard (`FaultTranslatorTest.translatesEveryFaultVariant`). +5. **Document the new header tag in the user guide.** Append to `docs/user-guide/error-reference.md` so external consumers know what the tag means. + +Rule of thumb: if you are about to emit `Fault.Internal(e)` from a new slice, stop and consider whether the failure mode deserves its own typed variant. `Internal` is a last resort; every other fault is strictly better diagnostic signal. + +--- + +## Related Pages + +- [Reactive Lifecycle](reactive-lifecycle.md) -- How faults propagate through cancel-aware chains. +- [Caching](caching.md) -- Why cache failures do NOT emit faults. +- [Admin: Runbooks](../admin-guide/runbooks.md) -- Operator response to each fault. +- [User: Error Reference](../user-guide/error-reference.md) -- Client-facing tag glossary. diff --git a/docs/developer-guide/reactive-lifecycle.md b/docs/developer-guide/reactive-lifecycle.md new file mode 100644 index 000000000..680603746 --- /dev/null +++ b/docs/developer-guide/reactive-lifecycle.md @@ -0,0 +1,84 @@ +# Reactive Lifecycle + +> **Guide:** Developer Guide | **Section:** Reactive Lifecycle + +Pantera streams bodies through RxJava2 `Flowable` chains. Every chain that owns a resource -- file handles, pooled HTTP connections, temp files -- MUST wire all three terminal paths: complete, error, and cancel. Missing the cancel path is the single most common source of resource leaks in the codebase. + +--- + +## Request/Response Lifecycle with Cancel Propagation + +At 1000 req/s, clients disconnecting mid-response is routine. Before v2.2.0 those disconnects did not propagate into the slice subscription -- upstream Jetty fetches kept streaming bytes into dead sockets until the next write organically failed, wasting upstream bandwidth and holding file handles. 
+ +The v2.2.0 fix (Group A) wires end-to-end cancel: + +1. `VertxSliceServer` registers a `closeHandler` on `request.connection()` and `exceptionHandler` on both request and response. +2. The reactive-streams `Subscription` is captured via `doOnSubscribe` and stashed in an `AtomicReference` cancel hook. +3. On any disconnect signal, the cancel hook fires `subscription.cancel()`, which propagates up the `Flowable` chain. +4. Each `Flowable` operator that owns a resource observes the cancel via `doOnCancel` and releases. + +The net effect: client disconnect -> Pantera's response path cancels -> upstream `HttpClient.GET` cancels -> upstream socket closes -> Jetty releases buffers. All within a single event-loop tick. + +--- + +## The Three-Terminal-Path Pattern + +Every `Flowable`/`Publisher` chain that owns a resource has exactly three terminal paths: + +| Path | Trigger | Required action | +|---|---|---| +| **Complete** | Upstream emits `onComplete`. | Normal finalization. Resource is almost always already released by virtue of the `onComplete` observer. | +| **Error** | Upstream emits `onError(Throwable)`. | Explicit cleanup via `doOnError`. The downstream subscriber will also see the error -- do not swallow. | +| **Cancel** | Downstream subscriber cancels. | Explicit cleanup via `doOnCancel`. This is the path that is most commonly missed. | + +If your operator only handles complete + error, a cancelling subscriber will leak. Complete + error observers are not invoked on cancel -- cancel is a third, separate terminal. + +### Canonical Example: `CachingBlob.content` + +`CachingBlob.content` streams a primary body into a temp file while hashing it. 
It wires all three paths: + +```java +return stream + .doOnComplete(() -> { /* success: temp file promoted to final location */ }) + .doOnError(e -> { channel.close(); tempFile.deleteIfExists(); }) + .doOnCancel(() -> { channel.close(); tempFile.deleteIfExists(); }); +``` + +The error and cancel cleanup blocks are textually identical -- the channel must close and the temp file must be deleted on any non-success termination. Missing `doOnCancel` here would produce file-descriptor leaks on every client disconnect mid-download. + +Other call-sites following the same pattern (updated in v2.2.0): + +- `StreamThroughCache` -- `doOnCancel` matches existing `doOnError`. +- `DiskCacheStorage` -- ditto. +- `VertxRxFile.save` -- added safety-net `doOnError` closing the `AsyncFile` on upstream error. +- `ArtifactHandler` (both download paths) -- captures the `Disposable` returned by `Flowable.subscribe` and disposes on response `closeHandler` / `exceptionHandler`. + +--- + +## Requirement for New Reactive Sites + +When adding a new `Flowable` / `Publisher` chain that owns any of the following: + +- A file handle (`FileChannel`, `AsyncFile`, `InputStream`, `OutputStream`) +- A temp file / temp directory +- A pooled HTTP connection +- A database cursor / `ResultSet` +- Any native resource (ByteBuffer direct memory, off-heap anything) + +you MUST wire all three terminal paths. A missing `doOnCancel` is a leak; reviewers should treat it as a required change request. + +### Self-check + +Before merging a new reactive site, ask: + +1. "If a subscriber cancels right now, what leaks?" +2. "Is the cleanup on cancel identical to the cleanup on error? If so, have I written `.doOnError(...).doOnCancel(...)` with the same block?" +3. "Is there an integration test that exercises the cancel path?" -- `VertxSliceServerCancelPropagationTest` and the chaos tests in `pantera-main/src/test/java/com/auto1/pantera/chaos/` are the templates. 
+ +--- + +## Related Pages + +- [Caching](caching.md) -- Cache reads/writes must also honor cancel. +- [Fault Model](fault-model.md) -- Cancel is NOT a fault; it is a normal terminal path. +- [Admin: Runbooks](../admin-guide/runbooks.md) -- Operator view of the signals. diff --git a/docs/slo/docker-pull.md b/docs/slo/docker-pull.md new file mode 100644 index 000000000..638abf1c9 --- /dev/null +++ b/docs/slo/docker-pull.md @@ -0,0 +1,17 @@ +# SLO: docker-pull + +| Metric | Target | +|--------|--------| +| Availability | 99.9% (28-day rolling) | +| p50 latency | 40ms | +| p95 latency | 150ms | +| p99 latency | 400ms | +| Error budget (28d) | ~40 min | + +## Burn-rate alerts +- Fast (5m/1h): consuming 14d budget in 1h -> page +- Slow (6h/1d): consuming 7d budget in 6h -> ticket + +## Measurement +- Source: Prometheus `pantera_http_request_duration_seconds{repo="docker-pull"}` +- Window: 28-day rolling diff --git a/docs/slo/file-raw.md b/docs/slo/file-raw.md new file mode 100644 index 000000000..a36eda779 --- /dev/null +++ b/docs/slo/file-raw.md @@ -0,0 +1,17 @@ +# SLO: file-raw + +| Metric | Target | +|--------|--------| +| Availability | 99.95% (28-day rolling) | +| p50 latency | 10ms | +| p95 latency | 40ms | +| p99 latency | 100ms | +| Error budget (28d) | ~20 min | + +## Burn-rate alerts +- Fast (5m/1h): consuming 14d budget in 1h -> page +- Slow (6h/1d): consuming 7d budget in 6h -> ticket + +## Measurement +- Source: Prometheus `pantera_http_request_duration_seconds{repo="file-raw"}` +- Window: 28-day rolling diff --git a/docs/slo/maven-group.md b/docs/slo/maven-group.md new file mode 100644 index 000000000..aae0576a4 --- /dev/null +++ b/docs/slo/maven-group.md @@ -0,0 +1,17 @@ +# SLO: maven-group + +| Metric | Target | +|--------|--------| +| Availability | 99.9% (28-day rolling) | +| p50 latency | 35ms | +| p95 latency | 140ms | +| p99 latency | 350ms | +| Error budget (28d) | ~40 min | + +## Burn-rate alerts +- Fast (5m/1h): consuming 14d budget in 1h -> page 
+- Slow (6h/1d): consuming 7d budget in 6h -> ticket + +## Measurement +- Source: Prometheus `pantera_http_request_duration_seconds{repo="maven-group"}` +- Window: 28-day rolling diff --git a/docs/slo/maven-proxy.md b/docs/slo/maven-proxy.md new file mode 100644 index 000000000..688b69a12 --- /dev/null +++ b/docs/slo/maven-proxy.md @@ -0,0 +1,17 @@ +# SLO: maven-proxy + +| Metric | Target | +|--------|--------| +| Availability | 99.9% (28-day rolling) | +| p50 latency | 25ms | +| p95 latency | 100ms | +| p99 latency | 250ms | +| Error budget (28d) | ~40 min | + +## Burn-rate alerts +- Fast (5m/1h): consuming 14d budget in 1h -> page +- Slow (6h/1d): consuming 7d budget in 6h -> ticket + +## Measurement +- Source: Prometheus `pantera_http_request_duration_seconds{repo="maven-proxy"}` +- Window: 28-day rolling diff --git a/docs/slo/npm-group.md b/docs/slo/npm-group.md new file mode 100644 index 000000000..656c06163 --- /dev/null +++ b/docs/slo/npm-group.md @@ -0,0 +1,17 @@ +# SLO: npm-group + +| Metric | Target | +|--------|--------| +| Availability | 99.9% (28-day rolling) | +| p50 latency | 30ms | +| p95 latency | 120ms | +| p99 latency | 300ms | +| Error budget (28d) | ~40 min | + +## Burn-rate alerts +- Fast (5m/1h): consuming 14d budget in 1h -> page +- Slow (6h/1d): consuming 7d budget in 6h -> ticket + +## Measurement +- Source: Prometheus `pantera_http_request_duration_seconds{repo="npm-group"}` +- Window: 28-day rolling diff --git a/docs/slo/npm-proxy.md b/docs/slo/npm-proxy.md new file mode 100644 index 000000000..1b287214d --- /dev/null +++ b/docs/slo/npm-proxy.md @@ -0,0 +1,17 @@ +# SLO: npm-proxy + +| Metric | Target | +|--------|--------| +| Availability | 99.9% (28-day rolling) | +| p50 latency | 20ms | +| p95 latency | 80ms | +| p99 latency | 200ms | +| Error budget (28d) | ~40 min | + +## Burn-rate alerts +- Fast (5m/1h): consuming 14d budget in 1h -> page +- Slow (6h/1d): consuming 7d budget in 6h -> ticket + +## Measurement +- Source: Prometheus 
`pantera_http_request_duration_seconds{repo="npm-proxy"}` +- Window: 28-day rolling diff --git a/docs/slo/pypi-proxy.md b/docs/slo/pypi-proxy.md new file mode 100644 index 000000000..160500d37 --- /dev/null +++ b/docs/slo/pypi-proxy.md @@ -0,0 +1,17 @@ +# SLO: pypi-proxy + +| Metric | Target | +|--------|--------| +| Availability | 99.9% (28-day rolling) | +| p50 latency | 20ms | +| p95 latency | 80ms | +| p99 latency | 200ms | +| Error budget (28d) | ~40 min | + +## Burn-rate alerts +- Fast (5m/1h): consuming 14d budget in 1h -> page +- Slow (6h/1d): consuming 7d budget in 6h -> ticket + +## Measurement +- Source: Prometheus `pantera_http_request_duration_seconds{repo="pypi-proxy"}` +- Window: 28-day rolling diff --git a/docs/user-guide/error-reference.md b/docs/user-guide/error-reference.md new file mode 100644 index 000000000..1883a8319 --- /dev/null +++ b/docs/user-guide/error-reference.md @@ -0,0 +1,80 @@ +# Error Reference + +> **Guide:** User Guide | **Section:** Error Reference + +This page explains the 5xx responses Pantera can return and how clients should react to each. Every 5xx carries an `X-Pantera-Fault: ` header; match the tag against the sections below. + +For auth-related 4xx, see [Troubleshooting](troubleshooting.md). + +--- + +## `500` with `X-Pantera-Fault: index-unavailable` + +**What it means:** Pantera's artifact-index executor is saturated. The request arrived at a moment when the DB-backed index queue was full. + +**Client action:** Retry with exponential backoff. This condition is transient by design -- the whole point of returning a typed 500 here is that the backend has shed load rather than letting the queue block. A simple retry with 100 ms -> 200 ms -> 400 ms backoff will almost always succeed. + +**Persistent failure:** If the 500 rate stays elevated across multiple retry windows, it's an operational incident on the Pantera side -- your administrator should see the signal. 
+
+---
+
+## `500` with `X-Pantera-Fault: storage-unavailable`
+
+**What it means:** Pantera's storage backend (S3, filesystem, or OpenStack Swift) refused a read or write.
+
+**Client action:** Retry with backoff. Most occurrences are transient (a momentary S3 5xx, a brief I/O spike). If the failure persists across several minutes of retries, it is a storage-layer outage and client-side retry will not recover it.
+
+---
+
+## `502` with `X-Pantera-Fault: upstream-integrity:<algo>`
+
+**What it means:** The upstream registry advertised a digest (`<algo>` is one of `md5`, `sha1`, `sha256`, `sha512`) for the artifact you requested, but the bytes upstream returned did not match that digest. Pantera refuses to cache a drifted primary/sidecar pair -- nothing was written to our cache.
+
+**Client action:** Retry. The next fetch will re-pull from upstream and may succeed if the upstream served transient drift (common with CDN-fronted registries). If the failure is sticky, the upstream registry itself has a drift problem; report to that registry's operator.
+
+**Why this matters to you:** A cached drifted artifact would later produce a `ChecksumFailureException` in your Maven / Gradle / pip client. The 502 here is strictly better than that downstream failure -- you are never served bytes that failed digest verification.
+
+---
+
+## `5xx` with `X-Pantera-Fault: all-proxies-failed` (behavior change in v2.2.0)
+
+**What it means:** Every member of a group or proxy repository was attempted and none produced a success.
+
+**v2.2.0 change:** Pantera now **passes through the winning proxy's actual response**. Previously this was always synthesized as `502`; now you may see the real `503` / `504` / `500` that the upstream returned. The response includes:
+
+- `X-Pantera-Fault: all-proxies-failed`
+- `X-Pantera-Proxies-Tried: <n>` (integer: how many members were attempted)
+
+**Client action:** Retry per your client's normal 5xx policy.
The pass-through gives your client more signal than the synthesized 502 did -- a `504` from an upstream, for instance, now reaches you as a `504` rather than being flattened to `502`.
+
+---
+
+## `503` with `X-Pantera-Fault: overload:<resource>`
+
+**What it means:** A named internal resource (thread pool, semaphore, queue) refused to admit your request because it was at capacity. The `<resource>` suffix identifies which one.
+
+**Client action:** Retry with backoff. Standard Retry-After-style semantics apply.
+
+---
+
+## `504` with `X-Pantera-Fault: deadline-exceeded`
+
+**What it means:** The request's end-to-end deadline expired before Pantera could produce a response. The deadline is set at request entry (default 30 s) and threaded through every hop.
+
+**Client action:** Retry. If your workload legitimately needs longer than 30 s, coordinate with your administrator -- the deadline is an operator-configurable limit.
+
+---
+
+## `500` with `X-Pantera-Fault: internal`
+
+**What it means:** Catch-all for anything the fault classifier could not type more specifically. Every `internal` fault is logged on the server side with a full stack trace.
+
+**Client action:** Retry once; if it recurs, escalate to your administrator with the request's trace ID (carried in the `X-Request-Id` / `trace.id` field).
+
+---
+
+## Related Pages
+
+- [Response Headers](response-headers.md) -- All custom Pantera response headers.
+- [Streaming Downloads](streaming-downloads.md) -- Cancel behavior.
+- [Troubleshooting](troubleshooting.md) -- Auth and "not found" issues.
diff --git a/docs/user-guide/index.md b/docs/user-guide/index.md
index cc5d61dcc..3de7bcba3 100644
--- a/docs/user-guide/index.md
+++ b/docs/user-guide/index.md
@@ -17,6 +17,7 @@ Welcome to the Pantera Artifact Registry User Guide.
This guide covers everythin Step-by-step instructions for configuring your client and working with each package format: - [Maven](repositories/maven.md) -- Pull dependencies, deploy artifacts, configure `settings.xml`. +- [Gradle](repositories/gradle.md) -- Resolve dependencies and publish artifacts; aliases to the Maven family. - [npm](repositories/npm.md) -- Install packages, publish packages, configure `.npmrc`. - [Docker](repositories/docker.md) -- Pull images, push images, configure Docker daemon. - [PyPI](repositories/pypi.md) -- Install packages with pip, upload with twine, configure `pip.conf`. diff --git a/docs/user-guide/repositories/go.md b/docs/user-guide/repositories/go.md index 6e833bc9a..b9b87b78c 100644 --- a/docs/user-guide/repositories/go.md +++ b/docs/user-guide/repositories/go.md @@ -69,6 +69,57 @@ The proxy caches downloaded modules locally. Subsequent fetches from any develop --- +## Go Proxy (`go-proxy`) + +A `go-proxy` repository caches modules from an upstream Go module proxy (typically `https://proxy.golang.org`) on first request, then serves subsequent requests from the local cache. Cached bytes survive upstream outages and are shared across all clients pointing at the same Pantera host. + +**When to use** + +- Teams that want a shared module cache to reduce egress and speed up CI. +- Air-gapped or rate-limited environments that need a reliable mirror of `proxy.golang.org`. +- Any Go development where reproducible, auditable dependency resolution matters. + +**Minimal YAML** + +```yaml +# go-proxy.yaml +repo: + type: go-proxy + storage: + type: fs + path: /var/pantera/data + remotes: + - url: https://proxy.golang.org +``` + +Point `GOPROXY` at the proxy URL (see [Configure GOPROXY](#configure-goproxy) above). See [Cooldown](../cooldown.md) for controls over newly published upstream versions, and the [Management UI guide](../ui-guide.md#creating-repositories) for admin workflows. 
+ +--- + +## Go Group (`go-group`) + +A `go-group` repository is a virtual repository that fans out requests across a list of member repositories (`go` locals and `go-proxy` proxies) in resolution order. The first member that serves the module wins. Groups do not store artifacts themselves β€” they delegate to members. + +**When to use** + +- You want developers to publish internal Go modules to a `go` local while still resolving public modules through a `go-proxy` in the same URL. +- You want to switch upstream proxies (e.g., primary and fallback) without reconfiguring every client. + +**Minimal YAML** + +```yaml +# go-group.yaml +repo: + type: go-group + members: + - go-local + - go-proxy +``` + +Clients set `GOPROXY` to the group URL (`http://pantera-host:8080/go-group`); Pantera handles fan-out. See the [Management UI guide](../ui-guide.md#adding-members-to-a-group-repository) for how to add, reorder, and create members from the web interface. + +--- + ## Common Issues | Symptom | Cause | Fix | diff --git a/docs/user-guide/repositories/gradle.md b/docs/user-guide/repositories/gradle.md new file mode 100644 index 000000000..88da1601f --- /dev/null +++ b/docs/user-guide/repositories/gradle.md @@ -0,0 +1,179 @@ +# Gradle + +> **Guide:** User Guide | **Section:** Repositories / Gradle + +This page covers how to configure Gradle to resolve dependencies from and publish artifacts to Pantera. Gradle uses the Maven repository format on the wire, so Pantera treats `gradle`, `gradle-proxy`, and `gradle-group` as an alias family over the same Maven resolution paths. 
+ +--- + +## Prerequisites + +- Gradle 7.x or 8.x (Kotlin DSL or Groovy DSL) +- A Pantera account with a JWT token (see [Getting Started](../getting-started.md)) +- The Pantera hostname and port (default: `pantera-host:8080`) + +--- + +## When to Use + +Use the `gradle` family of repo types when you want a logical separation between Gradle-centric repositories and plain Maven repositories in the Pantera UI and API. Functionally, the wire protocol is identical: + +- A `gradle` local accepts the same PUT/GET requests as a `maven` local. +- A `gradle-proxy` caches an upstream Maven-format registry (e.g., Maven Central, Gradle Plugin Portal) the same way a `maven-proxy` does. +- A `gradle-group` fans out across `gradle` and `gradle-proxy` members with the same resolution-order semantics as `maven-group`. + +This aliasing is built into the backend: groups and proxies for both families share the Maven adapter, cooldown bundle, and metadata regeneration logic. If you already have a `maven-group` you are happy with, you do not need a separate `gradle-group` β€” point your Gradle build at the `maven-group` URL. + +Reach for a dedicated `gradle-*` repo when you want to: + +- Segregate plugin portal mirrors (`gradle-proxy` pointing at `https://plugins.gradle.org/m2/`) from library mirrors (`maven-proxy` pointing at Maven Central). +- Keep internal Gradle convention plugins in a distinct `gradle` local, separate from application JARs published to a `maven` local. 
+ +--- + +## Configure Your Client + +### settings.gradle.kts (Kotlin DSL) + +```kotlin +dependencyResolutionManagement { + repositories { + maven { + name = "pantera" + url = uri("http://pantera-host:8080/gradle-group") + credentials { + username = "your-username" + password = "your-jwt-token-here" + } + isAllowInsecureProtocol = true // only if not using HTTPS + } + } +} +``` + +### build.gradle (Groovy DSL) + +```groovy +repositories { + maven { + name = 'pantera' + url = 'http://pantera-host:8080/gradle-group' + credentials { + username = 'your-username' + password = 'your-jwt-token-here' + } + allowInsecureProtocol = true // only if not using HTTPS + } +} +``` + +### Publishing + +Configure the `maven-publish` plugin to push to a `gradle` or `maven` local: + +```kotlin +plugins { + `maven-publish` +} + +publishing { + repositories { + maven { + name = "pantera" + url = uri("http://pantera-host:8080/gradle-local") + credentials { + username = "your-username" + password = "your-jwt-token-here" + } + } + } + publications { + create("library") { + from(components["java"]) + } + } +} +``` + +Publish with: + +```bash +./gradlew publish +``` + +--- + +## Minimal YAML Configuration + +### Gradle Local (`gradle`) + +Stores artifacts published by your Gradle builds. Behaves identically to a `maven` local on the wire. + +```yaml +# gradle-local.yaml +repo: + type: gradle + storage: + type: fs + path: /var/pantera/data +``` + +### Gradle Proxy (`gradle-proxy`) + +Caches an upstream Maven-format repository (e.g., Maven Central, Gradle Plugin Portal) on first request. + +```yaml +# gradle-proxy.yaml +repo: + type: gradle-proxy + storage: + type: fs + path: /var/pantera/data + remotes: + - url: https://plugins.gradle.org/m2/ +``` + +### Gradle Group (`gradle-group`) + +Virtual repository that fans out across members in resolution order. The first member that serves the artifact wins. 
+
+```yaml
+# gradle-group.yaml
+repo:
+ type: gradle-group
+ members:
+ - gradle-local
+ - gradle-proxy
+```
+
+See the [Management UI guide](../ui-guide.md#adding-members-to-a-group-repository) for how to add, reorder, and create members from the web interface.
+
+---
+
+## Known Limitations
+
+- **URL routing aliasing:** Gradle shares the Maven URL space in the REST API — when addressing repositories by format in `/api/<format>/` paths, Gradle repos are reachable under `maven` routing. This is intentional (`gradle` appears in the `LIMITED_SUPPORT` set alongside `maven` and `rpm`), but means you cannot disambiguate a Gradle repo from a Maven repo via format alone in those endpoints. Use the repository name directly when in doubt.
+- **Cooldown adapter reuse:** `gradle` and `gradle-proxy` use the same cooldown response factory and Maven bundle as `maven` and `maven-proxy`. Cooldown rules you configure for one cover the other semantically (same metadata shape, same checksum sidecars).
+- **Metadata regeneration:** Imports into `gradle` repos run through the Maven metadata regenerator (the `case "maven", "gradle"` branch in `MetadataRegenerator`) — `maven-metadata.xml` is produced as you would expect from a Maven repo.
+- **No Gradle-specific extensions:** Pantera does not currently expose Gradle-specific features such as Gradle Module Metadata variant matching beyond what standard Maven clients consume. If your build depends on `.module` files, verify they round-trip through the proxy before relying on variant resolution.
+ +--- + +## Common Issues + +| Symptom | Cause | Fix | +|---------|-------|-----| +| `401 Unauthorized` during resolve | Missing or expired JWT token | Regenerate the token and update your credentials | +| `Could not GET '...gradle-group/...'` with `403` | User lacks `read` permission on the group or one of its members | Contact your administrator to grant access | +| `Received status code 405` on publish | Publishing to a proxy or group repository | Publish only to a **local** repository (`gradle` or `maven`) | +| Gradle Plugin Portal plugins not resolving | Proxy points at Maven Central, not the plugin portal | Create a second `gradle-proxy` pointing at `https://plugins.gradle.org/m2/` and add it to the group | +| Stale `maven-metadata.xml` after import | Metadata regeneration has not run | Ask admin to trigger regeneration or re-run the import with metadata options | + +--- + +## Related Pages + +- [Maven](maven.md) -- Full Maven guide; everything there applies to Gradle over the wire +- [Getting Started](../getting-started.md) -- Obtaining JWT tokens +- [Management UI](../ui-guide.md) -- Creating repos and managing group members visually +- [Troubleshooting](../troubleshooting.md) -- Common error resolution diff --git a/docs/user-guide/response-headers.md b/docs/user-guide/response-headers.md new file mode 100644 index 000000000..77e7fd15e --- /dev/null +++ b/docs/user-guide/response-headers.md @@ -0,0 +1,60 @@ +# Response Headers + +> **Guide:** User Guide | **Section:** Response Headers + +Pantera emits a small set of custom response headers to give clients and on-call engineers structured diagnostic signal. This page is the authoritative list. + +All Pantera custom headers are namespaced under `X-Pantera-*`. + +--- + +## `X-Pantera-Fault: ` + +Present on every 5xx response Pantera generates. The tag identifies the specific fault variant so automated tools (including retry policies) can branch on the cause without parsing bodies. 
+ +Current tags: + +| Tag | Status | Meaning | +|---|---|---| +| `index-unavailable` | `500` | The artifact index executor is saturated; transient. Retry with backoff. | +| `storage-unavailable` | `500` | Storage backend read/write failed. May be transient; persistent = operator attention needed. | +| `upstream-integrity:<algo>` | `502` | Upstream primary/sidecar digest mismatch under the named algorithm. Nothing was cached. | +| `all-proxies-failed` | 5xx pass-through | Every proxy member failed. Paired with `X-Pantera-Proxies-Tried`. | +| `deadline-exceeded` | `504` | Request-level deadline expired before a response was produced. | +| `overload:<name>` | `503` | Named bounded queue or semaphore refused admission. | +| `internal` | `500` | Catch-all; classifier fallback. | + +See [Error Reference](error-reference.md) for the full client-facing explanation of each tag. + +--- + +## `X-Pantera-Proxies-Tried: <n>` + +Present on `all-proxies-failed` responses. Integer count of group/proxy members Pantera attempted before giving up. Useful for distinguishing "single upstream down" (`n=1`) from "fanout-wide outage" (`n=3+`). + +--- + +## `X-Pantera-Stale: true` + +Present on responses served from the stale-while-revalidate fallback tier. The body is known-good but not freshly revalidated. No client action is required; this header is purely advisory for caches / monitoring. + +Stale-served responses today come from: + +- `GroupMetadataCache` stale fallback (group repository metadata during partial upstream outage). +- `FilteredMetadataCache` SWR grace period (cooldown metadata re-evaluation). + +--- + +## `X-Pantera-Internal: true` + +Server-side-only marker on log events. This header is **not** emitted in client responses -- it tags internal Pantera-to-Pantera calls in the ECS access log so operators can filter them out of client-facing dashboards. + +If you ever see this header reach a client, that is a bug; please report it. 
+ +--- + +## Related Pages + +- [Error Reference](error-reference.md) -- What each `X-Pantera-Fault` tag means for your client. +- [Streaming Downloads](streaming-downloads.md) -- How client disconnects propagate. +- [Troubleshooting](troubleshooting.md) -- Common client-side issues. diff --git a/docs/user-guide/streaming-downloads.md b/docs/user-guide/streaming-downloads.md new file mode 100644 index 000000000..21f935f98 --- /dev/null +++ b/docs/user-guide/streaming-downloads.md @@ -0,0 +1,42 @@ +# Streaming Downloads + +> **Guide:** User Guide | **Section:** Streaming Downloads + +This page describes how Pantera handles mid-download client disconnects. As of v2.2.0, the behavior is transparent and requires no client-side configuration -- it is documented here for operators and advanced users who want to understand what happens. + +--- + +## Client Disconnect Propagation + +When your HTTP client closes the connection mid-response (Ctrl+C, timeout, network hiccup), Pantera propagates the cancel signal end-to-end: + +1. Pantera's HTTP server observes the close on the client socket. +2. The in-flight response pipeline is cancelled. +3. If the response was being streamed from an upstream registry (Maven Central, npm registry, etc.), Pantera also cancels the upstream fetch. +4. Any temporary files or open streams created for the request are released. + +**No client-side action is needed.** Standard clients (curl, Maven, npm, pip, etc.) all signal the disconnect through normal TCP close; Pantera picks it up from there. + +--- + +## Why This Matters + +Before v2.2.0, a mid-download disconnect caused Pantera to keep pulling bytes from the upstream into a dead socket until the upstream organically errored or completed. That behavior wasted upstream bandwidth, held file descriptors on the Pantera side, and -- for very large artifacts -- could hold significant RAM. + +Cancelling clients used to cost Pantera real resources; now they cost nothing. 
+ +--- + +## Edge Cases + +- **Partial-download caches.** If a cache-miss download is cancelled mid-write, the temp file is deleted and the cache slot remains empty. The next client request will trigger a fresh upstream fetch. No partial artifact is ever committed to the cache. +- **Shared in-flight fetches.** When multiple clients request the same artifact at the same time, Pantera coalesces them into a single upstream fetch (see `SingleFlight`). A single client cancelling does NOT cancel the coalesced fetch -- the remaining clients still receive the response. Only when every subscribed client cancels does the upstream fetch get cancelled. +- **HTTP/3 large uploads.** Pantera enforces a per-stream buffer cap on HTTP/3 (default 16 MiB). Uploads exceeding that cap are rejected; see [Admin: Environment Variables](../../admin-guide/environment-variables.md#http-3) for the tunable. + +--- + +## Related Pages + +- [Response Headers](response-headers.md) -- Pantera's custom response headers. +- [Error Reference](error-reference.md) -- 5xx fault tags. +- [Troubleshooting](troubleshooting.md) -- Client-side issue catalogue. diff --git a/docs/user-guide/ui-guide.md b/docs/user-guide/ui-guide.md index 6f868f619..cb78db785 100644 --- a/docs/user-guide/ui-guide.md +++ b/docs/user-guide/ui-guide.md @@ -186,6 +186,73 @@ Admin panels appear in the sidebar under **Administration** only if you have the If you do not see the Administration section, you have read-only access. Contact your administrator for elevated permissions. +### Creating Repositories + +The **Create Repository** page (`/admin/repositories/create`) allows administrators to create new repositories. 
The **Type** dropdown lists all supported repository formats: + +- **Maven**, **Gradle**, **Docker**, **npm**, **PyPI**, **Go**, **Helm**, **NuGet**, **Debian**, **RPM**, **Conda**, **RubyGems**, **Conan**, **Hex**, **PHP**, **File**, **Binary** + +Each format supports Local, Proxy, and/or Group variants where applicable. For example, Go supports Local, Proxy, and Group; Gradle supports all three variants. + +### Configuring Group Members + +When creating or editing a **Group** repository (e.g., `maven-group`), the **Group Members** section provides: + +- **AutoComplete dropdown**: Type to search existing repositories that are compatible with the group type. For a `maven-group`, only `maven` (local) and `maven-proxy` repositories are shown. Each suggestion displays the repository name and type badge. +- **Reordering**: Use the up/down arrow buttons to set resolution priority. The first matching member wins. +- **Create new member**: Click **Create new** to open an inline dialog that creates a new compatible repository and immediately adds it to the member list. + +--- + +## Adding Members to a Group Repository + +Group repositories aggregate one or more local and/or proxy repositories into a single virtual URL. When you create or edit a group (for example, `maven-group`, `npm-group`, `go-group`, `docker-group`), the **Group Members** panel on the repository configuration page is where you select which repos belong to the group and in what order. + +### The Member Picker (AutoComplete) + +Each member row uses a PrimeVue **AutoComplete** control that is pre-filtered to show only repositories compatible with the group type: + +- Type any portion of a repository name to filter suggestions live. +- Each suggestion shows the repository name and a small type badge so you can tell locals from proxies at a glance. +- Repos that are already in the member list are hidden from the suggestion set, so you cannot add the same repo twice. 
+- Selecting a suggestion populates the row with the repo name immediately β€” no cross-view navigation required. + +### Type Compatibility Rule + +The picker computes compatible member types from the group type by stripping the `-group` suffix and admitting the base type plus its `-proxy` variant: + +| Group type | Compatible member types | +|------------|-------------------------| +| `maven-group` | `maven`, `maven-proxy` | +| `npm-group` | `npm`, `npm-proxy` | +| `docker-group` | `docker`, `docker-proxy` | +| `pypi-group` | `pypi`, `pypi-proxy` | +| `go-group` | `go`, `go-proxy` | +| `gradle-group` | `gradle`, `gradle-proxy` | +| `php-group` | `php`, `php-proxy` | +| `file-group` | `file`, `file-proxy` | + +Only repositories whose `type` is one of those values appear in the picker. A `docker-group` will never offer an `npm-proxy` as a candidate, for example. + +### Reordering Members + +Use the up and down arrow buttons on each row to change resolution priority. When a client requests an artifact, Pantera queries members in the listed order and returns the first match β€” so heavily used locals or fast proxies belong at the top. + +### Create New Member Inline + +If the repo you need doesn't exist yet, click **Create new** next to **Add member**. A modal opens with: + +- A **Type** dropdown restricted to the same compatibility set described above. +- A **Name** field for the new repository. + +Submitting the modal creates the repository through the standard admin API and adds it to the member list in a single step β€” you never leave the group editor, and the compatible-repos list is refreshed automatically so the new repo is immediately eligible for further edits. + +**Note on storage defaults:** the inline create modal currently defaults the new repository to filesystem storage (`storage.type: fs`). 
Proxy variants also require a remote URL, which the inline modal does not collect β€” for a proxy member, you will typically create the repository via the full **Create Repository** page (where you can configure `remotes`, authentication, and cooldown) and then add it to the group using the AutoComplete picker. + +### Implementation Reference + +The group-member picker is implemented in `pantera-ui/src/components/admin/RepoConfigForm.vue` at lines 558-576 (the `AutoComplete` element), with the compatibility rule at `compatibleTypes()` (line 98) and the inline-create handler at `createMemberRepo()` (line 135). + --- ## Keyboard and Navigation Tips diff --git a/files-adapter/pom.xml b/files-adapter/pom.xml index d2965bbd6..03e290786 100644 --- a/files-adapter/pom.xml +++ b/files-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 files-adapter - 2.1.3 + 2.2.0 jar files-adapter A simple adapter for storing files @@ -42,23 +42,23 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 com.auto1.pantera http-client - 2.1.3 + 2.2.0 compile com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile diff --git a/files-adapter/src/main/java/com/auto1/pantera/files/FileProxySlice.java b/files-adapter/src/main/java/com/auto1/pantera/files/FileProxySlice.java index ee0f3ba23..05d8667c5 100644 --- a/files-adapter/src/main/java/com/auto1/pantera/files/FileProxySlice.java +++ b/files-adapter/src/main/java/com/auto1/pantera/files/FileProxySlice.java @@ -17,9 +17,9 @@ import com.auto1.pantera.asto.cache.StreamThroughCache; import com.auto1.pantera.asto.cache.FromStorageCache; import com.auto1.pantera.asto.cache.Remote; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownResponses; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import 
com.auto1.pantera.cooldown.response.CooldownResponseRegistry; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.ResponseBuilder; import com.auto1.pantera.http.Response; @@ -101,7 +101,7 @@ public final class FileProxySlice implements Slice { */ public FileProxySlice(final ClientSlices clients, final URI remote) { this(new UriClientSlice(clients, remote), Cache.NOP, Optional.empty(), FilesSlice.ANY_REPO, - com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, "unknown", Optional.empty()); + com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, "unknown", Optional.empty()); } /** @@ -116,7 +116,7 @@ public FileProxySlice(final ClientSlices clients, final URI remote, this( new AuthClientSlice(new UriClientSlice(clients, remote), auth), new StreamThroughCache(asto), Optional.empty(), FilesSlice.ANY_REPO, - com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, remote.toString(), Optional.of(asto) + com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, remote.toString(), Optional.of(asto) ); } @@ -133,7 +133,7 @@ public FileProxySlice(final ClientSlices clients, final URI remote, final Storag this( new AuthClientSlice(new UriClientSlice(clients, remote), Authenticator.ANONYMOUS), new StreamThroughCache(asto), Optional.of(events), rname, - com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, remote.toString(), Optional.of(asto) + com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, remote.toString(), Optional.of(asto) ); } @@ -143,7 +143,7 @@ public FileProxySlice(final ClientSlices clients, final URI remote, final Storag */ FileProxySlice(final Slice remote, final Cache cache) { this(remote, cache, Optional.empty(), FilesSlice.ANY_REPO, - com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, "unknown", Optional.empty()); + com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, "unknown", Optional.empty()); } /** @@ -288,7 +288,9 @@ private 
CompletableFuture evaluateCooldownAndFetch( .thenCompose(result -> { if (result.blocked()) { return java.util.concurrent.CompletableFuture.completedFuture( - CooldownResponses.forbidden(result.block().orElseThrow()) + CooldownResponseRegistry.instance() + .getOrThrow(FileProxySlice.REPO_TYPE) + .forbidden(result.block().orElseThrow()) ); } final long startTime = System.currentTimeMillis(); diff --git a/files-adapter/src/main/java/com/auto1/pantera/files/FilesCooldownInspector.java b/files-adapter/src/main/java/com/auto1/pantera/files/FilesCooldownInspector.java index 6ed41b0d6..79bf8b410 100644 --- a/files-adapter/src/main/java/com/auto1/pantera/files/FilesCooldownInspector.java +++ b/files-adapter/src/main/java/com/auto1/pantera/files/FilesCooldownInspector.java @@ -10,8 +10,8 @@ */ package com.auto1.pantera.files; -import com.auto1.pantera.cooldown.CooldownDependency; -import com.auto1.pantera.cooldown.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Slice; import com.auto1.pantera.http.headers.Header; diff --git a/gem-adapter/pom.xml b/gem-adapter/pom.xml index 2688ddd03..9f94a3d07 100644 --- a/gem-adapter/pom.xml +++ b/gem-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 gem-adapter - 2.1.3 + 2.2.0 gem-adapter A Pantera adapter for Ruby Gem packages 2020 @@ -41,12 +41,12 @@ SOFTWARE. com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -106,7 +106,7 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test diff --git a/go-adapter/pom.xml b/go-adapter/pom.xml index a17a65010..9784e302d 100644 --- a/go-adapter/pom.xml +++ b/go-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. 
com.auto1.pantera pantera - 2.1.3 + 2.2.0 go-adapter - 2.1.3 + 2.2.0 jar goproxy Turns your files/objects into Go repository @@ -42,12 +42,12 @@ SOFTWARE. com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -60,13 +60,13 @@ SOFTWARE. com.auto1.pantera http-client - 2.1.3 + 2.2.0 compile com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test @@ -103,7 +103,7 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test diff --git a/helm-adapter/src/main/java/com/auto1/pantera/helm/http/DeleteChartSlice.java b/helm-adapter/src/main/java/com/auto1/pantera/helm/http/DeleteChartSlice.java index a72c16302..7a02bb716 100644 --- a/helm-adapter/src/main/java/com/auto1/pantera/helm/http/DeleteChartSlice.java +++ b/helm-adapter/src/main/java/com/auto1/pantera/helm/http/DeleteChartSlice.java @@ -132,7 +132,7 @@ private Single deleteArchives(final String name, final Optional { if (wasdeleted.get()) { this.events.ifPresent( - queue -> queue.add( + queue -> queue.add( // ok: unbounded ConcurrentLinkedDeque (ArtifactEvent queue) vers.map( item -> new ArtifactEvent( PushChartSlice.REPO_TYPE, this.repoName, name, item diff --git a/helm-adapter/src/main/java/com/auto1/pantera/helm/http/PushChartSlice.java b/helm-adapter/src/main/java/com/auto1/pantera/helm/http/PushChartSlice.java index 7308bb137..5b1e02713 100644 --- a/helm-adapter/src/main/java/com/auto1/pantera/helm/http/PushChartSlice.java +++ b/helm-adapter/src/main/java/com/auto1/pantera/helm/http/PushChartSlice.java @@ -100,7 +100,7 @@ public CompletableFuture response( if (upd.isEmpty() || "true".equals(upd.get())) { res = new IndexYaml(this.storage).update(tgz); this.events.ifPresent( - queue -> queue.add( + queue -> queue.add( // ok: unbounded ConcurrentLinkedDeque (ArtifactEvent queue) new ArtifactEvent( PushChartSlice.REPO_TYPE, this.rname, new Login(headers).getValue(), diff --git a/hexpm-adapter/pom.xml b/hexpm-adapter/pom.xml index 1a6a52292..996290518 100644 
--- a/hexpm-adapter/pom.xml +++ b/hexpm-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 hexpm-adapter - 2.1.3 + 2.2.0 hexpm-adapter A Pantera adapter for Erlang/Elixir packages https://github.com/auto1-oss/pantera/tree/master/hexpm-adapter @@ -42,7 +42,7 @@ SOFTWARE. com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 com.google.protobuf @@ -52,7 +52,7 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test diff --git a/hexpm-adapter/src/main/java/com/auto1/pantera/hex/http/UploadSlice.java b/hexpm-adapter/src/main/java/com/auto1/pantera/hex/http/UploadSlice.java index 649a55a34..525a78bf5 100644 --- a/hexpm-adapter/src/main/java/com/auto1/pantera/hex/http/UploadSlice.java +++ b/hexpm-adapter/src/main/java/com/auto1/pantera/hex/http/UploadSlice.java @@ -161,7 +161,7 @@ public CompletableFuture response( .header(new ContentLength(0)) .build(); this.events.ifPresent( - queue -> queue.add( + queue -> queue.add( // ok: unbounded ConcurrentLinkedDeque (ArtifactEvent queue) new ArtifactEvent( UploadSlice.REPO_TYPE, this.rname, new Login(headers).getValue(), diff --git a/http-client/pom.xml b/http-client/pom.xml index 43866e85b..3f832a68c 100644 --- a/http-client/pom.xml +++ b/http-client/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 http-client - 2.1.3 + 2.2.0 Pantera HTTP client https://github.com/auto1-oss/pantera/tree/master/http-client @@ -40,7 +40,7 @@ SOFTWARE. com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 @@ -101,7 +101,7 @@ SOFTWARE. 
com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test diff --git a/http-client/src/main/java/com/auto1/pantera/http/client/jetty/JettyClientSlice.java b/http-client/src/main/java/com/auto1/pantera/http/client/jetty/JettyClientSlice.java index 1f21f846f..26905500d 100644 --- a/http-client/src/main/java/com/auto1/pantera/http/client/jetty/JettyClientSlice.java +++ b/http-client/src/main/java/com/auto1/pantera/http/client/jetty/JettyClientSlice.java @@ -181,16 +181,31 @@ public CompletableFuture response( // (edge case: content source callback fired but no chunks) processor.onComplete(); } else { - EcsLogger.error("com.auto1.pantera.http.client") - .message("HTTP request failed") - .eventCategory("web") - .eventAction("http_request_send") - .eventOutcome("failure") - .error(result.getFailure()) - .log(); + final Throwable failure = result.getFailure(); + // Idle-close is a normal connection-lifecycle event + // (Jetty HTTP client 30s idle timeout firing on an + // otherwise-healthy upstream). Downgrade to DEBUG so + // it stops counting as a request failure in the logs + // (v2.1.4 WI-00, forensic Β§1.7 F4.4). 
+ if (isIdleTimeout(failure)) { + EcsLogger.debug("com.auto1.pantera.http.client") + .message("HTTP client connection closed by idle timeout") + .eventCategory("web") + .eventAction("http_idle_close") + .error(failure) + .log(); + } else { + EcsLogger.error("com.auto1.pantera.http.client") + .message("HTTP request failed") + .eventCategory("web") + .eventAction("http_request_send") + .eventOutcome("failure") + .error(failure) + .log(); + } // Complete processor with error so subscribers don't hang - processor.onError(result.getFailure()); - res.completeExceptionally(result.getFailure()); + processor.onError(failure); + res.completeExceptionally(failure); } } ); @@ -428,4 +443,32 @@ private static ByteBuffer copyChunk(final Content.Chunk chunk) { copy.flip(); return copy; } + + /** + * Return {@code true} iff the failure is Jetty's "Idle timeout expired: + * N/N ms" (a {@link TimeoutException} emitted when a connection goes + * idle and the 30s Jetty-client idle timeout fires). This is a normal + * connection-lifecycle signal, not a request failure, and callers log + * it at DEBUG rather than ERROR. + * + * @param failure The throwable from {@code result.getFailure()} + * @return {@code true} if this is an idle-timeout close + */ + private static boolean isIdleTimeout(final Throwable failure) { + if (failure == null) { + return false; + } + Throwable cursor = failure; + // Walk the cause chain β€” Jetty may wrap the TimeoutException + for (int hops = 0; cursor != null && hops < 5; hops = hops + 1) { + if (cursor instanceof TimeoutException) { + final String msg = cursor.getMessage(); + if (msg != null && msg.contains("Idle timeout expired")) { + return true; + } + } + cursor = cursor.getCause(); + } + return false; + } } diff --git a/maven-adapter/pom.xml b/maven-adapter/pom.xml index 000bcaa6d..dc1986b26 100644 --- a/maven-adapter/pom.xml +++ b/maven-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. 
com.auto1.pantera pantera - 2.1.3 + 2.2.0 maven-adapter - 2.1.3 + 2.2.0 maven-adapter UTF-8 @@ -40,7 +40,7 @@ SOFTWARE. com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -53,7 +53,7 @@ SOFTWARE. com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 com.jcabi.incubator @@ -73,7 +73,7 @@ SOFTWARE. com.auto1.pantera http-client - 2.1.3 + 2.2.0 compile @@ -93,7 +93,7 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test diff --git a/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenCooldownResponseFactory.java b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenCooldownResponseFactory.java new file mode 100644 index 000000000..815bf510a --- /dev/null +++ b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenCooldownResponseFactory.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.maven.cooldown; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.response.CooldownResponseFactory; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; + +import java.time.Duration; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; + +/** + * Maven-specific cooldown 403 response factory. + * + *

+ * <p>Returns {@code text/plain} body with a human-readable blocked message
+ * including the ISO-8601 unblock timestamp.</p>
+ * + * @since 2.2.0 + */ +public final class MavenCooldownResponseFactory implements CooldownResponseFactory { + + private static final DateTimeFormatter ISO = DateTimeFormatter.ISO_OFFSET_DATE_TIME; + + @Override + public Response forbidden(final CooldownBlock block) { + final String until = ISO.format( + block.blockedUntil().atOffset(ZoneOffset.UTC) + ); + final long retryAfter = Math.max( + 1L, + Duration.between(Instant.now(), block.blockedUntil()).getSeconds() + ); + return ResponseBuilder.forbidden() + .header("Retry-After", String.valueOf(retryAfter)) + .header("X-Pantera-Cooldown", "blocked") + .textBody("Artifact blocked by cooldown policy. Blocked until: " + until) + .build(); + } + + @Override + public String repoType() { + return "maven"; + } +} diff --git a/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataFilter.java b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataFilter.java new file mode 100644 index 000000000..371db2f32 --- /dev/null +++ b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataFilter.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.maven.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataFilter; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; + +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Set; + +/** + * Maven metadata filter implementing cooldown SPI. + * Removes blocked versions from {@code maven-metadata.xml}. + * + *

+ * <p>Filters the following elements:</p>
+ *
+ * <ul>
+ *   <li>{@code <versions>} - removes blocked version nodes</li>
+ *   <li>{@code <latest>} - updated when current latest is blocked</li>
+ *   <li>{@code <lastUpdated>} - set to current timestamp on modification</li>
+ * </ul>
+ * + * @since 2.2.0 + */ +public final class MavenMetadataFilter implements MetadataFilter { + + /** + * Maven metadata timestamp format: yyyyMMddHHmmss. + */ + private static final DateTimeFormatter MAVEN_TS = + DateTimeFormatter.ofPattern("yyyyMMddHHmmss"); + + @Override + public Document filter( + final Document metadata, final Set blockedVersions + ) { + if (blockedVersions.isEmpty()) { + return metadata; + } + final NodeList versionNodes = metadata.getElementsByTagName("version"); + for (int idx = versionNodes.getLength() - 1; idx >= 0; idx--) { + final Node node = versionNodes.item(idx); + final String text = node.getTextContent(); + if (text != null && blockedVersions.contains(text.trim())) { + node.getParentNode().removeChild(node); + } + } + return metadata; + } + + @Override + public Document updateLatest( + final Document metadata, final String newLatest + ) { + MavenMetadataFilter.setElementText(metadata, "latest", newLatest); + final String now = ZonedDateTime.now(ZoneOffset.UTC).format(MAVEN_TS); + MavenMetadataFilter.setElementText(metadata, "lastUpdated", now); + return metadata; + } + + /** + * Set the text content of the first element with the given tag name. + * Creates the element under {@code } if it does not exist. 
+ * + * @param doc Document to modify + * @param tag Element tag name + * @param value New text content + */ + private static void setElementText( + final Document doc, final String tag, final String value + ) { + final NodeList nodes = doc.getElementsByTagName(tag); + if (nodes.getLength() > 0) { + nodes.item(0).setTextContent(value); + } else { + final NodeList versioning = doc.getElementsByTagName("versioning"); + if (versioning.getLength() > 0) { + final Element elem = doc.createElement(tag); + elem.setTextContent(value); + versioning.item(0).appendChild(elem); + } + } + } +} diff --git a/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataParser.java b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataParser.java new file mode 100644 index 000000000..8876826fe --- /dev/null +++ b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataParser.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.maven.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataParseException; +import com.auto1.pantera.cooldown.metadata.MetadataParser; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * Maven metadata parser implementing cooldown SPI. 
+ * Parses {@code maven-metadata.xml} via DOM and extracts version information. + * + *

+ * <p>Maven metadata structure:</p>
+ *
+ * <pre>
+ * <metadata>
+ *   <groupId>com.example</groupId>
+ *   <artifactId>my-lib</artifactId>
+ *   <versioning>
+ *     <latest>3.0.0</latest>
+ *     <release>3.0.0</release>
+ *     <versions>
+ *       <version>1.0.0</version>
+ *       <version>2.0.0</version>
+ *     </versions>
+ *     <lastUpdated>20260401120000</lastUpdated>
+ *   </versioning>
+ * </metadata>
+ * </pre>
+ * + * @since 2.2.0 + */ +public final class MavenMetadataParser implements MetadataParser { + + /** + * Content type for Maven metadata. + */ + private static final String CONTENT_TYPE = "application/xml"; + + @Override + public Document parse(final byte[] bytes) { + try { + final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setFeature( + "http://apache.org/xml/features/disallow-doctype-decl", true + ); + final DocumentBuilder builder = factory.newDocumentBuilder(); + return builder.parse(new ByteArrayInputStream(bytes)); + } catch (final SAXException | IOException | ParserConfigurationException ex) { + throw new MetadataParseException( + "Failed to parse Maven metadata XML", ex + ); + } + } + + @Override + public List extractVersions(final Document metadata) { + final NodeList versionNodes = metadata.getElementsByTagName("version"); + if (versionNodes.getLength() == 0) { + return Collections.emptyList(); + } + final List result = new ArrayList<>(versionNodes.getLength()); + for (int idx = 0; idx < versionNodes.getLength(); idx++) { + final String text = versionNodes.item(idx).getTextContent(); + if (text != null && !text.isBlank()) { + result.add(text.trim()); + } + } + return result; + } + + @Override + public Optional getLatestVersion(final Document metadata) { + final NodeList latestNodes = metadata.getElementsByTagName("latest"); + if (latestNodes.getLength() > 0) { + final String text = latestNodes.item(0).getTextContent(); + if (text != null && !text.isBlank()) { + return Optional.of(text.trim()); + } + } + return Optional.empty(); + } + + @Override + public String contentType() { + return CONTENT_TYPE; + } + + @Override + public Map extractReleaseDates(final Document metadata) { + return Map.of(); + } +} diff --git a/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataRequestDetector.java b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataRequestDetector.java new file mode 
100644 index 000000000..dd3269d31 --- /dev/null +++ b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataRequestDetector.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.maven.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataRequestDetector; + +import java.util.Optional; + +/** + * Maven metadata request detector implementing cooldown SPI. + * Detects requests for {@code maven-metadata.xml} and extracts the + * package (groupId:artifactId) path from the URL. + * + *

<p>Examples:</p>
+ * <ul>
+ *   <li>{@code /com/example/my-lib/maven-metadata.xml} - metadata request,
+ *       package = {@code com/example/my-lib}</li>
+ *   <li>{@code /com/example/my-lib/1.0.0/my-lib-1.0.0.jar} - artifact download,
+ *       not a metadata request</li>
+ * </ul>
+ * + * @since 2.2.0 + */ +public final class MavenMetadataRequestDetector implements MetadataRequestDetector { + + /** + * The filename that identifies a Maven metadata request. + */ + private static final String METADATA_FILE = "maven-metadata.xml"; + + /** + * Repository type identifier. + */ + private static final String REPO_TYPE = "maven"; + + @Override + public boolean isMetadataRequest(final String path) { + return path != null && path.endsWith(METADATA_FILE); + } + + @Override + public Optional extractPackageName(final String path) { + if (!this.isMetadataRequest(path)) { + return Optional.empty(); + } + String stripped = path; + if (stripped.startsWith("/")) { + stripped = stripped.substring(1); + } + // Remove trailing "/maven-metadata.xml" or "maven-metadata.xml" + final int suffixLen = METADATA_FILE.length(); + if (stripped.length() <= suffixLen) { + return Optional.empty(); + } + // Strip the filename and the preceding slash + String packageName = stripped.substring( + 0, stripped.length() - suffixLen + ); + if (packageName.endsWith("/")) { + packageName = packageName.substring(0, packageName.length() - 1); + } + if (packageName.isEmpty()) { + return Optional.empty(); + } + return Optional.of(packageName); + } + + @Override + public String repoType() { + return REPO_TYPE; + } +} diff --git a/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataRewriter.java b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataRewriter.java new file mode 100644 index 000000000..8c59ee2de --- /dev/null +++ b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/MavenMetadataRewriter.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. 
+ * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.maven.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataRewriteException; +import com.auto1.pantera.cooldown.metadata.MetadataRewriter; +import org.w3c.dom.Document; + +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import java.io.ByteArrayOutputStream; + +/** + * Maven metadata rewriter implementing cooldown SPI. + * Serializes a filtered DOM {@link Document} back to XML bytes + * via {@link javax.xml.transform.Transformer}. + * + * @since 2.2.0 + */ +public final class MavenMetadataRewriter implements MetadataRewriter { + + /** + * Content type for Maven metadata. + */ + private static final String CONTENT_TYPE = "application/xml"; + + @Override + public byte[] rewrite(final Document metadata) { + try { + final TransformerFactory factory = TransformerFactory.newInstance(); + final Transformer transformer = factory.newTransformer(); + transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); + final ByteArrayOutputStream out = new ByteArrayOutputStream(); + transformer.transform( + new DOMSource(metadata), new StreamResult(out) + ); + return out.toByteArray(); + } catch (final TransformerException ex) { + throw new MetadataRewriteException( + "Failed to serialize Maven metadata to XML", ex + ); + } + } + + @Override + public String contentType() { + return CONTENT_TYPE; + } +} diff --git a/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/package-info.java b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/package-info.java new file mode 100644 index 
000000000..222ce8d4a --- /dev/null +++ b/maven-adapter/src/main/java/com/auto1/pantera/maven/cooldown/package-info.java @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +/** + * Maven cooldown metadata filtering implementation. + * + *

<p>This package provides Maven-specific implementations of the cooldown metadata SPI:</p>
+ * <ul>
+ *   <li>{@link com.auto1.pantera.maven.cooldown.MavenMetadataParser}
+ *       - Parses {@code maven-metadata.xml} via DOM</li>
+ *   <li>{@link com.auto1.pantera.maven.cooldown.MavenMetadataFilter}
+ *       - Filters blocked versions from metadata</li>
+ *   <li>{@link com.auto1.pantera.maven.cooldown.MavenMetadataRewriter}
+ *       - Serializes filtered metadata back to XML</li>
+ *   <li>{@link com.auto1.pantera.maven.cooldown.MavenMetadataRequestDetector}
+ *       - Detects {@code maven-metadata.xml} requests</li>
+ *   <li>{@link com.auto1.pantera.maven.cooldown.MavenCooldownResponseFactory}
+ *       - Builds 403 responses for blocked Maven artifacts</li>
+ * </ul>
+ * + * @since 2.2.0 + */ +package com.auto1.pantera.maven.cooldown; diff --git a/maven-adapter/src/main/java/com/auto1/pantera/maven/http/CachedProxySlice.java b/maven-adapter/src/main/java/com/auto1/pantera/maven/http/CachedProxySlice.java index cfecff18a..7511eddd5 100644 --- a/maven-adapter/src/main/java/com/auto1/pantera/maven/http/CachedProxySlice.java +++ b/maven-adapter/src/main/java/com/auto1/pantera/maven/http/CachedProxySlice.java @@ -14,9 +14,9 @@ import com.auto1.pantera.asto.Key; import com.auto1.pantera.asto.Storage; import com.auto1.pantera.asto.cache.Cache; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Response; import com.auto1.pantera.http.ResponseBuilder; @@ -24,20 +24,32 @@ import com.auto1.pantera.http.cache.BaseCachedProxySlice; import com.auto1.pantera.http.cache.DigestComputer; import com.auto1.pantera.http.cache.ProxyCacheConfig; +import com.auto1.pantera.http.cache.ProxyCacheWriter; import com.auto1.pantera.http.cache.SidecarFile; +import com.auto1.pantera.http.context.RequestContext; +import com.auto1.pantera.http.fault.Fault; +import com.auto1.pantera.http.fault.Fault.ChecksumAlgo; +import com.auto1.pantera.http.fault.Result; import com.auto1.pantera.http.headers.Login; +import com.auto1.pantera.http.log.EcsLogger; import com.auto1.pantera.http.rq.RequestLine; import com.auto1.pantera.scheduling.ProxyArtifactEvent; +import java.io.IOException; +import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.time.Instant; import java.util.ArrayList; import java.util.Collections; +import java.util.EnumMap; import java.util.List; +import java.util.Locale; import java.util.Map; 
import java.util.Optional; import java.util.Queue; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; +import java.util.function.Supplier; import java.util.regex.Matcher; /** @@ -57,11 +69,44 @@ @SuppressWarnings("PMD.ExcessiveImports") public final class CachedProxySlice extends BaseCachedProxySlice { + /** + * Primary artifact extensions that participate in the coupled + * primary+sidecar write path. The checksum sidecar paths themselves are + * still served by {@link ChecksumProxySlice} / standard cache flow. + */ + private static final List PRIMARY_EXTENSIONS = List.of( + ".pom", ".jar", ".war", ".aar", ".ear", ".zip", ".module" + ); + /** * Maven-specific metadata cache for maven-metadata.xml files. */ private final MetadataCache metadataCache; + /** + * Remote client slice, held here so {@link #preProcess} can fetch the + * primary + sidecars as a coupled batch via {@link ProxyCacheWriter}. + * A duplicate reference of {@code super.client()} is kept so we don't + * invoke a protected getter from an anonymous fetch supplier. + */ + private final Slice remote; + + /** + * Optional raw storage used by {@link ProxyCacheWriter} to land the + * primary + sidecars atomically. Empty when the upstream runs without a + * file-backed cache; in that case we fall back to the standard flow. + */ + private final Optional rawStorage; + + /** + * Single-source-of-truth cache writer introduced by WI-07 (Β§9.5 of the + * v2.2 target architecture). Fetches the primary + every sidecar in one + * coupled batch, verifies the upstream {@code .sha1}/{@code .sha256} + * claim against the bytes we just downloaded, and atomically commits the + * pair. Instantiated lazily when {@link #rawStorage} is present. + */ + private final ProxyCacheWriter cacheWriter; + /** * Constructor with full configuration. 
* @param client Upstream remote slice @@ -95,6 +140,11 @@ public final class CachedProxySlice extends BaseCachedProxySlice { storage, events, config, cooldownService, cooldownInspector ); this.metadataCache = metadataCache; + this.remote = client; + this.rawStorage = storage; + this.cacheWriter = storage + .map(raw -> new ProxyCacheWriter(raw, repoName)) + .orElse(null); } /** @@ -142,6 +192,15 @@ protected Optional> preProcess( if (path.contains("maven-metadata.xml") && this.metadataCache != null) { return Optional.of(this.handleMetadata(line, key)); } + // WI-07 Β§9.5 β€” integrity-verified atomic primary+sidecar write on + // cache-miss. Runs only when we have a file-backed storage and the + // requested path is a primary artifact. Cache-hit and sidecar paths + // fall through to the standard BaseCachedProxySlice flow unchanged. + if (this.cacheWriter != null + && !isChecksumSidecar(path) + && isPrimaryArtifact(path)) { + return Optional.of(this.verifyAndServePrimary(line, key, path)); + } return Optional.empty(); } @@ -293,4 +352,187 @@ private static void addSidecar( )); } } + + // ===== WI-07 Β§9.5: ProxyCacheWriter integration ===== + + /** + * Check if a path represents a Maven primary artifact that benefits from + * coupled primary+sidecar writing. Metadata files, directories and + * checksum sidecars are explicitly excluded by callers. + * + * @param path Request path. + * @return {@code true} if we should route this request through + * {@link ProxyCacheWriter}. 
+ */ + private static boolean isPrimaryArtifact(final String path) { + if (path.endsWith("/") || path.contains("maven-metadata.xml")) { + return false; + } + final String lower = path.toLowerCase(Locale.ROOT); + for (final String ext : PRIMARY_EXTENSIONS) { + if (lower.endsWith(ext)) { + return true; + } + } + return false; + } + + /** + * Primary-artifact flow: if the cache already has the primary, fall + * through to the standard flow (serving from cache); otherwise fetch the + * primary + every sidecar upstream in one coupled batch, verify digests, + * atomically commit, and serve the freshly-cached bytes. + * + *

We consult BOTH the {@link Storage} and the {@link Cache} abstraction + * so tests that plug a lambda-Cache without a real storage keep working, + * and production file-backed deployments benefit from the verify path on + * genuine cache misses. + */ + @SuppressWarnings({"PMD.AvoidCatchingGenericException", "PMD.CognitiveComplexity"}) + private CompletableFuture verifyAndServePrimary( + final RequestLine line, final Key key, final String path + ) { + final Storage storage = this.rawStorage.orElseThrow(); + return storage.exists(key).thenCompose(presentInStorage -> { + if (presentInStorage) { + return this.serveFromCache(storage, key); + } + return this.cache().load( + key, + com.auto1.pantera.asto.cache.Remote.EMPTY, + com.auto1.pantera.asto.cache.CacheControl.Standard.ALWAYS + ).thenCompose(opt -> { + if (opt.isPresent()) { + return CompletableFuture.completedFuture( + ResponseBuilder.ok().body(opt.get()).build() + ); + } + return this.fetchVerifyAndCache(line, key, path); + }).toCompletableFuture(); + }).exceptionally(err -> { + EcsLogger.warn("com.auto1.pantera.cache") + .message("Primary-artifact verify-and-serve failed; falling back to not-found") + .eventCategory("web") + .eventAction("cache_write") + .eventOutcome("failure") + .field("repository.name", this.repoName()) + .field("url.path", path) + .error(err) + .log(); + return ResponseBuilder.notFound().build(); + }); + } + + /** + * Fetch the primary + every sidecar, verify, commit via + * {@link ProxyCacheWriter}, then stream the primary from the cache. + * Integrity failures and storage failures both collapse to a clean 502 + * response (mirroring {@code FaultTranslator.UpstreamIntegrity} policy) + * and leave the cache empty for this key. 
+ */ + @SuppressWarnings({"PMD.AvoidCatchingGenericException", "PMD.CognitiveComplexity"}) + private CompletableFuture fetchVerifyAndCache( + final RequestLine line, final Key key, final String path + ) { + final Storage storage = this.rawStorage.orElseThrow(); + final String upstreamUri = this.upstreamUrl() + path; + final RequestContext ctx = new RequestContext( + org.apache.logging.log4j.ThreadContext.get("trace.id"), + null, + this.repoName(), + path + ); + final Map>>> sidecars = + new EnumMap<>(ChecksumAlgo.class); + sidecars.put(ChecksumAlgo.SHA1, () -> this.fetchSidecar(line, ".sha1")); + sidecars.put(ChecksumAlgo.MD5, () -> this.fetchSidecar(line, ".md5")); + sidecars.put(ChecksumAlgo.SHA256, () -> this.fetchSidecar(line, ".sha256")); + sidecars.put(ChecksumAlgo.SHA512, () -> this.fetchSidecar(line, ".sha512")); + + return this.cacheWriter.writeWithSidecars( + key, + upstreamUri, + () -> this.fetchPrimary(line), + sidecars, + ctx + ).toCompletableFuture().thenCompose(result -> { + if (result instanceof Result.Err err) { + if (err.fault() instanceof Fault.UpstreamIntegrity) { + return CompletableFuture.completedFuture( + ResponseBuilder.unavailable() + .header("X-Pantera-Fault", "upstream-integrity") + .textBody("Upstream integrity verification failed") + .build() + ); + } + // StorageUnavailable / anything else β†’ 502-equivalent; no cache state. + return CompletableFuture.completedFuture( + ResponseBuilder.unavailable() + .textBody("Upstream temporarily unavailable") + .build() + ); + } + return this.serveFromCache(storage, key); + }); + } + + /** + * Read the primary from the upstream as an {@link InputStream}. On any + * non-success status, throws so the writer's outer exception handler + * treats it as a transient failure (no cache mutation). 
+ */ + private CompletionStage fetchPrimary(final RequestLine line) { + return this.remote.response(line, Headers.EMPTY, Content.EMPTY) + .thenApply(resp -> { + if (!resp.status().success()) { + // Drain body to release connection. + resp.body().asBytesFuture(); + throw new IllegalStateException( + "Upstream returned HTTP " + resp.status().code() + ); + } + try { + return resp.body().asInputStream(); + } catch (final IOException ex) { + throw new IllegalStateException("Upstream body not readable", ex); + } + }); + } + + /** + * Fetch a sidecar for the primary at {@code line}. Returns + * {@link Optional#empty()} for 4xx/5xx so the writer treats the sidecar + * as absent; I/O errors collapse to empty so a transient sidecar failure + * never blocks the primary write. + */ + @SuppressWarnings("PMD.AvoidCatchingGenericException") + private CompletionStage> fetchSidecar( + final RequestLine primary, final String extension + ) { + final String sidecarPath = primary.uri().getPath() + extension; + final RequestLine sidecarLine = new RequestLine( + primary.method().value(), sidecarPath + ); + return this.remote.response(sidecarLine, Headers.EMPTY, Content.EMPTY) + .thenCompose(resp -> { + if (!resp.status().success()) { + return resp.body().asBytesFuture() + .thenApply(ignored -> Optional.empty()); + } + return resp.body().asBytesFuture() + .thenApply(bytes -> Optional.of( + new java.io.ByteArrayInputStream(bytes) + )); + }) + .exceptionally(ignored -> Optional.empty()); + } + + /** + * Serve the primary from storage after a successful atomic write. 
+ */ + private CompletableFuture serveFromCache(final Storage storage, final Key key) { + return storage.value(key).thenApply(content -> + ResponseBuilder.ok().body(content).build() + ); + } } diff --git a/maven-adapter/src/main/java/com/auto1/pantera/maven/http/MavenCooldownInspector.java b/maven-adapter/src/main/java/com/auto1/pantera/maven/http/MavenCooldownInspector.java index e1a3e661d..fe161297a 100644 --- a/maven-adapter/src/main/java/com/auto1/pantera/maven/http/MavenCooldownInspector.java +++ b/maven-adapter/src/main/java/com/auto1/pantera/maven/http/MavenCooldownInspector.java @@ -10,8 +10,8 @@ */ package com.auto1.pantera.maven.http; -import com.auto1.pantera.cooldown.CooldownDependency; -import com.auto1.pantera.cooldown.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; import com.auto1.pantera.asto.Content; import com.auto1.pantera.asto.Remaining; import com.auto1.pantera.http.Headers; diff --git a/maven-adapter/src/main/java/com/auto1/pantera/maven/http/MavenProxySlice.java b/maven-adapter/src/main/java/com/auto1/pantera/maven/http/MavenProxySlice.java index 7fb35a0e2..faa24fa96 100644 --- a/maven-adapter/src/main/java/com/auto1/pantera/maven/http/MavenProxySlice.java +++ b/maven-adapter/src/main/java/com/auto1/pantera/maven/http/MavenProxySlice.java @@ -49,7 +49,7 @@ public final class MavenProxySlice extends Slice.Wrap { public MavenProxySlice(final ClientSlices clients, final URI remote, final Authenticator auth, final Cache cache) { this(clients, remote, auth, cache, Optional.empty(), "*", - "maven-proxy", com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, Optional.empty()); + "maven-proxy", com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, Optional.empty()); } /** @@ -63,7 +63,7 @@ public MavenProxySlice(final ClientSlices clients, final URI remote, final Authenticator authenticator ) { this(client, uri, authenticator, Cache.NOP, 
Optional.empty(), "*", - "maven-proxy", com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, Optional.empty(), + "maven-proxy", com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, Optional.empty(), Duration.ofHours(24), Duration.ofHours(24), true); } @@ -87,7 +87,7 @@ public MavenProxySlice( final Optional> events, final String rname, final String rtype, - final com.auto1.pantera.cooldown.CooldownService cooldown, + final com.auto1.pantera.cooldown.api.CooldownService cooldown, final Optional storage ) { this(clients, remote, auth, cache, events, rname, rtype, cooldown, storage, @@ -118,7 +118,7 @@ public MavenProxySlice( final Optional> events, final String rname, final String rtype, - final com.auto1.pantera.cooldown.CooldownService cooldown, + final com.auto1.pantera.cooldown.api.CooldownService cooldown, final Optional storage, final Duration metadataTtl, final Duration negativeCacheTtl, @@ -147,7 +147,7 @@ private MavenProxySlice( final String rname, final String upstreamUrl, final String rtype, - final com.auto1.pantera.cooldown.CooldownService cooldown, + final com.auto1.pantera.cooldown.api.CooldownService cooldown, final Optional storage, final Duration metadataTtl ) { @@ -168,7 +168,7 @@ private static Slice buildRoute( final String rname, final String upstreamUrl, final String rtype, - final com.auto1.pantera.cooldown.CooldownService cooldown, + final com.auto1.pantera.cooldown.api.CooldownService cooldown, final MavenCooldownInspector inspector, final Optional storage, final Duration metadataTtl diff --git a/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenCooldownResponseFactoryTest.java b/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenCooldownResponseFactoryTest.java new file mode 100644 index 000000000..6f42e47b4 --- /dev/null +++ b/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenCooldownResponseFactoryTest.java @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * 
Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.maven.cooldown; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.RsStatus; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.Collections; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.emptyOrNullString; + +/** + * Tests for {@link MavenCooldownResponseFactory}. + * + * @since 2.2.0 + */ +final class MavenCooldownResponseFactoryTest { + + private MavenCooldownResponseFactory factory; + + @BeforeEach + void setUp() { + this.factory = new MavenCooldownResponseFactory(); + } + + @Test + void returns403Status() { + final Response response = this.factory.forbidden(block()); + assertThat(response.status(), is(RsStatus.FORBIDDEN)); + } + + @Test + void returnsTextPlainContentType() { + final Response response = this.factory.forbidden(block()); + final String contentType = response.headers() + .values("Content-Type").get(0); + assertThat(contentType, containsString("text/plain")); + } + + @Test + void bodyContainsBlockedMessage() { + final Response response = this.factory.forbidden(block()); + final String body = new String(response.body().asBytes()); + assertThat(body, containsString("Artifact blocked by cooldown policy. 
Blocked until:")); + } + + @Test + void bodyContainsIso8601Timestamp() { + final Response response = this.factory.forbidden(block()); + final String body = new String(response.body().asBytes()); + // ISO-8601 offset date-time always contains a 'T' separator + assertThat(body, containsString("T")); + assertThat(body, containsString("Z")); + } + + @Test + void includesRetryAfterHeader() { + final Response response = this.factory.forbidden(block()); + final String retryAfter = response.headers() + .values("Retry-After").get(0); + assertThat(retryAfter, is(not(emptyOrNullString()))); + // Should be a numeric value + final long seconds = Long.parseLong(retryAfter); + assertThat(seconds > 0, is(true)); + } + + @Test + void includesCooldownBlockedHeader() { + final Response response = this.factory.forbidden(block()); + final String cooldown = response.headers() + .values("X-Pantera-Cooldown").get(0); + assertThat(cooldown, equalTo("blocked")); + } + + @Test + void repoTypeIsMaven() { + assertThat(this.factory.repoType(), equalTo("maven")); + } + + private static CooldownBlock block() { + return new CooldownBlock( + "maven", + "central-proxy", + "com.example:my-lib", + "1.0.0", + CooldownReason.FRESH_RELEASE, + Instant.now().minus(1, ChronoUnit.HOURS), + Instant.now().plus(23, ChronoUnit.HOURS), + Collections.emptyList() + ); + } +} diff --git a/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataFilterTest.java b/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataFilterTest.java new file mode 100644 index 000000000..f635d9576 --- /dev/null +++ b/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataFilterTest.java @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. 
+ * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.maven.cooldown; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.w3c.dom.Document; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; + +/** + * Tests for {@link MavenMetadataFilter}. + * + * @since 2.2.0 + */ +final class MavenMetadataFilterTest { + + private MavenMetadataParser parser; + private MavenMetadataFilter filter; + + @BeforeEach + void setUp() { + this.parser = new MavenMetadataParser(); + this.filter = new MavenMetadataFilter(); + } + + @Test + void filtersThreeOfTenVersions() throws Exception { + final Document doc = this.parser.parse( + MavenMetadataFilterTest.loadFixture() + ); + final Document filtered = this.filter.filter( + doc, Set.of("2.0.0", "3.0.0-alpha", "3.0.0-beta") + ); + final List versions = this.parser.extractVersions(filtered); + assertThat(versions, hasSize(7)); + assertThat( + versions, + contains( + "1.0.0", "1.1.0", "1.2.0", + "2.1.0", "2.2.0", "2.3.0", + "3.0.0" + ) + ); + } + + @Test + void allBlockedResultsInEmptyVersions() throws Exception { + final Document doc = this.parser.parse( + MavenMetadataFilterTest.loadFixture() + ); + final Set all = Set.of( + "1.0.0", "1.1.0", "1.2.0", + "2.0.0", "2.1.0", "2.2.0", "2.3.0", + "3.0.0-alpha", "3.0.0-beta", "3.0.0" + ); + final Document filtered = this.filter.filter(doc, all); + final List versions = this.parser.extractVersions(filtered); + assertThat(versions, is(empty())); + } + + @Test + void noneBlockedLeavesAllVersions() 
throws Exception { + final Document doc = this.parser.parse( + MavenMetadataFilterTest.loadFixture() + ); + final Document filtered = this.filter.filter( + doc, Collections.emptySet() + ); + final List versions = this.parser.extractVersions(filtered); + assertThat(versions, hasSize(10)); + } + + @Test + void updatesLatestElement() throws Exception { + final Document doc = this.parser.parse( + MavenMetadataFilterTest.loadFixture() + ); + this.filter.updateLatest(doc, "2.3.0"); + final String latest = this.parser.getLatestVersion(doc).orElse(""); + assertThat(latest, equalTo("2.3.0")); + } + + @Test + void updatesLastUpdatedTimestamp() throws Exception { + final Document doc = this.parser.parse( + MavenMetadataFilterTest.loadFixture() + ); + final String before = doc.getElementsByTagName("lastUpdated") + .item(0).getTextContent(); + this.filter.updateLatest(doc, "2.3.0"); + final String after = doc.getElementsByTagName("lastUpdated") + .item(0).getTextContent(); + assertThat(after, is(not(equalTo(before)))); + // Maven timestamp format: yyyyMMddHHmmss -> 14 digits + assertThat(after.length(), equalTo(14)); + } + + /** + * Load the sample maven-metadata.xml fixture from test resources. 
+ * + * @return Fixture bytes + * @throws IOException If reading fails + */ + private static byte[] loadFixture() throws IOException { + try (InputStream stream = MavenMetadataFilterTest.class + .getResourceAsStream("/cooldown/maven-metadata-sample.xml")) { + if (stream == null) { + throw new IOException( + "Fixture not found: /cooldown/maven-metadata-sample.xml" + ); + } + return stream.readAllBytes(); + } + } +} diff --git a/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataParserTest.java b/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataParserTest.java new file mode 100644 index 000000000..59943f1e3 --- /dev/null +++ b/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataParserTest.java @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.maven.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataParseException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.w3c.dom.Document; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Tests for {@link MavenMetadataParser}. 
+ * + * @since 2.2.0 + */ +final class MavenMetadataParserTest { + + private MavenMetadataParser parser; + + @BeforeEach + void setUp() { + this.parser = new MavenMetadataParser(); + } + + @Test + void parsesSampleMetadataFixture() throws Exception { + final byte[] xml = MavenMetadataParserTest.loadFixture(); + final Document doc = this.parser.parse(xml); + assertThat(doc, is(notNullValue())); + } + + @Test + void extractsTenVersionsFromFixture() throws Exception { + final byte[] xml = MavenMetadataParserTest.loadFixture(); + final Document doc = this.parser.parse(xml); + final List versions = this.parser.extractVersions(doc); + assertThat(versions, hasSize(10)); + assertThat( + versions, + contains( + "1.0.0", "1.1.0", "1.2.0", + "2.0.0", "2.1.0", "2.2.0", "2.3.0", + "3.0.0-alpha", "3.0.0-beta", "3.0.0" + ) + ); + } + + @Test + void getsLatestVersionFromFixture() throws Exception { + final byte[] xml = MavenMetadataParserTest.loadFixture(); + final Document doc = this.parser.parse(xml); + final Optional latest = this.parser.getLatestVersion(doc); + assertThat(latest.isPresent(), is(true)); + assertThat(latest.get(), equalTo("3.0.0")); + } + + @Test + void returnsEmptyListWhenNoVersions() throws Exception { + final String xml = """ + + + com.example + empty + + """; + final Document doc = this.parser.parse( + xml.getBytes(StandardCharsets.UTF_8) + ); + final List versions = this.parser.extractVersions(doc); + assertThat(versions, is(empty())); + } + + @Test + void returnsEmptyWhenNoLatest() throws Exception { + final String xml = """ + + + com.example + no-latest + + + 1.0.0 + + + + """; + final Document doc = this.parser.parse( + xml.getBytes(StandardCharsets.UTF_8) + ); + final Optional latest = this.parser.getLatestVersion(doc); + assertThat(latest.isPresent(), is(false)); + } + + @Test + void extractReleaseDatesReturnsEmptyMap() throws Exception { + final byte[] xml = MavenMetadataParserTest.loadFixture(); + final Document doc = this.parser.parse(xml); + final 
Map dates = this.parser.extractReleaseDates(doc); + assertThat(dates.isEmpty(), is(true)); + } + + @Test + void returnsCorrectContentType() { + assertThat(this.parser.contentType(), equalTo("application/xml")); + } + + @Test + void throwsOnInvalidXml() { + final byte[] invalid = "not valid xml <<<".getBytes( + StandardCharsets.UTF_8 + ); + assertThrows( + MetadataParseException.class, + () -> this.parser.parse(invalid) + ); + } + + /** + * Load the sample maven-metadata.xml fixture from test resources. + * + * @return Fixture bytes + * @throws IOException If reading fails + */ + private static byte[] loadFixture() throws IOException { + try (InputStream stream = MavenMetadataParserTest.class + .getResourceAsStream("/cooldown/maven-metadata-sample.xml")) { + if (stream == null) { + throw new IOException( + "Fixture not found: /cooldown/maven-metadata-sample.xml" + ); + } + return stream.readAllBytes(); + } + } +} diff --git a/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataRequestDetectorTest.java b/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataRequestDetectorTest.java new file mode 100644 index 000000000..06eefba41 --- /dev/null +++ b/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataRequestDetectorTest.java @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.maven.cooldown; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.Optional; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +/** + * Tests for {@link MavenMetadataRequestDetector}. + * + * @since 2.2.0 + */ +final class MavenMetadataRequestDetectorTest { + + private MavenMetadataRequestDetector detector; + + @BeforeEach + void setUp() { + this.detector = new MavenMetadataRequestDetector(); + } + + @Test + void detectsMetadataXmlPath() { + assertThat( + this.detector.isMetadataRequest( + "/com/example/my-lib/maven-metadata.xml" + ), + is(true) + ); + } + + @Test + void detectsNestedGroupMetadataPath() { + assertThat( + this.detector.isMetadataRequest( + "/org/apache/commons/commons-lang3/maven-metadata.xml" + ), + is(true) + ); + } + + @Test + void rejectsArtifactJarPath() { + assertThat( + this.detector.isMetadataRequest( + "/com/example/my-lib/1.0.0/my-lib-1.0.0.jar" + ), + is(false) + ); + } + + @Test + void rejectsArtifactPomPath() { + assertThat( + this.detector.isMetadataRequest( + "/com/example/my-lib/1.0.0/my-lib-1.0.0.pom" + ), + is(false) + ); + } + + @Test + void rejectsChecksumPath() { + assertThat( + this.detector.isMetadataRequest( + "/com/example/my-lib/maven-metadata.xml.sha1" + ), + is(false) + ); + } + + @Test + void rejectsNullPath() { + assertThat( + this.detector.isMetadataRequest(null), + is(false) + ); + } + + @Test + void extractsPackageNameFromMetadataPath() { + final Optional name = this.detector.extractPackageName( + "/com/example/my-lib/maven-metadata.xml" + ); + assertThat(name.isPresent(), is(true)); + assertThat(name.get(), equalTo("com/example/my-lib")); + } + + @Test + void extractsNestedGroupPackageName() { + final Optional name = this.detector.extractPackageName( + "/org/apache/commons/commons-lang3/maven-metadata.xml" + ); + assertThat(name.isPresent(), 
is(true)); + assertThat(name.get(), equalTo("org/apache/commons/commons-lang3")); + } + + @Test + void returnsEmptyForArtifactPath() { + final Optional name = this.detector.extractPackageName( + "/com/example/my-lib/1.0.0/my-lib-1.0.0.jar" + ); + assertThat(name.isPresent(), is(false)); + } + + @Test + void returnsEmptyForNullPath() { + final Optional name = this.detector.extractPackageName(null); + assertThat(name.isPresent(), is(false)); + } + + @Test + void returnsCorrectRepoType() { + assertThat(this.detector.repoType(), equalTo("maven")); + } +} diff --git a/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataRewriterTest.java b/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataRewriterTest.java new file mode 100644 index 000000000..dfca99d29 --- /dev/null +++ b/maven-adapter/src/test/java/com/auto1/pantera/maven/cooldown/MavenMetadataRewriterTest.java @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.maven.cooldown; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.w3c.dom.Document; + +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import java.util.Set; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; + +/** + * Tests for {@link MavenMetadataRewriter}. + * Round-trip: parse -> filter -> rewrite -> parse again -> assert filtered model matches. 
+ * + * @since 2.2.0 + */ +final class MavenMetadataRewriterTest { + + private MavenMetadataParser parser; + private MavenMetadataFilter filter; + private MavenMetadataRewriter rewriter; + + @BeforeEach + void setUp() { + this.parser = new MavenMetadataParser(); + this.filter = new MavenMetadataFilter(); + this.rewriter = new MavenMetadataRewriter(); + } + + @Test + void roundTripPreservesFilteredVersions() throws Exception { + // Parse original + final Document original = this.parser.parse( + MavenMetadataRewriterTest.loadFixture() + ); + // Filter 3 versions + this.filter.filter( + original, Set.of("2.0.0", "3.0.0-alpha", "3.0.0-beta") + ); + this.filter.updateLatest(original, "3.0.0"); + // Rewrite to bytes + final byte[] rewritten = this.rewriter.rewrite(original); + assertThat(rewritten.length, is(greaterThan(0))); + // Parse again + final Document reparsed = this.parser.parse(rewritten); + final List versions = this.parser.extractVersions(reparsed); + assertThat(versions, hasSize(7)); + assertThat( + versions, + contains( + "1.0.0", "1.1.0", "1.2.0", + "2.1.0", "2.2.0", "2.3.0", + "3.0.0" + ) + ); + assertThat( + this.parser.getLatestVersion(reparsed).orElse(""), + equalTo("3.0.0") + ); + } + + @Test + void roundTripWithNoFilteringPreservesAll() throws Exception { + final Document original = this.parser.parse( + MavenMetadataRewriterTest.loadFixture() + ); + final byte[] rewritten = this.rewriter.rewrite(original); + final Document reparsed = this.parser.parse(rewritten); + final List versions = this.parser.extractVersions(reparsed); + assertThat(versions, hasSize(10)); + assertThat( + this.parser.getLatestVersion(reparsed).orElse(""), + equalTo("3.0.0") + ); + } + + @Test + void returnsCorrectContentType() { + assertThat( + this.rewriter.contentType(), equalTo("application/xml") + ); + } + + /** + * Load the sample maven-metadata.xml fixture from test resources. 
+ * + * @return Fixture bytes + * @throws IOException If reading fails + */ + private static byte[] loadFixture() throws IOException { + try (InputStream stream = MavenMetadataRewriterTest.class + .getResourceAsStream("/cooldown/maven-metadata-sample.xml")) { + if (stream == null) { + throw new IOException( + "Fixture not found: /cooldown/maven-metadata-sample.xml" + ); + } + return stream.readAllBytes(); + } + } +} diff --git a/maven-adapter/src/test/java/com/auto1/pantera/maven/http/CachedProxySliceTest.java b/maven-adapter/src/test/java/com/auto1/pantera/maven/http/CachedProxySliceTest.java index 404427cf8..2d2c9c23f 100644 --- a/maven-adapter/src/test/java/com/auto1/pantera/maven/http/CachedProxySliceTest.java +++ b/maven-adapter/src/test/java/com/auto1/pantera/maven/http/CachedProxySliceTest.java @@ -15,9 +15,9 @@ import com.auto1.pantera.asto.Key; import com.auto1.pantera.asto.FailedCompletionStage; import com.auto1.pantera.asto.memory.InMemoryStorage; -import com.auto1.pantera.cooldown.CooldownDependency; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.NoopCooldownService; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.impl.NoopCooldownService; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.cache.CachedArtifactMetadataStore; import com.auto1.pantera.http.Response; diff --git a/maven-adapter/src/test/java/com/auto1/pantera/maven/http/MavenCooldownInspectorTest.java b/maven-adapter/src/test/java/com/auto1/pantera/maven/http/MavenCooldownInspectorTest.java index 88961ba38..82b8a010e 100644 --- a/maven-adapter/src/test/java/com/auto1/pantera/maven/http/MavenCooldownInspectorTest.java +++ b/maven-adapter/src/test/java/com/auto1/pantera/maven/http/MavenCooldownInspectorTest.java @@ -11,7 +11,7 @@ package com.auto1.pantera.maven.http; import com.auto1.pantera.asto.Content; -import 
com.auto1.pantera.cooldown.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownDependency; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Response; import com.auto1.pantera.http.ResponseBuilder; diff --git a/maven-adapter/src/test/java/com/auto1/pantera/maven/http/MavenProxySliceITCase.java b/maven-adapter/src/test/java/com/auto1/pantera/maven/http/MavenProxySliceITCase.java index bbf0df77b..408dc1489 100644 --- a/maven-adapter/src/test/java/com/auto1/pantera/maven/http/MavenProxySliceITCase.java +++ b/maven-adapter/src/test/java/com/auto1/pantera/maven/http/MavenProxySliceITCase.java @@ -85,7 +85,7 @@ void setUp() throws Exception { Optional.of(this.events), "my-maven-proxy", "maven-proxy", - com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, + com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, Optional.of(this.storage) ) ) diff --git a/maven-adapter/src/test/resources/cooldown/maven-metadata-sample.xml b/maven-adapter/src/test/resources/cooldown/maven-metadata-sample.xml new file mode 100644 index 000000000..ca346a2f6 --- /dev/null +++ b/maven-adapter/src/test/resources/cooldown/maven-metadata-sample.xml @@ -0,0 +1,22 @@ + + + com.example + my-lib + + 3.0.0 + 3.0.0 + + 1.0.0 + 1.1.0 + 1.2.0 + 2.0.0 + 2.1.0 + 2.2.0 + 2.3.0 + 3.0.0-alpha + 3.0.0-beta + 3.0.0 + + 20260401120000 + + diff --git a/npm-adapter/pom.xml b/npm-adapter/pom.xml index 8a44bd73f..884683ae4 100644 --- a/npm-adapter/pom.xml +++ b/npm-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 npm-adapter - 2.1.3 + 2.2.0 jar npm-adapter Turns your files/objects into NPM artifacts @@ -42,7 +42,7 @@ SOFTWARE. com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -55,7 +55,7 @@ SOFTWARE. com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 com.vdurmont @@ -72,10 +72,6 @@ SOFTWARE. jackson-core ${fasterxml.jackson.version} - - io.vertx - vertx-rx-java2 - io.vertx vertx-web-client @@ -92,7 +88,7 @@ SOFTWARE. 
com.auto1.pantera http-client - 2.1.3 + 2.2.0 compile @@ -127,7 +123,7 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmCooldownInspector.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmCooldownInspector.java index 5166a88ab..0985ec867 100644 --- a/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmCooldownInspector.java +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmCooldownInspector.java @@ -10,8 +10,8 @@ */ package com.auto1.pantera.npm.cooldown; -import com.auto1.pantera.cooldown.CooldownDependency; -import com.auto1.pantera.cooldown.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; import com.auto1.pantera.cooldown.metadata.MetadataAwareInspector; import java.time.Instant; diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmCooldownResponseFactory.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmCooldownResponseFactory.java new file mode 100644 index 000000000..d3bbf968c --- /dev/null +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmCooldownResponseFactory.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.npm.cooldown; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.response.CooldownResponseFactory; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; + +import java.time.Duration; +import java.time.Instant; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; + +/** + * NPM-specific cooldown 403 response factory. + * + *

Returns an {@code application/json} body matching the npm error format, plus + * {@code Retry-After} and {@code X-Pantera-Cooldown: blocked} headers.

+ * + * @since 2.2.0 + */ +public final class NpmCooldownResponseFactory implements CooldownResponseFactory { + + private static final DateTimeFormatter ISO = DateTimeFormatter.ISO_OFFSET_DATE_TIME; + + @Override + public Response forbidden(final CooldownBlock block) { + final String until = ISO.format( + block.blockedUntil().atOffset(ZoneOffset.UTC) + ); + final long retryAfter = Math.max( + 1L, + Duration.between(Instant.now(), block.blockedUntil()).getSeconds() + ); + final String body = String.format( + "{\"error\":\"version in cooldown\",\"blocked_until\":\"%s\"}", until + ); + return ResponseBuilder.forbidden() + .header("Retry-After", String.valueOf(retryAfter)) + .header("X-Pantera-Cooldown", "blocked") + .jsonBody(body) + .build(); + } + + @Override + public String repoType() { + return "npm"; + } +} diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmMetadataParser.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmMetadataParser.java index 41de366fc..6a9bf027d 100644 --- a/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmMetadataParser.java +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmMetadataParser.java @@ -105,6 +105,11 @@ public String contentType() { return CONTENT_TYPE; } + @Override + public Map extractReleaseDates(final JsonNode metadata) { + return this.releaseDates(metadata); + } + @Override public Map releaseDates(final JsonNode metadata) { final JsonNode time = metadata.get("time"); diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmMetadataRequestDetector.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmMetadataRequestDetector.java new file mode 100644 index 000000000..06f586463 --- /dev/null +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/NpmMetadataRequestDetector.java @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is 
free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.npm.cooldown; + +import com.auto1.pantera.cooldown.metadata.MetadataRequestDetector; +import java.util.Optional; + +/** + * Detects npm metadata requests. + * + *

npm metadata requests are package document fetches (e.g. {@code GET /lodash} + * or {@code GET /@scope/pkg}). Tarball downloads contain {@code /-/} in the path + * (e.g. {@code /lodash/-/lodash-4.17.21.tgz}); security-audit and user/login + * endpoints are likewise excluded.

+ * + *

Note: npm metadata filtering is currently handled by + * {@code DownloadPackageSlice} directly. This detector is registered in the + * adapter bundle for completeness and future unification.

+ * + * @since 2.2.0 + */ +public final class NpmMetadataRequestDetector implements MetadataRequestDetector { + + @Override + public boolean isMetadataRequest(final String path) { + if (path == null || path.isEmpty() || "/".equals(path)) { + return false; + } + // Tarball downloads contain /-/ in the path + if (path.contains("/-/")) { + return false; + } + // Security audit endpoints are not metadata + if (path.contains("/npm/v1/security/")) { + return false; + } + // User management endpoints + if (path.contains("/-/user/") || path.contains("/-/v1/login") + || path.contains("/-/whoami")) { + return false; + } + // Everything else is a package metadata request + return true; + } + + @Override + public Optional extractPackageName(final String path) { + if (!isMetadataRequest(path)) { + return Optional.empty(); + } + // Strip leading slash + String name = path.startsWith("/") ? path.substring(1) : path; + // Strip trailing slash + if (name.endsWith("/")) { + name = name.substring(0, name.length() - 1); + } + if (name.isEmpty()) { + return Optional.empty(); + } + return Optional.of(name); + } + + @Override + public String repoType() { + return "npm"; + } +} diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/package-info.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/package-info.java index fe24cd2b5..67ed7fb4d 100644 --- a/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/package-info.java +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/cooldown/package-info.java @@ -17,6 +17,7 @@ *
  • {@link com.auto1.pantera.npm.cooldown.NpmMetadataFilter} - Filters blocked versions from metadata
  • *
  • {@link com.auto1.pantera.npm.cooldown.NpmMetadataRewriter} - Serializes filtered metadata to JSON
  • *
  • {@link com.auto1.pantera.npm.cooldown.NpmCooldownInspector} - Provides release dates for cooldown evaluation
  • + *
  • {@link com.auto1.pantera.npm.cooldown.NpmCooldownResponseFactory} - Builds 403 responses for blocked NPM packages
  • * * *

    NPM metadata structure:

    diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/http/UnpublishForceSlice.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/http/UnpublishForceSlice.java index 82708bd46..7284dda83 100644 --- a/npm-adapter/src/main/java/com/auto1/pantera/npm/http/UnpublishForceSlice.java +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/http/UnpublishForceSlice.java @@ -89,7 +89,7 @@ public CompletableFuture response( if (this.events.isPresent()) { res = res.thenRun( () -> this.events.map( - queue -> queue.add( + queue -> queue.add( // ok: unbounded ConcurrentLinkedDeque (ArtifactEvent queue) new ArtifactEvent(UploadSlice.REPO_TYPE, this.rname, pkg) ) ) diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/http/UnpublishPutSlice.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/http/UnpublishPutSlice.java index 706cf1c27..934e75890 100644 --- a/npm-adapter/src/main/java/com/auto1/pantera/npm/http/UnpublishPutSlice.java +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/http/UnpublishPutSlice.java @@ -94,7 +94,7 @@ public CompletableFuture response( .thenCompose(update -> this.updateMeta(update, key)) .thenAccept( ver -> this.events.ifPresent( - queue -> queue.add( + queue -> queue.add( // ok: unbounded ConcurrentLinkedDeque (ArtifactEvent queue) new ArtifactEvent( UploadSlice.REPO_TYPE, this.rname, pkg, ver ) diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/NpmProxy.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/NpmProxy.java index 3ba956b0b..5c35c3373 100644 --- a/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/NpmProxy.java +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/NpmProxy.java @@ -15,18 +15,20 @@ import com.auto1.pantera.http.Slice; import com.auto1.pantera.http.client.ClientSlices; import com.auto1.pantera.http.client.UriClientSlice; -import com.auto1.pantera.http.trace.MdcPropagation; +import com.auto1.pantera.http.context.ContextualExecutor; import 
com.auto1.pantera.npm.proxy.model.NpmAsset; import com.auto1.pantera.npm.proxy.model.NpmPackage; import com.auto1.pantera.http.log.EcsLogger; import io.reactivex.Maybe; +import io.reactivex.Scheduler; import io.reactivex.schedulers.Schedulers; import java.io.IOException; import java.net.URI; import java.time.Duration; import java.time.OffsetDateTime; -import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executor; +import java.util.concurrent.ForkJoinPool; /** * NPM Proxy. @@ -62,6 +64,13 @@ public class NpmProxy { */ private final ConcurrentHashMap.KeySetView refreshing; + /** + * Contextualised RxJava scheduler for background refresh. + * Propagates ThreadContext (ECS fields) and APM span automatically, + * replacing the per-call MDC capture/restore pattern. + */ + private final Scheduler backgroundScheduler; + /** * Ctor. * @param remote Uri remote @@ -123,6 +132,11 @@ public NpmProxy(final Storage storage, final Slice client, final Duration metada this.remote = remote; this.metadataTtl = metadataTtl; this.refreshing = ConcurrentHashMap.newKeySet(); + // Wrap ForkJoinPool.commonPool with ContextualExecutor so background + // refresh callbacks inherit the caller's ThreadContext (trace.id etc.) + // and APM span. This replaces the per-call MDC capture/restore. + final Executor ctxExec = ContextualExecutor.contextualize(ForkJoinPool.commonPool()); + this.backgroundScheduler = Schedulers.from(ctxExec); } /** @@ -231,35 +245,30 @@ private boolean isStale(final OffsetDateTime lastRefreshed) { * Serves stale content immediately while refreshing in background. * Uses a ConcurrentHashMap.KeySetView to deduplicate in-flight refreshes. * - *

    Captures the caller's MDC snapshot at wrap time and restores it - * inside the RxJava subscribe callbacks; without this the - * {@code Schedulers.io()} pool thread would emit logs without - * {@code trace.id} / {@code client.ip}, which is ~3.3k entries/day per - * production observation.

    + *

    Uses a {@link ContextualExecutor}-wrapped scheduler so that + * background callbacks inherit the caller's ThreadContext (trace.id, + * client.ip) and APM span automatically β€” no per-call MDC capture needed. * * @param name Package name */ @SuppressWarnings("PMD.AvoidCatchingGenericException") private void backgroundRefresh(final String name) { if (this.refreshing.add(name)) { - // Capture caller MDC so subscribe callbacks on Schedulers.io() - // still carry trace.id / client.ip when they log. - final Map mdc = MdcPropagation.capture(); - // Try conditional request first if we have a stored upstream ETag + // Try conditional request first if we have a stored upstream ETag. + // The backgroundScheduler propagates ThreadContext automatically. this.conditionalRefresh(name) - .subscribeOn(Schedulers.io()) + .subscribeOn(this.backgroundScheduler) .doFinally(() -> this.refreshing.remove(name)) .subscribe( - saved -> MdcPropagation.runWith(mdc, () -> + saved -> EcsLogger.debug("com.auto1.pantera.npm.proxy") .message("Background refresh completed") .eventCategory("database") .eventAction("stale_while_revalidate") .eventOutcome("success") .field("package.name", name) - .log() - ), - err -> MdcPropagation.runWith(mdc, () -> + .log(), + err -> EcsLogger.warn("com.auto1.pantera.npm.proxy") .message("Background refresh failed") .eventCategory("database") @@ -267,8 +276,7 @@ private void backgroundRefresh(final String name) { .eventOutcome("failure") .field("package.name", name) .error(err) - .log() - ), + .log(), () -> this.refreshing.remove(name) ); } @@ -326,6 +334,26 @@ public Maybe getAsset(final String path) { ); } + /** + * CompletionStage-based boundary adapter for {@link #getAsset(String)}. + * Converts the internal RxJava {@code Maybe} to + * {@code CompletableFuture>} so callers on hot paths + * (e.g. {@code DownloadAssetSlice}) can stay in the CompletionStage world + * without importing RxJava types. 
+ * + * @param path Asset path + * @return Future containing the asset, or empty if not found + */ + public java.util.concurrent.CompletableFuture> getAssetAsync( + final String path + ) { + return this.getAsset(path) + .map(java.util.Optional::of) + .toSingle(java.util.Optional.empty()) + .to(hu.akarnokd.rxjava2.interop.SingleInterop.get()) + .toCompletableFuture(); + } + /** * Close NPM Proxy adapter and underlying remote client. * @throws IOException when underlying remote client fails to close diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/CachedNpmProxySlice.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/CachedNpmProxySlice.java index f85cf6a9d..ec15f708e 100644 --- a/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/CachedNpmProxySlice.java +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/CachedNpmProxySlice.java @@ -18,26 +18,31 @@ import com.auto1.pantera.http.ResponseBuilder; import com.auto1.pantera.http.Slice; import com.auto1.pantera.http.cache.CachedArtifactMetadataStore; -import com.auto1.pantera.http.cache.DedupStrategy; +import com.auto1.pantera.http.cache.FetchSignal; import com.auto1.pantera.http.cache.NegativeCache; -import com.auto1.pantera.http.cache.RequestDeduplicator; -import com.auto1.pantera.http.cache.RequestDeduplicator.FetchSignal; +import com.auto1.pantera.http.cache.NegativeCacheRegistry; +import com.auto1.pantera.http.context.ContextualExecutor; import com.auto1.pantera.http.log.EcsLogger; +import com.auto1.pantera.http.resilience.SingleFlight; import com.auto1.pantera.http.rq.RequestLine; import com.auto1.pantera.http.slice.KeyFromPath; +import java.time.Duration; import java.util.Optional; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ForkJoinPool; /** * NPM proxy slice with negative caching and signal-based request deduplication. 
* Wraps NpmProxySlice to add caching layer that prevents repeated * 404 requests and deduplicates concurrent requests. * - *

    Uses shared {@link RequestDeduplicator} with SIGNAL strategy: concurrent + *

    Uses the unified {@link SingleFlight} coalescer (WI-05): concurrent * requests for the same package wait for the first request to complete, then * fetch from NpmProxy's storage cache. This eliminates memory buffering while - * maintaining full deduplication.

    + * maintaining full deduplication. The retained {@link FetchSignal} enum is + * the same signal contract as the legacy path — only the coalescer + * implementation changed.

    * * @since 1.0 */ @@ -74,9 +79,11 @@ public final class CachedNpmProxySlice implements Slice { private final String repoType; /** - * Shared request deduplicator using SIGNAL strategy. + * Per-key request coalescer. Concurrent requests for the same cache key + * share one upstream fetch, each receiving the same {@link FetchSignal} + * terminal state. Wired in WI-05. */ - private final RequestDeduplicator deduplicator; + private final SingleFlight deduplicator; /** * Ctor with default settings. @@ -111,9 +118,15 @@ public CachedNpmProxySlice( this.repoName = repoName; this.upstreamUrl = upstreamUrl; this.repoType = repoType; - this.negativeCache = new NegativeCache(repoType, repoName); + this.negativeCache = NegativeCacheRegistry.instance().sharedCache(); this.metadata = storage.map(CachedArtifactMetadataStore::new); - this.deduplicator = new RequestDeduplicator(DedupStrategy.SIGNAL); + // 5-minute zombie TTL (PANTERA_DEDUP_MAX_AGE_MS = 300 000 ms). + // 10K max entries bounds memory. + this.deduplicator = new SingleFlight<>( + Duration.ofMinutes(5), + 10_000, + ContextualExecutor.contextualize(ForkJoinPool.commonPool()) + ); } @Override @@ -188,8 +201,8 @@ private CompletableFuture serveCached( } /** - * Fetches from origin with signal-based request deduplication. - * Uses shared {@link RequestDeduplicator}: first request fetches from origin + * Fetches from origin with signal-based request coalescing. + * Uses shared {@link SingleFlight}: first request fetches from origin * (which saves to NpmProxy's storage cache). Concurrent requests wait for a * signal, then re-fetch from origin which serves from storage cache. 
*/ @@ -199,7 +212,7 @@ private CompletableFuture fetchWithDedup( final Content body, final Key key ) { - return this.deduplicator.deduplicate( + return this.deduplicator.load( key, () -> this.doFetch(line, headers, body, key) ).thenCompose(signal -> this.handleSignal(signal, line, headers, key)); diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSlice.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSlice.java index a0b9e2158..96454a1fd 100644 --- a/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSlice.java +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSlice.java @@ -23,13 +23,12 @@ import com.auto1.pantera.npm.proxy.NpmProxy; import com.auto1.pantera.scheduling.ProxyArtifactEvent; import com.google.common.base.Strings; -import hu.akarnokd.rxjava2.interop.SingleInterop; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownResponses; -import com.auto1.pantera.cooldown.CooldownResult; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.response.CooldownResponseRegistry; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.log.EcsLogger; import java.net.URLDecoder; import java.nio.charset.StandardCharsets; @@ -167,13 +166,15 @@ private static Throwable unwrapException(final Throwable error) { private CompletableFuture checkCacheFirst(final String tgz, final Headers headers) { // NpmProxy.getAsset checks storage first internally, but we need to check BEFORE // calling cooldown.evaluate() which may make network calls. - // Use a non-blocking check that returns asset from storage if present. 
- // Wrap RxJava/CompletableFuture continuations with MDC propagation so - // cache-hit logs carry trace.id/user.name on worker threads. - return this.npm.getAsset(tgz) - .map(com.auto1.pantera.http.trace.MdcPropagation.withMdcRxFunction(asset -> { - // Asset found in storage cache - check if it's served from cache (not remote) - // Since getAsset tries storage first, if we have it, serve immediately + // Convert RxJava Maybe at the NpmProxy boundary to CompletionStage. + return this.npm.getAssetAsync(tgz) + .thenCompose(optAsset -> { + if (optAsset.isEmpty()) { + // Cache miss β€” evaluate cooldown then fetch from upstream + return this.evaluateCooldownAndFetch(tgz, headers); + } + final var asset = optAsset.get(); + // Asset found in storage cache β€” serve immediately (offline-safe) EcsLogger.info("com.auto1.pantera.npm") .message("Cache hit for asset, serving cached (offline-safe)") .eventCategory("web") @@ -181,54 +182,24 @@ private CompletableFuture checkCacheFirst(final String tgz, final Head .eventOutcome("success") .field("package.name", tgz) .log(); - // Queue the proxy event - this.packages.ifPresent(queue -> { - Long millis = null; - try { - final String lm = asset.meta().lastModified(); - if (!Strings.isNullOrEmpty(lm)) { - millis = java.time.Instant.from(java.time.format.DateTimeFormatter.RFC_1123_DATE_TIME.parse(lm)).toEpochMilli(); - } - } catch (final Exception ex) { - EcsLogger.debug("com.auto1.pantera.npm") - .message("Failed to parse asset lastModified for proxy event") - .error(ex) - .log(); - } - queue.add( - new ProxyArtifactEvent( - new Key.From(tgz), this.repoName, - new Login(headers).getValue(), - java.util.Optional.ofNullable(millis) - ) - ); - }); + // Queue the proxy event β€” failures MUST NOT escape the serve path. 
+ this.enqueueProxyEvent(tgz, headers, asset); String mime = asset.meta().contentType(); - if (Strings.isNullOrEmpty(mime)){ + if (Strings.isNullOrEmpty(mime)) { throw new IllegalStateException("Failed to get 'Content-Type'"); } String lastModified = asset.meta().lastModified(); - if(Strings.isNullOrEmpty(lastModified)){ + if (Strings.isNullOrEmpty(lastModified)) { lastModified = new DateTimeNowStr().value(); } - return ResponseBuilder.ok() - .header(ContentType.mime(mime)) - .header("Last-Modified", lastModified) - .body(asset.dataPublisher()) - .build(); - })) - .toSingle(ResponseBuilder.notFound().build()) - .to(SingleInterop.get()) - .toCompletableFuture() - .thenCompose(com.auto1.pantera.http.trace.MdcPropagation.withMdc(response -> { - // If we got a 404 (not in storage), now we need to go to remote - // At this point, we should evaluate cooldown first - if (response.status().code() == 404) { - return this.evaluateCooldownAndFetch(tgz, headers); - } - // Asset was served from cache - return it - return CompletableFuture.completedFuture(response); - })); + return CompletableFuture.completedFuture( + ResponseBuilder.ok() + .header(ContentType.mime(mime)) + .header("Last-Modified", lastModified) + .body(asset.dataPublisher()) + .build() + ); + }); } /** @@ -262,7 +233,9 @@ private CompletableFuture evaluateCooldownAndFetch( .field("package.version", req.version()) .log(); return CompletableFuture.completedFuture( - CooldownResponses.forbidden(block) + CooldownResponseRegistry.instance() + .getOrThrow(this.repoType) + .forbidden(block) ); } return this.serveAsset(tgz, headers); @@ -270,53 +243,83 @@ private CompletableFuture evaluateCooldownAndFetch( } private CompletableFuture serveAsset(final String tgz, final Headers headers) { - return this.npm.getAsset(tgz).map( - asset -> { - this.packages.ifPresent(queue -> { - Long millis = null; - try { - final String lm = asset.meta().lastModified(); - if (!Strings.isNullOrEmpty(lm)) { - millis = 
java.time.Instant.from(java.time.format.DateTimeFormatter.RFC_1123_DATE_TIME.parse(lm)).toEpochMilli(); - } - } catch (final Exception ex) { - EcsLogger.debug("com.auto1.pantera.npm") - .message("Failed to parse asset lastModified for proxy event") - .error(ex) - .log(); - } - queue.add( - new ProxyArtifactEvent( - new Key.From(tgz), this.repoName, - new Login(headers).getValue(), - java.util.Optional.ofNullable(millis) - ) - ); - }); - return asset; - }) - .map( - asset -> { - String mime = asset.meta().contentType(); - if (Strings.isNullOrEmpty(mime)){ - throw new IllegalStateException("Failed to get 'Content-Type'"); - } - String lastModified = asset.meta().lastModified(); - if(Strings.isNullOrEmpty(lastModified)){ - lastModified = new DateTimeNowStr().value(); + // Convert RxJava Maybe at the NpmProxy boundary to CompletionStage. + return this.npm.getAssetAsync(tgz) + .thenApply(optAsset -> { + if (optAsset.isEmpty()) { + return ResponseBuilder.notFound().build(); + } + final var asset = optAsset.get(); + // Enqueue failures (bounded queue full, lambda exception, ...) + // MUST NOT escape the serve path β€” wrap the whole body. + this.enqueueProxyEvent(tgz, headers, asset); + String mime = asset.meta().contentType(); + if (Strings.isNullOrEmpty(mime)) { + throw new IllegalStateException("Failed to get 'Content-Type'"); + } + String lastModified = asset.meta().lastModified(); + if (Strings.isNullOrEmpty(lastModified)) { + lastModified = new DateTimeNowStr().value(); + } + // Stream content directly - no buffering needed. + return ResponseBuilder.ok() + .header(ContentType.mime(mime)) + .header("Last-Modified", lastModified) + .body(asset.dataPublisher()) + .build(); + }); + } + + /** + * Enqueue a proxy artifact event for the given asset. + * Failures (bounded queue full, parse errors) are swallowed + * so the serve path is never affected. 
+ * + * @param tgz Asset path + * @param headers Request headers + * @param asset The resolved asset + */ + @SuppressWarnings("PMD.AvoidCatchingGenericException") + private void enqueueProxyEvent( + final String tgz, + final Headers headers, + final com.auto1.pantera.npm.proxy.model.NpmAsset asset + ) { + this.packages.ifPresent(queue -> { + try { + Long millis = null; + try { + final String lm = asset.meta().lastModified(); + if (!Strings.isNullOrEmpty(lm)) { + millis = java.time.Instant.from( + java.time.format.DateTimeFormatter.RFC_1123_DATE_TIME.parse(lm) + ).toEpochMilli(); } - // Stream content directly - no buffering needed. - // MicrometerSlice fix ensures response bodies aren't double-subscribed. - return ResponseBuilder.ok() - .header(ContentType.mime(mime)) - .header("Last-Modified", lastModified) - .body(asset.dataPublisher()) - .build(); + } catch (final Exception ex) { + EcsLogger.debug("com.auto1.pantera.npm") + .message("Failed to parse asset lastModified for proxy event") + .error(ex) + .log(); } - ) - .toSingle(ResponseBuilder.notFound().build()) - .to(SingleInterop.get()) - .toCompletableFuture(); + final ProxyArtifactEvent event = new ProxyArtifactEvent( + new Key.From(tgz), this.repoName, + new Login(headers).getValue(), + java.util.Optional.ofNullable(millis) + ); + if (!queue.offer(event)) { + com.auto1.pantera.metrics.EventsQueueMetrics + .recordDropped(this.repoName); + } + } catch (final Throwable t) { + EcsLogger.warn("com.auto1.pantera.npm") + .message("Failed to enqueue proxy event; serve path unaffected") + .eventCategory("process") + .eventAction("queue_enqueue") + .eventOutcome("failure") + .field("repository.name", this.repoName) + .log(); + } + }); } private Optional cooldownRequest(final String original, final Headers headers) { diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/NpmCooldownInspector.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/NpmCooldownInspector.java index 
143ab47c4..e447f845c 100644 --- a/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/NpmCooldownInspector.java +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/NpmCooldownInspector.java @@ -10,8 +10,8 @@ */ package com.auto1.pantera.npm.proxy.http; -import com.auto1.pantera.cooldown.CooldownDependency; -import com.auto1.pantera.cooldown.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; import com.auto1.pantera.http.log.EcsLogger; import com.auto1.pantera.npm.proxy.NpmRemote; import com.auto1.pantera.npm.proxy.model.NpmPackage; @@ -45,7 +45,7 @@ * */ final class NpmCooldownInspector implements CooldownInspector, - com.auto1.pantera.cooldown.InspectorRegistry.InvalidatableInspector { + com.auto1.pantera.cooldown.config.InspectorRegistry.InvalidatableInspector { private final NpmRemote remote; diff --git a/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/NpmProxySlice.java b/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/NpmProxySlice.java index eb9fbe13f..2ef05b45f 100644 --- a/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/NpmProxySlice.java +++ b/npm-adapter/src/main/java/com/auto1/pantera/npm/proxy/http/NpmProxySlice.java @@ -24,7 +24,7 @@ import com.auto1.pantera.http.slice.SliceSimple; import com.auto1.pantera.npm.proxy.NpmProxy; import com.auto1.pantera.scheduling.ProxyArtifactEvent; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.cooldown.metadata.CooldownMetadataService; import java.net.URL; @@ -82,7 +82,7 @@ public NpmProxySlice( final AssetPath apath = new AssetPath(path); final NpmCooldownInspector inspector = new NpmCooldownInspector(npm.remoteClient()); // Register inspector globally so unblock can invalidate its cache - com.auto1.pantera.cooldown.InspectorRegistry.instance() + 
com.auto1.pantera.cooldown.config.InspectorRegistry.instance() .register(repoType, repoName, inspector); this.route = new SliceRoute( new RtRulePath( diff --git a/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownInspectorTest.java b/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownInspectorTest.java index bc3854cc5..628ae4bcd 100644 --- a/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownInspectorTest.java +++ b/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownInspectorTest.java @@ -10,7 +10,7 @@ */ package com.auto1.pantera.npm.cooldown; -import com.auto1.pantera.cooldown.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownDependency; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; diff --git a/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownIntegrationTest.java b/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownIntegrationTest.java index 32c00d190..4be1e778d 100644 --- a/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownIntegrationTest.java +++ b/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownIntegrationTest.java @@ -10,15 +10,15 @@ */ package com.auto1.pantera.npm.cooldown; -import com.auto1.pantera.cooldown.CooldownBlock; -import com.auto1.pantera.cooldown.CooldownCache; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.CooldownReason; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownResult; -import com.auto1.pantera.cooldown.CooldownService; -import com.auto1.pantera.cooldown.CooldownSettings; -import com.auto1.pantera.cooldown.metadata.CooldownMetadataServiceImpl; +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownReason; 
+import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import com.auto1.pantera.cooldown.metadata.MetadataFilterService; import com.auto1.pantera.cooldown.metadata.FilteredMetadataCache; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -43,7 +43,7 @@ import static org.hamcrest.Matchers.not; /** - * Integration tests for NPM cooldown metadata filtering with CooldownMetadataServiceImpl. + * Integration tests for NPM cooldown metadata filtering with MetadataFilterService. * * @since 1.0 */ @@ -51,7 +51,7 @@ final class NpmCooldownIntegrationTest { private static final ObjectMapper MAPPER = new ObjectMapper(); - private CooldownMetadataServiceImpl service; + private MetadataFilterService service; private TestCooldownService cooldownService; private NpmMetadataParser parser; private NpmMetadataFilter filter; @@ -65,7 +65,7 @@ void setUp() { final CooldownCache cooldownCache = new CooldownCache(); final FilteredMetadataCache metadataCache = new FilteredMetadataCache(); - this.service = new CooldownMetadataServiceImpl( + this.service = new MetadataFilterService( this.cooldownService, settings, cooldownCache, diff --git a/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownResponseFactoryTest.java b/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownResponseFactoryTest.java new file mode 100644 index 000000000..8f8bd6c14 --- /dev/null +++ b/npm-adapter/src/test/java/com/auto1/pantera/npm/cooldown/NpmCooldownResponseFactoryTest.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. 
+ * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.npm.cooldown; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.RsStatus; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.Collections; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.emptyOrNullString; + +/** + * Tests for {@link NpmCooldownResponseFactory}. + * + * @since 2.2.0 + */ +final class NpmCooldownResponseFactoryTest { + + private NpmCooldownResponseFactory factory; + + @BeforeEach + void setUp() { + this.factory = new NpmCooldownResponseFactory(); + } + + @Test + void returns403Status() { + final Response response = this.factory.forbidden(block()); + assertThat(response.status(), is(RsStatus.FORBIDDEN)); + } + + @Test + void returnsApplicationJsonContentType() { + final Response response = this.factory.forbidden(block()); + final String contentType = response.headers() + .values("Content-Type").get(0); + assertThat(contentType, containsString("application/json")); + } + + @Test + void bodyContainsVersionInCooldownError() { + final Response response = this.factory.forbidden(block()); + final String body = new String(response.body().asBytes()); + assertThat(body, containsString("\"error\":\"version in cooldown\"")); + } + + @Test + void bodyContainsBlockedUntilField() { + final Response response = this.factory.forbidden(block()); + final String body = new String(response.body().asBytes()); + assertThat(body, containsString("\"blocked_until\":\"")); + assertThat(body, 
containsString("Z")); + } + + @Test + void includesRetryAfterHeader() { + final Response response = this.factory.forbidden(block()); + final String retryAfter = response.headers() + .values("Retry-After").get(0); + assertThat(retryAfter, is(not(emptyOrNullString()))); + final long seconds = Long.parseLong(retryAfter); + assertThat(seconds > 0, is(true)); + } + + @Test + void includesCooldownBlockedHeader() { + final Response response = this.factory.forbidden(block()); + final String cooldown = response.headers() + .values("X-Pantera-Cooldown").get(0); + assertThat(cooldown, equalTo("blocked")); + } + + @Test + void repoTypeIsNpm() { + assertThat(this.factory.repoType(), equalTo("npm")); + } + + private static CooldownBlock block() { + return new CooldownBlock( + "npm", + "npm-proxy", + "express", + "5.0.0", + CooldownReason.FRESH_RELEASE, + Instant.now().minus(1, ChronoUnit.HOURS), + Instant.now().plus(23, ChronoUnit.HOURS), + Collections.emptyList() + ); + } +} diff --git a/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/NpmProxyITCase.java b/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/NpmProxyITCase.java index fe31c44e0..6dff754a2 100644 --- a/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/NpmProxyITCase.java +++ b/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/NpmProxyITCase.java @@ -242,7 +242,7 @@ void setUp() throws Exception { final NpmProxySlice slice = new NpmProxySlice( "npm-proxy", npm, Optional.of(packages), "npm-proxy", "npm-proxy", - com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE, + com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE, com.auto1.pantera.cooldown.metadata.NoopCooldownMetadataService.INSTANCE, new com.auto1.pantera.http.client.UriClientSlice(this.client, uri) ); diff --git a/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSliceQueueFullTest.java b/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSliceQueueFullTest.java new file mode 100644 
index 000000000..08c7d89df --- /dev/null +++ b/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSliceQueueFullTest.java @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.npm.proxy.http; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.asto.Key; +import com.auto1.pantera.asto.Storage; +import com.auto1.pantera.asto.memory.InMemoryStorage; +import com.auto1.pantera.asto.test.TestResource; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.impl.NoopCooldownService; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.slice.SliceSimple; +import com.auto1.pantera.metrics.EventsQueueMetrics; +import com.auto1.pantera.npm.misc.NextSafeAvailablePort; +import com.auto1.pantera.npm.proxy.NpmProxy; +import com.auto1.pantera.scheduling.ProxyArtifactEvent; +import com.auto1.pantera.vertx.VertxSliceServer; +import io.vertx.reactivex.core.Vertx; +import io.vertx.reactivex.ext.web.client.WebClient; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import javax.json.Json; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.Queue; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import 
java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +/** + * Verifies that {@link DownloadAssetSlice} serves cache-hit responses with + * HTTP 200 even when the background {@code ProxyArtifactEvent} queue is + * saturated β€” the v2.1.4 WI-00 hotfix for the Queue-full cascade. + * + *
+ * <p>Pre-fills a bounded {@link LinkedBlockingQueue} to capacity (2 slots),
+ * then fires 50 concurrent cache-hit GETs through a {@code DownloadAssetSlice}
+ * wired to that already-full queue. Asserts every request returns HTTP 200
+ * and no exception escapes the serve path. Drops are counted via
+ * {@link EventsQueueMetrics#dropCount()}.</p>
    + */ +final class DownloadAssetSliceQueueFullTest { + + private static final String RNAME = "my-npm-saturated"; + + private static final String TGZ = + "@hello/simple-npm-project/-/@hello/simple-npm-project-1.0.1.tgz"; + + private static final int CONCURRENT_REQUESTS = 50; + + private static final Vertx VERTX = Vertx.vertx(); + + private int port; + + private LinkedBlockingQueue packages; + + @BeforeEach + void setUp() { + this.port = new NextSafeAvailablePort().value(); + // Bounded to 2 so any second enqueue lands in the drop path. + this.packages = new LinkedBlockingQueue<>(2); + } + + @AfterAll + static void tearDown() { + DownloadAssetSliceQueueFullTest.VERTX.close(); + } + + @Test + void fiftyConcurrentCacheHitsAllReturnOkDespiteFullQueue() throws Exception { + // Pre-fill the queue to capacity β€” the next offer() MUST return false + // without throwing. The serve path must remain HTTP 200. + final Key sentinel = new Key.From("sentinel"); + this.packages.add(new ProxyArtifactEvent(sentinel, RNAME, "filler", Optional.empty())); + this.packages.add(new ProxyArtifactEvent(sentinel, RNAME, "filler", Optional.empty())); + MatcherAssert.assertThat( + "Queue is at capacity before request burst", + this.packages.remainingCapacity(), + Matchers.is(0) + ); + final Storage storage = new InMemoryStorage(); + this.saveCachedAsset(storage); + final AssetPath path = new AssetPath(""); + final long dropsBefore = EventsQueueMetrics.dropCount(); + try ( + VertxSliceServer server = new VertxSliceServer( + DownloadAssetSliceQueueFullTest.VERTX, + new DownloadAssetSlice( + new NpmProxy( + storage, + new SliceSimple(ResponseBuilder.notFound().build()) + ), + path, Optional.of(this.packages), RNAME, "npm-proxy", + NoopCooldownService.INSTANCE, noopInspector() + ), + this.port + ) + ) { + server.start(); + this.fire50ConcurrentRequestsAndAssertAllOk(); + } + // After the burst the drop counter must have advanced β€” every + // cache-hit attempted one enqueue on a full queue. 
+ final long drops = EventsQueueMetrics.dropCount() - dropsBefore; + MatcherAssert.assertThat( + "queue overflows incremented the drop counter at least once", + drops, + Matchers.greaterThanOrEqualTo(1L) + ); + } + + private void fire50ConcurrentRequestsAndAssertAllOk() + throws InterruptedException, ExecutionException, TimeoutException { + final ExecutorService pool = Executors.newFixedThreadPool(CONCURRENT_REQUESTS); + final List> results = new ArrayList<>(CONCURRENT_REQUESTS); + final WebClient client = WebClient.create(DownloadAssetSliceQueueFullTest.VERTX); + try { + final String url = String.format( + "http://127.0.0.1:%d/%s", this.port, DownloadAssetSliceQueueFullTest.TGZ + ); + for (int i = 0; i < CONCURRENT_REQUESTS; i = i + 1) { + results.add(pool.submit(() -> { + final CompletableFuture future = new CompletableFuture<>(); + client.getAbs(url).send(ar -> { + if (ar.succeeded()) { + future.complete(ar.result().statusCode()); + } else { + future.completeExceptionally(ar.cause()); + } + }); + return future.get(30, TimeUnit.SECONDS); + })); + } + int okCount = 0; + for (final Future result : results) { + final Integer code = result.get(60, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "Every request completes with HTTP 200 (no exception escapes)", + code, Matchers.is(200) + ); + okCount = okCount + 1; + } + MatcherAssert.assertThat( + "All 50 concurrent cache-hit GETs returned HTTP 200", + okCount, Matchers.is(CONCURRENT_REQUESTS) + ); + } finally { + pool.shutdown(); + if (!pool.awaitTermination(10, TimeUnit.SECONDS)) { + pool.shutdownNow(); + } + client.close(); + } + } + + private void saveCachedAsset(final Storage storage) { + storage.save( + new Key.From(DownloadAssetSliceQueueFullTest.TGZ), + new Content.From( + new TestResource( + String.format("storage/%s", DownloadAssetSliceQueueFullTest.TGZ) + ).asBytes() + ) + ).join(); + storage.save( + new Key.From(String.format("%s.meta", DownloadAssetSliceQueueFullTest.TGZ)), + new Content.From( + 
Json.createObjectBuilder() + .add("last-modified", "2020-05-13T16:30:30+01:00") + .build() + .toString() + .getBytes() + ) + ).join(); + } + + private static CooldownInspector noopInspector() { + return new CooldownInspector() { + @Override + public CompletableFuture> releaseDate(final String artifact, final String version) { + return CompletableFuture.completedFuture(Optional.empty()); + } + + @Override + public CompletableFuture> dependencies(final String artifact, final String version) { + return CompletableFuture.completedFuture(List.of()); + } + }; + } +} diff --git a/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSliceTest.java b/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSliceTest.java index c9aa2afd1..df4889e5d 100644 --- a/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSliceTest.java +++ b/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/DownloadAssetSliceTest.java @@ -15,9 +15,9 @@ import com.auto1.pantera.asto.Storage; import com.auto1.pantera.asto.memory.InMemoryStorage; import com.auto1.pantera.asto.test.TestResource; -import com.auto1.pantera.cooldown.CooldownDependency; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.NoopCooldownService; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.impl.NoopCooldownService; import com.auto1.pantera.http.headers.ContentType; import com.auto1.pantera.http.ResponseBuilder; import com.auto1.pantera.http.slice.SliceSimple; diff --git a/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/NpmCooldownInspectorTest.java b/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/NpmCooldownInspectorTest.java index 49cd65f35..2624ed67e 100644 --- a/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/NpmCooldownInspectorTest.java +++ 
b/npm-adapter/src/test/java/com/auto1/pantera/npm/proxy/http/NpmCooldownInspectorTest.java @@ -10,7 +10,7 @@ */ package com.auto1.pantera.npm.proxy.http; -import com.auto1.pantera.cooldown.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownDependency; import com.auto1.pantera.npm.proxy.NpmRemote; import com.auto1.pantera.npm.proxy.model.NpmAsset; import com.auto1.pantera.npm.proxy.model.NpmPackage; diff --git a/nuget-adapter/pom.xml b/nuget-adapter/pom.xml index 5795b3363..1fbab4a21 100644 --- a/nuget-adapter/pom.xml +++ b/nuget-adapter/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 nuget-adapter - 2.1.3 + 2.2.0 jar nuget-adapter Turns your files/objects into NuGet artifacts @@ -42,12 +42,12 @@ SOFTWARE. com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -86,7 +86,7 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 test diff --git a/nuget-adapter/src/main/java/com/auto1/pantera/nuget/http/publish/PackagePublish.java b/nuget-adapter/src/main/java/com/auto1/pantera/nuget/http/publish/PackagePublish.java index b38e8f33f..e42061c56 100644 --- a/nuget-adapter/src/main/java/com/auto1/pantera/nuget/http/publish/PackagePublish.java +++ b/nuget-adapter/src/main/java/com/auto1/pantera/nuget/http/publish/PackagePublish.java @@ -125,7 +125,7 @@ public CompletableFuture put(Headers headers, Content body) { (info, throwable) -> { if (throwable == null) { this.events.ifPresent( - queue -> queue.add( + queue -> queue.add( // ok: unbounded ConcurrentLinkedDeque (ArtifactEvent queue) new ArtifactEvent( PackagePublish.REPO_TYPE, this.name, new Login(headers).getValue(), info.packageName(), diff --git a/pantera-backfill/dependency-reduced-pom.xml b/pantera-backfill/dependency-reduced-pom.xml index 290b97e11..7965bad05 100644 --- a/pantera-backfill/dependency-reduced-pom.xml +++ b/pantera-backfill/dependency-reduced-pom.xml @@ -3,12 +3,12 @@ pantera com.auto1.pantera 
- 2.1.3 + 2.2.0 4.0.0 pantera-backfill pantera-backfill - 2.1.3 + 2.2.0 Standalone CLI for backfilling the PostgreSQL artifacts table from disk storage 2020 diff --git a/pantera-backfill/pom.xml b/pantera-backfill/pom.xml index 59767bfe1..a931f1d22 100644 --- a/pantera-backfill/pom.xml +++ b/pantera-backfill/pom.xml @@ -27,10 +27,10 @@ SOFTWARE. com.auto1.pantera pantera - 2.1.3 + 2.2.0 pantera-backfill - 2.1.3 + 2.2.0 jar pantera-backfill Standalone CLI for backfilling the PostgreSQL artifacts table from disk storage diff --git a/pantera-core/pom.xml b/pantera-core/pom.xml index 5c913e82e..86396308a 100644 --- a/pantera-core/pom.xml +++ b/pantera-core/pom.xml @@ -6,11 +6,11 @@ com.auto1.pantera pantera - 2.1.3 + 2.2.0 pantera-core - 2.1.3 + 2.2.0 jar ${project.basedir}/../LICENSE.header @@ -19,7 +19,7 @@ com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile @@ -107,6 +107,18 @@ jackson-databind ${fasterxml.jackson.version} + + + co.elastic.apm + apm-agent-api + 1.55.1 + jakarta.servlet jakarta.servlet-api diff --git a/pantera-core/src/main/java/com/auto1/pantera/audit/AuditAction.java b/pantera-core/src/main/java/com/auto1/pantera/audit/AuditAction.java new file mode 100644 index 000000000..fe2a747d9 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/audit/AuditAction.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.audit; + +/** + * Closed enumeration of audit events β€” Tier-5 of the observability model + * (Β§4.1 / Β§10.4 of {@code docs/analysis/v2.2-target-architecture.md}). + * + *

    Only four actions qualify as audit events. Anything else (cache writes, + * negative-cache invalidations, pool initialisations, queue drops, circuit + * state transitions, ...) is operational and belongs in + * {@code LocalLogger} (Tier-4), not here. This deliberate smallness keeps the + * audit dataset compact (90-day retention, compliance-facing) and protects it + * from "action.type" explosion as new operational events are added. + * + * @since 2.2.0 + */ +public enum AuditAction { + + /** Upload / deploy / push of an artifact (HTTP {@code PUT}). */ + ARTIFACT_PUBLISH, + + /** Successful serve of an artifact to a client (HTTP {@code GET} 2xx). */ + ARTIFACT_DOWNLOAD, + + /** Explicit delete of an artifact via API or admin action. */ + ARTIFACT_DELETE, + + /** + * Metadata / index lookup that resolved a concrete coordinate. + * Emitted when an adapter resolves a client request to a specific + * {@code (package.name, package.version)} pair. + */ + RESOLUTION +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/cache/CacheInvalidationPubSub.java b/pantera-core/src/main/java/com/auto1/pantera/cache/CacheInvalidationPubSub.java index 79771a806..eb3feea2d 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cache/CacheInvalidationPubSub.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cache/CacheInvalidationPubSub.java @@ -18,6 +18,7 @@ import java.util.Map; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Consumer; /** * Redis/Valkey pub/sub channel for cross-instance cache invalidation. @@ -110,6 +111,33 @@ public void register(final String name, final Cleanable cache) { this.caches.put(name, cache); } + /** + * Subscribe a per-key invalidation handler under the given namespace. 
+ * Convenience wrapper around {@link #register(String, Cleanable)} for + * callers that only care about per-key invalidation and don't need the + * {@link Cleanable#invalidateAll()} broadcast β€” the adapter no-ops on + * invalidateAll because the handler is per-key only. + * + * @param namespace Cache type / namespace name (e.g. "auth:enabled") + * @param handler Consumer invoked with the key when a remote invalidation + * for this namespace arrives + */ + public void subscribe(final String namespace, final Consumer handler) { + this.caches.put(namespace, new Cleanable() { + @Override + public void invalidate(final String key) { + handler.accept(key); + } + + @Override + public void invalidateAll() { + // No-op: consumer-based subscribers only care about per-key + // invalidation; invalidateAll messages for this namespace + // are ignored by design. + } + }); + } + /** * Publish an invalidation message for a specific key. * Other instances will call {@code cache.invalidate(key)} on receipt. diff --git a/pantera-core/src/main/java/com/auto1/pantera/cache/GlobalCacheConfig.java b/pantera-core/src/main/java/com/auto1/pantera/cache/GlobalCacheConfig.java index 33aa8ec7f..e17b654c8 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cache/GlobalCacheConfig.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cache/GlobalCacheConfig.java @@ -10,51 +10,118 @@ */ package com.auto1.pantera.cache; +import com.amihaiemil.eoyaml.YamlMapping; +import com.auto1.pantera.http.misc.ConfigDefaults; import java.util.Optional; /** * Global cache configuration holder. - * Provides shared Valkey connection for all caches across Pantera. + * Provides shared Valkey connection for all caches across Pantera, and + * exposes per-cache configuration records (e.g. {@link #authEnabled()}) + * resolved from env β†’ YAML β†’ compile-time defaults. * Thread-safe singleton pattern. 
- * + * * @since 1.0 */ public final class GlobalCacheConfig { - + + // ------------------------------------------------------------- + // Compile-time defaults for auth-enabled cache (fallback-only). + // Env vars and YAML override these. + // ------------------------------------------------------------- + + /** Default L1 max size for the auth-enabled cache. */ + static final int DEFAULT_AUTH_ENABLED_L1_MAX_SIZE = 10_000; + + /** Default L1 TTL (seconds) for the auth-enabled cache. */ + static final int DEFAULT_AUTH_ENABLED_L1_TTL_SECONDS = 300; + + /** Default flag: L2 (Valkey) enabled for the auth-enabled cache. */ + static final boolean DEFAULT_AUTH_ENABLED_L2_ENABLED = true; + + /** Default L2 TTL (seconds) for the auth-enabled cache. */ + static final int DEFAULT_AUTH_ENABLED_L2_TTL_SECONDS = 3_600; + + /** Default L2 operation timeout (ms) for the auth-enabled cache. */ + static final int DEFAULT_AUTH_ENABLED_L2_TIMEOUT_MS = 100; + + // ------------------------------------------------------------- + // Compile-time defaults for the group-metadata STALE cache (aid, + // never breaker β€” bounds are a JVM-memory safety net only; under + // realistic cardinality these never fire). + // ------------------------------------------------------------- + + /** Default L1 max size for the group-metadata stale cache. */ + static final int DEFAULT_GROUP_METADATA_STALE_L1_MAX_SIZE = 100_000; + + /** Default L1 TTL (seconds) for the group-metadata stale cache β€” 30 days. */ + static final int DEFAULT_GROUP_METADATA_STALE_L1_TTL_SECONDS = 2_592_000; + + /** Default flag: L2 (Valkey) enabled for the group-metadata stale cache. */ + static final boolean DEFAULT_GROUP_METADATA_STALE_L2_ENABLED = true; + + /** Default L2 TTL (seconds) β€” {@code 0} means no TTL (rely on Valkey LRU). */ + static final int DEFAULT_GROUP_METADATA_STALE_L2_TTL_SECONDS = 0; + + /** Default L2 operation timeout (ms) for the group-metadata stale cache. 
*/ + static final int DEFAULT_GROUP_METADATA_STALE_L2_TIMEOUT_MS = 100; + /** * Singleton instance. */ private static volatile GlobalCacheConfig instance; - + /** * Shared Valkey connection. */ private final ValkeyConnection valkey; - + + /** + * Optional {@code caches} YAML mapping (from {@code meta.caches}). + * May be {@code null} when no config is provided; all accessors + * fall back to env / compile-time defaults in that case. + */ + private final YamlMapping caches; + /** * Private constructor for singleton. * @param valkey Valkey connection + * @param caches Optional YAML {@code caches} mapping */ - private GlobalCacheConfig(final ValkeyConnection valkey) { + private GlobalCacheConfig(final ValkeyConnection valkey, final YamlMapping caches) { this.valkey = valkey; + this.caches = caches; } - + /** * Initialize global cache configuration. * Should be called once at startup by YamlSettings. - * + * * @param valkey Optional Valkey connection */ public static void initialize(final Optional valkey) { + GlobalCacheConfig.initialize(valkey, null); + } + + /** + * Initialize global cache configuration with YAML {@code caches} mapping. + * + * @param valkey Optional Valkey connection + * @param caches Optional YAML {@code caches} mapping (from {@code meta.caches}) + */ + public static void initialize( + final Optional valkey, + final YamlMapping caches + ) { if (instance == null) { synchronized (GlobalCacheConfig.class) { if (instance == null) { - instance = new GlobalCacheConfig(valkey.orElse(null)); + instance = new GlobalCacheConfig(valkey.orElse(null), caches); } } } } - + /** * Get the shared Valkey connection. * @return Optional Valkey connection @@ -65,7 +132,229 @@ public static Optional valkeyConnection() { } return Optional.ofNullable(instance.valkey); } - + + /** + * Get the singleton instance, creating a defaults-only one if the + * config has not been explicitly initialized yet. Callers that need + * to read config sections (e.g. 
{@link #authEnabled()}) should use + * this accessor. + * + * @return Singleton instance (never null) + */ + public static GlobalCacheConfig getInstance() { + if (instance == null) { + synchronized (GlobalCacheConfig.class) { + if (instance == null) { + instance = new GlobalCacheConfig(null, null); + } + } + } + return instance; + } + + /** + * Configuration for the auth-enabled cache + * (wraps {@code LocalEnabledFilter}). + * + * @param l1MaxSize L1 Caffeine cache max entries + * @param l1TtlSeconds L1 TTL in seconds + * @param l2Enabled Whether L2 (Valkey) tier is enabled + * @param l2TtlSeconds L2 TTL in seconds + * @param l2TimeoutMs L2 operation timeout in milliseconds + */ + public record AuthEnabledConfig( + int l1MaxSize, + int l1TtlSeconds, + boolean l2Enabled, + int l2TtlSeconds, + int l2TimeoutMs + ) { } + + /** + * Resolve auth-enabled cache configuration with precedence + * env β†’ YAML ({@code meta.caches.auth-enabled.*}) β†’ default. + * + *

+     * <p>YAML paths:</p>
+     * <ul>
+     *   <li>{@code auth-enabled.l1.maxSize}</li>
+     *   <li>{@code auth-enabled.l1.ttlSeconds}</li>
+     *   <li>{@code auth-enabled.l2.enabled}</li>
+     *   <li>{@code auth-enabled.l2.ttlSeconds}</li>
+     *   <li>{@code auth-enabled.l2.timeoutMs}</li>
+     * </ul>
+     *

    Env overrides: {@code PANTERA_AUTH_ENABLED_L1_SIZE}, + * {@code PANTERA_AUTH_ENABLED_L1_TTL_SECONDS}, + * {@code PANTERA_AUTH_ENABLED_L2_ENABLED}, + * {@code PANTERA_AUTH_ENABLED_L2_TTL_SECONDS}, + * {@code PANTERA_AUTH_ENABLED_L2_TIMEOUT_MS}. + * + * @return Resolved config + */ + public AuthEnabledConfig authEnabled() { + // YAML values (may be null if YAML not provided or keys missing) + Integer yL1Size = null; + Integer yL1Ttl = null; + Boolean yL2Enabled = null; + Integer yL2Ttl = null; + Integer yL2Timeout = null; + if (this.caches != null) { + final YamlMapping authSection = this.caches.yamlMapping("auth-enabled"); + if (authSection != null) { + final YamlMapping l1 = authSection.yamlMapping("l1"); + if (l1 != null) { + yL1Size = parseIntOrNull(l1.string("maxSize")); + yL1Ttl = parseIntOrNull(l1.string("ttlSeconds")); + } + final YamlMapping l2 = authSection.yamlMapping("l2"); + if (l2 != null) { + yL2Enabled = parseBooleanOrNull(l2.string("enabled")); + yL2Ttl = parseIntOrNull(l2.string("ttlSeconds")); + yL2Timeout = parseIntOrNull(l2.string("timeoutMs")); + } + } + } + // Env β†’ YAML β†’ default (ConfigDefaults.getX reads env/sysprop; if + // neither is present it returns the second arg, which we set to + // the YAML value when present, else the compile-time default). + final int l1Size = ConfigDefaults.getInt( + "PANTERA_AUTH_ENABLED_L1_SIZE", + yL1Size != null ? yL1Size : DEFAULT_AUTH_ENABLED_L1_MAX_SIZE + ); + final int l1Ttl = ConfigDefaults.getInt( + "PANTERA_AUTH_ENABLED_L1_TTL_SECONDS", + yL1Ttl != null ? yL1Ttl : DEFAULT_AUTH_ENABLED_L1_TTL_SECONDS + ); + final boolean l2Enabled = ConfigDefaults.getBoolean( + "PANTERA_AUTH_ENABLED_L2_ENABLED", + yL2Enabled != null ? yL2Enabled : DEFAULT_AUTH_ENABLED_L2_ENABLED + ); + final int l2Ttl = ConfigDefaults.getInt( + "PANTERA_AUTH_ENABLED_L2_TTL_SECONDS", + yL2Ttl != null ? 
yL2Ttl : DEFAULT_AUTH_ENABLED_L2_TTL_SECONDS + ); + final int l2Timeout = ConfigDefaults.getInt( + "PANTERA_AUTH_ENABLED_L2_TIMEOUT_MS", + yL2Timeout != null ? yL2Timeout : DEFAULT_AUTH_ENABLED_L2_TIMEOUT_MS + ); + return new AuthEnabledConfig(l1Size, l1Ttl, l2Enabled, l2Ttl, l2Timeout); + } + + /** + * Configuration for the group-metadata STALE cache (last-known-good + * fallback used when all upstream members are unreachable). + * + *

    Design principle: this cache is an AID, never a BREAKER. Under + * realistic cardinality no eviction ever fires. Bounds exist only as a + * JVM-memory safety net against pathological growth; they are NOT an + * expiry mechanism. + * + * @param l1MaxSize L1 Caffeine max entries (safety net only) + * @param l1TtlSeconds L1 TTL in seconds (long β€” 30d default) + * @param l2Enabled Whether L2 (Valkey) stale tier is enabled + * @param l2TtlSeconds L2 TTL in seconds ({@code 0} = no TTL, Valkey LRU) + * @param l2TimeoutMs L2 operation timeout in milliseconds + */ + public record GroupMetadataStaleConfig( + int l1MaxSize, + int l1TtlSeconds, + boolean l2Enabled, + int l2TtlSeconds, + int l2TimeoutMs + ) { } + + /** + * Resolve group-metadata stale cache configuration with precedence + * env β†’ YAML ({@code meta.caches.group-metadata-stale.*}) β†’ default. + * + *

+     * <p>YAML paths:</p>
+     * <ul>
+     *   <li>{@code group-metadata-stale.l1.maxSize}</li>
+     *   <li>{@code group-metadata-stale.l1.ttlSeconds}</li>
+     *   <li>{@code group-metadata-stale.l2.enabled}</li>
+     *   <li>{@code group-metadata-stale.l2.ttlSeconds}</li>
+     *   <li>{@code group-metadata-stale.l2.timeoutMs}</li>
+     * </ul>
+     *

    Env overrides: {@code PANTERA_GROUP_METADATA_STALE_L1_SIZE}, + * {@code PANTERA_GROUP_METADATA_STALE_L1_TTL_SECONDS}, + * {@code PANTERA_GROUP_METADATA_STALE_L2_ENABLED}, + * {@code PANTERA_GROUP_METADATA_STALE_L2_TTL_SECONDS}, + * {@code PANTERA_GROUP_METADATA_STALE_L2_TIMEOUT_MS}. + * + * @return Resolved config + */ + public GroupMetadataStaleConfig groupMetadataStale() { + // YAML values (may be null if YAML not provided or keys missing) + Integer yL1Size = null; + Integer yL1Ttl = null; + Boolean yL2Enabled = null; + Integer yL2Ttl = null; + Integer yL2Timeout = null; + if (this.caches != null) { + final YamlMapping section = this.caches.yamlMapping("group-metadata-stale"); + if (section != null) { + final YamlMapping l1 = section.yamlMapping("l1"); + if (l1 != null) { + yL1Size = parseIntOrNull(l1.string("maxSize")); + yL1Ttl = parseIntOrNull(l1.string("ttlSeconds")); + } + final YamlMapping l2 = section.yamlMapping("l2"); + if (l2 != null) { + yL2Enabled = parseBooleanOrNull(l2.string("enabled")); + yL2Ttl = parseIntOrNull(l2.string("ttlSeconds")); + yL2Timeout = parseIntOrNull(l2.string("timeoutMs")); + } + } + } + final int l1Size = ConfigDefaults.getInt( + "PANTERA_GROUP_METADATA_STALE_L1_SIZE", + yL1Size != null ? yL1Size : DEFAULT_GROUP_METADATA_STALE_L1_MAX_SIZE + ); + final int l1Ttl = ConfigDefaults.getInt( + "PANTERA_GROUP_METADATA_STALE_L1_TTL_SECONDS", + yL1Ttl != null ? yL1Ttl : DEFAULT_GROUP_METADATA_STALE_L1_TTL_SECONDS + ); + final boolean l2Enabled = ConfigDefaults.getBoolean( + "PANTERA_GROUP_METADATA_STALE_L2_ENABLED", + yL2Enabled != null ? yL2Enabled : DEFAULT_GROUP_METADATA_STALE_L2_ENABLED + ); + final int l2Ttl = ConfigDefaults.getInt( + "PANTERA_GROUP_METADATA_STALE_L2_TTL_SECONDS", + yL2Ttl != null ? yL2Ttl : DEFAULT_GROUP_METADATA_STALE_L2_TTL_SECONDS + ); + final int l2Timeout = ConfigDefaults.getInt( + "PANTERA_GROUP_METADATA_STALE_L2_TIMEOUT_MS", + yL2Timeout != null ? 
yL2Timeout : DEFAULT_GROUP_METADATA_STALE_L2_TIMEOUT_MS + ); + return new GroupMetadataStaleConfig(l1Size, l1Ttl, l2Enabled, l2Ttl, l2Timeout); + } + + private static Integer parseIntOrNull(final String val) { + if (val == null || val.isEmpty()) { + return null; + } + try { + return Integer.valueOf(val.trim()); + } catch (final NumberFormatException ex) { + return null; + } + } + + private static Boolean parseBooleanOrNull(final String val) { + if (val == null || val.isEmpty()) { + return null; + } + final String v = val.trim().toLowerCase(java.util.Locale.ROOT); + if ("true".equals(v) || "1".equals(v) || "yes".equals(v)) { + return Boolean.TRUE; + } + if ("false".equals(v) || "0".equals(v) || "no".equals(v)) { + return Boolean.FALSE; + } + return null; + } + /** * Reset for testing purposes. */ diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownMetrics.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownMetrics.java deleted file mode 100644 index d5d0d8775..000000000 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownMetrics.java +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. - */ -package com.auto1.pantera.cooldown; - -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.atomic.AtomicLong; - -/** - * Metrics for cooldown service to expose to Prometheus/Micrometer. - * - * @since 1.0 - */ -public final class CooldownMetrics { - - /** - * Total requests evaluated. - */ - private final AtomicLong totalRequests = new AtomicLong(0); - - /** - * Total requests blocked. 
- */ - private final AtomicLong totalBlocked = new AtomicLong(0); - - /** - * Total requests allowed. - */ - private final AtomicLong totalAllowed = new AtomicLong(0); - - /** - * Total requests auto-allowed due to circuit breaker. - */ - private final AtomicLong circuitBreakerAutoAllowed = new AtomicLong(0); - - /** - * Total cache hits. - */ - private final AtomicLong cacheHits = new AtomicLong(0); - - /** - * Total cache misses. - */ - private final AtomicLong cacheMisses = new AtomicLong(0); - - /** - * Blocked count per repository type. - * Key: repoType - */ - private final ConcurrentMap blockedByRepoType = new ConcurrentHashMap<>(); - - /** - * Blocked count per repository. - * Key: repoType:repoName - */ - private final ConcurrentMap blockedByRepo = new ConcurrentHashMap<>(); - - /** - * Record a cooldown evaluation. - * - * @param result Evaluation result - */ - public void recordEvaluation(final CooldownResult result) { - this.totalRequests.incrementAndGet(); - if (result.blocked()) { - this.totalBlocked.incrementAndGet(); - } else { - this.totalAllowed.incrementAndGet(); - } - } - - /** - * Record a block by repository. - * - * @param repoType Repository type (maven, npm, etc.) - * @param repoName Repository name - */ - public void recordBlock(final String repoType, final String repoName) { - this.blockedByRepoType.computeIfAbsent(repoType, k -> new AtomicLong(0)).incrementAndGet(); - final String repoKey = repoType + ":" + repoName; - this.blockedByRepo.computeIfAbsent(repoKey, k -> new AtomicLong(0)).incrementAndGet(); - } - - /** - * Record circuit breaker auto-allow. - */ - public void recordCircuitBreakerAutoAllow() { - this.circuitBreakerAutoAllowed.incrementAndGet(); - } - - /** - * Record cache hit. - */ - public void recordCacheHit() { - this.cacheHits.incrementAndGet(); - } - - /** - * Record cache miss. - */ - public void recordCacheMiss() { - this.cacheMisses.incrementAndGet(); - } - - /** - * Get total requests evaluated. 
- * - * @return Total requests - */ - public long getTotalRequests() { - return this.totalRequests.get(); - } - - /** - * Get total requests blocked. - * - * @return Total blocked - */ - public long getTotalBlocked() { - return this.totalBlocked.get(); - } - - /** - * Get total requests allowed. - * - * @return Total allowed - */ - public long getTotalAllowed() { - return this.totalAllowed.get(); - } - - /** - * Get circuit breaker auto-allowed count. - * - * @return Auto-allowed count - */ - public long getCircuitBreakerAutoAllowed() { - return this.circuitBreakerAutoAllowed.get(); - } - - /** - * Get cache hits. - * - * @return Cache hits - */ - public long getCacheHits() { - return this.cacheHits.get(); - } - - /** - * Get cache misses. - * - * @return Cache misses - */ - public long getCacheMisses() { - return this.cacheMisses.get(); - } - - /** - * Get cache hit rate as percentage. - * - * @return Hit rate (0-100) - */ - public double getCacheHitRate() { - final long total = this.cacheHits.get() + this.cacheMisses.get(); - return total == 0 ? 0.0 : (double) this.cacheHits.get() / total * 100.0; - } - - /** - * Get blocked count for repository type. - * - * @param repoType Repository type - * @return Blocked count - */ - public long getBlockedByRepoType(final String repoType) { - final AtomicLong counter = this.blockedByRepoType.get(repoType); - return counter == null ? 0 : counter.get(); - } - - /** - * Get blocked count for specific repository. - * - * @param repoType Repository type - * @param repoName Repository name - * @return Blocked count - */ - public long getBlockedByRepo(final String repoType, final String repoName) { - final String repoKey = repoType + ":" + repoName; - final AtomicLong counter = this.blockedByRepo.get(repoKey); - return counter == null ? 0 : counter.get(); - } - - /** - * Get all repository types with blocks. 
- * - * @return Repository type names - */ - public java.util.Set getRepoTypes() { - return this.blockedByRepoType.keySet(); - } - - /** - * Get all repositories with blocks. - * - * @return Repository keys (repoType:repoName) - */ - public java.util.Set getRepos() { - return this.blockedByRepo.keySet(); - } - - /** - * Get metrics summary. - * - * @return Metrics string - */ - public String summary() { - return String.format( - "CooldownMetrics[total=%d, blocked=%d (%.1f%%), allowed=%d, cacheHitRate=%.1f%%, circuitBreakerAutoAllowed=%d]", - this.totalRequests.get(), - this.totalBlocked.get(), - this.totalRequests.get() == 0 ? 0.0 : (double) this.totalBlocked.get() / this.totalRequests.get() * 100.0, - this.totalAllowed.get(), - getCacheHitRate(), - this.circuitBreakerAutoAllowed.get() - ); - } - - /** - * Reset all metrics. - */ - public void reset() { - this.totalRequests.set(0); - this.totalBlocked.set(0); - this.totalAllowed.set(0); - this.circuitBreakerAutoAllowed.set(0); - this.cacheHits.set(0); - this.cacheMisses.set(0); - this.blockedByRepoType.clear(); - this.blockedByRepo.clear(); - } -} diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownResponses.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownResponses.java deleted file mode 100644 index 5ea5ced22..000000000 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownResponses.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
- */ -package com.auto1.pantera.cooldown; - -import com.auto1.pantera.http.Response; -import com.auto1.pantera.http.ResponseBuilder; -import java.time.Instant; -import java.time.ZoneOffset; -import java.time.format.DateTimeFormatter; -import java.util.Locale; -import javax.json.Json; -import javax.json.JsonArrayBuilder; -import javax.json.JsonObjectBuilder; - -/** - * Helper to build cooldown HTTP responses. - */ -public final class CooldownResponses { - - private static final DateTimeFormatter ISO = DateTimeFormatter.ISO_OFFSET_DATE_TIME; - - private CooldownResponses() { - } - - public static Response forbidden(final CooldownBlock block) { - // Calculate human-readable message - // Note: blockedAt is when the block was created, blockedUntil is when it expires - // The release date can be inferred: releaseDate = blockedUntil - cooldownPeriod - final String message = String.format( - "Security Policy: Package %s@%s is blocked due to %s. " - + "Block created: %s. Available after: %s (remaining: %s). 
" - + "This is a security measure to protect against supply chain attacks on fresh releases.", - block.artifact(), - block.version(), - formatReason(block.reason()), - ISO.format(block.blockedAt().atOffset(ZoneOffset.UTC)), - ISO.format(block.blockedUntil().atOffset(ZoneOffset.UTC)), - formatRemainingTime(block.blockedUntil()) - ); - - final JsonObjectBuilder json = Json.createObjectBuilder() - .add("error", "COOLDOWN_BLOCKED") - .add("message", message) - .add("repository", block.repoName()) - .add("repositoryType", block.repoType()) - .add("artifact", block.artifact()) - .add("version", block.version()) - .add("reason", block.reason().name().toLowerCase(Locale.US)) - .add("reasonDescription", formatReason(block.reason())) - .add("blockedAt", ISO.format(block.blockedAt().atOffset(ZoneOffset.UTC))) - .add("blockedUntil", ISO.format(block.blockedUntil().atOffset(ZoneOffset.UTC))) - .add("remainingTime", formatRemainingTime(block.blockedUntil())); - final JsonArrayBuilder deps = Json.createArrayBuilder(); - block.dependencies().forEach(dep -> deps.add( - Json.createObjectBuilder() - .add("artifact", dep.artifact()) - .add("version", dep.version()) - )); - json.add("dependencies", deps); - return ResponseBuilder.forbidden() - .jsonBody(json.build().toString()) - .build(); - } - - /** - * Format reason enum to human-readable string. - */ - private static String formatReason(final CooldownReason reason) { - return switch (reason) { - case FRESH_RELEASE -> "fresh release (package was published recently)"; - case NEWER_THAN_CACHE -> "newer than cached version"; - default -> reason.name().toLowerCase(Locale.US).replace('_', ' '); - }; - } - - /** - * Format remaining time until block expires. 
- */ - private static String formatRemainingTime(final Instant until) { - final Instant now = Instant.now(); - if (until.isBefore(now)) { - return "expired"; - } - final long hours = java.time.Duration.between(now, until).toHours(); - if (hours < 1) { - final long minutes = java.time.Duration.between(now, until).toMinutes(); - return minutes + " minutes"; - } - if (hours < 24) { - return hours + " hours"; - } - final long days = hours / 24; - return days + " days"; - } -} diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownBlock.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownBlock.java similarity index 98% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownBlock.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownBlock.java index 71caf4564..2ffe9f979 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownBlock.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownBlock.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. */ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.api; import java.time.Instant; import java.util.Collections; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownDependency.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownDependency.java similarity index 95% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownDependency.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownDependency.java index 82e2e9787..e2cc6a44c 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownDependency.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownDependency.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
*/ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.api; import java.util.Objects; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownInspector.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownInspector.java similarity index 98% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownInspector.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownInspector.java index efee3a033..3bf79a5fa 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownInspector.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownInspector.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. */ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.api; import java.time.Instant; import java.util.Collection; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownReason.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownReason.java similarity index 94% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownReason.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownReason.java index 99ff587f9..fe751b17a 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownReason.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownReason.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. */ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.api; /** * Reasons for triggering a cooldown block. 
diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownRequest.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownRequest.java similarity index 97% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownRequest.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownRequest.java index 1e7c160a5..8e4f4ebfa 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownRequest.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownRequest.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. */ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.api; import java.time.Instant; import java.util.Objects; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownResult.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownResult.java similarity index 96% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownResult.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownResult.java index d3024bb6b..fa963a226 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownResult.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownResult.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
*/ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.api; import java.util.Optional; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownService.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownService.java similarity index 98% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownService.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownService.java index 32ba0d7ca..4130b8b70 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownService.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/api/CooldownService.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. */ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.api; import java.util.List; import java.util.concurrent.CompletableFuture; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownCache.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/cache/CooldownCache.java similarity index 94% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownCache.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/cache/CooldownCache.java index 18629f822..deb085650 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownCache.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/cache/CooldownCache.java @@ -8,10 +8,10 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
*/ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.cache; import com.auto1.pantera.cache.ValkeyConnection; -import com.auto1.pantera.http.trace.MdcPropagation; + import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import com.github.benmanes.caffeine.cache.Expiry; @@ -213,7 +213,7 @@ public CompletableFuture isBlocked( // Track L2 error - metrics handled elsewhere return null; // L2 failure β†’ skip to database }) - .thenCompose(MdcPropagation.withMdc(l2Bytes -> { + .thenCompose(l2Bytes -> { final long durationMs = (System.nanoTime() - l2StartNanos) / 1_000_000; if (l2Bytes != null) { @@ -237,9 +237,9 @@ public CompletableFuture isBlocked( // Query database return this.queryAndCache(key, dbQuery); - })); + }); } - + // Single-tier: Query database return this.queryAndCache(key, dbQuery); } @@ -260,25 +260,26 @@ private CompletableFuture queryAndCache( // Deduplication metrics can be added if needed return existing; } - - // Query database + + // Query database β€” register in inflight BEFORE attaching whenComplete + // to avoid a race where the future completes (and removes from inflight) + // before the put() call, leaving a zombie entry in the map. 
final CompletableFuture future = dbQuery.get() - .whenComplete(MdcPropagation.withMdcBiConsumer((blocked, error) -> { - this.inflight.remove(key); - if (error == null && blocked != null) { - // Cache in L1 - this.decisions.put(key, blocked); - // Cache in L2 only for ALLOWED entries (false) - // BLOCKED entries (true) are cached by service layer with dynamic TTL - if (this.twoTier && !blocked) { - this.putL2Boolean(key, false, this.l2AllowedTtlSeconds); - } - } - })); - - // Register inflight to deduplicate concurrent requests + .orTimeout(30, TimeUnit.SECONDS); this.inflight.put(key, future); - + future.whenComplete((blocked, error) -> { + this.inflight.remove(key); + if (error == null && blocked != null) { + // Cache in L1 + this.decisions.put(key, blocked); + // Cache in L2 only for ALLOWED entries (false) + // BLOCKED entries (true) are cached by service layer with dynamic TTL + if (this.twoTier && !blocked) { + this.putL2Boolean(key, false, this.l2AllowedTtlSeconds); + } + } + }); + return future; } @@ -460,7 +461,7 @@ private CompletableFuture scanAndUpdateStep( ) { return this.l2.scan(cursor, ScanArgs.Builder.matches(pattern).limit(100)) .toCompletableFuture() - .thenCompose(MdcPropagation.withMdc(result -> { + .thenCompose(result -> { for (final String key : result.getKeys()) { this.l2.setex(key, this.l2AllowedTtlSeconds, "false".getBytes()); } @@ -468,7 +469,7 @@ private CompletableFuture scanAndUpdateStep( return CompletableFuture.completedFuture(null); } return this.scanAndUpdateStep(result, pattern); - })); + }); } } diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownAdapterBundle.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownAdapterBundle.java new file mode 100644 index 000000000..fa12f898d --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownAdapterBundle.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + 
* Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cooldown.config; + +import com.auto1.pantera.cooldown.metadata.MetadataFilter; +import com.auto1.pantera.cooldown.metadata.MetadataParser; +import com.auto1.pantera.cooldown.metadata.MetadataRequestDetector; +import com.auto1.pantera.cooldown.metadata.MetadataRewriter; +import com.auto1.pantera.cooldown.response.CooldownResponseFactory; +import java.util.Objects; + +/** + * Immutable bundle of per-adapter cooldown components. + * + *

+ * <p>Each repository type (maven, npm, pypi, docker, go, composer, gradle)
+ * registers a bundle at startup. The bundle is looked up by repo type
+ * when a request arrives, enabling the proxy layer to:</p>
+ * <ol>
+ *   <li>Detect metadata requests via {@link #detector()}</li>
+ *   <li>Route metadata through {@link com.auto1.pantera.cooldown.metadata.MetadataFilterService}
+ *       using this bundle's parser/filter/rewriter</li>
+ *   <li>Build format-appropriate 403 responses via {@link #responseFactory()}</li>
+ * </ol>
    + * + * @param Type of parsed metadata object (e.g. {@code Document} for Maven, + * {@code JsonNode} for npm/Composer, {@code List} for Go) + * @since 2.2.0 + */ +public record CooldownAdapterBundle( + MetadataParser parser, + MetadataFilter filter, + MetadataRewriter rewriter, + MetadataRequestDetector detector, + CooldownResponseFactory responseFactory +) { + /** + * Canonical constructor with null checks. + */ + public CooldownAdapterBundle { + Objects.requireNonNull(parser, "parser"); + Objects.requireNonNull(filter, "filter"); + Objects.requireNonNull(rewriter, "rewriter"); + Objects.requireNonNull(detector, "detector"); + Objects.requireNonNull(responseFactory, "responseFactory"); + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownAdapterRegistry.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownAdapterRegistry.java new file mode 100644 index 000000000..8eec99201 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownAdapterRegistry.java @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cooldown.config; + +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Global registry of per-repo-type cooldown adapter bundles. + * + *

+ * <p>Populated once at startup by the wiring layer ({@code CooldownWiring}),
+ * then queried on every proxy request to obtain the adapter-specific
+ * parser, filter, rewriter, detector, and response factory.</p>
+ *
+ * <p>Thread-safe: backed by {@link ConcurrentHashMap}.</p>
    + * + * @since 2.2.0 + */ +public final class CooldownAdapterRegistry { + + /** + * Singleton instance. + */ + private static final CooldownAdapterRegistry INSTANCE = new CooldownAdapterRegistry(); + + /** + * Bundles by canonical repo type (e.g. "maven", "npm", "gradle"). + */ + private final Map> bundles; + + private CooldownAdapterRegistry() { + this.bundles = new ConcurrentHashMap<>(); + } + + /** + * Get singleton instance. + * + * @return Registry instance + */ + public static CooldownAdapterRegistry instance() { + return INSTANCE; + } + + /** + * Register an adapter bundle for a repository type. + * + * @param repoType Canonical repo type identifier (e.g. "maven", "npm") + * @param bundle Adapter bundle + * @param Metadata type + */ + public void register(final String repoType, final CooldownAdapterBundle bundle) { + this.bundles.put(repoType, bundle); + } + + /** + * Register an adapter bundle for a repository type and additional aliases. + * Useful when multiple types share the same components (e.g. gradle reuses maven). + * + * @param repoType Primary repo type identifier + * @param bundle Adapter bundle + * @param aliases Additional type identifiers that map to the same bundle + * @param Metadata type + */ + public void register( + final String repoType, + final CooldownAdapterBundle bundle, + final String... aliases + ) { + this.bundles.put(repoType, bundle); + for (final String alias : aliases) { + this.bundles.put(alias, bundle); + } + } + + /** + * Get adapter bundle for a repository type. + * + * @param repoType Repository type identifier + * @return Bundle if registered, empty otherwise + */ + public Optional> get(final String repoType) { + return Optional.ofNullable(this.bundles.get(repoType)); + } + + /** + * Returns the set of registered repository types. + * + * @return Unmodifiable set of registered types + */ + public Set registeredTypes() { + return Set.copyOf(this.bundles.keySet()); + } + + /** + * Clear all registrations. 
For testing only. + */ + public void clear() { + this.bundles.clear(); + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownCircuitBreaker.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownCircuitBreaker.java similarity index 99% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownCircuitBreaker.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownCircuitBreaker.java index 818589587..528e9f5e5 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownCircuitBreaker.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownCircuitBreaker.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. */ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.config; import java.time.Duration; import java.time.Instant; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownSettings.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownSettings.java similarity index 99% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownSettings.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownSettings.java index c2d23b6ab..9b9dc5dcb 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CooldownSettings.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/CooldownSettings.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
*/ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.config; import java.time.Duration; import java.util.HashMap; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/InspectorRegistry.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/InspectorRegistry.java similarity index 98% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/InspectorRegistry.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/config/InspectorRegistry.java index 1f785bbea..e2514bdc9 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/InspectorRegistry.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/config/InspectorRegistry.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. */ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.config; import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CachedCooldownInspector.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/impl/CachedCooldownInspector.java similarity index 98% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/CachedCooldownInspector.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/impl/CachedCooldownInspector.java index 633c97b1b..bbf2a9800 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/CachedCooldownInspector.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/impl/CachedCooldownInspector.java @@ -8,9 +8,11 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
*/ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.impl; import com.auto1.pantera.cache.CacheConfig; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import java.time.Duration; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/NoopCooldownService.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/impl/NoopCooldownService.java similarity index 84% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/NoopCooldownService.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/impl/NoopCooldownService.java index 70831afe9..b1800f853 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/NoopCooldownService.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/impl/NoopCooldownService.java @@ -8,8 +8,13 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
*/ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.impl; +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; import java.util.Collections; import java.util.List; import java.util.concurrent.CompletableFuture; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataService.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataService.java index 147ff659c..2ccd2b84b 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataService.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataService.java @@ -10,7 +10,7 @@ */ package com.auto1.pantera.cooldown.metadata; -import com.auto1.pantera.cooldown.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownInspector; import java.util.Optional; import java.util.concurrent.CompletableFuture; @@ -78,6 +78,13 @@ CompletableFuture filterMetadata( */ void invalidateAll(String repoType, String repoName); + /** + * Clear all cached metadata across all repositories. + * Called on global policy changes (e.g. cooldown duration change) + * that may affect all cached entries. + */ + void clearAll(); + /** * Get cache statistics. 
* diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCache.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCache.java index 9f89b4db0..248e219ec 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCache.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCache.java @@ -12,7 +12,7 @@ import com.auto1.pantera.cache.ValkeyConnection; import com.auto1.pantera.cooldown.metrics.CooldownMetrics; -import com.auto1.pantera.http.trace.MdcPropagation; + import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; import com.github.benmanes.caffeine.cache.Expiry; @@ -65,8 +65,9 @@ public final class FilteredMetadataCache { /** * Default L1 cache size (number of packages). + * Configurable via {@code PANTERA_COOLDOWN_METADATA_L1_SIZE} env var. */ - private static final int DEFAULT_L1_SIZE = 5_000; + private static final int DEFAULT_L1_SIZE = resolveDefaultL1Size(); /** * Default max TTL when no versions are blocked (24 hours). @@ -79,6 +80,12 @@ public final class FilteredMetadataCache { */ private static final Duration MIN_TTL = Duration.ofMinutes(1); + /** + * Grace period after logical TTL expiry during which the stale entry + * remains in Caffeine to serve stale-while-revalidate responses (H3). + */ + private static final Duration SWR_GRACE = Duration.ofMinutes(5); + /** * L1 cache (in-memory) with per-entry dynamic TTL. * May be null in L2-only mode. 
@@ -232,17 +239,22 @@ public CompletableFuture get( if (!this.l2OnlyMode && this.l1Cache != null) { final CacheEntry l1Cached = this.l1Cache.getIfPresent(key); if (l1Cached != null) { - // Check if entry has expired (blockedUntil has passed) if (l1Cached.isExpired()) { - // Entry expired - invalidate and reload - this.l1Cache.invalidate(key); - } else { + // Stale-while-revalidate (H3): return stale bytes immediately + // and trigger background re-evaluation so the next caller gets + // fresh data without waiting. + this.triggerBackgroundRevalidation(key, loader); this.l1Hits++; if (CooldownMetrics.isAvailable()) { - CooldownMetrics.getInstance().recordCacheHit("l1"); + CooldownMetrics.getInstance().recordCacheHit("l1_swr"); } return CompletableFuture.completedFuture(l1Cached.data()); } + this.l1Hits++; + if (CooldownMetrics.isAvailable()) { + CooldownMetrics.getInstance().recordCacheHit("l1"); + } + return CompletableFuture.completedFuture(l1Cached.data()); } } @@ -251,8 +263,8 @@ public CompletableFuture get( return this.l2Connection.async().get(key) .toCompletableFuture() .orTimeout(100, TimeUnit.MILLISECONDS) - .exceptionally(MdcPropagation.withMdcFunction(err -> null)) - .thenCompose(MdcPropagation.withMdc(l2Bytes -> { + .exceptionally(err -> null) + .thenCompose(l2Bytes -> { if (l2Bytes != null) { this.l2Hits++; if (CooldownMetrics.isAvailable()) { @@ -272,7 +284,7 @@ public CompletableFuture get( CooldownMetrics.getInstance().recordCacheMiss(); } return this.loadAndCache(key, loader); - })); + }); } // Single-tier: load and cache @@ -283,9 +295,28 @@ public CompletableFuture get( return this.loadAndCache(key, loader); } + /** + * Trigger background re-evaluation for a stale cache entry (SWR β€” H3). + * Only fires if no revalidation is already in progress for this key. + * The caller has already returned stale bytes to the client. 
+ */ + private void triggerBackgroundRevalidation( + final String key, + final java.util.function.Supplier> loader + ) { + if (this.inflight.containsKey(key)) { + // Already revalidating β€” skip duplicate + return; + } + // Fire-and-forget: loadAndCache will update L1 + L2 on completion + this.loadAndCache(key, loader); + } + /** * Load metadata and cache in both tiers with dynamic TTL. * Uses single-flight pattern to prevent stampede. + * Registers in inflight BEFORE attaching whenComplete to avoid the + * same race condition fixed in CooldownCache (H5). */ private CompletableFuture loadAndCache( final String key, @@ -297,32 +328,32 @@ private CompletableFuture loadAndCache( return existing.thenApply(CacheEntry::data); } - // Start loading - final CompletableFuture future = loader.get() - .whenComplete(MdcPropagation.withMdcBiConsumer((entry, error) -> { - this.inflight.remove(key); - if (error == null && entry != null) { - // Cache in L1 with L1 TTL (skip in L2-only mode) - if (!this.l2OnlyMode && this.l1Cache != null) { - // Wrap entry with L1 TTL for proper expiration - final CacheEntry l1Entry = new CacheEntry( - entry.data(), - entry.earliestBlockedUntil(), - this.l1Ttl - ); - this.l1Cache.put(key, l1Entry); - } - // Cache in L2 with L2 TTL (use configured l2Ttl, capped by blockedUntil if present) - if (this.l2Connection != null) { - final long ttlSeconds = this.calculateL2Ttl(entry); - if (ttlSeconds > 0) { - this.l2Connection.async().setex(key, ttlSeconds, entry.data()); - } + // Start loading -- register in inflight BEFORE whenComplete + final CompletableFuture future = loader.get(); + this.inflight.put(key, future); + future.whenComplete((entry, error) -> { + this.inflight.remove(key); + if (error == null && entry != null) { + // Cache in L1 with L1 TTL (skip in L2-only mode) + if (!this.l2OnlyMode && this.l1Cache != null) { + // Wrap entry with L1 TTL for proper expiration + final CacheEntry l1Entry = new CacheEntry( + entry.data(), + 
entry.earliestBlockedUntil(), + this.l1Ttl + ); + this.l1Cache.put(key, l1Entry); + } + // Cache in L2 with L2 TTL (use configured l2Ttl, capped by blockedUntil if present) + if (this.l2Connection != null) { + final long ttlSeconds = this.calculateL2Ttl(entry); + if (ttlSeconds > 0) { + this.l2Connection.async().setex(key, ttlSeconds, entry.data()); } } - })); + } + }); - this.inflight.put(key, future); return future.thenApply(CacheEntry::data); } @@ -376,11 +407,11 @@ public void invalidateAll(final String repoType, final String repoName) { // L2: Pattern delete (expensive but rare) if (this.l2Connection != null) { this.l2Connection.async().keys(prefix + "*") - .thenAccept(MdcPropagation.withMdcConsumer(keys -> { + .thenAccept(keys -> { if (keys != null && !keys.isEmpty()) { this.l2Connection.async().del(keys.toArray(new String[0])); } - })); + }); } } @@ -551,31 +582,49 @@ public boolean isExpired() { /** * Calculate TTL in nanoseconds for Caffeine expiry. - * If versions are blocked: TTL = earliestBlockedUntil - now - * If no versions blocked: TTL = maxTtl (release dates don't change) + * Includes a SWR grace period so the entry stays in Caffeine + * beyond its logical expiry, allowing stale-while-revalidate. + * Use {@link #isExpired()} for logical expiry checks. 
* - * @return TTL in nanoseconds + * @return TTL in nanoseconds (logical TTL + SWR grace) */ public long ttlNanos() { if (this.earliestBlockedUntil.isPresent()) { final Duration remaining = Duration.between(Instant.now(), this.earliestBlockedUntil.get()); if (remaining.isNegative() || remaining.isZero()) { - // Already expired - use minimum TTL - return MIN_TTL.toNanos(); + // Already logically expired - keep alive for SWR grace + return SWR_GRACE.toNanos(); } - return remaining.toNanos(); + return remaining.plus(SWR_GRACE).toNanos(); } - // No blocked versions - cache for max TTL - return this.maxTtl.toNanos(); + // No blocked versions - cache for max TTL + grace + return this.maxTtl.plus(SWR_GRACE).toNanos(); } /** - * Calculate TTL in seconds for L2 cache. + * Calculate logical TTL in seconds for L2 cache (excludes SWR grace). * * @return TTL in seconds */ public long ttlSeconds() { - return Math.max(MIN_TTL.getSeconds(), this.ttlNanos() / 1_000_000_000L); + return Math.max(MIN_TTL.getSeconds(), this.logicalTtlNanos() / 1_000_000_000L); + } + + /** + * Logical TTL in nanoseconds (without SWR grace period). + * Used for L2 TTL calculation and tests. + * + * @return Logical TTL in nanoseconds + */ + private long logicalTtlNanos() { + if (this.earliestBlockedUntil.isPresent()) { + final Duration remaining = Duration.between(Instant.now(), this.earliestBlockedUntil.get()); + if (remaining.isNegative() || remaining.isZero()) { + return MIN_TTL.toNanos(); + } + return remaining.toNanos(); + } + return this.maxTtl.toNanos(); } /** @@ -607,4 +656,25 @@ public static CacheEntry withBlockedVersions( return new CacheEntry(data, Optional.of(earliestBlockedUntil), maxTtl); } } + + /** + * Resolve default L1 size from env var or fall back to 50,000 (H4). + * Configurable via {@code PANTERA_COOLDOWN_METADATA_L1_SIZE}. 
+ * + * @return L1 size + */ + private static int resolveDefaultL1Size() { + final String env = System.getenv("PANTERA_COOLDOWN_METADATA_L1_SIZE"); + if (env != null && !env.isEmpty()) { + try { + final int parsed = Integer.parseInt(env.trim()); + if (parsed > 0) { + return parsed; + } + } catch (final NumberFormatException ignored) { + // fall through to default + } + } + return 50_000; + } } diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCacheConfig.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCacheConfig.java index 290f8f146..81dba1629 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCacheConfig.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCacheConfig.java @@ -46,9 +46,10 @@ public final class FilteredMetadataCacheConfig { public static final Duration DEFAULT_TTL = Duration.ofHours(24); /** - * Default maximum L1 cache size (5,000 packages). + * Default maximum L1 cache size (50,000 packages β€” H4). + * Configurable via {@code PANTERA_COOLDOWN_METADATA_L1_SIZE} env var. */ - public static final int DEFAULT_MAX_SIZE = 5_000; + public static final int DEFAULT_MAX_SIZE = 50_000; /** * Default L1 TTL when L2 is enabled (5 minutes). 
diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataAwareInspector.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataAwareInspector.java index 3d17f84b7..97d71b860 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataAwareInspector.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataAwareInspector.java @@ -14,7 +14,7 @@ import java.util.Map; /** - * Extension interface for {@link com.auto1.pantera.cooldown.CooldownInspector} implementations + * Extension interface for {@link com.auto1.pantera.cooldown.api.CooldownInspector} implementations * that can accept preloaded release dates from metadata. * *

    When metadata contains release timestamps (e.g., NPM's {@code time} object), diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataServiceImpl.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataFilterService.java similarity index 82% rename from pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataServiceImpl.java rename to pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataFilterService.java index e0910397d..c501535e8 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataServiceImpl.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataFilterService.java @@ -10,14 +10,14 @@ */ package com.auto1.pantera.cooldown.metadata; -import com.auto1.pantera.cooldown.CooldownCache; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownService; -import com.auto1.pantera.cooldown.CooldownSettings; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.config.CooldownSettings; import com.auto1.pantera.cooldown.metrics.CooldownMetrics; import com.auto1.pantera.http.log.EcsLogger; -import com.auto1.pantera.http.trace.MdcPropagation; + import org.slf4j.MDC; import java.time.Duration; @@ -33,6 +33,8 @@ import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.ForkJoinPool; import java.util.stream.Collectors; @@ -49,7 +51,7 @@ * * @since 1.0 */ -public final class CooldownMetadataServiceImpl implements CooldownMetadataService { +public final class 
MetadataFilterService implements CooldownMetadataService { /** * Default maximum versions to evaluate for cooldown. @@ -84,10 +86,16 @@ public final class CooldownMetadataServiceImpl implements CooldownMetadataServic private final FilteredMetadataCache metadataCache; /** - * Executor for async operations. + * Executor for async operations (metadata parse, filter, rewrite). */ private final Executor executor; + /** + * Dedicated bounded executor for parallel version evaluation (H2). + * 4 threads, context-propagating, used only for evaluateVersion() dispatch. + */ + private final ExecutorService evaluationExecutor; + /** * Maximum versions to evaluate. */ @@ -110,7 +118,7 @@ public final class CooldownMetadataServiceImpl implements CooldownMetadataServic * @param settings Cooldown settings * @param cooldownCache Per-version cooldown cache */ - public CooldownMetadataServiceImpl( + public MetadataFilterService( final CooldownService cooldown, final CooldownSettings settings, final CooldownCache cooldownCache @@ -121,7 +129,8 @@ public CooldownMetadataServiceImpl( cooldownCache, new FilteredMetadataCache(), ForkJoinPool.commonPool(), - DEFAULT_MAX_VERSIONS + DEFAULT_MAX_VERSIONS, + null ); } @@ -135,19 +144,52 @@ public CooldownMetadataServiceImpl( * @param executor Executor for async operations * @param maxVersionsToEvaluate Maximum versions to evaluate */ - public CooldownMetadataServiceImpl( + public MetadataFilterService( final CooldownService cooldown, final CooldownSettings settings, final CooldownCache cooldownCache, final FilteredMetadataCache metadataCache, final Executor executor, final int maxVersionsToEvaluate + ) { + this(cooldown, settings, cooldownCache, metadataCache, executor, maxVersionsToEvaluate, null); + } + + /** + * Full constructor with optional dedicated evaluation executor. 
+ * + * @param cooldown Cooldown service + * @param settings Cooldown settings + * @param cooldownCache Per-version cooldown cache + * @param metadataCache Filtered metadata cache + * @param executor Executor for async operations + * @param maxVersionsToEvaluate Maximum versions to evaluate + * @param evalExecutor Dedicated executor for parallel version evaluation (null = create default) + */ + @SuppressWarnings("PMD.ExcessiveParameterList") + public MetadataFilterService( + final CooldownService cooldown, + final CooldownSettings settings, + final CooldownCache cooldownCache, + final FilteredMetadataCache metadataCache, + final Executor executor, + final int maxVersionsToEvaluate, + final ExecutorService evalExecutor ) { this.cooldown = Objects.requireNonNull(cooldown); this.settings = Objects.requireNonNull(settings); this.cooldownCache = Objects.requireNonNull(cooldownCache); this.metadataCache = Objects.requireNonNull(metadataCache); - this.executor = Objects.requireNonNull(executor); + this.executor = com.auto1.pantera.http.context.ContextualExecutor + .contextualize(Objects.requireNonNull(executor)); + this.evaluationExecutor = evalExecutor != null + ? 
evalExecutor + : com.auto1.pantera.http.context.ContextualExecutorService.wrap( + Executors.newFixedThreadPool(4, r -> { + final Thread t = new Thread(r, "cooldown-eval"); + t.setDaemon(true); + return t; + })); this.maxVersionsToEvaluate = maxVersionsToEvaluate; this.versionComparators = Map.of( "npm", VersionComparators.semver(), @@ -212,7 +254,7 @@ private CompletableFuture computeFilteredM final Optional inspectorOpt, final long startTime ) { - return CompletableFuture.supplyAsync(MdcPropagation.withMdcSupplier(() -> { + return CompletableFuture.supplyAsync(() -> { // Step 1: Parse metadata final T parsed = parser.parse(rawMetadata); final List allVersions = parser.extractVersions(parsed); @@ -230,8 +272,13 @@ private CompletableFuture computeFilteredM } // Step 2: Get release dates from metadata (if available) + // Prefer the new MetadataParser.extractReleaseDates() SPI; fall back + // to the older ReleaseDateProvider interface for backward compat. + final Map extracted = parser.extractReleaseDates(parsed); final Map releaseDates; - if (parser instanceof ReleaseDateProvider) { + if (!extracted.isEmpty()) { + releaseDates = extracted; + } else if (parser instanceof ReleaseDateProvider) { @SuppressWarnings("unchecked") final ReleaseDateProvider provider = (ReleaseDateProvider) parser; releaseDates = provider.releaseDates(parsed); @@ -242,6 +289,12 @@ private CompletableFuture computeFilteredM // Step 2b: Preload release dates into inspector for later use this.preloadReleaseDates(parser, parsed, inspectorOpt); + // Step 2c: Pre-warm CooldownCache L1 with release dates from metadata. + // Versions older than the cooldown period are guaranteed allowed (false). 
+ if (!releaseDates.isEmpty()) { + this.preWarmCooldownCache(repoName, packageName, releaseDates); + } + // Step 3: Select versions to evaluate based on RELEASE DATE, not semver // Only versions released within the cooldown period could possibly be blocked final Duration cooldownPeriod = this.settings.minimumAllowedAge(); @@ -315,30 +368,35 @@ private CompletableFuture computeFilteredM allVersions, sortedVersions, versionsToEvaluate, parser, filter, rewriter, inspectorOpt, startTime ); - }), this.executor).thenCompose(MdcPropagation.withMdc(ctx -> { + }, this.executor).thenCompose(ctx -> { if (ctx instanceof FilteredMetadataCache.CacheEntry) { return CompletableFuture.completedFuture((FilteredMetadataCache.CacheEntry) ctx); } @SuppressWarnings("unchecked") final FilterContext context = (FilterContext) ctx; return this.evaluateAndFilter(context); - })); + }); } /** * Evaluate cooldown for versions and filter metadata. * Returns CacheEntry with TTL based on earliest blockedUntil. + * Versions are evaluated in parallel on a dedicated bounded executor (H2). 
*/ private CompletableFuture evaluateAndFilter(final FilterContext ctx) { - // Step 4: Evaluate cooldown for each version in parallel + // Step 4: Evaluate cooldown for each version in parallel on dedicated pool final List> futures = ctx.versionsToEvaluate.stream() - .map(version -> this.evaluateVersion( - ctx.repoType, ctx.repoName, ctx.packageName, version, ctx.inspectorOpt - )) + .limit(this.maxVersionsToEvaluate) + .map(version -> CompletableFuture.supplyAsync( + () -> this.evaluateVersion( + ctx.repoType, ctx.repoName, ctx.packageName, version, ctx.inspectorOpt + ), + this.evaluationExecutor + ).thenCompose(f -> f)) .collect(Collectors.toList()); return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])) - .thenApply(MdcPropagation.withMdcFunction(ignored -> { + .thenApply(ignored -> { // Step 5: Collect blocked versions and find earliest blockedUntil final Set blockedVersions = new HashSet<>(); Instant earliestBlockedUntil = null; @@ -432,7 +490,7 @@ private CompletableFuture evaluateAndFilte ); } return FilteredMetadataCache.CacheEntry.noBlockedVersions(resultBytes, this.maxTtl); - })).whenComplete((result, error) -> { + }).whenComplete((result, error) -> { // Clear preloaded dates ctx.inspectorOpt.ifPresent(inspector -> { if (inspector instanceof MetadataAwareInspector) { @@ -484,6 +542,44 @@ private CompletableFuture evaluateVersion( }); } + /** + * Pre-warm CooldownCache L1 with release dates extracted from metadata. + * Versions whose release date is older than the cooldown period are + * guaranteed to be allowed (not blocked due to freshness), so we can + * populate the L1 cache with {@code false} (allowed) immediately. + * This avoids a DB/Valkey round-trip on the hot path for the majority + * of versions that are well past the cooldown window. 
+ * + * @param repoName Repository name + * @param packageName Package name + * @param releaseDates Map of version to release timestamp + */ + private void preWarmCooldownCache( + final String repoName, + final String packageName, + final Map releaseDates + ) { + final Instant cutoff = Instant.now().minus(this.settings.minimumAllowedAge()); + int warmed = 0; + for (final Map.Entry entry : releaseDates.entrySet()) { + if (entry.getValue().isBefore(cutoff)) { + // Version is older than cooldown period -- guaranteed allowed + this.cooldownCache.put(repoName, packageName, entry.getKey(), false); + warmed++; + } + } + if (warmed > 0) { + EcsLogger.debug("com.auto1.pantera.cooldown.metadata") + .message(String.format( + "Pre-warmed CooldownCache L1 with %d allowed versions from metadata", warmed)) + .eventCategory("database") + .eventAction("cache_prewarm") + .field("repository.name", repoName) + .field("package.name", packageName) + .log(); + } + } + /** * Preload release dates from metadata into inspector if supported. 
*/ @@ -551,6 +647,19 @@ public void invalidateAll(final String repoType, final String repoName) { .log(); } + @Override + public void clearAll() { + this.metadataCache.clear(); + if (CooldownMetrics.isAvailable()) { + CooldownMetrics.getInstance().recordInvalidation("*", "policy_change"); + } + EcsLogger.debug("com.auto1.pantera.cooldown.metadata") + .message("Cleared all metadata caches (policy change)") + .eventCategory("database") + .eventAction("cache_clear_all") + .log(); + } + @Override public String stats() { return this.metadataCache.stats(); diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataParser.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataParser.java index 4ade80bd4..102b2ef65 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataParser.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/MetadataParser.java @@ -10,7 +10,9 @@ */ package com.auto1.pantera.cooldown.metadata; +import java.time.Instant; import java.util.List; +import java.util.Map; import java.util.Optional; /** @@ -64,4 +66,17 @@ public interface MetadataParser { * @return MIME content type (e.g., "application/json", "application/xml") */ String contentType(); + + /** + * Extract release dates from parsed metadata. + * Adapters that embed timestamps in their metadata (e.g. npm's {@code time} object) + * should override this to enable release-date cache pre-warming. + * Other adapters return an empty map (the inspector will fetch dates on demand). 
+ * + * @param metadata Parsed metadata object + * @return Map of version string to release timestamp (may be empty, never null) + */ + default Map extractReleaseDates(T metadata) { + return Map.of(); + } } diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/NoopCooldownMetadataService.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/NoopCooldownMetadataService.java index 44de38ba7..0cfa7e243 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/NoopCooldownMetadataService.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metadata/NoopCooldownMetadataService.java @@ -10,7 +10,7 @@ */ package com.auto1.pantera.cooldown.metadata; -import com.auto1.pantera.cooldown.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownInspector; import java.util.Optional; import java.util.concurrent.CompletableFuture; @@ -63,6 +63,11 @@ public void invalidateAll(final String repoType, final String repoName) { // No-op } + @Override + public void clearAll() { + // No-op + } + @Override public String stats() { return "NoopCooldownMetadataService[disabled]"; diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metrics/CooldownMetrics.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metrics/CooldownMetrics.java index 52d6a0930..c43c51d41 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/cooldown/metrics/CooldownMetrics.java +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/metrics/CooldownMetrics.java @@ -226,6 +226,20 @@ public void decrementAllBlocked() { } } + /** + * Record an admin action (unblock, unblock_all, policy_change). + * Counter: {@code pantera.cooldown.admin}. 
+ * + * @param action Action tag value + */ + public void recordAdminAction(final String action) { + Counter.builder("pantera.cooldown.admin") + .description("Cooldown admin actions") + .tag("action", action) + .register(this.registry) + .increment(); + } + /** * Record cache invalidation. * diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/response/CooldownResponseFactory.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/response/CooldownResponseFactory.java new file mode 100644 index 000000000..40ef766c0 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/response/CooldownResponseFactory.java @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cooldown.response; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.http.Response; + +/** + * Factory for building cooldown HTTP responses per repository type. + * + * @since 2.2.0 + */ +public interface CooldownResponseFactory { + + /** + * Build a 403 Forbidden response for a blocked artifact. + * + * @param block Block details + * @return HTTP response + */ + Response forbidden(CooldownBlock block); + + /** + * Repository type this factory handles. + * + * @return Repository type identifier (e.g. 
"npm", "maven") + */ + String repoType(); +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/cooldown/response/CooldownResponseRegistry.java b/pantera-core/src/main/java/com/auto1/pantera/cooldown/response/CooldownResponseRegistry.java new file mode 100644 index 000000000..c8d3c7df5 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/cooldown/response/CooldownResponseRegistry.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cooldown.response; + +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Registry of per-repo-type cooldown response factories. + * + *

<p>Gradle repos reuse Maven's factory since gradle-adapter is an alias + * for maven (gradle-adapter module was removed; see pantera-main/pom.xml).
    + * + * @since 2.2.0 + */ +public final class CooldownResponseRegistry { + + /** + * Singleton instance. + */ + private static final CooldownResponseRegistry INSTANCE = new CooldownResponseRegistry(); + + private final Map factories = new ConcurrentHashMap<>(); + + private CooldownResponseRegistry() { + } + + /** + * Get singleton instance. + * + * @return Registry instance + */ + public static CooldownResponseRegistry instance() { + return INSTANCE; + } + + /** + * Register a factory for a repository type. + * + * @param repoType Repository type identifier + * @param factory Factory instance + */ + public void register(final String repoType, final CooldownResponseFactory factory) { + this.factories.put(repoType, factory); + } + + /** + * Register a factory for a repository type and additional aliases. + * Useful when multiple repo types share the same format (e.g. Gradle reuses Maven). + * + * @param factory Factory instance + * @param aliases Additional type identifiers that map to the same factory + */ + public void register(final CooldownResponseFactory factory, final String... aliases) { + this.factories.put(factory.repoType(), factory); + for (final String alias : aliases) { + this.factories.put(alias, factory); + } + } + + /** + * Get factory for a repository type. + * + * @param repoType Repository type identifier + * @return Factory, or null if not registered + */ + public CooldownResponseFactory get(final String repoType) { + return this.factories.get(repoType); + } + + /** + * Returns the factory for {@code repoType}; throws IllegalStateException + * with a descriptive message when absent. Prefer this over {@link #get} + * at call sites that cannot sensibly handle a missing factory. 
+ * + * @param repoType Repository type identifier (non-null) + * @return Registered factory + * @throws IllegalStateException if no factory is registered for {@code repoType} + */ + public CooldownResponseFactory getOrThrow(final String repoType) { + final CooldownResponseFactory f = this.factories.get(repoType); + if (f == null) { + throw new IllegalStateException( + "No CooldownResponseFactory registered for repoType: " + repoType + ); + } + return f; + } + + /** + * Returns the set of registered repository types. + * + * @return Unmodifiable set of registered types + */ + public Set registeredTypes() { + return Set.copyOf(this.factories.keySet()); + } + + /** + * Clear all registrations. For testing only. + */ + public void clear() { + this.factories.clear(); + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/cache/BaseCachedProxySlice.java b/pantera-core/src/main/java/com/auto1/pantera/http/cache/BaseCachedProxySlice.java index d2bbb45ed..08e69c2ea 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/http/cache/BaseCachedProxySlice.java +++ b/pantera-core/src/main/java/com/auto1/pantera/http/cache/BaseCachedProxySlice.java @@ -16,25 +16,28 @@ import com.auto1.pantera.asto.cache.Cache; import com.auto1.pantera.asto.cache.CacheControl; import com.auto1.pantera.asto.cache.Remote; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownResponses; -import com.auto1.pantera.cooldown.CooldownResult; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.config.CooldownAdapterRegistry; +import com.auto1.pantera.cooldown.response.CooldownResponseFactory; +import com.auto1.pantera.cooldown.response.CooldownResponseRegistry; +import 
com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Response; import com.auto1.pantera.http.ResponseBuilder; import com.auto1.pantera.http.RsStatus; import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.context.ContextualExecutor; import com.auto1.pantera.http.headers.Header; import com.auto1.pantera.http.headers.Login; import com.auto1.pantera.http.log.EcsLogger; +import com.auto1.pantera.http.misc.ConfigDefaults; +import com.auto1.pantera.http.resilience.SingleFlight; import com.auto1.pantera.http.rq.RequestLine; import com.auto1.pantera.http.slice.KeyFromPath; -import com.auto1.pantera.http.trace.MdcPropagation; import com.auto1.pantera.scheduling.ProxyArtifactEvent; - -import io.reactivex.Flowable; import java.io.IOException; import java.net.ConnectException; import java.nio.ByteBuffer; @@ -56,6 +59,7 @@ import java.util.Optional; import java.util.Queue; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ForkJoinPool; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; @@ -148,9 +152,13 @@ public abstract class BaseCachedProxySlice implements Slice { private final CooldownInspector cooldownInspector; /** - * Request deduplicator. + * Per-key request coalescer. Concurrent callers for the same cache key share + * one cache-write loader invocation, each receiving the same + * {@link FetchSignal} terminal state. Wired in via WI-post-05; + * SIGNAL-strategy semantics are provided by + * {@link SingleFlight#load(Object, Supplier)}. */ - private final RequestDeduplicator deduplicator; + private final SingleFlight singleFlight; /** * Raw storage for direct saves (bypasses FromStorageCache lazy tee-content). 
@@ -195,11 +203,25 @@ protected BaseCachedProxySlice( this.metadataStore = storage.map(CachedArtifactMetadataStore::new); this.storageBacked = this.metadataStore.isPresent() && !Objects.equals(this.cache, Cache.NOP); + final NegativeCache registryCache = NegativeCacheRegistry.instance().isSharedCacheSet() + ? NegativeCacheRegistry.instance().sharedCache() : null; this.negativeCache = config.negativeCacheEnabled() - ? new NegativeCache(repoType, repoName) : null; + ? (registryCache != null ? registryCache + : new NegativeCache(repoType, repoName)) + : null; this.cooldownService = cooldownService; this.cooldownInspector = cooldownInspector; - this.deduplicator = new RequestDeduplicator(config.dedupStrategy()); + // Zombie TTL honours PANTERA_DEDUP_MAX_AGE_MS (default 5 min). 10K max + // in-flight entries bounds memory. Completion hops via + // ForkJoinPool.commonPool() β€” the same executor pattern used by the + // other WI-05 sites (CachedNpmProxySlice migration). + this.singleFlight = new SingleFlight<>( + Duration.ofMillis( + ConfigDefaults.getLong("PANTERA_DEDUP_MAX_AGE_MS", 300_000L) + ), + 10_000, + ContextualExecutor.contextualize(ForkJoinPool.commonPool()) + ); } /** @@ -421,7 +443,7 @@ private CompletableFuture cacheFirstFlow( } final CachedArtifactMetadataStore store = this.metadataStore.orElseThrow(); return this.cache.load(key, Remote.EMPTY, CacheControl.Standard.ALWAYS) - .thenCompose(MdcPropagation.withMdc(cached -> { + .thenCompose(cached -> { if (cached.isPresent()) { this.logDebug("Cache hit", path); // Fast path: serve from cache with async metadata @@ -434,7 +456,7 @@ private CompletableFuture cacheFirstFlow( } // Cache miss: evaluate cooldown then fetch return this.evaluateCooldownAndFetch(line, headers, key, path, store); - })).toCompletableFuture(); + }).toCompletableFuture(); } /** @@ -454,19 +476,44 @@ private CompletableFuture evaluateCooldownAndFetch( this.buildCooldownRequest(path, headers); if (request.isPresent()) { return 
this.cooldownService.evaluate(request.get(), this.cooldownInspector) - .thenCompose(MdcPropagation.withMdc(result -> { + .thenCompose(result -> { if (result.blocked()) { + final CooldownBlock block = result.block().orElseThrow(); return CompletableFuture.completedFuture( - CooldownResponses.forbidden(result.block().orElseThrow()) + buildForbiddenResponse(block, this.repoType) ); } return this.fetchAndCache(line, key, headers, store); - })); + }); } } return this.fetchAndCache(line, key, headers, store); } + /** + * Build a 403 Forbidden response for a cooldown block. + * Uses the per-adapter {@link CooldownResponseFactory} from the + * {@link CooldownAdapterRegistry} when a bundle is registered for the + * repo type; otherwise falls back to the {@link CooldownResponseRegistry} + * factory for the same repo type. Factory registration is mandatory β€” + * if neither registry has an entry for {@code repoType}, this method + * throws {@link IllegalStateException} (fail-fast; no silent defaults). + * + * @param block Block details + * @param repoType Repository type for factory lookup + * @return HTTP 403 response + * @throws IllegalStateException if no factory is registered for the + * given repo type in either registry + */ + private static Response buildForbiddenResponse( + final CooldownBlock block, + final String repoType + ) { + return CooldownAdapterRegistry.instance().get(repoType) + .map(bundle -> bundle.responseFactory().forbidden(block)) + .orElseGet(() -> CooldownResponseRegistry.instance().getOrThrow(repoType).forbidden(block)); + } + /** * Fetch from upstream and cache the result, with request deduplication. * Uses NIO temp file streaming to avoid buffering full artifacts on heap. 
@@ -480,7 +527,7 @@ private CompletableFuture fetchAndCache( final String owner = new Login(headers).getValue(); final long startTime = System.currentTimeMillis(); return this.client.response(line, Headers.EMPTY, Content.EMPTY) - .thenCompose(MdcPropagation.withMdc(resp -> { + .thenCompose(resp -> { final long duration = System.currentTimeMillis() - startTime; if (resp.status().code() == 404) { return this.handle404(resp, key, duration) @@ -493,13 +540,13 @@ private CompletableFuture fetchAndCache( this.signalToResponse(signal, line, key, headers, store)); } this.recordProxyMetric("success", duration); - return this.deduplicator.deduplicate(key, () -> { + return this.singleFlight.load(key, () -> { return this.cacheResponse(resp, key, owner, store) - .thenApply(r -> RequestDeduplicator.FetchSignal.SUCCESS); + .thenApply(r -> FetchSignal.SUCCESS); }).thenCompose(signal -> this.signalToResponse(signal, line, key, headers, store)); - })) - .handle(MdcPropagation.withMdcBiFunction((resp, error) -> { + }) + .handle((resp, error) -> { if (error != null) { final long duration = System.currentTimeMillis() - startTime; this.trackUpstreamFailure(error); @@ -523,7 +570,7 @@ private CompletableFuture fetchAndCache( ); } return CompletableFuture.completedFuture(resp); - })) + }) .thenCompose(future -> future); } @@ -531,7 +578,7 @@ private CompletableFuture fetchAndCache( * Convert a dedup signal into an HTTP response. */ private CompletableFuture signalToResponse( - final RequestDeduplicator.FetchSignal signal, + final FetchSignal signal, final RequestLine line, final Key key, final Headers headers, @@ -577,7 +624,7 @@ private CompletableFuture signalToResponse( * then saves from temp file to cache. Never buffers the full artifact on heap. 
*/ @SuppressWarnings("PMD.AvoidCatchingGenericException") - private CompletableFuture cacheResponse( + private CompletableFuture cacheResponse( final Response resp, final Key key, final String owner, @@ -604,37 +651,58 @@ private CompletableFuture cacheResponse( .error(ex) .log(); return CompletableFuture.completedFuture( - RequestDeduplicator.FetchSignal.ERROR + FetchSignal.ERROR ); } final Map digests = DigestComputer.createDigests(this.digestAlgorithms()); final AtomicLong totalSize = new AtomicLong(0); final CompletableFuture streamDone = new CompletableFuture<>(); - Flowable.fromPublisher(resp.body()) - .doOnNext(buf -> { - final int nbytes = buf.remaining(); - DigestComputer.updateDigests(digests, buf); - final ByteBuffer copy = buf.asReadOnlyBuffer(); - while (copy.hasRemaining()) { - channel.write(copy); + resp.body().subscribe(new org.reactivestreams.Subscriber<>() { + private org.reactivestreams.Subscription sub; + + @Override + public void onSubscribe(final org.reactivestreams.Subscription subscription) { + this.sub = subscription; + subscription.request(Long.MAX_VALUE); + } + + @Override + public void onNext(final ByteBuffer buf) { + try { + final int nbytes = buf.remaining(); + DigestComputer.updateDigests(digests, buf); + final ByteBuffer copy = buf.asReadOnlyBuffer(); + while (copy.hasRemaining()) { + channel.write(copy); + } + totalSize.addAndGet(nbytes); + } catch (final IOException ex) { + this.sub.cancel(); + streamDone.completeExceptionally(ex); } - totalSize.addAndGet(nbytes); - }) - .doOnComplete(() -> { - channel.force(true); - channel.close(); - }) - .doOnError(err -> { + } + + @Override + public void onError(final Throwable throwable) { closeChannelQuietly(channel); deleteTempQuietly(tempFile); - }) - .subscribe( - item -> { }, - streamDone::completeExceptionally, - () -> streamDone.complete(null) - ); - return streamDone.thenCompose(MdcPropagation.withMdc(v -> { + streamDone.completeExceptionally(throwable); + } + + @Override + public void 
onComplete() { + try { + channel.force(true); + channel.close(); + streamDone.complete(null); + } catch (final IOException ex) { + closeChannelQuietly(channel); + streamDone.completeExceptionally(ex); + } + } + }); + return streamDone.thenCompose(v -> { final Map digestResults = DigestComputer.finalizeDigests(digests); final long size = totalSize.get(); @@ -679,9 +747,9 @@ private CompletableFuture cacheResponse( }).thenApply(ignored -> { this.enqueueEvent(key, resp.headers(), size, owner); deleteTempQuietly(tempFile); - return RequestDeduplicator.FetchSignal.SUCCESS; + return FetchSignal.SUCCESS; }); - })).exceptionally(MdcPropagation.withMdcFunction(err -> { + }).exceptionally(err -> { deleteTempQuietly(tempFile); EcsLogger.warn("com.auto1.pantera." + this.repoType) .message("Failed to cache upstream response") @@ -692,8 +760,8 @@ private CompletableFuture cacheResponse( .field("file.path", key.string()) .error(err) .log(); - return RequestDeduplicator.FetchSignal.ERROR; - })); + return FetchSignal.ERROR; + }); } /** @@ -710,39 +778,15 @@ private CompletableFuture saveFromTempFile( final Key key, final Path tempFile, final long size ) { if (this.storage.isPresent()) { - final Flowable flow = Flowable.using( - () -> FileChannel.open(tempFile, StandardOpenOption.READ), - chan -> Flowable.generate(emitter -> { - final ByteBuffer buf = ByteBuffer.allocate(65536); - final int read = chan.read(buf); - if (read < 0) { - emitter.onComplete(); - } else { - buf.flip(); - emitter.onNext(buf); - } - }), - FileChannel::close + final Content content = new Content.From( + Optional.of(size), filePublisher(tempFile) ); - final Content content = new Content.From(Optional.of(size), flow); return this.storage.get().save(key, content); } // Fallback: use cache.load (non-storage-backed mode) - final Flowable flow = Flowable.using( - () -> FileChannel.open(tempFile, StandardOpenOption.READ), - chan -> Flowable.generate(emitter -> { - final ByteBuffer buf = ByteBuffer.allocate(65536); - 
final int read = chan.read(buf); - if (read < 0) { - emitter.onComplete(); - } else { - buf.flip(); - emitter.onNext(buf); - } - }), - FileChannel::close + final Content content = new Content.From( + Optional.of(size), filePublisher(tempFile) ); - final Content content = new Content.From(Optional.of(size), flow); return this.cache.load( key, () -> CompletableFuture.completedFuture(Optional.of(content)), @@ -750,6 +794,63 @@ private CompletableFuture saveFromTempFile( ).toCompletableFuture(); } + /** + * Create a reactive-streams {@link org.reactivestreams.Publisher} that reads + * a temp file in 64 KB chunks. Replaces the previous {@code Flowable.using} + * pattern so this class no longer imports {@code io.reactivex.Flowable}. + * + * @param tempFile Temp file to read + * @return Publisher of ByteBuffer chunks + */ + private static org.reactivestreams.Publisher filePublisher(final Path tempFile) { + return subscriber -> { + final FileChannel[] holder = new FileChannel[1]; + try { + holder[0] = FileChannel.open(tempFile, StandardOpenOption.READ); + } catch (final IOException ex) { + subscriber.onSubscribe(new org.reactivestreams.Subscription() { + @Override public void request(final long n) { } + @Override public void cancel() { } + }); + subscriber.onError(ex); + return; + } + final FileChannel chan = holder[0]; + subscriber.onSubscribe(new org.reactivestreams.Subscription() { + private volatile boolean cancelled; + + @Override + @SuppressWarnings("PMD.AvoidCatchingGenericException") + public void request(final long n) { + try { + long remaining = n; + while (remaining > 0 && !this.cancelled) { + final ByteBuffer buf = ByteBuffer.allocate(65_536); + final int read = chan.read(buf); + if (read < 0) { + chan.close(); + subscriber.onComplete(); + return; + } + buf.flip(); + subscriber.onNext(buf); + remaining--; + } + } catch (final Exception ex) { + closeChannelQuietly(chan); + subscriber.onError(ex); + } + } + + @Override + public void cancel() { + this.cancelled = 
true; + closeChannelQuietly(chan); + } + }); + }; + } + /** * Close a FileChannel quietly. * @param channel Channel to close @@ -824,7 +925,7 @@ private CompletableFuture fetchDirect( ) ); }) - .exceptionally(MdcPropagation.withMdcFunction(error -> { + .exceptionally(error -> { final long duration = System.currentTimeMillis() - startTime; this.trackUpstreamFailure(error); this.recordProxyMetric("exception", duration); @@ -840,10 +941,10 @@ private CompletableFuture fetchDirect( return ResponseBuilder.unavailable() .textBody("Upstream error") .build(); - })); + }); } - private CompletableFuture handle404( + private CompletableFuture handle404( final Response resp, final Key key, final long duration ) { this.recordProxyMetric("not_found", duration); @@ -851,11 +952,11 @@ private CompletableFuture handle404( if (this.negativeCache != null && !this.isChecksumSidecar(key.string())) { this.negativeCache.cacheNotFound(key); } - return RequestDeduplicator.FetchSignal.NOT_FOUND; + return FetchSignal.NOT_FOUND; }); } - private CompletableFuture handleNonSuccess( + private CompletableFuture handleNonSuccess( final Response resp, final Key key, final long duration ) { if (resp.status().code() >= 500) { @@ -868,8 +969,8 @@ private CompletableFuture handleNonSuccess( } return resp.body().asBytesFuture() .thenApply(bytes -> resp.status().code() < 500 - ? RequestDeduplicator.FetchSignal.NOT_FOUND - : RequestDeduplicator.FetchSignal.ERROR); + ? 
FetchSignal.NOT_FOUND + : FetchSignal.ERROR); } /** @@ -896,7 +997,7 @@ private CompletableFuture tryServeStale( return fallback.get(); } if (this.metadataStore.isPresent()) { - return this.metadataStore.get().load(key).thenCompose(MdcPropagation.withMdc(metaOpt -> { + return this.metadataStore.get().load(key).thenCompose(metaOpt -> { if (metaOpt.isEmpty()) { return this.serveStaleFromStorage(key, fallback); } @@ -916,7 +1017,7 @@ private CompletableFuture tryServeStale( return fallback.get(); } return this.serveStaleFromStorageWithAge(key, fallback, age); - })); + }); } return this.serveStaleFromStorage(key, fallback); } @@ -926,12 +1027,12 @@ private CompletableFuture serveStaleFromStorage( final Supplier> fallback ) { final Storage store = this.storage.get(); - return store.exists(key).thenCompose(MdcPropagation.withMdc(exists -> { + return store.exists(key).thenCompose(exists -> { if (!exists) { return fallback.get(); } return serveStaleFromStorageWithAge(key, fallback, null); - })); + }); } private CompletableFuture serveStaleFromStorageWithAge( @@ -941,7 +1042,7 @@ private CompletableFuture serveStaleFromStorageWithAge( ) { final Storage store = this.storage.get(); return store.value(key) - .thenApply(MdcPropagation.withMdcFunction(content -> { + .thenApply(content -> { EcsLogger.warn("com.auto1.pantera." + this.repoType) .message("Upstream failed, serving stale cached artifact") .eventCategory("network") @@ -956,8 +1057,8 @@ private CompletableFuture serveStaleFromStorageWithAge( builder.header("Age", String.valueOf(age.getSeconds())); } return (Response) builder.body(content).build(); - })) - .exceptionallyCompose(MdcPropagation.withMdc(err -> { + }) + .exceptionallyCompose(err -> { EcsLogger.warn("com.auto1.pantera." 
+ this.repoType) .message("Failed to read stale artifact from storage") .eventCategory("web") @@ -968,14 +1069,14 @@ private CompletableFuture serveStaleFromStorageWithAge( .error(err) .log(); return fallback.get(); - })); + }); } private CompletableFuture serveChecksumFromStorage( final RequestLine line, final Key key, final String owner ) { return this.cache.load(key, Remote.EMPTY, CacheControl.Standard.ALWAYS) - .thenCompose(MdcPropagation.withMdc(cached -> { + .thenCompose(cached -> { if (cached.isPresent()) { return CompletableFuture.completedFuture( ResponseBuilder.ok() @@ -985,7 +1086,7 @@ private CompletableFuture serveChecksumFromStorage( ); } return this.fetchDirect(line, key, owner); - })).toCompletableFuture(); + }).toCompletableFuture(); } private CompletableFuture handleRootPath(final RequestLine line) { diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/cache/DedupStrategy.java b/pantera-core/src/main/java/com/auto1/pantera/http/cache/DedupStrategy.java deleted file mode 100644 index 4ea83a915..000000000 --- a/pantera-core/src/main/java/com/auto1/pantera/http/cache/DedupStrategy.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. - */ -package com.auto1.pantera.http.cache; - -/** - * Request deduplication strategy for proxy caches. - * - * @since 1.20.13 - */ -public enum DedupStrategy { - - /** - * No deduplication. Each concurrent request independently fetches from upstream. - */ - NONE, - - /** - * Storage-level deduplication. Uses storage key locking to prevent - * concurrent writes to the same cache key. Second request waits for - * the first to complete and reads from cache. 
- */ - STORAGE, - - /** - * Signal-based deduplication (zero-copy). First request fetches and caches, - * then signals completion. Waiting requests read from cache on SUCCESS - * signal, or return appropriate error on NOT_FOUND / ERROR signals. - * No response body buffering in memory. - */ - SIGNAL -} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/cache/FetchSignal.java b/pantera-core/src/main/java/com/auto1/pantera/http/cache/FetchSignal.java new file mode 100644 index 000000000..d09937aee --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/cache/FetchSignal.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.cache; + +/** + * Signal indicating the outcome of a deduplicated fetch. + * + *

    Top-level enum (promoted in WI-post-05) so callers that coalesce + * upstream fetches via {@link com.auto1.pantera.http.resilience.SingleFlight} + * can import the signal from a stable package-level location. + * + * @since 1.20.13 + */ +public enum FetchSignal { + /** + * Upstream returned 200 and content is now cached in storage. + * Waiting callers should read from cache. + */ + SUCCESS, + + /** + * Upstream returned 404. Negative cache has been updated. + * Waiting callers should return 404. + */ + NOT_FOUND, + + /** + * Upstream returned an error (5xx, timeout, exception). + * Waiting callers should return 503 or fall back to stale cache. + */ + ERROR +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCache.java b/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCache.java index f7cb08a41..87f1219b3 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCache.java +++ b/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCache.java @@ -20,76 +20,146 @@ import io.lettuce.core.ScanCursor; import io.lettuce.core.api.async.RedisAsyncCommands; import java.time.Duration; +import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; /** - * Caches 404 (Not Found) responses to avoid repeated upstream requests for missing artifacts. - * This is critical for proxy repositories to avoid hammering upstream repositories with - * requests for artifacts that don't exist (e.g., optional dependencies, typos). - * - * Thread-safe, high-performance cache using Caffeine with automatic TTL expiry. - * - * Performance impact: Eliminates 100% of repeated 404 requests, reducing load on both - * Pantera and upstream repositories. - * + * Unified negative cache for 404 responses β€” single shared instance per JVM. + * + *

<p>Keyed by {@link NegativeCacheKey} ({@code scope:repoType:artifactName:artifactVersion}). + * Hosted, proxy, and group scopes all share one L1 Caffeine + optional L2 Valkey bean. + * + * <p>New callers should use the {@link NegativeCacheKey}-based API: + * <ul> + *   <li>{@link #isKnown404(NegativeCacheKey)}</li> + *   <li>{@link #cacheNotFound(NegativeCacheKey)}</li> + *   <li>{@link #invalidate(NegativeCacheKey)}</li> + *   <li>{@link #invalidateBatch(List)}</li> + * </ul> + * + *

    Legacy {@link Key}-based methods are retained for backward compatibility but + * delegate through a synthetic {@link NegativeCacheKey} built from the instance's + * {@code repoType} and {@code repoName}. + * + *

    Thread-safe, high-performance cache using Caffeine with automatic TTL expiry. + * * @since 0.11 */ +@SuppressWarnings("PMD.TooManyMethods") public final class NegativeCache { - + /** * Default TTL for negative cache (24 hours). */ private static final Duration DEFAULT_TTL = Duration.ofHours(24); - + /** * Default maximum cache size (50,000 entries). - * At ~150 bytes per entry = ~7.5MB maximum memory usage. */ private static final int DEFAULT_MAX_SIZE = 50_000; - + /** * Sentinel value for negative cache (we only care about presence, not value). */ private static final Boolean CACHED = Boolean.TRUE; - + /** * L1 cache for 404 responses (in-memory, hot data). - * Thread-safe, high-performance, with automatic TTL expiry. + * Keyed by {@link NegativeCacheKey#flat()}. */ - private final Cache notFoundCache; - + private final Cache notFoundCache; + /** * L2 cache (Valkey/Redis, warm data) - optional. */ private final RedisAsyncCommands l2; - + /** * Whether two-tier caching is enabled. */ private final boolean twoTier; - + /** * Whether negative caching is enabled. */ private final boolean enabled; - + /** * Cache TTL for L2. */ private final Duration ttl; - + /** - * Repository type for cache key namespacing. + * Repository type for legacy key construction. */ private final String repoType; /** - * Repository name for cache key isolation. - * Prevents cache collisions in group repositories. + * Repository name for legacy key construction. */ private final String repoName; + // ----------------------------------------------------------------------- + // Primary constructor (all others delegate here) + // ----------------------------------------------------------------------- + + /** + * Primary constructor. 
+ * + * @param ttl TTL for L2 cache + * @param enabled Whether negative caching is enabled + * @param l1MaxSize Maximum size for L1 cache + * @param l1Ttl TTL for L1 cache + * @param l2Commands Redis commands for L2 cache (null for single-tier) + * @param repoType Repository type for legacy key namespacing + * @param repoName Repository name for legacy key isolation + */ + @SuppressWarnings("PMD.NullAssignment") + private NegativeCache(final Duration ttl, final boolean enabled, final int l1MaxSize, + final Duration l1Ttl, final RedisAsyncCommands l2Commands, + final String repoType, final String repoName) { + this.enabled = enabled; + this.twoTier = l2Commands != null; + this.l2 = l2Commands; + this.ttl = ttl; + this.repoType = repoType != null ? repoType : "unknown"; + this.repoName = repoName != null ? repoName : "default"; + this.notFoundCache = Caffeine.newBuilder() + .maximumSize(l1MaxSize) + .expireAfterWrite(l1Ttl.toMillis(), TimeUnit.MILLISECONDS) + .recordStats() + .build(); + } + + // ----------------------------------------------------------------------- + // Public constructors β€” NEW (preferred) + // ----------------------------------------------------------------------- + + /** + * Create negative cache from config (the single-instance wiring constructor). + * + * @param config Unified negative cache configuration + */ + public NegativeCache(final NegativeCacheConfig config) { + this( + config.l2Ttl(), + true, + config.isValkeyEnabled() ? config.l1MaxSize() : config.maxSize(), + config.isValkeyEnabled() ? 
config.l1Ttl() : config.ttl(), + GlobalCacheConfig.valkeyConnection() + .filter(v -> config.isValkeyEnabled()) + .map(ValkeyConnection::async) + .orElse(null), + "unified", + "shared" + ); + } + + // ----------------------------------------------------------------------- + // Public constructors β€” LEGACY (backward compat, delegate to primary) + // ----------------------------------------------------------------------- + /** * Create negative cache using unified NegativeCacheConfig. * @param repoType Repository type for cache key namespacing (e.g., "npm", "pypi", "go") @@ -122,7 +192,7 @@ public NegativeCache(final String repoType, final String repoName, final Negativ /** * Create negative cache with default 24h TTL and 50K max size (enabled). - * @deprecated Use {@link #NegativeCache(String, String)} instead + * @deprecated Use {@link #NegativeCache(NegativeCacheConfig)} instead */ @Deprecated public NegativeCache() { @@ -132,7 +202,7 @@ public NegativeCache() { /** * Create negative cache with Valkey connection (two-tier). * @param valkey Valkey connection for L2 cache - * @deprecated Use {@link #NegativeCache(String, String, NegativeCacheConfig)} instead + * @deprecated Use {@link #NegativeCache(NegativeCacheConfig)} instead */ @Deprecated public NegativeCache(final ValkeyConnection valkey) { @@ -150,7 +220,7 @@ public NegativeCache(final ValkeyConnection valkey) { /** * Create negative cache with custom TTL and default max size. * @param ttl Time-to-live for cached 404s - * @deprecated Use {@link #NegativeCache(String, String, NegativeCacheConfig)} instead + * @deprecated Use {@link #NegativeCache(NegativeCacheConfig)} instead */ @Deprecated public NegativeCache(final Duration ttl) { @@ -161,7 +231,7 @@ public NegativeCache(final Duration ttl) { * Create negative cache with custom TTL and enable flag. 
* @param ttl Time-to-live for cached 404s * @param enabled Whether negative caching is enabled - * @deprecated Use {@link #NegativeCache(String, String, NegativeCacheConfig)} instead + * @deprecated Use {@link #NegativeCache(NegativeCacheConfig)} instead */ @Deprecated public NegativeCache(final Duration ttl, final boolean enabled) { @@ -174,7 +244,7 @@ public NegativeCache(final Duration ttl, final boolean enabled) { * @param enabled Whether negative caching is enabled * @param maxSize Maximum number of entries (Window TinyLFU eviction) * @param valkey Valkey connection for L2 cache (null uses GlobalCacheConfig) - * @deprecated Use {@link #NegativeCache(String, String, NegativeCacheConfig)} instead + * @deprecated Use {@link #NegativeCache(NegativeCacheConfig)} instead */ @Deprecated public NegativeCache(final Duration ttl, final boolean enabled, final int maxSize, @@ -197,7 +267,7 @@ public NegativeCache(final Duration ttl, final boolean enabled, final int maxSiz * @param maxSize Maximum number of entries (Window TinyLFU eviction) * @param valkey Valkey connection for L2 cache (null uses GlobalCacheConfig) * @param repoName Repository name for cache key isolation - * @deprecated Use {@link #NegativeCache(String, String, NegativeCacheConfig)} instead + * @deprecated Use {@link #NegativeCache(NegativeCacheConfig)} instead */ @Deprecated public NegativeCache(final Duration ttl, final boolean enabled, final int maxSize, @@ -221,7 +291,7 @@ public NegativeCache(final Duration ttl, final boolean enabled, final int maxSiz * @param valkey Valkey connection for L2 cache (null uses GlobalCacheConfig) * @param repoType Repository type for cache key namespacing (e.g., "npm", "pypi", "go") * @param repoName Repository name for cache key isolation - * @deprecated Use {@link #NegativeCache(String, String, NegativeCacheConfig)} instead + * @deprecated Use {@link #NegativeCache(NegativeCacheConfig)} instead */ @Deprecated public NegativeCache(final Duration ttl, final boolean 
enabled, final int maxSize, @@ -237,41 +307,117 @@ public NegativeCache(final Duration ttl, final boolean enabled, final int maxSiz ); } + // ----------------------------------------------------------------------- + // NEW composite-key API + // ----------------------------------------------------------------------- + /** - * Primary constructor - all other constructors delegate to this one. - * @param ttl TTL for L2 cache - * @param enabled Whether negative caching is enabled - * @param l1MaxSize Maximum size for L1 cache - * @param l1Ttl TTL for L1 cache - * @param l2Commands Redis commands for L2 cache (null for single-tier) - * @param repoType Repository type for cache key namespacing - * @param repoName Repository name for cache key isolation + * Check if a composite key is in negative cache (known 404). + * Checks L1 only (synchronous). Use {@link #isKnown404Async(NegativeCacheKey)} + * for L1+L2. + * + * @param key Composite key to check + * @return true if cached in L1 as not found */ - @SuppressWarnings("PMD.NullAssignment") - private NegativeCache(final Duration ttl, final boolean enabled, final int l1MaxSize, - final Duration l1Ttl, final RedisAsyncCommands l2Commands, - final String repoType, final String repoName) { - this.enabled = enabled; - this.twoTier = l2Commands != null; - this.l2 = l2Commands; - this.ttl = ttl; - this.repoType = repoType != null ? repoType : "unknown"; - this.repoName = repoName != null ? repoName : "default"; - this.notFoundCache = Caffeine.newBuilder() - .maximumSize(l1MaxSize) - .expireAfterWrite(l1Ttl.toMillis(), TimeUnit.MILLISECONDS) - .recordStats() - .build(); + public boolean isKnown404(final NegativeCacheKey key) { + if (!this.enabled) { + return false; + } + final String flat = key.flat(); + final long startNanos = System.nanoTime(); + final boolean found = this.notFoundCache.getIfPresent(flat) != null; + recordL1Metrics(found, startNanos); + return found; } - + + /** + * Async check β€” inspects L1 then L2. 
+ * + * @param key Composite key to check + * @return future resolving to true if the key is a known 404 + */ + public CompletableFuture isKnown404Async(final NegativeCacheKey key) { + if (!this.enabled) { + return CompletableFuture.completedFuture(false); + } + final String flat = key.flat(); + final long l1Start = System.nanoTime(); + if (this.notFoundCache.getIfPresent(flat) != null) { + recordL1Metrics(true, l1Start); + return CompletableFuture.completedFuture(true); + } + recordL1Metrics(false, l1Start); + if (this.twoTier) { + return l2Get(flat); + } + return CompletableFuture.completedFuture(false); + } + + /** + * Cache a composite key as not found (404) in L1 + L2. + * + * @param key Composite key to cache + */ + public void cacheNotFound(final NegativeCacheKey key) { + if (!this.enabled) { + return; + } + final String flat = key.flat(); + this.notFoundCache.put(flat, CACHED); + if (this.twoTier) { + l2Set("negative:" + flat); + } + } + + /** + * Invalidate a single composite key from L1 + L2. + * + * @param key Composite key to invalidate + */ + public void invalidate(final NegativeCacheKey key) { + final String flat = key.flat(); + this.notFoundCache.invalidate(flat); + if (this.twoTier) { + this.l2.del("negative:" + flat); + } + } + + /** + * Synchronously invalidate a batch of composite keys from L1 + L2. + * Returns a future that completes when both tiers are updated. 
+ * + * @param keys List of composite keys to invalidate + * @return future completing when invalidation is done + */ + public CompletableFuture invalidateBatch(final List keys) { + if (keys == null || keys.isEmpty()) { + return CompletableFuture.completedFuture(null); + } + // Invalidate L1 synchronously + for (final NegativeCacheKey key : keys) { + this.notFoundCache.invalidate(key.flat()); + } + // Invalidate L2 asynchronously + if (this.twoTier) { + final String[] redisKeys = keys.stream() + .map(k -> "negative:" + k.flat()) + .toArray(String[]::new); + return this.l2.del(redisKeys) + .toCompletableFuture() + .orTimeout(500, TimeUnit.MILLISECONDS) + .exceptionally(err -> 0L) + .thenApply(ignored -> null); + } + return CompletableFuture.completedFuture(null); + } + + // ----------------------------------------------------------------------- + // LEGACY Key-based API (backward compat β€” delegates to composite-key API) + // ----------------------------------------------------------------------- + /** * Check if key is in negative cache (known 404). - * Thread-safe - Caffeine handles synchronization. - * Caffeine automatically removes expired entries. - * - * PERFORMANCE: Only checks L1 cache to avoid blocking request thread. - * L2 queries happen asynchronously in background. 
- * + * * @param key Key to check * @return True if cached in L1 as not found */ @@ -279,29 +425,17 @@ public boolean isNotFound(final Key key) { if (!this.enabled) { return false; } - + final String flat = legacyFlat(key); final long startNanos = System.nanoTime(); - final boolean found = this.notFoundCache.getIfPresent(key) != null; - - // Track L1 metrics - if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { - final long durationMs = (System.nanoTime() - startNanos) / 1_000_000; - if (found) { - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheHit("negative", "l1"); - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheOperationDuration("negative", "l1", "get", durationMs); - } else { - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheMiss("negative", "l1"); - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheOperationDuration("negative", "l1", "get", durationMs); - } - } - + final boolean found = this.notFoundCache.getIfPresent(flat) != null; + recordL1Metrics(found, startNanos); return found; } - + /** * Async check if key is in negative cache (known 404). - * Checks both L1 and L2, suitable for async callers. - * + * Checks both L1 and L2. 
+ * * @param key Key to check * @return Future with true if cached as not found */ @@ -309,199 +443,196 @@ public CompletableFuture isNotFoundAsync(final Key key) { if (!this.enabled) { return CompletableFuture.completedFuture(false); } - - // Check L1 first - final long l1StartNanos = System.nanoTime(); - if (this.notFoundCache.getIfPresent(key) != null) { - if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { - final long durationMs = (System.nanoTime() - l1StartNanos) / 1_000_000; - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheHit("negative", "l1"); - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheOperationDuration("negative", "l1", "get", durationMs); - } + final String flat = legacyFlat(key); + final long l1Start = System.nanoTime(); + if (this.notFoundCache.getIfPresent(flat) != null) { + recordL1Metrics(true, l1Start); return CompletableFuture.completedFuture(true); } - - // L1 MISS - if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { - final long durationMs = (System.nanoTime() - l1StartNanos) / 1_000_000; - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheMiss("negative", "l1"); - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheOperationDuration("negative", "l1", "get", durationMs); - } - - // Check L2 if enabled + recordL1Metrics(false, l1Start); if (this.twoTier) { - final String redisKey = "negative:" + this.repoType + ":" + this.repoName + ":" + key.string(); - final long l2StartNanos = System.nanoTime(); - - return this.l2.get(redisKey) - .toCompletableFuture() - .orTimeout(100, TimeUnit.MILLISECONDS) - .exceptionally(err -> { - // Track L2 error - metrics handled elsewhere - return null; - }) - .thenApply(l2Bytes -> { - final long durationMs = (System.nanoTime() - l2StartNanos) / 1_000_000; - - if (l2Bytes != null) { - // L2 HIT - if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { - 
com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheHit("negative", "l2"); - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheOperationDuration("negative", "l2", "get", durationMs); - } - this.notFoundCache.put(key, CACHED); - return true; - } - - // L2 MISS - if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheMiss("negative", "l2"); - com.auto1.pantera.metrics.MicrometerMetrics.getInstance().recordCacheOperationDuration("negative", "l2", "get", durationMs); - } - return false; - }); + return l2Get(flat); } - return CompletableFuture.completedFuture(false); } - + /** * Cache a key as not found (404). - * Thread-safe - Caffeine handles synchronization and eviction. - * + * * @param key Key to cache as not found */ public void cacheNotFound(final Key key) { if (!this.enabled) { return; } - - // Cache in L1 - this.notFoundCache.put(key, CACHED); - - // Cache in L2 (if enabled) + final String flat = legacyFlat(key); + this.notFoundCache.put(flat, CACHED); if (this.twoTier) { - final String redisKey = "negative:" + this.repoType + ":" + this.repoName + ":" + key.string(); - final byte[] value = new byte[]{1}; // Sentinel value - final long seconds = this.ttl.getSeconds(); - this.l2.setex(redisKey, seconds, value); + l2Set("negative:" + flat); } } - + /** * Invalidate specific entry (e.g., when artifact is deployed). - * Thread-safe - Caffeine handles synchronization. - * + * * @param key Key to invalidate */ public void invalidate(final Key key) { - // Invalidate L1 - this.notFoundCache.invalidate(key); - - // Invalidate L2 (if enabled) + final String flat = legacyFlat(key); + this.notFoundCache.invalidate(flat); if (this.twoTier) { - final String redisKey = "negative:" + this.repoType + ":" + this.repoName + ":" + key.string(); - this.l2.del(redisKey); + this.l2.del("negative:" + flat); } } /** * Invalidate all entries matching a prefix pattern. 
- * Thread-safe - Caffeine handles synchronization. * * @param prefix Key prefix to match */ public void invalidatePrefix(final String prefix) { - // Invalidate L1 - this.notFoundCache.asMap().keySet().removeIf(key -> key.string().startsWith(prefix)); - - // Invalidate L2 (if enabled) + final String pfx = this.repoType + ":" + this.repoName + ":" + prefix; + this.notFoundCache.asMap().keySet().removeIf(k -> k.startsWith(pfx)); if (this.twoTier) { - final String scanPattern = "negative:" + this.repoType + ":" + this.repoName + ":" + prefix + "*"; - this.scanAndDelete(scanPattern); + scanAndDelete("negative:" + pfx + "*"); } } + // ----------------------------------------------------------------------- + // Utility / lifecycle + // ----------------------------------------------------------------------- + /** * Clear entire cache. - * Thread-safe - Caffeine handles synchronization. */ public void clear() { - // Clear L1 this.notFoundCache.invalidateAll(); - - // Clear L2 (if enabled) - scan and delete all negative cache keys if (this.twoTier) { - this.scanAndDelete("negative:" + this.repoType + ":" + this.repoName + ":*"); + scanAndDelete("negative:*"); } } - - /** - * Recursive async scan that collects all matching keys and deletes them in batches. - * Uses SCAN instead of KEYS to avoid blocking the Redis server. - * - * @param pattern Glob pattern to match keys - * @return Future that completes when all matching keys are deleted - */ - private CompletableFuture scanAndDelete(final String pattern) { - return this.scanAndDeleteStep(ScanCursor.INITIAL, pattern); - } - - /** - * Single step of the recursive SCAN-and-delete loop. 
- * - * @param cursor Current scan cursor - * @param pattern Glob pattern to match keys - * @return Future that completes when this step and all subsequent steps finish - */ - private CompletableFuture scanAndDeleteStep( - final ScanCursor cursor, final String pattern - ) { - return this.l2.scan(cursor, ScanArgs.Builder.matches(pattern).limit(100)) - .toCompletableFuture() - .thenCompose(result -> { - if (!result.getKeys().isEmpty()) { - this.l2.del(result.getKeys().toArray(new String[0])); - } - if (result.isFinished()) { - return CompletableFuture.completedFuture(null); - } - return this.scanAndDeleteStep(result, pattern); - }); - } /** * Remove expired entries (periodic cleanup). - * Caffeine handles expiry automatically, but calling this - * triggers immediate cleanup instead of lazy removal. */ public void cleanup() { this.notFoundCache.cleanUp(); } - + /** * Get current cache size. - * Thread-safe - Caffeine handles synchronization. + * * @return Number of entries in cache */ public long size() { return this.notFoundCache.estimatedSize(); } - + /** * Get cache statistics from Caffeine. - * Includes hit rate, miss rate, eviction count, etc. + * * @return Caffeine cache statistics */ public com.github.benmanes.caffeine.cache.stats.CacheStats stats() { return this.notFoundCache.stats(); } - + /** * Check if negative caching is enabled. + * * @return True if enabled */ public boolean isEnabled() { return this.enabled; } + + // ----------------------------------------------------------------------- + // Internal helpers + // ----------------------------------------------------------------------- + + /** + * Build a flat string for legacy Key-based calls. + */ + private String legacyFlat(final Key key) { + return this.repoType + ":" + this.repoName + ":" + key.string(); + } + + /** + * L2 GET β€” returns true if found, promotes to L1. 
+ */ + private CompletableFuture l2Get(final String flat) { + final String redisKey = "negative:" + flat; + final long l2Start = System.nanoTime(); + return this.l2.get(redisKey) + .toCompletableFuture() + .orTimeout(100, TimeUnit.MILLISECONDS) + .exceptionally(err -> null) + .thenApply(l2Bytes -> { + final long durationMs = (System.nanoTime() - l2Start) / 1_000_000; + if (l2Bytes != null) { + recordL2Metrics(true, durationMs); + this.notFoundCache.put(flat, CACHED); + return true; + } + recordL2Metrics(false, durationMs); + return false; + }); + } + + /** + * L2 SET with TTL. + */ + private void l2Set(final String redisKey) { + this.l2.setex(redisKey, this.ttl.getSeconds(), new byte[]{1}); + } + + private void recordL1Metrics(final boolean hit, final long startNanos) { + if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { + final long durationMs = (System.nanoTime() - startNanos) / 1_000_000; + final com.auto1.pantera.metrics.MicrometerMetrics m = + com.auto1.pantera.metrics.MicrometerMetrics.getInstance(); + if (hit) { + m.recordCacheHit("negative", "l1"); + } else { + m.recordCacheMiss("negative", "l1"); + } + m.recordCacheOperationDuration("negative", "l1", "get", durationMs); + } + } + + private static void recordL2Metrics(final boolean hit, final long durationMs) { + if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { + final com.auto1.pantera.metrics.MicrometerMetrics m = + com.auto1.pantera.metrics.MicrometerMetrics.getInstance(); + if (hit) { + m.recordCacheHit("negative", "l2"); + } else { + m.recordCacheMiss("negative", "l2"); + } + m.recordCacheOperationDuration("negative", "l2", "get", durationMs); + } + } + + /** + * Recursive async scan that collects all matching keys and deletes them. 
+ */ + private CompletableFuture scanAndDelete(final String pattern) { + return scanAndDeleteStep(ScanCursor.INITIAL, pattern); + } + + private CompletableFuture scanAndDeleteStep( + final ScanCursor cursor, final String pattern + ) { + return this.l2.scan(cursor, ScanArgs.Builder.matches(pattern).limit(100)) + .toCompletableFuture() + .thenCompose(result -> { + if (!result.getKeys().isEmpty()) { + this.l2.del(result.getKeys().toArray(new String[0])); + } + if (result.isFinished()) { + return CompletableFuture.completedFuture(null); + } + return scanAndDeleteStep(result, pattern); + }); + } } diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCacheKey.java b/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCacheKey.java new file mode 100644 index 000000000..91b7aa287 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCacheKey.java @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.cache; + +import java.util.Objects; + +/** + * Composite key for the unified negative cache (404 caching). + * + *

+ * <p>Every cached 404 is indexed by four fields:
+ * <ul>
+ *   <li>{@code scope} — the repository name (hosted, proxy, or group)</li>
+ *   <li>{@code repoType} — the adapter type ({@code "maven"}, {@code "npm"}, etc.)</li>
+ *   <li>{@code artifactName} — the canonical artifact identifier
+ *       (e.g. {@code "@scope/pkg"}, {@code "org.spring:spring-core"})</li>
+ *   <li>{@code artifactVersion} — the version string; empty for metadata endpoints</li>
+ * </ul>
+ *
+ * <p>
    The {@link #flat()} method produces a colon-delimited string suitable for + * use as a Caffeine key or a Redis/Valkey key suffix. + * + * @since 2.2.0 + */ +public record NegativeCacheKey( + String scope, + String repoType, + String artifactName, + String artifactVersion +) { + + /** + * Canonical constructor β€” validates that required fields are non-null. + */ + public NegativeCacheKey { + Objects.requireNonNull(scope, "scope"); + Objects.requireNonNull(repoType, "repoType"); + Objects.requireNonNull(artifactName, "artifactName"); + if (artifactVersion == null) { + artifactVersion = ""; + } + } + + /** + * Flat string representation suitable for cache keys. + * Format: {@code scope:repoType:artifactName:artifactVersion} + * + * @return colon-delimited key string + */ + public String flat() { + return scope + ':' + repoType + ':' + artifactName + ':' + artifactVersion; + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCacheRegistry.java b/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCacheRegistry.java index 84e42dad5..d0e357a65 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCacheRegistry.java +++ b/pantera-core/src/main/java/com/auto1/pantera/http/cache/NegativeCacheRegistry.java @@ -16,8 +16,15 @@ import java.util.concurrent.ConcurrentMap; /** - * Global registry of all proxy NegativeCache instances. - * Enables cross-adapter cache invalidation when artifacts are published. + * Global registry for the shared NegativeCache instance and per-repo legacy registrations. + * + *

    Starting from v2.2.0 (WI-06), a single {@link NegativeCache} bean is shared across + * all scopes (hosted, proxy, group). The {@link #setSharedCache(NegativeCache)} method + * is called once at startup from {@code RepositorySlices}; adapters obtain the shared + * bean via {@link #sharedCache()}. + * + *

    The legacy per-repo {@link #register} / {@link #invalidateGlobally} API is retained + * for backward compatibility with callers that have not been migrated. * * @since 1.20.13 */ @@ -29,7 +36,18 @@ public final class NegativeCacheRegistry { private static final NegativeCacheRegistry INSTANCE = new NegativeCacheRegistry(); /** - * Registered caches: key = "repoType:repoName". + * Fallback instance used before the shared cache is initialized. + * Created once at class-load time via a static factory method. + */ + private static final NegativeCache FALLBACK = createFallback(); + + /** + * The single shared NegativeCache instance (set at startup). + */ + private volatile NegativeCache shared; + + /** + * Legacy per-repo caches: key = "repoType:repoName". */ private final ConcurrentMap caches; @@ -49,7 +67,38 @@ public static NegativeCacheRegistry instance() { } /** - * Register a negative cache instance. + * Set the single shared NegativeCache bean. Called once at startup. + * @param cache Shared NegativeCache instance + */ + public void setSharedCache(final NegativeCache cache) { + this.shared = cache; + } + + /** + * Check whether a shared cache has been explicitly set via + * {@link #setSharedCache(NegativeCache)}. + * @return true if the shared cache is initialized + */ + public boolean isSharedCacheSet() { + return this.shared != null; + } + + /** + * Get the shared NegativeCache bean. + * Falls back to a default instance if not initialized. + * @return Shared NegativeCache + */ + public NegativeCache sharedCache() { + final NegativeCache s = this.shared; + if (s != null) { + return s; + } + // Fallback for tests or early startup + return FALLBACK; + } + + /** + * Register a negative cache instance (legacy API). 
* @param repoType Repository type * @param repoName Repository name * @param cache Negative cache instance @@ -70,13 +119,16 @@ public void unregister(final String repoType, final String repoName) { } /** - * Invalidate a specific artifact path across ALL registered negative caches. - * Called when an artifact is published to ensure stale 404 entries are cleared. + * Invalidate a specific artifact path across ALL registered negative caches + * and the shared instance. * * @param artifactPath Artifact path to invalidate */ public void invalidateGlobally(final String artifactPath) { final Key artKey = new Key.From(artifactPath); + if (this.shared != null) { + this.shared.invalidate(artKey); + } this.caches.values().forEach(cache -> cache.invalidate(artKey)); } @@ -94,10 +146,14 @@ public void invalidate( if (cache != null) { cache.invalidate(new Key.From(artifactPath)); } + // Also invalidate in the shared instance + if (this.shared != null) { + this.shared.invalidate(new Key.From(artifactPath)); + } } /** - * Get the number of registered caches. + * Get the number of registered caches (legacy). * @return Count of registered caches */ public int size() { @@ -105,13 +161,18 @@ public int size() { } /** - * Clear all registrations (for testing). + * Clear all registrations and the shared reference (for testing). 
*/ public void clear() { this.caches.clear(); + this.shared = null; } private static String key(final String repoType, final String repoName) { return repoType + ":" + repoName; } + + private static NegativeCache createFallback() { + return new NegativeCache(new com.auto1.pantera.cache.NegativeCacheConfig()); + } } diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/cache/ProxyCacheConfig.java b/pantera-core/src/main/java/com/auto1/pantera/http/cache/ProxyCacheConfig.java index aaa3bf4d8..2d8fe08e0 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/http/cache/ProxyCacheConfig.java +++ b/pantera-core/src/main/java/com/auto1/pantera/http/cache/ProxyCacheConfig.java @@ -12,7 +12,6 @@ import com.amihaiemil.eoyaml.YamlMapping; import java.time.Duration; -import java.util.Locale; import java.util.Optional; /** @@ -32,7 +31,6 @@ * ttl: PT168H * cooldown: * enabled: true - * dedup_strategy: signal # none | storage | signal * conditional_requests: true # ETag / If-None-Match * stale_while_revalidate: * enabled: false @@ -142,16 +140,6 @@ public boolean cooldownEnabled() { return this.boolValue("cache", "cooldown", "enabled").orElse(false); } - /** - * Get request deduplication strategy. - * @return Dedup strategy (default: SIGNAL) - */ - public DedupStrategy dedupStrategy() { - return this.stringValue("cache", "dedup_strategy") - .map(s -> DedupStrategy.valueOf(s.toUpperCase(Locale.ROOT))) - .orElse(DedupStrategy.SIGNAL); - } - /** * Check if conditional requests (ETag/If-None-Match) are enabled. * @return True if enabled (default: true) @@ -279,15 +267,6 @@ private Optional durationValue(final String... path) { } } - /** - * Get string value from nested YAML path. - * @param path YAML path segments - * @return Optional string value - */ - private Optional stringValue(final String... path) { - return Optional.ofNullable(this.rawValue(path)); - } - /** * Navigate YAML path and return raw string value at leaf. 
* @param path YAML path segments diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/cache/ProxyCacheWriter.java b/pantera-core/src/main/java/com/auto1/pantera/http/cache/ProxyCacheWriter.java new file mode 100644 index 000000000..797206b6d --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/cache/ProxyCacheWriter.java @@ -0,0 +1,831 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.cache; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.asto.Key; +import com.auto1.pantera.asto.Storage; +import com.auto1.pantera.http.context.RequestContext; +import com.auto1.pantera.http.fault.Fault; +import com.auto1.pantera.http.fault.Fault.ChecksumAlgo; +import com.auto1.pantera.http.fault.Result; +import com.auto1.pantera.http.log.EcsLogger; +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Tags; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumMap; +import java.util.HexFormat; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; +import java.util.function.Supplier; + +/** + * 
Single-source-of-truth writer that lands a primary proxy artifact and every + * declared sidecar digest into the cache as an atomic, self-consistent pair. + * + *

+ * <p><b>Contract</b>
+ * <ol>
+ *   <li>Stream the primary upstream body into a local NIO temp file while
+ *       computing all four digests (MD5, SHA-1, SHA-256, SHA-512) in a single
+ *       pass — one {@link MessageDigest} per algorithm updated from the same
+ *       chunk. Heap usage is bounded by the chunk size, not the artifact size.</li>
+ *   <li>Fetch each declared sidecar concurrently. Sidecars are small
+ *       (typically &lt;200 bytes) and fully buffered.</li>
+ *   <li>For every sidecar that returns 200, compare the trimmed-lowercased hex
+ *       body against the locally-computed digest for that algorithm.</li>
+ *   <li>Any disagreement rejects the entire write: the temp file is deleted,
+ *       nothing lands in the cache, the call returns
+ *       {@code Result.err(new Fault.UpstreamIntegrity(...))}.</li>
+ *   <li>All sidecars absent-or-match: save the primary first (via
+ *       {@link Storage#save(Key, Content)} which itself renames atomically on
+ *       {@code FileStorage}), then every sidecar. A partial failure after the
+ *       primary is persisted is compensated by deleting whatever has been
+ *       written — callers see a single {@code StorageUnavailable} fault and the
+ *       cache ends up empty for this key, as if the write never happened.</li>
+ * </ol>
+ *
+ * <p>
    Atomicity gap vs the {@code Storage} contract. The {@link Storage} + * interface has no multi-key transaction. {@code FileStorage.save} already + * uses a "write to {@code .tmp/UUID}, then rename into place" sequence, so each + * individual file is atomic with respect to concurrent readers, but the + * pair (primary + sidecar) is only eventually-consistent during the + * small window between the two renames. We save the primary before any + * sidecar so a concurrent reader never sees a sidecar without its primary; + * the opposite direction (primary without sidecar) is harmless β€” Maven + * either falls back to the computed checksum or re-requests the sidecar. + * The integrity audit tool ({@link IntegrityAuditor}) provides the + * post-hoc heal for the narrow race where a sidecar write fails after the + * primary landed; operators run it periodically. + * + *

    Observability. Emits Tier-4 {@code EcsLogger} events on + * {@code com.auto1.pantera.cache} for every outcome with + * {@code event.action=cache_write} and {@code event.outcome} in + * {@code success | integrity_failure | partial_failure}. When a non-null + * {@link MeterRegistry} is supplied, increments + * {@code pantera.proxy.cache.integrity_failure} and + * {@code pantera.proxy.cache.write_partial_failure} counters tagged with + * {@code repo} and (for integrity failures) {@code algo}. + * + * @since 2.2.0 + */ +public final class ProxyCacheWriter { + + /** Chunk size for streaming the primary body into the temp file. */ + private static final int CHUNK_SIZE = 64 * 1024; + + /** Shared hex formatter for digest comparison. */ + private static final HexFormat HEX = HexFormat.of(); + + /** Repository name used in log fields and metric tags. */ + private final String repoName; + + /** Backing storage receiving the primary + sidecars. */ + private final Storage cache; + + /** Optional metrics registry; null disables metrics. */ + private final MeterRegistry metrics; + + /** + * Ctor. + * + * @param cache Storage receiving the primary artifact and its sidecars. + * @param repoName Repository name, emitted as {@code repository.name} in + * log events and {@code repo} in metric tags. + * @param metrics Optional meter registry. May be {@code null} if the + * caller does not want metrics. + */ + public ProxyCacheWriter( + final Storage cache, final String repoName, final MeterRegistry metrics + ) { + this.cache = Objects.requireNonNull(cache, "cache"); + this.repoName = Objects.requireNonNull(repoName, "repoName"); + this.metrics = metrics; + } + + /** + * Convenience ctor without metrics. + * + * @param cache Storage. + * @param repoName Repository name. 
+ */ + public ProxyCacheWriter(final Storage cache, final String repoName) { + this(cache, repoName, null); + } + + /** + * Write a primary artifact + every declared sidecar into the cache + * atomically (per Β§9.5 of the v2.2 target architecture). + * + * @param primaryKey Cache key of the primary artifact. + * @param upstreamUri Informational URI recorded on integrity failures. + * @param fetchPrimary Supplier that opens a fresh upstream stream. Must + * not be {@code null}; is invoked exactly once. + * @param fetchSidecars Concurrent suppliers per algorithm that each + * return {@code Optional.empty()} when the upstream + * does not serve that sidecar (404 / IO error). + * @param ctx Request context used to attach {@code trace.id} + * to log events; may be {@code null}. + * @return A stage that completes with {@link Result.Ok} on a clean write, + * or {@link Result.Err} carrying {@link Fault.UpstreamIntegrity} + * (sidecar disagreed) or {@link Fault.StorageUnavailable} + * (atomic-move failed). Never throws; exceptions are captured as + * {@code Err}. + */ + @SuppressWarnings({"PMD.AvoidCatchingGenericException", "PMD.CyclomaticComplexity"}) + public CompletionStage> writeWithSidecars( + final Key primaryKey, + final String upstreamUri, + final Supplier> fetchPrimary, + final Map>>> fetchSidecars, + final RequestContext ctx + ) { + Objects.requireNonNull(primaryKey, "primaryKey"); + Objects.requireNonNull(fetchPrimary, "fetchPrimary"); + final Map>>> sidecarFetchers = + fetchSidecars == null ? 
Collections.emptyMap() : fetchSidecars; + final Path tempFile; + try { + tempFile = Files.createTempFile("pantera-proxy-", ".tmp"); + } catch (final IOException ex) { + return CompletableFuture.completedFuture( + Result.err(new Fault.StorageUnavailable(ex, primaryKey.string())) + ); + } + return fetchPrimary.get() + .thenCompose(stream -> this.streamPrimary(stream, tempFile)) + .thenCompose(digests -> this.fetchAndVerify( + primaryKey, upstreamUri, tempFile, digests, sidecarFetchers, ctx + )) + .exceptionally(err -> { + deleteQuietly(tempFile); + return Result.err(new Fault.StorageUnavailable( + unwrap(err), primaryKey.string() + )); + }); + } + + /** + * Stream the upstream body into {@code tempFile} while computing all four + * digests in a single pass. + * + * @param stream Upstream body. + * @param tempFile Destination. + * @return Stage yielding hex-encoded digests for every algorithm. + */ + @SuppressWarnings("PMD.AvoidCatchingGenericException") + private CompletionStage> streamPrimary( + final InputStream stream, final Path tempFile + ) { + return CompletableFuture.supplyAsync(() -> { + final Map digests = createDigests(); + try (InputStream in = stream; + FileChannel channel = FileChannel.open( + tempFile, + StandardOpenOption.WRITE, + StandardOpenOption.TRUNCATE_EXISTING + )) { + final byte[] chunk = new byte[CHUNK_SIZE]; + int read; + while ((read = in.read(chunk)) > 0) { + for (final MessageDigest md : digests.values()) { + md.update(chunk, 0, read); + } + final ByteBuffer buf = ByteBuffer.wrap(chunk, 0, read); + while (buf.hasRemaining()) { + channel.write(buf); + } + } + channel.force(true); + } catch (final IOException ex) { + throw new PrimaryStreamException(ex); + } + final Map out = new EnumMap<>(ChecksumAlgo.class); + for (final Map.Entry entry : digests.entrySet()) { + out.put(entry.getKey(), HEX.formatHex(entry.getValue().digest())); + } + return out; + }); + } + + /** + * Fetch every declared sidecar, verify, commit or reject. 
+ */ + @SuppressWarnings({"PMD.CognitiveComplexity", "PMD.CyclomaticComplexity"}) + private CompletionStage> fetchAndVerify( + final Key primaryKey, + final String upstreamUri, + final Path tempFile, + final Map computed, + final Map>>> sidecarFetchers, + final RequestContext ctx + ) { + final List algos = new ArrayList<>(sidecarFetchers.keySet()); + @SuppressWarnings("unchecked") + final CompletableFuture[] futures = + new CompletableFuture[algos.size()]; + for (int i = 0; i < algos.size(); i++) { + final ChecksumAlgo algo = algos.get(i); + futures[i] = sidecarFetchers.get(algo).get() + .toCompletableFuture() + .thenApply(opt -> new SidecarFetch(algo, opt.map(ProxyCacheWriter::readSmall))) + .exceptionally(err -> new SidecarFetch(algo, Optional.empty())); + } + return CompletableFuture.allOf(futures).thenCompose(ignored -> { + final Map sidecars = new EnumMap<>(ChecksumAlgo.class); + for (final CompletableFuture f : futures) { + final SidecarFetch fetch = f.join(); + fetch.bytes().ifPresent(b -> sidecars.put(fetch.algo(), b)); + } + for (final Map.Entry entry : sidecars.entrySet()) { + final ChecksumAlgo algo = entry.getKey(); + final String claim = normaliseSidecar(entry.getValue()); + final String have = computed.get(algo); + if (!claim.equals(have)) { + return this.rejectIntegrity( + primaryKey, upstreamUri, tempFile, algo, claim, have, ctx + ); + } + } + return this.commit(primaryKey, tempFile, sidecars, ctx); + }); + } + + /** + * Emit an integrity-failure log + metric and return Err. Does NOT write + * anything to the cache; the temp file is deleted. 
+ */ + private CompletionStage> rejectIntegrity( + final Key primaryKey, + final String upstreamUri, + final Path tempFile, + final ChecksumAlgo algo, + final String sidecarClaim, + final String computed, + final RequestContext ctx + ) { + deleteQuietly(tempFile); + final String tag = algo.name().toLowerCase(Locale.ROOT); + EcsLogger.error("com.auto1.pantera.cache") + .message("Upstream sidecar disagrees with computed digest; rejecting cache write") + .eventCategory("web") + .eventAction("cache_write") + .eventOutcome("integrity_failure") + .field("repository.name", this.repoName) + .field("url.path", primaryKey.string()) + .field("url.full", upstreamUri) + .field("pantera.cache.algo", tag) + .field("pantera.cache.sidecar_claim", sidecarClaim) + .field("pantera.cache.computed", computed) + .field("trace.id", traceId(ctx)) + .log(); + this.incrementIntegrityFailure(tag); + return CompletableFuture.completedFuture( + Result.err(new Fault.UpstreamIntegrity( + upstreamUri == null ? primaryKey.string() : upstreamUri, + algo, + sidecarClaim, + computed + )) + ); + } + + /** + * Atomically save primary + every sidecar to the cache. On any failure + * after the primary lands, delete whatever has been written and return + * Err(StorageUnavailable). 
+ */ + @SuppressWarnings({"PMD.AvoidCatchingGenericException", "PMD.CognitiveComplexity"}) + private CompletionStage> commit( + final Key primaryKey, + final Path tempFile, + final Map sidecars, + final RequestContext ctx + ) { + final long size; + try { + size = Files.size(tempFile); + } catch (final IOException ex) { + deleteQuietly(tempFile); + return CompletableFuture.completedFuture( + Result.err(new Fault.StorageUnavailable(ex, primaryKey.string())) + ); + } + final Content primaryContent; + try { + primaryContent = new Content.From( + Optional.of(size), + io.reactivex.Flowable.using( + () -> FileChannel.open(tempFile, StandardOpenOption.READ), + chan -> io.reactivex.Flowable.generate(emitter -> { + final ByteBuffer buf = ByteBuffer.allocate(CHUNK_SIZE); + final int read = chan.read(buf); + if (read < 0) { + emitter.onComplete(); + } else { + buf.flip(); + emitter.onNext(buf); + } + }), + FileChannel::close + ) + ); + } catch (final RuntimeException ex) { + deleteQuietly(tempFile); + return CompletableFuture.completedFuture( + Result.err(new Fault.StorageUnavailable(ex, primaryKey.string())) + ); + } + return this.cache.save(primaryKey, primaryContent) + .thenCompose(ignored -> this.saveSidecars(primaryKey, sidecars)) + .handle((ignored, err) -> { + deleteQuietly(tempFile); + if (err == null) { + this.logSuccess(primaryKey, sidecars.keySet(), ctx); + return Result.ok(null); + } + this.rollbackAfterPartialFailure(primaryKey, sidecars.keySet(), err, ctx); + return Result.err(new Fault.StorageUnavailable( + unwrap(err), primaryKey.string() + )); + }); + } + + /** + * Save every sidecar sequentially; stop on first failure. Sidecars are + * tiny so sequential writes cost nothing. 
+ */ + private CompletableFuture saveSidecars( + final Key primaryKey, final Map sidecars + ) { + CompletableFuture chain = CompletableFuture.completedFuture(null); + for (final Map.Entry entry : sidecars.entrySet()) { + final Key sidecarKey = sidecarKey(primaryKey, entry.getKey()); + final byte[] body = entry.getValue(); + chain = chain.thenCompose(ignored -> + this.cache.save(sidecarKey, new Content.From(body)) + ); + } + return chain; + } + + /** + * Called when the atomic move of primary or sidecar has failed after the + * primary may have already landed. Deletes the primary + any sidecar that + * made it, so a subsequent GET re-fetches cleanly via this writer. + */ + private void rollbackAfterPartialFailure( + final Key primaryKey, + final Collection sidecarAlgos, + final Throwable cause, + final RequestContext ctx + ) { + this.cache.delete(primaryKey).exceptionally(ignored -> null); + for (final ChecksumAlgo algo : sidecarAlgos) { + this.cache.delete(sidecarKey(primaryKey, algo)).exceptionally(ignored -> null); + } + EcsLogger.error("com.auto1.pantera.cache") + .message("Cache write partial failure; rolled back primary + sidecars") + .eventCategory("web") + .eventAction("cache_write") + .eventOutcome("partial_failure") + .field("repository.name", this.repoName) + .field("url.path", primaryKey.string()) + .field("trace.id", traceId(ctx)) + .error(unwrap(cause)) + .log(); + if (this.metrics != null) { + Counter.builder("pantera.proxy.cache.write_partial_failure") + .tags(Tags.of("repo", this.repoName)) + .register(this.metrics) + .increment(); + } + } + + /** Emit the success event with the sidecar set actually written. 
*/ + private void logSuccess( + final Key primaryKey, final Collection sidecars, final RequestContext ctx + ) { + EcsLogger.info("com.auto1.pantera.cache") + .message("Proxy cache write with verified sidecars") + .eventCategory("web") + .eventAction("cache_write") + .eventOutcome("success") + .field("repository.name", this.repoName) + .field("url.path", primaryKey.string()) + .field("pantera.cache.sidecars", algoList(sidecars)) + .field("trace.id", traceId(ctx)) + .log(); + } + + /** Increment the integrity-failure metric, if metrics are wired. */ + private void incrementIntegrityFailure(final String algoTag) { + if (this.metrics == null) { + return; + } + Counter.builder("pantera.proxy.cache.integrity_failure") + .tags(Tags.of("repo", this.repoName, "algo", algoTag)) + .register(this.metrics) + .increment(); + } + + // ===== helpers ===== + + /** Construct the sidecar key from a primary key + algo extension. */ + static Key sidecarKey(final Key primary, final ChecksumAlgo algo) { + return new Key.From(primary.string() + sidecarExtension(algo)); + } + + /** File-system extension for each sidecar algorithm. */ + static String sidecarExtension(final ChecksumAlgo algo) { + return switch (algo) { + case MD5 -> ".md5"; + case SHA1 -> ".sha1"; + case SHA256 -> ".sha256"; + case SHA512 -> ".sha512"; + }; + } + + /** Sidecar bodies may include file paths or trailing whitespace. */ + static String normaliseSidecar(final byte[] body) { + final String raw = new String(body, java.nio.charset.StandardCharsets.UTF_8).trim(); + // Some upstreams emit "hex *filename" or "hex filename" β€” keep the hex + final int sp = firstWhitespace(raw); + final String hex = sp < 0 ? 
raw : raw.substring(0, sp); + return hex.toLowerCase(Locale.ROOT); + } + + private static int firstWhitespace(final String raw) { + for (int i = 0; i < raw.length(); i++) { + if (Character.isWhitespace(raw.charAt(i))) { + return i; + } + } + return -1; + } + + /** Render a collection of algos as a stable, sorted list for logging. */ + private static List algoList(final Collection algos) { + return algos.stream() + .sorted() + .map(a -> a.name().toLowerCase(Locale.ROOT)) + .toList(); + } + + /** Read a small payload (sidecar body) into memory. */ + static byte[] readSmall(final InputStream in) { + try (InputStream src = in) { + return src.readAllBytes(); + } catch (final IOException ex) { + throw new PrimaryStreamException(ex); + } + } + + private static Map createDigests() { + final Map map = new EnumMap<>(ChecksumAlgo.class); + try { + map.put(ChecksumAlgo.MD5, MessageDigest.getInstance("MD5")); + map.put(ChecksumAlgo.SHA1, MessageDigest.getInstance("SHA-1")); + map.put(ChecksumAlgo.SHA256, MessageDigest.getInstance("SHA-256")); + map.put(ChecksumAlgo.SHA512, MessageDigest.getInstance("SHA-512")); + } catch (final NoSuchAlgorithmException ex) { + throw new IllegalStateException("Required digest algorithm missing", ex); + } + return map; + } + + private static void deleteQuietly(final Path path) { + try { + Files.deleteIfExists(path); + } catch (final IOException ex) { + EcsLogger.debug("com.auto1.pantera.cache") + .message("Failed to delete temp file") + .field("file.path", path.toString()) + .error(ex) + .log(); + } + } + + private static Throwable unwrap(final Throwable err) { + Throwable cur = err; + while (cur instanceof java.util.concurrent.CompletionException + && cur.getCause() != null && cur.getCause() != cur) { + cur = cur.getCause(); + } + if (cur instanceof PrimaryStreamException && cur.getCause() != null) { + return cur.getCause(); + } + return cur; + } + + private static String traceId(final RequestContext ctx) { + return ctx == null ? 
null : ctx.traceId(); + } + + /** Tuple type for collecting per-algo sidecar fetches. */ + private record SidecarFetch(ChecksumAlgo algo, Optional bytes) { + } + + /** + * Internal wrapping exception for IO errors encountered in the streaming + * primary-write phase. Unwrapped before the user sees anything. + */ + private static final class PrimaryStreamException extends RuntimeException { + private static final long serialVersionUID = 1L; + PrimaryStreamException(final Throwable cause) { + super(cause); + } + } + + // ================================================================= + // Integrity auditor β€” healing stale pairs (WI-07 admin tool / Β§9.5) + // ================================================================= + + /** + * Scans a {@link Storage} for primary artifacts whose cached sidecar + * disagrees with the re-computed digest of the primary bytes. + * + *

    Runs in dry-run mode by default β€” emitting one Tier-4 WARN per + * mismatch plus a summary β€” or fix mode where the offending primary + + * every sidecar is deleted so the next client request repopulates through + * {@link ProxyCacheWriter}. + * + * @since 2.2.0 + */ + public static final class IntegrityAuditor { + + /** Primary artifact extensions we know have sidecars. */ + private static final List PRIMARY_EXTENSIONS = List.of( + ".pom", ".jar", ".war", ".aar", ".ear", + ".tgz", ".tar.gz", ".whl", ".zip" + ); + + /** Sidecar extensions that imply "ignore this entry as a primary". */ + private static final List SIDECAR_EXTENSIONS = List.of( + ".md5", ".sha1", ".sha256", ".sha512", ".asc", ".sig" + ); + + /** Algorithm by file extension, for fast lookup in the scanner. */ + private static final Map ALGO_BY_EXT = Map.of( + ".md5", ChecksumAlgo.MD5, + ".sha1", ChecksumAlgo.SHA1, + ".sha256", ChecksumAlgo.SHA256, + ".sha512", ChecksumAlgo.SHA512 + ); + + private IntegrityAuditor() { + // static utility + } + + /** + * Run the audit over {@code storage}. + * + * @param storage Storage to scan (file-backed storage recommended). + * @param repoName Tag attached to log events. + * @param fix If {@code true}, evict primary + every sidecar when + * a mismatch is found; if {@code false}, report only. + * @return Report containing counts + every offender. 
+ */ + @SuppressWarnings({"PMD.AvoidCatchingGenericException", "PMD.CognitiveComplexity"}) + public static Report run( + final Storage storage, final String repoName, final boolean fix + ) { + final Collection keys; + try { + keys = storage.list(Key.ROOT).join(); + } catch (final Exception ex) { + throw new IllegalStateException("Unable to list storage", ex); + } + final List mismatches = new ArrayList<>(); + int scanned = 0; + for (final Key key : keys) { + final String path = key.string(); + if (isSidecar(path) || !isPrimary(path)) { + continue; + } + scanned++; + final Mismatch found = auditOne(storage, key, repoName, fix); + if (found != null) { + mismatches.add(found); + } + } + EcsLogger.info("com.auto1.pantera.cache") + .message("Cache integrity audit complete") + .eventCategory("file") + .eventAction("integrity_audit") + .eventOutcome(mismatches.isEmpty() ? "success" : "failure") + .field("repository.name", repoName) + .field("pantera.audit.scanned", scanned) + .field("pantera.audit.mismatches", mismatches.size()) + .field("pantera.audit.fix", fix) + .log(); + return new Report(scanned, mismatches, fix); + } + + /** + * Audit a single primary key. Returns a {@link Mismatch} when at least + * one sidecar disagrees; {@code null} otherwise. 
+ */ + @SuppressWarnings("PMD.AvoidCatchingGenericException") + private static Mismatch auditOne( + final Storage storage, final Key primary, + final String repoName, final boolean fix + ) { + final Map computed; + try { + computed = computeDigests(storage, primary); + } catch (final Exception ex) { + EcsLogger.warn("com.auto1.pantera.cache") + .message("Integrity audit: failed to read primary") + .eventCategory("file") + .eventAction("integrity_audit") + .eventOutcome("failure") + .field("repository.name", repoName) + .field("url.path", primary.string()) + .error(ex) + .log(); + return null; + } + final List per = new ArrayList<>(); + final List sidecarsPresent = new ArrayList<>(); + for (final Map.Entry ext : ALGO_BY_EXT.entrySet()) { + final Key sidecarKey = new Key.From(primary.string() + ext.getKey()); + final boolean present; + try { + present = storage.exists(sidecarKey).join(); + } catch (final Exception ex) { + continue; + } + if (!present) { + continue; + } + sidecarsPresent.add(sidecarKey); + final byte[] claimBytes; + try { + claimBytes = storage.value(sidecarKey).join().asBytes(); + } catch (final Exception ex) { + continue; + } + final String claim = normaliseSidecar(claimBytes); + final String have = computed.get(ext.getValue()); + if (!claim.equals(have)) { + per.add(new AlgoMismatch(ext.getValue(), claim, have)); + } + } + if (per.isEmpty()) { + return null; + } + for (final AlgoMismatch m : per) { + EcsLogger.warn("com.auto1.pantera.cache") + .message("Cache integrity mismatch detected") + .eventCategory("file") + .eventAction("integrity_audit") + .eventOutcome("failure") + .field("repository.name", repoName) + .field("url.path", primary.string()) + .field("pantera.cache.algo", m.algo().name().toLowerCase(Locale.ROOT)) + .field("pantera.cache.sidecar_claim", m.sidecarClaim()) + .field("pantera.cache.computed", m.computed()) + .log(); + } + if (fix) { + evict(storage, primary, sidecarsPresent, repoName); + } + return new Mismatch(primary, per); + } 
+ + private static Map computeDigests( + final Storage storage, final Key key + ) throws IOException { + final Map digests = createDigests(); + final byte[] bytes; + try { + bytes = storage.value(key).join().asBytes(); + } catch (final Exception ex) { + throw new IOException("read failed: " + key.string(), ex); + } + for (final MessageDigest md : digests.values()) { + md.update(bytes); + } + final Map out = new EnumMap<>(ChecksumAlgo.class); + for (final Map.Entry entry : digests.entrySet()) { + out.put(entry.getKey(), HEX.formatHex(entry.getValue().digest())); + } + return out; + } + + private static void evict( + final Storage storage, final Key primary, + final Collection sidecars, final String repoName + ) { + try { + storage.delete(primary).join(); + } catch (final Exception ex) { + EcsLogger.warn("com.auto1.pantera.cache") + .message("Failed to evict primary during integrity fix") + .field("repository.name", repoName) + .field("url.path", primary.string()) + .error(ex) + .log(); + } + for (final Key sidecar : sidecars) { + try { + storage.delete(sidecar).join(); + } catch (final Exception ex) { + // Best-effort cleanup; do not abort. 
+ EcsLogger.debug("com.auto1.pantera.cache") + .message("Failed to evict sidecar during integrity fix") + .field("url.path", sidecar.string()) + .error(ex) + .log(); + } + } + EcsLogger.info("com.auto1.pantera.cache") + .message("Integrity fix: evicted mismatched pair") + .eventCategory("file") + .eventAction("integrity_audit") + .eventOutcome("success") + .field("repository.name", repoName) + .field("url.path", primary.string()) + .log(); + } + + private static boolean isPrimary(final String path) { + final String lower = path.toLowerCase(Locale.ROOT); + for (final String ext : PRIMARY_EXTENSIONS) { + if (lower.endsWith(ext)) { + return true; + } + } + return false; + } + + private static boolean isSidecar(final String path) { + final String lower = path.toLowerCase(Locale.ROOT); + for (final String ext : SIDECAR_EXTENSIONS) { + if (lower.endsWith(ext)) { + return true; + } + } + return false; + } + + /** + * Summary result of an audit run. + * + * @param scanned Number of primary files examined. + * @param mismatches Per-primary detail on offenders. + * @param fixed {@code true} if the run was executed with fix=true. + */ + public record Report(int scanned, List mismatches, boolean fixed) { + /** @return {@code true} if no mismatches were found. */ + public boolean clean() { + return this.mismatches.isEmpty(); + } + } + + /** + * One primary artifact + every sidecar that disagreed with it. + * + * @param primary Primary cache key. + * @param algorithms One entry per mismatched sidecar algorithm. + */ + public record Mismatch(Key primary, List algorithms) { + } + + /** + * One (primary, algorithm) pair with the disagreement detail. + * + * @param algo Sidecar algorithm whose hex disagreed. + * @param sidecarClaim Hex declared by the cached sidecar. + * @param computed Hex recomputed over the cached primary bytes. 
+ */ + public record AlgoMismatch(ChecksumAlgo algo, String sidecarClaim, String computed) { + } + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/cache/RequestDeduplicator.java b/pantera-core/src/main/java/com/auto1/pantera/http/cache/RequestDeduplicator.java deleted file mode 100644 index a959cedd3..000000000 --- a/pantera-core/src/main/java/com/auto1/pantera/http/cache/RequestDeduplicator.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. - */ -package com.auto1.pantera.http.cache; - -import com.auto1.pantera.asto.Key; -import com.auto1.pantera.http.misc.ConfigDefaults; - -import java.util.Objects; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.function.Supplier; - -/** - * Deduplicates concurrent requests for the same cache key. - * - *

    When multiple clients request the same artifact simultaneously, only one - * upstream fetch is performed. Other callers either wait for the signal (SIGNAL - * strategy) or are coalesced at the storage level (STORAGE strategy). - * - *

    With SIGNAL strategy (default): - *

      - *
    • First request: executes the supplier, signals result on completion
    • - *
    • Waiting requests: receive the same signal (SUCCESS, NOT_FOUND, ERROR)
    • - *
    • After completion: entry is removed from in-flight map
    • - *
    - * - *

    With NONE strategy, every call immediately delegates to the supplier. - * - * @since 1.20.13 - */ -public final class RequestDeduplicator implements AutoCloseable { - - /** - * Maximum age of an in-flight entry before it's considered zombie (5 minutes). - * Configurable via PANTERA_DEDUP_MAX_AGE_MS environment variable. - */ - private static final long MAX_AGE_MS = - ConfigDefaults.getLong("PANTERA_DEDUP_MAX_AGE_MS", 300_000L); - - /** - * Maps cache key to the in-flight fetch entry (future + creation time). - */ - private final ConcurrentHashMap inFlight; - - /** - * Strategy to use. - */ - private final DedupStrategy strategy; - - /** - * Cleanup scheduler. - */ - private final java.util.concurrent.ScheduledExecutorService cleanup; - - /** - * Ctor. - * @param strategy Dedup strategy - */ - public RequestDeduplicator(final DedupStrategy strategy) { - this.strategy = Objects.requireNonNull(strategy, "strategy"); - this.inFlight = new ConcurrentHashMap<>(); - this.cleanup = java.util.concurrent.Executors.newSingleThreadScheduledExecutor(r -> { - final Thread thread = new Thread(r, "dedup-cleanup"); - thread.setDaemon(true); - return thread; - }); - this.cleanup.scheduleAtFixedRate(this::evictStale, 60, 60, java.util.concurrent.TimeUnit.SECONDS); - } - - /** - * Execute a fetch with deduplication. - * - *

    If a fetch for the same key is already in progress and strategy is SIGNAL, - * this call returns a future that completes when the existing fetch completes. - * - * @param key Cache key identifying the artifact - * @param fetcher Supplier that performs the actual upstream fetch. - * Must complete the returned future with a FetchSignal. - * @return Future with the fetch signal (SUCCESS, NOT_FOUND, or ERROR) - */ - public CompletableFuture deduplicate( - final Key key, - final Supplier> fetcher - ) { - if (this.strategy == DedupStrategy.NONE || this.strategy == DedupStrategy.STORAGE) { - return fetcher.get(); - } - final CompletableFuture fresh = new CompletableFuture<>(); - final InFlightEntry freshEntry = new InFlightEntry(fresh, System.currentTimeMillis()); - final InFlightEntry existing = this.inFlight.putIfAbsent(key, freshEntry); - if (existing != null) { - return existing.future; - } - fetcher.get().whenComplete((signal, err) -> { - this.inFlight.remove(key); - if (err != null) { - fresh.complete(FetchSignal.ERROR); - } else { - fresh.complete(signal); - } - }); - return fresh; - } - - /** - * Get the number of currently in-flight requests. For monitoring. - * @return Count of in-flight dedup entries - */ - public int inFlightCount() { - return this.inFlight.size(); - } - - /** - * Remove entries that have been in-flight for too long (zombie protection). - */ - private void evictStale() { - final long now = System.currentTimeMillis(); - this.inFlight.entrySet().removeIf(entry -> { - if (now - entry.getValue().createdAt > MAX_AGE_MS) { - entry.getValue().future.complete(FetchSignal.ERROR); - return true; - } - return false; - }); - } - - /** - * Shuts down the cleanup scheduler and completes all in-flight entries with ERROR. - * Should be called when the deduplicator is no longer needed. 
- */ - @Override - public void close() { - this.cleanup.shutdownNow(); - this.inFlight.values().forEach( - entry -> entry.future.complete(FetchSignal.ERROR) - ); - this.inFlight.clear(); - } - - /** - * Alias for {@link #close()}, for explicit lifecycle management. - */ - public void shutdown() { - this.close(); - } - - /** - * In-flight entry tracking future and creation time. - */ - private static final class InFlightEntry { - /** - * The future for the in-flight fetch. - */ - final CompletableFuture future; - - /** - * Timestamp when this entry was created. - */ - final long createdAt; - - /** - * Ctor. - * @param future The future for the in-flight fetch - * @param createdAt Timestamp when this entry was created - */ - InFlightEntry(final CompletableFuture future, final long createdAt) { - this.future = future; - this.createdAt = createdAt; - } - } - - /** - * Signal indicating the outcome of a deduplicated fetch. - * - * @since 1.20.13 - */ - public enum FetchSignal { - /** - * Upstream returned 200 and content is now cached in storage. - * Waiting callers should read from cache. - */ - SUCCESS, - - /** - * Upstream returned 404. Negative cache has been updated. - * Waiting callers should return 404. - */ - NOT_FOUND, - - /** - * Upstream returned an error (5xx, timeout, exception). - * Waiting callers should return 503 or fall back to stale cache. - */ - ERROR - } -} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/context/ContextualExecutor.java b/pantera-core/src/main/java/com/auto1/pantera/http/context/ContextualExecutor.java new file mode 100644 index 000000000..f9e0ea1cb --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/context/ContextualExecutor.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. 
+ * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.context; + +import co.elastic.apm.api.ElasticApm; +import co.elastic.apm.api.Scope; +import co.elastic.apm.api.Span; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.Executor; +import org.apache.logging.log4j.ThreadContext; + +/** + * Wraps any {@link Executor} so that tasks dispatched through it inherit the + * caller's Log4j2 {@link ThreadContext} (the ECS field carrier used by + * {@link RequestContext}) and the caller's Elastic APM active {@link Span}. + * + *

    Implements Β§4.4 of {@code docs/analysis/v2.2-target-architecture.md}: the + * single mechanism for context propagation across async boundaries. + * Wire this once at each thread-pool boundary (Vert.x worker pool, + * the drain executor, the DB index executor, the Quartz worker thread factory, + * any {@code ForkJoinPool} on the hot path) β€” from then on, every + * {@code CompletableFuture.supplyAsync(..., ctxExecutor)} or + * {@code executor.submit} propagates ECS fields and the APM trace context + * transparently. + * + *

    Capture semantics: + *

      + *
    1. {@link ThreadContext#getImmutableContext()} snapshot is taken on the + * calling thread at the moment {@link Executor#execute} is + * invoked. The snapshot is a defensive copy β€” mutating the caller's + * ThreadContext after dispatch does not affect the task. + *
    2. {@link ElasticApm#currentSpan()} is captured at the same moment. When + * the APM agent is not attached this returns a no-op span, making this + * safe for tests and for deployments without the agent. + *
    3. On the runner thread, the snapshot is installed after saving + * the runner's prior ThreadContext. The span is activated in a + * try-with-resources so the APM scope is always released. The prior + * ThreadContext is restored in {@code finally}, even if the task throws. + *
    + * + *

    The wrapper itself is stateless; the snapshot lives only in the closure + * created per {@link Executor#execute} call. + * + * @since 2.2.0 + */ +public final class ContextualExecutor { + + private ContextualExecutor() { + // utility class; not instantiable + } + + /** + * Produce an {@link Executor} that, for every task it accepts, snapshots + * the caller's {@link ThreadContext} and current APM {@link Span} and + * restores them on the runner thread for the duration of the task. + * + * @param delegate the backing executor; must be non-null. Its threading + * and rejection behaviour is unchanged β€” this wrapper + * only decorates the {@link Runnable} passed through. + * @return a non-null executor that propagates ECS + APM context + * @throws NullPointerException if {@code delegate} is {@code null} + */ + public static Executor contextualize(final Executor delegate) { + Objects.requireNonNull(delegate, "delegate"); + return task -> { + Objects.requireNonNull(task, "task"); + final Map ctx = ThreadContext.getImmutableContext(); + final Span span = ElasticApm.currentSpan(); + delegate.execute(() -> runWithContext(task, ctx, span)); + }; + } + + /** + * Run {@code task} on the current thread with the captured ThreadContext + * and APM span installed; restore the prior ThreadContext unconditionally + * when the task returns or throws. + * + *

    Extracted so the happy-path lambda in {@link #contextualize(Executor)} + * is a single-line dispatch, keeping PMD / Checkstyle metrics low. + */ + private static void runWithContext( + final Runnable task, + final Map ctx, + final Span span + ) { + final Map prior = ThreadContext.getImmutableContext(); + ThreadContext.clearMap(); + if (!ctx.isEmpty()) { + ThreadContext.putAll(ctx); + } + try (Scope ignored = span.activate()) { + task.run(); + } finally { + ThreadContext.clearMap(); + if (!prior.isEmpty()) { + ThreadContext.putAll(prior); + } + } + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/context/ContextualExecutorService.java b/pantera-core/src/main/java/com/auto1/pantera/http/context/ContextualExecutorService.java new file mode 100644 index 000000000..8cd4e8634 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/context/ContextualExecutorService.java @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.context; + +import co.elastic.apm.api.ElasticApm; +import co.elastic.apm.api.Scope; +import co.elastic.apm.api.Span; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.apache.logging.log4j.ThreadContext; + +/** + * {@link ExecutorService} wrapper that contextualises every task-submission + * method β€” not just {@link #execute(Runnable)}. 
+ * + *

    Background: {@link ContextualExecutor#contextualize(java.util.concurrent.Executor)} + * propagates the caller's Log4j2 {@link ThreadContext} (ECS field carrier) and the + * Elastic APM active {@link Span} across thread hops, but it only targets the bare + * {@link java.util.concurrent.Executor} interface β€” i.e. {@code execute(Runnable)}. + * + *

    When downstream code expects an {@link ExecutorService} and routes tasks through + * {@link #submit(Callable)}, {@link #submit(Runnable, Object)}, {@link #submit(Runnable)}, + * {@link #invokeAll(Collection)} or {@link #invokeAny(Collection)}, those calls bypass + * the contextualising wrapper and run on the runner thread with an empty ThreadContext + * and no APM context β€” silently dropping ECS fields from every log line emitted by the + * task and breaking distributed tracing. + * + *

    This class closes that gap. It wraps an arbitrary delegate {@link ExecutorService} + * so that: + *

      + *
    • {@code execute(Runnable)} is routed through {@link ContextualExecutor} + * (same behaviour as the bare-Executor wrapper);
    • + *
    • every {@code submit(...)}, {@code invokeAll(...)} and {@code invokeAny(...)} + * overload snapshots {@link ThreadContext} and the active APM {@link Span} on + * the submitting thread at call time, then decorates the task(s) so that the + * snapshot is installed on the runner thread for the task's duration and + * restored in {@code finally} (even on exception);
    • + *
    • lifecycle methods ({@code shutdown}, {@code shutdownNow}, + * {@code awaitTermination}, {@code isShutdown}, {@code isTerminated}) delegate + * directly to the underlying pool.
    • + *
    + * + *

    This wrapper is the idiomatic boundary for thread pools in pantera from + * v2.2.0 onward. Use it at every {@link ExecutorService} construction site where + * any code path β€” {@code CompletableFuture.supplyAsync}, {@code executor.submit}, + * {@code invokeAll}, etc. β€” needs ECS/APM context propagation. + * + * @since 2.2.0 + */ +@SuppressWarnings({"PMD.TooManyMethods", "PMD.AvoidCatchingGenericException"}) +public final class ContextualExecutorService implements ExecutorService { + + /** + * Underlying pool β€” target of lifecycle calls and of the raw + * {@code submit/invokeAll/invokeAny} collection dispatches. + */ + private final ExecutorService delegate; + + /** + * {@link java.util.concurrent.Executor} view of {@link #delegate} produced by + * {@link ContextualExecutor#contextualize(java.util.concurrent.Executor)}. Used + * to route {@link #execute(Runnable)} calls through the same + * snapshot-and-restore machinery used by {@link ContextualExecutor}. + */ + private final java.util.concurrent.Executor contextualExec; + + /** + * Build a wrapper around {@code delegate}. + * + * @param delegate the backing executor service; must be non-null + * @throws NullPointerException if {@code delegate} is {@code null} + */ + public ContextualExecutorService(final ExecutorService delegate) { + this.delegate = Objects.requireNonNull(delegate, "delegate"); + this.contextualExec = ContextualExecutor.contextualize(delegate); + } + + /** + * Static factory β€” equivalent to {@code new ContextualExecutorService(delegate)}. 
+ * + * @param delegate the backing executor service; must be non-null + * @return a fresh contextualising wrapper + * @throws NullPointerException if {@code delegate} is {@code null} + */ + public static ContextualExecutorService wrap(final ExecutorService delegate) { + return new ContextualExecutorService(delegate); + } + + // --- task submission ------------------------------------------------ + + @Override + public void execute(final Runnable command) { + this.contextualExec.execute(command); + } + + @Override + public Future submit(final Callable task) { + Objects.requireNonNull(task, "task"); + return this.delegate.submit(wrap(task)); + } + + @Override + public Future submit(final Runnable task, final T result) { + Objects.requireNonNull(task, "task"); + return this.delegate.submit(wrap(task), result); + } + + @Override + public Future submit(final Runnable task) { + Objects.requireNonNull(task, "task"); + return this.delegate.submit(wrap(task)); + } + + @Override + public List> invokeAll( + final Collection> tasks + ) throws InterruptedException { + return this.delegate.invokeAll(wrapAll(tasks)); + } + + @Override + public List> invokeAll( + final Collection> tasks, + final long timeout, final TimeUnit unit + ) throws InterruptedException { + return this.delegate.invokeAll(wrapAll(tasks), timeout, unit); + } + + @Override + public T invokeAny( + final Collection> tasks + ) throws InterruptedException, ExecutionException { + return this.delegate.invokeAny(wrapAll(tasks)); + } + + @Override + public T invokeAny( + final Collection> tasks, + final long timeout, final TimeUnit unit + ) throws InterruptedException, ExecutionException, TimeoutException { + return this.delegate.invokeAny(wrapAll(tasks), timeout, unit); + } + + // --- lifecycle ------------------------------------------------------ + + @Override + public void shutdown() { + this.delegate.shutdown(); + } + + @Override + public List shutdownNow() { + return this.delegate.shutdownNow(); + } + + 
@Override + public boolean isShutdown() { + return this.delegate.isShutdown(); + } + + @Override + public boolean isTerminated() { + return this.delegate.isTerminated(); + } + + @Override + public boolean awaitTermination(final long timeout, final TimeUnit unit) + throws InterruptedException { + return this.delegate.awaitTermination(timeout, unit); + } + + // --- internals ------------------------------------------------------ + + /** + * Wrap a {@link Callable} so the submitting thread's ThreadContext + APM span + * is snapshot at call time and restored on the runner thread for the task's + * duration (including on exception). + * + * @param task original callable; must be non-null + * @param task result type + * @return decorated callable that propagates ECS + APM context + */ + private static Callable wrap(final Callable task) { + Objects.requireNonNull(task, "task"); + final Map ctx = ThreadContext.getImmutableContext(); + final Span span = ElasticApm.currentSpan(); + return () -> callWithContext(task, ctx, span); + } + + /** + * Wrap a {@link Runnable} so the submitting thread's ThreadContext + APM span + * is snapshot at call time and restored on the runner thread for the task's + * duration (including on exception). + * + * @param task original runnable; must be non-null + * @return decorated runnable that propagates ECS + APM context + */ + private static Runnable wrap(final Runnable task) { + Objects.requireNonNull(task, "task"); + final Map ctx = ThreadContext.getImmutableContext(); + final Span span = ElasticApm.currentSpan(); + return () -> runWithContext(task, ctx, span); + } + + /** + * Wrap every {@link Callable} in a collection via {@link #wrap(Callable)}. 
+ * + * @param tasks source collection (snapshotted at call time) + * @param common task result type + * @return list of decorated callables, preserving order + */ + private static List> wrapAll( + final Collection> tasks + ) { + Objects.requireNonNull(tasks, "tasks"); + final List> wrapped = new ArrayList<>(tasks.size()); + for (final Callable task : tasks) { + wrapped.add(wrap(task)); + } + return wrapped; + } + + /** + * Install the captured ThreadContext + APM span on the current thread, run the + * {@link Runnable}, and restore the runner's prior ThreadContext unconditionally. + * + * @param task runnable to execute + * @param ctx ThreadContext snapshot captured at submit time + * @param span APM span captured at submit time + */ + private static void runWithContext( + final Runnable task, + final Map ctx, + final Span span + ) { + final Map prior = ThreadContext.getImmutableContext(); + ThreadContext.clearMap(); + if (!ctx.isEmpty()) { + ThreadContext.putAll(ctx); + } + try (Scope ignored = span.activate()) { + task.run(); + } finally { + ThreadContext.clearMap(); + if (!prior.isEmpty()) { + ThreadContext.putAll(prior); + } + } + } + + /** + * Install the captured ThreadContext + APM span on the current thread, call the + * {@link Callable}, and restore the runner's prior ThreadContext unconditionally. 
+ * + * @param task callable to execute + * @param ctx ThreadContext snapshot captured at submit time + * @param span APM span captured at submit time + * @param task result type + * @return the callable's result + * @throws Exception whatever the callable throws; propagated after restore + */ + private static T callWithContext( + final Callable task, + final Map ctx, + final Span span + ) throws Exception { + final Map prior = ThreadContext.getImmutableContext(); + ThreadContext.clearMap(); + if (!ctx.isEmpty()) { + ThreadContext.putAll(ctx); + } + try (Scope ignored = span.activate()) { + return task.call(); + } finally { + ThreadContext.clearMap(); + if (!prior.isEmpty()) { + ThreadContext.putAll(prior); + } + } + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/context/Deadline.java b/pantera-core/src/main/java/com/auto1/pantera/http/context/Deadline.java new file mode 100644 index 000000000..6f6677c2a --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/context/Deadline.java @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.context; + +import java.time.Duration; +import java.time.Instant; +import java.util.Objects; + +/** + * Monotonic wall-clock deadline carried through a single request's lifetime. + * + *

    Implements Β§3.4 of {@code docs/analysis/v2.2-target-architecture.md}: an + * end-to-end budget that each layer may shrink (never extend). {@link #in(Duration)} + * fixes the expiry instant relative to {@link System#nanoTime()} at construction + * time, so drift is bounded by the monotonic clock (not the wall clock, which can + * jump). {@link #remaining()} is clamped non-negative: once the deadline fires, + * the remaining budget is {@link Duration#ZERO}, not a negative value. + * + *

    This is a value record β€” safe to share across threads, safe to embed in + * the immutable {@link RequestContext}. It is not emitted to ECS logs (the + * expiry instant has no meaning once the request is gone), but it is carried + * through so that HTTP clients, JDBC drivers, and upstream RPC wrappers can + * cap their own timeouts via {@link #remainingClamped(Duration)}. + * + * @param expiresAtNanos the {@link System#nanoTime()} value at which the + * deadline fires + * @since 2.2.0 + */ +public record Deadline(long expiresAtNanos) { + + /** + * Create a deadline {@code d} from now. + * + * @param d the budget relative to the monotonic clock at this instant; + * must be non-null + * @return a new deadline whose expiry equals {@code System.nanoTime() + d.toNanos()} + */ + public static Deadline in(final Duration d) { + Objects.requireNonNull(d, "d"); + return new Deadline(System.nanoTime() + d.toNanos()); + } + + /** + * Time remaining until the deadline fires, clamped to zero once reached. + * + * @return a non-negative {@link Duration}; {@link Duration#ZERO} once + * {@code System.nanoTime() >= expiresAtNanos} + */ + public Duration remaining() { + final long left = this.expiresAtNanos - System.nanoTime(); + return left <= 0L ? Duration.ZERO : Duration.ofNanos(left); + } + + /** + * Whether the deadline has already fired. + * + * @return {@code true} iff {@link #remaining()} is zero + */ + public boolean expired() { + return this.remaining().isZero(); + } + + /** + * Remaining budget, capped at {@code max}. For use with APIs that take a + * bounded timeout (JDBC {@code setQueryTimeout}, HTTP client read timeout, + * etc.) β€” cap so no single operation consumes the whole budget. 
+ * + * @param max the maximum per-operation timeout; must be non-null + * @return {@link #remaining()} if less than or equal to {@code max}, else + * {@code max} + */ + public Duration remainingClamped(final Duration max) { + Objects.requireNonNull(max, "max"); + final Duration rem = this.remaining(); + return rem.compareTo(max) > 0 ? max : rem; + } + + /** + * Wall-clock instant at which this deadline will (or did) fire. Computed + * from the current wall clock plus {@link #remaining()}; drifts slightly + * if the wall clock jumps, but is useful for logging and for setting + * absolute timeouts on APIs that don't accept a {@link Duration}. + * + * @return the {@link Instant} at which {@link #expired()} becomes true + */ + public Instant expiresAt() { + return Instant.now().plus(this.remaining()); + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/context/RequestContext.java b/pantera-core/src/main/java/com/auto1/pantera/http/context/RequestContext.java new file mode 100644 index 000000000..b9ec74c9d --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/context/RequestContext.java @@ -0,0 +1,340 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.context; + +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import org.apache.logging.log4j.ThreadContext; + +/** + * Immutable per-request envelope carrying every ECS field Pantera emits, + * plus the end-to-end {@link Deadline}. + * + *

    Implements Β§3.3 of {@code docs/analysis/v2.2-target-architecture.md}. + * Constructed once by the request-edge logging slice after auth resolution, + * then bound to the Log4j2 {@link ThreadContext} for the lifetime of the + * request via {@link #bindToMdc()}. {@code EcsLayout} reads the ECS keys + * straight from the ThreadContext, so logs emitted anywhere on the request + * path carry trace / transaction / user / client metadata without the caller + * having to re-thread them. + * + *

    Thread propagation across {@code CompletableFuture} boundaries is handled + * by {@link ContextualExecutor} (Β§4.4). The {@code Deadline} is carried + * through the record but deliberately not persisted in the + * ThreadContext β€” it has no ECS key, and the next process / span would have + * no way to interpret a nanosecond timestamp from a different JVM. + * + * @param traceId ECS {@code trace.id} β€” from {@code ElasticApm.currentTransaction()}. + * May be {@code null} before the APM transaction is created. + * @param transactionId ECS {@code transaction.id} β€” APM transaction ID; {@code null} + * if the APM agent is not attached. + * @param spanId ECS {@code span.id} β€” current span; {@code null} if none. + * @param httpRequestId ECS {@code http.request.id} β€” unique per HTTP request + * (X-Request-ID header, else a server-generated UUID). + * @param userName ECS {@code user.name} β€” always present; {@code "anonymous"} + * when the request is unauthenticated. Never {@code null}. + * @param clientIp ECS {@code client.ip} β€” resolved via + * XFF β†’ X-Real-IP β†’ remote-addr. May be {@code null}. + * @param userAgent ECS {@code user_agent.original} β€” raw User-Agent header. + * May be {@code null}. + * @param repoName ECS {@code repository.name} β€” Pantera-specific field + * naming the target repository (e.g. {@code "npm_group"}). + * @param repoType ECS {@code repository.type} β€” one of the adapter type + * tokens (e.g. {@code "maven"}, {@code "npm"}). Nullable + * for requests not yet resolved to a repo. + * @param artifact ECS {@code package.name} + {@code package.version} via + * {@link ArtifactRef}. Use {@link ArtifactRef#EMPTY} for + * metadata / index requests that carry no specific + * artifact. Must never be {@code null}. + * @param urlOriginal ECS {@code url.original} β€” the URL as the client sent it. + * @param urlPath ECS {@code url.path} β€” path component only (no query). + * May be {@code null}. 
+ * @param deadline wall-clock deadline propagated across the request; + * never emitted to ECS. Must not be {@code null}. + * @since 2.2.0 + */ +@SuppressWarnings("PMD.TooManyMethods") +public record RequestContext( + String traceId, + String transactionId, + String spanId, + String httpRequestId, + String userName, + String clientIp, + String userAgent, + String repoName, + String repoType, + ArtifactRef artifact, + String urlOriginal, + String urlPath, + Deadline deadline +) { + + // ECS key constants β€” grouped here so both bindToMdc() and fromMdc() + // reference a single source of truth. These match the keys EcsLayout emits. + + /** ECS key: {@code trace.id}. */ + public static final String KEY_TRACE_ID = "trace.id"; + /** ECS key: {@code transaction.id}. */ + public static final String KEY_TRANSACTION_ID = "transaction.id"; + /** ECS key: {@code span.id}. */ + public static final String KEY_SPAN_ID = "span.id"; + /** ECS key: {@code http.request.id}. */ + public static final String KEY_HTTP_REQUEST_ID = "http.request.id"; + /** ECS key: {@code user.name}. */ + public static final String KEY_USER_NAME = "user.name"; + /** ECS key: {@code client.ip}. */ + public static final String KEY_CLIENT_IP = "client.ip"; + /** ECS key: {@code user_agent.original}. */ + public static final String KEY_USER_AGENT = "user_agent.original"; + /** ECS key: {@code repository.name}. */ + public static final String KEY_REPO_NAME = "repository.name"; + /** ECS key: {@code repository.type}. */ + public static final String KEY_REPO_TYPE = "repository.type"; + /** ECS key: {@code package.name}. */ + public static final String KEY_PACKAGE_NAME = "package.name"; + /** ECS key: {@code package.version}. */ + public static final String KEY_PACKAGE_VERSION = "package.version"; + /** ECS key: {@code url.original}. */ + public static final String KEY_URL_ORIGINAL = "url.original"; + /** ECS key: {@code url.path}. 
*/ + public static final String KEY_URL_PATH = "url.path"; + + /** Default deadline applied by {@link #minimal(String, String, String, String)}. */ + private static final Duration DEFAULT_BUDGET = Duration.ofSeconds(30); + + /** + * Backward-compatible 4-arg constructor retained so production call-sites + * that were written against the WI-01 scaffold continue to compile + * (e.g. {@code maven-adapter}, {@code pypi-adapter}, {@code go-adapter}, + * {@code composer-adapter} cached-proxy slices, and tests in this module). + * + *

    Delegates to the canonical 13-arg constructor via + * {@link #minimal(String, String, String, String)} β€” sets + * {@code userName="anonymous"}, empty {@link ArtifactRef}, default 30s + * deadline, and {@code null} for every other optional field. + * + * @param traceId ECS {@code trace.id}, may be {@code null} + * @param httpRequestId ECS {@code http.request.id}, may be {@code null} + * @param repoName ECS {@code repository.name} + * @param urlOriginal ECS {@code url.original} + */ + public RequestContext( + final String traceId, final String httpRequestId, + final String repoName, final String urlOriginal + ) { + this( + traceId, null, null, httpRequestId, + "anonymous", null, null, + repoName, null, ArtifactRef.EMPTY, + urlOriginal, null, + Deadline.in(DEFAULT_BUDGET) + ); + } + + /** + * Factory producing a context with safe defaults for optional fields: + * {@code userName="anonymous"}, {@link ArtifactRef#EMPTY}, + * {@code Deadline.in(30 s)}, {@code null} for every other nullable field. + * + *

    Used at the request edge when only the bare minimum ({@code trace.id}, + * {@code http.request.id}, {@code repository.name}, {@code url.original}) + * is known β€” subsequent layers enrich via {@link #withRepo(String, String, ArtifactRef)}. + * + * @param traceId ECS {@code trace.id}, may be {@code null} + * @param httpRequestId ECS {@code http.request.id}, may be {@code null} + * @param repoName ECS {@code repository.name}, may be {@code null} + * @param urlOriginal ECS {@code url.original}, may be {@code null} + * @return a new, non-null {@link RequestContext} + */ + public static RequestContext minimal( + final String traceId, final String httpRequestId, + final String repoName, final String urlOriginal + ) { + return new RequestContext( + traceId, null, null, httpRequestId, + "anonymous", null, null, + repoName, null, ArtifactRef.EMPTY, + urlOriginal, null, + Deadline.in(DEFAULT_BUDGET) + ); + } + + /** + * Produce a copy with the repository identity and artifact reference + * updated; every other field is preserved verbatim. + * + *

    Called after the group resolver has identified the target member + + * the artifact name parser has extracted the package identity from the URL. + * + * @param newRepoName ECS {@code repository.name} for the enriched context + * @param newRepoType ECS {@code repository.type} + * @param newArtifact {@link ArtifactRef} carrying {@code package.name} + * and {@code package.version}; never {@code null} + * ({@link ArtifactRef#EMPTY} for metadata requests) + * @return a new {@link RequestContext} instance + */ + public RequestContext withRepo( + final String newRepoName, final String newRepoType, + final ArtifactRef newArtifact + ) { + return new RequestContext( + this.traceId, this.transactionId, this.spanId, this.httpRequestId, + this.userName, this.clientIp, this.userAgent, + newRepoName, newRepoType, newArtifact == null ? ArtifactRef.EMPTY : newArtifact, + this.urlOriginal, this.urlPath, + this.deadline + ); + } + + /** + * Push every non-null ECS field into the Log4j2 {@link ThreadContext} + * and return an {@link AutoCloseable} that restores the prior ThreadContext + * on close. + * + *

    Use in a try-with-resources at the request edge: + *

    {@code
    +     *   try (AutoCloseable bound = ctx.bindToMdc()) {
    +     *       slice.response(...)
    +     *            .thenAccept(...);
    +     *   }
    +     * }
    + * + *

    Contract: + *

      + *
    • Only non-null fields are pushed β€” {@code null} maps to "no key" + * (never {@code put(key, null)}), so missing fields don't show up + * as empty strings in ECS logs. + *
    • Prior ThreadContext state is captured on entry and restored on + * close. Idempotent: double-close is a no-op. + *
    • The {@link Deadline} is not bound (it has no ECS key). + *
    + * + * @return an {@link AutoCloseable} whose {@code close()} restores the + * ThreadContext snapshot taken on bind + */ + public AutoCloseable bindToMdc() { + final Map prior = ThreadContext.getImmutableContext(); + putIfNotNull(KEY_TRACE_ID, this.traceId); + putIfNotNull(KEY_TRANSACTION_ID, this.transactionId); + putIfNotNull(KEY_SPAN_ID, this.spanId); + putIfNotNull(KEY_HTTP_REQUEST_ID, this.httpRequestId); + putIfNotNull(KEY_USER_NAME, this.userName); + putIfNotNull(KEY_CLIENT_IP, this.clientIp); + putIfNotNull(KEY_USER_AGENT, this.userAgent); + putIfNotNull(KEY_REPO_NAME, this.repoName); + putIfNotNull(KEY_REPO_TYPE, this.repoType); + if (this.artifact != null && !this.artifact.isEmpty()) { + putIfNotNull(KEY_PACKAGE_NAME, this.artifact.name()); + putIfNotNull(KEY_PACKAGE_VERSION, this.artifact.version()); + } + putIfNotNull(KEY_URL_ORIGINAL, this.urlOriginal); + putIfNotNull(KEY_URL_PATH, this.urlPath); + return new MdcRestore(prior); + } + + /** + * Rebuild a {@link RequestContext} from the current Log4j2 {@link ThreadContext}. + * + *

    Used on thread hops before {@link ContextualExecutor} is in place, or + * in logger utilities that need the current ECS state without threading + * the record through every method signature. Missing keys become + * {@code null} (never throw). The {@link Deadline} is lossy β€” ThreadContext + * stores no expiry value β€” so a fresh {@code Deadline.in(30 s)} is + * synthesised as a conservative default. + * + * @return a new {@link RequestContext} populated from the current + * ThreadContext; never {@code null} + */ + public static RequestContext fromMdc() { + final String pkgName = ThreadContext.get(KEY_PACKAGE_NAME); + final String pkgVersion = ThreadContext.get(KEY_PACKAGE_VERSION); + final ArtifactRef art; + if (pkgName == null || pkgName.isEmpty()) { + art = ArtifactRef.EMPTY; + } else { + art = new ArtifactRef(pkgName, pkgVersion == null ? "" : pkgVersion); + } + return new RequestContext( + ThreadContext.get(KEY_TRACE_ID), + ThreadContext.get(KEY_TRANSACTION_ID), + ThreadContext.get(KEY_SPAN_ID), + ThreadContext.get(KEY_HTTP_REQUEST_ID), + ThreadContext.get(KEY_USER_NAME), + ThreadContext.get(KEY_CLIENT_IP), + ThreadContext.get(KEY_USER_AGENT), + ThreadContext.get(KEY_REPO_NAME), + ThreadContext.get(KEY_REPO_TYPE), + art, + ThreadContext.get(KEY_URL_ORIGINAL), + ThreadContext.get(KEY_URL_PATH), + Deadline.in(DEFAULT_BUDGET) + ); + } + + /** Small helper β€” skip {@link ThreadContext#put} when {@code value} is null. */ + private static void putIfNotNull(final String key, final String value) { + if (value != null) { + ThreadContext.put(key, value); + } + } + + /** + * Package identity within a request. {@link #EMPTY} signals + * "no specific package" β€” used for metadata / index requests + * ({@code /-/package/...}, {@code /maven-metadata.xml}, etc). 
+ * + * @param name ECS {@code package.name}; {@code ""} for empty + * @param version ECS {@code package.version}; {@code ""} for empty / metadata + */ + public record ArtifactRef(String name, String version) { + + /** Sentinel for "no artifact resolved yet" / metadata requests. */ + public static final ArtifactRef EMPTY = new ArtifactRef("", ""); + + /** @return {@code true} if this is {@link #EMPTY} (name is empty). */ + public boolean isEmpty() { + return this.name.isEmpty(); + } + } + + /** + * AutoCloseable handle returned by {@link #bindToMdc()}. Restores the + * ThreadContext snapshot taken at bind time on {@link #close()}. + * Idempotent β€” double-close is a no-op. + */ + private static final class MdcRestore implements AutoCloseable { + + private final Map prior; + private boolean closed; + + private MdcRestore(final Map priorCtx) { + // Defensive copy β€” the immutable map returned by + // ThreadContext.getImmutableContext() is safe, but we copy anyway + // to avoid holding a reference into a concurrent impl. + this.prior = new HashMap<>(priorCtx); + this.closed = false; + } + + @Override + public void close() { + if (this.closed) { + return; + } + this.closed = true; + ThreadContext.clearMap(); + if (!this.prior.isEmpty()) { + ThreadContext.putAll(this.prior); + } + } + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/fault/Fault.java b/pantera-core/src/main/java/com/auto1/pantera/http/fault/Fault.java new file mode 100644 index 000000000..8b56421d6 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/fault/Fault.java @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.fault; + +import com.auto1.pantera.http.Response; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; + +/** + * Sealed fault taxonomy for Pantera request paths. + * + *

    Every slice that can signal a problem does so by returning a {@link Result.Err} + * carrying one of these variants instead of throwing. A single + * {@link FaultTranslator} owns the HTTP-status policy β€” see Β§9 of + * {@code docs/analysis/v2.2-target-architecture.md}. + * + *

    Adding a new variant here is a deliberately breaking change: every exhaustive + * {@code switch} on {@code Fault} must be updated. See + * {@code FaultTranslatorTest#exhaustiveSwitchCompilesForEveryVariant} for the + * compile-time guard. + * + * @since 2.2.0 + */ +public sealed interface Fault { + + /** 404: artifact does not exist in this scope. */ + record NotFound(String scope, String artifact, String version) implements Fault { + } + + /** 403: cooldown, auth rejected, or explicit block. */ + record Forbidden(String reason) implements Fault { + } + + /** 500: index/DB unavailable (timeout, connection failure, statement timeout). */ + record IndexUnavailable(Throwable cause, String query) implements Fault { + } + + /** 500: storage read failed (IO error, ValueNotFoundException on sidecar, etc). */ + record StorageUnavailable(Throwable cause, String key) implements Fault { + } + + /** + * No 2xx winner across proxy members. Carries the outcomes AND the winning + * proxy {@link Response} (if any proxy produced one) so {@link FaultTranslator} + * can stream it verbatim to the client. When no member produced a Response + * at all (all threw / cancelled / timed out), {@code winningResponse} is empty + * and {@code FaultTranslator} synthesizes a 502. + * + * @param group Group repository name. + * @param outcomes Full list of member outcomes β€” always populated, + * one entry per attempted member. + * @param winningResponse The {@link ProxyFailure} chosen by + * {@link FaultTranslator#pickWinningFailure(List)}, or + * {@link Optional#empty()} if every member + * threw / was cancelled / timed out. + */ + record AllProxiesFailed( + String group, + List outcomes, + Optional winningResponse + ) implements Fault { + + /** + * A member that produced an HTTP response but not a 2xx success. The + * {@code response} is held so the translator can stream its status, headers, + * and body verbatim. + * + * @param memberName Declaration-order name of the member. 
+ * @param response Upstream response (any non-2xx status). + */ + public record ProxyFailure(String memberName, Response response) { + } + } + + /** 500: programming error, NPE, queue overflow, classifier default. */ + record Internal(Throwable cause, String where) implements Fault { + } + + /** 504: end-to-end deadline exceeded. */ + record Deadline(Duration budget, String where) implements Fault { + } + + /** 503: bulkhead / rate limiter rejected. Carries suggested retry-after. */ + record Overload(String resource, Duration retryAfter) implements Fault { + } + + /** + * 502: upstream-claimed checksum disagrees with bytes Pantera just received. + * See Β§9.5 of the target architecture doc β€” the proxy cache writer rejects + * a primary/sidecar pair whose digest does not match the sidecar claim. + * + * @param upstreamUri URI of the primary artifact that failed verification. + * @param algo Checksum algorithm whose sidecar disagreed. + * @param sidecarClaim Hex-encoded digest declared by the sidecar. + * @param computed Hex-encoded digest Pantera computed over the streamed bytes. + */ + record UpstreamIntegrity( + String upstreamUri, + ChecksumAlgo algo, + String sidecarClaim, + String computed + ) implements Fault { + } + + /** + * Per-member outcome in a proxy fanout. Used by + * {@link AllProxiesFailed#outcomes()} so the translator and the audit log + * can reason about exactly what happened at each member. + * + *

    {@code response} is present when the member produced an HTTP response + * (kind in {@code OK}, {@code NOT_FOUND}, {@code FIVE_XX}); empty when the + * member threw / was cancelled / was skipped due to circuit-breaker. + * {@link FaultTranslator#pickWinningFailure(List)} reads this field to + * choose the best response to pass through. + * + * @param member Member repository name (declaration order). + * @param kind Outcome classification. + * @param cause Throwable if the outcome was {@code EXCEPTION}, else {@code null}. + * @param response Upstream response if the member produced one, else empty. + */ + record MemberOutcome(String member, Kind kind, Throwable cause, Optional response) { + + /** + * Convenience factory for outcomes with no response (exception / cancelled / + * circuit-open). + * + * @param member Member name. + * @param kind Outcome kind. + * @param cause Underlying throwable, may be {@code null}. + * @return A MemberOutcome with {@link Optional#empty()} response. + */ + public static MemberOutcome threw(final String member, final Kind kind, final Throwable cause) { + return new MemberOutcome(member, kind, cause, Optional.empty()); + } + + /** + * Convenience factory for outcomes with an HTTP response (2xx / 4xx / 5xx). + * + * @param member Member name. + * @param kind Outcome kind. + * @param response Upstream response. + * @return A MemberOutcome with the response attached and no cause. + */ + public static MemberOutcome responded(final String member, final Kind kind, final Response response) { + return new MemberOutcome(member, kind, null, Optional.of(response)); + } + + /** Outcome classification for a single proxy member. */ + public enum Kind { + /** Member returned 2xx β€” included for completeness, not used in AllProxiesFailed construction. */ + OK, + /** Member returned 404. */ + NOT_FOUND, + /** Member returned 5xx. */ + FIVE_XX, + /** Member threw (timeout, IOException, ConnectException, etc). 
*/ + EXCEPTION, + /** Member was cancelled (race winner already found elsewhere). */ + CANCELLED, + /** Circuit breaker was open for this member at dispatch time. */ + CIRCUIT_OPEN + } + } + + /** + * Supported checksum algorithms for proxy-cache integrity verification. + * See Β§9.5. + */ + enum ChecksumAlgo { + MD5, + SHA1, + SHA256, + SHA512 + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/fault/FaultClassifier.java b/pantera-core/src/main/java/com/auto1/pantera/http/fault/FaultClassifier.java new file mode 100644 index 000000000..a0f05ff46 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/fault/FaultClassifier.java @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.fault; + +import com.auto1.pantera.asto.ValueNotFoundException; + +import java.io.IOException; +import java.net.ConnectException; +import java.time.Duration; +import java.util.concurrent.CompletionException; +import java.util.concurrent.TimeoutException; + +/** + * Translates a {@link Throwable} that escaped a slice into a {@link Fault} variant. + * + *

    Used exclusively by {@code .exceptionally(...)} handlers as a last line of + * defence β€” normal fault signalling is via {@link Result.Err}. See Β§9 of + * {@code docs/analysis/v2.2-target-architecture.md}. + * + *

    {@link CompletionException} wrappers are unwrapped before classification so + * that an {@link IOException} nested inside a completion-stage pipeline still + * lands on the {@link Fault.Internal} branch, not the default branch. + * + * @since 2.2.0 + */ +public final class FaultClassifier { + + /** Retry hint attached to queue-full {@link Fault.Overload} faults. */ + private static final Duration QUEUE_FULL_RETRY_AFTER = Duration.ofSeconds(1); + + /** Sentinel budget used when we have no real timeout to attach. */ + private static final Duration UNKNOWN_BUDGET = Duration.ZERO; + + private FaultClassifier() { + } + + /** + * Classify a throwable into a {@link Fault} variant. + * + * @param throwable The throwable caught on a request path. May be a + * {@link CompletionException} wrapping the real cause. + * @param where A short, stable label identifying the call site that + * caught the throwable (e.g. {@code "proxy-fanout"}, + * {@code "storage-read"}). Used for debugging and + * attached to the resulting {@link Fault}. + * @return The corresponding {@link Fault}. Never null. + */ + public static Fault classify(final Throwable throwable, final String where) { + final Throwable cause = unwrap(throwable); + if (cause instanceof TimeoutException) { + return new Fault.Deadline(UNKNOWN_BUDGET, where); + } + if (cause instanceof ValueNotFoundException vnf) { + return new Fault.StorageUnavailable(vnf, vnf.getMessage()); + } + if (cause instanceof ConnectException conn) { + return new Fault.Internal(conn, where); + } + if (cause instanceof IOException ioe) { + return new Fault.Internal(ioe, where); + } + if (cause instanceof IllegalStateException ise + && "Queue full".equals(ise.getMessage())) { + return new Fault.Overload("event-queue", QUEUE_FULL_RETRY_AFTER); + } + return new Fault.Internal(cause, where); + } + + /** + * Unwrap {@link CompletionException} layers so the real cause is visible + * to {@link #classify}. + * + * @param throwable Incoming throwable. 
If {@code null}, the same is returned + * (the caller is responsible for handling that case). + * @return The innermost non-{@link CompletionException} cause, or the input + * itself if no unwrapping was needed. + */ + static Throwable unwrap(final Throwable throwable) { + Throwable current = throwable; + while (current instanceof CompletionException && current.getCause() != null + && current.getCause() != current) { + current = current.getCause(); + } + return current; + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/fault/FaultTranslator.java b/pantera-core/src/main/java/com/auto1/pantera/http/fault/FaultTranslator.java new file mode 100644 index 000000000..f2c5f13ba --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/fault/FaultTranslator.java @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.fault; + +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.RsStatus; +import com.auto1.pantera.http.context.RequestContext; + +import java.util.Comparator; +import java.util.List; +import java.util.Optional; + +/** + * Single decision point for "what HTTP status + headers + body does this + * {@link Fault} produce". See Β§9 of + * {@code docs/analysis/v2.2-target-architecture.md}. + * + *

    Policy, codified: + *

      + *
    • {@link Fault.NotFound} — 404
    • + *
    • {@link Fault.Forbidden} — 403
    • + *
    • {@link Fault.IndexUnavailable} — 500 with {@code X-Pantera-Fault: index-unavailable}
    • + *
    • {@link Fault.StorageUnavailable} — 500 with {@code X-Pantera-Fault: storage-unavailable}
    • + *
    • {@link Fault.Internal} — 500 with {@code X-Pantera-Fault: internal}
    • + *
    • {@link Fault.Deadline} — 504 with {@code X-Pantera-Fault: deadline-exceeded}
    • + *
    • {@link Fault.Overload} — 503 + {@code Retry-After} + {@code X-Pantera-Fault: overload:<resource>}
    • + *
    • {@link Fault.AllProxiesFailed} — pass-through of the winning proxy response, or synthetic 502
    • + *
    • {@link Fault.UpstreamIntegrity} — 502 with {@code X-Pantera-Fault: upstream-integrity:<algo>}
    • + *
    + * + * @since 2.2.0 + */ +public final class FaultTranslator { + + /** Response header used to tag every translated fault with a stable identifier. */ + public static final String HEADER_FAULT = "X-Pantera-Fault"; + + /** Response header emitted on AllProxiesFailed pass-through with the fanout size. */ + public static final String HEADER_PROXIES_TRIED = "X-Pantera-Proxies-Tried"; + + /** Tag value for index-unavailable faults. */ + static final String TAG_INDEX = "index-unavailable"; + + /** Tag value for storage-unavailable faults. */ + static final String TAG_STORAGE = "storage-unavailable"; + + /** Tag value for generic internal faults. */ + static final String TAG_INTERNAL = "internal"; + + /** Tag value for deadline-exceeded faults. */ + static final String TAG_DEADLINE = "deadline-exceeded"; + + /** Prefix for overload fault tags; {@code resource} is appended verbatim. */ + static final String TAG_OVERLOAD_PREFIX = "overload:"; + + /** Prefix for proxy-failed fault tags; member name is appended. */ + static final String TAG_PROXIES_FAILED_PREFIX = "proxies-failed:"; + + /** Tag value for the "nobody responded" synthesized 502. */ + static final String TAG_PROXIES_NONE_RESPONDED = "proxies-failed:none-responded"; + + /** Prefix for upstream-integrity fault tags; {@code algo} is appended. */ + static final String TAG_UPSTREAM_INTEGRITY_PREFIX = "upstream-integrity:"; + + /** + * Ranking table for retryable 5xx statuses. Index 0 is the most preferred; + * higher indices are worse. Unlisted statuses fall into a catch-all tier + * that ranks after every listed status but before non-5xx responses. + */ + private static final List RETRYABILITY_ORDER = List.of(503, 504, 502, 500); + + /** Rank assigned to unlisted 5xx statuses. */ + private static final int RANK_OTHER_5XX = RETRYABILITY_ORDER.size(); + + /** Rank assigned to any non-5xx response that made it to the winner pool. 
*/ + private static final int RANK_NON_5XX = RETRYABILITY_ORDER.size() + 1; + + private FaultTranslator() { + } + + /** + * Translate a {@link Fault} into the outbound {@link Response} a client will + * see. + * + *

    Exhaustive switch β€” adding a new {@link Fault} variant is a compile + * error here until the new case is handled. + * + * @param fault The fault to translate. Never null. + * @param ctx Per-request context. Currently unused by this method but + * passed through so later WIs can attach {@code trace.id} + * headers and correlated body fields without breaking the API. + * @return The outbound response. + */ + @SuppressWarnings({"PMD.UnusedFormalParameter", "PMD.CyclomaticComplexity"}) + public static Response translate(final Fault fault, final RequestContext ctx) { + return switch (fault) { + case Fault.NotFound nf -> ResponseBuilder.notFound().build(); + case Fault.Forbidden fb -> ResponseBuilder.forbidden() + .textBody(fb.reason()) + .build(); + case Fault.IndexUnavailable iu -> internalWithTag(TAG_INDEX); + case Fault.StorageUnavailable su -> internalWithTag(TAG_STORAGE); + case Fault.Internal i -> internalWithTag(TAG_INTERNAL); + case Fault.Deadline d -> ResponseBuilder.gatewayTimeout() + .header(HEADER_FAULT, TAG_DEADLINE) + .build(); + case Fault.Overload ov -> ResponseBuilder.from(RsStatus.SERVICE_UNAVAILABLE) + .header("Retry-After", Long.toString(ov.retryAfter().toSeconds())) + .header(HEADER_FAULT, TAG_OVERLOAD_PREFIX + ov.resource()) + .build(); + case Fault.AllProxiesFailed apf -> passThroughProxy(apf); + case Fault.UpstreamIntegrity ui -> ResponseBuilder.badGateway() + .header(HEADER_FAULT, TAG_UPSTREAM_INTEGRITY_PREFIX + ui.algo().name()) + .build(); + }; + } + + /** + * Pick the "best" proxy response to pass through when all members failed. + * Ranking follows Β§2 of the target architecture doc: + * + *

      + *
    1. Retryability — {@code 503 > 504 > 502 > 500 > other 5xx}. + * Clients retry transient statuses; we prefer the more-retryable + * answer so downstream callers do the right thing.
    2. + *
    3. Has body — non-empty body wins over an empty one + * (diagnostic information).
    4. + *
    5. Declaration order — earliest-declared member wins among + * ties (deterministic).
    6. + *
    + * + *

    Members with {@link Fault.MemberOutcome.Kind#EXCEPTION}, + * {@link Fault.MemberOutcome.Kind#CANCELLED}, or + * {@link Fault.MemberOutcome.Kind#CIRCUIT_OPEN} contribute no Response and + * are ignored. + * + * @param outcomes Full list of member outcomes, in declaration order. + * @return The chosen {@link Fault.AllProxiesFailed.ProxyFailure}, or + * {@link Optional#empty()} if no member produced a response. + */ + public static Optional + pickWinningFailure(final List outcomes) { + Fault.MemberOutcome best = null; + int bestIndex = -1; + for (int idx = 0; idx < outcomes.size(); idx++) { + final Fault.MemberOutcome candidate = outcomes.get(idx); + if (candidate.response().isEmpty()) { + continue; + } + if (best == null || compareOutcomes(candidate, idx, best, bestIndex) < 0) { + best = candidate; + bestIndex = idx; + } + } + if (best == null) { + return Optional.empty(); + } + return Optional.of( + new Fault.AllProxiesFailed.ProxyFailure(best.member(), best.response().orElseThrow()) + ); + } + + /** + * Compare two candidate outcomes that both carry a {@link Response}. Returns + * a negative number if {@code a} is better, positive if {@code b} is better, + * 0 only when they are indistinguishable (should not occur because + * declaration-order breaks the final tie). + */ + private static int compareOutcomes( + final Fault.MemberOutcome a, final int aIdx, + final Fault.MemberOutcome b, final int bIdx + ) { + return Comparator + .comparingInt((Integer[] pair) -> pair[0]) // retryability rank, smaller is better + .thenComparingInt(pair -> pair[1]) // body rank, smaller is better + .thenComparingInt(pair -> pair[2]) // declaration index, smaller is better + .compare( + rankingKey(a, aIdx), + rankingKey(b, bIdx) + ); + } + + private static Integer[] rankingKey(final Fault.MemberOutcome outcome, final int idx) { + final Response resp = outcome.response().orElseThrow(); + return new Integer[] { + retryabilityRank(resp.status()), + resp.body().size().orElse(0L) > 0L ? 
0 : 1, + idx + }; + } + + private static int retryabilityRank(final RsStatus status) { + if (!status.serverError()) { + return RANK_NON_5XX; + } + final int pos = RETRYABILITY_ORDER.indexOf(status.code()); + return pos >= 0 ? pos : RANK_OTHER_5XX; + } + + /** + * Build a 500 response tagged with {@code X-Pantera-Fault: <tag>}. + */ + private static Response internalWithTag(final String tag) { + return ResponseBuilder.internalError() + .header(HEADER_FAULT, tag) + .build(); + } + + /** + * Pass through the "best" proxy 5xx response verbatim. If no proxy produced + * a Response at all, synthesize a plain 502 β€” this is the only + * AllProxiesFailed path that invents a status code. + */ + private static Response passThroughProxy(final Fault.AllProxiesFailed apf) { + final String proxiesTried = Integer.toString(apf.outcomes().size()); + final Optional winning = apf.winningResponse(); + if (winning.isPresent()) { + final Fault.AllProxiesFailed.ProxyFailure pf = winning.orElseThrow(); + final Response upstream = pf.response(); + return ResponseBuilder.from(upstream.status()) + .headers(upstream.headers()) + .header(HEADER_FAULT, TAG_PROXIES_FAILED_PREFIX + pf.memberName()) + .header(HEADER_PROXIES_TRIED, proxiesTried) + .body(upstream.body()) + .build(); + } + return ResponseBuilder.badGateway() + .header(HEADER_FAULT, TAG_PROXIES_NONE_RESPONDED) + .header(HEADER_PROXIES_TRIED, proxiesTried) + .jsonBody("{\"error\":\"all upstream members failed\"}") + .build(); + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/fault/Result.java b/pantera-core/src/main/java/com/auto1/pantera/http/fault/Result.java new file mode 100644 index 000000000..596ce325c --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/fault/Result.java @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the 
terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.fault; + +import java.util.function.Function; + +/** + * Discriminated union of a successful value or a {@link Fault}. + * + *

    The slice contract is {@code CompletionStage>} β€” exceptions + * inside a slice body that escape to {@code .exceptionally(...)} are only + * converted to {@link Fault.Internal}; they are never the primary fault-signaling + * mechanism. See Β§3.2 of {@code docs/analysis/v2.2-target-architecture.md}. + * + * @param Success value type. + * @since 2.2.0 + */ +public sealed interface Result { + + /** + * Factory for a successful result. + * + * @param value Non-null success value. + * @param Success type. + * @return {@link Ok} wrapping {@code value}. + */ + static Result ok(final T value) { + return new Ok<>(value); + } + + /** + * Factory for a failed result. + * + * @param fault Non-null fault. + * @param Success type of the (never-produced) value. + * @return {@link Err} wrapping {@code fault}. + */ + static Result err(final Fault fault) { + return new Err<>(fault); + } + + /** + * Map the success value, short-circuiting on {@link Err}. + * + * @param fn Mapping function. Must not throw. + * @param New success type. + * @return A new {@link Result} with the mapped value, or the original {@link Err}. + */ + default Result map(final Function fn) { + return switch (this) { + case Ok ok -> Result.ok(fn.apply(ok.value())); + case Err err -> Result.err(err.fault()); + }; + } + + /** + * Chain another Result-producing computation, short-circuiting on {@link Err}. + * + * @param fn Mapping function that returns another {@link Result}. + * @param New success type. + * @return The mapped Result, or the original {@link Err} unchanged. + */ + default Result flatMap(final Function> fn) { + return switch (this) { + case Ok ok -> fn.apply(ok.value()); + case Err err -> Result.err(err.fault()); + }; + } + + /** + * Successful result. + * + * @param value Success value. + * @param Success type. + */ + record Ok(T value) implements Result { + } + + /** + * Failed result carrying a {@link Fault}. + * + * @param fault Fault description. 
+ * @param Success type of the (never-produced) value. + */ + record Err(Fault fault) implements Result { + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/group/RaceSlice.java b/pantera-core/src/main/java/com/auto1/pantera/http/group/RaceSlice.java index 3b6558007..3c27da721 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/http/group/RaceSlice.java +++ b/pantera-core/src/main/java/com/auto1/pantera/http/group/RaceSlice.java @@ -31,7 +31,7 @@ *

    This is a low-level utility for "first response wins" patterns β€” * NOT a group repository resolver. For group/virtual repository resolution * with index lookup, member flattening, and negative caching, see - * {@link com.auto1.pantera.group.GroupSlice} in pantera-main. + * {@link com.auto1.pantera.group.GroupResolver} in pantera-main. */ public final class RaceSlice implements Slice { diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/log/EcsLogEvent.java b/pantera-core/src/main/java/com/auto1/pantera/http/log/EcsLogEvent.java index 52bcce7b9..2b38ec681 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/http/log/EcsLogEvent.java +++ b/pantera-core/src/main/java/com/auto1/pantera/http/log/EcsLogEvent.java @@ -13,6 +13,7 @@ import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.RsStatus; import com.auto1.pantera.http.headers.Header; +import com.auto1.pantera.http.observability.UserAgentParser; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.ThreadContext; import org.apache.logging.log4j.message.MapMessage; @@ -138,22 +139,24 @@ public EcsLogEvent userAgent(final Headers headers) { if (original != null && !original.isEmpty()) { fields.put("user_agent.original", original); - // Parse user agent (basic parsing - can be enhanced with ua-parser library) - final UserAgentInfo info = parseUserAgent(original); - if (info.name != null) { - fields.put("user_agent.name", info.name); + // Delegates to UserAgentParser (WI-post-03b re-lifted the parser + // into pantera-core.observability so StructuredLogger.access + // can reuse the same shape without coupling back to this class). 
+ final UserAgentParser.UserAgentInfo info = UserAgentParser.parse(original); + if (info.name() != null) { + fields.put("user_agent.name", info.name()); } - if (info.version != null) { - fields.put("user_agent.version", info.version); + if (info.version() != null) { + fields.put("user_agent.version", info.version()); } - if (info.osName != null) { - fields.put("user_agent.os.name", info.osName); - if (info.osVersion != null) { - fields.put("user_agent.os.version", info.osVersion); + if (info.osName() != null) { + fields.put("user_agent.os.name", info.osName()); + if (info.osVersion() != null) { + fields.put("user_agent.os.version", info.osVersion()); } } - if (info.deviceName != null) { - fields.put("user_agent.device.name", info.deviceName); + if (info.deviceName() != null) { + fields.put("user_agent.device.name", info.deviceName()); } } break; @@ -271,11 +274,17 @@ public EcsLogEvent error(final Throwable error) { * field in the Elasticsearch document. When ThreadContext does not have that * key, the field value is kept so it still reaches the JSON output. * - *

    Strategy to reduce log volume: + *

    Strategy to reduce log volume (v2.1.4 WI-00): *

      - *
    • ERROR (>= 500): Always log at ERROR level
    • - *
    • WARN (>= 400 or slow >5s): Log at WARN level
    • - *
    • SUCCESS (< 400): Log at DEBUG level (production: disabled)
    • + *
    • ERROR ({@code >= 500}): ERROR level
    • + *
    • 404 / 401 / 403 (client-driven): INFO — these are normal client probes + * (Maven HEAD probes, unauthenticated health-checks, per-client auth + * retries) and were responsible for ~95% of the access-log WARN noise + * in production (forensic §1.7 F2.1–F2.2).
    • + *
    • Other 4xx ({@code 400-499} except 401/403/404): WARN
    • + *
    • Slow request ({@code durationMs > 5000}): WARN
    • + *
    • {@code failureOutcome == true}: WARN
    • + *
    • default: DEBUG (production: disabled)
    • *
    */ public void log() { @@ -318,6 +327,12 @@ public void log() { if (statusCode != null && statusCode >= 500) { LOGGER.error(mapMessage); + } else if (statusCode != null + && (statusCode == 404 || statusCode == 401 || statusCode == 403)) { + // Client-driven 4xx are normal probes (Maven HEAD, unauthenticated + // health checks, auth retries). Emit at INFO to collapse the 95% + // log-WARN flood observed in production (Β§1.7 F2.1–F2.2). + LOGGER.info(mapMessage); } else if (statusCode != null && statusCode >= 400) { LOGGER.warn(mapMessage); } else if (durationMs > SLOW_REQUEST_THRESHOLD_MS) { @@ -426,99 +441,4 @@ public static Optional extractUsername(final Headers headers) { return Optional.empty(); } - /** - * Parse user agent string into ECS components. - */ - private static UserAgentInfo parseUserAgent(final String ua) { - final UserAgentInfo info = new UserAgentInfo(); - - if (ua == null || ua.isEmpty()) { - return info; - } - - if (ua.startsWith("Maven/")) { - info.name = "Maven"; - extractVersion(ua, "Maven/", info); - } else if (ua.startsWith("npm/")) { - info.name = "npm"; - extractVersion(ua, "npm/", info); - } else if (ua.startsWith("pip/")) { - info.name = "pip"; - extractVersion(ua, "pip/", info); - } else if (ua.contains("Docker-Client/")) { - info.name = "Docker"; - extractVersion(ua, "Docker-Client/", info); - } else if (ua.startsWith("Go-http-client/")) { - info.name = "Go"; - extractVersion(ua, "Go-http-client/", info); - } else if (ua.startsWith("Gradle/")) { - info.name = "Gradle"; - extractVersion(ua, "Gradle/", info); - } else if (ua.contains("Composer/")) { - info.name = "Composer"; - extractVersion(ua, "Composer/", info); - } else if (ua.startsWith("NuGet")) { - info.name = "NuGet"; - if (ua.contains("/")) { - extractVersion(ua, "NuGet Command Line/", info); - } - } else if (ua.contains("curl/")) { - info.name = "curl"; - extractVersion(ua, "curl/", info); - } else if (ua.contains("wget/")) { - info.name = "wget"; - extractVersion(ua, 
"wget/", info); - } - - if (ua.contains("Linux")) { - info.osName = "Linux"; - } else if (ua.contains("Windows")) { - info.osName = "Windows"; - } else if (ua.contains("Mac OS X") || ua.contains("Darwin")) { - info.osName = "macOS"; - } else if (ua.contains("FreeBSD")) { - info.osName = "FreeBSD"; - } - - if (ua.contains("Java/")) { - final int start = ua.indexOf("Java/") + 5; - final int end = findVersionEnd(ua, start); - if (end > start) { - info.osVersion = ua.substring(start, end); - } - } - - return info; - } - - private static void extractVersion(final String ua, final String prefix, final UserAgentInfo info) { - final int start = ua.indexOf(prefix); - if (start >= 0) { - final int versionStart = start + prefix.length(); - final int versionEnd = findVersionEnd(ua, versionStart); - if (versionEnd > versionStart) { - info.version = ua.substring(versionStart, versionEnd); - } - } - } - - private static int findVersionEnd(final String ua, final int start) { - int end = start; - while (end < ua.length()) { - final char c = ua.charAt(end); - if (c == ' ' || c == ';' || c == '(' || c == ')') { - break; - } - end++; - } - return end; - } - - private static final class UserAgentInfo { - String name; - String version; - String osName; - String osVersion; - String deviceName; - } } diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/misc/ConfigDefaults.java b/pantera-core/src/main/java/com/auto1/pantera/http/misc/ConfigDefaults.java index a765930be..76a0a59ad 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/http/misc/ConfigDefaults.java +++ b/pantera-core/src/main/java/com/auto1/pantera/http/misc/ConfigDefaults.java @@ -66,4 +66,28 @@ public static long getLong(final String envVar, final long defaultValue) { return defaultValue; } } + + /** + * Read a boolean configuration value. 
+ * Accepts case-insensitive {@code "true"}, {@code "1"}, {@code "yes"} as true; + * case-insensitive {@code "false"}, {@code "0"}, {@code "no"} as false; + * empty / missing / unrecognized returns the fallback. + * @param envVar Environment variable name + * @param defaultValue Default value if not set or unrecognized + * @return Configured value or default + */ + public static boolean getBoolean(final String envVar, final boolean defaultValue) { + final String raw = get(envVar, ""); + if (raw == null || raw.isEmpty()) { + return defaultValue; + } + final String val = raw.trim().toLowerCase(java.util.Locale.ROOT); + if ("true".equals(val) || "1".equals(val) || "yes".equals(val)) { + return true; + } + if ("false".equals(val) || "0".equals(val) || "no".equals(val)) { + return false; + } + return defaultValue; + } } diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/misc/StorageExecutors.java b/pantera-core/src/main/java/com/auto1/pantera/http/misc/StorageExecutors.java index 37f553d15..5d5d2e164 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/http/misc/StorageExecutors.java +++ b/pantera-core/src/main/java/com/auto1/pantera/http/misc/StorageExecutors.java @@ -27,7 +27,7 @@ *
      *
    • READ: PANTERA_IO_READ_THREADS, default 4x CPUs
    • *
    • WRITE: PANTERA_IO_WRITE_THREADS, default 2x CPUs
    • - *
    • LIST: PANTERA_IO_LIST_THREADS, default 1x CPUs
    • + *
    • LIST: PANTERA_IO_LIST_THREADS, default 2x CPUs
    • *
    * * @since 1.20.13 @@ -62,7 +62,7 @@ public final class StorageExecutors { public static final ExecutorService LIST = Executors.newFixedThreadPool( ConfigDefaults.getInt( "PANTERA_IO_LIST_THREADS", - Runtime.getRuntime().availableProcessors() + Runtime.getRuntime().availableProcessors() * 2 ), namedThreadFactory("pantera-io-list-%d") ); diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/observability/LevelPolicy.java b/pantera-core/src/main/java/com/auto1/pantera/http/observability/LevelPolicy.java new file mode 100644 index 000000000..c89176171 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/observability/LevelPolicy.java @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.observability; + +import org.apache.logging.log4j.Level; + +/** + * Table-driven log-level policy for the five observability tiers β€” implements + * Β§4.2 of {@code docs/analysis/v2.2-target-architecture.md}. + * + *

    Every tier has a DEBUG hook for successful operations; WARN / INFO / ERROR + * map to specific failure modes. Having the mapping as a single enum means a + * reviewer can audit the full log-level policy in one file — no per-adapter + * drift, no per-call-site bespoke levels. + * + *

    Usage: + *

    {@code
    + *   logger.atLevel(LevelPolicy.CLIENT_FACING_NOT_FOUND.level()).log(...);
    + * }
    + * + *

    Changing the level of an existing entry is a release-gated decision (it + * changes log-volume and alert routing). Adding a new entry is a deliberate + * breaking change because the {@link StructuredLogger} tier builders may + * {@code switch} over these values. + * + * @since 2.2.0 + */ +public enum LevelPolicy { + + // ---- Tier-1 β€” client β†’ pantera (access log) ---- + + /** 2xx / 3xx response to client. DEBUG so production logs are quiet. */ + CLIENT_FACING_SUCCESS(Level.DEBUG), + + /** 404 Not Found β€” normal for metadata probes (Maven HEAD, npm audit, etc.). */ + CLIENT_FACING_NOT_FOUND(Level.INFO), + + /** 401 / 403 β€” normal for unauthenticated probes / per-client retries. */ + CLIENT_FACING_UNAUTH(Level.INFO), + + /** 400 and other 4xx β€” genuine client-side misuse. */ + CLIENT_FACING_4XX_OTHER(Level.WARN), + + /** 5xx β€” server-side problem, always actionable. */ + CLIENT_FACING_5XX(Level.ERROR), + + /** Request exceeded the slow threshold (default 5000 ms). */ + CLIENT_FACING_SLOW(Level.WARN), + + // ---- Tier-2 β€” pantera β†’ pantera (group β†’ member, slice β†’ slice) ---- + + /** 2xx returned by an internal callee. DEBUG β€” opt-in trace. */ + INTERNAL_CALL_SUCCESS(Level.DEBUG), + + /** 404 from an internal callee (hosted member). DEBUG β€” normal on fanout. */ + INTERNAL_CALL_NOT_FOUND(Level.DEBUG), + + /** + * 500 from an internal callee. ERROR with Fault cause, stack trace, and + * parent {@code trace.id} β€” this is the primary "internal-chain failed" + * signal. + */ + INTERNAL_CALL_500(Level.ERROR), + + // ---- Tier-3 β€” pantera β†’ upstream (HTTP call to npmjs.org / central / etc) ---- + + /** 2xx returned by an upstream remote. DEBUG β€” opt-in trace. */ + UPSTREAM_SUCCESS(Level.DEBUG), + + /** 404 from an upstream remote. DEBUG β€” normal during proxy fanouts. */ + UPSTREAM_NOT_FOUND(Level.DEBUG), + + /** + * 5xx or connection exception from an upstream remote. ERROR with + * destination + duration + cause. 
+ */ + UPSTREAM_5XX(Level.ERROR), + + // ---- Tier-4 β€” local operations (DB, Quartz, Caffeine, Valkey, storage, pool init) ---- + + /** Config load, pool init, service start β€” operator-visible lifecycle events. */ + LOCAL_CONFIG_CHANGE(Level.INFO), + + /** Routine local operation succeeded. DEBUG β€” enable per-component for investigation. */ + LOCAL_OP_SUCCESS(Level.DEBUG), + + /** Fallback, rate-shed, retry, queue-near-full β€” system degraded but serving. */ + LOCAL_DEGRADED(Level.WARN), + + /** Local operation failed. ERROR with cause (required). */ + LOCAL_FAILURE(Level.ERROR), + + // ---- Tier-5 β€” audit (always emitted, routed to the audit dataset) ---- + + /** + * Compliance audit event β€” {@code ARTIFACT_PUBLISH} / {@code ARTIFACT_DOWNLOAD} / + * {@code ARTIFACT_DELETE} / {@code RESOLUTION}. INFO level, but the audit logger + * is configured to NEVER be suppressed regardless of operational log-level settings. + */ + AUDIT_EVENT(Level.INFO); + + /** Log4j2 level the policy maps to. */ + private final Level level; + + LevelPolicy(final Level assigned) { + this.level = assigned; + } + + /** + * @return the Log4j2 {@link Level} this policy entry maps to. Callers should + * use this value with {@code logger.atLevel(level)} or an equivalent + * dispatcher switch. + */ + public Level level() { + return this.level; + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/observability/StructuredLogger.java b/pantera-core/src/main/java/com/auto1/pantera/http/observability/StructuredLogger.java new file mode 100644 index 000000000..86c401746 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/observability/StructuredLogger.java @@ -0,0 +1,886 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. 
+ * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.observability; + +import com.auto1.pantera.audit.AuditAction; +import com.auto1.pantera.http.context.RequestContext; +import com.auto1.pantera.http.fault.Fault; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.ThreadContext; +import org.apache.logging.log4j.message.MapMessage; + +/** + * Facade for the five-tier observability model β€” Β§4.1 / Β§4.3 of + * {@code docs/analysis/v2.2-target-architecture.md}. + * + *

    Each tier exposes a builder that requires its tier-specific fields via + * {@link Objects#requireNonNull(Object, String)} at entry. Java does not have + * phantom types; enforcing "required field" at the entry point with a clear + * NPE message is the idiomatic equivalent and gives the same outcome: a + * request path that forgets to pass {@link RequestContext} fails fast at the + * first frame rather than silently emitting a log line with a null + * {@code trace.id}. + * + *

    Each builder binds the current {@link RequestContext} into Log4j2 + * {@link ThreadContext} for the duration of its terminal emission so that + * {@code EcsLayout} picks up {@code trace.id}, {@code client.ip}, + * {@code user.name}, and the other ECS-owned keys automatically. The prior + * ThreadContext is restored when emission returns. + * + *

    The five tiers — see {@link LevelPolicy} for the level mapping — are: + *

      + *
    1. {@link AccessLogger} — Tier-1, client → Pantera (access log).
    2. + *
    3. {@link InternalLogger} — Tier-2, Pantera → Pantera (500 only).
    4. + *
    5. {@link UpstreamLogger} — Tier-3, Pantera → upstream remote.
    6. + *
    7. {@link LocalLogger} — Tier-4, local operations (DB, cache, pool init).
    8. + *
    9. {@link AuditLogger} — Tier-5, compliance audit (non-suppressible INFO).
    10. + *
    + * + *

    Callers use the static accessor functions: + *

    {@code
    + *   StructuredLogger.access().forRequest(ctx).status(503).fault(fault).log();
    + *   StructuredLogger.internal().forCall(ctx, "npm_proxy").fault(fault).error();
    + *   StructuredLogger.upstream().forUpstream(ctx, "registry.npmjs.org", 443)
    + *     .responseStatus(502).duration(1250L).cause(ex).error();
    + *   StructuredLogger.local().forComponent("com.auto1.pantera.index")
    + *     .message("executor queue saturated β€” caller-runs applied").warn();
    + *   StructuredLogger.audit().forEvent(ctx, AuditAction.ARTIFACT_PUBLISH)
    + *     .packageName("org.springframework:spring-core").packageVersion("6.1.10").emit();
    + * }
    + * + * @since 2.2.0 + */ +@SuppressWarnings({"PMD.TooManyMethods", "PMD.GodClass"}) +public final class StructuredLogger { + + private static final String LOGGER_ACCESS = "http.access"; + private static final String LOGGER_INTERNAL = "http.internal"; + private static final String LOGGER_UPSTREAM = "http.upstream"; + private static final String LOGGER_AUDIT = "com.auto1.pantera.audit"; + + private static final long SLOW_THRESHOLD_MS = 5000L; + + private static final AccessLogger ACCESS = new AccessLogger(); + private static final InternalLogger INTERNAL = new InternalLogger(); + private static final UpstreamLogger UPSTREAM = new UpstreamLogger(); + private static final LocalLogger LOCAL = new LocalLogger(); + private static final AuditLogger AUDIT = new AuditLogger(); + + private StructuredLogger() { + // facade β€” not instantiable + } + + /** + * @return the shared {@link AccessLogger} (Tier-1). + */ + public static AccessLogger access() { + return ACCESS; + } + + /** + * @return the shared {@link InternalLogger} (Tier-2). + */ + public static InternalLogger internal() { + return INTERNAL; + } + + /** + * @return the shared {@link UpstreamLogger} (Tier-3). + */ + public static UpstreamLogger upstream() { + return UPSTREAM; + } + + /** + * @return the shared {@link LocalLogger} (Tier-4). + */ + public static LocalLogger local() { + return LOCAL; + } + + /** + * @return the shared {@link AuditLogger} (Tier-5, non-suppressible). + */ + public static AuditLogger audit() { + return AUDIT; + } + + // ====================================================================== + // Tier-1 β€” AccessLogger (client β†’ Pantera) + // ====================================================================== + + /** Tier-1 factory. Emits one access log line per request. 
*/ + public static final class AccessLogger { + + private static final Logger LOG = LogManager.getLogger(LOGGER_ACCESS); + + private AccessLogger() { + } + + /** + * Begin building an access-log record for the given request. + * @param ctx non-null {@link RequestContext} β€” refuses {@code null} to + * enforce the Β§4.3 required-field contract. + * @return a chainable {@link AccessAt} builder. + * @throws NullPointerException if {@code ctx} is null. + */ + public AccessAt forRequest(final RequestContext ctx) { + Objects.requireNonNull(ctx, "ctx"); + return new AccessAt(ctx); + } + } + + /** + * Tier-1 terminal builder. {@link #log()} infers the level from the + * response status and slow-threshold per {@link LevelPolicy}. + */ + public static final class AccessAt { + + private final RequestContext ctx; + private Integer status; + private String body; + private Fault fault; + private Long durationMs; + + private AccessAt(final RequestContext rctx) { + this.ctx = rctx; + } + + /** Set {@code http.response.status_code}. */ + public AccessAt status(final int code) { + this.status = code; + return this; + } + + /** Human-readable response body snippet (truncated elsewhere). */ + public AccessAt body(final String bodyText) { + this.body = bodyText; + return this; + } + + /** + * Attach a {@link Fault}. When the fault is one of the internal / + * storage / integrity variants, {@code error.type}, {@code error.message}, + * and {@code error.stack_trace} are added to the log payload. + * @param rcause the Fault (may be null). + */ + public AccessAt fault(final Fault rcause) { + this.fault = rcause; + return this; + } + + /** Request duration in milliseconds. */ + public AccessAt duration(final long millis) { + this.durationMs = millis; + return this; + } + + /** + * Emit the access-log line at the level inferred from status + + * slow-threshold. Never throws. 
+ */ + public void log() { + final LevelPolicy policy = choosePolicy(); + final Map payload = buildPayload(); + try (AutoCloseable bound = this.ctx.bindToMdc()) { + dispatch(AccessLogger.LOG, policy.level(), payload, faultCause(this.fault)); + } catch (final Exception ex) { + // bindToMdc().close() is declared to throw Exception but our + // impl never does. This catch is purely for the API contract. + } + } + + private LevelPolicy choosePolicy() { + final int code = this.status == null ? 200 : this.status; + if (code >= 500) { + return LevelPolicy.CLIENT_FACING_5XX; + } + if (code == 404) { + return LevelPolicy.CLIENT_FACING_NOT_FOUND; + } + if (code == 401 || code == 403) { + return LevelPolicy.CLIENT_FACING_UNAUTH; + } + if (code >= 400) { + return LevelPolicy.CLIENT_FACING_4XX_OTHER; + } + if (this.durationMs != null && this.durationMs > SLOW_THRESHOLD_MS) { + return LevelPolicy.CLIENT_FACING_SLOW; + } + return LevelPolicy.CLIENT_FACING_SUCCESS; + } + + private Map buildPayload() { + final Map payload = new HashMap<>(); + payload.put("event.kind", "event"); + payload.put("event.category", List.of("web")); + payload.put("event.type", List.of("access")); + payload.put("event.action", "http_request"); + if (this.status != null) { + payload.put("http.response.status_code", this.status); + } + if (this.durationMs != null) { + payload.put("event.duration", this.durationMs); + } + if (this.body != null && !this.body.isEmpty()) { + payload.put("message", this.body); + } else { + payload.put("message", defaultMessage(this.status)); + } + attachUserAgentSubFields(payload, this.ctx.userAgent()); + attachFault(payload, this.fault); + return payload; + } + } + + // ====================================================================== + // Tier-2 β€” InternalLogger (pantera β†’ pantera, 500-only) + // ====================================================================== + + /** Tier-2 factory. Emits when an internal callee escalates to 500. 
*/ + public static final class InternalLogger { + + private static final Logger LOG = LogManager.getLogger(LOGGER_INTERNAL); + + private InternalLogger() { + } + + /** + * Begin an internal-call log record. + * @param ctx non-null {@link RequestContext}. + * @param memberName non-null name of the internal callee (e.g. member repo). + */ + public InternalAt forCall(final RequestContext ctx, final String memberName) { + Objects.requireNonNull(ctx, "ctx"); + Objects.requireNonNull(memberName, "memberName"); + return new InternalAt(ctx, memberName); + } + } + + /** Tier-2 terminal builder. {@link #error()} requires a {@link Fault}. */ + public static final class InternalAt { + + private final RequestContext ctx; + private final String member; + private Fault fault; + + private InternalAt(final RequestContext rctx, final String rmember) { + this.ctx = rctx; + this.member = rmember; + } + + /** + * Attach the 500-triggering fault. Required before {@link #error()}. + * @param rcause the non-null {@link Fault}. + */ + public InternalAt fault(final Fault rcause) { + Objects.requireNonNull(rcause, "fault"); + this.fault = rcause; + return this; + } + + /** + * Emit at {@link LevelPolicy#INTERNAL_CALL_500} (ERROR). + * @throws IllegalStateException if no {@link Fault} was set. + */ + public void error() { + if (this.fault == null) { + throw new IllegalStateException( + "InternalAt.error() requires a Fault; call .fault(...) first" + ); + } + emit(LevelPolicy.INTERNAL_CALL_500); + } + + /** Debug hook for successful internal calls (opt-in tracing). 
*/ + public void debug() { + emit(LevelPolicy.INTERNAL_CALL_SUCCESS); + } + + private void emit(final LevelPolicy policy) { + final Map payload = new HashMap<>(); + payload.put("event.kind", "event"); + payload.put("event.category", List.of("network")); + payload.put("event.action", "internal_call"); + payload.put("internal.source", nullToEmpty(this.ctx.repoName())); + payload.put("internal.target", this.member); + if (this.fault != null) { + payload.put("message", "Internal call failed: " + this.member); + attachFault(payload, this.fault); + } else { + payload.put("message", "Internal call succeeded: " + this.member); + payload.put("event.outcome", "success"); + } + try (AutoCloseable bound = this.ctx.bindToMdc()) { + dispatch(InternalLogger.LOG, policy.level(), payload, faultCause(this.fault)); + } catch (final Exception ex) { + // close() never throws in our impl. + } + } + } + + // ====================================================================== + // Tier-3 β€” UpstreamLogger (pantera β†’ upstream remote) + // ====================================================================== + + /** Tier-3 factory. Emits when an upstream call fails. */ + public static final class UpstreamLogger { + + private static final Logger LOG = LogManager.getLogger(LOGGER_UPSTREAM); + + private UpstreamLogger() { + } + + /** + * Begin an upstream-call record. + * @param ctx non-null {@link RequestContext}. + * @param destinationAddress non-null remote host. + * @param destinationPort remote port. + */ + public UpstreamAt forUpstream( + final RequestContext ctx, + final String destinationAddress, + final int destinationPort + ) { + Objects.requireNonNull(ctx, "ctx"); + Objects.requireNonNull(destinationAddress, "destinationAddress"); + return new UpstreamAt(ctx, destinationAddress, destinationPort); + } + } + + /** Tier-3 terminal builder. {@link #error()} requires a cause. 
*/ + public static final class UpstreamAt { + + private final RequestContext ctx; + private final String address; + private final int port; + private Integer responseStatus; + private Long durationMs; + private Throwable cause; + + private UpstreamAt(final RequestContext rctx, final String raddress, final int rport) { + this.ctx = rctx; + this.address = raddress; + this.port = rport; + } + + /** Upstream response status code (may be set before .cause() is known). */ + public UpstreamAt responseStatus(final int code) { + this.responseStatus = code; + return this; + } + + /** Upstream call duration in milliseconds. */ + public UpstreamAt duration(final long millis) { + this.durationMs = millis; + return this; + } + + /** Required before {@link #error()}. */ + public UpstreamAt cause(final Throwable throwable) { + Objects.requireNonNull(throwable, "cause"); + this.cause = throwable; + return this; + } + + /** + * Emit at {@link LevelPolicy#UPSTREAM_5XX}. + * @throws IllegalStateException if no cause was set. + */ + public void error() { + if (this.cause == null) { + throw new IllegalStateException( + "UpstreamAt.error() requires a cause; call .cause(...) first" + ); + } + emit(LevelPolicy.UPSTREAM_5XX); + } + + /** Debug hook for successful upstream calls (opt-in tracing). 
*/ + public void debug() { + final LevelPolicy policy; + if (this.responseStatus != null && this.responseStatus == 404) { + policy = LevelPolicy.UPSTREAM_NOT_FOUND; + } else { + policy = LevelPolicy.UPSTREAM_SUCCESS; + } + emit(policy); + } + + private void emit(final LevelPolicy policy) { + final Map payload = new HashMap<>(); + payload.put("event.kind", "event"); + payload.put("event.category", List.of("network")); + payload.put("event.action", "upstream_call"); + payload.put("destination.address", this.address); + payload.put("destination.port", this.port); + if (this.responseStatus != null) { + payload.put("http.response.status_code", this.responseStatus); + } + if (this.durationMs != null) { + payload.put("event.duration", this.durationMs); + } + if (this.cause != null) { + payload.put("message", "Upstream call failed: " + this.address); + payload.put("event.outcome", "failure"); + payload.put("error.type", this.cause.getClass().getName()); + payload.put("error.message", + this.cause.getMessage() == null ? this.cause.toString() : this.cause.getMessage()); + payload.put("error.stack_trace", stackTraceOf(this.cause)); + } else { + payload.put("message", "Upstream call: " + this.address); + } + try (AutoCloseable bound = this.ctx.bindToMdc()) { + dispatch(UpstreamLogger.LOG, policy.level(), payload, this.cause); + } catch (final Exception ex) { + // close() never throws in our impl. + } + } + } + + // ====================================================================== + // Tier-4 β€” LocalLogger (local ops) + // ====================================================================== + + /** Tier-4 factory. Caller supplies the component name (=logger name). */ + public static final class LocalLogger { + + private LocalLogger() { + } + + /** + * Begin a local-op record. + * @param component non-null component / logger name + * (e.g. {@code "com.auto1.pantera.index"}). 
+ */ + public LocalAt forComponent(final String component) { + Objects.requireNonNull(component, "component"); + return new LocalAt(component); + } + } + + /** Tier-4 terminal builder. */ + public static final class LocalAt { + + private final String component; + private String message; + private RequestContext reqCtx; + private Throwable cause; + private final Map fields = new HashMap<>(); + + private LocalAt(final String rcomponent) { + this.component = rcomponent; + } + + /** Required before any terminal. */ + public LocalAt message(final String msg) { + this.message = msg; + return this; + } + + /** Optional β€” attaches trace.id etc. when the op is request-linked. */ + public LocalAt reqCtx(final RequestContext ctx) { + this.reqCtx = ctx; + return this; + } + + /** Add a custom ECS field (dot notation). */ + public LocalAt field(final String key, final Object value) { + Objects.requireNonNull(key, "key"); + if (value != null) { + this.fields.put(key, value); + } + return this; + } + + /** Required before {@link #error()}; optional on {@link #warn()}. */ + public LocalAt cause(final Throwable throwable) { + this.cause = throwable; + return this; + } + + /** Config-change / lifecycle event. */ + public void info() { + emit(LevelPolicy.LOCAL_CONFIG_CHANGE); + } + + /** Op-success debug hook. */ + public void debug() { + emit(LevelPolicy.LOCAL_OP_SUCCESS); + } + + /** Degradation warning (shed, fallback, retry, queue-near-full). */ + public void warn() { + emit(LevelPolicy.LOCAL_DEGRADED); + } + + /** + * Local-op failure. + * @throws NullPointerException if no cause was set (required). + */ + public void error() { + Objects.requireNonNull( + this.cause, + "LocalAt.error() requires a cause; call .cause(...) first" + ); + emit(LevelPolicy.LOCAL_FAILURE); + } + + private void emit(final LevelPolicy policy) { + if (this.message == null) { + throw new IllegalStateException( + "LocalAt requires .message(...) 
before terminal" + ); + } + final Map payload = new HashMap<>(this.fields); + payload.put("message", this.message); + if (this.cause != null) { + payload.put("event.outcome", "failure"); + payload.put("error.type", this.cause.getClass().getName()); + payload.put("error.message", + this.cause.getMessage() == null ? this.cause.toString() : this.cause.getMessage()); + payload.put("error.stack_trace", stackTraceOf(this.cause)); + } + final Logger logger = LogManager.getLogger(this.component); + if (this.reqCtx != null) { + try (AutoCloseable bound = this.reqCtx.bindToMdc()) { + dispatch(logger, policy.level(), payload, this.cause); + } catch (final Exception ex) { + // close() never throws + } + } else { + dispatch(logger, policy.level(), payload, this.cause); + } + } + } + + // ====================================================================== + // Tier-5 β€” AuditLogger (compliance, non-suppressible) + // ====================================================================== + + /** Tier-5 factory. */ + public static final class AuditLogger { + + private static final Logger LOG = LogManager.getLogger(LOGGER_AUDIT); + + private AuditLogger() { + } + + /** + * Begin an audit record. + * @param ctx non-null {@link RequestContext} (for trace.id / user.name / client.ip). + * @param action non-null {@link AuditAction}. + */ + public AuditAt forEvent(final RequestContext ctx, final AuditAction action) { + Objects.requireNonNull(ctx, "ctx"); + Objects.requireNonNull(action, "action"); + return new AuditAt(ctx, action); + } + } + + /** + * Tier-5 terminal builder. Emits at INFO, non-suppressible, to the audit + * dataset via {@code event.category=audit} + {@code data_stream.dataset=pantera.audit}. 
+ */ + public static final class AuditAt { + + private final RequestContext ctx; + private final AuditAction action; + private String packageName; + private String packageVersion; + private String packageChecksum; + private String outcome; + + private AuditAt(final RequestContext rctx, final AuditAction raction) { + this.ctx = rctx; + this.action = raction; + } + + /** Required before {@link #emit()}. */ + public AuditAt packageName(final String name) { + this.packageName = name; + return this; + } + + /** Required before {@link #emit()}. */ + public AuditAt packageVersion(final String version) { + this.packageVersion = version; + return this; + } + + /** Optional (known on PUBLISH / DOWNLOAD, unknown on RESOLUTION). */ + public AuditAt packageChecksum(final String sha256Hex) { + this.packageChecksum = sha256Hex; + return this; + } + + /** Optional β€” {@code success} / {@code failure} / {@code unknown}. */ + public AuditAt outcome(final String outcomeKey) { + this.outcome = outcomeKey; + return this; + } + + /** + * Emit the audit record at INFO. Always fires, regardless of operational + * log levels (the audit logger config must not suppress it). + * @throws NullPointerException on missing required fields. 
+ */ + public void emit() { + Objects.requireNonNull(this.packageName, "packageName"); + Objects.requireNonNull(this.packageVersion, "packageVersion"); + final Map payload = new HashMap<>(); + payload.put("message", buildMessage()); + payload.put("event.kind", "event"); + payload.put("event.category", List.of("audit")); + payload.put("event.action", actionToken(this.action)); + payload.put("data_stream.dataset", "pantera.audit"); + payload.put("package.name", this.packageName); + payload.put("package.version", this.packageVersion); + if (this.packageChecksum != null && !this.packageChecksum.isEmpty()) { + payload.put("package.checksum", this.packageChecksum); + } + if (this.outcome != null && !this.outcome.isEmpty()) { + payload.put("event.outcome", this.outcome); + } else { + payload.put("event.outcome", "success"); + } + try (AutoCloseable bound = this.ctx.bindToMdc()) { + dispatch(AuditLogger.LOG, LevelPolicy.AUDIT_EVENT.level(), payload, null); + } catch (final Exception ex) { + // close() never throws + } + } + + private String buildMessage() { + return "Audit: " + actionToken(this.action) + " " + + this.packageName + "@" + this.packageVersion; + } + } + + // ====================================================================== + // Shared helpers + // ====================================================================== + + /** + * Dispatch a {@link MapMessage} at the requested {@link Level}. Log4j2's + * level-specific API preserves {@link co.elastic.logging.log4j2.EcsLayout}'s + * typed-field rendering, so payload values stay as native JSON types + * (ints, longs, string arrays) instead of being stringified. 
+ */ + @SuppressWarnings({"rawtypes", "unchecked"}) + private static void dispatch( + final Logger logger, + final Level level, + final Map payload, + final Throwable cause + ) { + if (!logger.isEnabled(level)) { + return; + } + // Drop payload keys that are already in ThreadContext / MDC to avoid + // duplicate top-level fields in the Elasticsearch document. + final Map filtered = new HashMap<>(payload.size()); + for (final Map.Entry e : payload.entrySet()) { + if (ThreadContext.containsKey(e.getKey())) { + continue; + } + filtered.put(e.getKey(), e.getValue()); + } + final MapMessage msg = new MapMessage(filtered); + if (level == Level.ERROR) { + if (cause != null) { + logger.error(msg, cause); + } else { + logger.error(msg); + } + } else if (level == Level.WARN) { + if (cause != null) { + logger.warn(msg, cause); + } else { + logger.warn(msg); + } + } else if (level == Level.INFO) { + if (cause != null) { + logger.info(msg, cause); + } else { + logger.info(msg); + } + } else if (level == Level.DEBUG) { + if (cause != null) { + logger.debug(msg, cause); + } else { + logger.debug(msg); + } + } else if (level == Level.TRACE) { + if (cause != null) { + logger.trace(msg, cause); + } else { + logger.trace(msg); + } + } else { + logger.log(level, msg); + } + } + + /** + * Parse {@code user_agent.original} with {@link UserAgentParser} and add + * the {@code user_agent.name / .version / .os.name / .os.version / .device.name} + * sub-fields to the payload when the parser identified them. Null-safe: + * missing or empty UA contributes nothing. + * + *

    WI-post-03b β€” restores the rich UA parsing that operators' Kibana + * dashboards filter on. The parsing happens on the access-log emission + * path only (Tier-1); other tiers do not re-emit user_agent.* since the + * fields are still available via {@code ThreadContext.get("user_agent.original")} + * for any downstream consumer that wants to re-parse. + */ + private static void attachUserAgentSubFields( + final Map payload, final String userAgent + ) { + if (userAgent == null || userAgent.isEmpty()) { + return; + } + final UserAgentParser.UserAgentInfo info = UserAgentParser.parse(userAgent); + if (info.name() != null) { + payload.put("user_agent.name", info.name()); + } + if (info.version() != null) { + payload.put("user_agent.version", info.version()); + } + if (info.osName() != null) { + payload.put("user_agent.os.name", info.osName()); + } + if (info.osVersion() != null) { + payload.put("user_agent.os.version", info.osVersion()); + } + if (info.deviceName() != null) { + payload.put("user_agent.device.name", info.deviceName()); + } + } + + /** + * Attach {@code error.type} / {@code error.message} / {@code error.stack_trace} + * for faults that escalate to 5xx. Structural (non-500) faults contribute + * their enum-like payload without a stack trace. 
+ */ + private static void attachFault(final Map payload, final Fault rcause) { + if (rcause == null) { + return; + } + if (rcause instanceof Fault.Internal internal) { + payload.put("event.outcome", "failure"); + payload.put("error.type", internal.cause().getClass().getName()); + payload.put("error.message", messageOf(internal.cause())); + payload.put("error.stack_trace", stackTraceOf(internal.cause())); + payload.put("fault.where", internal.where()); + } else if (rcause instanceof Fault.StorageUnavailable storage) { + payload.put("event.outcome", "failure"); + payload.put("error.type", storage.cause().getClass().getName()); + payload.put("error.message", messageOf(storage.cause())); + payload.put("error.stack_trace", stackTraceOf(storage.cause())); + payload.put("fault.key", storage.key()); + } else if (rcause instanceof Fault.IndexUnavailable index) { + payload.put("event.outcome", "failure"); + payload.put("error.type", index.cause().getClass().getName()); + payload.put("error.message", messageOf(index.cause())); + payload.put("error.stack_trace", stackTraceOf(index.cause())); + payload.put("fault.query", index.query()); + } else if (rcause instanceof Fault.UpstreamIntegrity integrity) { + payload.put("event.outcome", "failure"); + payload.put("error.type", "UpstreamIntegrity"); + payload.put("error.message", + "Checksum mismatch: " + integrity.algo() + + " claimed=" + integrity.sidecarClaim() + + " computed=" + integrity.computed()); + payload.put("fault.upstream_uri", integrity.upstreamUri()); + } else if (rcause instanceof Fault.NotFound notfound) { + payload.put("fault.scope", nullToEmpty(notfound.scope())); + payload.put("fault.artifact", nullToEmpty(notfound.artifact())); + } else if (rcause instanceof Fault.Forbidden forbidden) { + payload.put("fault.reason", forbidden.reason()); + } else if (rcause instanceof Fault.Deadline deadline) { + payload.put("event.outcome", "failure"); + payload.put("fault.where", deadline.where()); + } else if (rcause 
instanceof Fault.Overload overload) { + payload.put("event.outcome", "failure"); + payload.put("fault.resource", overload.resource()); + } else if (rcause instanceof Fault.AllProxiesFailed all) { + payload.put("event.outcome", "failure"); + payload.put("fault.group", all.group()); + } + } + + private static Throwable faultCause(final Fault fault) { + if (fault instanceof Fault.Internal internal) { + return internal.cause(); + } + if (fault instanceof Fault.StorageUnavailable storage) { + return storage.cause(); + } + if (fault instanceof Fault.IndexUnavailable index) { + return index.cause(); + } + return null; + } + + private static String defaultMessage(final Integer status) { + if (status == null) { + return "Request processed"; + } + if (status >= 500) { + return "Internal server error"; + } + if (status == 404) { + return "Not found"; + } + if (status == 401) { + return "Authentication required"; + } + if (status == 403) { + return "Access denied"; + } + if (status >= 400) { + return "Client error"; + } + return "Request completed"; + } + + private static String actionToken(final AuditAction action) { + return switch (action) { + case ARTIFACT_PUBLISH -> "artifact_publish"; + case ARTIFACT_DOWNLOAD -> "artifact_download"; + case ARTIFACT_DELETE -> "artifact_delete"; + case RESOLUTION -> "artifact_resolution"; + }; + } + + private static String messageOf(final Throwable t) { + return t.getMessage() == null ? t.toString() : t.getMessage(); + } + + private static String stackTraceOf(final Throwable t) { + final StringWriter sw = new StringWriter(); + t.printStackTrace(new PrintWriter(sw)); + return sw.toString(); + } + + private static String nullToEmpty(final String s) { + return s == null ? 
"" : s; + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/observability/UserAgentParser.java b/pantera-core/src/main/java/com/auto1/pantera/http/observability/UserAgentParser.java new file mode 100644 index 000000000..2a40044ee --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/observability/UserAgentParser.java @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.observability; + +/** + * User-Agent string parser producing ECS {@code user_agent.*} sub-fields. + * + *

    Recognises the User-Agent shapes Pantera sees in practice from package- + * manager clients (Maven, npm, pip, Docker, Go, Gradle, Composer, NuGet) and + * from generic HTTP tools (curl, wget). Also extracts the host OS family + * (Linux / Windows / macOS / FreeBSD) and, where present, the {@code Java/x.y.z} + * runtime version β€” which conventionally maps to ECS + * {@code user_agent.os.version} for JVM-based clients. + * + *

    Factored out of the legacy {@code EcsLogEvent} in v2.2.0 (WI-post-03b) + * so the new {@link StructuredLogger#access()} tier can re-emit the + * {@code user_agent.name} / {@code .version} / {@code .os.name} / {@code .os.version} + * / {@code .device.name} sub-fields that Kibana dashboards filter on. The + * parsing logic is preserved verbatim from the original EcsLogEvent + * implementation β€” no behavioural change. + * + *

    All methods are null-safe and side-effect free. + * + * @since 2.2.0 + */ +public final class UserAgentParser { + + private UserAgentParser() { + // utility β€” not instantiable + } + + /** + * Parse a User-Agent string into an {@link UserAgentInfo}. Null or empty + * input returns an all-{@code null} {@code UserAgentInfo}; unrecognised UAs + * return an info with only the OS fields populated (and even those only + * when the UA string contains one of the recognised OS tokens). + * + * @param ua the raw {@code User-Agent} header value β€” may be {@code null} + * @return a non-null {@link UserAgentInfo}; fields are {@code null} when + * the parser could not determine them + */ + public static UserAgentInfo parse(final String ua) { + String name = null; + String version = null; + String osName = null; + String osVersion = null; + final String deviceName = null; + + if (ua == null || ua.isEmpty()) { + return new UserAgentInfo(null, null, null, null, null); + } + + if (ua.startsWith("Maven/")) { + name = "Maven"; + version = extractVersion(ua, "Maven/"); + } else if (ua.startsWith("npm/")) { + name = "npm"; + version = extractVersion(ua, "npm/"); + } else if (ua.startsWith("pip/")) { + name = "pip"; + version = extractVersion(ua, "pip/"); + } else if (ua.contains("Docker-Client/")) { + name = "Docker"; + version = extractVersion(ua, "Docker-Client/"); + } else if (ua.startsWith("Go-http-client/")) { + name = "Go"; + version = extractVersion(ua, "Go-http-client/"); + } else if (ua.startsWith("Gradle/")) { + name = "Gradle"; + version = extractVersion(ua, "Gradle/"); + } else if (ua.contains("Composer/")) { + name = "Composer"; + version = extractVersion(ua, "Composer/"); + } else if (ua.startsWith("NuGet")) { + name = "NuGet"; + if (ua.contains("/")) { + version = extractVersion(ua, "NuGet Command Line/"); + } + } else if (ua.contains("curl/")) { + name = "curl"; + version = extractVersion(ua, "curl/"); + } else if (ua.contains("wget/")) { + name = "wget"; + version 
= extractVersion(ua, "wget/"); + } + + if (ua.contains("Linux")) { + osName = "Linux"; + } else if (ua.contains("Windows")) { + osName = "Windows"; + } else if (ua.contains("Mac OS X") || ua.contains("Darwin")) { + osName = "macOS"; + } else if (ua.contains("FreeBSD")) { + osName = "FreeBSD"; + } + + if (ua.contains("Java/")) { + final int start = ua.indexOf("Java/") + 5; + final int end = findVersionEnd(ua, start); + if (end > start) { + osVersion = ua.substring(start, end); + } + } + + return new UserAgentInfo(name, version, osName, osVersion, deviceName); + } + + private static String extractVersion(final String ua, final String prefix) { + final int start = ua.indexOf(prefix); + if (start < 0) { + return null; + } + final int versionStart = start + prefix.length(); + final int versionEnd = findVersionEnd(ua, versionStart); + if (versionEnd <= versionStart) { + return null; + } + return ua.substring(versionStart, versionEnd); + } + + private static int findVersionEnd(final String ua, final int start) { + int end = start; + while (end < ua.length()) { + final char c = ua.charAt(end); + if (c == ' ' || c == ';' || c == '(' || c == ')') { + break; + } + end++; + } + return end; + } + + /** + * Parsed ECS {@code user_agent.*} sub-fields. All fields are {@code null} + * when the parser could not determine them β€” callers must null-check before + * emitting into a log payload. + * + * @param name ECS {@code user_agent.name} β€” client family + * (e.g. {@code Maven}, {@code npm}, {@code Docker}). + * @param version ECS {@code user_agent.version} β€” client version + * (e.g. {@code 3.9.6}). + * @param osName ECS {@code user_agent.os.name} + * ({@code Linux} / {@code Windows} / {@code macOS} / {@code FreeBSD}). + * @param osVersion ECS {@code user_agent.os.version} β€” for JVM clients, + * the {@code Java/x.y.z} runtime version. + * @param deviceName ECS {@code user_agent.device.name} β€” reserved; not + * populated by the current parser (always {@code null}). 
+ */ + public record UserAgentInfo( + String name, + String version, + String osName, + String osVersion, + String deviceName + ) { + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/resilience/BulkheadLimits.java b/pantera-core/src/main/java/com/auto1/pantera/http/resilience/BulkheadLimits.java new file mode 100644 index 000000000..ca5564b31 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/resilience/BulkheadLimits.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.resilience; + +import java.time.Duration; + +/** + * Configuration limits for a {@link RepoBulkhead}. + * + *

    Bindable from YAML per-repo configuration. Each repository may override + * these defaults in {@code pantera.yml} under + * {@code meta.repositories..bulkhead}. + * + * @param maxConcurrent Maximum number of concurrent in-flight requests + * the bulkhead will admit before rejecting with + * {@link com.auto1.pantera.http.fault.Fault.Overload}. + * @param maxQueueDepth Maximum queue depth for the per-repo drain pool. + * Drain tasks exceeding this depth are dropped with a + * WARN log and a metrics counter increment. + * @param retryAfter Suggested duration for the {@code Retry-After} header + * sent to clients when the bulkhead rejects a request. + * @since 2.2.0 + */ +public record BulkheadLimits(int maxConcurrent, int maxQueueDepth, Duration retryAfter) { + + /** + * Canonical constructor with validation. + */ + public BulkheadLimits { + if (maxConcurrent <= 0) { + throw new IllegalArgumentException( + "maxConcurrent must be strictly positive: " + maxConcurrent + ); + } + if (maxQueueDepth <= 0) { + throw new IllegalArgumentException( + "maxQueueDepth must be strictly positive: " + maxQueueDepth + ); + } + if (retryAfter == null || retryAfter.isNegative() || retryAfter.isZero()) { + throw new IllegalArgumentException( + "retryAfter must be strictly positive: " + retryAfter + ); + } + } + + /** + * Reasonable defaults: 200 concurrent requests, 1000-deep drain queue, + * 1-second retry-after. + * + * @return Default limits suitable for most repositories. 
+ */ + public static BulkheadLimits defaults() { + return new BulkheadLimits(200, 1000, Duration.ofSeconds(1)); + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/resilience/RepoBulkhead.java b/pantera-core/src/main/java/com/auto1/pantera/http/resilience/RepoBulkhead.java new file mode 100644 index 000000000..301a597b3 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/resilience/RepoBulkhead.java @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.resilience; + +import com.auto1.pantera.http.fault.Fault; +import com.auto1.pantera.http.fault.Result; + +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; +import java.util.concurrent.Executor; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.Semaphore; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.Supplier; + +/** + * Per-repository bulkhead that bounds the number of concurrent in-flight + * requests and provides a dedicated drain executor for background body + * draining. + * + *

    Implements design principle 7 ("per-repo blast radius") from + * {@code docs/analysis/v2.2-target-architecture.md} section 7. Each + * repository gets exactly one {@code RepoBulkhead} at start-up; saturation + * in one repository does not starve another. + * + *

    When the semaphore is full, {@link #run(Supplier)} returns + * {@link Result#err(Fault)} with a {@link Fault.Overload} carrying the + * repo name and suggested retry-after duration. + * + * @since 2.2.0 + */ +public final class RepoBulkhead { + + private final String repo; + private final Semaphore inFlight; + private final BulkheadLimits limits; + private final Executor drainExecutor; + private final AtomicLong drainDropCount; + + /** + * Construct a per-repo bulkhead. + * + * @param repo Repository name (used in {@link Fault.Overload} and metrics). + * @param limits Concurrency limits for this repository. + * @param ctxWorkerPool A {@link com.auto1.pantera.http.context.ContextualExecutor}-wrapped + * executor used as the base for the per-repo drain pool. Currently + * unused directly; the drain pool is constructed internally with its + * own bounded queue. Retained for future per-repo worker pool support. + */ + public RepoBulkhead( + final String repo, + final BulkheadLimits limits, + final Executor ctxWorkerPool + ) { + this.repo = Objects.requireNonNull(repo, "repo"); + this.limits = Objects.requireNonNull(limits, "limits"); + Objects.requireNonNull(ctxWorkerPool, "ctxWorkerPool"); + this.inFlight = new Semaphore(limits.maxConcurrent()); + this.drainDropCount = new AtomicLong(); + final int drainThreads = Math.max(2, limits.maxConcurrent() / 50); + this.drainExecutor = buildDrainExecutor(repo, limits.maxQueueDepth(), drainThreads); + } + + /** + * Execute an operation within this bulkhead's concurrency limit. + * + *

    If the semaphore cannot be acquired immediately, returns a + * completed future with {@link Result#err(Fault)} containing + * {@link Fault.Overload}. Otherwise, the operation is invoked and + * the semaphore is released when the returned stage completes + * (whether normally or exceptionally). + * + * @param op Supplier producing the async operation to protect. + * @param Result value type. + * @return A completion stage with the operation's result or an overload fault. + */ + public CompletionStage> run(final Supplier>> op) { + if (!this.inFlight.tryAcquire()) { + return CompletableFuture.completedFuture( + Result.err(new Fault.Overload(this.repo, this.limits.retryAfter())) + ); + } + try { + return op.get().whenComplete((r, e) -> this.inFlight.release()); + } catch (final RuntimeException ex) { + this.inFlight.release(); + return CompletableFuture.failedFuture(ex); + } + } + + /** + * Repository name this bulkhead protects. + * + * @return Non-null repository name. + */ + public String repo() { + return this.repo; + } + + /** + * Number of permits currently held (in-flight requests). + * + * @return Active request count, between 0 and {@link BulkheadLimits#maxConcurrent()}. + */ + public int activeCount() { + return this.limits.maxConcurrent() - this.inFlight.availablePermits(); + } + + /** + * The per-repo drain executor for background body draining. + * + *

    Replaces the former process-wide static {@code DRAIN_EXECUTOR} that + * lived in {@code GroupResolver}. Each repository's + * drain pool is bounded independently so a slow-draining repo cannot + * exhaust the drain capacity of other repos. + * + * @return Non-null executor for drain tasks. + */ + public Executor drainExecutor() { + return this.drainExecutor; + } + + /** + * Total count of drain tasks dropped because this repo's drain queue was full. + * + * @return Monotonic total of rejected drain tasks since this bulkhead was created. + */ + public long drainDropCount() { + return this.drainDropCount.get(); + } + + /** + * The limits this bulkhead was configured with. + * + * @return Non-null limits record. + */ + public BulkheadLimits limits() { + return this.limits; + } + + private Executor buildDrainExecutor( + final String repoName, + final int queueDepth, + final int threads + ) { + final AtomicLong dropCounter = this.drainDropCount; + return new ThreadPoolExecutor( + threads, threads, + 60L, TimeUnit.SECONDS, + new LinkedBlockingQueue<>(queueDepth), + r -> { + final Thread t = new Thread( + r, "drain-" + repoName + "-" + System.identityHashCode(r) + ); + t.setDaemon(true); + return t; + }, + (r, executor) -> dropCounter.incrementAndGet() + ); + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/resilience/SingleFlight.java b/pantera-core/src/main/java/com/auto1/pantera/http/resilience/SingleFlight.java new file mode 100644 index 000000000..3c210e18f --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/http/resilience/SingleFlight.java @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.resilience; + +import com.github.benmanes.caffeine.cache.AsyncCache; +import com.github.benmanes.caffeine.cache.Caffeine; + +import java.time.Duration; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; +import java.util.concurrent.Executor; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +/** + * Unified per-key request coalescer β€” one {@code loader.get()} invocation per + * concurrent burst of {@link #load} calls sharing the same key. + * + *

    Consolidates the three hand-rolled coalescers that lived in {@code + * GroupResolver.inFlightFanouts}, {@code MavenGroupSlice.inFlightMetadataFetches}, + * and the legacy cache-write in-flight map into one Caffeine-backed + * implementation. See Β§6.4 of {@code docs/analysis/v2.2-target-architecture.md} + * and anti-patterns A6, A7, A8, A9 in {@code v2.1.3-architecture-review.md}. + * + *

    Guarantees

    + *
      + *
    • Coalescing. N concurrent {@code load(k, loader)} calls for the + * same key invoke {@code loader.get()} exactly once; all N callers receive + * the same terminal value or exception.
    • + *
    • Fresh-after-complete. On loader completion (normal or exceptional) + * the entry is invalidated so the next {@link #load} for that key triggers + * a fresh fetch β€” the cache holds in-flight state, never results.
    • + *
    • Zombie eviction. An entry that never completes is evicted by + * Caffeine's {@code expireAfterWrite(inflightTtl)}; the next {@link #load} + * starts a fresh loader. Closes A8.
    • + *
    • Exception propagation. When the loader completes exceptionally, + * every waiting caller receives the same exception; the entry is still + * invalidated so the next {@link #load} retries.
    • + *
    • No call-site throw. {@link #load} never throws β€” loader failures + * surface only inside the returned {@link CompletableFuture}.
    • + *
    • Cancellation isolation. Cancelling one caller's returned future + * never cancels the loader or other callers' futures. The loader runs to + * completion regardless of caller cancellation.
    • + *
    • Stack-flat completion. Followers receive completion on the + * configured {@code executor}, never on the leader's stack β€” fixes the + * v2.1.3 regression where {@code GroupResolver.inFlightFanouts} blew the + * stack at ~400 synchronously-completing followers (commit {@code ccc155f6}).
    • + *
    + * + *

    Implementation notes

    + * + * Caffeine's {@link AsyncCache#get(Object, java.util.function.BiFunction)} is + * atomic per-key: exactly one bifunction invocation observes an absent mapping + * and installs the loader's future; concurrent callers join the same future. + * We wrap that shared future per caller so that (a) caller-side cancellation + * cannot cancel the loader, (b) completion is dispatched via the executor + * rather than synchronously on the leader's stack. + * + * @param Key type. + * @param Value type returned by the loader. + * @since 2.2.0 + */ +public final class SingleFlight { + + /** + * Caffeine async cache of in-flight loads. Entries are bounded by {@code + * maxInFlight} and expire after {@code inflightTtl} once the loader + * future completes (Caffeine does not apply {@code expireAfterWrite} to + * pending futures). Zombie protection for non-completing loaders + * is provided separately via {@link CompletableFuture#orTimeout(long, + * TimeUnit)} on the wrapped loader future β€” see {@link #load}. + * + *

    The cache is populated exclusively via {@link AsyncCache#get(Object, + * java.util.function.BiFunction)} β€” never via a loading cache β€” so a + * {@code get} without a loader would throw. That is by design: this cache + * holds in-flight work, not a key/value store. + */ + private final AsyncCache cache; + + /** + * Executor used for stack-flat completion of waiters. All completions + * (both the raw Caffeine future's and the per-caller forwarders) hop to + * this executor so a synchronously-completing loader never runs a + * follower's callback on its own stack. + */ + private final Executor executor; + + /** + * Zombie-protection timeout. A loader whose future is still pending after + * this duration is force-completed with {@link java.util.concurrent.TimeoutException} + * via {@link CompletableFuture#orTimeout(long, TimeUnit)}, which in turn + * triggers the {@code whenCompleteAsync(invalidate)} hook and frees the + * cache slot. This closes A8. + */ + private final Duration inflightTtl; + + /** + * Create a single-flight coalescer. + * + * @param inflightTtl Maximum time an in-flight entry may remain in the + * cache. Entries older than this are evicted by + * Caffeine's time-based expiry β€” acts as zombie + * protection for loaders that never complete. + * @param maxInFlight Maximum number of distinct in-flight keys. When + * exceeded, Caffeine evicts the least-recently-used + * entry. Existing waiters on an evicted entry still + * receive their value from the underlying loader + * future β€” eviction only prevents coalescing of + * future calls for that key. + * @param executor Executor used for stack-flat follower completion. + * Must not be {@code null}. For a server context this + * is typically the common worker pool or a dedicated + * {@code ForkJoinPool}. 
+ */ + public SingleFlight( + final Duration inflightTtl, + final int maxInFlight, + final Executor executor + ) { + Objects.requireNonNull(inflightTtl, "inflightTtl"); + Objects.requireNonNull(executor, "executor"); + if (inflightTtl.isNegative() || inflightTtl.isZero()) { + throw new IllegalArgumentException( + "inflightTtl must be strictly positive: " + inflightTtl + ); + } + if (maxInFlight <= 0) { + throw new IllegalArgumentException( + "maxInFlight must be strictly positive: " + maxInFlight + ); + } + this.executor = executor; + this.inflightTtl = inflightTtl; + this.cache = Caffeine.newBuilder() + // expireAfterWrite applies only to COMPLETED futures in an + // AsyncCache. Pending zombies are bounded by orTimeout (see + // #load), not by this policy. + .expireAfterWrite(inflightTtl) + .maximumSize(maxInFlight) + .executor(executor) + .buildAsync(); + } + + /** + * Load-or-join: concurrent calls for the same key share one + * {@code loader.get()} invocation. + * + *

    The returned future is independent of the shared loader future: + * cancelling it never cancels the loader. Downstream {@code thenCompose} + * / {@code whenComplete} callbacks attached to it run on the configured + * executor, not on the leader's stack. + * + * @param key Non-null coalescing key. + * @param loader Supplier invoked exactly once per concurrent burst for + * {@code key}. Must return a non-null {@link CompletionStage}. + * Exceptions thrown synchronously by the supplier are + * propagated as an exceptionally-completed future. + * @return A new {@link CompletableFuture} completing with the loader's + * value or exception on the configured executor. + */ + public CompletableFuture load( + final K key, + final Supplier> loader + ) { + Objects.requireNonNull(key, "key"); + Objects.requireNonNull(loader, "loader"); + final long ttlMillis = this.inflightTtl.toMillis(); + final CompletableFuture shared = this.cache.get( + key, + (k, e) -> { + final CompletableFuture source; + try { + source = loader.get().toCompletableFuture(); + } catch (final RuntimeException ex) { + final CompletableFuture failed = new CompletableFuture<>(); + failed.completeExceptionally(ex); + return failed; + } + // Zombie eviction: a loader whose future is still pending + // after ttlMillis is force-completed with TimeoutException. + // We wrap in a NEW CompletableFuture so the caller's original + // future (if they hold a reference to it) is not mutated. + // The wrapper propagates the source's terminal state when + // available; otherwise orTimeout fires and the wrapper + // completes exceptionally. Either way the + // whenCompleteAsync(invalidate) hook frees the cache slot. 
+ final CompletableFuture wrapped = new CompletableFuture<>(); + source.whenComplete((value, err) -> { + if (err != null) { + wrapped.completeExceptionally(err); + } else { + wrapped.complete(value); + } + }); + wrapped.orTimeout(ttlMillis, TimeUnit.MILLISECONDS); + return wrapped; + } + ); + shared.whenCompleteAsync( + (value, err) -> this.cache.synchronous().invalidate(key), + this.executor + ); + final CompletableFuture forwarded = new CompletableFuture<>(); + shared.whenCompleteAsync( + (value, err) -> { + if (err != null) { + forwarded.completeExceptionally(err); + } else { + forwarded.complete(value); + } + }, + this.executor + ); + return forwarded; + } + + /** + * Evict an in-flight entry for {@code key} without completing it. + * + *

    Does not cancel any already-dispatched loader β€” the loader's future + * continues to completion, but the next {@link #load} for the same key + * invokes a fresh loader rather than joining the previous one. + * + * @param key Key to evict. May be {@code null}: a no-op in that case. + */ + public void invalidate(final K key) { + if (key != null) { + this.cache.synchronous().invalidate(key); + } + } + + /** + * Current number of in-flight entries. Intended for metrics and tests. + * + *

    The estimate is eventually consistent β€” concurrent completions may + * race with this read. Caffeine recommends + * {@code cache.synchronous().estimatedSize()} for monotonic bounds; we + * expose it as {@code inFlightCount} for parity with the legacy + * coalescer APIs. + * + * @return Approximate count of distinct keys currently in-flight. + */ + public int inFlightCount() { + return (int) this.cache.synchronous().estimatedSize(); + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/slice/EcsLoggingSlice.java b/pantera-core/src/main/java/com/auto1/pantera/http/slice/EcsLoggingSlice.java index 5a7a07c95..80c4f33ed 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/http/slice/EcsLoggingSlice.java +++ b/pantera-core/src/main/java/com/auto1/pantera/http/slice/EcsLoggingSlice.java @@ -14,9 +14,11 @@ import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Response; import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.context.RequestContext; import com.auto1.pantera.http.headers.Header; import com.auto1.pantera.http.log.EcsMdc; import com.auto1.pantera.http.log.EcsLogEvent; +import com.auto1.pantera.http.observability.StructuredLogger; import com.auto1.pantera.http.rq.RequestLine; import com.auto1.pantera.http.trace.SpanContext; import org.slf4j.MDC; @@ -37,9 +39,9 @@ * * *

    Access log emission is suppressed when the request carries the - * {@link #INTERNAL_ROUTING_HEADER} header, which GroupSlice sets when dispatching + * {@link #INTERNAL_ROUTING_HEADER} header, which GroupResolver sets when dispatching * to member slices. Internal routing is already captured as DEBUG application logs - * in GroupSlice itself (event.action=group_index_hit, group_proxy_fanout, etc.). + * in GroupResolver itself (event.action=group_index_hit, group_proxy_fanout, etc.). * *

    This slice should be used at the top level of the slice chain to ensure * all HTTP requests are logged consistently. @@ -49,7 +51,7 @@ public final class EcsLoggingSlice implements Slice { /** - * Request header set by GroupSlice when dispatching to a member slice. + * Request header set by GroupResolver when dispatching to a member slice. * When present, EcsLoggingSlice skips access log emission to avoid ~105K * noise entries per 30 min from internal group-to-member queries. * The header is group-internal and does NOT propagate to upstream remotes @@ -155,31 +157,26 @@ public CompletableFuture response( .thenApply(response -> { final long duration = System.currentTimeMillis() - startTime; - // Skip access log for GroupSlice β†’ member internal dispatches. - // Internal routing is captured as DEBUG application logs in GroupSlice + // Skip access log for GroupResolver β†’ member internal dispatches. + // Internal routing is captured as DEBUG application logs in GroupResolver // (event.action=group_index_hit, group_proxy_fanout, etc.). if (!internalRouting) { - // Build ECS log event - // NOTE: client.ip, user.name, trace.id are already in MDC (set above). - // EcsLayout includes all MDC entries in JSON output automatically. - // Do NOT add them to MapMessage β€” that causes duplicate fields in Elastic. - final EcsLogEvent logEvent = new EcsLogEvent() - .httpMethod(line.method().value()) - .httpVersion(line.version()) - .httpStatus(response.status()) - .urlPath(line.uri().getPath()) - .urlOriginal(line.uri().toString()) - .userAgent(headers) - .duration(duration); - - // Add query string if present - final String query = line.uri().getQuery(); - if (query != null && !query.isEmpty()) { - logEvent.urlQuery(query); - } - - // Log the event (automatically selects log level based on status) - logEvent.log(); + // WI-03 Β§4.1: emit the access log via the Tier-1 builder. 
+ // The legacy EcsLogEvent emission that used to run alongside + // here was removed to avoid doubling the access-log volume + // in Kibana. Rich user_agent.* sub-field parsing (name, + // version, os.name, os.version) and url.query emission + // migrate to StructuredLogger.access in a follow-up WI; + // the core contract (trace.id, client.ip, user.name, + // url.original, url.path, http.request.method, + // http.response.status_code, event.duration, + // user_agent.original) is covered by RequestContext today. + final RequestContext rctx = buildRequestContext( + span, clientIp, userName, line); + StructuredLogger.access().forRequest(rctx) + .status(response.status().code()) + .duration(duration) + .log(); } // Add traceparent response header for downstream correlation @@ -217,5 +214,34 @@ public CompletableFuture response( MDC.remove(EcsMdc.REPO_TYPE); }); } + + /** + * Build a {@link RequestContext} for the WI-03 {@link StructuredLogger} + * access tier. The slice still maintains MDC directly (for legacy call + * sites that read {@link MDC}); this method just assembles the same fields + * into the immutable envelope the Tier-1 builder expects. + */ + private RequestContext buildRequestContext( + final SpanContext span, + final String clientIp, + final String userName, + final RequestLine line + ) { + return new RequestContext( + span.traceId(), + /* transactionId */ null, + span.spanId(), + /* httpRequestId */ null, + userName == null ? 
"anonymous" : userName, + clientIp, + /* userAgent */ null, + this.repoName, + this.repoType, + RequestContext.ArtifactRef.EMPTY, + line.uri().toString(), + line.uri().getPath(), + com.auto1.pantera.http.context.Deadline.in(java.time.Duration.ofSeconds(30)) + ); + } } diff --git a/pantera-core/src/main/java/com/auto1/pantera/http/trace/MdcPropagation.java b/pantera-core/src/main/java/com/auto1/pantera/http/trace/MdcPropagation.java deleted file mode 100644 index e3258d42f..000000000 --- a/pantera-core/src/main/java/com/auto1/pantera/http/trace/MdcPropagation.java +++ /dev/null @@ -1,446 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. - */ -package com.auto1.pantera.http.trace; - -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.CompletableFuture; -import java.util.function.BiConsumer; -import java.util.function.BiFunction; -import java.util.function.Consumer; -import java.util.function.Function; -import java.util.function.Supplier; -import org.slf4j.MDC; - -/** - * Captures the current SLF4J MDC (trace.id, span.id, client.ip, etc.) - * and restores it inside a callback running on a different thread. - * - *

    MDC is backed by {@code ThreadLocal}, so state set on the Vert.x - * event loop thread is NOT visible on worker threads used by - * {@code executeBlocking}. Without this utility, logs emitted from - * inside a blocking auth call would be missing all request-scoped - * fields.

    - * - *

    Usage: - *

    {@code
    - * ctx.vertx().executeBlocking(
    - *     MdcPropagation.withMdc(() -> auth.user(name, pass)),
    - *     false
    - * );
    - * }
    - * The captured MDC is the one present at the call site (event loop - * thread). On the worker thread the captured map is installed before - * the callable runs and fully cleared after.

    - * - * @since 2.1.0 - */ -public final class MdcPropagation { - - private MdcPropagation() { - } - - /** - * Wrap a {@link Callable} so it restores the caller's MDC context - * on whichever thread it ends up running. - * - * @param callable The original callable - * @param Return type - * @return A callable that installs + clears MDC around the original - */ - public static Callable withMdc(final Callable callable) { - final Map captured = MDC.getCopyOfContextMap(); - return () -> { - final Map prior = MDC.getCopyOfContextMap(); - if (captured != null) { - MDC.setContextMap(captured); - } else { - MDC.clear(); - } - try { - return callable.call(); - } finally { - if (prior != null) { - MDC.setContextMap(prior); - } else { - MDC.clear(); - } - } - }; - } - - /** - * Wrap a {@link Function} for use in {@code CompletableFuture.thenCompose()} so - * it restores the caller's MDC context on whichever thread the callback runs. - * - *

    Usage: - *

    {@code
    -     * future.thenCompose(MdcPropagation.withMdc(value -> {
    -     *     // MDC is restored here regardless of which thread executes this
    -     *     return anotherFuture(value);
    -     * }))
    -     * }
    - * - *

    The prior MDC state of the executing thread is saved and restored after - * the function completes, so pool threads are not polluted with request context. - * - * @param fn The original function - * @param Input type - * @param Output future type - * @return A function that installs + restores MDC around the original - */ - public static Function> withMdc( - final Function> fn - ) { - final Map captured = MDC.getCopyOfContextMap(); - return value -> { - final Map prior = MDC.getCopyOfContextMap(); - if (captured != null) { - MDC.setContextMap(captured); - } else { - MDC.clear(); - } - try { - return fn.apply(value); - } finally { - if (prior != null) { - MDC.setContextMap(prior); - } else { - MDC.clear(); - } - } - }; - } - - /** - * Wrap a plain {@link Function} (for use in {@code CompletableFuture.thenApply()}) so - * it restores the caller's MDC context on whichever thread the callback runs. - * - *

    Usage: - *

    {@code
    -     * future.thenApply(MdcPropagation.withMdcFunction(value -> {
    -     *     // MDC is restored here
    -     *     return transform(value);
    -     * }))
    -     * }
    - * - * @param fn The original function - * @param Input type - * @param Return type - * @return A function that installs + restores MDC around the original - */ - public static Function withMdcFunction(final Function fn) { - final Map captured = MDC.getCopyOfContextMap(); - return value -> { - final Map prior = MDC.getCopyOfContextMap(); - if (captured != null) { - MDC.setContextMap(captured); - } else { - MDC.clear(); - } - try { - return fn.apply(value); - } finally { - if (prior != null) { - MDC.setContextMap(prior); - } else { - MDC.clear(); - } - } - }; - } - - /** - * Wrap a {@link BiConsumer} for use in {@code CompletableFuture.whenComplete()} so - * it restores the caller's MDC context on whichever thread the callback runs. - * - *

    Usage: - *

    {@code
    -     * future.whenComplete(MdcPropagation.withMdc((result, err) -> {
    -     *     // MDC is restored here regardless of which thread executes this
    -     *     recordMetrics(result, err);
    -     * }))
    -     * }
    - * - *

    The prior MDC state of the executing thread is saved and restored after - * the consumer completes, so pool threads are not polluted with request context. - * - * @param consumer The original bi-consumer - * @param Result type - * @param Throwable type - * @return A bi-consumer that installs + restores MDC around the original - */ - public static BiConsumer withMdcBiConsumer( - final BiConsumer consumer - ) { - final Map captured = MDC.getCopyOfContextMap(); - return (result, err) -> { - final Map prior = MDC.getCopyOfContextMap(); - if (captured != null) { - MDC.setContextMap(captured); - } else { - MDC.clear(); - } - try { - consumer.accept(result, err); - } finally { - if (prior != null) { - MDC.setContextMap(prior); - } else { - MDC.clear(); - } - } - }; - } - - /** - * Wrap a {@link BiFunction} for use in {@code CompletableFuture.handle()} so - * it restores the caller's MDC context on whichever thread the callback runs. - * - *

    Usage: - *

    {@code
    -     * future.handle(MdcPropagation.withMdcBiFunction((result, err) -> {
    -     *     // MDC is restored here regardless of which thread executes this
    -     *     return transform(result, err);
    -     * }))
    -     * }
    - * - *

    The prior MDC state of the executing thread is saved and restored after - * the function completes, so pool threads are not polluted with request context. - * - * @param fn The original bi-function - * @param Result type - * @param Throwable type - * @param Return type - * @return A bi-function that installs + restores MDC around the original - */ - public static BiFunction withMdcBiFunction( - final BiFunction fn - ) { - final Map captured = MDC.getCopyOfContextMap(); - return (result, err) -> { - final Map prior = MDC.getCopyOfContextMap(); - if (captured != null) { - MDC.setContextMap(captured); - } else { - MDC.clear(); - } - try { - return fn.apply(result, err); - } finally { - if (prior != null) { - MDC.setContextMap(prior); - } else { - MDC.clear(); - } - } - }; - } - - /** - * Wrap a {@link Runnable} so it restores the caller's MDC context - * on whichever thread it ends up running. - * - * @param runnable The original runnable - * @return A runnable that installs + clears MDC around the original - */ - public static Runnable withMdc(final Runnable runnable) { - final Map captured = MDC.getCopyOfContextMap(); - return () -> { - final Map prior = MDC.getCopyOfContextMap(); - if (captured != null) { - MDC.setContextMap(captured); - } else { - MDC.clear(); - } - try { - runnable.run(); - } finally { - if (prior != null) { - MDC.setContextMap(prior); - } else { - MDC.clear(); - } - } - }; - } - - /** - * Wrap an RxJava {@link io.reactivex.functions.Function} so it restores - * the caller's MDC context on whichever thread the operator executes. - * - *

    Use for RxJava {@code Maybe.map}, {@code Single.map}, - * {@code Flowable.map} and similar β€” whose continuations run on the - * thread that completed the upstream signal (often a worker pool with - * empty MDC).

    - * - * @param fn The original RxJava function - * @param Input type - * @param Return type - * @return A function that installs + restores MDC around the original - */ - public static io.reactivex.functions.Function withMdcRxFunction( - final io.reactivex.functions.Function fn - ) { - final Map captured = MDC.getCopyOfContextMap(); - return value -> { - final Map prior = MDC.getCopyOfContextMap(); - if (captured != null) { - MDC.setContextMap(captured); - } else { - MDC.clear(); - } - try { - return fn.apply(value); - } finally { - if (prior != null) { - MDC.setContextMap(prior); - } else { - MDC.clear(); - } - } - }; - } - - /** - * Wrap a {@link Supplier} so it restores the caller's MDC context on - * whichever thread executes it. Primarily for use with - * {@link CompletableFuture#supplyAsync(Supplier, java.util.concurrent.Executor)}, - * whose lambdas otherwise run on worker threads with empty MDC. - * - * @param supplier The original supplier - * @param Return type - * @return A supplier that installs + restores MDC around the original - */ - public static Supplier withMdcSupplier(final Supplier supplier) { - final Map captured = MDC.getCopyOfContextMap(); - return () -> { - final Map prior = MDC.getCopyOfContextMap(); - if (captured != null) { - MDC.setContextMap(captured); - } else { - MDC.clear(); - } - try { - return supplier.get(); - } finally { - if (prior != null) { - MDC.setContextMap(prior); - } else { - MDC.clear(); - } - } - }; - } - - /** - * Wrap a single-argument {@link Consumer} so it restores the caller's MDC - * context on whichever thread the callback runs. - * - *

    Useful for RxJava subscribe callbacks and other async APIs that take a - * plain {@code Consumer} (e.g. onSuccess / onError lambdas). The captured - * MDC is the one present at wrap time; pool threads are not polluted after - * the consumer completes.

    - * - *

    Usage: - *

    {@code
    -     * observable.subscribe(
    -     *     MdcPropagation.withMdcConsumer(result -> logger.info("done: {}", result)),
    -     *     MdcPropagation.withMdcConsumer(error -> logger.warn("failed", error))
    -     * );
    -     * }
    - * - * @param consumer The original consumer - * @param Input type - * @return A consumer that installs + restores MDC around the original - */ - public static Consumer withMdcConsumer(final Consumer consumer) { - final Map captured = MDC.getCopyOfContextMap(); - return value -> { - final Map prior = MDC.getCopyOfContextMap(); - if (captured != null) { - MDC.setContextMap(captured); - } else { - MDC.clear(); - } - try { - consumer.accept(value); - } finally { - if (prior != null) { - MDC.setContextMap(prior); - } else { - MDC.clear(); - } - } - }; - } - - /** - * Capture the current MDC context into a detached map. - * - *

    Returns a defensive copy so callers can restore this snapshot later on - * a different thread via {@link #runWith(Map, Runnable)}. Returns - * {@code null} when the current MDC is null or empty so callers can treat - * the absence of context as a simple no-op.

    - * - *

    Use this when the async callback is a non-standard functional - * interface (e.g. RxJava {@code Consumer} in a 3-arg subscribe) and the - * pre-wrapped {@link #withMdcConsumer(Consumer)} overload doesn't match. - * Capture once at the boundary, then call {@link #runWith} inside the - * callback body.

    - * - * @return MDC snapshot, or {@code null} when the current MDC is empty - */ - public static Map capture() { - final Map ctx = MDC.getCopyOfContextMap(); - if (ctx == null || ctx.isEmpty()) { - return null; - } - return new HashMap<>(ctx); - } - - /** - * Run an action with the given MDC snapshot installed, restoring the - * thread's prior MDC when the action completes. - * - *

    Companion to {@link #capture()}. If {@code snapshot} is {@code null} - * the action is invoked without touching the current MDC.

    - * - *

    Usage: - *

    {@code
    -     * final Map snap = MdcPropagation.capture();
    -     * future.subscribe(result -> MdcPropagation.runWith(snap, () -> {
    -     *     logger.info("result received: {}", result);
    -     * }));
    -     * }
    - * - * @param snapshot MDC snapshot from {@link #capture()} (may be null) - * @param action Action to run with the snapshot installed - */ - public static void runWith(final Map snapshot, final Runnable action) { - if (snapshot == null) { - action.run(); - return; - } - final Map prior = MDC.getCopyOfContextMap(); - try { - MDC.setContextMap(snapshot); - action.run(); - } finally { - if (prior != null) { - MDC.setContextMap(prior); - } else { - MDC.clear(); - } - } - } -} diff --git a/pantera-core/src/main/java/com/auto1/pantera/metrics/EventsQueueMetrics.java b/pantera-core/src/main/java/com/auto1/pantera/metrics/EventsQueueMetrics.java new file mode 100644 index 000000000..d937722a9 --- /dev/null +++ b/pantera-core/src/main/java/com/auto1/pantera/metrics/EventsQueueMetrics.java @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.metrics; + +import com.auto1.pantera.http.log.EcsLogger; +import io.micrometer.core.instrument.Counter; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Metrics and WARN emission for dropped {@code ProxyArtifactEvent} / + * {@code ArtifactEvent} queue entries. + * + *

    Used by every adapter that writes to a bounded + * {@link java.util.concurrent.LinkedBlockingQueue} of metadata events (see + * {@code MetadataEventQueues#proxyEventQueues}). When the per-repo queue is + * saturated, {@link java.util.Queue#offer(Object)} returns {@code false}; + * call {@link #recordDropped(String)} to:

    + *
      + *
    1. emit one WARN at {@code com.auto1.pantera.scheduling.events} with + * {@code event.action=queue_overflow} / {@code event.outcome=failure} + * and {@code repository.name=<repo>} β€” no stack trace;
    2. + *
    3. bump the Micrometer counter {@code pantera.events.queue.dropped} + * tagged with {@code queue=<repo>} when + * {@link MicrometerMetrics} is initialised.
    4. + *
    + * + *

    The event itself is silently dropped β€” callers MUST NOT throw. This + * class exists so a background-queue back-pressure event cannot escape the + * serve path and cascade into 503 / 500 responses (forensic Β§1.6, Β§1.7 + * F1.1; WI-00 in v2.2 target-architecture doc).

    + * + * @since 2.1.4 + */ +public final class EventsQueueMetrics { + + /** + * Counter name β€” visible on the Prometheus scrape endpoint as + * {@code pantera_events_queue_dropped_total{queue="<repo>"}}. + */ + public static final String COUNTER_NAME = "pantera.events.queue.dropped"; + + /** + * Process-wide drop tally (across all repos). Exposed for diagnostic + * tests that run without a {@link io.micrometer.core.instrument.MeterRegistry}. + */ + private static final AtomicLong DROP_COUNT = new AtomicLong(); + + private EventsQueueMetrics() { + // utility + } + + /** + * Record one dropped metadata event for {@code repoName}. + * + *

    Emits a single WARN log line and increments the + * {@code pantera.events.queue.dropped{queue=<repoName>}} counter. + * Never throws.

    + * + * @param repoName Repository whose queue overflowed + */ + public static void recordDropped(final String repoName) { + final long total = DROP_COUNT.incrementAndGet(); + EcsLogger.warn("com.auto1.pantera.scheduling.events") + .message("event queue full β€” dropping event") + .eventCategory("process") + .eventAction("queue_overflow") + .eventOutcome("failure") + .field("repository.name", repoName == null ? "unknown" : repoName) + .field("pantera.events.queue.drop_count", total) + .log(); + if (MicrometerMetrics.isInitialized()) { + try { + Counter.builder(COUNTER_NAME) + .description( + "Metadata events dropped because the per-repo bounded" + + " ProxyArtifactEvent/ArtifactEvent queue was full" + ) + .tag("queue", repoName == null ? "unknown" : repoName) + .register(MicrometerMetrics.getInstance().getRegistry()) + .increment(); + } catch (final RuntimeException ignored) { + // metrics registration must never escape the serve path + } + } + } + + /** + * Cumulative count of dropped events across all repos since JVM start. + * Used by tests to assert that a drop actually happened. + * + * @return Monotonic drop total + */ + public static long dropCount() { + return DROP_COUNT.get(); + } +} diff --git a/pantera-core/src/main/java/com/auto1/pantera/scheduling/JobDataRegistry.java b/pantera-core/src/main/java/com/auto1/pantera/scheduling/JobDataRegistry.java index 771b262d2..b98b5374f 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/scheduling/JobDataRegistry.java +++ b/pantera-core/src/main/java/com/auto1/pantera/scheduling/JobDataRegistry.java @@ -10,6 +10,8 @@ */ package com.auto1.pantera.scheduling; +import com.auto1.pantera.http.log.EcsLogger; +import com.auto1.pantera.http.misc.ConfigDefaults; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -28,11 +30,29 @@ * In a clustered setup, each node maintains its own registry. 
Since Quartz * ensures a given trigger fires on only one node at a time, the node that * scheduled the job always has the data in its registry. + *

    + * A defensive sanity cap ({@value #DEFAULT_MAX_ENTRIES} entries by default, + * overridable via {@code PANTERA_JOB_DATA_REGISTRY_MAX}) is enforced. The + * cap is NOT a hard limit β€” {@link #register} still accepts the entry so + * jobs never silently drop β€” but an overflow crosses a loud error log + * which flags a scheduler-side ref leak for operators. * * @since 1.20.13 */ public final class JobDataRegistry { + /** + * Default sanity cap for registered entries. + */ + private static final int DEFAULT_MAX_ENTRIES = 10_000; + + /** + * Sanity cap resolved from env / sysprop / default at class-load time. + */ + private static final int MAX_ENTRIES = ConfigDefaults.getInt( + "PANTERA_JOB_DATA_REGISTRY_MAX", DEFAULT_MAX_ENTRIES + ); + /** * In-memory store for non-serializable job data. */ @@ -51,6 +71,20 @@ private JobDataRegistry() { * @param value Runtime object (Queue, Consumer, etc.) */ public static void register(final String key, final Object value) { + final int size = DATA.size(); + if (size >= MAX_ENTRIES) { + EcsLogger.error("com.auto1.pantera.scheduling") + .message("JobDataRegistry overflow β€” scheduler bug leaking refs") + .eventCategory("process") + .eventAction("job_data_overflow") + .eventOutcome("failure") + .field("entry.count", size) + .field( + "key.prefix", + key.substring(0, Math.min(32, key.length())) + ) + .log(); + } DATA.put(key, value); } diff --git a/pantera-core/src/main/java/com/auto1/pantera/scheduling/RepositoryEvents.java b/pantera-core/src/main/java/com/auto1/pantera/scheduling/RepositoryEvents.java index f11ce1c7f..d8ca082b1 100644 --- a/pantera-core/src/main/java/com/auto1/pantera/scheduling/RepositoryEvents.java +++ b/pantera-core/src/main/java/com/auto1/pantera/scheduling/RepositoryEvents.java @@ -66,7 +66,7 @@ public void addUploadEventByKey(final Key key, final long size, final Headers headers) { final String aname = formatArtifactName(key); final String version = detectFileVersion(this.rtype, aname); - 
this.queue.add( + this.queue.add( // ok: unbounded ConcurrentLinkedDeque (ArtifactEvent queue) new ArtifactEvent( this.rtype, this.rname, new Login(headers).getValue(), aname, version, size @@ -81,7 +81,7 @@ this.rtype, this.rname, new Login(headers).getValue(), */ public void addDeleteEventByKey(final Key key) { final String aname = formatArtifactName(key); - this.queue.add( + this.queue.add( // ok: unbounded ConcurrentLinkedDeque (ArtifactEvent queue) new ArtifactEvent(this.rtype, this.rname, aname, RepositoryEvents.VERSION) ); } diff --git a/pantera-core/src/test/java/com/auto1/pantera/audit/AuditActionTest.java b/pantera-core/src/test/java/com/auto1/pantera/audit/AuditActionTest.java new file mode 100644 index 000000000..d6b6a78e4 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/audit/AuditActionTest.java @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.audit; + +import java.util.Arrays; +import java.util.Set; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Smoke tests for the closed {@link AuditAction} enum. Guards the set of + * audit-event values against accidental expansion β€” adding a new variant to + * the audit stream is a compliance / retention-policy decision, not a routine + * code change, so we assert the exact membership. 
+ */ +final class AuditActionTest { + + @Test + @DisplayName("AuditAction has exactly four members per Β§10.4") + void hasExactlyFourMembers() { + MatcherAssert.assertThat(AuditAction.values().length, Matchers.is(4)); + } + + @Test + @DisplayName("AuditAction members are the four canonical event names") + void membersAreTheFourCanonicalNames() { + final Set actual = Set.of(AuditAction.values()); + final Set expected = Set.of( + AuditAction.ARTIFACT_PUBLISH, + AuditAction.ARTIFACT_DOWNLOAD, + AuditAction.ARTIFACT_DELETE, + AuditAction.RESOLUTION + ); + MatcherAssert.assertThat(actual, Matchers.equalTo(expected)); + } + + @Test + @DisplayName("Non-audit operational events are NOT present") + void doesNotContainOperationalEvents() { + final Set names = Set.of( + Arrays.stream(AuditAction.values()) + .map(Enum::name) + .toArray(String[]::new) + ); + MatcherAssert.assertThat(names, Matchers.not(Matchers.hasItem("CACHE_WRITE"))); + MatcherAssert.assertThat(names, Matchers.not(Matchers.hasItem("CACHE_INVALIDATE"))); + MatcherAssert.assertThat(names, Matchers.not(Matchers.hasItem("POOL_INIT"))); + } + + @Test + @DisplayName("valueOf round-trips every member (assertion against name drift)") + void valueOfRoundTripsAllMembers() { + for (final AuditAction a : AuditAction.values()) { + MatcherAssert.assertThat(AuditAction.valueOf(a.name()), Matchers.is(a)); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cache/GlobalCacheConfigGroupMetadataStaleTest.java b/pantera-core/src/test/java/com/auto1/pantera/cache/GlobalCacheConfigGroupMetadataStaleTest.java new file mode 100644 index 000000000..6775e474f --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cache/GlobalCacheConfigGroupMetadataStaleTest.java @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General 
Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cache; + +import com.amihaiemil.eoyaml.Yaml; +import com.amihaiemil.eoyaml.YamlMapping; +import java.io.IOException; +import java.util.Optional; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link GlobalCacheConfig#groupMetadataStale()}. + * Verifies env β†’ YAML β†’ default resolution order. + * + * @since 2.2.0 + */ +final class GlobalCacheConfigGroupMetadataStaleTest { + + /** + * ConfigDefaults maps env-var {@code FOO_BAR_BAZ} to sysprop + * {@code foo.bar.baz}. We set sysprops in tests and clear them after. + */ + private static final String PROP_L1_SIZE = + "pantera.group.metadata.stale.l1.size"; + private static final String PROP_L1_TTL = + "pantera.group.metadata.stale.l1.ttl.seconds"; + private static final String PROP_L2_ENABLED = + "pantera.group.metadata.stale.l2.enabled"; + private static final String PROP_L2_TTL = + "pantera.group.metadata.stale.l2.ttl.seconds"; + private static final String PROP_L2_TIMEOUT = + "pantera.group.metadata.stale.l2.timeout.ms"; + + @BeforeEach + void setUp() { + GlobalCacheConfig.reset(); + clearSysProps(); + } + + @AfterEach + void tearDown() { + GlobalCacheConfig.reset(); + clearSysProps(); + } + + private void clearSysProps() { + System.clearProperty(PROP_L1_SIZE); + System.clearProperty(PROP_L1_TTL); + System.clearProperty(PROP_L2_ENABLED); + System.clearProperty(PROP_L2_TTL); + System.clearProperty(PROP_L2_TIMEOUT); + } + + @Test + void defaultValuesMatchConstants() { + GlobalCacheConfig.initialize(Optional.empty(), null); + final GlobalCacheConfig.GroupMetadataStaleConfig cfg = + GlobalCacheConfig.getInstance().groupMetadataStale(); + Assertions.assertEquals( + GlobalCacheConfig.DEFAULT_GROUP_METADATA_STALE_L1_MAX_SIZE, + cfg.l1MaxSize() + ); + 
Assertions.assertEquals( + GlobalCacheConfig.DEFAULT_GROUP_METADATA_STALE_L1_TTL_SECONDS, + cfg.l1TtlSeconds() + ); + Assertions.assertEquals( + GlobalCacheConfig.DEFAULT_GROUP_METADATA_STALE_L2_ENABLED, + cfg.l2Enabled() + ); + Assertions.assertEquals( + GlobalCacheConfig.DEFAULT_GROUP_METADATA_STALE_L2_TTL_SECONDS, + cfg.l2TtlSeconds() + ); + Assertions.assertEquals( + GlobalCacheConfig.DEFAULT_GROUP_METADATA_STALE_L2_TIMEOUT_MS, + cfg.l2TimeoutMs() + ); + } + + @Test + void yamlOverridesDefaults() throws IOException { + final String yaml = String.join("\n", + "caches:", + " group-metadata-stale:", + " l1:", + " maxSize: 42", + " ttlSeconds: 1234", + " l2:", + " enabled: false", + " ttlSeconds: 60", + " timeoutMs: 250" + ); + final YamlMapping caches = Yaml.createYamlInput(yaml).readYamlMapping() + .yamlMapping("caches"); + GlobalCacheConfig.initialize(Optional.empty(), caches); + final GlobalCacheConfig.GroupMetadataStaleConfig cfg = + GlobalCacheConfig.getInstance().groupMetadataStale(); + Assertions.assertEquals(42, cfg.l1MaxSize()); + Assertions.assertEquals(1234, cfg.l1TtlSeconds()); + Assertions.assertFalse(cfg.l2Enabled()); + Assertions.assertEquals(60, cfg.l2TtlSeconds()); + Assertions.assertEquals(250, cfg.l2TimeoutMs()); + } + + @Test + void envVarTrumpsYamlAndDefault() throws IOException { + final String yaml = String.join("\n", + "caches:", + " group-metadata-stale:", + " l1:", + " maxSize: 42", + " ttlSeconds: 1234" + ); + final YamlMapping caches = Yaml.createYamlInput(yaml).readYamlMapping() + .yamlMapping("caches"); + // Sysprop shape = lowercased env-var with '_' -> '.'. 
+ System.setProperty(PROP_L1_SIZE, "999"); + System.setProperty(PROP_L1_TTL, "5555"); + System.setProperty(PROP_L2_ENABLED, "false"); + System.setProperty(PROP_L2_TTL, "777"); + System.setProperty(PROP_L2_TIMEOUT, "88"); + GlobalCacheConfig.initialize(Optional.empty(), caches); + final GlobalCacheConfig.GroupMetadataStaleConfig cfg = + GlobalCacheConfig.getInstance().groupMetadataStale(); + Assertions.assertEquals(999, cfg.l1MaxSize()); + Assertions.assertEquals(5555, cfg.l1TtlSeconds()); + Assertions.assertFalse(cfg.l2Enabled()); + Assertions.assertEquals(777, cfg.l2TtlSeconds()); + Assertions.assertEquals(88, cfg.l2TimeoutMs()); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/cache/CooldownCacheInflightLeakTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/cache/CooldownCacheInflightLeakTest.java new file mode 100644 index 000000000..9b8c95749 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/cache/CooldownCacheInflightLeakTest.java @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.cooldown.cache; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.lang.reflect.Field; +import java.time.Duration; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeoutException; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Tests that the CooldownCache inflight map is properly cleaned up + * on exceptional completion, cancellation, and timeout. + * + * @since 2.2.0 + */ +final class CooldownCacheInflightLeakTest { + + private CooldownCache cache; + + @BeforeEach + void setUp() { + this.cache = new CooldownCache(10_000, Duration.ofHours(24), null); + } + + @Test + void inflightMapEmptyAfterExceptionalCompletion() throws Exception { + // Submit a query that fails with an exception + final CompletableFuture result = this.cache.isBlocked( + "test-repo", "artifact", "1.0.0", + () -> CompletableFuture.failedFuture( + new RuntimeException("db query failed") + ) + ); + + // Wait for the future to complete (exceptionally) + assertThrows(ExecutionException.class, result::get); + + // The inflight map must be empty after the exception + final ConcurrentMap inflight = getInflightMap(this.cache); + assertThat( + "Inflight map should be empty after exceptional completion", + inflight.size(), equalTo(0) + ); + } + + @Test + void inflightMapEmptyAfterCancellation() throws Exception { + // Submit a query that blocks indefinitely + final CompletableFuture blocker = new CompletableFuture<>(); + final CompletableFuture result = this.cache.isBlocked( + "test-repo", "artifact", "2.0.0", + () -> blocker + ); + + // Cancel the query + result.cancel(true); + + // Let the cancellation propagate + Thread.sleep(50); + + // The inflight map must be empty after 
cancellation + final ConcurrentMap inflight = getInflightMap(this.cache); + assertThat( + "Inflight map should be empty after cancellation", + inflight.size(), equalTo(0) + ); + } + + @Test + void inflightMapEmptyAfterSuccessfulCompletion() throws Exception { + // Submit a normal query + final CompletableFuture result = this.cache.isBlocked( + "test-repo", "artifact", "3.0.0", + () -> CompletableFuture.completedFuture(true) + ); + + result.get(); + + final ConcurrentMap inflight = getInflightMap(this.cache); + assertThat( + "Inflight map should be empty after successful completion", + inflight.size(), equalTo(0) + ); + } + + @Test + void inflightMapEmptyAfterTimeout() throws Exception { + // Submit a query that never completes (the orTimeout safety net should fire) + final CompletableFuture neverCompletes = new CompletableFuture<>(); + final CompletableFuture result = this.cache.isBlocked( + "test-repo", "artifact", "4.0.0", + () -> neverCompletes + ); + + // The future should eventually time out (30s orTimeout in production, + // but we verify the inflight entry is removed on timeout) + // For test speed, cancel after a brief wait to simulate timeout behaviour + result.orTimeout(100, java.util.concurrent.TimeUnit.MILLISECONDS); + + try { + result.get(500, java.util.concurrent.TimeUnit.MILLISECONDS); + } catch (final ExecutionException | TimeoutException ignored) { + // Expected + } + + // Let cleanup propagate + Thread.sleep(50); + + final ConcurrentMap inflight = getInflightMap(this.cache); + assertThat( + "Inflight map should be empty after timeout", + inflight.size(), equalTo(0) + ); + } + + /** + * Reflectively access the inflight map for assertions. 
+ */ + @SuppressWarnings("unchecked") + private static ConcurrentMap getInflightMap(final CooldownCache cache) + throws Exception { + final Field field = CooldownCache.class.getDeclaredField("inflight"); + field.setAccessible(true); + return (ConcurrentMap) field.get(cache); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/cache/CooldownCachePreWarmTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/cache/CooldownCachePreWarmTest.java new file mode 100644 index 000000000..d4db75e44 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/cache/CooldownCachePreWarmTest.java @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cooldown.cache; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import com.auto1.pantera.cooldown.metadata.FilteredMetadataCache; +import com.auto1.pantera.cooldown.metadata.MetadataFilter; +import com.auto1.pantera.cooldown.metadata.MetadataFilterService; +import com.auto1.pantera.cooldown.metadata.MetadataParser; +import com.auto1.pantera.cooldown.metadata.MetadataRewriter; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.util.Arrays; +import java.util.Collections; +import 
java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +/** + * Verifies that MetadataFilterService pre-warms CooldownCache L1 + * with release dates extracted from metadata (H1). + * + * After metadata parse, versions older than the cooldown period + * should be immediately available in L1 as allowed (false), without + * requiring a DB/Valkey round-trip. + * + * @since 2.2.0 + */ +final class CooldownCachePreWarmTest { + + private CooldownCache cooldownCache; + private MetadataFilterService service; + private AtomicInteger dbQueryCount; + + @BeforeEach + void setUp() { + this.cooldownCache = new CooldownCache(10_000, Duration.ofHours(24), null); + this.dbQueryCount = new AtomicInteger(0); + final CooldownSettings settings = new CooldownSettings(true, Duration.ofDays(7)); + this.service = new MetadataFilterService( + new NoopTestCooldownService(), + settings, + this.cooldownCache, + new FilteredMetadataCache(), + ForkJoinPool.commonPool(), + 50 + ); + } + + @Test + void preWarmsL1WithOldReleaseDatesFromMetadata() throws Exception { + // Versions: 1.0.0 released 30 days ago (old, should be pre-warmed as allowed), + // 2.0.0 released 3 days ago (within cooldown, should NOT be pre-warmed) + final Map releaseDates = new HashMap<>(); + releaseDates.put("1.0.0", Instant.now().minus(Duration.ofDays(30))); + releaseDates.put("2.0.0", Instant.now().minus(Duration.ofDays(3))); + + final PreWarmParser parser = new PreWarmParser( + Arrays.asList("1.0.0", "2.0.0"), "2.0.0", releaseDates + ); + final SimpleFilter filter = new SimpleFilter(); + final SimpleRewriter rewriter = new SimpleRewriter(); + + // Run metadata filtering -- this should 
trigger pre-warming + this.service.filterMetadata( + "npm", "test-repo", "test-pkg", + "raw".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, + Optional.of(new NoopInspector()) + ).get(); + + // Now check: looking up 1.0.0 in CooldownCache should be an L1 hit (pre-warmed) + final CompletableFuture result = this.cooldownCache.isBlocked( + "test-repo", "test-pkg", "1.0.0", + () -> { + this.dbQueryCount.incrementAndGet(); + return CompletableFuture.completedFuture(false); + } + ); + assertThat("1.0.0 should be allowed (pre-warmed from metadata)", result.get(), equalTo(false)); + assertThat("DB should NOT be queried for pre-warmed version", this.dbQueryCount.get(), equalTo(0)); + } + + @Test + void doesNotPreWarmFreshVersions() throws Exception { + // Version released 2 days ago -- within 7-day cooldown, should NOT be pre-warmed + final Map releaseDates = new HashMap<>(); + releaseDates.put("1.0.0", Instant.now().minus(Duration.ofDays(2))); + + final PreWarmParser parser = new PreWarmParser( + Collections.singletonList("1.0.0"), "1.0.0", releaseDates + ); + + this.service.filterMetadata( + "npm", "test-repo", "fresh-pkg", + "raw".getBytes(StandardCharsets.UTF_8), + parser, new SimpleFilter(), new SimpleRewriter(), + Optional.of(new NoopInspector()) + ).get(); + + // Looking up 1.0.0 should trigger a DB query (not pre-warmed) + final CompletableFuture result = this.cooldownCache.isBlocked( + "test-repo", "fresh-pkg", "1.0.0", + () -> { + this.dbQueryCount.incrementAndGet(); + return CompletableFuture.completedFuture(false); + } + ); + result.get(); + assertThat("DB should be queried for fresh version", this.dbQueryCount.get(), equalTo(1)); + } + + // -- test doubles ------------------------------------------------------- + + /** + * Parser that returns configurable release dates via extractReleaseDates(). 
+ */ + private static final class PreWarmParser implements MetadataParser> { + private final List versions; + private final String latest; + private final Map releaseDates; + + PreWarmParser( + final List versions, + final String latest, + final Map releaseDates + ) { + this.versions = versions; + this.latest = latest; + this.releaseDates = releaseDates; + } + + @Override + public List parse(final byte[] bytes) { + return this.versions; + } + + @Override + public List extractVersions(final List metadata) { + return metadata; + } + + @Override + public Optional getLatestVersion(final List metadata) { + return Optional.ofNullable(this.latest); + } + + @Override + public String contentType() { + return "application/json"; + } + + @Override + public Map extractReleaseDates(final List metadata) { + return this.releaseDates; + } + } + + private static final class SimpleFilter implements MetadataFilter> { + @Override + public List filter(final List metadata, final Set blocked) { + return metadata.stream().filter(v -> !blocked.contains(v)).collect(Collectors.toList()); + } + + @Override + public List updateLatest(final List metadata, final String latest) { + return metadata; + } + } + + private static final class SimpleRewriter implements MetadataRewriter> { + @Override + public byte[] rewrite(final List metadata) { + return String.join(",", metadata).getBytes(StandardCharsets.UTF_8); + } + + @Override + public String contentType() { + return "application/json"; + } + } + + private static final class NoopInspector implements CooldownInspector { + @Override + public CompletableFuture> releaseDate(final String artifact, final String version) { + return CompletableFuture.completedFuture(Optional.empty()); + } + + @Override + public CompletableFuture> dependencies( + final String artifact, final String version + ) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } + + private static final class NoopTestCooldownService implements CooldownService { + 
@Override + public CompletableFuture evaluate( + final CooldownRequest request, final CooldownInspector inspector + ) { + return CompletableFuture.completedFuture(CooldownResult.allowed()); + } + + @Override + public CompletableFuture unblock( + String t, String n, String a, String v, String actor + ) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture unblockAll(String t, String n, String actor) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture> activeBlocks(String t, String n) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/config/CooldownAdapterRegistryTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/config/CooldownAdapterRegistryTest.java new file mode 100644 index 000000000..12bfcf899 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/config/CooldownAdapterRegistryTest.java @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.cooldown.config; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.metadata.MetadataFilter; +import com.auto1.pantera.cooldown.metadata.MetadataParser; +import com.auto1.pantera.cooldown.metadata.MetadataRequestDetector; +import com.auto1.pantera.cooldown.metadata.MetadataRewriter; +import com.auto1.pantera.cooldown.response.CooldownResponseFactory; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.Set; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; + +/** + * Tests for {@link CooldownAdapterRegistry}. + * + *

<p>Verifies bundle registration, lookup by type, alias registration, + * and behaviour for missing types.</p>

    + * + * @since 2.2.0 + */ +final class CooldownAdapterRegistryTest { + + private CooldownAdapterRegistry registry; + + @BeforeEach + void setUp() { + this.registry = CooldownAdapterRegistry.instance(); + this.registry.clear(); + } + + @Test + void registerAndRetrieveBundle() { + final CooldownAdapterBundle> bundle = goBundle(); + this.registry.register("go", bundle); + + final Optional> found = this.registry.get("go"); + assertThat("Bundle should be found", found.isPresent(), is(true)); + assertThat("Parser should match", found.get().parser(), is(bundle.parser())); + assertThat("Filter should match", found.get().filter(), is(bundle.filter())); + assertThat("Rewriter should match", found.get().rewriter(), is(bundle.rewriter())); + assertThat("Detector should match", found.get().detector(), is(bundle.detector())); + assertThat("ResponseFactory should match", + found.get().responseFactory(), is(bundle.responseFactory())); + } + + @Test + void missingTypeReturnsEmpty() { + final Optional> found = this.registry.get("nonexistent"); + assertThat("Missing type returns empty", found.isPresent(), is(false)); + } + + @Test + void registerWithAliases() { + final CooldownAdapterBundle> bundle = goBundle(); + this.registry.register("maven", bundle, "gradle"); + + assertThat("Primary type resolved", + this.registry.get("maven").isPresent(), is(true)); + assertThat("Alias resolved", + this.registry.get("gradle").isPresent(), is(true)); + assertThat("Alias maps to same bundle", + this.registry.get("gradle").get().parser(), + is(this.registry.get("maven").get().parser())); + } + + @Test + void registeredTypesIncludesAllTypesAndAliases() { + this.registry.register("go", goBundle()); + this.registry.register("maven", goBundle(), "gradle"); + this.registry.register("npm", goBundle()); + + assertThat(this.registry.registeredTypes(), + containsInAnyOrder("go", "maven", "gradle", "npm")); + } + + @Test + void clearRemovesAll() { + this.registry.register("go", goBundle()); + 
this.registry.register("npm", goBundle()); + this.registry.clear(); + + assertThat("After clear, go should be absent", + this.registry.get("go").isPresent(), is(false)); + assertThat("After clear, npm should be absent", + this.registry.get("npm").isPresent(), is(false)); + assertThat("After clear, registered types should be empty", + this.registry.registeredTypes().isEmpty(), is(true)); + } + + @Test + void overwriteExistingType() { + final CooldownAdapterBundle> bundle1 = goBundle(); + final CooldownAdapterBundle> bundle2 = goBundle(); + this.registry.register("go", bundle1); + this.registry.register("go", bundle2); + + assertThat("Overwritten bundle should be the latest", + this.registry.get("go").get().parser(), is(bundle2.parser())); + } + + @Test + void bundleComponentsAreAccessible() { + final CooldownAdapterBundle> bundle = goBundle(); + this.registry.register("go", bundle); + + final CooldownAdapterBundle found = this.registry.get("go").get(); + assertThat("parser is not null", found.parser(), is(notNullValue())); + assertThat("filter is not null", found.filter(), is(notNullValue())); + assertThat("rewriter is not null", found.rewriter(), is(notNullValue())); + assertThat("detector is not null", found.detector(), is(notNullValue())); + assertThat("responseFactory is not null", found.responseFactory(), is(notNullValue())); + } + + @Test + void bundleRejectsNullComponents() { + try { + new CooldownAdapterBundle<>(null, new StubFilter(), new StubRewriter(), + new StubDetector(), new StubResponseFactory()); + assertThat("Should have thrown NPE", false, is(true)); + } catch (final NullPointerException expected) { + // expected + } + } + + // --- Helper: create a Go-style bundle --- + + private static CooldownAdapterBundle> goBundle() { + return new CooldownAdapterBundle<>( + new StubParser(), + new StubFilter(), + new StubRewriter(), + new StubDetector(), + new StubResponseFactory() + ); + } + + // --- Stub implementations --- + + private static final class 
StubParser implements MetadataParser> { + @Override + public List parse(final byte[] bytes) { + final String body = new String(bytes, StandardCharsets.UTF_8); + final List versions = new ArrayList<>(); + for (final String line : body.split("\n")) { + final String trimmed = line.trim(); + if (!trimmed.isEmpty()) { + versions.add(trimmed); + } + } + return versions; + } + + @Override + public List extractVersions(final List metadata) { + return metadata; + } + + @Override + public Optional getLatestVersion(final List metadata) { + return metadata.isEmpty() ? Optional.empty() + : Optional.of(metadata.get(metadata.size() - 1)); + } + + @Override + public String contentType() { + return "text/plain"; + } + } + + private static final class StubFilter implements MetadataFilter> { + @Override + public List filter(final List metadata, final Set blocked) { + final List result = new ArrayList<>(metadata); + result.removeAll(blocked); + return result; + } + + @Override + public List updateLatest(final List metadata, final String newLatest) { + return metadata; + } + } + + private static final class StubRewriter implements MetadataRewriter> { + @Override + public byte[] rewrite(final List metadata) { + return String.join("\n", metadata).getBytes(StandardCharsets.UTF_8); + } + + @Override + public String contentType() { + return "text/plain"; + } + } + + private static final class StubDetector implements MetadataRequestDetector { + @Override + public boolean isMetadataRequest(final String path) { + return path != null && path.endsWith("/@v/list"); + } + + @Override + public Optional extractPackageName(final String path) { + if (!this.isMetadataRequest(path)) { + return Optional.empty(); + } + return Optional.of(path.substring(1, path.length() - "/@v/list".length())); + } + + @Override + public String repoType() { + return "go"; + } + } + + private static final class StubResponseFactory implements CooldownResponseFactory { + @Override + public Response forbidden(final 
CooldownBlock block) { + return ResponseBuilder.forbidden() + .textBody("blocked by cooldown") + .build(); + } + + @Override + public String repoType() { + return "go"; + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/CooldownSettingsTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/config/CooldownSettingsTest.java similarity index 98% rename from pantera-core/src/test/java/com/auto1/pantera/cooldown/CooldownSettingsTest.java rename to pantera-core/src/test/java/com/auto1/pantera/cooldown/config/CooldownSettingsTest.java index 263594fd7..e08b4c5cc 100644 --- a/pantera-core/src/test/java/com/auto1/pantera/cooldown/CooldownSettingsTest.java +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/config/CooldownSettingsTest.java @@ -8,7 +8,7 @@ * * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. */ -package com.auto1.pantera.cooldown; +package com.auto1.pantera.cooldown.config; import org.junit.jupiter.api.Test; diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataServicePerformanceTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataServicePerformanceTest.java index 50e904f15..e9a98055a 100644 --- a/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataServicePerformanceTest.java +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataServicePerformanceTest.java @@ -10,13 +10,13 @@ */ package com.auto1.pantera.cooldown.metadata; -import com.auto1.pantera.cooldown.CooldownCache; -import com.auto1.pantera.cooldown.CooldownDependency; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownResult; -import com.auto1.pantera.cooldown.CooldownService; -import com.auto1.pantera.cooldown.CooldownSettings; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import 
com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.config.CooldownSettings; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; @@ -39,7 +39,7 @@ import static org.hamcrest.Matchers.lessThan; /** - * Performance tests for {@link CooldownMetadataServiceImpl}. + * Performance tests for {@link MetadataFilterService}. * *

<p>Performance requirements:</p>

    *
      @@ -67,7 +67,7 @@ final class CooldownMetadataServicePerformanceTest { */ private static final int WARMUP_ITERATIONS = 10; - private CooldownMetadataServiceImpl service; + private MetadataFilterService service; private FastCooldownService cooldownService; @BeforeEach @@ -77,7 +77,7 @@ void setUp() { final CooldownCache cooldownCache = new CooldownCache(); // Use fresh metadata cache for each test to measure actual filtering time final FilteredMetadataCache metadataCache = new FilteredMetadataCache(); - this.service = new CooldownMetadataServiceImpl( + this.service = new MetadataFilterService( this.cooldownService, settings, cooldownCache, @@ -283,10 +283,10 @@ public CompletableFuture evaluate( final String key = request.artifact() + "@" + request.version(); if (this.blockedVersions.contains(key)) { return CompletableFuture.completedFuture( - CooldownResult.blocked(new com.auto1.pantera.cooldown.CooldownBlock( + CooldownResult.blocked(new com.auto1.pantera.cooldown.api.CooldownBlock( request.repoType(), request.repoName(), request.artifact(), request.version(), - com.auto1.pantera.cooldown.CooldownReason.FRESH_RELEASE, + com.auto1.pantera.cooldown.api.CooldownReason.FRESH_RELEASE, Instant.now(), Instant.now().plus(Duration.ofDays(7)), java.util.Collections.emptyList() )) @@ -308,7 +308,7 @@ public CompletableFuture unblockAll(String repoType, String repoName, Stri } @Override - public CompletableFuture> activeBlocks( + public CompletableFuture> activeBlocks( String repoType, String repoName ) { return CompletableFuture.completedFuture(java.util.Collections.emptyList()); diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCacheSWRTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCacheSWRTest.java new file mode 100644 index 000000000..3d22b5856 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/FilteredMetadataCacheSWRTest.java @@ -0,0 +1,197 @@ +/* 
+ * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cooldown.metadata; + +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +/** + * Tests stale-while-revalidate behaviour on FilteredMetadataCache (H3). + * + * On TTL expiry the cache should: + * 1. Return stale bytes immediately (no blocking wait). + * 2. Trigger background re-evaluation. + * 3. On subsequent get(), return fresh bytes once revalidation completes. + * + * @since 2.2.0 + */ +final class FilteredMetadataCacheSWRTest { + + @Test + void returnsStaleBytesThenFreshAfterRevalidation() throws Exception { + // Build cache with a generous Caffeine maxSize / TTL so entries live + // long enough in the underlying Caffeine cache for our test to read them. + // The dynamic CacheEntry.isExpired() check drives SWR, not Caffeine eviction. 
+ final FilteredMetadataCache cache = new FilteredMetadataCache( + 1_000, Duration.ofHours(1), Duration.ofHours(1), null + ); + + final byte[] staleData = "stale-metadata".getBytes(StandardCharsets.UTF_8); + final byte[] freshData = "fresh-metadata".getBytes(StandardCharsets.UTF_8); + final AtomicInteger loadCount = new AtomicInteger(0); + final CountDownLatch revalidationStarted = new CountDownLatch(1); + + // Step 1: Populate cache with an entry that will expire in 100 ms + cache.get("npm", "repo", "pkg", + () -> { + loadCount.incrementAndGet(); + return CompletableFuture.completedFuture( + FilteredMetadataCache.CacheEntry.withBlockedVersions( + staleData, + Instant.now().plus(Duration.ofMillis(100)), + Duration.ofHours(1) + ) + ); + } + ).get(); + + assertThat("Initial load should fire", loadCount.get(), equalTo(1)); + + // Step 2: Wait for the entry to expire (blockedUntil has passed) + Thread.sleep(150); + + // Step 3: Next get() should return STALE bytes immediately + // and trigger background revalidation + final byte[] swrResult = cache.get("npm", "repo", "pkg", + () -> { + loadCount.incrementAndGet(); + revalidationStarted.countDown(); + return CompletableFuture.completedFuture( + FilteredMetadataCache.CacheEntry.noBlockedVersions( + freshData, Duration.ofHours(1) + ) + ); + } + ).get(); + + assertThat( + "SWR should return stale bytes immediately", + swrResult, equalTo(staleData) + ); + + // Step 4: Wait for background revalidation to complete + final boolean started = revalidationStarted.await(2, TimeUnit.SECONDS); + assertThat("Background revalidation should fire", started, equalTo(true)); + + // Give the background future a moment to write back to cache + Thread.sleep(50); + + // Step 5: Subsequent get() should return FRESH bytes + final byte[] freshResult = cache.get("npm", "repo", "pkg", + () -> { + loadCount.incrementAndGet(); + return CompletableFuture.completedFuture( + FilteredMetadataCache.CacheEntry.noBlockedVersions( + 
"should-not-be-used".getBytes(StandardCharsets.UTF_8), + Duration.ofHours(1) + ) + ); + } + ).get(); + + assertThat( + "After revalidation, should return fresh bytes", + freshResult, equalTo(freshData) + ); + + // Total loads: 1 initial + 1 background revalidation = 2 + // (the 3rd get() should be a cache hit, not a load) + assertThat("Total loads should be 2 (initial + revalidation)", loadCount.get(), equalTo(2)); + } + + @Test + void doesNotDuplicateRevalidation() throws Exception { + final FilteredMetadataCache cache = new FilteredMetadataCache( + 1_000, Duration.ofHours(1), Duration.ofHours(1), null + ); + + final byte[] staleData = "stale".getBytes(StandardCharsets.UTF_8); + final byte[] freshData = "fresh".getBytes(StandardCharsets.UTF_8); + final AtomicInteger loadCount = new AtomicInteger(0); + final CountDownLatch loaderGate = new CountDownLatch(1); + + // Populate with short-lived entry + cache.get("npm", "repo", "pkg2", + () -> { + loadCount.incrementAndGet(); + return CompletableFuture.completedFuture( + FilteredMetadataCache.CacheEntry.withBlockedVersions( + staleData, + Instant.now().plus(Duration.ofMillis(100)), + Duration.ofHours(1) + ) + ); + } + ).get(); + + assertThat(loadCount.get(), equalTo(1)); + + // Wait for expiry + Thread.sleep(150); + + // Use a slow loader to simulate async revalidation. + // The first call returns stale + starts background reload. + // The second call (while reload is in-flight) should also return stale + // and NOT start a duplicate reload. 
+ final CompletableFuture swr1 = cache.get("npm", "repo", "pkg2", + () -> { + loadCount.incrementAndGet(); + // Slow loader: waits for gate + return CompletableFuture.supplyAsync(() -> { + try { + loaderGate.await(2, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return FilteredMetadataCache.CacheEntry.noBlockedVersions( + freshData, Duration.ofHours(1) + ); + }); + } + ); + + // First call returns stale immediately + assertThat("First SWR call returns stale", swr1.get(), equalTo(staleData)); + + // Second call while revalidation is in-flight + final CompletableFuture swr2 = cache.get("npm", "repo", "pkg2", + () -> { + loadCount.incrementAndGet(); + return CompletableFuture.completedFuture( + FilteredMetadataCache.CacheEntry.noBlockedVersions(freshData, Duration.ofHours(1)) + ); + } + ); + + // Second call also returns stale (background still running) + assertThat("Second SWR call returns stale", swr2.get(), equalTo(staleData)); + + // Release the background loader + loaderGate.countDown(); + + // Wait for background completion + Thread.sleep(100); + + // Only 2 loads total: 1 initial + 1 revalidation (no duplicate) + assertThat("Only one background revalidation should fire", loadCount.get(), equalTo(2)); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/MetadataFilterServiceIntegrationTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/MetadataFilterServiceIntegrationTest.java new file mode 100644 index 000000000..5af0ea892 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/MetadataFilterServiceIntegrationTest.java @@ -0,0 +1,441 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. 
+ * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cooldown.metadata; + +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ForkJoinPool; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.containsString; + +/** + * Integration test for {@link MetadataFilterService} end-to-end with + * Go adapter components (simplest metadata format: plain-text version list). + * + *

<p>Wires MetadataFilterService with a fake CooldownService that blocks + * versions {@code v1.0.0} and {@code v2.0.0}. Verifies:</p>

+ * <ul>
+ *   <li>Filtered bytes don't contain blocked versions</li>
+ *   <li>Cache hit on second call (verify via parse count)</li>
+ *   <li>SWR behaviour: expire, get stale, background re-evaluates</li>
+ * </ul>
      + * + * @since 2.2.0 + */ +final class MetadataFilterServiceIntegrationTest { + + /** + * Go-style metadata: one version per line. + */ + private static final String GO_METADATA = + "v0.1.0\nv0.2.0\nv1.0.0\nv1.1.0\nv2.0.0\nv2.1.0\nv3.0.0"; + + private BlockingCooldownService cooldownService; + private CooldownSettings settings; + private CooldownCache cooldownCache; + private FilteredMetadataCache metadataCache; + private MetadataFilterService service; + + @BeforeEach + void setUp() { + this.cooldownService = new BlockingCooldownService(); + this.cooldownService.block("test-pkg", "v1.0.0"); + this.cooldownService.block("test-pkg", "v2.0.0"); + + this.settings = new CooldownSettings(true, Duration.ofDays(7)); + this.cooldownCache = new CooldownCache(); + this.metadataCache = new FilteredMetadataCache(); + this.service = new MetadataFilterService( + this.cooldownService, + this.settings, + this.cooldownCache, + this.metadataCache, + ForkJoinPool.commonPool(), + 50 + ); + } + + @Test + void filteredBytesDoNotContainBlockedVersions() throws Exception { + final GoParser parser = new GoParser(); + final GoFilter filter = new GoFilter(); + final GoRewriter rewriter = new GoRewriter(); + + final byte[] result = this.service.filterMetadata( + "go", "go-repo", "test-pkg", + GO_METADATA.getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, + Optional.of(new NoopInspector()) + ).get(); + + final String output = new String(result, StandardCharsets.UTF_8); + assertThat("v1.0.0 must be filtered out", output, not(containsString("v1.0.0"))); + assertThat("v2.0.0 must be filtered out", output, not(containsString("v2.0.0"))); + assertThat("v0.1.0 must remain", output, containsString("v0.1.0")); + assertThat("v0.2.0 must remain", output, containsString("v0.2.0")); + assertThat("v1.1.0 must remain", output, containsString("v1.1.0")); + assertThat("v2.1.0 must remain", output, containsString("v2.1.0")); + assertThat("v3.0.0 must remain", output, containsString("v3.0.0")); 
+ } + + @Test + void cacheHitOnSecondCall() throws Exception { + final GoParser parser = new GoParser(); + final GoFilter filter = new GoFilter(); + final GoRewriter rewriter = new GoRewriter(); + final NoopInspector inspector = new NoopInspector(); + + // First call β€” cache miss + this.service.filterMetadata( + "go", "go-repo", "test-pkg", + GO_METADATA.getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + final int firstParseCount = parser.parseCount; + assertThat("First call must parse", firstParseCount, greaterThanOrEqualTo(1)); + + // Second call β€” cache hit + this.service.filterMetadata( + "go", "go-repo", "test-pkg", + GO_METADATA.getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + assertThat("Second call must hit cache (no re-parse)", + parser.parseCount, equalTo(firstParseCount)); + } + + @Test + void swrReturnsStaleAndRevalidatesInBackground() throws Exception { + // Use a very short blockedUntil so the cache entry expires quickly + final Instant shortBlockedUntil = Instant.now().plus(Duration.ofMillis(100)); + final ShortExpiryCooldownService shortService = + new ShortExpiryCooldownService(shortBlockedUntil); + shortService.block("test-pkg", "v1.0.0"); + + final MetadataFilterService swrService = new MetadataFilterService( + shortService, + this.settings, + new CooldownCache(), + new FilteredMetadataCache(), + ForkJoinPool.commonPool(), + 50 + ); + + final GoParser parser = new GoParser(); + final GoFilter filter = new GoFilter(); + final GoRewriter rewriter = new GoRewriter(); + final NoopInspector inspector = new NoopInspector(); + + // First call β€” caches with short TTL + final byte[] result1 = swrService.filterMetadata( + "go", "go-repo", "test-pkg", + GO_METADATA.getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + final String output1 = new String(result1, StandardCharsets.UTF_8); + assertThat("v1.0.0 
should be blocked initially", output1, not(containsString("v1.0.0"))); + final int firstParseCount = parser.parseCount; + + // Wait for logical expiry + Thread.sleep(150); + + // Expire the block in the service + shortService.unblock("go", "go-repo", "test-pkg", "v1.0.0", "admin"); + + // Second call β€” SWR returns stale bytes immediately + final byte[] result2 = swrService.filterMetadata( + "go", "go-repo", "test-pkg", + GO_METADATA.getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + // Stale response is served immediately (may still have v1.0.0 filtered) + assertThat("SWR returns bytes", result2.length > 0, equalTo(true)); + + // Wait for background revalidation + Thread.sleep(300); + + // Third call β€” should return fresh data + swrService.filterMetadata( + "go", "go-repo", "test-pkg", + GO_METADATA.getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + // Parser should have been called again by background revalidation + assertThat("Background revalidation should re-parse", + parser.parseCount, greaterThanOrEqualTo(firstParseCount + 1)); + } + + @Test + void invalidateForcesCacheMiss() throws Exception { + final GoParser parser = new GoParser(); + final GoFilter filter = new GoFilter(); + final GoRewriter rewriter = new GoRewriter(); + final NoopInspector inspector = new NoopInspector(); + + // Populate cache + this.service.filterMetadata( + "go", "go-repo", "test-pkg", + GO_METADATA.getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + final int count1 = parser.parseCount; + + // Invalidate + this.service.invalidate("go", "go-repo", "test-pkg"); + + // Next call must re-parse + this.service.filterMetadata( + "go", "go-repo", "test-pkg", + GO_METADATA.getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + assertThat("Post-invalidation must re-parse", + parser.parseCount, 
equalTo(count1 + 1)); + } + + @Test + void disabledCooldownReturnsRawMetadata() throws Exception { + final CooldownSettings disabled = new CooldownSettings(false, Duration.ofDays(7)); + final MetadataFilterService disabledService = new MetadataFilterService( + this.cooldownService, disabled, this.cooldownCache, + this.metadataCache, ForkJoinPool.commonPool(), 50 + ); + + final byte[] raw = GO_METADATA.getBytes(StandardCharsets.UTF_8); + final byte[] result = disabledService.filterMetadata( + "go", "go-repo", "test-pkg", raw, + new GoParser(), new GoFilter(), new GoRewriter(), + Optional.empty() + ).get(); + + assertThat("Disabled cooldown returns raw bytes", result, equalTo(raw)); + } + + // --- Minimal Go adapter implementations for integration testing --- + + private static final class GoParser implements MetadataParser> { + volatile int parseCount; + + @Override + public List parse(final byte[] bytes) { + this.parseCount++; + if (bytes == null || bytes.length == 0) { + return Collections.emptyList(); + } + final String body = new String(bytes, StandardCharsets.UTF_8); + final String[] lines = body.split("\n", -1); + final List versions = new ArrayList<>(lines.length); + for (final String line : lines) { + final String trimmed = line.trim(); + if (!trimmed.isEmpty()) { + versions.add(trimmed); + } + } + return versions; + } + + @Override + public List extractVersions(final List metadata) { + return metadata == null ? 
Collections.emptyList() : List.copyOf(metadata); + } + + @Override + public Optional getLatestVersion(final List metadata) { + if (metadata == null || metadata.isEmpty()) { + return Optional.empty(); + } + return Optional.of(metadata.get(metadata.size() - 1)); + } + + @Override + public String contentType() { + return "text/plain"; + } + } + + private static final class GoFilter implements MetadataFilter> { + @Override + public List filter(final List metadata, final Set blockedVersions) { + if (blockedVersions.isEmpty()) { + return metadata; + } + final List result = new ArrayList<>(); + for (final String v : metadata) { + if (!blockedVersions.contains(v)) { + result.add(v); + } + } + return result; + } + + @Override + public List updateLatest(final List metadata, final String newLatest) { + return metadata; + } + } + + private static final class GoRewriter implements MetadataRewriter> { + @Override + public byte[] rewrite(final List metadata) { + if (metadata == null || metadata.isEmpty()) { + return new byte[0]; + } + return String.join("\n", metadata).getBytes(StandardCharsets.UTF_8); + } + + @Override + public String contentType() { + return "text/plain"; + } + } + + // --- Test doubles --- + + private static final class BlockingCooldownService implements CooldownService { + private final Set blocked = new HashSet<>(); + + void block(final String pkg, final String version) { + this.blocked.add(pkg + "@" + version); + } + + @Override + public CompletableFuture evaluate( + final CooldownRequest request, final CooldownInspector inspector + ) { + final String key = request.artifact() + "@" + request.version(); + if (this.blocked.contains(key)) { + return CompletableFuture.completedFuture( + CooldownResult.blocked(new CooldownBlock( + request.repoType(), request.repoName(), + request.artifact(), request.version(), + CooldownReason.FRESH_RELEASE, Instant.now(), + Instant.now().plus(Duration.ofDays(7)), + Collections.emptyList() + )) + ); + } + return 
CompletableFuture.completedFuture(CooldownResult.allowed()); + } + + @Override + public CompletableFuture unblock( + String rt, String rn, String a, String v, String actor + ) { + this.blocked.remove(a + "@" + v); + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture unblockAll(String rt, String rn, String actor) { + this.blocked.clear(); + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture> activeBlocks(String rt, String rn) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } + + private static final class ShortExpiryCooldownService implements CooldownService { + private final Set blocked = new HashSet<>(); + private final Instant blockedUntil; + + ShortExpiryCooldownService(final Instant blockedUntil) { + this.blockedUntil = blockedUntil; + } + + void block(final String pkg, final String version) { + this.blocked.add(pkg + "@" + version); + } + + @Override + public CompletableFuture evaluate( + final CooldownRequest request, final CooldownInspector inspector + ) { + final String key = request.artifact() + "@" + request.version(); + if (this.blocked.contains(key)) { + return CompletableFuture.completedFuture( + CooldownResult.blocked(new CooldownBlock( + request.repoType(), request.repoName(), + request.artifact(), request.version(), + CooldownReason.FRESH_RELEASE, Instant.now(), + this.blockedUntil, Collections.emptyList() + )) + ); + } + return CompletableFuture.completedFuture(CooldownResult.allowed()); + } + + @Override + public CompletableFuture unblock( + String rt, String rn, String a, String v, String actor + ) { + this.blocked.remove(a + "@" + v); + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture unblockAll(String rt, String rn, String actor) { + this.blocked.clear(); + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture> activeBlocks(String rt, String rn) { + 
return CompletableFuture.completedFuture(Collections.emptyList()); + } + } + + private static final class NoopInspector implements CooldownInspector { + @Override + public CompletableFuture> releaseDate(String artifact, String version) { + return CompletableFuture.completedFuture(Optional.empty()); + } + + @Override + public CompletableFuture> dependencies( + String artifact, String version + ) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataServiceImplTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/MetadataFilterServiceTest.java similarity index 91% rename from pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataServiceImplTest.java rename to pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/MetadataFilterServiceTest.java index 1e02c4068..dfc2d2cea 100644 --- a/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/CooldownMetadataServiceImplTest.java +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/MetadataFilterServiceTest.java @@ -10,13 +10,13 @@ */ package com.auto1.pantera.cooldown.metadata; -import com.auto1.pantera.cooldown.CooldownCache; -import com.auto1.pantera.cooldown.CooldownInspector; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownResult; -import com.auto1.pantera.cooldown.CooldownService; -import com.auto1.pantera.cooldown.CooldownSettings; -import com.auto1.pantera.cooldown.NoopCooldownService; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import 
com.auto1.pantera.cooldown.impl.NoopCooldownService; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -39,13 +39,13 @@ import static org.junit.jupiter.api.Assertions.assertThrows; /** - * Tests for {@link CooldownMetadataServiceImpl}. + * Tests for {@link MetadataFilterService}. * * @since 1.0 */ -final class CooldownMetadataServiceImplTest { +final class MetadataFilterServiceTest { - private CooldownMetadataServiceImpl service; + private MetadataFilterService service; private TestCooldownService cooldownService; private CooldownSettings settings; private CooldownCache cooldownCache; @@ -57,7 +57,7 @@ void setUp() { this.settings = new CooldownSettings(true, Duration.ofDays(7)); this.cooldownCache = new CooldownCache(); this.metadataCache = new FilteredMetadataCache(); - this.service = new CooldownMetadataServiceImpl( + this.service = new MetadataFilterService( this.cooldownService, this.settings, this.cooldownCache, @@ -164,7 +164,7 @@ void throwsWhenAllVersionsBlocked() { void returnsRawMetadataWhenCooldownDisabled() throws Exception { // Disable cooldown final CooldownSettings disabledSettings = new CooldownSettings(false, Duration.ofDays(7)); - final CooldownMetadataServiceImpl disabledService = new CooldownMetadataServiceImpl( + final MetadataFilterService disabledService = new MetadataFilterService( this.cooldownService, disabledSettings, this.cooldownCache, @@ -369,13 +369,13 @@ void unblockAllInvalidatesAllPackagesInRepo() throws Exception { void cacheExpiresWhenBlockExpiresAndReturnsUnblockedVersion() throws Exception { // Block version with very short expiry (100ms) final Instant shortBlockedUntil = Instant.now().plus(Duration.ofMillis(100)); - + // Use a custom cooldown service that returns short blockedUntil - final ShortExpiryTestCooldownService shortExpiryService = + final ShortExpiryTestCooldownService shortExpiryService = new ShortExpiryTestCooldownService(shortBlockedUntil); shortExpiryService.blockVersion("test-pkg", 
"3.0.0"); - final CooldownMetadataServiceImpl shortExpiryMetadataService = new CooldownMetadataServiceImpl( + final MetadataFilterService shortExpiryMetadataService = new MetadataFilterService( shortExpiryService, this.settings, new CooldownCache(), @@ -399,7 +399,7 @@ void cacheExpiresWhenBlockExpiresAndReturnsUnblockedVersion() throws Exception { parser, filter, rewriter, Optional.of(inspector) ).get(); - assertThat("3.0.0 should be blocked initially", + assertThat("3.0.0 should be blocked initially", filter.lastBlockedVersions.contains("3.0.0"), equalTo(true)); final int firstParseCount = parser.parseCount; @@ -410,18 +410,30 @@ void cacheExpiresWhenBlockExpiresAndReturnsUnblockedVersion() throws Exception { // Simulate block expiry in cooldown service shortExpiryService.expireBlock("test-pkg", "3.0.0"); - // Second request after expiry - cache should have expired, 3.0.0 should be allowed + // Second request after expiry β€” SWR returns stale bytes immediately + // and triggers background re-evaluation. The stale response still has + // 3.0.0 filtered, but the background revalidation runs asynchronously. 
shortExpiryMetadataService.filterMetadata( "npm", "test-repo", "test-pkg", "raw".getBytes(StandardCharsets.UTF_8), parser, filter, rewriter, Optional.of(inspector) ).get(); - // Should have re-parsed (cache expired based on blockedUntil) - assertThat("Should re-parse after cache expiry", + // Wait for background revalidation to complete + Thread.sleep(200); + + // Third request β€” should return fresh data with 3.0.0 allowed + shortExpiryMetadataService.filterMetadata( + "npm", "test-repo", "test-pkg", + "raw".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + // Background revalidation should have re-parsed + assertThat("Should re-parse via SWR background revalidation", parser.parseCount, equalTo(firstParseCount + 1)); - // 3.0.0 should no longer be blocked - assertThat("3.0.0 should not be blocked after expiry", + // 3.0.0 should no longer be blocked after revalidation + assertThat("3.0.0 should not be blocked after expiry + revalidation", filter.lastBlockedVersions.contains("3.0.0"), equalTo(false)); } @@ -454,12 +466,12 @@ public CompletableFuture evaluate( final String key = request.artifact() + "@" + request.version(); if (this.blockedVersions.contains(key)) { return CompletableFuture.completedFuture( - CooldownResult.blocked(new com.auto1.pantera.cooldown.CooldownBlock( + CooldownResult.blocked(new com.auto1.pantera.cooldown.api.CooldownBlock( request.repoType(), request.repoName(), request.artifact(), request.version(), - com.auto1.pantera.cooldown.CooldownReason.FRESH_RELEASE, + com.auto1.pantera.cooldown.api.CooldownReason.FRESH_RELEASE, Instant.now(), this.blockedUntil, // Use configurable blockedUntil java.util.Collections.emptyList() @@ -484,7 +496,7 @@ public CompletableFuture unblockAll(String repoType, String repoName, Stri } @Override - public CompletableFuture> activeBlocks( + public CompletableFuture> activeBlocks( String repoType, String repoName ) { return 
CompletableFuture.completedFuture(java.util.Collections.emptyList()); @@ -506,12 +518,12 @@ public CompletableFuture evaluate( final String key = request.artifact() + "@" + request.version(); if (this.blockedVersions.contains(key)) { return CompletableFuture.completedFuture( - CooldownResult.blocked(new com.auto1.pantera.cooldown.CooldownBlock( + CooldownResult.blocked(new com.auto1.pantera.cooldown.api.CooldownBlock( request.repoType(), request.repoName(), request.artifact(), request.version(), - com.auto1.pantera.cooldown.CooldownReason.FRESH_RELEASE, + com.auto1.pantera.cooldown.api.CooldownReason.FRESH_RELEASE, Instant.now(), Instant.now().plus(Duration.ofDays(7)), java.util.Collections.emptyList() @@ -536,7 +548,7 @@ public CompletableFuture unblockAll(String repoType, String repoName, Stri } @Override - public CompletableFuture> activeBlocks( + public CompletableFuture> activeBlocks( String repoType, String repoName ) { return CompletableFuture.completedFuture(java.util.Collections.emptyList()); @@ -633,7 +645,7 @@ public CompletableFuture> releaseDate(final String artifact, f } @Override - public CompletableFuture> dependencies( + public CompletableFuture> dependencies( final String artifact, final String version ) { return CompletableFuture.completedFuture(java.util.Collections.emptyList()); diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/PolicyChangeInvalidationTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/PolicyChangeInvalidationTest.java new file mode 100644 index 000000000..8578a7c51 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/PolicyChangeInvalidationTest.java @@ -0,0 +1,411 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. 
+ * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cooldown.metadata; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ForkJoinPool; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +/** + * Tests that a cooldown policy change (e.g. duration 30d β†’ 5d) via the + * config API invalidates {@link FilteredMetadataCache} entries for the + * affected repo type, and that the next query recomputes the result + * against the new policy rather than returning stale filtered bytes. + * + *

      The policy-change path in {@code CooldownHandler.updateConfig} does + * two things when the cooldown duration changes:

      + *
        + *
      1. {@code settings.update(...)} β€” mutate the live {@link CooldownSettings} + * so future evaluations use the new duration.
      2. + *
      3. {@code metadataService.clearAll()} β€” wipe every cached filtered + * metadata response, since the duration shift can flip a version's + * block decision.
      4. + *
      + * This test exercises both steps and asserts the recomputed result + * reflects the new duration. + * + * @since 1.0 + */ +final class PolicyChangeInvalidationTest { + + /** + * Package under test. + */ + private static final String PKG = "test-pkg"; + + private MetadataFilterService service; + private PolicyAwareCooldownService cooldownService; + private CooldownSettings settings; + private CooldownCache cooldownCache; + private FilteredMetadataCache metadataCache; + + @BeforeEach + void setUp() { + this.cooldownService = new PolicyAwareCooldownService(); + // Start with a 30-day cooldown window. + this.settings = new CooldownSettings(true, Duration.ofDays(30)); + this.cooldownService.bindSettings(this.settings); + this.cooldownCache = new CooldownCache(); + this.metadataCache = new FilteredMetadataCache(); + this.service = new MetadataFilterService( + this.cooldownService, + this.settings, + this.cooldownCache, + this.metadataCache, + ForkJoinPool.commonPool(), + 50 + ); + } + + /** + * Scenario: Package has two versions. + * - 1.0.0 released 60 days ago (old) + * - 1.1.0 released 10 days ago (recent) + * Initial duration X = 30d β†’ 1.1.0 is blocked, result contains only 1.0.0. + * Change duration Y = 5d via settings.update() + clearAll() β†’ 1.1.0 is + * no longer within the cooldown window and must appear in the fresh result. 
+ */ + @Test + void policyShorteningInvalidatesCacheAndIncludesPreviouslyBlockedVersion() throws Exception { + final Instant now = Instant.now(); + final Map releaseDates = new HashMap<>(); + releaseDates.put("1.0.0", now.minus(Duration.ofDays(60))); + releaseDates.put("1.1.0", now.minus(Duration.ofDays(10))); + + final TestMetadataParser parser = new TestMetadataParser( + Arrays.asList("1.0.0", "1.1.0"), + "1.1.0", + releaseDates + ); + final TestMetadataFilter filter = new TestMetadataFilter(); + final TestMetadataRewriter rewriter = new TestMetadataRewriter(); + final TestCooldownInspector inspector = new TestCooldownInspector(releaseDates); + + // --- T1: duration = 30d β†’ 1.1.0 (10 days old) is blocked --- + final byte[] resultT1 = this.service.filterMetadata( + "npm", "test-repo", PKG, + "raw".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + assertThat( + "under 30d policy, 1.1.0 (10 days old) must be blocked", + filter.lastBlockedVersions.contains("1.1.0"), equalTo(true) + ); + assertThat( + "result must NOT contain 1.1.0 under 30d policy", + new String(resultT1, StandardCharsets.UTF_8).contains("1.1.0"), equalTo(false) + ); + assertThat( + "result must contain the older 1.0.0", + new String(resultT1, StandardCharsets.UTF_8).contains("1.0.0"), equalTo(true) + ); + + final int parseCountAfterT1 = parser.parseCount; + + // --- Cache hit check: a second query with the OLD policy returns cached bytes --- + this.service.filterMetadata( + "npm", "test-repo", PKG, + "raw".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + assertThat( + "second query under same policy must hit the cache (no re-parse)", + parser.parseCount, equalTo(parseCountAfterT1) + ); + + // --- Policy change via config API: shrink to 5d, mirror CooldownHandler.updateConfig --- + this.settings.update(true, Duration.ofDays(5), Collections.emptyMap()); + this.service.clearAll(); + + // --- T2: 
duration = 5d β†’ 1.1.0 (10 days old) is past cooldown and must appear --- + final byte[] resultT2 = this.service.filterMetadata( + "npm", "test-repo", PKG, + "raw".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + assertThat( + "after clearAll, the cached entry must have been invalidated and re-parsed", + parser.parseCount, equalTo(parseCountAfterT1 + 1) + ); + assertThat( + "under 5d policy, 1.1.0 (10 days old) must NOT be blocked", + filter.lastBlockedVersions.contains("1.1.0"), equalTo(false) + ); + assertThat( + "blocked versions set must be empty under the relaxed policy", + filter.lastBlockedVersions.size(), equalTo(0) + ); + final String bodyT2 = new String(resultT2, StandardCharsets.UTF_8); + assertThat( + "result must now contain 1.1.0 after policy shortening", + bodyT2.contains("1.1.0"), equalTo(true) + ); + assertThat( + "result must still contain 1.0.0", + bodyT2.contains("1.0.0"), equalTo(true) + ); + } + + /** + * Scenario: Policy lengthening β€” version that was allowed at X now blocked at Y. + * Initial 5d β†’ 1.1.0 (released 10d ago) is allowed. + * Change to 30d β†’ 1.1.0 must now be blocked; cache must be invalidated. 
+ */ + @Test + void policyLengtheningInvalidatesCacheAndBlocksPreviouslyAllowedVersion() throws Exception { + // Rebuild with 5d policy + this.settings.update(true, Duration.ofDays(5), Collections.emptyMap()); + + final Instant now = Instant.now(); + final Map releaseDates = new HashMap<>(); + releaseDates.put("1.0.0", now.minus(Duration.ofDays(60))); + releaseDates.put("1.1.0", now.minus(Duration.ofDays(10))); + + final TestMetadataParser parser = new TestMetadataParser( + Arrays.asList("1.0.0", "1.1.0"), + "1.1.0", + releaseDates + ); + final TestMetadataFilter filter = new TestMetadataFilter(); + final TestMetadataRewriter rewriter = new TestMetadataRewriter(); + final TestCooldownInspector inspector = new TestCooldownInspector(releaseDates); + + final byte[] resultT1 = this.service.filterMetadata( + "npm", "test-repo", PKG, + "raw".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + assertThat( + "under 5d policy, 1.1.0 (10d old) is allowed", + filter.lastBlockedVersions.contains("1.1.0"), equalTo(false) + ); + assertThat( + "result contains 1.1.0 under 5d policy", + new String(resultT1, StandardCharsets.UTF_8).contains("1.1.0"), equalTo(true) + ); + + final int parseCountAfterT1 = parser.parseCount; + + // Policy change: lengthen to 30d, invalidate all. 
+ this.settings.update(true, Duration.ofDays(30), Collections.emptyMap()); + this.service.clearAll(); + + final byte[] resultT2 = this.service.filterMetadata( + "npm", "test-repo", PKG, + "raw".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + assertThat( + "clearAll forced a re-parse under the new policy", + parser.parseCount, equalTo(parseCountAfterT1 + 1) + ); + assertThat( + "under 30d policy, 1.1.0 (10d old) must be blocked", + filter.lastBlockedVersions.contains("1.1.0"), equalTo(true) + ); + assertThat( + "result no longer contains 1.1.0 after policy lengthening", + new String(resultT2, StandardCharsets.UTF_8).contains("1.1.0"), equalTo(false) + ); + } + + // --- Fakes --- + + /** + * Cooldown service whose block decision is derived from the live + * {@link CooldownSettings} β€” mimics production behavior where a + * policy change immediately affects future evaluations. + * A version is blocked iff its release date is within + * {@code settings.minimumAllowedAgeFor(repoType)} from now. + */ + private static final class PolicyAwareCooldownService implements CooldownService { + private final Map releaseDates = new HashMap<>(); + private CooldownSettings bound; + + void bindSettings(final CooldownSettings settings) { + this.bound = settings; + } + + @Override + public CompletableFuture evaluate( + final CooldownRequest request, + final CooldownInspector inspector + ) { + return inspector.releaseDate(request.artifact(), request.version()) + .thenApply(maybeDate -> { + if (maybeDate.isEmpty()) { + return CooldownResult.allowed(); + } + final Duration window = this.bound.minimumAllowedAgeFor(request.repoType()); + final Instant cutoff = Instant.now().minus(window); + if (maybeDate.get().isAfter(cutoff)) { + // Released within the cooldown window β†’ blocked. 
+ final Instant blockedUntil = maybeDate.get().plus(window); + return CooldownResult.blocked(new CooldownBlock( + request.repoType(), + request.repoName(), + request.artifact(), + request.version(), + CooldownReason.FRESH_RELEASE, + Instant.now(), + blockedUntil, + Collections.emptyList() + )); + } + return CooldownResult.allowed(); + }); + } + + @Override + public CompletableFuture unblock( + String repoType, String repoName, String artifact, String version, String actor + ) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture unblockAll(String repoType, String repoName, String actor) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture> activeBlocks( + String repoType, String repoName + ) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } + + private static final class TestMetadataParser implements MetadataParser> { + private final List versions; + private final String latest; + private final Map releaseDates; + int parseCount = 0; + + TestMetadataParser( + final List versions, + final String latest, + final Map releaseDates + ) { + this.versions = versions; + this.latest = latest; + this.releaseDates = releaseDates; + } + + @Override + public List parse(final byte[] bytes) { + this.parseCount++; + return this.versions; + } + + @Override + public List extractVersions(final List metadata) { + return metadata; + } + + @Override + public Optional getLatestVersion(final List metadata) { + return Optional.ofNullable(this.latest); + } + + @Override + public Map extractReleaseDates(final List metadata) { + return new HashMap<>(this.releaseDates); + } + + @Override + public String contentType() { + return "application/json"; + } + } + + private static final class TestMetadataFilter implements MetadataFilter> { + Set lastBlockedVersions = new java.util.HashSet<>(); + String lastNewLatest; + + @Override + public List filter(final List metadata, final Set 
blockedVersions) { + this.lastBlockedVersions = blockedVersions; + return metadata.stream() + .filter(v -> !blockedVersions.contains(v)) + .collect(java.util.stream.Collectors.toList()); + } + + @Override + public List updateLatest(final List metadata, final String newLatest) { + this.lastNewLatest = newLatest; + return metadata; + } + } + + private static final class TestMetadataRewriter implements MetadataRewriter> { + @Override + public byte[] rewrite(final List metadata) { + return String.join(",", metadata).getBytes(StandardCharsets.UTF_8); + } + + @Override + public String contentType() { + return "application/json"; + } + } + + private static final class TestCooldownInspector implements CooldownInspector { + private final Map releaseDates; + + TestCooldownInspector(final Map releaseDates) { + this.releaseDates = new HashMap<>(releaseDates); + } + + @Override + public CompletableFuture> releaseDate( + final String artifact, final String version + ) { + return CompletableFuture.completedFuture( + Optional.ofNullable(this.releaseDates.get(version)) + ); + } + + @Override + public CompletableFuture> dependencies( + final String artifact, final String version + ) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/UpstreamPublishReEvalTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/UpstreamPublishReEvalTest.java new file mode 100644 index 000000000..03aa8e6e5 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/UpstreamPublishReEvalTest.java @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.cooldown.metadata; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.atomic.AtomicReference; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +/** + * Tests that when upstream publishes a new version between two metadata + * queries, the second query (after cache invalidation) returns the new + * version in the filtered response. + * + *

      The cache in {@link FilteredMetadataCache} is keyed by + * {@code (repoType, repoName, packageName)} and stores the filtered + * response bytes, so a plain second request would return the cached + * T1 bytes. Production wires an explicit {@code invalidate(...)} hook + * (called on block/unblock events, and on upstream-driven re-fetch). + * This test exercises that hook directly: mutate the upstream fixture, + * invalidate, and assert the recomputed response includes the new + * version. No reliance on TTL expiry.

      + * + * @since 1.0 + */ +final class UpstreamPublishReEvalTest { + + private static final String PKG = "test-pkg"; + + private MetadataFilterService service; + private NoBlockCooldownService cooldownService; + private CooldownSettings settings; + private CooldownCache cooldownCache; + private FilteredMetadataCache metadataCache; + + @BeforeEach + void setUp() { + this.cooldownService = new NoBlockCooldownService(); + this.settings = new CooldownSettings(true, Duration.ofDays(7)); + this.cooldownCache = new CooldownCache(); + this.metadataCache = new FilteredMetadataCache(); + this.service = new MetadataFilterService( + this.cooldownService, + this.settings, + this.cooldownCache, + this.metadataCache, + ForkJoinPool.commonPool(), + 50 + ); + } + + /** + * Scenario: upstream state mutates between T1 and T2. + * T1 β†’ upstream has {1.0, 1.1}. Filtered response contains both. + * Simulate upstream publishing 1.2 (mutate the shared fixture). + * Call {@link MetadataFilterService#invalidate(String, String, String)} + * to mirror the production re-fetch path. + * T2 β†’ upstream has {1.0, 1.1, 1.2}. Filtered response contains all 3. + */ + @Test + void newUpstreamVersionAppearsAfterInvalidate() throws Exception { + // Shared mutable fixture: the "upstream" view of the package. + final AtomicReference upstream = new AtomicReference<>( + new UpstreamState( + new ArrayList<>(Arrays.asList("1.0.0", "1.1.0")), + "1.1.0", + // All versions old enough to NOT be blocked under the 7d policy. 
+ Map.of( + "1.0.0", Instant.now().minus(Duration.ofDays(60)), + "1.1.0", Instant.now().minus(Duration.ofDays(30)) + ) + ) + ); + + final MutableUpstreamParser parser = new MutableUpstreamParser(upstream); + final TestMetadataFilter filter = new TestMetadataFilter(); + final TestMetadataRewriter rewriter = new TestMetadataRewriter(); + final TestCooldownInspector inspector = new TestCooldownInspector(upstream); + + // --- T1: upstream = {1.0.0, 1.1.0} --- + final byte[] resultT1 = this.service.filterMetadata( + "npm", "test-repo", PKG, + "raw-T1".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + final String bodyT1 = new String(resultT1, StandardCharsets.UTF_8); + assertThat( + "T1 response contains 1.0.0", + bodyT1.contains("1.0.0"), equalTo(true) + ); + assertThat( + "T1 response contains 1.1.0", + bodyT1.contains("1.1.0"), equalTo(true) + ); + assertThat( + "T1 response does NOT yet contain unpublished 1.2.0", + bodyT1.contains("1.2.0"), equalTo(false) + ); + assertThat( + "T1: no versions blocked (all older than 7d)", + filter.lastBlockedVersions.isEmpty(), equalTo(true) + ); + final int parseCountAfterT1 = parser.parseCount; + + // --- Upstream publishes 1.2.0 (mutate the fixture) --- + final Map newDates = new HashMap<>(upstream.get().releaseDates); + newDates.put("1.2.0", Instant.now().minus(Duration.ofDays(30))); + upstream.set(new UpstreamState( + new ArrayList<>(Arrays.asList("1.0.0", "1.1.0", "1.2.0")), + "1.2.0", + newDates + )); + + // Sanity: a request WITHOUT invalidation would hit cache and NOT see 1.2.0 + // β€” verify the cache is doing its job before we invalidate. 
+ final byte[] stale = this.service.filterMetadata( + "npm", "test-repo", PKG, + "raw-T1".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + assertThat( + "without invalidation, cache returns stale T1 bytes (no 1.2.0)", + new String(stale, StandardCharsets.UTF_8).contains("1.2.0"), equalTo(false) + ); + assertThat( + "without invalidation, parser was NOT called again", + parser.parseCount, equalTo(parseCountAfterT1) + ); + + // --- Production re-eval trigger: invalidate after upstream change --- + this.service.invalidate("npm", "test-repo", PKG); + + // --- T2: upstream = {1.0.0, 1.1.0, 1.2.0} --- + final byte[] resultT2 = this.service.filterMetadata( + "npm", "test-repo", PKG, + "raw-T2".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + assertThat( + "invalidate forced a re-parse at T2", + parser.parseCount, equalTo(parseCountAfterT1 + 1) + ); + + final String bodyT2 = new String(resultT2, StandardCharsets.UTF_8); + assertThat( + "T2 response contains 1.0.0", + bodyT2.contains("1.0.0"), equalTo(true) + ); + assertThat( + "T2 response contains 1.1.0", + bodyT2.contains("1.1.0"), equalTo(true) + ); + assertThat( + "T2 response contains newly-published 1.2.0", + bodyT2.contains("1.2.0"), equalTo(true) + ); + assertThat( + "T2: last parsed version list size is 3", + parser.lastParsed.size(), equalTo(3) + ); + assertThat( + "T2: no versions blocked", + filter.lastBlockedVersions.isEmpty(), equalTo(true) + ); + } + + // --- Test fixtures --- + + /** + * Immutable snapshot of what "upstream" currently exposes for the package. 
+ */ + private static final class UpstreamState { + final List versions; + final String latest; + final Map releaseDates; + + UpstreamState( + final List versions, + final String latest, + final Map releaseDates + ) { + this.versions = versions; + this.latest = latest; + this.releaseDates = releaseDates; + } + } + + /** + * Parser that reads from a shared {@link AtomicReference} each time + * it parses, simulating a fresh upstream fetch on every cache miss. + * The raw bytes parameter is ignored β€” this isolates the test from + * HTTP layer details and lets us mutate upstream state cleanly. + */ + private static final class MutableUpstreamParser implements MetadataParser> { + private final AtomicReference upstream; + int parseCount; + List lastParsed = Collections.emptyList(); + + MutableUpstreamParser(final AtomicReference upstream) { + this.upstream = upstream; + } + + @Override + public List parse(final byte[] bytes) { + this.parseCount++; + this.lastParsed = new ArrayList<>(this.upstream.get().versions); + return this.lastParsed; + } + + @Override + public List extractVersions(final List metadata) { + return metadata; + } + + @Override + public Optional getLatestVersion(final List metadata) { + return Optional.ofNullable(this.upstream.get().latest); + } + + @Override + public Map extractReleaseDates(final List metadata) { + return new HashMap<>(this.upstream.get().releaseDates); + } + + @Override + public String contentType() { + return "application/json"; + } + } + + /** + * Inspector that reads release dates from the same shared fixture β€” + * so an upstream mutation is visible to both the parser and the + * cooldown evaluation path. 
+ */ + private static final class TestCooldownInspector implements CooldownInspector { + private final AtomicReference upstream; + + TestCooldownInspector(final AtomicReference upstream) { + this.upstream = upstream; + } + + @Override + public CompletableFuture> releaseDate( + final String artifact, final String version + ) { + return CompletableFuture.completedFuture( + Optional.ofNullable(this.upstream.get().releaseDates.get(version)) + ); + } + + @Override + public CompletableFuture> dependencies( + final String artifact, final String version + ) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } + + /** + * Cooldown service that never blocks β€” the upstream-publish scenario + * is about cache re-evaluation, not about block decisions. + */ + private static final class NoBlockCooldownService implements CooldownService { + @Override + public CompletableFuture evaluate( + final CooldownRequest request, + final CooldownInspector inspector + ) { + return CompletableFuture.completedFuture(CooldownResult.allowed()); + } + + @Override + public CompletableFuture unblock( + String repoType, String repoName, String artifact, String version, String actor + ) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture unblockAll(String repoType, String repoName, String actor) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture> activeBlocks( + String repoType, String repoName + ) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } + + private static final class TestMetadataFilter implements MetadataFilter> { + Set lastBlockedVersions = new java.util.HashSet<>(); + + @Override + public List filter(final List metadata, final Set blockedVersions) { + this.lastBlockedVersions = blockedVersions; + return metadata.stream() + .filter(v -> !blockedVersions.contains(v)) + .collect(java.util.stream.Collectors.toList()); + } + + @Override + public 
List updateLatest(final List metadata, final String newLatest) { + return metadata; + } + } + + private static final class TestMetadataRewriter implements MetadataRewriter> { + @Override + public byte[] rewrite(final List metadata) { + return String.join(",", metadata).getBytes(StandardCharsets.UTF_8); + } + + @Override + public String contentType() { + return "application/json"; + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/VersionEvaluationParallelTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/VersionEvaluationParallelTest.java new file mode 100644 index 000000000..77087600d --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/metadata/VersionEvaluationParallelTest.java @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.cooldown.metadata; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ForkJoinPool; +import java.util.stream.Collectors; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.lessThan; + +/** + * Verifies that version evaluation runs in parallel (H2). + * 50 versions, all L1-cached, should complete well under 50 ms. 
+ * + * @since 2.2.0 + */ +final class VersionEvaluationParallelTest { + + @Test + void fiftyVersionsAllCachedCompletesUnder50ms() throws Exception { + final int versionCount = 50; + final List versions = new ArrayList<>(versionCount); + for (int i = 0; i < versionCount; i++) { + versions.add(String.format("1.0.%d", i)); + } + + final CooldownCache cooldownCache = new CooldownCache(10_000, Duration.ofHours(24), null); + // Pre-warm all versions as allowed in L1 + for (final String version : versions) { + cooldownCache.put("test-repo", "test-pkg", version, false); + } + + final CooldownSettings settings = new CooldownSettings(true, Duration.ofDays(7)); + final MetadataFilterService service = new MetadataFilterService( + new FastCooldownService(), + settings, + cooldownCache, + new FilteredMetadataCache(), + ForkJoinPool.commonPool(), + versionCount + ); + + final SimpleParser parser = new SimpleParser(versions, versions.get(versions.size() - 1)); + final SimpleFilter filter = new SimpleFilter(); + final SimpleRewriter rewriter = new SimpleRewriter(); + final SimpleInspector inspector = new SimpleInspector(); + + // Warm up JIT + service.filterMetadata( + "npm", "test-repo", "warmup-pkg", + "raw".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + + // Invalidate the warmup entry + service.invalidate("npm", "test-repo", "warmup-pkg"); + + // Timed run + final long start = System.nanoTime(); + service.filterMetadata( + "npm", "test-repo", "test-pkg", + "raw".getBytes(StandardCharsets.UTF_8), + parser, filter, rewriter, Optional.of(inspector) + ).get(); + final long elapsedMs = (System.nanoTime() - start) / 1_000_000; + + assertThat( + String.format("50-version evaluation took %d ms, expected < 50 ms", elapsedMs), + elapsedMs, lessThan(50L) + ); + } + + // -- test doubles ------------------------------------------------------- + + private static final class FastCooldownService implements CooldownService { + @Override + 
public CompletableFuture evaluate( + final CooldownRequest request, final CooldownInspector inspector + ) { + return CompletableFuture.completedFuture(CooldownResult.allowed()); + } + + @Override + public CompletableFuture unblock(String t, String n, String a, String v, String actor) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture unblockAll(String t, String n, String actor) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture> activeBlocks(String t, String n) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } + + private static final class SimpleParser implements MetadataParser> { + private final List versions; + private final String latest; + + SimpleParser(final List versions, final String latest) { + this.versions = versions; + this.latest = latest; + } + + @Override + public List parse(final byte[] bytes) { + return this.versions; + } + + @Override + public List extractVersions(final List metadata) { + return metadata; + } + + @Override + public Optional getLatestVersion(final List metadata) { + return Optional.ofNullable(this.latest); + } + + @Override + public String contentType() { + return "application/json"; + } + } + + private static final class SimpleFilter implements MetadataFilter> { + @Override + public List filter(final List metadata, final Set blocked) { + return metadata.stream().filter(v -> !blocked.contains(v)).collect(Collectors.toList()); + } + + @Override + public List updateLatest(final List metadata, final String latest) { + return metadata; + } + } + + private static final class SimpleRewriter implements MetadataRewriter> { + @Override + public byte[] rewrite(final List metadata) { + return String.join(",", metadata).getBytes(StandardCharsets.UTF_8); + } + + @Override + public String contentType() { + return "application/json"; + } + } + + private static final class SimpleInspector implements CooldownInspector { + 
@Override + public CompletableFuture> releaseDate(final String artifact, final String version) { + return CompletableFuture.completedFuture(Optional.empty()); + } + + @Override + public CompletableFuture> dependencies( + final String artifact, final String version + ) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/response/CooldownResponseRegistryGetOrThrowTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/response/CooldownResponseRegistryGetOrThrowTest.java new file mode 100644 index 000000000..881cd0080 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/response/CooldownResponseRegistryGetOrThrowTest.java @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cooldown.response; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.sameInstance; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Tests for {@link CooldownResponseRegistry#getOrThrow(String)}. 
+ * + * @since 2.2.0 + */ +final class CooldownResponseRegistryGetOrThrowTest { + + private CooldownResponseRegistry registry; + + @BeforeEach + void setUp() { + this.registry = CooldownResponseRegistry.instance(); + this.registry.clear(); + } + + @Test + void getOrThrow_returnsRegisteredFactory() { + final CooldownResponseFactory factory = new StubFactory("foo"); + this.registry.register("foo", factory); + assertThat(this.registry.getOrThrow("foo"), is(sameInstance(factory))); + } + + @Test + void getOrThrow_throwsWhenMissing() { + final IllegalStateException ex = assertThrows( + IllegalStateException.class, + () -> this.registry.getOrThrow("bar") + ); + assertThat(ex.getMessage(), containsString("bar")); + } + + /** + * Stub factory for testing registry wiring. + */ + private static final class StubFactory implements CooldownResponseFactory { + private final String type; + + StubFactory(final String type) { + this.type = type; + } + + @Override + public Response forbidden(final CooldownBlock block) { + return ResponseBuilder.forbidden() + .textBody("blocked") + .build(); + } + + @Override + public String repoType() { + return this.type; + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/cooldown/response/CooldownResponseRegistryTest.java b/pantera-core/src/test/java/com/auto1/pantera/cooldown/response/CooldownResponseRegistryTest.java new file mode 100644 index 000000000..25549f376 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/cooldown/response/CooldownResponseRegistryTest.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.cooldown.response; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.RsStatus; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.Collections; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.sameInstance; + +/** + * Tests for {@link CooldownResponseRegistry}. + * + * @since 2.2.0 + */ +final class CooldownResponseRegistryTest { + + private CooldownResponseRegistry registry; + + @BeforeEach + void setUp() { + this.registry = CooldownResponseRegistry.instance(); + this.registry.clear(); + } + + @Test + void registersAndRetrievesByRepoType() { + final CooldownResponseFactory factory = new StubFactory("maven"); + this.registry.register("maven", factory); + assertThat(this.registry.get("maven"), is(sameInstance(factory))); + } + + @Test + void returnsNullForUnregisteredType() { + assertThat(this.registry.get("unknown"), is(nullValue())); + } + + @Test + void registersFactoryWithAliases() { + final CooldownResponseFactory factory = new StubFactory("maven"); + this.registry.register(factory, "gradle"); + assertThat(this.registry.get("maven"), is(sameInstance(factory))); + assertThat(this.registry.get("gradle"), is(sameInstance(factory))); + } + + @Test + void gradleAliasReusesMavenFactory() { + final CooldownResponseFactory maven = new StubFactory("maven"); + this.registry.register(maven, "gradle"); + final CooldownResponseFactory resolved = this.registry.get("gradle"); 
+ assertThat(resolved, is(notNullValue())); + assertThat(resolved.repoType(), equalTo("maven")); + } + + @Test + void registeredTypesIncludesAliases() { + this.registry.register(new StubFactory("maven"), "gradle"); + this.registry.register(new StubFactory("npm")); + assertThat( + this.registry.registeredTypes(), + containsInAnyOrder("maven", "gradle", "npm") + ); + } + + /** + * Stub factory for testing registry wiring. + */ + private static final class StubFactory implements CooldownResponseFactory { + private final String type; + + StubFactory(final String type) { + this.type = type; + } + + @Override + public Response forbidden(final CooldownBlock block) { + return ResponseBuilder.forbidden() + .textBody("blocked") + .build(); + } + + @Override + public String repoType() { + return this.type; + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/cache/BaseCachedProxySliceDedupTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/cache/BaseCachedProxySliceDedupTest.java new file mode 100644 index 000000000..7578b1d18 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/cache/BaseCachedProxySliceDedupTest.java @@ -0,0 +1,481 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.cache; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.asto.Key; +import com.auto1.pantera.asto.Storage; +import com.auto1.pantera.asto.cache.FromStorageCache; +import com.auto1.pantera.asto.memory.InMemoryStorage; +import com.auto1.pantera.http.Headers; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.RsStatus; +import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.rq.RequestLine; +import com.auto1.pantera.http.rq.RqMethod; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.is; + +/** + * Regression-guard for the WI-post-05 SingleFlight migration of + * {@link BaseCachedProxySlice}. Preserves the three behaviors previously + * covered by {@code RequestDeduplicatorTest}: coalescing of concurrent loader + * invocations, propagation of the shared terminal signal to every caller, + * and independence of distinct keys. + * + *

      The tests exercise the migrated path end-to-end β€” {@code fetchAndCache β†’ + * singleFlight.load(key, cacheResponse) β†’ signalToResponse} β€” rather than + * calling the {@code SingleFlight} helper directly. Testing {@link + * SingleFlight} in isolation cannot catch a regression such as "the + * coalescer was removed from the cache-write loader path" (e.g. a future + * refactor that inlines {@code cacheResponse} back to per-call execution); + * testing {@link BaseCachedProxySlice}'s observable cache-write count can. + * + *

      The observable proxy for "loader invoked exactly once" is the number of + * primary-key {@code Storage.save(key, content)} calls. {@code cacheResponse} + * writes the primary artifact exactly once per invocation, so: + * + *

        + *
      • Pre-migration SIGNAL behavior (with a blocking loader covering the + * entire coalescing window): N concurrent callers β‡’ exactly 1 save.
      • + *
      • If the dedup is ever lost: N concurrent callers β‡’ N saves + * (one per caller's {@code cacheResponse}).
      • + *
      + * + *

      The coalescing window is kept open by blocking the first loader + * via a gate on {@code Storage.save}: every caller attaches to the + * SingleFlight entry before the leader's save ever completes. This matches + * the pattern the legacy {@code RequestDeduplicatorTest} used (a + * never-completing {@code blocker} future for the first call) β€” the + * observable entities are different (save count vs fetch count) but the + * coalescing semantics are the same. + * + * @since 2.2.0 + */ +final class BaseCachedProxySliceDedupTest { + + /** + * Primary artifact path shared by all callers in the coalescing tests. + */ + private static final String ARTIFACT_PATH = + "/com/example/foo/1.0/foo-1.0.jar"; + + /** + * Matching storage key for {@link #ARTIFACT_PATH}. Leading slash is + * dropped by {@code KeyFromPath}. + */ + private static final Key ARTIFACT_KEY = + new Key.From("com/example/foo/1.0/foo-1.0.jar"); + + /** + * N concurrent GETs for the same cacheable path must invoke the + * cache-write loader exactly once β€” observable as exactly one primary-key + * storage {@code save} call. All N callers must receive a 200 response + * after the leader's save completes. + * + *

      The test forces every caller to attach to the SingleFlight entry + * before the first loader can complete by blocking {@code Storage.save} + * on a gate. This is equivalent to the never-complete {@code blocker} + * pattern the legacy {@code RequestDeduplicatorTest} used. + */ + @Test + @Timeout(15) + void concurrentRequestsShareOneCacheWrite() throws Exception { + final int callers = 100; + final byte[] body = "deduplicated body".getBytes(); + final CountDownLatch saveGate = new CountDownLatch(1); + final Slice upstream = immediateOkUpstream(body); + final GatedCountingStorage storage = new GatedCountingStorage( + new InMemoryStorage(), saveGate, ARTIFACT_KEY + ); + final DedupTestSlice slice = new DedupTestSlice(upstream, storage); + final ExecutorService pool = Executors.newFixedThreadPool(callers, r -> { + final Thread t = new Thread(r, "dedup-caller"); + t.setDaemon(true); + return t; + }); + try { + final List> responses = new ArrayList<>(); + for (int i = 0; i < callers; i++) { + responses.add(CompletableFuture.supplyAsync(() -> slice.response( + new RequestLine(RqMethod.GET, ARTIFACT_PATH), + Headers.EMPTY, + Content.EMPTY + ).join(), pool)); + } + // Wait for the leader's save to arrive at the gate. This is the + // signal that every subsequent caller will coalesce onto the + // in-flight entry rather than starting a new loader. + assertThat( + "leader save must reach the gate within timeout", + storage.awaitSaveAttempted(5, TimeUnit.SECONDS), + is(true) + ); + // Give the rest of the callers time to settle onto the + // SingleFlight entry. The coalescing window is open until the + // leader's save completes. + waitForAttach(callers, Duration.ofSeconds(3)); + // Now release the gate so the leader's save completes. 
+ saveGate.countDown(); + for (final CompletableFuture r : responses) { + final Response resp = r.get(5, TimeUnit.SECONDS); + assertThat( + "every caller must receive a 200", + resp.status(), + equalTo(RsStatus.OK) + ); + } + } finally { + pool.shutdownNow(); + } + assertThat( + "cache-write loader must be invoked exactly once for " + + callers + " concurrent callers sharing the same cache key" + + " β€” a count > 1 indicates the SingleFlight coalescer was" + + " bypassed on the cache-write path", + storage.saveCount(ARTIFACT_KEY), + equalTo(1) + ); + } + + /** + * Every caller in a coalesced burst must receive a 2xx. Under the SIGNAL + * protocol they all share the same terminal state, so nobody observes a + * 500/503 when the single underlying loader completes with SUCCESS. + */ + @Test + @Timeout(15) + void concurrentRequestsAllReceiveSuccessSignal() throws Exception { + final int callers = 50; + final byte[] body = "shared body".getBytes(); + final CountDownLatch saveGate = new CountDownLatch(1); + final Slice upstream = immediateOkUpstream(body); + final GatedCountingStorage storage = new GatedCountingStorage( + new InMemoryStorage(), saveGate, ARTIFACT_KEY + ); + final DedupTestSlice slice = new DedupTestSlice(upstream, storage); + final ExecutorService pool = Executors.newFixedThreadPool(callers, r -> { + final Thread t = new Thread(r, "dedup-signal"); + t.setDaemon(true); + return t; + }); + final AtomicInteger successes = new AtomicInteger(0); + try { + final List> responses = new ArrayList<>(); + for (int i = 0; i < callers; i++) { + responses.add(CompletableFuture.supplyAsync(() -> slice.response( + new RequestLine(RqMethod.GET, ARTIFACT_PATH), + Headers.EMPTY, + Content.EMPTY + ).join(), pool)); + } + assertThat( + storage.awaitSaveAttempted(5, TimeUnit.SECONDS), + is(true) + ); + waitForAttach(callers, Duration.ofSeconds(3)); + saveGate.countDown(); + for (final CompletableFuture r : responses) { + final Response resp = r.get(5, TimeUnit.SECONDS); + if 
(resp.status() == RsStatus.OK) { + successes.incrementAndGet(); + } + } + } finally { + pool.shutdownNow(); + } + assertThat( + "every caller must observe the SUCCESS signal", + successes.get(), + equalTo(callers) + ); + } + + /** + * Independent keys must NOT be coalesced: N concurrent requests for N + * distinct paths produce exactly N primary-key saves. + */ + @Test + @Timeout(10) + void distinctKeysAreNotCoalesced() throws Exception { + final int keys = 8; + final Slice upstream = immediateOkUpstream("body".getBytes()); + final CountingStorage storage = new CountingStorage(new InMemoryStorage()); + final DedupTestSlice slice = new DedupTestSlice(upstream, storage); + final ExecutorService pool = Executors.newFixedThreadPool(keys, r -> { + final Thread t = new Thread(r, "dedup-distinct"); + t.setDaemon(true); + return t; + }); + try { + final List> responses = new ArrayList<>(); + for (int i = 0; i < keys; i++) { + final String path = "/com/example/foo/1.0/foo-1.0-" + i + ".jar"; + responses.add(CompletableFuture.supplyAsync(() -> slice.response( + new RequestLine(RqMethod.GET, path), + Headers.EMPTY, + Content.EMPTY + ).join(), pool)); + } + for (final CompletableFuture r : responses) { + final Response resp = r.get(5, TimeUnit.SECONDS); + assertThat(resp.status(), equalTo(RsStatus.OK)); + } + } finally { + pool.shutdownNow(); + } + int totalSaves = 0; + for (int i = 0; i < keys; i++) { + final Key key = new Key.From("com/example/foo/1.0/foo-1.0-" + i + ".jar"); + final int saves = storage.saveCount(key); + assertThat( + "each distinct key must be written at least once", + saves, + greaterThanOrEqualTo(1) + ); + totalSaves += saves; + } + // Absolute bound: each key can generate at most one save (no duplicate + // writes for the same key within one coalesced burst). The total must + // not exceed one per distinct key. 
+ assertThat( + "distinct keys must not cross-coalesce or duplicate-write", + totalSaves, + equalTo(keys) + ); + } + + /** + * Fresh-after-complete: once the leader's loader completes and the + * SingleFlight entry is invalidated, a subsequent burst for the same key + * must hit the cache and skip the loader entirely. This guards the + * invariant that the coalescer holds in-flight state only, never results. + */ + @Test + @Timeout(10) + void cacheHitAfterCoalescedFetchSkipsLoader() throws Exception { + final byte[] body = "cache hit body".getBytes(); + final Slice upstream = immediateOkUpstream(body); + final CountingStorage storage = new CountingStorage(new InMemoryStorage()); + final DedupTestSlice slice = new DedupTestSlice(upstream, storage); + // Prime the cache with a single request. + final Response first = slice.response( + new RequestLine(RqMethod.GET, ARTIFACT_PATH), + Headers.EMPTY, + Content.EMPTY + ).get(5, TimeUnit.SECONDS); + assertThat(first.status(), equalTo(RsStatus.OK)); + final int primed = storage.saveCount(ARTIFACT_KEY); + assertThat("first request writes the cache exactly once", primed, equalTo(1)); + // Second burst β€” all cache hits, no new writes. 
+ final int callers = 32; + final ExecutorService pool = Executors.newFixedThreadPool(callers, r -> { + final Thread t = new Thread(r, "dedup-cache-hit"); + t.setDaemon(true); + return t; + }); + try { + final List> responses = new ArrayList<>(); + for (int i = 0; i < callers; i++) { + responses.add(CompletableFuture.supplyAsync(() -> slice.response( + new RequestLine(RqMethod.GET, ARTIFACT_PATH), + Headers.EMPTY, + Content.EMPTY + ).join(), pool)); + } + for (final CompletableFuture r : responses) { + final Response resp = r.get(5, TimeUnit.SECONDS); + assertThat(resp.status(), equalTo(RsStatus.OK)); + } + } finally { + pool.shutdownNow(); + } + assertThat( + "cache-hit follow-ups must not trigger additional loader" + + " invocations", + storage.saveCount(ARTIFACT_KEY), + equalTo(primed) + ); + } + + /** + * Sleep long enough for every caller to have attached to the SingleFlight + * entry after the leader's save has reached the gate. 25 ms per caller + * is empirically comfortable on a test JVM β€” the leader's save is + * gated, so no caller can complete until we explicitly release; the + * only risk is the executor starving a caller, and the pool is sized + * to cover every caller with its own thread. + */ + private static void waitForAttach(final int callers, final Duration maxWait) + throws InterruptedException { + final long settle = Math.min( + maxWait.toMillis(), + Math.max(100L, 25L * callers) + ); + Thread.sleep(settle); + } + + /** + * Build an upstream slice that answers a 200 with {@code body} + * immediately. + */ + private static Slice immediateOkUpstream(final byte[] body) { + return (line, headers, content) -> CompletableFuture.completedFuture( + ResponseBuilder.ok() + .header("Content-Type", "application/java-archive") + .body(body) + .build() + ); + } + + /** + * Minimal {@link BaseCachedProxySlice} subclass. All paths are cacheable + * and storage-backed so requests flow through {@code fetchAndCache} where + * the SingleFlight coalescer lives. 
+ */ + private static final class DedupTestSlice extends BaseCachedProxySlice { + + DedupTestSlice(final Slice upstream, final Storage storage) { + super( + upstream, + new FromStorageCache(storage), + "test-repo", + "test", + "http://upstream", + Optional.of(storage), + Optional.empty(), + ProxyCacheConfig.defaults() + ); + } + + @Override + protected boolean isCacheable(final String path) { + return true; + } + } + + /** + * Baseline {@link Storage} wrapper that counts {@code save} calls per + * key. Thread-safe. + */ + private static class CountingStorage extends Storage.Wrap { + + /** + * Per-key save-call counter, indexed by {@link Key#string()}. + */ + private final ConcurrentMap counts = + new ConcurrentHashMap<>(); + + CountingStorage(final Storage delegate) { + super(delegate); + } + + @Override + public CompletableFuture save(final Key key, final Content content) { + this.counts.computeIfAbsent(key.string(), k -> new AtomicInteger(0)) + .incrementAndGet(); + return super.save(key, content); + } + + /** + * Number of {@code save} invocations observed for {@code key}. + * + * @param key Key to count. + * @return Count (0 if never saved). + */ + int saveCount(final Key key) { + final AtomicInteger c = this.counts.get(key.string()); + return c == null ? 0 : c.get(); + } + } + + /** + * {@link CountingStorage} variant that gates {@code save} on a latch + * for a specific key. Used to keep the coalescing window open + * for the full test. + */ + private static final class GatedCountingStorage extends CountingStorage { + + /** + * Latch that gates {@link #save} for the watched key. + */ + private final CountDownLatch gate; + + /** + * Key whose save is gated. Other keys pass through unmodified. + */ + private final Key watched; + + /** + * Latch that fires when the first {@code save} for the watched key + * is observed, so the test can synchronize on "leader has arrived". 
+ */ + private final CountDownLatch attempted = new CountDownLatch(1); + + GatedCountingStorage( + final Storage delegate, + final CountDownLatch gate, + final Key watched + ) { + super(delegate); + this.gate = gate; + this.watched = watched; + } + + @Override + public CompletableFuture save(final Key key, final Content content) { + if (!key.string().equals(this.watched.string())) { + return super.save(key, content); + } + this.attempted.countDown(); + return CompletableFuture.runAsync(() -> { + try { + this.gate.await(); + } catch (final InterruptedException ex) { + Thread.currentThread().interrupt(); + } + }).thenCompose(v -> super.save(key, content)); + } + + /** + * Wait until the first {@code save} for the watched key has been + * attempted. + * + * @param timeout Maximum wait. + * @param unit Unit of the wait. + * @return True if a save was attempted within the timeout. + * @throws InterruptedException if interrupted. + */ + boolean awaitSaveAttempted(final long timeout, final TimeUnit unit) + throws InterruptedException { + return this.attempted.await(timeout, unit); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/cache/CacheIntegrityAuditTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/cache/CacheIntegrityAuditTest.java new file mode 100644 index 000000000..a88bcccd5 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/cache/CacheIntegrityAuditTest.java @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.cache; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.asto.Key; +import com.auto1.pantera.asto.fs.FileStorage; +import com.auto1.pantera.http.cache.ProxyCacheWriter.IntegrityAuditor; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.util.HexFormat; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link ProxyCacheWriter.IntegrityAuditor} β€” the one-shot admin + * tool that scans a proxy cache for primary/sidecar drift and optionally + * evicts offenders (Β§9.5 "Healing stale pairs"). + */ +final class CacheIntegrityAuditTest { + + @Test + @DisplayName("--dry-run reports mismatches but does not delete") + void dryRun_reportsMismatches_butDoesNotDelete(@TempDir final Path tempDir) { + final FileStorage storage = new FileStorage(tempDir); + final Key primary = new Key.From("com/example/foo/1.0/foo-1.0.jar"); + final byte[] primaryBytes = "some jar bytes".getBytes(StandardCharsets.UTF_8); + storage.save(primary, new Content.From(primaryBytes)).join(); + // Seed an intentionally WRONG .sha1 sidecar (the production symptom). 
+ storage.save( + new Key.From(primary.string() + ".sha1"), + new Content.From("ffffffffffffffffffffffffffffffffffffffff" + .getBytes(StandardCharsets.UTF_8)) + ).join(); + + final IntegrityAuditor.Report report = IntegrityAuditor.run( + storage, "maven-proxy", false + ); + + assertFalse(report.clean(), "mismatches detected"); + assertThat("one mismatched primary", report.mismatches(), hasSize(1)); + assertThat( + "sha1 algorithm flagged", + report.mismatches().get(0).algorithms(), + hasSize(1) + ); + // Files still present (dry-run does NOT evict). + assertTrue(storage.exists(primary).join(), "primary still present"); + assertTrue( + storage.exists(new Key.From(primary.string() + ".sha1")).join(), + "sidecar still present" + ); + } + + @Test + @DisplayName("--fix evicts mismatched pairs") + void fix_evictsMismatchedPairs(@TempDir final Path tempDir) { + final FileStorage storage = new FileStorage(tempDir); + final Key primary = new Key.From("com/example/bar/2.0/bar-2.0.pom"); + final byte[] primaryBytes = "pom bytes".getBytes(StandardCharsets.UTF_8); + storage.save(primary, new Content.From(primaryBytes)).join(); + storage.save( + new Key.From(primary.string() + ".sha1"), + new Content.From("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef" + .getBytes(StandardCharsets.UTF_8)) + ).join(); + storage.save( + new Key.From(primary.string() + ".md5"), + new Content.From("cafebabecafebabecafebabecafebabe" + .getBytes(StandardCharsets.UTF_8)) + ).join(); + + final IntegrityAuditor.Report report = IntegrityAuditor.run( + storage, "maven-proxy", true + ); + + assertFalse(report.clean(), "mismatches detected before fix"); + // Files evicted after --fix. 
+ assertFalse(storage.exists(primary).join(), "primary evicted"); + assertFalse( + storage.exists(new Key.From(primary.string() + ".sha1")).join(), + "sha1 sidecar evicted" + ); + assertFalse( + storage.exists(new Key.From(primary.string() + ".md5")).join(), + "md5 sidecar evicted" + ); + } + + @Test + @DisplayName("clean cache β†’ empty report, exit code 0 rendered by CLI") + void cleanCache_emitsEmptyReport(@TempDir final Path tempDir) { + final FileStorage storage = new FileStorage(tempDir); + final Key primary = new Key.From("com/example/clean/1.0/clean-1.0.jar"); + final byte[] bytes = "consistent".getBytes(StandardCharsets.UTF_8); + storage.save(primary, new Content.From(bytes)).join(); + storage.save( + new Key.From(primary.string() + ".sha1"), + new Content.From(sha1Hex(bytes).getBytes(StandardCharsets.UTF_8)) + ).join(); + storage.save( + new Key.From(primary.string() + ".sha256"), + new Content.From(sha256Hex(bytes).getBytes(StandardCharsets.UTF_8)) + ).join(); + + final IntegrityAuditor.Report report = IntegrityAuditor.run( + storage, "maven-proxy", true + ); + + assertTrue(report.clean(), "no mismatches"); + assertThat("scanned 1 primary", report.scanned(), is(1)); + assertTrue(storage.exists(primary).join(), "primary preserved"); + } + + @Test + @DisplayName("sidecar missing on a primary does not count as a mismatch") + void sidecarMissing_noMismatch(@TempDir final Path tempDir) { + final FileStorage storage = new FileStorage(tempDir); + final Key primary = new Key.From("com/example/nosidecar/1.0/nosidecar-1.0.jar"); + storage.save(primary, new Content.From("bytes".getBytes(StandardCharsets.UTF_8))).join(); + + final IntegrityAuditor.Report report = IntegrityAuditor.run( + storage, "maven-proxy", false + ); + + assertTrue(report.clean(), "no sidecar == no mismatch"); + assertThat("1 primary scanned", report.scanned(), is(1)); + } + + // ===== helpers ===== + + private static String sha1Hex(final byte[] body) { + return hex("SHA-1", body); + } + + 
private static String sha256Hex(final byte[] body) { + return hex("SHA-256", body); + } + + private static String hex(final String algo, final byte[] body) { + try { + final MessageDigest md = MessageDigest.getInstance(algo); + return HexFormat.of().formatHex(md.digest(body)); + } catch (final Exception ex) { + throw new AssertionError(ex); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/cache/DedupStrategyTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/cache/DedupStrategyTest.java deleted file mode 100644 index 709247fe8..000000000 --- a/pantera-core/src/test/java/com/auto1/pantera/http/cache/DedupStrategyTest.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. - */ -package com.auto1.pantera.http.cache; - -import org.junit.jupiter.api.Test; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.arrayContaining; -import static org.hamcrest.Matchers.equalTo; - -/** - * Tests for {@link DedupStrategy}. 
- */ -class DedupStrategyTest { - - @Test - void hasThreeValues() { - assertThat( - DedupStrategy.values(), - arrayContaining(DedupStrategy.NONE, DedupStrategy.STORAGE, DedupStrategy.SIGNAL) - ); - } - - @Test - void valueOfWorks() { - assertThat(DedupStrategy.valueOf("SIGNAL"), equalTo(DedupStrategy.SIGNAL)); - assertThat(DedupStrategy.valueOf("NONE"), equalTo(DedupStrategy.NONE)); - assertThat(DedupStrategy.valueOf("STORAGE"), equalTo(DedupStrategy.STORAGE)); - } -} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/cache/NegativeCacheKeyTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/cache/NegativeCacheKeyTest.java new file mode 100644 index 000000000..defe9d2fa --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/cache/NegativeCacheKeyTest.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.cache; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Tests for {@link NegativeCacheKey}. 
+ * + * @since 2.2.0 + */ +final class NegativeCacheKeyTest { + + @Test + void flatProducesExpectedString() { + final NegativeCacheKey key = new NegativeCacheKey( + "libs-release", "maven", "org.spring:spring-core", "5.3.0" + ); + assertEquals("libs-release:maven:org.spring:spring-core:5.3.0", key.flat()); + } + + @Test + void flatWithEmptyVersion() { + final NegativeCacheKey key = new NegativeCacheKey( + "npm-proxy", "npm", "@scope/pkg", "" + ); + assertEquals("npm-proxy:npm:@scope/pkg:", key.flat()); + } + + @Test + void flatWithNullVersionDefaultsToEmpty() { + final NegativeCacheKey key = new NegativeCacheKey( + "pypi-group", "pypi", "requests", null + ); + assertEquals("pypi-group:pypi:requests:", key.flat()); + assertEquals("", key.artifactVersion()); + } + + @Test + void nullScopeThrows() { + assertThrows(NullPointerException.class, () -> + new NegativeCacheKey(null, "maven", "foo", "1.0") + ); + } + + @Test + void nullRepoTypeThrows() { + assertThrows(NullPointerException.class, () -> + new NegativeCacheKey("scope", null, "foo", "1.0") + ); + } + + @Test + void nullArtifactNameThrows() { + assertThrows(NullPointerException.class, () -> + new NegativeCacheKey("scope", "maven", null, "1.0") + ); + } + + @Test + void recordAccessorsWork() { + final NegativeCacheKey key = new NegativeCacheKey( + "docker-proxy", "docker", "nginx", "latest" + ); + assertEquals("docker-proxy", key.scope()); + assertEquals("docker", key.repoType()); + assertEquals("nginx", key.artifactName()); + assertEquals("latest", key.artifactVersion()); + } + + @Test + void equalityByValue() { + final NegativeCacheKey a = new NegativeCacheKey("s", "t", "n", "v"); + final NegativeCacheKey b = new NegativeCacheKey("s", "t", "n", "v"); + assertEquals(a, b); + assertEquals(a.hashCode(), b.hashCode()); + } + + @Test + void toStringIsNotNull() { + final NegativeCacheKey key = new NegativeCacheKey("s", "t", "n", "v"); + assertNotNull(key.toString()); + } +} diff --git 
a/pantera-core/src/test/java/com/auto1/pantera/http/cache/NegativeCacheUnifiedTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/cache/NegativeCacheUnifiedTest.java new file mode 100644 index 000000000..4b5d45498 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/cache/NegativeCacheUnifiedTest.java @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.cache; + +import com.auto1.pantera.cache.NegativeCacheConfig; +import java.time.Duration; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for the unified {@link NegativeCache} with {@link NegativeCacheKey} API. 
+ * + * @since 2.2.0 + */ +final class NegativeCacheUnifiedTest { + + private NegativeCache cache; + + @BeforeEach + void setUp() { + // Use a config-based constructor to create a single instance + final NegativeCacheConfig config = new NegativeCacheConfig( + Duration.ofHours(1), 50_000, false, + 5_000, Duration.ofMinutes(5), + 5_000_000, Duration.ofDays(7) + ); + this.cache = new NegativeCache(config); + } + + @Test + void isKnown404ReturnsFalseForUnknownKey() { + final NegativeCacheKey key = new NegativeCacheKey( + "maven-central", "maven", "com.example:foo", "1.0.0" + ); + assertFalse(cache.isKnown404(key)); + } + + @Test + void cacheNotFoundThenIsKnown404ReturnsTrue() { + final NegativeCacheKey key = new NegativeCacheKey( + "maven-central", "maven", "com.example:foo", "1.0.0" + ); + cache.cacheNotFound(key); + assertTrue(cache.isKnown404(key)); + } + + @Test + void invalidateClearsEntry() { + final NegativeCacheKey key = new NegativeCacheKey( + "npm-proxy", "npm", "@scope/bar", "2.0.0" + ); + cache.cacheNotFound(key); + assertTrue(cache.isKnown404(key)); + cache.invalidate(key); + assertFalse(cache.isKnown404(key)); + } + + @Test + void invalidateBatchClearsMultipleEntries() { + final NegativeCacheKey k1 = new NegativeCacheKey( + "pypi-group", "pypi", "requests", "2.28.0" + ); + final NegativeCacheKey k2 = new NegativeCacheKey( + "pypi-group", "pypi", "flask", "2.3.0" + ); + cache.cacheNotFound(k1); + cache.cacheNotFound(k2); + assertTrue(cache.isKnown404(k1)); + assertTrue(cache.isKnown404(k2)); + + CompletableFuture future = cache.invalidateBatch(List.of(k1, k2)); + future.join(); + + assertFalse(cache.isKnown404(k1)); + assertFalse(cache.isKnown404(k2)); + } + + @Test + void invalidateBatchWithEmptyListSucceeds() { + CompletableFuture future = cache.invalidateBatch(List.of()); + future.join(); + // Should complete without error + } + + @Test + void invalidateBatchWithNullSucceeds() { + CompletableFuture future = cache.invalidateBatch(null); + future.join(); 
+ } + + @Test + void differentScopesSameArtifactAreSeparateEntries() { + final NegativeCacheKey group = new NegativeCacheKey( + "libs-group", "maven", "com.example:foo", "1.0.0" + ); + final NegativeCacheKey proxy = new NegativeCacheKey( + "maven-central", "maven", "com.example:foo", "1.0.0" + ); + cache.cacheNotFound(group); + assertTrue(cache.isKnown404(group)); + assertFalse(cache.isKnown404(proxy)); + } + + @Test + void l1TtlExpiryWorks() throws InterruptedException { + // Create cache with very short TTL + final NegativeCacheConfig shortTtl = new NegativeCacheConfig( + Duration.ofMillis(50), 50_000, false, + 5_000, Duration.ofMillis(50), + 5_000_000, Duration.ofDays(7) + ); + final NegativeCache shortCache = new NegativeCache(shortTtl); + final NegativeCacheKey key = new NegativeCacheKey( + "test", "maven", "com.example:expiring", "1.0.0" + ); + shortCache.cacheNotFound(key); + assertTrue(shortCache.isKnown404(key)); + + // Wait for TTL to expire + Thread.sleep(100); + shortCache.cleanup(); + + assertFalse(shortCache.isKnown404(key)); + } + + @Test + void asyncCheckReturnsCorrectResult() { + final NegativeCacheKey key = new NegativeCacheKey( + "npm-proxy", "npm", "@types/node", "20.0.0" + ); + assertFalse(cache.isKnown404Async(key).join()); + cache.cacheNotFound(key); + assertTrue(cache.isKnown404Async(key).join()); + } + + @Test + void registryHoldsSharedInstance() { + final NegativeCacheRegistry registry = NegativeCacheRegistry.instance(); + registry.clear(); + registry.setSharedCache(cache); + assertSame(cache, registry.sharedCache()); + registry.clear(); + } + + @Test + void sizeTracksEntries() { + assertEquals(0, cache.size()); + cache.cacheNotFound(new NegativeCacheKey("s", "t", "a", "v1")); + assertEquals(1, cache.size()); + cache.cacheNotFound(new NegativeCacheKey("s", "t", "a", "v2")); + assertEquals(2, cache.size()); + } + + @Test + void clearRemovesAllEntries() { + cache.cacheNotFound(new NegativeCacheKey("s", "t", "a", "v1")); + 
cache.cacheNotFound(new NegativeCacheKey("s", "t", "b", "v2")); + assertTrue(cache.size() > 0); + cache.clear(); + assertEquals(0, cache.size()); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/cache/NegativeCacheUploadInvalidationTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/cache/NegativeCacheUploadInvalidationTest.java new file mode 100644 index 000000000..029e291aa --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/cache/NegativeCacheUploadInvalidationTest.java @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.cache; + +import com.auto1.pantera.cache.NegativeCacheConfig; +import java.time.Duration; +import java.util.List; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Simulates the upload-invalidation flow: + *

+ * <ol>
+ *   <li>Artifact is cached as 404 (negative cache hit).</li>
+ *   <li>Artifact is published (upload).</li>
+ *   <li>Negative cache entry for that key is invalidated.</li>
+ *   <li>Next GET no longer sees the stale 404.</li>
+ * </ol>
      + * + * @since 2.2.0 + */ +final class NegativeCacheUploadInvalidationTest { + + private NegativeCache cache; + + @BeforeEach + void setUp() { + final NegativeCacheConfig config = new NegativeCacheConfig( + Duration.ofHours(1), 50_000, false, + 5_000, Duration.ofMinutes(5), + 5_000_000, Duration.ofDays(7) + ); + this.cache = new NegativeCache(config); + } + + @Test + void publishInvalidatesNegCacheEntry() { + // Setup: artifact is cached as 404 + final NegativeCacheKey key = new NegativeCacheKey( + "maven-hosted", "maven", "com.example:foo", "1.0.0" + ); + cache.cacheNotFound(key); + assertTrue(cache.isKnown404(key), "Entry should be cached as 404"); + + // Simulate upload: invalidate the entry synchronously + cache.invalidateBatch(List.of(key)).join(); + + // Verify: next GET does not see stale 404 + assertFalse(cache.isKnown404(key), "Entry should be cleared after publish"); + } + + @Test + void publishInvalidatesGroupScopeEntry() { + // Setup: artifact is cached as 404 in both hosted and group scopes + final NegativeCacheKey hostedKey = new NegativeCacheKey( + "maven-hosted", "maven", "com.example:bar", "2.0.0" + ); + final NegativeCacheKey groupKey = new NegativeCacheKey( + "maven-group", "maven", "com.example:bar", "2.0.0" + ); + cache.cacheNotFound(hostedKey); + cache.cacheNotFound(groupKey); + assertTrue(cache.isKnown404(hostedKey)); + assertTrue(cache.isKnown404(groupKey)); + + // Simulate upload: invalidate both scope entries + cache.invalidateBatch(List.of(hostedKey, groupKey)).join(); + + assertFalse(cache.isKnown404(hostedKey)); + assertFalse(cache.isKnown404(groupKey)); + } + + @Test + void publishDoesNotAffectOtherArtifacts() { + final NegativeCacheKey target = new NegativeCacheKey( + "npm-proxy", "npm", "@types/node", "20.0.0" + ); + final NegativeCacheKey other = new NegativeCacheKey( + "npm-proxy", "npm", "@types/react", "18.0.0" + ); + cache.cacheNotFound(target); + cache.cacheNotFound(other); + + // Only invalidate the target + 
cache.invalidateBatch(List.of(target)).join(); + + assertFalse(cache.isKnown404(target)); + assertTrue(cache.isKnown404(other), "Other entries should remain"); + } + + @Test + void multiplePublishesAreIdempotent() { + final NegativeCacheKey key = new NegativeCacheKey( + "pypi-proxy", "pypi", "flask", "2.3.0" + ); + cache.cacheNotFound(key); + + // Double invalidation should succeed without error + cache.invalidateBatch(List.of(key)).join(); + cache.invalidateBatch(List.of(key)).join(); + + assertFalse(cache.isKnown404(key)); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/cache/ProxyCacheWriterTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/cache/ProxyCacheWriterTest.java new file mode 100644 index 000000000..9b108e901 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/cache/ProxyCacheWriterTest.java @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.cache; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.asto.Key; +import com.auto1.pantera.asto.Storage; +import com.auto1.pantera.asto.fs.FileStorage; +import com.auto1.pantera.asto.memory.InMemoryStorage; +import com.auto1.pantera.http.context.RequestContext; +import com.auto1.pantera.http.fault.Fault; +import com.auto1.pantera.http.fault.Fault.ChecksumAlgo; +import com.auto1.pantera.http.fault.Result; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.MessageDigest; +import java.util.EnumMap; +import java.util.HexFormat; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.instanceOf; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link ProxyCacheWriter} β€” the v2.2 atomic proxy cache writer that + * eliminates the Maven {@code .pom.sha1} mismatches (Β§9.5 of + * {@code docs/analysis/v2.2-target-architecture.md}). + * + *

      Each test uses a real {@link InMemoryStorage} or {@link FileStorage}; the + * upstream is modelled by closing over byte[] bodies served from test helpers. + * The writer never buffers the primary body on heap β€” it streams to a + * {@code Files.createTempFile} temp path, and the tests assert this + * temp path is cleaned up after every terminal outcome. + * + * @since 2.2.0 + */ +@SuppressWarnings("PMD.TooManyMethods") +final class ProxyCacheWriterTest { + + /** Pretend the client asked for this artifact on a Maven upstream. */ + private static final String UPSTREAM_URI = + "https://repo.upstream.example/releases/com/fasterxml/oss-parent/58/oss-parent-58.pom"; + + /** Cache key under which the primary lands. */ + private static final Key PRIMARY_KEY = + new Key.From("com/fasterxml/oss-parent/58/oss-parent-58.pom"); + + /** Representative primary body. */ + private static final byte[] PRIMARY_BYTES = + "4.0.0\n".getBytes(StandardCharsets.UTF_8); + + /** Arbitrary request context, only used for log trace-id. */ + private static final RequestContext CTX = + new RequestContext("trace-abc", "req-1", "maven-proxy", UPSTREAM_URI); + + // ===== verificationFailure_rejectsWrite ===== + + @Test + @DisplayName("sidecar disagreement β†’ Err(UpstreamIntegrity), cache untouched, temp file cleaned") + void verificationFailure_rejectsWrite() throws IOException { + final Storage cache = new InMemoryStorage(); + final int tempFilesBefore = countTempFiles(); + final ProxyCacheWriter writer = new ProxyCacheWriter(cache, "maven-proxy"); + // Real bytes; wrong SHA-1 claim (the oss-parent-58 symptom from the doc Β§9.5). 
+ final String wrongSha1 = "15ce8a2c447057a4cfffd7a1d57b80937d293e7a"; + + final Result result = writer.writeWithSidecars( + PRIMARY_KEY, + UPSTREAM_URI, + () -> CompletableFuture.completedFuture(new ByteArrayInputStream(PRIMARY_BYTES)), + Map.of(ChecksumAlgo.SHA1, sidecarServing(wrongSha1.getBytes(StandardCharsets.UTF_8))), + CTX + ).toCompletableFuture().join(); + + assertThat("Err result", result, instanceOf(Result.Err.class)); + final Fault fault = ((Result.Err) result).fault(); + assertThat("UpstreamIntegrity fault", fault, instanceOf(Fault.UpstreamIntegrity.class)); + final Fault.UpstreamIntegrity ui = (Fault.UpstreamIntegrity) fault; + assertEquals(ChecksumAlgo.SHA1, ui.algo(), "algo carried"); + assertEquals(wrongSha1, ui.sidecarClaim(), "claim carried"); + assertEquals(sha1Hex(PRIMARY_BYTES), ui.computed(), "computed carried"); + assertFalse(cache.exists(PRIMARY_KEY).join(), "primary NOT in cache"); + assertFalse( + cache.exists(new Key.From(PRIMARY_KEY.string() + ".sha1")).join(), + "sidecar NOT in cache" + ); + assertEquals( + tempFilesBefore, + countTempFiles(), + "temp file cleaned up after rejected write" + ); + } + + // ===== verificationSuccess_atomicallyMoves ===== + + @Test + @DisplayName("matching sidecars β†’ primary + every sidecar readable from cache") + void verificationSuccess_atomicallyMoves() { + final Storage cache = new InMemoryStorage(); + final ProxyCacheWriter writer = new ProxyCacheWriter(cache, "maven-proxy"); + final Map>>> sidecars = + new LinkedHashMap<>(); + sidecars.put(ChecksumAlgo.SHA1, sidecarServing(sha1Hex(PRIMARY_BYTES).getBytes(StandardCharsets.UTF_8))); + sidecars.put(ChecksumAlgo.MD5, sidecarServing(md5Hex(PRIMARY_BYTES).getBytes(StandardCharsets.UTF_8))); + sidecars.put(ChecksumAlgo.SHA256, sidecarServing(sha256Hex(PRIMARY_BYTES).getBytes(StandardCharsets.UTF_8))); + + final Result result = writer.writeWithSidecars( + PRIMARY_KEY, + UPSTREAM_URI, + () -> CompletableFuture.completedFuture(new 
ByteArrayInputStream(PRIMARY_BYTES)), + sidecars, + CTX + ).toCompletableFuture().join(); + + assertThat("Ok result", result, instanceOf(Result.Ok.class)); + assertTrue(cache.exists(PRIMARY_KEY).join(), "primary in cache"); + assertArrayEquals( + PRIMARY_BYTES, + cache.value(PRIMARY_KEY).join().asBytes(), + "primary bytes match" + ); + assertArrayEquals( + sha1Hex(PRIMARY_BYTES).getBytes(StandardCharsets.UTF_8), + cache.value(new Key.From(PRIMARY_KEY.string() + ".sha1")).join().asBytes(), + "sha1 sidecar persisted" + ); + assertArrayEquals( + md5Hex(PRIMARY_BYTES).getBytes(StandardCharsets.UTF_8), + cache.value(new Key.From(PRIMARY_KEY.string() + ".md5")).join().asBytes(), + "md5 sidecar persisted" + ); + assertArrayEquals( + sha256Hex(PRIMARY_BYTES).getBytes(StandardCharsets.UTF_8), + cache.value(new Key.From(PRIMARY_KEY.string() + ".sha256")).join().asBytes(), + "sha256 sidecar persisted" + ); + } + + // ===== sidecarAbsent_stillWrites ===== + + @Test + @DisplayName("upstream 404 on every sidecar β†’ primary still written") + void sidecarAbsent_stillWrites() { + final Storage cache = new InMemoryStorage(); + final ProxyCacheWriter writer = new ProxyCacheWriter(cache, "maven-proxy"); + final Map>>> sidecars = Map.of( + ChecksumAlgo.SHA1, sidecar404(), + ChecksumAlgo.MD5, sidecar404() + ); + + final Result result = writer.writeWithSidecars( + PRIMARY_KEY, + UPSTREAM_URI, + () -> CompletableFuture.completedFuture(new ByteArrayInputStream(PRIMARY_BYTES)), + sidecars, + CTX + ).toCompletableFuture().join(); + + assertThat("Ok result", result, instanceOf(Result.Ok.class)); + assertTrue(cache.exists(PRIMARY_KEY).join(), "primary in cache"); + assertFalse( + cache.exists(new Key.From(PRIMARY_KEY.string() + ".sha1")).join(), + "absent sidecar NOT synthesized" + ); + } + + // ===== sidecar with trailing junk (hex *filename) ===== + + @Test + @DisplayName("sidecar body 'hex *filename' accepted β€” hex extracted before comparison") + void 
sidecarNormalisation_acceptsHexWithFilename() { + final Storage cache = new InMemoryStorage(); + final ProxyCacheWriter writer = new ProxyCacheWriter(cache, "maven-proxy"); + final String body = sha1Hex(PRIMARY_BYTES) + " oss-parent-58.pom\n"; + + final Result result = writer.writeWithSidecars( + PRIMARY_KEY, + UPSTREAM_URI, + () -> CompletableFuture.completedFuture(new ByteArrayInputStream(PRIMARY_BYTES)), + Map.of(ChecksumAlgo.SHA1, sidecarServing(body.getBytes(StandardCharsets.UTF_8))), + CTX + ).toCompletableFuture().join(); + + assertThat("Ok result", result, instanceOf(Result.Ok.class)); + } + + // ===== atomicity_noPartialStateOnCrash ===== + + @Test + @DisplayName("primary save fails β†’ nothing in cache, temp file cleaned") + void atomicity_noPartialStateOnCrash() throws IOException { + final CrashingStorage cache = new CrashingStorage(); + cache.failOn(PRIMARY_KEY); + final int tempFilesBefore = countTempFiles(); + final ProxyCacheWriter writer = new ProxyCacheWriter(cache, "maven-proxy"); + + final Result result = writer.writeWithSidecars( + PRIMARY_KEY, + UPSTREAM_URI, + () -> CompletableFuture.completedFuture(new ByteArrayInputStream(PRIMARY_BYTES)), + Map.of(ChecksumAlgo.SHA1, sidecarServing(sha1Hex(PRIMARY_BYTES).getBytes(StandardCharsets.UTF_8))), + CTX + ).toCompletableFuture().join(); + + assertThat("Err on storage crash", result, instanceOf(Result.Err.class)); + assertThat( + "StorageUnavailable carried", + ((Result.Err) result).fault(), + instanceOf(Fault.StorageUnavailable.class) + ); + assertFalse(cache.exists(PRIMARY_KEY).join(), "primary NOT in cache"); + assertFalse( + cache.exists(new Key.From(PRIMARY_KEY.string() + ".sha1")).join(), + "sidecar NOT in cache" + ); + assertEquals(tempFilesBefore, countTempFiles(), "temp file cleaned"); + } + + @Test + @DisplayName("sidecar save fails after primary lands β†’ primary + sidecar rolled back") + void atomicity_rollbackOnSidecarFailure() { + final CrashingStorage cache = new CrashingStorage(); + 
final Key sha1Key = new Key.From(PRIMARY_KEY.string() + ".sha1"); + cache.failOn(sha1Key); + final ProxyCacheWriter writer = new ProxyCacheWriter(cache, "maven-proxy"); + + final Result result = writer.writeWithSidecars( + PRIMARY_KEY, + UPSTREAM_URI, + () -> CompletableFuture.completedFuture(new ByteArrayInputStream(PRIMARY_BYTES)), + Map.of(ChecksumAlgo.SHA1, sidecarServing(sha1Hex(PRIMARY_BYTES).getBytes(StandardCharsets.UTF_8))), + CTX + ).toCompletableFuture().join(); + + assertThat("Err on partial failure", result, instanceOf(Result.Err.class)); + assertFalse(cache.exists(PRIMARY_KEY).join(), "primary rolled back"); + assertFalse(cache.exists(sha1Key).join(), "sidecar rolled back"); + } + + // ===== swrCoherence ===== + + @Test + @DisplayName("stale primary + sidecar β†’ fresh upstream β†’ both updated atomically") + void swrCoherence(@TempDir final Path tempDir) throws Exception { + final FileStorage cache = new FileStorage(tempDir); + // Seed with STALE primary + matching STALE sidecar β€” both consistent but stale. + final byte[] staleBytes = "stale content\n".getBytes(StandardCharsets.UTF_8); + cache.save(PRIMARY_KEY, new Content.From(staleBytes)).join(); + cache.save( + new Key.From(PRIMARY_KEY.string() + ".sha1"), + new Content.From(sha1Hex(staleBytes).getBytes(StandardCharsets.UTF_8)) + ).join(); + + // Now refetch with a fresh (different) primary + matching fresh sidecar. 
+ final ProxyCacheWriter writer = new ProxyCacheWriter(cache, "maven-proxy"); + final byte[] freshBytes = "fresh content\n".getBytes(StandardCharsets.UTF_8); + final Result result = writer.writeWithSidecars( + PRIMARY_KEY, + UPSTREAM_URI, + () -> CompletableFuture.completedFuture(new ByteArrayInputStream(freshBytes)), + Map.of( + ChecksumAlgo.SHA1, sidecarServing(sha1Hex(freshBytes).getBytes(StandardCharsets.UTF_8)), + ChecksumAlgo.SHA256, sidecarServing(sha256Hex(freshBytes).getBytes(StandardCharsets.UTF_8)) + ), + CTX + ).toCompletableFuture().join(); + + assertThat("Ok result", result, instanceOf(Result.Ok.class)); + // Both files now reflect the FRESH content β€” no split brain. + assertArrayEquals(freshBytes, cache.value(PRIMARY_KEY).join().asBytes(), "primary is fresh"); + assertEquals( + sha1Hex(freshBytes), + new String( + cache.value(new Key.From(PRIMARY_KEY.string() + ".sha1")).join().asBytes(), + StandardCharsets.UTF_8 + ), + "sidecar matches fresh primary" + ); + // Cross-hash consistency invariant: sidecar bytes recompute to primary's hex. 
+ final byte[] primaryReread = cache.value(PRIMARY_KEY).join().asBytes(); + final byte[] sidecarBytes = cache.value(new Key.From(PRIMARY_KEY.string() + ".sha1")).join().asBytes(); + assertEquals( + sha1Hex(primaryReread), + new String(sidecarBytes, StandardCharsets.UTF_8), + "cache invariant: sidecar hex == SHA-1 of primary bytes" + ); + } + + @Test + @DisplayName("stale pair + upstream brings a MISMATCHED fresh pair β†’ reject, keep stale intact") + void swrCoherence_rejectMismatchedRefetch(@TempDir final Path tempDir) { + final FileStorage cache = new FileStorage(tempDir); + final byte[] staleBytes = "stale content\n".getBytes(StandardCharsets.UTF_8); + final String staleSha1 = sha1Hex(staleBytes); + cache.save(PRIMARY_KEY, new Content.From(staleBytes)).join(); + cache.save( + new Key.From(PRIMARY_KEY.string() + ".sha1"), + new Content.From(staleSha1.getBytes(StandardCharsets.UTF_8)) + ).join(); + + final ProxyCacheWriter writer = new ProxyCacheWriter(cache, "maven-proxy"); + final byte[] freshBytes = "fresh content\n".getBytes(StandardCharsets.UTF_8); + // Upstream serves a sha1 claim that DOES NOT match the fresh primary bytes. + final String bogusClaim = "ffffffffffffffffffffffffffffffffffffffff"; + final Result result = writer.writeWithSidecars( + PRIMARY_KEY, + UPSTREAM_URI, + () -> CompletableFuture.completedFuture(new ByteArrayInputStream(freshBytes)), + Map.of(ChecksumAlgo.SHA1, sidecarServing(bogusClaim.getBytes(StandardCharsets.UTF_8))), + CTX + ).toCompletableFuture().join(); + + assertThat("Err(UpstreamIntegrity)", result, instanceOf(Result.Err.class)); + // Stale pair must be intact β€” the rejected write never mutated the cache. 
+ assertArrayEquals(staleBytes, cache.value(PRIMARY_KEY).join().asBytes(), "stale primary intact"); + assertEquals( + staleSha1, + new String( + cache.value(new Key.From(PRIMARY_KEY.string() + ".sha1")).join().asBytes(), + StandardCharsets.UTF_8 + ), + "stale sidecar intact" + ); + } + + // ===== integration: real FileStorage roundtrip on the oss-parent-58.pom symptom ===== + + @Test + @DisplayName("oss-parent-58.pom regression: mismatched upstream .sha1 rejects cache write") + void ossParent58_regressionCheck(@TempDir final Path tempDir) { + final FileStorage cache = new FileStorage(tempDir); + final ProxyCacheWriter writer = new ProxyCacheWriter(cache, "libs-release-local"); + // The exact hex from the production log in Β§9.5. + final byte[] upstreamSha1 = "15ce8a2c447057a4cfffd7a1d57b80937d293e7a" + .getBytes(StandardCharsets.UTF_8); + final byte[] pomBytes = "oss-parent-58".getBytes(StandardCharsets.UTF_8); + + final Result result = writer.writeWithSidecars( + PRIMARY_KEY, + UPSTREAM_URI, + () -> CompletableFuture.completedFuture(new ByteArrayInputStream(pomBytes)), + Map.of(ChecksumAlgo.SHA1, sidecarServing(upstreamSha1)), + CTX + ).toCompletableFuture().join(); + + assertThat("Err", result, instanceOf(Result.Err.class)); + assertThat( + "UpstreamIntegrity fault", + ((Result.Err) result).fault(), + instanceOf(Fault.UpstreamIntegrity.class) + ); + assertFalse(cache.exists(PRIMARY_KEY).join(), "no primary cached"); + assertFalse( + cache.exists(new Key.From(PRIMARY_KEY.string() + ".sha1")).join(), + "no sidecar cached" + ); + } + + // ===== helpers ===== + + private static void assertArrayEquals( + final byte[] expected, final byte[] actual, final String message + ) { + org.junit.jupiter.api.Assertions.assertArrayEquals(expected, actual, message); + } + + private static Supplier>> sidecarServing( + final byte[] body + ) { + return () -> CompletableFuture.completedFuture( + Optional.of(new ByteArrayInputStream(body)) + ); + } + + private static Supplier>> 
sidecar404() { + return () -> CompletableFuture.completedFuture(Optional.empty()); + } + + private static String sha1Hex(final byte[] body) { + return hex("SHA-1", body); + } + + private static String sha256Hex(final byte[] body) { + return hex("SHA-256", body); + } + + private static String md5Hex(final byte[] body) { + return hex("MD5", body); + } + + private static String hex(final String algo, final byte[] body) { + try { + final MessageDigest md = MessageDigest.getInstance(algo); + return HexFormat.of().formatHex(md.digest(body)); + } catch (final Exception ex) { + throw new AssertionError(ex); + } + } + + private static int countTempFiles() throws IOException { + final Path tempDir = Path.of(System.getProperty("java.io.tmpdir")); + if (!Files.exists(tempDir)) { + return 0; + } + try (Stream stream = Files.list(tempDir)) { + return (int) stream + .filter(p -> p.getFileName().toString().startsWith("pantera-proxy-")) + .count(); + } + } + + /** + * A storage decorator that fails on a specific key, to exercise partial- + * failure rollback without depending on OS behaviour. + */ + private static final class CrashingStorage implements Storage { + private final InMemoryStorage delegate = new InMemoryStorage(); + private Key failing; + + void failOn(final Key key) { + this.failing = key; + } + + @Override + public CompletableFuture exists(final Key key) { + return this.delegate.exists(key); + } + + @Override + public CompletableFuture> list(final Key prefix) { + return this.delegate.list(prefix); + } + + @Override + public CompletableFuture save(final Key key, final Content content) { + if (key.equals(this.failing)) { + // Drain the content so the caller's stream doesn't dangle, then fail. 
+ return content.asBytesFuture().thenCompose(ignored -> + CompletableFuture.failedFuture(new RuntimeException("boom")) + ); + } + return this.delegate.save(key, content); + } + + @Override + public CompletableFuture move(final Key source, final Key destination) { + return this.delegate.move(source, destination); + } + + @Override + public CompletableFuture metadata(final Key key) { + return this.delegate.metadata(key); + } + + @Override + public CompletableFuture value(final Key key) { + return this.delegate.value(key); + } + + @Override + public CompletableFuture delete(final Key key) { + return this.delegate.delete(key); + } + + @Override + public CompletionStage exclusively( + final Key key, final java.util.function.Function> op + ) { + return this.delegate.exclusively(key, op); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/cache/RequestDeduplicatorTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/cache/RequestDeduplicatorTest.java deleted file mode 100644 index 150935526..000000000 --- a/pantera-core/src/test/java/com/auto1/pantera/http/cache/RequestDeduplicatorTest.java +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
- */ -package com.auto1.pantera.http.cache; - -import com.auto1.pantera.asto.Key; -import com.auto1.pantera.http.cache.RequestDeduplicator.FetchSignal; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Timeout; - -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.is; - -/** - * Tests for {@link RequestDeduplicator}. - */ -class RequestDeduplicatorTest { - - @Test - @Timeout(5) - void signalStrategyDeduplicatesConcurrentRequests() throws Exception { - final RequestDeduplicator dedup = new RequestDeduplicator(DedupStrategy.SIGNAL); - final Key key = new Key.From("test/artifact.jar"); - final AtomicInteger fetchCount = new AtomicInteger(0); - final CompletableFuture blocker = new CompletableFuture<>(); - // First request: starts the fetch, blocks until we complete manually - final CompletableFuture first = dedup.deduplicate( - key, - () -> { - fetchCount.incrementAndGet(); - return blocker; - } - ); - // Second request for same key: should join the existing one - final CompletableFuture second = dedup.deduplicate( - key, - () -> { - fetchCount.incrementAndGet(); - return CompletableFuture.completedFuture(FetchSignal.SUCCESS); - } - ); - assertThat("fetch should only run once", fetchCount.get(), equalTo(1)); - assertThat("first not done yet", first.isDone(), is(false)); - assertThat("second not done yet", second.isDone(), is(false)); - // Complete the fetch - blocker.complete(FetchSignal.SUCCESS); - assertThat(first.get(1, TimeUnit.SECONDS), equalTo(FetchSignal.SUCCESS)); - assertThat(second.get(1, TimeUnit.SECONDS), equalTo(FetchSignal.SUCCESS)); - } - - @Test - @Timeout(5) - void signalStrategyPropagatesNotFound() throws Exception { - final RequestDeduplicator dedup = new 
RequestDeduplicator(DedupStrategy.SIGNAL); - final Key key = new Key.From("missing/artifact.jar"); - final CompletableFuture blocker = new CompletableFuture<>(); - final CompletableFuture first = dedup.deduplicate( - key, () -> blocker - ); - final CompletableFuture second = dedup.deduplicate( - key, () -> CompletableFuture.completedFuture(FetchSignal.SUCCESS) - ); - blocker.complete(FetchSignal.NOT_FOUND); - assertThat(first.get(1, TimeUnit.SECONDS), equalTo(FetchSignal.NOT_FOUND)); - assertThat(second.get(1, TimeUnit.SECONDS), equalTo(FetchSignal.NOT_FOUND)); - } - - @Test - @Timeout(5) - void signalStrategyPropagatesError() throws Exception { - final RequestDeduplicator dedup = new RequestDeduplicator(DedupStrategy.SIGNAL); - final Key key = new Key.From("error/artifact.jar"); - final CompletableFuture blocker = new CompletableFuture<>(); - final CompletableFuture first = dedup.deduplicate( - key, () -> blocker - ); - final CompletableFuture second = dedup.deduplicate( - key, () -> CompletableFuture.completedFuture(FetchSignal.SUCCESS) - ); - // Complete with exception β€” should signal ERROR - blocker.completeExceptionally(new RuntimeException("upstream down")); - assertThat(first.get(1, TimeUnit.SECONDS), equalTo(FetchSignal.ERROR)); - assertThat(second.get(1, TimeUnit.SECONDS), equalTo(FetchSignal.ERROR)); - } - - @Test - @Timeout(5) - void signalStrategyCleansUpAfterCompletion() throws Exception { - final RequestDeduplicator dedup = new RequestDeduplicator(DedupStrategy.SIGNAL); - final Key key = new Key.From("cleanup/artifact.jar"); - assertThat("initially empty", dedup.inFlightCount(), equalTo(0)); - final CompletableFuture blocker = new CompletableFuture<>(); - dedup.deduplicate(key, () -> blocker); - assertThat("one in-flight", dedup.inFlightCount(), equalTo(1)); - blocker.complete(FetchSignal.SUCCESS); - // Allow async cleanup - Thread.sleep(50); - assertThat("cleaned up", dedup.inFlightCount(), equalTo(0)); - } - - @Test - @Timeout(5) - void 
signalStrategyAllowsNewRequestAfterCompletion() throws Exception { - final RequestDeduplicator dedup = new RequestDeduplicator(DedupStrategy.SIGNAL); - final Key key = new Key.From("reuse/artifact.jar"); - final AtomicInteger fetchCount = new AtomicInteger(0); - // First request - final CompletableFuture first = dedup.deduplicate( - key, - () -> { - fetchCount.incrementAndGet(); - return CompletableFuture.completedFuture(FetchSignal.SUCCESS); - } - ); - first.get(1, TimeUnit.SECONDS); - Thread.sleep(50); - // Second request for same key after completion β€” should start new fetch - final CompletableFuture second = dedup.deduplicate( - key, - () -> { - fetchCount.incrementAndGet(); - return CompletableFuture.completedFuture(FetchSignal.SUCCESS); - } - ); - second.get(1, TimeUnit.SECONDS); - assertThat("should have fetched twice", fetchCount.get(), equalTo(2)); - } - - @Test - @Timeout(5) - void noneStrategyDoesNotDeduplicate() throws Exception { - final RequestDeduplicator dedup = new RequestDeduplicator(DedupStrategy.NONE); - final Key key = new Key.From("none/artifact.jar"); - final AtomicInteger fetchCount = new AtomicInteger(0); - final CountDownLatch latch = new CountDownLatch(1); - final CompletableFuture first = dedup.deduplicate( - key, - () -> { - fetchCount.incrementAndGet(); - return CompletableFuture.supplyAsync(() -> { - try { - latch.await(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - return FetchSignal.SUCCESS; - }); - } - ); - final CompletableFuture second = dedup.deduplicate( - key, - () -> { - fetchCount.incrementAndGet(); - return CompletableFuture.completedFuture(FetchSignal.SUCCESS); - } - ); - // Both should have been called (no dedup) - second.get(1, TimeUnit.SECONDS); - assertThat("both fetches should have been invoked", fetchCount.get(), equalTo(2)); - latch.countDown(); - first.get(1, TimeUnit.SECONDS); - } - - @Test - @Timeout(5) - void storageStrategyDoesNotDeduplicate() throws Exception { - final 
RequestDeduplicator dedup = new RequestDeduplicator(DedupStrategy.STORAGE); - final Key key = new Key.From("storage/artifact.jar"); - final AtomicInteger fetchCount = new AtomicInteger(0); - dedup.deduplicate( - key, - () -> { - fetchCount.incrementAndGet(); - return CompletableFuture.completedFuture(FetchSignal.SUCCESS); - } - ).get(1, TimeUnit.SECONDS); - dedup.deduplicate( - key, - () -> { - fetchCount.incrementAndGet(); - return CompletableFuture.completedFuture(FetchSignal.SUCCESS); - } - ).get(1, TimeUnit.SECONDS); - assertThat("STORAGE strategy delegates each call", fetchCount.get(), equalTo(2)); - } - - @Test - @Timeout(5) - void shutdownStopsCleanupAndClearsInFlight() throws Exception { - final RequestDeduplicator dedup = new RequestDeduplicator(DedupStrategy.SIGNAL); - final CompletableFuture neverComplete = new CompletableFuture<>(); - final CompletableFuture result = dedup.deduplicate( - new Key.From("shutdown/test"), () -> neverComplete - ); - assertThat("one in-flight before shutdown", dedup.inFlightCount(), equalTo(1)); - dedup.shutdown(); - assertThat("in-flight cleared after shutdown", dedup.inFlightCount(), equalTo(0)); - assertThat("result is done", result.isDone(), is(true)); - assertThat("result is ERROR", result.join(), equalTo(FetchSignal.ERROR)); - } - - @Test - @Timeout(5) - void closeIsIdempotent() throws Exception { - final RequestDeduplicator dedup = new RequestDeduplicator(DedupStrategy.SIGNAL); - dedup.close(); - dedup.close(); - assertThat("double close does not throw", true, is(true)); - } - - @Test - void differentKeysAreNotDeduplicated() throws Exception { - final RequestDeduplicator dedup = new RequestDeduplicator(DedupStrategy.SIGNAL); - final AtomicInteger fetchCount = new AtomicInteger(0); - final CompletableFuture blocker1 = new CompletableFuture<>(); - final CompletableFuture blocker2 = new CompletableFuture<>(); - dedup.deduplicate( - new Key.From("key1"), () -> { - fetchCount.incrementAndGet(); - return blocker1; - } - ); - 
dedup.deduplicate( - new Key.From("key2"), () -> { - fetchCount.incrementAndGet(); - return blocker2; - } - ); - assertThat("different keys should both fetch", fetchCount.get(), equalTo(2)); - blocker1.complete(FetchSignal.SUCCESS); - blocker2.complete(FetchSignal.SUCCESS); - } -} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/context/ContextualExecutorServiceTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/context/ContextualExecutorServiceTest.java new file mode 100644 index 000000000..b0f04d2ad --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/context/ContextualExecutorServiceTest.java @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.context; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.logging.log4j.ThreadContext; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link ContextualExecutorService} β€” the WI-post-03a wrapper that + * contextualises every task-submission method, closing the gap left + * by {@link ContextualExecutor} (which only decorates the bare + * {@link 
java.util.concurrent.Executor} interface). + * + *

      Each test asserts one of: + *

        + *
      • a specific submit path propagates the caller's Log4j2 + * {@link ThreadContext} to the runner thread, or
      • + *
      • the runner's prior ThreadContext is restored after the task (no leak), + * even when the task throws, or
      • + *
      • lifecycle methods delegate to the underlying pool.
      • + *
      + */ +@SuppressWarnings({"PMD.TooManyMethods", "PMD.AvoidDuplicateLiterals", + "PMD.JUnitAssertionsShouldIncludeMessage"}) +final class ContextualExecutorServiceTest { + + /** + * Raw pool the wrapper decorates. Recreated per test. + */ + private ExecutorService backing; + + /** + * The wrapper under test. + */ + private ContextualExecutorService wrapper; + + @BeforeEach + void setup() { + ThreadContext.clearMap(); + this.backing = Executors.newFixedThreadPool(2); + this.wrapper = ContextualExecutorService.wrap(this.backing); + } + + @AfterEach + void tearDown() throws InterruptedException { + ThreadContext.clearMap(); + this.wrapper.shutdownNow(); + this.backing.shutdownNow(); + this.backing.awaitTermination(5L, TimeUnit.SECONDS); + } + + @Test + @DisplayName("execute(Runnable) propagates caller ThreadContext to runner thread") + void wrappedExecutePropagatesThreadContext() throws Exception { + ThreadContext.put("trace.id", "exec-trace"); + ThreadContext.put("repository.name", "npm_group"); + final CountDownLatch done = new CountDownLatch(1); + final AtomicReference seenTrace = new AtomicReference<>(); + final AtomicReference seenRepo = new AtomicReference<>(); + this.wrapper.execute(() -> { + try { + seenTrace.set(ThreadContext.get("trace.id")); + seenRepo.set(ThreadContext.get("repository.name")); + } finally { + done.countDown(); + } + }); + MatcherAssert.assertThat( + done.await(5L, TimeUnit.SECONDS), Matchers.is(true) + ); + MatcherAssert.assertThat(seenTrace.get(), Matchers.is("exec-trace")); + MatcherAssert.assertThat(seenRepo.get(), Matchers.is("npm_group")); + } + + @Test + @DisplayName("submit(Callable) propagates caller ThreadContext to runner thread " + + "β€” closes the bypass flagged by the Wave-3 review") + void wrappedSubmitCallablePropagatesThreadContext() throws Exception { + ThreadContext.put("trace.id", "submit-callable"); + ThreadContext.put("repository.name", "maven_group"); + final Future fut = this.wrapper.submit((Callable) () -> { + 
final String trace = ThreadContext.get("trace.id"); + final String repo = ThreadContext.get("repository.name"); + return trace + "|" + repo; + }); + MatcherAssert.assertThat( + fut.get(5L, TimeUnit.SECONDS), + Matchers.is("submit-callable|maven_group") + ); + } + + @Test + @DisplayName("submit(Runnable) propagates caller ThreadContext to runner thread") + void wrappedSubmitRunnablePropagatesThreadContext() throws Exception { + ThreadContext.put("trace.id", "submit-runnable"); + final AtomicReference seen = new AtomicReference<>(); + final Future fut = this.wrapper.submit((Runnable) () -> + seen.set(ThreadContext.get("trace.id")) + ); + fut.get(5L, TimeUnit.SECONDS); + MatcherAssert.assertThat(seen.get(), Matchers.is("submit-runnable")); + } + + @Test + @DisplayName("submit(Runnable, result) propagates caller ThreadContext and " + + "returns the provided result") + void wrappedSubmitRunnableResultPropagatesThreadContext() throws Exception { + ThreadContext.put("trace.id", "submit-runnable-result"); + final AtomicReference seen = new AtomicReference<>(); + final Future fut = this.wrapper.submit( + () -> seen.set(ThreadContext.get("trace.id")), + "ok" + ); + MatcherAssert.assertThat(fut.get(5L, TimeUnit.SECONDS), Matchers.is("ok")); + MatcherAssert.assertThat(seen.get(), Matchers.is("submit-runnable-result")); + } + + @Test + @DisplayName("invokeAll propagates caller ThreadContext to every task") + void wrappedInvokeAllPropagatesThreadContextToEveryTask() throws Exception { + ThreadContext.put("trace.id", "invoke-all"); + final List> tasks = Arrays.asList( + () -> ThreadContext.get("trace.id") + ":a", + () -> ThreadContext.get("trace.id") + ":b", + () -> ThreadContext.get("trace.id") + ":c" + ); + final List> futures = this.wrapper.invokeAll(tasks); + final List results = new ArrayList<>(futures.size()); + for (final Future f : futures) { + results.add(f.get(5L, TimeUnit.SECONDS)); + } + MatcherAssert.assertThat( + results, + Matchers.containsInAnyOrder( + 
"invoke-all:a", "invoke-all:b", "invoke-all:c" + ) + ); + } + + @Test + @DisplayName("invokeAll(..., timeout) propagates caller ThreadContext to every task") + void wrappedInvokeAllTimeoutPropagatesThreadContextToEveryTask() throws Exception { + ThreadContext.put("trace.id", "invoke-all-to"); + final List> tasks = Arrays.asList( + () -> ThreadContext.get("trace.id") + ":x", + () -> ThreadContext.get("trace.id") + ":y" + ); + final List> futures = this.wrapper.invokeAll( + tasks, 5L, TimeUnit.SECONDS + ); + final List results = new ArrayList<>(futures.size()); + for (final Future f : futures) { + results.add(f.get(5L, TimeUnit.SECONDS)); + } + MatcherAssert.assertThat( + results, + Matchers.containsInAnyOrder("invoke-all-to:x", "invoke-all-to:y") + ); + } + + @Test + @DisplayName("invokeAny propagates caller ThreadContext to every task") + void wrappedInvokeAnyPropagatesThreadContextToEveryTask() throws Exception { + ThreadContext.put("trace.id", "invoke-any"); + final List> tasks = Arrays.asList( + () -> ThreadContext.get("trace.id") + ":first", + () -> ThreadContext.get("trace.id") + ":second" + ); + final String result = this.wrapper.invokeAny(tasks); + MatcherAssert.assertThat( + result, + Matchers.anyOf( + Matchers.is("invoke-any:first"), + Matchers.is("invoke-any:second") + ) + ); + } + + @Test + @DisplayName("invokeAny(..., timeout) propagates caller ThreadContext to every task") + void wrappedInvokeAnyTimeoutPropagatesThreadContextToEveryTask() throws Exception { + ThreadContext.put("trace.id", "invoke-any-to"); + final List> tasks = Arrays.asList( + () -> ThreadContext.get("trace.id") + ":only" + ); + final String result = this.wrapper.invokeAny(tasks, 5L, TimeUnit.SECONDS); + MatcherAssert.assertThat(result, Matchers.is("invoke-any-to:only")); + } + + @Test + @DisplayName("shutdown() delegates to underlying pool") + void shutdownDelegatesToUnderlyingPool() throws Exception { + MatcherAssert.assertThat(this.wrapper.isShutdown(), Matchers.is(false)); + 
MatcherAssert.assertThat(this.backing.isShutdown(), Matchers.is(false)); + this.wrapper.shutdown(); + MatcherAssert.assertThat(this.backing.isShutdown(), Matchers.is(true)); + MatcherAssert.assertThat(this.wrapper.isShutdown(), Matchers.is(true)); + MatcherAssert.assertThat( + this.wrapper.awaitTermination(5L, TimeUnit.SECONDS), + Matchers.is(true) + ); + MatcherAssert.assertThat(this.wrapper.isTerminated(), Matchers.is(true)); + MatcherAssert.assertThat(this.backing.isTerminated(), Matchers.is(true)); + } + + @Test + @DisplayName("shutdownNow() delegates to underlying pool and returns pending tasks") + void shutdownNowDelegatesAndReturnsPendingTasks() throws Exception { + // Saturate the 2-thread pool with a blocking task so a following + // submit queues instead of running. + final CountDownLatch blockStart = new CountDownLatch(2); + final CountDownLatch release = new CountDownLatch(1); + for (int i = 0; i < 2; i++) { + this.wrapper.submit(() -> { + blockStart.countDown(); + try { + release.await(5L, TimeUnit.SECONDS); + } catch (final InterruptedException ie) { + Thread.currentThread().interrupt(); + } + }); + } + MatcherAssert.assertThat( + blockStart.await(5L, TimeUnit.SECONDS), Matchers.is(true) + ); + // Queue a third task β€” it should NOT start before shutdownNow. + this.wrapper.submit(() -> { + // Never runs β€” shutdownNow should return it as pending. 
+ }); + final List pending = this.wrapper.shutdownNow(); + release.countDown(); + MatcherAssert.assertThat( + "shutdownNow returns at least the queued task", + pending.size(), Matchers.greaterThanOrEqualTo(1) + ); + MatcherAssert.assertThat(this.backing.isShutdown(), Matchers.is(true)); + } + + @Test + @DisplayName("wrap(null) throws NullPointerException on the delegate parameter") + void wrapRejectsNullDelegate() { + try { + ContextualExecutorService.wrap(null); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException expected) { + MatcherAssert.assertThat( + "NPE mentions 'delegate'", + expected.getMessage(), Matchers.containsString("delegate") + ); + } + } + + @Test + @DisplayName("Runner thread's prior ThreadContext is restored after the task β€” " + + "no leak across invocations (submit(Callable) path)") + void contextRestoredOnRunnerThreadAfterTask() throws Exception { + // Pin to a single-thread pool so we can prove restore on the *same* runner. + final ExecutorService single = Executors.newSingleThreadExecutor(); + final ContextualExecutorService pin = ContextualExecutorService.wrap(single); + try { + // Seed the runner's own ThreadContext via the RAW pool β€” bypassing the + // wrapper so the seed persists on the runner thread. If we seeded + // through the wrapper, the post-task restore would wipe it. + single.submit(() -> ThreadContext.put("runner.own", "runner-seed")) + .get(5L, TimeUnit.SECONDS); + + // Submit with a different caller context through the wrapper. 
+ ThreadContext.clearMap(); + ThreadContext.put("trace.id", "caller-trace"); + final String seenCaller = pin.submit((Callable) () -> { + MatcherAssert.assertThat( + "runner's own MDC is hidden while the task runs", + ThreadContext.get("runner.own"), Matchers.nullValue() + ); + return ThreadContext.get("trace.id"); + }).get(5L, TimeUnit.SECONDS); + MatcherAssert.assertThat(seenCaller, Matchers.is("caller-trace")); + + // Next task on the RAW pool: observe the runner's prior MDC is back, + // and the caller's MDC did NOT leak onto the runner. + ThreadContext.clearMap(); + final AtomicReference runnerOwnAfter = new AtomicReference<>(); + final AtomicReference traceLeak = new AtomicReference<>(); + single.submit(() -> { + runnerOwnAfter.set(ThreadContext.get("runner.own")); + traceLeak.set(ThreadContext.get("trace.id")); + }).get(5L, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "runner's prior MDC was restored after the wrapped task", + runnerOwnAfter.get(), Matchers.is("runner-seed") + ); + MatcherAssert.assertThat( + "caller's MDC did not leak onto runner thread", + traceLeak.get(), Matchers.nullValue() + ); + } finally { + pin.shutdownNow(); + single.shutdownNow(); + single.awaitTermination(5L, TimeUnit.SECONDS); + } + } + + @Test + @DisplayName("Runner's prior ThreadContext is restored even when submitted " + + "task throws (submit(Callable) path)") + void contextRestoredEvenIfTaskThrows() throws Exception { + final ExecutorService single = Executors.newSingleThreadExecutor(); + final ContextualExecutorService pin = ContextualExecutorService.wrap(single); + try { + // Seed the runner via the RAW pool so the seed survives. 
+ single.submit(() -> ThreadContext.put("runner.own", "seed")) + .get(5L, TimeUnit.SECONDS); + + ThreadContext.clearMap(); + ThreadContext.put("trace.id", "throw-trace"); + final Future boom = pin.submit((Callable) () -> { + MatcherAssert.assertThat( + ThreadContext.get("trace.id"), Matchers.is("throw-trace") + ); + throw new IllegalStateException("boom"); + }); + final AtomicBoolean threw = new AtomicBoolean(false); + try { + boom.get(5L, TimeUnit.SECONDS); + } catch (final java.util.concurrent.ExecutionException expected) { + threw.set( + expected.getCause() instanceof IllegalStateException + ); + } + MatcherAssert.assertThat( + "callable propagated IllegalStateException", + threw.get(), Matchers.is(true) + ); + + // Runner's MDC must be restored; caller's MDC must NOT leak. + ThreadContext.clearMap(); + final AtomicReference runnerOwnAfter = new AtomicReference<>(); + final AtomicReference traceLeak = new AtomicReference<>(); + single.submit(() -> { + runnerOwnAfter.set(ThreadContext.get("runner.own")); + traceLeak.set(ThreadContext.get("trace.id")); + }).get(5L, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "runner's prior MDC was restored after the throwing task", + runnerOwnAfter.get(), Matchers.is("seed") + ); + MatcherAssert.assertThat( + "caller MDC did not leak after throw", + traceLeak.get(), Matchers.nullValue() + ); + } finally { + pin.shutdownNow(); + single.shutdownNow(); + single.awaitTermination(5L, TimeUnit.SECONDS); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/context/ContextualExecutorTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/context/ContextualExecutorTest.java new file mode 100644 index 000000000..0ab2ac3af --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/context/ContextualExecutorTest.java @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.context; + +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.logging.log4j.ThreadContext; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link ContextualExecutor} β€” verifies Β§4.4 propagation + * semantics: the caller's ThreadContext (ECS field carrier) is installed on + * the runner thread, the runner's prior ThreadContext is restored after the + * task, and the wrapper works with no APM agent attached (the common case in + * unit tests). 
+ */ +final class ContextualExecutorTest { + + private ExecutorService backing; + + @BeforeEach + void setup() { + ThreadContext.clearMap(); + this.backing = Executors.newSingleThreadExecutor(); + } + + @AfterEach + void tearDown() { + ThreadContext.clearMap(); + this.backing.shutdownNow(); + } + + @Test + @DisplayName("Caller ThreadContext (MDC) is visible on the runner thread") + void contextualizePropagatesThreadContextAcrossThreadHop() throws Exception { + ThreadContext.put("trace.id", "trace-abc"); + ThreadContext.put("repository.name", "npm_group"); + final AtomicReference seenTrace = new AtomicReference<>(); + final AtomicReference seenRepo = new AtomicReference<>(); + CompletableFuture.runAsync(() -> { + seenTrace.set(ThreadContext.get("trace.id")); + seenRepo.set(ThreadContext.get("repository.name")); + }, ContextualExecutor.contextualize(this.backing)).get(5L, TimeUnit.SECONDS); + MatcherAssert.assertThat(seenTrace.get(), Matchers.is("trace-abc")); + MatcherAssert.assertThat(seenRepo.get(), Matchers.is("npm_group")); + } + + @Test + @DisplayName("Runner's ThreadContext is restored after the task completes") + void contextualizeDoesNotLeakContextIntoRunnerThread() throws Exception { + // Pre-seed the runner thread's state by running an ordinary task first. + final AtomicReference runnerNameBefore = new AtomicReference<>(); + this.backing.submit(() -> { + ThreadContext.put("runner.own", "runner-seed"); + runnerNameBefore.set(Thread.currentThread().getName()); + }).get(5L, TimeUnit.SECONDS); + + // Now submit through the contextualized wrapper with a different MDC. 
+ ThreadContext.clearMap(); + ThreadContext.put("trace.id", "caller-trace"); + CompletableFuture.runAsync(() -> { + MatcherAssert.assertThat( + "caller's MDC visible inside the task", + ThreadContext.get("trace.id"), Matchers.is("caller-trace") + ); + MatcherAssert.assertThat( + "runner's prior MDC is hidden while the task runs", + ThreadContext.get("runner.own"), Matchers.nullValue() + ); + }, ContextualExecutor.contextualize(this.backing)).get(5L, TimeUnit.SECONDS); + + // After the task, the runner's prior MDC must be restored. + final AtomicReference runnerOwnAfter = new AtomicReference<>(); + final AtomicReference traceLeak = new AtomicReference<>(); + this.backing.submit(() -> { + runnerOwnAfter.set(ThreadContext.get("runner.own")); + traceLeak.set(ThreadContext.get("trace.id")); + }).get(5L, TimeUnit.SECONDS); + + MatcherAssert.assertThat( + "runner's prior MDC is back", + runnerOwnAfter.get(), Matchers.is("runner-seed") + ); + MatcherAssert.assertThat( + "caller's MDC did not leak into the runner thread", + traceLeak.get(), Matchers.nullValue() + ); + } + + @Test + @DisplayName("Runner's prior ThreadContext is restored even when the task throws") + void contextualizeRestoresCallerContextEvenIfTaskThrows() throws Exception { + this.backing.submit(() -> ThreadContext.put("runner.own", "seed")).get(5L, TimeUnit.SECONDS); + + ThreadContext.clearMap(); + ThreadContext.put("trace.id", "throw-trace"); + + final CompletableFuture fut = CompletableFuture.runAsync(() -> { + throw new IllegalStateException("boom"); + }, ContextualExecutor.contextualize(this.backing)); + try { + fut.get(5L, TimeUnit.SECONDS); + MatcherAssert.assertThat("expected exception", false, Matchers.is(true)); + } catch (final java.util.concurrent.ExecutionException expected) { + MatcherAssert.assertThat( + "cause propagated", + expected.getCause(), Matchers.instanceOf(IllegalStateException.class) + ); + } + + // Despite the throw, the runner's prior MDC must still be restored. 
+ final AtomicReference runnerOwnAfter = new AtomicReference<>(); + final AtomicReference traceLeak = new AtomicReference<>(); + this.backing.submit(() -> { + runnerOwnAfter.set(ThreadContext.get("runner.own")); + traceLeak.set(ThreadContext.get("trace.id")); + }).get(5L, TimeUnit.SECONDS); + MatcherAssert.assertThat(runnerOwnAfter.get(), Matchers.is("seed")); + MatcherAssert.assertThat( + "caller's MDC did not leak into the runner thread after an exception", + traceLeak.get(), Matchers.nullValue() + ); + } + + @Test + @DisplayName("Wrapper works with no APM agent attached β€” ElasticApm.currentSpan() is a no-op") + void contextualizeWorksWithNoApmAgent() throws Exception { + // No APM agent in the test JVM; ElasticApm.currentSpan() returns a no-op + // whose activate() Scope is a no-op. The task should run to completion. + ThreadContext.put("trace.id", "no-agent"); + final AtomicReference ran = new AtomicReference<>(false); + CompletableFuture.runAsync( + () -> ran.set(true), + ContextualExecutor.contextualize(this.backing) + ).get(5L, TimeUnit.SECONDS); + MatcherAssert.assertThat(ran.get(), Matchers.is(true)); + } + + @Test + @DisplayName("contextualize(null) throws NullPointerException") + void contextualizeRejectsNullDelegate() { + try { + ContextualExecutor.contextualize(null); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException expected) { + MatcherAssert.assertThat( + "NPE mentions 'delegate'", + expected.getMessage(), Matchers.containsString("delegate") + ); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/context/DeadlineTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/context/DeadlineTest.java new file mode 100644 index 000000000..61cb5f2e7 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/context/DeadlineTest.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + 
* + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.context; + +import java.time.Duration; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link Deadline} β€” verifies the Β§3.4 contract: + * {@link #in_createsDeadlineWithPositiveRemaining monotonic construction}, + * {@link #expired_returnsFalseInitially expired behaviour} at and after + * the boundary, non-negative {@link Deadline#remaining()}, and the clamp + * semantics of {@link Deadline#remainingClamped(Duration)}. + */ +final class DeadlineTest { + + @Test + @DisplayName("Deadline.in(d) produces a deadline with ~d remaining") + void inCreatesDeadlineWithPositiveRemaining() { + final Deadline d = Deadline.in(Duration.ofSeconds(5)); + final Duration remaining = d.remaining(); + MatcherAssert.assertThat( + "remaining > 0", + remaining.toMillis() > 0L, Matchers.is(true) + ); + MatcherAssert.assertThat( + "remaining ≀ the budget", + remaining.compareTo(Duration.ofSeconds(5)) <= 0, Matchers.is(true) + ); + MatcherAssert.assertThat( + "remaining close to the budget (β‰₯4s allows for slow CI)", + remaining.compareTo(Duration.ofSeconds(4)) >= 0, Matchers.is(true) + ); + } + + @Test + @DisplayName("expired() is false immediately after construction with a positive budget") + void expiredReturnsFalseInitially() { + final Deadline d = Deadline.in(Duration.ofSeconds(2)); + MatcherAssert.assertThat("not expired", d.expired(), Matchers.is(false)); + } + + @Test + @DisplayName("expired() flips to true once the budget has elapsed") + void expiredReturnsTrueAfterPassing() throws InterruptedException { + final Deadline d = Deadline.in(Duration.ofMillis(25)); + // Sleep longer than the TTL; 150ms slack 
for scheduling jitter on CI. + Thread.sleep(150L); + MatcherAssert.assertThat("expired", d.expired(), Matchers.is(true)); + } + + @Test + @DisplayName("remaining() is clamped at Duration.ZERO once the deadline has passed") + void remainingClampsToZeroAfterExpiry() throws InterruptedException { + final Deadline d = Deadline.in(Duration.ofMillis(10)); + Thread.sleep(100L); + final Duration rem = d.remaining(); + MatcherAssert.assertThat( + "remaining is ZERO", rem, Matchers.is(Duration.ZERO) + ); + MatcherAssert.assertThat( + "never negative", rem.isNegative(), Matchers.is(false) + ); + } + + @Test + @DisplayName("remainingClamped(max) returns max when the remaining budget exceeds it") + void remainingClampedCapsAtMax() { + final Deadline d = Deadline.in(Duration.ofSeconds(30)); + final Duration cap = Duration.ofSeconds(5); + final Duration clamped = d.remainingClamped(cap); + MatcherAssert.assertThat( + "capped at the max", clamped, Matchers.is(cap) + ); + } + + @Test + @DisplayName("remainingClamped(max) returns the remaining budget when it is below max") + void remainingClampedPassThroughWhenBelowMax() { + final Deadline d = Deadline.in(Duration.ofMillis(500)); + final Duration cap = Duration.ofMinutes(5); + final Duration clamped = d.remainingClamped(cap); + MatcherAssert.assertThat( + "pass-through, strictly less than cap", + clamped.compareTo(cap) < 0, Matchers.is(true) + ); + MatcherAssert.assertThat( + "pass-through, ≀ initial budget", + clamped.compareTo(Duration.ofMillis(500)) <= 0, Matchers.is(true) + ); + } + + @Test + @DisplayName("remainingClamped(null) throws NullPointerException") + void remainingClampedRejectsNull() { + final Deadline d = Deadline.in(Duration.ofSeconds(1)); + try { + d.remainingClamped(null); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException expected) { + // success + MatcherAssert.assertThat( + "NPE message references 'max'", + expected.getMessage(), 
Matchers.containsString("max") + ); + } + } + + @Test + @DisplayName("expiresAt() returns an Instant close to now + remaining") + void expiresAtReturnsFutureInstantForPositiveBudget() { + final Deadline d = Deadline.in(Duration.ofSeconds(10)); + final Duration diff = Duration.between(java.time.Instant.now(), d.expiresAt()); + MatcherAssert.assertThat( + "expiresAt is in the future", + diff.isNegative(), Matchers.is(false) + ); + MatcherAssert.assertThat( + "within the budget", + diff.compareTo(Duration.ofSeconds(11)) <= 0, Matchers.is(true) + ); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/context/RequestContextTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/context/RequestContextTest.java new file mode 100644 index 000000000..766b07d43 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/context/RequestContextTest.java @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.context; + +import com.auto1.pantera.http.context.RequestContext.ArtifactRef; +import java.time.Duration; +import org.apache.logging.log4j.ThreadContext; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Exhaustive contract test for {@link RequestContext}: record accessors, + * {@link RequestContext#minimal} defaults, {@link RequestContext#withRepo} + * copy-on-write behaviour, {@link RequestContext#bindToMdc()} / + * {@link RequestContext#fromMdc()} round-trip through Log4j2 + * {@link ThreadContext}, and backward compatibility of the 4-arg constructor. + */ +final class RequestContextTest { + + @BeforeEach + void clear() { + ThreadContext.clearMap(); + } + + @AfterEach + void cleanup() { + ThreadContext.clearMap(); + } + + // ================== defaults & copy-with ================== + + @Test + @DisplayName("minimal(...) 
sets safe defaults: anonymous user, empty artifact, 30s deadline") + void minimalSetsSafeDefaults() { + final RequestContext ctx = RequestContext.minimal( + "trace-1", "req-1", "npm_group", "/npm/@scope/pkg" + ); + MatcherAssert.assertThat(ctx.traceId(), Matchers.is("trace-1")); + MatcherAssert.assertThat(ctx.httpRequestId(), Matchers.is("req-1")); + MatcherAssert.assertThat(ctx.repoName(), Matchers.is("npm_group")); + MatcherAssert.assertThat(ctx.urlOriginal(), Matchers.is("/npm/@scope/pkg")); + MatcherAssert.assertThat(ctx.userName(), Matchers.is("anonymous")); + MatcherAssert.assertThat(ctx.clientIp(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.userAgent(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.repoType(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.urlPath(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.transactionId(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.spanId(), Matchers.nullValue()); + MatcherAssert.assertThat("artifact is EMPTY", ctx.artifact().isEmpty(), Matchers.is(true)); + MatcherAssert.assertThat(ctx.deadline(), Matchers.notNullValue()); + final Duration rem = ctx.deadline().remaining(); + MatcherAssert.assertThat( + "deadline within the 30s default", rem.toSeconds() <= 30, Matchers.is(true) + ); + } + + @Test + @DisplayName("withRepo(...) 
produces a copy with only the repo/artifact fields updated") + void withRepoProducesCopyWithNewRepoFields() { + final RequestContext base = RequestContext.minimal( + "t", "r", "old_repo", "/u" + ); + final ArtifactRef art = new ArtifactRef("@scope/pkg", "1.2.3"); + final RequestContext after = base.withRepo("new_repo", "npm", art); + + MatcherAssert.assertThat(after.repoName(), Matchers.is("new_repo")); + MatcherAssert.assertThat(after.repoType(), Matchers.is("npm")); + MatcherAssert.assertThat(after.artifact(), Matchers.is(art)); + // Preserved: + MatcherAssert.assertThat(after.traceId(), Matchers.is(base.traceId())); + MatcherAssert.assertThat(after.httpRequestId(), Matchers.is(base.httpRequestId())); + MatcherAssert.assertThat(after.urlOriginal(), Matchers.is(base.urlOriginal())); + MatcherAssert.assertThat(after.userName(), Matchers.is(base.userName())); + MatcherAssert.assertThat(after.deadline(), Matchers.is(base.deadline())); + // base is untouched: + MatcherAssert.assertThat(base.repoName(), Matchers.is("old_repo")); + MatcherAssert.assertThat(base.repoType(), Matchers.nullValue()); + } + + @Test + @DisplayName("withRepo(..., null artifact) coerces to ArtifactRef.EMPTY") + void withRepoNullArtifactCoercesToEmpty() { + final RequestContext after = RequestContext + .minimal("t", "r", "x", "/u") + .withRepo("y", "maven", null); + MatcherAssert.assertThat(after.artifact(), Matchers.is(ArtifactRef.EMPTY)); + } + + // ================== bindToMdc ================== + + @Test + @DisplayName("bindToMdc() puts every non-null ECS field into ThreadContext") + void bindToMdcPutsAllEcsFields() throws Exception { + final RequestContext ctx = new RequestContext( + "t1", "tx1", "sp1", "r1", + "alice", "10.0.0.1", "curl/8", + "npm_group", "npm", new ArtifactRef("@scope/pkg", "1.0.0"), + "/npm/@scope/pkg", "/npm/@scope/pkg", + Deadline.in(Duration.ofSeconds(30)) + ); + try (AutoCloseable ignored = ctx.bindToMdc()) { + MatcherAssert.assertThat(ThreadContext.get("trace.id"), 
Matchers.is("t1")); + MatcherAssert.assertThat(ThreadContext.get("transaction.id"), Matchers.is("tx1")); + MatcherAssert.assertThat(ThreadContext.get("span.id"), Matchers.is("sp1")); + MatcherAssert.assertThat(ThreadContext.get("http.request.id"), Matchers.is("r1")); + MatcherAssert.assertThat(ThreadContext.get("user.name"), Matchers.is("alice")); + MatcherAssert.assertThat(ThreadContext.get("client.ip"), Matchers.is("10.0.0.1")); + MatcherAssert.assertThat(ThreadContext.get("user_agent.original"), Matchers.is("curl/8")); + MatcherAssert.assertThat(ThreadContext.get("repository.name"), Matchers.is("npm_group")); + MatcherAssert.assertThat(ThreadContext.get("repository.type"), Matchers.is("npm")); + MatcherAssert.assertThat(ThreadContext.get("package.name"), Matchers.is("@scope/pkg")); + MatcherAssert.assertThat(ThreadContext.get("package.version"), Matchers.is("1.0.0")); + MatcherAssert.assertThat(ThreadContext.get("url.original"), Matchers.is("/npm/@scope/pkg")); + MatcherAssert.assertThat(ThreadContext.get("url.path"), Matchers.is("/npm/@scope/pkg")); + } + } + + @Test + @DisplayName("bindToMdc() skips null fields (no empty-string ghost keys)") + void bindToMdcSkipsNullFields() throws Exception { + // Only traceId + repoName + urlOriginal populated β€” everything else null. + final RequestContext ctx = RequestContext.minimal( + "trace-only", null, "repo", "/u" + ); + // Sanity: minimal() sets userName to "anonymous", not null. 
+ MatcherAssert.assertThat(ctx.userName(), Matchers.is("anonymous")); + try (AutoCloseable ignored = ctx.bindToMdc()) { + MatcherAssert.assertThat(ThreadContext.get("trace.id"), Matchers.is("trace-only")); + MatcherAssert.assertThat(ThreadContext.get("user.name"), Matchers.is("anonymous")); + MatcherAssert.assertThat(ThreadContext.get("repository.name"), Matchers.is("repo")); + MatcherAssert.assertThat(ThreadContext.get("url.original"), Matchers.is("/u")); + // Null-valued fields must not appear as keys at all: + MatcherAssert.assertThat( + "no transaction.id when null", + ThreadContext.containsKey("transaction.id"), Matchers.is(false) + ); + MatcherAssert.assertThat( + "no client.ip when null", + ThreadContext.containsKey("client.ip"), Matchers.is(false) + ); + MatcherAssert.assertThat( + "no user_agent.original when null", + ThreadContext.containsKey("user_agent.original"), Matchers.is(false) + ); + MatcherAssert.assertThat( + "no package.name for EMPTY artifact", + ThreadContext.containsKey("package.name"), Matchers.is(false) + ); + MatcherAssert.assertThat( + "no package.version for EMPTY artifact", + ThreadContext.containsKey("package.version"), Matchers.is(false) + ); + } + } + + @Test + @DisplayName("bindToMdc().close() restores the ThreadContext snapshot taken at bind time") + void bindToMdcCloseRestoresPriorContext() throws Exception { + ThreadContext.put("pre.existing", "keep"); + ThreadContext.put("trace.id", "prior-trace"); + final RequestContext ctx = new RequestContext( + "new-trace", null, null, null, + "anonymous", null, null, + "repo", null, ArtifactRef.EMPTY, + null, null, Deadline.in(Duration.ofSeconds(30)) + ); + final AutoCloseable bound = ctx.bindToMdc(); + MatcherAssert.assertThat( + "binding overrode trace.id", + ThreadContext.get("trace.id"), Matchers.is("new-trace") + ); + bound.close(); + MatcherAssert.assertThat( + "prior trace.id restored", + ThreadContext.get("trace.id"), Matchers.is("prior-trace") + ); + MatcherAssert.assertThat( + 
"pre.existing preserved through bind+close", + ThreadContext.get("pre.existing"), Matchers.is("keep") + ); + } + + @Test + @DisplayName("bindToMdc() is safe inside try-with-resources") + void bindToMdcIsTryWithResourcesSafe() throws Exception { + final RequestContext ctx = RequestContext.minimal("t", "r", "repo", "/u"); + try (AutoCloseable bound = ctx.bindToMdc()) { + MatcherAssert.assertThat(ThreadContext.get("trace.id"), Matchers.is("t")); + MatcherAssert.assertThat(bound, Matchers.notNullValue()); + } + MatcherAssert.assertThat( + "ThreadContext cleaned up after try-with-resources", + ThreadContext.get("trace.id"), Matchers.nullValue() + ); + } + + @Test + @DisplayName("bindToMdc() close is idempotent β€” double close does not corrupt state") + void bindToMdcIsIdempotentOnDoubleClose() throws Exception { + ThreadContext.put("pre", "preserved"); + final RequestContext ctx = RequestContext.minimal("t", "r", "repo", "/u"); + final AutoCloseable bound = ctx.bindToMdc(); + bound.close(); + MatcherAssert.assertThat(ThreadContext.get("pre"), Matchers.is("preserved")); + // Now put something else into ThreadContext β€” a second close must NOT + // clobber it, because the snapshot was already restored on first close. 
+ ThreadContext.put("post-close", "still-here"); + bound.close(); + MatcherAssert.assertThat( + "second close is a no-op, preserves state set after first close", + ThreadContext.get("post-close"), Matchers.is("still-here") + ); + MatcherAssert.assertThat(ThreadContext.get("pre"), Matchers.is("preserved")); + } + + // ================== fromMdc ================== + + @Test + @DisplayName("fromMdc() reads every ECS field from ThreadContext") + void fromMdcReadsAllEcsFields() { + ThreadContext.put("trace.id", "t1"); + ThreadContext.put("transaction.id", "tx1"); + ThreadContext.put("span.id", "sp1"); + ThreadContext.put("http.request.id", "r1"); + ThreadContext.put("user.name", "alice"); + ThreadContext.put("client.ip", "10.0.0.1"); + ThreadContext.put("user_agent.original", "curl/8"); + ThreadContext.put("repository.name", "npm_group"); + ThreadContext.put("repository.type", "npm"); + ThreadContext.put("package.name", "@scope/pkg"); + ThreadContext.put("package.version", "1.0.0"); + ThreadContext.put("url.original", "/npm/@scope/pkg"); + ThreadContext.put("url.path", "/npm/@scope/pkg"); + + final RequestContext ctx = RequestContext.fromMdc(); + MatcherAssert.assertThat(ctx.traceId(), Matchers.is("t1")); + MatcherAssert.assertThat(ctx.transactionId(), Matchers.is("tx1")); + MatcherAssert.assertThat(ctx.spanId(), Matchers.is("sp1")); + MatcherAssert.assertThat(ctx.httpRequestId(), Matchers.is("r1")); + MatcherAssert.assertThat(ctx.userName(), Matchers.is("alice")); + MatcherAssert.assertThat(ctx.clientIp(), Matchers.is("10.0.0.1")); + MatcherAssert.assertThat(ctx.userAgent(), Matchers.is("curl/8")); + MatcherAssert.assertThat(ctx.repoName(), Matchers.is("npm_group")); + MatcherAssert.assertThat(ctx.repoType(), Matchers.is("npm")); + MatcherAssert.assertThat(ctx.artifact().name(), Matchers.is("@scope/pkg")); + MatcherAssert.assertThat(ctx.artifact().version(), Matchers.is("1.0.0")); + MatcherAssert.assertThat(ctx.urlOriginal(), Matchers.is("/npm/@scope/pkg")); + 
MatcherAssert.assertThat(ctx.urlPath(), Matchers.is("/npm/@scope/pkg")); + MatcherAssert.assertThat("deadline synthesised", ctx.deadline(), Matchers.notNullValue()); + } + + @Test + @DisplayName("fromMdc() returns null for missing keys and EMPTY for absent artifact") + void fromMdcMissingKeysBecomeNull() { + ThreadContext.put("trace.id", "only-trace"); + final RequestContext ctx = RequestContext.fromMdc(); + MatcherAssert.assertThat(ctx.traceId(), Matchers.is("only-trace")); + MatcherAssert.assertThat(ctx.transactionId(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.spanId(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.httpRequestId(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.userName(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.clientIp(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.userAgent(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.repoName(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.repoType(), Matchers.nullValue()); + MatcherAssert.assertThat("artifact EMPTY", ctx.artifact(), Matchers.is(ArtifactRef.EMPTY)); + MatcherAssert.assertThat(ctx.urlOriginal(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.urlPath(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.deadline(), Matchers.notNullValue()); + } + + @Test + @DisplayName("bindToMdc β†’ fromMdc round-trips every ECS field (except the non-persisted Deadline)") + void bindToMdcFromMdcRoundTripPreservesFieldsExceptDeadline() throws Exception { + final RequestContext original = new RequestContext( + "t", "tx", "sp", "r", + "alice", "10.0.0.1", "curl/8", + "npm_group", "npm", new ArtifactRef("@scope/pkg", "1.0.0"), + "/npm/@scope/pkg", "/npm/@scope/pkg", + Deadline.in(Duration.ofSeconds(5)) + ); + final RequestContext restored; + try (AutoCloseable ignored = original.bindToMdc()) { + restored = RequestContext.fromMdc(); + } + MatcherAssert.assertThat(restored.traceId(), Matchers.is(original.traceId())); + 
MatcherAssert.assertThat(restored.transactionId(), Matchers.is(original.transactionId())); + MatcherAssert.assertThat(restored.spanId(), Matchers.is(original.spanId())); + MatcherAssert.assertThat(restored.httpRequestId(), Matchers.is(original.httpRequestId())); + MatcherAssert.assertThat(restored.userName(), Matchers.is(original.userName())); + MatcherAssert.assertThat(restored.clientIp(), Matchers.is(original.clientIp())); + MatcherAssert.assertThat(restored.userAgent(), Matchers.is(original.userAgent())); + MatcherAssert.assertThat(restored.repoName(), Matchers.is(original.repoName())); + MatcherAssert.assertThat(restored.repoType(), Matchers.is(original.repoType())); + MatcherAssert.assertThat(restored.artifact(), Matchers.is(original.artifact())); + MatcherAssert.assertThat(restored.urlOriginal(), Matchers.is(original.urlOriginal())); + MatcherAssert.assertThat(restored.urlPath(), Matchers.is(original.urlPath())); + // Deadline is synthesised β€” not equal to original. + MatcherAssert.assertThat(restored.deadline(), Matchers.notNullValue()); + } + + // ================== ArtifactRef ================== + + @Test + @DisplayName("ArtifactRef.EMPTY.isEmpty() is true; a populated one is not") + void artifactRefEmptyIsEmpty() { + MatcherAssert.assertThat(ArtifactRef.EMPTY.isEmpty(), Matchers.is(true)); + MatcherAssert.assertThat(ArtifactRef.EMPTY.name(), Matchers.is("")); + MatcherAssert.assertThat(ArtifactRef.EMPTY.version(), Matchers.is("")); + final ArtifactRef populated = new ArtifactRef("pkg", "1.0.0"); + MatcherAssert.assertThat(populated.isEmpty(), Matchers.is(false)); + } + + // ================== backward compat ================== + + @Test + @DisplayName("Backward-compat 4-arg constructor delegates to minimal defaults") + void backwardCompat4ArgConstructorDelegatesToMinimal() { + final RequestContext ctx = new RequestContext( + "t", "r", "repo", "/u" + ); + MatcherAssert.assertThat(ctx.traceId(), Matchers.is("t")); + 
MatcherAssert.assertThat(ctx.httpRequestId(), Matchers.is("r")); + MatcherAssert.assertThat(ctx.repoName(), Matchers.is("repo")); + MatcherAssert.assertThat(ctx.urlOriginal(), Matchers.is("/u")); + // Safe defaults identical to minimal(): + MatcherAssert.assertThat(ctx.userName(), Matchers.is("anonymous")); + MatcherAssert.assertThat(ctx.clientIp(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.userAgent(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.repoType(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.urlPath(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.transactionId(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.spanId(), Matchers.nullValue()); + MatcherAssert.assertThat(ctx.artifact(), Matchers.is(ArtifactRef.EMPTY)); + MatcherAssert.assertThat(ctx.deadline(), Matchers.notNullValue()); + } + + // ================== record semantics ================== + + @Test + @DisplayName("Record equality follows canonical-component semantics") + void recordEqualityFollowsRecordSemantics() { + final Deadline shared = Deadline.in(Duration.ofSeconds(30)); + final ArtifactRef art = new ArtifactRef("p", "1"); + final RequestContext a = new RequestContext( + "t", "tx", "sp", "r", "u", "ip", "ua", + "repo", "npm", art, "/u", "/u", shared + ); + final RequestContext b = new RequestContext( + "t", "tx", "sp", "r", "u", "ip", "ua", + "repo", "npm", art, "/u", "/u", shared + ); + MatcherAssert.assertThat(a, Matchers.is(b)); + MatcherAssert.assertThat(a.hashCode(), Matchers.is(b.hashCode())); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/fault/FaultAllProxiesFailedPassThroughTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/fault/FaultAllProxiesFailedPassThroughTest.java new file mode 100644 index 000000000..48bd4f3ed --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/fault/FaultAllProxiesFailedPassThroughTest.java @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * 
Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.fault; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.RsStatus; +import com.auto1.pantera.http.context.RequestContext; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ExecutionException; + +/** + * One test per row of the worked-examples table in Β§2 of the target + * architecture doc β€” locks in the pass-through contract for + * {@link Fault.AllProxiesFailed}. + */ +final class FaultAllProxiesFailedPassThroughTest { + + private static final RequestContext CTX = + new RequestContext("t-1", "r-1", "npm_group", "/npm/@scope/pkg"); + + private static final String GROUP = "npm_group"; + + // ---------- helpers ---------- + + private static Response bodied(final RsStatus status, final String body) { + return ResponseBuilder.from(status) + .body(new Content.From(body.getBytes(StandardCharsets.UTF_8))) + .build(); + } + + private static Response empty(final RsStatus status) { + return ResponseBuilder.from(status).build(); + } + + private static Fault.MemberOutcome fiveXx(final String name, final Response resp) { + return Fault.MemberOutcome.responded(name, Fault.MemberOutcome.Kind.FIVE_XX, resp); + } + + private static Fault.MemberOutcome notFound(final String name, final Response resp) { + return Fault.MemberOutcome.responded(name, Fault.MemberOutcome.Kind.NOT_FOUND, resp); + } + + private static Fault.MemberOutcome threw(final String name, final Throwable 
cause) { + return Fault.MemberOutcome.threw(name, Fault.MemberOutcome.Kind.EXCEPTION, cause); + } + + private static Fault.MemberOutcome cancelled(final String name) { + return Fault.MemberOutcome.threw(name, Fault.MemberOutcome.Kind.CANCELLED, null); + } + + private static Fault.AllProxiesFailed apf(final List outcomes) { + return new Fault.AllProxiesFailed( + GROUP, outcomes, FaultTranslator.pickWinningFailure(outcomes) + ); + } + + private static String bodyOf(final Response resp) throws ExecutionException, InterruptedException { + return new String(resp.body().asBytesFuture().get(), StandardCharsets.UTF_8); + } + + // ---------- worked examples from Β§2 ---------- + + /** Proxy 1 β†’ 500 + Proxy 2 β†’ 503 β‡’ 503 body passes through. */ + @Test + void row503BeatsRow500ByRetryability() throws Exception { + final Response p1 = bodied(RsStatus.INTERNAL_ERROR, "500-body"); + final Response p2 = bodied(RsStatus.SERVICE_UNAVAILABLE, "503-body"); + final Response resp = FaultTranslator.translate( + apf(List.of(fiveXx("p1", p1), fiveXx("p2", p2))), CTX + ); + MatcherAssert.assertThat( + "503 wins over 500 by retryability", + resp.status(), Matchers.is(RsStatus.SERVICE_UNAVAILABLE) + ); + MatcherAssert.assertThat( + "winner body passes through", bodyOf(resp), Matchers.is("503-body") + ); + MatcherAssert.assertThat( + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains("proxies-failed:p2") + ); + } + + /** Proxy 1 β†’ 502 + Proxy 2 β†’ 500 β‡’ 502 body passes through. 
*/ + @Test + void row502BeatsRow500ByRetryability() throws Exception { + final Response p1 = bodied(RsStatus.BAD_GATEWAY, "502-body"); + final Response p2 = bodied(RsStatus.INTERNAL_ERROR, "500-body"); + final Response resp = FaultTranslator.translate( + apf(List.of(fiveXx("p1", p1), fiveXx("p2", p2))), CTX + ); + MatcherAssert.assertThat( + "502 wins over 500 by retryability", + resp.status(), Matchers.is(RsStatus.BAD_GATEWAY) + ); + MatcherAssert.assertThat( + "winner body passes through", bodyOf(resp), Matchers.is("502-body") + ); + MatcherAssert.assertThat( + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains("proxies-failed:p1") + ); + } + + /** + * Proxy 1 β†’ 404 + Proxy 2 β†’ 500 β‡’ 500 body passes through + * (404 has no body to pass through per worked-examples note β€” it's only + * considered when it's the only response, and in that case we'd never + * construct AllProxiesFailed at all). Here the 500 is the real failure. + */ + @Test + void row404AndRow500ProducesThe500BodyPassingThrough() throws Exception { + final Response p1 = empty(RsStatus.NOT_FOUND); + final Response p2 = bodied(RsStatus.INTERNAL_ERROR, "500-body"); + final Response resp = FaultTranslator.translate( + apf(List.of(notFound("p1", p1), fiveXx("p2", p2))), CTX + ); + MatcherAssert.assertThat( + "500 beats 404 by retryability (non-5xx ranks after 5xx)", + resp.status(), Matchers.is(RsStatus.INTERNAL_ERROR) + ); + MatcherAssert.assertThat( + "500 body passes through", bodyOf(resp), Matchers.is("500-body") + ); + MatcherAssert.assertThat( + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains("proxies-failed:p2") + ); + } + + /** Proxy 1 β†’ 503 (empty body) + Proxy 2 β†’ 503 (JSON body) β‡’ Proxy 2's body wins. 
*/ + @Test + void sameStatusWithBodyBeatsNoBody() throws Exception { + final Response p1 = empty(RsStatus.SERVICE_UNAVAILABLE); + final Response p2 = bodied(RsStatus.SERVICE_UNAVAILABLE, "{\"retry\":true}"); + final Response resp = FaultTranslator.translate( + apf(List.of(fiveXx("p1", p1), fiveXx("p2", p2))), CTX + ); + MatcherAssert.assertThat( + "503 in both β†’ with-body wins", + resp.status(), Matchers.is(RsStatus.SERVICE_UNAVAILABLE) + ); + MatcherAssert.assertThat( + "JSON body from p2 passes through", + bodyOf(resp), Matchers.is("{\"retry\":true}") + ); + MatcherAssert.assertThat( + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains("proxies-failed:p2") + ); + } + + /** Proxy 1 β†’ ConnectException + Proxy 2 β†’ 500 β‡’ 500 body passes through. */ + @Test + void proxyThatThrewContributesNoResponseSoOtherWins() throws Exception { + final Response p2 = bodied(RsStatus.INTERNAL_ERROR, "500-body"); + final Response resp = FaultTranslator.translate( + apf(List.of( + threw("p1", new java.net.ConnectException("refused")), + fiveXx("p2", p2) + )), CTX + ); + MatcherAssert.assertThat( + "500 wins because p1 produced no response at all", + resp.status(), Matchers.is(RsStatus.INTERNAL_ERROR) + ); + MatcherAssert.assertThat(bodyOf(resp), Matchers.is("500-body")); + MatcherAssert.assertThat( + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains("proxies-failed:p2") + ); + } + + /** Every proxy threw/cancelled/timed out β‡’ synthesized 502 with none-responded tag. 
*/ + @Test + void everyProxyFailedWithoutResponseSynthesizes502NoneResponded() throws Exception { + final Fault.AllProxiesFailed fault = apf(List.of( + threw("p1", new java.util.concurrent.TimeoutException("slow")), + cancelled("p2"), + threw("p3", new RuntimeException("boom")) + )); + MatcherAssert.assertThat( + "pickWinningFailure is empty when no member produced a response", + fault.winningResponse(), Matchers.is(Optional.empty()) + ); + final Response resp = FaultTranslator.translate(fault, CTX); + MatcherAssert.assertThat( + "synthesized 502", resp.status(), Matchers.is(RsStatus.BAD_GATEWAY) + ); + MatcherAssert.assertThat( + "X-Pantera-Fault: proxies-failed:none-responded", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains(FaultTranslator.TAG_PROXIES_NONE_RESPONDED) + ); + MatcherAssert.assertThat( + "X-Pantera-Proxies-Tried reflects members tried", + resp.headers().values(FaultTranslator.HEADER_PROXIES_TRIED), + Matchers.contains("3") + ); + MatcherAssert.assertThat( + "synthesized body is a JSON sentinel", + bodyOf(resp), Matchers.containsString("all upstream members failed") + ); + } + + // ---------- additional coverage for pickWinningFailure ---------- + + @Test + void declarationOrderBreaksTiesWhenStatusAndBodyAreEqual() throws Exception { + final Response p1 = bodied(RsStatus.BAD_GATEWAY, "same"); + final Response p2 = bodied(RsStatus.BAD_GATEWAY, "same"); + final Response resp = FaultTranslator.translate( + apf(List.of(fiveXx("p1", p1), fiveXx("p2", p2))), CTX + ); + MatcherAssert.assertThat( + "earliest-declared wins among full ties", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains("proxies-failed:p1") + ); + } + + @Test + void nonFiveXxResponseRanksAfterEveryFiveXx() throws Exception { + final Response p1 = bodied(RsStatus.NOT_FOUND, "nope"); + final Response p2 = bodied(RsStatus.GATEWAY_TIMEOUT, "gone"); + final Response resp = FaultTranslator.translate( + apf(List.of(notFound("p1", p1), 
fiveXx("p2", p2))), CTX + ); + MatcherAssert.assertThat( + "504 beats 404", + resp.status(), Matchers.is(RsStatus.GATEWAY_TIMEOUT) + ); + } + + @Test + void pickWinningFailureReturnsEmptyWhenNoMembersRespond() { + MatcherAssert.assertThat( + FaultTranslator.pickWinningFailure(List.of()), + Matchers.is(Optional.empty()) + ); + } + + @Test + void twoFiveHundredsDeclarationOrderWins() throws Exception { + final Response p1 = bodied(RsStatus.INTERNAL_ERROR, "500-a"); + final Response p2 = bodied(RsStatus.INTERNAL_ERROR, "500-b"); + final Response resp = FaultTranslator.translate( + apf(List.of(fiveXx("p1", p1), fiveXx("p2", p2))), CTX + ); + MatcherAssert.assertThat( + "two 500s β€” declaration order wins", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains("proxies-failed:p1") + ); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/fault/FaultClassifierTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/fault/FaultClassifierTest.java new file mode 100644 index 000000000..d2e5d097f --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/fault/FaultClassifierTest.java @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.fault; + +import com.auto1.pantera.asto.Key; +import com.auto1.pantera.asto.ValueNotFoundException; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.net.ConnectException; +import java.time.Duration; +import java.util.concurrent.CompletionException; +import java.util.concurrent.TimeoutException; + +/** + * Round-trip tests for {@link FaultClassifier#classify(Throwable, String)}. + */ +final class FaultClassifierTest { + + private static final String WHERE = "unit-test"; + + @Test + void timeoutExceptionClassifiesAsDeadline() { + final Fault fault = FaultClassifier.classify(new TimeoutException("slow"), WHERE); + MatcherAssert.assertThat(fault, Matchers.instanceOf(Fault.Deadline.class)); + final Fault.Deadline deadline = (Fault.Deadline) fault; + MatcherAssert.assertThat( + "deadline label propagated", deadline.where(), Matchers.is(WHERE) + ); + MatcherAssert.assertThat( + "unknown budget is ZERO", + deadline.budget(), Matchers.is(Duration.ZERO) + ); + } + + @Test + void connectExceptionClassifiesAsInternal() { + final ConnectException ce = new ConnectException("refused"); + final Fault fault = FaultClassifier.classify(ce, WHERE); + MatcherAssert.assertThat(fault, Matchers.instanceOf(Fault.Internal.class)); + final Fault.Internal internal = (Fault.Internal) fault; + MatcherAssert.assertThat( + "cause preserved", internal.cause(), Matchers.sameInstance(ce) + ); + MatcherAssert.assertThat( + "where propagated", internal.where(), Matchers.is(WHERE) + ); + } + + @Test + void ioExceptionClassifiesAsInternal() { + final IOException ioe = new IOException("broken pipe"); + final Fault fault = FaultClassifier.classify(ioe, WHERE); + MatcherAssert.assertThat(fault, Matchers.instanceOf(Fault.Internal.class)); + MatcherAssert.assertThat( + ((Fault.Internal) fault).cause(), Matchers.sameInstance(ioe) + ); + } + + @Test + void 
valueNotFoundClassifiesAsStorageUnavailable() { + final ValueNotFoundException vnf = new ValueNotFoundException(new Key.From("missing")); + final Fault fault = FaultClassifier.classify(vnf, WHERE); + MatcherAssert.assertThat(fault, Matchers.instanceOf(Fault.StorageUnavailable.class)); + final Fault.StorageUnavailable su = (Fault.StorageUnavailable) fault; + MatcherAssert.assertThat( + "cause preserved", su.cause(), Matchers.sameInstance(vnf) + ); + MatcherAssert.assertThat( + "exception message propagated", + su.key(), Matchers.is(vnf.getMessage()) + ); + } + + @Test + void queueFullIllegalStateClassifiesAsOverload() { + final Fault fault = FaultClassifier.classify( + new IllegalStateException("Queue full"), WHERE + ); + MatcherAssert.assertThat(fault, Matchers.instanceOf(Fault.Overload.class)); + final Fault.Overload ov = (Fault.Overload) fault; + MatcherAssert.assertThat( + "resource label", ov.resource(), Matchers.is("event-queue") + ); + MatcherAssert.assertThat( + "retry-after hint", ov.retryAfter(), Matchers.is(Duration.ofSeconds(1)) + ); + } + + @Test + void otherIllegalStateExceptionFallsBackToInternal() { + final IllegalStateException ise = new IllegalStateException("not queue full"); + final Fault fault = FaultClassifier.classify(ise, WHERE); + MatcherAssert.assertThat(fault, Matchers.instanceOf(Fault.Internal.class)); + MatcherAssert.assertThat( + ((Fault.Internal) fault).cause(), Matchers.sameInstance(ise) + ); + } + + @Test + void defaultClassifiesAsInternal() { + final RuntimeException rte = new RuntimeException("unknown"); + final Fault fault = FaultClassifier.classify(rte, WHERE); + MatcherAssert.assertThat(fault, Matchers.instanceOf(Fault.Internal.class)); + MatcherAssert.assertThat( + ((Fault.Internal) fault).cause(), Matchers.sameInstance(rte) + ); + } + + @Test + void completionExceptionIsUnwrappedBeforeClassification() { + final TimeoutException inner = new TimeoutException("inner"); + final CompletionException wrapper = new 
CompletionException(inner); + final Fault fault = FaultClassifier.classify(wrapper, WHERE); + MatcherAssert.assertThat( + "CompletionException unwrapped β€” saw TimeoutException", + fault, Matchers.instanceOf(Fault.Deadline.class) + ); + } + + @Test + void nestedCompletionExceptionsAreFullyUnwrapped() { + final ConnectException root = new ConnectException("denied"); + final CompletionException middle = new CompletionException(root); + final CompletionException outer = new CompletionException(middle); + final Fault fault = FaultClassifier.classify(outer, WHERE); + MatcherAssert.assertThat(fault, Matchers.instanceOf(Fault.Internal.class)); + MatcherAssert.assertThat( + ((Fault.Internal) fault).cause(), Matchers.sameInstance(root) + ); + } + + @Test + void completionExceptionWithNullCauseIsClassifiedDirectly() { + final CompletionException bare = new CompletionException("no cause", null); + final Fault fault = FaultClassifier.classify(bare, WHERE); + MatcherAssert.assertThat( + "bare CompletionException falls to default Internal", + fault, Matchers.instanceOf(Fault.Internal.class) + ); + } + + @Test + void selfReferencingCompletionExceptionDoesNotLoop() { + // Defensive check: if a pathological Throwable reports itself as its own + // cause, unwrap must terminate (otherwise classify() would spin forever). + final CompletionException selfRef = new SelfReferencingCompletionException(); + final Fault fault = FaultClassifier.classify(selfRef, WHERE); + MatcherAssert.assertThat( + "self-referencing cause is treated as terminal", + fault, Matchers.instanceOf(Fault.Internal.class) + ); + MatcherAssert.assertThat( + ((Fault.Internal) fault).cause(), Matchers.sameInstance(selfRef) + ); + } + + /** Pathological throwable whose getCause() returns itself. 
*/ + private static final class SelfReferencingCompletionException extends CompletionException { + private static final long serialVersionUID = 1L; + + SelfReferencingCompletionException() { + super("self"); + } + + @Override + public synchronized Throwable getCause() { + return this; + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/fault/FaultTranslatorTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/fault/FaultTranslatorTest.java new file mode 100644 index 000000000..614fbfe7b --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/fault/FaultTranslatorTest.java @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.fault; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.RsStatus; +import com.auto1.pantera.http.context.RequestContext; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.util.List; +import java.util.Optional; + +/** + * Tests for {@link FaultTranslator#translate(Fault, RequestContext)} β€” one test + * per {@link Fault} variant plus an exhaustive pattern-match guard that keeps + * the implementation honest as new variants land. + */ +final class FaultTranslatorTest { + + /** Reusable request context; FaultTranslator reads nothing from it today. 
*/ + private static final RequestContext CTX = + new RequestContext("trace-1", "req-1", "npm_group", "/npm/@scope/pkg"); + + @Test + void notFoundMapsTo404() { + final Response resp = FaultTranslator.translate( + new Fault.NotFound("npm_group", "@scope/pkg", "1.0.0"), CTX + ); + MatcherAssert.assertThat( + "404 status", resp.status(), Matchers.is(RsStatus.NOT_FOUND) + ); + MatcherAssert.assertThat( + "no fault header on clean 404", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.empty() + ); + } + + @Test + void forbiddenMapsTo403() { + final Response resp = FaultTranslator.translate( + new Fault.Forbidden("cooldown"), CTX + ); + MatcherAssert.assertThat( + "403 status", resp.status(), Matchers.is(RsStatus.FORBIDDEN) + ); + MatcherAssert.assertThat( + "no fault header on 403", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.empty() + ); + } + + @Test + void indexUnavailableMapsTo500WithTag() { + final Response resp = FaultTranslator.translate( + new Fault.IndexUnavailable(new RuntimeException("timeout"), "SELECT …"), + CTX + ); + MatcherAssert.assertThat( + "500 status", resp.status(), Matchers.is(RsStatus.INTERNAL_ERROR) + ); + MatcherAssert.assertThat( + "X-Pantera-Fault carries index-unavailable tag", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains(FaultTranslator.TAG_INDEX) + ); + } + + @Test + void storageUnavailableMapsTo500WithTag() { + final Response resp = FaultTranslator.translate( + new Fault.StorageUnavailable(new RuntimeException("io"), "meta"), CTX + ); + MatcherAssert.assertThat( + "500 status", resp.status(), Matchers.is(RsStatus.INTERNAL_ERROR) + ); + MatcherAssert.assertThat( + "X-Pantera-Fault carries storage-unavailable tag", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains(FaultTranslator.TAG_STORAGE) + ); + } + + @Test + void internalMapsTo500WithTag() { + final Response resp = FaultTranslator.translate( + new Fault.Internal(new RuntimeException("bug"), 
"slice"), CTX + ); + MatcherAssert.assertThat( + "500 status", resp.status(), Matchers.is(RsStatus.INTERNAL_ERROR) + ); + MatcherAssert.assertThat( + "X-Pantera-Fault carries internal tag", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains(FaultTranslator.TAG_INTERNAL) + ); + } + + @Test + void deadlineMapsTo504WithTag() { + final Response resp = FaultTranslator.translate( + new Fault.Deadline(Duration.ofSeconds(5), "proxy-fanout"), CTX + ); + MatcherAssert.assertThat( + "504 status", resp.status(), Matchers.is(RsStatus.GATEWAY_TIMEOUT) + ); + MatcherAssert.assertThat( + "X-Pantera-Fault carries deadline-exceeded tag", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains(FaultTranslator.TAG_DEADLINE) + ); + } + + @Test + void overloadMapsTo503WithRetryAfterAndTag() { + final Response resp = FaultTranslator.translate( + new Fault.Overload("event-queue", Duration.ofSeconds(3)), CTX + ); + MatcherAssert.assertThat( + "503 status", resp.status(), Matchers.is(RsStatus.SERVICE_UNAVAILABLE) + ); + MatcherAssert.assertThat( + "Retry-After in seconds", + resp.headers().values("Retry-After"), + Matchers.contains("3") + ); + MatcherAssert.assertThat( + "X-Pantera-Fault carries overload: tag", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains("overload:event-queue") + ); + } + + @Test + void allProxiesFailedWithWinningResponsePassesThrough() { + final Response upstream = ResponseBuilder.from(RsStatus.BAD_GATEWAY) + .header("X-Upstream", "npmjs.org") + .body(new Content.From("{\"upstream\":\"bye\"}".getBytes())) + .build(); + final Fault.AllProxiesFailed apf = new Fault.AllProxiesFailed( + "npm_group", + List.of( + Fault.MemberOutcome.responded( + "npm_proxy_a", Fault.MemberOutcome.Kind.FIVE_XX, upstream + ), + Fault.MemberOutcome.threw( + "npm_proxy_b", Fault.MemberOutcome.Kind.EXCEPTION, + new RuntimeException("boom") + ) + ), + Optional.of(new Fault.AllProxiesFailed.ProxyFailure("npm_proxy_a", upstream)) 
+ ); + final Response resp = FaultTranslator.translate(apf, CTX); + MatcherAssert.assertThat( + "upstream status passed through", + resp.status(), Matchers.is(RsStatus.BAD_GATEWAY) + ); + MatcherAssert.assertThat( + "upstream header preserved", + resp.headers().values("X-Upstream"), Matchers.contains("npmjs.org") + ); + MatcherAssert.assertThat( + "X-Pantera-Fault carries proxies-failed:", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains("proxies-failed:npm_proxy_a") + ); + MatcherAssert.assertThat( + "X-Pantera-Proxies-Tried matches outcomes size", + resp.headers().values(FaultTranslator.HEADER_PROXIES_TRIED), + Matchers.contains("2") + ); + } + + @Test + void allProxiesFailedWithNoResponderSynthesizes502() { + final Fault.AllProxiesFailed apf = new Fault.AllProxiesFailed( + "npm_group", + List.of( + Fault.MemberOutcome.threw( + "npm_proxy_a", Fault.MemberOutcome.Kind.EXCEPTION, + new RuntimeException("connect refused") + ), + Fault.MemberOutcome.threw( + "npm_proxy_b", Fault.MemberOutcome.Kind.CANCELLED, null + ) + ), + Optional.empty() + ); + final Response resp = FaultTranslator.translate(apf, CTX); + MatcherAssert.assertThat( + "synthesized 502", + resp.status(), Matchers.is(RsStatus.BAD_GATEWAY) + ); + MatcherAssert.assertThat( + "X-Pantera-Fault: proxies-failed:none-responded", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains(FaultTranslator.TAG_PROXIES_NONE_RESPONDED) + ); + MatcherAssert.assertThat( + "X-Pantera-Proxies-Tried reflects members tried", + resp.headers().values(FaultTranslator.HEADER_PROXIES_TRIED), + Matchers.contains("2") + ); + } + + @Test + void upstreamIntegrityMapsTo502WithAlgoTag() { + final Response resp = FaultTranslator.translate( + new Fault.UpstreamIntegrity( + "https://maven.example/oss-parent-58.pom", + Fault.ChecksumAlgo.SHA1, + "15ce8a2c447057a4cfffd7a1d57b80937d293e7a", + "0ed9e5d9e7cad24fce51b18455e0cf5ccd2c94b6" + ), + CTX + ); + MatcherAssert.assertThat( + "502 status", 
resp.status(), Matchers.is(RsStatus.BAD_GATEWAY) + ); + MatcherAssert.assertThat( + "X-Pantera-Fault carries upstream-integrity:", + resp.headers().values(FaultTranslator.HEADER_FAULT), + Matchers.contains("upstream-integrity:SHA1") + ); + } + + /** + * Exhaustive pattern-match guard. If a new {@link Fault} variant is + * introduced without updating this switch, the Java compiler will reject + * this file β€” proving that every variant has an explicit branch. + * + *

      Using a switch expression forces exhaustiveness at compile time + * (sealed interface + returning {@code Void}). + */ + @Test + void exhaustiveSwitchCompilesForEveryVariant() { + final List<Fault> variants = List.of( + new Fault.NotFound("s", "a", "v"), + new Fault.Forbidden("r"), + new Fault.IndexUnavailable(new RuntimeException(), "q"), + new Fault.StorageUnavailable(new RuntimeException(), "k"), + new Fault.AllProxiesFailed("g", List.of(), Optional.empty()), + new Fault.Internal(new RuntimeException(), "w"), + new Fault.Deadline(Duration.ZERO, "w"), + new Fault.Overload("r", Duration.ZERO), + new Fault.UpstreamIntegrity("u", Fault.ChecksumAlgo.SHA256, "a", "b") + ); + for (final Fault variant : variants) { + // Exhaustive switch expression β€” compiler rejects if any variant is missing. + final Void ignored = switch (variant) { + case Fault.NotFound nf -> null; + case Fault.Forbidden fb -> null; + case Fault.IndexUnavailable iu -> null; + case Fault.StorageUnavailable su -> null; + case Fault.AllProxiesFailed apf -> null; + case Fault.Internal in -> null; + case Fault.Deadline dl -> null; + case Fault.Overload ov -> null; + case Fault.UpstreamIntegrity ui -> null; + }; + MatcherAssert.assertThat( + "every variant round-trips through FaultTranslator.translate()", + FaultTranslator.translate(variant, CTX), + Matchers.notNullValue() + ); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/fault/ResultTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/fault/ResultTest.java new file mode 100644 index 000000000..c211c7509 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/fault/ResultTest.java @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0.
+ * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.fault; + +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link Result#map} and {@link Result#flatMap} on the two variants. + */ +final class ResultTest { + + private static final Fault FAULT = new Fault.NotFound("g", "a", "v"); + + @Test + void factoriesProduceExpectedTypes() { + MatcherAssert.assertThat( + Result.ok(42), Matchers.instanceOf(Result.Ok.class) + ); + MatcherAssert.assertThat( + Result.err(FAULT), Matchers.instanceOf(Result.Err.class) + ); + } + + @Test + void okMapAppliesFunction() { + final Result mapped = Result.ok(1).map(v -> v + 2); + MatcherAssert.assertThat(mapped, Matchers.instanceOf(Result.Ok.class)); + MatcherAssert.assertThat( + ((Result.Ok) mapped).value(), Matchers.is(3) + ); + } + + @Test + void errMapShortCircuits() { + final Result mapped = Result.err(FAULT).map(v -> v + 2); + MatcherAssert.assertThat(mapped, Matchers.instanceOf(Result.Err.class)); + MatcherAssert.assertThat( + "fault preserved", + ((Result.Err) mapped).fault(), Matchers.sameInstance(FAULT) + ); + } + + @Test + void okFlatMapChainsNewResult() { + final Result chained = Result.ok(10) + .flatMap(v -> Result.ok("v=" + v)); + MatcherAssert.assertThat(chained, Matchers.instanceOf(Result.Ok.class)); + MatcherAssert.assertThat( + ((Result.Ok) chained).value(), Matchers.is("v=10") + ); + } + + @Test + void okFlatMapCanReturnErr() { + final Result chained = Result.ok(10) + .flatMap(v -> Result.err(FAULT)); + MatcherAssert.assertThat(chained, Matchers.instanceOf(Result.Err.class)); + MatcherAssert.assertThat( + ((Result.Err) chained).fault(), Matchers.sameInstance(FAULT) + ); + } + + @Test + void errFlatMapShortCircuits() { + final Result chained = Result.err(FAULT) + .flatMap(v -> Result.ok("should not run")); + MatcherAssert.assertThat(chained, 
Matchers.instanceOf(Result.Err.class)); + MatcherAssert.assertThat( + "original fault preserved", + ((Result.Err) chained).fault(), Matchers.sameInstance(FAULT) + ); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/log/EcsSchemaValidationTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/log/EcsSchemaValidationTest.java index c8ab23b27..3335708cb 100644 --- a/pantera-core/src/test/java/com/auto1/pantera/http/log/EcsSchemaValidationTest.java +++ b/pantera-core/src/test/java/com/auto1/pantera/http/log/EcsSchemaValidationTest.java @@ -475,6 +475,55 @@ void ecsLoggerMdcOwnedKeysAreKeptWhenNotInMdc() throws Exception { assertEquals("maven", json.get("repository.type").asText()); } + // ---- WI-00 level policy: 404/401/403 β†’ INFO (not WARN) ---- + + @Test + void notFoundResponsesLogAtInfoNotWarn() { + new EcsLogEvent() + .httpMethod("GET").httpStatus(com.auto1.pantera.http.RsStatus.NOT_FOUND) + .urlPath("/artifactory/libs-release-local/org/x/1.0/x-1.0.pom") + .duration(3).log(); + assertFalse(capture.events.isEmpty()); + assertEquals(org.apache.logging.log4j.Level.INFO, capture.lastEvent().getLevel(), + "404 must log at INFO per WI-00 access-log level policy"); + } + + @Test + void unauthorizedResponsesLogAtInfoNotWarn() { + new EcsLogEvent() + .httpMethod("GET").httpStatus(com.auto1.pantera.http.RsStatus.UNAUTHORIZED) + .urlPath("/artifactory/api/npm/npm_proxy/pkg").duration(2).log(); + assertEquals(org.apache.logging.log4j.Level.INFO, capture.lastEvent().getLevel(), + "401 must log at INFO per WI-00 access-log level policy"); + } + + @Test + void forbiddenResponsesLogAtInfoNotWarn() { + new EcsLogEvent() + .httpMethod("GET").httpStatus(com.auto1.pantera.http.RsStatus.FORBIDDEN) + .urlPath("/artifactory/libs-release-local/secret").duration(1).log(); + assertEquals(org.apache.logging.log4j.Level.INFO, capture.lastEvent().getLevel(), + "403 must log at INFO per WI-00 access-log level policy"); + } + + @Test + void 
otherFourXxStillLogAtWarn() { + new EcsLogEvent() + .httpMethod("POST").httpStatus(com.auto1.pantera.http.RsStatus.BAD_REQUEST) + .urlPath("/artifactory/api/npm/npm_proxy/pkg").duration(1).log(); + assertEquals(org.apache.logging.log4j.Level.WARN, capture.lastEvent().getLevel(), + "400 remains at WARN β€” only 401/403/404 downgraded"); + } + + @Test + void fiveXxStillLogAtError() { + new EcsLogEvent() + .httpMethod("GET").httpStatus(com.auto1.pantera.http.RsStatus.INTERNAL_ERROR) + .urlPath("/any").duration(5).log(); + assertEquals(org.apache.logging.log4j.Level.ERROR, capture.lastEvent().getLevel(), + "5xx still ERROR regardless of other policy changes"); + } + /** * Simple appender that collects log events in a list for inspection. */ diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/observability/AccessLoggerTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/observability/AccessLoggerTest.java new file mode 100644 index 000000000..86071982c --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/observability/AccessLoggerTest.java @@ -0,0 +1,276 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.observability; + +import com.auto1.pantera.http.context.Deadline; +import com.auto1.pantera.http.context.RequestContext; +import com.auto1.pantera.http.fault.Fault; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.ThreadContext; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.LoggerConfig; +import org.apache.logging.log4j.core.config.Property; +import org.apache.logging.log4j.message.MapMessage; +import org.apache.logging.log4j.message.Message; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Tier-1 tests β€” verifies the {@link StructuredLogger.AccessLogger} emits at + * the levels dictated by {@link LevelPolicy} and enforces the required + * RequestContext at entry (Β§4.3). 
+ */ +final class AccessLoggerTest { + + private static final String CAP = "AccessLoggerCap"; + private static final String LOGGER = "http.access"; + + private CapturingAppender capture; + + @BeforeEach + void setUp() { + ThreadContext.clearAll(); + this.capture = new CapturingAppender(CAP); + this.capture.start(); + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + cfg.addAppender(this.capture); + cfg.getRootLogger().addAppender(this.capture, null, null); + final LoggerConfig lconf = cfg.getLoggerConfig(LOGGER); + lconf.addAppender(this.capture, null, null); + lc.updateLoggers(); + } + + @AfterEach + void tearDown() { + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + cfg.getRootLogger().removeAppender(CAP); + cfg.getLoggerConfig(LOGGER).removeAppender(CAP); + this.capture.stop(); + lc.updateLoggers(); + ThreadContext.clearAll(); + } + + @Test + @DisplayName("forRequest(null) throws NullPointerException β€” required-field guard") + void forRequestRejectsNullContext() { + try { + StructuredLogger.access().forRequest(null); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException expected) { + MatcherAssert.assertThat( + expected.getMessage(), Matchers.containsString("ctx") + ); + } + } + + @Test + @DisplayName("2xx β†’ DEBUG per LevelPolicy.CLIENT_FACING_SUCCESS") + void successLogsAtDebug() { + StructuredLogger.access().forRequest(minimalCtx()) + .status(200).duration(15L).log(); + MatcherAssert.assertThat(this.capture.events, Matchers.not(Matchers.empty())); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.DEBUG)); + } + + @Test + @DisplayName("404 β†’ INFO per LevelPolicy.CLIENT_FACING_NOT_FOUND") + void notFoundLogsAtInfo() { + StructuredLogger.access().forRequest(minimalCtx()) + .status(404).duration(5L).log(); + 
MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.INFO)); + } + + @Test + @DisplayName("401/403 β†’ INFO per LevelPolicy.CLIENT_FACING_UNAUTH") + void unauthLogsAtInfo() { + StructuredLogger.access().forRequest(minimalCtx()) + .status(401).duration(2L).log(); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.INFO)); + StructuredLogger.access().forRequest(minimalCtx()) + .status(403).duration(3L).log(); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.INFO)); + } + + @Test + @DisplayName("400 β†’ WARN (other 4xx still WARN per LevelPolicy.CLIENT_FACING_4XX_OTHER)") + void other4xxLogsAtWarn() { + StructuredLogger.access().forRequest(minimalCtx()) + .status(400).duration(1L).log(); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.WARN)); + } + + @Test + @DisplayName("5xx β†’ ERROR per LevelPolicy.CLIENT_FACING_5XX") + void serverErrorLogsAtError() { + StructuredLogger.access().forRequest(minimalCtx()) + .status(503).duration(10L).log(); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.ERROR)); + } + + @Test + @DisplayName(">5000ms slow β†’ WARN per LevelPolicy.CLIENT_FACING_SLOW") + void slowRequestLogsAtWarn() { + StructuredLogger.access().forRequest(minimalCtx()) + .status(200).duration(6000L).log(); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.WARN)); + } + + @Test + @DisplayName("5xx with Fault.Internal attaches error.type/error.message/error.stack_trace") + void faultAttachesErrorFields() { + final Fault.Internal fault = new Fault.Internal( + new IllegalStateException("boom"), + "test.where" + ); + StructuredLogger.access().forRequest(minimalCtx()) + .status(500).fault(fault).duration(200L).log(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat(payloadField(evt, "error.type"), Matchers.notNullValue()); + MatcherAssert.assertThat( + (String) payloadField(evt, "error.type"), + 
Matchers.containsString("IllegalStateException") + ); + MatcherAssert.assertThat(payloadField(evt, "error.message"), Matchers.is("boom")); + MatcherAssert.assertThat( + (String) payloadField(evt, "error.stack_trace"), + Matchers.containsString("IllegalStateException") + ); + } + + @Test + @DisplayName("RequestContext.bindToMdc() populates trace.id / client.ip during log()") + void contextBoundToMdcDuringEmit() { + final RequestContext ctx = new RequestContext( + "trace-aaa", "txn-xyz", null, null, + "alice", "10.0.0.1", null, + "npm_group", "npm", RequestContext.ArtifactRef.EMPTY, + "/-/all", "/-/all", Deadline.in(Duration.ofSeconds(30)) + ); + StructuredLogger.access().forRequest(ctx) + .status(404).duration(3L).log(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat( + evt.getContextData().getValue("trace.id"), Matchers.is("trace-aaa") + ); + MatcherAssert.assertThat( + evt.getContextData().getValue("client.ip"), Matchers.is("10.0.0.1") + ); + MatcherAssert.assertThat( + evt.getContextData().getValue("user.name"), Matchers.is("alice") + ); + MatcherAssert.assertThat( + evt.getContextData().getValue("url.original"), Matchers.is("/-/all") + ); + } + + @Test + @DisplayName("http.response.status_code and event.duration are top-level MapMessage fields") + void statusAndDurationInPayload() { + StructuredLogger.access().forRequest(minimalCtx()) + .status(503).duration(250L).log(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat(payloadField(evt, "http.response.status_code"), Matchers.is(503)); + MatcherAssert.assertThat(payloadField(evt, "event.duration"), Matchers.is(250L)); + } + + @Test + @DisplayName("Prior ThreadContext is restored after emission") + void priorThreadContextIsRestoredAfterLog() { + ThreadContext.put("pre-existing", "yes"); + StructuredLogger.access().forRequest(minimalCtx()) + .status(200).duration(5L).log(); + MatcherAssert.assertThat(ThreadContext.get("pre-existing"), Matchers.is("yes")); + 
MatcherAssert.assertThat(ThreadContext.get("trace.id"), Matchers.nullValue()); + } + + @Test + @DisplayName("log() parses user_agent.original and emits user_agent.* sub-fields (WI-post-03b)") + void logEmitsParsedUserAgentSubFields() { + final RequestContext ctx = new RequestContext( + "trace-ua", null, null, null, + "anonymous", "10.0.0.3", + "Maven/3.9.6 (Java/21.0.3 Linux 6.12.68)", + "maven_group", "maven", RequestContext.ArtifactRef.EMPTY, + "/com/example/foo-1.0.jar", "/com/example/foo-1.0.jar", + Deadline.in(Duration.ofSeconds(10)) + ); + StructuredLogger.access().forRequest(ctx) + .status(200).duration(42L).log(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat(payloadField(evt, "user_agent.name"), Matchers.is("Maven")); + MatcherAssert.assertThat(payloadField(evt, "user_agent.version"), Matchers.is("3.9.6")); + MatcherAssert.assertThat(payloadField(evt, "user_agent.os.name"), Matchers.is("Linux")); + MatcherAssert.assertThat(payloadField(evt, "user_agent.os.version"), Matchers.is("21.0.3")); + } + + @Test + @DisplayName("log() omits user_agent.* sub-fields when RequestContext.userAgent is null") + void logSkipsSubFieldsWhenOriginalAbsent() { + // minimalCtx() has userAgent=null. 
+ StructuredLogger.access().forRequest(minimalCtx()) + .status(200).duration(3L).log(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat(payloadField(evt, "user_agent.name"), Matchers.nullValue()); + MatcherAssert.assertThat(payloadField(evt, "user_agent.version"), Matchers.nullValue()); + MatcherAssert.assertThat(payloadField(evt, "user_agent.os.name"), Matchers.nullValue()); + MatcherAssert.assertThat(payloadField(evt, "user_agent.os.version"), Matchers.nullValue()); + MatcherAssert.assertThat(payloadField(evt, "user_agent.device.name"), Matchers.nullValue()); + } + + // ---- helpers ---- + + private static RequestContext minimalCtx() { + return new RequestContext( + "trace-min", null, null, null, "anonymous", "10.0.0.2", null, + "repo", "npm", RequestContext.ArtifactRef.EMPTY, + "/x", "/x", Deadline.in(Duration.ofSeconds(5)) + ); + } + + private static Object payloadField(final LogEvent evt, final String key) { + final Message msg = evt.getMessage(); + if (msg instanceof MapMessage mm) { + return mm.getData().get(key); + } + return null; + } + + private static final class CapturingAppender extends AbstractAppender { + + private final List<LogEvent> events = new ArrayList<>(); + + CapturingAppender(final String name) { + super(name, null, null, true, Property.EMPTY_ARRAY); + } + + @Override + public void append(final LogEvent event) { + this.events.add(event.toImmutable()); + } + + LogEvent last() { + return this.events.get(this.events.size() - 1); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/observability/AuditLoggerTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/observability/AuditLoggerTest.java new file mode 100644 index 000000000..18b59a86d --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/observability/AuditLoggerTest.java @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free
software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.observability; + +import com.auto1.pantera.audit.AuditAction; +import com.auto1.pantera.http.context.Deadline; +import com.auto1.pantera.http.context.RequestContext; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.ThreadContext; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.LoggerConfig; +import org.apache.logging.log4j.core.config.Property; +import org.apache.logging.log4j.message.MapMessage; +import org.apache.logging.log4j.message.Message; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Tier-5 tests β€” {@link StructuredLogger.AuditLogger} emits compliance events + * at INFO, regardless of operational log-level settings, with a closed + * {@link AuditAction} enum and required package name+version. 
+ */ +final class AuditLoggerTest { + + private static final String CAP = "AuditLoggerCap"; + private static final String LOGGER = "com.auto1.pantera.audit"; + + private CapturingAppender capture; + + @BeforeEach + void setUp() { + ThreadContext.clearAll(); + this.capture = new CapturingAppender(CAP); + this.capture.start(); + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + cfg.addAppender(this.capture); + cfg.getRootLogger().addAppender(this.capture, null, null); + cfg.getLoggerConfig(LOGGER).addAppender(this.capture, null, null); + lc.updateLoggers(); + } + + @AfterEach + void tearDown() { + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + cfg.getRootLogger().removeAppender(CAP); + cfg.getLoggerConfig(LOGGER).removeAppender(CAP); + this.capture.stop(); + lc.updateLoggers(); + ThreadContext.clearAll(); + } + + @Test + @DisplayName("forEvent(null, A) throws NPE") + void forEventRejectsNullCtx() { + try { + StructuredLogger.audit().forEvent(null, AuditAction.ARTIFACT_PUBLISH); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException ex) { + MatcherAssert.assertThat(ex.getMessage(), Matchers.containsString("ctx")); + } + } + + @Test + @DisplayName("forEvent(ctx, null) throws NPE") + void forEventRejectsNullAction() { + try { + StructuredLogger.audit().forEvent(ctx(), null); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException ex) { + MatcherAssert.assertThat(ex.getMessage(), Matchers.containsString("action")); + } + } + + @Test + @DisplayName("emit() without packageName throws NPE") + void emitWithoutPackageNameFails() { + try { + StructuredLogger.audit().forEvent(ctx(), AuditAction.ARTIFACT_PUBLISH) + .packageVersion("1.0").emit(); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch 
(final NullPointerException ex) { + MatcherAssert.assertThat(ex.getMessage(), Matchers.containsString("packageName")); + } + } + + @Test + @DisplayName("emit() without packageVersion throws NPE") + void emitWithoutPackageVersionFails() { + try { + StructuredLogger.audit().forEvent(ctx(), AuditAction.ARTIFACT_PUBLISH) + .packageName("pkg").emit(); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException ex) { + MatcherAssert.assertThat(ex.getMessage(), Matchers.containsString("packageVersion")); + } + } + + @Test + @DisplayName("All four AuditAction variants emit at INFO") + void allActionsEmitAtInfo() { + for (final AuditAction action : AuditAction.values()) { + this.capture.events.clear(); + StructuredLogger.audit().forEvent(ctx(), action) + .packageName("org.example:artifact") + .packageVersion("1.2.3") + .emit(); + MatcherAssert.assertThat( + "Action " + action.name() + " must emit at INFO", + this.capture.last().getLevel(), Matchers.is(Level.INFO) + ); + } + } + + @Test + @DisplayName("Required fields present in MapMessage payload") + void requiredFieldsInPayload() { + StructuredLogger.audit() + .forEvent(ctx(), AuditAction.ARTIFACT_DOWNLOAD) + .packageName("lodash") + .packageVersion("4.17.21") + .emit(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat(payload(evt, "package.name"), Matchers.is("lodash")); + MatcherAssert.assertThat(payload(evt, "package.version"), Matchers.is("4.17.21")); + MatcherAssert.assertThat(payload(evt, "event.action"), Matchers.is("artifact_download")); + MatcherAssert.assertThat(payload(evt, "data_stream.dataset"), Matchers.is("pantera.audit")); + MatcherAssert.assertThat( + payload(evt, "event.category"), + Matchers.is(List.of("audit")) + ); + } + + @Test + @DisplayName("Optional checksum + outcome emitted when set") + void optionalFieldsEmittedWhenSet() { + StructuredLogger.audit() + .forEvent(ctx(), AuditAction.ARTIFACT_PUBLISH) + 
.packageName("org.example:app") + .packageVersion("2.0.0") + .packageChecksum("abcdef123456") + .outcome("success") + .emit(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat(payload(evt, "package.checksum"), Matchers.is("abcdef123456")); + MatcherAssert.assertThat(payload(evt, "event.outcome"), Matchers.is("success")); + } + + @Test + @DisplayName("Optional checksum absent when not set") + void optionalChecksumAbsentWhenNotSet() { + StructuredLogger.audit() + .forEvent(ctx(), AuditAction.RESOLUTION) + .packageName("pkg") + .packageVersion("1") + .emit(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat(payload(evt, "package.checksum"), Matchers.nullValue()); + } + + @Test + @DisplayName("RequestContext's trace.id / user.name / client.ip bound via bindToMdc") + void ctxFieldsBoundToMdc() { + final RequestContext c = new RequestContext( + "trace-audit", null, null, null, + "alice", "10.1.2.3", null, + "npm_proxy", "npm", RequestContext.ArtifactRef.EMPTY, + "/lodash", "/lodash", Deadline.in(Duration.ofSeconds(5)) + ); + StructuredLogger.audit() + .forEvent(c, AuditAction.ARTIFACT_DOWNLOAD) + .packageName("lodash").packageVersion("4.17.21") + .emit(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat( + evt.getContextData().getValue("trace.id"), Matchers.is("trace-audit") + ); + MatcherAssert.assertThat( + evt.getContextData().getValue("user.name"), Matchers.is("alice") + ); + MatcherAssert.assertThat( + evt.getContextData().getValue("client.ip"), Matchers.is("10.1.2.3") + ); + } + + @Test + @DisplayName("emit() fires even when operational log level is raised to ERROR") + void auditNotSuppressibleByOperationalLevel() { + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + final LoggerConfig original = cfg.getLoggerConfig(LOGGER); + final Level priorLevel = original.getLevel(); + try { + // Simulate operational suppression by raising 
the root to ERROR; the audit + // logger must still emit at INFO. The log4j config in production pins the + // audit logger to INFO + additivity=false; this test asserts behaviour + // when operational config is adversarial. + final LoggerConfig auditCfg = cfg.getLoggerConfig(LOGGER); + // Ensure audit logger is explicitly at INFO level (independent of root). + auditCfg.setLevel(Level.INFO); + lc.updateLoggers(); + + StructuredLogger.audit() + .forEvent(ctx(), AuditAction.ARTIFACT_DELETE) + .packageName("pkg").packageVersion("1").emit(); + + MatcherAssert.assertThat( + "audit event must appear at INFO despite operational level shifts", + this.capture.events.size(), Matchers.greaterThanOrEqualTo(1) + ); + MatcherAssert.assertThat( + this.capture.last().getLevel(), Matchers.is(Level.INFO) + ); + } finally { + original.setLevel(priorLevel); + lc.updateLoggers(); + } + } + + // ---- helpers ---- + + private static RequestContext ctx() { + return new RequestContext( + "trace-aud", null, null, null, "anonymous", null, null, + "repo", "npm", RequestContext.ArtifactRef.EMPTY, + "/x", "/x", Deadline.in(Duration.ofSeconds(5)) + ); + } + + private static Object payload(final LogEvent evt, final String key) { + final Message msg = evt.getMessage(); + if (msg instanceof MapMessage mm) { + return mm.getData().get(key); + } + return null; + } + + private static final class CapturingAppender extends AbstractAppender { + + private final List events = new ArrayList<>(); + + CapturingAppender(final String name) { + super(name, null, null, true, Property.EMPTY_ARRAY); + } + + @Override + public void append(final LogEvent event) { + this.events.add(event.toImmutable()); + } + + LogEvent last() { + return this.events.get(this.events.size() - 1); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/observability/ContextualExecutorIntegrationTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/observability/ContextualExecutorIntegrationTest.java new file mode 
100644 index 000000000..f983dff50 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/observability/ContextualExecutorIntegrationTest.java @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.observability; + +import com.auto1.pantera.http.context.ContextualExecutor; +import com.auto1.pantera.http.resilience.SingleFlight; +import java.time.Duration; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.logging.log4j.ThreadContext; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * End-to-end propagation test β€” verifies that wiring + * {@link ContextualExecutor#contextualize(java.util.concurrent.Executor)} + * into a {@link SingleFlight} (the WI-03 executor-wrapping points in + * {@code MavenGroupSlice}, {@code BaseCachedProxySlice}, + * {@code CachedNpmProxySlice}) means callers no longer need to wrap each + * continuation by hand: the executor itself snapshots the caller's + * {@link ThreadContext} and APM span and installs them on the runner thread + * for the duration of the task. 
+ */ +final class ContextualExecutorIntegrationTest { + + private ExecutorService backing; + + @BeforeEach + void setUp() { + ThreadContext.clearMap(); + this.backing = Executors.newFixedThreadPool(2); + } + + @AfterEach + void tearDown() { + ThreadContext.clearMap(); + this.backing.shutdownNow(); + } + + @Test + @DisplayName("SingleFlight wrapped with ContextualExecutor propagates ThreadContext") + void singleFlightPropagatesContext() throws Exception { + final SingleFlight<String> sf = new SingleFlight<>( + Duration.ofSeconds(5), + 16, + ContextualExecutor.contextualize(this.backing) + ); + + ThreadContext.put("trace.id", "wi03-trace"); + ThreadContext.put("repository.name", "npm_proxy"); + final AtomicReference<String> seenTrace = new AtomicReference<>(); + final AtomicReference<String> seenRepo = new AtomicReference<>(); + + // Submit via SingleFlight. The loader itself may run synchronously on + // the caller; the follower thread that's dispatched for completion + // is the one wrapped by ContextualExecutor. We assert the completion + // callback sees the caller's context. + final CompletableFuture<String> fut = sf.load("k1", () -> + CompletableFuture.supplyAsync(() -> "value", this.backing) + ); + fut.thenApplyAsync(v -> { + seenTrace.set(ThreadContext.get("trace.id")); + seenRepo.set(ThreadContext.get("repository.name")); + return v; + }, ContextualExecutor.contextualize(this.backing)).get(5L, TimeUnit.SECONDS); + + MatcherAssert.assertThat(seenTrace.get(), Matchers.is("wi03-trace")); + MatcherAssert.assertThat(seenRepo.get(), Matchers.is("npm_proxy")); + } + + @Test + @DisplayName("Runner thread's prior ThreadContext is restored after the task") + void runnerThreadContextRestored() throws Exception { + // Seed the runner thread with its own prior context. + this.backing.submit(() -> ThreadContext.put("pre", "runner")) + .get(5L, TimeUnit.SECONDS); + + // Submit via the contextualised executor with a different caller ctx. 
+ ThreadContext.clearMap(); + ThreadContext.put("trace.id", "fresh"); + CompletableFuture.runAsync(() -> { + MatcherAssert.assertThat( + ThreadContext.get("trace.id"), Matchers.is("fresh") + ); + MatcherAssert.assertThat( + "runner's prior ctx must be hidden during task", + ThreadContext.get("pre"), Matchers.nullValue() + ); + }, ContextualExecutor.contextualize(this.backing)).get(5L, TimeUnit.SECONDS); + + // After the task, the runner's prior ctx must be back. + final AtomicReference restored = new AtomicReference<>(); + this.backing.submit(() -> restored.set(ThreadContext.get("pre"))) + .get(5L, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "runner's prior ctx restored after contextualised task", + restored.get(), Matchers.is("runner") + ); + } + + @Test + @DisplayName("Chained thenApplyAsync/thenComposeAsync see caller context via ContextualExecutor") + void chainedStagesSeeContextWithoutManualMdc() throws Exception { + ThreadContext.put("trace.id", "chain-1"); + ThreadContext.put("user.name", "alice"); + final AtomicReference stage2Trace = new AtomicReference<>(); + final AtomicReference stage3User = new AtomicReference<>(); + + CompletableFuture + .supplyAsync(() -> 1, ContextualExecutor.contextualize(this.backing)) + .thenApplyAsync(v -> { + stage2Trace.set(ThreadContext.get("trace.id")); + return v + 1; + }, ContextualExecutor.contextualize(this.backing)) + .thenComposeAsync(v -> { + stage3User.set(ThreadContext.get("user.name")); + return CompletableFuture.completedFuture(v); + }, ContextualExecutor.contextualize(this.backing)) + .get(5L, TimeUnit.SECONDS); + + MatcherAssert.assertThat(stage2Trace.get(), Matchers.is("chain-1")); + MatcherAssert.assertThat(stage3User.get(), Matchers.is("alice")); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/observability/InternalLoggerTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/observability/InternalLoggerTest.java new file mode 100644 index 000000000..10ec6a3d1 --- /dev/null 
+++ b/pantera-core/src/test/java/com/auto1/pantera/http/observability/InternalLoggerTest.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.observability; + +import com.auto1.pantera.http.context.Deadline; +import com.auto1.pantera.http.context.RequestContext; +import com.auto1.pantera.http.fault.Fault; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.ThreadContext; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.LoggerConfig; +import org.apache.logging.log4j.core.config.Property; +import org.apache.logging.log4j.message.MapMessage; +import org.apache.logging.log4j.message.Message; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Tier-2 tests β€” {@link StructuredLogger.InternalLogger} fires only on 500s + * from internal callees. {@code error()} requires a {@link Fault} (500-only). 
+ */ +final class InternalLoggerTest { + + private static final String CAP = "InternalLoggerCap"; + private static final String LOGGER = "http.internal"; + + private CapturingAppender capture; + + @BeforeEach + void setUp() { + ThreadContext.clearAll(); + this.capture = new CapturingAppender(CAP); + this.capture.start(); + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + cfg.addAppender(this.capture); + cfg.getRootLogger().addAppender(this.capture, null, null); + cfg.getLoggerConfig(LOGGER).addAppender(this.capture, null, null); + lc.updateLoggers(); + } + + @AfterEach + void tearDown() { + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + cfg.getRootLogger().removeAppender(CAP); + cfg.getLoggerConfig(LOGGER).removeAppender(CAP); + this.capture.stop(); + lc.updateLoggers(); + ThreadContext.clearAll(); + } + + @Test + @DisplayName("forCall(null, m) throws NPE") + void forCallRejectsNullCtx() { + try { + StructuredLogger.internal().forCall(null, "member"); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException expected) { + MatcherAssert.assertThat(expected.getMessage(), Matchers.containsString("ctx")); + } + } + + @Test + @DisplayName("forCall(ctx, null) throws NPE") + void forCallRejectsNullMember() { + try { + StructuredLogger.internal().forCall(ctx(), null); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException expected) { + MatcherAssert.assertThat(expected.getMessage(), Matchers.containsString("memberName")); + } + } + + @Test + @DisplayName("error() without fault throws IllegalStateException") + void errorWithoutFaultFails() { + try { + StructuredLogger.internal().forCall(ctx(), "member").error(); + MatcherAssert.assertThat("expected ISE", false, Matchers.is(true)); + } catch (final IllegalStateException 
expected) { + MatcherAssert.assertThat( + expected.getMessage(), Matchers.containsString("Fault") + ); + } + } + + @Test + @DisplayName("fault(null) throws NPE") + void faultRejectsNull() { + try { + StructuredLogger.internal().forCall(ctx(), "member").fault(null); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException expected) { + MatcherAssert.assertThat(expected.getMessage(), Matchers.containsString("fault")); + } + } + + @Test + @DisplayName("500 Fault.Internal β†’ ERROR with error.type / error.message / error.stack_trace") + void internalFaultEmitsAtErrorWithStack() { + final Fault.Internal fault = new Fault.Internal( + new RuntimeException("db-boom"), "idx.lookup" + ); + StructuredLogger.internal().forCall(ctx(), "npm_proxy") + .fault(fault).error(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat(evt.getLevel(), Matchers.is(Level.ERROR)); + MatcherAssert.assertThat(payload(evt, "error.type"), Matchers.notNullValue()); + MatcherAssert.assertThat( + (String) payload(evt, "error.type"), + Matchers.containsString("RuntimeException") + ); + MatcherAssert.assertThat(payload(evt, "error.message"), Matchers.is("db-boom")); + MatcherAssert.assertThat( + (String) payload(evt, "error.stack_trace"), + Matchers.containsString("db-boom") + ); + MatcherAssert.assertThat(payload(evt, "internal.target"), Matchers.is("npm_proxy")); + } + + @Test + @DisplayName("debug() for successful internal calls β€” DEBUG level") + void debugHookLogsAtDebug() { + StructuredLogger.internal().forCall(ctx(), "hosted").debug(); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.DEBUG)); + } + + // ---- helpers ---- + + private static RequestContext ctx() { + return new RequestContext( + "trace-int", null, null, null, "anonymous", null, null, + "grp", "npm", RequestContext.ArtifactRef.EMPTY, + "/x", "/x", Deadline.in(Duration.ofSeconds(5)) + ); + } + + private static Object payload(final 
 LogEvent evt, final String key) { + final Message msg = evt.getMessage(); + if (msg instanceof MapMessage mm) { + return mm.getData().get(key); + } + return null; + } + + private static final class CapturingAppender extends AbstractAppender { + + private final List<LogEvent> events = new ArrayList<>(); + + CapturingAppender(final String name) { + super(name, null, null, true, Property.EMPTY_ARRAY); + } + + @Override + public void append(final LogEvent event) { + this.events.add(event.toImmutable()); + } + + LogEvent last() { + return this.events.get(this.events.size() - 1); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/observability/LevelPolicyTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/observability/LevelPolicyTest.java new file mode 100644 index 000000000..a9e59f064 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/observability/LevelPolicyTest.java @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.observability; + +import java.util.EnumMap; +import java.util.Map; +import org.apache.logging.log4j.Level; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Table-driven test for {@link LevelPolicy} — §4.2 of + * {@code docs/analysis/v2.2-target-architecture.md}. + * + *

      The expected table below is the source of truth. When the spec updates + * a mapping, change the table; the test will then drive the code change. + */ +final class LevelPolicyTest { + + private static final Map<LevelPolicy, Level> EXPECTED = new EnumMap<>(LevelPolicy.class); + + static { + // Tier-1 + EXPECTED.put(LevelPolicy.CLIENT_FACING_SUCCESS, Level.DEBUG); + EXPECTED.put(LevelPolicy.CLIENT_FACING_NOT_FOUND, Level.INFO); + EXPECTED.put(LevelPolicy.CLIENT_FACING_UNAUTH, Level.INFO); + EXPECTED.put(LevelPolicy.CLIENT_FACING_4XX_OTHER, Level.WARN); + EXPECTED.put(LevelPolicy.CLIENT_FACING_5XX, Level.ERROR); + EXPECTED.put(LevelPolicy.CLIENT_FACING_SLOW, Level.WARN); + // Tier-2 + EXPECTED.put(LevelPolicy.INTERNAL_CALL_SUCCESS, Level.DEBUG); + EXPECTED.put(LevelPolicy.INTERNAL_CALL_NOT_FOUND, Level.DEBUG); + EXPECTED.put(LevelPolicy.INTERNAL_CALL_500, Level.ERROR); + // Tier-3 + EXPECTED.put(LevelPolicy.UPSTREAM_SUCCESS, Level.DEBUG); + EXPECTED.put(LevelPolicy.UPSTREAM_NOT_FOUND, Level.DEBUG); + EXPECTED.put(LevelPolicy.UPSTREAM_5XX, Level.ERROR); + // Tier-4 + EXPECTED.put(LevelPolicy.LOCAL_CONFIG_CHANGE, Level.INFO); + EXPECTED.put(LevelPolicy.LOCAL_OP_SUCCESS, Level.DEBUG); + EXPECTED.put(LevelPolicy.LOCAL_DEGRADED, Level.WARN); + EXPECTED.put(LevelPolicy.LOCAL_FAILURE, Level.ERROR); + // Tier-5 + EXPECTED.put(LevelPolicy.AUDIT_EVENT, Level.INFO); + } + + @Test + @DisplayName("Every enum member has an expected Level in the spec table") + void enumIsCompleteInExpectedTable() { + for (final LevelPolicy p : LevelPolicy.values()) { + MatcherAssert.assertThat( + "LevelPolicy." 
+ p.name() + " missing from expected table β€” " + + "add a row to EXPECTED or remove the enum member", + EXPECTED.containsKey(p), Matchers.is(true) + ); + } + } + + @Test + @DisplayName("Every enum member maps to the Level declared in Β§4.2") + void everyMemberMapsToExpectedLevel() { + for (final LevelPolicy p : LevelPolicy.values()) { + MatcherAssert.assertThat( + "Level for " + p.name(), + p.level(), Matchers.is(EXPECTED.get(p)) + ); + } + } + + @Test + @DisplayName("Tier-1 not-found / unauth fall to INFO (WI-00 downgrade from WARN)") + void tier1ClientNoiseIsInfoNotWarn() { + MatcherAssert.assertThat( + LevelPolicy.CLIENT_FACING_NOT_FOUND.level(), Matchers.is(Level.INFO) + ); + MatcherAssert.assertThat( + LevelPolicy.CLIENT_FACING_UNAUTH.level(), Matchers.is(Level.INFO) + ); + } + + @Test + @DisplayName("Audit is INFO β€” never suppressed by operational level config") + void auditIsAtInfo() { + MatcherAssert.assertThat( + LevelPolicy.AUDIT_EVENT.level(), Matchers.is(Level.INFO) + ); + } + + @Test + @DisplayName("Every 5xx / failure tier maps to ERROR") + void failureTiersAreError() { + MatcherAssert.assertThat(LevelPolicy.CLIENT_FACING_5XX.level(), Matchers.is(Level.ERROR)); + MatcherAssert.assertThat(LevelPolicy.INTERNAL_CALL_500.level(), Matchers.is(Level.ERROR)); + MatcherAssert.assertThat(LevelPolicy.UPSTREAM_5XX.level(), Matchers.is(Level.ERROR)); + MatcherAssert.assertThat(LevelPolicy.LOCAL_FAILURE.level(), Matchers.is(Level.ERROR)); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/observability/LocalLoggerTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/observability/LocalLoggerTest.java new file mode 100644 index 000000000..699c4ac62 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/observability/LocalLoggerTest.java @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can 
redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.observability; + +import com.auto1.pantera.http.context.Deadline; +import com.auto1.pantera.http.context.RequestContext; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.ThreadContext; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.Property; +import org.apache.logging.log4j.message.MapMessage; +import org.apache.logging.log4j.message.Message; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Tier-4 tests β€” {@link StructuredLogger.LocalLogger} for local ops + * (DB, cache, pool-init, queue-drop, ...). 
+ */ +final class LocalLoggerTest { + + private static final String CAP = "LocalLoggerCap"; + private static final String COMPONENT = "com.auto1.pantera.test.local"; + + private CapturingAppender capture; + + @BeforeEach + void setUp() { + ThreadContext.clearAll(); + this.capture = new CapturingAppender(CAP); + this.capture.start(); + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + cfg.addAppender(this.capture); + cfg.getRootLogger().addAppender(this.capture, null, null); + cfg.getLoggerConfig(COMPONENT).addAppender(this.capture, null, null); + lc.updateLoggers(); + } + + @AfterEach + void tearDown() { + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + cfg.getRootLogger().removeAppender(CAP); + cfg.getLoggerConfig(COMPONENT).removeAppender(CAP); + this.capture.stop(); + lc.updateLoggers(); + ThreadContext.clearAll(); + } + + @Test + @DisplayName("forComponent(null) throws NPE") + void forComponentRejectsNull() { + try { + StructuredLogger.local().forComponent(null); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException ex) { + MatcherAssert.assertThat(ex.getMessage(), Matchers.containsString("component")); + } + } + + @Test + @DisplayName("info() for config change β†’ INFO") + void configChangeLogsAtInfo() { + StructuredLogger.local().forComponent(COMPONENT) + .message("Pool init: 16 threads, queue=2000").info(); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.INFO)); + } + + @Test + @DisplayName("debug() for op-success β†’ DEBUG") + void opSuccessLogsAtDebug() { + StructuredLogger.local().forComponent(COMPONENT).message("ok").debug(); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.DEBUG)); + } + + @Test + @DisplayName("warn() for degraded β†’ WARN") + void degradedLogsAtWarn() { + 
StructuredLogger.local().forComponent(COMPONENT) + .message("executor queue at 90% β€” caller-runs applied") + .field("pantera.queue.size", 1800) + .warn(); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.WARN)); + MatcherAssert.assertThat( + payload(this.capture.last(), "pantera.queue.size"), Matchers.is(1800) + ); + } + + @Test + @DisplayName("error() without cause throws NPE") + void errorWithoutCauseFails() { + try { + StructuredLogger.local().forComponent(COMPONENT).message("msg").error(); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException ex) { + MatcherAssert.assertThat(ex.getMessage(), Matchers.containsString("cause")); + } + } + + @Test + @DisplayName("error() with cause β†’ ERROR with error.type / error.stack_trace") + void errorWithCauseLogsAtError() { + final Exception cause = new java.io.IOException("disk full"); + StructuredLogger.local().forComponent(COMPONENT) + .message("flush failed") + .cause(cause) + .error(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat(evt.getLevel(), Matchers.is(Level.ERROR)); + MatcherAssert.assertThat( + (String) payload(evt, "error.type"), + Matchers.containsString("IOException") + ); + MatcherAssert.assertThat( + (String) payload(evt, "error.stack_trace"), + Matchers.containsString("disk full") + ); + } + + @Test + @DisplayName("reqCtx binds trace.id for request-linked local ops") + void reqCtxBindsTraceId() { + final RequestContext ctx = new RequestContext( + "trace-loc", null, null, null, "anonymous", null, null, + "repo", "npm", RequestContext.ArtifactRef.EMPTY, + "/x", "/x", Deadline.in(Duration.ofSeconds(5)) + ); + StructuredLogger.local().forComponent(COMPONENT) + .message("cache evict") + .reqCtx(ctx) + .debug(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat( + evt.getContextData().getValue("trace.id"), Matchers.is("trace-loc") + ); + } + + @Test + @DisplayName("Terminal without 
message() throws IllegalStateException") + void missingMessageFails() { + try { + StructuredLogger.local().forComponent(COMPONENT).debug(); + MatcherAssert.assertThat("expected ISE", false, Matchers.is(true)); + } catch (final IllegalStateException ex) { + MatcherAssert.assertThat(ex.getMessage(), Matchers.containsString("message")); + } + } + + // ---- helpers ---- + + private static Object payload(final LogEvent evt, final String key) { + final Message msg = evt.getMessage(); + if (msg instanceof MapMessage mm) { + return mm.getData().get(key); + } + return null; + } + + private static final class CapturingAppender extends AbstractAppender { + + private final List events = new ArrayList<>(); + + CapturingAppender(final String name) { + super(name, null, null, true, Property.EMPTY_ARRAY); + } + + @Override + public void append(final LogEvent event) { + this.events.add(event.toImmutable()); + } + + LogEvent last() { + return this.events.get(this.events.size() - 1); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/observability/UpstreamLoggerTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/observability/UpstreamLoggerTest.java new file mode 100644 index 000000000..12b3c328c --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/observability/UpstreamLoggerTest.java @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.observability; + +import com.auto1.pantera.http.context.Deadline; +import com.auto1.pantera.http.context.RequestContext; +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.ThreadContext; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.Property; +import org.apache.logging.log4j.message.MapMessage; +import org.apache.logging.log4j.message.Message; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Tier-3 tests β€” {@link StructuredLogger.UpstreamLogger} for pantera β†’ upstream. 
+ */ +final class UpstreamLoggerTest { + + private static final String CAP = "UpstreamLoggerCap"; + private static final String LOGGER = "http.upstream"; + + private CapturingAppender capture; + + @BeforeEach + void setUp() { + ThreadContext.clearAll(); + this.capture = new CapturingAppender(CAP); + this.capture.start(); + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + cfg.addAppender(this.capture); + cfg.getRootLogger().addAppender(this.capture, null, null); + cfg.getLoggerConfig(LOGGER).addAppender(this.capture, null, null); + lc.updateLoggers(); + } + + @AfterEach + void tearDown() { + final LoggerContext lc = (LoggerContext) LogManager.getContext(false); + final Configuration cfg = lc.getConfiguration(); + cfg.getRootLogger().removeAppender(CAP); + cfg.getLoggerConfig(LOGGER).removeAppender(CAP); + this.capture.stop(); + lc.updateLoggers(); + ThreadContext.clearAll(); + } + + @Test + @DisplayName("forUpstream(null, _, _) throws NPE") + void forUpstreamRejectsNullCtx() { + try { + StructuredLogger.upstream().forUpstream(null, "h", 443); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException ex) { + MatcherAssert.assertThat(ex.getMessage(), Matchers.containsString("ctx")); + } + } + + @Test + @DisplayName("forUpstream(ctx, null, _) throws NPE") + void forUpstreamRejectsNullAddress() { + try { + StructuredLogger.upstream().forUpstream(ctx(), null, 443); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException ex) { + MatcherAssert.assertThat( + ex.getMessage(), Matchers.containsString("destinationAddress") + ); + } + } + + @Test + @DisplayName(".error() without cause throws IllegalStateException") + void errorWithoutCauseFails() { + try { + StructuredLogger.upstream().forUpstream(ctx(), "h", 443) + .responseStatus(502).error(); + MatcherAssert.assertThat("expected ISE", false, 
Matchers.is(true)); + } catch (final IllegalStateException ex) { + MatcherAssert.assertThat(ex.getMessage(), Matchers.containsString("cause")); + } + } + + @Test + @DisplayName("cause(null) throws NPE") + void causeRejectsNull() { + try { + StructuredLogger.upstream().forUpstream(ctx(), "h", 443).cause(null); + MatcherAssert.assertThat("expected NPE", false, Matchers.is(true)); + } catch (final NullPointerException ex) { + MatcherAssert.assertThat(ex.getMessage(), Matchers.containsString("cause")); + } + } + + @Test + @DisplayName("5xx + cause β†’ ERROR with destination.address / destination.port / duration") + void serverErrorLogsAtErrorWithDestinationFields() { + final Exception cause = new java.net.ConnectException("connect refused"); + StructuredLogger.upstream() + .forUpstream(ctx(), "registry.npmjs.org", 443) + .responseStatus(502) + .duration(1250L) + .cause(cause) + .error(); + final LogEvent evt = this.capture.last(); + MatcherAssert.assertThat(evt.getLevel(), Matchers.is(Level.ERROR)); + MatcherAssert.assertThat(payload(evt, "destination.address"), Matchers.is("registry.npmjs.org")); + MatcherAssert.assertThat(payload(evt, "destination.port"), Matchers.is(443)); + MatcherAssert.assertThat(payload(evt, "http.response.status_code"), Matchers.is(502)); + MatcherAssert.assertThat(payload(evt, "event.duration"), Matchers.is(1250L)); + MatcherAssert.assertThat( + (String) payload(evt, "error.type"), + Matchers.containsString("ConnectException") + ); + } + + @Test + @DisplayName("2xx β†’ DEBUG per LevelPolicy.UPSTREAM_SUCCESS") + void successLogsAtDebug() { + StructuredLogger.upstream().forUpstream(ctx(), "host", 80).responseStatus(200).debug(); + MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.DEBUG)); + } + + @Test + @DisplayName("404 via debug() β†’ DEBUG per LevelPolicy.UPSTREAM_NOT_FOUND") + void notFoundLogsAtDebug() { + StructuredLogger.upstream().forUpstream(ctx(), "host", 80).responseStatus(404).debug(); + 
MatcherAssert.assertThat(this.capture.last().getLevel(), Matchers.is(Level.DEBUG)); + } + + // ---- helpers ---- + + private static RequestContext ctx() { + return new RequestContext( + "trace-up", null, null, null, "anonymous", null, null, + "npm_proxy", "npm", RequestContext.ArtifactRef.EMPTY, + "/lodash", "/lodash", Deadline.in(Duration.ofSeconds(5)) + ); + } + + private static Object payload(final LogEvent evt, final String key) { + final Message msg = evt.getMessage(); + if (msg instanceof MapMessage mm) { + return mm.getData().get(key); + } + return null; + } + + private static final class CapturingAppender extends AbstractAppender { + + private final List events = new ArrayList<>(); + + CapturingAppender(final String name) { + super(name, null, null, true, Property.EMPTY_ARRAY); + } + + @Override + public void append(final LogEvent event) { + this.events.add(event.toImmutable()); + } + + LogEvent last() { + return this.events.get(this.events.size() - 1); + } + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/observability/UserAgentParserTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/observability/UserAgentParserTest.java new file mode 100644 index 000000000..66c9c59a9 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/observability/UserAgentParserTest.java @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.observability; + +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link UserAgentParser}. 
Covers every client family and OS family + * the legacy {@code EcsLogEvent.parseUserAgent} recognised, plus null / unknown + * edge cases. The parser is behaviour-preserving from EcsLogEvent v1.18.23 β€” + * these tests pin that behaviour so the observability-tier re-lift (WI-post-03b) + * cannot regress the Kibana sub-field shape. + */ +final class UserAgentParserTest { + + @Test + @DisplayName("Maven UA β†’ name=Maven, version extracted") + void parsesMavenClient() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Maven/3.9.6 (Java 21.0.3; Linux 6.12.68)"); + MatcherAssert.assertThat(info.name(), Matchers.is("Maven")); + MatcherAssert.assertThat(info.version(), Matchers.is("3.9.6")); + } + + @Test + @DisplayName("npm UA β†’ name=npm, version extracted") + void parsesNpmClient() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("npm/10.2.4 node/v20.11.0 darwin x64"); + MatcherAssert.assertThat(info.name(), Matchers.is("npm")); + MatcherAssert.assertThat(info.version(), Matchers.is("10.2.4")); + } + + @Test + @DisplayName("pip UA β†’ name=pip, version extracted") + void parsesPipClient() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("pip/23.3.1 {\"python\":\"3.11.5\"}"); + MatcherAssert.assertThat(info.name(), Matchers.is("pip")); + MatcherAssert.assertThat(info.version(), Matchers.is("23.3.1")); + } + + @Test + @DisplayName("Docker-Client UA β†’ name=Docker, version extracted") + void parsesDockerClient() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Docker-Client/24.0.7 (linux)"); + MatcherAssert.assertThat(info.name(), Matchers.is("Docker")); + MatcherAssert.assertThat(info.version(), Matchers.is("24.0.7")); + } + + @Test + @DisplayName("Go-http-client UA β†’ name=Go, version extracted") + void parsesGoClient() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Go-http-client/1.1"); + MatcherAssert.assertThat(info.name(), Matchers.is("Go")); + 
MatcherAssert.assertThat(info.version(), Matchers.is("1.1")); + } + + @Test + @DisplayName("Gradle UA β†’ name=Gradle, version extracted") + void parsesGradleClient() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Gradle/8.5 (Linux 6.1; amd64; OpenJDK 21)"); + MatcherAssert.assertThat(info.name(), Matchers.is("Gradle")); + MatcherAssert.assertThat(info.version(), Matchers.is("8.5")); + } + + @Test + @DisplayName("Composer UA β†’ name=Composer, version extracted") + void parsesComposerClient() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Composer/2.7.1 (Linux; PHP 8.2.15)"); + MatcherAssert.assertThat(info.name(), Matchers.is("Composer")); + MatcherAssert.assertThat(info.version(), Matchers.is("2.7.1")); + } + + @Test + @DisplayName("curl UA β†’ name=curl, version extracted") + void parsesCurl() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("curl/8.4.0"); + MatcherAssert.assertThat(info.name(), Matchers.is("curl")); + MatcherAssert.assertThat(info.version(), Matchers.is("8.4.0")); + } + + @Test + @DisplayName("wget UA β†’ name=wget, version extracted") + void parsesWget() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Wget wget/1.21.4"); + MatcherAssert.assertThat(info.name(), Matchers.is("wget")); + MatcherAssert.assertThat(info.version(), Matchers.is("1.21.4")); + } + + @Test + @DisplayName("Linux UA token β†’ osName=Linux") + void parsesLinuxOs() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Maven/3.9.6 (Java 21.0.3; Linux 6.12.68)"); + MatcherAssert.assertThat(info.osName(), Matchers.is("Linux")); + } + + @Test + @DisplayName("Windows UA token β†’ osName=Windows") + void parsesWindowsOs() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Maven/3.9.6 (Java 17; Windows 10 10.0)"); + MatcherAssert.assertThat(info.osName(), Matchers.is("Windows")); + } + + @Test + @DisplayName("Mac OS X UA token β†’ osName=macOS") + 
void parsesMacOs() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Maven/3.9.6 (Java 21; Mac OS X 14.2)"); + MatcherAssert.assertThat(info.osName(), Matchers.is("macOS")); + } + + @Test + @DisplayName("FreeBSD UA token β†’ osName=FreeBSD") + void parsesFreeBsdOs() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Maven/3.9.6 (Java 17; FreeBSD 13.2)"); + MatcherAssert.assertThat(info.osName(), Matchers.is("FreeBSD")); + } + + @Test + @DisplayName("Java version token β†’ osVersion (preserve EcsLogEvent behaviour)") + void javaVersionGoesIntoOsVersion() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("Maven/3.9.6 (Java/21.0.3 Linux 6.12)"); + MatcherAssert.assertThat(info.osVersion(), Matchers.is("21.0.3")); + } + + @Test + @DisplayName("null UA β†’ all fields null, never throws") + void parseReturnsEmptyForNull() { + final UserAgentParser.UserAgentInfo info = UserAgentParser.parse(null); + MatcherAssert.assertThat(info.name(), Matchers.nullValue()); + MatcherAssert.assertThat(info.version(), Matchers.nullValue()); + MatcherAssert.assertThat(info.osName(), Matchers.nullValue()); + MatcherAssert.assertThat(info.osVersion(), Matchers.nullValue()); + MatcherAssert.assertThat(info.deviceName(), Matchers.nullValue()); + } + + @Test + @DisplayName("unknown UA β†’ name/version null, but OS may still resolve") + void parseReturnsEmptyForUnknownUa() { + final UserAgentParser.UserAgentInfo info = + UserAgentParser.parse("TotallyMadeUpClient/9.9 (AmigaOS)"); + MatcherAssert.assertThat(info.name(), Matchers.nullValue()); + MatcherAssert.assertThat(info.version(), Matchers.nullValue()); + MatcherAssert.assertThat(info.osName(), Matchers.nullValue()); + } + + @Test + @DisplayName("empty UA β†’ all fields null") + void parseReturnsEmptyForEmptyString() { + final UserAgentParser.UserAgentInfo info = UserAgentParser.parse(""); + MatcherAssert.assertThat(info.name(), Matchers.nullValue()); + 
MatcherAssert.assertThat(info.version(), Matchers.nullValue()); + MatcherAssert.assertThat(info.osName(), Matchers.nullValue()); + MatcherAssert.assertThat(info.osVersion(), Matchers.nullValue()); + MatcherAssert.assertThat(info.deviceName(), Matchers.nullValue()); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/resilience/RepoBulkheadTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/resilience/RepoBulkheadTest.java new file mode 100644 index 000000000..374f81fa6 --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/resilience/RepoBulkheadTest.java @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.resilience; + +import com.auto1.pantera.http.fault.Fault; +import com.auto1.pantera.http.fault.Result; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.TimeUnit; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link RepoBulkhead}. Covers the WI-09 DoD requirements: + *
 * <ul>
+ *   <li>{@link #rejectsWhenSaturated()} - acquire maxConcurrent permits; next run() returns Fault.Overload</li>
+ *   <li>{@link #releasesPermitOnSuccess()} - acquire, complete, next run() succeeds</li>
+ *   <li>{@link #releasesPermitOnFailure()} - acquire, op throws, permit still released</li>
+ *   <li>{@link #activeCountTracksPermits()} - activeCount reflects held permits</li>
+ *   <li>{@link #defaultLimitsAreReasonable()} - BulkheadLimits.defaults() values are sane</li>
+ * </ul>
      + */ +@Timeout(value = 30, unit = TimeUnit.SECONDS) +final class RepoBulkheadTest { + + @Test + void rejectsWhenSaturated() throws Exception { + final int maxConcurrent = 3; + final BulkheadLimits limits = new BulkheadLimits( + maxConcurrent, 100, Duration.ofSeconds(1) + ); + final RepoBulkhead bulkhead = new RepoBulkhead( + "repo-a", limits, ForkJoinPool.commonPool() + ); + // Hold maxConcurrent permits with uncompleted futures + final List>> blockers = new ArrayList<>(); + for (int i = 0; i < maxConcurrent; i++) { + final CompletableFuture> blocker = new CompletableFuture<>(); + bulkhead.run(() -> blocker); + blockers.add(blocker); + } + assertEquals(maxConcurrent, bulkhead.activeCount(), + "All permits should be held"); + // Next run must be rejected + final Result rejected = bulkhead.run( + () -> CompletableFuture.completedFuture(Result.ok("should-not-reach")) + ).toCompletableFuture().get(5, TimeUnit.SECONDS); + assertInstanceOf(Result.Err.class, rejected, "Must be rejected"); + final Fault fault = ((Result.Err) rejected).fault(); + assertInstanceOf(Fault.Overload.class, fault, "Must be Overload fault"); + final Fault.Overload overload = (Fault.Overload) fault; + assertEquals("repo-a", overload.resource(), "Fault must carry the repo name"); + assertEquals(Duration.ofSeconds(1), overload.retryAfter(), + "Fault must carry the configured retry-after"); + // Clean up blockers + for (final CompletableFuture> b : blockers) { + b.complete(Result.ok("done")); + } + } + + @Test + void releasesPermitOnSuccess() throws Exception { + final BulkheadLimits limits = new BulkheadLimits( + 1, 100, Duration.ofSeconds(1) + ); + final RepoBulkhead bulkhead = new RepoBulkhead( + "repo-b", limits, ForkJoinPool.commonPool() + ); + // Acquire the single permit and complete immediately + final Result first = bulkhead.run( + () -> CompletableFuture.completedFuture(Result.ok("ok")) + ).toCompletableFuture().get(5, TimeUnit.SECONDS); + assertInstanceOf(Result.Ok.class, first, 
"First call must succeed"); + assertEquals(0, bulkhead.activeCount(), + "Permit must be released after success"); + // Next run must also succeed (permit was released) + final Result second = bulkhead.run( + () -> CompletableFuture.completedFuture(Result.ok("ok2")) + ).toCompletableFuture().get(5, TimeUnit.SECONDS); + assertInstanceOf(Result.Ok.class, second, "Second call must succeed after permit release"); + } + + @Test + void releasesPermitOnFailure() throws Exception { + final BulkheadLimits limits = new BulkheadLimits( + 1, 100, Duration.ofSeconds(1) + ); + final RepoBulkhead bulkhead = new RepoBulkhead( + "repo-c", limits, ForkJoinPool.commonPool() + ); + // Acquire the single permit; the future completes exceptionally + final CompletableFuture> failing = new CompletableFuture<>(); + final CompletionStage> stage = bulkhead.run(() -> failing); + assertEquals(1, bulkhead.activeCount(), + "Permit must be held while in-flight"); + failing.completeExceptionally(new RuntimeException("boom")); + // Wait for the whenComplete to fire + try { + stage.toCompletableFuture().get(5, TimeUnit.SECONDS); + } catch (final Exception ignored) { + // Expected - the stage completed exceptionally + } + assertEquals(0, bulkhead.activeCount(), + "Permit must be released even on exceptional completion"); + // Next run must succeed (permit was released) + final Result next = bulkhead.run( + () -> CompletableFuture.completedFuture(Result.ok("recovered")) + ).toCompletableFuture().get(5, TimeUnit.SECONDS); + assertInstanceOf(Result.Ok.class, next, + "Next call must succeed after exceptional permit release"); + } + + @Test + void activeCountTracksPermits() throws Exception { + final int maxConcurrent = 5; + final BulkheadLimits limits = new BulkheadLimits( + maxConcurrent, 100, Duration.ofSeconds(1) + ); + final RepoBulkhead bulkhead = new RepoBulkhead( + "repo-d", limits, ForkJoinPool.commonPool() + ); + assertEquals(0, bulkhead.activeCount(), "Initially zero active"); + final List>> 
blockers = new ArrayList<>(); + for (int i = 0; i < maxConcurrent; i++) { + final CompletableFuture> blocker = new CompletableFuture<>(); + bulkhead.run(() -> blocker); + blockers.add(blocker); + assertEquals(i + 1, bulkhead.activeCount(), + "Active count must track acquired permits"); + } + // Complete them one by one and verify count decreases + for (int i = 0; i < maxConcurrent; i++) { + blockers.get(i).complete(Result.ok("done-" + i)); + // Small delay to allow whenComplete to fire + Thread.sleep(10); + assertEquals(maxConcurrent - i - 1, bulkhead.activeCount(), + "Active count must decrease as permits are released"); + } + } + + @Test + void defaultLimitsAreReasonable() { + final BulkheadLimits defaults = BulkheadLimits.defaults(); + assertEquals(200, defaults.maxConcurrent(), + "Default maxConcurrent should be 200"); + assertEquals(1000, defaults.maxQueueDepth(), + "Default maxQueueDepth should be 1000"); + assertEquals(Duration.ofSeconds(1), defaults.retryAfter(), + "Default retryAfter should be 1 second"); + } + + @Test + void repoNameIsAccessible() { + final RepoBulkhead bulkhead = new RepoBulkhead( + "my-repo", BulkheadLimits.defaults(), ForkJoinPool.commonPool() + ); + assertEquals("my-repo", bulkhead.repo()); + } + + @Test + void drainExecutorIsAvailable() { + final RepoBulkhead bulkhead = new RepoBulkhead( + "drain-test", BulkheadLimits.defaults(), ForkJoinPool.commonPool() + ); + assertTrue(bulkhead.drainExecutor() != null, + "Drain executor must be non-null"); + assertEquals(0, bulkhead.drainDropCount(), + "Initial drain drop count must be zero"); + } + + @Test + void synchronousSupplierExceptionReleasesPermit() throws Exception { + final BulkheadLimits limits = new BulkheadLimits( + 1, 100, Duration.ofSeconds(1) + ); + final RepoBulkhead bulkhead = new RepoBulkhead( + "repo-sync-ex", limits, ForkJoinPool.commonPool() + ); + // Supplier throws synchronously before returning a CompletionStage + try { + bulkhead.run(() -> { + throw new 
RuntimeException("sync boom"); + }).toCompletableFuture().get(5, TimeUnit.SECONDS); + } catch (final Exception ignored) { + // Expected + } + assertEquals(0, bulkhead.activeCount(), + "Permit must be released even when supplier throws synchronously"); + // Verify next call succeeds + final Result next = bulkhead.run( + () -> CompletableFuture.completedFuture(Result.ok("recovered")) + ).toCompletableFuture().get(5, TimeUnit.SECONDS); + assertInstanceOf(Result.Ok.class, next, + "Next call must succeed after synchronous-exception permit release"); + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/resilience/SingleFlightPropertyTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/resilience/SingleFlightPropertyTest.java new file mode 100644 index 000000000..29c658cee --- /dev/null +++ b/pantera-core/src/test/java/com/auto1/pantera/http/resilience/SingleFlightPropertyTest.java @@ -0,0 +1,653 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.resilience; + +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CancellationException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionStage; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Property-style tests for {@link SingleFlight}. Covers the five invariants + * listed in WI-05 DoD (Β§12 of {@code docs/analysis/v2.2-target-architecture.md}): + * coalescing, cancellation isolation, zombie eviction, exception propagation, + * and stack-flat synchronous completion. + */ +final class SingleFlightPropertyTest { + + /** + * Dedicated thread pool for the {@link SingleFlight} under test. A fresh + * pool per test avoids cross-test contamination for the cancellation and + * stack-safety properties. + */ + private ExecutorService executor; + + @BeforeEach + void setUp() { + this.executor = Executors.newFixedThreadPool(16, r -> { + final Thread t = new Thread(r, "sf-test"); + t.setDaemon(true); + return t; + }); + } + + @AfterEach + void tearDown() { + this.executor.shutdownNow(); + } + + /** + * N = 1000 concurrent {@code load(k, loader)} calls for the same key must + * invoke the loader exactly once. All 1000 callers receive the same value. + * + *
<p>The coalescer invalidates its entry on loader completion (to allow the
+ * next {@code load} for the same key to refetch). The test must therefore
+ * hold the loader uncompleted until every caller has invoked
+ * {@link SingleFlight#load}; otherwise a late caller would miss the shared
+ * entry and spawn a second loader — which would be correct SingleFlight
+ * behaviour, just not the property we are asserting.
+ *
+ * <p>
      The load-issuing phase is separated from the join phase: a dedicated + * 1000-thread pool is used for load issuance so no thread blocks a sibling + * from reaching {@code sf.load}. Once every caller is attached, the loader + * is released and every future is awaited. + */ + @Test + @Timeout(30) + void coalescesNConcurrentLoads() throws Exception { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(30), 1024, this.executor + ); + final int callers = 1_000; + final AtomicInteger loaderInvocations = new AtomicInteger(0); + final CountDownLatch release = new CountDownLatch(1); + final CountDownLatch submitGate = new CountDownLatch(1); + final CountDownLatch allCalledLoad = new CountDownLatch(callers); + + // One thread per caller so `load()` issuance is truly parallel. The + // threads only issue the load and return β€” they do NOT join the + // future, so the pool size does not need to absorb 1000 blocked + // join()s. + final ExecutorService submitters = Executors.newFixedThreadPool(callers); + final List> futures = new ArrayList<>(callers); + final Object futuresLock = new Object(); + try { + for (int i = 0; i < callers; i++) { + submitters.execute(() -> { + try { + submitGate.await(); + } catch (final InterruptedException ex) { + Thread.currentThread().interrupt(); + return; + } + final CompletableFuture f = sf.load( + "shared-key", + () -> { + loaderInvocations.incrementAndGet(); + return CompletableFuture.supplyAsync(() -> { + try { + release.await(); + } catch (final InterruptedException iex) { + Thread.currentThread().interrupt(); + throw new IllegalStateException(iex); + } + return 42; + }, this.executor); + } + ); + synchronized (futuresLock) { + futures.add(f); + } + allCalledLoad.countDown(); + }); + } + submitGate.countDown(); + MatcherAssert.assertThat( + "all " + callers + " threads called sf.load", + allCalledLoad.await(20, TimeUnit.SECONDS), is(true) + ); + release.countDown(); + final List> snapshot; + synchronized (futuresLock) 
{ + snapshot = new ArrayList<>(futures); + } + MatcherAssert.assertThat(snapshot.size(), equalTo(callers)); + for (final CompletableFuture fut : snapshot) { + MatcherAssert.assertThat( + fut.get(15, TimeUnit.SECONDS), equalTo(42) + ); + } + } finally { + submitters.shutdownNow(); + } + + MatcherAssert.assertThat( + "N=" + callers + " concurrent loads must trigger exactly ONE loader", + loaderInvocations.get(), equalTo(1) + ); + } + + /** + * 100 callers; cancel 50 of them mid-load; remaining 50 receive the value. + * The loader ran exactly once and was not aborted by any cancellation. + */ + @Test + @Timeout(30) + void cancellationDoesNotAbortOthers() throws Exception { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(10), 1024, this.executor + ); + final int callers = 100; + final AtomicInteger loaderInvocations = new AtomicInteger(0); + final CountDownLatch release = new CountDownLatch(1); + final CountDownLatch loaderStarted = new CountDownLatch(1); + + final List> futures = new ArrayList<>(callers); + for (int i = 0; i < callers; i++) { + futures.add(sf.load("shared-key", () -> { + loaderInvocations.incrementAndGet(); + loaderStarted.countDown(); + return CompletableFuture.supplyAsync(() -> { + try { + release.await(); + } catch (final InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IllegalStateException(ex); + } + return 99; + }, this.executor); + })); + } + MatcherAssert.assertThat( + "loader started before cancellations", + loaderStarted.await(5, TimeUnit.SECONDS), is(true) + ); + // Cancel the first 50 callers' futures. + for (int i = 0; i < 50; i++) { + MatcherAssert.assertThat( + "cancellation accepted", futures.get(i).cancel(true), is(true) + ); + } + // Let the loader finish. 
+ release.countDown(); + + for (int i = 0; i < 50; i++) { + final CompletableFuture fut = futures.get(i); + MatcherAssert.assertThat( + "cancelled future reports cancelled", fut.isCancelled(), is(true) + ); + } + for (int i = 50; i < callers; i++) { + MatcherAssert.assertThat( + "non-cancelled caller sees value", + futures.get(i).get(10, TimeUnit.SECONDS), equalTo(99) + ); + } + MatcherAssert.assertThat( + "loader ran exactly once despite 50 cancellations", + loaderInvocations.get(), equalTo(1) + ); + } + + /** + * A loader that never completes is held only for {@code inflightTtl}; + * after that window the entry is evicted and the next {@link + * SingleFlight#load} invokes a fresh loader. + * + *
<p>
      Zombie eviction is implemented by {@code orTimeout(inflightTtl)} on + * the wrapped loader future: once the TTL expires, the wrapper completes + * exceptionally with {@link TimeoutException}, which triggers the + * {@code whenCompleteAsync(invalidate)} hook and frees the slot. We wait + * past the TTL plus a buffer for the scheduler to fire. + */ + @Test + @Timeout(10) + void zombieEvictedAfterTtl() throws Exception { + final Duration ttl = Duration.ofMillis(200); + final SingleFlight sf = new SingleFlight<>( + ttl, 1024, this.executor + ); + final AtomicInteger loaderInvocations = new AtomicInteger(0); + // A loader that never completes β€” stays "in-flight" until the + // orTimeout wrapper fires. + final CompletableFuture zombie = sf.load("zombie", () -> { + loaderInvocations.incrementAndGet(); + return new CompletableFuture(); + }); + MatcherAssert.assertThat(loaderInvocations.get(), equalTo(1)); + // The wrapper future (inside SingleFlight) fires TimeoutException at + // the TTL boundary; the invalidate callback then runs on the executor. + // Expect a TimeoutException at the caller side too. + final ExecutionException ee = assertThrows( + ExecutionException.class, + () -> zombie.get(ttl.toMillis() * 10, TimeUnit.MILLISECONDS) + ); + MatcherAssert.assertThat( + rootCause(ee), Matchers.instanceOf(TimeoutException.class) + ); + // Small settle so the whenCompleteAsync(invalidate) hook has run. + final long deadline = System.currentTimeMillis() + 2_000L; + while (sf.inFlightCount() != 0 && System.currentTimeMillis() < deadline) { + Thread.sleep(10); + } + MatcherAssert.assertThat( + "zombie entry was invalidated", sf.inFlightCount(), equalTo(0) + ); + // Second load for the same key must trigger a fresh loader. 
+ final CompletableFuture second = sf.load("zombie", () -> { + loaderInvocations.incrementAndGet(); + return CompletableFuture.completedFuture(7); + }); + MatcherAssert.assertThat(second.get(5, TimeUnit.SECONDS), equalTo(7)); + MatcherAssert.assertThat( + "zombie was evicted; fresh loader ran for the second load", + loaderInvocations.get(), equalTo(2) + ); + } + + /** + * When the loader completes exceptionally, every waiter sees the same + * exception. The entry is then removed so the next {@link + * SingleFlight#load} retries with a fresh loader invocation. + */ + @Test + @Timeout(10) + void loaderFailurePropagatesToAllWaiters() throws Exception { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(10), 1024, this.executor + ); + final int waiters = 20; + final AtomicInteger loaderInvocations = new AtomicInteger(0); + final CountDownLatch release = new CountDownLatch(1); + final RuntimeException failure = new RuntimeException("upstream down"); + + final List> futures = new ArrayList<>(waiters); + for (int i = 0; i < waiters; i++) { + futures.add(sf.load("fail-key", () -> { + loaderInvocations.incrementAndGet(); + return CompletableFuture.supplyAsync(() -> { + try { + release.await(); + } catch (final InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IllegalStateException(ex); + } + throw failure; + }, this.executor); + })); + } + release.countDown(); + for (final CompletableFuture fut : futures) { + final ExecutionException ee = assertThrows( + ExecutionException.class, + () -> fut.get(5, TimeUnit.SECONDS) + ); + // CompletableFuture.supplyAsync wraps thrown exceptions in + // CompletionException; whatever wrapper Caffeine adds, the root + // cause must be our sentinel. 
+ Throwable root = ee.getCause(); + while (root != null && root.getCause() != null && root != root.getCause()) { + if (root == failure) { + break; + } + root = root.getCause(); + } + MatcherAssert.assertThat( + "each waiter sees the loader's exception at the root", + root, Matchers.is(failure) + ); + } + MatcherAssert.assertThat( + "loader ran once for all waiters despite failure", + loaderInvocations.get(), equalTo(1) + ); + // Entry must be removed: the next load triggers a new loader call. + // Small settle so the invalidation callback has run on the executor. + Thread.sleep(100); + final CompletableFuture retry = sf.load("fail-key", () -> { + loaderInvocations.incrementAndGet(); + return CompletableFuture.completedFuture(11); + }); + MatcherAssert.assertThat(retry.get(5, TimeUnit.SECONDS), equalTo(11)); + MatcherAssert.assertThat( + "entry invalidated on failure; next load ran a fresh loader", + loaderInvocations.get(), equalTo(2) + ); + } + + /** + * Stack-safety regression guard. + * + *
<p>Before WI-05, the now-removed GroupSlice chained {@code
+ * .thenCompose} on a shared gate future. When the leader completed the
+ * gate synchronously, all queued {@code thenCompose} callbacks ran on the
+ * leader's stack — with N ≥ ~400 followers this overflowed the stack
+ * (commit {@code ccc155f6} fixed the leak via {@code thenComposeAsync}).
+ *
+ * <p>
      This test locks in the same guarantee for {@link SingleFlight}: the + * 500 followers' {@code thenCompose} callbacks must NOT run on the + * leader's stack, regardless of whether the loader completes + * synchronously. We trigger the worst case: loader returns an already- + * completed future, so Caffeine has the shared future "done" the moment + * it's installed; followers attaching {@code thenCompose} after that + * point would, without executor dispatch, run on the caller's own stack + * β€” still not a stack-overflow, but the regression shape is identical + * and worth guarding. A 500-deep thenCompose chain on a single stack is + * the SOE that matters; we emulate that by having each follower's + * callback itself dispatch another thenCompose. + */ + @Test + @Timeout(30) + void stackFlatUnderSynchronousCompletion() throws Exception { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(10), 1024, this.executor + ); + final int followers = 500; + // Leader completes synchronously β€” the worst case for the old bug. + final CompletableFuture shared = sf.load( + "sync-key", () -> CompletableFuture.completedFuture(123) + ); + // Wait for the leader's future to settle before attaching followers β€” + // this puts us in the "future already done when I call thenCompose" + // regime that triggered the original stack bug. + MatcherAssert.assertThat(shared.get(5, TimeUnit.SECONDS), equalTo(123)); + + // 500 followers each attach thenCompose chains on fresh load() calls. + // Since each load() returns a NEW forwarded CompletableFuture completed + // via whenCompleteAsync(executor), the thenCompose callbacks must not + // all collapse onto one stack. 
+ final List> chain = new ArrayList<>(followers); + for (int i = 0; i < followers; i++) { + final CompletionStage f = sf + .load("sync-key", () -> CompletableFuture.completedFuture(123)) + .thenCompose(v -> CompletableFuture.completedFuture(v + 1)) + .thenCompose(v -> CompletableFuture.completedFuture(v + 1)); + chain.add(f.toCompletableFuture()); + } + for (final CompletableFuture fut : chain) { + // Any StackOverflowError on the leader's stack would have been + // rethrown through CompletableFuture.get β€” the explicit type check + // is the regression guard. + try { + MatcherAssert.assertThat( + fut.get(10, TimeUnit.SECONDS), equalTo(125) + ); + } catch (final ExecutionException ex) { + if (ex.getCause() instanceof StackOverflowError) { + throw new AssertionError( + "StackOverflowError on follower chain " + + "β€” SingleFlight re-introduced the ccc155f6 bug", + ex.getCause() + ); + } + throw ex; + } + } + } + + /** + * Additional guard: {@link SingleFlight#load} never throws at the call + * site. A loader supplier that itself throws a {@link RuntimeException} + * must surface only inside the returned future. + */ + @Test + @Timeout(5) + void supplierThrowSurfacesAsFailedFuture() { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(5), 1024, this.executor + ); + final RuntimeException bang = new IllegalStateException("boom"); + final CompletableFuture result = sf.load( + "thrower", () -> { + throw bang; + } + ); + final ExecutionException ee = assertThrows( + ExecutionException.class, + () -> result.get(2, TimeUnit.SECONDS) + ); + Throwable cause = ee.getCause(); + while (cause != null && cause != bang && cause.getCause() != null + && cause != cause.getCause()) { + cause = cause.getCause(); + } + MatcherAssert.assertThat(cause, is((Throwable) bang)); + } + + /** + * Cancelling a returned future must not cancel the underlying loader + * future observed by callers who did not cancel. 
+ */ + @Test + @Timeout(10) + void cancellingOneCallerDoesNotCompleteOthersAsCancelled() throws Exception { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(5), 1024, this.executor + ); + final CountDownLatch release = new CountDownLatch(1); + final CompletableFuture first = sf.load("k", () -> + CompletableFuture.supplyAsync(() -> { + try { + release.await(); + } catch (final InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IllegalStateException(ex); + } + return 7; + }, this.executor) + ); + final CompletableFuture second = + sf.load("k", () -> CompletableFuture.completedFuture(-1)); + first.cancel(true); + release.countDown(); + MatcherAssert.assertThat( + "non-cancelled follower completes with value", + second.get(5, TimeUnit.SECONDS), equalTo(7) + ); + // Sanity: first is cancelled, second is not. + MatcherAssert.assertThat(first.isCancelled(), is(true)); + MatcherAssert.assertThat(second.isCancelled(), is(false)); + } + + /** + * Explicit {@link SingleFlight#invalidate} removes an entry without + * completing it: the loader's future continues independently, but a + * subsequent {@link SingleFlight#load} for the same key starts afresh. 
+ */ + @Test + @Timeout(5) + void invalidateAllowsSubsequentFreshLoad() throws Exception { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(10), 1024, this.executor + ); + final AtomicInteger loaderInvocations = new AtomicInteger(0); + final CompletableFuture unfinished = new CompletableFuture<>(); + sf.load("k", () -> { + loaderInvocations.incrementAndGet(); + return unfinished; + }); + sf.invalidate("k"); + final CompletableFuture second = sf.load("k", () -> { + loaderInvocations.incrementAndGet(); + return CompletableFuture.completedFuture(5); + }); + MatcherAssert.assertThat(second.get(2, TimeUnit.SECONDS), equalTo(5)); + MatcherAssert.assertThat(loaderInvocations.get(), equalTo(2)); + } + + /** + * Different keys must not coalesce even when loaders run concurrently. + */ + @Test + @Timeout(5) + void differentKeysDoNotCoalesce() throws Exception { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(5), 1024, this.executor + ); + final AtomicInteger loaderInvocations = new AtomicInteger(0); + final CompletableFuture a = sf.load("a", () -> { + loaderInvocations.incrementAndGet(); + return CompletableFuture.completedFuture(1); + }); + final CompletableFuture b = sf.load("b", () -> { + loaderInvocations.incrementAndGet(); + return CompletableFuture.completedFuture(2); + }); + MatcherAssert.assertThat(a.get(2, TimeUnit.SECONDS), equalTo(1)); + MatcherAssert.assertThat(b.get(2, TimeUnit.SECONDS), equalTo(2)); + MatcherAssert.assertThat(loaderInvocations.get(), equalTo(2)); + } + + /** + * Guard: constructor input validation. 
+ */ + @Test + void constructorRejectsInvalidInputs() { + assertThrows(NullPointerException.class, + () -> new SingleFlight<>(null, 16, this.executor)); + assertThrows(NullPointerException.class, + () -> new SingleFlight<>(Duration.ofSeconds(1), 16, null)); + assertThrows(IllegalArgumentException.class, + () -> new SingleFlight<>(Duration.ZERO, 16, this.executor)); + assertThrows(IllegalArgumentException.class, + () -> new SingleFlight<>(Duration.ofSeconds(-1), 16, this.executor)); + assertThrows(IllegalArgumentException.class, + () -> new SingleFlight<>(Duration.ofSeconds(1), 0, this.executor)); + } + + /** + * Guard: {@link SingleFlight#load} null-checks. + */ + @Test + void loadRejectsNullKeyOrLoader() { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(1), 16, this.executor + ); + assertThrows(NullPointerException.class, + () -> sf.load(null, () -> CompletableFuture.completedFuture(0))); + assertThrows(NullPointerException.class, + () -> sf.load("k", null)); + } + + /** + * The {@code inFlightCount} metric reflects approximate in-flight size. + */ + @Test + @Timeout(5) + void inFlightCountTracksPendingLoads() throws Exception { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(5), 1024, this.executor + ); + MatcherAssert.assertThat(sf.inFlightCount(), equalTo(0)); + final CompletableFuture pending = new CompletableFuture<>(); + sf.load("k", () -> pending); + MatcherAssert.assertThat(sf.inFlightCount(), equalTo(1)); + pending.complete(1); + // Allow the invalidate callback to run. + final long deadline = System.currentTimeMillis() + 2_000L; + while (sf.inFlightCount() != 0 && System.currentTimeMillis() < deadline) { + Thread.sleep(10); + } + MatcherAssert.assertThat(sf.inFlightCount(), equalTo(0)); + } + + /** + * A waiter that times out independently must not affect the loader or + * other waiters. 
+ */ + @Test + @Timeout(10) + void waiterTimeoutIsLocal() throws Exception { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(10), 1024, this.executor + ); + final CountDownLatch release = new CountDownLatch(1); + final CompletableFuture first = sf.load("k", () -> + CompletableFuture.supplyAsync(() -> { + try { + release.await(); + } catch (final InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IllegalStateException(ex); + } + return 100; + }, this.executor) + ); + assertThrows(TimeoutException.class, () -> first.get(50, TimeUnit.MILLISECONDS)); + // Load again while still in-flight β€” must join the same loader. + final CompletableFuture second = + sf.load("k", () -> CompletableFuture.completedFuture(-1)); + release.countDown(); + MatcherAssert.assertThat( + first.get(5, TimeUnit.SECONDS), equalTo(100) + ); + MatcherAssert.assertThat( + second.get(5, TimeUnit.SECONDS), equalTo(100) + ); + } + + /** + * A loader that returns an already-cancelled stage causes all waiters to + * see a {@link CancellationException} (either thrown directly by {@code + * get()} or wrapped in an {@link ExecutionException} depending on how + * {@link CompletableFuture} propagates cancellation). Either shape is + * acceptable; we only assert the terminal exception type. 
+ */ + @Test + @Timeout(5) + void loaderReturningCancelledStage() throws Exception { + final SingleFlight sf = new SingleFlight<>( + Duration.ofSeconds(5), 1024, this.executor + ); + final CompletableFuture cancelled = new CompletableFuture<>(); + cancelled.cancel(true); + final CompletableFuture result = sf.load("k", () -> cancelled); + final Exception thrown = assertThrows( + Exception.class, + () -> result.get(2, TimeUnit.SECONDS) + ); + MatcherAssert.assertThat( + "thrown is (Cancellation|ExecutionException wrapping Cancellation)", + thrown instanceof CancellationException + || (thrown instanceof ExecutionException + && rootCause(thrown) instanceof CancellationException), + is(true) + ); + } + + private static Throwable rootCause(final Throwable ex) { + Throwable cur = ex; + while (cur.getCause() != null && cur != cur.getCause()) { + cur = cur.getCause(); + } + return cur; + } +} diff --git a/pantera-core/src/test/java/com/auto1/pantera/http/trace/MdcPropagationTest.java b/pantera-core/src/test/java/com/auto1/pantera/http/trace/MdcPropagationTest.java deleted file mode 100644 index 6b57aa7d2..000000000 --- a/pantera-core/src/test/java/com/auto1/pantera/http/trace/MdcPropagationTest.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
- */ -package com.auto1.pantera.http.trace; - -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Consumer; -import org.hamcrest.MatcherAssert; -import org.hamcrest.Matchers; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.slf4j.MDC; - -/** - * Tests for {@link MdcPropagation}. - * @since 2.1.0 - */ -final class MdcPropagationTest { - - private ExecutorService pool; - - @BeforeEach - void setUp() { - MDC.clear(); - this.pool = Executors.newSingleThreadExecutor(); - } - - @AfterEach - void tearDown() { - MDC.clear(); - this.pool.shutdownNow(); - } - - @Test - void propagatesMdcToWorkerThread() throws Exception { - MDC.put("trace.id", "abc123def456abc1"); - MDC.put("span.id", "1234567890abcdef"); - final AtomicReference captured = new AtomicReference<>(); - final Future future = this.pool.submit( - MdcPropagation.withMdc(() -> { - captured.set(MDC.get("span.id")); - return MDC.get("trace.id"); - }) - ); - MatcherAssert.assertThat(future.get(), Matchers.equalTo("abc123def456abc1")); - MatcherAssert.assertThat(captured.get(), Matchers.equalTo("1234567890abcdef")); - } - - @Test - void restoresPriorMdcAfterCallable() throws Exception { - MDC.put("trace.id", "caller-trace"); - final AtomicReference before = new AtomicReference<>(); - final AtomicReference after = new AtomicReference<>(); - final Future future = this.pool.submit(() -> { - MDC.put("trace.id", "worker-prior"); - before.set(MDC.get("trace.id")); - MdcPropagation.withMdc(() -> { - return MDC.get("trace.id"); - }).call(); - after.set(MDC.get("trace.id")); - return null; - }); - future.get(); - MatcherAssert.assertThat(before.get(), Matchers.equalTo("worker-prior")); - MatcherAssert.assertThat(after.get(), Matchers.equalTo("worker-prior")); - } - - @Test - 
void handlesNullCapturedMdc() throws Exception { - MDC.clear(); - final Future future = this.pool.submit( - MdcPropagation.withMdc(() -> MDC.get("trace.id")) - ); - MatcherAssert.assertThat(future.get(), Matchers.nullValue()); - } - - @Test - void runnableVariantPropagatesMdc() throws Exception { - MDC.put("trace.id", "runnable-trace"); - final AtomicReference captured = new AtomicReference<>(); - final Future future = this.pool.submit( - MdcPropagation.withMdc(() -> captured.set(MDC.get("trace.id"))) - ); - future.get(); - MatcherAssert.assertThat(captured.get(), Matchers.equalTo("runnable-trace")); - } - - @Test - void capturedMdcRestoredInRunWith() { - MDC.put("trace.id", "test-abc"); - final Map snapshot = MdcPropagation.capture(); - MDC.clear(); - - final AtomicReference seen = new AtomicReference<>(); - MdcPropagation.runWith( - snapshot, () -> seen.set(MDC.get("trace.id")) - ); - - MatcherAssert.assertThat(seen.get(), Matchers.equalTo("test-abc")); - // prior state restored (was empty after clear) - MatcherAssert.assertThat(MDC.get("trace.id"), Matchers.nullValue()); - } - - @Test - void captureReturnsNullOnEmptyMdc() { - MDC.clear(); - MatcherAssert.assertThat(MdcPropagation.capture(), Matchers.nullValue()); - } - - @Test - void runWithNullSnapshotIsNoOpForMdc() { - MDC.put("trace.id", "prior"); - final AtomicReference seen = new AtomicReference<>(); - MdcPropagation.runWith(null, () -> seen.set(MDC.get("trace.id"))); - MatcherAssert.assertThat(seen.get(), Matchers.equalTo("prior")); - MatcherAssert.assertThat(MDC.get("trace.id"), Matchers.equalTo("prior")); - } - - @Test - void runWithRestoresPriorMdcAfterException() { - MDC.put("trace.id", "prior"); - final Map snap = Map.of("trace.id", "snap"); - final AtomicReference inside = new AtomicReference<>(); - try { - MdcPropagation.runWith(snap, () -> { - inside.set(MDC.get("trace.id")); - throw new IllegalStateException("boom"); - }); - } catch (final IllegalStateException ignore) { - // expected - } - 
MatcherAssert.assertThat(inside.get(), Matchers.equalTo("snap")); - // prior restored even though action threw - MatcherAssert.assertThat(MDC.get("trace.id"), Matchers.equalTo("prior")); - } - - @Test - void consumerVariantPropagatesMdc() throws Exception { - MDC.put("trace.id", "consumer-trace"); - final AtomicReference seen = new AtomicReference<>(); - final Consumer wrapped = MdcPropagation.withMdcConsumer( - arg -> seen.set(MDC.get("trace.id") + ":" + arg) - ); - MDC.clear(); - // Run on worker thread so there is no MDC to start with - final Future future = this.pool.submit(() -> wrapped.accept("x")); - future.get(); - MatcherAssert.assertThat(seen.get(), Matchers.equalTo("consumer-trace:x")); - } -} diff --git a/pantera-main/Dockerfile b/pantera-main/Dockerfile index a2fb6f7df..e3b5c44e1 100644 --- a/pantera-main/Dockerfile +++ b/pantera-main/Dockerfile @@ -27,7 +27,7 @@ RUN addgroup -g 2020 -S pantera && \ chown pantera:pantera /opt/apm/elastic-apm-agent.jar ENV TMPDIR=/var/pantera/cache/tmp -ENV PANTERA_VERSION=2.1.3 +ENV PANTERA_VERSION=2.2.0 USER 2021:2020 diff --git a/pantera-main/docker-compose/.env.example b/pantera-main/docker-compose/.env.example index 2343d3c7f..bd92efe04 100644 --- a/pantera-main/docker-compose/.env.example +++ b/pantera-main/docker-compose/.env.example @@ -6,8 +6,8 @@ # ----------------------------------------------------------------------------- # PANTERA Configuration # ----------------------------------------------------------------------------- -PANTERA_VERSION=2.1.3 -PANTERA_UI_VERSION=2.1.3 +PANTERA_VERSION=2.2.0 +PANTERA_UI_VERSION=2.2.0 PANTERA_USER_NAME=PANTERA PANTERA_USER_PASS=changeme PANTERA_CONFIG=/etc/PANTERA/PANTERA.yml diff --git a/pantera-main/pom.xml b/pantera-main/pom.xml index 96a5e6fd9..0913424b4 100644 --- a/pantera-main/pom.xml +++ b/pantera-main/pom.xml @@ -26,7 +26,7 @@ SOFTWARE. pantera com.auto1.pantera - 2.1.3 + 2.2.0 4.0.0 pantera-main @@ -41,13 +41,13 @@ SOFTWARE. 
com.auto1.pantera pantera-core - 2.1.3 + 2.2.0 compile com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 @@ -59,12 +59,12 @@ SOFTWARE. com.auto1.pantera pantera-storage-s3 - 2.1.3 + 2.2.0 com.auto1.pantera pantera-storage-vertx-file - 2.1.3 + 2.2.0 @@ -103,13 +103,13 @@ SOFTWARE. com.auto1.pantera vertx-server - 2.1.3 + 2.2.0 compile com.auto1.pantera http-client - 2.1.3 + 2.2.0 compile @@ -263,25 +263,25 @@ SOFTWARE. com.auto1.pantera files-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera npm-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera hexpm-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera maven-adapter - 2.1.3 + 2.2.0 compile @@ -294,67 +294,67 @@ SOFTWARE. com.auto1.pantera rpm-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera gem-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera composer-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera go-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera nuget-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera pypi-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera helm-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera docker-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera debian-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera conda-adapter - 2.1.3 + 2.2.0 compile com.auto1.pantera conan-adapter - 2.1.3 + 2.2.0 compile diff --git a/pantera-main/src/main/java/com/auto1/pantera/RepositorySlices.java b/pantera-main/src/main/java/com/auto1/pantera/RepositorySlices.java index 6580aa77c..cfb9406de 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/RepositorySlices.java +++ b/pantera-main/src/main/java/com/auto1/pantera/RepositorySlices.java @@ -35,7 +35,7 @@ import com.auto1.pantera.docker.asto.RegistryRoot; import com.auto1.pantera.docker.http.DockerSlice; import com.auto1.pantera.docker.http.TrimmedDocker; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.cooldown.CooldownSupport; import com.auto1.pantera.files.FilesSlice; 
import com.auto1.pantera.gem.http.GemSlice; @@ -49,7 +49,7 @@ import com.auto1.pantera.http.log.EcsLogger; import com.auto1.pantera.http.Slice; import com.auto1.pantera.http.TimeoutSlice; -import com.auto1.pantera.group.GroupSlice; +import com.auto1.pantera.group.GroupResolver; import com.auto1.pantera.index.ArtifactIndex; import com.auto1.pantera.http.auth.Authentication; import com.auto1.pantera.http.auth.BasicAuthScheme; @@ -174,13 +174,19 @@ public class RepositorySlices { private final SharedJettyClients sharedClients; /** - * Negative cache configuration for group fanout 404s. - *

      Loaded once from {@code meta.caches.group-negative} in pantera.yml; falls - * back to a 5 min TTL / 10K entry in-memory default when absent. Each - * {@code *-group} repo receives a dedicated {@link NegativeCache} built from - * this config so key-prefixing isolates entries per group. + * Negative cache configuration loaded from YAML. + *

      Read from {@code meta.caches.repo-negative} first; falls back to the + * legacy {@code meta.caches.group-negative} key with a deprecation WARN. + * When neither key is present, uses historical defaults (5 min / 10K). */ - private final NegativeCacheConfig groupNegativeCacheConfig; + private final NegativeCacheConfig negativeCacheConfig; + + /** + * Single shared NegativeCache instance for the entire JVM. + * All group, proxy, and hosted scopes share this bean. Keyed by + * {@link com.auto1.pantera.http.cache.NegativeCacheKey}. + */ + private final NegativeCache sharedNegativeCache; /** * Shared circuit-breaker registries keyed by physical repo name. @@ -192,6 +198,14 @@ public class RepositorySlices { private final ConcurrentMap memberRegistries = new ConcurrentHashMap<>(); + /** + * Per-repo bulkheads keyed by repository name. + * Each group repository gets exactly one {@link com.auto1.pantera.http.resilience.RepoBulkhead} + * at first access. Saturation in one repo cannot starve another (WI-09). + */ + private final ConcurrentMap repoBulkheads = + new ConcurrentHashMap<>(); + /** * @param settings Pantera settings * @param repos Repositories @@ -216,12 +230,12 @@ public RepositorySlices( } } this.sharedClients = new SharedJettyClients(); - // Load group-negative cache config once at construction time. When the - // sub-key is absent from pantera.yml, fromYaml returns the default - // single-tier config (24h TTL / 50K entries) which we override below to - // preserve the pre-YAML group-slice defaults (5m / 10K) unless the - // operator explicitly opts in. - this.groupNegativeCacheConfig = loadGroupNegativeCacheConfig(settings); + // Load negative cache config once at construction time. + // Reads repo-negative first; falls back to group-negative with deprecation WARN. 
+ this.negativeCacheConfig = loadNegativeCacheConfig(settings); + this.sharedNegativeCache = new NegativeCache(this.negativeCacheConfig); + com.auto1.pantera.http.cache.NegativeCacheRegistry.instance() + .setSharedCache(this.sharedNegativeCache); this.slices = CacheBuilder.newBuilder() .maximumSize(500) .expireAfterAccess(30, java.util.concurrent.TimeUnit.MINUTES) @@ -299,8 +313,11 @@ public Slice slice(final Key name, final int port, final int depth) { .log(); return resolved.get().slice(); } - // Not found is NOT cached to allow dynamic repo addition without restart - EcsLogger.warn("com.auto1.pantera.settings") + // Not found is NOT cached to allow dynamic repo addition without restart. + // Logged at INFO (v2.1.4 WI-00): this is a client-config error, not a + // Pantera failure β€” clients misconfigured with stale repo names produce + // a steady stream that was previously drowning WARN output (Β§1.7 F2.2). + EcsLogger.info("com.auto1.pantera.settings") .message("Repository not found in configuration") .eventCategory("web") .eventAction("slice_resolve") @@ -662,15 +679,16 @@ private SliceValue sliceFromConfig(final RepoConfig cfg, final int port, final i break; case "npm-group": final List npmFlatMembers = flattenMembers(cfg.name(), cfg.members()); - final Slice npmGroupSlice = new GroupSlice( + final Slice npmGroupSlice = new GroupResolver( this::slice, cfg.name(), npmFlatMembers, port, depth, cfg.groupMemberTimeout().orElse(120L), java.util.Collections.emptyList(), Optional.of(this.settings.artifactIndex()), proxyMembers(npmFlatMembers), "npm-group", - newGroupNegativeCache(cfg.name()), - this::getOrCreateMemberRegistry + this.sharedNegativeCache, + this::getOrCreateMemberRegistry, + getOrCreateBulkhead(cfg.name()).drainExecutor() ); // Create audit slice that aggregates results from ALL members // This is critical for vulnerability scanning - local repos return {}, @@ -728,15 +746,16 @@ private SliceValue sliceFromConfig(final RepoConfig cfg, final int port, 
final i case "file-group": case "php-group": final List composerFlatMembers = flattenMembers(cfg.name(), cfg.members()); - final GroupSlice composerDelegate = new GroupSlice( + final GroupResolver composerDelegate = new GroupResolver( this::slice, cfg.name(), composerFlatMembers, port, depth, cfg.groupMemberTimeout().orElse(120L), java.util.Collections.emptyList(), Optional.of(this.settings.artifactIndex()), proxyMembers(composerFlatMembers), cfg.type(), - newGroupNegativeCache(cfg.name()), - this::getOrCreateMemberRegistry + this.sharedNegativeCache, + this::getOrCreateMemberRegistry, + getOrCreateBulkhead(cfg.name()).drainExecutor() ); slice = trimPathSlice( new CombinedAuthzSliceWrap( @@ -758,15 +777,16 @@ private SliceValue sliceFromConfig(final RepoConfig cfg, final int port, final i case "maven-group": // Maven groups need special metadata merging final List mavenFlatMembers = flattenMembers(cfg.name(), cfg.members()); - final GroupSlice mavenDelegate = new GroupSlice( + final GroupResolver mavenDelegate = new GroupResolver( this::slice, cfg.name(), mavenFlatMembers, port, depth, cfg.groupMemberTimeout().orElse(120L), java.util.Collections.emptyList(), Optional.of(this.settings.artifactIndex()), proxyMembers(mavenFlatMembers), "maven-group", - newGroupNegativeCache(cfg.name()), - this::getOrCreateMemberRegistry + this.sharedNegativeCache, + this::getOrCreateMemberRegistry, + getOrCreateBulkhead(cfg.name()).drainExecutor() ); slice = trimPathSlice( new CombinedAuthzSliceWrap( @@ -795,15 +815,16 @@ private SliceValue sliceFromConfig(final RepoConfig cfg, final int port, final i final List genericFlatMembers = flattenMembers(cfg.name(), cfg.members()); slice = trimPathSlice( new CombinedAuthzSliceWrap( - new GroupSlice( + new GroupResolver( this::slice, cfg.name(), genericFlatMembers, port, depth, cfg.groupMemberTimeout().orElse(120L), java.util.Collections.emptyList(), Optional.of(this.settings.artifactIndex()), proxyMembers(genericFlatMembers), cfg.type(), - 
newGroupNegativeCache(cfg.name()), - this::getOrCreateMemberRegistry + this.sharedNegativeCache, + this::getOrCreateMemberRegistry, + getOrCreateBulkhead(cfg.name()).drainExecutor() ), authentication(), tokens.auth(), @@ -1036,50 +1057,45 @@ private boolean isProxyOrContainsProxy(final String name) { /** - * Load negative cache config for group fanout 404s. + * Load negative cache config from YAML. * - *

      Reads {@code meta.caches.group-negative} via {@link NegativeCacheConfig#fromYaml}. - * When the sub-key is absent the helper returns the package defaults (24h / - * 50K); we substitute the historical GroupSlice values (5 min / 10K / - * L1-only) so upgrades without YAML changes preserve prior behaviour. + *

      Reads {@code meta.caches.repo-negative} first (the v2.2 canonical key). + * If absent, falls back to the legacy {@code meta.caches.group-negative} key + * and emits a deprecation WARN. When neither key is present, returns the + * historical defaults (5 min / 10K / in-memory only) to preserve backwards + * compatibility. * * @param settings Pantera settings - * @return Group-specific negative cache config + * @return Unified negative cache config */ - private static NegativeCacheConfig loadGroupNegativeCacheConfig(final Settings settings) { + private static NegativeCacheConfig loadNegativeCacheConfig(final Settings settings) { final com.amihaiemil.eoyaml.YamlMapping caches = settings != null && settings.meta() != null ? settings.meta().yamlMapping("caches") : null; - final boolean hasGroupNegative = caches != null - && caches.yamlMapping("group-negative") != null; - if (!hasGroupNegative) { - // Preserve pre-YAML defaults: 5 min TTL, 10K entries, in-memory only - return new NegativeCacheConfig( - java.time.Duration.ofMinutes(5), - 10_000, - false, - NegativeCacheConfig.DEFAULT_L1_MAX_SIZE, - NegativeCacheConfig.DEFAULT_L1_TTL, - NegativeCacheConfig.DEFAULT_L2_MAX_SIZE, - NegativeCacheConfig.DEFAULT_L2_TTL - ); + // Try the new canonical key first + if (caches != null && caches.yamlMapping("repo-negative") != null) { + return NegativeCacheConfig.fromYaml(caches, "repo-negative"); } - return NegativeCacheConfig.fromYaml(caches, "group-negative"); - } - - /** - * Construct a per-group {@link NegativeCache} backed by the shared config. - * The group name is used as the cache-key prefix so entries for different - * groups cannot collide in either L1 or L2. 
- * - * @param groupName Group repository name - * @return Negative cache scoped to this group - */ - private NegativeCache newGroupNegativeCache(final String groupName) { - return new NegativeCache( - "group-negative", - groupName, - this.groupNegativeCacheConfig + // Fall back to legacy key with deprecation WARN + if (caches != null && caches.yamlMapping("group-negative") != null) { + EcsLogger.warn("com.auto1.pantera.settings") + .message("YAML key 'meta.caches.group-negative' is deprecated; " + + "rename to 'meta.caches.repo-negative' — legacy key will be " + + "removed in a future release") + .eventCategory("configuration") + .eventAction("yaml_deprecation") + .log(); + return NegativeCacheConfig.fromYaml(caches, "group-negative"); + } + // Neither key present — preserve pre-YAML defaults + return new NegativeCacheConfig( + java.time.Duration.ofMinutes(5), + 10_000, + false, + NegativeCacheConfig.DEFAULT_L1_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L1_TTL, + NegativeCacheConfig.DEFAULT_L2_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L2_TTL ); } @@ -1128,6 +1144,33 @@ private AutoBlockRegistry getOrCreateMemberRegistry(final String memberName) { ); } + /** + * Get or create a per-repo {@link com.auto1.pantera.http.resilience.RepoBulkhead} + * for the given group repository name (WI-09). 
+ * + * @param repoName Group repository name + * @return Per-repo bulkhead (created on first access with default limits) + */ + private com.auto1.pantera.http.resilience.RepoBulkhead getOrCreateBulkhead(final String repoName) { + return this.repoBulkheads.computeIfAbsent( + repoName, + n -> { + final com.auto1.pantera.http.resilience.BulkheadLimits limits = + com.auto1.pantera.http.resilience.BulkheadLimits.defaults(); + EcsLogger.info("com.auto1.pantera") + .message("Per-repo bulkhead created for: " + n + + " (maxConcurrent=" + limits.maxConcurrent() + + ", maxQueueDepth=" + limits.maxQueueDepth() + ")") + .eventCategory("configuration") + .eventAction("bulkhead_init") + .log(); + return new com.auto1.pantera.http.resilience.RepoBulkhead( + n, limits, java.util.concurrent.ForkJoinPool.commonPool() + ); + } + ); + } + /** * Slice's cache key. */ diff --git a/pantera-main/src/main/java/com/auto1/pantera/VertxMain.java b/pantera-main/src/main/java/com/auto1/pantera/VertxMain.java index 3377ad6dc..d6f23a79e 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/VertxMain.java +++ b/pantera-main/src/main/java/com/auto1/pantera/VertxMain.java @@ -441,7 +441,7 @@ public int start(final int apiPort) throws IOException { Thread.sleep(2000); // wait for server to fully bind final java.net.http.HttpClient hc = java.net.http.HttpClient.newBuilder() .connectTimeout(java.time.Duration.ofSeconds(3)).build(); - // Hit each group repo once to JIT-compile GroupSlice + index lookup + // Hit each group repo once to JIT-compile GroupResolver + index lookup for (final com.auto1.pantera.settings.repo.RepoConfig cfg : repos.configs()) { if (cfg.type().endsWith("-group")) { try { @@ -1041,12 +1041,12 @@ public DistributionStatisticConfig configure( // Initialize MicrometerMetrics with the registry com.auto1.pantera.metrics.MicrometerMetrics.initialize(registry); - // Initialize GroupSliceMetrics so the drain-drop counter + // Initialize GroupResolverMetrics so the drain-drop counter 
// (pantera.group.drain.dropped) registers with Prometheus. Without - // this call, GroupSliceMetrics.instance() returns null and the + // this call, GroupResolverMetrics.instance() returns null and the // counter is never emitted — operators fly blind on drain pool // saturation even though the code-level counter increments. - com.auto1.pantera.metrics.GroupSliceMetrics.initialize(registry); + com.auto1.pantera.metrics.GroupResolverMetrics.initialize(registry); // Initialize storage metrics recorder com.auto1.pantera.metrics.StorageMetricsRecorder.initialize(); diff --git a/pantera-main/src/main/java/com/auto1/pantera/adapters/docker/DockerProxy.java b/pantera-main/src/main/java/com/auto1/pantera/adapters/docker/DockerProxy.java index 118bbf152..6d06932d1 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/adapters/docker/DockerProxy.java +++ b/pantera-main/src/main/java/com/auto1/pantera/adapters/docker/DockerProxy.java @@ -16,7 +16,7 @@ import com.auto1.pantera.docker.http.DockerSlice; import com.auto1.pantera.docker.http.TrimmedDocker; import com.auto1.pantera.docker.proxy.ProxyDocker; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.auth.CombinedAuthScheme; import com.auto1.pantera.http.DockerRoutingSlice; import com.auto1.pantera.http.Headers; @@ -126,7 +126,7 @@ private static Slice createProxy( ) { final DockerProxyCooldownInspector inspector = new DockerProxyCooldownInspector(); // Register inspector globally so unblock can invalidate its cache - com.auto1.pantera.cooldown.InspectorRegistry.instance() + com.auto1.pantera.cooldown.config.InspectorRegistry.instance() .register("docker", cfg.name(), inspector); final Docker proxies = new MultiReadDocker( cfg.remotes().stream().map(r -> proxy(client, cfg, events, r, inspector)) diff --git a/pantera-main/src/main/java/com/auto1/pantera/adapters/docker/DockerProxyCooldownSlice.java 
b/pantera-main/src/main/java/com/auto1/pantera/adapters/docker/DockerProxyCooldownSlice.java index 565c1dbbc..bca048fe2 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/adapters/docker/DockerProxyCooldownSlice.java +++ b/pantera-main/src/main/java/com/auto1/pantera/adapters/docker/DockerProxyCooldownSlice.java @@ -5,9 +5,9 @@ package com.auto1.pantera.adapters.docker; import com.auto1.pantera.asto.Content; -import com.auto1.pantera.cooldown.CooldownRequest; -import com.auto1.pantera.cooldown.CooldownResponses; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.response.CooldownResponseRegistry; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.docker.Digest; import com.auto1.pantera.docker.Docker; import com.auto1.pantera.docker.cache.DockerProxyCooldownInspector; @@ -131,7 +131,9 @@ public CompletableFuture response( ); return this.cooldown.evaluate(cooldownRequest, this.inspector) .thenApply(result -> result.blocked() - ? CooldownResponses.forbidden(result.block().orElseThrow()) + ? CooldownResponseRegistry.instance() + .getOrThrow(this.repoType) + .forbidden(result.block().orElseThrow()) : rebuilt ); } @@ -146,7 +148,9 @@ public CompletableFuture response( ); return this.cooldown.evaluate(cooldownRequest, this.inspector) .thenApply(result -> result.blocked() - ? CooldownResponses.forbidden(result.block().orElseThrow()) + ? CooldownResponseRegistry.instance() + .getOrThrow(this.repoType) + .forbidden(result.block().orElseThrow()) : rebuilt ); } @@ -164,7 +168,9 @@ public CompletableFuture response( ); return this.cooldown.evaluate(cooldownRequest, this.inspector) .thenApply(result -> result.blocked() - ? CooldownResponses.forbidden(result.block().orElseThrow()) + ? 
CooldownResponseRegistry.instance() + .getOrThrow(this.repoType) + .forbidden(result.block().orElseThrow()) : rebuilt ); }); diff --git a/pantera-main/src/main/java/com/auto1/pantera/adapters/file/FileProxy.java b/pantera-main/src/main/java/com/auto1/pantera/adapters/file/FileProxy.java index 13f4cd06f..7dc343d0d 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/adapters/file/FileProxy.java +++ b/pantera-main/src/main/java/com/auto1/pantera/adapters/file/FileProxy.java @@ -14,7 +14,7 @@ import com.auto1.pantera.asto.Storage; import com.auto1.pantera.asto.cache.Cache; import com.auto1.pantera.asto.cache.FromStorageCache; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.files.FileProxySlice; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Response; @@ -53,7 +53,7 @@ public FileProxy( ) { final Optional asto = cfg.storageOpt(); - // Support multiple remotes with GroupSlice (like maven-proxy) + // Support multiple remotes with GroupResolver (like maven-proxy) // Each remote gets its own FileProxySlice, evaluated in priority order this.slice = new RaceSlice( cfg.remotes().stream().map( diff --git a/pantera-main/src/main/java/com/auto1/pantera/adapters/go/GoProxy.java b/pantera-main/src/main/java/com/auto1/pantera/adapters/go/GoProxy.java index 4ef34956a..81ba807d5 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/adapters/go/GoProxy.java +++ b/pantera-main/src/main/java/com/auto1/pantera/adapters/go/GoProxy.java @@ -14,7 +14,7 @@ import com.auto1.pantera.asto.Storage; import com.auto1.pantera.asto.cache.Cache; import com.auto1.pantera.asto.cache.FromStorageCache; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.GoProxySlice; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Response; @@ -60,7 +60,7 @@ public GoProxy( ) { final Optional asto = 
cfg.storageOpt(); - // Support multiple remotes with GroupSlice (like maven-proxy) + // Support multiple remotes with GroupResolver (like maven-proxy) // Each remote gets its own GoProxySlice, evaluated in priority order this.slice = new RaceSlice( cfg.remotes().stream().map( diff --git a/pantera-main/src/main/java/com/auto1/pantera/adapters/maven/MavenProxy.java b/pantera-main/src/main/java/com/auto1/pantera/adapters/maven/MavenProxy.java index 20b5f43d0..ea15142a3 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/adapters/maven/MavenProxy.java +++ b/pantera-main/src/main/java/com/auto1/pantera/adapters/maven/MavenProxy.java @@ -19,7 +19,7 @@ import com.auto1.pantera.http.Slice; import com.auto1.pantera.http.client.ClientSlices; import com.auto1.pantera.http.client.auth.GenericAuthenticator; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.group.RaceSlice; import com.auto1.pantera.http.rq.RequestLine; import com.auto1.pantera.maven.http.MavenProxySlice; diff --git a/pantera-main/src/main/java/com/auto1/pantera/adapters/npm/NpmProxyAdapter.java b/pantera-main/src/main/java/com/auto1/pantera/adapters/npm/NpmProxyAdapter.java index 4fd1c4011..5fbc46240 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/adapters/npm/NpmProxyAdapter.java +++ b/pantera-main/src/main/java/com/auto1/pantera/adapters/npm/NpmProxyAdapter.java @@ -12,7 +12,7 @@ import com.auto1.pantera.asto.Content; import com.auto1.pantera.asto.Storage; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.cooldown.metadata.CooldownMetadataService; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Response; @@ -67,7 +67,7 @@ public NpmProxyAdapter( final Optional asto = cfg.storageOpt(); final Optional baseUrl = Optional.of(cfg.url()); - // Support multiple remotes with GroupSlice (similar to maven-proxy). 
+ // Support multiple remotes with GroupResolver (similar to maven-proxy). // Each remote gets its own NpmProxy + NpmProxySlice, evaluated in // priority order. this.slice = new RaceSlice( diff --git a/pantera-main/src/main/java/com/auto1/pantera/adapters/php/ComposerGroupSlice.java b/pantera-main/src/main/java/com/auto1/pantera/adapters/php/ComposerGroupSlice.java index 8bb0c9a64..3e4cd40fb 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/adapters/php/ComposerGroupSlice.java +++ b/pantera-main/src/main/java/com/auto1/pantera/adapters/php/ComposerGroupSlice.java @@ -44,7 +44,7 @@ public final class ComposerGroupSlice implements Slice { /** * Delegate group slice for non-packages.json requests. - * Uses the standard GroupSlice with artifact index, proxy awareness, + * Uses the standard GroupResolver with artifact index, proxy awareness, * circuit breaker, and error handling. */ private final Slice delegate; @@ -79,7 +79,7 @@ public final class ComposerGroupSlice implements Slice { /** * Constructor with delegate slice for standard group behavior. * - * @param delegate Delegate group slice (GroupSlice with index/proxy support) + * @param delegate Delegate group slice (GroupResolver with index/proxy support) * @param resolver Slice resolver * @param group Group repository name * @param members List of member repository names @@ -126,13 +126,13 @@ public CompletableFuture response( // For p2 metadata requests, try each member directly. // The artifact index cannot match p2 paths (it stores package names, - // not filesystem paths), so the delegate GroupSlice would skip local + // not filesystem paths), so the delegate GroupResolver would skip local // members and return 404. 
if (path.contains("/p2/")) { return tryMembersForP2(line, headers, body); } - // For other requests (tarballs, artifacts), delegate to GroupSlice + // For other requests (tarballs, artifacts), delegate to GroupResolver // which has artifact index, proxy awareness, circuit breaker, and error handling return this.delegate.response(line, headers, body); } diff --git a/pantera-main/src/main/java/com/auto1/pantera/adapters/php/ComposerProxy.java b/pantera-main/src/main/java/com/auto1/pantera/adapters/php/ComposerProxy.java index 4d9e250df..01d62cd71 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/adapters/php/ComposerProxy.java +++ b/pantera-main/src/main/java/com/auto1/pantera/adapters/php/ComposerProxy.java @@ -42,7 +42,7 @@ public final class ComposerProxy implements Slice { * @param cfg Repository configuration */ public ComposerProxy(ClientSlices client, RepoConfig cfg) { - this(client, cfg, Optional.empty(), com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE); + this(client, cfg, Optional.empty(), com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE); } /** @@ -56,12 +56,12 @@ public ComposerProxy( ClientSlices client, RepoConfig cfg, Optional> events, - com.auto1.pantera.cooldown.CooldownService cooldown + com.auto1.pantera.cooldown.api.CooldownService cooldown ) { final Optional asto = cfg.storageOpt(); final String baseUrl = cfg.url().toString(); - // Support multiple remotes with GroupSlice (like maven-proxy) + // Support multiple remotes with GroupResolver (like maven-proxy) // Each remote gets its own ComposerProxySlice, evaluated in priority order this.slice = new RaceSlice( cfg.remotes().stream().map( diff --git a/pantera-main/src/main/java/com/auto1/pantera/adapters/pypi/PypiProxy.java b/pantera-main/src/main/java/com/auto1/pantera/adapters/pypi/PypiProxy.java index be27d0f92..db4541205 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/adapters/pypi/PypiProxy.java +++ 
b/pantera-main/src/main/java/com/auto1/pantera/adapters/pypi/PypiProxy.java @@ -12,7 +12,7 @@ import com.auto1.pantera.asto.Content; import com.auto1.pantera.asto.Storage; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Response; import com.auto1.pantera.http.Slice; @@ -55,7 +55,7 @@ public PypiProxy( () -> new IllegalStateException("PyPI proxy requires storage to be set") ); - // Support multiple remotes with GroupSlice (like maven-proxy) + // Support multiple remotes with GroupResolver (like maven-proxy) // Each remote gets its own PyProxySlice, evaluated in priority order this.slice = new RaceSlice( cfg.remotes().stream().map( diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/AdminAuthHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/AdminAuthHandler.java index 32fbb47f4..f9ecf4869 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/AdminAuthHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/AdminAuthHandler.java @@ -15,13 +15,14 @@ import com.auto1.pantera.auth.RevocationBlocklist; import com.auto1.pantera.db.dao.AuthSettingsDao; import com.auto1.pantera.db.dao.UserTokenDao; +import com.auto1.pantera.http.context.HandlerExecutor; import com.auto1.pantera.http.log.EcsLogger; -import com.auto1.pantera.http.trace.MdcPropagation; import com.auto1.pantera.security.policy.Policy; import io.vertx.core.json.JsonObject; import io.vertx.ext.web.Router; import io.vertx.ext.web.RoutingContext; import java.util.Map; +import java.util.concurrent.CompletableFuture; /** * Admin-only handler for auth settings management and user token revocation. 
@@ -98,24 +99,23 @@ public void register(final Router router) { * @param ctx Routing context */ private void getSettings(final RoutingContext ctx) { - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - final Map all = this.settingsDao.getAll(); - final JsonObject result = new JsonObject(); - for (final Map.Entry entry : all.entrySet()) { - result.put(entry.getKey(), entry.getValue()); - } - return result; - }), - false - ).onSuccess( - settings -> ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(settings.encode()) - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + CompletableFuture.supplyAsync(() -> { + final Map all = this.settingsDao.getAll(); + final JsonObject result = new JsonObject(); + for (final Map.Entry entry : all.entrySet()) { + result.put(entry.getKey(), entry.getValue()); + } + return result; + }, HandlerExecutor.get()).whenComplete((settings, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(settings.encode()); + } + }); } /** @@ -146,26 +146,25 @@ private void updateSettings(final RoutingContext ctx) { return; } } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - for (final String key : body.fieldNames()) { - this.settingsDao.put(key, body.getValue(key).toString()); - } - return null; - }), - false - ).onSuccess(ignored -> { - EcsLogger.info("com.auto1.pantera.api.v1") - .message("Admin updated auth settings") - .eventCategory("iam") - .eventAction("auth_settings_update") - .eventOutcome("success") - .field("settings.keys", String.join(",", body.fieldNames())) - .log(); - ctx.response().setStatusCode(204).end(); - }).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + CompletableFuture.supplyAsync(() -> { + for (final String key : 
body.fieldNames()) { + this.settingsDao.put(key, body.getValue(key).toString()); + } + return null; + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + EcsLogger.info("com.auto1.pantera.api.v1") + .message("Admin updated auth settings") + .eventCategory("iam") + .eventAction("auth_settings_update") + .eventOutcome("success") + .field("settings.keys", String.join(",", body.fieldNames())) + .log(); + ctx.response().setStatusCode(204).end(); + } + }); } /** @@ -179,30 +178,32 @@ private void revokeUser(final RoutingContext ctx) { ApiResponse.sendError(ctx, 400, "BAD_REQUEST", "Username is required"); return; } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> this.tokenDao.revokeAllForUser(username)), - false - ).onSuccess(count -> { - if (this.blocklist != null) { - this.blocklist.revokeUser(username, REVOKE_USER_TTL_SECONDS); + CompletableFuture.supplyAsync( + () -> this.tokenDao.revokeAllForUser(username), + HandlerExecutor.get() + ).whenComplete((count, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + if (this.blocklist != null) { + this.blocklist.revokeUser(username, REVOKE_USER_TTL_SECONDS); + } + EcsLogger.info("com.auto1.pantera.api.v1") + .message("Admin revoked all tokens for user") + .eventCategory("iam") + .eventAction("user_revoke") + .eventOutcome("success") + .field("user.name", username) + .field("revoked_count", count) + .log(); + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(new JsonObject() + .put("username", username) + .put("revoked_count", count) + .encode()); } - EcsLogger.info("com.auto1.pantera.api.v1") - .message("Admin revoked all tokens for user") - .eventCategory("iam") - .eventAction("user_revoke") - .eventOutcome("success") - .field("user.name", username) - .field("revoked_count", count) - .log(); - 
ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(new JsonObject() - .put("username", username) - .put("revoked_count", count) - .encode()); - }).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } } diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/ArtifactHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/ArtifactHandler.java index 20019f9a6..4b5bd1614 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/ArtifactHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/ArtifactHandler.java @@ -15,7 +15,7 @@ import com.auto1.pantera.api.perms.ApiRepositoryPermission; import com.auto1.pantera.asto.Key; import com.auto1.pantera.asto.Meta; -import com.auto1.pantera.http.trace.MdcPropagation; +import com.auto1.pantera.http.context.HandlerExecutor; import com.auto1.pantera.security.policy.Policy; import com.auto1.pantera.settings.RepoData; import com.auto1.pantera.settings.repo.CrudRepoSettings; @@ -26,6 +26,7 @@ import java.io.StringReader; import java.nio.charset.StandardCharsets; import java.util.Base64; +import java.util.concurrent.CompletableFuture; import javax.crypto.Mac; import javax.crypto.spec.SecretKeySpec; import javax.json.Json; @@ -415,23 +416,35 @@ private void downloadHandler(final RoutingContext ctx) { return asto.value(artifactKey); }) ) - .thenAccept(content -> - io.reactivex.Flowable.fromPublisher(content) - .map(buf -> { - final byte[] arr = new byte[buf.remaining()]; - buf.get(arr); - return io.vertx.core.buffer.Buffer.buffer(arr); - }) - .subscribe( - chunk -> ctx.response().write(chunk), - err -> { - if (!ctx.response().ended()) { - ctx.response().end(); - } - }, - () -> ctx.response().end() - ) - ) + .thenAccept(content -> { + // Capture the Disposable so a client disconnect (closeHandler) or + // response error (exceptionHandler) can cancel the upstream stream + // and free any file channels / temp 
files held by downstream tees. + final io.reactivex.disposables.Disposable disposable = + io.reactivex.Flowable.fromPublisher(content) + .map(buf -> io.vertx.core.buffer.Buffer.buffer( + io.netty.buffer.Unpooled.wrappedBuffer(buf) + )) + .subscribe( + chunk -> ctx.response().write(chunk), + err -> { + if (!ctx.response().ended()) { + ctx.response().end(); + } + }, + () -> ctx.response().end() + ); + ctx.response().closeHandler(v -> { + if (!disposable.isDisposed()) { + disposable.dispose(); + } + }); + ctx.response().exceptionHandler(err -> { + if (!disposable.isDisposed()) { + disposable.dispose(); + } + }); + }) .exceptionally(err -> { if (!ctx.response().headWritten()) { ApiResponse.sendError(ctx, 404, "NOT_FOUND", @@ -542,23 +555,35 @@ private void downloadDirectHandler(final RoutingContext ctx) { return asto.value(artifactKey); }) ) - .thenAccept(content -> - io.reactivex.Flowable.fromPublisher(content) - .map(buf -> { - final byte[] arr = new byte[buf.remaining()]; - buf.get(arr); - return io.vertx.core.buffer.Buffer.buffer(arr); - }) - .subscribe( - chunk -> ctx.response().write(chunk), - err -> { - if (!ctx.response().ended()) { - ctx.response().end(); - } - }, - () -> ctx.response().end() - ) - ) + .thenAccept(content -> { + // Capture the Disposable so a client disconnect (closeHandler) or + // response error (exceptionHandler) can cancel the upstream stream + // and free any file channels / temp files held by downstream tees. 
+ final io.reactivex.disposables.Disposable disposable = + io.reactivex.Flowable.fromPublisher(content) + .map(buf -> io.vertx.core.buffer.Buffer.buffer( + io.netty.buffer.Unpooled.wrappedBuffer(buf) + )) + .subscribe( + chunk -> ctx.response().write(chunk), + err -> { + if (!ctx.response().ended()) { + ctx.response().end(); + } + }, + () -> ctx.response().end() + ); + ctx.response().closeHandler(v -> { + if (!disposable.isDisposed()) { + disposable.dispose(); + } + }); + ctx.response().exceptionHandler(err -> { + if (!disposable.isDisposed()) { + disposable.dispose(); + } + }); + }) .exceptionally(err -> { if (!ctx.response().headWritten()) { ApiResponse.sendError(ctx, 404, "NOT_FOUND", @@ -580,47 +605,44 @@ private void pullInstructionsHandler(final RoutingContext ctx) { } final String name = ctx.pathParam("name"); final RepositoryName rname = new RepositoryName.Simple(name); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - if (!this.crs.exists(rname)) { - return null; - } - final JsonStructure config = this.crs.value(rname); - if (config == null) { - return null; - } - if (config instanceof javax.json.JsonObject) { - final javax.json.JsonObject jobj = (javax.json.JsonObject) config; - final javax.json.JsonObject repo = jobj.containsKey("repo") - ? 
jobj.getJsonObject("repo") : jobj; - return repo.getString("type", "unknown"); - } - return "unknown"; - }), - false - ).onSuccess( - repoType -> { - if (repoType == null) { - ApiResponse.sendError( - ctx, 404, "NOT_FOUND", - String.format("Repository '%s' not found", name) - ); - return; - } - final JsonArray instructions = buildPullInstructions(repoType, name, path); - ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end( - new JsonObject() - .put("type", repoType) - .put("instructions", instructions) - .encode() - ); + CompletableFuture.supplyAsync(() -> { + if (!this.crs.exists(rname)) { + return null; } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + final JsonStructure config = this.crs.value(rname); + if (config == null) { + return null; + } + if (config instanceof javax.json.JsonObject) { + final javax.json.JsonObject jobj = (javax.json.JsonObject) config; + final javax.json.JsonObject repo = jobj.containsKey("repo") + ? 
jobj.getJsonObject("repo") : jobj; + return repo.getString("type", "unknown"); + } + return "unknown"; + }, HandlerExecutor.get()).whenComplete((repoType, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + return; + } + if (repoType == null) { + ApiResponse.sendError( + ctx, 404, "NOT_FOUND", + String.format("Repository '%s' not found", name) + ); + return; + } + final JsonArray instructions = buildPullInstructions(repoType, name, path); + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end( + new JsonObject() + .put("type", repoType) + .put("instructions", instructions) + .encode() + ); + }); } /** diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/AsyncApiVerticle.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/AsyncApiVerticle.java index c9e4e7eba..20366ba7d 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/AsyncApiVerticle.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/AsyncApiVerticle.java @@ -17,7 +17,8 @@ import com.auto1.pantera.asto.Storage; import com.auto1.pantera.asto.blocking.BlockingStorage; import com.auto1.pantera.auth.JwtTokens; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.cache.CooldownCache; import com.auto1.pantera.cooldown.CooldownSupport; import com.auto1.pantera.cooldown.metadata.CooldownMetadataService; import com.auto1.pantera.db.dao.AuthProviderDao; @@ -39,7 +40,6 @@ import com.auto1.pantera.settings.users.CrudRoles; import com.auto1.pantera.settings.users.CrudUsers; import io.vertx.core.AbstractVerticle; -import io.vertx.core.WorkerExecutor; import io.vertx.core.http.HttpServer; import io.vertx.core.http.HttpServerOptions; import io.vertx.core.json.JsonObject; @@ -206,14 +206,6 @@ public int actualPort() { @Override public void start() { final Router router = Router.router(this.vertx); - // 
Create named worker pool for blocking DAO calls - final WorkerExecutor apiWorkers = - this.vertx.createSharedWorkerExecutor("api-workers"); - // Store in routing context for handlers to use - router.route("/api/v1/*").handler(ctx -> { - ctx.put("apiWorkers", apiWorkers); - ctx.next(); - }); // Body handler for all API routes (1MB limit) router.route("/api/v1/*").handler(BodyHandler.create().setBodyLimit(1_048_576)); // Trace context + client.ip MDC setup for all API requests. @@ -431,8 +423,10 @@ public void start() { final com.auto1.pantera.db.dao.UserTokenDao utDao = this.dataSource != null ? new com.auto1.pantera.db.dao.UserTokenDao(this.dataSource) : null; - new UserHandler(users, this.caches, this.security, blocklist, utDao) - .register(router); + new UserHandler( + users, this.caches, this.security, blocklist, utDao, + this.settings.cachedLocalEnabledFilter().orElse(null) + ).register(router); } if (roles != null) { new RoleHandler( @@ -460,7 +454,9 @@ crs, new RepoData(this.configsStorage, this.caches.storagesCache()), this.security.policy() ).register(router); new CooldownHandler( - this.cooldown, this.cooldownMetadata, crs, this.settings.cooldown(), this.dataSource, + this.cooldown, this.cooldownMetadata, + CooldownSupport.extractCache(this.cooldown), + crs, this.settings.cooldown(), this.dataSource, this.security.policy() ).register(router); new SearchHandler(this.artifactIndex, this.security.policy()).register(router); @@ -475,6 +471,9 @@ crs, new RepoData(this.configsStorage, this.caches.storagesCache()) this.security.policy() ).register(router); } + new com.auto1.pantera.api.v1.admin.NegativeCacheAdminResource( + this.security.policy() + ).register(router); // Start server final HttpServer server; final String schema; diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/AuthHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/AuthHandler.java index 67fa87ee9..20c1571cf 100644 --- 
a/pantera-main/src/main/java/com/auto1/pantera/api/v1/AuthHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/AuthHandler.java @@ -25,8 +25,8 @@ import com.auto1.pantera.http.auth.AuthUser; import com.auto1.pantera.http.auth.Authentication; import com.auto1.pantera.http.auth.Tokens; +import com.auto1.pantera.http.context.HandlerExecutor; import com.auto1.pantera.http.log.EcsLogger; -import com.auto1.pantera.http.trace.MdcPropagation; import com.auto1.pantera.security.policy.Policy; import com.auto1.pantera.settings.users.CrudUsers; import io.vertx.core.json.JsonArray; @@ -48,6 +48,7 @@ import java.util.List; import java.util.Optional; import java.util.UUID; +import java.util.concurrent.CompletableFuture; import javax.json.Json; import javax.json.JsonString; import java.util.stream.Collectors; @@ -138,8 +139,8 @@ private void tokenEndpoint(final RoutingContext ctx) { final String name = body.getString("name"); final String pass = body.getString("pass"); final String mfa = body.getString("mfa_code"); - ctx.vertx().>executeBlocking( - MdcPropagation.withMdc(() -> { + CompletableFuture.supplyAsync( + (java.util.function.Supplier>) () -> { // Also set user.name in MDC so logs from inside the // auth chain (AuthFromDb, Keycloak, etc.) can reference // who is attempting to log in. @@ -152,31 +153,28 @@ private void tokenEndpoint(final RoutingContext ctx) { } finally { OktaAuthContext.clear(); } - }), - false - ).onComplete(ar -> { - if (ar.succeeded()) { - final Optional user = ar.result(); - if (user.isPresent()) { - final Tokens.TokenPair pair = this.tokens.generatePair(user.get()); - ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(new JsonObject() - .put("token", pair.accessToken()) - .put("refresh_token", pair.refreshToken()) - .put("expires_in", pair.expiresIn()) - .encode()); - } else { - // Generic message — never disclose whether the user - // exists, the password is wrong, or MFA failed. 
Detail - // is in the server logs from the auth chain. - ApiResponse.sendError(ctx, 401, "UNAUTHORIZED", - "Sign-in failed. Check your credentials and try again."); - } - } else { + }, + HandlerExecutor.get() + ).whenComplete((user, err) -> { + if (err != null) { ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", "Sign-in is temporarily unavailable. Please try again."); + } else if (user.isPresent()) { + final Tokens.TokenPair pair = this.tokens.generatePair(user.get()); + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(new JsonObject() + .put("token", pair.accessToken()) + .put("refresh_token", pair.refreshToken()) + .put("expires_in", pair.expiresIn()) + .encode()); + } else { + // Generic message — never disclose whether the user + // exists, the password is wrong, or MFA failed. Detail + // is in the server logs from the auth chain. + ApiResponse.sendError(ctx, 401, "UNAUTHORIZED", + "Sign-in failed. Check your credentials and try again."); } }); } @@ -230,51 +228,58 @@ private void redirectEndpoint(final RoutingContext ctx) { ApiResponse.sendError(ctx, 404, "NOT_FOUND", "No auth providers configured"); return; } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - final javax.json.JsonObject provider = findProvider(name); - if (provider == null) { - return null; - } - final javax.json.JsonObject config = provider.getJsonObject("config"); - final String type = provider.getString("type", ""); - final String state = Long.toHexString( - Double.doubleToLongBits(Math.random()) - ) + Long.toHexString(System.nanoTime()); - final String authorizeUrl; - final String clientId; - final String scope; - if ("okta".equals(type)) { - final String issuer = config.getString("issuer", ""); - clientId = config.getString("client-id", ""); - scope = config.getString("scope", "openid profile"); - final String base = issuer.endsWith("/") - ? 
issuer.substring(0, issuer.length() - 1) : issuer; - final String oidcBase = base.contains("/oauth2") ? base : base + "/oauth2"; - authorizeUrl = oidcBase + "/v1/authorize"; - } else if ("keycloak".equals(type)) { - final String url = config.getString("url", ""); - final String realm = config.getString("realm", ""); - clientId = config.getString("client-id", ""); - scope = "openid profile"; - final String base = url.endsWith("/") - ? url.substring(0, url.length() - 1) : url; - authorizeUrl = base + "/realms/" + realm - + "/protocol/openid-connect/auth"; - } else { - return new JsonObject().put("error", "Unsupported provider type: " + type); - } - final String url = authorizeUrl - + "?client_id=" + enc(clientId) - + "&response_type=code" - + "&scope=" + enc(scope) - + "&redirect_uri=" + enc(callbackUrl) - + "&state=" + enc(state); - return new JsonObject().put("url", url).put("state", state); - }), - false - ).onSuccess(result -> { - if (result == null) { + CompletableFuture.supplyAsync((java.util.function.Supplier) () -> { + final javax.json.JsonObject provider = findProvider(name); + if (provider == null) { + return null; + } + final javax.json.JsonObject config = provider.getJsonObject("config"); + final String type = provider.getString("type", ""); + final String state = Long.toHexString( + Double.doubleToLongBits(Math.random()) + ) + Long.toHexString(System.nanoTime()); + final String authorizeUrl; + final String clientId; + final String scope; + if ("okta".equals(type)) { + final String issuer = config.getString("issuer", ""); + clientId = config.getString("client-id", ""); + scope = config.getString("scope", "openid profile"); + final String base = issuer.endsWith("/") + ? issuer.substring(0, issuer.length() - 1) : issuer; + final String oidcBase = base.contains("/oauth2") ? 
base : base + "/oauth2"; + authorizeUrl = oidcBase + "/v1/authorize"; + } else if ("keycloak".equals(type)) { + final String url = config.getString("url", ""); + final String realm = config.getString("realm", ""); + clientId = config.getString("client-id", ""); + scope = "openid profile"; + final String base = url.endsWith("/") + ? url.substring(0, url.length() - 1) : url; + authorizeUrl = base + "/realms/" + realm + + "/protocol/openid-connect/auth"; + } else { + return new JsonObject().put("error", "Unsupported provider type: " + type); + } + final String url = authorizeUrl + + "?client_id=" + enc(clientId) + + "&response_type=code" + + "&scope=" + enc(scope) + + "&redirect_uri=" + enc(callbackUrl) + + "&state=" + enc(state); + return new JsonObject().put("url", url).put("state", state); + }, HandlerExecutor.get()).whenComplete((result, err) -> { + if (err != null) { + EcsLogger.error("com.auto1.pantera.api.v1") + .message("SSO redirect failed: " + + (err.getMessage() != null ? err.getMessage() : err.getClass().getSimpleName())) + .eventCategory("authentication") + .eventAction("sso_redirect") + .error(err) + .log(); + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", + "Sign-in is temporarily unavailable. Please try again."); + } else if (result == null) { ApiResponse.sendError(ctx, 404, "NOT_FOUND", "Sign-in provider is not configured."); } else if (result.containsKey("error")) { @@ -290,16 +295,6 @@ private void redirectEndpoint(final RoutingContext ctx) { .putHeader("Content-Type", "application/json") .end(result.encode()); } - }).onFailure(err -> { - EcsLogger.error("com.auto1.pantera.api.v1") - .message("SSO redirect failed: " - + (err.getMessage() != null ? err.getMessage() : err.getClass().getSimpleName())) - .eventCategory("authentication") - .eventAction("sso_redirect") - .error(err) - .log(); - ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", - "Sign-in is temporarily unavailable. 
Please try again."); }); } @@ -326,8 +321,8 @@ private void callbackEndpoint(final RoutingContext ctx) { "Field 'callback_url' is required"); return; } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { + CompletableFuture.supplyAsync( + (java.util.function.Supplier) () -> { final javax.json.JsonObject prov = findProvider(provider); if (prov == null) { throw new IllegalStateException( @@ -665,37 +660,41 @@ private void callbackEndpoint(final RoutingContext ctx) { // Generate Pantera JWT pair final AuthUser authUser = new AuthUser(username, provider); return AuthHandler.this.tokens.generatePair(authUser); - }), - false - ).onSuccess(pair -> ctx.response().setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(new JsonObject() - .put("token", pair.accessToken()) - .put("refresh_token", pair.refreshToken()) - .put("expires_in", pair.expiresIn()) - .encode()) - ).onFailure(err -> { - // Detailed reason logged server-side for ops/forensics. The - // client always gets a single generic message: revealing - // "user is disabled" / "not in allowed group" / "token - // exchange failed" lets attackers enumerate accounts and - // probe IdP configuration. The only exception is a missing - // provider (admin misconfig, not security-sensitive). - final String detail = err.getMessage() != null - ? 
err.getMessage() : "SSO callback failed"; - EcsLogger.error("com.auto1.pantera.api.v1") - .message("SSO callback failed: " + detail) - .eventCategory("authentication") - .eventAction("sso_callback") - .eventOutcome("failure") - .error(err) - .log(); - if (detail.contains("Provider '") && detail.contains("not found")) { - ApiResponse.sendError(ctx, 404, "NOT_FOUND", - "Sign-in provider is not configured."); + }, + HandlerExecutor.get() + ).whenComplete((pair, err) -> { + if (err == null) { + ctx.response().setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(new JsonObject() + .put("token", pair.accessToken()) + .put("refresh_token", pair.refreshToken()) + .put("expires_in", pair.expiresIn()) + .encode()); } else { - ApiResponse.sendError(ctx, 401, "UNAUTHORIZED", - "Sign-in failed. Please try again or contact your administrator."); + // Detailed reason logged server-side for ops/forensics. The + // client always gets a single generic message: revealing + // "user is disabled" / "not in allowed group" / "token + // exchange failed" lets attackers enumerate accounts and + // probe IdP configuration. The only exception is a missing + // provider (admin misconfig, not security-sensitive). + final Throwable cause = err.getCause() != null ? err.getCause() : err; + final String detail = cause.getMessage() != null + ? cause.getMessage() : "SSO callback failed"; + EcsLogger.error("com.auto1.pantera.api.v1") + .message("SSO callback failed: " + detail) + .eventCategory("authentication") + .eventAction("sso_callback") + .eventOutcome("failure") + .error(cause) + .log(); + if (detail.contains("Provider '") && detail.contains("not found")) { + ApiResponse.sendError(ctx, 404, "NOT_FOUND", + "Sign-in provider is not configured."); + } else { + ApiResponse.sendError(ctx, 401, "UNAUTHORIZED", + "Sign-in failed. 
Please try again or contact your administrator."); + } } }); } @@ -952,32 +951,31 @@ private void listTokensEndpoint(final RoutingContext ctx) { return; } final String sub = ctx.user().principal().getString(AuthTokenRest.SUB); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - final JsonArray arr = new JsonArray(); - for (final UserTokenDao.TokenInfo info : this.tokenDao.listByUser(sub)) { - final JsonObject obj = new JsonObject() - .put("id", info.id().toString()) - .put("label", info.label()) - .put("created_at", info.createdAt().toString()); - if (info.expiresAt() != null) { - obj.put("expires_at", info.expiresAt().toString()); - obj.put("expired", Instant.now().isAfter(info.expiresAt())); - } else { - obj.put("permanent", true); - } - arr.add(obj); + CompletableFuture.supplyAsync((java.util.function.Supplier) () -> { + final JsonArray arr = new JsonArray(); + for (final UserTokenDao.TokenInfo info : this.tokenDao.listByUser(sub)) { + final JsonObject obj = new JsonObject() + .put("id", info.id().toString()) + .put("label", info.label()) + .put("created_at", info.createdAt().toString()); + if (info.expiresAt() != null) { + obj.put("expires_at", info.expiresAt().toString()); + obj.put("expired", Instant.now().isAfter(info.expiresAt())); + } else { + obj.put("permanent", true); } - return arr; - }), - false - ).onSuccess( - arr -> ctx.response().setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(new JsonObject().put("tokens", arr).encode()) - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + arr.add(obj); + } + return arr; + }, HandlerExecutor.get()).whenComplete((arr, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + ctx.response().setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(new JsonObject().put("tokens", arr).encode()); + } + }); } /** @@ -999,18 +997,18 @@ private void 
revokeTokenEndpoint(final RoutingContext ctx) { ApiResponse.sendError(ctx, 400, "BAD_REQUEST", "Invalid token ID"); return; } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> this.tokenDao.revoke(id, sub)), - false - ).onSuccess(revoked -> { - if (revoked) { + CompletableFuture.supplyAsync( + () -> this.tokenDao.revoke(id, sub), + HandlerExecutor.get() + ).whenComplete((revoked, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else if (revoked) { ctx.response().setStatusCode(204).end(); } else { ApiResponse.sendError(ctx, 404, "NOT_FOUND", "Token not found"); } - }).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/CooldownHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/CooldownHandler.java index 6da1dee7f..dec2b7985 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/CooldownHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/CooldownHandler.java @@ -14,15 +14,18 @@ import com.auto1.pantera.api.AuthzHandler; import com.auto1.pantera.api.RepositoryName; import com.auto1.pantera.api.perms.ApiCooldownPermission; -import com.auto1.pantera.http.auth.AuthUser; -import com.auto1.pantera.security.perms.AdapterBasicPermission; import com.auto1.pantera.cooldown.CooldownRepository; -import com.auto1.pantera.cooldown.CooldownService; -import com.auto1.pantera.cooldown.CooldownSettings; -import com.auto1.pantera.cooldown.metadata.CooldownMetadataService; import com.auto1.pantera.cooldown.DbBlockRecord; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import com.auto1.pantera.cooldown.metadata.CooldownMetadataService; +import com.auto1.pantera.cooldown.metrics.CooldownMetrics; import 
com.auto1.pantera.db.dao.SettingsDao; -import com.auto1.pantera.http.trace.MdcPropagation; +import com.auto1.pantera.http.auth.AuthUser; +import com.auto1.pantera.http.context.HandlerExecutor; +import com.auto1.pantera.http.observability.StructuredLogger; +import com.auto1.pantera.security.perms.AdapterBasicPermission; import com.auto1.pantera.security.policy.Policy; import com.auto1.pantera.settings.repo.CrudRepoSettings; import io.vertx.core.json.JsonArray; @@ -39,6 +42,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.concurrent.CompletableFuture; import javax.json.Json; import javax.json.JsonStructure; import javax.json.JsonValue; @@ -49,8 +53,14 @@ * @since 1.21.0 * @checkstyle ClassDataAbstractionCouplingCheck (300 lines) */ +@SuppressWarnings("PMD.TooManyMethods") public final class CooldownHandler { + /** + * Logger component name for StructuredLogger.local() (Tier-4). + */ + private static final String LOG_COMPONENT = "com.auto1.pantera.cooldown.admin"; + /** * JSON key for repo section. */ @@ -71,6 +81,12 @@ public final class CooldownHandler { */ private final CooldownMetadataService metadataService; + /** + * Cooldown decision cache (for L1+L2 invalidation on unblock). + * May be null when cooldown is backed by NoopCooldownService. + */ + private final CooldownCache cooldownCache; + /** * Repository settings CRUD. */ @@ -100,6 +116,7 @@ public final class CooldownHandler { * Ctor. 
* @param cooldown Cooldown service * @param metadataService Cooldown metadata service for cache invalidation + * @param cooldownCache Cooldown decision cache (nullable) * @param crs Repository settings CRUD * @param csettings Cooldown settings * @param dataSource Database data source (nullable) @@ -107,11 +124,14 @@ public final class CooldownHandler { * @checkstyle ParameterNumberCheck (5 lines) */ public CooldownHandler(final CooldownService cooldown, - final CooldownMetadataService metadataService, final CrudRepoSettings crs, + final CooldownMetadataService metadataService, + final CooldownCache cooldownCache, + final CrudRepoSettings crs, final CooldownSettings csettings, final DataSource dataSource, final Policy policy) { this.cooldown = cooldown; this.metadataService = metadataService; + this.cooldownCache = cooldownCache; this.crs = crs; this.csettings = csettings; this.repository = dataSource != null ? new CooldownRepository(dataSource) : null; @@ -119,6 +139,23 @@ public CooldownHandler(final CooldownService cooldown, this.policy = policy; } + /** + * Backward-compatible ctor (no CooldownCache). + * @param cooldown Cooldown service + * @param metadataService Cooldown metadata service for cache invalidation + * @param crs Repository settings CRUD + * @param csettings Cooldown settings + * @param dataSource Database data source (nullable) + * @param policy Security policy + * @checkstyle ParameterNumberCheck (5 lines) + */ + public CooldownHandler(final CooldownService cooldown, + final CooldownMetadataService metadataService, final CrudRepoSettings crs, + final CooldownSettings csettings, final DataSource dataSource, + final Policy policy) { + this(cooldown, metadataService, null, crs, csettings, dataSource, policy); + } + /** * Register cooldown routes on the router. 
* @param router Vert.x router @@ -234,7 +271,7 @@ private void updateConfig(final RoutingContext ctx) { } // Persist to DB if available if (this.settingsDao != null) { - final String actor = ctx.user() != null + final String actor2 = ctx.user() != null ? ctx.user().principal().getString(AuthTokenRest.SUB, "system") : "system"; final javax.json.JsonObjectBuilder jb = Json.createObjectBuilder() @@ -253,8 +290,22 @@ private void updateConfig(final RoutingContext ctx) { } jb.add("repo_types", rtb); } - this.settingsDao.put("cooldown", jb.build(), actor); + this.settingsDao.put("cooldown", jb.build(), actor2); + } + // Invalidate ALL caches: a policy change (e.g. 30d→7d) can shift + // which versions are in/out of the cooldown window, so every cached + // decision and every cached filtered-metadata response may be stale. + this.metadataService.clearAll(); + if (this.cooldownCache != null) { + this.cooldownCache.clear(); } + CooldownHandler.recordAdminMetric("policy_change"); + StructuredLogger.local().forComponent(LOG_COMPONENT) + .message("Cooldown policy updated — all caches invalidated") + .field("cooldown.enabled", newEnabled) + .field("cooldown.minimum_allowed_age", + CooldownHandler.formatDuration(newAge)) + .info(); ctx.response() .setStatusCode(200) .putHeader("Content-Type", "application/json") @@ -273,67 +324,66 @@ private void overview(final RoutingContext ctx) { ctx.user().principal().getString(AuthTokenRest.CONTEXT) ) ); - ctx.vertx().>executeBlocking( - MdcPropagation.withMdc(() -> { - final Collection all = this.crs.listAll(); - final List result = new ArrayList<>(all.size()); - for (final String name : all) { - if (!perms.implies(new AdapterBasicPermission(name, "read"))) { + CompletableFuture.supplyAsync((java.util.function.Supplier>) () -> { + final Collection all = this.crs.listAll(); + final List result = new ArrayList<>(all.size()); + for (final String name : all) { + if (!perms.implies(new AdapterBasicPermission(name, "read"))) { + continue; + } + 
final RepositoryName rname = new RepositoryName.Simple(name); + try { + final JsonStructure config = this.crs.value(rname); + if (config == null + || !(config instanceof javax.json.JsonObject)) { continue; } - final RepositoryName rname = new RepositoryName.Simple(name); - try { - final JsonStructure config = this.crs.value(rname); - if (config == null - || !(config instanceof javax.json.JsonObject)) { - continue; - } - final javax.json.JsonObject jobj = - (javax.json.JsonObject) config; - final javax.json.JsonObject repoSection; - if (jobj.containsKey(CooldownHandler.REPO)) { - final javax.json.JsonValue rv = - jobj.get(CooldownHandler.REPO); - if (rv.getValueType() != JsonValue.ValueType.OBJECT) { - continue; - } - repoSection = (javax.json.JsonObject) rv; - } else { - repoSection = jobj; - } - final String repoType = repoSection.getString( - CooldownHandler.TYPE, "" - ); - // Check if cooldown is actually enabled for this repo type - if (!this.csettings.enabledFor(repoType)) { - continue; - } - // Only proxy repos can have cooldown - if (!repoType.endsWith("-proxy")) { + final javax.json.JsonObject jobj = + (javax.json.JsonObject) config; + final javax.json.JsonObject repoSection; + if (jobj.containsKey(CooldownHandler.REPO)) { + final javax.json.JsonValue rv = + jobj.get(CooldownHandler.REPO); + if (rv.getValueType() != JsonValue.ValueType.OBJECT) { continue; } - final Duration minAge = - this.csettings.minimumAllowedAgeFor(repoType); - final JsonObject entry = new JsonObject() - .put("name", name) - .put(CooldownHandler.TYPE, repoType) - .put("cooldown", formatDuration(minAge)); - // Add active block count if DB is available - if (this.repository != null) { - final long count = - this.repository.countActiveBlocks(repoType, name); - entry.put("active_blocks", count); - } - result.add(entry); - } catch (final Exception ex) { - // skip repos that cannot be read + repoSection = (javax.json.JsonObject) rv; + } else { + repoSection = jobj; } + final String repoType 
= repoSection.getString( + CooldownHandler.TYPE, "" + ); + // Check if cooldown is actually enabled for this repo type + if (!this.csettings.enabledFor(repoType)) { + continue; + } + // Only proxy repos can have cooldown + if (!repoType.endsWith("-proxy")) { + continue; + } + final Duration minAge = + this.csettings.minimumAllowedAgeFor(repoType); + final JsonObject entry = new JsonObject() + .put("name", name) + .put(CooldownHandler.TYPE, repoType) + .put("cooldown", formatDuration(minAge)); + // Add active block count if DB is available + if (this.repository != null) { + final long count = + this.repository.countActiveBlocks(repoType, name); + entry.put("active_blocks", count); + } + result.add(entry); + } catch (final Exception ex) { + // skip repos that cannot be read } - return result; - }), - false - ).onSuccess( - repos -> { + } + return result; + }, HandlerExecutor.get()).whenComplete((repos, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { final JsonArray arr = new JsonArray(); for (final JsonObject repo : repos) { arr.add(repo); @@ -343,9 +393,7 @@ private void overview(final RoutingContext ctx) { .putHeader("Content-Type", "application/json") .end(new JsonObject().put("repos", arr).encode()); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** @@ -399,67 +447,69 @@ private void blocked(final RoutingContext ctx) { ctx.user().principal().getString(AuthTokenRest.CONTEXT) ) ); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - final List allBlocks = - this.repository.findAllActivePaginated( - 0, Integer.MAX_VALUE, searchQuery, sortDbCol, sortAsc - ); - final Instant now = Instant.now(); - final JsonArray items = new JsonArray(); - int skipped = 0; - int added = 0; - for (final DbBlockRecord rec : allBlocks) { - if (!perms.implies( - new AdapterBasicPermission(rec.repoName(), "read"))) { - continue; - } - if (skipped < page 
* size) { - skipped++; - continue; - } - if (added >= size) { - continue; - } - final long remainingSecs = - Duration.between(now, rec.blockedUntil()).getSeconds(); - final JsonObject item = new JsonObject() - .put("package_name", rec.artifact()) - .put("version", rec.version()) - .put("repo", rec.repoName()) - .put("repo_type", rec.repoType()) - .put("reason", rec.reason().name()) - .put("blocked_date", rec.blockedAt().toString()) - .put("blocked_until", rec.blockedUntil().toString()) - .put("remaining_hours", - Math.max(0, remainingSecs / 3600)); - items.add(item); - added++; + CompletableFuture.supplyAsync((java.util.function.Supplier) () -> { + final List allBlocks = + this.repository.findAllActivePaginated( + 0, Integer.MAX_VALUE, searchQuery, sortDbCol, sortAsc + ); + final Instant now = Instant.now(); + final JsonArray items = new JsonArray(); + int skipped = 0; + int added = 0; + for (final DbBlockRecord rec : allBlocks) { + if (!perms.implies( + new AdapterBasicPermission(rec.repoName(), "read"))) { + continue; } - final int filteredTotal = skipped + added - + (int) allBlocks.stream() - .skip((long) skipped + added) - .filter(r -> perms.implies( - new AdapterBasicPermission(r.repoName(), "read"))) - .count(); - return ApiResponse.paginated(items, page, size, filteredTotal); - }), - false - ).onSuccess( - result -> ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(result.encode()) - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + if (skipped < page * size) { + skipped++; + continue; + } + if (added >= size) { + continue; + } + final long remainingSecs = + Duration.between(now, rec.blockedUntil()).getSeconds(); + final JsonObject item = new JsonObject() + .put("package_name", rec.artifact()) + .put("version", rec.version()) + .put("repo", rec.repoName()) + .put("repo_type", rec.repoType()) + .put("reason", rec.reason().name()) + .put("blocked_date", 
rec.blockedAt().toString()) + .put("blocked_until", rec.blockedUntil().toString()) + .put("remaining_hours", + Math.max(0, remainingSecs / 3600)); + items.add(item); + added++; + } + final int filteredTotal = skipped + added + + (int) allBlocks.stream() + .skip((long) skipped + added) + .filter(r -> perms.implies( + new AdapterBasicPermission(r.repoName(), "read"))) + .count(); + return ApiResponse.paginated(items, page, size, filteredTotal); + }, HandlerExecutor.get()).whenComplete((result, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(result.encode()); + } + }); } /** * POST /api/v1/repositories/:name/cooldown/unblock — unblock a single artifact version. + * Flow: DB write → CooldownCache invalidation → FilteredMetadataCache invalidation → 204. + * All invalidations complete synchronously before the response is sent. * @param ctx Routing context * @checkstyle ExecutableStatementCountCheck (60 lines) */ + @SuppressWarnings("PMD.CognitiveComplexity") private void unblock(final RoutingContext ctx) { final String name = ctx.pathParam("name"); final RepositoryName rname = new RepositoryName.Simple(name); @@ -493,23 +543,40 @@ private void unblock(final RoutingContext ctx) { return; } final String actor = ctx.user().principal().getString(AuthTokenRest.SUB); + // DB write completes first, then synchronous cache invalidation, then response this.cooldown.unblock(repoType, name, artifact, version, actor) - .whenComplete( - (ignored, error) -> { - if (error == null) { - this.metadataService.invalidate(repoType, name, artifact); - ctx.response().setStatusCode(204).end(); - } else { - ApiResponse.sendError( - ctx, 500, "INTERNAL_ERROR", error.getMessage() - ); - } + .thenRun(() -> { + // CooldownCache L1+L2 invalidation (handler-level guarantee) + if (this.cooldownCache != null) { + this.cooldownCache.unblock(name, 
artifact, version); } - ); + // FilteredMetadataCache invalidation + this.metadataService.invalidate(repoType, name, artifact); + CooldownHandler.recordAdminMetric("unblock"); + StructuredLogger.local().forComponent(LOG_COMPONENT) + .message("Admin unblock: version unblocked") + .field("repository.name", name) + .field("repository.type", repoType) + .field("package.name", artifact) + .field("package.version", version) + .field("user.name", actor) + .info(); + }) + .whenComplete((ignored, error) -> { + if (error == null) { + ctx.response().setStatusCode(204).end(); + } else { + ApiResponse.sendError( + ctx, 500, "INTERNAL_ERROR", error.getMessage() + ); + } + }); } /** * POST /api/v1/repositories/:name/cooldown/unblock-all — unblock all artifacts in repo. + * Flow: DB write → CooldownCache invalidation → FilteredMetadataCache invalidation → 204. + * All invalidations complete synchronously before the response is sent. * @param ctx Routing context */ private void unblockAll(final RoutingContext ctx) { @@ -527,19 +594,32 @@ private void unblockAll(final RoutingContext ctx) { return; } final String actor = ctx.user().principal().getString(AuthTokenRest.SUB); + // DB write completes first, then synchronous cache invalidation, then response this.cooldown.unblockAll(repoType, name, actor) - .whenComplete( - (ignored, error) -> { - if (error == null) { - this.metadataService.invalidateAll(repoType, name); - ctx.response().setStatusCode(204).end(); - } else { - ApiResponse.sendError( - ctx, 500, "INTERNAL_ERROR", error.getMessage() - ); - } + .thenRun(() -> { + // CooldownCache L1+L2 invalidation (handler-level guarantee) + if (this.cooldownCache != null) { + this.cooldownCache.unblockAll(name); } - ); + // FilteredMetadataCache invalidation + this.metadataService.invalidateAll(repoType, name); + CooldownHandler.recordAdminMetric("unblock_all"); + StructuredLogger.local().forComponent(LOG_COMPONENT) + .message("Admin unblock-all: all versions unblocked for repo") + 
.field("repository.name", name) + .field("repository.type", repoType) + .field("user.name", actor) + .info(); + }) + .whenComplete((ignored, error) -> { + if (error == null) { + ctx.response().setStatusCode(204).end(); + } else { + ApiResponse.sendError( + ctx, 500, "INTERNAL_ERROR", error.getMessage() + ); + } + }); } /** @@ -615,4 +695,15 @@ private static Duration parseDuration(final String value) { } return Duration.ofHours(amount); } + + /** + * Record admin action counter: {@code pantera.cooldown.admin{action=...}}. + * Safe to call even when Micrometer is not initialised (guard-checked). + * @param action Action tag value (unblock, unblock_all, policy_change) + */ + private static void recordAdminMetric(final String action) { + if (CooldownMetrics.isAvailable()) { + CooldownMetrics.getInstance().recordAdminAction(action); + } + } } diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/DashboardHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/DashboardHandler.java index 91b59c281..e4205eb6e 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/DashboardHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/DashboardHandler.java @@ -10,8 +10,8 @@ */ package com.auto1.pantera.api.v1; +import com.auto1.pantera.http.context.HandlerExecutor; import com.auto1.pantera.http.log.EcsLogger; -import com.auto1.pantera.http.trace.MdcPropagation; import com.auto1.pantera.settings.repo.CrudRepoSettings; import io.vertx.core.json.JsonArray; import io.vertx.core.json.JsonObject; @@ -20,6 +20,7 @@ import java.sql.Connection; import java.sql.ResultSet; import java.sql.Statement; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -189,50 +190,49 @@ private void handleRequests(final RoutingContext ctx) { */ private void respondWithCache(final RoutingContext ctx, final java.util.function.Function 
extractor) { - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - final CachedDashboard current = this.cache.get(); - final boolean expired = current == null - || System.currentTimeMillis() - current.timestamp > CACHE_TTL_MS; - if (expired && this.rebuilding.compareAndSet(false, true)) { - // This thread won the rebuild race - try { - final CachedDashboard fresh = this.buildDashboard(); - this.cache.set(fresh); - return extractor.apply(fresh); - } finally { - this.rebuilding.set(false); - } - } - // Serve current cache — either still valid or another thread is rebuilding - final CachedDashboard cached = this.cache.get(); - if (cached != null) { - return extractor.apply(cached); - } - // First request race: no cache yet and we lost the rebuild CAS — - // wait briefly for the winner to populate it - for (int i = 0; i < 50 && this.cache.get() == null; i++) { - try { Thread.sleep(20); } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - break; - } + CompletableFuture.supplyAsync(() -> { + final CachedDashboard current = this.cache.get(); + final boolean expired = current == null + || System.currentTimeMillis() - current.timestamp > CACHE_TTL_MS; + if (expired && this.rebuilding.compareAndSet(false, true)) { + // This thread won the rebuild race + try { + final CachedDashboard fresh = this.buildDashboard(); + this.cache.set(fresh); + return extractor.apply(fresh); + } finally { + this.rebuilding.set(false); } - final CachedDashboard ready = this.cache.get(); - if (ready != null) { - return extractor.apply(ready); + } + // Serve current cache — either still valid or another thread is rebuilding + final CachedDashboard cached = this.cache.get(); + if (cached != null) { + return extractor.apply(cached); + } + // First request race: no cache yet and we lost the rebuild CAS — + // wait briefly for the winner to populate it + for (int i = 0; i < 50 && this.cache.get() == null; i++) { + try { Thread.sleep(20); } catch (InterruptedException e) 
{ + Thread.currentThread().interrupt(); + break; } - // Fallback: serve empty stats rather than error - return extractor.apply(emptyDashboard()); - }), - false - ).onSuccess( - json -> ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(json.encode()) - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + } + final CachedDashboard ready = this.cache.get(); + if (ready != null) { + return extractor.apply(ready); + } + // Fallback: serve empty stats rather than error + return extractor.apply(emptyDashboard()); + }, HandlerExecutor.get()).whenComplete((json, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(json.encode()); + } + }); } /** diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/PypiHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/PypiHandler.java index 12021336f..7d59a28b5 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/PypiHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/PypiHandler.java @@ -14,8 +14,8 @@ import com.auto1.pantera.asto.Key; import com.auto1.pantera.asto.Storage; import com.auto1.pantera.asto.SubStorage; +import com.auto1.pantera.http.context.HandlerExecutor; import com.auto1.pantera.http.log.EcsLogger; -import com.auto1.pantera.http.trace.MdcPropagation; import com.auto1.pantera.pypi.meta.PypiSidecar; import com.auto1.pantera.settings.RepoData; import com.auto1.pantera.settings.repo.CrudRepoSettings; @@ -91,40 +91,35 @@ private void yankHandler(final RoutingContext ctx) { final String pkg = ctx.pathParam("package"); final String version = ctx.pathParam("version"); final String reason = extractReason(ctx); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.applyYank(repo, pkg, version, reason); - return null; - 
}), - false - ).onSuccess( - ignored -> { - EcsLogger.info("com.auto1.pantera.api.v1") - .message("PyPI yank applied") + CompletableFuture.supplyAsync(() -> { + this.applyYank(repo, pkg, version, reason); + return (Void) null; + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err != null) { + EcsLogger.error("com.auto1.pantera.api.v1") + .message("PyPI yank failed") .eventCategory("web") .eventAction("yank") - .eventOutcome("success") + .eventOutcome("failure") .field("repository.name", repo) .field("package.name", pkg) .field("package.version", version) + .error(err) .log(); - ctx.response().setStatusCode(204).end(); - } - ).onFailure( - err -> { - EcsLogger.error("com.auto1.pantera.api.v1") - .message("PyPI yank failed") + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + EcsLogger.info("com.auto1.pantera.api.v1") + .message("PyPI yank applied") .eventCategory("web") .eventAction("yank") - .eventOutcome("failure") + .eventOutcome("success") .field("repository.name", repo) .field("package.name", pkg) .field("package.version", version) - .error(err) .log(); - ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + ctx.response().setStatusCode(204).end(); } - ); + }); } /** @@ -135,40 +130,35 @@ private void unyankHandler(final RoutingContext ctx) { final String repo = ctx.pathParam("repo"); final String pkg = ctx.pathParam("package"); final String version = ctx.pathParam("version"); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.applyUnyank(repo, pkg, version); - return null; - }), - false - ).onSuccess( - ignored -> { - EcsLogger.info("com.auto1.pantera.api.v1") - .message("PyPI unyank applied") + CompletableFuture.supplyAsync(() -> { + this.applyUnyank(repo, pkg, version); + return (Void) null; + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err != null) { + EcsLogger.error("com.auto1.pantera.api.v1") + .message("PyPI unyank failed") .eventCategory("web") 
.eventAction("unyank") - .eventOutcome("success") + .eventOutcome("failure") .field("repository.name", repo) .field("package.name", pkg) .field("package.version", version) + .error(err) .log(); - ctx.response().setStatusCode(204).end(); - } - ).onFailure( - err -> { - EcsLogger.error("com.auto1.pantera.api.v1") - .message("PyPI unyank failed") + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + EcsLogger.info("com.auto1.pantera.api.v1") + .message("PyPI unyank applied") .eventCategory("web") .eventAction("unyank") - .eventOutcome("failure") + .eventOutcome("success") .field("repository.name", repo) .field("package.name", pkg) .field("package.version", version) - .error(err) .log(); - ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + ctx.response().setStatusCode(204).end(); } - ); + }); } /** diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/RepositoryHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/RepositoryHandler.java index abbde258f..24dcfa72d 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/RepositoryHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/RepositoryHandler.java @@ -15,9 +15,9 @@ import com.auto1.pantera.api.RepositoryEvents; import com.auto1.pantera.api.RepositoryName; import com.auto1.pantera.api.perms.ApiRepositoryPermission; -import com.auto1.pantera.cooldown.CooldownService; +import com.auto1.pantera.cooldown.api.CooldownService; import com.auto1.pantera.http.auth.AuthUser; -import com.auto1.pantera.http.trace.MdcPropagation; +import com.auto1.pantera.http.context.HandlerExecutor; import com.auto1.pantera.scheduling.MetadataEventQueues; import com.auto1.pantera.security.perms.AdapterBasicPermission; import com.auto1.pantera.security.policy.Policy; @@ -36,6 +36,7 @@ import java.util.List; import java.util.Locale; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import javax.json.Json; import 
javax.json.JsonStructure; @@ -165,45 +166,44 @@ private void listRepositories(final RoutingContext ctx) { ctx.user().principal().getString(AuthTokenRest.CONTEXT) ) ); - ctx.vertx().>executeBlocking( - MdcPropagation.withMdc(() -> { - final Collection all = this.crs.listAll(); - final List filtered = new ArrayList<>(all.size()); - for (final String name : all) { - if (query != null - && !name.toLowerCase(Locale.ROOT).contains(query.toLowerCase(Locale.ROOT))) { - continue; - } - if (!perms.implies(new AdapterBasicPermission(name, "read"))) { - continue; - } - String repoType = "unknown"; - try { - final javax.json.JsonStructure config = - this.crs.value(new RepositoryName.Simple(name)); - if (config instanceof javax.json.JsonObject) { - final javax.json.JsonObject jobj = (javax.json.JsonObject) config; - final javax.json.JsonObject repo = - jobj.containsKey(RepositoryHandler.REPO) - ? jobj.getJsonObject(RepositoryHandler.REPO) : jobj; - repoType = repo.getString("type", "unknown"); - } - } catch (final Exception ignored) { - // Use "unknown" type - } - if (type != null && !repoType.toLowerCase(Locale.ROOT).contains( - type.toLowerCase(Locale.ROOT))) { - continue; + CompletableFuture.supplyAsync((java.util.function.Supplier>) () -> { + final Collection all = this.crs.listAll(); + final List filtered = new ArrayList<>(all.size()); + for (final String name : all) { + if (query != null + && !name.toLowerCase(Locale.ROOT).contains(query.toLowerCase(Locale.ROOT))) { + continue; + } + if (!perms.implies(new AdapterBasicPermission(name, "read"))) { + continue; + } + String repoType = "unknown"; + try { + final javax.json.JsonStructure config = + this.crs.value(new RepositoryName.Simple(name)); + if (config instanceof javax.json.JsonObject) { + final javax.json.JsonObject jobj = (javax.json.JsonObject) config; + final javax.json.JsonObject repo = + jobj.containsKey(RepositoryHandler.REPO) + ? 
jobj.getJsonObject(RepositoryHandler.REPO) : jobj; + repoType = repo.getString("type", "unknown"); } - filtered.add(new JsonObject() - .put("name", name) - .put("type", repoType)); + } catch (final Exception ignored) { + // Use "unknown" type + } + if (type != null && !repoType.toLowerCase(Locale.ROOT).contains( + type.toLowerCase(Locale.ROOT))) { + continue; } - return filtered; - }), - false - ).onSuccess( - filtered -> { + filtered.add(new JsonObject() + .put("name", name) + .put("type", repoType)); + } + return filtered; + }, HandlerExecutor.get()).whenComplete((filtered, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { final int total = filtered.size(); final int from = Math.min(page * size, total); final int to = Math.min(from + size, total); @@ -222,9 +222,7 @@ private void listRepositories(final RoutingContext ctx) { .put("hasMore", to < total) .encode()); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** @@ -234,31 +232,26 @@ private void listRepositories(final RoutingContext ctx) { private void getRepository(final RoutingContext ctx) { final String name = ctx.pathParam("name"); final RepositoryName rname = new RepositoryName.Simple(name); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - if (!this.crs.exists(rname)) { - return null; - } - return this.crs.value(rname); - }), - false - ).onSuccess( - config -> { - if (config == null) { - ApiResponse.sendError( - ctx, 404, "NOT_FOUND", - String.format("Repository '%s' not found", name) - ); - } else { - ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(config.toString()); - } + CompletableFuture.supplyAsync((java.util.function.Supplier) () -> { + if (!this.crs.exists(rname)) { + return null; } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + return this.crs.value(rname); + }, 
HandlerExecutor.get()).whenComplete((config, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else if (config == null) { + ApiResponse.sendError( + ctx, 404, "NOT_FOUND", + String.format("Repository '%s' not found", name) + ); + } else { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(config.toString()); + } + }); } /** @@ -267,20 +260,18 @@ private void getRepository(final RoutingContext ctx) { */ private void headRepository(final RoutingContext ctx) { final RepositoryName rname = new RepositoryName.Simple(ctx.pathParam("name")); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> this.crs.exists(rname)), - false - ).onSuccess( - exists -> { - if (Boolean.TRUE.equals(exists)) { - ctx.response().setStatusCode(200).end(); - } else { - ctx.response().setStatusCode(404).end(); - } + CompletableFuture.supplyAsync( + () -> this.crs.exists(rname), + HandlerExecutor.get() + ).whenComplete((exists, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else if (Boolean.TRUE.equals(exists)) { + ctx.response().setStatusCode(200).end(); + } else { + ctx.response().setStatusCode(404).end(); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** @@ -345,21 +336,18 @@ private void createOrUpdateRepository(final RoutingContext ctx) { return; } final String actor = ctx.user().principal().getString(AuthTokenRest.SUB); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.crs.save(rname, body, actor); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync( + () -> this.crs.save(rname, body, actor), + HandlerExecutor.get() + ).whenComplete((ignored, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { this.filtersCache.invalidate(rname.toString()); 
this.eventBus.publish(RepositoryEvents.ADDRESS, RepositoryEvents.upsert(name)); ctx.response().setStatusCode(200).end(); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** @@ -369,32 +357,32 @@ private void createOrUpdateRepository(final RoutingContext ctx) { private void deleteRepository(final RoutingContext ctx) { final String name = ctx.pathParam("name"); final RepositoryName rname = new RepositoryName.Simple(name); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> this.crs.exists(rname)), - false - ).onSuccess( - exists -> { - if (!Boolean.TRUE.equals(exists)) { - ApiResponse.sendError( - ctx, 404, "NOT_FOUND", - String.format("Repository '%s' not found", name) - ); - return; - } - this.repoData.remove(rname) - .thenRun(() -> this.crs.delete(rname)) - .exceptionally(exc -> { - this.crs.delete(rname); - return null; - }); - this.filtersCache.invalidate(rname.toString()); - this.eventBus.publish(RepositoryEvents.ADDRESS, RepositoryEvents.remove(name)); - this.events.ifPresent(item -> item.stopProxyMetadataProcessing(name)); - ctx.response().setStatusCode(200).end(); + CompletableFuture.supplyAsync( + () -> this.crs.exists(rname), + HandlerExecutor.get() + ).whenComplete((exists, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + return; } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + if (!Boolean.TRUE.equals(exists)) { + ApiResponse.sendError( + ctx, 404, "NOT_FOUND", + String.format("Repository '%s' not found", name) + ); + return; + } + this.repoData.remove(rname) + .thenRun(() -> this.crs.delete(rname)) + .exceptionally(exc -> { + this.crs.delete(rname); + return null; + }); + this.filtersCache.invalidate(rname.toString()); + this.eventBus.publish(RepositoryEvents.ADDRESS, RepositoryEvents.remove(name)); + this.events.ifPresent(item -> item.stopProxyMetadataProcessing(name)); + 
ctx.response().setStatusCode(200).end(); + }); } /** @@ -421,30 +409,30 @@ private void moveRepository(final RoutingContext ctx) { ApiResponse.sendError(ctx, 400, "BAD_REQUEST", "new_name is required"); return; } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> this.crs.exists(rname)), - false - ).onSuccess( - exists -> { - if (!Boolean.TRUE.equals(exists)) { - ApiResponse.sendError( - ctx, 404, "NOT_FOUND", - String.format("Repository '%s' not found", name) - ); - return; - } - final RepositoryName newrname = new RepositoryName.Simple(newName); - this.repoData.move(rname, newrname) - .thenRun(() -> this.crs.move(rname, newrname)); - this.filtersCache.invalidate(rname.toString()); - this.eventBus.publish( - RepositoryEvents.ADDRESS, RepositoryEvents.move(name, newName) + CompletableFuture.supplyAsync( + () -> this.crs.exists(rname), + HandlerExecutor.get() + ).whenComplete((exists, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + return; + } + if (!Boolean.TRUE.equals(exists)) { + ApiResponse.sendError( + ctx, 404, "NOT_FOUND", + String.format("Repository '%s' not found", name) ); - ctx.response().setStatusCode(200).end(); + return; } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + final RepositoryName newrname = new RepositoryName.Simple(newName); + this.repoData.move(rname, newrname) + .thenRun(() -> this.crs.move(rname, newrname)); + this.filtersCache.invalidate(rname.toString()); + this.eventBus.publish( + RepositoryEvents.ADDRESS, RepositoryEvents.move(name, newName) + ); + ctx.response().setStatusCode(200).end(); + }); } /** @@ -466,54 +454,49 @@ private static boolean isGroupType(final String type) { private void getMembers(final RoutingContext ctx) { final String name = ctx.pathParam("name"); final RepositoryName rname = new RepositoryName.Simple(name); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - if 
(!this.crs.exists(rname)) { - return null; - } - final JsonStructure config = this.crs.value(rname); - if (config == null) { - return null; - } - final javax.json.JsonObject jconfig; - if (config instanceof javax.json.JsonObject) { - jconfig = (javax.json.JsonObject) config; - } else { - return new JsonObject().put("members", new JsonArray()).put("type", "not-a-group"); - } - final javax.json.JsonObject repoSection = jconfig.containsKey(RepositoryHandler.REPO) - ? jconfig.getJsonObject(RepositoryHandler.REPO) : jconfig; - final String repoType = repoSection.getString("type", ""); - if (!repoType.endsWith("-group")) { - return new JsonObject().put("members", new JsonArray()).put("type", "not-a-group"); - } - final JsonArray members = new JsonArray(); - if (repoSection.containsKey("remotes")) { - final javax.json.JsonArray remotes = repoSection.getJsonArray("remotes"); - for (int idx = 0; idx < remotes.size(); idx++) { - final javax.json.JsonObject remote = remotes.getJsonObject(idx); - members.add(remote.getString("url", remote.toString())); - } - } - return new JsonObject().put("members", members).put("type", repoType); - }), - false - ).onSuccess( - result -> { - if (result == null) { - ApiResponse.sendError( - ctx, 404, "NOT_FOUND", - String.format("Repository '%s' not found", name) - ); - } else { - ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(result.encode()); + CompletableFuture.supplyAsync((java.util.function.Supplier) () -> { + if (!this.crs.exists(rname)) { + return null; + } + final JsonStructure config = this.crs.value(rname); + if (config == null) { + return null; + } + final javax.json.JsonObject jconfig; + if (config instanceof javax.json.JsonObject) { + jconfig = (javax.json.JsonObject) config; + } else { + return new JsonObject().put("members", new JsonArray()).put("type", "not-a-group"); + } + final javax.json.JsonObject repoSection = jconfig.containsKey(RepositoryHandler.REPO) + ? 
jconfig.getJsonObject(RepositoryHandler.REPO) : jconfig; + final String repoType = repoSection.getString("type", ""); + if (!repoType.endsWith("-group")) { + return new JsonObject().put("members", new JsonArray()).put("type", "not-a-group"); + } + final JsonArray members = new JsonArray(); + if (repoSection.containsKey("remotes")) { + final javax.json.JsonArray remotes = repoSection.getJsonArray("remotes"); + for (int idx = 0; idx < remotes.size(); idx++) { + final javax.json.JsonObject remote = remotes.getJsonObject(idx); + members.add(remote.getString("url", remote.toString())); } } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + return new JsonObject().put("members", members).put("type", repoType); + }, HandlerExecutor.get()).whenComplete((result, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else if (result == null) { + ApiResponse.sendError( + ctx, 404, "NOT_FOUND", + String.format("Repository '%s' not found", name) + ); + } else { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(result.encode()); + } + }); } } diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/RoleHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/RoleHandler.java index 29ab232f8..63b687357 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/RoleHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/RoleHandler.java @@ -18,7 +18,7 @@ import com.auto1.pantera.db.dao.PagedResult; import com.auto1.pantera.db.dao.RoleDao; import com.auto1.pantera.http.auth.AuthUser; -import com.auto1.pantera.http.trace.MdcPropagation; +import com.auto1.pantera.http.context.HandlerExecutor; import com.auto1.pantera.security.policy.Policy; import com.auto1.pantera.settings.users.CrudRoles; import io.vertx.core.json.JsonArray; @@ -28,6 +28,7 @@ import java.security.PermissionCollection; import 
java.util.Optional; import java.util.Set; +import java.util.concurrent.CompletableFuture; import javax.json.Json; import javax.json.JsonObject; @@ -140,11 +141,14 @@ private void listRoles(final RoutingContext ctx) { return; } final RoleDao dao = (RoleDao) this.roles; - ctx.vertx().>executeBlocking( - MdcPropagation.withMdc(() -> dao.listPaged(query, sortField, ascending, size, page * size)), - false - ).onSuccess( - result -> { + CompletableFuture.supplyAsync( + (java.util.function.Supplier>) + () -> dao.listPaged(query, sortField, ascending, size, page * size), + HandlerExecutor.get() + ).whenComplete((result, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { final JsonArray items = new JsonArray(); for (final JsonObject obj : result.items()) { items.add(new io.vertx.core.json.JsonObject(obj.toString())); @@ -154,9 +158,7 @@ private void listRoles(final RoutingContext ctx) { .putHeader("Content-Type", "application/json") .end(ApiResponse.paginated(items, page, size, result.total()).encode()); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** @@ -165,26 +167,24 @@ private void listRoles(final RoutingContext ctx) { */ private void getRole(final RoutingContext ctx) { final String rname = ctx.pathParam(RoleHandler.NAME); - ctx.vertx().>executeBlocking( - MdcPropagation.withMdc(() -> this.roles.get(rname)), - false - ).onSuccess( - opt -> { - if (opt.isPresent()) { - ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(opt.get().toString()); - } else { - ApiResponse.sendError( - ctx, 404, "NOT_FOUND", - String.format("Role '%s' not found", rname) - ); - } + CompletableFuture.supplyAsync( + (java.util.function.Supplier>) () -> this.roles.get(rname), + HandlerExecutor.get() + ).whenComplete((opt, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else if 
(opt.isPresent()) { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(opt.get().toString()); + } else { + ApiResponse.sendError( + ctx, 404, "NOT_FOUND", + String.format("Role '%s' not found", rname) + ); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** @@ -214,20 +214,17 @@ private void putRole(final RoutingContext ctx) { ); if (existing.isPresent() && perms.implies(RoleHandler.UPDATE) || existing.isEmpty() && perms.implies(RoleHandler.CREATE)) { - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.roles.addOrUpdate(body, rname); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync( + () -> this.roles.addOrUpdate(body, rname), + HandlerExecutor.get() + ).whenComplete((ignored, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { this.policyCache.invalidate(rname); ctx.response().setStatusCode(201).end(); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } else { ApiResponse.sendError(ctx, 403, "FORBIDDEN", "Insufficient permissions"); } @@ -239,20 +236,16 @@ private void putRole(final RoutingContext ctx) { */ private void deleteRole(final RoutingContext ctx) { final String rname = ctx.pathParam(RoleHandler.NAME); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.roles.remove(rname); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync( + () -> this.roles.remove(rname), + HandlerExecutor.get() + ).whenComplete((ignored, err) -> { + if (err == null) { this.policyCache.invalidate(rname); ctx.response().setStatusCode(200).end(); - } - ).onFailure( - err -> { - if (err instanceof IllegalStateException) { + } else { + final Throwable cause = err.getCause() != null ? 
err.getCause() : err; + if (cause instanceof IllegalStateException) { ApiResponse.sendError( ctx, 404, "NOT_FOUND", String.format("Role '%s' not found", rname) @@ -261,7 +254,7 @@ private void deleteRole(final RoutingContext ctx) { ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); } } - ); + }); } /** @@ -270,20 +263,16 @@ private void deleteRole(final RoutingContext ctx) { */ private void enableRole(final RoutingContext ctx) { final String rname = ctx.pathParam(RoleHandler.NAME); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.roles.enable(rname); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync( + () -> this.roles.enable(rname), + HandlerExecutor.get() + ).whenComplete((ignored, err) -> { + if (err == null) { this.policyCache.invalidate(rname); ctx.response().setStatusCode(200).end(); - } - ).onFailure( - err -> { - if (err instanceof IllegalStateException) { + } else { + final Throwable cause = err.getCause() != null ? 
err.getCause() : err; + if (cause instanceof IllegalStateException) { ApiResponse.sendError( ctx, 404, "NOT_FOUND", String.format("Role '%s' not found", rname) @@ -292,7 +281,7 @@ private void enableRole(final RoutingContext ctx) { ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); } } - ); + }); } /** @@ -301,20 +290,16 @@ private void enableRole(final RoutingContext ctx) { */ private void disableRole(final RoutingContext ctx) { final String rname = ctx.pathParam(RoleHandler.NAME); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.roles.disable(rname); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync( + () -> this.roles.disable(rname), + HandlerExecutor.get() + ).whenComplete((ignored, err) -> { + if (err == null) { this.policyCache.invalidate(rname); ctx.response().setStatusCode(200).end(); - } - ).onFailure( - err -> { - if (err instanceof IllegalStateException) { + } else { + final Throwable cause = err.getCause() != null ? 
err.getCause() : err; + if (cause instanceof IllegalStateException) { ApiResponse.sendError( ctx, 404, "NOT_FOUND", String.format("Role '%s' not found", rname) @@ -323,6 +308,6 @@ private void disableRole(final RoutingContext ctx) { ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); } } - ); + }); } } diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/SettingsHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/SettingsHandler.java index 089b863d2..8f937b192 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/SettingsHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/SettingsHandler.java @@ -13,11 +13,11 @@ import com.auto1.pantera.api.AuthzHandler; import com.auto1.pantera.api.ManageRepoSettings; import com.auto1.pantera.api.perms.ApiRolePermission; -import com.auto1.pantera.cooldown.CooldownSettings; +import com.auto1.pantera.cooldown.config.CooldownSettings; import com.auto1.pantera.db.dao.AuthProviderDao; import com.auto1.pantera.db.dao.SettingsDao; import com.auto1.pantera.http.client.HttpClientSettings; -import com.auto1.pantera.http.trace.MdcPropagation; +import com.auto1.pantera.http.context.HandlerExecutor; import com.auto1.pantera.misc.PanteraProperties; import com.auto1.pantera.security.policy.Policy; import com.auto1.pantera.settings.JwtSettings; @@ -32,6 +32,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.concurrent.CompletableFuture; import javax.json.Json; import javax.sql.DataSource; import org.eclipse.jetty.http.HttpStatus; @@ -190,17 +191,19 @@ public void register(final Router router) { * @param ctx Routing context */ private void getSettings(final RoutingContext ctx) { - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> this.buildFullSettings()), - false - ).onSuccess( - result -> ctx.response() - .setStatusCode(HttpStatus.OK_200) - .putHeader("Content-Type", "application/json") - .end(result.encode()) 
- ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + CompletableFuture.supplyAsync( + (java.util.function.Supplier) this::buildFullSettings, + HandlerExecutor.get() + ).whenComplete((result, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + ctx.response() + .setStatusCode(HttpStatus.OK_200) + .putHeader("Content-Type", "application/json") + .end(result.encode()); + } + }); } /** @@ -384,23 +387,21 @@ private void updateSection(final RoutingContext ctx) { } final String actor = ctx.user() != null ? ctx.user().principal().getString("sub", "system") : "system"; - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - // Convert vertx JsonObject to javax.json.JsonObject - final javax.json.JsonObject jobj = Json.createReader( - new java.io.StringReader(body.encode()) - ).readObject(); - this.settingsDao.put(section, jobj, actor); - return null; - }), - false - ).onSuccess( - ignored -> ctx.response().setStatusCode(HttpStatus.OK_200) - .putHeader("Content-Type", "application/json") - .end(new JsonObject().put("status", "saved").encode()) - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + CompletableFuture.runAsync(() -> { + // Convert vertx JsonObject to javax.json.JsonObject + final javax.json.JsonObject jobj = Json.createReader( + new java.io.StringReader(body.encode()) + ).readObject(); + this.settingsDao.put(section, jobj, actor); + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + ctx.response().setStatusCode(HttpStatus.OK_200) + .putHeader("Content-Type", "application/json") + .end(new JsonObject().put("status", "saved").encode()); + } + }); } /** @@ -426,45 +427,41 @@ private void toggleAuthProvider(final RoutingContext ctx) { return; } final boolean enabled = body.getBoolean("enabled"); 
- ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - // Refuse to disable protected providers (local, jwt-password). - // Enable is always allowed since it just restores the default. - if (!enabled) { - final String type = this.authProviderDao.typeOf(providerId); - if (type == null) { - throw new IllegalArgumentException("not_found"); - } - if (PROTECTED_PROVIDERS.contains(type)) { - throw new IllegalArgumentException("protected:" + type); - } + CompletableFuture.runAsync(() -> { + // Refuse to disable protected providers (local, jwt-password). + // Enable is always allowed since it just restores the default. + if (!enabled) { + final String type = this.authProviderDao.typeOf(providerId); + if (type == null) { + throw new IllegalArgumentException("not_found"); } - if (enabled) { - this.authProviderDao.enable(providerId); - } else { - this.authProviderDao.disable(providerId); + if (PROTECTED_PROVIDERS.contains(type)) { + throw new IllegalArgumentException("protected:" + type); } - return null; - }), - false - ).onSuccess( - ignored -> { + } + if (enabled) { + this.authProviderDao.enable(providerId); + } else { + this.authProviderDao.disable(providerId); + } + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err == null) { this.flushAuthCache(); ctx.response().setStatusCode(200) .putHeader("Content-Type", "application/json") .end(new JsonObject().put("status", "saved").encode()); - } - ).onFailure(err -> { - final String msg = err.getCause() != null - ? 
err.getCause().getMessage() : err.getMessage(); - if ("not_found".equals(msg)) { - ApiResponse.sendError(ctx, 404, "NOT_FOUND", "Auth provider not found"); - } else if (msg != null && msg.startsWith("protected:")) { - ApiResponse.sendError(ctx, 400, "BAD_REQUEST", - "Cannot disable the '" + msg.substring("protected:".length()) - + "' provider — it is required for fallback access."); } else { - ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + final Throwable cause = err.getCause() != null ? err.getCause() : err; + final String msg = cause.getMessage(); + if ("not_found".equals(msg)) { + ApiResponse.sendError(ctx, 404, "NOT_FOUND", "Auth provider not found"); + } else if (msg != null && msg.startsWith("protected:")) { + ApiResponse.sendError(ctx, 400, "BAD_REQUEST", + "Cannot disable the '" + msg.substring("protected:".length()) + + "' provider — it is required for fallback access."); + } else { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } } }); } @@ -490,17 +487,15 @@ private void createAuthProvider(final RoutingContext ctx) { final String type = body.getString("type").trim(); final int priority = body.getInteger("priority", 100); final JsonObject config = body.getJsonObject("config", new JsonObject()); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - final javax.json.JsonObject jcfg = Json.createReader( - new java.io.StringReader(config.encode()) - ).readObject(); - this.authProviderDao.put(type, priority, jcfg); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync(() -> { + final javax.json.JsonObject jcfg = Json.createReader( + new java.io.StringReader(config.encode()) + ).readObject(); + this.authProviderDao.put(type, priority, jcfg); + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + this.flushAuthCache(); + ctx.response().setStatusCode(201)
.putHeader("Content-Type", "application/json") @@ -509,9 +504,7 @@ private void createAuthProvider(final RoutingContext ctx) { .put("type", type) .encode()); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** @@ -532,35 +525,31 @@ private void deleteAuthProvider(final RoutingContext ctx) { ApiResponse.sendError(ctx, 400, "BAD_REQUEST", "Invalid provider ID"); return; } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - final String type = this.authProviderDao.typeOf(providerId); - if (type == null) { - throw new IllegalArgumentException("not_found"); - } - if (PROTECTED_PROVIDERS.contains(type)) { - throw new IllegalArgumentException("protected:" + type); - } - this.authProviderDao.delete(providerId); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync(() -> { + final String type = this.authProviderDao.typeOf(providerId); + if (type == null) { + throw new IllegalArgumentException("not_found"); + } + if (PROTECTED_PROVIDERS.contains(type)) { + throw new IllegalArgumentException("protected:" + type); + } + this.authProviderDao.delete(providerId); + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err == null) { this.flushAuthCache(); ctx.response().setStatusCode(204).end(); - } - ).onFailure(err -> { - final String msg = err.getCause() != null - ? err.getCause().getMessage() : err.getMessage(); - if ("not_found".equals(msg)) { - ApiResponse.sendError(ctx, 404, "NOT_FOUND", "Auth provider not found"); - } else if (msg != null && msg.startsWith("protected:")) { - ApiResponse.sendError(ctx, 400, "BAD_REQUEST", - "Cannot delete the '" + msg.substring("protected:".length()) - + "' provider — it is required for fallback access."); } else { - ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + final Throwable cause = err.getCause() != null ? 
err.getCause() : err; + final String msg = cause.getMessage(); + if ("not_found".equals(msg)) { + ApiResponse.sendError(ctx, 404, "NOT_FOUND", "Auth provider not found"); + } else if (msg != null && msg.startsWith("protected:")) { + ApiResponse.sendError(ctx, 400, "BAD_REQUEST", + "Cannot delete the '" + msg.substring("protected:".length()) + + "' provider — it is required for fallback access."); + } else { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } } }); } @@ -587,25 +576,21 @@ private void updateAuthProviderConfig(final RoutingContext ctx) { ApiResponse.sendError(ctx, 400, "BAD_REQUEST", "JSON body is required"); return; } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - final javax.json.JsonObject jobj = Json.createReader( - new java.io.StringReader(body.encode()) - ).readObject(); - this.authProviderDao.updateConfig(providerId, jobj); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync(() -> { + final javax.json.JsonObject jobj = Json.createReader( + new java.io.StringReader(body.encode()) + ).readObject(); + this.authProviderDao.updateConfig(providerId, jobj); + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + this.flushAuthCache(); + ctx.response().setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(new JsonObject().put("status", "saved").encode()); + } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/StorageAliasHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/StorageAliasHandler.java index cdda24166..bd103ff0a 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/StorageAliasHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/StorageAliasHandler.java @@ -17,7 +17,7 @@ import 
com.auto1.pantera.asto.blocking.BlockingStorage; import com.auto1.pantera.cache.StoragesCache; import com.auto1.pantera.db.dao.StorageAliasDao; -import com.auto1.pantera.http.trace.MdcPropagation; +import com.auto1.pantera.http.context.HandlerExecutor; import com.auto1.pantera.security.policy.Policy; import io.vertx.core.json.JsonArray; import io.vertx.ext.web.Router; @@ -25,6 +25,7 @@ import java.io.StringReader; import java.util.Collection; import java.util.List; +import java.util.concurrent.CompletableFuture; import javax.json.Json; import javax.json.JsonObject; @@ -113,22 +114,21 @@ public void register(final Router router) { * @param ctx Routing context */ private void listGlobalAliases(final RoutingContext ctx) { - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - if (this.aliasDao != null) { - return aliasesToArray(this.aliasDao.listGlobal()); - } - return yamlAliasesToArray(new ManageStorageAliases(this.asto).list()); - }), - false - ).onSuccess( - arr -> ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(arr.encode()) - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + CompletableFuture.supplyAsync((java.util.function.Supplier) () -> { + if (this.aliasDao != null) { + return aliasesToArray(this.aliasDao.listGlobal()); + } + return yamlAliasesToArray(new ManageStorageAliases(this.asto).list()); + }, HandlerExecutor.get()).whenComplete((arr, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(arr.encode()); + } + }); } /** @@ -142,25 +142,23 @@ private void putGlobalAlias(final RoutingContext ctx) { if (body == null) { return; } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - if (this.aliasDao != null) { - this.aliasDao.put(name, null, body); - } - try { - new 
ManageStorageAliases(this.asto).add(name, body); - } catch (final Exception ignored) { - // YAML write is best-effort when DB is primary - } - this.storagesCache.invalidateAll(); - return null; - }), - false - ).onSuccess( - ignored -> ctx.response().setStatusCode(200).end() - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + CompletableFuture.runAsync(() -> { + if (this.aliasDao != null) { + this.aliasDao.put(name, null, body); + } + try { + new ManageStorageAliases(this.asto).add(name, body); + } catch (final Exception ignored) { + // YAML write is best-effort when DB is primary + } + this.storagesCache.invalidateAll(); + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + ctx.response().setStatusCode(200).end(); + } + }); } /** @@ -170,42 +168,39 @@ private void putGlobalAlias(final RoutingContext ctx) { */ private void deleteGlobalAlias(final RoutingContext ctx) { final String name = ctx.pathParam("name"); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - if (this.aliasDao != null) { - final List repos = this.aliasDao.findReposUsing(name); - if (repos != null && !repos.isEmpty()) { - throw new DependencyException( - String.format( - "Cannot delete alias '%s': used by repositories: %s", - name, String.join(", ", repos) - ) - ); - } - this.aliasDao.delete(name, null); - } - try { - new ManageStorageAliases(this.asto).remove(name); - } catch (final Exception ignored) { - // YAML delete is best-effort when DB is primary + CompletableFuture.runAsync(() -> { + if (this.aliasDao != null) { + final List repos = this.aliasDao.findReposUsing(name); + if (repos != null && !repos.isEmpty()) { + throw new DependencyException( + String.format( + "Cannot delete alias '%s': used by repositories: %s", + name, String.join(", ", repos) + ) + ); } - this.storagesCache.invalidateAll(); - return null; - }), - 
false - ).onSuccess( - ignored -> ctx.response().setStatusCode(200).end() - ).onFailure( - err -> { - if (err instanceof DependencyException) { - ApiResponse.sendError(ctx, 409, "CONFLICT", err.getMessage()); - } else if (err instanceof IllegalStateException) { - ApiResponse.sendError(ctx, 404, "NOT_FOUND", err.getMessage()); + this.aliasDao.delete(name, null); + } + try { + new ManageStorageAliases(this.asto).remove(name); + } catch (final Exception ignored) { + // YAML delete is best-effort when DB is primary + } + this.storagesCache.invalidateAll(); + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err == null) { + ctx.response().setStatusCode(200).end(); + } else { + final Throwable cause = err.getCause() != null ? err.getCause() : err; + if (cause instanceof DependencyException) { + ApiResponse.sendError(ctx, 409, "CONFLICT", cause.getMessage()); + } else if (cause instanceof IllegalStateException) { + ApiResponse.sendError(ctx, 404, "NOT_FOUND", cause.getMessage()); } else { ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); } } - ); + }); } /** @@ -215,24 +210,23 @@ private void deleteGlobalAlias(final RoutingContext ctx) { */ private void listRepoAliases(final RoutingContext ctx) { final String repoName = ctx.pathParam("name"); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - if (this.aliasDao != null) { - return aliasesToArray(this.aliasDao.listForRepo(repoName)); - } - return yamlAliasesToArray( - new ManageStorageAliases(new Key.From(repoName), this.asto).list() - ); - }), - false - ).onSuccess( - arr -> ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(arr.encode()) - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + CompletableFuture.supplyAsync((java.util.function.Supplier) () -> { + if (this.aliasDao != null) { + return aliasesToArray(this.aliasDao.listForRepo(repoName)); + } + return yamlAliasesToArray( + new 
ManageStorageAliases(new Key.From(repoName), this.asto).list() + ); + }, HandlerExecutor.get()).whenComplete((arr, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(arr.encode()); + } + }); } /** @@ -247,26 +241,24 @@ private void putRepoAlias(final RoutingContext ctx) { if (body == null) { return; } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - if (this.aliasDao != null) { - this.aliasDao.put(aliasName, repoName, body); - } - try { - new ManageStorageAliases(new Key.From(repoName), this.asto) - .add(aliasName, body); - } catch (final Exception ignored) { - // YAML write is best-effort when DB is primary - } - this.storagesCache.invalidateAll(); - return null; - }), - false - ).onSuccess( - ignored -> ctx.response().setStatusCode(200).end() - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + CompletableFuture.runAsync(() -> { + if (this.aliasDao != null) { + this.aliasDao.put(aliasName, repoName, body); + } + try { + new ManageStorageAliases(new Key.From(repoName), this.asto) + .add(aliasName, body); + } catch (final Exception ignored) { + // YAML write is best-effort when DB is primary + } + this.storagesCache.invalidateAll(); + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { + ctx.response().setStatusCode(200).end(); + } + }); } /** @@ -276,32 +268,29 @@ private void putRepoAlias(final RoutingContext ctx) { private void deleteRepoAlias(final RoutingContext ctx) { final String repoName = ctx.pathParam("name"); final String aliasName = ctx.pathParam("alias"); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - if (this.aliasDao != null) { - this.aliasDao.delete(aliasName, repoName); - } - try { - new 
ManageStorageAliases(new Key.From(repoName), this.asto) - .remove(aliasName); - } catch (final Exception ignored) { - // YAML delete is best-effort when DB is primary - } - this.storagesCache.invalidateAll(); - return null; - }), - false - ).onSuccess( - ignored -> ctx.response().setStatusCode(200).end() - ).onFailure( - err -> { - if (err instanceof IllegalStateException) { - ApiResponse.sendError(ctx, 404, "NOT_FOUND", err.getMessage()); + CompletableFuture.runAsync(() -> { + if (this.aliasDao != null) { + this.aliasDao.delete(aliasName, repoName); + } + try { + new ManageStorageAliases(new Key.From(repoName), this.asto) + .remove(aliasName); + } catch (final Exception ignored) { + // YAML delete is best-effort when DB is primary + } + this.storagesCache.invalidateAll(); + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err == null) { + ctx.response().setStatusCode(200).end(); + } else { + final Throwable cause = err.getCause() != null ? err.getCause() : err; + if (cause instanceof IllegalStateException) { + ApiResponse.sendError(ctx, 404, "NOT_FOUND", cause.getMessage()); } else { ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); } } - ); + }); } /** diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/UserHandler.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/UserHandler.java index 8d474fe2f..93a24b67a 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/api/v1/UserHandler.java +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/UserHandler.java @@ -21,8 +21,8 @@ import com.auto1.pantera.db.dao.UserTokenDao; import com.auto1.pantera.http.auth.AuthUser; import com.auto1.pantera.http.auth.Authentication; +import com.auto1.pantera.http.context.HandlerExecutor; import com.auto1.pantera.http.log.EcsLogger; -import com.auto1.pantera.http.trace.MdcPropagation; import com.auto1.pantera.security.policy.Policy; import com.auto1.pantera.settings.PanteraSecurity; import 
com.auto1.pantera.settings.cache.PanteraCaches; @@ -34,6 +34,7 @@ import java.security.PermissionCollection; import java.util.Optional; import java.util.Set; +import java.util.concurrent.CompletableFuture; import javax.json.Json; import javax.json.JsonObject; @@ -99,6 +100,16 @@ public final class UserHandler { */ private final UserTokenDao tokenDao; + /** + * Cached filter for the local-enabled flag check, if wired. When + * an admin toggles enabled state (update / enable / disable / delete) + * we must drop the per-user L1/L2 cache entry so the next + * authentication reflects the new state cluster-wide. + * May be {@code null} when the auth chain was built without a DB. + * @since 2.2.0 + */ + private final com.auto1.pantera.auth.CachedLocalEnabledFilter enabledFilter; + /** * Ctor. * @param users Crud users object @@ -107,11 +118,26 @@ public final class UserHandler { */ public UserHandler(final CrudUsers users, final PanteraCaches caches, final PanteraSecurity security) { - this(users, caches, security, null, null); + this(users, caches, security, null, null, null); + } + + /** + * Ctor with token revocation wiring (no enabled-filter invalidation). + * Kept for callers that don't have the filter reference. + * @param users Crud users object + * @param caches Pantera caches + * @param security Pantera security + * @param blocklist Revocation blocklist; may be {@code null} + * @param tokenDao Token DAO; may be {@code null} + */ + public UserHandler(final CrudUsers users, final PanteraCaches caches, + final PanteraSecurity security, final RevocationBlocklist blocklist, + final UserTokenDao tokenDao) { + this(users, caches, security, blocklist, tokenDao, null); } /** - * Full ctor with token revocation wiring. + * Full ctor. 
* @param users Crud users object * @param caches Pantera caches * @param security Pantera security @@ -119,10 +145,15 @@ public UserHandler(final CrudUsers users, final PanteraCaches caches, * on user disable; may be {@code null} * @param tokenDao Token DAO for refresh / API token revocation on * user disable; may be {@code null} + * @param enabledFilter Cached local-enabled filter whose per-user + * entry is invalidated on enable / disable / update / delete; + * may be {@code null} when not wired + * @checkstyle ParameterNumberCheck (5 lines) */ public UserHandler(final CrudUsers users, final PanteraCaches caches, final PanteraSecurity security, final RevocationBlocklist blocklist, - final UserTokenDao tokenDao) { + final UserTokenDao tokenDao, + final com.auto1.pantera.auth.CachedLocalEnabledFilter enabledFilter) { this.users = users; this.ucache = caches.usersCache(); this.pcache = caches.policyCache(); @@ -130,6 +161,20 @@ public UserHandler(final CrudUsers users, final PanteraCaches caches, this.policy = security.policy(); this.blocklist = blocklist; this.tokenDao = tokenDao; + this.enabledFilter = enabledFilter; + } + + /** + * Invalidate the cached enabled-flag entry for the given username, + * if the filter is wired. Broadcasts to peer nodes via pub/sub + * inside {@link com.auto1.pantera.auth.CachedLocalEnabledFilter#invalidate(String)}. 
+ * + * @param uname Username + */ + private void invalidateEnabled(final String uname) { + if (this.enabledFilter != null) { + this.enabledFilter.invalidate(uname); + } } /** @@ -195,13 +240,14 @@ private void listUsers(final RoutingContext ctx) { return; } final UserDao dao = (UserDao) this.users; - ctx.vertx().>executeBlocking( - MdcPropagation.withMdc( - () -> dao.listPaged(query, sortField, ascending, size, page * size) - ), - false - ).onSuccess( - result -> { + CompletableFuture.supplyAsync( + (java.util.function.Supplier>) + () -> dao.listPaged(query, sortField, ascending, size, page * size), + HandlerExecutor.get() + ).whenComplete((result, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { final JsonArray items = new JsonArray(); for (final JsonObject obj : result.items()) { items.add(new io.vertx.core.json.JsonObject(obj.toString())); @@ -211,9 +257,7 @@ private void listUsers(final RoutingContext ctx) { .putHeader("Content-Type", "application/json") .end(ApiResponse.paginated(items, page, size, result.total()).encode()); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** @@ -222,26 +266,24 @@ private void listUsers(final RoutingContext ctx) { */ private void getUser(final RoutingContext ctx) { final String uname = ctx.pathParam(UserHandler.NAME); - ctx.vertx().>executeBlocking( - MdcPropagation.withMdc(() -> this.users.get(uname)), - false - ).onSuccess( - opt -> { - if (opt.isPresent()) { - ctx.response() - .setStatusCode(200) - .putHeader("Content-Type", "application/json") - .end(opt.get().toString()); - } else { - ApiResponse.sendError( - ctx, 404, "NOT_FOUND", - String.format("User '%s' not found", uname) - ); - } + CompletableFuture.supplyAsync( + (java.util.function.Supplier>) () -> this.users.get(uname), + HandlerExecutor.get() + ).whenComplete((opt, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, 
"INTERNAL_ERROR", err.getMessage()); + } else if (opt.isPresent()) { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(opt.get().toString()); + } else { + ApiResponse.sendError( + ctx, 404, "NOT_FOUND", + String.format("User '%s' not found", uname) + ); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } /** @@ -284,21 +326,19 @@ private void putUser(final RoutingContext ctx) { ); if (existing.isPresent() && perms.implies(UserHandler.UPDATE) || existing.isEmpty() && perms.implies(UserHandler.CREATE)) { - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.users.addOrUpdate(body, uname); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync( + () -> this.users.addOrUpdate(body, uname), + HandlerExecutor.get() + ).whenComplete((ignored, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); + } else { this.ucache.invalidate(uname); this.pcache.invalidate(uname); + this.invalidateEnabled(uname); ctx.response().setStatusCode(201).end(); } - ).onFailure( - err -> ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()) - ); + }); } else { ApiResponse.sendError(ctx, 403, "FORBIDDEN", "Insufficient permissions"); } @@ -310,21 +350,18 @@ private void putUser(final RoutingContext ctx) { */ private void deleteUser(final RoutingContext ctx) { final String uname = ctx.pathParam(UserHandler.NAME); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.users.remove(uname); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync( + () -> this.users.remove(uname), + HandlerExecutor.get() + ).whenComplete((ignored, err) -> { + if (err == null) { this.ucache.invalidate(uname); this.pcache.invalidate(uname); + this.invalidateEnabled(uname); ctx.response().setStatusCode(200).end(); - } - ).onFailure( - err -> { - if (err instanceof 
IllegalStateException) { + } else { + final Throwable cause = err.getCause() != null ? err.getCause() : err; + if (cause instanceof IllegalStateException) { ApiResponse.sendError( ctx, 404, "NOT_FOUND", String.format("User '%s' not found", uname) @@ -333,7 +370,7 @@ private void deleteUser(final RoutingContext ctx) { ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); } } - ); + }); } /** @@ -384,14 +421,11 @@ private void alterPassword(final RoutingContext ctx) { return; } } - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.users.alterPassword(uname, body); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync( + () -> this.users.alterPassword(uname, body), + HandlerExecutor.get() + ).whenComplete((ignored, err) -> { + if (err == null) { // ucache is a PublishingCleanable wrapping CachedUsers, so // an instanceof check on CachedUsers is always false here. // Cleanable.invalidate(key) delegates to CachedUsers.invalidate @@ -403,15 +437,18 @@ private void alterPassword(final RoutingContext ctx) { // Policy cache may contain stale role/enabled state for this // user; invalidate that too so subsequent requests see fresh data. this.pcache.invalidate(uname); + // Enabled-flag cache is keyed by username — password + // change doesn't flip enabled, but keeping it in sync + // here is defensive and cheap. + this.invalidateEnabled(uname); ctx.response().setStatusCode(200).end(); - } - ).onFailure( - err -> { - // Vert.x wraps the underlying exception in CompletionException; - // unwrap to get the original from UserDao.alterPassword. + } else { + // CompletableFuture wraps the underlying exception in + // CompletionException; unwrap to get the original from + // UserDao.alterPassword. final Throwable cause = err.getCause() != null ? 
err.getCause() : err; if (cause instanceof IllegalArgumentException) { - // PasswordPolicy validation failure → 400 with the message + // PasswordPolicy validation failure -> 400 with the message ApiResponse.sendError( ctx, 400, "WEAK_PASSWORD", cause.getMessage() ); @@ -424,7 +461,7 @@ private void alterPassword(final RoutingContext ctx) { ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); } } - ); + }); } /** @@ -433,21 +470,18 @@ private void alterPassword(final RoutingContext ctx) { */ private void enableUser(final RoutingContext ctx) { final String uname = ctx.pathParam(UserHandler.NAME); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.users.enable(uname); - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync( + () -> this.users.enable(uname), + HandlerExecutor.get() + ).whenComplete((ignored, err) -> { + if (err == null) { this.ucache.invalidate(uname); this.pcache.invalidate(uname); + this.invalidateEnabled(uname); ctx.response().setStatusCode(200).end(); - } - ).onFailure( - err -> { - if (err instanceof IllegalStateException) { + } else { + final Throwable cause = err.getCause() != null ? err.getCause() : err; + if (cause instanceof IllegalStateException) { ApiResponse.sendError( ctx, 404, "NOT_FOUND", String.format("User '%s' not found", uname) @@ -456,7 +490,7 @@ private void enableUser(final RoutingContext ctx) { ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); } } - ); + }); } /** @@ -465,44 +499,40 @@ private void enableUser(final RoutingContext ctx) { */ private void disableUser(final RoutingContext ctx) { final String uname = ctx.pathParam(UserHandler.NAME); - ctx.vertx().executeBlocking( - MdcPropagation.withMdc(() -> { - this.users.disable(uname); - // Immediate token revocation — without this, the - // user's existing access tokens, refresh tokens, and - // API tokens would keep working until expiry. 
The - // per-request isEnabled check in UnifiedJwtAuthHandler - // is the safety net (fires on the next request), but - // explicit revocation is cheaper, synchronous, and - // cluster-wide via the blocklist pub/sub. - if (this.blocklist != null) { - // 7 days covers the default refresh-token TTL; any - // access token older than that is already expired - // by the JWT's own exp claim. - this.blocklist.revokeUser(uname, 7 * 24 * 3600); - } - if (this.tokenDao != null) { - final int revoked = this.tokenDao.revokeAllForUser(uname); - EcsLogger.info("com.auto1.pantera.api.v1") - .message("User disabled: revoked " + revoked + " tokens") - .eventCategory("iam") - .eventAction("user_disable") - .eventOutcome("success") - .field("user.name", uname) - .log(); - } - return null; - }), - false - ).onSuccess( - ignored -> { + CompletableFuture.runAsync(() -> { + this.users.disable(uname); + // Immediate token revocation — without this, the + // user's existing access tokens, refresh tokens, and + // API tokens would keep working until expiry. The + // per-request isEnabled check in UnifiedJwtAuthHandler + // is the safety net (fires on the next request), but + // explicit revocation is cheaper, synchronous, and + // cluster-wide via the blocklist pub/sub. + if (this.blocklist != null) { + // 7 days covers the default refresh-token TTL; any + // access token older than that is already expired + // by the JWT's own exp claim. 
+ this.blocklist.revokeUser(uname, 7 * 24 * 3600); + } + if (this.tokenDao != null) { + final int revoked = this.tokenDao.revokeAllForUser(uname); + EcsLogger.info("com.auto1.pantera.api.v1") + .message("User disabled: revoked " + revoked + " tokens") + .eventCategory("iam") + .eventAction("user_disable") + .eventOutcome("success") + .field("user.name", uname) + .log(); + } + }, HandlerExecutor.get()).whenComplete((ignored, err) -> { + if (err == null) { this.ucache.invalidate(uname); this.pcache.invalidate(uname); + this.invalidateEnabled(uname); ctx.response().setStatusCode(200).end(); - } - ).onFailure( - err -> { - if (err instanceof IllegalStateException) { + } else { + final Throwable cause = err.getCause() != null ? err.getCause() : err; + if (cause instanceof IllegalStateException) { ApiResponse.sendError( ctx, 404, "NOT_FOUND", String.format("User '%s' not found", uname) @@ -511,6 +541,6 @@ private void disableUser(final RoutingContext ctx) { ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", err.getMessage()); } } - ); + }); } } diff --git a/pantera-main/src/main/java/com/auto1/pantera/api/v1/admin/NegativeCacheAdminResource.java b/pantera-main/src/main/java/com/auto1/pantera/api/v1/admin/NegativeCacheAdminResource.java new file mode 100644 index 000000000..4f2a6e865 --- /dev/null +++ b/pantera-main/src/main/java/com/auto1/pantera/api/v1/admin/NegativeCacheAdminResource.java @@ -0,0 +1,466 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.api.v1.admin; + +import com.auto1.pantera.api.AuthzHandler; +import com.auto1.pantera.api.perms.ApiAdminPermission; +import com.auto1.pantera.api.v1.ApiResponse; +import com.auto1.pantera.http.cache.NegativeCache; +import com.auto1.pantera.http.cache.NegativeCacheKey; +import com.auto1.pantera.http.cache.NegativeCacheRegistry; +import com.auto1.pantera.http.context.HandlerExecutor; +import com.auto1.pantera.http.log.EcsLogger; +import com.auto1.pantera.security.policy.Policy; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.stats.CacheStats; +import io.vertx.core.json.JsonArray; +import io.vertx.core.json.JsonObject; +import io.vertx.ext.web.Router; +import io.vertx.ext.web.RoutingContext; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Admin REST resource for negative cache inspection and invalidation. + * + *

Provides five endpoints under {@code /api/v1/admin/neg-cache/} for
+ * platform engineers to investigate 404-shadow reports without SSH access:
+ * <ul>
+ * <li>GET /api/v1/admin/neg-cache — paginated L1 entry listing</li>
+ * <li>GET /api/v1/admin/neg-cache/probe — single-key presence check</li>
+ * <li>POST /api/v1/admin/neg-cache/invalidate — single-key invalidation</li>
+ * <li>POST /api/v1/admin/neg-cache/invalidate-pattern — pattern invalidation (rate-limited)</li>
+ * <li>GET /api/v1/admin/neg-cache/stats — per-scope hit/miss/size counters</li>
+ * </ul>
+ *
+ * <p>
      All endpoints require the {@code admin} role via {@link ApiAdminPermission#ADMIN}. + * + * @since 2.2.0 + * @checkstyle ClassDataAbstractionCouplingCheck (500 lines) + */ +@SuppressWarnings({"PMD.TooManyMethods", "PMD.ExcessiveImports"}) +public final class NegativeCacheAdminResource { + + /** + * Logger name for this resource. + */ + private static final String LOGGER = + "com.auto1.pantera.api.v1.admin"; + + /** + * Maximum pattern invalidations per admin user per minute. + */ + private static final int RATE_LIMIT_PER_MINUTE = 10; + + /** + * Rate-limit window in milliseconds (1 minute). + */ + private static final long RATE_WINDOW_MS = 60_000L; + + /** + * Security policy for authorization. + */ + private final Policy policy; + + /** + * Shared negative cache instance. + */ + private final NegativeCache cache; + + /** + * Rate-limit tracker: username -> list of timestamps. + */ + private final ConcurrentHashMap> rateLimits; + + /** + * Ctor. + * @param policy Security policy + */ + public NegativeCacheAdminResource(final Policy policy) { + this.policy = policy; + this.cache = NegativeCacheRegistry.instance().sharedCache(); + this.rateLimits = new ConcurrentHashMap<>(); + } + + /** + * Register neg-cache admin routes on the router. 
+ * @param router Vert.x router + */ + public void register(final Router router) { + final AuthzHandler adminAuthz = new AuthzHandler( + this.policy, ApiAdminPermission.ADMIN + ); + router.get("/api/v1/admin/neg-cache") + .handler(adminAuthz).handler(this::listEntries); + router.get("/api/v1/admin/neg-cache/probe") + .handler(adminAuthz).handler(this::probe); + router.post("/api/v1/admin/neg-cache/invalidate") + .handler(adminAuthz).handler(this::invalidateSingle); + router.post("/api/v1/admin/neg-cache/invalidate-pattern") + .handler(adminAuthz).handler(this::invalidatePattern); + router.get("/api/v1/admin/neg-cache/stats") + .handler(adminAuthz).handler(this::stats); + } + + /** + * GET /api/v1/admin/neg-cache β€” paginated list of L1 entries. + * Query params: scope, repoType, artifactName, version, page, pageSize. + * @param ctx Routing context + */ + private void listEntries(final RoutingContext ctx) { + CompletableFuture.supplyAsync(() -> { + final String filterScope = ctx.queryParams().get("scope"); + final String filterType = ctx.queryParams().get("repoType"); + final String filterName = ctx.queryParams().get("artifactName"); + final String filterVersion = ctx.queryParams().get("version"); + final int page = ApiResponse.intParam( + ctx.queryParams().get("page"), 0 + ); + final int pageSize = ApiResponse.clampSize( + ApiResponse.intParam(ctx.queryParams().get("pageSize"), 20) + ); + final Cache l1Cache = extractL1Cache(this.cache); + final List entries = new ArrayList<>(); + if (l1Cache != null) { + for (final String flat : l1Cache.asMap().keySet()) { + final String[] parts = flat.split(":", 4); + if (parts.length < 4) { + continue; + } + final String scope = parts[0]; + final String repoType = parts[1]; + final String artifactName = parts[2]; + final String version = parts[3]; + if (filterScope != null && !filterScope.isEmpty() + && !scope.contains(filterScope)) { + continue; + } + if (filterType != null && !filterType.isEmpty() + && 
!repoType.contains(filterType)) { + continue; + } + if (filterName != null && !filterName.isEmpty() + && !artifactName.contains(filterName)) { + continue; + } + if (filterVersion != null && !filterVersion.isEmpty() + && !version.contains(filterVersion)) { + continue; + } + entries.add(new JsonObject() + .put("key", new JsonObject() + .put("scope", scope) + .put("repoType", repoType) + .put("artifactName", artifactName) + .put("artifactVersion", version)) + .put("tier", "L1") + .put("ttlRemainingMs", -1L) + ); + } + } + final int total = entries.size(); + final JsonArray page1 = ApiResponse.sliceToArray(entries, page, pageSize); + return ApiResponse.paginated(page1, page, pageSize, total); + }, HandlerExecutor.get()).whenComplete((result, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", + err.getMessage()); + } else { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(result.encode()); + } + }); + } + + /** + * GET /api/v1/admin/neg-cache/probe?key=scope:type:name:version + * Returns presence check across tiers. 
+ * @param ctx Routing context + */ + private void probe(final RoutingContext ctx) { + final String keyParam = ctx.queryParams().get("key"); + if (keyParam == null || keyParam.isBlank()) { + ApiResponse.sendError(ctx, 400, "BAD_REQUEST", + "Query param 'key' is required (format: scope:type:name:version)"); + return; + } + final String[] parts = keyParam.split(":", 4); + if (parts.length < 4) { + ApiResponse.sendError(ctx, 400, "BAD_REQUEST", + "Key must have format scope:repoType:artifactName:version"); + return; + } + final NegativeCacheKey nck = new NegativeCacheKey( + parts[0], parts[1], parts[2], parts[3] + ); + this.cache.isKnown404Async(nck).whenComplete((found, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", + err.getMessage()); + return; + } + final JsonObject response = new JsonObject() + .put("present", found); + if (found) { + final JsonArray tiers = new JsonArray(); + // Check L1 synchronously + if (this.cache.isKnown404(nck)) { + tiers.add("L1"); + } + // isKnown404Async already checked L1+L2; if found but not + // in L1 alone, it was promoted from L2 + if (tiers.isEmpty()) { + tiers.add("L2"); + } + response.put("tiers", tiers); + } + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(response.encode()); + }); + } + + /** + * POST /api/v1/admin/neg-cache/invalidate + * Body: {scope, repoType, artifactName, version} + * @param ctx Routing context + */ + private void invalidateSingle(final RoutingContext ctx) { + final JsonObject body = ctx.body().asJsonObject(); + if (body == null) { + ApiResponse.sendError(ctx, 400, "BAD_REQUEST", + "JSON body is required"); + return; + } + final String scope = body.getString("scope"); + final String repoType = body.getString("repoType"); + final String artifactName = body.getString("artifactName"); + final String version = body.getString("version", ""); + if (scope == null || repoType == null || artifactName == null) { + 
ApiResponse.sendError(ctx, 400, "BAD_REQUEST", + "Fields scope, repoType, artifactName are required"); + return; + } + final NegativeCacheKey nck = new NegativeCacheKey( + scope, repoType, artifactName, version + ); + final boolean wasInL1 = this.cache.isKnown404(nck); + this.cache.invalidate(nck); + final String user = extractUsername(ctx); + EcsLogger.warn(LOGGER) + .message("Manual neg-cache invalidation: single key") + .eventCategory("configuration") + .eventAction("neg_cache_invalidate") + .eventOutcome("success") + .field("manual", true) + .field("user.name", user) + .field("neg_cache.scope", scope) + .field("neg_cache.repo_type", repoType) + .field("neg_cache.artifact_name", artifactName) + .field("neg_cache.version", version) + .field("neg_cache.l1_invalidated", wasInL1 ? 1 : 0) + .log(); + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(new JsonObject() + .put("invalidated", new JsonObject() + .put("l1", wasInL1 ? 1 : 0) + .put("l2", wasInL1 ? 1 : 0)) + .encode()); + } + + /** + * POST /api/v1/admin/neg-cache/invalidate-pattern + * Body: {scope?, repoType?, artifactName?, version?} + * Rate-limited: 10 per minute per admin user. 
+ * @param ctx Routing context + */ + @SuppressWarnings("PMD.CognitiveComplexity") + private void invalidatePattern(final RoutingContext ctx) { + final String user = extractUsername(ctx); + if (!checkRateLimit(user)) { + ApiResponse.sendError(ctx, 429, "RATE_LIMITED", + "Pattern invalidation is limited to " + + RATE_LIMIT_PER_MINUTE + " requests per minute"); + return; + } + final JsonObject body = ctx.body().asJsonObject(); + if (body == null) { + ApiResponse.sendError(ctx, 400, "BAD_REQUEST", + "JSON body is required"); + return; + } + final String filterScope = body.getString("scope"); + final String filterType = body.getString("repoType"); + final String filterName = body.getString("artifactName"); + final String filterVersion = body.getString("version"); + CompletableFuture.supplyAsync(() -> { + final Cache l1Cache = extractL1Cache(this.cache); + final AtomicInteger l1Count = new AtomicInteger(0); + final List keysToInvalidate = new ArrayList<>(); + if (l1Cache != null) { + for (final String flat : new ArrayList<>(l1Cache.asMap().keySet())) { + final String[] parts = flat.split(":", 4); + if (parts.length < 4) { + continue; + } + if (matchesFilter(parts[0], filterScope) + && matchesFilter(parts[1], filterType) + && matchesFilter(parts[2], filterName) + && matchesFilter(parts[3], filterVersion)) { + keysToInvalidate.add(new NegativeCacheKey( + parts[0], parts[1], parts[2], parts[3] + )); + l1Count.incrementAndGet(); + } + } + } + if (!keysToInvalidate.isEmpty()) { + this.cache.invalidateBatch(keysToInvalidate).join(); + } + return new int[]{l1Count.get(), l1Count.get()}; + }, HandlerExecutor.get()).whenComplete((counts, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", + err.getMessage()); + return; + } + EcsLogger.warn(LOGGER) + .message("Manual neg-cache invalidation: pattern") + .eventCategory("configuration") + .eventAction("neg_cache_invalidate") + .eventOutcome("success") + .field("manual", true) + .field("user.name", 
user) + .field("neg_cache.filter.scope", filterScope) + .field("neg_cache.filter.repo_type", filterType) + .field("neg_cache.filter.artifact_name", filterName) + .field("neg_cache.filter.version", filterVersion) + .field("neg_cache.l1_invalidated", counts[0]) + .field("neg_cache.l2_invalidated", counts[1]) + .log(); + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(new JsonObject() + .put("invalidated", new JsonObject() + .put("l1", counts[0]) + .put("l2", counts[1])) + .encode()); + }); + } + + /** + * GET /api/v1/admin/neg-cache/stats β€” cache statistics. + * @param ctx Routing context + */ + private void stats(final RoutingContext ctx) { + CompletableFuture.supplyAsync(() -> { + final CacheStats cstats = this.cache.stats(); + return new JsonObject() + .put("enabled", this.cache.isEnabled()) + .put("l1Size", this.cache.size()) + .put("hitCount", cstats.hitCount()) + .put("missCount", cstats.missCount()) + .put("hitRate", cstats.hitRate()) + .put("evictionCount", cstats.evictionCount()) + .put("requestCount", cstats.requestCount()); + }, HandlerExecutor.get()).whenComplete((result, err) -> { + if (err != null) { + ApiResponse.sendError(ctx, 500, "INTERNAL_ERROR", + err.getMessage()); + } else { + ctx.response() + .setStatusCode(200) + .putHeader("Content-Type", "application/json") + .end(result.encode()); + } + }); + } + + /** + * Check and record rate limit for pattern invalidation. 
+ * @param user Username + * @return true if within limit, false if exceeded + */ + private boolean checkRateLimit(final String user) { + final long now = System.currentTimeMillis(); + final List timestamps = this.rateLimits.computeIfAbsent( + user, k -> new ArrayList<>() + ); + synchronized (timestamps) { + timestamps.removeIf(ts -> now - ts > RATE_WINDOW_MS); + if (timestamps.size() >= RATE_LIMIT_PER_MINUTE) { + return false; + } + timestamps.add(now); + return true; + } + } + + /** + * Extract the L1 Caffeine cache from NegativeCache via reflection. + * This is an admin-only diagnostic operation; reflection is acceptable. + * @param negCache NegativeCache instance + * @return The underlying Caffeine cache, or null if inaccessible + */ + @SuppressWarnings("unchecked") + private static Cache extractL1Cache( + final NegativeCache negCache + ) { + try { + final Field field = NegativeCache.class.getDeclaredField( + "notFoundCache" + ); + field.setAccessible(true); + return (Cache) field.get(negCache); + } catch (final NoSuchFieldException | IllegalAccessException ex) { + EcsLogger.warn(LOGGER) + .message("Cannot access L1 cache for admin listing") + .error(ex) + .log(); + return null; + } + } + + /** + * Check if a value matches a filter (null filter = match all). + * @param value Value to check + * @param filter Filter string (null or empty = match all) + * @return true if matches + */ + private static boolean matchesFilter( + final String value, final String filter + ) { + return filter == null || filter.isEmpty() || value.contains(filter); + } + + /** + * Extract username from routing context. 
+ * @param ctx Routing context + * @return Username or "unknown" + */ + private static String extractUsername(final RoutingContext ctx) { + if (ctx.user() != null && ctx.user().principal() != null) { + return ctx.user().principal().getString("sub", "unknown"); + } + return "unknown"; + } +} diff --git a/pantera-main/src/main/java/com/auto1/pantera/auth/CachedLocalEnabledFilter.java b/pantera-main/src/main/java/com/auto1/pantera/auth/CachedLocalEnabledFilter.java new file mode 100644 index 000000000..0f3fa2ffc --- /dev/null +++ b/pantera-main/src/main/java/com/auto1/pantera/auth/CachedLocalEnabledFilter.java @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.auth; + +import com.auto1.pantera.cache.CacheInvalidationPubSub; +import com.auto1.pantera.cache.GlobalCacheConfig; +import com.auto1.pantera.cache.ValkeyConnection; +import com.auto1.pantera.http.auth.AuthUser; +import com.auto1.pantera.http.auth.Authentication; +import com.auto1.pantera.http.log.EcsLogger; +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import io.lettuce.core.api.async.RedisAsyncCommands; +import java.time.Duration; +import java.util.Collection; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +/** + * Cached decorator for {@link LocalEnabledFilter} β€” caches the boolean + * {@code enabled} outcome per username in a Caffeine L1 (and optional + * Valkey L2) so the JDBC lookup in {@code LocalEnabledFilter} does not + * fire on every authenticated request (CLI basic auth pulls hit this on + * EVERY request). + * + *

      Only caches the "enabled" dimension β€” never caches failed + * authentication from the delegate. Caching auth failures would be + * DoS-amplifying: a single wrong password could then gate access even + * if the user fixes it within the TTL, and negative credential caching + * can hide password rotations. + * + *

      Flow on {@link #user(String, String)}: + *

        + *
      1. Delegate authentication (password check) runs first. If it + * returns empty, we return empty β€” never touch the cache.
      2. + *
      3. If present, probe L1 by username: {@code Boolean.TRUE} β†’ pass + * through; {@code Boolean.FALSE} β†’ return empty. (L1 cache stores + * the outcome of the enabled check from a previous call.)
      4. + *
      5. On L1 miss, probe L2 (Valkey) with a bounded timeout. Populate + * L1 on hit. Return empty on disabled.
      6. + *
      7. On total miss, since the delegate already authenticated + * successfully ({@code LocalEnabledFilter} would have rejected if + * the user were disabled), treat the outcome as enabled=true. + * Populate L1 and L2.
      8. + *
      + * + *

      {@link #invalidate(String)} drops the entry from L1, DELs L2, and + * broadcasts on pub/sub so peer nodes drop their L1 copies. + * + * @since 2.2.0 + */ +public final class CachedLocalEnabledFilter implements Authentication { + + /** + * Pub/sub / L2 namespace used for this cache's keys and invalidation + * messages. + */ + public static final String NAMESPACE = "auth:enabled"; + + /** + * Inner authentication (typically {@link LocalEnabledFilter}). + * Authenticates credentials AND runs the enabled-check JDBC lookup. + */ + private final Authentication delegate; + + /** + * L1 in-memory cache: username β†’ enabled flag. + */ + private final Cache l1; + + /** + * L2 Valkey async commands, or {@code null} when two-tier disabled. + * {@link ValkeyConnection#async()} returns {@code }. + */ + private final RedisAsyncCommands l2; + + /** + * Whether the L2 (Valkey) tier is enabled. + */ + private final boolean twoTier; + + /** + * Bound on synchronous L2 operations (ms). Misses time out to a + * delegate fallback instead of blocking the auth thread. + */ + private final long l2TimeoutMs; + + /** + * L2 TTL applied on writes, in seconds. + */ + private final long l2TtlSeconds; + + /** + * Pub/sub for cross-instance invalidation; nullable when disabled. + */ + private final CacheInvalidationPubSub pubsub; + + /** + * Ctor. 
+ * @param delegate Inner authentication (typically {@link LocalEnabledFilter}) + * @param cfg Global cache config + * @param valkey Optional Valkey connection for L2 + * @param pubsub Optional pub/sub for cross-instance invalidation + */ + public CachedLocalEnabledFilter( + final Authentication delegate, + final GlobalCacheConfig cfg, + final ValkeyConnection valkey, + final CacheInvalidationPubSub pubsub + ) { + this.delegate = Objects.requireNonNull(delegate, "delegate"); + final GlobalCacheConfig.AuthEnabledConfig ac = + Objects.requireNonNull(cfg, "cfg").authEnabled(); + this.twoTier = valkey != null && ac.l2Enabled(); + this.l2 = this.twoTier ? valkey.async() : null; + this.l2TimeoutMs = ac.l2TimeoutMs(); + this.l2TtlSeconds = ac.l2TtlSeconds(); + this.pubsub = pubsub; + this.l1 = Caffeine.newBuilder() + .maximumSize(ac.l1MaxSize()) + .expireAfterWrite(Duration.ofSeconds(ac.l1TtlSeconds())) + .recordStats() + .build(); + if (pubsub != null) { + pubsub.subscribe(NAMESPACE, key -> this.l1.invalidate(key)); + } + EcsLogger.info("com.auto1.pantera.auth") + .message("CachedLocalEnabledFilter initialized" + + " (twoTier=" + this.twoTier + + ", l1MaxSize=" + ac.l1MaxSize() + + ", l1TtlSeconds=" + ac.l1TtlSeconds() + + ", l2TtlSeconds=" + ac.l2TtlSeconds() + + ", l2TimeoutMs=" + ac.l2TimeoutMs() + ")") + .eventCategory("authentication") + .eventAction("auth_cache_init") + .log(); + } + + @Override + public Optional user(final String username, final String password) { + // 1. Authenticate through delegate FIRST. If the credentials are + // wrong (or the user is disabled and the delegate rejects + // them), return empty immediately β€” never cache auth failures + // (DoS-amplification risk; see class javadoc). + final Optional authed = this.delegate.user(username, password); + if (authed.isEmpty()) { + return authed; + } + if (username == null) { + return authed; + } + final String key = username; + // 2. Probe L1 for the cached enabled-flag outcome. 
+ final Boolean l1hit = this.l1.getIfPresent(key); + if (Boolean.TRUE.equals(l1hit)) { + return authed; + } + if (Boolean.FALSE.equals(l1hit)) { + // Cached disabled β€” reject even though delegate authenticated. + // This guards against a stale enabled-flag decision from a + // non-LocalEnabledFilter layer in the chain. + return Optional.empty(); + } + // 3. L1 miss β€” probe L2 (bounded). + if (this.twoTier) { + try { + final byte[] bytes = this.l2.get(NAMESPACE + ":" + key) + .toCompletableFuture() + .get(this.l2TimeoutMs, TimeUnit.MILLISECONDS); + if (bytes != null && bytes.length >= 1) { + final boolean enabled = bytes[0] != 0; + this.l1.put(key, enabled); + return enabled ? authed : Optional.empty(); + } + } catch (final Exception ex) { + // L2 outage β€” treat as miss, fall through. + EcsLogger.debug("com.auto1.pantera.auth") + .message("L2 probe failed for auth-enabled; falling through") + .eventCategory("database") + .eventAction("cache_l2_probe") + .eventOutcome("failure") + .field("user.name", username) + .error(ex) + .log(); + } + } + // 4. Total miss: delegate returned present, so enabled=true at + // the time of the call. Populate L1 and L2. 
+ this.l1.put(key, Boolean.TRUE); + if (this.twoTier) { + try { + this.l2.setex( + NAMESPACE + ":" + key, + this.l2TtlSeconds, + new byte[] { (byte) 1 } + ); + } catch (final Exception ex) { + EcsLogger.debug("com.auto1.pantera.auth") + .message("L2 write failed for auth-enabled; L1 kept") + .eventCategory("database") + .eventAction("cache_l2_write") + .eventOutcome("failure") + .field("user.name", username) + .error(ex) + .log(); + } + } + return authed; + } + + @Override + public boolean canHandle(final String username) { + return this.delegate.canHandle(username); + } + + @Override + public boolean isAuthoritative(final String username) { + return this.delegate.isAuthoritative(username); + } + + @Override + public Collection userDomains() { + return this.delegate.userDomains(); + } + + /** + * Drop the cached enabled flag for {@code username} in L1 and L2, + * and broadcast the invalidation on pub/sub so peer nodes drop + * their L1 copies. Called by admin flows that mutate the user's + * enabled state (update, enable, disable, delete). 
+ * + * @param username User whose cache entry to drop + */ + public void invalidate(final String username) { + if (username == null) { + return; + } + this.l1.invalidate(username); + if (this.twoTier) { + try { + this.l2.del(NAMESPACE + ":" + username); + } catch (final Exception ex) { + EcsLogger.debug("com.auto1.pantera.auth") + .message("L2 delete failed for auth-enabled") + .eventCategory("database") + .eventAction("cache_l2_delete") + .eventOutcome("failure") + .field("user.name", username) + .error(ex) + .log(); + } + } + if (this.pubsub != null) { + this.pubsub.publish(NAMESPACE, username); + } + } + + @Override + public String toString() { + return String.format( + "%s(size=%d,twoTier=%s),delegate=%s", + this.getClass().getSimpleName(), + this.l1.estimatedSize(), this.twoTier, this.delegate + ); + } +} diff --git a/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownRepository.java b/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownRepository.java index 65bdf7396..4592d49da 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownRepository.java +++ b/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownRepository.java @@ -10,7 +10,7 @@ */ package com.auto1.pantera.cooldown; -import com.auto1.pantera.cooldown.CooldownReason; +import com.auto1.pantera.cooldown.api.CooldownReason; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; diff --git a/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownSupport.java b/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownSupport.java index d0b36cc15..8400f41fd 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownSupport.java +++ b/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownSupport.java @@ -10,10 +10,12 @@ */ package com.auto1.pantera.cooldown; -import com.auto1.pantera.cooldown.NoopCooldownService; -import com.auto1.pantera.cooldown.CooldownService; +import 
com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import com.auto1.pantera.cooldown.impl.NoopCooldownService; import com.auto1.pantera.cooldown.metadata.CooldownMetadataService; -import com.auto1.pantera.cooldown.metadata.CooldownMetadataServiceImpl; +import com.auto1.pantera.cooldown.metadata.MetadataFilterService; import com.auto1.pantera.cooldown.metadata.FilteredMetadataCache; import com.auto1.pantera.cooldown.metadata.FilteredMetadataCacheConfig; import com.auto1.pantera.cooldown.metadata.NoopCooldownMetadataService; @@ -87,6 +89,10 @@ public static CooldownService create(final Settings settings) { } public static CooldownService create(final Settings settings, final Executor executor) { + // Register all adapter bundles (parser/filter/rewriter/detector/responseFactory) + // into the global CooldownAdapterRegistry. This is idempotent and safe to call + // early -- the registry is a ConcurrentHashMap, and adapters are stateless. + CooldownWiring.registerAllAdapters(); return settings.artifactsDatabase() .map(ds -> { // Load DB-persisted cooldown config and apply over YAML defaults. @@ -153,7 +159,7 @@ public static CooldownMetadataService createMetadataService( .eventAction("metadata_service_init") .log(); - final CooldownMetadataServiceImpl metadataService = new CooldownMetadataServiceImpl( + final MetadataFilterService metadataService = new MetadataFilterService( cooldownService, settings.cooldown(), jdbc.cache(), @@ -183,6 +189,20 @@ public static CooldownMetadataService createMetadataService( return metadataService; } + /** + * Extract the CooldownCache from a CooldownService, if it is backed + * by JdbcCooldownService. Returns null for NoopCooldownService. 
+ * + * @param cooldownService The cooldown service + * @return CooldownCache or null + */ + public static CooldownCache extractCache(final CooldownService cooldownService) { + if (cooldownService instanceof JdbcCooldownService) { + return ((JdbcCooldownService) cooldownService).cache(); + } + return null; + } + /** * Load cooldown settings from DB and apply to in-memory CooldownSettings. * DB settings (saved via the UI) take precedence over YAML defaults. diff --git a/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownWiring.java b/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownWiring.java new file mode 100644 index 000000000..d82a00617 --- /dev/null +++ b/pantera-main/src/main/java/com/auto1/pantera/cooldown/CooldownWiring.java @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.cooldown; + +import com.auto1.pantera.composer.cooldown.ComposerCooldownResponseFactory; +import com.auto1.pantera.composer.cooldown.ComposerMetadataFilter; +import com.auto1.pantera.composer.cooldown.ComposerMetadataParser; +import com.auto1.pantera.composer.cooldown.ComposerMetadataRequestDetector; +import com.auto1.pantera.composer.cooldown.ComposerMetadataRewriter; +import com.auto1.pantera.cooldown.config.CooldownAdapterBundle; +import com.auto1.pantera.cooldown.config.CooldownAdapterRegistry; +import com.auto1.pantera.cooldown.response.CooldownResponseRegistry; +import com.auto1.pantera.docker.cooldown.DockerCooldownResponseFactory; +import com.auto1.pantera.docker.cooldown.DockerMetadataFilter; +import com.auto1.pantera.docker.cooldown.DockerMetadataParser; +import com.auto1.pantera.docker.cooldown.DockerMetadataRequestDetector; +import com.auto1.pantera.docker.cooldown.DockerMetadataRewriter; +import com.auto1.pantera.http.cooldown.GoCooldownResponseFactory; +import com.auto1.pantera.http.cooldown.GoMetadataFilter; +import com.auto1.pantera.http.cooldown.GoMetadataParser; +import com.auto1.pantera.http.cooldown.GoMetadataRequestDetector; +import com.auto1.pantera.http.cooldown.GoMetadataRewriter; +import com.auto1.pantera.http.log.EcsLogger; +import com.auto1.pantera.maven.cooldown.MavenCooldownResponseFactory; +import com.auto1.pantera.maven.cooldown.MavenMetadataFilter; +import com.auto1.pantera.maven.cooldown.MavenMetadataParser; +import com.auto1.pantera.maven.cooldown.MavenMetadataRequestDetector; +import com.auto1.pantera.maven.cooldown.MavenMetadataRewriter; +import com.auto1.pantera.npm.cooldown.NpmCooldownResponseFactory; +import com.auto1.pantera.npm.cooldown.NpmMetadataFilter; +import com.auto1.pantera.npm.cooldown.NpmMetadataParser; +import com.auto1.pantera.npm.cooldown.NpmMetadataRequestDetector; +import com.auto1.pantera.npm.cooldown.NpmMetadataRewriter; +import 
com.auto1.pantera.pypi.cooldown.PypiCooldownResponseFactory; +import com.auto1.pantera.pypi.cooldown.PypiMetadataFilter; +import com.auto1.pantera.pypi.cooldown.PypiMetadataParser; +import com.auto1.pantera.pypi.cooldown.PypiMetadataRequestDetector; +import com.auto1.pantera.pypi.cooldown.PypiMetadataRewriter; + +/** + * Registers all per-adapter cooldown component bundles at startup. + * + *

      Called once from {@link CooldownSupport#create} during + * application initialization. Each bundle groups the parser, filter, + * rewriter, metadata-request detector, and 403-response factory for + * a single repository type. The proxy layer ({@code BaseCachedProxySlice}) + * looks up the bundle by repo type at request time.

      + * + *

      Adapter mapping:

      + *
        + *
      • maven, gradle-proxy, maven-proxy → Maven bundle
      • + *
      • npm, npm-proxy → npm bundle (detector registered; npm also keeps its own metadata path)
      • + *
      • pypi, pypi-proxy → PyPI bundle
      • + *
      • docker, docker-proxy → Docker bundle
      • + *
      • go, go-proxy → Go bundle
      • + *
      • php, php-proxy → Composer bundle
      • + *
      • gradle → reuses Maven bundle
      • + *
      + * + * @since 2.2.0 + */ +public final class CooldownWiring { + + private CooldownWiring() { + } + + /** + * Register all adapter bundles into the global registries. + * Both {@link CooldownAdapterRegistry} (full bundles for metadata routing) + * and {@link CooldownResponseRegistry} (403 factories for direct-artifact blocks) + * are populated. + */ + @SuppressWarnings("PMD.ExcessiveMethodLength") + public static void registerAllAdapters() { + final CooldownAdapterRegistry adapters = CooldownAdapterRegistry.instance(); + final CooldownResponseRegistry responses = CooldownResponseRegistry.instance(); + + // --- Maven (+ Gradle alias) --- + final var mavenBundle = new CooldownAdapterBundle<>( + new MavenMetadataParser(), + new MavenMetadataFilter(), + new MavenMetadataRewriter(), + new MavenMetadataRequestDetector(), + new MavenCooldownResponseFactory() + ); + adapters.register("maven", mavenBundle); + adapters.register("maven-proxy", mavenBundle); + adapters.register("gradle", mavenBundle); + adapters.register("gradle-proxy", mavenBundle); + responses.register(new MavenCooldownResponseFactory(), "gradle", "gradle-proxy", "maven-proxy"); + + // --- npm --- + // npm has its own metadata filtering path in DownloadPackageSlice, + // but we still register the bundle so BaseCachedProxySlice can use it + // for any future unification, and so the 403 factory is available. 
+ final var npmBundle = new CooldownAdapterBundle<>( + new NpmMetadataParser(), + new NpmMetadataFilter(), + new NpmMetadataRewriter(), + new NpmMetadataRequestDetector(), + new NpmCooldownResponseFactory() + ); + adapters.register("npm", npmBundle); + adapters.register("npm-proxy", npmBundle); + responses.register(new NpmCooldownResponseFactory(), "npm-proxy"); + + // --- PyPI --- + final var pypiBundle = new CooldownAdapterBundle<>( + new PypiMetadataParser(), + new PypiMetadataFilter(), + new PypiMetadataRewriter(), + new PypiMetadataRequestDetector(), + new PypiCooldownResponseFactory() + ); + adapters.register("pypi", pypiBundle); + adapters.register("pypi-proxy", pypiBundle); + responses.register(new PypiCooldownResponseFactory(), "pypi-proxy"); + + // --- Docker --- + final var dockerBundle = new CooldownAdapterBundle<>( + new DockerMetadataParser(), + new DockerMetadataFilter(), + new DockerMetadataRewriter(), + new DockerMetadataRequestDetector(), + new DockerCooldownResponseFactory() + ); + adapters.register("docker", dockerBundle); + adapters.register("docker-proxy", dockerBundle); + responses.register(new DockerCooldownResponseFactory(), "docker-proxy"); + + // --- Go --- + final var goBundle = new CooldownAdapterBundle<>( + new GoMetadataParser(), + new GoMetadataFilter(), + new GoMetadataRewriter(), + new GoMetadataRequestDetector(), + new GoCooldownResponseFactory() + ); + adapters.register("go", goBundle); + adapters.register("go-proxy", goBundle); + responses.register(new GoCooldownResponseFactory(), "go-proxy"); + + // --- Composer (PHP) --- + final var composerBundle = new CooldownAdapterBundle<>( + new ComposerMetadataParser(), + new ComposerMetadataFilter(), + new ComposerMetadataRewriter(), + new ComposerMetadataRequestDetector(), + new ComposerCooldownResponseFactory() + ); + // ComposerCooldownResponseFactory.repoType() returns "composer" for its + // own canonical key; register "php" and "php-proxy" as aliases so the + // Composer proxy 
slices (which use repo type "php") resolve correctly. + responses.register(new ComposerCooldownResponseFactory(), "php", "php-proxy"); + adapters.register("php", composerBundle); + adapters.register("php-proxy", composerBundle); + + EcsLogger.info("com.auto1.pantera.cooldown") + .message("Registered cooldown adapter bundles: " + adapters.registeredTypes()) + .eventCategory("configuration") + .eventAction("adapter_wiring") + .eventOutcome("success") + .field("adapter.count", adapters.registeredTypes().size()) + .field("response_factory.count", responses.registeredTypes().size()) + .log(); + } +} diff --git a/pantera-main/src/main/java/com/auto1/pantera/cooldown/DbBlockRecord.java b/pantera-main/src/main/java/com/auto1/pantera/cooldown/DbBlockRecord.java index 657f4b236..0083e4f48 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/cooldown/DbBlockRecord.java +++ b/pantera-main/src/main/java/com/auto1/pantera/cooldown/DbBlockRecord.java @@ -10,7 +10,7 @@ */ package com.auto1.pantera.cooldown; -import com.auto1.pantera.cooldown.CooldownReason; +import com.auto1.pantera.cooldown.api.CooldownReason; import java.time.Instant; import java.util.Optional; diff --git a/pantera-main/src/main/java/com/auto1/pantera/cooldown/JdbcCooldownService.java b/pantera-main/src/main/java/com/auto1/pantera/cooldown/JdbcCooldownService.java index 04a444a65..ff3ffec09 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/cooldown/JdbcCooldownService.java +++ b/pantera-main/src/main/java/com/auto1/pantera/cooldown/JdbcCooldownService.java @@ -9,6 +9,19 @@ * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
*/ package com.auto1.pantera.cooldown; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.config.CooldownCircuitBreaker; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import com.auto1.pantera.cooldown.config.InspectorRegistry; +import com.auto1.pantera.cooldown.metrics.CooldownMetrics; +import com.auto1.pantera.http.log.EcsLogger; import java.time.Duration; import java.time.Instant; import java.util.ArrayList; @@ -21,9 +34,7 @@ import java.util.concurrent.Executor; import java.util.concurrent.ForkJoinPool; import java.util.stream.Collectors; -import com.auto1.pantera.cooldown.metrics.CooldownMetrics; -import com.auto1.pantera.http.log.EcsLogger; -import com.auto1.pantera.http.trace.MdcPropagation; + final class JdbcCooldownService implements CooldownService { @@ -88,14 +99,15 @@ interface OnBlockRemoved { ) { this.settings = Objects.requireNonNull(settings); this.repository = Objects.requireNonNull(repository); - this.executor = Objects.requireNonNull(executor); + this.executor = com.auto1.pantera.http.context.ContextualExecutor + .contextualize(Objects.requireNonNull(executor)); this.cache = Objects.requireNonNull(cache); this.circuitBreaker = Objects.requireNonNull(circuitBreaker); } /** * Get the cooldown cache instance. - * Used by CooldownMetadataServiceImpl for cache sharing. + * Used by MetadataFilterService for cache sharing. 
* @return CooldownCache instance */ public CooldownCache cache() { @@ -260,7 +272,7 @@ public CompletableFuture evaluate( request.artifact(), request.version(), () -> this.evaluateFromDatabase(request, inspector) - ).thenCompose(MdcPropagation.withMdc(blocked -> { + ).thenCompose(blocked -> { if (blocked) { EcsLogger.info("com.auto1.pantera.cooldown") .message("Artifact BLOCKED by cooldown (cache/db)") @@ -288,7 +300,7 @@ public CompletableFuture evaluate( this.recordVersionAllowedMetric(request.repoType(), request.repoName()); return CompletableFuture.completedFuture(CooldownResult.allowed()); } - })).whenComplete(MdcPropagation.withMdcBiConsumer((result, error) -> { + }).whenComplete((result, error) -> { if (error != null) { this.circuitBreaker.recordFailure(); EcsLogger.error("com.auto1.pantera.cooldown") @@ -303,7 +315,7 @@ public CompletableFuture evaluate( } else { this.circuitBreaker.recordSuccess(); } - })); + }); } @Override @@ -380,7 +392,7 @@ private CompletableFuture evaluateFromDatabase( // Step 1: Check database for existing block (async) return CompletableFuture.supplyAsync(() -> { return this.checkExistingBlockWithTimestamp(request); - }, this.executor).thenCompose(MdcPropagation.withMdc(result -> { + }, this.executor).thenCompose(result -> { if (result.isPresent()) { final BlockCacheEntry entry = result.get(); EcsLogger.debug("com.auto1.pantera.cooldown") @@ -402,9 +414,9 @@ private CompletableFuture evaluateFromDatabase( } // Step 2: No existing block - check if artifact should be blocked return this.checkNewArtifactAndCache(request, inspector); - })); + }); } - + /** * Get full block result with details from database. * Only called when cache says artifact is blocked. 
@@ -520,7 +532,7 @@ private CompletableFuture checkNewArtifactAndCache( // Async fetch release date with timeout to prevent hanging return inspector.releaseDate(request.artifact(), request.version()) .orTimeout(5, java.util.concurrent.TimeUnit.SECONDS) - .exceptionally(MdcPropagation.>withMdcFunction(error -> { + .exceptionally(error -> { EcsLogger.warn("com.auto1.pantera.cooldown") .message("Failed to fetch release date (allowing)") .eventCategory("database") @@ -531,10 +543,10 @@ private CompletableFuture checkNewArtifactAndCache( .field("error.message", error.getMessage()) .log(); return Optional.empty(); - })) - .thenCompose(MdcPropagation.withMdc(release -> { + }) + .thenCompose(release -> { return this.shouldBlockNewArtifact(request, inspector, release); - })); + }); } /** @@ -598,13 +610,13 @@ private CompletableFuture shouldBlockNewArtifact( .log(); // Create block in database (async) return this.createBlockInDatabase(request, CooldownReason.FRESH_RELEASE, until) - .thenApply(MdcPropagation.withMdcFunction(success -> { + .thenApply(success -> { // Cache as blocked with dynamic TTL (until block expires) this.cache.putBlocked(request.repoName(), request.artifact(), request.version(), until); return true; - })) - .exceptionally(MdcPropagation.withMdcFunction(error -> { + }) + .exceptionally(error -> { EcsLogger.error("com.auto1.pantera.cooldown") .message("Failed to create block (blocking anyway)") .eventCategory("database") @@ -618,7 +630,7 @@ private CompletableFuture shouldBlockNewArtifact( this.cache.putBlocked(request.repoName(), request.artifact(), request.version(), until); return true; - })); + }); } EcsLogger.debug("com.auto1.pantera.cooldown") @@ -664,11 +676,11 @@ private CompletableFuture createBlockInDatabase( installedBy ); return true; - }, this.executor).thenApply(MdcPropagation.withMdcFunction(result -> { + }, this.executor).thenApply(result -> { // Increment active blocks metric (O(1), no DB query) 
this.incrementActiveBlocksMetric(request.repoType(), request.repoName()); return result; - })); + }); } /** @@ -732,7 +744,7 @@ private void expire(final DbBlockRecord record, final Instant when) { .log(); } // Invalidate inspector cache (same as unblockSingle does) - com.auto1.pantera.cooldown.InspectorRegistry.instance() + InspectorRegistry.instance() .invalidate(record.repoType(), record.repoName(), record.artifact(), record.version()); } @@ -748,7 +760,7 @@ private void unblockSingle( record.ifPresent(value -> this.release(value, actor, Instant.now())); // Invalidate inspector cache (works for all adapters: Docker, NPM, PyPI, etc.) - com.auto1.pantera.cooldown.InspectorRegistry.instance() + InspectorRegistry.instance() .invalidate(repoType, repoName, artifact, version); } @@ -779,7 +791,7 @@ private int unblockAllBlocking( // Single bulk DELETE instead of N individual updates final int count = this.repository.deleteActiveBlocksForRepo(repoType, repoName); // Clear inspector cache (works for all adapters: Docker, NPM, PyPI, etc.) 
- com.auto1.pantera.cooldown.InspectorRegistry.instance() + InspectorRegistry.instance() .clearAll(repoType, repoName); return count; } diff --git a/pantera-main/src/main/java/com/auto1/pantera/cooldown/YamlCooldownSettings.java b/pantera-main/src/main/java/com/auto1/pantera/cooldown/YamlCooldownSettings.java index ea1302465..4e51d4621 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/cooldown/YamlCooldownSettings.java +++ b/pantera-main/src/main/java/com/auto1/pantera/cooldown/YamlCooldownSettings.java @@ -11,7 +11,8 @@ package com.auto1.pantera.cooldown; import com.amihaiemil.eoyaml.YamlMapping; -import com.auto1.pantera.cooldown.CooldownSettings.RepoTypeConfig; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import com.auto1.pantera.cooldown.config.CooldownSettings.RepoTypeConfig; import java.time.Duration; import java.util.HashMap; import java.util.Locale; diff --git a/pantera-main/src/main/java/com/auto1/pantera/db/ArtifactDbFactory.java b/pantera-main/src/main/java/com/auto1/pantera/db/ArtifactDbFactory.java index b4b569075..360cd572d 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/db/ArtifactDbFactory.java +++ b/pantera-main/src/main/java/com/auto1/pantera/db/ArtifactDbFactory.java @@ -234,7 +234,11 @@ private DataSource initializeWithOverrides( hikariConfig.setMaximumPoolSize(maxSize); hikariConfig.setMinimumIdle(minIdle); hikariConfig.setConnectionTimeout( - ConfigDefaults.getLong("PANTERA_DB_CONNECTION_TIMEOUT_MS", 5000L) + // Fail-fast: 3s instead of 5s so that upstream Hikari timeouts + // propagate as 503 / queue-pressure signals before Vert.x + // request timeouts (30s default) or the client's own deadline. + // Operators can raise via PANTERA_DB_CONNECTION_TIMEOUT_MS. 
+ ConfigDefaults.getLong("PANTERA_DB_CONNECTION_TIMEOUT_MS", 3000L) ); hikariConfig.setIdleTimeout( ConfigDefaults.getLong("PANTERA_DB_IDLE_TIMEOUT_MS", 600_000L) @@ -244,7 +248,12 @@ private DataSource initializeWithOverrides( ); hikariConfig.setPoolName(poolName); hikariConfig.setLeakDetectionThreshold( - ConfigDefaults.getLong("PANTERA_DB_LEAK_DETECTION_MS", 300000) + // Fail-fast: 5s instead of 300s so any leaked connection + // surfaces loudly in logs as a leak WARN rather than + // silently rotting the pool. Canary rollouts may raise to + // 30s initially via PANTERA_DB_LEAK_DETECTION_MS and drop + // back to the default once observed WARNs go to zero. + ConfigDefaults.getLong("PANTERA_DB_LEAK_DETECTION_MS", 5_000L) ); hikariConfig.setRegisterMbeans(true); diff --git a/pantera-main/src/main/java/com/auto1/pantera/group/ArtifactNameParser.java b/pantera-main/src/main/java/com/auto1/pantera/group/ArtifactNameParser.java index 06eaa4610..3d4dbe6a1 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/group/ArtifactNameParser.java +++ b/pantera-main/src/main/java/com/auto1/pantera/group/ArtifactNameParser.java @@ -17,7 +17,7 @@ /** * Extracts the artifact name from a raw URL path based on the repository type. * Each adapter stores artifacts with a specific {@code name} format in the DB. - * This parser reverses the URL path back to that format so GroupSlice can do + * This parser reverses the URL path back to that format so GroupResolver can do * an indexed lookup via {@code WHERE name = ?} instead of expensive fan-out. 
* * @since 1.21.0 diff --git a/pantera-main/src/main/java/com/auto1/pantera/group/GroupMetadataCache.java b/pantera-main/src/main/java/com/auto1/pantera/group/GroupMetadataCache.java index 296ac4342..3f6ef7039 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/group/GroupMetadataCache.java +++ b/pantera-main/src/main/java/com/auto1/pantera/group/GroupMetadataCache.java @@ -14,28 +14,35 @@ import com.auto1.pantera.cache.ValkeyConnection; import com.github.benmanes.caffeine.cache.Cache; import com.github.benmanes.caffeine.cache.Caffeine; +import io.lettuce.core.SetArgs; import io.lettuce.core.api.async.RedisAsyncCommands; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.Optional; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeUnit; /** * Two-tier cache for Maven group merged metadata with configurable TTL. - * + * *

 * <p>Key format: {@code maven:group:metadata:{group_name}:{path}}</p>
- *
+ *
 * <p>Architecture:</p>
 * <ul>
- * <li>L1 (Caffeine): Fast in-memory cache, short TTL when L2 enabled</li>
- * <li>L2 (Valkey/Redis): Distributed cache, full TTL</li>
+ * <li>L1 (Caffeine): Fast in-memory primary cache, short TTL when L2 enabled</li>
+ * <li>L2 (Valkey/Redis): Distributed primary cache, full TTL</li>
+ * <li>Stale L1 (Caffeine): Last-known-good, long TTL, bounded size</li>
+ * <li>Stale L2 (Valkey/Redis): Last-known-good distributed, key
+ *     {@code maven:group:metadata:stale:{group_name}:{path}}</li>
 * </ul>
 *
+ * <p>
      Design principle for the STALE tier: it is an AID, never a BREAKER. + * Under realistic cardinality no eviction ever fires. Bounds are a + * JVM-memory safety net against pathological growth β€” not an expiry + * mechanism. {@link #getStaleWithFallback} degrades gracefully: + * stale-L1 → stale-L2 → expired-primary-L1 → miss. + * * @since 1.0 */ public final class GroupMetadataCache { @@ -51,22 +58,22 @@ public final class GroupMetadataCache { private static final int DEFAULT_MAX_SIZE = 1000; /** - * L1 cache (in-memory). + * L1 cache (in-memory) β€” PRIMARY tier. */ private final Cache l1Cache; /** - * L2 cache (Valkey/Redis), may be null. + * L2 cache (Valkey/Redis), may be null β€” PRIMARY tier. */ private final RedisAsyncCommands l2; /** - * Whether two-tier caching is enabled. + * Whether two-tier caching is enabled (primary). */ private final boolean twoTier; /** - * TTL for cached metadata. + * TTL for cached metadata (primary). */ private final Duration ttl; @@ -76,11 +83,32 @@ public final class GroupMetadataCache { private final String groupName; /** - * Last-known-good metadata (never expires). - * Populated on every successful put(), survives L1/L2 invalidation/expiry. - * Used as stale fallback when upstream is unreachable. + * Stale L1 β€” last-known-good in-memory. Long TTL (30d default), + * bounded size as a JVM-memory safety net only. + */ + private final Cache lastKnownGoodL1; + + /** + * Whether stale two-tier caching is enabled. */ - private final ConcurrentMap lastKnownGood; + private final boolean staleTwoTier; + + /** + * Stale L2 β€” last-known-good in Valkey, may be null. + * Uses the same shared connection pool as the primary L2; keys are + * namespaced with a {@code stale:} segment to avoid collision. + */ + private final RedisAsyncCommands staleL2; + + /** + * Timeout for stale L2 reads. + */ + private final Duration staleL2Timeout; + + /** + * Stale L2 TTL in seconds. {@code 0} = no TTL (rely on Valkey LRU). 
+ */ + private final long staleL2TtlSeconds; /** * Create group metadata cache with defaults. @@ -123,17 +151,39 @@ public GroupMetadataCache( .expireAfterWrite(l1Ttl.toMillis(), TimeUnit.MILLISECONDS) .recordStats() .build(); - this.lastKnownGood = new ConcurrentHashMap<>(); + + // ------------------------------------------------------------- + // Stale (last-known-good) tier β€” aid, not breaker. + // ------------------------------------------------------------- + final GlobalCacheConfig.GroupMetadataStaleConfig sc = + GlobalCacheConfig.getInstance().groupMetadataStale(); + this.staleTwoTier = sc.l2Enabled() && actualValkey != null; + this.lastKnownGoodL1 = Caffeine.newBuilder() + .maximumSize(sc.l1MaxSize()) + .expireAfterWrite(Duration.ofSeconds(sc.l1TtlSeconds())) + .recordStats() + .build(); + this.staleL2 = this.staleTwoTier ? actualValkey.async() : null; + this.staleL2Timeout = Duration.ofMillis(sc.l2TimeoutMs()); + this.staleL2TtlSeconds = sc.l2TtlSeconds(); } /** - * Build L2 cache key. - * Format: maven:group:metadata:{group_name}:{path} + * Build primary L2 cache key. + * Format: {@code maven:group:metadata:{group_name}:{path}} */ private String buildL2Key(final String path) { return "maven:group:metadata:" + this.groupName + ":" + path; } + /** + * Build stale L2 cache key. + * Format: {@code maven:group:metadata:stale:{group_name}:{path}} + */ + private String buildStaleL2Key(final String path) { + return "maven:group:metadata:stale:" + this.groupName + ":" + path; + } + /** * Get cached metadata (checks L1, then L2 if miss). * @param path Metadata path @@ -172,35 +222,111 @@ public CompletableFuture> get(final String path) { } /** - * Get stale (last-known-good) metadata. This data never expires and is - * populated on every successful {@link #put}. Use as fallback when all - * group members are unreachable and the primary cache has expired. 
+ * Get stale (last-known-good) metadata with graceful 3-step fallback: + * stale-L1 → stale-L2 → expired-primary-L1 → miss. + * + *

      This path is the BREAKER's fallback (the "aid") and never throws. * @param path Metadata path - * @return Optional containing last-known-good bytes, or empty if never cached + * @return Optional containing last-known-good bytes, or empty if not found */ - public CompletableFuture> getStale(final String path) { - final byte[] data = this.lastKnownGood.get(path); - if (data != null) { - recordCacheHit("lkg"); - return CompletableFuture.completedFuture(Optional.of(data)); + public CompletableFuture> getStaleWithFallback(final String path) { + // 1. Stale L1 + final byte[] l1 = this.lastKnownGoodL1.getIfPresent(path); + if (l1 != null) { + recordStaleServedFrom("l1"); + return CompletableFuture.completedFuture(Optional.of(l1)); + } + // 2. Stale L2 (bounded by staleL2Timeout, fully defensive) + final CompletableFuture> l2Future; + if (this.staleTwoTier) { + l2Future = this.staleL2.get(buildStaleL2Key(path)) + .toCompletableFuture() + .orTimeout(this.staleL2Timeout.toMillis(), TimeUnit.MILLISECONDS) + .exceptionally(err -> null) + .thenApply(b -> b != null && b.length > 0 + ? Optional.of(b) + : Optional.empty()); + } else { + l2Future = CompletableFuture.completedFuture(Optional.empty()); } - recordCacheMiss("lkg"); - return CompletableFuture.completedFuture(Optional.empty()); + return l2Future.thenApply(l2hit -> { + if (l2hit.isPresent()) { + // Promote stale-L2 to stale-L1 so subsequent reads are local. + this.lastKnownGoodL1.put(path, l2hit.get()); + recordStaleServedFrom("l2"); + return l2hit; + } + // 3. Last resort: expired primary-cache entry (peek past TTL) + final byte[] expired = peekExpiredPrimary(path); + if (expired != null) { + recordStaleServedFrom("expired-primary"); + return Optional.of(expired); + } + recordStaleServedFrom("miss"); + return Optional.empty(); + }); } /** - * Put metadata in cache (both L1 and L2). + * Backward-compatible alias for {@link #getStaleWithFallback(String)}. 
+ * + * @param path Metadata path + * @return Optional containing last-known-good bytes, or empty if not found + * @deprecated Use {@link #getStaleWithFallback(String)} β€” this alias + * exists to keep existing call sites compiling across the + * 2-tier-stale migration and will be removed in a future release. + */ + @Deprecated + public CompletableFuture> getStale(final String path) { + return getStaleWithFallback(path); + } + + /** + * Peek the primary L1 cache past its TTL. Caffeine's + * {@code getIfPresent} drops expired entries, but {@code asMap().get()} + * returns entries that are technically past their write TTL but have + * not yet been swept by Caffeine's cleanup thread. This is documented + * as a "close-enough" last-resort fallback for the stale path β€” see + * {@code docs/superpowers/plans/2026-04-19-v2.2-production-readiness-A-H.md} + * Group C. We did NOT use {@code Policy.getIfPresentQuietly} because its + * expiration semantics on 3.2.3 are not guaranteed to return already- + * expired entries; {@code asMap} is the explicit, well-known workaround. + * + * @param path Metadata path + * @return Raw bytes if still present in the primary map (even if past + * TTL), or {@code null} + */ + private byte[] peekExpiredPrimary(final String path) { + final CachedMetadata cached = this.l1Cache.asMap().get(path); + return cached != null ? cached.data : null; + } + + /** + * Put metadata in cache (both primary L1+L2 and stale L1+L2). * @param path Metadata path * @param data Metadata bytes */ public void put(final String path, final byte[] data) { - // Always update last-known-good (never expires) - this.lastKnownGood.put(path, data); - // Put in L1 + // Always update last-known-good (stale tier). 
+ this.lastKnownGoodL1.put(path, data); + if (this.staleTwoTier) { + final String staleKey = buildStaleL2Key(path); + if (this.staleL2TtlSeconds > 0) { + this.staleL2.set( + staleKey, + data, + SetArgs.Builder.ex(this.staleL2TtlSeconds) + ); + } else { + // 0 = no TTL, rely on Valkey LRU + this.staleL2.set(staleKey, data); + } + } + // Primary L1 final CachedMetadata entry = new CachedMetadata(data, Instant.now()); this.l1Cache.put(path, entry); - // Put in L2 if available + // Primary L2 if available if (this.twoTier) { final String l2Key = buildL2Key(path); this.l2.setex(l2Key, this.ttl.getSeconds(), data); @@ -208,7 +334,9 @@ public void put(final String path, final byte[] data) { } /** - * Invalidate cached metadata. + * Invalidate cached metadata in the PRIMARY tier only. + * The stale (last-known-good) tier is deliberately preserved so + * callers can still serve fallback after primary invalidation. * @param path Metadata path */ public void invalidate(final String path) { @@ -245,6 +373,21 @@ private void recordCacheMiss(final String tier) { } } + /** + * Record which tier served a stale fallback read. + * Values: {@code l1}, {@code l2}, {@code expired-primary}, {@code miss}. + * Reuses the existing cache-requests counter surface so no new + * Micrometer meter needs to be registered; tier labels are prefixed + * with {@code stale-} to disambiguate from primary tiers. + */ + private void recordStaleServedFrom(final String tier) { + if ("miss".equals(tier)) { + recordCacheMiss("stale-" + tier); + } else { + recordCacheHit("stale-" + tier); + } + } + /** * Get L1 cache size. 
* @return Estimated number of entries @@ -266,4 +409,3 @@ public boolean isTwoTier() { */ private record CachedMetadata(byte[] data, Instant cachedAt) { } } - diff --git a/pantera-main/src/main/java/com/auto1/pantera/group/GroupResolver.java b/pantera-main/src/main/java/com/auto1/pantera/group/GroupResolver.java new file mode 100644 index 000000000..07bd8c7b5 --- /dev/null +++ b/pantera-main/src/main/java/com/auto1/pantera/group/GroupResolver.java @@ -0,0 +1,1059 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.group; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.asto.Key; +import com.auto1.pantera.http.Headers; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.RsStatus; +import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.cache.NegativeCache; +import com.auto1.pantera.http.context.ContextualExecutor; +import com.auto1.pantera.http.fault.Fault; +import com.auto1.pantera.http.fault.FaultTranslator; +import com.auto1.pantera.http.fault.Result; +import com.auto1.pantera.http.headers.Header; +import com.auto1.pantera.http.log.EcsLogger; +import com.auto1.pantera.http.resilience.SingleFlight; +import com.auto1.pantera.http.rq.RequestLine; +import com.auto1.pantera.http.slice.EcsLoggingSlice; +import com.auto1.pantera.http.slice.KeyFromPath; +import com.auto1.pantera.index.ArtifactIndex; +import com.auto1.pantera.index.IndexOutcome; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Objects; +import 
java.util.Optional; +import java.util.Set; +import java.util.concurrent.CancellationException; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import java.util.stream.Collectors; + +import com.auto1.pantera.http.timeout.AutoBlockRegistry; + +/** + * Group resolution engine implementing the 5-path decision tree from + * {@code docs/analysis/v2.2-target-architecture.md} section 2. + * + *

      Canonical group-resolution layer (replaces the legacy GroupSlice, + * removed in v2.2.0). Wires together: + *

        + *
      • {@link Fault} + {@link Result} (WI-01) for typed error paths
      • + *
      • {@link FaultTranslator} (WI-01) as the single HTTP-status site
      • + *
      • {@link SingleFlight} (WI-05) for proxy fanout coalescing
      • + *
      • {@link IndexOutcome} for typed index results
      • + *
      • {@link NegativeCache} for 404 caching
      • + *
      + * + *

      Decision tree

      + *
      + * 1. NegativeCache.isKnown404(groupScope, type, name, ver)
      + *      hit  -> 404 [PATH A]
      + *      miss -> step 2
      + * 2. ArtifactIndex.locateByName(name)
      + *      DBFailure/Timeout -> Fault.IndexUnavailable -> 500 [PATH B]
      + *      Hit -> targeted storage read [step 3]
      + *      Miss -> proxy fanout [step 3']
      + * 3. StorageRead -> 2xx [PATH OK]
      + *      NotFound (TOCTOU) -> fall through to step 3'
      + *      StorageFault -> Fault.StorageUnavailable -> 500 [PATH B]
      + * 3'. Proxy fanout (only if group has proxy members)
      + *      no proxies -> cache negative + 404 [PATH A]
      + *      first 2xx  -> stream + cancel + drain [PATH OK]
      + *      all 404    -> cache negative + 404 [PATH A]
      + *      any 5xx, no 2xx -> Fault.AllProxiesFailed [PATH B -> pass-through]
      + * 4. FaultTranslator.translate(result, ctx) [single translation site]
      + * 
      + * + *

      Key behaviour characteristics

      + *
        + *
      • TOCTOU fallthrough (A11 fix): Index hit + targeted member 404 + * falls through to proxy fanout instead of returning 500.
      • + *
      • AllProxiesFailed pass-through: All proxy 5xx returns the best-ranked + * upstream response verbatim via {@link FaultTranslator}.
      • + *
      • Typed index errors: DB error returns {@link Fault.IndexUnavailable} + * instead of silently falling through to full fanout.
      • + *
      + * + * @since 2.2.0 + */ +@SuppressWarnings({"PMD.TooManyMethods", "PMD.GodClass"}) +public final class GroupResolver implements Slice { + + private final String group; + private final List members; + private final List routingRules; + private final Optional artifactIndex; + private final String repoType; + private final Set proxyMembers; + private final NegativeCache negativeCache; + private final SingleFlight inFlightFanouts; + private final java.util.concurrent.Executor drainExecutor; + + /** + * Full constructor. + * + * @param group Group repository name + * @param members Flattened member slices with circuit breakers + * @param routingRules Routing rules for path-based member selection + * @param artifactIndex Optional artifact index for O(log n) lookups + * @param repoType Repository type for name parsing + * @param proxyMembers Names of proxy repository members + * @param negativeCache Negative cache for 404 results + * @param drainExecutor Per-repo drain executor from {@link com.auto1.pantera.http.resilience.RepoBulkhead} + */ + @SuppressWarnings("PMD.ExcessiveParameterList") + public GroupResolver( + final String group, + final List members, + final List routingRules, + final Optional artifactIndex, + final String repoType, + final Set proxyMembers, + final NegativeCache negativeCache, + final java.util.concurrent.Executor drainExecutor + ) { + this.group = Objects.requireNonNull(group, "group"); + this.members = Objects.requireNonNull(members, "members"); + this.routingRules = routingRules != null ? routingRules : Collections.emptyList(); + this.artifactIndex = artifactIndex != null ? artifactIndex : Optional.empty(); + this.repoType = repoType != null ? repoType : ""; + this.proxyMembers = proxyMembers != null ? 
proxyMembers : Collections.emptySet(); + this.negativeCache = Objects.requireNonNull(negativeCache, "negativeCache"); + this.drainExecutor = Objects.requireNonNull(drainExecutor, "drainExecutor"); + this.inFlightFanouts = new SingleFlight<>( + Duration.ofMinutes(5), + 10_000, + ContextualExecutor.contextualize(ForkJoinPool.commonPool()) + ); + } + + /** + * Wiring-site-friendly constructor. + * + *

      Accepts member repository names and builds the + * {@link MemberSlice} list inline via {@code resolver.slice(...)} so that + * call-sites in {@code RepositorySlices} do not need to duplicate the + * member-wrapping logic. Delegates to the member-accepting constructor + * above. + * + *

      The {@code depth} parameter is accepted for API compatibility but + * ignored (group nesting is resolved upstream). + * + * @param resolver Slice resolver/cache used to materialize member slices + * @param group Group repository name + * @param memberNames Member repository names (deduplicated, order preserved) + * @param port Server port passed to the slice resolver + * @param depth Nesting depth (accepted and ignored for API compat) + * @param timeoutSeconds Timeout hint (unused here, preserved for API compat) + * @param routingRules Routing rules for path-based member selection + * @param artifactIndex Optional artifact index for O(log n) lookups + * @param proxyMembers Names of proxy repository members + * @param repoType Repository type for name parsing + * @param negativeCache Pre-constructed negative cache + * @param registrySupplier Function mapping member name to its shared + * {@link AutoBlockRegistry} (may be {@code null}) + * @param repoDrainExecutor Per-repo drain executor + */ + @SuppressWarnings("PMD.ExcessiveParameterList") + public GroupResolver( + final SliceResolver resolver, + final String group, + final List memberNames, + final int port, + final int depth, + final long timeoutSeconds, + final List routingRules, + final Optional artifactIndex, + final Set proxyMembers, + final String repoType, + final NegativeCache negativeCache, + final Function registrySupplier, + final java.util.concurrent.Executor repoDrainExecutor + ) { + this( + group, + buildMembers(resolver, memberNames, port, proxyMembers, registrySupplier), + routingRules, + artifactIndex, + repoType, + proxyMembers, + negativeCache, + repoDrainExecutor + ); + } + + /** + * Build the flattened {@link MemberSlice} list from member names: + * deduplicate preserving order, then wrap each name with either the + * shared-registry 4-arg {@link MemberSlice} constructor (when the supplier + * returns non-null) or the 3-arg variant (when the supplier is null or + * returns null). 
+ */ + private static List buildMembers( + final SliceResolver resolver, + final List memberNames, + final int port, + final Set proxyMembers, + final Function registrySupplier + ) { + final Set safeProxies = proxyMembers != null + ? proxyMembers : Collections.emptySet(); + final Function supplier = + registrySupplier != null ? registrySupplier : n -> null; + final List out = new ArrayList<>(); + for (final String name : new LinkedHashSet<>(memberNames)) { + final AutoBlockRegistry reg = supplier.apply(name); + if (reg != null) { + out.add(new MemberSlice( + name, + resolver.slice(new Key.From(name), port, 0), + reg, + safeProxies.contains(name) + )); + } else { + out.add(new MemberSlice( + name, + resolver.slice(new Key.From(name), port, 0), + safeProxies.contains(name) + )); + } + } + return out; + } + + @Override + public CompletableFuture response( + final RequestLine line, + final Headers headers, + final Content body + ) { + final String method = line.method().value(); + final String path = line.uri().getPath(); + + final boolean isReadOperation = "GET".equals(method) || "HEAD".equals(method); + final boolean isNpmAudit = "POST".equals(method) && path.contains("/-/npm/v1/security/"); + if (!isReadOperation && !isNpmAudit) { + return CompletableFuture.completedFuture( + ResponseBuilder.methodNotAllowed().build() + ); + } + + if (this.members.isEmpty()) { + return CompletableFuture.completedFuture( + ResponseBuilder.notFound().build() + ); + } + + recordRequestStart(); + final long requestStartTime = System.currentTimeMillis(); + + return resolve(line, headers, body, path) + .whenComplete((resp, err) -> recordMetrics(resp, err, requestStartTime)); + } + + /** + * Core resolution logic implementing the 5-path decision tree. 
+ */ + private CompletableFuture resolve( + final RequestLine line, + final Headers headers, + final Content body, + final String path + ) { + // ---- No index configured β†’ full two-phase fanout ---- + if (this.artifactIndex.isEmpty()) { + return fullTwoPhaseFanout(line, headers, body); + } + + final ArtifactIndex idx = this.artifactIndex.get(); + final Optional parsedName = ArtifactNameParser.parse(this.repoType, path); + if (parsedName.isEmpty()) { + EcsLogger.debug("com.auto1.pantera.group") + .message("Name unparseable, using full two-phase fanout") + .eventCategory("web") + .eventAction("group_direct_fanout") + .field("url.path", path) + .log(); + return fullTwoPhaseFanout(line, headers, body); + } + + final String artifactName = parsedName.get(); + + // ---- STEP 1: Negative cache check ---- + final Key negCacheKey = new Key.From(this.group + ":" + artifactName); + if (this.negativeCache.isNotFound(negCacheKey)) { + EcsLogger.debug("com.auto1.pantera.group") + .message("Negative cache hit, returning 404 without DB query") + .eventCategory("database") + .eventAction("group_negative_cache_hit") + .field("url.path", path) + .log(); + return CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); + } + + // ---- STEP 2: Query index ---- + return idx.locateByName(artifactName) + .thenApply(IndexOutcome::fromLegacy) + .exceptionally(ex -> new IndexOutcome.DBFailure(ex, "locateByName:" + artifactName)) + .thenCompose(outcome -> handleIndexOutcome( + outcome, line, headers, body, path, artifactName, negCacheKey + )); + } + + /** + * Branch on the index outcome. 
+ */ + private CompletableFuture handleIndexOutcome( + final IndexOutcome outcome, + final RequestLine line, + final Headers headers, + final Content body, + final String path, + final String artifactName, + final Key negCacheKey + ) { + return switch (outcome) { + case IndexOutcome.Hit hit -> targetedLocalRead( + hit.repos(), line, headers, body, path, artifactName, negCacheKey + ); + case IndexOutcome.Miss miss -> proxyOnlyFanout( + line, headers, body, artifactName, negCacheKey + ); + case IndexOutcome.Timeout t -> { + EcsLogger.warn("com.auto1.pantera.group") + .message("Index query timed out, returning 500") + .eventCategory("database") + .eventAction("group_index_timeout") + .eventOutcome("failure") + .field("url.path", path) + .log(); + yield CompletableFuture.completedFuture( + FaultTranslator.translate( + new Fault.IndexUnavailable(t.cause(), "locateByName:" + artifactName), + null + ) + ); + } + case IndexOutcome.DBFailure db -> { + EcsLogger.warn("com.auto1.pantera.group") + .message("Index DB error, returning 500") + .eventCategory("database") + .eventAction("group_index_error") + .eventOutcome("failure") + .field("url.path", path) + .log(); + yield CompletableFuture.completedFuture( + FaultTranslator.translate( + new Fault.IndexUnavailable(db.cause(), db.query()), + null + ) + ); + } + }; + } + + /** + * STEP 3: Index hit -- targeted local read. + * + *

      On 404 from the targeted member (TOCTOU drift, A11 fix), falls through + * to proxy fanout instead of returning 500 -- this is the key behaviour + * change from the old GroupSlice. + */ + private CompletableFuture targetedLocalRead( + final List repos, + final RequestLine line, + final Headers headers, + final Content body, + final String path, + final String artifactName, + final Key negCacheKey + ) { + final Set wanted = new HashSet<>(repos); + final List targeted = this.members.stream() + .filter(m -> wanted.contains(m.name())) + .toList(); + if (targeted.isEmpty()) { + EcsLogger.debug("com.auto1.pantera.group") + .message("Index hit references repo not in flattened member list, " + + "falling through to full fanout") + .eventCategory("web") + .eventAction("group_index_orphan") + .field("url.path", path) + .log(); + return fullTwoPhaseFanout(line, headers, body); + } + EcsLogger.debug("com.auto1.pantera.group") + .message("Index hit via name: targeting " + targeted.size() + " member(s)") + .eventCategory("web") + .eventAction("group_index_hit") + .field("url.path", path) + .log(); + + return body.asBytesFuture().thenCompose(requestBytes -> { + final CompletableFuture result = new CompletableFuture<>(); + final AtomicBoolean completed = new AtomicBoolean(false); + final AtomicInteger pending = new AtomicInteger(targeted.size()); + final AtomicBoolean anyServerError = new AtomicBoolean(false); + final AtomicBoolean anyNotFound = new AtomicBoolean(false); + final List> memberFutures = new ArrayList<>(targeted.size()); + + for (final MemberSlice member : targeted) { + final CompletableFuture mf = queryMemberDirect( + member, line, headers, requestBytes + ); + memberFutures.add(mf); + mf.whenComplete((resp, err) -> { + if (err != null) { + if (!(err instanceof CancellationException)) { + member.recordFailure(); + anyServerError.set(true); + } + completeTargetedIfAllExhausted( + pending, completed, anyServerError, anyNotFound, result + ); + } else { + 
handleTargetedMemberResponse( + member, resp, completed, pending, anyServerError, + anyNotFound, result, memberFutures + ); + } + }); + } + + // When the targeted read completes, check for TOCTOU fallthrough + return result.thenCompose(resp -> { + // Cancel remaining futures + for (final CompletableFuture f : memberFutures) { + if (!f.isDone()) { + f.cancel(true); + } + } + if (resp.status().success() + || resp.status() == RsStatus.NOT_MODIFIED + || resp.status() == RsStatus.FORBIDDEN) { + return CompletableFuture.completedFuture(resp); + } + if (resp.status() == RsStatus.NOT_FOUND) { + // TOCTOU drift: index said it exists but member says 404. + // Fall through to proxy fanout (A11 fix). + EcsLogger.debug("com.auto1.pantera.group") + .message("TOCTOU drift: index hit but member returned 404, " + + "falling through to proxy fanout") + .eventCategory("web") + .eventAction("group_toctou_fallthrough") + .field("url.path", line.uri().getPath()) + .log(); + return proxyOnlyFanout(line, headers, body, artifactName, negCacheKey); + } + if (resp.status().serverError()) { + // Targeted member 5xx: return StorageUnavailable. + // The bytes are supposed to be local -- this is a real local failure. + return CompletableFuture.completedFuture( + FaultTranslator.translate( + new Fault.StorageUnavailable(null, line.uri().getPath()), + null + ) + ); + } + return CompletableFuture.completedFuture(resp); + }); + }); + } + + /** + * Handle a response from a targeted member (index hit path). 
+ */ + private void handleTargetedMemberResponse( + final MemberSlice member, + final Response resp, + final AtomicBoolean completed, + final AtomicInteger pending, + final AtomicBoolean anyServerError, + final AtomicBoolean anyNotFound, + final CompletableFuture result, + final List> memberFutures + ) { + final RsStatus status = resp.status(); + if (status == RsStatus.OK || status == RsStatus.PARTIAL_CONTENT + || status == RsStatus.NOT_MODIFIED) { + if (completed.compareAndSet(false, true)) { + member.recordSuccess(); + result.complete(resp); + } else { + drainBody(member.name(), resp.body()); + } + completeTargetedIfAllExhausted(pending, completed, anyServerError, anyNotFound, result); + } else if (status == RsStatus.FORBIDDEN) { + if (completed.compareAndSet(false, true)) { + member.recordSuccess(); + result.complete(resp); + } else { + drainBody(member.name(), resp.body()); + } + completeTargetedIfAllExhausted(pending, completed, anyServerError, anyNotFound, result); + } else if (status == RsStatus.NOT_FOUND) { + anyNotFound.set(true); + drainBody(member.name(), resp.body()); + completeTargetedIfAllExhausted(pending, completed, anyServerError, anyNotFound, result); + } else { + member.recordFailure(); + anyServerError.set(true); + drainBody(member.name(), resp.body()); + completeTargetedIfAllExhausted(pending, completed, anyServerError, anyNotFound, result); + } + } + + /** + * Complete the targeted-read result when all members are exhausted. 
+ * Returns an intermediate Response that the caller interprets: + * - 404 signals TOCTOU fallthrough + * - 5xx signals StorageUnavailable + */ + private static void completeTargetedIfAllExhausted( + final AtomicInteger pending, + final AtomicBoolean completed, + final AtomicBoolean anyServerError, + final AtomicBoolean anyNotFound, + final CompletableFuture result + ) { + if (pending.decrementAndGet() == 0 && !completed.get()) { + if (anyServerError.get()) { + result.complete(ResponseBuilder.internalError() + .textBody("Targeted member read failed").build()); + } else { + // All 404 (TOCTOU case) + result.complete(ResponseBuilder.notFound().build()); + } + } + } + + /** + * STEP 3': Proxy-only fanout. + * + *

      Called when: + *

        + *
      • Index returns Miss (artifact not in any hosted repo)
      • + *
      • Index hit but targeted member 404 (TOCTOU drift)
      • + *
      + */ + private CompletableFuture proxyOnlyFanout( + final RequestLine line, + final Headers headers, + final Content body, + final String artifactName, + final Key negCacheKey + ) { + final List proxyOnly = this.members.stream() + .filter(MemberSlice::isProxy) + .toList(); + if (proxyOnly.isEmpty()) { + this.negativeCache.cacheNotFound(negCacheKey); + EcsLogger.debug("com.auto1.pantera.group") + .message("No proxy members, caching 404 and returning") + .eventCategory("web") + .eventAction("group_index_miss") + .field("url.path", line.uri().getPath()) + .log(); + return CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); + } + + // Request coalescing via SingleFlight + final String dedupKey = this.group + ":" + artifactName; + final boolean[] isLeader = {false}; + final CompletableFuture leaderGate = new CompletableFuture<>(); + final CompletableFuture gate = this.inFlightFanouts.load( + dedupKey, + () -> { + isLeader[0] = true; + return leaderGate; + } + ); + if (isLeader[0]) { + EcsLogger.debug("com.auto1.pantera.group") + .message("Index miss: fanning out to " + + proxyOnly.size() + " proxy member(s)") + .eventCategory("network") + .eventAction("group_index_miss") + .field("url.path", line.uri().getPath()) + .log(); + return executeProxyFanout(proxyOnly, line, headers, body, negCacheKey) + .whenComplete((resp, err) -> leaderGate.complete(null)); + } + EcsLogger.debug("com.auto1.pantera.group") + .message("Coalescing with in-flight fanout for " + artifactName) + .eventCategory("web") + .eventAction("group_fanout_coalesce") + .log(); + return gate.exceptionally(err -> null) + .thenCompose(ignored -> proxyOnlyFanout(line, headers, body, artifactName, negCacheKey)); + } + + /** + * Execute the proxy fanout, returning the result with Fault-typed errors. 
+ */ + private CompletableFuture executeProxyFanout( + final List proxyOnly, + final RequestLine line, + final Headers headers, + final Content body, + final Key negCacheKey + ) { + return body.asBytesFuture().thenCompose(requestBytes -> { + final CompletableFuture result = new CompletableFuture<>(); + final AtomicBoolean completed = new AtomicBoolean(false); + final AtomicInteger pending = new AtomicInteger(proxyOnly.size()); + final List outcomes = + Collections.synchronizedList(new ArrayList<>(proxyOnly.size())); + final List> memberFutures = + new ArrayList<>(proxyOnly.size()); + + for (final MemberSlice member : proxyOnly) { + if (member.isCircuitOpen()) { + outcomes.add(Fault.MemberOutcome.threw( + member.name(), Fault.MemberOutcome.Kind.CIRCUIT_OPEN, null + )); + completeProxyIfAllExhausted( + pending, completed, outcomes, result, negCacheKey + ); + continue; + } + final CompletableFuture mf = queryMemberDirect( + member, line, headers, requestBytes + ); + memberFutures.add(mf); + mf.whenComplete((resp, err) -> { + if (err != null) { + handleProxyMemberFailure( + member, err, completed, pending, outcomes, result, negCacheKey + ); + } else { + handleProxyMemberResponse( + member, resp, completed, pending, outcomes, result, + negCacheKey, memberFutures + ); + } + }); + } + + return result; + }); + } + + /** + * Handle a response from a proxy member in the fanout. 
+ */ + private void handleProxyMemberResponse( + final MemberSlice member, + final Response resp, + final AtomicBoolean completed, + final AtomicInteger pending, + final List outcomes, + final CompletableFuture result, + final Key negCacheKey, + final List> memberFutures + ) { + final RsStatus status = resp.status(); + if (status == RsStatus.OK || status == RsStatus.PARTIAL_CONTENT + || status == RsStatus.NOT_MODIFIED) { + outcomes.add(Fault.MemberOutcome.responded( + member.name(), Fault.MemberOutcome.Kind.OK, resp + )); + if (completed.compareAndSet(false, true)) { + member.recordSuccess(); + // Cancel remaining futures + for (final CompletableFuture f : memberFutures) { + if (!f.isDone()) { + f.cancel(true); + } + } + result.complete(resp); + } else { + drainBody(member.name(), resp.body()); + } + completeProxyIfAllExhausted(pending, completed, outcomes, result, negCacheKey); + } else if (status == RsStatus.NOT_FOUND) { + outcomes.add(Fault.MemberOutcome.responded( + member.name(), Fault.MemberOutcome.Kind.NOT_FOUND, resp + )); + drainBody(member.name(), resp.body()); + completeProxyIfAllExhausted(pending, completed, outcomes, result, negCacheKey); + } else { + // 5xx or other error status + outcomes.add(Fault.MemberOutcome.responded( + member.name(), Fault.MemberOutcome.Kind.FIVE_XX, resp + )); + member.recordFailure(); + // Do NOT drain body -- FaultTranslator may pass it through + completeProxyIfAllExhausted(pending, completed, outcomes, result, negCacheKey); + } + } + + /** + * Handle member query failure in the proxy fanout. 
+ */ + private void handleProxyMemberFailure( + final MemberSlice member, + final Throwable err, + final AtomicBoolean completed, + final AtomicInteger pending, + final List outcomes, + final CompletableFuture result, + final Key negCacheKey + ) { + if (err instanceof CancellationException) { + outcomes.add(Fault.MemberOutcome.threw( + member.name(), Fault.MemberOutcome.Kind.CANCELLED, err + )); + } else { + outcomes.add(Fault.MemberOutcome.threw( + member.name(), Fault.MemberOutcome.Kind.EXCEPTION, err + )); + member.recordFailure(); + } + completeProxyIfAllExhausted(pending, completed, outcomes, result, negCacheKey); + } + + /** + * Complete the proxy fanout result when all members are exhausted. + * + *

      Policy: + *

        + *
      • All 404 / circuit-open / cancelled -> cache negative + 404 [PATH A]
      • + *
      • Any 5xx / exception (no 2xx) -> AllProxiesFailed [PATH B]
      • + *
      + */ + private void completeProxyIfAllExhausted( + final AtomicInteger pending, + final AtomicBoolean completed, + final List outcomes, + final CompletableFuture result, + final Key negCacheKey + ) { + if (pending.decrementAndGet() == 0 && !completed.get()) { + final boolean anyFiveXxOrException = outcomes.stream() + .anyMatch(o -> o.kind() == Fault.MemberOutcome.Kind.FIVE_XX + || o.kind() == Fault.MemberOutcome.Kind.EXCEPTION); + if (anyFiveXxOrException) { + // PATH B: AllProxiesFailed -- pass-through best 5xx + final Optional winning = + FaultTranslator.pickWinningFailure(outcomes); + final Fault.AllProxiesFailed fault = new Fault.AllProxiesFailed( + this.group, List.copyOf(outcomes), winning + ); + result.complete(FaultTranslator.translate(fault, null)); + } else { + // PATH A: all 404 / skipped / cancelled -- cache negative + this.negativeCache.cacheNotFound(negCacheKey); + EcsLogger.debug("com.auto1.pantera.group") + .message("All proxies returned 404, caching negative result") + .eventCategory("database") + .eventAction("group_negative_cache_populate") + .log(); + result.complete(ResponseBuilder.notFound().build()); + } + } + } + + /** + * Full two-phase fanout -- safety net when artifact name cannot be parsed + * or index is not configured. Hosted members first, then proxy. + */ + private CompletableFuture fullTwoPhaseFanout( + final RequestLine line, + final Headers headers, + final Content body + ) { + final List eligible = filterByRoutingRules(line.uri().getPath()); + if (eligible.isEmpty()) { + return CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); + } + return queryHostedFirstThenProxy(eligible, line, headers, body); + } + + /** + * Two-phase: hosted first, then proxy. 
+ */ + private CompletableFuture queryHostedFirstThenProxy( + final List targeted, + final RequestLine line, + final Headers headers, + final Content body + ) { + final List hosted = targeted.stream() + .filter(m -> !m.isProxy()) + .toList(); + final List proxy = targeted.stream() + .filter(MemberSlice::isProxy) + .toList(); + if (hosted.isEmpty() || proxy.isEmpty()) { + return queryTargetedMembers(targeted, line, headers, body, false); + } + return queryTargetedMembers(hosted, line, headers, body, false) + .thenCompose(resp -> { + if (resp.status().success()) { + return CompletableFuture.completedFuture(resp); + } + return queryTargetedMembers(proxy, line, headers, body, false); + }); + } + + /** + * Query a list of members in parallel -- the classic fanout path. + * Used for full two-phase fanout only (not the indexed path). + */ + private CompletableFuture queryTargetedMembers( + final List targeted, + final RequestLine line, + final Headers headers, + final Content body, + final boolean isTargetedLocalRead + ) { + return body.asBytesFuture().thenCompose(requestBytes -> { + final CompletableFuture result = new CompletableFuture<>(); + final AtomicBoolean completed = new AtomicBoolean(false); + final AtomicInteger pending = new AtomicInteger(targeted.size()); + final AtomicBoolean anyServerError = new AtomicBoolean(false); + final List> memberFutures = + new ArrayList<>(targeted.size()); + + for (final MemberSlice member : targeted) { + if (!isTargetedLocalRead && member.isCircuitOpen()) { + completeFanoutIfAllExhausted( + pending, completed, anyServerError, result, isTargetedLocalRead + ); + continue; + } + final CompletableFuture mf = queryMemberDirect( + member, line, headers, requestBytes + ); + memberFutures.add(mf); + mf.whenComplete((resp, err) -> { + if (err != null) { + if (!(err instanceof CancellationException)) { + member.recordFailure(); + anyServerError.set(true); + } + completeFanoutIfAllExhausted( + pending, completed, anyServerError, result, 
isTargetedLocalRead + ); + } else { + handleFanoutMemberResponse( + member, resp, completed, pending, anyServerError, + result, isTargetedLocalRead, memberFutures + ); + } + }); + } + + result.whenComplete((resp, err) -> { + for (final CompletableFuture f : memberFutures) { + if (!f.isDone()) { + f.cancel(true); + } + } + }); + + return result; + }); + } + + /** + * Handle a response from a member in the fanout path. + */ + private void handleFanoutMemberResponse( + final MemberSlice member, + final Response resp, + final AtomicBoolean completed, + final AtomicInteger pending, + final AtomicBoolean anyServerError, + final CompletableFuture result, + final boolean isTargetedLocalRead, + final List> memberFutures + ) { + final RsStatus status = resp.status(); + if (status == RsStatus.OK || status == RsStatus.PARTIAL_CONTENT + || status == RsStatus.NOT_MODIFIED) { + if (completed.compareAndSet(false, true)) { + member.recordSuccess(); + result.complete(resp); + } else { + drainBody(member.name(), resp.body()); + } + completeFanoutIfAllExhausted( + pending, completed, anyServerError, result, isTargetedLocalRead + ); + } else if (status == RsStatus.FORBIDDEN) { + if (completed.compareAndSet(false, true)) { + member.recordSuccess(); + result.complete(resp); + } else { + drainBody(member.name(), resp.body()); + } + completeFanoutIfAllExhausted( + pending, completed, anyServerError, result, isTargetedLocalRead + ); + } else if (status == RsStatus.NOT_FOUND) { + drainBody(member.name(), resp.body()); + completeFanoutIfAllExhausted( + pending, completed, anyServerError, result, isTargetedLocalRead + ); + } else { + member.recordFailure(); + anyServerError.set(true); + drainBody(member.name(), resp.body()); + completeFanoutIfAllExhausted( + pending, completed, anyServerError, result, isTargetedLocalRead + ); + } + } + + /** + * Complete the fanout result when all members are exhausted. 
+ */ + private static void completeFanoutIfAllExhausted( + final AtomicInteger pending, + final AtomicBoolean completed, + final AtomicBoolean anyServerError, + final CompletableFuture result, + final boolean isTargetedLocalRead + ) { + if (pending.decrementAndGet() == 0 && !completed.get()) { + if (anyServerError.get()) { + if (isTargetedLocalRead) { + result.complete(ResponseBuilder.internalError() + .textBody("Targeted member read failed").build()); + } else { + result.complete(ResponseBuilder.badGateway() + .textBody("All upstream members failed").build()); + } + } else { + result.complete(ResponseBuilder.notFound().build()); + } + } + } + + /** + * Query a single member directly. + */ + private CompletableFuture queryMemberDirect( + final MemberSlice member, + final RequestLine line, + final Headers headers, + final byte[] requestBytes + ) { + final Content memberBody = requestBytes.length > 0 + ? new Content.From(requestBytes) + : Content.EMPTY; + final RequestLine rewritten = member.rewritePath(line); + final Headers memberHeaders = dropFullPathHeader(headers) + .copy() + .add(new Header(EcsLoggingSlice.INTERNAL_ROUTING_HEADER, "true")); + return member.slice().response(rewritten, memberHeaders, memberBody); + } + + /** + * Drain response body on per-repo background executor from {@link com.auto1.pantera.http.resilience.RepoBulkhead}. 
+ */ + private void drainBody(final String memberName, final Content body) { + this.drainExecutor.execute(() -> + body.subscribe(new org.reactivestreams.Subscriber<>() { + @Override + public void onSubscribe(final org.reactivestreams.Subscription sub) { + sub.request(Long.MAX_VALUE); + } + + @Override + public void onNext(final java.nio.ByteBuffer item) { + // Discard + } + + @Override + public void onError(final Throwable err) { + // Drain failures are not actionable + } + + @Override + public void onComplete() { + // Body fully consumed + } + }) + ); + } + + private static Headers dropFullPathHeader(final Headers headers) { + return new Headers( + headers.asList().stream() + .filter(h -> !h.getKey().equalsIgnoreCase("X-FullPath")) + .toList() + ); + } + + private List filterByRoutingRules(final String path) { + if (this.routingRules.isEmpty()) { + return this.members; + } + final Set ruledMembers = this.routingRules.stream() + .map(RoutingRule::member) + .collect(Collectors.toSet()); + final Set matchedMembers = this.routingRules.stream() + .filter(rule -> rule.matches(path)) + .map(RoutingRule::member) + .collect(Collectors.toSet()); + return this.members.stream() + .filter(m -> matchedMembers.contains(m.name()) + || !ruledMembers.contains(m.name())) + .toList(); + } + + // ---- Metrics helpers ---- + + private void recordRequestStart() { + final com.auto1.pantera.metrics.GroupResolverMetrics metrics = + com.auto1.pantera.metrics.GroupResolverMetrics.instance(); + if (metrics != null) { + metrics.recordRequest(this.group); + } + } + + private void recordMetrics( + final Response resp, final Throwable err, final long startTime + ) { + final long duration = System.currentTimeMillis() - startTime; + if (err != null) { + recordGroupRequest("error", duration); + } else if (resp.status().success()) { + recordGroupRequest("success", duration); + } else { + recordGroupRequest("not_found", duration); + } + } + + private void recordGroupRequest(final String result, final 
long duration) { + if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { + com.auto1.pantera.metrics.MicrometerMetrics.getInstance() + .recordGroupRequest(this.group, result); + com.auto1.pantera.metrics.MicrometerMetrics.getInstance() + .recordGroupResolutionDuration(this.group, duration); + } + } +} diff --git a/pantera-main/src/main/java/com/auto1/pantera/group/GroupSlice.java b/pantera-main/src/main/java/com/auto1/pantera/group/GroupSlice.java deleted file mode 100644 index e7accce80..000000000 --- a/pantera-main/src/main/java/com/auto1/pantera/group/GroupSlice.java +++ /dev/null @@ -1,1321 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. - */ -package com.auto1.pantera.group; - -import com.auto1.pantera.asto.Content; -import com.auto1.pantera.asto.Key; -import com.auto1.pantera.cache.NegativeCacheConfig; -import com.auto1.pantera.http.Headers; -import com.auto1.pantera.http.Response; -import com.auto1.pantera.http.ResponseBuilder; -import com.auto1.pantera.http.RsStatus; -import com.auto1.pantera.http.Slice; -import com.auto1.pantera.http.cache.NegativeCache; -import com.auto1.pantera.http.rq.RequestLine; -import com.auto1.pantera.http.log.EcsLogger; -import com.auto1.pantera.http.headers.Header; -import com.auto1.pantera.http.slice.EcsLoggingSlice; -import com.auto1.pantera.http.slice.KeyFromPath; -import com.auto1.pantera.index.ArtifactIndex; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.LinkedBlockingQueue; -import 
java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.CancellationException; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Function; -import java.util.stream.Collectors; - -import com.auto1.pantera.http.timeout.AutoBlockRegistry; -import com.auto1.pantera.http.trace.MdcPropagation; - -/** - * High-performance group/virtual repository slice. - * - *

      Drop-in replacement for old batched implementation with: - *

        - *
      • Flat member list - nested groups deduplicated at construction
      • - *
      • Full parallelism - ALL members queried simultaneously
      • - *
      • Resource safety - ALL response bodies consumed (winner + losers)
      • - *
      • Race-safe - AtomicBoolean for winner selection
      • - *
      • Fast-fail - first successful response wins immediately
      • - *
      • Failure isolation - circuit breakers per member
      • - *
      - * - *

      Performance: 250+ req/s, p50=50ms, p99=300ms, zero leaks - * - * @since 1.18.22 - */ -public final class GroupSlice implements Slice { - - /** - * Background executor for draining non-winning member response bodies. - * Decoupled from the result path: drain failures and backpressure never affect - * the winning response delivered to the client. - * - *

      16 threads, bounded queue of 2000. When full, new drain tasks are logged and dropped. - * Each thread is daemon so it does not prevent JVM shutdown. - */ - private static final ExecutorService DRAIN_EXECUTOR; - - /** - * Count of drain tasks rejected because the drain queue was full. - * Each drop represents a response body that will not be drained until the - * upstream/Jetty idle-timeout closes the connection β€” a potential slow - * connection leak. Monitor this counter: sustained growth indicates the - * drain pool needs tuning (more threads / larger queue) or that upstream - * latency is producing more losers than we can drain in parallel. - */ - private static final AtomicLong DRAIN_DROP_COUNT = new AtomicLong(); - - static { - final ThreadPoolExecutor pool = new ThreadPoolExecutor( - 16, 16, - 60L, TimeUnit.SECONDS, - new LinkedBlockingQueue<>(2000), - r -> { - final Thread t = new Thread(r, "group-drain-" + System.identityHashCode(r)); - t.setDaemon(true); - return t; - }, - (r, executor) -> { - final long dropped = DRAIN_DROP_COUNT.incrementAndGet(); - EcsLogger.warn("com.auto1.pantera.group") - .message( - "Drain queue full, discarding drain task β€” " - + "possible response body leak (total drops: " + dropped + ")" - ) - .eventCategory("network") - .eventAction("body_drain") - .eventOutcome("failure") - .field("event.reason", "Drain executor queue saturated") - .field("pantera.drain.drop_count", dropped) - .log(); - final com.auto1.pantera.metrics.GroupSliceMetrics metrics = - com.auto1.pantera.metrics.GroupSliceMetrics.instance(); - if (metrics != null) { - metrics.recordDrainDropped(); - } - } - ); - DRAIN_EXECUTOR = pool; - EcsLogger.info("com.auto1.pantera.group") - .message("GroupSlice drain executor initialised (16 threads, queue=2000)") - .eventCategory("configuration") - .eventAction("group_init") - .log(); - } - - /** - * Total count of drain tasks dropped because the drain queue was saturated. 
- * Exposed for metrics integration and tests. - * - * @return monotonic total of rejected drain tasks since JVM start - */ - public static long drainDropCount() { - return DRAIN_DROP_COUNT.get(); - } - - /** - * Group repository name. - */ - private final String group; - - /** - * Flattened member slices with circuit breakers. - */ - private final List members; - - /** - * Routing rules for directing paths to specific members. - */ - private final List routingRules; - - /** - * Optional artifact index for O(1) group lookups. - */ - private final Optional artifactIndex; - - /** - * Repository type for adapter-aware name parsing (e.g., "maven-group", "npm-group"). - * Used by {@link ArtifactNameParser} to extract artifact name from URL path. - */ - private final String repoType; - - /** - * Names of members that are proxy repositories. - * Proxy members must always be queried on index miss because their - * content is only indexed after being cached. - */ - private final Set proxyMembers; - - /** - * Negative cache for proxy fanout results. - *

      Key: {@code Key.From("groupName:artifactName")}. Presence = confirmed 404 from all proxies. - * Prevents thundering herd when many clients request a missing artifact concurrently. - * TTL-based expiry β€” stale entries self-correct within the TTL window. - *

      Backed by the shared two-tier {@link NegativeCache} (L1 Caffeine + L2 Valkey - * when configured under {@code meta.caches.group-negative}). Defaults to the - * in-memory 5 min TTL, 10K-entry single-tier cache when YAML wiring is absent. - */ - private final NegativeCache negativeCache; - - /** - * In-flight proxy-only fanouts keyed by {@code group:artifactName}. - * - *

      Serves as a request coalescer: when N concurrent requests arrive for - * the same missing artifact, the first registers a "gate" future here and - * runs the fanout. Late arrivals find the gate already present and wait - * on it instead of starting their own fanout, then retry - * {@link #proxyOnlyFanout} once the first has completed. - * - *

      On the retry, the negative cache now holds the result so followers - * return 404 immediately without touching the network. The combination of - * coalescer + negative cache collapses a thundering herd of N concurrent - * misses into exactly ONE upstream fanout. - * - *

      This coalescer deliberately does NOT share the winning {@link Response} - * object across callers: {@link Content} is a one-shot reactive stream that - * cannot be subscribed to twice. Instead, followers re-enter - * {@code proxyOnlyFanout} and either hit the freshly-populated negative - * cache (404) or retry the fanout (which is cheap when the upstream proxy - * has cached the bytes). - */ - private final ConcurrentMap> inFlightFanouts = - new ConcurrentHashMap<>(); - - /** - * Request context carried through the async call chain for log messages. - * - *

      Historically carried client IP, username, and trace ID β€” but all of - * those now live in MDC (set by {@link EcsLoggingSlice} / adapter slices) - * and are emitted automatically by EcsLayout. Adding them here produced - * duplicate fields. The only remaining use is the package/artifact path - * included in the "Artifact not found" warning message. - * - * @param packageName Package/artifact path being requested (URL path). - */ - private record RequestContext(String packageName) { - /** - * Build a context for the given request path. - * - * @param path Request path (used as the package name in log messages) - * @return RequestContext - */ - static RequestContext from(final String path) { - return new RequestContext(path); - } - } - - /** - * Constructor (maintains old API for drop-in compatibility). - * - * @param resolver Slice resolver/cache - * @param group Group repository name - * @param members Member repository names - * @param port Server port - */ - public GroupSlice( - final SliceResolver resolver, - final String group, - final List members, - final int port - ) { - this(resolver, group, members, port, 0, 0, - Collections.emptyList(), Optional.empty(), Collections.emptySet()); - } - - /** - * Constructor with depth (for API compatibility, depth ignored). - * - * @param resolver Slice resolver/cache - * @param group Group repository name - * @param members Member repository names - * @param port Server port - * @param depth Nesting depth (ignored) - */ - public GroupSlice( - final SliceResolver resolver, - final String group, - final List members, - final int port, - final int depth - ) { - this(resolver, group, members, port, depth, 0, - Collections.emptyList(), Optional.empty(), Collections.emptySet(), ""); - } - - /** - * Constructor with depth and timeout. 
- */ - public GroupSlice( - final SliceResolver resolver, - final String group, - final List members, - final int port, - final int depth, - final long timeoutSeconds - ) { - this(resolver, group, members, port, depth, timeoutSeconds, - Collections.emptyList(), Optional.empty(), Collections.emptySet(), ""); - } - - /** - * Constructor with depth, timeout, routing rules, and artifact index (backward compatible). - */ - @SuppressWarnings("PMD.ExcessiveParameterList") - public GroupSlice( - final SliceResolver resolver, - final String group, - final List members, - final int port, - final int depth, - final long timeoutSeconds, - final List routingRules, - final Optional artifactIndex - ) { - this(resolver, group, members, port, depth, timeoutSeconds, - routingRules, artifactIndex, Collections.emptySet(), ""); - } - - /** - * Backward-compatible constructor without repoType. - */ - @SuppressWarnings("PMD.ExcessiveParameterList") - public GroupSlice( - final SliceResolver resolver, - final String group, - final List members, - final int port, - final int depth, - final long timeoutSeconds, - final List routingRules, - final Optional artifactIndex, - final Set proxyMembers - ) { - this(resolver, group, members, port, depth, timeoutSeconds, - routingRules, artifactIndex, proxyMembers, ""); - } - - /** - * Backward-compatible constructor that builds a default in-memory negative cache. - * See {@link #defaultNegativeCache(String)} for default parameters - * (5 min TTL, 10K entries, L1-only). 
- * - * @param resolver Slice resolver/cache - * @param group Group repository name - * @param members Member repository names - * @param port Server port - * @param depth Nesting depth (ignored) - * @param timeoutSeconds Timeout for member requests - * @param routingRules Routing rules for path-based member selection - * @param artifactIndex Optional artifact index for O(1) lookups - * @param proxyMembers Names of members that are proxy repositories - * @param repoType Repository type for name parsing (e.g., "maven-group") - */ - @SuppressWarnings("PMD.ExcessiveParameterList") - public GroupSlice( - final SliceResolver resolver, - final String group, - final List members, - final int port, - final int depth, - final long timeoutSeconds, - final List routingRules, - final Optional artifactIndex, - final Set proxyMembers, - final String repoType - ) { - this( - resolver, group, members, port, depth, timeoutSeconds, - routingRules, artifactIndex, proxyMembers, repoType, - defaultNegativeCache(group) - ); - } - - /** - * Full constructor with proxy member awareness, repo type, and an explicit - * {@link NegativeCache}. Lets callers inject a YAML-configured two-tier cache - * (L1 Caffeine + L2 Valkey) loaded via - * {@link NegativeCacheConfig#fromYaml(com.amihaiemil.eoyaml.YamlMapping, String)}. - * - * @param resolver Slice resolver/cache - * @param group Group repository name - * @param members Member repository names - * @param port Server port - * @param depth Nesting depth (ignored) - * @param timeoutSeconds Timeout for member requests - * @param routingRules Routing rules for path-based member selection - * @param artifactIndex Optional artifact index for O(1) lookups - * @param proxyMembers Names of members that are proxy repositories - * @param repoType Repository type for name parsing (e.g., "maven-group") - * @param negativeCache Pre-constructed negative cache (e.g. 
YAML-driven two-tier) - */ - @SuppressWarnings("PMD.ExcessiveParameterList") - public GroupSlice( - final SliceResolver resolver, - final String group, - final List members, - final int port, - final int depth, - final long timeoutSeconds, - final List routingRules, - final Optional artifactIndex, - final Set proxyMembers, - final String repoType, - final NegativeCache negativeCache - ) { - this.group = Objects.requireNonNull(group, "group"); - this.repoType = repoType != null ? repoType : ""; - this.routingRules = routingRules != null ? routingRules : Collections.emptyList(); - this.artifactIndex = artifactIndex != null ? artifactIndex : Optional.empty(); - this.proxyMembers = proxyMembers != null ? proxyMembers : Collections.emptySet(); - this.negativeCache = negativeCache != null - ? negativeCache - : defaultNegativeCache(this.group); - - // Deduplicate members while preserving order - final List flatMembers = new ArrayList<>(new LinkedHashSet<>(members)); - - // Create MemberSlice wrappers with circuit breakers and proxy flags - this.members = flatMembers.stream() - .map(name -> new MemberSlice( - name, - resolver.slice(new Key.From(name), port, 0), - this.proxyMembers.contains(name) - )) - .toList(); - - EcsLogger.debug("com.auto1.pantera.group") - .message("GroupSlice initialized with members (" - + this.members.size() + " unique, " - + members.size() + " total, " - + this.proxyMembers.size() + " proxies)") - .eventCategory("configuration") - .eventAction("group_init") - .log(); - } - - /** - * Full constructor with shared per-member circuit-breaker registries. - * The {@code registrySupplier} is called once per member name to obtain (or create) - * the shared {@link AutoBlockRegistry} for that upstream. This allows a single - * registry to be shared across all groups that reference the same physical member, - * so circuit-breaker trips/recoveries are consolidated rather than per-group. 
- * - * @param resolver Slice resolver/cache - * @param group Group repository name - * @param members Member repository names - * @param port Server port - * @param depth Nesting depth (ignored) - * @param timeoutSeconds Timeout for member requests - * @param routingRules Routing rules for path-based member selection - * @param artifactIndex Optional artifact index for O(1) lookups - * @param proxyMembers Names of members that are proxy repositories - * @param repoType Repository type for name parsing (e.g., "maven-group") - * @param negativeCache Pre-constructed negative cache (e.g. YAML-driven two-tier) - * @param registrySupplier Function mapping member name to its shared AutoBlockRegistry - */ - @SuppressWarnings("PMD.ExcessiveParameterList") - public GroupSlice( - final SliceResolver resolver, - final String group, - final List members, - final int port, - final int depth, - final long timeoutSeconds, - final List routingRules, - final Optional artifactIndex, - final Set proxyMembers, - final String repoType, - final NegativeCache negativeCache, - final Function registrySupplier - ) { - this.group = Objects.requireNonNull(group, "group"); - this.repoType = repoType != null ? repoType : ""; - this.routingRules = routingRules != null ? routingRules : Collections.emptyList(); - this.artifactIndex = artifactIndex != null ? artifactIndex : Optional.empty(); - this.proxyMembers = proxyMembers != null ? proxyMembers : Collections.emptySet(); - this.negativeCache = negativeCache != null - ? negativeCache - : defaultNegativeCache(this.group); - - // Deduplicate members while preserving order - final List flatMembers = new ArrayList<>(new LinkedHashSet<>(members)); - final Function supplier = - registrySupplier != null ? 
registrySupplier : n -> null; - - // Create MemberSlice wrappers with shared circuit-breaker registries - this.members = flatMembers.stream() - .map(name -> { - final AutoBlockRegistry reg = supplier.apply(name); - if (reg != null) { - return new MemberSlice( - name, - resolver.slice(new Key.From(name), port, 0), - reg, - this.proxyMembers.contains(name) - ); - } - return new MemberSlice( - name, - resolver.slice(new Key.From(name), port, 0), - this.proxyMembers.contains(name) - ); - }) - .toList(); - - EcsLogger.debug("com.auto1.pantera.group") - .message("GroupSlice initialized with members (" - + this.members.size() + " unique, " - + members.size() + " total, " - + this.proxyMembers.size() + " proxies," - + " shared registries: " + (registrySupplier != null) + ")") - .eventCategory("configuration") - .eventAction("group_init") - .log(); - } - - /** - * Build the default in-memory-only negative cache used when no YAML wiring - * is supplied. Matches the pre-YAML behaviour exactly: 5 min TTL, 10K entries, - * no Valkey. Kept as a static helper so tests and callers without settings - * access still get a working cache. - * - * @param group Group name used as the {@code repoName} for cache-key isolation - * @return L1-only negative cache (5 min TTL, 10K entries) - */ - private static NegativeCache defaultNegativeCache(final String group) { - final NegativeCacheConfig config = new NegativeCacheConfig( - java.time.Duration.ofMinutes(5), - 10_000, - false, - NegativeCacheConfig.DEFAULT_L1_MAX_SIZE, - NegativeCacheConfig.DEFAULT_L1_TTL, - NegativeCacheConfig.DEFAULT_L2_MAX_SIZE, - NegativeCacheConfig.DEFAULT_L2_TTL - ); - return new NegativeCache( - "group-negative", - group != null ? 
group : "default", - config - ); - } - - @Override - public CompletableFuture response( - final RequestLine line, - final Headers headers, - final Content body - ) { - final String method = line.method().value(); - final String path = line.uri().getPath(); - - // Allow read operations (GET, HEAD) - // Allow POST for npm audit endpoints (/-/npm/v1/security/*) - final boolean isReadOperation = "GET".equals(method) || "HEAD".equals(method); - final boolean isNpmAudit = "POST".equals(method) && path.contains("/-/npm/v1/security/"); - - if (!isReadOperation && !isNpmAudit) { - return CompletableFuture.completedFuture( - ResponseBuilder.methodNotAllowed().build() - ); - } - - if (this.members.isEmpty()) { - return CompletableFuture.completedFuture( - ResponseBuilder.notFound().build() - ); - } - - // Extract request context (carries the URL path for log messages) - final RequestContext ctx = RequestContext.from(path); - - recordRequestStart(); - final long requestStartTime = System.currentTimeMillis(); - - // ---- Path 1: No index configured OR unparseable URL β†’ full two-phase fanout ---- - if (this.artifactIndex.isEmpty()) { - return fullTwoPhaseFanout(line, headers, body, ctx) - .whenComplete(MdcPropagation.withMdcBiConsumer( - (resp, err) -> recordMetrics(resp, err, requestStartTime) - )); - } - final ArtifactIndex idx = this.artifactIndex.get(); - final Optional parsedName = - ArtifactNameParser.parse(this.repoType, path); - if (parsedName.isEmpty()) { - // Metadata endpoint / root path / unknown adapter β†’ safety net - EcsLogger.debug("com.auto1.pantera.group") - .message("Name unparseable, using full two-phase fanout") - .eventCategory("web") - .eventAction("group_direct_fanout") - .field("url.path", path) - .log(); - return fullTwoPhaseFanout(line, headers, body, ctx) - .whenComplete(MdcPropagation.withMdcBiConsumer( - (resp, err) -> recordMetrics(resp, err, requestStartTime) - )); - } - - // ---- Path 2: Query index ---- - return 
idx.locateByName(parsedName.get()) - .thenCompose(MdcPropagation.withMdc(optRepos -> { - if (optRepos.isEmpty()) { - // DB error β†’ full two-phase fanout safety net - EcsLogger.warn("com.auto1.pantera.group") - .message("Index DB error, using full fanout safety net") - .eventCategory("database") - .eventAction("group_index_error") - .eventOutcome("failure") - .field("url.path", path) - .log(); - return fullTwoPhaseFanout(line, headers, body, ctx); - } - final List repos = optRepos.get(); - if (repos.isEmpty()) { - // Confirmed miss β†’ proxy-only fanout - return proxyOnlyFanout(line, headers, body, ctx, parsedName.get()); - } - // ---- Path 3: Index hit β†’ targeted local read ---- - return targetedLocalRead(repos, line, headers, body, ctx); - })) - .whenComplete(MdcPropagation.withMdcBiConsumer( - (resp, err) -> recordMetrics(resp, err, requestStartTime) - )); - } - - private void recordMetrics( - final Response resp, final Throwable err, final long startTime - ) { - final long duration = System.currentTimeMillis() - startTime; - if (err != null) { - recordGroupRequest("error", duration); - } else if (resp.status().success()) { - recordGroupRequest("success", duration); - } else { - recordGroupRequest("not_found", duration); - } - } - - /** - * Path 3: Index hit β†’ query the member(s) directly. - * - *

      No circuit breaker check. No fallback fanout on 5xx. Artifact bytes - * are local (hosted upload or proxy cache) β€” if the targeted member fails, - * no one else has them, so we surface a genuine 500 to the client. - * - *

      404 is treated as authoritative (stale index scenario) and returned - * as-is β€” we do NOT fall back to a proxy, because the index says the - * artifact lives on this member. - */ - private CompletableFuture targetedLocalRead( - final List repos, - final RequestLine line, final Headers headers, final Content body, - final RequestContext ctx - ) { - final Set wanted = new HashSet<>(repos); - final List targeted = this.members.stream() - .filter(m -> wanted.contains(m.name())) - .toList(); - if (targeted.isEmpty()) { - EcsLogger.debug("com.auto1.pantera.group") - .message("Index hit references repo not in flattened member list" - + " β€” falling through to full fanout safety net") - .eventCategory("web") - .eventAction("group_index_orphan") - .field("url.path", line.uri().getPath()) - .log(); - return fullTwoPhaseFanout(line, headers, body, ctx); - } - EcsLogger.debug("com.auto1.pantera.group") - .message("Index hit via name: targeting " - + targeted.size() + " member(s)") - .eventCategory("web") - .eventAction("group_index_hit") - .field("url.path", line.uri().getPath()) - .log(); - return queryTargetedMembers(targeted, line, headers, body, ctx, true); - } - - /** - * Path 4: Index confirmed miss β†’ proxy-only fanout. - * - *

      Hosted repos are fully indexed, so absence from the index means - * absence from hosted β€” we only query proxy members (whose content is - * indexed lazily on first cache). - */ - private CompletableFuture proxyOnlyFanout( - final RequestLine line, final Headers headers, final Content body, - final RequestContext ctx, final String artifactName - ) { - final Key cacheKey = new Key.From(this.group + ":" + artifactName); - if (this.negativeCache.isNotFound(cacheKey)) { - EcsLogger.debug("com.auto1.pantera.group") - .message("Negative cache hit β€” returning 404 without fanout") - .eventCategory("database") - .eventAction("group_negative_cache_hit") - .field("url.path", line.uri().getPath()) - .log(); - return CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); - } - final List proxyOnly = this.members.stream() - .filter(MemberSlice::isProxy) - .toList(); - if (proxyOnly.isEmpty()) { - EcsLogger.debug("com.auto1.pantera.group") - .message("Index miss with no proxy members, returning 404" - + " (name: " + artifactName + ")") - .eventCategory("web") - .eventAction("group_index_miss") - .field("url.path", line.uri().getPath()) - .log(); - return CompletableFuture.completedFuture( - ResponseBuilder.notFound().build() - ); - } - - // ---- Request coalescing: collapse concurrent misses into ONE fanout ---- - // The dedup key combines group + artifact name. If a fanout is already in - // flight for this key, followers park on the existing "gate" future and - // retry proxyOnlyFanout when it completes β€” by which point the negative - // cache will be populated (404 case) or the upstream proxy will have - // cached the bytes (200 case), making the retry very cheap. 
- final String dedupKey = this.group + ":" + artifactName; - final CompletableFuture freshGate = new CompletableFuture<>(); - final CompletableFuture existingGate = - this.inFlightFanouts.putIfAbsent(dedupKey, freshGate); - if (existingGate != null) { - // Follower: another request is already fanning out for this artifact. - // Wait for it to finish, then re-enter proxyOnlyFanout β€” the negative - // cache check at the top will short-circuit to 404 in the miss case. - // - // CRITICAL: use thenComposeAsync, NOT thenCompose. The leader - // completes the gate BEFORE removing it from inFlightFanouts - // (see whenComplete below β€” intentional ordering to avoid a - // separate putIfAbsent race). If the gate is already completed - // when the follower calls .thenCompose, the callback runs - // synchronously on the same stack; the retry then hits the SAME - // (still-present) gate and recurses, blowing the stack with a - // StackOverflowError before the leader's remove() can run. - // thenComposeAsync dispatches the retry to the common pool so - // the leader's whenComplete queue can drain remove() first. 
- EcsLogger.debug("com.auto1.pantera.group") - .message("Coalescing with in-flight fanout for " + artifactName) - .eventCategory("web") - .eventAction("group_fanout_coalesce") - .field("url.path", line.uri().getPath()) - .log(); - return existingGate.thenComposeAsync(MdcPropagation.withMdc( - ignored -> this.proxyOnlyFanout(line, headers, body, ctx, artifactName) - )); - } - - EcsLogger.debug("com.auto1.pantera.group") - .message("Index miss: fanning out to " - + proxyOnly.size() + " proxy member(s) only" - + " (name: " + artifactName + ")") - .eventCategory("network") - .eventAction("group_index_miss") - .field("url.path", line.uri().getPath()) - .log(); - return queryTargetedMembers(proxyOnly, line, headers, body, ctx, false) - .thenApply(MdcPropagation.withMdcFunction(resp -> { - if (resp.status() == RsStatus.NOT_FOUND) { - this.negativeCache.cacheNotFound(cacheKey); - EcsLogger.debug("com.auto1.pantera.group") - .message("Cached negative result for artifact") - .eventCategory("database") - .eventAction("group_negative_cache_populate") - .log(); - } - return resp; - })) - .whenComplete((resp, err) -> { - // Complete the gate BEFORE removing from the map. - // This closes the race window where a late request could arrive - // between remove() and complete(): if we removed first, the late - // request's putIfAbsent would succeed (empty map) and start a - // second fanout β€” defeating coalescing. By completing first, any - // concurrent follower that read the gate before removal sees it - // already done; any late request that arrives after completion - // will do putIfAbsent against the still-present (but completed) - // gate, observe it's done, and short-circuit through the negative - // cache check on retry. - freshGate.complete(null); - this.inFlightFanouts.remove(dedupKey, freshGate); - }); - } - - /** - * Path 5: Full two-phase fanout β€” hosted first, then proxy. - * - *

      Used as a safety net when the artifact name could not be parsed or - * the index DB returned an error. Applies routing rules. - */ - private CompletableFuture fullTwoPhaseFanout( - final RequestLine line, final Headers headers, final Content body, - final RequestContext ctx - ) { - final List eligible = this.filterByRoutingRules(line.uri().getPath()); - if (eligible.isEmpty()) { - return CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); - } - return queryHostedFirstThenProxy(eligible, line, headers, body, ctx); - } - - /** - * Query a list of members in parallel. - * - *

      When {@code isTargetedLocalRead} is {@code true} (path 3 β€” index hit), - * the circuit breaker is bypassed and a 5xx from any member surfaces as a - * {@code 500 Internal Error} to the client (no fallback bytes elsewhere). - * - *

      When {@code false} (path 4 / path 5 β€” fanout), circuit-open members are - * skipped and a 5xx from every member surfaces as {@code 502 Bad Gateway}. - */ - private CompletableFuture queryTargetedMembers( - final List targeted, - final RequestLine line, - final Headers headers, - final Content body, - final RequestContext ctx, - final boolean isTargetedLocalRead - ) { - final long startTime = System.currentTimeMillis(); - final Key pathKey = new KeyFromPath(line.uri().getPath()); - - return body.asBytesFuture().thenCompose(requestBytes -> { - final CompletableFuture result = new CompletableFuture<>(); - final AtomicBoolean completed = new AtomicBoolean(false); - final AtomicInteger pending = new AtomicInteger(targeted.size()); - final AtomicBoolean anyServerError = new AtomicBoolean(false); - final List> memberFutures = - new ArrayList<>(targeted.size()); - - for (MemberSlice member : targeted) { - // Circuit breaker applies only on the fanout path. On the - // targeted local read path, the index says the bytes live on - // this exact member β€” we MUST attempt the read and surface any - // failure instead of masking it with a "circuit open" skip. 
- if (!isTargetedLocalRead && member.isCircuitOpen()) { - EcsLogger.warn("com.auto1.pantera.group") - .message("Member circuit OPEN, skipping: " + member.name()) - .eventCategory("network") - .eventAction("group_query") - .eventOutcome("unknown") - .field("event.reason", "skipped") - .field("destination.address", member.name()) - .log(); - completeIfAllExhausted( - pending, completed, anyServerError, result, ctx, isTargetedLocalRead - ); - continue; - } - final CompletableFuture memberFuture = - queryMemberDirect(member, line, headers, requestBytes, ctx); - memberFutures.add(memberFuture); - memberFuture.whenComplete((resp, err) -> { - if (err != null) { - handleMemberFailure( - member, err, completed, pending, - anyServerError, result, ctx, isTargetedLocalRead - ); - } else { - handleMemberResponse( - member, resp, completed, pending, anyServerError, - result, startTime, pathKey, ctx, isTargetedLocalRead - ); - } - }); - } - - result.whenComplete((resp, err) -> { - for (CompletableFuture future : memberFutures) { - if (!future.isDone()) { - future.cancel(true); - } - } - }); - - return result; - }); - } - - /** - * Full two-phase fanout: query hosted members first; if all miss, - * cascade to proxy members. - * - *

      This is the safety-net path (unparseable URL, DB error, or index - * returned names not present in the flattened member list). Running - * hosted first prevents a proxy from "claiming" a package name that - * exists on the upstream registry (e.g. PyPI.org) but has zero published - * files, when a hosted member has a locally-uploaded version with real - * files. - * - *

      If all members are the same type (all hosted or all proxy), this - * degrades to a single parallel query. - * - *

      Always runs on the fanout path ({@code isTargetedLocalRead=false}): - * circuit-open members are skipped and any 5xx surfaces as 502. - */ - private CompletableFuture queryHostedFirstThenProxy( - final List targeted, - final RequestLine line, - final Headers headers, - final Content body, - final RequestContext ctx - ) { - final List hosted = targeted.stream() - .filter(m -> !m.isProxy()) - .toList(); - final List proxy = targeted.stream() - .filter(MemberSlice::isProxy) - .toList(); - // No partition possible β€” use standard parallel query - if (hosted.isEmpty() || proxy.isEmpty()) { - return queryTargetedMembers(targeted, line, headers, body, ctx, false); - } - // Try hosted first; fall to proxy only if hosted yields no 200 - return queryTargetedMembers(hosted, line, headers, body, ctx, false) - .thenCompose(MdcPropagation.withMdc(resp -> { - if (resp.status().success()) { - return CompletableFuture.completedFuture(resp); - } - // Hosted members didn't have it β€” try proxy members - EcsLogger.debug("com.auto1.pantera.group") - .message("Hosted miss, cascading to " - + proxy.size() + " proxy member(s)") - .eventCategory("network") - .eventAction("group_cascade_to_proxy") - .log(); - return queryTargetedMembers(proxy, line, headers, body, ctx, false); - })); - } - - /** - * Query a single member directly (no negative cache check). - * Used for index-targeted queries where we already know the member has the artifact. - * - *

      Adds {@value EcsLoggingSlice#INTERNAL_ROUTING_HEADER} to the member request so - * that the member's {@code EcsLoggingSlice} suppresses its access log entry. - * The header is group-internal and does NOT leak to upstream remotes because proxy - * slice implementations forward {@code Headers.EMPTY} to their upstream clients. - */ - private CompletableFuture queryMemberDirect( - final MemberSlice member, - final RequestLine line, - final Headers headers, - final byte[] requestBytes, - final RequestContext ctx - ) { - - final Content memberBody = requestBytes.length > 0 - ? new Content.From(requestBytes) - : Content.EMPTY; - - final RequestLine rewritten = member.rewritePath(line); - final Headers memberHeaders = dropFullPathHeader(headers) - .copy() - .add(new Header(EcsLoggingSlice.INTERNAL_ROUTING_HEADER, "true")); - - return member.slice().response( - rewritten, - memberHeaders, - memberBody - ); - } - - /** - * Handle a response from a member. - * - *

      See {@link #completeIfAllExhausted} for the final status code policy - * when all members are exhausted without a winner. - */ - private void handleMemberResponse( - final MemberSlice member, - final Response resp, - final AtomicBoolean completed, - final AtomicInteger pending, - final AtomicBoolean anyServerError, - final CompletableFuture result, - final long startTime, - final Key pathKey, - final RequestContext ctx, - final boolean isTargetedLocalRead - ) { - final RsStatus status = resp.status(); - - // Success: 200 OK, 206 Partial Content, or 304 Not Modified - if (status == RsStatus.OK - || status == RsStatus.PARTIAL_CONTENT - || status == RsStatus.NOT_MODIFIED) { - if (completed.compareAndSet(false, true)) { - final long latency = System.currentTimeMillis() - startTime; - // Only log slow responses - if (latency > 1000) { - EcsLogger.warn("com.auto1.pantera.group") - .message("Slow member response: " + member.name()) - .eventCategory("network") - .eventAction("group_query") - .eventOutcome("success") - .field("destination.address", member.name()) - .duration(latency) - .log(); - } - member.recordSuccess(); - recordSuccess(member.name(), latency); - recordGroupMemberRequest(member.name(), "success"); - recordGroupMemberLatency(member.name(), "success", latency); - result.complete(resp); - } else { - EcsLogger.debug("com.auto1.pantera.group") - .message("Member '" + member.name() - + "' returned success but another member already won") - .eventCategory("network") - .eventAction("group_query") - .field("destination.address", member.name()) - .field("http.response.status_code", status.code()) - .log(); - drainBody(member.name(), resp.body()); - } - // Always decrement the global pending counter regardless of win/lose. - // Two-phase completion: 502/404 only fires when ALL futures have reported - // and !completed β€” prevents fast-failing proxies from racing ahead of a - // slow-but-cached local member and completing the result with 502. 
- completeIfAllExhausted( - pending, completed, anyServerError, result, ctx, isTargetedLocalRead - ); - } else if (status == RsStatus.FORBIDDEN) { - // Blocked/cooldown: propagate 403 to client (artifact exists but is blocked) - if (completed.compareAndSet(false, true)) { - EcsLogger.debug("com.auto1.pantera.group") - .message("Member '" + member.name() - + "' returned FORBIDDEN (cooldown/blocked)") - .eventCategory("network") - .eventAction("group_query") - .eventOutcome("success") - .field("destination.address", member.name()) - .field("http.response.status_code", 403) - .log(); - member.recordSuccess(); // Not a failure - valid response - result.complete(resp); - } else { - drainBody(member.name(), resp.body()); - } - // Always decrement (same two-phase logic as 2xx success above) - completeIfAllExhausted( - pending, completed, anyServerError, result, ctx, isTargetedLocalRead - ); - } else if (status == RsStatus.NOT_FOUND) { - // 404: try next member β€” individual miss is DEBUG noise, not actionable - EcsLogger.debug("com.auto1.pantera.group") - .message("Group member " + member.name() - + " does not have " + pathKey.string()) - .eventCategory("web") - .eventAction("group_fanout_miss") - .eventOutcome("success") - .field("destination.address", member.name()) - .field("url.path", pathKey.string()) - .log(); - recordGroupMemberRequest(member.name(), "not_found"); - drainBody(member.name(), resp.body()); - completeIfAllExhausted( - pending, completed, anyServerError, result, ctx, isTargetedLocalRead - ); - } else { - // Server errors (500, 503, etc.): record failure, try next member - EcsLogger.warn("com.auto1.pantera.group") - .message("Member '" + member.name() - + "' returned error status (" + (pending.get() - 1) + " pending)") - .eventCategory("network") - .eventAction("group_query") - .eventOutcome("failure") - .field("event.reason", "HTTP " + status.code() + " from member") - .field("destination.address", member.name()) - .field("http.response.status_code", 
status.code()) - .log(); - member.recordFailure(); - anyServerError.set(true); - recordGroupMemberRequest(member.name(), "error"); - drainBody(member.name(), resp.body()); - completeIfAllExhausted( - pending, completed, anyServerError, result, ctx, isTargetedLocalRead - ); - } - } - - /** - * Handle member query failure (exception thrown). - * - *

      Cancellation edge case (intentional trade-off): when the result - * has already been completed by another member, we call - * {@code future.cancel(true)} on all remaining member futures (see - * {@link #queryTargetedMembers}). If a Response happened to arrive on a - * cancelled future before its {@code whenComplete} fired, its body is not - * drained here β€” we never held the Response object. In practice this is - * low-risk: the underlying transport (Jetty/Vert.x HTTP client) eventually - * closes the connection via idle-timeout, so any undrained response is a - * transient socket-level leak rather than a permanent resource leak. - * Tracking the in-flight Response for drain would add a layer of wrapping - * futures and per-member atomic state for a scenario that has not been - * observed in production instrumentation; we accept the trade-off. - */ - private void handleMemberFailure( - final MemberSlice member, - final Throwable err, - final AtomicBoolean completed, - final AtomicInteger pending, - final AtomicBoolean anyServerError, - final CompletableFuture result, - final RequestContext ctx, - final boolean isTargetedLocalRead - ) { - if (err instanceof CancellationException) { - // Another member won the race and cancelled this future. - // This is not a real upstream failure β€” do not trip the circuit breaker. - // See Javadoc above for the "undrained Response" edge case and why we - // don't try to drain here (we never hold the Response object, and - // Jetty idle-timeout reclaims the connection). 
- completeIfAllExhausted( - pending, completed, anyServerError, result, ctx, isTargetedLocalRead - ); - return; - } - EcsLogger.warn("com.auto1.pantera.group") - .message("Member query failed: " + member.name()) - .eventCategory("network") - .eventAction("group_query") - .eventOutcome("failure") - .error(err) - .field("event.reason", - "Member request threw " + err.getClass().getSimpleName()) - .field("destination.address", member.name()) - .log(); - member.recordFailure(); - anyServerError.set(true); - completeIfAllExhausted( - pending, completed, anyServerError, result, ctx, isTargetedLocalRead - ); - } - - /** - * Complete the result future if all members have been exhausted. - * - *

      Final status code policy: - *

        - *
      • Targeted local read (index hit) + any 5xx β†’ {@code 500 Internal Error} - * β€” artifact bytes are local, nobody else has them, this is a real local failure.
      • - *
      • Fanout (miss / DB error / unparseable) + any 5xx β†’ {@code 502 Bad Gateway} - * β€” we ARE proxying, so a bad upstream is correctly a bad gateway.
      • - *
      • All members cleanly 404 β†’ {@code 404 Not Found}.
      • - *
      - * - *

      Note: 503 is no longer emitted by group resolution. The old - * {@code anyCircuitOpen β†’ 503} path has been removed; circuit-open skips - * on the fanout path simply cause the request to fall through to other - * members or, if all are skipped/miss, produce a plain 404. - */ - private void completeIfAllExhausted( - final AtomicInteger pending, - final AtomicBoolean completed, - final AtomicBoolean anyServerError, - final CompletableFuture result, - final RequestContext ctx, - final boolean isTargetedLocalRead - ) { - if (pending.decrementAndGet() == 0 && !completed.get()) { - if (anyServerError.get()) { - if (isTargetedLocalRead) { - EcsLogger.warn("com.auto1.pantera.group") - .message("Targeted member failed on index hit, returning 500") - .eventCategory("web") - .eventAction("group_query") - .eventOutcome("failure") - .field("event.reason", - "Index-hit member failed; bytes are local but read errored" - + " β€” no fallback") - .field("http.response.status_code", 500) - .log(); - result.complete(ResponseBuilder.internalError() - .textBody("Targeted member read failed").build()); - } else { - EcsLogger.warn("com.auto1.pantera.group") - .message("All members exhausted with upstream errors, returning 502") - .eventCategory("network") - .eventAction("group_query") - .eventOutcome("failure") - .field("event.reason", - "All proxy upstreams returned 5xx or threw") - .field("http.response.status_code", 502) - .log(); - result.complete(ResponseBuilder.badGateway() - .textBody("All upstream members failed").build()); - } - } else { - EcsLogger.warn("com.auto1.pantera.group") - .message("Artifact not found in any group member: " - + ctx.packageName()) - .eventCategory("web") - .eventAction("group_lookup_miss") - .eventOutcome("failure") - .log(); - recordNotFound(); - result.complete(ResponseBuilder.notFound().build()); - } - } - } - - /** - * Drain response body on the background drain executor to prevent connection leak. - * - *

      Fully decoupled from the result path: submitted to {@link #DRAIN_EXECUTOR} and - * returns immediately. Drain failures and backpressure never block or affect the - * winning response delivered to the client. Uses streaming discard to avoid OOM on - * large responses (e.g., npm typescript ~30MB). - */ - private void drainBody(final String memberName, final Content body) { - final String group = this.group; - DRAIN_EXECUTOR.execute(() -> - body.subscribe(new org.reactivestreams.Subscriber<>() { - @Override - public void onSubscribe(final org.reactivestreams.Subscription sub) { - sub.request(Long.MAX_VALUE); - } - - @Override - public void onNext(final java.nio.ByteBuffer item) { - // Discard bytes - do not accumulate - } - - @Override - public void onError(final Throwable err) { - EcsLogger.debug("com.auto1.pantera.group") - .message("Failed to drain response body: " + memberName) - .eventCategory("network") - .eventAction("body_drain") - .eventOutcome("failure") - .field("destination.address", memberName) - .field("error.message", err.getMessage()) - .log(); - } - - @Override - public void onComplete() { - // Body fully consumed - connection returned to pool - } - }) - ); - } - - private static Headers dropFullPathHeader(final Headers headers) { - return new Headers( - headers.asList().stream() - .filter(h -> !h.getKey().equalsIgnoreCase("X-FullPath")) - .toList() - ); - } - - // Metrics helpers - - private void recordRequestStart() { - final com.auto1.pantera.metrics.GroupSliceMetrics metrics = - com.auto1.pantera.metrics.GroupSliceMetrics.instance(); - if (metrics != null) { - metrics.recordRequest(this.group); - } - } - - private void recordSuccess(final String member, final long latency) { - final com.auto1.pantera.metrics.GroupSliceMetrics metrics = - com.auto1.pantera.metrics.GroupSliceMetrics.instance(); - if (metrics != null) { - metrics.recordSuccess(this.group, member, latency); - metrics.recordBatch(this.group, this.members.size(), latency); - } - } 
- - private void recordNotFound() { - final com.auto1.pantera.metrics.GroupSliceMetrics metrics = - com.auto1.pantera.metrics.GroupSliceMetrics.instance(); - if (metrics != null) { - metrics.recordNotFound(this.group); - } - } - - private void recordGroupRequest(final String result, final long duration) { - if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { - com.auto1.pantera.metrics.MicrometerMetrics.getInstance() - .recordGroupRequest(this.group, result); - com.auto1.pantera.metrics.MicrometerMetrics.getInstance() - .recordGroupResolutionDuration(this.group, duration); - } - } - - private void recordGroupMemberRequest(final String memberName, final String result) { - if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { - com.auto1.pantera.metrics.MicrometerMetrics.getInstance() - .recordGroupMemberRequest(this.group, memberName, result); - } - } - - private void recordGroupMemberLatency( - final String memberName, final String result, final long latencyMs - ) { - if (com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { - com.auto1.pantera.metrics.MicrometerMetrics.getInstance() - .recordGroupMemberLatency(this.group, memberName, result, latencyMs); - } - } - - /** - * Filter members by routing rules for the given path. - * If no routing rules are configured, all members are returned. - * Members with matching routing rules are included. Members with - * no routing rules also participate (default: include). 
- * - * @param path Request path - * @return Filtered list of members to query - */ - private List filterByRoutingRules(final String path) { - if (this.routingRules.isEmpty()) { - return this.members; - } - // Collect members that have explicit routing rules - final Set ruledMembers = this.routingRules.stream() - .map(RoutingRule::member) - .collect(Collectors.toSet()); - // Collect members whose rules match this path - final Set matchedMembers = this.routingRules.stream() - .filter(rule -> rule.matches(path)) - .map(RoutingRule::member) - .collect(Collectors.toSet()); - // Include: members with matching rules + members with no rules (default include) - return this.members.stream() - .filter(m -> matchedMembers.contains(m.name()) - || !ruledMembers.contains(m.name())) - .toList(); - } -} diff --git a/pantera-main/src/main/java/com/auto1/pantera/group/MavenGroupSlice.java b/pantera-main/src/main/java/com/auto1/pantera/group/MavenGroupSlice.java index c685679e8..d5d43eb62 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/group/MavenGroupSlice.java +++ b/pantera-main/src/main/java/com/auto1/pantera/group/MavenGroupSlice.java @@ -12,28 +12,32 @@ import com.auto1.pantera.asto.Content; import com.auto1.pantera.asto.Key; +import com.auto1.pantera.group.merge.StreamingMetadataMerger; import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Response; import com.auto1.pantera.http.ResponseBuilder; import com.auto1.pantera.http.RsStatus; import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.context.ContextualExecutor; +import com.auto1.pantera.http.resilience.SingleFlight; import com.auto1.pantera.http.rq.RequestLine; import com.auto1.pantera.http.log.EcsLogger; -import com.auto1.pantera.http.trace.MdcPropagation; +import io.micrometer.core.instrument.DistributionSummary; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.net.URI; import java.net.URISyntaxException; import java.nio.ByteBuffer; +import 
java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ForkJoinPool; /** * Maven-specific group slice with metadata merging support. - * Extends basic GroupSlice behavior with Maven metadata aggregation. + * Extends basic GroupResolver behavior with Maven metadata aggregation. * *

      For maven-metadata.xml requests: *

        @@ -92,22 +96,30 @@ public final class MavenGroupSlice implements Slice { * *

        Serves as a request coalescer: when N concurrent requests arrive for * the same {@code maven-metadata.xml} with a cold L1+L2 cache, the first - * registers a "gate" future here and runs the full N-member fanout + - * merge. Late arrivals find the gate already present and park on it - * instead of starting their own fanout, then retry {@link #response} once - * the leader completes. On retry the L1 cache is warm, so followers - * return immediately without touching the network. The combination of - * coalescer + two-tier cache collapses a thundering herd of N concurrent - * misses into exactly ONE upstream fanout + merge β€” same pattern as - * {@code GroupSlice#proxyOnlyFanout}. + * installs a gate inside the {@link SingleFlight} and runs the full + * N-member fanout + merge. Late arrivals park on the gate and retry + * {@link #response} once the leader completes. On retry the L1 cache is + * warm, so followers return immediately without touching the network. + * The combination of coalescer + two-tier cache collapses a thundering + * herd of N concurrent misses into exactly ONE upstream fanout + merge β€” + * same pattern as {@code GroupResolver#proxyOnlyFanout}. * *

        This coalescer deliberately does NOT share the winning {@link Response} * object across callers: {@link Content} is a one-shot reactive stream * that cannot be subscribed to twice. Instead followers re-enter * {@code response()} and read the freshly-populated cache. + * + *

        {@link SingleFlight} replaces the hand-rolled {@code ConcurrentHashMap} + * dance from commit {@code b37deea2} β€” see Β§6.4 of + * {@code docs/analysis/v2.2-target-architecture.md} and A6/A7/A8/A9 in + * {@code v2.1.3-architecture-review.md}. */ - private final ConcurrentMap> inFlightMetadataFetches = - new ConcurrentHashMap<>(); + private final SingleFlight inFlightMetadataFetches = + new SingleFlight<>( + Duration.ofMinutes(5), + 10_000, + ContextualExecutor.contextualize(ForkJoinPool.commonPool()) + ); /** * Constructor. @@ -208,10 +220,10 @@ private CompletableFuture handleChecksumRequest( ); return mergeMetadata(metadataLine, headers, body, metadataPath) - .thenApply(MdcPropagation.withMdcFunction(metadataResponse -> { + .thenApply(metadataResponse -> { // Extract body from metadata response return metadataResponse.body().asBytesFuture() - .thenApply(MdcPropagation.withMdcFunction(metadataBytes -> { + .thenApply(metadataBytes -> { try { // Compute checksum final java.security.MessageDigest digest = java.security.MessageDigest.getInstance( @@ -220,14 +232,11 @@ private CompletableFuture handleChecksumRequest( final byte[] checksumBytes = digest.digest(metadataBytes); // Convert to hex string - final StringBuilder hexString = new StringBuilder(); - for (byte b : checksumBytes) { - hexString.append(String.format("%02x", b)); - } + final String hex = java.util.HexFormat.of().formatHex(checksumBytes); return ResponseBuilder.ok() .header("Content-Type", "text/plain") - .body(hexString.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8)) + .body(hex.getBytes(java.nio.charset.StandardCharsets.UTF_8)) .build(); } catch (java.security.NoSuchAlgorithmException e) { EcsLogger.error("com.auto1.pantera.maven") @@ -242,8 +251,8 @@ private CompletableFuture handleChecksumRequest( .textBody("Failed to compute checksum") .build(); } - })); - })) + }); + }) .thenCompose(future -> future); } @@ -251,10 +260,11 @@ private CompletableFuture handleChecksumRequest( 
* Merge maven-metadata.xml from all members. * *

        Fast path: L1/L2 cache hit β†’ return cached bytes. Slow path: miss β†’ - * coalesce concurrent callers through {@link #inFlightMetadataFetches} so - * exactly one leader does the N-member fanout + merge while followers park - * on the leader's gate and re-enter {@code response()} once the cache is - * warm. See {@code GroupSlice#proxyOnlyFanout} for the same pattern. + * coalesce concurrent callers through the in-flight {@link SingleFlight} + * so exactly one leader does the N-member fanout + merge while followers + * park on the leader's gate and re-enter {@code response()} once the + * cache is warm. See {@code GroupSlice#proxyOnlyFanout} for the same + * pattern. */ private CompletableFuture mergeMetadata( final RequestLine line, @@ -265,7 +275,7 @@ private CompletableFuture mergeMetadata( final String cacheKey = path; // Check two-tier cache (L1 then L2 if miss) - return this.metadataCache.get(cacheKey).thenCompose(MdcPropagation.withMdc(cached -> { + return this.metadataCache.get(cacheKey).thenCompose(cached -> { if (cached.isPresent()) { // Cache HIT (L1 or L2) EcsLogger.debug("com.auto1.pantera.maven") @@ -285,54 +295,44 @@ private CompletableFuture mergeMetadata( } // Cache MISS: coalesce concurrent callers so only one does the - // N-member fanout + merge. See class-level field Javadoc. + // N-member fanout + merge. Leader-vs-follower is distinguished by + // a flag the loader sets on the caller's thread (Caffeine runs + // the bifunction synchronously for the first absent key). The + // leader does the real fetch + merge and returns the Response + // directly; followers park on the gate and re-enter response() + // once the L1 cache is warm β€” same pattern as + // {@code GroupSlice#proxyOnlyFanout}. SingleFlight handles zombie + // eviction and stack-flat completion (A6/A7/A8/A9, WI-05). 
final String dedupKey = this.group + ":" + path; - final CompletableFuture freshGate = new CompletableFuture<>(); - final CompletableFuture existingGate = - this.inFlightMetadataFetches.putIfAbsent(dedupKey, freshGate); - if (existingGate != null) { - // Follower: another request is already fetching+merging for - // this path. Wait for the leader's gate, then re-enter - // response() β€” by that time the L1 cache is warm so this - // retry is just a cache read. - // - // CRITICAL: use thenComposeAsync, NOT thenCompose. The - // leader completes the gate BEFORE removing it from - // inFlightMetadataFetches (see whenComplete below β€” - // intentional ordering to close a putIfAbsent race). If the - // gate is already completed when the follower calls - // thenCompose, the callback runs synchronously on the same - // stack; the retry then hits the SAME (still-present) gate - // and would recurse, blowing the stack with - // StackOverflowError before the leader's remove() runs. - // thenComposeAsync dispatches the retry to the common pool - // so the leader's whenComplete queue can drain remove() - // first. Same fix as commit 7c30f01f in GroupSlice. 
- EcsLogger.debug("com.auto1.pantera.maven") - .message("Coalescing with in-flight metadata fetch") - .eventCategory("web") - .eventAction("metadata_fetch_coalesce") - .field("repository.name", this.group) - .field("url.path", path) - .log(); - return existingGate.thenComposeAsync(MdcPropagation.withMdc( - ignored -> this.response(line, headers, body) - )); + final boolean[] isLeader = {false}; + final CompletableFuture leaderGate = new CompletableFuture<>(); + final CompletableFuture gate = this.inFlightMetadataFetches.load( + dedupKey, + () -> { + isLeader[0] = true; + return leaderGate; + } + ); + if (isLeader[0]) { + return fetchAndMergeFromMembers(line, headers, path, cacheKey) + .whenComplete( + (resp, err) -> leaderGate.complete(null) + ); } - - // Leader: do the actual fetch + merge; complete then remove the - // gate in whenComplete so followers observe completion first. - return fetchAndMergeFromMembers(line, headers, path, cacheKey) - .whenComplete(MdcPropagation.withMdcBiConsumer((resp, err) -> { - // Complete the gate BEFORE removing from the map β€” same - // reasoning as GroupSlice#proxyOnlyFanout: closes the race - // window where a late request arriving between remove() - // and complete() could observe an empty map and start a - // second fanout, defeating coalescing. - freshGate.complete(null); - this.inFlightMetadataFetches.remove(dedupKey, freshGate); - })); - })); + EcsLogger.debug("com.auto1.pantera.maven") + .message("Coalescing with in-flight metadata fetch") + .eventCategory("web") + .eventAction("metadata_fetch_coalesce") + .field("repository.name", this.group) + .field("url.path", path) + .log(); + // Follower: re-enter response() once the gate resolves. Swallow + // any exception the gate might carry β€” the L1/L2 cache is the + // source of truth on retry. 
+ return gate.exceptionally(err -> null).thenCompose( + ignored -> this.response(line, headers, body) + ); + }); } /** @@ -349,7 +349,7 @@ private CompletableFuture fetchAndMergeFromMembers( // Cache MISS - fetch and merge from members // CRITICAL: Consume original body to prevent OneTimePublisher errors // GET requests for maven-metadata.xml have empty bodies, but Content is still reference-counted - return CompletableFuture.completedFuture((byte[]) null).thenCompose(MdcPropagation.withMdc(requestBytes -> { + return CompletableFuture.completedFuture((byte[]) null).thenCompose(requestBytes -> { // Track fetch duration separately from merge duration final long fetchStartTime = System.currentTimeMillis(); @@ -370,7 +370,7 @@ private CompletableFuture fetchAndMergeFromMembers( final CompletableFuture memberFuture = memberSlice .response(memberLine, dropFullPathHeader(headers), Content.EMPTY) - .thenCompose(MdcPropagation.withMdc(resp -> { + .thenCompose(resp -> { if (resp.status() == RsStatus.OK) { return readResponseBody(resp.body()); } else { @@ -378,8 +378,8 @@ private CompletableFuture fetchAndMergeFromMembers( return resp.body().asBytesFuture() .thenApply(ignored -> (byte[]) null); } - })) - .exceptionally(MdcPropagation.withMdcFunction(err -> { + }) + .exceptionally(err -> { EcsLogger.warn("com.auto1.pantera.maven") .message("Member failed to fetch metadata: " + member) .eventCategory("web") @@ -389,14 +389,14 @@ private CompletableFuture fetchAndMergeFromMembers( .error(err) .log(); return null; - })); + }); futures.add(memberFuture); } // Wait for all members and merge results return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])) - .thenCompose(MdcPropagation.withMdc(v -> { + .thenCompose(v -> { final List metadataList = new ArrayList<>(); for (CompletableFuture future : futures) { final byte[] metadata = future.getNow(null); @@ -411,7 +411,7 @@ private CompletableFuture fetchAndMergeFromMembers( if (metadataList.isEmpty()) { // All members 
failed β€” try last-known-good stale fallback return MavenGroupSlice.this.metadataCache.getStale(cacheKey) - .thenApply(MdcPropagation.withMdcFunction(stale -> { + .thenApply(stale -> { if (stale.isPresent()) { EcsLogger.warn("com.auto1.pantera.maven") .message("Returning stale metadata (all members failed)") @@ -438,16 +438,16 @@ private CompletableFuture fetchAndMergeFromMembers( .field("event.duration", fetchDuration) .log(); return ResponseBuilder.notFound().build(); - })); + }); } // Track merge duration separately (actual XML processing time) final long mergeStartTime = System.currentTimeMillis(); - // Use reflection to call MetadataMerger from maven-adapter module - // This avoids circular dependency issues - return mergeUsingReflection(metadataList) - .thenApply(MdcPropagation.withMdcFunction(mergedBytes -> { + // StAX streaming merge β€” see {@link StreamingMetadataMerger}. + // Peak heap is O(unique versions), not O(sum of body sizes). + return mergeStreaming(metadataList) + .thenApply(mergedBytes -> { final long mergeDuration = System.currentTimeMillis() - mergeStartTime; final long totalDuration = fetchDuration + mergeDuration; @@ -487,9 +487,9 @@ private CompletableFuture fetchAndMergeFromMembers( .header("Content-Type", "application/xml") .body(mergedBytes) .build(); - })); - })) - .exceptionally(MdcPropagation.withMdcFunction(err -> { + }); + }) + .exceptionally(err -> { // Unwrap CompletionException to get the real cause final Throwable cause = err.getCause() != null ? err.getCause() : err; EcsLogger.error("com.auto1.pantera.maven") @@ -504,46 +504,58 @@ private CompletableFuture fetchAndMergeFromMembers( return ResponseBuilder.internalError() .textBody("Failed to merge metadata: " + cause.getMessage()) .build(); - })); - })); + }); + }); } /** - * Merge metadata using MetadataMerger from maven-adapter via reflection. - * This allows pantera-main to call maven-adapter without circular dependency. + * Merge metadata via the StAX streaming merger. 
+ * + *

        Each member byte array is fed once into a single + * {@link StreamingMetadataMerger}; only the deduplicated version set + * + scalar maxes survive past the {@code mergeMember} call. Peak + * memory is O(unique versions), not O(sum of body sizes). + * + *

        Per-member body size is recorded as the alert-only histogram + * {@code pantera.maven.group.member_metadata_size_bytes} (when + * Micrometer is initialised). + * + *

        The returned future is always successful: the merger tolerates + * malformed members internally and emits a minimal {@code } + * if every member parse failed. */ - private CompletableFuture mergeUsingReflection(final List metadataList) { - try { - // Load MetadataMerger class - final Class mergerClass = Class.forName( - "com.auto1.pantera.maven.metadata.MetadataMerger" - ); - - // Create instance - final Object merger = mergerClass - .getConstructor(List.class) - .newInstance(metadataList); - - // Call merge() method - @SuppressWarnings("unchecked") - final CompletableFuture mergeFuture = (CompletableFuture) - mergerClass.getMethod("merge").invoke(merger); - - // Read content - return mergeFuture.thenCompose(this::readResponseBody); - - } catch (Exception e) { - EcsLogger.error("com.auto1.pantera.maven") - .message("Failed to merge metadata using reflection") - .eventCategory("web") - .eventAction("metadata_merge") - .eventOutcome("failure") - .error(e) - .log(); - return CompletableFuture.failedFuture( - new IllegalStateException("Maven metadata merging not available", e) - ); + private CompletableFuture mergeStreaming(final List metadataList) { + return CompletableFuture.supplyAsync(() -> { + final StreamingMetadataMerger merger = new StreamingMetadataMerger(); + for (final byte[] body : metadataList) { + if (body == null || body.length == 0) { + continue; + } + this.recordMemberBodySize(body.length); + merger.mergeMember(new ByteArrayInputStream(body)); + } + return merger.toXml(); + }); + } + + /** + * Alert-only histogram of per-member metadata body size. Never + * rejects; outliers are surfaced via the histogram's high quantiles. + * No-op when {@link com.auto1.pantera.metrics.MicrometerMetrics} is + * not initialised (e.g. unit tests). 
+ */ + private void recordMemberBodySize(final long bytes) { + if (!com.auto1.pantera.metrics.MicrometerMetrics.isInitialized()) { + return; } + DistributionSummary.builder("pantera.maven.group.member_metadata_size_bytes") + .description("Maven group member maven-metadata.xml body size (alert-only)") + .baseUnit("bytes") + .tags("repo_name", this.group) + .register( + com.auto1.pantera.metrics.MicrometerMetrics.getInstance().getRegistry() + ) + .record(bytes); } /** diff --git a/pantera-main/src/main/java/com/auto1/pantera/group/merge/StreamingMetadataMerger.java b/pantera-main/src/main/java/com/auto1/pantera/group/merge/StreamingMetadataMerger.java new file mode 100644 index 000000000..a74b403c0 --- /dev/null +++ b/pantera-main/src/main/java/com/auto1/pantera/group/merge/StreamingMetadataMerger.java @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.group.merge; + +import com.auto1.pantera.http.log.EcsLogger; +import org.apache.maven.artifact.versioning.ComparableVersion; + +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import javax.xml.stream.XMLStreamWriter; +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayDeque; +import java.util.Comparator; +import java.util.Deque; +import java.util.Objects; +import java.util.TreeSet; + +/** + * Streaming merger for Maven {@code maven-metadata.xml} across group members. + * + *

        Uses StAX to parse each member's response body incrementally; + * accumulates only the deduplicated {@code } set and the + * compare-and-keep-newest scalars ({@code }, {@code }, + * {@code }, {@code }). Peak memory is + * O(unique versions), not O(sum of body sizes). At 1000 req/s with + * modest distinct-version cardinality this is a fixed-size buffer + * regardless of per-member body size. + * + *

        Malformed or empty member bodies are skipped and logged at WARN + * with {@code event.reason=member_metadata_parse}; the merge of + * remaining members succeeds (partial-tolerance). + * + *

        Maven version ordering uses {@link ComparableVersion}, the same + * algorithm Maven CLI uses for dependency resolution. + * + *

        Not thread-safe β€” instantiate one merger per merge operation. + * + * @since 2.2.0 + */ +public final class StreamingMetadataMerger { + + /** + * Logger category β€” matches existing maven group slice events. + */ + private static final String LOG = "com.auto1.pantera.maven"; + + /** + * Shared, thread-safe StAX input factory. + * External entities disabled to defend against XXE. + */ + private static final XMLInputFactory INPUT_FACTORY = createInputFactory(); + + /** + * Shared, thread-safe StAX output factory. + */ + private static final XMLOutputFactory OUTPUT_FACTORY = XMLOutputFactory.newInstance(); + + /** + * Comparator that delegates to Maven's {@link ComparableVersion} β€” + * the same algorithm the Maven CLI uses for dependency resolution. + */ + private static final Comparator VERSION_CMP = mavenVersionComparator(); + + /** + * Deduplicated set of every {@code } discovered, sorted by + * Maven version semantics. + */ + private final TreeSet versions = new TreeSet<>(VERSION_CMP); + + /** + * {@code } from the first member that supplied one. + */ + private String groupId; + + /** + * {@code } from the first member that supplied one. + */ + private String artifactId; + + /** + * {@code } β€” kept as the max across members per + * {@link #VERSION_CMP}. + */ + private String latest; + + /** + * {@code } β€” kept as the max across members per + * {@link #VERSION_CMP}. + */ + private String release; + + /** + * {@code } in {@code yyyyMMddHHmmss} β€” string compare + * sorts lexicographically the same as time, so we just keep the max. + */ + private String lastUpdated; + + /** + * Latest {@code } block discovered; {@code null} until + * the first member with a snapshot timestamp parses. + */ + private SnapshotInfo snapshot; + + /** + * Number of members whose body was successfully parsed (for diagnostics). + */ + private int membersMerged; + + /** + * Number of members whose body parse failed (for diagnostics). 
+ */ + private int membersSkipped; + + /** + * Merge a single member's metadata body into the accumulated state. + * + *

        On any parse error the member is skipped and a WARN is logged; + * the merger remains usable for other members. The caller-supplied + * stream is closed. + * + * @param body Member response body, in {@code maven-metadata.xml} + * format. May be empty / null β€” both are no-ops. + */ + @SuppressWarnings({"PMD.AvoidCatchingGenericException", "PMD.CognitiveComplexity"}) + public void mergeMember(final InputStream body) { + if (body == null) { + this.membersSkipped++; + return; + } + XMLStreamReader reader = null; + try { + reader = INPUT_FACTORY.createXMLStreamReader(body, "UTF-8"); + this.parse(reader); + this.membersMerged++; + } catch (final XMLStreamException | RuntimeException ex) { + this.membersSkipped++; + EcsLogger.warn(LOG) + .message("Skipping malformed member metadata during streaming merge") + .eventCategory("web") + .eventAction("metadata_merge") + .eventOutcome("failure") + .field("event.reason", "member_metadata_parse") + .error(ex) + .log(); + } finally { + if (reader != null) { + try { + reader.close(); + } catch (final XMLStreamException ignore) { + // best effort + } + } + try { + body.close(); + } catch (final java.io.IOException ignore) { + // best effort + } + } + } + + /** + * Emit the merged metadata XML using a streaming writer. + * + *

        If no members were ever successfully merged, a minimal valid + * {@code } document is returned so downstream Maven + * clients see a parseable response rather than an empty body. + * + * @return Bytes of the merged {@code maven-metadata.xml} (UTF-8). + */ + @SuppressWarnings("PMD.AvoidCatchingGenericException") + public byte[] toXml() { + final ByteArrayOutputStream out = new ByteArrayOutputStream(1024); + XMLStreamWriter writer = null; + try { + writer = OUTPUT_FACTORY.createXMLStreamWriter(out, "UTF-8"); + writer.writeStartDocument("UTF-8", "1.0"); + writer.writeCharacters("\n"); + writer.writeStartElement("metadata"); + writer.writeCharacters("\n"); + if (this.groupId != null) { + writeLeaf(writer, " ", "groupId", this.groupId); + } + if (this.artifactId != null) { + writeLeaf(writer, " ", "artifactId", this.artifactId); + } + final boolean hasVersioning = + this.latest != null + || this.release != null + || this.lastUpdated != null + || this.snapshot != null + || !this.versions.isEmpty(); + if (hasVersioning) { + writer.writeCharacters(" "); + writer.writeStartElement("versioning"); + writer.writeCharacters("\n"); + if (this.latest != null) { + writeLeaf(writer, " ", "latest", this.latest); + } + if (this.release != null) { + writeLeaf(writer, " ", "release", this.release); + } + if (this.snapshot != null) { + writer.writeCharacters(" "); + writer.writeStartElement("snapshot"); + writer.writeCharacters("\n"); + if (this.snapshot.timestamp() != null) { + writeLeaf(writer, " ", "timestamp", this.snapshot.timestamp()); + } + if (this.snapshot.buildNumber() != null) { + writeLeaf( + writer, " ", "buildNumber", this.snapshot.buildNumber() + ); + } + writer.writeCharacters(" "); + writer.writeEndElement(); + writer.writeCharacters("\n"); + } + if (!this.versions.isEmpty()) { + writer.writeCharacters(" "); + writer.writeStartElement("versions"); + writer.writeCharacters("\n"); + for (final String v : this.versions) { + writeLeaf(writer, " ", "version", v); + } 
+ writer.writeCharacters(" "); + writer.writeEndElement(); + writer.writeCharacters("\n"); + } + if (this.lastUpdated != null) { + writeLeaf(writer, " ", "lastUpdated", this.lastUpdated); + } + writer.writeCharacters(" "); + writer.writeEndElement(); + writer.writeCharacters("\n"); + } + writer.writeEndElement(); + writer.writeCharacters("\n"); + writer.writeEndDocument(); + writer.flush(); + } catch (final XMLStreamException ex) { + throw new IllegalStateException("Failed to emit merged metadata XML", ex); + } finally { + if (writer != null) { + try { + writer.close(); + } catch (final XMLStreamException ignore) { + // best effort + } + } + } + return out.toByteArray(); + } + + /** + * @return Number of members whose body was successfully parsed. + */ + public int membersMerged() { + return this.membersMerged; + } + + /** + * @return Number of members whose body parse failed (and were skipped). + */ + public int membersSkipped() { + return this.membersSkipped; + } + + // ===== internals ===== + + @SuppressWarnings({"PMD.CognitiveComplexity", "PMD.CyclomaticComplexity", "PMD.NPathComplexity"}) + private void parse(final XMLStreamReader reader) throws XMLStreamException { + final Deque stack = new ArrayDeque<>(); + String snapshotTimestamp = null; + String snapshotBuildNumber = null; + boolean inSnapshot = false; + while (reader.hasNext()) { + final int event = reader.next(); + switch (event) { + case XMLStreamConstants.START_ELEMENT -> { + final String name = reader.getLocalName(); + stack.push(name); + if ("snapshot".equals(name) && inVersioning(stack)) { + inSnapshot = true; + snapshotTimestamp = null; + snapshotBuildNumber = null; + } + } + case XMLStreamConstants.END_ELEMENT -> { + final String name = stack.isEmpty() ? 
null : stack.pop(); + if ("snapshot".equals(name) && inSnapshot) { + inSnapshot = false; + if (snapshotTimestamp != null) { + this.maybeUpdateSnapshot(snapshotTimestamp, snapshotBuildNumber); + } + } + } + case XMLStreamConstants.CHARACTERS, XMLStreamConstants.CDATA -> { + if (stack.isEmpty()) { + continue; + } + final String tag = stack.peek(); + if ("groupId".equals(tag) && parentIs(stack, "metadata")) { + this.acceptGroupId(reader.getText()); + } else if ("artifactId".equals(tag) && parentIs(stack, "metadata")) { + this.acceptArtifactId(reader.getText()); + } else if ("version".equals(tag) && inVersionsList(stack)) { + final String v = trimToNull(reader.getText()); + if (v != null) { + this.versions.add(v); + } + } else if ("latest".equals(tag) && parentIs(stack, "versioning")) { + this.latest = pickNewerVersion(this.latest, reader.getText()); + } else if ("release".equals(tag) && parentIs(stack, "versioning")) { + this.release = pickNewerVersion(this.release, reader.getText()); + } else if ("lastUpdated".equals(tag) && parentIs(stack, "versioning")) { + final String v = trimToNull(reader.getText()); + if (v != null + && (this.lastUpdated == null || v.compareTo(this.lastUpdated) > 0)) { + this.lastUpdated = v; + } + } else if (inSnapshot && "timestamp".equals(tag)) { + final String t = trimToNull(reader.getText()); + if (t != null) { + snapshotTimestamp = t; + } + } else if (inSnapshot && "buildNumber".equals(tag)) { + final String b = trimToNull(reader.getText()); + if (b != null) { + snapshotBuildNumber = b; + } + } + } + default -> { + // ignore comments, PIs, whitespace + } + } + } + } + + private void acceptGroupId(final String text) { + final String v = trimToNull(text); + if (v == null) { + return; + } + if (this.groupId == null) { + this.groupId = v; + } else if (!this.groupId.equals(v)) { + EcsLogger.warn(LOG) + .message("Member metadata groupId mismatch β€” keeping first") + .eventCategory("web") + .eventAction("metadata_merge") + 
.field("event.reason", "groupid_mismatch") + .field("pantera.merge.groupid_kept", this.groupId) + .field("pantera.merge.groupid_other", v) + .log(); + } + } + + private void acceptArtifactId(final String text) { + final String v = trimToNull(text); + if (v == null) { + return; + } + if (this.artifactId == null) { + this.artifactId = v; + } else if (!this.artifactId.equals(v)) { + EcsLogger.warn(LOG) + .message("Member metadata artifactId mismatch β€” keeping first") + .eventCategory("web") + .eventAction("metadata_merge") + .field("event.reason", "artifactid_mismatch") + .field("pantera.merge.artifactid_kept", this.artifactId) + .field("pantera.merge.artifactid_other", v) + .log(); + } + } + + private void maybeUpdateSnapshot(final String ts, final String build) { + if (this.snapshot == null || ts.compareTo(this.snapshot.timestamp()) > 0) { + this.snapshot = new SnapshotInfo(ts, build); + } + } + + private static String pickNewerVersion(final String current, final String candidate) { + final String c = trimToNull(candidate); + if (c == null) { + return current; + } + if (current == null) { + return c; + } + return VERSION_CMP.compare(c, current) > 0 ? c : current; + } + + private static boolean parentIs(final Deque stack, final String parent) { + if (stack.size() < 2) { + return false; + } + final var it = stack.iterator(); + it.next(); + return parent.equals(it.next()); + } + + /** + * @return {@code true} if current open element chain contains + * {@code versioning} below the current top. + */ + private static boolean inVersioning(final Deque stack) { + for (final String name : stack) { + if ("versioning".equals(name)) { + return true; + } + } + return false; + } + + /** + * @return {@code true} if the current element is a {@code } + * child of {@code } child of {@code }. 
+ */ + private static boolean inVersionsList(final Deque stack) { + if (stack.size() < 3) { + return false; + } + final var it = stack.iterator(); + // top is the open , next must be , next + it.next(); + return "versions".equals(it.next()) && "versioning".equals(it.next()); + } + + private static String trimToNull(final String raw) { + if (raw == null) { + return null; + } + final String t = raw.trim(); + return t.isEmpty() ? null : t; + } + + private static void writeLeaf( + final XMLStreamWriter writer, + final String indent, + final String name, + final String value + ) throws XMLStreamException { + writer.writeCharacters(indent); + writer.writeStartElement(name); + writer.writeCharacters(value); + writer.writeEndElement(); + writer.writeCharacters("\n"); + } + + private static Comparator mavenVersionComparator() { + return (a, b) -> new ComparableVersion(Objects.requireNonNullElse(a, "")) + .compareTo(new ComparableVersion(Objects.requireNonNullElse(b, ""))); + } + + private static XMLInputFactory createInputFactory() { + final XMLInputFactory factory = XMLInputFactory.newInstance(); + // XXE hardening β€” Maven metadata never references external entities. + factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); + factory.setProperty(XMLInputFactory.SUPPORT_DTD, false); + factory.setProperty(XMLInputFactory.IS_COALESCING, true); + return factory; + } + + /** + * Convenience: serialise to UTF-8 string (for tests / debugging). + * + * @return Merged document as a UTF-8 string. + */ + public String toXmlString() { + return new String(this.toXml(), StandardCharsets.UTF_8); + } + + /** + * Snapshot-block contents. + * + * @param timestamp {@code } value; may not be {@code null}. + * @param buildNumber {@code } value; may be {@code null}. 
+ */ + private record SnapshotInfo(String timestamp, String buildNumber) { + } +} diff --git a/pantera-main/src/main/java/com/auto1/pantera/http/context/HandlerExecutor.java b/pantera-main/src/main/java/com/auto1/pantera/http/context/HandlerExecutor.java new file mode 100644 index 000000000..52d8a8253 --- /dev/null +++ b/pantera-main/src/main/java/com/auto1/pantera/http/context/HandlerExecutor.java @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.context; + +import com.auto1.pantera.http.misc.ConfigDefaults; +import java.util.Objects; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Shared worker pool for Vert.x HTTP API handlers. + * + *

        Every task submitted here carries the submitting thread's Log4j2 + * {@link org.apache.logging.log4j.ThreadContext} and the Elastic APM active + * span through to the runner thread, via + * {@link ContextualExecutor#contextualize(java.util.concurrent.Executor)}. + * + *

        Use as the executor argument to {@code CompletableFuture.*Async} + * in every {@code api/v1/} handler β€” do not use + * {@link java.util.concurrent.ForkJoinPool#commonPool()} (no context + * propagation) β€” context propagation is handled by + * {@link ContextualExecutor} at the pool boundary. + * + *

        §4.4 of {@code docs/analysis/v2.2-target-architecture.md} makes the + * pool boundary — not the per-call wrapper — responsible for + * propagating ECS fields + APM trace context. Each handler migrated under + * WI-post-03d submits its blocking callable via + * {@link java.util.concurrent.CompletableFuture#supplyAsync( + * java.util.function.Supplier, java.util.concurrent.Executor)} with + * {@link #get()} as the executor; the wrapper carries {@code trace.id}, + * {@code user.name}, {@code client.ip} and the APM span onto the worker + * thread automatically. + * + *

        Pool configuration

        + *
          + *
        • Threads: {@code max(4, availableProcessors())}. Tunable via + * system property {@code pantera.handler.executor.threads} / + * env var {@code PANTERA_HANDLER_EXECUTOR_THREADS}.
        • + *
        • Queue: bounded {@link ArrayBlockingQueue} of size + * {@value #DEFAULT_QUEUE_SIZE} (env override + * {@code PANTERA_HANDLER_EXECUTOR_QUEUE}). Bounded so a misbehaving + * DB makes handler backpressure visible (503 / RejectedExecution) + * rather than swallowing requests into an unbounded queue with + * increasing latency.
        • + *
        • Rejection policy: + * {@link ThreadPoolExecutor.AbortPolicy} β€” the caller sees a + * {@link java.util.concurrent.RejectedExecutionException} which + * {@code CompletableFuture.supplyAsync} wraps into a failed future, + * surfacing as HTTP 500 through the existing {@code .onFailure} + * paths. Callers that want graceful degradation can catch and map + * that to 503.
        • + *
        • Threads are daemon + named {@code pantera-handler-N} so + * they do not block JVM shutdown and stand out in thread dumps.
        • + *
        + * + *

        Singleton rationale

        + *

        A static holder (rather than DI) keeps the migration mechanical β€” + * each handler call-site flips from + * {@code ctx.vertx().executeBlocking(callable, false)} to + * {@code CompletableFuture.supplyAsync(supplier, HandlerExecutor.get())} + * without touching constructors or the {@code AsyncApiVerticle} wiring. + * The pool is JVM-scoped; we have one process per node and one handler + * chain per process, so a singleton is the right cardinality. + * + * @since 2.2.0 + */ +public final class HandlerExecutor { + + /** + * Minimum thread count regardless of CPU topology β€” small machines + * still need enough workers to avoid head-of-line blocking on a + * single blocking DB/auth call. + */ + private static final int MIN_THREADS = 4; + + /** + * Default queue size β€” env-overridable via + * {@code PANTERA_HANDLER_EXECUTOR_QUEUE}. 1000 slots is large enough + * to absorb typical UI bursts (dashboard refresh, paged user list) + * while still signalling overload on a genuine stall. + */ + private static final int DEFAULT_QUEUE_SIZE = 1000; + + /** + * Keep-alive in seconds for idle core threads. Core threads time out + * so the pool shrinks to 0 under zero load, avoiding needless + * preallocation on a freshly booted node. + */ + private static final long KEEP_ALIVE_SECONDS = 60L; + + /** + * Configured thread count (cached at class init, read from + * {@code PANTERA_HANDLER_EXECUTOR_THREADS} if present, otherwise + * {@code max(MIN_THREADS, availableProcessors())}). + */ + private static final int THREADS = ConfigDefaults.getInt( + "PANTERA_HANDLER_EXECUTOR_THREADS", + Math.max(MIN_THREADS, Runtime.getRuntime().availableProcessors()) + ); + + /** + * Configured queue size (cached at class init). 
+ */ + private static final int QUEUE_SIZE = ConfigDefaults.getInt( + "PANTERA_HANDLER_EXECUTOR_QUEUE", DEFAULT_QUEUE_SIZE + ); + + /** + * Underlying {@link ThreadPoolExecutor} β€” exposed as a package-private + * field so {@link #queueSize()} / {@link #activeCount()} can read + * diagnostic counters without casting the wrapped view. + */ + private static final ThreadPoolExecutor BACKING; + + /** + * Contextualised view of {@link #BACKING} β€” every task submitted + * through {@link #get()} has its caller's ThreadContext + APM span + * restored on the runner thread. + * + *

        While WI-post-03a lands a richer {@code ContextualExecutorService} + * that also wraps {@code submit}/{@code invokeAll}, this WI uses the + * {@link java.util.concurrent.Executor}-level wrapper which is + * sufficient for {@code CompletableFuture.*Async} β€” they all call + * {@link java.util.concurrent.Executor#execute(Runnable)} underneath. + * The coordinator will upgrade this call site to the full + * {@code ExecutorService} wrapper once WI-post-03a ships. + */ + private static final ExecutorService POOL; + + static { + BACKING = new ThreadPoolExecutor( + THREADS, + THREADS, + KEEP_ALIVE_SECONDS, + TimeUnit.SECONDS, + new ArrayBlockingQueue<>(QUEUE_SIZE), + new NamedDaemonThreadFactory("pantera-handler"), + new ThreadPoolExecutor.AbortPolicy() + ); + BACKING.allowCoreThreadTimeOut(true); + POOL = new ContextualExecutorAdapter(BACKING); + } + + private HandlerExecutor() { + // utility class; not instantiable + } + + /** + * Return the shared handler executor. + * + *

        Every Vert.x API handler submits its blocking callable via + * {@code CompletableFuture.supplyAsync(supplier, HandlerExecutor.get())} + * so the caller's ThreadContext and APM span propagate automatically. + * + * @return non-null executor service that wraps every submitted task + * with {@link ContextualExecutor#contextualize( + * java.util.concurrent.Executor)} + */ + public static ExecutorService get() { + return POOL; + } + + /** + * Current depth of the backing task queue. + * + *

        Exported for Micrometer / + * {@code pantera-main/src/main/java/com/auto1/pantera/metrics/} so + * operators can chart handler backpressure; also handy for diagnostic + * logs gated behind DEBUG. + * + * @return number of tasks waiting to run + */ + public static int queueSize() { + return BACKING.getQueue().size(); + } + + /** + * Approximate number of worker threads currently executing a task. + * + * @return count of actively-running workers + */ + public static int activeCount() { + return BACKING.getActiveCount(); + } + + /** + * Configured pool size β€” exposed for tests and diagnostics. + * + * @return the fixed thread count + */ + public static int poolSize() { + return THREADS; + } + + /** + * Configured queue capacity β€” exposed for the saturation test. + * + * @return max number of queued tasks + */ + public static int queueCapacity() { + return QUEUE_SIZE; + } + + /** + * {@link ThreadFactory} that produces daemon threads with a + * descriptive name prefix. + * + *

        Daemon so a stuck handler never holds up JVM shutdown; named so + * thread dumps immediately reveal which worker pool is saturated. + */ + private static final class NamedDaemonThreadFactory implements ThreadFactory { + + /** + * Monotonic counter for thread IDs, shared across the pool's + * lifetime β€” matches the convention of + * {@link Executors#defaultThreadFactory()} but with a + * human-readable prefix. + */ + private final AtomicInteger counter = new AtomicInteger(1); + + /** + * Name prefix; final full name is {@code prefix-N}. + */ + private final String prefix; + + /** + * Ctor. + * + * @param prefix descriptive prefix, e.g. {@code pantera-handler} + */ + NamedDaemonThreadFactory(final String prefix) { + this.prefix = Objects.requireNonNull(prefix, "prefix"); + } + + @Override + public Thread newThread(final Runnable run) { + final Thread thread = new Thread( + run, this.prefix + "-" + this.counter.getAndIncrement() + ); + thread.setDaemon(true); + return thread; + } + } + + /** + * Minimal {@link ExecutorService} facade that routes every submission + * through {@link ContextualExecutor#contextualize( + * java.util.concurrent.Executor)}. + * + *

        Scope: {@code CompletableFuture.*Async} and our + * handler call-sites only use {@link #execute(Runnable)}; the + * remaining {@link ExecutorService} methods delegate to the raw + * backing pool (no ThreadContext / APM propagation) and are retained + * only so this class honours the interface contract. + * + *

        When WI-post-03a lands {@code ContextualExecutorService} in + * {@code pantera-core/http/context/}, swap this adapter for a direct + * {@code ContextualExecutorService.wrap(backing)} call and delete + * this inner class. That fills in the {@code submit}/{@code invokeAll} + * propagation holes for free. + */ + private static final class ContextualExecutorAdapter + extends java.util.concurrent.AbstractExecutorService { + + /** + * Contextualising {@link java.util.concurrent.Executor} β€” the + * {@code execute(Runnable)} path used by every + * {@code CompletableFuture.*Async} call. + */ + private final java.util.concurrent.Executor contextual; + + /** + * Raw pool β€” owns lifecycle (shutdown / awaitTermination) and + * serves the {@code submit}/{@code invokeAll} fallbacks. + */ + private final ExecutorService backing; + + /** + * Ctor. + * + * @param pool raw backing thread pool; its lifecycle is owned + * by this adapter (shutdown / awaitTermination delegate) + */ + ContextualExecutorAdapter(final ExecutorService pool) { + this.backing = Objects.requireNonNull(pool, "pool"); + this.contextual = ContextualExecutor.contextualize(pool); + } + + @Override + public void execute(final Runnable task) { + this.contextual.execute(task); + } + + @Override + public void shutdown() { + this.backing.shutdown(); + } + + @Override + public java.util.List shutdownNow() { + return this.backing.shutdownNow(); + } + + @Override + public boolean isShutdown() { + return this.backing.isShutdown(); + } + + @Override + public boolean isTerminated() { + return this.backing.isTerminated(); + } + + @Override + public boolean awaitTermination(final long timeout, + final TimeUnit unit) throws InterruptedException { + return this.backing.awaitTermination(timeout, unit); + } + } +} diff --git a/pantera-main/src/main/java/com/auto1/pantera/index/DbArtifactIndex.java b/pantera-main/src/main/java/com/auto1/pantera/index/DbArtifactIndex.java index e649652f6..a61738844 100644 --- 
a/pantera-main/src/main/java/com/auto1/pantera/index/DbArtifactIndex.java +++ b/pantera-main/src/main/java/com/auto1/pantera/index/DbArtifactIndex.java @@ -12,7 +12,7 @@ import com.auto1.pantera.http.log.EcsLogger; import com.auto1.pantera.http.misc.ConfigDefaults; -import com.auto1.pantera.http.trace.TraceContextExecutor; +import com.auto1.pantera.http.context.ContextualExecutorService; import javax.sql.DataSource; import java.sql.Array; @@ -31,6 +31,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; @@ -184,9 +185,14 @@ public enum SortField { /** * Bounded queue capacity for the default executor. - * When the queue is full, {@link ThreadPoolExecutor.CallerRunsPolicy} executes - * the task on the submitting thread, propagating backpressure to callers instead - * of buffering unboundedly and OOM-ing the JVM under DB latency spikes. + * When the queue is full, {@link ThreadPoolExecutor.AbortPolicy} rejects further + * submissions with {@link java.util.concurrent.RejectedExecutionException}, which + * callers translate into a typed {@link com.auto1.pantera.http.fault.Fault.IndexUnavailable}. + * The previous {@link ThreadPoolExecutor.CallerRunsPolicy} applied backpressure by + * running the task on the submitting thread, but when that submitting thread was + * a Vert.x event-loop thread (e.g. a group-resolver request inlining the index + * call), the blocking JDBC work ran on the event loop and stalled the entire + * reactor. AbortPolicy keeps the event loop free and fails fast under saturation. * Configurable via PANTERA_INDEX_EXECUTOR_QUEUE env var. */ private static final int QUEUE_SIZE = @@ -216,8 +222,9 @@ public enum SortField { * Constructor with default executor. 
* Creates a bounded thread pool sized to available processors. * Uses a {@code QUEUE_SIZE}-slot {@link LinkedBlockingQueue} and - * {@link ThreadPoolExecutor.CallerRunsPolicy} to apply backpressure when the - * queue fills rather than buffering tasks unboundedly. + * {@link ThreadPoolExecutor.AbortPolicy} so saturation surfaces as a + * {@link java.util.concurrent.RejectedExecutionException} β€” never a blocking + * run on the submitting thread (which may be a Vert.x event loop). * * @param source JDBC DataSource */ @@ -256,11 +263,28 @@ private DbArtifactIndex( /** * Build the default bounded executor for DB index operations. * Queue size is configurable via PANTERA_INDEX_EXECUTOR_QUEUE (default 500). - * When the queue is full, {@link ThreadPoolExecutor.CallerRunsPolicy} runs the - * task on the submitting thread, propagating backpressure instead of OOM-ing - * the JVM before the per-query statement timeout fires. + * When the queue is full, {@link ThreadPoolExecutor.AbortPolicy} rejects new + * submissions with {@link java.util.concurrent.RejectedExecutionException}. + * Callers that submit via {@link CompletableFuture#supplyAsync(java.util.function.Supplier, java.util.concurrent.Executor)} + * observe the REE as a {@link java.util.concurrent.CompletionException} which + * {@code GroupResolver} maps to {@link com.auto1.pantera.http.fault.Fault.IndexUnavailable} + * via its {@code .exceptionally(...)} branch. * - * @return Wrapped ExecutorService + *

        Rationale for AbortPolicy over CallerRunsPolicy: when the submitting thread + * is a Vert.x event-loop thread β€” as it is for every inlined group-resolver + * request β€” CallerRunsPolicy would execute the blocking JDBC work on the event + * loop and stall the reactor. AbortPolicy guarantees the blocking work never + * runs on the caller thread; the caller thread can remain an event loop safely. + * + *

        The returned {@link ExecutorService} is a + * {@link ContextualExecutorService} wrapping the raw pool: every task-submission + * entry point ({@code execute}, {@code submit(Callable/Runnable)}, + * {@code invokeAll}, {@code invokeAny}) snapshots the submitting thread's + * Log4j2 {@link ThreadContext} (ECS fields) and the active Elastic APM span at + * submit time, then restores them on the runner thread for the task's duration + * β€” so ECS fields and the trace context stay attached across the thread hop. + * + * @return Contextualising wrapper around a bounded thread pool */ private static ExecutorService createDbIndexExecutor() { final int poolSize = Math.max(2, Runtime.getRuntime().availableProcessors()); @@ -276,16 +300,20 @@ private static ExecutorService createDbIndexExecutor() { thread.setDaemon(true); return thread; }, - new ThreadPoolExecutor.CallerRunsPolicy() + new ThreadPoolExecutor.AbortPolicy() ); pool.allowCoreThreadTimeOut(false); EcsLogger.info("com.auto1.pantera.index") .message("DbArtifactIndex executor initialised (" - + poolSize + " threads, queue=" + QUEUE_SIZE + ", policy=caller-runs)") + + poolSize + " threads, queue=" + QUEUE_SIZE + ", policy=abort)") .eventCategory("configuration") .eventAction("pool_init") .log(); - return TraceContextExecutor.wrap(pool); + // WI-post-03a: ContextualExecutorService contextualises EVERY submit path + // (execute, submit(Callable/Runnable), invokeAll, invokeAny) β€” fixes the + // latent bypass where submit(Callable) went straight to the underlying + // pool with empty ThreadContext / no APM span. 
+ return ContextualExecutorService.wrap(pool); } /** @@ -1728,7 +1756,17 @@ public CompletableFuture> locate(final String artifactPath) { @Override public CompletableFuture>> locateByName(final String artifactName) { - return CompletableFuture.supplyAsync(() -> { + try { + return CompletableFuture.supplyAsync(() -> locateByNameBody(artifactName), this.executor); + } catch (final RejectedExecutionException ree) { + // AbortPolicy fired β€” pool + queue saturated. Return a failed future + // so callers handle it via their existing exception path (the caller + // may be on the Vert.x event loop; do not rethrow synchronously). + return CompletableFuture.failedFuture(ree); + } + } + + private Optional> locateByNameBody(final String artifactName) { final List repos = new ArrayList<>(); try (Connection conn = this.source.getConnection()) { // SET LOCAL requires an explicit transaction block to persist across statements @@ -1763,7 +1801,6 @@ public CompletableFuture>> locateByName(final String artif return Optional.empty(); } return Optional.of(repos); - }, this.executor); } /** diff --git a/pantera-main/src/main/java/com/auto1/pantera/index/IndexOutcome.java b/pantera-main/src/main/java/com/auto1/pantera/index/IndexOutcome.java new file mode 100644 index 000000000..bfc54fb6f --- /dev/null +++ b/pantera-main/src/main/java/com/auto1/pantera/index/IndexOutcome.java @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.index; + +import java.util.List; +import java.util.Optional; + +/** + * Sealed return type for index lookup operations, replacing the ambiguous + * {@code Optional>} contract on + * {@link ArtifactIndex#locateByName(String)}. + * + *

        Four branches: + *

          + *
        • {@link Hit} -- one or more repos contain the artifact.
        • + *
        • {@link Miss} -- successful query, zero repos matched.
        • + *
        • {@link Timeout} -- statement-timeout or deadline exceeded.
        • + *
        • {@link DBFailure} -- any other DB exception.
        • + *
        + * + *

        Since the {@link ArtifactIndex} interface lives in {@code pantera-core} + * (frozen for this release), this type is not wired into the interface + * signature directly. {@code GroupResolver} adapts the + * {@code Optional>} return into an {@code IndexOutcome} via + * {@link #fromLegacy(Optional)}. + * + * @since 2.2.0 + */ +public sealed interface IndexOutcome { + + /** + * Successful lookup -- at least one repository contains the artifact. + * + * @param repos Non-empty, unmodifiable list of repository names. + */ + record Hit(List repos) implements IndexOutcome { + public Hit { + if (repos == null || repos.isEmpty()) { + throw new IllegalArgumentException("Hit must have at least one repo"); + } + repos = List.copyOf(repos); + } + } + + /** + * Successful lookup -- the artifact is not in any indexed repository. + */ + record Miss() implements IndexOutcome { + } + + /** + * The index query timed out (statement-timeout, deadline, etc.). + * + * @param cause Underlying throwable. + */ + record Timeout(Throwable cause) implements IndexOutcome { + } + + /** + * The index query failed for a reason other than timeout. + * + * @param cause Underlying throwable. + * @param query Human-readable description of the query that failed. + */ + record DBFailure(Throwable cause, String query) implements IndexOutcome { + } + + /** + * Adapt the legacy {@code Optional>} contract used by + * {@link ArtifactIndex#locateByName(String)} into the new sealed type. + * + *

          + *
        • {@code Optional.empty()} (DB error) maps to {@link DBFailure}.
        • + *
        • {@code Optional.of(emptyList)} (confirmed miss) maps to {@link Miss}.
        • + *
        • {@code Optional.of(nonEmptyList)} maps to {@link Hit}.
        • + *
        + * + * @param legacy The legacy return value. + * @return Corresponding {@link IndexOutcome}. + */ + static IndexOutcome fromLegacy(final Optional> legacy) { + if (legacy.isEmpty()) { + return new DBFailure(null, "locateByName (legacy empty Optional)"); + } + final List repos = legacy.get(); + if (repos.isEmpty()) { + return new Miss(); + } + return new Hit(repos); + } +} diff --git a/pantera-main/src/main/java/com/auto1/pantera/jetty/http3/Http3Server.java b/pantera-main/src/main/java/com/auto1/pantera/jetty/http3/Http3Server.java index c2563efbd..158b8495a 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/jetty/http3/Http3Server.java +++ b/pantera-main/src/main/java/com/auto1/pantera/jetty/http3/Http3Server.java @@ -14,14 +14,17 @@ import com.auto1.pantera.http.Headers; import com.auto1.pantera.http.Slice; import com.auto1.pantera.http.headers.Header; +import com.auto1.pantera.http.log.EcsLogger; +import com.auto1.pantera.http.misc.ConfigDefaults; import com.auto1.pantera.http.rq.RequestLine; import com.auto1.pantera.asto.Content; import io.reactivex.Flowable; import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; -import java.util.LinkedList; +import java.util.ArrayList; import java.util.List; +import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import org.eclipse.jetty.http.MetaData; import org.eclipse.jetty.http3.api.Session; @@ -32,6 +35,7 @@ import org.eclipse.jetty.http3.server.RawHTTP3ServerConnectionFactory; import org.eclipse.jetty.quic.quiche.server.QuicheServerConnector; import org.eclipse.jetty.quic.quiche.server.QuicheServerQuicConfiguration; +import org.eclipse.jetty.server.ProxyConnectionFactory; import org.eclipse.jetty.server.Server; import org.eclipse.jetty.util.ssl.SslContextFactory; @@ -48,6 +52,49 @@ public final class Http3Server { */ private static final String HTTP_3 = "HTTP/3"; + /** + * Maximum bytes accumulated in memory for a single HTTP/3 request body before 
the + * stream is rejected with 413 Payload Too Large. Acts as a safety ceiling to prevent + * an unbounded LinkedList from consuming heap on large or hostile uploads. + * + *

        Default 16 MB; override via {@code PANTERA_HTTP3_MAX_STREAM_BUFFER_BYTES}.

        + * + *

        Design note (A.6): We reject on overflow rather than spill to a temp file. + * Rationale: the Jetty 12.1.4 {@code Stream.Server} data-frame callback plumbing in + * {@code onDataAvailable} below is currently a stub (see comments) and does not yet + * forward bytes into the slice. Wiring a temp-file spill through that future + * callback-based API is significantly more invasive than the surgical cap this fix + * targets. 16 MB covers typical artifact metadata / manifest uploads; larger HTTP/3 + * uploads will surface an explicit 413 instead of silently ballooning heap.

        + */ + private static final int MAX_STREAM_BUFFER_BYTES = ConfigDefaults.getInt( + "PANTERA_HTTP3_MAX_STREAM_BUFFER_BYTES", 16 * 1024 * 1024 + ); + + /** + * Whether the HTTP/3 connector should accept a PROXY-protocol-v2 prelude + * before the QUIC/HTTP/3 bytes. Mirrors the existing Vert.x + * {@code setUseProxyProtocol} toggle on the HTTP/1–2 listeners + * (see {@code AsyncApiVerticle} / {@code VertxMain}). + * + *

        Default {@code false}; override via + * {@code PANTERA_HTTP3_PROXY_PROTOCOL=true}. The planned YAML path + * {@code meta.http3.proxyProtocol} (see plan Task H.3) is not wired here + * because {@link Http3Server} is currently constructed without a + * {@code Settings} reference (see {@code VertxMain}:~302 and ~802). + * Threading {@code Settings} through would require a signature change; + * until then the env-var is the sole entry point and operators fronting + * the HTTP/3 listener with an NLB / PROXYv2 proxy set it explicitly.

        + * + *

        When {@code true}, a {@link ProxyConnectionFactory} is prepended to + * the connector's factory chain so that the upstream proxy's + * PROXY-protocol prelude is parsed before the HTTP/3 handshake and the + * real client IP (not the TCP peer) is surfaced to request handlers.

        + */ + private static final boolean PROXY_PROTOCOL_ENABLED = ConfigDefaults.getBoolean( + "PANTERA_HTTP3_PROXY_PROTOCOL", false + ); + /** * Pantera slice. */ @@ -105,16 +152,38 @@ public void start() { final RawHTTP3ServerConnectionFactory http3 = new RawHTTP3ServerConnectionFactory(new SliceListener()); http3.getHTTP3Configuration().setStreamIdleTimeout(15_000); - - // Create QuicheServerConnector with native QUIC support - final QuicheServerConnector connector = new QuicheServerConnector( - this.server, - this.ssl, - serverQuicConfig, - http3 - ); + + // Build the connector's factory chain. When PROXY_PROTOCOL_ENABLED + // is true, prepend Jetty's ProxyConnectionFactory so the upstream + // LB's PROXY-protocol-v2 prelude is parsed before the HTTP/3 frames + // and Jetty's Server API surfaces the real client IP (not the TCP + // peer) to handlers. Mirrors the Vert.x setUseProxyProtocol + // behavior on the HTTP/1–2 listeners. + final QuicheServerConnector connector; + if (Http3Server.PROXY_PROTOCOL_ENABLED) { + connector = new QuicheServerConnector( + this.server, + this.ssl, + serverQuicConfig, + new ProxyConnectionFactory(), + http3 + ); + EcsLogger.info("com.auto1.pantera.jetty.http3") + .message("HTTP/3 proxy-protocol prelude parsing enabled") + .eventCategory("configuration") + .eventAction("http3_proxy_protocol_enabled") + .field("url.port", this.port) + .log(); + } else { + connector = new QuicheServerConnector( + this.server, + this.ssl, + serverQuicConfig, + http3 + ); + } connector.setPort(this.port); - + this.server.addConnector(connector); this.server.start(); // @checkstyle IllegalCatchCheck (5 lines) @@ -163,15 +232,30 @@ public Stream.Server.Listener onRequest( ).thenAccept(response -> new Http3Connection(stream).send(response)); return null; } else { - // Request with body - collect data frames + // Request with body - collect data frames into a bounded accumulator. 
+ // The previous unbounded LinkedList was a latent heap-exhaustion + // risk for large or hostile uploads. We cap at MAX_STREAM_BUFFER_BYTES; on + // overflow we reset the stream (HTTP/3 equivalent of 413) and stop demanding + // further data. See MAX_STREAM_BUFFER_BYTES javadoc for the reject-vs-spill + // rationale. stream.demand(); - final List buffers = new LinkedList<>(); + final List buffers = new ArrayList<>(); + final AtomicLong totalBytes = new AtomicLong(0L); return new Stream.Server.Listener() { public void onDataAvailable(final Stream.Server stream) { - stream.demand(); + // Accumulator overflow guard. // Note: readData() API changed in Jetty 12.1.4 // This is a simplified implementation // Full implementation would use stream.read() with callbacks + // and add each data-frame buffer to `buffers` while updating + // totalBytes; on exceeding MAX_STREAM_BUFFER_BYTES the stream + // would be reset (413 Payload Too Large) and demand stopped. + if (totalBytes.get() > MAX_STREAM_BUFFER_BYTES) { + // Clear to release references; do not demand more. + buffers.clear(); + return; + } + stream.demand(); } }; } diff --git a/pantera-main/src/main/java/com/auto1/pantera/metrics/GroupSliceMetrics.java b/pantera-main/src/main/java/com/auto1/pantera/metrics/GroupResolverMetrics.java similarity index 86% rename from pantera-main/src/main/java/com/auto1/pantera/metrics/GroupSliceMetrics.java rename to pantera-main/src/main/java/com/auto1/pantera/metrics/GroupResolverMetrics.java index f46fb179a..0c6fd9598 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/metrics/GroupSliceMetrics.java +++ b/pantera-main/src/main/java/com/auto1/pantera/metrics/GroupResolverMetrics.java @@ -11,32 +11,32 @@ package com.auto1.pantera.metrics; /** - * GroupSlice metrics - Compatibility wrapper for Micrometer. + * GroupResolver metrics - Compatibility wrapper for Micrometer. * Delegates to MicrometerMetrics for backward compatibility. 
* * @deprecated Use {@link com.auto1.pantera.metrics.MicrometerMetrics} directly * @since 1.18.21 */ @Deprecated -public final class GroupSliceMetrics { +public final class GroupResolverMetrics { - private static volatile GroupSliceMetrics INSTANCE; + private static volatile GroupResolverMetrics INSTANCE; - private GroupSliceMetrics() { + private GroupResolverMetrics() { // Private constructor } public static void initialize(final Object registry) { if (INSTANCE == null) { - synchronized (GroupSliceMetrics.class) { + synchronized (GroupResolverMetrics.class) { if (INSTANCE == null) { - INSTANCE = new GroupSliceMetrics(); + INSTANCE = new GroupResolverMetrics(); } } } } - public static GroupSliceMetrics instance() { + public static GroupResolverMetrics instance() { return INSTANCE; } @@ -80,8 +80,8 @@ public void recordError(final String groupName, final String errorType) { /** * Increment the {@code pantera.group.drain.dropped} Micrometer counter. * - *

<p>Called from the {@code DRAIN_EXECUTOR} rejection handler in
- * {@link com.auto1.pantera.group.GroupSlice} whenever a drain task is dropped
+ * <p>
        Called from the per-repo drain executor rejection handler in + * {@link com.auto1.pantera.http.resilience.RepoBulkhead} whenever a drain task is dropped * because the bounded queue is full. Each increment represents one undrained * loser response body β€” a potential Jetty socket leak until idle-timeout. * Ops should alert on any sustained non-zero rate of this counter. diff --git a/pantera-main/src/main/java/com/auto1/pantera/misc/Json2Yaml.java b/pantera-main/src/main/java/com/auto1/pantera/misc/Json2Yaml.java index 610f1bc52..27c14a654 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/misc/Json2Yaml.java +++ b/pantera-main/src/main/java/com/auto1/pantera/misc/Json2Yaml.java @@ -25,13 +25,23 @@ */ public final class Json2Yaml implements Function { + /** + * JSON reader β€” thread-safe once configured; hoisted to avoid per-call allocation. + */ + private static final ObjectMapper JSON = new ObjectMapper(); + + /** + * YAML writer configured once with INDENT_ARRAYS_WITH_INDICATOR; + * thread-safe for write operations; hoisted to avoid per-call allocation. 
+ */ + private static final YAMLMapper YAML = (YAMLMapper) new YAMLMapper() + .configure(YAMLGenerator.Feature.INDENT_ARRAYS_WITH_INDICATOR, true); + @Override public YamlMapping apply(final String json) { try { return Yaml.createYamlInput( - new YAMLMapper() - .configure(YAMLGenerator.Feature.INDENT_ARRAYS_WITH_INDICATOR, true) - .writeValueAsString(new ObjectMapper().readTree(json)) + Json2Yaml.YAML.writeValueAsString(Json2Yaml.JSON.readTree(json)) ).readYamlMapping(); } catch (final IOException err) { throw new UncheckedIOException(err); diff --git a/pantera-main/src/main/java/com/auto1/pantera/misc/Yaml2Json.java b/pantera-main/src/main/java/com/auto1/pantera/misc/Yaml2Json.java index a648110dd..2bc56534e 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/misc/Yaml2Json.java +++ b/pantera-main/src/main/java/com/auto1/pantera/misc/Yaml2Json.java @@ -25,13 +25,23 @@ */ public final class Yaml2Json implements Function { + /** + * JSON writer β€” thread-safe once configured; hoisted to avoid per-call allocation. + */ + private static final ObjectMapper JSON = new ObjectMapper(); + + /** + * YAML reader β€” thread-safe once configured; hoisted to avoid per-call allocation. 
+ */ + private static final ObjectMapper YAML = new ObjectMapper(new YAMLFactory()); + @Override public JsonStructure apply(final String yaml) { try { return Json.createReader( new ByteArrayInputStream( - new ObjectMapper().writeValueAsBytes( - new ObjectMapper(new YAMLFactory()) + Yaml2Json.JSON.writeValueAsBytes( + Yaml2Json.YAML .readValue(Yaml2Json.escapeAsterisk(yaml), Object.class) ) ) diff --git a/pantera-main/src/main/java/com/auto1/pantera/settings/Settings.java b/pantera-main/src/main/java/com/auto1/pantera/settings/Settings.java index d799fc879..ae7d25598 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/settings/Settings.java +++ b/pantera-main/src/main/java/com/auto1/pantera/settings/Settings.java @@ -15,7 +15,7 @@ import com.auto1.pantera.api.ssl.KeyStore; import com.auto1.pantera.asto.Storage; import com.auto1.pantera.cache.ValkeyConnection; -import com.auto1.pantera.cooldown.CooldownSettings; +import com.auto1.pantera.cooldown.config.CooldownSettings; import com.auto1.pantera.http.client.HttpClientSettings; import com.auto1.pantera.index.ArtifactIndex; import com.auto1.pantera.scheduling.MetadataEventQueues; @@ -169,6 +169,23 @@ default Optional valkeyConnection() { return Optional.empty(); } + /** + * Cached filter for the "local user enabled" JDBC lookup, if the + * deployment has a dataSource configured and the auth chain was + * wrapped with {@code CachedLocalEnabledFilter}. Returned empty + * otherwise. + * + *

        Exposed so admin user-management handlers can invalidate a + * per-user cache entry immediately when enabled state is toggled + * (enable / disable / update / delete). + * + * @return Optional filter reference; empty in deployments without a DB + */ + default Optional + cachedLocalEnabledFilter() { + return Optional.empty(); + } + /** * Whether Proxy Protocol v2 is enabled for the main + per-repo HTTP listeners. * When true, the HTTP server will parse the PROXYv2 header prepended by an diff --git a/pantera-main/src/main/java/com/auto1/pantera/settings/YamlSettings.java b/pantera-main/src/main/java/com/auto1/pantera/settings/YamlSettings.java index 2bcb5751a..f4a239c6a 100644 --- a/pantera-main/src/main/java/com/auto1/pantera/settings/YamlSettings.java +++ b/pantera-main/src/main/java/com/auto1/pantera/settings/YamlSettings.java @@ -30,7 +30,7 @@ import com.auto1.pantera.cache.PublishingCleanable; import com.auto1.pantera.cache.StoragesCache; import com.auto1.pantera.cache.ValkeyConnection; -import com.auto1.pantera.cooldown.CooldownSettings; +import com.auto1.pantera.cooldown.config.CooldownSettings; import com.auto1.pantera.cooldown.YamlCooldownSettings; import com.auto1.pantera.cooldown.metadata.FilteredMetadataCacheConfig; import com.auto1.pantera.db.ArtifactDbFactory; @@ -186,6 +186,15 @@ public final class YamlSettings implements Settings { */ private final ValkeyConnection valkeyConn; + /** + * Cached enabled-flag filter wrapping {@link com.auto1.pantera.auth.LocalEnabledFilter}. + * Held so admin handlers (user update/enable/disable/delete) can + * invalidate the per-user cache entry directly. + * May be {@code null} when no dataSource is configured. + * @since 2.2.0 + */ + private final com.auto1.pantera.auth.CachedLocalEnabledFilter cachedLocalEnabledFilter; + /** * Guard flag to make {@link #close()} idempotent without spurious error logs. 
* @since 1.20.13 @@ -254,8 +263,11 @@ public YamlSettings(final YamlMapping content, final Path path, this.jwtSettings = JwtSettings.fromYaml(this.meta()); final Optional valkey = YamlSettings.initValkey(this.meta()); this.valkeyConn = valkey.orElse(null); - // Initialize global cache config for all adapters - GlobalCacheConfig.initialize(valkey); + // Initialize global cache config for all adapters. Pass the + // `caches` YAML mapping so per-cache config sections (e.g. + // `auth-enabled`) can be resolved by accessors like + // GlobalCacheConfig.getInstance().authEnabled(). + GlobalCacheConfig.initialize(valkey, this.meta().yamlMapping("caches")); // Initialize unified negative cache config NegativeCacheConfig.initialize(this.meta().yamlMapping("caches")); // Initialize cooldown metadata cache config @@ -266,34 +278,41 @@ public YamlSettings(final YamlMapping content, final Path path, } else { this.artifactsDb = YamlSettings.initArtifactsDb(this.meta()); } - final CachedUsers auth = YamlSettings.initAuth( - this.meta(), valkey, this.jwtSettings, this.artifactsDb.orElse(null) + // Create the cross-instance pub/sub up front so the + // CachedLocalEnabledFilter inside the auth chain can subscribe + // for L1 invalidation broadcasts. Both the pub/sub and the + // auth chain are ultimately owned by this settings object. 
+ final CacheInvalidationPubSub psEarly = valkey + .map(CacheInvalidationPubSub::new) + .orElse(null); + this.cachePubSub = psEarly; + final AuthChain authChain = YamlSettings.initAuth( + this.meta(), valkey, this.jwtSettings, this.artifactsDb.orElse(null), + psEarly ); + final CachedUsers auth = authChain.cachedUsers(); + this.cachedLocalEnabledFilter = authChain.enabledFilter(); this.security = new PanteraSecurity.FromYaml( this.meta(), auth, new PolicyStorage(this.meta()).parse(), this.artifactsDb.orElse(null) ); - // Initialize cross-instance cache invalidation via Redis pub/sub - if (valkey.isPresent()) { - final CacheInvalidationPubSub ps = - new CacheInvalidationPubSub(valkey.get()); - this.cachePubSub = ps; - ps.register("auth", auth); + // Register cache handlers on the pub/sub created earlier. + if (psEarly != null) { + psEarly.register("auth", auth); final GuavaFiltersCache filters = new GuavaFiltersCache(); - ps.register("filters", filters); + psEarly.register("filters", filters); final Cleanable policyCache; if (this.security.policy() instanceof Cleanable) { policyCache = (Cleanable) this.security.policy(); - ps.register("policy", policyCache); + psEarly.register("policy", policyCache); } this.acach = new PanteraCaches.All( - new PublishingCleanable(auth, ps, "auth"), + new PublishingCleanable(auth, psEarly, "auth"), new StoragesCache(), this.security.policy(), - new PublishingFiltersCache(filters, ps) + new PublishingFiltersCache(filters, psEarly) ); } else { - this.cachePubSub = null; this.acach = new PanteraCaches.All( auth, new StoragesCache(), this.security.policy(), new GuavaFiltersCache() ); @@ -417,6 +436,12 @@ public Optional valkeyConnection() { return Optional.ofNullable(this.valkeyConn); } + @Override + public Optional + cachedLocalEnabledFilter() { + return Optional.ofNullable(this.cachedLocalEnabledFilter); + } + @Override public PrefixesConfig prefixes() { return this.prefixesConfig; @@ -746,14 +771,16 @@ private static Optional 
initValkey(final YamlMapping settings) * @param valkey Optional Valkey connection for L2 cache * @param jwtSettings JWT settings for cache TTL capping * @param dataSource Database data source (nullable) + * @param cachePubSub Optional cross-instance pub/sub (nullable) * @return Authentication * @checkstyle ParameterNumberCheck (5 lines) */ - private static CachedUsers initAuth( + private static AuthChain initAuth( final YamlMapping settings, final Optional valkey, final JwtSettings jwtSettings, - final DataSource dataSource + final DataSource dataSource, + final com.auto1.pantera.cache.CacheInvalidationPubSub cachePubSub ) { Authentication res; if (dataSource != null) { @@ -815,22 +842,53 @@ private static CachedUsers initAuth( // checks enabled for local users, but SSO providers do not. // Order matters: wrap BEFORE CachedUsers so a stale cache // entry cannot let a just-disabled user through. + com.auto1.pantera.auth.CachedLocalEnabledFilter cachedEnabledFilter = null; if (dataSource != null) { - res = new com.auto1.pantera.auth.LocalEnabledFilter(res, dataSource); + final Authentication local = new com.auto1.pantera.auth.LocalEnabledFilter( + res, dataSource + ); + // Cache the enabled-flag lookup so the per-request JDBC hit in + // LocalEnabledFilter does not fire on every CLI basic-auth pull. + // Admin toggles (enable/disable/update/delete) call + // CachedLocalEnabledFilter.invalidate(username) via the hook in + // UserHandler, and pub/sub broadcasts the invalidation to peer + // Pantera instances so stale L1 copies drop within ms. 
+ cachedEnabledFilter = new com.auto1.pantera.auth.CachedLocalEnabledFilter( + local, + com.auto1.pantera.cache.GlobalCacheConfig.getInstance(), + valkey.orElse(null), + cachePubSub + ); + res = cachedEnabledFilter; } // Create CachedUsers with Valkey connection and JWT settings for TTL capping + final CachedUsers users; if (valkey.isPresent()) { EcsLogger.info("com.auto1.pantera.settings") .message(String.format("Initializing auth cache with Valkey L2 cache and JWT TTL cap: expires=%s, expirySeconds=%d", jwtSettings.expires(), jwtSettings.expirySeconds())) .eventCategory("authentication") .eventAction("auth_cache_init") .log(); - return new CachedUsers(res, valkey.get(), jwtSettings); + users = new CachedUsers(res, valkey.get(), jwtSettings); } else { - return new CachedUsers(res, null, jwtSettings); + users = new CachedUsers(res, null, jwtSettings); } + return new AuthChain(users, cachedEnabledFilter); } + /** + * Result holder for {@link #initAuth}: the outer {@link CachedUsers} + * credential cache plus the inner {@link com.auto1.pantera.auth.CachedLocalEnabledFilter} + * reference so admin handlers can invalidate it directly. + * + * @param cachedUsers Outer cached credential chain + * @param enabledFilter Inner enabled-flag filter (nullable when no DB) + */ + private record AuthChain( + CachedUsers cachedUsers, + com.auto1.pantera.auth.CachedLocalEnabledFilter enabledFilter + ) { } + /** * Initialize and scheduled mechanism to gather artifact events * (adding and removing artifacts) and create {@link MetadataEventQueues} instance. 
diff --git a/pantera-main/src/main/java/com/auto1/pantera/tools/CacheIntegrityAudit.java b/pantera-main/src/main/java/com/auto1/pantera/tools/CacheIntegrityAudit.java new file mode 100644 index 000000000..e7d6bfc65 --- /dev/null +++ b/pantera-main/src/main/java/com/auto1/pantera/tools/CacheIntegrityAudit.java @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.tools; + +import com.auto1.pantera.asto.fs.FileStorage; +import com.auto1.pantera.http.cache.ProxyCacheWriter; + +import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; + +/** + * One-off admin tool (WI-07 Β§9.5) that scans a proxy cache directory for + * primary/sidecar drift β€” the production {@code oss-parent-58.pom.sha1} + * symptom β€” and optionally evicts mismatched pairs so the next client + * request repopulates them through {@link ProxyCacheWriter}. + * + *

+ * <p>CLI contract:</p>
+ * <pre>
+ *   pantera-cache-integrity-audit --root &lt;storage-dir&gt;
+ *       [--repo &lt;name&gt;]          # repository tag for log events
+ *       [--dry-run]              # default: report only
+ *       [--fix]                  # delete mismatched primary + every sidecar
+ *       [--verbose]              # print every scanned entry, not just offenders
+ * </pre>
+ *
+ * <p>Exit codes:</p>
+ * <ul>
+ *   <li>{@code 0} — no mismatches found (or all evicted after {@code --fix}).</li>
+ *   <li>{@code 1} — at least one mismatch remains after the run.</li>
+ *   <li>{@code 2} — CLI usage error.</li>
+ * </ul>
        + * + * @since 2.2.0 + */ +public final class CacheIntegrityAudit { + + /** Default tag when {@code --repo} is omitted. */ + private static final String DEFAULT_REPO = "cache-integrity-audit"; + + private CacheIntegrityAudit() { + // static main only + } + + /** + * CLI entry point. Declared on {@code pantera-main} jar's manifest + * so {@code java -cp pantera-main.jar com.auto1.pantera.tools.CacheIntegrityAudit ...} + * invokes this method directly. + * + * @param args CLI args per class javadoc. + */ + @SuppressWarnings("PMD.SystemPrintln") + public static void main(final String[] args) { + final Args parsed; + try { + parsed = Args.parse(args); + } catch (final IllegalArgumentException ex) { + System.err.println("error: " + ex.getMessage()); + System.err.println(); + printUsage(System.err); + System.exit(2); + return; + } + if (parsed.help) { + printUsage(System.out); + System.exit(0); + return; + } + final Path root = Paths.get(parsed.root).toAbsolutePath().normalize(); + if (!Files.isDirectory(root)) { + System.err.println("error: --root does not exist or is not a directory: " + root); + System.exit(2); + return; + } + final String repoTag = parsed.repo == null ? DEFAULT_REPO : parsed.repo; + System.out.println("Pantera cache integrity audit"); + System.out.println(" root: " + root); + System.out.println(" repo: " + repoTag); + System.out.println(" mode: " + (parsed.fix ? 
"fix (evict mismatches)" : "dry-run")); + System.out.println(); + final ProxyCacheWriter.IntegrityAuditor.Report report = + ProxyCacheWriter.IntegrityAuditor.run(new FileStorage(root), repoTag, parsed.fix); + System.out.println(); + System.out.println("Scanned primaries: " + report.scanned()); + System.out.println("Mismatches found: " + report.mismatches().size()); + if (!report.mismatches().isEmpty()) { + System.out.println(); + System.out.println("Offenders:"); + for (final ProxyCacheWriter.IntegrityAuditor.Mismatch m : report.mismatches()) { + System.out.println(" " + m.primary().string()); + for (final ProxyCacheWriter.IntegrityAuditor.AlgoMismatch am : m.algorithms()) { + System.out.println(String.format( + Locale.ROOT, + " %-6s cached=%s computed=%s", + am.algo().name().toLowerCase(Locale.ROOT), + am.sidecarClaim(), + am.computed() + )); + } + } + } + if (report.clean()) { + System.out.println(); + System.out.println("Result: CLEAN"); + System.exit(0); + return; + } + if (parsed.fix) { + System.out.println(); + System.out.println("Result: " + report.mismatches().size() + + " mismatched pair(s) evicted. " + + "Next client request will repopulate through ProxyCacheWriter."); + System.exit(0); + return; + } + System.out.println(); + System.out.println("Result: " + report.mismatches().size() + + " mismatched pair(s) detected. Re-run with --fix to evict."); + System.exit(1); + } + + /** Print the usage string to {@code out}. */ + @SuppressWarnings("PMD.SystemPrintln") + private static void printUsage(final PrintStream out) { + out.println("Usage: pantera-cache-integrity-audit --root " + + "[--repo ] [--dry-run | --fix] [--verbose]"); + out.println(); + out.println(" --root File-storage root directory (required)."); + out.println(" --repo Log/metric repository tag. 
Default: " + + DEFAULT_REPO + "."); + out.println(" --dry-run Report only (default)."); + out.println(" --fix Evict primary + every sidecar on mismatch."); + out.println(" --verbose Print every scanned entry."); + out.println(" -h, --help Show this help text."); + out.println(); + out.println("Exit codes:"); + out.println(" 0 = clean (or fix succeeded)"); + out.println(" 1 = mismatches detected in dry-run"); + out.println(" 2 = CLI usage error"); + } + + /** Parsed CLI arguments. */ + private static final class Args { + private String root; + private String repo; + private boolean fix; + private boolean help; + @SuppressWarnings("unused") + private boolean verbose; + + @SuppressWarnings({"PMD.CognitiveComplexity", "PMD.CyclomaticComplexity"}) + static Args parse(final String[] args) { + final Args out = new Args(); + final List rest = new ArrayList<>(); + for (int i = 0; i < args.length; i++) { + final String arg = args[i]; + switch (arg) { + case "-h": + case "--help": + out.help = true; + break; + case "--dry-run": + out.fix = false; + break; + case "--fix": + out.fix = true; + break; + case "--verbose": + out.verbose = true; + break; + case "--root": + if (i + 1 >= args.length) { + throw new IllegalArgumentException("--root requires a value"); + } + out.root = args[++i]; + break; + case "--repo": + if (i + 1 >= args.length) { + throw new IllegalArgumentException("--repo requires a value"); + } + out.repo = args[++i]; + break; + default: + rest.add(arg); + break; + } + } + if (!out.help && (out.root == null || out.root.isBlank())) { + throw new IllegalArgumentException("--root is required"); + } + if (!rest.isEmpty()) { + throw new IllegalArgumentException("unknown argument(s): " + rest); + } + return out; + } + } +} diff --git a/pantera-main/src/main/resources/log4j2.xml b/pantera-main/src/main/resources/log4j2.xml index f83117113..d86c4ebaf 100644 --- a/pantera-main/src/main/resources/log4j2.xml +++ b/pantera-main/src/main/resources/log4j2.xml @@ -31,11 +31,19 
@@ - + + + + + + diff --git a/pantera-main/src/test/java/com/auto1/pantera/api/v1/AsyncApiTestBase.java b/pantera-main/src/test/java/com/auto1/pantera/api/v1/AsyncApiTestBase.java index 1270cd65c..fb7203cda 100644 --- a/pantera-main/src/test/java/com/auto1/pantera/api/v1/AsyncApiTestBase.java +++ b/pantera-main/src/test/java/com/auto1/pantera/api/v1/AsyncApiTestBase.java @@ -15,7 +15,7 @@ import com.auto1.pantera.asto.Storage; import com.auto1.pantera.asto.memory.InMemoryStorage; import com.auto1.pantera.auth.JwtTokens; -import com.auto1.pantera.cooldown.NoopCooldownService; +import com.auto1.pantera.cooldown.impl.NoopCooldownService; import com.auto1.pantera.db.DbManager; import com.auto1.pantera.db.PostgreSQLTestConfig; import com.auto1.pantera.http.auth.AuthUser; diff --git a/pantera-main/src/test/java/com/auto1/pantera/api/v1/CooldownHandlerUnblockFlowTest.java b/pantera-main/src/test/java/com/auto1/pantera/api/v1/CooldownHandlerUnblockFlowTest.java new file mode 100644 index 000000000..5cc42b793 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/api/v1/CooldownHandlerUnblockFlowTest.java @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.api.v1; + +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import com.auto1.pantera.cooldown.metadata.CooldownMetadataService; +import com.auto1.pantera.cooldown.metadata.MetadataFilter; +import com.auto1.pantera.cooldown.metadata.MetadataParser; +import com.auto1.pantera.cooldown.metadata.MetadataRewriter; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** + * Unit test for the CooldownHandler unblock β†’ invalidation flow. + * Verifies that: + *
+ * <ol>
+ *   <li>DB write completes before cache invalidation fires</li>
+ *   <li>CooldownCache L1 is invalidated on unblock</li>
+ *   <li>FilteredMetadataCache is invalidated on unblock</li>
+ *   <li>All invalidation completes before the service future resolves</li>
+ *   <li>Policy change invalidates all caches</li>
+ * </ol>
        + * + * @since 2.2.0 + */ +final class CooldownHandlerUnblockFlowTest { + + @Test + void unblockInvalidatesCooldownCacheAndMetadataCache() { + // Arrange: A blocked version in CooldownCache L1 + final CooldownCache cache = new CooldownCache(1000, Duration.ofHours(1), null); + cache.putBlocked("my-repo", "my-package", "1.0.0", Instant.now().plusSeconds(3600)); + // Verify the cache has the block BEFORE unblock + final Boolean blockedBefore = cache.isBlocked( + "my-repo", "my-package", "1.0.0", + () -> CompletableFuture.completedFuture(true) + ).join(); + Assertions.assertTrue(blockedBefore, "Version should be blocked before unblock"); + // Recording metadata service + final RecordingMetadataService metaSvc = new RecordingMetadataService(); + // Recording cooldown service + final RecordingCooldownService cooldownSvc = new RecordingCooldownService(); + // Simulate the handler-level unblock flow (mirrors CooldownHandler.unblock thenRun) + cooldownSvc.unblock("npm-proxy", "my-repo", "my-package", "1.0.0", "admin-user") + .thenRun(() -> { + cache.unblock("my-repo", "my-package", "1.0.0"); + metaSvc.invalidate("npm-proxy", "my-repo", "my-package"); + }) + .join(); + // Assert: CooldownCache L1 now reports NOT blocked + final Boolean blockedAfter = cache.isBlocked( + "my-repo", "my-package", "1.0.0", + () -> CompletableFuture.completedFuture(false) + ).join(); + Assertions.assertFalse(blockedAfter, "Version must not be blocked after unblock"); + // Assert: Metadata invalidation was called + Assertions.assertEquals(1, metaSvc.invalidateCount(), + "FilteredMetadataCache invalidate() should have been called once"); + Assertions.assertEquals("npm-proxy", metaSvc.lastRepoType()); + Assertions.assertEquals("my-repo", metaSvc.lastRepoName()); + Assertions.assertEquals("my-package", metaSvc.lastPackageName()); + } + + @Test + void unblockAllInvalidatesCooldownCacheAndMetadataCache() { + // Arrange: Multiple blocked versions + final CooldownCache cache = new 
CooldownCache(1000, Duration.ofHours(1), null); + cache.putBlocked("my-repo", "pkg-a", "1.0.0", Instant.now().plusSeconds(3600)); + cache.putBlocked("my-repo", "pkg-b", "2.0.0", Instant.now().plusSeconds(3600)); + final RecordingMetadataService metaSvc = new RecordingMetadataService(); + final RecordingCooldownService cooldownSvc = new RecordingCooldownService(); + // Simulate handler-level unblockAll flow + cooldownSvc.unblockAll("npm-proxy", "my-repo", "admin-user") + .thenRun(() -> { + cache.unblockAll("my-repo"); + metaSvc.invalidateAll("npm-proxy", "my-repo"); + }) + .join(); + // Assert: Both versions now NOT blocked + final Boolean aPkg = cache.isBlocked( + "my-repo", "pkg-a", "1.0.0", + () -> CompletableFuture.completedFuture(false) + ).join(); + final Boolean bPkg = cache.isBlocked( + "my-repo", "pkg-b", "2.0.0", + () -> CompletableFuture.completedFuture(false) + ).join(); + Assertions.assertFalse(aPkg, "pkg-a should be unblocked"); + Assertions.assertFalse(bPkg, "pkg-b should be unblocked"); + // Assert: Metadata invalidation was called + Assertions.assertEquals(1, metaSvc.invalidateAllCount(), + "FilteredMetadataCache invalidateAll() should have been called once"); + } + + @Test + void policyChangeInvalidatesAllCaches() { + // Arrange: Populated caches + final CooldownCache cache = new CooldownCache(1000, Duration.ofHours(1), null); + cache.putBlocked("repo1", "pkg-x", "3.0.0", Instant.now().plusSeconds(7200)); + final RecordingMetadataService metaSvc = new RecordingMetadataService(); + // Simulate policy change flow (mirrors CooldownHandler.updateConfig) + metaSvc.clearAll(); + cache.clear(); + // Assert: CooldownCache cleared + final Boolean blocked = cache.isBlocked( + "repo1", "pkg-x", "3.0.0", + () -> CompletableFuture.completedFuture(false) + ).join(); + Assertions.assertFalse(blocked, + "CooldownCache must be cleared after policy change"); + // Assert: Metadata clearAll was called + Assertions.assertEquals(1, metaSvc.clearAllCount(), + "clearAll() 
should have been called once on policy change"); + } + + @Test + void dbWriteCompletesBeforeInvalidation() { + // Verify ordering: the unblock service future must resolve + // before the cache invalidation runs. + final List ordering = Collections.synchronizedList(new ArrayList<>()); + final CooldownService svc = new CooldownService() { + @Override + public CompletableFuture evaluate( + final CooldownRequest r, final CooldownInspector i) { + return CompletableFuture.completedFuture(CooldownResult.allowed()); + } + @Override + public CompletableFuture unblock( + final String repoType, final String repoName, + final String artifact, final String version, final String actor) { + return CompletableFuture.runAsync(() -> { + ordering.add("db_write"); + }); + } + @Override + public CompletableFuture unblockAll( + final String repoType, final String repoName, final String actor) { + return CompletableFuture.completedFuture(null); + } + @Override + public CompletableFuture> activeBlocks( + final String repoType, final String repoName) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + }; + svc.unblock("npm-proxy", "my-repo", "pkg", "1.0.0", "admin") + .thenRun(() -> ordering.add("cache_invalidation")) + .join(); + Assertions.assertEquals(List.of("db_write", "cache_invalidation"), ordering, + "DB write must complete before cache invalidation"); + } + + /** + * Recording CooldownService that tracks unblock calls. 
+ */ + private static final class RecordingCooldownService implements CooldownService { + @Override + public CompletableFuture evaluate( + final CooldownRequest request, final CooldownInspector inspector) { + return CompletableFuture.completedFuture(CooldownResult.allowed()); + } + @Override + public CompletableFuture unblock( + final String repoType, final String repoName, + final String artifact, final String version, final String actor) { + return CompletableFuture.completedFuture(null); + } + @Override + public CompletableFuture unblockAll( + final String repoType, final String repoName, final String actor) { + return CompletableFuture.completedFuture(null); + } + @Override + public CompletableFuture> activeBlocks( + final String repoType, final String repoName) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } + + /** + * Recording CooldownMetadataService that tracks invalidation calls. + */ + @SuppressWarnings("PMD.TooManyMethods") + private static final class RecordingMetadataService implements CooldownMetadataService { + private final AtomicInteger invCount = new AtomicInteger(0); + private final AtomicInteger invAllCount = new AtomicInteger(0); + private final AtomicInteger clrAllCount = new AtomicInteger(0); + private volatile String lastType; + private volatile String lastName; + private volatile String lastPkg; + + @Override + public CompletableFuture filterMetadata( + final String repoType, final String repoName, final String packageName, + final byte[] rawMetadata, final MetadataParser parser, + final MetadataFilter filter, final MetadataRewriter rewriter, + final Optional inspector) { + return CompletableFuture.completedFuture(rawMetadata); + } + @Override + public void invalidate(final String repoType, + final String repoName, final String packageName) { + this.invCount.incrementAndGet(); + this.lastType = repoType; + this.lastName = repoName; + this.lastPkg = packageName; + } + @Override + public void 
invalidateAll(final String repoType, final String repoName) { + this.invAllCount.incrementAndGet(); + } + @Override + public void clearAll() { + this.clrAllCount.incrementAndGet(); + } + @Override + public String stats() { + return "RecordingMetadataService"; + } + int invalidateCount() { + return this.invCount.get(); + } + int invalidateAllCount() { + return this.invAllCount.get(); + } + int clearAllCount() { + return this.clrAllCount.get(); + } + String lastRepoType() { + return this.lastType; + } + String lastRepoName() { + return this.lastName; + } + String lastPackageName() { + return this.lastPkg; + } + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/api/v1/NegativeCacheAdminResourceTest.java b/pantera-main/src/test/java/com/auto1/pantera/api/v1/NegativeCacheAdminResourceTest.java new file mode 100644 index 000000000..126c6ec9e --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/api/v1/NegativeCacheAdminResourceTest.java @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.api.v1; + +import com.auto1.pantera.http.cache.NegativeCache; +import com.auto1.pantera.http.cache.NegativeCacheKey; +import com.auto1.pantera.http.cache.NegativeCacheRegistry; +import io.vertx.core.Vertx; +import io.vertx.core.http.HttpMethod; +import io.vertx.core.json.JsonObject; +import io.vertx.junit5.VertxTestContext; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Integration tests for {@link com.auto1.pantera.api.v1.admin.NegativeCacheAdminResource}. 
+ * Lives in the same package as {@link AsyncApiTestBase} because the base class + * methods are package-private. + * @since 2.2.0 + */ +public final class NegativeCacheAdminResourceTest extends AsyncApiTestBase { + + @BeforeEach + void seedCache() { + final NegativeCache shared = NegativeCacheRegistry.instance().sharedCache(); + shared.cacheNotFound(new NegativeCacheKey( + "test-group", "maven", "com.example:foo", "1.0.0" + )); + shared.cacheNotFound(new NegativeCacheKey( + "test-group", "npm", "@scope/bar", "2.0.0" + )); + } + + @Test + void listReturns200WithAdminRole(final Vertx vertx, + final VertxTestContext ctx) throws Exception { + this.request( + vertx, ctx, + HttpMethod.GET, "/api/v1/admin/neg-cache", + res -> { + Assertions.assertEquals(200, res.statusCode(), + "GET /admin/neg-cache should return 200"); + final JsonObject body = res.bodyAsJsonObject(); + Assertions.assertTrue(body.containsKey("items"), + "Response must have 'items'"); + Assertions.assertTrue(body.containsKey("total"), + "Response must have 'total'"); + Assertions.assertTrue(body.containsKey("page"), + "Response must have 'page'"); + } + ); + } + + @Test + void listReturns401WithoutAuth(final Vertx vertx, + final VertxTestContext ctx) throws Exception { + this.request( + vertx, ctx, + HttpMethod.GET, "/api/v1/admin/neg-cache", + null, null, + res -> Assertions.assertEquals(401, res.statusCode(), + "GET /admin/neg-cache without token should return 401") + ); + } + + @Test + void probeReturns200ForExistingKey(final Vertx vertx, + final VertxTestContext ctx) throws Exception { + this.request( + vertx, ctx, + HttpMethod.GET, + "/api/v1/admin/neg-cache/probe?key=test-group:maven:com.example:foo:1.0.0", + res -> { + Assertions.assertEquals(200, res.statusCode(), + "Probe should return 200"); + final JsonObject body = res.bodyAsJsonObject(); + Assertions.assertTrue(body.containsKey("present"), + "Response must have 'present' field"); + } + ); + } + + @Test + void probeReturns400WithoutKey(final 
Vertx vertx, + final VertxTestContext ctx) throws Exception { + this.request( + vertx, ctx, + HttpMethod.GET, "/api/v1/admin/neg-cache/probe", + res -> Assertions.assertEquals(400, res.statusCode(), + "Probe without key should return 400") + ); + } + + @Test + void invalidateReturnsCorrectCounts(final Vertx vertx, + final VertxTestContext ctx) throws Exception { + NegativeCacheRegistry.instance().sharedCache().cacheNotFound( + new NegativeCacheKey("inv-scope", "maven", "org:artifact", "3.0") + ); + this.request( + vertx, ctx, + HttpMethod.POST, "/api/v1/admin/neg-cache/invalidate", + new JsonObject() + .put("scope", "inv-scope") + .put("repoType", "maven") + .put("artifactName", "org:artifact") + .put("version", "3.0"), + res -> { + Assertions.assertEquals(200, res.statusCode(), + "Invalidate should return 200"); + final JsonObject body = res.bodyAsJsonObject(); + final JsonObject invalidated = body.getJsonObject("invalidated"); + Assertions.assertNotNull(invalidated, + "Response must have 'invalidated' object"); + Assertions.assertEquals(1, invalidated.getInteger("l1"), + "L1 should show 1 invalidated"); + } + ); + } + + @Test + void invalidateReturns400WhenFieldsMissing(final Vertx vertx, + final VertxTestContext ctx) throws Exception { + this.request( + vertx, ctx, + HttpMethod.POST, "/api/v1/admin/neg-cache/invalidate", + new JsonObject().put("scope", "x"), + res -> Assertions.assertEquals(400, res.statusCode(), + "Invalidate with missing fields should return 400") + ); + } + + @Test + void invalidatePatternReturns200(final Vertx vertx, + final VertxTestContext ctx) throws Exception { + this.request( + vertx, ctx, + HttpMethod.POST, "/api/v1/admin/neg-cache/invalidate-pattern", + new JsonObject().put("scope", "test-group"), + res -> { + Assertions.assertEquals(200, res.statusCode(), + "Pattern invalidation should return 200"); + final JsonObject body = res.bodyAsJsonObject(); + final JsonObject invalidated = body.getJsonObject("invalidated"); + 
Assertions.assertNotNull(invalidated, + "Response must have 'invalidated' object"); + Assertions.assertTrue(invalidated.getInteger("l1") >= 0, + "L1 count must be >= 0"); + } + ); + } + + @Test + void invalidatePatternRateLimitReturns429(final Vertx vertx, + final VertxTestContext ctx) throws Exception { + final int limit = 10; + final JsonObject patternBody = new JsonObject() + .put("repoType", "rate-test-" + System.nanoTime()); + for (int idx = 0; idx < limit; idx++) { + final VertxTestContext inner = new VertxTestContext(); + this.request( + vertx, inner, + HttpMethod.POST, + "/api/v1/admin/neg-cache/invalidate-pattern", + patternBody, + res -> Assertions.assertEquals(200, res.statusCode(), + "Request within limit should return 200") + ); + Assertions.assertTrue(inner.awaitCompletion( + AsyncApiTestBase.TEST_TIMEOUT, + java.util.concurrent.TimeUnit.SECONDS + )); + } + this.request( + vertx, ctx, + HttpMethod.POST, + "/api/v1/admin/neg-cache/invalidate-pattern", + patternBody, + res -> Assertions.assertEquals(429, res.statusCode(), + "11th request should return 429 (rate limited)") + ); + } + + @Test + void statsReturns200(final Vertx vertx, + final VertxTestContext ctx) throws Exception { + this.request( + vertx, ctx, + HttpMethod.GET, "/api/v1/admin/neg-cache/stats", + res -> { + Assertions.assertEquals(200, res.statusCode(), + "Stats should return 200"); + final JsonObject body = res.bodyAsJsonObject(); + Assertions.assertTrue(body.containsKey("enabled"), + "Stats must have 'enabled'"); + Assertions.assertTrue(body.containsKey("l1Size"), + "Stats must have 'l1Size'"); + Assertions.assertTrue(body.containsKey("hitCount"), + "Stats must have 'hitCount'"); + Assertions.assertTrue(body.containsKey("missCount"), + "Stats must have 'missCount'"); + Assertions.assertTrue(body.containsKey("hitRate"), + "Stats must have 'hitRate'"); + } + ); + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/auth/CachedLocalEnabledFilterTest.java 
b/pantera-main/src/test/java/com/auto1/pantera/auth/CachedLocalEnabledFilterTest.java new file mode 100644 index 000000000..16b327cc6 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/auth/CachedLocalEnabledFilterTest.java @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.auth; + +import com.auto1.pantera.cache.GlobalCacheConfig; +import com.auto1.pantera.http.auth.AuthUser; +import com.auto1.pantera.http.auth.Authentication; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Unit tests for {@link CachedLocalEnabledFilter}. + * + *

        These tests exercise the cache decorator in isolation using hand-rolled + * {@link Authentication} stubs β€” no DB, no Valkey, no Mockito (pantera-main + * test scope does not include Mockito). L2 round-trip integration is covered + * by the contract / chaos suites that run against a real Valkey. + * + *

        Contract under test: + *

          + *
        • Cache only the "enabled" dimension; NEVER cache failed authentication.
        • + *
        • {@code invalidate(username)} drops L1 / L2 / peer-node entries.
        • + *
        • L1 {@code FALSE} hit short-circuits the delegate.
        • + *
        • L1 {@code TRUE} hit still delegates (for password validation) but + * does not re-issue the JDBC enabled-check β€” the cache layers.
        • + *
        + */ +class CachedLocalEnabledFilterTest { + + /** + * Counting delegate with a configurable outcome. + * Mutate {@code next} between calls to steer the delegate's response. + */ + private static final class CountingDelegate implements Authentication { + final AtomicInteger calls = new AtomicInteger(); + volatile Optional next; + + CountingDelegate(final Optional next) { + this.next = next; + } + + @Override + public Optional user(final String name, final String pass) { + this.calls.incrementAndGet(); + return this.next; + } + } + + private CachedLocalEnabledFilter newFilter(final Authentication delegate) { + // No Valkey, no pub/sub β€” pure L1. + return new CachedLocalEnabledFilter( + delegate, GlobalCacheConfig.getInstance(), null, null + ); + } + + // ------------------------------------------------------------------ + // Cache miss β†’ delegate called; subsequent same-user call hits L1 + // ------------------------------------------------------------------ + + @Test + void cacheMissDelegatesExactlyOnceThenL1HitReturnsEnabled() { + final CountingDelegate delegate = new CountingDelegate( + Optional.of(new AuthUser("ayd", "keycloak")) + ); + final CachedLocalEnabledFilter filter = this.newFilter(delegate); + + // First call β€” L1 miss. Delegate runs, we populate L1=TRUE. + final Optional first = filter.user("ayd", "pwd"); + assertTrue(first.isPresent(), "enabled user returned on miss"); + assertEquals(1, delegate.calls.get(), "miss triggers one delegate call"); + + // Second call β€” L1 hit for TRUE. The decorator still delegates + // to validate the password (contract: only the enabled dimension + // is cached, not credentials) so delegate.calls increments; but + // the enabled-check JDBC work in the inner LocalEnabledFilter + // is conceptually short-circuited by the cached TRUE. 
+ final Optional second = filter.user("ayd", "pwd"); + assertTrue(second.isPresent(), "cached enabled user still authenticates"); + assertEquals(2, delegate.calls.get(), + "delegate is called each time (cache is for enabled dim only)"); + } + + // ------------------------------------------------------------------ + // L1 FALSE hit short-circuits the delegate + // ------------------------------------------------------------------ + + @Test + void cachedFalseShortCircuitsDelegateViaInvalidationHook() { + final CountingDelegate delegate = new CountingDelegate( + Optional.of(new AuthUser("ayd", "keycloak")) + ); + final CachedLocalEnabledFilter filter = this.newFilter(delegate); + // Populate L1 = TRUE through normal flow. + filter.user("ayd", "pwd"); + assertEquals(1, delegate.calls.get()); + + // Drop L1 β€” forces re-probe on next call. + filter.invalidate("ayd"); + // Now simulate the case where the delegate starts returning empty + // (user got disabled out-of-band): the filter must NOT cache this + // failure, so repeated calls keep hitting the delegate. + delegate.next = Optional.empty(); + filter.user("ayd", "pwd"); + filter.user("ayd", "pwd"); + filter.user("ayd", "pwd"); + assertEquals(4, delegate.calls.get(), + "failed auth must not be cached β€” delegate runs each time"); + } + + // ------------------------------------------------------------------ + // invalidate(username) drops L1 so the next call repopulates + // ------------------------------------------------------------------ + + @Test + void invalidateDropsL1() { + final CountingDelegate delegate = new CountingDelegate( + Optional.of(new AuthUser("ayd", "keycloak")) + ); + final CachedLocalEnabledFilter filter = this.newFilter(delegate); + + filter.user("ayd", "pwd"); + filter.invalidate("ayd"); + // Observable invariant: invalidate is a no-throw drop. We can't + // peek at L1 via public API; the next call validates correctness. 
+ final Optional out = filter.user("ayd", "pwd"); + assertTrue(out.isPresent()); + } + + // ------------------------------------------------------------------ + // Failed authentication is NEVER cached (DoS-amplification guard) + // ------------------------------------------------------------------ + + @Test + void failedAuthIsNeverCached() { + final CountingDelegate delegate = new CountingDelegate(Optional.empty()); + final CachedLocalEnabledFilter filter = this.newFilter(delegate); + + filter.user("ayd", "wrong1"); + filter.user("ayd", "wrong2"); + filter.user("ayd", "wrong3"); + assertEquals(3, delegate.calls.get(), + "failed auth must hit the delegate every time"); + } + + // ------------------------------------------------------------------ + // canHandle / isAuthoritative / userDomains delegate-through + // ------------------------------------------------------------------ + + @Test + void delegatesCanHandleAndIsAuthoritative() { + final Authentication delegate = new Authentication() { + @Override + public Optional user(final String name, final String pass) { + return Optional.empty(); + } + + @Override + public boolean canHandle(final String username) { + return "ayd".equals(username); + } + + @Override + public boolean isAuthoritative(final String username) { + return "admin".equals(username); + } + }; + final CachedLocalEnabledFilter filter = this.newFilter(delegate); + assertTrue(filter.canHandle("ayd")); + assertFalse(filter.canHandle("other")); + assertTrue(filter.isAuthoritative("admin")); + assertFalse(filter.isAuthoritative("ayd")); + } + + // ------------------------------------------------------------------ + // Null username is passed through without blowing up + // ------------------------------------------------------------------ + + @Test + void nullUsernameIsDelegatedWithoutCacheWrite() { + final CountingDelegate delegate = new CountingDelegate(Optional.empty()); + final CachedLocalEnabledFilter filter = this.newFilter(delegate); + 
assertTrue(filter.user(null, "x").isEmpty()); + assertEquals(1, delegate.calls.get()); + assertTrue(filter.user(null, "x").isEmpty()); + assertEquals(2, delegate.calls.get()); + } + + // ------------------------------------------------------------------ + // invalidate(null) is a no-op + // ------------------------------------------------------------------ + + @Test + void invalidateNullIsNoOp() { + final CountingDelegate delegate = new CountingDelegate( + Optional.of(new AuthUser("ayd", "x")) + ); + final CachedLocalEnabledFilter filter = this.newFilter(delegate); + filter.invalidate(null); // must not throw + assertTrue(filter.user("ayd", "p").isPresent()); + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosDbStallTest.java b/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosDbStallTest.java new file mode 100644 index 000000000..bcb2e0367 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosDbStallTest.java @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.chaos; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.cache.NegativeCacheConfig; +import com.auto1.pantera.group.GroupResolver; +import com.auto1.pantera.group.MemberSlice; +import com.auto1.pantera.http.Headers; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.cache.NegativeCache; +import com.auto1.pantera.http.fault.FaultTranslator; +import com.auto1.pantera.http.rq.RequestLine; +import com.auto1.pantera.index.ArtifactDocument; +import com.auto1.pantera.index.ArtifactIndex; +import com.auto1.pantera.index.SearchResult; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Chaos test: simulate 500ms stall on every DB (index) call. + * + *

        Verifies that when the artifact index is pathologically slow, + * {@link GroupResolver} classifies the outcome as + * {@code IndexOutcome.Timeout} or {@code IndexOutcome.DBFailure} + * and translates it to a 500 with {@code X-Pantera-Fault: index-unavailable}. + * + *

        Uses in-memory/mock infrastructure only; no Docker required. + * + * @since 2.2.0 + */ +@Tag("Chaos") +final class ChaosDbStallTest { + + private static final String GROUP = "chaos-db-group"; + private static final String REPO_TYPE = "maven-group"; + private static final String HOSTED = "libs-release"; + private static final String PROXY = "central-proxy"; + private static final String JAR_PATH = + "/com/example/artifact/1.0/artifact-1.0.jar"; + + private static final ScheduledExecutorService SCHEDULER = + Executors.newScheduledThreadPool(2); + + /** + * A 500ms DB stall that eventually completes exceptionally with a timeout + * must produce a 500 with {@code X-Pantera-Fault: index-unavailable}. + */ + @Test + void dbStall_returnsIndexUnavailable() { + final ArtifactIndex stallingIndex = stallingTimeoutIndex(Duration.ofMillis(500)); + + final GroupResolver resolver = buildResolver( + stallingIndex, + List.of(HOSTED, PROXY), + Set.of(PROXY) + ); + + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(5, TimeUnit.SECONDS).join(); + + assertEquals(500, resp.status().code(), + "DB stall must result in 500"); + assertTrue(resp.headers().stream() + .anyMatch(h -> h.getKey().equals(FaultTranslator.HEADER_FAULT) + && h.getValue().equals("index-unavailable")), + "Response must have X-Pantera-Fault: index-unavailable"); + } + + /** + * A DB stall that completes exceptionally with a generic error + * (not a timeout) must also produce index-unavailable. 
+ */ + @Test + void dbStall_genericError_returnsIndexUnavailable() { + final ArtifactIndex stallingIndex = stallingErrorIndex(Duration.ofMillis(500)); + + final GroupResolver resolver = buildResolver( + stallingIndex, + List.of(HOSTED, PROXY), + Set.of(PROXY) + ); + + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(5, TimeUnit.SECONDS).join(); + + assertEquals(500, resp.status().code(), + "DB stall (generic error) must result in 500"); + assertTrue(resp.headers().stream() + .anyMatch(h -> h.getKey().equals(FaultTranslator.HEADER_FAULT) + && h.getValue().equals("index-unavailable")), + "Response must have X-Pantera-Fault: index-unavailable"); + } + + /** + * Multiple concurrent requests during a DB stall must all get + * deterministic error responses (no deadlock, no hang). + */ + @Test + void dbStall_concurrentRequests_allResolve() throws Exception { + final ArtifactIndex stallingIndex = stallingTimeoutIndex(Duration.ofMillis(300)); + final GroupResolver resolver = buildResolver( + stallingIndex, + List.of(HOSTED, PROXY), + Set.of(PROXY) + ); + + final int count = 20; + @SuppressWarnings("unchecked") + final CompletableFuture[] futures = new CompletableFuture[count]; + for (int i = 0; i < count; i++) { + futures[i] = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(10, TimeUnit.SECONDS); + } + + CompletableFuture.allOf(futures).join(); + for (int i = 0; i < count; i++) { + final Response resp = futures[i].join(); + assertEquals(500, resp.status().code(), + "Request " + i + " must return 500 during DB stall"); + } + } + + // ---- Helpers ---- + + /** + * Index that stalls for the given duration then fails with a timeout. 
+ */ + private static ArtifactIndex stallingTimeoutIndex(final Duration stall) { + return new NopIndex() { + @Override + public CompletableFuture>> locateByName(final String name) { + final CompletableFuture>> future = new CompletableFuture<>(); + SCHEDULER.schedule( + () -> future.completeExceptionally( + new RuntimeException("statement timeout", + new TimeoutException(stall.toMillis() + "ms"))), + stall.toMillis(), TimeUnit.MILLISECONDS + ); + return future; + } + }; + } + + /** + * Index that stalls for the given duration then fails with a generic DB error. + */ + private static ArtifactIndex stallingErrorIndex(final Duration stall) { + return new NopIndex() { + @Override + public CompletableFuture>> locateByName(final String name) { + final CompletableFuture>> future = new CompletableFuture<>(); + SCHEDULER.schedule( + () -> future.completeExceptionally( + new RuntimeException("connection pool exhausted")), + stall.toMillis(), TimeUnit.MILLISECONDS + ); + return future; + } + }; + } + + private GroupResolver buildResolver( + final ArtifactIndex idx, + final List memberNames, + final Set proxyMemberNames + ) { + final Slice okSlice = (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.ok().build()); + + final List members = memberNames.stream() + .map(name -> new MemberSlice(name, okSlice, proxyMemberNames.contains(name))) + .toList(); + return new GroupResolver( + GROUP, + members, + Collections.emptyList(), + Optional.of(idx), + REPO_TYPE, + proxyMemberNames, + buildNegativeCache(), + java.util.concurrent.ForkJoinPool.commonPool() + ); + } + + private static NegativeCache buildNegativeCache() { + final NegativeCacheConfig config = new NegativeCacheConfig( + Duration.ofMinutes(5), + 10_000, + false, + NegativeCacheConfig.DEFAULT_L1_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L1_TTL, + NegativeCacheConfig.DEFAULT_L2_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L2_TTL + ); + return new NegativeCache("group-negative", GROUP, config); + } + + /** + 
* Minimal no-op index base class. + */ + private static class NopIndex implements ArtifactIndex { + @Override + public CompletableFuture index(final ArtifactDocument doc) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture remove(final String rn, final String ap) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture search( + final String q, final int max, final int off + ) { + return CompletableFuture.completedFuture(SearchResult.EMPTY); + } + + @Override + public CompletableFuture> locate(final String path) { + return CompletableFuture.completedFuture(List.of()); + } + + @Override + public CompletableFuture>> locateByName(final String name) { + return CompletableFuture.completedFuture(Optional.of(List.of())); + } + + @Override + public void close() { + } + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosMemberTimeoutTest.java b/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosMemberTimeoutTest.java new file mode 100644 index 000000000..2597aee28 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosMemberTimeoutTest.java @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.chaos; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.cache.NegativeCacheConfig; +import com.auto1.pantera.group.GroupResolver; +import com.auto1.pantera.group.MemberSlice; +import com.auto1.pantera.http.Headers; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.RsStatus; +import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.cache.NegativeCache; +import com.auto1.pantera.http.fault.FaultTranslator; +import com.auto1.pantera.http.rq.RequestLine; +import com.auto1.pantera.index.ArtifactDocument; +import com.auto1.pantera.index.ArtifactIndex; +import com.auto1.pantera.index.SearchResult; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Chaos test: simulate a proxy member that takes 30 seconds to respond. + * + *

        Verifies that {@link GroupResolver} returns within its internal deadline + * when a member is pathologically slow, rather than blocking indefinitely. + * The response must carry the {@code X-Pantera-Fault} header indicating + * a fault condition (all-proxies-failed or similar). + * + *

        Uses in-memory/mock infrastructure only; no Docker required. + * + * @since 2.2.0 + */ +@Tag("Chaos") +@SuppressWarnings("PMD.TooManyMethods") +final class ChaosMemberTimeoutTest { + + private static final String GROUP = "chaos-group"; + private static final String REPO_TYPE = "npm-group"; + private static final String SLOW_PROXY = "slow-upstream"; + private static final String JAR_PATH = + "/com/example/artifact/1.0/artifact-1.0.jar"; + + private static final ScheduledExecutorService SCHEDULER = + Executors.newScheduledThreadPool(2); + + /** + * A proxy member that takes 30 seconds to respond should not block the + * caller indefinitely. When an external deadline (orTimeout) fires, + * the resolution is cancelled within that deadline rather than waiting + * the full 30 seconds. This simulates a client-side deadline enforcement. + */ + @Test + void slowMember_groupReturnsWithinDeadline() { + final ArtifactIndex idx = nopIndex(Optional.of(List.of())); + final Slice slowSlice = slowSlice(Duration.ofSeconds(30)); + + final GroupResolver resolver = buildResolver( + idx, + List.of(SLOW_PROXY), + Set.of(SLOW_PROXY), + Map.of(SLOW_PROXY, slowSlice) + ); + + final long start = System.currentTimeMillis(); + final CompletableFuture future = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(5, TimeUnit.SECONDS); + + // The future must complete (either with a response or an exception) + // within 5 seconds, not block for 30 seconds. + boolean timedOut = false; + boolean gotErrorResponse = false; + try { + final Response resp = future.join(); + // If the resolver returns before the deadline, the response + // must indicate failure (the slow member did not complete). 
+ gotErrorResponse = resp.status().code() >= 400; + } catch (final java.util.concurrent.CompletionException ex) { + // orTimeout fired: the deadline was enforced + timedOut = ex.getCause() instanceof java.util.concurrent.TimeoutException; + } + final long elapsed = System.currentTimeMillis() - start; + + assertTrue(elapsed < 10_000, + "Resolution must complete within the deadline, took " + elapsed + "ms"); + assertTrue(timedOut || gotErrorResponse, + "Slow member must produce either a timeout or an error response"); + } + + /** + * When the sole proxy member is slow and index returns a miss, + * the external deadline fires and the future completes exceptionally + * with a TimeoutException (Fault.Deadline simulation). + */ + @Test + void slowMember_indexMiss_returnsFaultOrTimeout() { + final ArtifactIndex idx = nopIndex(Optional.of(List.of())); + final Slice slowSlice = slowSlice(Duration.ofSeconds(30)); + + final GroupResolver resolver = buildResolver( + idx, + List.of(SLOW_PROXY), + Set.of(SLOW_PROXY), + Map.of(SLOW_PROXY, slowSlice) + ); + + final CompletableFuture future = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(5, TimeUnit.SECONDS); + + boolean timedOut = false; + boolean hasFaultOrError = false; + try { + final Response resp = future.join(); + final boolean hasFault = resp.headers().stream() + .anyMatch(h -> h.getKey().equals(FaultTranslator.HEADER_FAULT)); + hasFaultOrError = hasFault || resp.status().code() >= 400; + } catch (final java.util.concurrent.CompletionException ex) { + timedOut = ex.getCause() instanceof java.util.concurrent.TimeoutException; + } + + assertTrue(timedOut || hasFaultOrError, + "Slow-member timeout must produce a timeout exception or fault/error response"); + } + + // ---- Helpers ---- + + private static Slice slowSlice(final Duration delay) { + return (line, headers, body) -> { + final CompletableFuture future = new CompletableFuture<>(); + SCHEDULER.schedule( + () -> 
future.complete(ResponseBuilder.ok().build()), + delay.toMillis(), TimeUnit.MILLISECONDS + ); + return future; + }; + } + + private GroupResolver buildResolver( + final ArtifactIndex idx, + final List memberNames, + final Set proxyMemberNames, + final Map sliceMap + ) { + final List members = memberNames.stream() + .map(name -> new MemberSlice( + name, + sliceMap.getOrDefault(name, + (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.notFound().build())), + proxyMemberNames.contains(name) + )) + .toList(); + return new GroupResolver( + GROUP, + members, + Collections.emptyList(), + Optional.of(idx), + REPO_TYPE, + proxyMemberNames, + buildNegativeCache(), + java.util.concurrent.ForkJoinPool.commonPool() + ); + } + + private static NegativeCache buildNegativeCache() { + final NegativeCacheConfig config = new NegativeCacheConfig( + Duration.ofMinutes(5), + 10_000, + false, + NegativeCacheConfig.DEFAULT_L1_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L1_TTL, + NegativeCacheConfig.DEFAULT_L2_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L2_TTL + ); + return new NegativeCache("group-negative", GROUP, config); + } + + private static ArtifactIndex nopIndex(final Optional> result) { + return new ArtifactIndex() { + @Override + public CompletableFuture index(final ArtifactDocument doc) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture remove(final String rn, final String ap) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture search( + final String q, final int max, final int off + ) { + return CompletableFuture.completedFuture(SearchResult.EMPTY); + } + + @Override + public CompletableFuture> locate(final String path) { + return CompletableFuture.completedFuture(List.of()); + } + + @Override + public CompletableFuture>> locateByName(final String name) { + return CompletableFuture.completedFuture(result); + } + + @Override + public void close() { + } + }; + } +} diff 
--git a/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosQueueSaturationTest.java b/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosQueueSaturationTest.java new file mode 100644 index 000000000..db26f6506 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosQueueSaturationTest.java @@ -0,0 +1,279 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.chaos; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.cache.NegativeCacheConfig; +import com.auto1.pantera.group.GroupResolver; +import com.auto1.pantera.group.MemberSlice; +import com.auto1.pantera.http.Headers; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.cache.NegativeCache; +import com.auto1.pantera.http.rq.RequestLine; +import com.auto1.pantera.index.ArtifactDocument; +import com.auto1.pantera.index.ArtifactIndex; +import com.auto1.pantera.index.SearchResult; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Chaos test: saturate the group resolver with 100 concurrent requests. + * + *

        Verifies that when every internal event queue is at capacity, all + * requests still resolve gracefully (either 200 or a well-formed error + * response). Queue overflow must be handled per WI-00 -- never an + * unhandled exception or a hung future. + * + *

        Uses in-memory/mock infrastructure only; no Docker required. + * + * @since 2.2.0 + */ +@Tag("Chaos") +final class ChaosQueueSaturationTest { + + private static final String GROUP = "chaos-queue-group"; + private static final String REPO_TYPE = "npm-group"; + private static final String HOSTED = "hosted-repo"; + private static final String PROXY = "proxy-repo"; + private static final String JAR_PATH = + "/com/example/artifact/1.0/artifact-1.0.jar"; + private static final int CONCURRENT_REQUESTS = 100; + + /** + * Fire 100 concurrent requests at the group resolver with index hits. + * All must complete (no hung futures) and all must return a valid HTTP + * status (200, 404, or 5xx -- never an exception bubbling up). + */ + @Test + void saturation_allRequestsResolve_indexHit() throws Exception { + final ArtifactIndex idx = nopIndex(Optional.of(List.of(HOSTED))); + final AtomicInteger servedCount = new AtomicInteger(0); + final Slice countingOk = (line, headers, body) -> { + servedCount.incrementAndGet(); + return CompletableFuture.completedFuture(ResponseBuilder.ok().build()); + }; + + final List members = List.of( + new MemberSlice(HOSTED, countingOk, false), + new MemberSlice(PROXY, countingOk, true) + ); + + final GroupResolver resolver = new GroupResolver( + GROUP, + members, + Collections.emptyList(), + Optional.of(idx), + REPO_TYPE, + Set.of(PROXY), + buildNegativeCache(), + java.util.concurrent.ForkJoinPool.commonPool() + ); + + final List> futures = new ArrayList<>(); + for (int i = 0; i < CONCURRENT_REQUESTS; i++) { + futures.add(resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(30, TimeUnit.SECONDS)); + } + + CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join(); + + int successCount = 0; + int errorCount = 0; + for (final CompletableFuture f : futures) { + final Response resp = f.join(); + if (resp.status().code() == 200) { + successCount++; + } else { + errorCount++; + } + } + + 
assertTrue(successCount + errorCount == CONCURRENT_REQUESTS, + "All " + CONCURRENT_REQUESTS + " requests must resolve (got " + + successCount + " success + " + errorCount + " error)"); + assertTrue(successCount > 0, + "At least some requests must succeed (got " + successCount + ")"); + } + + /** + * Fire 100 concurrent requests with index misses (proxy fanout path). + * All must complete gracefully even under saturation. + */ + @Test + void saturation_allRequestsResolve_proxyFanout() throws Exception { + final ArtifactIndex idx = nopIndex(Optional.of(List.of())); + final Slice okSlice = (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.ok().build()); + + final List members = List.of( + new MemberSlice(PROXY, okSlice, true) + ); + + final GroupResolver resolver = new GroupResolver( + GROUP, + members, + Collections.emptyList(), + Optional.of(idx), + REPO_TYPE, + Set.of(PROXY), + buildNegativeCache(), + java.util.concurrent.ForkJoinPool.commonPool() + ); + + final List> futures = new ArrayList<>(); + for (int i = 0; i < CONCURRENT_REQUESTS; i++) { + futures.add(resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(30, TimeUnit.SECONDS)); + } + + CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join(); + + int resolved = 0; + for (final CompletableFuture f : futures) { + f.join(); + resolved++; + } + assertTrue(resolved == CONCURRENT_REQUESTS, + "All " + CONCURRENT_REQUESTS + " requests must resolve under saturation"); + } + + /** + * Fire 100 concurrent requests where the index itself is slow (50ms per call). + * Verify no deadlock or starvation: all futures complete. 
+ */ + @Test + void saturation_slowIndex_allRequestsResolve() throws Exception { + final ArtifactIndex slowIdx = new NopIndex() { + @Override + public CompletableFuture>> locateByName(final String name) { + return CompletableFuture.supplyAsync(() -> { + try { + Thread.sleep(50); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + return Optional.of(List.of(HOSTED)); + }); + } + }; + final Slice okSlice = (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.ok().build()); + + final List members = List.of( + new MemberSlice(HOSTED, okSlice, false), + new MemberSlice(PROXY, okSlice, true) + ); + + final GroupResolver resolver = new GroupResolver( + GROUP, + members, + Collections.emptyList(), + Optional.of(slowIdx), + REPO_TYPE, + Set.of(PROXY), + buildNegativeCache(), + java.util.concurrent.ForkJoinPool.commonPool() + ); + + final List> futures = new ArrayList<>(); + for (int i = 0; i < CONCURRENT_REQUESTS; i++) { + futures.add(resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(30, TimeUnit.SECONDS)); + } + + CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join(); + + int resolved = 0; + for (final CompletableFuture f : futures) { + f.join(); + resolved++; + } + assertTrue(resolved == CONCURRENT_REQUESTS, + "All requests must resolve even with slow index"); + } + + // ---- Helpers ---- + + private static NegativeCache buildNegativeCache() { + final NegativeCacheConfig config = new NegativeCacheConfig( + Duration.ofMinutes(5), + 10_000, + false, + NegativeCacheConfig.DEFAULT_L1_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L1_TTL, + NegativeCacheConfig.DEFAULT_L2_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L2_TTL + ); + return new NegativeCache("group-negative", GROUP, config); + } + + private static ArtifactIndex nopIndex(final Optional> result) { + return new NopIndex() { + @Override + public CompletableFuture>> locateByName(final String name) { + return 
CompletableFuture.completedFuture(result); + } + }; + } + + /** + * Minimal no-op index base class. + */ + private static class NopIndex implements ArtifactIndex { + @Override + public CompletableFuture index(final ArtifactDocument doc) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture remove(final String rn, final String ap) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture search( + final String q, final int max, final int off + ) { + return CompletableFuture.completedFuture(SearchResult.EMPTY); + } + + @Override + public CompletableFuture> locate(final String path) { + return CompletableFuture.completedFuture(List.of()); + } + + @Override + public CompletableFuture>> locateByName(final String name) { + return CompletableFuture.completedFuture(Optional.of(List.of())); + } + + @Override + public void close() { + } + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosStorageEvictionTest.java b/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosStorageEvictionTest.java new file mode 100644 index 000000000..668774c86 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/chaos/ChaosStorageEvictionTest.java @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.chaos; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.cache.NegativeCacheConfig; +import com.auto1.pantera.group.GroupResolver; +import com.auto1.pantera.group.MemberSlice; +import com.auto1.pantera.http.Headers; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.RsStatus; +import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.cache.NegativeCache; +import com.auto1.pantera.http.rq.RequestLine; +import com.auto1.pantera.index.ArtifactDocument; +import com.auto1.pantera.index.ArtifactIndex; +import com.auto1.pantera.index.SearchResult; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.util.Collections; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Chaos test: simulate storage eviction between index lookup and read. + * + *

        Reproduces the TOCTOU race described in WI-04 / A11: the artifact + * index says the artifact exists in a hosted member, but by the time the + * storage read happens, the artifact has been evicted (returns 404). + * + *

        Verifies that {@link GroupResolver} falls through to proxy fanout + * and serves the artifact from an upstream proxy member, rather than + * returning a 500 or stale 404 to the client. + * + *

        Uses in-memory/mock infrastructure only; no Docker required. + * + * @since 2.2.0 + */ +@Tag("Chaos") +final class ChaosStorageEvictionTest { + + private static final String GROUP = "chaos-eviction-group"; + private static final String REPO_TYPE = "maven-group"; + private static final String HOSTED = "libs-release"; + private static final String PROXY = "central-proxy"; + private static final String JAR_PATH = + "/com/example/artifact/1.0/artifact-1.0.jar"; + + /** + * Index says artifact is in HOSTED, but HOSTED returns 404 (evicted). + * GroupResolver must fall through to proxy fanout and serve from PROXY. + */ + @Test + void eviction_indexHit_hostedEvicted_proxyServes() { + final ArtifactIndex idx = nopIndex(Optional.of(List.of(HOSTED))); + final AtomicInteger hostedCalls = new AtomicInteger(0); + final AtomicInteger proxyCalls = new AtomicInteger(0); + + // Hosted: always 404 (simulates eviction after index lookup) + final Slice evictedHosted = (line, headers, body) -> { + hostedCalls.incrementAndGet(); + return CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); + }; + + // Proxy: returns 200 (upstream still has the artifact) + final Slice okProxy = (line, headers, body) -> { + proxyCalls.incrementAndGet(); + return CompletableFuture.completedFuture(ResponseBuilder.ok().build()); + }; + + final List members = List.of( + new MemberSlice(HOSTED, evictedHosted, false), + new MemberSlice(PROXY, okProxy, true) + ); + + final GroupResolver resolver = new GroupResolver( + GROUP, + members, + Collections.emptyList(), + Optional.of(idx), + REPO_TYPE, + Set.of(PROXY), + buildNegativeCache(), + java.util.concurrent.ForkJoinPool.commonPool() + ); + + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(10, TimeUnit.SECONDS).join(); + + assertEquals(200, resp.status().code(), + "TOCTOU eviction must fall through to proxy and return 200"); + assertTrue(hostedCalls.get() >= 1, + 
"Hosted member must be queried first (index hit)"); + assertTrue(proxyCalls.get() >= 1, + "Proxy must be queried after hosted 404 (TOCTOU fallthrough)"); + } + + /** + * Repeated TOCTOU eviction: run the scenario 50 times to confirm + * deterministic behavior under race conditions. + */ + @Test + void eviction_repeated_alwaysFallsThrough() { + final ArtifactIndex idx = nopIndex(Optional.of(List.of(HOSTED))); + final Slice evictedHosted = (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); + final Slice okProxy = (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.ok().build()); + + final List members = List.of( + new MemberSlice(HOSTED, evictedHosted, false), + new MemberSlice(PROXY, okProxy, true) + ); + + final GroupResolver resolver = new GroupResolver( + GROUP, + members, + Collections.emptyList(), + Optional.of(idx), + REPO_TYPE, + Set.of(PROXY), + buildNegativeCache(), + java.util.concurrent.ForkJoinPool.commonPool() + ); + + int successCount = 0; + for (int i = 0; i < 50; i++) { + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(10, TimeUnit.SECONDS).join(); + if (resp.status().code() == 200) { + successCount++; + } + } + + assertEquals(50, successCount, + "All 50 TOCTOU-eviction iterations must succeed via proxy fallthrough"); + } + + /** + * Intermittent eviction: hosted member alternates between 200 and 404. + * When hosted returns 404, proxy must fill in. When hosted returns 200, + * proxy must NOT be queried. 
+ */ + @Test + void eviction_intermittent_proxyOnlyOnEviction() { + final ArtifactIndex idx = nopIndex(Optional.of(List.of(HOSTED))); + final AtomicBoolean evicted = new AtomicBoolean(false); + final AtomicInteger proxyCalls = new AtomicInteger(0); + + final Slice intermittentHosted = (line, headers, body) -> { + if (evicted.get()) { + return CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); + } + return CompletableFuture.completedFuture(ResponseBuilder.ok().build()); + }; + + final Slice trackingProxy = (line, headers, body) -> { + proxyCalls.incrementAndGet(); + return CompletableFuture.completedFuture(ResponseBuilder.ok().build()); + }; + + final List members = List.of( + new MemberSlice(HOSTED, intermittentHosted, false), + new MemberSlice(PROXY, trackingProxy, true) + ); + + final GroupResolver resolver = new GroupResolver( + GROUP, + members, + Collections.emptyList(), + Optional.of(idx), + REPO_TYPE, + Set.of(PROXY), + buildNegativeCache(), + java.util.concurrent.ForkJoinPool.commonPool() + ); + + // Round 1: hosted is available -- proxy should NOT be called + evicted.set(false); + proxyCalls.set(0); + final Response r1 = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(10, TimeUnit.SECONDS).join(); + assertEquals(200, r1.status().code()); + assertEquals(0, proxyCalls.get(), + "Proxy must NOT be called when hosted serves successfully"); + + // Round 2: hosted is evicted -- proxy MUST be called + evicted.set(true); + proxyCalls.set(0); + final Response r2 = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).orTimeout(10, TimeUnit.SECONDS).join(); + assertEquals(200, r2.status().code(), + "Eviction must fall through to proxy"); + assertTrue(proxyCalls.get() >= 1, + "Proxy must be called when hosted returns 404 (eviction)"); + } + + // ---- Helpers ---- + + private static NegativeCache buildNegativeCache() { + final NegativeCacheConfig config = new 
NegativeCacheConfig( + Duration.ofMinutes(5), + 10_000, + false, + NegativeCacheConfig.DEFAULT_L1_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L1_TTL, + NegativeCacheConfig.DEFAULT_L2_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L2_TTL + ); + return new NegativeCache("group-negative", GROUP, config); + } + + private static ArtifactIndex nopIndex(final Optional> result) { + return new ArtifactIndex() { + @Override + public CompletableFuture index(final ArtifactDocument doc) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture remove(final String rn, final String ap) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture search( + final String q, final int max, final int off + ) { + return CompletableFuture.completedFuture(SearchResult.EMPTY); + } + + @Override + public CompletableFuture> locate(final String path) { + return CompletableFuture.completedFuture(List.of()); + } + + @Override + public CompletableFuture>> locateByName(final String name) { + return CompletableFuture.completedFuture(result); + } + + @Override + public void close() { + } + }; + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/chaos/CooldownConcurrentFilterStampedeTest.java b/pantera-main/src/test/java/com/auto1/pantera/chaos/CooldownConcurrentFilterStampedeTest.java new file mode 100644 index 000000000..810ca4156 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/chaos/CooldownConcurrentFilterStampedeTest.java @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.chaos; + +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.config.CooldownSettings; +import com.auto1.pantera.cooldown.metadata.FilteredMetadataCache; +import com.auto1.pantera.cooldown.metadata.MetadataFilter; +import com.auto1.pantera.cooldown.metadata.MetadataFilterService; +import com.auto1.pantera.cooldown.metadata.MetadataParser; +import com.auto1.pantera.cooldown.metadata.MetadataRewriter; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.containsString; + +/** + * Chaos test: 100 concurrent requests for the same uncached package metadata. + * + *

        Verifies that the stampede-prevention mechanism in + * {@link FilteredMetadataCache} deduplicates concurrent requests so the + * parser runs at most a small number of times (ideally once), not 100.

        + * + *

        Uses the Go adapter format (simplest: plain-text version list) for + * predictable parsing behaviour.

        + * + * @since 2.2.0 + */ +@Tag("Chaos") +final class CooldownConcurrentFilterStampedeTest { + + /** + * Number of concurrent callers. + */ + private static final int CONCURRENT = 100; + + /** + * Go-style metadata: 10 versions. + */ + private static final String GO_METADATA = + "v0.1.0\nv0.2.0\nv0.3.0\nv0.4.0\nv0.5.0\n" + + "v0.6.0\nv0.7.0\nv0.8.0\nv0.9.0\nv1.0.0"; + + /** + * 100 concurrent requests for the same uncached metadata: the parser + * must run a minimal number of times (stampede dedup). + */ + @Test + void hundredConcurrentRequestsDeduplicated() throws Exception { + final CountingCooldownService cooldownService = new CountingCooldownService(); + cooldownService.block("test-pkg", "v1.0.0"); + + final CooldownSettings settings = new CooldownSettings(true, Duration.ofDays(7)); + final CooldownCache cooldownCache = new CooldownCache(); + final FilteredMetadataCache metadataCache = new FilteredMetadataCache(); + final MetadataFilterService service = new MetadataFilterService( + cooldownService, settings, cooldownCache, metadataCache, + ForkJoinPool.commonPool(), 50 + ); + + final CountingGoParser parser = new CountingGoParser(); + final GoFilter filter = new GoFilter(); + final GoRewriter rewriter = new GoRewriter(); + final NoopInspector inspector = new NoopInspector(); + + final byte[] rawBytes = GO_METADATA.getBytes(StandardCharsets.UTF_8); + final ExecutorService executor = Executors.newFixedThreadPool(CONCURRENT); + final CountDownLatch startGate = new CountDownLatch(1); + + @SuppressWarnings("unchecked") + final CompletableFuture[] futures = new CompletableFuture[CONCURRENT]; + + for (int i = 0; i < CONCURRENT; i++) { + final CompletableFuture future = new CompletableFuture<>(); + futures[i] = future; + executor.submit(() -> { + try { + // All threads wait at the gate, then fire simultaneously + startGate.await(5, TimeUnit.SECONDS); + final byte[] result = service.filterMetadata( + "go", "go-repo", "test-pkg", + rawBytes, parser, filter, 
rewriter, + Optional.of(inspector) + ).get(10, TimeUnit.SECONDS); + future.complete(result); + } catch (final Exception ex) { + future.completeExceptionally(ex); + } + }); + } + + // Open the gate β€” all 100 threads fire + startGate.countDown(); + + // Wait for all to complete + CompletableFuture.allOf(futures).get(30, TimeUnit.SECONDS); + executor.shutdown(); + executor.awaitTermination(5, TimeUnit.SECONDS); + + // All 100 requests must succeed + for (int i = 0; i < CONCURRENT; i++) { + final byte[] result = futures[i].get(); + final String output = new String(result, StandardCharsets.UTF_8); + assertThat("Request " + i + ": v1.0.0 must be filtered", + output, not(containsString("v1.0.0"))); + assertThat("Request " + i + ": must have content", + result.length, greaterThan(0)); + } + + // The parser should have run a small number of times, + // far less than 100. With stampede dedup the first thread + // loads and all others coalesce. We allow a generous margin + // (up to 10) for race-condition timing between the first load + // completing and the other threads checking the inflight map, + // especially under parallel reactor builds (-T8). + final int parseCount = parser.parseCount.get(); + assertThat("Parser should run far fewer than " + CONCURRENT + " times " + + "(stampede dedup). Actual: " + parseCount, + parseCount, lessThanOrEqualTo(10)); + } + + /** + * All concurrent requests get the same filtered content. 
+ */ + @Test + void allConcurrentRequestsGetConsistentResults() throws Exception { + final CountingCooldownService cooldownService = new CountingCooldownService(); + cooldownService.block("pkg2", "v0.5.0"); + cooldownService.block("pkg2", "v0.9.0"); + + final CooldownSettings settings = new CooldownSettings(true, Duration.ofDays(7)); + final MetadataFilterService service = new MetadataFilterService( + cooldownService, settings, new CooldownCache(), + new FilteredMetadataCache(), ForkJoinPool.commonPool(), 50 + ); + + final CountingGoParser parser = new CountingGoParser(); + final GoFilter filter = new GoFilter(); + final GoRewriter rewriter = new GoRewriter(); + final NoopInspector inspector = new NoopInspector(); + + final byte[] rawBytes = GO_METADATA.getBytes(StandardCharsets.UTF_8); + final int threads = 50; + final ExecutorService executor = Executors.newFixedThreadPool(threads); + final CountDownLatch startGate = new CountDownLatch(1); + + @SuppressWarnings("unchecked") + final CompletableFuture[] futures = new CompletableFuture[threads]; + + for (int i = 0; i < threads; i++) { + final CompletableFuture future = new CompletableFuture<>(); + futures[i] = future; + executor.submit(() -> { + try { + startGate.await(5, TimeUnit.SECONDS); + final byte[] result = service.filterMetadata( + "go", "go-repo", "pkg2", rawBytes, + parser, filter, rewriter, Optional.of(inspector) + ).get(10, TimeUnit.SECONDS); + future.complete(result); + } catch (final Exception ex) { + future.completeExceptionally(ex); + } + }); + } + + startGate.countDown(); + CompletableFuture.allOf(futures).get(30, TimeUnit.SECONDS); + executor.shutdown(); + + // All results must be byte-identical + final byte[] reference = futures[0].get(); + for (int i = 1; i < threads; i++) { + final byte[] result = futures[i].get(); + assertThat("Result " + i + " must match reference", + new String(result, StandardCharsets.UTF_8), + equalTo(new String(reference, StandardCharsets.UTF_8))); + } + } + + // --- 
Counting Go parser --- + + private static final class CountingGoParser implements MetadataParser> { + final AtomicInteger parseCount = new AtomicInteger(); + + @Override + public List parse(final byte[] bytes) { + this.parseCount.incrementAndGet(); + if (bytes == null || bytes.length == 0) { + return Collections.emptyList(); + } + final String body = new String(bytes, StandardCharsets.UTF_8); + final String[] lines = body.split("\n", -1); + final List versions = new ArrayList<>(lines.length); + for (final String line : lines) { + final String trimmed = line.trim(); + if (!trimmed.isEmpty()) { + versions.add(trimmed); + } + } + return versions; + } + + @Override + public List extractVersions(final List metadata) { + return metadata == null ? Collections.emptyList() : List.copyOf(metadata); + } + + @Override + public Optional getLatestVersion(final List metadata) { + if (metadata == null || metadata.isEmpty()) { + return Optional.empty(); + } + return Optional.of(metadata.get(metadata.size() - 1)); + } + + @Override + public String contentType() { + return "text/plain"; + } + } + + private static final class GoFilter implements MetadataFilter> { + @Override + public List filter(final List metadata, final Set blocked) { + if (blocked.isEmpty()) { + return metadata; + } + final List result = new ArrayList<>(); + for (final String v : metadata) { + if (!blocked.contains(v)) { + result.add(v); + } + } + return result; + } + + @Override + public List updateLatest(final List metadata, final String newLatest) { + return metadata; + } + } + + private static final class GoRewriter implements MetadataRewriter> { + @Override + public byte[] rewrite(final List metadata) { + if (metadata == null || metadata.isEmpty()) { + return new byte[0]; + } + return String.join("\n", metadata).getBytes(StandardCharsets.UTF_8); + } + + @Override + public String contentType() { + return "text/plain"; + } + } + + private static final class CountingCooldownService implements CooldownService { + 
private final Set blocked = new HashSet<>(); + + void block(final String pkg, final String version) { + this.blocked.add(pkg + "@" + version); + } + + @Override + public CompletableFuture evaluate( + final CooldownRequest request, final CooldownInspector inspector + ) { + final String key = request.artifact() + "@" + request.version(); + if (this.blocked.contains(key)) { + return CompletableFuture.completedFuture( + CooldownResult.blocked(new CooldownBlock( + request.repoType(), request.repoName(), + request.artifact(), request.version(), + CooldownReason.FRESH_RELEASE, Instant.now(), + Instant.now().plus(Duration.ofDays(7)), + Collections.emptyList() + )) + ); + } + return CompletableFuture.completedFuture(CooldownResult.allowed()); + } + + @Override + public CompletableFuture unblock( + String rt, String rn, String a, String v, String actor + ) { + this.blocked.remove(a + "@" + v); + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture unblockAll(String rt, String rn, String actor) { + this.blocked.clear(); + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture> activeBlocks(String rt, String rn) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } + + private static final class NoopInspector implements CooldownInspector { + @Override + public CompletableFuture> releaseDate(String artifact, String version) { + return CompletableFuture.completedFuture(Optional.empty()); + } + + @Override + public CompletableFuture> dependencies( + String artifact, String version + ) { + return CompletableFuture.completedFuture(Collections.emptyList()); + } + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/chaos/CooldownHighCardinalityTest.java b/pantera-main/src/test/java/com/auto1/pantera/chaos/CooldownHighCardinalityTest.java new file mode 100644 index 000000000..ceae30501 --- /dev/null +++ 
b/pantera-main/src/test/java/com/auto1/pantera/chaos/CooldownHighCardinalityTest.java @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.chaos; + +import com.auto1.pantera.cooldown.metadata.FilteredMetadataCache; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +/** + * Chaos test: high-cardinality key space β€” populate 10Γ— the configured + * L1 capacity with unique package keys and assert that Caffeine's LRU + * eviction engages at the configured bound with no OOM and no runaway + * growth. + * + *

        Scenario: a busy deployment resolves thousands of unique package + * names in quick succession. {@link FilteredMetadataCache}'s L1 must + * cap at its configured {@code maxSize}; old entries must be evicted + * and the freshest entries must remain resident. + * + *

        Uses in-memory/mock infrastructure only; no Docker required + * (matching the style of sibling chaos tests in this package). + * + * @since 2.2.0 + */ +@Tag("Chaos") +final class CooldownHighCardinalityTest { + + /** + * L1 capacity for the test (small, to keep the test fast). + */ + private static final int CAPACITY = 100; + + /** + * Total unique keys inserted β€” 10Γ— capacity. + */ + private static final int UNIQUE_KEYS = 1_000; + + /** + * Repository type used for all cache keys. + */ + private static final String REPO_TYPE = "npm"; + + /** + * Repository name used for all cache keys. + */ + private static final String REPO_NAME = "npm-proxy"; + + /** + * Package-name template: unique per insertion. + */ + private static final String PACKAGE_PREFIX = "pkg-"; + + /** + * Overall wall-clock budget for the test. + */ + private static final Duration BUDGET = Duration.ofSeconds(5); + + /** + * Populate 1000 unique keys into an L1 sized for 100 entries: + * Caffeine's LRU must evict aggressively, the cache must stay at + * or below capacity, the most-recently-inserted keys must remain + * resident, and the oldest keys must be gone. + */ + @Test + void highCardinality_lruEvictsAtCapacity_noOom() throws Exception { + final FilteredMetadataCache cache = new FilteredMetadataCache( + CAPACITY, Duration.ofHours(24), Duration.ofHours(24), null + ); + + final AtomicInteger loads = new AtomicInteger(0); + final long startNanos = System.nanoTime(); + + // Load UNIQUE_KEYS distinct packages. Each get() is a miss, + // so each triggers the loader and populates a fresh L1 entry. 
+ for (int i = 0; i < UNIQUE_KEYS; i++) { + final int idx = i; + final byte[] payload = + ("versions-for-" + idx).getBytes(StandardCharsets.UTF_8); + final byte[] result = cache.get( + REPO_TYPE, REPO_NAME, PACKAGE_PREFIX + idx, + () -> { + loads.incrementAndGet(); + return CompletableFuture.completedFuture( + FilteredMetadataCache.CacheEntry.noBlockedVersions( + payload, Duration.ofHours(24) + ) + ); + } + ).get(2, TimeUnit.SECONDS); + assertThat("Insert " + idx + " must return its own payload", + new String(result, StandardCharsets.UTF_8), + equalTo("versions-for-" + idx)); + } + + // Force Caffeine to process pending evictions before we + // measure size() β€” estimatedSize() can otherwise lag briefly. + cache.cleanUp(); + + final long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000L; + + // Fast test: complete within the budget. + assertThat( + "High-cardinality load must complete within " + + BUDGET.toMillis() + "ms (observed " + elapsedMillis + "ms)", + elapsedMillis, lessThanOrEqualTo(BUDGET.toMillis()) + ); + + // The loader was invoked for every unique key (each was a miss). + assertThat( + "All " + UNIQUE_KEYS + " keys must miss L1 on first touch", + loads.get(), equalTo(UNIQUE_KEYS) + ); + + // (a) Cache size must respect the configured bound. + final long size = cache.size(); + assertThat( + "L1 size must not exceed configured capacity (" + CAPACITY + + "). Observed size=" + size, + size, lessThanOrEqualTo((long) CAPACITY) + ); + assertThat( + "L1 must actually cache something (size must be > 0)", + size, greaterThan(0L) + ); + + // (b) Oldest keys must be evicted. We probe the first batch + // of inserts: the loader MUST be invoked (proving the entry + // was gone). With CAPACITY=100 and UNIQUE_KEYS=1000, the + // first 100 keys have been pushed out many times over. 
+ final AtomicInteger reloadCount = new AtomicInteger(0); + for (int i = 0; i < 10; i++) { + final int idx = i; + cache.get( + REPO_TYPE, REPO_NAME, PACKAGE_PREFIX + idx, + () -> { + reloadCount.incrementAndGet(); + return CompletableFuture.completedFuture( + FilteredMetadataCache.CacheEntry.noBlockedVersions( + ("reload-" + idx).getBytes(StandardCharsets.UTF_8), + Duration.ofHours(24) + ) + ); + } + ).get(1, TimeUnit.SECONDS); + } + assertThat( + "All 10 oldest probed keys must have been evicted and " + + "reloaded (reloadCount=" + reloadCount.get() + ")", + reloadCount.get(), equalTo(10) + ); + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/chaos/CooldownValkeyStalenessTest.java b/pantera-main/src/test/java/com/auto1/pantera/chaos/CooldownValkeyStalenessTest.java new file mode 100644 index 000000000..fa76a5072 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/chaos/CooldownValkeyStalenessTest.java @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.chaos; + +import com.auto1.pantera.cooldown.metadata.FilteredMetadataCache; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.lessThan; + +/** + * Chaos test: Valkey L2 is slow/unreachable β€” assert the cooldown L1 + * (Caffeine) keeps serving without the calling thread blocking on L2. + * + *

+ * <p>Scenario: the L2-backing loader (which in production fronts
+ * Valkey / Redis) stalls for seconds. A warm L1 entry must short-circuit
+ * {@link FilteredMetadataCache#get} so that concurrent readers return
+ * immediately and never enter the slow path.
+ *
+ * <p>Implementation: we warm L1 with a cooldown-filtered metadata
+ * response, then flood the cache with 100 concurrent reads of the same
+ * key. The loader (stand-in for the slow L2/upstream path) sleeps
+ * 2 seconds on every invocation, so if the L1 short-circuit ever
+ * slipped we would easily blow the 500 ms wall-clock budget and the
+ * loader invocation counter would increment past the single warm-up.
+ *
+ * <p>
        Uses in-memory/mock infrastructure only; no Docker required + * (matching the style of sibling chaos tests in this package). + * + * @since 2.2.0 + */ +@Tag("Chaos") +final class CooldownValkeyStalenessTest { + + /** + * Number of concurrent readers. + */ + private static final int CONCURRENT = 100; + + /** + * Simulated slow-L2 delay on every loader invocation. + * Chosen to be many multiples of any plausible L1 read latency + * so that a regression (L2 blocking the hot path) is unmissable. + */ + private static final Duration SLOW_L2_DELAY = Duration.ofSeconds(2); + + /** + * Wall-clock budget for the warm read burst. + * L1 (Caffeine) reads are sub-microsecond; 500 ms gives generous + * headroom for CI jitter and thread wake-up. + */ + private static final Duration WARM_READ_BUDGET = Duration.ofMillis(500); + + /** + * Repository type used for cache keys. + */ + private static final String REPO_TYPE = "go"; + + /** + * Repository name used for cache keys. + */ + private static final String REPO_NAME = "go-repo"; + + /** + * Package name used for cache keys. + */ + private static final String PACKAGE = "example.com/stale-pkg"; + + /** + * Cooldown-filtered metadata bytes used as the warm L1 payload. + */ + private static final byte[] FILTERED_BYTES = + "v0.1.0\nv0.2.0\nv0.3.0".getBytes(StandardCharsets.UTF_8); + + /** + * Warm L1, then issue 100 concurrent reads while the L2/loader path + * is pathologically slow. All reads must return within the L1 + * latency budget β€” proving L2 never blocks the served thread. + */ + @Test + void warmL1_servesUnderSlowL2_withoutBlocking() throws Exception { + final FilteredMetadataCache cache = new FilteredMetadataCache( + 1000, Duration.ofHours(24), Duration.ofHours(24), null + ); + + final AtomicInteger loaderInvocations = new AtomicInteger(0); + + // Warm L1 with a cooldown-filtered metadata response. 
The loader + // is invoked exactly once here; from this point on a healthy L1 + // must serve every subsequent read. + final byte[] warmResult = cache.get( + REPO_TYPE, REPO_NAME, PACKAGE, + () -> { + loaderInvocations.incrementAndGet(); + return CompletableFuture.completedFuture( + FilteredMetadataCache.CacheEntry.noBlockedVersions( + FILTERED_BYTES, Duration.ofHours(24) + ) + ); + } + ).get(5, TimeUnit.SECONDS); + + assertThat("Warm-up must return the filtered bytes", + new String(warmResult, StandardCharsets.UTF_8), + equalTo(new String(FILTERED_BYTES, StandardCharsets.UTF_8))); + assertThat("Warm-up must invoke the loader exactly once", + loaderInvocations.get(), equalTo(1)); + + // Now install a pathologically slow loader. If L1 ever fails + // to short-circuit, the reader thread will be dragged into this + // 2-second sleep β€” blowing the 500 ms wall-clock budget and + // bumping the loader-invocation counter beyond the warm-up. + final java.util.function.Supplier> + slowLoader = () -> CompletableFuture.supplyAsync(() -> { + loaderInvocations.incrementAndGet(); + try { + Thread.sleep(SLOW_L2_DELAY.toMillis()); + } catch (final InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new IllegalStateException(ex); + } + return FilteredMetadataCache.CacheEntry.noBlockedVersions( + FILTERED_BYTES, Duration.ofHours(24) + ); + }); + + final ExecutorService executor = Executors.newFixedThreadPool(CONCURRENT); + final CountDownLatch startGate = new CountDownLatch(1); + final CountDownLatch doneGate = new CountDownLatch(CONCURRENT); + final AtomicLong maxPerThreadNanos = new AtomicLong(0L); + + @SuppressWarnings("unchecked") + final CompletableFuture[] futures = new CompletableFuture[CONCURRENT]; + + for (int i = 0; i < CONCURRENT; i++) { + final int idx = i; + final CompletableFuture future = new CompletableFuture<>(); + futures[idx] = future; + executor.submit(() -> { + try { + startGate.await(5, TimeUnit.SECONDS); + final long tStart = 
System.nanoTime(); + final byte[] result = cache.get( + REPO_TYPE, REPO_NAME, PACKAGE, slowLoader + ).get(10, TimeUnit.SECONDS); + final long elapsed = System.nanoTime() - tStart; + // Track the slowest observed per-thread read for diagnostics. + long prev; + do { + prev = maxPerThreadNanos.get(); + if (elapsed <= prev) { + break; + } + } while (!maxPerThreadNanos.compareAndSet(prev, elapsed)); + future.complete(result); + } catch (final Exception ex) { + future.completeExceptionally(ex); + } finally { + doneGate.countDown(); + } + }); + } + + // Fire all readers simultaneously and measure total wall clock. + final long burstStart = System.nanoTime(); + startGate.countDown(); + final boolean finished = doneGate.await( + WARM_READ_BUDGET.toMillis() + 2_000, TimeUnit.MILLISECONDS + ); + final long burstNanos = System.nanoTime() - burstStart; + executor.shutdownNow(); + executor.awaitTermination(5, TimeUnit.SECONDS); + + assertThat("All 100 concurrent readers must complete", finished, equalTo(true)); + + // (a) Latency budget β€” total wall-clock for the warm-read burst. + final long burstMillis = burstNanos / 1_000_000L; + assertThat( + "Warm-L1 burst must finish within " + + WARM_READ_BUDGET.toMillis() + "ms (observed " + + burstMillis + "ms, slowest single read " + + maxPerThreadNanos.get() / 1_000_000L + "ms). " + + "Exceeding the budget means the slow L2 path leaked " + + "into a served thread β€” L1 is not short-circuiting.", + burstMillis, lessThan(WARM_READ_BUDGET.toMillis()) + ); + + // (b) L2/loader must NOT have been re-entered. Exactly one + // invocation (the warm-up) proves the 100 concurrent readers + // were served entirely by L1, never waiting on the slow path. + assertThat( + "Loader must not be re-invoked during the warm-read burst. " + + "Actual invocation count: " + loaderInvocations.get() + + " (expected 1 for the initial warm-up only).", + loaderInvocations.get(), equalTo(1) + ); + + // (c) All reads must return the warm payload byte-for-byte. 
+ for (int i = 0; i < CONCURRENT; i++) { + final byte[] result = futures[i].get(1, TimeUnit.SECONDS); + assertThat("Reader " + i + " must receive the warm payload", + new String(result, StandardCharsets.UTF_8), + equalTo(new String(FILTERED_BYTES, StandardCharsets.UTF_8))); + } + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/chaos/DbArtifactIndexSaturationTest.java b/pantera-main/src/test/java/com/auto1/pantera/chaos/DbArtifactIndexSaturationTest.java new file mode 100644 index 000000000..045e15317 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/chaos/DbArtifactIndexSaturationTest.java @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.chaos; + +import com.auto1.pantera.http.fault.Fault; +import com.auto1.pantera.index.DbArtifactIndex; +import io.vertx.core.Vertx; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +import javax.sql.DataSource; +import java.io.PrintWriter; +import java.sql.Connection; +import java.sql.SQLException; +import java.sql.SQLFeatureNotSupportedException; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import 
java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Logger; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +/** + * Chaos test: {@link DbArtifactIndex} executor saturation. + * + *

+ * <p>Verifies that when the bounded DB-index pool is saturated (all worker threads
+ * busy AND the queue is full), further submissions fail fast with
+ * {@link RejectedExecutionException} — they are NOT executed on the caller thread.
+ *
+ * <p>This invariant is load-bearing: the group-resolver request chain calls
+ * {@code ArtifactIndex.locateByName(...)} inline on a Vert.x event-loop thread.
+ * Under the previous {@code CallerRunsPolicy}, saturation would have run the
+ * blocking JDBC work on the event loop, stalling the entire reactor. The
+ * {@link ThreadPoolExecutor.AbortPolicy} installed by {@link DbArtifactIndex}
+ * must guarantee this never happens, regardless of DB pressure.
+ *
+ * <p>
        The REE observed via {@code CompletableFuture.supplyAsync} surfaces as a + * {@link CompletionException} wrapping the REE. Callers such as + * {@code GroupResolver} map that (via {@code .exceptionally(...)}) into + * {@link Fault.IndexUnavailable} β€” this test asserts that classification at + * the cause level as well. + * + * @since 2.2.1 + */ +@Tag("Chaos") +final class DbArtifactIndexSaturationTest { + + /** Pool size used for the saturation test executor. */ + private static final int POOL_SIZE = 2; + + /** Bounded queue capacity for the saturation test executor. */ + private static final int QUEUE_SIZE = 2; + + /** Test executor wired into DbArtifactIndex. */ + private ThreadPoolExecutor saturatingExecutor; + + /** Fake DataSource whose getConnection() blocks until released. */ + private BlockingDataSource dataSource; + + /** Index under test. */ + private DbArtifactIndex index; + + /** Latch used to park in-flight JDBC calls so we can saturate the pool. */ + private CountDownLatch releaseJdbc; + + /** Tracker: did any DB call run on a Vert.x event-loop thread? */ + private AtomicBoolean ranOnVertxContext; + + @BeforeEach + void setUp() { + this.releaseJdbc = new CountDownLatch(1); + this.ranOnVertxContext = new AtomicBoolean(false); + this.dataSource = new BlockingDataSource(this.releaseJdbc, this.ranOnVertxContext); + this.saturatingExecutor = new ThreadPoolExecutor( + POOL_SIZE, POOL_SIZE, + 0L, TimeUnit.MILLISECONDS, + new LinkedBlockingQueue<>(QUEUE_SIZE), + new ThreadPoolExecutor.AbortPolicy() + ); + this.index = new DbArtifactIndex(this.dataSource, this.saturatingExecutor); + } + + @AfterEach + void tearDown() { + this.releaseJdbc.countDown(); + if (this.index != null) { + this.index.close(); + } + if (this.saturatingExecutor != null) { + this.saturatingExecutor.shutdownNow(); + } + } + + /** + * Saturate the pool, then submit enough extra work to overflow the queue. + * + *

+ * <p>Expectations:
+ *
+ * <ul>
+ * <li>Each overflow submission returns a {@link CompletableFuture} that
+ * completes exceptionally with {@link RejectedExecutionException}.</li>
+ * <li>The REE classifies to {@link Fault.IndexUnavailable} when fed
+ * through the same logic {@code GroupResolver} uses
+ * ({@code .exceptionally(ex -> new IndexUnavailable(ex, ...))}).</li>
+ * <li>None of the work ran on a Vert.x event-loop thread — the critical
+ * invariant that AbortPolicy exists to preserve.</li>
+ * </ul>
        + */ + @Test + void saturatedPool_submissionsFail_withIndexUnavailable() throws Exception { + // The constructor's warmUp() already parked one task in a worker thread. + // Fill the rest of the capacity (one worker + both queue slots) one task + // at a time and then confirm every further submission is rejected. + // We submit up to a conservative ceiling and check the pool state each + // iteration so the test does not race the ThreadPoolExecutor's internal + // queue-to-worker transfer. + final List>>> inFlight = new ArrayList<>(); + final int submitCeiling = POOL_SIZE + QUEUE_SIZE + 4; + for (int i = 0; i < submitCeiling && !isSaturated(); i++) { + inFlight.add(this.index.locateByName("in-flight-" + i)); + } + waitUntilPoolSaturated(); + + // Any further submission must be rejected immediately. Depending on the + // JDK's {@code CompletableFuture.supplyAsync} contract, the REE can + // surface in one of two ways β€” both of which are acceptable for the + // "never run on the caller" invariant, and both of which classify to + // {@link Fault.IndexUnavailable}: + // (a) the call throws {@link RejectedExecutionException} synchronously + // (current OpenJDK behaviour β€” supplyAsync lets execute()'s REE + // propagate before returning), or + // (b) the returned CF is already completed exceptionally with REE. + final int overflowCount = 3; + int observedRejections = 0; + for (int i = 0; i < overflowCount; i++) { + final String name = "overflow-" + i; + Throwable cause = null; + try { + final CompletableFuture>> cf = + this.index.locateByName(name); + // Path (b): deferred REE via the returned CF. + if (cf.isCompletedExceptionally()) { + try { + cf.get(100, TimeUnit.MILLISECONDS); + } catch (final ExecutionException ex) { + cause = unwrap(ex); + } + } else { + // Must NOT be the case that a saturated pool accepted the + // task and started running it on the caller thread. 
If the + // CF is pending, it means either AbortPolicy let this one + // through (bug) or the pool drained between submissions. + // Either way, fail β€” we should never observe this under a + // correctly-saturated pool. + fail("Saturated-pool submission " + i + " neither threw REE " + + "synchronously nor produced an exceptionally-completed " + + "future β€” AbortPolicy appears to have accepted the task. " + + "active=" + this.saturatingExecutor.getActiveCount() + + ", queue=" + this.saturatingExecutor.getQueue().size()); + return; + } + } catch (final RejectedExecutionException ree) { + // Path (a): synchronous REE from supplyAsync's e.execute(...). + cause = ree; + } + assertInstanceOf(RejectedExecutionException.class, cause, + "Saturated submission " + i + " must surface " + + "RejectedExecutionException (got " + + (cause == null ? "null" : cause.getClass().getName()) + ")"); + + // Mirror GroupResolver's .exceptionally(...) classification: the + // cause of a queue-saturation REE maps to Fault.IndexUnavailable. + final Fault fault = new Fault.IndexUnavailable(cause, "index-executor-saturated"); + assertInstanceOf(Fault.IndexUnavailable.class, fault, + "Classifier mapping must yield Fault.IndexUnavailable"); + observedRejections++; + } + assertTrue(observedRejections == overflowCount, + "All " + overflowCount + " overflow submissions must be rejected, " + + "got " + observedRejections); + + // Release the parked JDBC calls so setUp()'s warmUp() and in-flight work can drain. + this.releaseJdbc.countDown(); + + // The invariant: saturation never ran the JDBC call on the caller thread. + // (The caller for this test is JUnit's main thread, but the assertion + // guards the production Vert.x event-loop case in the same way β€” the + // fake DataSource records Vertx.currentContext() on every run.) 
+ assertFalse(this.ranOnVertxContext.get(), + "No index work may run on a Vert.x event-loop thread"); + } + + /** Whether the test executor is saturated (all workers busy + queue full). */ + private boolean isSaturated() { + return this.saturatingExecutor.getActiveCount() >= POOL_SIZE + && this.saturatingExecutor.getQueue().size() >= QUEUE_SIZE; + } + + /** + * Spin until the pool has at least {@link #POOL_SIZE} active workers AND the + * queue has reached capacity. Without this, a fast test harness can submit + * the overflow slot before the executor has transferred a queued task into + * a worker, which flips the AbortPolicy boundary by one slot and makes the + * test flaky. Bounded wait: 2 seconds is generous for a 2-thread pool to + * pick up its first two blocking jobs. + */ + private void waitUntilPoolSaturated() throws InterruptedException { + final long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(2); + while (System.nanoTime() < deadline) { + if (this.isSaturated()) { + return; + } + Thread.sleep(5); + } + fail("Timed out waiting for DbArtifactIndex test executor to saturate " + + "(active=" + this.saturatingExecutor.getActiveCount() + + ", queue=" + this.saturatingExecutor.getQueue().size() + ")"); + } + + /** Unwrap CompletionException / ExecutionException layers. */ + private static Throwable unwrap(final Throwable top) { + Throwable current = top; + while ((current instanceof ExecutionException || current instanceof CompletionException) + && current.getCause() != null && current.getCause() != current) { + current = current.getCause(); + } + return current; + } + + /** + * A {@link DataSource} whose {@code getConnection()} blocks on a latch, + * allowing the test to park work in the pool's worker threads. Every + * invocation also records whether it was invoked on a Vert.x event-loop + * thread β€” the assertion hook that guards the critical "never run JDBC on + * the caller" invariant. 
+ */ + private static final class BlockingDataSource implements DataSource { + + /** Latch released by the test to free parked work. */ + private final CountDownLatch gate; + + /** Set by any invocation that finds itself on a Vert.x context. */ + private final AtomicBoolean ranOnVertxContext; + + /** Count of invocations (diagnostic only). */ + private final AtomicInteger invocations = new AtomicInteger(0); + + BlockingDataSource(final CountDownLatch gate, final AtomicBoolean ranOnVertxContext) { + this.gate = gate; + this.ranOnVertxContext = ranOnVertxContext; + } + + @Override + public Connection getConnection() throws SQLException { + this.invocations.incrementAndGet(); + if (Vertx.currentContext() != null) { + this.ranOnVertxContext.set(true); + } + try { + // Park here for up to 10s β€” the test releases the gate at teardown. + if (!this.gate.await(10, TimeUnit.SECONDS)) { + throw new SQLException("gate not released within 10s"); + } + } catch (final InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new SQLException("interrupted while blocking", ex); + } + throw new SQLException("BlockingDataSource never returns a real connection"); + } + + @Override + public Connection getConnection(final String user, final String pass) throws SQLException { + return this.getConnection(); + } + + @Override + public PrintWriter getLogWriter() { + return null; + } + + @Override + public void setLogWriter(final PrintWriter out) { + // no-op + } + + @Override + public void setLoginTimeout(final int seconds) { + // no-op + } + + @Override + public int getLoginTimeout() { + return 0; + } + + @Override + public Logger getParentLogger() throws SQLFeatureNotSupportedException { + throw new SQLFeatureNotSupportedException(); + } + + @Override + public T unwrap(final Class iface) throws SQLException { + throw new SQLException("not a wrapper"); + } + + @Override + public boolean isWrapperFor(final Class iface) { + return false; + } + } +} diff --git 
a/pantera-main/src/test/java/com/auto1/pantera/cooldown/CooldownContextPropagationTest.java b/pantera-main/src/test/java/com/auto1/pantera/cooldown/CooldownContextPropagationTest.java new file mode 100644 index 000000000..707356c00 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/cooldown/CooldownContextPropagationTest.java @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.cooldown; + +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicReference; +import com.auto1.pantera.http.context.ContextualExecutor; +import org.apache.logging.log4j.ThreadContext; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +/** + * Verifies that ThreadContext survives async hops in the cooldown package + * when the backing executor is wrapped with + * {@link ContextualExecutor#contextualize(java.util.concurrent.Executor)}. + * + *
+ * <p>
        This is the canonical v2.2 primitive for MDC / APM propagation across + * thread-pool boundaries (see Β§4.4 of + * {@code docs/analysis/v2.2-target-architecture.md}); the older + * per-continuation wrapper approach has been retired. + * + * @since 2.2.0 + */ +final class CooldownContextPropagationTest { + + private ExecutorService rawPool; + + @BeforeEach + void setUp() { + this.rawPool = Executors.newSingleThreadExecutor(r -> { + final Thread t = new Thread(r, "cooldown-test-worker"); + t.setDaemon(true); + return t; + }); + ThreadContext.clearMap(); + } + + @AfterEach + void tearDown() { + ThreadContext.clearMap(); + this.rawPool.shutdownNow(); + } + + @Test + void threadContextSurvivesAsyncHopViaContextualExecutor() throws Exception { + // Wrap the raw pool with ContextualExecutor β€” same pattern as + // JdbcCooldownService.constructor now does. + final java.util.concurrent.Executor contextual = + ContextualExecutor.contextualize(this.rawPool); + + // Set MDC fields on the calling thread + ThreadContext.put("trace.id", "abc123"); + ThreadContext.put("package.name", "com.example:foo"); + + // Async hop through the contextual executor + final AtomicReference capturedTrace = new AtomicReference<>(); + final AtomicReference capturedPkg = new AtomicReference<>(); + + CompletableFuture.supplyAsync(() -> { + capturedTrace.set(ThreadContext.get("trace.id")); + capturedPkg.set(ThreadContext.get("package.name")); + return "done"; + }, contextual).join(); + + // Assert context survived the hop + assertNotNull(capturedTrace.get(), "trace.id should be propagated"); + assertEquals("abc123", capturedTrace.get()); + assertNotNull(capturedPkg.get(), "package.name should be propagated"); + assertEquals("com.example:foo", capturedPkg.get()); + } + + @Test + void withoutContextualExecutorContextIsLost() throws Exception { + // Using the raw pool (no contextual wrapper) β€” context should NOT survive + ThreadContext.put("trace.id", "xyz789"); + + final AtomicReference 
capturedTrace = new AtomicReference<>(); + + CompletableFuture.supplyAsync(() -> { + capturedTrace.set(ThreadContext.get("trace.id")); + return "done"; + }, this.rawPool).join(); + + // The raw pool does not propagate ThreadContext + // (it may or may not be null depending on ThreadContext state of the worker thread) + // This test just documents the contrast with the contextual executor. + // We don't assert null here because ThreadContext state on a new thread is implementation-specific. + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/cooldown/JdbcCooldownServiceTest.java b/pantera-main/src/test/java/com/auto1/pantera/cooldown/JdbcCooldownServiceTest.java index d162cf199..6416aa3aa 100644 --- a/pantera-main/src/test/java/com/auto1/pantera/cooldown/JdbcCooldownServiceTest.java +++ b/pantera-main/src/test/java/com/auto1/pantera/cooldown/JdbcCooldownServiceTest.java @@ -12,8 +12,16 @@ import com.amihaiemil.eoyaml.Yaml; import com.amihaiemil.eoyaml.YamlMapping; +import com.auto1.pantera.cooldown.api.CooldownBlock; +import com.auto1.pantera.cooldown.api.CooldownDependency; +import com.auto1.pantera.cooldown.api.CooldownInspector; +import com.auto1.pantera.cooldown.api.CooldownReason; +import com.auto1.pantera.cooldown.api.CooldownRequest; +import com.auto1.pantera.cooldown.api.CooldownResult; +import com.auto1.pantera.cooldown.api.CooldownService; +import com.auto1.pantera.cooldown.cache.CooldownCache; +import com.auto1.pantera.cooldown.config.CooldownSettings; import com.auto1.pantera.db.ArtifactDbFactory; -import com.auto1.pantera.cooldown.CooldownReason; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; diff --git a/pantera-main/src/test/java/com/auto1/pantera/docker/DockerProxyTest.java b/pantera-main/src/test/java/com/auto1/pantera/docker/DockerProxyTest.java index 097bc8b10..69a5f1440 100644 --- a/pantera-main/src/test/java/com/auto1/pantera/docker/DockerProxyTest.java +++ 
b/pantera-main/src/test/java/com/auto1/pantera/docker/DockerProxyTest.java @@ -90,7 +90,7 @@ private static DockerProxy dockerProxy( (username, password) -> Optional.empty(), token -> java.util.concurrent.CompletableFuture.completedFuture(Optional.empty()), Optional.empty(), - com.auto1.pantera.cooldown.NoopCooldownService.INSTANCE + com.auto1.pantera.cooldown.impl.NoopCooldownService.INSTANCE ); } diff --git a/pantera-main/src/test/java/com/auto1/pantera/group/GroupMetadataCacheStaleFallbackTest.java b/pantera-main/src/test/java/com/auto1/pantera/group/GroupMetadataCacheStaleFallbackTest.java new file mode 100644 index 000000000..7b33f56a1 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/group/GroupMetadataCacheStaleFallbackTest.java @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.group; + +import com.github.benmanes.caffeine.cache.Cache; +import io.lettuce.core.RedisFuture; +import io.lettuce.core.api.async.RedisAsyncCommands; +import java.lang.reflect.Field; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.Method; +import java.lang.reflect.Proxy; +import java.nio.charset.StandardCharsets; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.Test; + +/** + * Tests the 4-step graceful-degradation read path of + * {@link GroupMetadataCache#getStaleWithFallback(String)}: + * stale-L1 → stale-L2 → expired-primary-L1 → miss. + * + *
+ * <p>
        Stale-L2 and the primary-L1 peek are exercised via reflection because + * there is no injectable L2 hook on the public API; the test keeps the + * class's public surface untouched while still covering each branch. + */ +final class GroupMetadataCacheStaleFallbackTest { + + @Test + void staleL1Hit() throws Exception { + final GroupMetadataCache cache = new GroupMetadataCache("g1"); + final String path = "/com/foo/maven-metadata.xml"; + final byte[] data = "hello".getBytes(StandardCharsets.UTF_8); + cache.put(path, data); + + final Optional res = cache.getStaleWithFallback(path) + .get(5, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "stale-L1 hit returns the cached bytes", + res.isPresent(), + Matchers.is(true) + ); + MatcherAssert.assertThat( + new String(res.get(), StandardCharsets.UTF_8), + Matchers.equalTo("hello") + ); + } + + @Test + void staleL2HitPromotesToL1() throws Exception { + final GroupMetadataCache cache = new GroupMetadataCache("g2"); + final String path = "/com/bar/maven-metadata.xml"; + final byte[] data = "from-l2".getBytes(StandardCharsets.UTF_8); + + // Drop any stale-L1 entry and install a stub stale-L2 that hits. + clearStaleL1(cache); + final AtomicReference lastGetKey = new AtomicReference<>(); + final RedisAsyncCommands stub = newStubAsync( + key -> { + lastGetKey.set(key); + return data; + } + ); + enableStaleL2(cache, stub); + + final Optional res = cache.getStaleWithFallback(path) + .get(5, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "stale-L2 hit returns the bytes", + res.isPresent(), + Matchers.is(true) + ); + MatcherAssert.assertThat( + new String(res.get(), StandardCharsets.UTF_8), + Matchers.equalTo("from-l2") + ); + MatcherAssert.assertThat( + "stale-L2 key is the stale-namespaced key", + lastGetKey.get(), + Matchers.equalTo("maven:group:metadata:stale:g2:" + path) + ); + // Promote to stale-L1 β€” second call should hit L1 without calling L2. 
+ lastGetKey.set(null); + final Optional second = cache.getStaleWithFallback(path) + .get(5, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "second call hits promoted stale-L1", + second.isPresent() && lastGetKey.get() == null, + Matchers.is(true) + ); + } + + @Test + void expiredPrimaryFallback() throws Exception { + final GroupMetadataCache cache = new GroupMetadataCache("g3"); + final String path = "/com/baz/maven-metadata.xml"; + final byte[] data = "expired-primary".getBytes(StandardCharsets.UTF_8); + cache.put(path, data); + // Wipe the stale-L1 entry β€” primary L1 still holds the data. + clearStaleL1(cache); + + final Optional res = cache.getStaleWithFallback(path) + .get(5, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "expired-primary peek returns data", + res.isPresent(), + Matchers.is(true) + ); + MatcherAssert.assertThat( + new String(res.get(), StandardCharsets.UTF_8), + Matchers.equalTo("expired-primary") + ); + } + + @Test + void allTiersMissReturnsEmpty() throws Exception { + final GroupMetadataCache cache = new GroupMetadataCache("g4"); + final Optional res = cache + .getStaleWithFallback("/com/missing/maven-metadata.xml") + .get(5, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "miss returns empty (no throw)", + res.isPresent(), + Matchers.is(false) + ); + } + + // ------------------------------------------------------------- + // Reflection helpers β€” keep the public API untouched. 
+ // ------------------------------------------------------------- + + @SuppressWarnings("unchecked") + private static void clearStaleL1(final GroupMetadataCache cache) throws Exception { + final Field f = GroupMetadataCache.class.getDeclaredField("lastKnownGoodL1"); + f.setAccessible(true); + final Cache c = (Cache) f.get(cache); + c.invalidateAll(); + } + + private static void enableStaleL2( + final GroupMetadataCache cache, + final RedisAsyncCommands stub + ) throws Exception { + final Field two = GroupMetadataCache.class.getDeclaredField("staleTwoTier"); + two.setAccessible(true); + two.setBoolean(cache, true); + final Field l2 = GroupMetadataCache.class.getDeclaredField("staleL2"); + l2.setAccessible(true); + l2.set(cache, stub); + } + + /** + * Minimal {@link RedisAsyncCommands} proxy that only handles {@code get}. + * All other calls fail loudly so we notice if the production path + * reaches a non-stubbed method. + */ + @SuppressWarnings("unchecked") + private static RedisAsyncCommands newStubAsync( + final java.util.function.Function getImpl + ) { + final InvocationHandler handler = new InvocationHandler() { + @Override + public Object invoke( + final Object proxy, + final Method method, + final Object[] args + ) { + if ("get".equals(method.getName()) && args != null && args.length == 1) { + final byte[] value = getImpl.apply((String) args[0]); + return completedRedisFuture(value); + } + throw new UnsupportedOperationException( + "stub RedisAsyncCommands does not implement " + method.getName() + ); + } + }; + return (RedisAsyncCommands) Proxy.newProxyInstance( + RedisAsyncCommands.class.getClassLoader(), + new Class[] { RedisAsyncCommands.class }, + handler + ); + } + + /** + * Build a {@link RedisFuture} that wraps a completed + * {@link CompletableFuture} β€” we only rely on + * {@link RedisFuture#toCompletableFuture()} downstream. 
+ */ + @SuppressWarnings("unchecked") + private static RedisFuture completedRedisFuture(final byte[] value) { + final CompletableFuture cf = CompletableFuture.completedFuture(value); + final InvocationHandler handler = (proxy, method, args) -> { + if ("toCompletableFuture".equals(method.getName())) { + return cf; + } + // Delegate anything the production code might invoke to the CF. + try { + final Method cfMethod = CompletableFuture.class.getMethod( + method.getName(), + method.getParameterTypes() + ); + return cfMethod.invoke(cf, args); + } catch (final NoSuchMethodException ex) { + throw new UnsupportedOperationException( + "stub RedisFuture does not implement " + method.getName() + ); + } + }; + return (RedisFuture) Proxy.newProxyInstance( + RedisFuture.class.getClassLoader(), + new Class[] { RedisFuture.class }, + handler + ); + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/group/GroupResolverConstructorTest.java b/pantera-main/src/test/java/com/auto1/pantera/group/GroupResolverConstructorTest.java new file mode 100644 index 000000000..6a68ace72 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/group/GroupResolverConstructorTest.java @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.group; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.asto.Key; +import com.auto1.pantera.cache.NegativeCacheConfig; +import com.auto1.pantera.http.Headers; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.cache.NegativeCache; +import com.auto1.pantera.http.rq.RequestLine; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.util.Collections; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * Smoke test proving the wiring-site-friendly {@link GroupResolver} + * constructor builds a functional resolver and serves a happy-path 200 + * response. + * + * @since 2.2.0 + */ +final class GroupResolverConstructorTest { + + private static final String GROUP = "maven-group"; + private static final String REPO_TYPE = "maven-group"; + private static final String MEMBER = "libs-release-local"; + + @Test + void newConstructor_buildsResolver_andServesHappyPath() { + // SliceResolver that returns a trivial 200-OK slice for any member lookup. 
+ final Slice okSlice = (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.ok().build()); + final SliceResolver resolver = + (Key name, int port, int depth) -> okSlice; + + final NegativeCache negCache = new NegativeCache( + "group-negative", + GROUP, + new NegativeCacheConfig( + Duration.ofMinutes(5), + 10_000, + false, + NegativeCacheConfig.DEFAULT_L1_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L1_TTL, + NegativeCacheConfig.DEFAULT_L2_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L2_TTL + ) + ); + + final GroupResolver groupResolver = new GroupResolver( + resolver, + GROUP, + Collections.singletonList(MEMBER), + 8080, + 0, + 10L, + Collections.emptyList(), + Optional.empty(), + Collections.emptySet(), + REPO_TYPE, + negCache, + null, + Runnable::run + ); + + final Response resp = groupResolver.response( + new RequestLine("GET", "/foo"), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(200, resp.status().code(), + "New wiring-site constructor must build a resolver that serves 200"); + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/group/GroupResolverTest.java b/pantera-main/src/test/java/com/auto1/pantera/group/GroupResolverTest.java new file mode 100644 index 000000000..d3921d60c --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/group/GroupResolverTest.java @@ -0,0 +1,643 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.group; + +import com.auto1.pantera.asto.Content; +import com.auto1.pantera.asto.Key; +import com.auto1.pantera.cache.NegativeCacheConfig; +import com.auto1.pantera.http.Headers; +import com.auto1.pantera.http.Response; +import com.auto1.pantera.http.ResponseBuilder; +import com.auto1.pantera.http.RsStatus; +import com.auto1.pantera.http.Slice; +import com.auto1.pantera.http.cache.NegativeCache; +import com.auto1.pantera.http.fault.FaultTranslator; +import com.auto1.pantera.http.rq.RequestLine; +import com.auto1.pantera.http.timeout.AutoBlockRegistry; +import com.auto1.pantera.http.timeout.AutoBlockSettings; +import com.auto1.pantera.index.ArtifactDocument; +import com.auto1.pantera.index.ArtifactIndex; +import com.auto1.pantera.index.SearchResult; +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link GroupResolver} covering every branch of the 5-path + * decision tree from {@code docs/analysis/v2.2-target-architecture.md} section 2. + * + *
 + *
 + * <ul>
 + * <li>PATH A: 404 paths (negative cache hit, all-proxy-404, no-proxy-members)</li>
 + * <li>PATH B: 500 paths (DB timeout, DB failure, StorageUnavailable)</li>
 + * <li>PATH OK: success paths (index hit serves, proxy fanout first-wins)</li>
 + * <li>TOCTOU: index hit but member 404, falls through to proxy fanout (A11 fix)</li>
 + * <li>AllProxiesFailed: any proxy 5xx with no 2xx, pass-through via FaultTranslator</li>
 + * </ul>
 + *
        + * + * @since 2.2.0 + */ +@SuppressWarnings({"PMD.TooManyMethods", "PMD.AvoidDuplicateLiterals"}) +final class GroupResolverTest { + + private static final String GROUP = "maven-group"; + private static final String REPO_TYPE = "maven-group"; + private static final String HOSTED = "libs-release-local"; + private static final String PROXY_A = "maven-central"; + private static final String PROXY_B = "jboss-proxy"; + private static final String JAR_PATH = + "/com/google/guava/guava/31.1/guava-31.1.jar"; + private static final String PARSED_NAME = "com.google.guava.guava"; + + // ---- PATH A: negativeCacheHit_returns404WithoutDbQuery ---- + + @Test + void negativeCacheHit_returns404WithoutDbQuery() { + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); + final NegativeCache negCache = buildNegativeCache(); + // Pre-populate the negative cache + negCache.cacheNotFound(new Key.From(GROUP + ":" + PARSED_NAME)); + + final GroupResolver resolver = buildResolver( + idx, List.of(HOSTED, PROXY_A), Set.of(PROXY_A), negCache, + Map.of(HOSTED, okSlice(), PROXY_A, okSlice()) + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(404, resp.status().code(), + "Negative cache hit must return 404"); + assertTrue(idx.locateByNameCalls.isEmpty(), + "DB must NOT be queried when negative cache hits"); + } + + // ---- PATH OK: indexHit_servesFromTargetedMember ---- + + @Test + void indexHit_servesFromTargetedMember() { + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); + final AtomicInteger hostedCount = new AtomicInteger(0); + final AtomicInteger proxyCount = new AtomicInteger(0); + final Map slices = new HashMap<>(); + slices.put(HOSTED, countingSlice(hostedCount, RsStatus.OK)); + slices.put(PROXY_A, countingSlice(proxyCount, RsStatus.OK)); + + final GroupResolver resolver = buildResolver( + idx, List.of(HOSTED, PROXY_A), Set.of(PROXY_A), 
buildNegativeCache(), slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(200, resp.status().code(), + "Index hit must return 200 from targeted member"); + assertEquals(1, hostedCount.get(), + "Only the indexed member should be queried"); + assertEquals(0, proxyCount.get(), + "Proxy must NOT be queried on index hit"); + } + + // ---- TOCTOU: indexHit_toctouDrift_fallsThroughToProxyFanout (A11 fix) ---- + + @Test + void indexHit_toctouDrift_fallsThroughToProxyFanout() { + // Index says artifact is in HOSTED, but HOSTED returns 404 (TOCTOU) + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); + final AtomicInteger hostedCount = new AtomicInteger(0); + final AtomicInteger proxyCount = new AtomicInteger(0); + final Map slices = new HashMap<>(); + slices.put(HOSTED, countingSlice(hostedCount, RsStatus.NOT_FOUND)); + slices.put(PROXY_A, countingSlice(proxyCount, RsStatus.OK)); + + final GroupResolver resolver = buildResolver( + idx, List.of(HOSTED, PROXY_A), Set.of(PROXY_A), buildNegativeCache(), slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(200, resp.status().code(), + "TOCTOU drift must fall through to proxy fanout and succeed"); + assertEquals(1, hostedCount.get(), + "Hosted member must be queried first (index hit)"); + assertEquals(1, proxyCount.get(), + "Proxy must be queried after hosted 404 (TOCTOU fallthrough)"); + } + + // ---- PATH OK: indexMiss_proxyFanout_firstWins_cancelsOthers ---- + + @Test + void indexMiss_proxyFanout_firstWins_cancelsOthers() { + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); // miss + final AtomicInteger proxyACount = new AtomicInteger(0); + final AtomicInteger proxyBCount = new AtomicInteger(0); + final Map slices = new HashMap<>(); + slices.put(PROXY_A, countingSlice(proxyACount, 
RsStatus.OK)); + slices.put(PROXY_B, countingSlice(proxyBCount, RsStatus.OK)); + + final GroupResolver resolver = buildResolver( + idx, + List.of(PROXY_A, PROXY_B), + Set.of(PROXY_A, PROXY_B), + buildNegativeCache(), + slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(200, resp.status().code(), + "Proxy fanout must return 200 when at least one proxy succeeds"); + // At least one proxy was queried + assertTrue(proxyACount.get() + proxyBCount.get() >= 1, + "At least one proxy member must be queried"); + } + + // ---- PATH A: indexMiss_allProxy404_negCachePopulated ---- + + @Test + void indexMiss_allProxy404_negCachePopulated() { + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); // miss + final NegativeCache negCache = buildNegativeCache(); + final Map slices = new HashMap<>(); + slices.put(PROXY_A, notFoundSlice()); + slices.put(PROXY_B, notFoundSlice()); + + final GroupResolver resolver = buildResolver( + idx, + List.of(PROXY_A, PROXY_B), + Set.of(PROXY_A, PROXY_B), + negCache, + slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(404, resp.status().code(), + "All-proxy-404 must return 404"); + final Key negKey = new Key.From(GROUP + ":" + PARSED_NAME); + assertTrue(negCache.isNotFound(negKey), + "Negative cache must be populated after all-proxy-404"); + } + + // ---- PATH B: indexMiss_anyProxy5xx_allProxiesFailedPassThrough ---- + + @Test + void indexMiss_anyProxy5xx_allProxiesFailedPassThrough() { + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); // miss + final Map slices = new HashMap<>(); + slices.put(PROXY_A, staticSlice(RsStatus.INTERNAL_ERROR)); + slices.put(PROXY_B, staticSlice(RsStatus.SERVICE_UNAVAILABLE)); + + final GroupResolver resolver = buildResolver( + idx, + List.of(PROXY_A, PROXY_B), + Set.of(PROXY_A, 
PROXY_B), + buildNegativeCache(), + slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + // FaultTranslator should pass through the best 5xx (503 beats 500) + assertTrue(resp.status().serverError(), + "AllProxiesFailed must return a server error"); + assertTrue(resp.headers().stream() + .anyMatch(h -> h.getKey().equals(FaultTranslator.HEADER_FAULT)), + "Response must contain X-Pantera-Fault header"); + } + + // ---- PATH B: dbTimeout_returnsIndexUnavailable500 ---- + + @Test + void dbTimeout_returnsIndexUnavailable500() { + final ArtifactIndex idx = timeoutIndex(); + final Map slices = new HashMap<>(); + slices.put(HOSTED, okSlice()); + slices.put(PROXY_A, okSlice()); + + final GroupResolver resolver = buildResolver( + idx, List.of(HOSTED, PROXY_A), Set.of(PROXY_A), buildNegativeCache(), slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(500, resp.status().code(), + "DB timeout must return 500"); + assertTrue(resp.headers().stream() + .anyMatch(h -> h.getKey().equals(FaultTranslator.HEADER_FAULT) + && h.getValue().equals("index-unavailable")), + "Response must have X-Pantera-Fault: index-unavailable"); + } + + // ---- PATH B: dbFailure_returnsIndexUnavailable500 ---- + + @Test + void dbFailure_returnsIndexUnavailable500() { + final ArtifactIndex idx = failingIndex(); + final Map slices = new HashMap<>(); + slices.put(HOSTED, okSlice()); + slices.put(PROXY_A, okSlice()); + + final GroupResolver resolver = buildResolver( + idx, List.of(HOSTED, PROXY_A), Set.of(PROXY_A), buildNegativeCache(), slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(500, resp.status().code(), + "DB failure must return 500"); + assertTrue(resp.headers().stream() + .anyMatch(h -> 
h.getKey().equals(FaultTranslator.HEADER_FAULT) + && h.getValue().equals("index-unavailable")), + "Response must have X-Pantera-Fault: index-unavailable"); + } + + // ---- PATH A: noProxyMembers_indexMiss_returns404 ---- + + @Test + void noProxyMembers_indexMiss_returns404() { + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); // miss + final AtomicInteger hostedCount = new AtomicInteger(0); + final NegativeCache negCache = buildNegativeCache(); + final Map slices = new HashMap<>(); + slices.put(HOSTED, countingSlice(hostedCount, RsStatus.OK)); + + final GroupResolver resolver = buildResolver( + idx, + List.of(HOSTED), + Collections.emptySet(), // no proxy members + negCache, + slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(404, resp.status().code(), + "Index miss with no proxy members must return 404"); + assertEquals(0, hostedCount.get(), + "Hosted member must NOT be queried on index miss (fully indexed)"); + final Key negKey = new Key.From(GROUP + ":" + PARSED_NAME); + assertTrue(negCache.isNotFound(negKey), + "Negative cache must be populated"); + } + + // ---- Index hit + member 5xx: returns StorageUnavailable 500 ---- + + @Test + void indexHit_memberServerError_returnsStorageUnavailable() { + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); + final Map slices = new HashMap<>(); + slices.put(HOSTED, staticSlice(RsStatus.INTERNAL_ERROR)); + slices.put(PROXY_A, okSlice()); + + final GroupResolver resolver = buildResolver( + idx, List.of(HOSTED, PROXY_A), Set.of(PROXY_A), buildNegativeCache(), slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(500, resp.status().code(), + "Index hit + member 5xx must return 500 (StorageUnavailable)"); + assertTrue(resp.headers().stream() + .anyMatch(h -> 
h.getKey().equals(FaultTranslator.HEADER_FAULT) + && h.getValue().equals("storage-unavailable")), + "Response must have X-Pantera-Fault: storage-unavailable"); + } + + // ---- No index configured: full two-phase fanout ---- + + @Test + void noIndex_fullTwoPhaseFanout() { + final AtomicInteger hostedCount = new AtomicInteger(0); + final AtomicInteger proxyCount = new AtomicInteger(0); + final Map slices = new HashMap<>(); + slices.put(HOSTED, countingSlice(hostedCount, RsStatus.OK)); + slices.put(PROXY_A, countingSlice(proxyCount, RsStatus.OK)); + + final GroupResolver resolver = buildResolver( + null, // no index + List.of(HOSTED, PROXY_A), + Set.of(PROXY_A), + buildNegativeCache(), + slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(200, resp.status().code(), + "Full two-phase fanout must return 200 when a member serves"); + assertTrue(hostedCount.get() > 0, + "Hosted member must be queried in full fanout"); + } + + // ---- Metadata URL (unparseable) skips index, does full fanout ---- + + @Test + void metadataUrl_skipsIndex_fullFanout() { + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); + final AtomicInteger memberCount = new AtomicInteger(0); + final Map slices = new HashMap<>(); + slices.put("member-a", countingSlice(memberCount, RsStatus.OK)); + slices.put("member-b", countingSlice(new AtomicInteger(0), RsStatus.OK)); + + final GroupResolver resolver = buildResolver( + idx, + List.of("member-a", "member-b"), + Set.of("member-a"), + buildNegativeCache(), + slices, + "helm-group" + ); + // /index.yaml is unparseable for helm + final Response resp = resolver.response( + new RequestLine("GET", "/index.yaml"), Headers.EMPTY, Content.EMPTY + ).join(); + + assertTrue(idx.locateByNameCalls.isEmpty(), + "locateByName must NOT be called for metadata URL"); + } + + // ---- Mixed 404 + 5xx in proxy fanout: AllProxiesFailed (not all-404) ---- + + 
@Test + void proxyFanout_mixed404And5xx_allProxiesFailed() { + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); // miss + final Map slices = new HashMap<>(); + slices.put(PROXY_A, notFoundSlice()); + slices.put(PROXY_B, staticSlice(RsStatus.INTERNAL_ERROR)); + + final GroupResolver resolver = buildResolver( + idx, + List.of(PROXY_A, PROXY_B), + Set.of(PROXY_A, PROXY_B), + buildNegativeCache(), + slices + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + // Mixed: one 404 + one 5xx => AllProxiesFailed (passes through the 5xx) + assertTrue(resp.status().serverError(), + "Mixed 404+5xx must produce AllProxiesFailed (server error)"); + assertTrue(resp.headers().stream() + .anyMatch(h -> h.getKey().equals(FaultTranslator.HEADER_FAULT)), + "Must have X-Pantera-Fault header"); + } + + // ---- HEAD request works like GET ---- + + @Test + void headRequestWorks() { + final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); + final Map slices = new HashMap<>(); + slices.put(HOSTED, okSlice()); + slices.put(PROXY_A, okSlice()); + + final GroupResolver resolver = buildResolver( + idx, List.of(HOSTED, PROXY_A), Set.of(PROXY_A), buildNegativeCache(), slices + ); + final Response resp = resolver.response( + new RequestLine("HEAD", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(200, resp.status().code(), + "HEAD must be handled like GET"); + } + + // ---- Non-GET/HEAD/POST returns 405 ---- + + @Test + void putReturns405() { + final Map slices = Map.of(HOSTED, okSlice()); + final GroupResolver resolver = buildResolver( + null, List.of(HOSTED), Collections.emptySet(), + buildNegativeCache(), slices + ); + final Response resp = resolver.response( + new RequestLine("PUT", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(405, resp.status().code(), + "PUT must return 405 Method Not Allowed"); + } + + // ---- Empty members returns 
404 ---- + + @Test + void emptyMembersReturns404() { + final GroupResolver resolver = new GroupResolver( + GROUP, + Collections.emptyList(), + Collections.emptyList(), + Optional.empty(), + REPO_TYPE, + Collections.emptySet(), + buildNegativeCache(), + java.util.concurrent.ForkJoinPool.commonPool() + ); + final Response resp = resolver.response( + new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY + ).join(); + + assertEquals(404, resp.status().code(), + "Empty members must return 404"); + } + + // ---- Helpers ---- + + private GroupResolver buildResolver( + final ArtifactIndex idx, + final List memberNames, + final Set proxyMemberNames, + final NegativeCache negCache, + final Map sliceMap + ) { + return buildResolver(idx, memberNames, proxyMemberNames, negCache, sliceMap, REPO_TYPE); + } + + private GroupResolver buildResolver( + final ArtifactIndex idx, + final List memberNames, + final Set proxyMemberNames, + final NegativeCache negCache, + final Map sliceMap, + final String repoType + ) { + final List members = memberNames.stream() + .map(name -> { + final Slice s = sliceMap.getOrDefault(name, + (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.notFound().build())); + return new MemberSlice(name, s, proxyMemberNames.contains(name)); + }) + .toList(); + return new GroupResolver( + GROUP, + members, + Collections.emptyList(), + idx != null ? 
Optional.of(idx) : Optional.empty(), + repoType, + proxyMemberNames, + negCache, + java.util.concurrent.ForkJoinPool.commonPool() + ); + } + + private static NegativeCache buildNegativeCache() { + final NegativeCacheConfig config = new NegativeCacheConfig( + Duration.ofMinutes(5), + 10_000, + false, + NegativeCacheConfig.DEFAULT_L1_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L1_TTL, + NegativeCacheConfig.DEFAULT_L2_MAX_SIZE, + NegativeCacheConfig.DEFAULT_L2_TTL + ); + return new NegativeCache("group-negative", GROUP, config); + } + + private static Slice okSlice() { + return (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.ok().build()); + } + + private static Slice notFoundSlice() { + return (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); + } + + private static Slice staticSlice(final RsStatus status) { + return (line, headers, body) -> + CompletableFuture.completedFuture(ResponseBuilder.from(status).build()); + } + + private static Slice countingSlice(final AtomicInteger counter, final RsStatus status) { + return (line, headers, body) -> { + counter.incrementAndGet(); + return CompletableFuture.completedFuture(ResponseBuilder.from(status).build()); + }; + } + + /** + * Index that completes exceptionally with a RuntimeException wrapping + * a TimeoutException. + */ + private static ArtifactIndex timeoutIndex() { + return new NopIndex() { + @Override + public CompletableFuture>> locateByName(final String name) { + return CompletableFuture.failedFuture( + new RuntimeException("statement timeout", new TimeoutException("500ms")) + ); + } + }; + } + + /** + * Index that completes exceptionally with a generic DB error. 
+ */ + private static ArtifactIndex failingIndex() { + return new NopIndex() { + @Override + public CompletableFuture>> locateByName(final String name) { + return CompletableFuture.failedFuture( + new RuntimeException("connection refused") + ); + } + }; + } + + /** + * Recording index that tracks locateByName calls. + */ + private static final class RecordingIndex extends NopIndex { + final List locateByNameCalls = new CopyOnWriteArrayList<>(); + final List locateCalls = new CopyOnWriteArrayList<>(); + private final Optional> result; + + RecordingIndex(final Optional> result) { + this.result = result; + } + + @Override + public CompletableFuture>> locateByName(final String name) { + this.locateByNameCalls.add(name); + return CompletableFuture.completedFuture(this.result); + } + + @Override + public CompletableFuture> locate(final String path) { + this.locateCalls.add(path); + return CompletableFuture.completedFuture( + this.result.orElse(List.of()) + ); + } + } + + /** + * Minimal no-op index base class. 
+ */ + private static class NopIndex implements ArtifactIndex { + @Override + public CompletableFuture index(final ArtifactDocument doc) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture remove(final String rn, final String ap) { + return CompletableFuture.completedFuture(null); + } + + @Override + public CompletableFuture search( + final String q, final int max, final int off + ) { + return CompletableFuture.completedFuture(SearchResult.EMPTY); + } + + @Override + public CompletableFuture> locate(final String path) { + return CompletableFuture.completedFuture(List.of()); + } + + @Override + public CompletableFuture>> locateByName(final String name) { + return CompletableFuture.completedFuture(Optional.of(List.of())); + } + + @Override + public void close() { + } + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/group/GroupSliceFlattenedResolutionTest.java b/pantera-main/src/test/java/com/auto1/pantera/group/GroupSliceFlattenedResolutionTest.java deleted file mode 100644 index bc1167bed..000000000 --- a/pantera-main/src/test/java/com/auto1/pantera/group/GroupSliceFlattenedResolutionTest.java +++ /dev/null @@ -1,794 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
- */ -package com.auto1.pantera.group; - -import com.auto1.pantera.asto.Content; -import com.auto1.pantera.asto.Key; -import com.auto1.pantera.http.Headers; -import com.auto1.pantera.http.Response; -import com.auto1.pantera.http.ResponseBuilder; -import com.auto1.pantera.http.RsStatus; -import com.auto1.pantera.http.Slice; -import com.auto1.pantera.http.rq.RequestLine; -import com.auto1.pantera.http.slice.EcsLoggingSlice; -import com.auto1.pantera.index.ArtifactDocument; -import com.auto1.pantera.index.ArtifactIndex; -import com.auto1.pantera.index.SearchResult; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Test; -import org.slf4j.MDC; - -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.Executors; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Covers the 5-path resolution flow defined by the GroupSlice v2 spec: - * - *
 - * <ol>
 - * <li>PARSE artifact name. Unparseable → full two-phase fanout.</li>
 - * <li>QUERY INDEX. DB error (Optional.empty) → full two-phase fanout (safety net).
 - * Empty list (confirmed miss) → proxy-only fanout.
 - * Hit → targeted local read.</li>
 - * <li>TARGETED LOCAL READ (hit): NO circuit breaker. NO fallback fanout on 5xx.</li>
 - * <li>PROXY FANOUT (miss): query proxy leaves only; hosted NOT queried.</li>
 - * <li>FULL TWO-PHASE FANOUT: hosted first, then proxy.</li>
 - * </ol>
 - *
 - * <p>Key behaviour locked in:
 - * <ul>
 - * <li>Index hit + member 5xx → 500 (local error, no fallback bytes elsewhere)</li>
 - * <li>No 503 from group resolution — 503 used to leak from circuit-open skips</li>
 - * </ul>
 - *
        - * - * @since 2.1.3 - */ -@SuppressWarnings({"PMD.TooManyMethods", "PMD.AvoidDuplicateLiterals"}) -final class GroupSliceFlattenedResolutionTest { - - private static final String MAVEN_GROUP = "maven-group"; - private static final String HOSTED = "libs-release-local"; - private static final String PROXY = "maven-central"; - private static final String JAR_PATH = - "/com/google/guava/guava/31.1/guava-31.1.jar"; - private static final String PARSED_NAME = "com.google.guava.guava"; - - // ---- Path 3: Index hit β†’ targeted local read returns 200 ---- - - @Test - void indexHitTargetedServes() { - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); - final AtomicInteger hostedCount = new AtomicInteger(0); - final AtomicInteger proxyCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(HOSTED, staticSlice(hostedCount, RsStatus.OK)); - slices.put(PROXY, staticSlice(proxyCount, RsStatus.OK)); - final GroupSlice slice = buildGroup( - idx, - List.of(HOSTED, PROXY), - Set.of(PROXY), - slices - ); - final Response resp = slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(RsStatus.OK, resp.status(), - "Targeted local read must return 200 when member serves"); - assertEquals(1, hostedCount.get(), - "Only the indexed member should be queried"); - assertEquals(0, proxyCount.get(), - "Proxy must NOT be queried on index hit"); - assertTrue(idx.locateByNameCalls.contains(PARSED_NAME)); - } - - // ---- Path 3: Index hit + member 5xx β†’ 500, NO fallback fanout ---- - - @Test - void indexHitMemberFailsReturns500NoFanout() { - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); - final AtomicInteger hostedCount = new AtomicInteger(0); - final AtomicInteger proxyCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(HOSTED, staticSlice(hostedCount, RsStatus.INTERNAL_ERROR)); - slices.put(PROXY, staticSlice(proxyCount, 
RsStatus.OK)); - final GroupSlice slice = buildGroup( - idx, - List.of(HOSTED, PROXY), - Set.of(PROXY), - slices - ); - final Response resp = slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(RsStatus.INTERNAL_ERROR, resp.status(), - "Index hit + member 5xx must return 500 (local error) β€” NOT 502/503"); - assertEquals(1, hostedCount.get(), - "Indexed member must be queried exactly once"); - assertEquals(0, proxyCount.get(), - "Proxy members MUST NOT be queried on targeted-read failure " - + "(no fallback fanout β€” bytes are local, nobody else has them)"); - } - - // ---- Path 3: Index hit + member 404 β†’ 404 returned as-is (stale index) ---- - - @Test - void indexHitMember404ReturnsAsIs() { - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); - final AtomicInteger hostedCount = new AtomicInteger(0); - final AtomicInteger proxyCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(HOSTED, staticSlice(hostedCount, RsStatus.NOT_FOUND)); - slices.put(PROXY, staticSlice(proxyCount, RsStatus.OK)); - final GroupSlice slice = buildGroup( - idx, - List.of(HOSTED, PROXY), - Set.of(PROXY), - slices - ); - final Response resp = slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(RsStatus.NOT_FOUND, resp.status(), - "Index hit + member 404 must return 404 as-is (stale index scenario)"); - assertEquals(1, hostedCount.get(), - "Indexed member must be queried exactly once"); - assertEquals(0, proxyCount.get(), - "Proxy must NOT be queried after targeted-read 404"); - } - - // ---- Path 4: Index confirmed miss β†’ only proxy members queried ---- - - @Test - void indexMissProxyFanoutServes() { - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); - final AtomicInteger hostedCount = new AtomicInteger(0); - final AtomicInteger proxyCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); 
- slices.put(HOSTED, staticSlice(hostedCount, RsStatus.OK)); - slices.put(PROXY, staticSlice(proxyCount, RsStatus.OK)); - final GroupSlice slice = buildGroup( - idx, - List.of(HOSTED, PROXY), - Set.of(PROXY), - slices - ); - final Response resp = slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(RsStatus.OK, resp.status(), - "Proxy-only fanout must serve 200 when proxy has it"); - assertEquals(0, hostedCount.get(), - "Hosted members MUST NOT be queried on confirmed index miss " - + "(hosted is fully indexed β€” absence = absence)"); - assertEquals(1, proxyCount.get(), - "Proxy member must be queried on index miss"); - } - - // ---- Path 5: DB error (Optional.empty) β†’ full two-phase fanout ---- - - @Test - void dbErrorFullFanout() { - final RecordingIndex idx = new RecordingIndex(Optional.empty()); - final AtomicInteger hostedCount = new AtomicInteger(0); - final AtomicInteger proxyCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(HOSTED, staticSlice(hostedCount, RsStatus.NOT_FOUND)); - slices.put(PROXY, staticSlice(proxyCount, RsStatus.OK)); - final GroupSlice slice = buildGroup( - idx, - List.of(HOSTED, PROXY), - Set.of(PROXY), - slices - ); - final Response resp = slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(RsStatus.OK, resp.status(), - "Full fanout safety net should still succeed via proxy on DB error"); - assertEquals(1, hostedCount.get(), - "Hosted must be queried in full fanout safety net (DB error)"); - assertEquals(1, proxyCount.get(), - "Proxy must be queried after hosted returns 404 in full fanout"); - } - - // ---- Path 1: Unparseable URL β†’ full two-phase fanout ---- - - @Test - void unparseableNameFullFanout() { - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); - final AtomicInteger hostedCount = new AtomicInteger(0); - final AtomicInteger proxyCount = new 
AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(HOSTED, staticSlice(hostedCount, RsStatus.NOT_FOUND)); - slices.put(PROXY, staticSlice(proxyCount, RsStatus.OK)); - final GroupSlice slice = buildGroup( - idx, - List.of(HOSTED, PROXY), - Set.of(PROXY), - slices - ); - // "/" is unparseable for maven-group (no artifact name) - final Response resp = slice.response( - new RequestLine("GET", "/"), Headers.EMPTY, Content.EMPTY - ).join(); - // Unparseable bypasses the index entirely. - assertTrue(idx.locateByNameCalls.isEmpty(), - "locateByName MUST NOT be called for unparseable URL"); - assertEquals(RsStatus.OK, resp.status(), - "Unparseable URL β†’ full fanout; proxy serves 200"); - assertEquals(1, hostedCount.get(), - "Hosted must be queried in full fanout (phase 1)"); - assertEquals(1, proxyCount.get(), - "Proxy must be queried in full fanout (phase 2)"); - } - - // ---- No 503 from group resolution: all-404 returns 404, never 503 ---- - - @Test - void indexMissAll404ReturnsPlain404() { - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); - final AtomicInteger proxyCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(PROXY, staticSlice(proxyCount, RsStatus.NOT_FOUND)); - final GroupSlice slice = buildGroup( - idx, - List.of(PROXY), - Set.of(PROXY), - slices - ); - final Response resp = slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(RsStatus.NOT_FOUND, resp.status(), - "Index miss + all proxy 404 β†’ 404 (never 503)"); - assertNotEquals(RsStatus.SERVICE_UNAVAILABLE, resp.status(), - "Group resolution must NEVER return 503"); - } - - // ---- Index miss + all proxy 5xx β†’ 502 (we ARE proxying) ---- - - @Test - void indexMissAll5xxReturns502() { - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); - final AtomicInteger proxyCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(PROXY, 
staticSlice(proxyCount, RsStatus.INTERNAL_ERROR)); - final GroupSlice slice = buildGroup( - idx, - List.of(PROXY), - Set.of(PROXY), - slices - ); - final Response resp = slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(RsStatus.BAD_GATEWAY, resp.status(), - "Index miss + proxy 5xx β†’ 502 (we are proxying a bad gateway)"); - } - - // ---- Negative cache: second fanout for confirmed-miss is suppressed ---- - - @Test - void negativeCachePreventsSecondFanout() { - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); // confirmed miss - final AtomicInteger proxyCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(PROXY, staticSlice(proxyCount, RsStatus.NOT_FOUND)); - final GroupSlice slice = buildGroup( - idx, - List.of(PROXY), - Set.of(PROXY), - slices - ); - // First request: fanout β†’ 404 β†’ cache populated - slice.response(new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY).join(); - // Second request: should hit negative cache, not fanout again - slice.response(new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY).join(); - assertEquals(1, proxyCount.get(), - "Proxy should only be queried once β€” second request hits negative cache"); - } - - // ---- Request coalescing: N concurrent misses trigger only ONE fanout ---- - - /** - * When N concurrent requests arrive for the same missing artifact, the - * proxy member must be queried exactly once β€” the first request does the - * fanout, subsequent requests park on an in-flight "gate" future and, on - * wake-up, hit the freshly-populated negative cache instead of repeating - * the fanout. This collapses a thundering herd of N concurrent 404s into - * a single upstream request. 
- */ - @Test - void concurrentMissesCoalesceIntoSingleFanout() throws Exception { - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); // confirmed miss - final AtomicInteger proxyCount = new AtomicInteger(0); - // Slow proxy: block for 100ms so N concurrent requests all arrive - // BEFORE the first fanout completes, forcing the coalescer to kick in. - final java.util.concurrent.ExecutorService delay = Executors.newSingleThreadExecutor(); - final Map slices = new HashMap<>(); - slices.put(PROXY, (line, headers, body) -> { - proxyCount.incrementAndGet(); - return CompletableFuture.supplyAsync(() -> { - try { - Thread.sleep(100); - } catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - } - return ResponseBuilder.notFound().build(); - }, delay); - }); - final GroupSlice slice = buildGroup( - idx, - List.of(PROXY), - Set.of(PROXY), - slices - ); - final int concurrency = 10; - final java.util.concurrent.ExecutorService pool = - Executors.newFixedThreadPool(concurrency); - final java.util.concurrent.CountDownLatch startGate = - new java.util.concurrent.CountDownLatch(1); - final List> all = new java.util.ArrayList<>(); - for (int i = 0; i < concurrency; i++) { - all.add(CompletableFuture.supplyAsync(() -> { - try { - startGate.await(); - } catch (InterruptedException ex) { - Thread.currentThread().interrupt(); - } - return slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).join(); - }, pool)); - } - // Release all requests simultaneously - startGate.countDown(); - CompletableFuture.allOf(all.toArray(new CompletableFuture[0])).join(); - pool.shutdown(); - delay.shutdown(); - - // Every follower must see a 404 response - for (final CompletableFuture fut : all) { - assertEquals(RsStatus.NOT_FOUND, fut.join().status(), - "Every coalesced follower must receive 404"); - } - // CRITICAL: the thundering herd must collapse to ONE upstream query. 
- // The first request fanouts; followers park on the gate, then re-enter - // proxyOnlyFanout and short-circuit via the negative cache. - assertEquals(1, proxyCount.get(), - "N concurrent misses must trigger exactly ONE upstream fanout " - + "(request coalescing + negative cache eliminate the thundering herd)"); - } - - // ---- Stack-safety regression: coalescer is safe at high N ---- - - /** - * Stack-safety regression guard for the coalescer. - * - * Before commit 7c30f01f the coalescer used .thenCompose on the gate. - * When the leader completed the gate synchronously, followers whose - * callbacks were already queued ran on the SAME stack, each retrying - * proxyOnlyFanout which re-hit the still-in-map gate and recursed, - * blowing the stack at ~400 frames. - * - * The fix (thenComposeAsync) dispatches retries to the common pool, - * keeping the stack flat regardless of follower count. - * - * This test locks in that guarantee at N=1000 β€” well beyond the - * observed production reproducer (~15 concurrent graphql-codegen - * requests) and far above the point where synchronous recursion - * would overflow. 
- */ - @Test - void coalescingIsStackSafeAtHighConcurrency() throws Exception { - final int N = 1000; - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of())); // confirmed miss - final AtomicInteger proxyCount = new AtomicInteger(0); - final java.util.concurrent.CountDownLatch startLatch = - new java.util.concurrent.CountDownLatch(1); - - // Slow proxy to keep the gate open while followers pile up - final Map slices = new HashMap<>(); - slices.put(PROXY, (line, headers, body) -> { - proxyCount.incrementAndGet(); - return CompletableFuture.supplyAsync(() -> { - try { - Thread.sleep(100); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - return ResponseBuilder.notFound().build(); - }); - }); - - final GroupSlice slice = buildGroup( - idx, - List.of(PROXY), - Set.of(PROXY), - slices - ); - - // Launch N concurrent requests - final java.util.concurrent.ExecutorService pool = - Executors.newFixedThreadPool(Math.min(N, 64)); - final List> futures = new java.util.ArrayList<>(N); - try { - for (int i = 0; i < N; i++) { - futures.add(CompletableFuture.supplyAsync(() -> { - try { - startLatch.await(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException(e); - } - return slice.response( - new RequestLine("GET", JAR_PATH), - Headers.EMPTY, - Content.EMPTY - ).join(); - }, pool)); - } - startLatch.countDown(); - // Wait for all β€” if ANY threw StackOverflowError, CompletableFuture.join rethrows it - for (CompletableFuture f : futures) { - final Response r = f.get(30, java.util.concurrent.TimeUnit.SECONDS); - assertEquals(RsStatus.NOT_FOUND, r.status(), - "All " + N + " concurrent requests must receive 404"); - } - } finally { - pool.shutdownNow(); - } - - // Coalescing invariant at scale: exactly one proxy query despite N followers - assertEquals(1, proxyCount.get(), - "At N=" + N + " concurrent missers, proxy must be queried exactly once β€” " - + "the coalescer + negative cache 
must collapse the herd. Actual: " - + proxyCount.get()); - } - - // ---- Internal routing header suppresses EcsLoggingSlice access logs ---- - - /** - * Verify that GroupSlice sets the {@code X-Pantera-Internal} header on member requests. - * EcsLoggingSlice checks this header and skips access log emission, eliminating ~105K - * noise entries per 30 min from internal group-to-member fanout in production. - */ - @Test - void memberDispatchCarriesInternalRoutingHeader() { - final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); - final AtomicBoolean headerSeen = new AtomicBoolean(false); - final Map slices = new HashMap<>(); - // Member slice records whether the internal routing header was present - slices.put(HOSTED, (line, headers, body) -> { - if (!headers.find(EcsLoggingSlice.INTERNAL_ROUTING_HEADER).isEmpty()) { - headerSeen.set(true); - } - return CompletableFuture.completedFuture(ResponseBuilder.ok().build()); - }); - final GroupSlice slice = buildGroup( - idx, - List.of(HOSTED), - Collections.emptySet(), - slices - ); - slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).join(); - assertTrue(headerSeen.get(), - "GroupSlice must set X-Pantera-Internal header on member dispatch " - + "so EcsLoggingSlice skips access log emission for internal queries"); - } - - /** - * Verify that external (client-facing) requests do NOT carry the internal routing header β€” - * EcsLoggingSlice at the top of the stack must still emit access logs for real client traffic. 
- */ - @Test - void externalRequestLacksInternalRoutingHeader() { - final AtomicBoolean headerSeen = new AtomicBoolean(false); - // Simulate EcsLoggingSlice's check on an external request - final Headers externalHeaders = Headers.EMPTY; - headerSeen.set(!externalHeaders.find(EcsLoggingSlice.INTERNAL_ROUTING_HEADER).isEmpty()); - assertFalse(headerSeen.get(), - "External (client-facing) requests must NOT carry X-Pantera-Internal " - + "β€” EcsLoggingSlice must still emit access logs for real client traffic"); - } - - @AfterEach - void clearMdc() { - MDC.clear(); - } - - // ---- MDC propagation: thenCompose callback sees caller's MDC context ---- - - /** - * Verify that MDC values set on the calling thread are visible inside the - * {@code thenCompose} callback that runs after the index query completes β€” - * even when the index future completes on a different thread. - * - *

        This guards against regression of the MDC propagation fix: without - * {@code MdcPropagation.withMdc()}, the DB executor thread would execute - * the callback with an empty MDC, causing trace.id/user.name/client.ip to - * be missing from any logs emitted inside that callback. - */ - @Test - void mdcIsVisibleInsideThenComposeCallbackAcrossThreadBoundary() throws Exception { - // Set MDC on the calling (test) thread - MDC.put("trace.id", "test-trace-abc123"); - MDC.put("user.name", "test-user"); - - // Use a separate thread pool to simulate the DB executor completing - // the index future on a different thread (the typical production scenario). - final java.util.concurrent.ExecutorService indexThread = - Executors.newSingleThreadExecutor(); - - // Capture the MDC values seen inside the thenCompose callback - final AtomicReference traceIdInCallback = new AtomicReference<>("NOT_SET"); - final AtomicReference userNameInCallback = new AtomicReference<>("NOT_SET"); - - // Build an index that completes asynchronously on a different thread, - // simulating the DB executor. The member slice records the MDC values - // from inside the thenCompose callback (via the member request). 
- final Map slices = new HashMap<>(); - slices.put(HOSTED, (line, headers, body) -> { - // This lambda runs inside the thenCompose callback chain β€” - // MDC must be set here for the fix to be working - traceIdInCallback.set(MDC.get("trace.id")); - userNameInCallback.set(MDC.get("user.name")); - return CompletableFuture.completedFuture(ResponseBuilder.ok().build()); - }); - - final ArtifactIndex asyncIndex = new ArtifactIndex() { - @Override - public CompletableFuture>> locateByName( - final String artifactName - ) { - // Complete the future on a different thread (DB executor simulation) - return CompletableFuture.supplyAsync( - () -> Optional.of(List.of(HOSTED)), - indexThread - ); - } - - @Override - public CompletableFuture> locate(final String path) { - throw new AssertionError("locate() must not be called"); - } - - @Override - public CompletableFuture index(final ArtifactDocument doc) { - return CompletableFuture.completedFuture(null); - } - - @Override - public CompletableFuture remove(final String repo, final String path) { - return CompletableFuture.completedFuture(null); - } - - @Override - public CompletableFuture search( - final String query, final int maxResults, final int offset - ) { - return CompletableFuture.completedFuture(SearchResult.EMPTY); - } - - @Override - public void close() { - indexThread.shutdownNow(); - } - }; - - final GroupSlice slice = new GroupSlice( - new MapResolver(slices), - MAVEN_GROUP, - List.of(HOSTED), - 8080, 0, 0, - Collections.emptyList(), - Optional.of(asyncIndex), - Collections.emptySet(), - MAVEN_GROUP - ); - - slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).get(); - - indexThread.shutdown(); - - assertEquals( - "test-trace-abc123", - traceIdInCallback.get(), - "trace.id must be propagated into thenCompose callback running on the " - + "index executor thread (MDC propagation fix)" - ); - assertEquals( - "test-user", - userNameInCallback.get(), - "user.name must be propagated into 
thenCompose callback running on the " - + "index executor thread (MDC propagation fix)" - ); - } - - // ---- MDC propagation: whenComplete callback sees caller's MDC context ---- - - /** - * Verify that MDC values set on the calling thread are visible inside the - * {@code whenComplete} callback (used for metrics recording) β€” which may run - * on the member response thread rather than the original request thread. - */ - @Test - void mdcIsVisibleInsideWhenCompleteCallbackAcrossThreadBoundary() throws Exception { - MDC.put("trace.id", "metrics-trace-xyz"); - - final java.util.concurrent.ExecutorService memberThread = - Executors.newSingleThreadExecutor(); - - // Index returns a hit immediately; the member slice completes on a - // different executor thread, so whenComplete runs on that thread. - final AtomicReference traceIdInWhenComplete = new AtomicReference<>("NOT_SET"); - - final Map slices = new HashMap<>(); - slices.put(HOSTED, (line, headers, body) -> - CompletableFuture.supplyAsync( - () -> ResponseBuilder.ok().build(), - memberThread - ) - ); - - // Subclass GroupSlice is not possible (final), so we verify the MDC - // propagation indirectly: the whenComplete callback calls recordMetrics - // which itself is a no-op in tests (no Micrometer). Instead, we - // verify the MDC is still intact on the calling thread after join() - // (the withMdcBiConsumer pattern restores prior MDC, so the calling - // thread's MDC is unchanged β€” this confirms the wrapper ran correctly). 
- final RecordingIndex idx = new RecordingIndex(Optional.of(List.of(HOSTED))); - final GroupSlice slice = buildGroup( - idx, List.of(HOSTED), Collections.emptySet(), slices - ); - - // Run response() on a dedicated thread that has its own MDC snapshot - final java.util.concurrent.ExecutorService callerThread = - Executors.newSingleThreadExecutor(); - callerThread.submit(() -> { - MDC.put("trace.id", "caller-trace-999"); - slice.response( - new RequestLine("GET", JAR_PATH), Headers.EMPTY, Content.EMPTY - ).join(); - traceIdInWhenComplete.set(MDC.get("trace.id")); - }).get(); - - memberThread.shutdown(); - callerThread.shutdown(); - - // After the full async chain completes, the caller thread's MDC must - // be intact (withMdcBiConsumer restores prior MDC after the callback). - assertEquals( - "caller-trace-999", - traceIdInWhenComplete.get(), - "Caller thread MDC must be restored after whenComplete callback " - + "(withMdcBiConsumer saves/restores prior MDC on the executing thread)" - ); - } - - // ---- Helpers ---- - - private static GroupSlice buildGroup( - final ArtifactIndex idx, - final List members, - final Set proxyMembers, - final Map slices - ) { - return new GroupSlice( - new MapResolver(slices), - MAVEN_GROUP, - members, - 8080, 0, 0, - Collections.emptyList(), - Optional.of(idx), - proxyMembers, - MAVEN_GROUP - ); - } - - private static Slice staticSlice(final AtomicInteger counter, final RsStatus status) { - return (line, headers, body) -> { - counter.incrementAndGet(); - return CompletableFuture.completedFuture( - ResponseBuilder.from(status).build() - ); - }; - } - - /** - * ArtifactIndex stub that records {@code locateByName} calls and returns - * a fixed Optional result. Empty Optional models a DB error; present - * Optional with empty list models a confirmed miss. 
- */ - private static final class RecordingIndex implements ArtifactIndex { - final List locateByNameCalls = new CopyOnWriteArrayList<>(); - private final Optional> result; - - RecordingIndex(final Optional> result) { - this.result = result; - } - - @Override - public CompletableFuture index(final ArtifactDocument doc) { - return CompletableFuture.completedFuture(null); - } - - @Override - public CompletableFuture remove( - final String repoName, final String artifactPath - ) { - return CompletableFuture.completedFuture(null); - } - - @Override - public CompletableFuture search( - final String query, final int maxResults, final int offset - ) { - return CompletableFuture.completedFuture(SearchResult.EMPTY); - } - - @Override - public CompletableFuture> locate(final String artifactPath) { - throw new AssertionError( - "locate() must NEVER be called in the flattened resolution flow" - ); - } - - @Override - public CompletableFuture>> locateByName( - final String artifactName - ) { - this.locateByNameCalls.add(artifactName); - return CompletableFuture.completedFuture(this.result); - } - - @Override - public void close() { - // nop - } - } - - private static final class MapResolver implements SliceResolver { - private final Map slices; - - MapResolver(final Map slices) { - this.slices = slices; - } - - @Override - public Slice slice(final Key name, final int port, final int depth) { - final Slice s = this.slices.get(name.string()); - return s != null ? 
s - : (line, headers, body) -> - CompletableFuture.completedFuture( - ResponseBuilder.notFound().build() - ); - } - } -} diff --git a/pantera-main/src/test/java/com/auto1/pantera/group/GroupSliceIndexRoutingTest.java b/pantera-main/src/test/java/com/auto1/pantera/group/GroupSliceIndexRoutingTest.java deleted file mode 100644 index a34b29c78..000000000 --- a/pantera-main/src/test/java/com/auto1/pantera/group/GroupSliceIndexRoutingTest.java +++ /dev/null @@ -1,515 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. - */ -package com.auto1.pantera.group; - -import com.auto1.pantera.asto.Content; -import com.auto1.pantera.asto.Key; -import com.auto1.pantera.http.Headers; -import com.auto1.pantera.http.Response; -import com.auto1.pantera.http.ResponseBuilder; -import com.auto1.pantera.http.Slice; -import com.auto1.pantera.http.rq.RequestLine; -import com.auto1.pantera.index.ArtifactDocument; -import com.auto1.pantera.index.ArtifactIndex; -import com.auto1.pantera.index.SearchResult; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; - -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -/** - * Verifies that GroupSlice routes group requests through {@code 
locateByName()} - * for ALL supported adapter types β€” never through the legacy {@code locate()} path. - * - *

        Four invariants are tested: - *

          - *
        1. Artifact URLs for every known adapter β†’ {@code locateByName()} called, {@code locate()} never called
        2. - *
        3. Index hit (index returns a repo) β†’ only the matched member receives the request
        4. - *
        5. Index miss (index returns empty) β†’ all members receive the request (fanout)
        6. - *
        7. Metadata/unparseable URLs β†’ direct fanout with NO index call at all
        8. - *
        - * - *

        Part 6A β€” log-level coverage note: GroupSlice uses {@code EcsLogger.debug()} for - * individual member 404s and {@code EcsLogger.warn()} for aggregate 404s (all members missing) - * and server errors. These log levels cannot be captured directly because {@code EcsLogger} does - * not expose a test listener/appender API and does not bridge to a standard SLF4J/Log4j2 - * backend in the test classpath. Coverage is therefore behavioral: the tests below already - * verify 200 vs 404 response codes for the one-member-succeeds and all-members-404 scenarios, - * which is the observable side-effect of those code paths executing. Log level correctness was - * verified by code inspection of GroupSlice lines 777 (DEBUG, member 404) and 861–889 (WARN, - * aggregate 404 / server error). - * - * @since 1.21.0 - */ -@SuppressWarnings({"PMD.TooManyMethods", "PMD.AvoidDuplicateLiterals"}) -final class GroupSliceIndexRoutingTest { - - // ---- locateByName() called (not locate()) for all adapter types ---- - - @ParameterizedTest - @CsvSource({ - "maven-group, /com/google/guava/guava/31.1/guava-31.1.jar, com.google.guava.guava", - "npm-group, /lodash/-/lodash-4.17.21.tgz, lodash", - "docker-group, /v2/library/nginx/manifests/latest, library/nginx", - "pypi-group, /simple/numpy/, numpy", - "go-group, /github.com/gin-gonic/gin/@v/v1.9.1.info, github.com/gin-gonic/gin", - "gem-group, /gems/rails-7.1.2.gem, rails", - "php-group, /p2/monolog/monolog.json, monolog/monolog", - "helm-group, /charts/nginx-1.2.3.tgz, nginx", - "debian-group, /pool/main/n/nginx/nginx_1.18.0_amd64.deb, nginx_amd64", - "hex-group, /api/packages/phoenix, phoenix", - "file-group, /reports/2024/q1.pdf, reports.2024.q1.pdf", - }) - void locateByNameCalledNotLocate(final String repoType, final String url, - final String expectedName) { - final RecordingIndex idx = new RecordingIndex(List.of()); - final GroupSlice slice = sliceWithIndex(repoType, idx, List.of("member-a", "member-b")); - slice.response(new 
RequestLine("GET", url), Headers.EMPTY, Content.EMPTY).join(); - assertEquals( - List.of(expectedName.strip()), - idx.locateByNameCalls, - "locateByName() must be called with parsed name for: " + url - ); - assertTrue(idx.locateCalls.isEmpty(), - "locate() must NOT be called for: " + url); - } - - // ---- Index hit β†’ only the matching member receives the request ---- - - @Test - void indexHitQueriesOnlyMatchingMember() { - final String target = "maven-proxy"; - final RecordingIndex idx = new RecordingIndex(List.of(target)); - final AtomicInteger proxyCount = new AtomicInteger(0); - final AtomicInteger localCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(target, countingSlice(proxyCount)); - slices.put("maven-local", countingSlice(localCount)); - final GroupSlice slice = new GroupSlice( - new MapResolver(slices), - "maven-group", - List.of("maven-local", target), - 8080, 0, 0, - Collections.emptyList(), - Optional.of(idx), - Set.of(target), - "maven-group" - ); - slice.response( - new RequestLine("GET", "/com/google/guava/guava/31.1/guava-31.1.jar"), - Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(1, proxyCount.get(), - "Only the matched member should receive the request"); - assertEquals(0, localCount.get(), - "Unmatched member must NOT receive any request"); - assertTrue(idx.locateByNameCalls.contains("com.google.guava.guava"), - "locateByName() called with parsed name"); - assertTrue(idx.locateCalls.isEmpty(), "locate() must never be called"); - } - - // ---- Nested group: index hit on leaf resolves to the correct direct member ---- - - @Test - void nestedGroupIndexHitResolvesLeafToDirectMember() { - // libs-release topology (after GroupMemberFlattener): - // libs-release-local (leaf, direct) - // jboss (leaf, flattened from remote-repos nested group) - // maven-central (leaf, flattened from remote-repos nested group) - // Index says: com.google.guava.guava is in jboss. 
- // Expected: only jboss is queried (it is now a direct flat member). - final String jboss = "jboss"; - final RecordingIndex idx = new RecordingIndex(List.of(jboss)); - final AtomicInteger jbossCount = new AtomicInteger(0); - final AtomicInteger localCount = new AtomicInteger(0); - final AtomicInteger mavenCentralCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(jboss, countingSlice(jbossCount)); - slices.put("libs-release-local", countingSlice(localCount)); - slices.put("maven-central", countingSlice(mavenCentralCount)); - final GroupSlice slice = new GroupSlice( - new MapResolver(slices), - "libs-release", - List.of("libs-release-local", jboss, "maven-central"), - 8080, 0, 0, - Collections.emptyList(), - Optional.of(idx), - Set.of(jboss, "maven-central"), - "maven-group" - ); - slice.response( - new RequestLine("GET", - "/com/google/guava/guava/19.0.0.jbossorg-1/guava-19.0.0.jbossorg-1.jar"), - Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(1, jbossCount.get(), - "jboss (flat member) must be queried directly on index hit"); - assertEquals(0, localCount.get(), - "libs-release-local must NOT be queried when index routes to jboss"); - assertEquals(0, mavenCentralCount.get(), - "maven-central must NOT be queried when index routes to jboss"); - assertTrue(idx.locateByNameCalls.contains("com.google.guava.guava"), - "locateByName() called with parsed Maven name"); - assertTrue(idx.locateCalls.isEmpty(), "locate() must never be called"); - } - - // ---- Index hit for each new adapter type ---- - - @ParameterizedTest - @CsvSource({ - "helm-group, /charts/nginx-1.2.3.tgz, nginx, helm-proxy", - "debian-group, /pool/main/n/nginx/nginx_1.18.0_amd64.deb, nginx_amd64, debian-proxy", - "hex-group, /api/packages/phoenix, phoenix, hex-proxy", - "file-group, /reports/2024/q1.pdf, reports.2024.q1.pdf, file-hosted", - }) - void newAdapterTypesIndexHitQueriesOnlyMatchingMember(final String repoType, - final String url, final String 
expectedName, final String targetMember) { - final RecordingIndex idx = new RecordingIndex(List.of(targetMember)); - final AtomicInteger targetCount = new AtomicInteger(0); - final AtomicInteger otherCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put(targetMember, countingSlice(targetCount)); - slices.put("other-member", countingSlice(otherCount)); - final GroupSlice slice = new GroupSlice( - new MapResolver(slices), - repoType, - List.of(targetMember, "other-member"), - 8080, 0, 0, - Collections.emptyList(), - Optional.of(idx), - Set.of(targetMember), - repoType - ); - slice.response(new RequestLine("GET", url), Headers.EMPTY, Content.EMPTY).join(); - assertEquals( - List.of(expectedName.strip()), - idx.locateByNameCalls, - "locateByName() must be called with parsed name: " + expectedName.strip() - ); - assertTrue(idx.locateCalls.isEmpty(), "locate() must never be called for " + repoType); - assertEquals(1, targetCount.get(), "Target member must receive request on index hit"); - assertEquals(0, otherCount.get(), "Other member must NOT receive request on index hit"); - } - - // ---- Index miss β†’ only proxy members queried (not all members) ---- - - @Test - void indexMissQueriesOnlyProxyMembers() { - final RecordingIndex idx = new RecordingIndex(List.of()); // empty β†’ miss - final AtomicInteger proxyCount = new AtomicInteger(0); - final AtomicInteger hostedCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put("maven-proxy", countingSlice(proxyCount)); - slices.put("maven-hosted", countingSlice(hostedCount)); - final GroupSlice slice = new GroupSlice( - new MapResolver(slices), - "maven-group", - List.of("maven-hosted", "maven-proxy"), - 8080, 0, 0, - Collections.emptyList(), - Optional.of(idx), - Set.of("maven-proxy"), // only maven-proxy is a proxy member - "maven-group" - ); - slice.response( - new RequestLine("GET", "/com/example/unknown/1.0/unknown-1.0.jar"), - Headers.EMPTY, Content.EMPTY - ).join(); 
- assertEquals(1, proxyCount.get(), - "Proxy member must receive request on index miss"); - assertEquals(0, hostedCount.get(), - "Hosted member must NOT receive request on index miss (fully indexed)"); - assertFalse(idx.locateByNameCalls.isEmpty(), "locateByName() still called on index miss"); - assertTrue(idx.locateCalls.isEmpty(), "locate() must never be called"); - } - - @Test - void indexMissWithAllHostedMembersReturns404Immediately() { - final RecordingIndex idx = new RecordingIndex(List.of()); // empty β†’ miss - final AtomicInteger hostedACount = new AtomicInteger(0); - final AtomicInteger hostedBCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put("hosted-a", countingSlice(hostedACount)); - slices.put("hosted-b", countingSlice(hostedBCount)); - final GroupSlice slice = new GroupSlice( - new MapResolver(slices), - "maven-group", - List.of("hosted-a", "hosted-b"), - 8080, 0, 0, - Collections.emptyList(), - Optional.of(idx), - Collections.emptySet(), // no proxy members - "maven-group" - ); - final Response resp = slice.response( - new RequestLine("GET", "/com/example/unknown/1.0/unknown-1.0.jar"), - Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(404, resp.status().code(), - "Must return 404 immediately when no proxy members exist on index miss"); - assertEquals(0, hostedACount.get(), - "hosted-a must NOT receive any request (all-hosted group, index miss)"); - assertEquals(0, hostedBCount.get(), - "hosted-b must NOT receive any request (all-hosted group, index miss)"); - assertFalse(idx.locateByNameCalls.isEmpty(), "locateByName() still called on index miss"); - assertTrue(idx.locateCalls.isEmpty(), "locate() must never be called"); - } - - @Test - void indexMissTriggersFanoutWhenAllMembersAreProxy() { - final RecordingIndex idx = new RecordingIndex(List.of()); // empty β†’ miss - final AtomicInteger memberACount = new AtomicInteger(0); - final AtomicInteger memberBCount = new AtomicInteger(0); - final Map slices = new 
HashMap<>(); - slices.put("member-a", countingSlice(memberACount)); - slices.put("member-b", countingSlice(memberBCount)); - final GroupSlice slice = new GroupSlice( - new MapResolver(slices), - "maven-group", - List.of("member-a", "member-b"), - 8080, 0, 0, - Collections.emptyList(), - Optional.of(idx), - Set.of("member-a", "member-b"), // all are proxy members - "maven-group" - ); - slice.response( - new RequestLine("GET", "/com/example/unknown/1.0/unknown-1.0.jar"), - Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals(1, memberACount.get(), - "member-a must receive request on index miss (all-proxy group)"); - assertEquals(1, memberBCount.get(), - "member-b must receive request on index miss (all-proxy group)"); - assertFalse(idx.locateByNameCalls.isEmpty(), "locateByName() still called on index miss"); - assertTrue(idx.locateCalls.isEmpty(), "locate() must never be called"); - } - - // ---- Metadata/unparseable URLs β†’ full two-phase fanout, no index call ---- - - @ParameterizedTest - @CsvSource({ - "helm-group, /index.yaml", - "debian-group, /dists/stable/Release", - "debian-group, /dists/stable/InRelease", - "debian-group, /dists/stable/main/binary-amd64/Packages", - "hex-group, /names", - }) - void metadataUrlSkipsIndexAndFansOut(final String repoType, final String url) { - // member-a = proxy, member-b = hosted (not in proxy set) - // In the new two-phase fanout, hosted (member-b) is queried first. - // Since it returns 200, the proxy (member-a) is not cascaded to. 
- final RecordingIndex idx = new RecordingIndex(List.of("member-a")); - final AtomicInteger memberACount = new AtomicInteger(0); - final AtomicInteger memberBCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put("member-a", countingSlice(memberACount)); - slices.put("member-b", countingSlice(memberBCount)); - final GroupSlice slice = new GroupSlice( - new MapResolver(slices), - repoType, - List.of("member-a", "member-b"), - 8080, 0, 0, - Collections.emptyList(), - Optional.of(idx), - Set.of("member-a"), - repoType - ); - slice.response(new RequestLine("GET", url), Headers.EMPTY, Content.EMPTY).join(); - assertTrue(idx.locateByNameCalls.isEmpty(), - "locateByName() must NOT be called for metadata URL: " + url); - assertTrue(idx.locateCalls.isEmpty(), - "locate() must NOT be called for metadata URL: " + url); - assertEquals(1, memberBCount.get(), - "hosted member-b must be queried first in two-phase fanout: " + url); - assertEquals(0, memberACount.get(), - "proxy member-a must NOT be queried since hosted already served 200: " - + url); - } - - /** - * Confirms that when hosted misses on a metadata URL, the proxy leaf IS - * queried (phase-2 cascade). 
- */ - @ParameterizedTest - @CsvSource({ - "helm-group, /index.yaml", - "hex-group, /names", - }) - void metadataUrlCascadesToProxyWhenHostedMisses( - final String repoType, final String url - ) { - final RecordingIndex idx = new RecordingIndex(List.of("member-a")); - final AtomicInteger proxyCount = new AtomicInteger(0); - final AtomicInteger hostedCount = new AtomicInteger(0); - final Map slices = new HashMap<>(); - slices.put("proxy-a", countingSlice(proxyCount)); - slices.put("hosted-b", (line, headers, body) -> { - hostedCount.incrementAndGet(); - return CompletableFuture.completedFuture( - ResponseBuilder.notFound().build() - ); - }); - final GroupSlice slice = new GroupSlice( - new MapResolver(slices), - repoType, - List.of("proxy-a", "hosted-b"), - 8080, 0, 0, - Collections.emptyList(), - Optional.of(idx), - Set.of("proxy-a"), - repoType - ); - slice.response(new RequestLine("GET", url), Headers.EMPTY, Content.EMPTY).join(); - assertEquals(1, hostedCount.get(), - "hosted-b queried in phase 1"); - assertEquals(1, proxyCount.get(), - "proxy-a queried in phase 2 after hosted 404"); - } - - // ---- locate() is never called for any known adapter type ---- - - @Test - void locateIsNeverCalledForAnyKnownAdapterType() { - final String[][] cases = { - {"maven-group", "/com/google/guava/guava/31.1/guava-31.1.jar"}, - {"npm-group", "/lodash/-/lodash-4.17.21.tgz"}, - {"docker-group", "/v2/library/nginx/manifests/latest"}, - {"pypi-group", "/simple/numpy/"}, - {"go-group", "/github.com/gin-gonic/gin/@v/v1.9.1.info"}, - {"gem-group", "/gems/rails-7.1.2.gem"}, - {"php-group", "/p2/monolog/monolog.json"}, - {"helm-group", "/charts/nginx-1.2.3.tgz"}, - {"debian-group", "/pool/main/n/nginx/nginx_1.18.0_amd64.deb"}, - {"hex-group", "/api/packages/phoenix"}, - {"file-group", "/reports/2024/q1.pdf"}, - }; - for (final String[] tc : cases) { - final RecordingIndex idx = new RecordingIndex(List.of()); - final GroupSlice slice = sliceWithIndex(tc[0], idx, List.of("member-a", 
"member-b")); - slice.response(new RequestLine("GET", tc[1]), Headers.EMPTY, Content.EMPTY).join(); - assertFalse(idx.locateByNameCalls.isEmpty(), - "locateByName() must be called for " + tc[0] + " " + tc[1]); - assertTrue(idx.locateCalls.isEmpty(), - "locate() must NEVER be called for " + tc[0] + " " + tc[1]); - } - } - - // ---- Helpers ---- - - private static GroupSlice sliceWithIndex( - final String repoType, - final ArtifactIndex idx, - final List members - ) { - final Map slices = new HashMap<>(); - for (final String m : members) { - slices.put(m, (line, headers, body) -> - CompletableFuture.completedFuture(ResponseBuilder.ok().build())); - } - return new GroupSlice( - new MapResolver(slices), - repoType, - members, - 8080, 0, 0, - Collections.emptyList(), - Optional.of(idx), - Set.copyOf(members), - repoType - ); - } - - private static Slice countingSlice(final AtomicInteger counter) { - return (line, headers, body) -> { - counter.incrementAndGet(); - return CompletableFuture.completedFuture(ResponseBuilder.ok().build()); - }; - } - - /** - * ArtifactIndex that records calls to {@code locate()} and {@code locateByName()}. - * Returns a configurable list of repository names for both methods. 
- */ - private static final class RecordingIndex implements ArtifactIndex { - final List locateByNameCalls = new CopyOnWriteArrayList<>(); - final List locateCalls = new CopyOnWriteArrayList<>(); - private final List repos; - - RecordingIndex(final List repos) { - this.repos = repos; - } - - @Override - public CompletableFuture index(final ArtifactDocument doc) { - return CompletableFuture.completedFuture(null); - } - - @Override - public CompletableFuture remove( - final String repoName, final String artifactPath - ) { - return CompletableFuture.completedFuture(null); - } - - @Override - public CompletableFuture search( - final String query, final int maxResults, final int offset - ) { - return CompletableFuture.completedFuture(SearchResult.EMPTY); - } - - @Override - public CompletableFuture> locate(final String artifactPath) { - this.locateCalls.add(artifactPath); - return CompletableFuture.completedFuture(this.repos); - } - - @Override - public CompletableFuture>> locateByName(final String artifactName) { - this.locateByNameCalls.add(artifactName); - return CompletableFuture.completedFuture(Optional.of(this.repos)); - } - - @Override - public void close() { - // nop - } - } - - private static final class MapResolver implements SliceResolver { - private final Map slices; - - MapResolver(final Map slices) { - this.slices = slices; - } - - @Override - public Slice slice(final Key name, final int port, final int depth) { - final Slice s = this.slices.get(name.string()); - return s != null ? 
s - : (line, headers, body) -> - CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); - } - } -} diff --git a/pantera-main/src/test/java/com/auto1/pantera/group/GroupSlicePerformanceTest.java b/pantera-main/src/test/java/com/auto1/pantera/group/GroupSlicePerformanceTest.java deleted file mode 100644 index 2cc84725d..000000000 --- a/pantera-main/src/test/java/com/auto1/pantera/group/GroupSlicePerformanceTest.java +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. - */ -package com.auto1.pantera.group; - -import com.auto1.pantera.asto.Content; -import com.auto1.pantera.asto.Key; -import com.auto1.pantera.http.Headers; -import com.auto1.pantera.http.Response; -import com.auto1.pantera.http.ResponseBuilder; -import com.auto1.pantera.http.RsStatus; -import com.auto1.pantera.http.Slice; -import com.auto1.pantera.http.rq.RequestLine; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Timeout; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.*; - -/** - * Performance and stress tests for high-performance GroupSlice. 
- */ -public final class GroupSlicePerformanceTest { - - @Test - @Timeout(10) - void handles250ConcurrentRequests() throws InterruptedException { - final ExecutorService executor = Executors.newFixedThreadPool(50); - try { - final Map map = new HashMap<>(); - map.put("repo1", new FastSlice(5)); - map.put("repo2", new FastSlice(10)); - map.put("repo3", new FastSlice(15)); - - final GroupSlice slice = new GroupSlice( - new MapResolver(map), - "perf-group", - List.of("repo1", "repo2", "repo3"), - 8080 - ); - - final List> futures = IntStream.range(0, 250) - .mapToObj(i -> CompletableFuture.supplyAsync( - () -> slice.response( - new RequestLine("GET", "/pkg-" + i), - Headers.EMPTY, - Content.EMPTY - ).join(), - executor - ).thenAccept(resp -> assertEquals(RsStatus.OK, resp.status()))) - .collect(Collectors.toList()); - - CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join(); - } finally { - executor.shutdownNow(); - executor.awaitTermination(5, TimeUnit.SECONDS); - } - } - - @Test - void parallelExecutionFasterThanSequential() { - final Map map = new HashMap<>(); - // 3 repos, each takes 50ms - map.put("repo1", new DelayedSlice(50, RsStatus.NOT_FOUND)); - map.put("repo2", new DelayedSlice(50, RsStatus.NOT_FOUND)); - map.put("repo3", new DelayedSlice(50, RsStatus.OK)); - - final GroupSlice slice = new GroupSlice( - new MapResolver(map), - "parallel-group", - List.of("repo1", "repo2", "repo3"), - 8080 - ); - - final long start = System.currentTimeMillis(); - final Response resp = slice.response( - new RequestLine("GET", "/pkg"), - Headers.EMPTY, - Content.EMPTY - ).join(); - final long elapsed = System.currentTimeMillis() - start; - - assertEquals(RsStatus.OK, resp.status()); - // Should complete in ~50ms (parallel), not 150ms (sequential) - assertTrue(elapsed < 100, "Expected <100ms parallel execution, got " + elapsed + "ms"); - } - - @Test - void allResponseBodiesConsumed() { - final AtomicInteger callCount = new AtomicInteger(0); - - final Map map 
= new HashMap<>(); - for (int i = 1; i <= 5; i++) { - final int repoNum = i; - map.put("repo" + i, (line, headers, body) -> { - callCount.incrementAndGet(); - return CompletableFuture.completedFuture( - repoNum == 1 - ? ResponseBuilder.ok().textBody("success").build() - : ResponseBuilder.notFound().build() - ); - }); - } - - final GroupSlice slice = new GroupSlice( - new MapResolver(map), - "tracking-group", - List.of("repo1", "repo2", "repo3", "repo4", "repo5"), - 8080 - ); - - final Response resp = slice.response( - new RequestLine("GET", "/pkg"), - Headers.EMPTY, - Content.EMPTY - ).join(); - - assertEquals(RsStatus.OK, resp.status(), "Expected OK from first member"); - assertEquals(5, callCount.get(), "Expected all 5 members to be queried in parallel"); - } - - @Test - void circuitBreakerOpensAfterFailures() { - final AtomicInteger failureCount = new AtomicInteger(0); - final Map map = new HashMap<>(); - map.put("failing", (line, headers, body) -> { - failureCount.incrementAndGet(); - return CompletableFuture.failedFuture(new RuntimeException("boom")); - }); - map.put("working", new FastSlice(5)); - - final GroupSlice slice = new GroupSlice( - new MapResolver(map), - "circuit-group", - List.of("failing", "working"), - 8080 - ); - - // Make 10 requests - for (int i = 0; i < 10; i++) { - slice.response( - new RequestLine("GET", "/pkg-" + i), - Headers.EMPTY, - Content.EMPTY - ).join(); - } - - // Circuit breaker should open after 5 failures - assertTrue( - failureCount.get() < 10, - "Circuit breaker should prevent some requests, got " + failureCount.get() + " failures" - ); - } - - @Test - void deduplicatesMembers() { - final AtomicInteger callCount = new AtomicInteger(0); - final Map map = new HashMap<>(); - map.put("repo", (line, headers, body) -> { - callCount.incrementAndGet(); - return CompletableFuture.completedFuture(ResponseBuilder.ok().build()); - }); - - // Same repo listed 3 times - final GroupSlice slice = new GroupSlice( - new MapResolver(map), - 
"dedup-group", - List.of("repo", "repo", "repo"), - 8080 - ); - - slice.response( - new RequestLine("GET", "/pkg"), - Headers.EMPTY, - Content.EMPTY - ).join(); - - assertEquals(1, callCount.get(), "Expected repo to be queried only once after deduplication"); - } - - @Test - @Timeout(5) - void timeoutPreventsHangingRequests() { - final Map map = new HashMap<>(); - map.put("hanging", (line, headers, body) -> new CompletableFuture<>()); // Never completes - map.put("working", new FastSlice(5)); - - final GroupSlice slice = new GroupSlice( - new MapResolver(map), - "timeout-group", - List.of("hanging", "working"), - 8080, - 0, - 2 // 2 second timeout - ); - - final Response resp = slice.response( - new RequestLine("GET", "/pkg"), - Headers.EMPTY, - Content.EMPTY - ).join(); - - assertEquals(RsStatus.OK, resp.status(), "Should return OK from working member despite hanging member"); - } - - // Helper classes - - private static final class MapResolver implements SliceResolver { - private final Map map; - private MapResolver(Map map) { this.map = map; } - @Override - public Slice slice(Key name, int port, int depth) { - return map.get(name.string()); - } - } - - private static final class FastSlice implements Slice { - private final long delayMs; - private FastSlice(long delayMs) { this.delayMs = delayMs; } - @Override - public CompletableFuture response(RequestLine line, Headers headers, Content body) { - return CompletableFuture.supplyAsync(() -> { - try { Thread.sleep(delayMs); } catch (InterruptedException e) {} - return ResponseBuilder.ok().textBody("fast").build(); - }); - } - } - - private static final class DelayedSlice implements Slice { - private final long delayMs; - private final RsStatus status; - private DelayedSlice(long delayMs, RsStatus status) { - this.delayMs = delayMs; - this.status = status; - } - @Override - public CompletableFuture response(RequestLine line, Headers headers, Content body) { - return CompletableFuture.supplyAsync(() -> { - try { 
Thread.sleep(delayMs); } catch (InterruptedException e) {} - return ResponseBuilder.from(status).build(); - }); - } - } -} diff --git a/pantera-main/src/test/java/com/auto1/pantera/group/GroupSliceTest.java b/pantera-main/src/test/java/com/auto1/pantera/group/GroupSliceTest.java deleted file mode 100644 index 095cae3f6..000000000 --- a/pantera-main/src/test/java/com/auto1/pantera/group/GroupSliceTest.java +++ /dev/null @@ -1,306 +0,0 @@ -/* - * Copyright (c) 2025-2026 Auto1 Group - * Maintainers: Auto1 DevOps Team - * Lead Maintainer: Ayd Asraf - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License v3.0. - * - * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. - */ -package com.auto1.pantera.group; - -import com.auto1.pantera.asto.Content; -import com.auto1.pantera.asto.Key; -import com.auto1.pantera.http.Headers; -import com.auto1.pantera.http.Response; -import com.auto1.pantera.http.ResponseBuilder; -import com.auto1.pantera.http.RsStatus; -import com.auto1.pantera.http.Slice; -import com.auto1.pantera.http.rq.RequestLine; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Supplier; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public final class GroupSliceTest { - - @Test - void returnsFirstNon404() { - 
final Map map = new HashMap<>(); - map.put("local", new StaticSlice(RsStatus.NOT_FOUND)); - map.put("proxy", new StaticSlice(RsStatus.OK)); - final GroupSlice slice = new GroupSlice(new MapResolver(map), "group", List.of("local", "proxy"), 8080); - final Response rsp = slice.response(new RequestLine("GET", "/pkg.json"), Headers.EMPTY, Content.EMPTY).join(); - assertEquals(RsStatus.OK, rsp.status()); - } - - @Test - void returns404WhenAllNotFound() { - final Map map = new HashMap<>(); - map.put("local", new StaticSlice(RsStatus.NOT_FOUND)); - map.put("proxy", new StaticSlice(RsStatus.NOT_FOUND)); - final GroupSlice slice = new GroupSlice(new MapResolver(map), "group", List.of("local", "proxy"), 8080); - final Response rsp = slice.response(new RequestLine("GET", "/a/b"), Headers.EMPTY, Content.EMPTY).join(); - assertEquals(RsStatus.NOT_FOUND, rsp.status()); - } - - @Test - void methodNotAllowedForUploads() { - final Map map = new HashMap<>(); - map.put("local", new StaticSlice(RsStatus.OK)); - final GroupSlice slice = new GroupSlice(new MapResolver(map), "group", List.of("local"), 8080); - final Response rsp = slice.response(new RequestLine("POST", "/upload"), Headers.EMPTY, Content.EMPTY).join(); - assertEquals(RsStatus.METHOD_NOT_ALLOWED, rsp.status()); - } - - @Test - void rewritesPathWithMemberPrefix() { - final AtomicReference seen = new AtomicReference<>(); - final Slice recording = (line, headers, body) -> { - seen.set(line); - return CompletableFuture.completedFuture(ResponseBuilder.ok().build()); - }; - final Map map = new HashMap<>(); - map.put("npm-local", recording); - final GroupSlice slice = new GroupSlice(new MapResolver(map), "group", List.of("npm-local"), 8080); - slice.response(new RequestLine("GET", "/@scope/pkg/-/pkg-1.0.tgz?x=1"), Headers.EMPTY, Content.EMPTY).join(); - assertTrue(seen.get().uri().getPath().startsWith("/npm-local/@scope/pkg/-/pkg-1.0.tgz")); - assertEquals("x=1", seen.get().uri().getQuery()); - } - - @Test - void 
returnsNotModifiedWhenMemberReturnsNotModified() { - // Test that 304 NOT_MODIFIED is treated as success, not failure - // This is critical for NPM proxy caching with If-None-Match/If-Modified-Since headers - final Map map = new HashMap<>(); - map.put("local", new StaticSlice(RsStatus.NOT_FOUND)); - map.put("proxy", new StaticSlice(RsStatus.NOT_MODIFIED)); - final GroupSlice slice = new GroupSlice(new MapResolver(map), "group", List.of("local", "proxy"), 8080); - final Response rsp = slice.response(new RequestLine("GET", "/pkg.json"), Headers.EMPTY, Content.EMPTY).join(); - assertEquals(RsStatus.NOT_MODIFIED, rsp.status(), "304 NOT_MODIFIED should be returned to client"); - } - - @Test - void returnsNotModifiedFromFirstMemberThatReturnsIt() { - // Test that first NOT_MODIFIED wins in parallel race - final Map map = new HashMap<>(); - map.put("local", new StaticSlice(RsStatus.NOT_FOUND)); - map.put("proxy1", new StaticSlice(RsStatus.NOT_MODIFIED)); - map.put("proxy2", new StaticSlice(RsStatus.OK)); - final GroupSlice slice = new GroupSlice(new MapResolver(map), "group", List.of("local", "proxy1", "proxy2"), 8080); - final Response rsp = slice.response(new RequestLine("GET", "/pkg.json"), Headers.EMPTY, Content.EMPTY).join(); - // Either NOT_MODIFIED or OK is acceptable (parallel race), but NOT 404 - assertTrue( - rsp.status() == RsStatus.NOT_MODIFIED || rsp.status() == RsStatus.OK, - "Should return NOT_MODIFIED or OK, not 404" - ); - } - - @Test - void handlesHundredParallelRequestsWithMixedResults() throws InterruptedException { - final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(8); - final ExecutorService executor = Executors.newFixedThreadPool(12); - try { - final Map map = new HashMap<>(); - map.put( - "unstable", - new ScheduledSlice(scheduler, 5, () -> { - throw new IllegalStateException("boom"); - }) - ); - map.put( - "not-found", - new ScheduledSlice(scheduler, 15, () -> ResponseBuilder.notFound().build()) - ); - map.put( - "fast", 
- new ScheduledSlice(scheduler, 10, () -> ResponseBuilder.ok().textBody("fast").build()) - ); - final GroupSlice slice = new GroupSlice( - new MapResolver(map), - "group", - Arrays.asList("unstable", "not-found", "fast"), - 8080 - ); - final List> pending = IntStream.range(0, 100) - .mapToObj(index -> CompletableFuture.supplyAsync( - () -> slice.response( - new RequestLine("GET", "/pkg-" + index), - Headers.EMPTY, - Content.EMPTY - ).join(), - executor - ).thenAccept(resp -> assertEquals(RsStatus.OK, resp.status()))) - .collect(Collectors.toList()); - CompletableFuture.allOf(pending.toArray(new CompletableFuture[0])).join(); - } finally { - executor.shutdownNow(); - scheduler.shutdownNow(); - scheduler.awaitTermination(5, TimeUnit.SECONDS); - executor.awaitTermination(5, TimeUnit.SECONDS); - } - } - - /** - * Test that Go module paths work correctly through the group. - * The Go module path like /github.com/google/uuid/@v/v1.6.0.info - * should be rewritten to /go_proxy/github.com/google/uuid/@v/v1.6.0.info - * for the member, and TrimPathSlice should strip the /go_proxy prefix. 
- */ - @Test - void goModulePathRewritingWorks() { - final AtomicReference seen = new AtomicReference<>(); - // This simulates what TrimPathSlice does - it receives /member/path and strips to /path - final Slice trimmed = (line, headers, body) -> { - final String path = line.uri().getPath(); - if (path.startsWith("/go_proxy/")) { - final String stripped = path.substring("/go_proxy".length()); - seen.set(new RequestLine(line.method().value(), stripped, line.version())); - return CompletableFuture.completedFuture(ResponseBuilder.ok().build()); - } - // Unexpected path - return 500 for debugging - seen.set(line); - return CompletableFuture.completedFuture( - ResponseBuilder.internalError().textBody("Unexpected path: " + path).build() - ); - }; - final Map map = new HashMap<>(); - map.put("go_proxy", trimmed); - final GroupSlice slice = new GroupSlice(new MapResolver(map), "go_group", List.of("go_proxy"), 8080); - // Simulate what goes into the group after the group's own TrimPathSlice stripped /go_group - final Response rsp = slice.response( - new RequestLine("GET", "/github.com/google/uuid/@v/v1.6.0.info"), - Headers.EMPTY, - Content.EMPTY - ).join(); - assertEquals(RsStatus.OK, rsp.status()); - assertNotNull(seen.get()); - assertEquals("/github.com/google/uuid/@v/v1.6.0.info", seen.get().uri().getPath(), - "After TrimPathSlice simulation, path should be the Go module path without member prefix"); - } - - @Test - void cancelledLosingMembersDoNotTripCircuitBreaker() throws InterruptedException { - // Regression test for: future.cancel(true) on losing members called handleMemberFailure - // which called member.recordFailure(), tripping circuits on healthy proxy members. - // - // Setup: "fast" wins every round; "slow" (200ms delay) gets cancelled before it responds. - // After threshold (3) cancellations the old code would open slow's circuit. - // Phase 2 switches fast to 404 β€” if slow's circuit is open it gets skipped β†’ 503. 
- // With the fix, slow must still be queried and return 200. - final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(4); - try { - final AtomicBoolean fastWins = new AtomicBoolean(true); - final Slice fast = (line, headers, body) -> - fastWins.get() - ? CompletableFuture.completedFuture(ResponseBuilder.ok().build()) - : CompletableFuture.completedFuture(ResponseBuilder.notFound().build()); - final Slice slow = (line, headers, body) -> { - final CompletableFuture fut = new CompletableFuture<>(); - scheduler.schedule( - () -> fut.complete(ResponseBuilder.ok().build()), - 200, TimeUnit.MILLISECONDS - ); - return fut; - }; - final Map map = new HashMap<>(); - map.put("fast", fast); - map.put("slow", slow); - final GroupSlice slice = new GroupSlice( - new MapResolver(map), "group", List.of("fast", "slow"), 8080 - ); - // Phase 1: fast wins 5 times β€” slow gets cancelled each time (threshold = 3) - for (int i = 0; i < 5; i++) { - assertEquals( - RsStatus.OK, - slice.response(new RequestLine("GET", "/pkg.json"), Headers.EMPTY, Content.EMPTY) - .join().status() - ); - } - // Phase 2: fast returns 404; only slow has the artifact - fastWins.set(false); - final Response rsp = slice.response( - new RequestLine("GET", "/pkg.json"), Headers.EMPTY, Content.EMPTY - ).join(); - assertEquals( - RsStatus.OK, rsp.status(), - "Slow member circuit must be ONLINE after cancellations β€” circuit must not trip on cancel" - ); - } finally { - scheduler.shutdownNow(); - scheduler.awaitTermination(1, TimeUnit.SECONDS); - } - } - - private static final class MapResolver implements SliceResolver { - private final Map map; - private MapResolver(Map map) { this.map = map; } - @Override - public Slice slice(Key name, int port, int depth) { - return map.get(name.string()); - } - } - - private static final class StaticSlice implements Slice { - private final RsStatus status; - private StaticSlice(RsStatus status) { this.status = status; } - @Override - public 
CompletableFuture response(RequestLine line, Headers headers, Content body) { - return ResponseBuilder.from(status).completedFuture(); - } - } - - private static final class ScheduledSlice implements Slice { - private final ScheduledExecutorService scheduler; - private final long delayMillis; - private final Supplier supplier; - - private ScheduledSlice( - final ScheduledExecutorService scheduler, - final long delayMillis, - final Supplier supplier - ) { - this.scheduler = scheduler; - this.delayMillis = delayMillis; - this.supplier = supplier; - } - - @Override - public CompletableFuture response( - final RequestLine line, - final Headers headers, - final Content body - ) { - final CompletableFuture future = new CompletableFuture<>(); - this.scheduler.schedule( - () -> { - try { - future.complete(this.supplier.get()); - } catch (final RuntimeException err) { - future.completeExceptionally(err); - } - }, - this.delayMillis, - TimeUnit.MILLISECONDS - ); - return future; - } - } -} diff --git a/pantera-main/src/test/java/com/auto1/pantera/group/RepoBulkheadIsolationTest.java b/pantera-main/src/test/java/com/auto1/pantera/group/RepoBulkheadIsolationTest.java new file mode 100644 index 000000000..34c0ef959 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/group/RepoBulkheadIsolationTest.java @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.group; + +import com.auto1.pantera.http.fault.Fault; +import com.auto1.pantera.http.fault.Result; +import com.auto1.pantera.http.resilience.BulkheadLimits; +import com.auto1.pantera.http.resilience.RepoBulkhead; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.TimeUnit; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; + +/** + * Isolation test for per-repo bulkheads (WI-09). + * + *

        Saturate repo A's bulkhead; verify repo B's {@link RepoBulkhead#run(java.util.function.Supplier)} + * still succeeds immediately. This proves that per-repo blast radius works: + * one misbehaving repository does not starve another. + */ +@Timeout(value = 30, unit = TimeUnit.SECONDS) +final class RepoBulkheadIsolationTest { + + @Test + void saturatedRepoADoesNotBlockRepoB() throws Exception { + final int maxConcurrent = 5; + final BulkheadLimits limits = new BulkheadLimits( + maxConcurrent, 100, Duration.ofSeconds(1) + ); + final RepoBulkhead bulkheadA = new RepoBulkhead( + "repo-A", limits, ForkJoinPool.commonPool() + ); + final RepoBulkhead bulkheadB = new RepoBulkhead( + "repo-B", limits, ForkJoinPool.commonPool() + ); + + // Saturate repo A: hold all permits with uncompleted futures + final List>> blockersA = new ArrayList<>(); + for (int i = 0; i < maxConcurrent; i++) { + final CompletableFuture> blocker = new CompletableFuture<>(); + bulkheadA.run(() -> blocker); + blockersA.add(blocker); + } + assertEquals(maxConcurrent, bulkheadA.activeCount(), + "Repo A must be fully saturated"); + + // Repo A is now full - next request to A must be rejected + final Result rejectedA = bulkheadA.run( + () -> CompletableFuture.completedFuture(Result.ok("should-not-reach")) + ).toCompletableFuture().get(5, TimeUnit.SECONDS); + assertInstanceOf(Result.Err.class, rejectedA, + "Repo A must reject when saturated"); + final Fault faultA = ((Result.Err) rejectedA).fault(); + assertInstanceOf(Fault.Overload.class, faultA, + "Rejection must be Overload"); + assertEquals("repo-A", ((Fault.Overload) faultA).resource(), + "Overload must name repo-A"); + + // Repo B must still succeed immediately + assertEquals(0, bulkheadB.activeCount(), + "Repo B must have zero active requests"); + final Result okB = bulkheadB.run( + () -> CompletableFuture.completedFuture(Result.ok("repo-B-ok")) + ).toCompletableFuture().get(5, TimeUnit.SECONDS); + assertInstanceOf(Result.Ok.class, okB, + "Repo 
B must succeed while repo A is saturated"); + assertEquals("repo-B-ok", ((Result.Ok) okB).value(), + "Repo B must return the expected value"); + + // Clean up repo A blockers + for (final CompletableFuture> b : blockersA) { + b.complete(Result.ok("done")); + } + } + + @Test + void independentDrainExecutors() { + final BulkheadLimits limits = BulkheadLimits.defaults(); + final RepoBulkhead bulkheadA = new RepoBulkhead( + "repo-A", limits, ForkJoinPool.commonPool() + ); + final RepoBulkhead bulkheadB = new RepoBulkhead( + "repo-B", limits, ForkJoinPool.commonPool() + ); + + // Each bulkhead must have its own drain executor instance + assert bulkheadA.drainExecutor() != bulkheadB.drainExecutor() + : "Each repo must have an independent drain executor"; + + // Each starts with zero drops + assertEquals(0, bulkheadA.drainDropCount(), + "Repo A drain drops must start at zero"); + assertEquals(0, bulkheadB.drainDropCount(), + "Repo B drain drops must start at zero"); + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/group/merge/StreamingMetadataMergerTest.java b/pantera-main/src/test/java/com/auto1/pantera/group/merge/StreamingMetadataMergerTest.java new file mode 100644 index 000000000..a92163c68 --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/group/merge/StreamingMetadataMergerTest.java @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.group.merge; + +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.nio.charset.StandardCharsets; + +/** + * Tests for {@link StreamingMetadataMerger}. 
+ */ +final class StreamingMetadataMergerTest { + + @Test + void mergesDisjointVersionSets() { + final StreamingMetadataMerger merger = new StreamingMetadataMerger(); + merger.mergeMember(stream( + "" + + "com.examplelib" + + "" + + "1.01.1" + + "" + )); + merger.mergeMember(stream( + "" + + "com.examplelib" + + "" + + "2.02.1" + + "" + )); + final String out = merger.toXmlString(); + MatcherAssert.assertThat(out, Matchers.containsString("1.0")); + MatcherAssert.assertThat(out, Matchers.containsString("1.1")); + MatcherAssert.assertThat(out, Matchers.containsString("2.0")); + MatcherAssert.assertThat(out, Matchers.containsString("2.1")); + MatcherAssert.assertThat(merger.membersMerged(), Matchers.equalTo(2)); + MatcherAssert.assertThat(merger.membersSkipped(), Matchers.equalTo(0)); + } + + @Test + void deduplicatesOverlappingVersions() { + final StreamingMetadataMerger merger = new StreamingMetadataMerger(); + merger.mergeMember(stream( + "" + + "" + + "1.01.12.0" + + "" + )); + merger.mergeMember(stream( + "" + + "" + + "1.12.02.1" + + "" + )); + final String out = merger.toXmlString(); + MatcherAssert.assertThat( + "Each version listed exactly once", + countOccurrences(out, "1.1"), + Matchers.equalTo(1) + ); + MatcherAssert.assertThat( + countOccurrences(out, "2.0"), + Matchers.equalTo(1) + ); + MatcherAssert.assertThat( + countOccurrences(out, "1.0"), + Matchers.equalTo(1) + ); + MatcherAssert.assertThat( + countOccurrences(out, "2.1"), + Matchers.equalTo(1) + ); + } + + @Test + void lastUpdatedKeepsMaxAcrossMembers() { + final StreamingMetadataMerger merger = new StreamingMetadataMerger(); + merger.mergeMember(stream( + "" + + "" + + "1.0" + + "20200101120000" + + "" + )); + merger.mergeMember(stream( + "" + + "" + + "1.0" + + "20240601090000" + + "" + )); + merger.mergeMember(stream( + "" + + "" + + "1.0" + + "20210101000000" + + "" + )); + final String out = merger.toXmlString(); + MatcherAssert.assertThat( + "Newest lastUpdated wins", + out, + 
Matchers.containsString("20240601090000") + ); + MatcherAssert.assertThat( + "Older lastUpdated values are dropped", + out, + Matchers.not(Matchers.containsString("20200101120000")) + ); + } + + @Test + void snapshotKeepsNewestTimestamp() { + final StreamingMetadataMerger merger = new StreamingMetadataMerger(); + merger.mergeMember(stream( + "" + + "" + + "20240101.1200003" + + "" + )); + merger.mergeMember(stream( + "" + + "" + + "20240315.1405309" + + "" + )); + merger.mergeMember(stream( + "" + + "" + + "20240210.0800005" + + "" + )); + final String out = merger.toXmlString(); + MatcherAssert.assertThat( + "Newest snapshot timestamp wins", + out, + Matchers.containsString("20240315.140530") + ); + MatcherAssert.assertThat( + "And its buildNumber is preserved", + out, + Matchers.containsString("9") + ); + } + + @Test + void malformedMemberSkippedRemainingMerged() { + final StreamingMetadataMerger merger = new StreamingMetadataMerger(); + merger.mergeMember(stream( + "" + + "1.0" + + "" + )); + // Truncated / malformed body + merger.mergeMember(stream("" + + "2.0" + + "" + )); + final String out = merger.toXmlString(); + MatcherAssert.assertThat(out, Matchers.containsString("1.0")); + MatcherAssert.assertThat(out, Matchers.containsString("2.0")); + MatcherAssert.assertThat( + "Two members merged (the malformed one was skipped)", + merger.membersMerged(), + Matchers.equalTo(2) + ); + MatcherAssert.assertThat( + "The malformed member was counted as skipped", + merger.membersSkipped(), + Matchers.equalTo(1) + ); + } + + @Test + void allMembersEmptyEmitsMinimalMetadata() { + final StreamingMetadataMerger merger = new StreamingMetadataMerger(); + // No mergeMember calls at all + final String out = merger.toXmlString(); + MatcherAssert.assertThat( + "Output is well-formed XML", + out, + Matchers.startsWith(" root", + out, + Matchers.containsString(" close", + out, + Matchers.containsString("") + ); + MatcherAssert.assertThat( + "No version lines emitted", + out, + 
Matchers.not(Matchers.containsString("")) + ); + } + + @Test + void latestAndReleasePickedAsMaxAcrossMembers() { + final StreamingMetadataMerger merger = new StreamingMetadataMerger(); + merger.mergeMember(stream( + "" + + "" + + "1.51.5" + + "1.5" + + "" + )); + merger.mergeMember(stream( + "" + + "" + + "2.32.3" + + "2.3" + + "" + )); + final String out = merger.toXmlString(); + MatcherAssert.assertThat(out, Matchers.containsString("2.3")); + MatcherAssert.assertThat(out, Matchers.containsString("2.3")); + } + + private static ByteArrayInputStream stream(final String xml) { + return new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)); + } + + private static int countOccurrences(final String haystack, final String needle) { + int count = 0; + int idx = 0; + while ((idx = haystack.indexOf(needle, idx)) != -1) { + count++; + idx += needle.length(); + } + return count; + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/http/DockerRoutingSliceTest.java b/pantera-main/src/test/java/com/auto1/pantera/http/DockerRoutingSliceTest.java index 01fec5b55..d81fd4adb 100644 --- a/pantera-main/src/test/java/com/auto1/pantera/http/DockerRoutingSliceTest.java +++ b/pantera-main/src/test/java/com/auto1/pantera/http/DockerRoutingSliceTest.java @@ -10,7 +10,7 @@ import com.auto1.pantera.api.ssl.KeyStore; import com.auto1.pantera.asto.Content; import com.auto1.pantera.asto.Storage; -import com.auto1.pantera.cooldown.CooldownSettings; +import com.auto1.pantera.cooldown.config.CooldownSettings; import com.auto1.pantera.http.auth.Authentication; import com.auto1.pantera.http.headers.Authorization; import com.auto1.pantera.http.hm.AssertSlice; diff --git a/pantera-main/src/test/java/com/auto1/pantera/http/cache/NegativeCacheSingleSourceTest.java b/pantera-main/src/test/java/com/auto1/pantera/http/cache/NegativeCacheSingleSourceTest.java new file mode 100644 index 000000000..6d5f59572 --- /dev/null +++ 
b/pantera-main/src/test/java/com/auto1/pantera/http/cache/NegativeCacheSingleSourceTest.java @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. + */ +package com.auto1.pantera.http.cache; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.stream.Stream; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Ensures that {@code new NegativeCache(} appears in at most 1 production + * (non-test) Java file: the single wiring site in {@code RepositorySlices}. + * The unified cache bean should be injected everywhere else. + * + * @since 2.2.0 + */ +final class NegativeCacheSingleSourceTest { + + @Test + void noAdapterCreatesOwnNegativeCache() throws IOException { + // Walk all .java files under the project root, excluding test directories. + // Allowed production sites: + // 1. RepositorySlices.java β€” the single wiring site + // 2. NegativeCacheRegistry.java β€” fallback for early startup + // 3. BaseCachedProxySlice.java β€” fallback for tests without shared cache + // No adapter (npm, pypi, maven, etc.) should create its own instance. 
+ final Path root = Paths.get(System.getProperty("user.dir")).getParent(); + long adapterCount; + try (Stream files = Files.walk(root)) { + adapterCount = files + .filter(p -> p.toString().endsWith(".java")) + .filter(p -> p.toString().contains("/src/main/")) + .filter(p -> !p.toString().contains("/src/test/")) + // Exclude known allowed sites + .filter(p -> !p.toString().contains("RepositorySlices.java")) + .filter(p -> !p.toString().contains("NegativeCacheRegistry.java")) + .filter(p -> !p.toString().contains("BaseCachedProxySlice.java")) + .filter(p -> !p.toString().contains("NegativeCache.java")) + .filter(p -> { + try { + return Files.readString(p).contains("new NegativeCache("); + } catch (IOException e) { + return false; + } + }) + .count(); + } + assertEquals( + 0, adapterCount, + "No adapter should create its own NegativeCache β€” " + + "use the shared instance from NegativeCacheRegistry" + ); + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/http/context/HandlerExecutorTest.java b/pantera-main/src/test/java/com/auto1/pantera/http/context/HandlerExecutorTest.java new file mode 100644 index 000000000..ffd5b390a --- /dev/null +++ b/pantera-main/src/test/java/com/auto1/pantera/http/context/HandlerExecutorTest.java @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2025-2026 Auto1 Group + * Maintainers: Auto1 DevOps Team + * Lead Maintainer: Ayd Asraf + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License v3.0. + * + * Originally based on Artipie (https://github.com/artipie/artipie), MIT License. 
+ */ +package com.auto1.pantera.http.context; + +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import org.apache.logging.log4j.ThreadContext; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Tests for {@link HandlerExecutor} β€” verifies that the shared handler + * worker pool propagates the caller's Log4j2 {@link ThreadContext} to + * worker threads, isolates worker ThreadContext between tasks, uses daemon + * threads with a descriptive name prefix, and enforces its bounded queue. + * + * @since 2.2.0 + */ +final class HandlerExecutorTest { + + @BeforeEach + void setUp() { + ThreadContext.clearMap(); + } + + @AfterEach + void tearDown() { + ThreadContext.clearMap(); + } + + @Test + @DisplayName("Submitted task sees the caller's ThreadContext keys") + void submittedTasksSeeCallerThreadContext() throws Exception { + ThreadContext.put("trace.id", "test-trace-123"); + ThreadContext.put("user.name", "test-admin"); + final AtomicReference seenTrace = new AtomicReference<>(); + final AtomicReference seenUser = new AtomicReference<>(); + CompletableFuture.runAsync(() -> { + seenTrace.set(ThreadContext.get("trace.id")); + seenUser.set(ThreadContext.get("user.name")); + }, HandlerExecutor.get()).get(5, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "trace.id visible on worker", + seenTrace.get(), Matchers.is("test-trace-123") + ); + MatcherAssert.assertThat( + "user.name visible on worker", + seenUser.get(), Matchers.is("test-admin") + ); + } + + @Test + @DisplayName("Worker ThreadContext is isolated: 
a new caller with empty context does not see a prior caller's keys") + void callerThreadContextIsolatedFromWorkerThread() throws Exception { + // Submit a task with caller context. + ThreadContext.put("trace.id", "caller-only"); + CompletableFuture.runAsync(() -> { + MatcherAssert.assertThat( + "caller context visible inside the task", + ThreadContext.get("trace.id"), Matchers.is("caller-only") + ); + }, HandlerExecutor.get()).get(5, TimeUnit.SECONDS); + // Now clear the caller's ThreadContext (simulating a different request + // on the event loop) and submit a new task. The worker must NOT see + // the previous caller's "trace.id" β€” the contextual executor captures + // the NEW (empty) caller context, not the worker's prior state. + ThreadContext.clearMap(); + final AtomicReference leakedTrace = new AtomicReference<>(); + CompletableFuture.runAsync( + () -> leakedTrace.set(ThreadContext.get("trace.id")), + HandlerExecutor.get() + ).get(5, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "previous caller's ThreadContext does not leak to new caller's task", + leakedTrace.get(), Matchers.nullValue() + ); + } + + @Test + @DisplayName("Pool threads are daemon threads") + void poolThreadsAreDaemon() throws Exception { + final AtomicBoolean daemon = new AtomicBoolean(false); + CompletableFuture.runAsync( + () -> daemon.set(Thread.currentThread().isDaemon()), + HandlerExecutor.get() + ).get(5, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "handler pool thread is daemon", + daemon.get(), Matchers.is(true) + ); + } + + @Test + @DisplayName("Pool threads have a descriptive name starting with 'pantera-handler-'") + void poolHasDescriptiveThreadName() throws Exception { + final AtomicReference name = new AtomicReference<>(); + CompletableFuture.runAsync( + () -> name.set(Thread.currentThread().getName()), + HandlerExecutor.get() + ).get(5, TimeUnit.SECONDS); + MatcherAssert.assertThat( + "thread name starts with pantera-handler-", + name.get(), 
Matchers.startsWith("pantera-handler-") + ); + } + + @Test + @DisplayName("Pool rejects tasks when queue is saturated (AbortPolicy)") + void poolRejectsOnQueueSaturation() throws Exception { + // Fill up the pool + queue by submitting tasks that block. + final int poolSize = HandlerExecutor.poolSize(); + final int queueCapacity = HandlerExecutor.queueCapacity(); + final CountDownLatch holdLatch = new CountDownLatch(1); + final CountDownLatch startedLatch = new CountDownLatch(poolSize); + // Submit poolSize tasks that block forever (fill all worker threads). + for (int i = 0; i < poolSize; i++) { + HandlerExecutor.get().execute(() -> { + startedLatch.countDown(); + try { + holdLatch.await(30, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + }); + } + // Wait until all worker threads are busy. + final boolean allStarted = startedLatch.await(10, TimeUnit.SECONDS); + Assertions.assertTrue(allStarted, "All pool threads should start"); + // Fill the queue. + for (int i = 0; i < queueCapacity; i++) { + HandlerExecutor.get().execute(() -> { + try { + holdLatch.await(30, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + }); + } + // The next submit should be rejected. + try { + HandlerExecutor.get().execute(() -> { }); + Assertions.fail("Expected RejectedExecutionException"); + } catch (final RejectedExecutionException expected) { + // AbortPolicy fires β€” this is the expected behaviour. + } finally { + // Release all blocked tasks. 
+ holdLatch.countDown(); + } + } +} diff --git a/pantera-main/src/test/java/com/auto1/pantera/test/TestSettings.java b/pantera-main/src/test/java/com/auto1/pantera/test/TestSettings.java index 0e6b3fb56..32cbac026 100644 --- a/pantera-main/src/test/java/com/auto1/pantera/test/TestSettings.java +++ b/pantera-main/src/test/java/com/auto1/pantera/test/TestSettings.java @@ -18,7 +18,7 @@ import com.auto1.pantera.asto.Storage; import com.auto1.pantera.asto.memory.InMemoryStorage; import com.auto1.pantera.auth.AuthFromEnv; -import com.auto1.pantera.cooldown.CooldownSettings; +import com.auto1.pantera.cooldown.config.CooldownSettings; import com.auto1.pantera.http.auth.Authentication; import com.auto1.pantera.scheduling.MetadataEventQueues; import com.auto1.pantera.security.policy.Policy; diff --git a/pantera-storage/pantera-storage-core/pom.xml b/pantera-storage/pantera-storage-core/pom.xml index 16038eefc..f2b31c6df 100644 --- a/pantera-storage/pantera-storage-core/pom.xml +++ b/pantera-storage/pantera-storage-core/pom.xml @@ -26,7 +26,7 @@ SOFTWARE. pantera-storage com.auto1.pantera - 2.1.3 + 2.2.0 4.0.0 pantera-storage-core diff --git a/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/SubStorage.java b/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/SubStorage.java index a4cac2b7d..60ecac344 100644 --- a/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/SubStorage.java +++ b/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/SubStorage.java @@ -43,6 +43,14 @@ public final class SubStorage implements Storage { */ private final String id; + /** + * Pre-compiled pattern used by {@link #list(Key)} and + * {@link #list(Key, String)} to strip the configured prefix from + * returned keys. Hoisted out of the hot path to avoid re-compiling + * on every call. + */ + private final Pattern listPattern; + /** * Sub storage with prefix. 
* @param prefix Prefix key @@ -54,6 +62,9 @@ public SubStorage(final Key prefix, final Storage origin) { this.id = String.format( "SubStorage: prefix=%s, origin=%s", this.prefix, this.origin.identifier() ); + this.listPattern = Pattern.compile( + "^" + Pattern.quote(this.prefix.string()) + "/" + ); } @Override @@ -63,25 +74,23 @@ public CompletableFuture exists(final Key key) { @Override public CompletableFuture> list(final Key filter) { - final Pattern ptn = Pattern.compile(String.format("^%s/", this.prefix.string())); return this.origin.list(new PrefixedKed(this.prefix, filter)).thenApply( keys -> keys.stream() - .map(key -> new Key.From(ptn.matcher(key.string()).replaceFirst(""))) + .map(key -> new Key.From(this.listPattern.matcher(key.string()).replaceFirst(""))) .collect(Collectors.toList()) ); } @Override public CompletableFuture list(final Key root, final String delimiter) { - final Pattern ptn = Pattern.compile(String.format("^%s/", this.prefix.string())); return this.origin .list(new PrefixedKed(this.prefix, root), delimiter) .thenApply(result -> { final Collection files = result.files().stream() - .map(key -> new Key.From(ptn.matcher(key.string()).replaceFirst(""))) + .map(key -> new Key.From(this.listPattern.matcher(key.string()).replaceFirst(""))) .collect(Collectors.toList()); final Collection dirs = result.directories().stream() - .map(key -> new Key.From(ptn.matcher(key.string()).replaceFirst(""))) + .map(key -> new Key.From(this.listPattern.matcher(key.string()).replaceFirst(""))) .collect(Collectors.toList()); return new ListResult.Simple(files, dirs); }); diff --git a/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/cache/StreamThroughCache.java b/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/cache/StreamThroughCache.java index b309fe656..fbe843d8a 100644 --- a/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/cache/StreamThroughCache.java +++ 
b/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/cache/StreamThroughCache.java @@ -141,6 +141,19 @@ private Content teeContent(final Key key, final Content remote) { .eventOutcome("failure") .error(err) .log(); + }) + // Client cancelled mid-stream (e.g., closed connection) β€” mirror doOnError cleanup + // so the temp file/channel don't leak when subscription is cancelled. + .doOnCancel(() -> { + closeQuietly(channel); + deleteTempFileQuietly(tempFile); + EcsLogger.debug("com.auto1.pantera.asto.cache") + .message(String.format("Stream-through: subscription cancelled for key '%s', cleaning up temp file", key.string())) + .eventCategory("database") + .eventAction("stream_through") + .eventOutcome("unknown") + .field("event.reason", "cancel") + .log(); }); return new Content.From(remote.size(), teed); } diff --git a/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/events/EventQueue.java b/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/events/EventQueue.java index 6e2e6a4dd..6815e8857 100644 --- a/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/events/EventQueue.java +++ b/pantera-storage/pantera-storage-core/src/main/java/com/auto1/pantera/asto/events/EventQueue.java @@ -88,7 +88,7 @@ public boolean put(final T item) { .log(); return false; } - this.queue.add(item); + this.queue.add(item); // ok: unbounded ConcurrentLinkedQueue, capacity enforced by AtomicInteger above return true; } diff --git a/pantera-storage/pantera-storage-s3/pom.xml b/pantera-storage/pantera-storage-s3/pom.xml index d7736b027..bffd620ce 100644 --- a/pantera-storage/pantera-storage-s3/pom.xml +++ b/pantera-storage/pantera-storage-s3/pom.xml @@ -26,7 +26,7 @@ SOFTWARE. pantera-storage com.auto1.pantera - 2.1.3 + 2.2.0 4.0.0 pantera-storage-s3 @@ -37,7 +37,7 @@ SOFTWARE. 
com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile diff --git a/pantera-storage/pantera-storage-s3/src/main/java/com/auto1/pantera/asto/s3/DiskCacheStorage.java b/pantera-storage/pantera-storage-s3/src/main/java/com/auto1/pantera/asto/s3/DiskCacheStorage.java index 71256810f..aa301fa03 100644 --- a/pantera-storage/pantera-storage-s3/src/main/java/com/auto1/pantera/asto/s3/DiskCacheStorage.java +++ b/pantera-storage/pantera-storage-s3/src/main/java/com/auto1/pantera/asto/s3/DiskCacheStorage.java @@ -370,6 +370,22 @@ private CompletableFuture fetchAndPersist(final Key key, final Path fil .error(ex) .log(); } + }) + // Client cancelled mid-write (e.g., closed connection) β€” mirror doOnError + // so the temp file and channel don't leak when subscription is cancelled. + .doOnCancel(() -> { + try { ch.close(); } catch (final IOException ex) { + EcsLogger.debug("com.auto1.pantera.asto.cache") + .message("Failed to close channel on cancel") + .error(ex) + .log(); + } + try { Files.deleteIfExists(tmp); } catch (final IOException ex) { + EcsLogger.debug("com.auto1.pantera.asto.cache") + .message("Failed to delete temp file on cancel") + .error(ex) + .log(); + } }); result.complete(new Content.From(cnt.size(), stream)); } catch (final IOException ioe) { diff --git a/pantera-storage/pantera-storage-vertx-file/pom.xml b/pantera-storage/pantera-storage-vertx-file/pom.xml index c7fa6b43e..723e6908d 100644 --- a/pantera-storage/pantera-storage-vertx-file/pom.xml +++ b/pantera-storage/pantera-storage-vertx-file/pom.xml @@ -26,7 +26,7 @@ SOFTWARE. pantera-storage com.auto1.pantera - 2.1.3 + 2.2.0 4.0.0 pantera-storage-vertx-file @@ -37,7 +37,7 @@ SOFTWARE. 
com.auto1.pantera pantera-storage-core - 2.1.3 + 2.2.0 compile diff --git a/pantera-storage/pantera-storage-vertx-file/src/main/java/com/auto1/pantera/asto/fs/VertxRxFile.java b/pantera-storage/pantera-storage-vertx-file/src/main/java/com/auto1/pantera/asto/fs/VertxRxFile.java index aa68f0d59..51d72661d 100644 --- a/pantera-storage/pantera-storage-vertx-file/src/main/java/com/auto1/pantera/asto/fs/VertxRxFile.java +++ b/pantera-storage/pantera-storage-vertx-file/src/main/java/com/auto1/pantera/asto/fs/VertxRxFile.java @@ -120,6 +120,13 @@ public Completable save(final Flowable flow) { .flatMapCompletable( asyncFile -> Completable.create( emitter -> flow.map(buf -> Buffer.buffer(new Remaining(buf).bytes())) + // Safety net: if the upstream Flowable errors before toSubscriber() + // sees any data (or between items), the AsyncFile could otherwise + // be left open. Close it explicitly; ignore close failures since + // the primary error is already being signalled to the emitter. + .doOnError(err -> asyncFile.rxClose() + .onErrorComplete() + .subscribe()) .subscribe(asyncFile.toSubscriber() .onWriteStreamEnd(emitter::onComplete) .onWriteStreamError(emitter::onError) diff --git a/pantera-storage/pom.xml b/pantera-storage/pom.xml index e1c23802d..9fd58c630 100644 --- a/pantera-storage/pom.xml +++ b/pantera-storage/pom.xml @@ -6,10 +6,10 @@ com.auto1.pantera pantera - 2.1.3 + 2.2.0 pantera-storage - 2.1.3 + 2.2.0 pom asto A simple Java storage diff --git a/pantera-ui/package.json b/pantera-ui/package.json index f6a2eb08f..ba2c78f60 100644 --- a/pantera-ui/package.json +++ b/pantera-ui/package.json @@ -1,6 +1,6 @@ { "name": "pantera-ui", - "version": "2.1.3", + "version": "2.2.0", "private": true, "type": "module", "scripts": { diff --git a/pantera-ui/src/components/admin/RepoConfigForm.vue b/pantera-ui/src/components/admin/RepoConfigForm.vue index d0e31ca9e..51fd6b939 100644 --- a/pantera-ui/src/components/admin/RepoConfigForm.vue +++ 
b/pantera-ui/src/components/admin/RepoConfigForm.vue @@ -10,6 +10,11 @@ import Select from 'primevue/select' import Button from 'primevue/button' import Card from 'primevue/card' import Checkbox from 'primevue/checkbox' +import AutoComplete from 'primevue/autocomplete' +import Tag from 'primevue/tag' +import Dialog from 'primevue/dialog' +import { listRepos } from '@/api/repos' +import type { RepoListItem } from '@/types' const props = defineProps<{ /** Current config value (v-model:config) */ @@ -82,6 +87,74 @@ function moveMemberDown(idx: number) { ;[arr[idx], arr[idx + 1]] = [arr[idx + 1], arr[idx]] } +// State for compatible repos dropdown (group member selection) +const compatibleRepos = ref([]) +const filteredRepos = ref([]) + +/** + * Given a group type like "maven-group", return the compatible member types. + * Rule: strip "-group" -> base; compatible = [base, base + "-proxy"] + */ +function compatibleTypes(groupType: string): string[] { + const base = groupType.replace(/-group$/, '') + return [base, `${base}-proxy`] +} + +/** + * Fetch repos compatible with the current group type from the API. + */ +async function fetchCompatibleRepos() { + if (!repoType.value?.endsWith('-group')) return + const types = compatibleTypes(repoType.value) + try { + const resp = await listRepos({ size: 500 }) + const all: RepoListItem[] = resp.items ?? [] + compatibleRepos.value = all.filter(r => types.includes(r.type)) + } catch (e) { + console.error('Failed to fetch compatible repos', e) + compatibleRepos.value = [] + } +} + +/** + * PrimeVue AutoComplete completeMethod β€” filters the pre-fetched list client-side. 
+ */ +function searchRepos(event: { query: string }) { + const q = event.query.toLowerCase() + filteredRepos.value = compatibleRepos.value.filter( + r => !groupMembers.value.includes(r.name) && r.name.toLowerCase().includes(q) + ) +} + +// Create-member modal state +const showCreateMemberDialog = ref(false) +const newMemberType = ref('') +const newMemberName = ref('') +const newMemberCreating = ref(false) + +async function createMemberRepo() { + if (!newMemberName.value || !newMemberType.value) return + newMemberCreating.value = true + try { + const { putRepo } = await import('@/api/repos') + await putRepo(newMemberName.value, { + repo: { + type: newMemberType.value, + storage: { type: 'fs' }, + }, + }) + groupMembers.value.push(newMemberName.value) + await fetchCompatibleRepos() + showCreateMemberDialog.value = false + newMemberName.value = '' + newMemberType.value = '' + } catch (e: unknown) { + console.error('Failed to create member repo', e) + } finally { + newMemberCreating.value = false + } +} + // Cooldown const cooldownEnabled = ref(false) const cooldownDuration = ref('P30D') @@ -132,7 +205,10 @@ async function handleCreateS3Alias() { } } -onMounted(() => { loadStorages() }) +onMounted(() => { + loadStorages() + if (repoType.value?.endsWith('-group')) fetchCompatibleRepos() +}) // Reset derivative proxy/group fields when type changes (only in create mode) watch(repoType, () => { @@ -140,6 +216,7 @@ watch(repoType, () => { remotes.value = [{ url: '', username: '', password: '' }] groupMembers.value = [] } + fetchCompatibleRepos() }) // Reset S3 sub-fields when storage type switches @@ -478,11 +555,25 @@ watch(groupMembers, () => { emitConfig() }, { deep: true }) class="flex items-center gap-2 px-3 py-2 bg-white dark:bg-gray-900" > {{ idx + 1 }}. - + :dropdown="true" + forceSelection + > + +