From 7a718ece44d129ccc71ed56eaacc405e4aaf7e9a Mon Sep 17 00:00:00 2001 From: Eric Anderson Date: Tue, 7 Oct 2025 15:16:40 -0700 Subject: [PATCH] Include causal status details in higher-level statuses When an operation fails and we want to produce a new status at a higher level, we commonly are turning the first status into an exception to attach to the new exception. We should instead prefer to keep as much information in the status description itself, as cause is not as reliable to be logged/propagated. I do expect long-term we'll want to expose an API in grpc-api for this, but for the moment let's keep it internal. In particular, we'd have to figure out its name. I could also believe we might want different formatting, which becomes a clearer discussion when we can see the usages. I'm pretty certain there are some other places that could benefit from this utility, as I remember really wishing I had these functions a month or two ago. But these are the places I could immediately find. OutlierDetectionLoadBalancerConfig had its status code changed from INTERNAL to UNAVAILABLE because the value comes externally, and so isn't a gRPC bug or such. I didn't change the xds policies in the same way because it's murkier as the configuration for those is largely generated within xds itself. --- .../main/java/io/grpc/internal/GrpcUtil.java | 25 +++++++++++++++++++ .../io/grpc/internal/RetriableStream.java | 5 ++-- .../OpenTelemetryTracingModule.java | 11 ++------ .../OutlierDetectionLoadBalancerProvider.java | 8 +++--- .../WeightedTargetLoadBalancerProvider.java | 8 +++--- .../xds/WrrLocalityLoadBalancerProvider.java | 8 +++--- 6 files changed, 44 insertions(+), 21 deletions(-) diff --git a/core/src/main/java/io/grpc/internal/GrpcUtil.java b/core/src/main/java/io/grpc/internal/GrpcUtil.java index a512fff6af2..048812ac1de 100644 --- a/core/src/main/java/io/grpc/internal/GrpcUtil.java +++ b/core/src/main/java/io/grpc/internal/GrpcUtil.java @@ -821,6 +821,31 @@ public static Status replaceInappropriateControlPlaneStatus(Status status) { + status.getDescription()).withCause(status.getCause()) : status; } + /** + * Returns a "clean" representation of a status code and description (not cause) like + * "UNAVAILABLE: The description". Should be similar to Status.formatThrowableMessage(). + */ + public static String statusToPrettyString(Status status) { + if (status.getDescription() == null) { + return status.getCode().toString(); + } else { + return status.getCode() + ": " + status.getDescription(); + } + } + + /** + * Create a status with contextual information, propagating details from a non-null status that + * contributed to the failure. For example, if UNAVAILABLE, "Couldn't load bar", and status + * "FAILED_PRECONDITION: Foo missing" were passed as arguments, then this method would produce the + * status "UNAVAILABLE: Couldn't load bar: FAILED_PRECONDITION: Foo missing" with a cause if the + * passed status had a cause. + */ + public static Status statusWithDetails(Status.Code code, String description, Status causeStatus) { + return code.toStatus() + .withDescription(description + ": " + statusToPrettyString(causeStatus)) + .withCause(causeStatus.getCause()); + } + /** * Checks whether the given item exists in the iterable. This is copied from Guava Collect's * {@code Iterables.contains()} because Guava Collect is not Android-friendly thus core can't diff --git a/core/src/main/java/io/grpc/internal/RetriableStream.java b/core/src/main/java/io/grpc/internal/RetriableStream.java index 9fe8ab4ffff..96816eb6f15 100644 --- a/core/src/main/java/io/grpc/internal/RetriableStream.java +++ b/core/src/main/java/io/grpc/internal/RetriableStream.java @@ -938,9 +938,8 @@ public void run() { && localOnlyTransparentRetries.incrementAndGet() > 1_000) { commitAndRun(substream); if (state.winningSubstream == substream) { - Status tooManyTransparentRetries = Status.INTERNAL - .withDescription("Too many transparent retries. Might be a bug in gRPC") - .withCause(status.asRuntimeException()); + Status tooManyTransparentRetries = GrpcUtil.statusWithDetails( + Status.Code.INTERNAL, "Too many transparent retries. Might be a bug in gRPC", status); safeCloseMasterListener(tooManyTransparentRetries, rpcProgress, trailers); } return; diff --git a/opentelemetry/src/main/java/io/grpc/opentelemetry/OpenTelemetryTracingModule.java b/opentelemetry/src/main/java/io/grpc/opentelemetry/OpenTelemetryTracingModule.java index 8c42a189ac2..4fa4f802dc0 100644 --- a/opentelemetry/src/main/java/io/grpc/opentelemetry/OpenTelemetryTracingModule.java +++ b/opentelemetry/src/main/java/io/grpc/opentelemetry/OpenTelemetryTracingModule.java @@ -36,6 +36,7 @@ import io.grpc.ServerCallHandler; import io.grpc.ServerInterceptor; import io.grpc.ServerStreamTracer; +import io.grpc.internal.GrpcUtil; import io.grpc.opentelemetry.internal.OpenTelemetryConstants; import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.common.AttributesBuilder; @@ -463,19 +464,11 @@ private void recordInboundMessageSize(Span span, int seqNo, long bytes) { span.addEvent("Inbound message", attributesBuilder.build()); } - private String generateErrorStatusDescription(io.grpc.Status status) { - if (status.getDescription() != null) { - return status.getCode() + ": " + status.getDescription(); - } else { - return status.getCode().toString(); - } - } - private void endSpanWithStatus(Span span, io.grpc.Status status) { if (status.isOk()) { span.setStatus(StatusCode.OK); } else { - span.setStatus(StatusCode.ERROR, generateErrorStatusDescription(status)); + span.setStatus(StatusCode.ERROR, GrpcUtil.statusToPrettyString(status)); } span.end(); } diff --git a/util/src/main/java/io/grpc/util/OutlierDetectionLoadBalancerProvider.java b/util/src/main/java/io/grpc/util/OutlierDetectionLoadBalancerProvider.java index c76d68a03de..084898bc38f 100644 --- a/util/src/main/java/io/grpc/util/OutlierDetectionLoadBalancerProvider.java +++ b/util/src/main/java/io/grpc/util/OutlierDetectionLoadBalancerProvider.java @@ -23,6 +23,7 @@ import io.grpc.LoadBalancerProvider; import io.grpc.NameResolver.ConfigOrError; import io.grpc.Status; +import io.grpc.internal.GrpcUtil; import io.grpc.internal.JsonUtil; import io.grpc.util.OutlierDetectionLoadBalancer.OutlierDetectionLoadBalancerConfig; import io.grpc.util.OutlierDetectionLoadBalancer.OutlierDetectionLoadBalancerConfig.FailurePercentageEjection; @@ -148,9 +149,10 @@ private ConfigOrError parseLoadBalancingPolicyConfigInternal(Map rawC ConfigOrError childConfig = GracefulSwitchLoadBalancer.parseLoadBalancingPolicyConfig( JsonUtil.getListOfObjects(rawConfig, "childPolicy")); if (childConfig.getError() != null) { - return ConfigOrError.fromError(Status.INTERNAL - .withDescription("Failed to parse child in outlier_detection_experimental: " + rawConfig) - .withCause(childConfig.getError().asRuntimeException())); + return ConfigOrError.fromError(GrpcUtil.statusWithDetails( + Status.Code.UNAVAILABLE, + "Failed to parse child in outlier_detection_experimental", + childConfig.getError())); } configBuilder.setChildConfig(childConfig.getConfig()); diff --git a/xds/src/main/java/io/grpc/xds/WeightedTargetLoadBalancerProvider.java b/xds/src/main/java/io/grpc/xds/WeightedTargetLoadBalancerProvider.java index 55f33fb11aa..15318693aca 100644 --- a/xds/src/main/java/io/grpc/xds/WeightedTargetLoadBalancerProvider.java +++ b/xds/src/main/java/io/grpc/xds/WeightedTargetLoadBalancerProvider.java @@ -25,6 +25,7 @@ import io.grpc.LoadBalancerRegistry; import io.grpc.NameResolver.ConfigOrError; import io.grpc.Status; +import io.grpc.internal.GrpcUtil; import io.grpc.internal.JsonUtil; import io.grpc.util.GracefulSwitchLoadBalancer; import java.util.LinkedHashMap; @@ -99,9 +100,10 @@ public ConfigOrError parseLoadBalancingPolicyConfig(Map rawConfig) { ConfigOrError childConfig = GracefulSwitchLoadBalancer.parseLoadBalancingPolicyConfig( JsonUtil.getListOfObjects(rawWeightedTarget, "childPolicy"), lbRegistry); if (childConfig.getError() != null) { - return ConfigOrError.fromError(Status.INTERNAL - .withDescription("Could not parse weighted_target's child policy:" + name) - .withCause(childConfig.getError().asRuntimeException())); + return ConfigOrError.fromError(GrpcUtil.statusWithDetails( + Status.Code.INTERNAL, + "Could not parse weighted_target's child policy: " + name, + childConfig.getError())); } parsedChildConfigs.put(name, new WeightedPolicySelection(weight, childConfig.getConfig())); } diff --git a/xds/src/main/java/io/grpc/xds/WrrLocalityLoadBalancerProvider.java b/xds/src/main/java/io/grpc/xds/WrrLocalityLoadBalancerProvider.java index 384831b8a05..3693df9208a 100644 --- a/xds/src/main/java/io/grpc/xds/WrrLocalityLoadBalancerProvider.java +++ b/xds/src/main/java/io/grpc/xds/WrrLocalityLoadBalancerProvider.java @@ -23,6 +23,7 @@ import io.grpc.LoadBalancerRegistry; import io.grpc.NameResolver.ConfigOrError; import io.grpc.Status; +import io.grpc.internal.GrpcUtil; import io.grpc.internal.JsonUtil; import io.grpc.util.GracefulSwitchLoadBalancer; import io.grpc.xds.WrrLocalityLoadBalancer.WrrLocalityConfig; @@ -62,9 +63,10 @@ public ConfigOrError parseLoadBalancingPolicyConfig(Map rawConfig) { ConfigOrError childConfig = GracefulSwitchLoadBalancer.parseLoadBalancingPolicyConfig( JsonUtil.getListOfObjects(rawConfig, "childPolicy")); if (childConfig.getError() != null) { - return ConfigOrError.fromError(Status.INTERNAL - .withDescription("Failed to parse child policy in wrr_locality LB policy: " + rawConfig) - .withCause(childConfig.getError().asRuntimeException())); + return ConfigOrError.fromError(GrpcUtil.statusWithDetails( + Status.Code.INTERNAL, + "Failed to parse child policy in wrr_locality LB policy", + childConfig.getError())); } return ConfigOrError.fromConfig(new WrrLocalityConfig(childConfig.getConfig())); } catch (RuntimeException e) {