Skip to content

Java: Diff-informed queries: phase 3 (non-trivial locations) #20077

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 25 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
5c2cf79
[TEST] Java: CWE-020/ExternalAPI: new test based on qhelp
d10c Jul 15, 2025
7aced48
[TEST] Java: LogInjection: convert test to qlref
d10c Jul 15, 2025
49e03b4
[TEST] Java: UnsafeCertTrust: convert test to qlref
d10c Jul 15, 2025
94386f0
[TEST] Java: TrustBoundaryViolations: convert test to qlref
d10c Jul 15, 2025
6134518
[TEST] Java: SensitiveLogInfo: convert to qlref
d10c Jul 15, 2025
44bb5e7
[TEST] Java: ConditionalBypass: convert to qlref
d10c Jul 15, 2025
b33058c
[TEST] Java: SensitiveCommunication: convert to qlref
d10c Jul 15, 2025
8353fdd
[DIFF-INFORMED] Java: (Android)SensitiveCommunication
d10c Jul 16, 2025
54546f6
[DIFF-INFORMED] Java: ArithmeticTainted
d10c Jul 16, 2025
0bcdb42
[DIFF-INFORMED] Java: ArithmeticUncontrolled
d10c Jul 16, 2025
0cf1195
[DIFF-INFORMED] Java: ConditionalBypass
d10c Jul 16, 2025
1c6ecf1
[DIFF-INFORMED] Java: UntrustedDataToExternalAPI
d10c Jul 16, 2025
919fea5
[DIFF-INFORMED] Java: ExternallyControlledFormatString
d10c Jul 16, 2025
19e5c3d
[DIFF-INFORMED] Java: ImproperValidationOfArray…
d10c Jul 16, 2025
74b37e7
[DIFF-INFORMED] Java: InsecureCookie
d10c Jul 16, 2025
2d73405
[DIFF-INFORMED] Java: InsecureLdapAuth
d10c Jul 16, 2025
b688df9
[DIFF-INFORMED] Java: LogInjection
d10c Jul 16, 2025
bc0b383
[DIFF-INFORMED] Java: MaybeBrokenCryptoAlgorithm
d10c Jul 16, 2025
45b627d
[DIFF-INFORMED] Java: SensitiveLogging
d10c Jul 16, 2025
b3b139b
[DIFF-INFORMED] Java: SqlConcatenated
d10c Jul 16, 2025
3785dbe
[DIFF-INFORMED] Java: TaintedEnvironmentVariable
d10c Jul 16, 2025
7888dcb
[DIFF-INFORMED] Java: TempDirLocalInformationDisclosure
d10c Jul 16, 2025
ea4af83
[DIFF-INFORMED] Java: TrustBoundaryViolation
d10c Jul 16, 2025
24c28ed
[DIFF-INFORMED] Java: UnsafeCertTrust
d10c Jul 16, 2025
05df1d3
[DIFF-INFORMED] Java: AndroidWebViewSettingsAllowsContentAccess
d10c Jul 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ module SensitiveCommunicationConfig implements DataFlow::ConfigSig {
/** Holds for every content set `c` whenever `node` is a sink of this configuration. */
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
  exists(c) and
  isSink(node)
}

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@ module ArithmeticOverflowConfig implements DataFlow::ConfigSig {
predicate isBarrier(DataFlow::Node n) { overflowBarrier(n) }

predicate isBarrierIn(DataFlow::Node node) { isSource(node) }

predicate observeDiffInformedIncrementalMode() {
any() // merged with ArithmeticUnderflow in ArithmeticTainted.ql
}

/**
 * Gets the location of an arithmetic expression `arith` for which
 * `overflowSink(arith, sink.asExpr())` holds.
 */
Location getASelectedSinkLocation(DataFlow::Node sink) {
  exists(ArithExpr arith |
    overflowSink(arith, sink.asExpr()) and
    result = arith.getLocation()
  )
}
}

/**
Expand All @@ -29,6 +37,14 @@ module ArithmeticUnderflowConfig implements DataFlow::ConfigSig {
predicate isBarrier(DataFlow::Node n) { underflowBarrier(n) }

predicate isBarrierIn(DataFlow::Node node) { isSource(node) }

predicate observeDiffInformedIncrementalMode() {
any() // merged with ArithmeticOverflow in ArithmeticTainted.ql
}

/**
 * Gets the location of an arithmetic expression `arith` for which
 * `underflowSink(arith, sink.asExpr())` holds.
 */
Location getASelectedSinkLocation(DataFlow::Node sink) {
  exists(ArithExpr arith |
    underflowSink(arith, sink.asExpr()) and
    result = arith.getLocation()
  )
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ module ArithmeticUncontrolledOverflowConfig implements DataFlow::ConfigSig {
predicate isSink(DataFlow::Node sink) { overflowSink(_, sink.asExpr()) }

predicate isBarrier(DataFlow::Node n) { overflowBarrier(n) }

predicate observeDiffInformedIncrementalMode() {
any() // merged with ArithmeticUncontrolledUnderflow in ArithmeticUncontrolled.ql
}

/**
 * Gets the location of an arithmetic expression `arith` for which
 * `overflowSink(arith, sink.asExpr())` holds.
 */
Location getASelectedSinkLocation(DataFlow::Node sink) {
  exists(ArithExpr arith |
    overflowSink(arith, sink.asExpr()) and
    result = arith.getLocation()
  )
}
}

/** Taint-tracking flow to reason about overflow from arithmetic with uncontrolled values. */
Expand All @@ -32,6 +40,14 @@ module ArithmeticUncontrolledUnderflowConfig implements DataFlow::ConfigSig {
predicate isSink(DataFlow::Node sink) { underflowSink(_, sink.asExpr()) }

predicate isBarrier(DataFlow::Node n) { underflowBarrier(n) }

predicate observeDiffInformedIncrementalMode() {
any() // merged with ArithmeticUncontrolledOverflow in ArithmeticUncontrolled.ql
}

/**
 * Gets the location of an arithmetic expression `arith` for which
 * `underflowSink(arith, sink.asExpr())` holds.
 */
Location getASelectedSinkLocation(DataFlow::Node sink) {
  exists(ArithExpr arith |
    underflowSink(arith, sink.asExpr()) and
    result = arith.getLocation()
  )
}
}

/** Taint-tracking flow to reason about underflow from arithmetic with uncontrolled values. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ module ConditionalBypassFlowConfig implements DataFlow::ConfigSig {
predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
endsWithStep(node1, node2)
}

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }

/**
 * Gets the location of either the condition expression that `sink` represents,
 * or of a method call `call` such that `conditionControlsMethod(call, cond)` holds
 * for that condition.
 */
Location getASelectedSinkLocation(DataFlow::Node sink) {
  exists(MethodCall call, Expr cond |
    cond = sink.asExpr() and
    conditionControlsMethod(call, cond)
  |
    result = call.getLocation()
    or
    result = cond.getLocation()
  )
}
}

/**
Expand Down
4 changes: 4 additions & 0 deletions java/ql/lib/semmle/code/java/security/ExternalAPIs.qll
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ module UntrustedDataToExternalApiConfig implements DataFlow::ConfigSig {
predicate isSource(DataFlow::Node source) { source instanceof ActiveThreatModelSource }

predicate isSink(DataFlow::Node sink) { sink instanceof ExternalApiDataNode }

predicate observeDiffInformedIncrementalMode() {
any() // Simple use in UntrustedDataToExternalAPI.ql; also used through ExternalApiUsedWithUntrustedData in ExternalAPIsUsedWithUntrustedData.ql
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ module BoundedFlowSourceConfig implements DataFlow::ConfigSig {
/**
 * Holds if `sink` is an expression for which some checkable array access
 * `canThrowOutOfBoundsDueToEmptyArray` (for any array creation).
 */
predicate isSink(DataFlow::Node sink) {
  exists(CheckableArrayAccess access |
    access.canThrowOutOfBoundsDueToEmptyArray(sink.asExpr(), _)
  )
}

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }

/**
 * Gets the location of the array creation, or of the index expression of the
 * array access, for a pair satisfying
 * `access.canThrowOutOfBoundsDueToEmptyArray(sink.asExpr(), creation)`.
 */
Location getASelectedSinkLocation(DataFlow::Node sink) {
  exists(ArrayCreationExpr creation, CheckableArrayAccess access |
    access.canThrowOutOfBoundsDueToEmptyArray(sink.asExpr(), creation)
  |
    result = creation.getLocation()
    or
    result = access.getIndexExpr().getLocation()
  )
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ module ImproperValidationOfArrayConstructionConfig implements DataFlow::ConfigSi
/**
 * Holds if `sink` is an expression for which some checkable array access
 * `canThrowOutOfBoundsDueToEmptyArray` (for any array creation).
 */
predicate isSink(DataFlow::Node sink) {
  exists(CheckableArrayAccess access |
    access.canThrowOutOfBoundsDueToEmptyArray(sink.asExpr(), _)
  )
}

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }

/**
 * Gets the location of the array creation, or of the index expression of the
 * array access, for a pair satisfying
 * `access.canThrowOutOfBoundsDueToEmptyArray(sink.asExpr(), creation)`.
 */
Location getASelectedSinkLocation(DataFlow::Node sink) {
  exists(ArrayCreationExpr creation, CheckableArrayAccess access |
    access.canThrowOutOfBoundsDueToEmptyArray(sink.asExpr(), creation)
  |
    result = creation.getLocation()
    or
    result = access.getIndexExpr().getLocation()
  )
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ module BoundedFlowSourceConfig implements DataFlow::ConfigSig {
/** Holds if `sink` is an expression for which some checkable array access `canThrowOutOfBounds`. */
predicate isSink(DataFlow::Node sink) {
  any(CheckableArrayAccess access).canThrowOutOfBounds(sink.asExpr())
}

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ module ImproperValidationOfArrayIndexConfig implements DataFlow::ConfigSig {
predicate isBarrier(DataFlow::Node node) { node.getType() instanceof BooleanType }

predicate isBarrierIn(DataFlow::Node node) { isSource(node) }

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ module SecureCookieConfig implements DataFlow::ConfigSig {
sink.asExpr() =
any(MethodCall add | add.getMethod() instanceof ResponseAddCookieMethod).getArgument(0)
}

predicate observeDiffInformedIncrementalMode() {
none() // only used negatively in InsecureCookie.ql
}
}

/** Data flow to reason about the failure to use secure cookies. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ private module BasicAuthConfig implements DataFlow::ConfigSig {
}

predicate isSink(DataFlow::Node sink) { sink instanceof InsecureLdapUrlSink }

predicate observeDiffInformedIncrementalMode() {
none() // used as secondary flow to InsecureLdapUrlFlow in InsecureLdapAuth.ql
}
}

module BasicAuthFlow = DataFlow::Global<BasicAuthConfig>;
Expand All @@ -56,6 +60,10 @@ private module RequiresSslConfig implements DataFlow::ConfigSig {
}

predicate isSink(DataFlow::Node sink) { sink instanceof InsecureLdapUrlSink }

predicate observeDiffInformedIncrementalMode() {
none() // only used negatively in InsecureLdapAuth.ql
}
}

module RequiresSslFlow = DataFlow::Global<RequiresSslConfig>;
4 changes: 4 additions & 0 deletions java/ql/lib/semmle/code/java/security/LogInjectionQuery.qll
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ module LogInjectionConfig implements DataFlow::ConfigSig {
}

predicate isBarrierIn(DataFlow::Node node) { isSource(node) }

predicate observeDiffInformedIncrementalMode() {
none() // straightforward case; but the large test source is causing OOMs under `--check-diff-informed`.
Copy link
Preview

Copilot AI Jul 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment mentions OOMs under --check-diff-informed but doesn't provide sufficient context about the issue or potential solutions. Consider adding more details about the specific test case causing the problem and any planned follow-up actions.

Suggested change
none() // straightforward case; but the large test source is causing OOMs under `--check-diff-informed`.
none() // The large test source used in this query causes Out-Of-Memory (OOM) issues under `--check-diff-informed` mode.
// This predicate is intentionally disabled to prevent OOMs. Future work may involve optimizing the test source
// or refining the query to handle large datasets more efficiently.

Copilot uses AI. Check for mistakes.

}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ module InsecureCryptoConfig implements DataFlow::ConfigSig {
objectToString(n.asExpr()) or
n.getType().getErasure() instanceof TypeObject
}

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }

/**
 * Gets the location of a `CryptoAlgoSpec` whose algorithm-spec expression
 * (`getAlgoSpec()`) is the expression of `sink`.
 */
Location getASelectedSinkLocation(DataFlow::Node sink) {
  result = any(CryptoAlgoSpec spec | spec.getAlgoSpec() = sink.asExpr()).getLocation()
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ module SensitiveLoggerConfig implements DataFlow::ConfigSig {
}

predicate isBarrierIn(DataFlow::Node node) { isSource(node) }

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }
}

module SensitiveLoggerFlow = TaintTracking::Global<SensitiveLoggerConfig>;
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ module UncontrolledStringBuilderSourceFlowConfig implements DataFlow::ConfigSig
predicate isSink(DataFlow::Node sink) { sink instanceof QueryInjectionSink }

predicate isBarrier(DataFlow::Node node) { node instanceof SimpleTypeSanitizer }

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }

/**
 * Gets the location of an expression `tainted` such that
 * `uncontrolledStringBuilderQuery(builder, tainted)` holds for a string-builder
 * variable `builder` whose `toString` call is the expression node `source`.
 */
Location getASelectedSourceLocation(DataFlow::Node source) {
  exists(StringBuilderVar builder, Expr tainted |
    DataFlow::exprNode(builder.getToStringCall()) = source and
    uncontrolledStringBuilderQuery(builder, tainted)
  |
    result = tainted.getLocation()
  )
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ module ExecTaintedEnvironmentConfig implements DataFlow::ConfigSig {
ProcessBuilderEnvironmentFlow::flowToExpr(mm.getQualifier())
)
}

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }

// Yields no selected location for any `source`: `none()` never holds.
// NOTE(review): presumably the query does not select the source as a primary or
// related (`$@`) alert location; confirm against the corresponding .ql file.
Location getASelectedSourceLocation(DataFlow::Node source) { none() }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should exclude sources here, as the path is fully reported.

Copy link
Contributor Author

@d10c d10c Jul 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean path-problems always report the location of source and sink? That seems to contradict the example of WebviewDebuggingEnabled.ql/WebviewDebuggingEnabledQuery.qll mentioned in the incremental codeql docs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does seem to contradict that example, yes. I'm not completely sure here, but both ends of the path are a part of the result tuple. From the QL point of view, it's a bit arbitrary whether the second end-point is included in the message or not - a lot of queries do this, but from the query writer's perspective it's perhaps a bit of a coin-toss, since the path includes both end-points regardless. Now if this coin-toss decision affects whether or not a result is included in a PR, then of course it shouldn't be made arbitrarily, and if that's the case then perhaps we should institute a rule (e.g. in ql-for-ql) to always include the second end-point in the message.

Now, as mentioned, I'm unsure about the filtering semantics of the downstream consumption in PRs, but considering the two possible cases, either these kinds of results aren't filtered, in which case we shouldn't exclude sources here, or they are filtered, in which case I think we also shouldn't filter here, since then I'd argue we'd want to modify the query to include the source in the message to prevent the filtering.

We should move this conversation to slack and figure it out.

Copy link
Contributor

@michaelnebel michaelnebel Jul 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the documentation, we only want to report alerts whose locations pertain either to the primary location (the location of the first element in the select) or to a related location (the locations of elements used in `$@` placeholders).
For ExecTaintedEnvironment, the "source" is the second column (so not a primary or related location), so it should be fine to set getASelectedSourceLocation to none(). Note that this doesn't mean that we will disregard "all" sources as a part of the flow path computation, as a source is still relevant if there is a relevant sink.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok, you are questioning the design (saw the thread in slack).

}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ module TempDirSystemGetPropertyToCreateConfig implements DataFlow::ConfigSig {
or
sanitizer instanceof WindowsOsSanitizer
}

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }

// Yields no selected location for any `sink`: `none()` never holds.
// NOTE(review): presumably the query does not select the sink as a primary or
// related (`$@`) alert location; confirm against the corresponding .ql file.
Location getASelectedSinkLocation(DataFlow::Node sink) { none() }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same. The query reports the full path, so we shouldn't exclude sinks like this.

}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ module TrustBoundaryConfig implements DataFlow::ConfigSig {
}

predicate isSink(DataFlow::Node sink) { sink instanceof TrustBoundaryViolationSink }

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ module SslEndpointIdentificationFlowConfig implements DataFlow::ConfigSig {
predicate isSink(DataFlow::Node sink) { sink instanceof SslConnectionCreation }

predicate isBarrier(DataFlow::Node sanitizer) { sanitizer instanceof SslUnsafeCertTrustSanitizer }

// Opts this configuration in to diff-informed incremental mode (`any()` always holds).
predicate observeDiffInformedIncrementalMode() { any() }

// Yields no selected location for any `source`: `none()` never holds.
// NOTE(review): presumably the query does not select the source as a primary or
// related (`$@`) alert location; confirm against the corresponding .ql file.
Location getASelectedSourceLocation(DataFlow::Node source) { none() }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ module WebViewDisallowContentAccessConfig implements DataFlow::StateConfigSig {
state instanceof IsSettings and
node instanceof WebSettingsDisallowContentAccessSink
}

predicate observeDiffInformedIncrementalMode() {
none() // only used negatively
}
}

module WebViewDisallowContentAccessFlow =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.ServletException;
import java.io.IOException;

public class ExternalAPISinkExample extends HttpServlet {
protected void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
// BAD: a request parameter is written directly to an error response page
response.sendError(HttpServletResponse.SC_NOT_FOUND,
"The page \"" + request.getParameter("page") + "\" was not found."); // $ Alert
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.ServletException;
import java.io.IOException;

public class ExternalAPITaintStepExample extends HttpServlet {
protected void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {

StringBuilder sqlQueryBuilder = new StringBuilder();
sqlQueryBuilder.append("SELECT * FROM user WHERE user_id='");
// BAD: a request parameter is concatenated directly into a SQL query
sqlQueryBuilder.append(request.getParameter("user_id"));
sqlQueryBuilder.append("'");

// ...
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
| javax.servlet.http.HttpServletResponse.sendError(int,java.lang.String) [param 1] | 1 | 1 |
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Security/CWE/CWE-020/ExternalAPIsUsedWithUntrustedData.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#select
| ExternalAPISinkExample.java:12:5:12:70 | ... + ... | ExternalAPISinkExample.java:12:21:12:48 | getParameter(...) : String | ExternalAPISinkExample.java:12:5:12:70 | ... + ... | Call to javax.servlet.http.HttpServletResponse.sendError with untrusted data from $@. | ExternalAPISinkExample.java:12:21:12:48 | getParameter(...) : String | getParameter(...) : String |
edges
| ExternalAPISinkExample.java:12:21:12:48 | getParameter(...) : String | ExternalAPISinkExample.java:12:5:12:70 | ... + ... | provenance | Src:MaD:2 Sink:MaD:1 |
models
| 1 | Sink: javax.servlet.http; HttpServletResponse; false; sendError; (int,String); ; Argument[1]; information-leak; manual |
| 2 | Source: javax.servlet; ServletRequest; false; getParameter; (String); ; ReturnValue; remote; manual |
nodes
| ExternalAPISinkExample.java:12:5:12:70 | ... + ... | semmle.label | ... + ... |
| ExternalAPISinkExample.java:12:21:12:48 | getParameter(...) : String | semmle.label | getParameter(...) : String |
subpaths
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
query: Security/CWE/CWE-020/UntrustedDataToExternalAPI.ql
postprocess:
- utils/test/PrettyPrintModels.ql
- utils/test/InlineExpectationsTestQuery.ql
Loading