Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* Parsing during charset sniffing no longer fails if an advisory `available()` call throws `IOException`, as seen on JDK 8 `HttpURLConnection`. [#2474](https://github.com/jhy/jsoup/issues/2474)
* `Cleaner` no longer makes relative URL attributes in the input document absolute when cleaning or validating a `Document`. URL normalization now applies only to the cleaned output, and `Safelist.isSafeAttribute()` is side effect free. [#2475](https://github.com/jhy/jsoup/issues/2475)
* `Cleaner` no longer duplicates enforced attributes when the input `Document` preserves attribute case. A case-variant source attribute is now replaced by the enforced attribute in the cleaned output. [#2476](https://github.com/jhy/jsoup/issues/2476)
* If a per-request SOCKS proxy is configured, jsoup now avoids using the JDK `HttpClient`, because the JDK would silently ignore that proxy and attempt to connect directly. Those requests now fall back to the legacy `HttpURLConnection` transport instead, which does support SOCKS. [#2468](https://github.com/jhy/jsoup/issues/2468)

## 1.22.1 (2026-Jan-01)

Expand Down
13 changes: 12 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -412,13 +412,24 @@
<phase>compile</phase>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
<configuration>
<release>8</release>
</configuration>
</execution>

<execution>
<id>testCompile-java-11</id>
<phase>test-compile</phase>
<goals>
<goal>testCompile</goal>
</goals>
<configuration>
<useModulePath>false</useModulePath>
<release>11</release>
</configuration>
</execution>

<execution>
<id>compile-java-11</id>
<phase>compile</phase>
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/org/jsoup/helper/RequestDispatch.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import static org.jsoup.helper.HttpConnection.Request;
import static org.jsoup.helper.HttpConnection.Response;

import java.net.Proxy;
import java.lang.reflect.Constructor;

/**
Expand Down Expand Up @@ -35,6 +36,9 @@ static RequestExecutor get(Request request, @Nullable Response previousResponse)

if (request.sslSocketFactory() != null) // downgrade if a socket factory is set, as it can't be supplied to the HttpClient
useHttpClient = false;
Proxy proxy = request.proxy();
if (proxy != null && proxy.type() == Proxy.Type.SOCKS) // HttpClient doesn't support SOCKS proxies
useHttpClient = false;

if (useHttpClient && clientConstructor != null) {
try {
Expand Down
60 changes: 44 additions & 16 deletions src/test/java11/org/jsoup/helper/HttpClientExecutorTest.java
Original file line number Diff line number Diff line change
@@ -1,29 +1,54 @@
package org.jsoup.helper;

import org.jsoup.internal.SharedConstants;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.net.*;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.ProxySelector;
import java.net.SocketAddress;
import java.net.URI;
import java.util.Collections;
import java.util.List;

import static org.junit.jupiter.api.Assertions.*;

public class HttpClientExecutorTest {
@Test void loadsMultiReleaseHttpClientExecutor() {
// sanity check that the test is resolving the packaged Java 11 override, not a copy on the test classpath
String resource = HttpClientTestAccess.executorClassResource().toExternalForm();
assertTrue(resource.contains("/META-INF/versions/11/"), resource);
}

@Test void getsHttpClient() {
try {
enableHttpClient();
RequestExecutor executor = RequestDispatch.get(new HttpConnection.Request(), null);
assertInstanceOf(HttpClientExecutor.class, executor);
assertTrue(HttpClientTestAccess.isHttpClientExecutor(executor));
} finally {
disableHttpClient(); // reset to previous default for JDK8 compat tests
}
}

@Test void getsHttpUrlConnectionByDefault() {
@Test void getsHttpClientByDefault() {
System.clearProperty(SharedConstants.UseHttpClient);
RequestExecutor executor = RequestDispatch.get(new HttpConnection.Request(), null);
assertInstanceOf(HttpClientExecutor.class, executor);
assertTrue(HttpClientTestAccess.isHttpClientExecutor(executor));
}

@Test void downgradesSocksProxyToUrlConnectionExecutor() {
try {
enableHttpClient();
HttpConnection.Request request = new HttpConnection.Request();
request.proxy(new Proxy(Proxy.Type.SOCKS, new InetSocketAddress("localhost", 1080)));

// SOCKS handling only matters on the Java 11+ path where HttpClient would otherwise be selected (and just bypasses)
RequestExecutor executor = RequestDispatch.get(request, null);
assertInstanceOf(UrlConnectionExecutor.class, executor);
} finally {
disableHttpClient(); // reset to previous default for JDK8 compat tests
}
}

public static void enableHttpClient() {
Expand Down Expand Up @@ -51,9 +76,9 @@ public List<Proxy> select(URI uri) {
public void connectFailed(URI uri, SocketAddress sa, IOException ioe) {}
});

HttpClientExecutor.ProxyWrap wrap = new HttpClientExecutor.ProxyWrap();
ProxySelector wrap = HttpClientTestAccess.newProxyWrap();
List<Proxy> proxies = wrap.select(URI.create("http://example.com"));

assertEquals(1, proxies.size());
assertSame(defaultProxy, proxies.get(0).address());
} finally {
Expand All @@ -62,12 +87,15 @@ public void connectFailed(URI uri, SocketAddress sa, IOException ioe) {}
}

@Test void proxyWrapConnectFailedOnlyForSystemProxy() {
HttpClientExecutor.ProxyWrap wrap = new HttpClientExecutor.ProxyWrap();
HttpClientExecutor.perRequestProxy.set(new Proxy(Proxy.Type.HTTP, new InetSocketAddress("custom", 9090)));
wrap.connectFailed(URI.create("http://example.com"),
new InetSocketAddress("custom", 9090),
new IOException("test"));
HttpClientExecutor.perRequestProxy.remove();
try {
ProxySelector wrap = HttpClientTestAccess.newProxyWrap();
HttpClientTestAccess.setPerRequestProxy(new Proxy(Proxy.Type.HTTP, new InetSocketAddress("custom", 9090)));
wrap.connectFailed(URI.create("http://example.com"),
new InetSocketAddress("custom", 9090),
new IOException("test"));
} finally {
HttpClientTestAccess.clearPerRequestProxy();
}
}

@Test
Expand All @@ -86,14 +114,14 @@ public List<Proxy> select(URI uri) {
public void connectFailed(URI uri, SocketAddress sa, IOException ioe) {}
});

HttpClientExecutor.perRequestProxy.set(
HttpClientTestAccess.setPerRequestProxy(
new Proxy(Proxy.Type.HTTP, perReqProxy));

HttpClientExecutor.ProxyWrap wrap = new HttpClientExecutor.ProxyWrap();
ProxySelector wrap = HttpClientTestAccess.newProxyWrap();
List<Proxy> proxies = wrap.select(URI.create("http://example.com"));
assertSame(perReqProxy, proxies.get(0).address());
} finally {
HttpClientExecutor.perRequestProxy.remove();
HttpClientTestAccess.clearPerRequestProxy();
ProxySelector.setDefault(original);
}
}
Expand All @@ -108,7 +136,7 @@ public void connectFailed(URI uri, SocketAddress sa, IOException ioe) {}
@Override
public void connectFailed(URI uri, SocketAddress sa, IOException ioe) { called[0] = true; }
});
new HttpClientExecutor.ProxyWrap()
HttpClientTestAccess.newProxyWrap()
.connectFailed(URI.create("http://example.com"), new InetSocketAddress("x", 80), new IOException("x"));
assertTrue(called[0]);
} finally {
Expand Down
74 changes: 74 additions & 0 deletions src/test/java11/org/jsoup/helper/HttpClientTestAccess.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package org.jsoup.helper;

import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.net.Proxy;
import java.net.ProxySelector;
import java.net.URL;

/**
Test access shim for the Java 11 multi-release classes.
<p>These tests need to exercise the Java 11 implementation as loaded from
{@code META-INF/versions/11}. Using reflection here keeps them bound to that
packaged implementation, without adding the Java 11 sources to the test
compile path.</p>
*/
final class HttpClientTestAccess {
private static final String ExecutorClassName = "org.jsoup.helper.HttpClientExecutor";
private static final String ProxyWrapClassName = ExecutorClassName + "$ProxyWrap";
private static final String ExecutorClassResource = "org/jsoup/helper/HttpClientExecutor.class";

private HttpClientTestAccess() {}

static boolean isHttpClientExecutor(RequestExecutor executor) {
return executorClass().isInstance(executor);
}

static URL executorClassResource() {
URL resource = HttpClientTestAccess.class.getClassLoader().getResource(ExecutorClassResource);
if (resource == null)
throw new IllegalStateException("Could not load " + ExecutorClassResource);
return resource;
}

static ProxySelector newProxyWrap() {
try {
Constructor<?> constructor = loadClass(ProxyWrapClassName).getDeclaredConstructor();
constructor.setAccessible(true);
return (ProxySelector) constructor.newInstance();
} catch (ReflectiveOperationException e) {
throw new IllegalStateException("Could not construct HttpClientExecutor.ProxyWrap", e);
}
}

static void setPerRequestProxy(Proxy proxy) {
perRequestProxy().set(proxy);
}

static void clearPerRequestProxy() {
perRequestProxy().remove();
}

@SuppressWarnings("unchecked")
private static ThreadLocal<Proxy> perRequestProxy() {
try {
Field field = executorClass().getDeclaredField("perRequestProxy");
field.setAccessible(true);
return (ThreadLocal<Proxy>) field.get(null);
} catch (ReflectiveOperationException e) {
throw new IllegalStateException("Could not access HttpClientExecutor.perRequestProxy", e);
}
}

private static Class<?> executorClass() {
return loadClass(ExecutorClassName);
}

private static Class<?> loadClass(String className) {
try {
return Class.forName(className);
} catch (ClassNotFoundException e) {
throw new IllegalStateException("Could not load " + className, e);
}
}
}