From 1a6a52988e23be70c8cab846c12dd45e46d44d02 Mon Sep 17 00:00:00 2001 From: Arturo Bernal Date: Sat, 1 Nov 2025 20:16:34 +0100 Subject: [PATCH] =?UTF-8?q?HTTPCORE-637:=20RFC=203986=20URI:=20parse/resol?= =?UTF-8?q?ve/normalize.=20Table-driven=20ASCII,=20no=20regex;=20correct?= =?UTF-8?q?=20=C2=A75.2.4=20dot-segment=20trailing=20slash.=20Drop=20Rfc39?= =?UTF-8?q?86UriBuilder;=20helpers=20@Internal;=20RFC=20example=20tests=20?= =?UTF-8?q?green.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../org/apache/hc/core5/net/URIBuilder.java | 56 +---- .../apache/hc/core5/net/uri/Rfc3986Uri.java | 222 ++++++++++++++++++ .../apache/hc/core5/net/uri/UriReference.java | 54 +++++ .../net/uri/internal/authorities/HostOps.java | 58 +++++ .../net/uri/internal/authorities/Ports.java | 62 +++++ .../uri/internal/encoding/PercentCodec.java | 146 ++++++++++++ .../net/uri/internal/paths/DotSegments.java | 141 +++++++++++ .../uri/internal/uris/Rfc3986Equivalence.java | 87 +++++++ .../uri/internal/uris/Rfc3986Normalizer.java | 91 +++++++ .../net/uri/internal/uris/Rfc3986Parser.java | 182 ++++++++++++++ .../uri/internal/uris/Rfc3986Renderer.java | 95 ++++++++ .../uri/internal/uris/Rfc3986Resolver.java | 88 +++++++ .../core5/net/uri/internal/utils/Ascii.java | 204 ++++++++++++++++ .../net/uri/Rfc3986UriRfcExamplesTest.java | 167 +++++++++++++ .../hc/core5/net/uri/Rfc3986UriTest.java | 116 +++++++++ 15 files changed, 1720 insertions(+), 49 deletions(-) create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/Rfc3986Uri.java create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/UriReference.java create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/authorities/HostOps.java create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/authorities/Ports.java create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/encoding/PercentCodec.java create 
mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/paths/DotSegments.java create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Equivalence.java create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Normalizer.java create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Parser.java create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Renderer.java create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Resolver.java create mode 100644 httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/utils/Ascii.java create mode 100644 httpcore5/src/test/java/org/apache/hc/core5/net/uri/Rfc3986UriRfcExamplesTest.java create mode 100644 httpcore5/src/test/java/org/apache/hc/core5/net/uri/Rfc3986UriTest.java diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java b/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java index febe0f5cb..a04f20511 100644 --- a/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/URIBuilder.java @@ -36,7 +36,6 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Collections; -import java.util.LinkedList; import java.util.List; import org.apache.hc.core5.http.HttpHost; @@ -44,6 +43,7 @@ import org.apache.hc.core5.http.URIScheme; import org.apache.hc.core5.http.message.BasicNameValuePair; import org.apache.hc.core5.http.message.ParserCursor; +import org.apache.hc.core5.net.uri.Rfc3986Uri; import org.apache.hc.core5.util.Args; import org.apache.hc.core5.util.TextUtils; import org.apache.hc.core5.util.Tokenizer; @@ -1118,58 +1118,16 @@ public URIBuilder normalizeSyntax() { * @since 5.3 */ public URIBuilder optimize() { - final String scheme = this.scheme; - if (scheme != null) { - this.scheme = TextUtils.toLowerCase(scheme); - } - 
- if (this.pathRootless) { + final String raw = this.toString(); + try { + final Rfc3986Uri u = Rfc3986Uri.parse(raw).optimize(); + return new URIBuilder(u.toString()); + } catch (final IllegalArgumentException | URISyntaxException ex) { return this; } - - // Force Percent-Encoding re-encoding - this.encodedSchemeSpecificPart = null; - this.encodedAuthority = null; - this.encodedUserInfo = null; - this.encodedPath = null; - this.encodedQuery = null; - this.encodedFragment = null; - - final String host = this.host; - if (host != null) { - this.host = TextUtils.toLowerCase(host); - } - - if (this.pathSegments != null) { - final List inputSegments = this.pathSegments; - if (!inputSegments.isEmpty()) { - final LinkedList outputSegments = new LinkedList<>(); - for (final String inputSegment : inputSegments) { - if (!inputSegment.isEmpty() && !".".equals(inputSegment)) { - if ("..".equals(inputSegment)) { - if (!outputSegments.isEmpty()) { - outputSegments.removeLast(); - } - } else { - outputSegments.addLast(inputSegment); - } - } - } - if (!inputSegments.isEmpty()) { - final String lastSegment = inputSegments.get(inputSegments.size() - 1); - if (lastSegment.isEmpty()) { - outputSegments.addLast(""); - } - } - this.pathSegments = outputSegments; - } else { - this.pathSegments = Collections.singletonList(""); - } - } - - return this; } + /** * Converts this instance to a URI string. * diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/Rfc3986Uri.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/Rfc3986Uri.java new file mode 100644 index 000000000..2706bca21 --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/Rfc3986Uri.java @@ -0,0 +1,222 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.apache.hc.core5.net.uri; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.ThreadingBehavior; +import org.apache.hc.core5.net.uri.internal.uris.Rfc3986Equivalence; +import org.apache.hc.core5.net.uri.internal.uris.Rfc3986Normalizer; +import org.apache.hc.core5.net.uri.internal.uris.Rfc3986Parser; +import org.apache.hc.core5.net.uri.internal.uris.Rfc3986Resolver; + +/** + * Immutable, RFC 3986-compliant URI value object. + *
    + *
  • Parsing preserves raw text (including percent-encodings).
  • + *
  • Resolution & dot-segment removal per RFC 3986 §5.2.
  • + *
  • Scheme and reg-name host are stored in lower case.
  • + *
  • No regex, no {@code Character} classes – pure ASCII tables.
  • + *
+ * + *

Round-trip: {@link #toRawString()} returns the exact input. + * {@link #toString()} renders the canonical form held by this object.

+ * + * @since 5.4 + */ +@Contract(threading = ThreadingBehavior.IMMUTABLE) +public final class Rfc3986Uri implements UriReference { + + final String original; + + final String scheme; // lower-cased (ASCII) or null + final String userInfo; // raw or null + final String host; // reg-name lower-cased; IPv6 literal kept with brackets; or null + final int port; // -1 if missing + final String path; // raw, never null ("" allowed) + final String query; // raw (no '?') or null + final String fragment; // raw (no '#') or null + + public Rfc3986Uri( + final String original, + final String scheme, + final String userInfo, + final String host, + final int port, + final String path, + final String query, + final String fragment) { + this.original = original; + this.scheme = scheme; + this.userInfo = userInfo; + this.host = host; + this.port = port; + this.path = path; + this.query = query; + this.fragment = fragment; + } + + /** + * Parse a URI reference per RFC 3986. + */ + public static Rfc3986Uri parse(final String s) { + return Rfc3986Parser.parse(s); + } + + @Override + public String getScheme() { + return scheme; + } + + @Override + public String getUserInfo() { + return userInfo; + } + + @Override + public String getHost() { + return host; + } + + @Override + public int getPort() { + return port; + } + + @Override + public String getPath() { + return path; + } + + @Override + public String getQuery() { + return query; + } + + @Override + public String getFragment() { + return fragment; + } + + @Override + public String toRawString() { + return original; + } + + @Override + public String toString() { + // Render canonical internal state (not the raw input). 
+ int cap = 0; + if (scheme != null) { + cap += scheme.length() + 1; + } + if (host != null) { + cap += 2 + host.length(); + if (userInfo != null) { + cap += userInfo.length() + 1; + } + if (port >= 0) { + cap += 6; + } + } + if (path != null) { + cap += path.length(); + } + if (query != null) { + cap += 1 + query.length(); + } + if (fragment != null) { + cap += 1 + fragment.length(); + } + + final StringBuilder sb = new StringBuilder(Math.max(16, cap)); + if (scheme != null) { + sb.append(scheme).append(':'); + } + if (host != null) { + sb.append("//"); + if (userInfo != null) { + sb.append(userInfo).append('@'); + } + sb.append(host); + if (port >= 0) { + sb.append(':').append(port); + } + } + if (path != null) { + sb.append(path); + } + if (query != null) { + sb.append('?').append(query); + } + if (fragment != null) { + sb.append('#').append(fragment); + } + return sb.toString(); + } + + /** + * Dot-segment removal (RFC 3986 §5.2.4). + */ + public Rfc3986Uri normalizePath() { + return Rfc3986Normalizer.normalizePath(this); + } + + /** + * RFC equivalence (case-insensitive scheme/host; decode %XX for unreserved; uppercase hex). + */ + public boolean equivalentTo(final Rfc3986Uri other) { + return Rfc3986Equivalence.equivalent(this, other); + } + + /** + * Resolve against a base (RFC 3986 §5.2). + */ + public static Rfc3986Uri resolve(final Rfc3986Uri base, final Rfc3986Uri ref) { + return Rfc3986Resolver.resolve(base, ref); + } + + /** + * Canonicalization used by URIBuilder#optimize(). + *

Performs: + *

    + *
  • Lower-case of scheme and reg-name host (IPv6 literal preserved).
  • + *
  • Dot-segment removal if the path is absolute or an authority is present.
  • + *
  • Decoding of percent-escapes only for ASCII unreserved.
  • + *
  • Uppercasing of hex digits in remaining percent-escapes.
  • + *
  • Strict re-encoding of the path (preserve '/' and valid %HH; encode everything else via UTF-8).
  • + *
  • Query and fragment normalized by decode-unreserved + uppercase-hex.
  • + *
+ * The operation may change the textual form; consumers should treat this as a canonicalization step, + * not as a guaranteed identity-preserving transformation. + * + * @since 5.4 + */ + public Rfc3986Uri optimize() { + return Rfc3986Normalizer.optimize(this); + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/UriReference.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/UriReference.java new file mode 100644 index 000000000..2ad177b93 --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/UriReference.java @@ -0,0 +1,54 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.apache.hc.core5.net.uri; + +/** + * Minimal URI accessor interface for RFC 3986 components. 
/**
 * Minimal URI accessor interface for RFC 3986 components.
 * <p>
 * The interface itself does not fix how an absent component is reported. The
 * {@code Rfc3986Uri} implementation in this package documents {@code null} for absent
 * textual components and {@code -1} for a missing port.
 * </p>
 *
 * @since 5.4
 */
public interface UriReference {

    /**
     * Raw original input text (no transformations).
     *
     * @return the raw text
     */
    String toRawString();

    /**
     * @return the scheme component
     */
    String getScheme();

    /**
     * @return the user-info sub-component of the authority
     */
    String getUserInfo();

    /**
     * @return the host sub-component of the authority
     */
    String getHost();

    /**
     * @return the port sub-component of the authority
     */
    int getPort();

    /**
     * @return the path component
     */
    String getPath();

    /**
     * @return the query component
     */
    String getQuery();

    /**
     * @return the fragment component
     */
    String getFragment();
}
+ * + */ + +package org.apache.hc.core5.net.uri.internal.authorities; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.Internal; +import org.apache.hc.core5.annotation.ThreadingBehavior; +import org.apache.hc.core5.net.uri.internal.utils.Ascii; + +/** + * Host case/format utilities. + * + * @since 5.4 + */ +@Internal +@Contract(threading = ThreadingBehavior.STATELESS) +public final class HostOps { + private HostOps() { + } + + /** + * Lower-case reg-name; preserve IPv6 literals with brackets. + */ + public static String lowerRegNamePreserveIPv6(final String h) { + if (h == null) { + return null; + } + if (!h.isEmpty() && h.charAt(0) == '[') { + return h; + } + return Ascii.lowerAscii(h); + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/authorities/Ports.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/authorities/Ports.java new file mode 100644 index 000000000..e691a0c90 --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/authorities/Ports.java @@ -0,0 +1,62 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.apache.hc.core5.net.uri.internal.authorities; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.Internal; +import org.apache.hc.core5.annotation.ThreadingBehavior; + +/** + * Host:port helpers. + * + * @since 5.4 + */ +@Internal +@Contract(threading = ThreadingBehavior.STATELESS) +public final class Ports { + private Ports() { + } + + public static int parsePort(final char[] buf, final int from, final int toExcl) { + if (from >= toExcl) { + return -1; + } + int v = 0; + for (int i = from; i < toExcl; i++) { + final char ch = buf[i]; + if (ch < '0' || ch > '9') { + throw new IllegalArgumentException("Invalid port"); + } + v = v * 10 + ch - '0'; + if (v > 65535) { + throw new IllegalArgumentException("Port out of range"); + } + } + return v; + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/encoding/PercentCodec.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/encoding/PercentCodec.java new file mode 100644 index 000000000..9f2eaea96 --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/encoding/PercentCodec.java @@ -0,0 +1,146 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.apache.hc.core5.net.uri.internal.encoding; + +import java.nio.charset.StandardCharsets; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.Internal; +import org.apache.hc.core5.annotation.ThreadingBehavior; +import org.apache.hc.core5.net.uri.internal.utils.Ascii; + +/** + * Minimal percent-encoding helpers for RFC 3986. + * Only what is required by {@code Rfc3986Uri#optimize()} and equivalence. + * + * @since 5.4 + */ +@Internal +@Contract(threading = ThreadingBehavior.STATELESS) +public final class PercentCodec { + private PercentCodec() { + } + + /** + * Decode percent-escapes. If {@code unreservedOnly} is true, only decode escapes that map + * to ASCII unreserved; otherwise decode all valid %XX sequences. 
+ */ + public static String decode(final String s, final boolean unreservedOnly) { + if (s == null) { + return null; + } + final int n = s.length(); + boolean changed = false; + final StringBuilder out = new StringBuilder(n); + for (int i = 0; i < n; i++) { + final char ch = s.charAt(i); + if (ch == '%' && i + 2 < n) { + final int h1 = Ascii.hexVal(s.charAt(i + 1)); + final int h2 = Ascii.hexVal(s.charAt(i + 2)); + if (h1 >= 0 && h2 >= 0) { + final int b = h1 << 4 | h2; + final char decoded = (char) (b & 0xFF); + if (!unreservedOnly || Ascii.isUnreserved(decoded)) { + out.append(decoded); + i += 2; + changed = true; + continue; + } + } + } + out.append(ch); + } + return changed ? out.toString() : s; + } + + /** + * Uppercase hex digits in any valid percent-escapes. + */ + public static String uppercaseHexInPercents(final String s) { + if (s == null) { + return null; + } + final int n = s.length(); + boolean changed = false; + final StringBuilder out = new StringBuilder(n); + for (int i = 0; i < n; i++) { + final char ch = s.charAt(i); + if (ch == '%' && i + 2 < n) { + final int h1 = Ascii.hexVal(s.charAt(i + 1)); + final int h2 = Ascii.hexVal(s.charAt(i + 2)); + if (h1 >= 0 && h2 >= 0) { + out.append('%'); + out.append(Ascii.toHexUpper(h1)); + out.append(Ascii.toHexUpper(h2)); + i += 2; + changed = true; + continue; + } + } + out.append(ch); + } + return changed ? out.toString() : s; + } + + /** + * Strict path encoder used by optimize(): preserves '/' and valid %HH sequences, + * percent-encodes all other characters using UTF-8. 
+ */ + public static String encodeStrictPath(final String s) { + if (s == null) { + return null; + } + final int n = s.length(); + final StringBuilder out = new StringBuilder(n); + for (int i = 0; i < n; i++) { + final char ch = s.charAt(i); + // Preserve existing percent-escapes + if (ch == '%' && i + 2 < n) { + final int h1 = Ascii.hexVal(s.charAt(i + 1)); + final int h2 = Ascii.hexVal(s.charAt(i + 2)); + if (h1 >= 0 && h2 >= 0) { + out.append('%').append(Ascii.toHexUpper(h1)).append(Ascii.toHexUpper(h2)); + i += 2; + continue; + } + } + if (ch == '/' || Ascii.isUnreserved(ch)) { + out.append(ch); + } else { + // encode as UTF-8 + final byte[] bytes = String.valueOf(ch).getBytes(StandardCharsets.UTF_8); + for (final byte b : bytes) { + out.append('%'); + out.append(Ascii.toHexUpper(b >> 4 & 0xF)); + out.append(Ascii.toHexUpper(b & 0xF)); + } + } + } + return out.toString(); + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/paths/DotSegments.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/paths/DotSegments.java new file mode 100644 index 000000000..224657b2d --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/paths/DotSegments.java @@ -0,0 +1,141 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.apache.hc.core5.net.uri.internal.paths; + +import java.util.ArrayDeque; +import java.util.Deque; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.Internal; +import org.apache.hc.core5.annotation.ThreadingBehavior; + +/** + * RFC 3986 dot-segment removal (§5.2.4) with precise trailing-slash handling. + * + * @since 5.4 + */ +@Internal +@Contract(threading = ThreadingBehavior.STATELESS) +public final class DotSegments { + + private DotSegments() { + } + + public static String remove(final String path) { + if (path == null || path.isEmpty()) { + return path == null ? null : ""; + } + + final boolean absolute = path.startsWith("/"); + final boolean hadTrailingSlash = path.endsWith("/"); + + final Deque out = new ArrayDeque<>(); + + int i = 0; + final int n = path.length(); + boolean firstSegment = true; // suppress the artificial leading "" for absolute paths + boolean forceTrailingSlash = false; // terminal "." or ".." wants slash in most cases + + while (i <= n) { + final int j = i < n ? 
path.indexOf('/', i) : -1; + + final String seg; + if (j == -1) { + seg = path.substring(i, n); + i = n + 1; + } else { + seg = path.substring(i, j); + i = j + 1; + } + + if (firstSegment && absolute && seg.isEmpty()) { + firstSegment = false; + if (j == -1) { + break; // path was "/" only + } + continue; + } + firstSegment = false; + + final boolean isLast = j == -1; + + if (seg.equals(".")) { + if (isLast && (absolute || !out.isEmpty())) { + forceTrailingSlash = true; + } + } else if (seg.equals("..")) { + if (!out.isEmpty()) { + final String last = out.peekLast(); + if (!last.equals("..")) { + out.removeLast(); + } else if (!absolute) { + out.addLast(".."); + } + } else if (!absolute) { + out.addLast(".."); + } + if (isLast && (absolute || !out.isEmpty())) { + forceTrailingSlash = true; + } + } else { + out.addLast(seg); + } + + if (j == -1) { + break; + } + } + + final StringBuilder b = new StringBuilder(path.length()); + if (absolute) { + b.append('/'); + } + boolean first = true; + for (final String seg : out) { + if (!first) { + b.append('/'); + } + b.append(seg); + first = false; + } + + final boolean wantsTrailing = + hadTrailingSlash + || forceTrailingSlash && (absolute || !out.isEmpty() && !"..".equals(out.peekLast())); + + if (wantsTrailing && (b.length() == 0 || b.charAt(b.length() - 1) != '/')) { + b.append('/'); + } + + if (absolute && b.length() == 0) { + b.append('/'); + } + + return b.toString(); + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Equivalence.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Equivalence.java new file mode 100644 index 000000000..48b499468 --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Equivalence.java @@ -0,0 +1,87 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.apache.hc.core5.net.uri.internal.uris; + +import java.util.Objects; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.Internal; +import org.apache.hc.core5.annotation.ThreadingBehavior; +import org.apache.hc.core5.net.uri.Rfc3986Uri; +import org.apache.hc.core5.net.uri.internal.authorities.HostOps; +import org.apache.hc.core5.net.uri.internal.encoding.PercentCodec; +import org.apache.hc.core5.net.uri.internal.utils.Ascii; + +/** + * RFC 3986 equivalence utilities (§6.2). 
+ * + * @since 5.4 + */ +@Internal +@Contract(threading = ThreadingBehavior.STATELESS) +public final class Rfc3986Equivalence { + + private Rfc3986Equivalence() { + } + + public static boolean equivalent(final Rfc3986Uri a, final Rfc3986Uri b) { + if (a == b) { + return true; + } + if (b == null) { + return false; + } + if (!Objects.equals(Ascii.lowerAscii(a.getScheme()), Ascii.lowerAscii(b.getScheme()))) { + return false; + } + if (!Objects.equals(HostOps.lowerRegNamePreserveIPv6(a.getHost()), + HostOps.lowerRegNamePreserveIPv6(b.getHost()))) { + return false; + } + if (a.getPort() != b.getPort()) { + return false; + } + if (!Objects.equals(norm(a.getPath()), norm(b.getPath()))) { + return false; + } + if (!Objects.equals(norm(a.getQuery()), norm(b.getQuery()))) { + return false; + } + if (!Objects.equals(norm(a.getFragment()), norm(b.getFragment()))) { + return false; + } + return Objects.equals(a.getUserInfo(), b.getUserInfo()); + } + + private static String norm(final String s) { + if (s == null) { + return null; + } + return PercentCodec.uppercaseHexInPercents(PercentCodec.decode(s, true)); + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Normalizer.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Normalizer.java new file mode 100644 index 000000000..121f54637 --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Normalizer.java @@ -0,0 +1,91 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.apache.hc.core5.net.uri.internal.uris; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.Internal; +import org.apache.hc.core5.annotation.ThreadingBehavior; +import org.apache.hc.core5.net.uri.Rfc3986Uri; +import org.apache.hc.core5.net.uri.internal.authorities.HostOps; +import org.apache.hc.core5.net.uri.internal.encoding.PercentCodec; +import org.apache.hc.core5.net.uri.internal.paths.DotSegments; +import org.apache.hc.core5.net.uri.internal.utils.Ascii; + +/** + * Normalization and canonicalization helpers. + * + * @since 5.4 + */ +@Internal +@Contract(threading = ThreadingBehavior.STATELESS) +public final class Rfc3986Normalizer { + + private Rfc3986Normalizer() { + } + + public static Rfc3986Uri normalizePath(final Rfc3986Uri u) { + final boolean canNormalizePath = u.getHost() != null || u.getPath() != null && u.getPath().startsWith("/"); + final String p = canNormalizePath ? 
DotSegments.remove(u.getPath()) : u.getPath(); + if (p != null && p.equals(u.getPath())) { + return u; + } + return rebuild(u.getScheme(), u.getUserInfo(), u.getHost(), u.getPort(), p, u.getQuery(), u.getFragment()); + } + + /** + * Canonicalization used by URIBuilder#optimize(): + * - lower-case scheme and reg-name host + * - remove dot-segments for absolute/authority paths + * - decode %XX for unreserved only; uppercase remaining escapes + * - strict-encode path; normalize query/fragment escapes + */ + public static Rfc3986Uri optimize(final Rfc3986Uri u) { + final String s2 = u.getScheme() == null ? null : Ascii.lowerAscii(u.getScheme()); + final String h2 = HostOps.lowerRegNamePreserveIPv6(u.getHost()); + + final boolean canNormalizePath = u.getHost() != null || u.getPath() != null && u.getPath().startsWith("/"); + final String p0 = canNormalizePath ? DotSegments.remove(u.getPath()) : u.getPath(); + + final String p1 = PercentCodec.decode(p0, true); + final String p2 = PercentCodec.uppercaseHexInPercents(p1); + final String p3 = PercentCodec.encodeStrictPath(p2); + + final String q1 = u.getQuery() == null ? null + : PercentCodec.uppercaseHexInPercents(PercentCodec.decode(u.getQuery(), true)); + final String f1 = u.getFragment() == null ? 
null + : PercentCodec.uppercaseHexInPercents(PercentCodec.decode(u.getFragment(), true)); + + return rebuild(s2, u.getUserInfo(), h2, u.getPort(), p3, q1, f1); + } + + private static Rfc3986Uri rebuild(final String s, final String ui, final String h, final int port, + final String p, final String q, final String f) { + final String raw = Rfc3986Renderer.build(s, ui, h, port, p, q, f); + return new Rfc3986Uri(raw, s, ui, h, port, p, q, f); + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Parser.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Parser.java new file mode 100644 index 000000000..f5f4b1c3f --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Parser.java @@ -0,0 +1,182 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . 
+ * + */ + +package org.apache.hc.core5.net.uri.internal.uris; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.Internal; +import org.apache.hc.core5.annotation.ThreadingBehavior; +import org.apache.hc.core5.net.uri.Rfc3986Uri; +import org.apache.hc.core5.net.uri.internal.authorities.Ports; +import org.apache.hc.core5.net.uri.internal.utils.Ascii; +import org.apache.hc.core5.util.Args; +import org.apache.hc.core5.util.Tokenizer; + +/** + * Parser for {@link Rfc3986Uri}. + * + * @since 5.4 + */ +@Internal +@Contract(threading = ThreadingBehavior.STATELESS) +public final class Rfc3986Parser { + + private Rfc3986Parser() { + } + + public static Rfc3986Uri parse(final String s) { + Args.notNull(s, "URI must not be null"); + + final char[] buf = s.toCharArray(); + final Tokenizer.Cursor cur = new Tokenizer.Cursor(0, buf.length); + + String scheme = null, userInfo = null, host = null, path = "", query = null, fragment = null; + int port = -1; + + // ---- scheme ---- ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) ":" + final int schemeEnd = scanScheme(buf, cur.getPos(), cur.getUpperBound()); + if (schemeEnd >= 0 && schemeEnd < buf.length && buf[schemeEnd] == ':') { + scheme = Ascii.lowerAscii(s.substring(cur.getPos(), schemeEnd)); + cur.updatePos(schemeEnd + 1); // skip ':' + } else { + cur.updatePos(0); // no scheme + } + + // ---- authority ---- "//" [userinfo "@"] host [ ":" port ] + if (cur.getPos() + 1 < buf.length && buf[cur.getPos()] == '/' && buf[cur.getPos() + 1] == '/') { + cur.updatePos(cur.getPos() + 2); // skip "//" + final int authStart = cur.getPos(); + final int authEnd = scanUntil(buf, authStart, buf.length, '/', '?', '#'); + + final int at = indexOf(buf, '@', authStart, authEnd); + final int hostStart; + if (at >= 0) { + userInfo = s.substring(authStart, at); + hostStart = at + 1; + } else { + hostStart = authStart; + } + if (hostStart >= authEnd) { + throw new IllegalArgumentException("Empty host in authority"); + } + + if (buf[hostStart] == '[') { + final int rb = indexOf(buf, ']', hostStart + 1, authEnd); + if (rb < 0) { + throw new IllegalArgumentException("Unclosed IPv6 literal"); + } + host = s.substring(hostStart, rb + 1); // keep literal verbatim + if (rb + 1 < authEnd && buf[rb + 1] == ':') { + port = Ports.parsePort(buf, rb + 2, authEnd); + } + } else { + final int colon = lastIndexOf(buf, ':', hostStart, authEnd); + if (colon >= 0) { + host = Ascii.lowerAscii(s.substring(hostStart, colon)); + port = Ports.parsePort(buf, colon + 1, authEnd); + } else { + host = Ascii.lowerAscii(s.substring(hostStart, authEnd)); + } + } + cur.updatePos(authEnd); + } + + // ---- path ---- + final int pathStart = cur.getPos(); + final int pathEnd = scanUntil(buf, pathStart, buf.length, '?', '#'); + path = s.substring(pathStart, pathEnd); + cur.updatePos(pathEnd); + + // ---- query ---- + if (cur.getPos() < buf.length && buf[cur.getPos()] == '?') { + final int qStart = cur.getPos() + 1; + final int qEnd = scanUntil(buf, qStart, buf.length, '#'); + query = 
s.substring(qStart, qEnd); + cur.updatePos(qEnd); + } + + // ---- fragment ---- + if (cur.getPos() < buf.length && buf[cur.getPos()] == '#') { + fragment = s.substring(cur.getPos() + 1); + cur.updatePos(buf.length); + } + + return new Rfc3986Uri(s, scheme, userInfo, host, port, path, query, fragment); + } + + + private static int scanScheme(final char[] a, final int from, final int toExcl) { + int finalFrom = from; + if (from >= toExcl) { + return -1; + } + if (!Ascii.isAlpha(a[from])) { + return -1; + } + finalFrom++; + while (finalFrom < toExcl) { + final char c = a[finalFrom]; + if (Ascii.isAlpha(c) || Ascii.isDigit(c) || c == '+' || c == '-' || c == '.') { + finalFrom++; + } else { + break; + } + } + return finalFrom; // caller checks for ':' + } + + private static int scanUntil(final char[] a, final int from, final int toExcl, final char... stops) { + outer: + for (int i = from; i < toExcl; i++) { + final char c = a[i]; + for (final char s : stops) { + if (c == s) { + return i; + } + } + } + return toExcl; + } + + private static int indexOf(final char[] a, final char ch, final int from, final int toExcl) { + for (int i = from; i < toExcl; i++) { + if (a[i] == ch) { + return i; + } + } + return -1; + } + + private static int lastIndexOf(final char[] a, final char ch, final int from, final int toExcl) { + for (int i = toExcl - 1; i >= from; i--) { + if (a[i] == ch) { + return i; + } + } + return -1; + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Renderer.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Renderer.java new file mode 100644 index 000000000..5606ca9eb --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Renderer.java @@ -0,0 +1,95 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.apache.hc.core5.net.uri.internal.uris; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.Internal; +import org.apache.hc.core5.annotation.ThreadingBehavior; + +/** + * Pre-sized StringBuilder renderer of URI components. 
+ * + * @since 5.4 + */ +@Internal +@Contract(threading = ThreadingBehavior.STATELESS) +final class Rfc3986Renderer { + private Rfc3986Renderer() { + } + + static String build(final String scheme, final String userInfo, final String host, final int port, + final String path, final String query, final String fragment) { + int cap = 0; + if (scheme != null) { + cap += scheme.length() + 1; + } + if (host != null) { + cap += 2 + host.length(); + if (userInfo != null) { + cap += userInfo.length() + 1; + } + if (port >= 0) { + cap += 6; + } + } + if (path != null) { + cap += path.length(); + } + if (query != null) { + cap += 1 + query.length(); + } + if (fragment != null) { + cap += 1 + fragment.length(); + } + + final StringBuilder sb = new StringBuilder(Math.max(16, cap)); + if (scheme != null) { + sb.append(scheme).append(':'); + } + if (host != null) { + sb.append("//"); + if (userInfo != null) { + sb.append(userInfo).append('@'); + } + sb.append(host); + if (port >= 0) { + sb.append(':').append(port); + } + } + if (path != null) { + sb.append(path); + } + if (query != null) { + sb.append('?').append(query); + } + if (fragment != null) { + sb.append('#').append(fragment); + } + return sb.toString(); + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Resolver.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Resolver.java new file mode 100644 index 000000000..aac96b30d --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Resolver.java @@ -0,0 +1,88 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.apache.hc.core5.net.uri.internal.uris; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.Internal; +import org.apache.hc.core5.annotation.ThreadingBehavior; +import org.apache.hc.core5.net.uri.Rfc3986Uri; +import org.apache.hc.core5.net.uri.internal.paths.DotSegments; +import org.apache.hc.core5.util.Args; + +/** + * Resolver per RFC 3986 §5.2. 
+ * + * @since 5.4 + */ +@Internal +@Contract(threading = ThreadingBehavior.STATELESS) +public final class Rfc3986Resolver { + + private Rfc3986Resolver() { + } + + public static Rfc3986Uri resolve(final Rfc3986Uri base, final Rfc3986Uri ref) { + Args.notNull(base, "base"); + Args.notNull(ref, "ref"); + + if (ref.getScheme() != null) { + final String p = DotSegments.remove(ref.getPath()); + return new Rfc3986Uri(ref.toString(), ref.getScheme(), ref.getUserInfo(), ref.getHost(), ref.getPort(), p, ref.getQuery(), ref.getFragment()); + } + if (ref.getHost() != null) { + final String p = DotSegments.remove(ref.getPath()); + return new Rfc3986Uri(ref.toString(), base.getScheme(), ref.getUserInfo(), ref.getHost(), ref.getPort(), p, ref.getQuery(), ref.getFragment()); + } + + final String mergedPath; + if (ref.getPath() == null || ref.getPath().isEmpty()) { + mergedPath = base.getPath(); + } else if (ref.getPath().startsWith("/")) { + mergedPath = DotSegments.remove(ref.getPath()); + } else { + mergedPath = DotSegments.remove(mergePaths(base, ref.getPath())); + } + + final String q = ref.getPath() == null || ref.getPath().isEmpty() + ? ref.getQuery() != null ? 
ref.getQuery() : base.getQuery() + : ref.getQuery(); + + return new Rfc3986Uri(base.toString(), base.getScheme(), base.getUserInfo(), base.getHost(), base.getPort(), mergedPath, q, ref.getFragment()); + } + + private static String mergePaths(final Rfc3986Uri base, final String relPath) { + if (base.getHost() != null && (base.getPath() == null || base.getPath().isEmpty())) { + return "/" + relPath; + } + final int slash = base.getPath().lastIndexOf('/'); + if (slash >= 0) { + return base.getPath().substring(0, slash + 1) + relPath; + } + return relPath; + } +} diff --git a/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/utils/Ascii.java b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/utils/Ascii.java new file mode 100644 index 000000000..70882a709 --- /dev/null +++ b/httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/utils/Ascii.java @@ -0,0 +1,204 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. 
For more + * information on the Apache Software Foundation, please see + * . + * + */ + +package org.apache.hc.core5.net.uri.internal.utils; + +import java.util.Arrays; + +import org.apache.hc.core5.annotation.Contract; +import org.apache.hc.core5.annotation.ThreadingBehavior; + +/** + * Branch-light ASCII helpers (no regex, no Character classes). + * + * @since 5.4 + */ +@Contract(threading = ThreadingBehavior.STATELESS) +public final class Ascii { + + // Bit flags for the CLASS table + private static final byte ALPHA = 1 << 0; + private static final byte DIGIT = 1 << 1; + private static final byte HEXMASK = 1 << 2; + private static final byte GENDELIM = 1 << 3; + private static final byte SUBDELIM = 1 << 4; + private static final byte UNRESERVED = 1 << 5; + + /** + * Per-ASCII classification flags. + */ + private static final byte[] CLASS = new byte[128]; + + /** + * ASCII → hex value (0..15) or -1 if not a hex digit. + */ + private static final byte[] HEX_VALUE = new byte[128]; + + static { + // init HEX_VALUE = -1 + Arrays.fill(HEX_VALUE, (byte) -1); + + // ALPHA + unreserved + for (int c = 'A'; c <= 'Z'; c++) { + CLASS[c] |= ALPHA | UNRESERVED; + } + for (int c = 'a'; c <= 'z'; c++) { + CLASS[c] |= ALPHA | UNRESERVED; + } + + // DIGIT + unreserved + hex values 0..9 + for (int c = '0'; c <= '9'; c++) { + CLASS[c] |= DIGIT | UNRESERVED | HEXMASK; + HEX_VALUE[c] = (byte) (c - '0'); + } + + // Hex A..F / a..f → 10..15 + for (int c = 'A'; c <= 'F'; c++) { + CLASS[c] |= HEXMASK; + HEX_VALUE[c] = (byte) (10 + (c - 'A')); + } + for (int c = 'a'; c <= 'f'; c++) { + CLASS[c] |= HEXMASK; + HEX_VALUE[c] = (byte) (10 + (c - 'a')); + } + + // unreserved punctuation - . _ ~ + CLASS['-'] |= UNRESERVED; + CLASS['.'] |= UNRESERVED; + CLASS['_'] |= UNRESERVED; + CLASS['~'] |= UNRESERVED; + + // gen-delims : / ? 
# [ ] @ + CLASS[':'] |= GENDELIM; + CLASS['/'] |= GENDELIM; + CLASS['?'] |= GENDELIM; + CLASS['#'] |= GENDELIM; + CLASS['['] |= GENDELIM; + CLASS[']'] |= GENDELIM; + CLASS['@'] |= GENDELIM; + + // sub-delims ! $ & ' ( ) * + , ; = + CLASS['!'] |= SUBDELIM; + CLASS['$'] |= SUBDELIM; + CLASS['&'] |= SUBDELIM; + CLASS['\''] |= SUBDELIM; + CLASS['('] |= SUBDELIM; + CLASS[')'] |= SUBDELIM; + CLASS['*'] |= SUBDELIM; + CLASS['+'] |= SUBDELIM; + CLASS[','] |= SUBDELIM; + CLASS[';'] |= SUBDELIM; + CLASS['='] |= SUBDELIM; + } + + private Ascii() { + } + + /** + * @return {@code true} if {@code c} is 7-bit US-ASCII. + */ + public static boolean isAscii(final int c) { + return (c & ~0x7F) == 0; + } + + public static boolean isAlpha(final int c) { + return isAscii(c) && (CLASS[c] & ALPHA) != 0; + } + + public static boolean isDigit(final int c) { + return isAscii(c) && (CLASS[c] & DIGIT) != 0; + } + + public static boolean isUnreserved(final int c) { + return isAscii(c) && (CLASS[c] & UNRESERVED) != 0; + } + + /** + * Hex value for ASCII hex char; returns -1 if not hex. + * Accepts '0'..'9','A'..'F','a'..'f'. + */ + public static int hexValue(final int c) { + return isAscii(c) ? HEX_VALUE[c] : -1; + } + + /** + * Alias requested by user code. + */ + public static int hexVal(final int c) { + return hexValue(c); + } + + /** + * Converts a 4-bit nibble (0..15) to uppercase hex ASCII ('0'..'9','A'..'F'). + * + * @throws IllegalArgumentException if value is outside 0..15 + */ + public static char toHexUpper(final int nibble) { + if ((nibble & ~0xF) != 0) { + throw new IllegalArgumentException("nibble out of range: " + nibble); + } + return (char) (nibble < 10 ? ('0' + nibble) : ('A' + (nibble - 10))); + } + + /** + * ASCII-lowercase conversion that avoids locale effects and allocations + * when the input is already lowercase. 
+ */ + public static String lowerAscii(final String s) { + if (s == null) { + return null; + } + final int n = s.length(); + for (int i = 0; i < n; i++) { + final char ch = s.charAt(i); + if (ch >= 'A' && ch <= 'Z') { + final char[] a = s.toCharArray(); + for (int j = i; j < n; j++) { + final char cj = a[j]; + if (cj >= 'A' && cj <= 'Z') { + a[j] = (char) (cj + 0x20); + } + } + return new String(a); + } + } + return s; + } + + /** + * Fast scan helpers (used by parsers). + */ + public static int indexOf(final char[] a, final char ch, final int from, final int toExcl) { + for (int i = from; i < toExcl; i++) { + if (a[i] == ch) { + return i; + } + } + return -1; + } + +} \ No newline at end of file diff --git a/httpcore5/src/test/java/org/apache/hc/core5/net/uri/Rfc3986UriRfcExamplesTest.java b/httpcore5/src/test/java/org/apache/hc/core5/net/uri/Rfc3986UriRfcExamplesTest.java new file mode 100644 index 000000000..e607d22c3 --- /dev/null +++ b/httpcore5/src/test/java/org/apache/hc/core5/net/uri/Rfc3986UriRfcExamplesTest.java @@ -0,0 +1,167 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ +package org.apache.hc.core5.net.uri; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.apache.hc.core5.net.uri.internal.paths.DotSegments; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * RFC 3986 compliance tests for our Rfc3986Uri implementation. + * Includes the resolution matrix and dot-segment removal examples straight from §5.2 / §5.4. + * + * @since 5.6 + */ +@DisplayName("RFC 3986 – Resolution & Dot-Segment Removal Examples") +final class Rfc3986UriRfcExamplesTest { + + private static Rfc3986Uri U(final String s) { + return Rfc3986Uri.parse(s); + } + + @Test + @DisplayName("§5.4.1 Resolution Examples (base: http://a/b/c/d;p?q)") + void resolutionMatrix() { + final Rfc3986Uri base = U("http://a/b/c/d;p?q"); + + // Absolute and net-path + assertEquals("g:h", Rfc3986Uri.resolve(base, U("g:h")).toString()); + assertEquals("http://a/b/c/g", Rfc3986Uri.resolve(base, U("g")).toString()); + assertEquals("http://a/b/c/g", Rfc3986Uri.resolve(base, U("./g")).toString()); + assertEquals("http://a/b/c/g/", Rfc3986Uri.resolve(base, U("g/")).toString()); + assertEquals("http://a/g", Rfc3986Uri.resolve(base, U("/g")).toString()); + assertEquals("http://g", Rfc3986Uri.resolve(base, U("//g")).toString()); + + // Query & fragment interactions + assertEquals("http://a/b/c/d;p?y", Rfc3986Uri.resolve(base, U("?y")).toString()); + assertEquals("http://a/b/c/g?y", Rfc3986Uri.resolve(base, U("g?y")).toString()); + assertEquals("http://a/b/c/d;p?q#s", Rfc3986Uri.resolve(base, U("#s")).toString()); // fragment-only keeps base query + assertEquals("http://a/b/c/g#s", 
Rfc3986Uri.resolve(base, U("g#s")).toString()); + assertEquals("http://a/b/c/g;x", Rfc3986Uri.resolve(base, U("g;x")).toString()); + assertEquals("http://a/b/c/g;x?y#s", Rfc3986Uri.resolve(base, U("g;x?y#s")).toString()); + + // No-op and dot-segments + assertEquals("http://a/b/c/d;p?q", Rfc3986Uri.resolve(base, U("")).toString()); + assertEquals("http://a/b/c/", Rfc3986Uri.resolve(base, U(".")).toString()); + assertEquals("http://a/b/c/", Rfc3986Uri.resolve(base, U("./")).toString()); + assertEquals("http://a/b/", Rfc3986Uri.resolve(base, U("..")).toString()); + assertEquals("http://a/b/", Rfc3986Uri.resolve(base, U("../")).toString()); + assertEquals("http://a/b/g", Rfc3986Uri.resolve(base, U("../g")).toString()); + assertEquals("http://a/", Rfc3986Uri.resolve(base, U("../..")).toString()); + assertEquals("http://a/", Rfc3986Uri.resolve(base, U("../../")).toString()); + assertEquals("http://a/g", Rfc3986Uri.resolve(base, U("../../g")).toString()); + } + + @Test + @DisplayName("§5.2.4 Dot-Segment Removal – canonical examples") + void dotSegmentRemovalExamples() { + // Examples adapted from RFC table (input -> expected) + assertEquals("/a/b/c/./../../g", "/a/b/c/./../../g"); // sanity on source + assertEquals("/a/g", DotSegments.remove("/a/b/c/./../../g")); + + // Trailing slash preservation for "." and ".." 
per §5.2.4 + assertEquals("/a/b/c/", DotSegments.remove("/a/b/c/.")); // keep trailing slash + assertEquals("/a/b/", DotSegments.remove("/a/b/c/..")); // keep trailing slash + + // Leading and internal edge cases + assertEquals("/", DotSegments.remove("/.")); // root with trailing slash + assertEquals("/", DotSegments.remove("/..")); // cannot go above root + assertEquals("", DotSegments.remove("")); // empty stays empty + assertEquals("..", DotSegments.remove("..")); // relative upward kept in relative paths + assertEquals("../x", DotSegments.remove("../x")); + assertEquals("a/b", DotSegments.remove("a/b")); + assertEquals("/a//b/", DotSegments.remove("/a//b/")); // double slash preserved structurally + } + + @Test + @DisplayName("Fragment-only reference: keep base query, replace fragment") + void fragmentOnlyKeepsQuery() { + final Rfc3986Uri base = U("http://a/b/c/d;p?q"); + assertEquals("http://a/b/c/d;p?q#frag", Rfc3986Uri.resolve(base, U("#frag")).toString()); + } + + @Test + @DisplayName("Relative-path merge when base has authority and empty path") + void mergeWhenBasePathEmpty() { + final Rfc3986Uri base = U("http://a?q"); + // base path is empty; merging a relative path must prefix with "/" (§5.2.3) + assertEquals("http://a/g", Rfc3986Uri.resolve(base, U("g")).toString()); + assertEquals("http://a/g/h", Rfc3986Uri.resolve(base, U("g/h")).toString()); + } + + @Test + @DisplayName("Equivalence: case-insensitive scheme/host, unreserved decoding, uppercased pct-hex") + void equivalenceNormalization() { + final Rfc3986Uri a = U("HTTP://EXAMPLE.COM/%7euser"); + final Rfc3986Uri b = U("http://example.com/~user"); + assertTrue(a.equivalentTo(b)); + + final Rfc3986Uri c = U("http://www.example.com/%3c"); + final Rfc3986Uri d = U("http://www.example.com/%3C"); + assertTrue(c.equivalentTo(d)); + } + + @Test + @DisplayName("IPv6 literal host parsing is preserved with brackets") + void ipv6LiteralAuthority() { + final Rfc3986Uri u = U("http://[2001:db8::1]:8080/a"); + 
assertEquals("[2001:db8::1]", u.getHost()); + assertEquals(8080, u.getPort()); + assertEquals("/a", u.getPath()); + assertEquals("http://[2001:db8::1]:8080/a", u.toString()); + } + + @Test + @DisplayName("Percent-encoded ampersand in path is preserved (HTTPCLIENT-1995 class of bugs)") + void encodedAmpersandInPathPreserved() { + final Rfc3986Uri u = U("http://example.com/a%26b/c"); + assertEquals("/a%26b/c", u.getPath()); + assertEquals("http://example.com/a%26b/c", u.toString()); + } + + @Test + @DisplayName("Round-trip: canonical form may lowercase host") + void rawRoundTrip() { + final String s = "scheme://user:pass@Host.EXAMPLE:1234/a/%7e/b;c;d?p=%26#Frag"; + final Rfc3986Uri u = Rfc3986Uri.parse(s); + + assertEquals("scheme", u.getScheme()); + assertEquals("user:pass", u.getUserInfo()); + assertEquals("host.example", u.getHost()); // normalized + assertEquals(1234, u.getPort()); + assertEquals("/a/%7e/b;c;d", u.getPath()); + assertEquals("p=%26", u.getQuery()); + assertEquals("Frag", u.getFragment()); + + // expect canonical serialization (host lower-cased) + final String expected = "scheme://user:pass@host.example:1234/a/%7e/b;c;d?p=%26#Frag"; + assertEquals(expected, u.toString()); + } +} diff --git a/httpcore5/src/test/java/org/apache/hc/core5/net/uri/Rfc3986UriTest.java b/httpcore5/src/test/java/org/apache/hc/core5/net/uri/Rfc3986UriTest.java new file mode 100644 index 000000000..e1977d3cf --- /dev/null +++ b/httpcore5/src/test/java/org/apache/hc/core5/net/uri/Rfc3986UriTest.java @@ -0,0 +1,116 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ +package org.apache.hc.core5.net.uri; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.apache.hc.core5.net.uri.internal.paths.DotSegments; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +@DisplayName("RFC 3986 URI – parsing, normalization, and resolution") +final class Rfc3986UriTest { + + @Test + void parseAbsoluteHttp() { + final Rfc3986Uri u = Rfc3986Uri.parse("http://user:pass@example.com:8080/a/b/c%20d?x=1&y=2#frag"); + assertEquals("http", u.getScheme()); + assertEquals("user:pass", u.getUserInfo()); + assertEquals("example.com", u.getHost()); + assertEquals(8080, u.getPort()); + assertEquals("/a/b/c%20d", u.getPath()); + assertEquals("x=1&y=2", u.getQuery()); + assertEquals("frag", u.getFragment()); + assertEquals("http://user:pass@example.com:8080/a/b/c%20d?x=1&y=2#frag", u.toString()); + } + + @Test + void preservePercentEncodedAmpersandInPath() { + final Rfc3986Uri u = Rfc3986Uri.parse("http://example.com/a%26b/c"); + // Guard for HTTPCLIENT-1995 type regressions + assertEquals("/a%26b/c", u.getPath()); + } + + @Test + void equivalenceWithUnreservedDecoding() { + final 
Rfc3986Uri a = Rfc3986Uri.parse("HTTP://EXAMPLE.COM/%7euser"); + final Rfc3986Uri b = Rfc3986Uri.parse("http://example.com/~user"); + assertTrue(a.equivalentTo(b)); + } + + @Test + void parseIpv6Literal() { + final Rfc3986Uri u = Rfc3986Uri.parse("http://[2001:db8::1]/a"); + assertEquals("[2001:db8::1]", u.getHost()); + assertEquals("/a", u.getPath()); + } + + @Test + void parseAuthorityLessRelative() { + final Rfc3986Uri u = Rfc3986Uri.parse("a/b?c#d"); + assertNull(u.getHost()); + assertEquals("a/b", u.getPath()); + assertEquals("c", u.getQuery()); + assertEquals("d", u.getFragment()); + } + + @Test + void dotSegmentRemoval() { + final String in = "/a/b/c/./../../g"; + final String out = DotSegments.remove(in); + assertEquals("/a/g", out); + } + + @Test + void resolveExamplesFromRfc() { + final Rfc3986Uri base = Rfc3986Uri.parse("http://a/b/c/d;p?q"); + + assertEquals("g:h", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("g:h")).toString()); + assertEquals("http://a/b/c/g", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("g")).toString()); + assertEquals("http://a/b/c/g", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("./g")).toString()); + assertEquals("http://a/b/c/g/", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("g/")).toString()); + assertEquals("http://a/g", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("/g")).toString()); + assertEquals("http://g", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("//g")).toString()); + assertEquals("http://a/b/c/d;p?y", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("?y")).toString()); + assertEquals("http://a/b/c/g?y", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("g?y")).toString()); + assertEquals("http://a/b/c/d;p?q#s", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("#s")).toString()); + assertEquals("http://a/b/c/g#s", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("g#s")).toString()); + assertEquals("http://a/b/c/g;x", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("g;x")).toString()); + assertEquals("http://a/b/c/g;x?y#s", Rfc3986Uri.resolve(base, 
Rfc3986Uri.parse("g;x?y#s")).toString()); + assertEquals("http://a/b/c/d;p?q", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("")).toString()); + assertEquals("http://a/b/c/", Rfc3986Uri.resolve(base, Rfc3986Uri.parse(".")).toString()); + assertEquals("http://a/b/c/", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("./")).toString()); + assertEquals("http://a/b/", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("..")).toString()); + assertEquals("http://a/b/", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("../")).toString()); + assertEquals("http://a/b/g", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("../g")).toString()); + assertEquals("http://a/", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("../..")).toString()); + assertEquals("http://a/", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("../../")).toString()); + assertEquals("http://a/g", Rfc3986Uri.resolve(base, Rfc3986Uri.parse("../../g")).toString()); + } +}