From c5ed3947c4b0996162701b021db24b2758ed9880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Volkan=20Yaz=C4=B1c=C4=B1?= Date: Fri, 20 Jun 2025 12:50:43 +0200 Subject: [PATCH 1/4] Rename `JavaLangAccess::*NoRepl` methods --- .../classes/java/io/DataInputStream.java | 2 +- .../share/classes/java/lang/String.java | 22 +++++++++---------- .../share/classes/java/lang/System.java | 22 +++++++++++-------- .../share/classes/java/math/BigDecimal.java | 2 +- .../share/classes/java/nio/file/Files.java | 4 ++-- .../share/classes/java/util/HexFormat.java | 12 +++++----- .../share/classes/java/util/UUID.java | 2 +- .../share/classes/java/util/zip/ZipCoder.java | 10 ++++----- .../jdk/internal/access/JavaLangAccess.java | 12 +++++----- .../unix/classes/sun/nio/fs/UnixPath.java | 2 +- 10 files changed, 47 insertions(+), 43 deletions(-) diff --git a/src/java.base/share/classes/java/io/DataInputStream.java b/src/java.base/share/classes/java/io/DataInputStream.java index daf75b1318f6e..ffb5ac70780f1 100644 --- a/src/java.base/share/classes/java/io/DataInputStream.java +++ b/src/java.base/share/classes/java/io/DataInputStream.java @@ -599,7 +599,7 @@ public static final String readUTF(DataInput in) throws IOException { if (ascii == utflen) { String str; if (trusted) { - str = JLA.uncheckedNewStringNoRepl(bytearr, StandardCharsets.ISO_8859_1); + str = JLA.uncheckedNewString(bytearr, StandardCharsets.ISO_8859_1); } else { str = new String(bytearr, 0, utflen, StandardCharsets.ISO_8859_1); } diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index eac8a1355b7f2..4a031ece8caca 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -696,7 +696,7 @@ private static String decode(Charset charset, byte[] bytes, int offset, int leng * {@code false} if the byte array can be exclusively used to construct * the string and is not modified or used for any other purpose. */ - static String newStringUTF8NoRepl(byte[] bytes, int offset, int length, boolean noShare) { + static String newStringUTF8NoReplacementChar(byte[] bytes, int offset, int length, boolean noShare) { checkBoundsOffCount(offset, length, bytes.length); if (length == 0) { return ""; @@ -759,11 +759,11 @@ static String newStringUTF8NoRepl(byte[] bytes, int offset, int length, boolean return new String(dst, UTF16); } - static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException { + static String newString(byte[] src, Charset cs) throws CharacterCodingException { try { - return newStringNoRepl1(src, cs); + return newStringNoReplacementChar(src, cs); } catch (IllegalArgumentException e) { - //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause + // newStringNoReplacementChar throws IAE with MalformedInputException or CCE as the cause Throwable cause = e.getCause(); if (cause instanceof MalformedInputException mie) { throw mie; @@ -772,13 +772,13 @@ static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingExce } } - private static String newStringNoRepl1(byte[] src, Charset cs) { + private static String newStringNoReplacementChar(byte[] src, Charset cs) { int len = src.length; if (len == 0) { return ""; } if (cs == UTF_8.INSTANCE) { - return newStringUTF8NoRepl(src, 0, src.length, false); + return newStringUTF8NoReplacementChar(src, 0, src.length, false); } if (cs == ISO_8859_1.INSTANCE) { if (COMPACT_STRINGS) @@ -916,7 +916,7 @@ private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, bool /* * Throws iae, instead of replacing, if unmappable. */ - static byte[] getBytesUTF8NoRepl(String s) { + static byte[] getBytesUTF8NoReplacementChar(String s) { return encodeUTF8(s.coder(), s.value(), false); } @@ -927,11 +927,11 @@ private static boolean isASCII(byte[] src) { /* * Throws CCE, instead of replacing, if unmappable. */ - static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException { + static byte[] getBytes(String s, Charset cs) throws CharacterCodingException { try { - return getBytesNoRepl1(s, cs); + return getBytesNoReplacementChar(s, cs); } catch (IllegalArgumentException e) { - //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause + // getBytesNoReplacementChar throws IAE with UnmappableCharacterException or CCE as the cause Throwable cause = e.getCause(); if (cause instanceof UnmappableCharacterException) { throw (UnmappableCharacterException)cause; @@ -940,7 +940,7 @@ static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingExcepti } } - private static byte[] getBytesNoRepl1(String s, Charset cs) { + private static byte[] getBytesNoReplacementChar(String s, Charset cs) { byte[] val = s.value(); byte coder = s.coder(); if (cs == UTF_8.INSTANCE) { diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java index 0175558d31348..dfa8b06d22c57 100644 --- a/src/java.base/share/classes/java/lang/System.java +++ b/src/java.base/share/classes/java/lang/System.java @@ -55,7 +55,6 @@ import java.util.ResourceBundle; import java.util.Set; import java.util.concurrent.Executor; -import java.util.concurrent.ScheduledExecutorService; import java.util.function.Supplier; import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Stream; @@ -2121,28 +2120,33 @@ public Stream layers(ClassLoader loader) { public int countPositives(byte[] bytes, int offset, int length) { return StringCoding.countPositives(bytes, offset, length); } + public int countNonZeroAscii(String s) { return StringCoding.countNonZeroAscii(s); } - public String uncheckedNewStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException { - return String.newStringNoRepl(bytes, cs); + + public String uncheckedNewString(byte[] bytes, Charset cs) throws CharacterCodingException { + return String.newString(bytes, cs); } + public char uncheckedGetUTF16Char(byte[] bytes, int index) { return StringUTF16.getChar(bytes, index); } + public void uncheckedPutCharUTF16(byte[] bytes, int index, int ch) { StringUTF16.putChar(bytes, index, ch); } - public byte[] uncheckedGetBytesNoRepl(String s, Charset cs) throws CharacterCodingException { - return String.getBytesNoRepl(s, cs); + + public byte[] uncheckedGetBytes(String s, Charset cs) throws CharacterCodingException { + return String.getBytes(s, cs); } - public String newStringUTF8NoRepl(byte[] bytes, int off, int len) { - return String.newStringUTF8NoRepl(bytes, off, len, true); + public String newStringUTF8NoReplacementChar(byte[] bytes, int off, int len) { + return String.newStringUTF8NoReplacementChar(bytes, off, len, true); } - public byte[] getBytesUTF8NoRepl(String s) { - return String.getBytesUTF8NoRepl(s); + public byte[] getBytesUTF8NoReplacementChar(String s) { + return String.getBytesUTF8NoReplacementChar(s); } public void inflateBytesToChars(byte[] src, int srcOff, char[] dst, int dstOff, int len) { diff --git a/src/java.base/share/classes/java/math/BigDecimal.java b/src/java.base/share/classes/java/math/BigDecimal.java index 534f840174cde..aba7bfd8a5692 100644 --- a/src/java.base/share/classes/java/math/BigDecimal.java +++ b/src/java.base/share/classes/java/math/BigDecimal.java @@ -4145,7 +4145,7 @@ private String layoutChars(boolean sci) { buf[highIntSize] = '.'; DecimalDigits.uncheckedPutPairLatin1(buf, highIntSize + 1, lowInt); try { - return JLA.uncheckedNewStringNoRepl(buf, StandardCharsets.ISO_8859_1); + return JLA.uncheckedNewString(buf, StandardCharsets.ISO_8859_1); } catch (CharacterCodingException cce) { throw new AssertionError(cce); } diff --git a/src/java.base/share/classes/java/nio/file/Files.java b/src/java.base/share/classes/java/nio/file/Files.java index f8278fa2642f2..aaf7398cdd234 100644 --- a/src/java.base/share/classes/java/nio/file/Files.java +++ b/src/java.base/share/classes/java/nio/file/Files.java @@ -3043,7 +3043,7 @@ public static String readString(Path path, Charset cs) throws IOException { byte[] ba = readAllBytes(path); if (path.getClass().getModule() != Object.class.getModule()) ba = ba.clone(); - return JLA.uncheckedNewStringNoRepl(ba, cs); + return JLA.uncheckedNewString(ba, cs); } /** @@ -3362,7 +3362,7 @@ public static Path writeString(Path path, CharSequence csq, Charset cs, OpenOpti Objects.requireNonNull(csq); Objects.requireNonNull(cs); - byte[] bytes = JLA.uncheckedGetBytesNoRepl(String.valueOf(csq), cs); + byte[] bytes = JLA.uncheckedGetBytes(String.valueOf(csq), cs); if (path.getClass().getModule() != Object.class.getModule()) bytes = bytes.clone(); write(path, bytes, options); diff --git a/src/java.base/share/classes/java/util/HexFormat.java b/src/java.base/share/classes/java/util/HexFormat.java index 99d047995fdb9..87c3cd4960b90 100644 --- a/src/java.base/share/classes/java/util/HexFormat.java +++ b/src/java.base/share/classes/java/util/HexFormat.java @@ -462,7 +462,7 @@ private String formatOptDelimiter(byte[] bytes, int fromIndex, int toIndex) { } try { // Return a new string using the bytes without making a copy - return jla.uncheckedNewStringNoRepl(rep, StandardCharsets.ISO_8859_1); + return jla.uncheckedNewString(rep, StandardCharsets.ISO_8859_1); } catch (CharacterCodingException cce) { throw new AssertionError(cce); } @@ -696,7 +696,7 @@ public String toHexDigits(byte value) { rep[0] = (byte)toHighHexDigit(value); rep[1] = (byte)toLowHexDigit(value); try { - return jla.uncheckedNewStringNoRepl(rep, StandardCharsets.ISO_8859_1); + return jla.uncheckedNewString(rep, StandardCharsets.ISO_8859_1); } catch (CharacterCodingException cce) { throw new AssertionError(cce); } @@ -732,7 +732,7 @@ public String toHexDigits(short value) { rep[3] = (byte)toLowHexDigit((byte)value); try { - return jla.uncheckedNewStringNoRepl(rep, StandardCharsets.ISO_8859_1); + return jla.uncheckedNewString(rep, StandardCharsets.ISO_8859_1); } catch (CharacterCodingException cce) { throw new AssertionError(cce); } @@ -760,7 +760,7 @@ public String toHexDigits(int value) { rep[7] = (byte)toLowHexDigit((byte)value); try { - return jla.uncheckedNewStringNoRepl(rep, StandardCharsets.ISO_8859_1); + return jla.uncheckedNewString(rep, StandardCharsets.ISO_8859_1); } catch (CharacterCodingException cce) { throw new AssertionError(cce); } @@ -796,7 +796,7 @@ public String toHexDigits(long value) { rep[15] = (byte)toLowHexDigit((byte)value); try { - return jla.uncheckedNewStringNoRepl(rep, StandardCharsets.ISO_8859_1); + return jla.uncheckedNewString(rep, StandardCharsets.ISO_8859_1); } catch (CharacterCodingException cce) { throw new AssertionError(cce); } @@ -824,7 +824,7 @@ public String toHexDigits(long value, int digits) { value = value >>> 4; } try { - return jla.uncheckedNewStringNoRepl(rep, StandardCharsets.ISO_8859_1); + return jla.uncheckedNewString(rep, StandardCharsets.ISO_8859_1); } catch (CharacterCodingException cce) { throw new AssertionError(cce); } diff --git a/src/java.base/share/classes/java/util/UUID.java b/src/java.base/share/classes/java/util/UUID.java index 5961fce9cb23c..425423d2746d9 100644 --- a/src/java.base/share/classes/java/util/UUID.java +++ b/src/java.base/share/classes/java/util/UUID.java @@ -480,7 +480,7 @@ public String toString() { ByteArrayLittleEndian.setLong(buf, 28, hex8(leastSigBits)); try { - return jla.uncheckedNewStringNoRepl(buf, StandardCharsets.ISO_8859_1); + return jla.uncheckedNewString(buf, StandardCharsets.ISO_8859_1); } catch (CharacterCodingException cce) { throw new AssertionError(cce); } diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java index 0c3282e351841..c35250c1704f1 100644 --- a/src/java.base/share/classes/java/util/zip/ZipCoder.java +++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java @@ -252,12 +252,12 @@ boolean isUTF8() { @Override String toString(byte[] ba, int off, int length) { - return JLA.newStringUTF8NoRepl(ba, off, length); + return JLA.newStringUTF8NoReplacementChar(ba, off, length); } @Override byte[] getBytes(String s) { - return JLA.getBytesUTF8NoRepl(s); + return JLA.getBytesUTF8NoReplacementChar(s); } @Override @@ -271,9 +271,9 @@ int checkedHash(byte[] a, int off, int len) throws Exception { // Non-ASCII, fall back to decoding a String // We avoid using decoder() here since the UTF8ZipCoder is // shared and that decoder is not thread safe. - // We use the JLA.newStringUTF8NoRepl variant to throw + // We use the JLA.newStringUTF8NoReplacementChar variant to throw // exceptions eagerly when opening ZipFiles - return hash(JLA.newStringUTF8NoRepl(a, off, len)); + return hash(JLA.newStringUTF8NoReplacementChar(a, off, len)); } int h = ArraysSupport.hashCodeOfUnsigned(a, off, len, 0); if (a[end - 1] != '/') { @@ -289,7 +289,7 @@ private boolean hasTrailingSlash(byte[] a, int end) { @Override byte compare(String str, byte[] b, int off, int len, boolean matchDirectory) { try { - byte[] encoded = JLA.uncheckedGetBytesNoRepl(str, UTF_8.INSTANCE); + byte[] encoded = JLA.uncheckedGetBytes(str, UTF_8.INSTANCE); int mismatch = Arrays.mismatch(encoded, 0, encoded.length, b, off, off+len); if (mismatch == -1) { return EXACT_MATCH; diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java index e8343274caca7..4af401588679c 100644 --- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java +++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java @@ -325,7 +325,7 @@ public interface JavaLangAccess { * @return the newly created string * @throws CharacterCodingException for malformed or unmappable bytes */ - String uncheckedNewStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException; + String uncheckedNewString(byte[] bytes, Charset cs) throws CharacterCodingException; /** * Encode the given string into a sequence of bytes using the specified @@ -343,7 +343,7 @@ public interface JavaLangAccess { * @return the encoded bytes * @throws CharacterCodingException for malformed input or unmappable characters */ - byte[] uncheckedGetBytesNoRepl(String s, Charset cs) throws CharacterCodingException; + byte[] uncheckedGetBytes(String s, Charset cs) throws CharacterCodingException; /** * Returns a new string by decoding from the given UTF-8 bytes array. @@ -353,7 +353,7 @@ public interface JavaLangAccess { * @return the newly created string * @throws IllegalArgumentException for malformed or unmappable bytes. */ - String newStringUTF8NoRepl(byte[] bytes, int off, int len); + String newStringUTF8NoReplacementChar(byte[] bytes, int off, int len); /** * Get the {@code char} at {@code index} in a {@code byte[]} in internal @@ -379,13 +379,13 @@ public interface JavaLangAccess { void uncheckedPutCharUTF16(byte[] bytes, int index, int ch); /** - * Encode the given string into a sequence of bytes using utf8. + * Encode the given string into a sequence of bytes using UTF-8. * * @param s the string to encode - * @return the encoded bytes in utf8 + * @return the encoded bytes in UTF-8 * @throws IllegalArgumentException for malformed surrogates */ - byte[] getBytesUTF8NoRepl(String s); + byte[] getBytesUTF8NoReplacementChar(String s); /** * Inflated copy from {@code byte[]} to {@code char[]}, as defined by diff --git a/src/java.base/unix/classes/sun/nio/fs/UnixPath.java b/src/java.base/unix/classes/sun/nio/fs/UnixPath.java index 5dfc73f57aaed..31f3ba2aad202 100644 --- a/src/java.base/unix/classes/sun/nio/fs/UnixPath.java +++ b/src/java.base/unix/classes/sun/nio/fs/UnixPath.java @@ -126,7 +126,7 @@ private static String normalize(String input, int len, int off) { private static byte[] encode(UnixFileSystem fs, String input) { input = fs.normalizeNativePath(input); try { - return JLA.uncheckedGetBytesNoRepl(input, Util.jnuEncoding()); + return JLA.uncheckedGetBytes(input, Util.jnuEncoding()); } catch (CharacterCodingException cce) { throw new InvalidPathException(input, "Malformed input or input contains unmappable characters"); From b4845109e1851ae9fd1bc4443e5d4b735c9430d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Volkan=20Yaz=C4=B1c=C4=B1?= Date: Thu, 17 Jul 2025 09:27:16 +0200 Subject: [PATCH 2/4] Convert IAE-throwing methods into CCE-throwing ones --- .../share/classes/java/lang/String.java | 127 ++++++++++-------- .../share/classes/java/lang/System.java | 8 +- .../share/classes/java/util/zip/ZipCoder.java | 16 ++- .../jdk/internal/access/JavaLangAccess.java | 8 +- 4 files changed, 90 insertions(+), 69 deletions(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 4a031ece8caca..6facb713b5c87 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -557,7 +557,7 @@ private String(Charset charset, byte[] bytes, int offset, int length) { if (length == 0) { str = ""; } else if (charset == UTF_8.INSTANCE) { - str = utf8(bytes, offset, length); + str = utf8ThrowingIae(bytes, offset, length); } else if (charset == ISO_8859_1.INSTANCE) { str = iso88591(bytes, offset, length); } else if (charset == US_ASCII.INSTANCE) { @@ -568,7 +568,35 @@ private String(Charset charset, byte[] bytes, int offset, int length) { this(str); } - private static String utf8(byte[] bytes, int offset, int length) { + private static String utf8ThrowingIae(byte[] bytes, int offset, int length) { + try { + return utf8(bytes, offset, length); + } catch (CharacterCodingException cce) { + throw cce2iae(cce); + } + } + + private static IllegalArgumentException cce2iae(CharacterCodingException cce) { + Throwable cause = cce.getCause(); + // If the CCE is caused by an IAE, it implies that IAE is injected by + // us to provide more context into CCE. Try swapping them to obtain an + // IAE caused by a CCE. + if (cause instanceof IllegalArgumentException iae) { + if (cce instanceof MalformedInputException mie) { + return new IllegalArgumentException( + iae.getMessage(), + new MalformedInputException(mie.getInputLength())); + } + if (cce instanceof UnmappableCharacterException uce) { + return new IllegalArgumentException( + iae.getMessage(), + new UnmappableCharacterException(uce.getInputLength())); + } + } + return new IllegalArgumentException(cce); + } + + private static String utf8(byte[] bytes, int offset, int length) throws CharacterCodingException { if (COMPACT_STRINGS) { int dp = StringCoding.countPositives(bytes, offset, length); if (dp == length) { @@ -696,7 +724,7 @@ private static String decode(Charset charset, byte[] bytes, int offset, int leng * {@code false} if the byte array can be exclusively used to construct * the string and is not modified or used for any other purpose. */ - static String newStringUTF8NoReplacementChar(byte[] bytes, int offset, int length, boolean noShare) { + static String newStringUTF8(byte[] bytes, int offset, int length, boolean noShare) throws CharacterCodingException { checkBoundsOffCount(offset, length, bytes.length); if (length == 0) { return ""; @@ -760,25 +788,12 @@ static String newStringUTF8NoReplacementChar(byte[] bytes, int offset, int lengt } static String newString(byte[] src, Charset cs) throws CharacterCodingException { - try { - return newStringNoReplacementChar(src, cs); - } catch (IllegalArgumentException e) { - // newStringNoReplacementChar throws IAE with MalformedInputException or CCE as the cause - Throwable cause = e.getCause(); - if (cause instanceof MalformedInputException mie) { - throw mie; - } - throw (CharacterCodingException)cause; - } - } - - private static String newStringNoReplacementChar(byte[] src, Charset cs) { int len = src.length; if (len == 0) { return ""; } if (cs == UTF_8.INSTANCE) { - return newStringUTF8NoReplacementChar(src, 0, src.length, false); + return newStringUTF8(src, 0, src.length, false); } if (cs == ISO_8859_1.INSTANCE) { if (COMPACT_STRINGS) @@ -806,13 +821,7 @@ private static String newStringNoReplacementChar(byte[] src, Charset cs) { } int en = scale(len, cd.maxCharsPerByte()); char[] ca = new char[en]; - int caLen; - try { - caLen = decodeWithDecoder(cd, ca, src, 0, src.length); - } catch (CharacterCodingException x) { - // throw via IAE - throw new IllegalArgumentException(x); - } + int caLen = decodeWithDecoder(cd, ca, src, 0, src.length); if (COMPACT_STRINGS) { byte[] val = StringUTF16.compress(ca, 0, caLen); byte coder = StringUTF16.coderFromArrayLen(val, caLen); @@ -847,7 +856,7 @@ private static Charset lookupCharset(String csn) throws UnsupportedEncodingExcep } } - private static byte[] encode(Charset cs, byte coder, byte[] val) { + private static byte[] encode(Charset cs, byte coder, byte[] val) throws CharacterCodingException { if (cs == UTF_8.INSTANCE) { return encodeUTF8(coder, val, true); } @@ -860,7 +869,7 @@ private static byte[] encode(Charset cs, byte coder, byte[] val) { return encodeWithEncoder(cs, coder, val, true); } - private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, boolean doReplace) { + private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, boolean doReplace) throws CharacterCodingException { CharsetEncoder ce = cs.newEncoder(); int len = val.length >> coder; // assume LATIN1=0/UTF16=1; int en = scale(len, ce.maxBytesPerChar()); @@ -905,7 +914,7 @@ private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, bool cr.throwException(); } catch (CharacterCodingException x) { if (!doReplace) { - throw new IllegalArgumentException(x); + throw x; } else { throw new Error(x); } @@ -916,7 +925,7 @@ private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, bool /* * Throws iae, instead of replacing, if unmappable. */ - static byte[] getBytesUTF8NoReplacementChar(String s) { + static byte[] getBytesUTF8(String s) throws CharacterCodingException { return encodeUTF8(s.coder(), s.value(), false); } @@ -928,19 +937,6 @@ private static boolean isASCII(byte[] src) { * Throws CCE, instead of replacing, if unmappable. */ static byte[] getBytes(String s, Charset cs) throws CharacterCodingException { - try { - return getBytesNoReplacementChar(s, cs); - } catch (IllegalArgumentException e) { - // getBytesNoReplacementChar throws IAE with UnmappableCharacterException or CCE as the cause - Throwable cause = e.getCause(); - if (cause instanceof UnmappableCharacterException) { - throw (UnmappableCharacterException)cause; - } - throw (CharacterCodingException)cause; - } - } - - private static byte[] getBytesNoReplacementChar(String s, Charset cs) { byte[] val = s.value(); byte coder = s.coder(); if (cs == UTF_8.INSTANCE) { @@ -1005,11 +1001,11 @@ private static void replaceNegatives(byte[] val, int fromIndex) { } } - private static byte[] encode8859_1(byte coder, byte[] val) { + private static byte[] encode8859_1(byte coder, byte[] val) throws UnmappableCharacterException { return encode8859_1(coder, val, true); } - private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) { + private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) throws UnmappableCharacterException { if (coder == LATIN1) { return val.clone(); } @@ -1118,7 +1114,8 @@ private static int decode4(int b1, int b2, int b3, int b4) { ((byte) 0x80 << 0)))); } - private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) { + private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) + throws CharacterCodingException { while (sp < sl) { int b1 = src[sp++]; if (b1 >= 0) { @@ -1259,27 +1256,31 @@ private static int malformed4(byte[] src, int sp) { return 3; } - private static void throwMalformed(int off, int nb) { + private static void throwMalformed(int off, int nb) throws MalformedInputException { + MalformedInputException mie = new MalformedInputException(nb); String msg = "malformed input off : " + off + ", length : " + nb; - throw new IllegalArgumentException(msg, new MalformedInputException(nb)); + mie.initCause(new IllegalArgumentException(msg)); + throw mie; } - private static void throwMalformed(byte[] val) { + private static void throwMalformed(byte[] val) throws MalformedInputException { int dp = StringCoding.countPositives(val, 0, val.length); throwMalformed(dp, 1); } - private static void throwUnmappable(int off) { + private static void throwUnmappable(int off) throws UnmappableCharacterException { + UnmappableCharacterException uce = new UnmappableCharacterException(1); String msg = "malformed input off : " + off + ", length : 1"; - throw new IllegalArgumentException(msg, new UnmappableCharacterException(1)); + uce.initCause(new IllegalArgumentException(msg, uce)); + throw uce; } - private static void throwUnmappable(byte[] val) { + private static void throwUnmappable(byte[] val) throws UnmappableCharacterException { int dp = StringCoding.countPositives(val, 0, val.length); throwUnmappable(dp); } - private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) { + private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) throws UnmappableCharacterException { if (coder == UTF16) { return encodeUTF8_UTF16(val, doReplace); } @@ -1304,7 +1305,7 @@ private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) { return Arrays.copyOf(dst, dp); } - private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) { + private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) throws UnmappableCharacterException { int dp = 0; int sp = 0; int sl = val.length >> 1; @@ -1369,7 +1370,7 @@ private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) { * @param val UTF16 encoded byte array * @param doReplace true to replace unmappable characters */ - private static long computeSizeUTF8_UTF16(byte[] val, boolean doReplace) { + private static long computeSizeUTF8_UTF16(byte[] val, boolean doReplace) throws UnmappableCharacterException { long dp = 0L; int sp = 0; int sl = val.length >> 1; @@ -1823,7 +1824,11 @@ public void getBytes(int srcBegin, int srcEnd, byte[] dst, int dstBegin) { */ public byte[] getBytes(String charsetName) throws UnsupportedEncodingException { - return encode(lookupCharset(charsetName), coder(), value); + try { + return encode(lookupCharset(charsetName), coder(), value); + } catch (CharacterCodingException uce) { + throw cce2iae(uce); + } } /** @@ -1846,8 +1851,12 @@ public byte[] getBytes(String charsetName) */ public byte[] getBytes(Charset charset) { if (charset == null) throw new NullPointerException(); - return encode(charset, coder(), value); - } + try { + return encode(charset, coder(), value); + } catch (CharacterCodingException uce) { + throw cce2iae(uce); + } + } /** * Encodes this {@code String} into a sequence of bytes using the @@ -1864,7 +1873,11 @@ public byte[] getBytes(Charset charset) { * @since 1.1 */ public byte[] getBytes() { - return encode(Charset.defaultCharset(), coder(), value); + try { + return encode(Charset.defaultCharset(), coder(), value); + } catch (CharacterCodingException uce) { + throw cce2iae(uce); + } } boolean bytesCompatible(Charset charset) { diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java index dfa8b06d22c57..1c5300e269b10 100644 --- a/src/java.base/share/classes/java/lang/System.java +++ b/src/java.base/share/classes/java/lang/System.java @@ -2141,12 +2141,12 @@ public byte[] uncheckedGetBytes(String s, Charset cs) throws CharacterCodingExce return String.getBytes(s, cs); } - public String newStringUTF8NoReplacementChar(byte[] bytes, int off, int len) { - return String.newStringUTF8NoReplacementChar(bytes, off, len, true); + public String newStringUTF8(byte[] bytes, int off, int len) throws CharacterCodingException { + return String.newStringUTF8(bytes, off, len, true); } - public byte[] getBytesUTF8NoReplacementChar(String s) { - return String.getBytesUTF8NoReplacementChar(s); + public byte[] getBytesUTF8(String s) throws CharacterCodingException { + return String.getBytesUTF8(s); } public void inflateBytesToChars(byte[] src, int srcOff, char[] dst, int dstOff, int len) { diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java index c35250c1704f1..be7e6e46ae547 100644 --- a/src/java.base/share/classes/java/util/zip/ZipCoder.java +++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java @@ -252,12 +252,20 @@ boolean isUTF8() { @Override String toString(byte[] ba, int off, int length) { - return JLA.newStringUTF8NoReplacementChar(ba, off, length); + try { + return JLA.newStringUTF8(ba, off, length); + } catch (CharacterCodingException cce) { + throw new IllegalArgumentException(cce); + } } @Override byte[] getBytes(String s) { - return JLA.getBytesUTF8NoReplacementChar(s); + try { + return JLA.getBytesUTF8(s); + } catch (CharacterCodingException cce) { + throw new IllegalArgumentException(cce); + } } @Override @@ -271,9 +279,9 @@ int checkedHash(byte[] a, int off, int len) throws Exception { // Non-ASCII, fall back to decoding a String // We avoid using decoder() here since the UTF8ZipCoder is // shared and that decoder is not thread safe. - // We use the JLA.newStringUTF8NoReplacementChar variant to throw + // We use the JLA.newStringUTF8 variant to throw // exceptions eagerly when opening ZipFiles - return hash(JLA.newStringUTF8NoReplacementChar(a, off, len)); + return hash(JLA.newStringUTF8(a, off, len)); } int h = ArraysSupport.hashCodeOfUnsigned(a, off, len, 0); if (a[end - 1] != '/') { diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java index 4af401588679c..5d605f9ba56c3 100644 --- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java +++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java @@ -353,7 +353,7 @@ public interface JavaLangAccess { * @return the newly created string * @throws IllegalArgumentException for malformed or unmappable bytes. */ - String newStringUTF8NoReplacementChar(byte[] bytes, int off, int len); + String newStringUTF8(byte[] bytes, int off, int len) throws CharacterCodingException; /** * Get the {@code char} at {@code index} in a {@code byte[]} in internal @@ -379,13 +379,13 @@ public interface JavaLangAccess { void uncheckedPutCharUTF16(byte[] bytes, int index, int ch); /** - * Encode the given string into a sequence of bytes using UTF-8. + * Encode the given string into a sequence of bytes using utf-8. * * @param s the string to encode - * @return the encoded bytes in UTF-8 + * @return the encoded bytes in utf-8 * @throws IllegalArgumentException for malformed surrogates */ - byte[] getBytesUTF8NoReplacementChar(String s); + byte[] getBytesUTF8(String s) throws CharacterCodingException; /** * Inflated copy from {@code byte[]} to {@code char[]}, as defined by From 10cb72cf17bd400d2408e71b453ead21b674791e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Volkan=20Yaz=C4=B1c=C4=B1?= Date: Thu, 24 Jul 2025 10:49:51 +0200 Subject: [PATCH 3/4] Improve docs of touched methods and add NPE checks --- .../share/classes/java/lang/String.java | 47 ++++++++++++++----- .../jdk/internal/access/JavaLangAccess.java | 32 ++++++------- 2 files changed, 52 insertions(+), 27 deletions(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 6facb713b5c87..36d9b5ad06aa5 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -717,14 +717,22 @@ private static String decode(Charset charset, byte[] bytes, int offset, int leng } /* - * Throws iae, instead of replacing, if malformed or unmappable. + * {@return a new string by decoding from the given UTF-8 bytes array} * + * @param offset the index of the first byte to decode + * @param length the number of bytes to decode * @param noShare * {@code true} if the resulting string MUST NOT share the byte array, * {@code false} if the byte array can be exclusively used to construct * the string and is not modified or used for any other purpose. + * @throws NullPointerException If {@code bytes} is null + * @throws StringIndexOutOfBoundsException If {@code offset} is negative, + * {@code length} is negative, or {@code offset} is greater than + * {@code bytes.length - length} + * @throws CharacterCodingException for malformed input or unmappable characters */ static String newStringUTF8(byte[] bytes, int offset, int length, boolean noShare) throws CharacterCodingException { + Objects.requireNonNull(bytes, "bytes"); checkBoundsOffCount(offset, length, bytes.length); if (length == 0) { return ""; @@ -922,10 +930,15 @@ private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, bool return trimArray(ba, bb.position()); } - /* - * Throws iae, instead of replacing, if unmappable. + /** + * {@return the sequence of bytes obtained by encoding the given string in UTF-8} + * + * @param s the string to encode + * @throws NullPointerException If {@code s} is null + * @throws CharacterCodingException For malformed input or unmappable characters */ static byte[] getBytesUTF8(String s) throws CharacterCodingException { + Objects.requireNonNull(s, "s"); return encodeUTF8(s.coder(), s.value(), false); } @@ -933,10 +946,22 @@ private static boolean isASCII(byte[] src) { return !StringCoding.hasNegatives(src, 0, src.length); } - /* - * Throws CCE, instead of replacing, if unmappable. + /** + * {@return the sequence of bytes obtained by encoding the given string in + * the specified {@linkplain java.nio.charset.Charset charset}} + *

+ * WARNING: This method returns the {@code byte[]} backing the provided + * {@code String}, if the input is ASCII. Hence, the returned byte array + * must not be modified. + * + * @param s the string to encode + * @param cs the charset + * @throws NullPointerException If {@code s} or {@code cs} is null + * @throws CharacterCodingException For malformed input or unmappable characters */ static byte[] getBytes(String s, Charset cs) throws CharacterCodingException { + Objects.requireNonNull(s, "s"); + Objects.requireNonNull(cs, "cs"); byte[] val = s.value(); byte coder = s.coder(); if (cs == UTF_8.INSTANCE) { @@ -1826,8 +1851,8 @@ public byte[] getBytes(String charsetName) throws UnsupportedEncodingException { try { return encode(lookupCharset(charsetName), coder(), value); - } catch (CharacterCodingException uce) { - throw cce2iae(uce); + } catch (CharacterCodingException cce) { + throw cce2iae(cce); } } @@ -1853,8 +1878,8 @@ public byte[] getBytes(Charset charset) { if (charset == null) throw new NullPointerException(); try { return encode(charset, coder(), value); - } catch (CharacterCodingException uce) { - throw cce2iae(uce); + } catch (CharacterCodingException cce) { + throw cce2iae(cce); } } @@ -1875,8 +1900,8 @@ public byte[] getBytes(Charset charset) { public byte[] getBytes() { try { return encode(Charset.defaultCharset(), coder(), value); - } catch (CharacterCodingException uce) { - throw cce2iae(uce); + } catch (CharacterCodingException cce) { + throw cce2iae(cce); } } diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java index 5d605f9ba56c3..44c1bfa819b99 100644 --- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java +++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java @@ -328,32 +328,32 @@ public interface JavaLangAccess { String uncheckedNewString(byte[] bytes, Charset cs) throws CharacterCodingException; /** - * Encode the given string into a sequence of bytes using the specified - * {@linkplain java.nio.charset.Charset charset}. + * {@return the sequence of bytes obtained by encoding the given string in + * the specified {@linkplain java.nio.charset.Charset charset}} *

* WARNING: This method returns the {@code byte[]} backing the provided * {@code String}, if the input is ASCII. Hence, the returned byte array * must not be modified. - *

- * This method throws {@code CharacterCodingException} instead of replacing - * when malformed input or unmappable characters are encountered. * * @param s the string to encode * @param cs the charset - * @return the encoded bytes - * @throws CharacterCodingException for malformed input or unmappable characters + * @throws NullPointerException If {@code s} or {@code cs} is null + * @throws CharacterCodingException For malformed input or unmappable characters */ byte[] uncheckedGetBytes(String s, Charset cs) throws CharacterCodingException; /** - * Returns a new string by decoding from the given UTF-8 bytes array. + * {@return a new string by decoding from the given UTF-8 bytes array} * - * @param off the index of the first byte to decode - * @param len the number of bytes to decode - * @return the newly created string - * @throws IllegalArgumentException for malformed or unmappable bytes. + * @param offset the index of the first byte to decode + * @param length the number of bytes to decode + * @throws NullPointerException If {@code bytes} is null + * @throws StringIndexOutOfBoundsException If {@code offset} is negative, + * {@code length} is negative, or {@code offset} is greater than + * {@code bytes.length - length} + * @throws CharacterCodingException for malformed input or unmappable characters */ - String newStringUTF8(byte[] bytes, int off, int len) throws CharacterCodingException; + String newStringUTF8(byte[] bytes, int offset, int length) throws CharacterCodingException; /** * Get the {@code char} at {@code index} in a {@code byte[]} in internal @@ -379,11 +379,11 @@ public interface JavaLangAccess { void uncheckedPutCharUTF16(byte[] bytes, int index, int ch); /** - * Encode the given string into a sequence of bytes using utf-8. + * {@return the sequence of bytes obtained by encoding the given string in UTF-8} * * @param s the string to encode - * @return the encoded bytes in utf-8 - * @throws IllegalArgumentException for malformed surrogates + * @throws NullPointerException If {@code s} is null + * @throws CharacterCodingException For malformed input or unmappable characters */ byte[] getBytesUTF8(String s) throws CharacterCodingException; From 5f555a684e2d50519d9b10def137eca16da4b695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Volkan=20Yaz=C4=B1c=C4=B1?= Date: Fri, 25 Jul 2025 10:08:14 +0200 Subject: [PATCH 4/4] Replace `requireNonNull` with implicit null checks --- src/java.base/share/classes/java/lang/String.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 36d9b5ad06aa5..6b5f309aad299 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -732,8 +732,7 @@ private static String decode(Charset charset, byte[] bytes, int offset, int leng * @throws CharacterCodingException for malformed input or unmappable characters */ static String newStringUTF8(byte[] bytes, int offset, int length, boolean noShare) throws CharacterCodingException { - Objects.requireNonNull(bytes, "bytes"); - checkBoundsOffCount(offset, length, bytes.length); + checkBoundsOffCount(offset, length, bytes.length); // Implicit null check on `bytes` if (length == 0) { return ""; } @@ -938,8 +937,7 @@ private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, bool * @throws CharacterCodingException For malformed input or unmappable characters */ static byte[] getBytesUTF8(String s) throws CharacterCodingException { - Objects.requireNonNull(s, "s"); - return encodeUTF8(s.coder(), s.value(), false); + return encodeUTF8(s.coder(), s.value(), false); // Implicit null check on `s` } private static boolean isASCII(byte[] src) {