diff --git a/src/java.base/share/classes/java/io/DataInputStream.java b/src/java.base/share/classes/java/io/DataInputStream.java index daf75b1318f6e..f5447226a68ce 100644 --- a/src/java.base/share/classes/java/io/DataInputStream.java +++ b/src/java.base/share/classes/java/io/DataInputStream.java @@ -599,7 +599,7 @@ public static final String readUTF(DataInput in) throws IOException { if (ascii == utflen) { String str; if (trusted) { - str = JLA.uncheckedNewStringNoRepl(bytearr, StandardCharsets.ISO_8859_1); + str = JLA.uncheckedNewStringNoReplacement(bytearr, StandardCharsets.ISO_8859_1); } else { str = new String(bytearr, 0, utflen, StandardCharsets.ISO_8859_1); } diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index eac8a1355b7f2..6255d1dfd7657 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -604,14 +604,14 @@ private static String utf8(byte[] bytes, int offset, int length) { } byte[] utf16 = StringUTF16.newBytesFor(length); StringLatin1.inflate(latin1, 0, utf16, 0, dp); - dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp, true); + dp = decodeUTF8_UTF16(latin1, sp, length, utf16, dp); if (dp != length) { utf16 = Arrays.copyOf(utf16, dp << 1); } return new String(utf16, UTF16); } else { // !COMPACT_STRINGS byte[] dst = StringUTF16.newBytesFor(length); - int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, true); + int dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0); if (dp != length) { dst = Arrays.copyOf(dst, dp << 1); } @@ -689,15 +689,22 @@ private static String decode(Charset charset, byte[] bytes, int offset, int leng } /* - * Throws iae, instead of replacing, if malformed or unmappable. + * {@return a new string by decoding from the given UTF-8 bytes array} * + * @param offset the index of the first byte to decode + * @param length the number of bytes to decode * @param noShare * {@code true} if the resulting string MUST NOT share the byte array, * {@code false} if the byte array can be exclusively used to construct * the string and is not modified or used for any other purpose. + * @throws NullPointerException If {@code bytes} is null + * @throws StringIndexOutOfBoundsException If {@code offset} is negative, + * {@code length} is negative, or {@code offset} is greater than + * {@code bytes.length - length} + * @throws CharacterCodingException for malformed input or unmappable characters */ - static String newStringUTF8NoRepl(byte[] bytes, int offset, int length, boolean noShare) { - checkBoundsOffCount(offset, length, bytes.length); + static String newStringUTF8NoReplacement(byte[] bytes, int offset, int length, boolean noShare) throws CharacterCodingException { + checkBoundsOffCount(offset, length, bytes.length); // Implicit null check on `bytes` if (length == 0) { return ""; } @@ -748,10 +755,10 @@ static String newStringUTF8NoRepl(byte[] bytes, int offset, int length, boolean StringLatin1.inflate(dst, 0, buf, 0, dp); dst = buf; } - dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false); + dp = decodeUTF8_UTF16NoReplacement(bytes, offset, sl, dst, dp); } else { // !COMPACT_STRINGS dst = StringUTF16.newBytesFor(length); - dp = decodeUTF8_UTF16(bytes, offset, offset + length, dst, 0, false); + dp = decodeUTF8_UTF16NoReplacement(bytes, offset, offset + length, dst, 0); } if (dp != length) { dst = Arrays.copyOf(dst, dp << 1); @@ -759,26 +766,13 @@ static String newStringUTF8NoRepl(byte[] bytes, int offset, int length, boolean return new String(dst, UTF16); } - static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException { - try { - return newStringNoRepl1(src, cs); - } catch (IllegalArgumentException e) { - //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause - Throwable cause = e.getCause(); - if (cause instanceof MalformedInputException mie) { - throw mie; - } - throw (CharacterCodingException)cause; - } - } - - private static String newStringNoRepl1(byte[] src, Charset cs) { + static String newStringNoReplacement(byte[] src, Charset cs) throws CharacterCodingException { int len = src.length; if (len == 0) { return ""; } if (cs == UTF_8.INSTANCE) { - return newStringUTF8NoRepl(src, 0, src.length, false); + return newStringUTF8NoReplacement(src, 0, src.length, false); } if (cs == ISO_8859_1.INSTANCE) { if (COMPACT_STRINGS) @@ -791,7 +785,7 @@ private static String newStringNoRepl1(byte[] src, Charset cs) { return new String(src, LATIN1); return new String(StringLatin1.inflate(src, 0, src.length), UTF16); } else { - throwMalformed(src); + throw malformedInputException(src); } } @@ -806,13 +800,7 @@ private static String newStringNoRepl1(byte[] src, Charset cs) { } int en = scale(len, cd.maxCharsPerByte()); char[] ca = new char[en]; - int caLen; - try { - caLen = decodeWithDecoder(cd, ca, src, 0, src.length); - } catch (CharacterCodingException x) { - // throw via IAE - throw new IllegalArgumentException(x); - } + int caLen = decodeWithDecoder(cd, ca, src, 0, src.length); if (COMPACT_STRINGS) { byte[] val = StringUTF16.compress(ca, 0, caLen); byte coder = StringUTF16.coderFromArrayLen(val, caLen); @@ -849,7 +837,7 @@ private static Charset lookupCharset(String csn) throws UnsupportedEncodingExcep private static byte[] encode(Charset cs, byte coder, byte[] val) { if (cs == UTF_8.INSTANCE) { - return encodeUTF8(coder, val, true); + return encodeUTF8(coder, val); } if (cs == ISO_8859_1.INSTANCE) { return encode8859_1(coder, val); @@ -857,15 +845,15 @@ private static byte[] encode(Charset cs, byte coder, byte[] val) { if (cs == US_ASCII.INSTANCE) { return encodeASCII(coder, val); } - return encodeWithEncoder(cs, coder, val, true); + return encodeWithEncoder(cs, coder, val); } - private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, boolean doReplace) { + private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val) { CharsetEncoder ce = cs.newEncoder(); int len = val.length >> coder; // assume LATIN1=0/UTF16=1; int en = scale(len, ce.maxBytesPerChar()); - // fastpath with ArrayEncoder implies `doReplace`. - if (doReplace && ce instanceof ArrayEncoder ae) { + // Fast-path with `ArrayEncoder` implies replacement. + if (ce instanceof ArrayEncoder ae) { // fastpath for ascii compatible if (coder == LATIN1 && ae.isASCIICompatible() && @@ -888,10 +876,8 @@ private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, bool if (len == 0) { return ba; } - if (doReplace) { - ce.onMalformedInput(CodingErrorAction.REPLACE) - .onUnmappableCharacter(CodingErrorAction.REPLACE); - } + ce.onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE); char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val) : StringUTF16.toChars(val); ByteBuffer bb = ByteBuffer.wrap(ba); @@ -904,67 +890,87 @@ private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, bool if (!cr.isUnderflow()) cr.throwException(); } catch (CharacterCodingException x) { - if (!doReplace) { - throw new IllegalArgumentException(x); - } else { - throw new Error(x); - } + throw new Error(x); } return trimArray(ba, bb.position()); } - /* - * Throws iae, instead of replacing, if unmappable. + private static byte[] encodeWithEncoderNoReplacement(Charset cs, byte coder, byte[] val) throws CharacterCodingException { + CharsetEncoder ce = cs.newEncoder(); + int len = val.length >> coder; // assume LATIN1=0/UTF16=1; + int en = scale(len, ce.maxBytesPerChar()); + byte[] ba = new byte[en]; + if (len == 0) { + return ba; + } + char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val) + : StringUTF16.toChars(val); + ByteBuffer bb = ByteBuffer.wrap(ba); + CharBuffer cb = CharBuffer.wrap(ca, 0, len); + CoderResult cr = ce.encode(cb, bb, true); + if (!cr.isUnderflow()) + cr.throwException(); + cr = ce.flush(bb); + if (!cr.isUnderflow()) + cr.throwException(); + return trimArray(ba, bb.position()); + } + + /** + * {@return the sequence of bytes obtained by encoding the given string in UTF-8} + * + * @param s the string to encode + * @throws NullPointerException If {@code s} is null + * @throws CharacterCodingException For malformed input or unmappable characters */ - static byte[] getBytesUTF8NoRepl(String s) { - return encodeUTF8(s.coder(), s.value(), false); + static byte[] getBytesUTF8NoReplacement(String s) throws CharacterCodingException { + return encodeUTF8NoReplacement(s.coder(), s.value()); // Implicit null check on `s` } private static boolean isASCII(byte[] src) { return !StringCoding.hasNegatives(src, 0, src.length); } - /* - * Throws CCE, instead of replacing, if unmappable. + /** + * {@return the sequence of bytes obtained by encoding the given string in + * the specified {@linkplain java.nio.charset.Charset charset}} + *
+ * WARNING: This method returns the {@code byte[]} backing the provided
+ * {@code String}, if the input is ASCII. Hence, the returned byte array
+ * must not be modified.
+ *
+ * @param s the string to encode
+ * @param cs the charset
+ * @throws NullPointerException If {@code s} or {@code cs} is null
+ * @throws CharacterCodingException For malformed input or unmappable characters
*/
- static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
- try {
- return getBytesNoRepl1(s, cs);
- } catch (IllegalArgumentException e) {
- //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause
- Throwable cause = e.getCause();
- if (cause instanceof UnmappableCharacterException) {
- throw (UnmappableCharacterException)cause;
- }
- throw (CharacterCodingException)cause;
- }
- }
-
- private static byte[] getBytesNoRepl1(String s, Charset cs) {
+ static byte[] getBytesNoReplacement(String s, Charset cs) throws CharacterCodingException {
+ Objects.requireNonNull(s, "s");
+ Objects.requireNonNull(cs, "cs");
byte[] val = s.value();
byte coder = s.coder();
if (cs == UTF_8.INSTANCE) {
if (coder == LATIN1 && isASCII(val)) {
return val;
}
- return encodeUTF8(coder, val, false);
+ return encodeUTF8NoReplacement(coder, val);
}
if (cs == ISO_8859_1.INSTANCE) {
if (coder == LATIN1) {
return val;
}
- return encode8859_1(coder, val, false);
+ return encode8859_1NoReplacement(coder, val);
}
if (cs == US_ASCII.INSTANCE) {
if (coder == LATIN1) {
if (isASCII(val)) {
return val;
} else {
- throwUnmappable(val);
+ throw unmappableCharacterException(val);
}
}
}
- return encodeWithEncoder(cs, coder, val, false);
+ return encodeWithEncoderNoReplacement(cs, coder, val);
}
private static byte[] encodeASCII(byte coder, byte[] val) {
@@ -1006,10 +1012,6 @@ private static void replaceNegatives(byte[] val, int fromIndex) {
}
private static byte[] encode8859_1(byte coder, byte[] val) {
- return encode8859_1(coder, val, true);
- }
-
- private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
if (coder == LATIN1) {
return val.clone();
}
@@ -1023,9 +1025,6 @@ private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
sp = sp + ret;
dp = dp + ret;
if (ret != len) {
- if (!doReplace) {
- throwUnmappable(sp);
- }
char c = StringUTF16.getChar(val, sp++);
if (Character.isHighSurrogate(c) && sp < sl &&
Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
@@ -1041,6 +1040,28 @@ private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
return Arrays.copyOf(dst, dp);
}
+ private static byte[] encode8859_1NoReplacement(byte coder, byte[] val) throws UnmappableCharacterException {
+ if (coder == LATIN1) {
+ return val.clone();
+ }
+ int len = val.length >> 1;
+ byte[] dst = new byte[len];
+ int dp = 0;
+ int sp = 0;
+ while (sp < len) {
+ int ret = StringCoding.implEncodeISOArray(val, sp, dst, dp, len);
+ sp = sp + ret;
+ dp = dp + ret;
+ if (ret != len) {
+ throw unmappableCharacterException(sp);
+ }
+ }
+ if (dp == dst.length) {
+ return dst;
+ }
+ return Arrays.copyOf(dst, dp);
+ }
+
//------------------------------ utf8 ------------------------------------
/**
@@ -1118,7 +1139,7 @@ private static int decode4(int b1, int b2, int b3, int b4) {
((byte) 0x80 << 0))));
}
- private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) {
+ private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp) {
while (sp < sl) {
int b1 = src[sp++];
if (b1 >= 0) {
@@ -1127,9 +1148,6 @@ private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int
if (sp < sl) {
int b2 = src[sp++];
if (isNotContinuation(b2)) {
- if (!doReplace) {
- throwMalformed(sp - 1, 1);
- }
StringUTF16.putChar(dst, dp++, REPL);
sp--;
} else {
@@ -1137,9 +1155,6 @@ private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int
}
continue;
}
- if (!doReplace) {
- throwMalformed(sp, 1); // underflow()
- }
StringUTF16.putChar(dst, dp++, REPL);
break;
} else if ((b1 >> 4) == -2) {
@@ -1147,18 +1162,12 @@ private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int
int b2 = src[sp++];
int b3 = src[sp++];
if (isMalformed3(b1, b2, b3)) {
- if (!doReplace) {
- throwMalformed(sp - 3, 3);
- }
StringUTF16.putChar(dst, dp++, REPL);
sp -= 3;
sp += malformed3(src, sp);
} else {
char c = decode3(b1, b2, b3);
if (Character.isSurrogate(c)) {
- if (!doReplace) {
- throwMalformed(sp - 3, 3);
- }
StringUTF16.putChar(dst, dp++, REPL);
} else {
StringUTF16.putChar(dst, dp++, c);
@@ -1167,15 +1176,9 @@ private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int
continue;
}
if (sp < sl && isMalformed3_2(b1, src[sp])) {
- if (!doReplace) {
- throwMalformed(sp - 1, 2);
- }
StringUTF16.putChar(dst, dp++, REPL);
continue;
}
- if (!doReplace) {
- throwMalformed(sp, 1);
- }
StringUTF16.putChar(dst, dp++, REPL);
break;
} else if ((b1 >> 3) == -2) {
@@ -1186,9 +1189,6 @@ private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int
int uc = decode4(b1, b2, b3, b4);
if (isMalformed4(b2, b3, b4) ||
!Character.isSupplementaryCodePoint(uc)) { // shortest form check
- if (!doReplace) {
- throwMalformed(sp - 4, 4);
- }
StringUTF16.putChar(dst, dp++, REPL);
sp -= 4;
sp += malformed4(src, sp);
@@ -1200,15 +1200,9 @@ private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int
}
b1 &= 0xff;
if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
- if (!doReplace) {
- throwMalformed(sp - 1, 1); // or 2
- }
StringUTF16.putChar(dst, dp++, REPL);
continue;
}
- if (!doReplace) {
- throwMalformed(sp - 1, 1);
- }
sp++;
StringUTF16.putChar(dst, dp++, REPL);
if (sp < sl && isMalformed4_3(src[sp])) {
@@ -1216,15 +1210,76 @@ private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int
}
break;
} else {
- if (!doReplace) {
- throwMalformed(sp - 1, 1);
- }
StringUTF16.putChar(dst, dp++, REPL);
}
}
return dp;
}
+ private static int decodeUTF8_UTF16NoReplacement(byte[] src, int sp, int sl, byte[] dst, int dp)
+ throws CharacterCodingException {
+ while (sp < sl) {
+ int b1 = src[sp++];
+ if (b1 >= 0) {
+ StringUTF16.putChar(dst, dp++, (char) b1);
+ } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
+ if (sp < sl) {
+ int b2 = src[sp++];
+ if (isNotContinuation(b2)) {
+ throw malformedInputException(sp - 1, 1);
+ } else {
+ StringUTF16.putChar(dst, dp++, decode2(b1, b2));
+ }
+ continue;
+ }
+ throw malformedInputException(sp, 1); // underflow()
+ } else if ((b1 >> 4) == -2) {
+ if (sp + 1 < sl) {
+ int b2 = src[sp++];
+ int b3 = src[sp++];
+ if (isMalformed3(b1, b2, b3)) {
+ throw malformedInputException(sp - 3, 3);
+ } else {
+ char c = decode3(b1, b2, b3);
+ if (Character.isSurrogate(c)) {
+ throw malformedInputException(sp - 3, 3);
+ } else {
+ StringUTF16.putChar(dst, dp++, c);
+ }
+ }
+ continue;
+ }
+ if (sp < sl && isMalformed3_2(b1, src[sp])) {
+ throw malformedInputException(sp - 1, 2);
+ }
+ throw malformedInputException(sp, 1);
+ } else if ((b1 >> 3) == -2) {
+ if (sp + 2 < sl) {
+ int b2 = src[sp++];
+ int b3 = src[sp++];
+ int b4 = src[sp++];
+ int uc = decode4(b1, b2, b3, b4);
+ if (isMalformed4(b2, b3, b4) ||
+ !Character.isSupplementaryCodePoint(uc)) { // shortest form check
+ throw malformedInputException(sp - 4, 4);
+ } else {
+ StringUTF16.putChar(dst, dp++, Character.highSurrogate(uc));
+ StringUTF16.putChar(dst, dp++, Character.lowSurrogate(uc));
+ }
+ continue;
+ }
+ b1 &= 0xff;
+ if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
+ throw malformedInputException(sp - 1, 1); // or 2
+ }
+ throw malformedInputException(sp - 1, 1);
+ } else {
+ throw malformedInputException(sp - 1, 1);
+ }
+ }
+ return dp;
+ }
+
private static int decodeWithDecoder(CharsetDecoder cd, char[] dst, byte[] src, int offset, int length)
throws CharacterCodingException {
ByteBuffer bb = ByteBuffer.wrap(src, offset, length);
@@ -1259,29 +1314,58 @@ private static int malformed4(byte[] src, int sp) {
return 3;
}
- private static void throwMalformed(int off, int nb) {
+ private static MalformedInputException malformedInputException(int off, int nb) {
+ MalformedInputException mie = new MalformedInputException(nb);
String msg = "malformed input off : " + off + ", length : " + nb;
- throw new IllegalArgumentException(msg, new MalformedInputException(nb));
+ mie.initCause(new IllegalArgumentException(msg));
+ return mie;
}
- private static void throwMalformed(byte[] val) {
+ private static MalformedInputException malformedInputException(byte[] val) {
int dp = StringCoding.countPositives(val, 0, val.length);
- throwMalformed(dp, 1);
+ return malformedInputException(dp, 1);
}
- private static void throwUnmappable(int off) {
+ private static UnmappableCharacterException unmappableCharacterException(int off) {
+ UnmappableCharacterException uce = new UnmappableCharacterException(1);
String msg = "malformed input off : " + off + ", length : 1";
- throw new IllegalArgumentException(msg, new UnmappableCharacterException(1));
+ uce.initCause(new IllegalArgumentException(msg, uce));
+ return uce;
}
- private static void throwUnmappable(byte[] val) {
+ private static UnmappableCharacterException unmappableCharacterException(byte[] val) {
int dp = StringCoding.countPositives(val, 0, val.length);
- throwUnmappable(dp);
+ return unmappableCharacterException(dp);
+ }
+
+ private static byte[] encodeUTF8(byte coder, byte[] val) {
+ if (coder == UTF16) {
+ return encodeUTF8_UTF16(val);
+ }
+
+ if (!StringCoding.hasNegatives(val, 0, val.length)) {
+ return val.clone();
+ }
+
+ int dp = 0;
+ byte[] dst = StringUTF16.newBytesFor(val.length);
+ for (byte c : val) {
+ if (c < 0) {
+ dst[dp++] = (byte) (0xc0 | ((c & 0xff) >> 6));
+ dst[dp++] = (byte) (0x80 | (c & 0x3f));
+ } else {
+ dst[dp++] = c;
+ }
+ }
+ if (dp == dst.length) {
+ return dst;
+ }
+ return Arrays.copyOf(dst, dp);
}
- private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
+ private static byte[] encodeUTF8NoReplacement(byte coder, byte[] val) throws UnmappableCharacterException {
if (coder == UTF16) {
- return encodeUTF8_UTF16(val, doReplace);
+ return encodeUTF8_UTF16NoReplacement(val);
}
if (!StringCoding.hasNegatives(val, 0, val.length)) {
@@ -1304,13 +1388,13 @@ private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
return Arrays.copyOf(dst, dp);
}
- private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
+ private static byte[] encodeUTF8_UTF16(byte[] val) {
int dp = 0;
int sp = 0;
int sl = val.length >> 1;
// UTF-8 encoded can be as much as 3 times the string length
// For very large estimate, (as in overflow of 32 bit int), precompute the exact size
- long allocLen = (sl * 3 < 0) ? computeSizeUTF8_UTF16(val, doReplace) : sl * 3;
+ long allocLen = (sl * 3 < 0) ? computeSizeUTF8_UTF16(val) : sl * 3;
if (allocLen > (long)Integer.MAX_VALUE) {
throw new OutOfMemoryError("Required length exceeds implementation limit");
}
@@ -1339,11 +1423,63 @@ private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
uc = Character.toCodePoint(c, c2);
}
if (uc < 0) {
- if (doReplace) {
- dst[dp++] = '?';
- } else {
- throwUnmappable(sp - 1);
- }
+ dst[dp++] = '?';
+ } else {
+ dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
+ dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
+ dst[dp++] = (byte)(0x80 | ((uc >> 6) & 0x3f));
+ dst[dp++] = (byte)(0x80 | (uc & 0x3f));
+ sp++; // 2 chars
+ }
+ } else {
+ // 3 bytes, 16 bits
+ dst[dp++] = (byte)(0xe0 | ((c >> 12)));
+ dst[dp++] = (byte)(0x80 | ((c >> 6) & 0x3f));
+ dst[dp++] = (byte)(0x80 | (c & 0x3f));
+ }
+ }
+ if (dp == dst.length) {
+ return dst;
+ }
+ return Arrays.copyOf(dst, dp);
+ }
+
+ private static byte[] encodeUTF8_UTF16NoReplacement(byte[] val) throws UnmappableCharacterException {
+ int dp = 0;
+ int sp = 0;
+ int sl = val.length >> 1;
+ // UTF-8 encoded can be as much as 3 times the string length
+ // For very large estimate, (as in overflow of 32 bit int), precompute the exact size
+ long allocLen = (sl * 3 < 0) ? computeSizeUTF8_UTF16NoReplacement(val) : sl * 3;
+ if (allocLen > (long)Integer.MAX_VALUE) {
+ throw new OutOfMemoryError("Required length exceeds implementation limit");
+ }
+ byte[] dst = new byte[(int) allocLen];
+ while (sp < sl) {
+ // ascii fast loop;
+ char c = StringUTF16.getChar(val, sp);
+ if (c >= '\u0080') {
+ break;
+ }
+ dst[dp++] = (byte)c;
+ sp++;
+ }
+ while (sp < sl) {
+ char c = StringUTF16.getChar(val, sp++);
+ if (c < 0x80) {
+ dst[dp++] = (byte)c;
+ } else if (c < 0x800) {
+ dst[dp++] = (byte)(0xc0 | (c >> 6));
+ dst[dp++] = (byte)(0x80 | (c & 0x3f));
+ } else if (Character.isSurrogate(c)) {
+ int uc = -1;
+ char c2;
+ if (Character.isHighSurrogate(c) && sp < sl &&
+ Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
+ uc = Character.toCodePoint(c, c2);
+ }
+ if (uc < 0) {
+ throw unmappableCharacterException(sp - 1);
} else {
dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
@@ -1367,9 +1503,8 @@ private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
/**
* {@return the exact size required to UTF_8 encode this UTF16 string}
* @param val UTF16 encoded byte array
- * @param doReplace true to replace unmappable characters
*/
- private static long computeSizeUTF8_UTF16(byte[] val, boolean doReplace) {
+ private static long computeSizeUTF8_UTF16(byte[] val) {
long dp = 0L;
int sp = 0;
int sl = val.length >> 1;
@@ -1388,11 +1523,43 @@ private static long computeSizeUTF8_UTF16(byte[] val, boolean doReplace) {
uc = Character.toCodePoint(c, c2);
}
if (uc < 0) {
- if (doReplace) {
- dp++;
- } else {
- throwUnmappable(sp - 1);
- }
+ dp++;
+ } else {
+ dp += 4;
+ sp++; // 2 chars
+ }
+ } else {
+ // 3 bytes, 16 bits
+ dp += 3;
+ }
+ }
+ return dp;
+ }
+
+ /**
+ * {@return the exact size required to UTF_8 encode this UTF16 string}
+ * @param val UTF16 encoded byte array
+ */
+ private static long computeSizeUTF8_UTF16NoReplacement(byte[] val) throws UnmappableCharacterException {
+ long dp = 0L;
+ int sp = 0;
+ int sl = val.length >> 1;
+
+ while (sp < sl) {
+ char c = StringUTF16.getChar(val, sp++);
+ if (c < 0x80) {
+ dp++;
+ } else if (c < 0x800) {
+ dp += 2;
+ } else if (Character.isSurrogate(c)) {
+ int uc = -1;
+ char c2;
+ if (Character.isHighSurrogate(c) && sp < sl &&
+ Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
+ uc = Character.toCodePoint(c, c2);
+ }
+ if (uc < 0) {
+ throw unmappableCharacterException(sp - 1);
} else {
dp += 4;
sp++; // 2 chars
@@ -1847,7 +2014,7 @@ public byte[] getBytes(String charsetName)
public byte[] getBytes(Charset charset) {
if (charset == null) throw new NullPointerException();
return encode(charset, coder(), value);
- }
+ }
/**
* Encodes this {@code String} into a sequence of bytes using the
diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java
index 1d62d69889603..2db2953aac693 100644
--- a/src/java.base/share/classes/java/lang/System.java
+++ b/src/java.base/share/classes/java/lang/System.java
@@ -55,7 +55,6 @@
import java.util.ResourceBundle;
import java.util.Set;
import java.util.concurrent.Executor;
-import java.util.concurrent.ScheduledExecutorService;
import java.util.function.Supplier;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Stream;
@@ -2124,28 +2123,33 @@ public Stream
* WARNING: This method returns the {@code byte[]} backing the provided
* {@code String}, if the input is ASCII. Hence, the returned byte array
* must not be modified.
- *
- * This method throws {@code CharacterCodingException} instead of replacing
- * when malformed input or unmappable characters are encountered.
*
* @param s the string to encode
* @param cs the charset
- * @return the encoded bytes
- * @throws CharacterCodingException for malformed input or unmappable characters
+ * @throws NullPointerException If {@code s} or {@code cs} is null
+ * @throws CharacterCodingException For malformed input or unmappable characters
*/
- byte[] uncheckedGetBytesNoRepl(String s, Charset cs) throws CharacterCodingException;
+ byte[] uncheckedGetBytesNoReplacement(String s, Charset cs) throws CharacterCodingException;
/**
- * Returns a new string by decoding from the given UTF-8 bytes array.
+ * {@return a new string by decoding from the given UTF-8 bytes array}
*
- * @param off the index of the first byte to decode
- * @param len the number of bytes to decode
- * @return the newly created string
- * @throws IllegalArgumentException for malformed or unmappable bytes.
+ * @param offset the index of the first byte to decode
+ * @param length the number of bytes to decode
+ * @throws NullPointerException If {@code bytes} is null
+ * @throws StringIndexOutOfBoundsException If {@code offset} is negative,
+ * {@code length} is negative, or {@code offset} is greater than
+ * {@code bytes.length - length}
+ * @throws CharacterCodingException For malformed input or unmappable characters
*/
- String newStringUTF8NoRepl(byte[] bytes, int off, int len);
+ String newStringUTF8NoReplacement(byte[] bytes, int offset, int length) throws CharacterCodingException;
/**
* Get the {@code char} at {@code index} in a {@code byte[]} in internal
@@ -384,13 +384,13 @@ public interface JavaLangAccess {
void uncheckedPutCharUTF16(byte[] bytes, int index, int ch);
/**
- * Encode the given string into a sequence of bytes using utf8.
+ * {@return the sequence of bytes obtained by encoding the given string in UTF-8}
*
* @param s the string to encode
- * @return the encoded bytes in utf8
- * @throws IllegalArgumentException for malformed surrogates
+ * @throws NullPointerException If {@code s} is null
+ * @throws CharacterCodingException For malformed input or unmappable characters
*/
- byte[] getBytesUTF8NoRepl(String s);
+ byte[] getBytesUTF8NoReplacement(String s) throws CharacterCodingException;
/**
* Inflated copy from {@code byte[]} to {@code char[]}, as defined by
diff --git a/src/java.base/unix/classes/sun/nio/fs/UnixPath.java b/src/java.base/unix/classes/sun/nio/fs/UnixPath.java
index 5dfc73f57aaed..c47c3fb70f1b2 100644
--- a/src/java.base/unix/classes/sun/nio/fs/UnixPath.java
+++ b/src/java.base/unix/classes/sun/nio/fs/UnixPath.java
@@ -126,7 +126,7 @@ private static String normalize(String input, int len, int off) {
private static byte[] encode(UnixFileSystem fs, String input) {
input = fs.normalizeNativePath(input);
try {
- return JLA.uncheckedGetBytesNoRepl(input, Util.jnuEncoding());
+ return JLA.uncheckedGetBytesNoReplacement(input, Util.jnuEncoding());
} catch (CharacterCodingException cce) {
throw new InvalidPathException(input,
"Malformed input or input contains unmappable characters");
diff --git a/test/jdk/java/lang/String/NoReplTest.java b/test/jdk/java/lang/String/NoReplTest.java
index 1817a1ffe7367..2f7344327824a 100644
--- a/test/jdk/java/lang/String/NoReplTest.java
+++ b/test/jdk/java/lang/String/NoReplTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,7 +24,7 @@
/*
* @test
* @bug 8286287 8288589
- * @summary Tests for *NoRepl() shared secret methods.
+ * @summary Tests for *NoReplacement() shared secret methods.
* @run testng NoReplTest
* @modules jdk.charsets
*/
@@ -45,11 +45,11 @@ public class NoReplTest {
private final static Charset WINDOWS_1252 = Charset.forName("windows-1252");
/**
- * Verifies newStringNoRepl() throws a CharacterCodingException.
+ * Verifies `uncheckedNewStringNoReplacement()` throws a `CharacterCodingException`.
* The method is invoked by `Files.readString()` method.
*/
@Test
- public void newStringNoReplTest() throws IOException {
+ public void uncheckedNewStringNoReplacementTest() throws IOException {
var f = Files.createTempFile(null, null);
try (var fos = Files.newOutputStream(f)) {
fos.write(MALFORMED_UTF16);
@@ -67,11 +67,11 @@ public void newStringNoReplTest() throws IOException {
}
/**
- * Verifies getBytesNoRepl() throws a CharacterCodingException.
+ * Verifies `uncheckedGetBytesNoReplacement()` throws a `CharacterCodingException`.
* The method is invoked by `Files.writeString()` method.
*/
@Test
- public void getBytesNoReplTest() throws IOException {
+ public void uncheckedGetBytesNoReplacementTest() throws IOException {
var f = Files.createTempFile(null, null);
try {
Files.writeString(f, MALFORMED_WINDOWS_1252, WINDOWS_1252);