Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions sjsonnet/src-js/sjsonnet/CharSWAR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,14 @@ object CharSWAR {
}
false
}

/**
 * Returns the index of the first byte in `arr[from, to)` that JSON string output must escape: a
 * control byte below 0x20, a double quote, or a backslash. Returns -1 when the range is clean.
 *
 * Bytes are compared as unsigned values, so UTF-8 continuation and lead bytes (0x80 and above)
 * are never reported.
 */
def findFirstEscapeChar(arr: Array[Byte], from: Int, to: Int): Int = {
  @scala.annotation.tailrec
  def scan(idx: Int): Int =
    if (idx >= to) -1
    else {
      val unsigned = arr(idx) & 0xff
      if (unsigned < 32 || unsigned == '"' || unsigned == '\\') idx
      else scan(idx + 1)
    }
  scan(from)
}
}
25 changes: 25 additions & 0 deletions sjsonnet/src-jvm/sjsonnet/CharSWAR.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,31 @@ static boolean hasEscapeChar(char[] arr, int from, int to) {
return false;
}

/**
 * Locates the first byte in {@code arr[from..to)} that must be escaped inside a JSON string: a
 * control byte below 0x20, a double quote, or a backslash.
 *
 * @param arr bytes to scan
 * @param from inclusive start index
 * @param to exclusive end index
 * @return index of the first byte needing an escape, or {@code -1} when the range is clean
 */
static int findFirstEscapeChar(byte[] arr, int from, int to) {
  int pos = from;
  // Word phase: inspect 8 bytes per step while a full word still fits before `to`.
  for (int wordLimit = to - 7; pos < wordLimit; pos += 8) {
    long word = (long) LONG_VIEW.get(arr, pos);
    if (!swarHasMatch(word)) {
      continue;
    }
    // The word was flagged; walk its bytes to pin down the exact index. If none of them
    // qualifies, scanning simply resumes with the next word.
    int wordEnd = pos + 8;
    for (int k = pos; k < wordEnd; k++) {
      int b = arr[k] & 0xFF;
      if (b < 32 || b == '"' || b == '\\') return k;
    }
  }
  // Tail phase: fewer than 8 bytes remain, so check them one at a time.
  for (; pos < to; pos++) {
    int b = arr[pos] & 0xFF;
    if (b < 32 || b == '"' || b == '\\') return pos;
  }
  return -1;
}

private static boolean hasEscapeCharSWAR(byte[] arr, int from, int to) {
int i = from;
int limit = to - 7; // 8 bytes per VarHandle.get
Expand Down
36 changes: 36 additions & 0 deletions sjsonnet/src-native/sjsonnet/CharSWAR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,32 @@ object CharSWAR {
false
}

/**
 * Returns the index of the first byte in `arr[from, to)` that JSON string output must escape (a
 * control byte below 0x20, a double quote, or a backslash), or -1 when the range is clean.
 *
 * Ranges shorter than 8 bytes are scanned byte by byte. Longer ranges are tested one 8-byte word
 * at a time, and only a flagged word is re-scanned bytewise to locate the exact index.
 */
def findFirstEscapeChar(arr: Array[Byte], from: Int, to: Int): Int = {
  if (to - from < 8) return findFirstEscapeCharScalar(arr, from, to)
  val raw = arr.asInstanceOf[ByteArray]
  val wordLimit = to - 7
  var pos = from
  while (pos < wordLimit) {
    if (swarHasMatch(Intrinsics.loadLong(raw.atRawUnsafe(pos)))) {
      // Pin down the byte inside the flagged word; keep going if none of them qualifies.
      val hit = findFirstEscapeCharScalar(arr, pos, pos + 8)
      if (hit >= 0) return hit
    }
    pos += 8
  }
  // Fewer than 8 bytes remain past the last full word.
  findFirstEscapeCharScalar(arr, pos, to)
}

@inline private def hasEscapeCharScalar(s: String, len: Int): Boolean = {
var i = 0
while (i < len) {
Expand All @@ -108,4 +134,14 @@ object CharSWAR {
}
false
}

/**
 * Byte-at-a-time scan of `arr[from, to)` for the first byte that needs JSON string escaping.
 * Returns that byte's index, or -1 if every byte in the range can be written as-is.
 */
@inline private def findFirstEscapeCharScalar(arr: Array[Byte], from: Int, to: Int): Int = {
  var cursor = from
  while (cursor < to) {
    val unsigned = arr(cursor) & 0xff
    // A byte is clean when it is not a control byte, not a quote and not a backslash.
    val clean = unsigned >= 32 && unsigned != '"' && unsigned != '\\'
    if (!clean) return cursor
    cursor += 1
  }
  -1
}
}
100 changes: 89 additions & 11 deletions sjsonnet/src/sjsonnet/BaseByteRenderer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -307,13 +307,14 @@ class BaseByteRenderer[T <: java.io.OutputStream](
}

/**
* SWAR-accelerated path for long strings. Converts to UTF-8 bytes once, scans with SWAR, and
* bulk-copies if clean. The getBytes allocation is amortized by avoiding per-char processing.
* SWAR-accelerated path for long strings. Converts to UTF-8 bytes once, then bulk-copies clean
* chunks and escapes only the bytes that require it.
*/
private def visitLongString(str: String): Unit = {
// Encode once; all scanning and copying below works on the UTF-8 bytes.
val bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8)
if (!CharSWAR.hasEscapeChar(bytes, 0, bytes.length)) {
val bLen = bytes.length
val bLen = bytes.length
// Index of the first byte that needs escaping, or -1 when the string is clean.
val firstEscape = CharSWAR.findFirstEscapeChar(bytes, 0, bLen)
if (firstEscape < 0) {
// Clean string: the payload plus the two surrounding quotes.
elemBuilder.ensureLength(bLen + 2)
val arr = elemBuilder.arr
val pos = elemBuilder.length
Expand All @@ -322,13 +323,87 @@ class BaseByteRenderer[T <: java.io.OutputStream](
arr(pos + 1 + bLen) = '"'.toByte
elemBuilder.length = pos + bLen + 2
} else {
upickle.core.RenderUtils.escapeByte(
unicodeCharBuilder,
elemBuilder,
str,
escapeUnicode = false,
wrapQuotes = true
)
// Total output size (payload, both quotes, and the extra bytes of every escape), requested
// from the builder in one call before anything is written.
val escapedLen = escapedStringLength(bytes, bLen, firstEscape)
elemBuilder.ensureLength(escapedLen)
elemBuilder.appendUnsafeC('"')
// `from` is the start of the pending clean run; `escPos` is the next byte to escape.
var from = 0
var escPos = firstEscape
while (escPos >= 0) {
if (escPos > from) {
// Copy the clean run [from, escPos) verbatim.
val chunkLen = escPos - from
// NOTE(review): looks redundant after the up-front ensureLength(escapedLen) — confirm
// ensureLength's contract before removing.
elemBuilder.ensureLength(chunkLen)
val arr = elemBuilder.arr
val pos = elemBuilder.length
System.arraycopy(bytes, from, arr, pos, chunkLen)
elemBuilder.length = pos + chunkLen
}
escapeByteInline(bytes(escPos) & 0xff)
from = escPos + 1
// Resume the search right after the escaped byte; stop once the input is exhausted.
escPos = if (from < bLen) CharSWAR.findFirstEscapeChar(bytes, from, bLen) else -1
}
if (from < bLen) {
// Copy the clean bytes that follow the last escaped byte.
val tailLen = bLen - from
elemBuilder.ensureLength(tailLen)
val arr = elemBuilder.arr
val pos = elemBuilder.length
System.arraycopy(bytes, from, arr, pos, tailLen)
elemBuilder.length = pos + tailLen
}
elemBuilder.ensureLength(1)
elemBuilder.appendUnsafeC('"')
}
}

/**
 * Computes how many bytes the quoted, escaped form of `bytes[0, bLen)` occupies: the payload,
 * the two surrounding quotes, and the extra bytes contributed by every byte that is escaped.
 *
 * @param bytes UTF-8 encoded string content
 * @param bLen number of valid bytes in `bytes`
 * @param firstEscape index of the first byte that needs escaping; a negative value means no
 *   byte is escaped, giving `bLen + 2`
 */
private def escapedStringLength(bytes: Array[Byte], bLen: Int, firstEscape: Int): Int = {
  var total = bLen + 2
  var hit = firstEscape
  while (hit >= 0) {
    total += escapeExtraLength(bytes(hit) & 0xff)
    // Continue the search just past the byte that was counted.
    val resume = hit + 1
    hit = if (resume < bLen) CharSWAR.findFirstEscapeChar(bytes, resume, bLen) else -1
  }
  total
}

/**
 * Extra output bytes needed when `b` is written escaped instead of raw: 1 for the bytes that
 * have a two-character escape (quote, backslash, backspace, form feed, newline, carriage return,
 * tab), 5 for everything else.
 */
@inline private def escapeExtraLength(b: Int): Int = {
  val hasShortEscape =
    b == '"' || b == '\\' || b == '\b' || b == '\f' || b == '\n' || b == '\r' || b == '\t'
  if (hasShortEscape) 1 else 5
}

/**
 * Appends the JSON escape sequence for one byte that the caller already knows must be escaped.
 *
 * Quote, backslash, backspace, form feed, newline, carriage return and tab get their
 * two-character escape. Any other value gets the six-character unicode escape: backslash, `u`,
 * two zeros, then the byte's two hex digits.
 */
private def escapeByteInline(b: Int): Unit = {
  // Six bytes is the longest sequence written below.
  elemBuilder.ensureLength(6)
  // Every escape starts with a backslash; only the character(s) after it differ.
  elemBuilder.appendUnsafeC('\\')
  val marker: Char = (b: @scala.annotation.switch) match {
    case '"'  => '"'
    case '\\' => '\\'
    case '\b' => 'b'
    case '\f' => 'f'
    case '\n' => 'n'
    case '\r' => 'r'
    case '\t' => 't'
    case _    => 'u'
  }
  elemBuilder.appendUnsafeC(marker)
  // Only the fallback case yields 'u'; it is followed by the four hex digits of the byte.
  if (marker == 'u') {
    elemBuilder.appendUnsafeC('0')
    elemBuilder.appendUnsafeC('0')
    elemBuilder.appendUnsafeC(BaseByteRenderer.HEX_CHARS((b >> 4) & 0xf))
    elemBuilder.appendUnsafeC(BaseByteRenderer.HEX_CHARS(b & 0xf))
  }
}

Expand Down Expand Up @@ -377,6 +452,9 @@ object BaseByteRenderer {
a
}

/**
 * Lowercase hexadecimal digits, indexed by nibble value (0 to 15). Inline byte escaping looks up
 * the high and low nibble of a control byte here to produce its two hex digits.
 */
private[sjsonnet] val HEX_CHARS: Array[Char] = "0123456789abcdef".toCharArray

/**
* Reusable scratch buffer for writeLongDirect (max 20 bytes for Long.MinValue). Not thread-safe,
* but renderers are single-threaded.
Expand Down
Loading