diff --git a/src/lib_col_pic.py b/src/lib_col_pic.py index 92379b9..41e971c 100644 --- a/src/lib_col_pic.py +++ b/src/lib_col_pic.py @@ -1,234 +1,264 @@ # Copyright (c) 2023 Molodos - # The ElegooNeptuneThumbnails plugin is released under the terms of the AGPLv3 or higher. + from threading import Lock import numpy as np from PIL import ImageColor thumbnail_lock = Lock() -def parse_thumbnail(img, width, height, default_background) -> str: +_PALETTE_DTYPE = np.dtype([ + ('colo16', np.uint16), + ('A0', np.uint8), + ('A1', np.uint8), + ('A2', np.uint8), + ('qty', np.uint32), +]) + +# Pre-computed LUT for base64-like encoding: 6-bit value → ASCII byte +# Avoids per-byte arithmetic and branch in hot loop +_ENCODE_LUT = np.array([ + (126 if (i + 48) == 92 else i + 48) for i in range(64) +], dtype=np.uint8) + + +def parse_thumbnail(img, width: int, height: int, default_background: str) -> str: with thumbnail_lock: img.thumbnail((width, height)) img = img.convert("RGBA") - pixels = np.array(img) - img_size = pixels.shape[:2] + pixels = np.array(img, dtype=np.uint8) + img_h, img_w = pixels.shape[:2] - # Ensure the background color is in the correct format - r_bkg, g_bkg, b_bkg = ImageColor.getcolor( - default_background if default_background.startswith("#") else "#" + default_background, - "RGB" - ) + bg_hex = default_background if default_background.startswith("#") else "#" + default_background + r_bkg, g_bkg, b_bkg = ImageColor.getcolor(bg_hex, "RGB") - # Alpha blending optimization - alpha = pixels[:, :, 3] / 255.0 - non_opaque_mask = alpha != 1.0 - pixels[non_opaque_mask, 0] = (pixels[non_opaque_mask, 0] * alpha[non_opaque_mask] + (1 - alpha[non_opaque_mask]) * r_bkg).astype(np.uint8) - pixels[non_opaque_mask, 1] = (pixels[non_opaque_mask, 1] * alpha[non_opaque_mask] + (1 - alpha[non_opaque_mask]) * g_bkg).astype(np.uint8) - pixels[non_opaque_mask, 2] = (pixels[non_opaque_mask, 2] * alpha[non_opaque_mask] + (1 - alpha[non_opaque_mask]) * b_bkg).astype(np.uint8) + # Alpha compositing - single pass, minimal intermediates + alpha = pixels[:, :, 3].astype(np.float32) * (1.0 / 255.0) + inv_alpha = 1.0 - alpha - # Convert to 16-bit color - r = (pixels[:, :, 0].astype(np.uint16) >> 3) << 11 - g = (pixels[:, :, 1].astype(np.uint16) >> 2) << 5 - b = (pixels[:, :, 2].astype(np.uint16) >> 3) - color16 = (r | g | b).flatten() + r = (pixels[:, :, 0] * alpha + r_bkg * inv_alpha).astype(np.uint8) + g = (pixels[:, :, 1] * alpha + g_bkg * inv_alpha).astype(np.uint8) + b = (pixels[:, :, 2] * alpha + b_bkg * inv_alpha).astype(np.uint8) - output_data = bytearray(img_size[0] * img_size[1] * 10) - ColPic_EncodeStr(color16, img_size[1], img_size[0], output_data, len(output_data), 1024) + # RGB565 - direct computation, no intermediate array + color16 = ( + ((r.astype(np.uint16) >> 3) << 11) | + ((g.astype(np.uint16) >> 2) << 5) | + (b.astype(np.uint16) >> 3) + ).ravel() # ravel() avoids copy if already contiguous - result = ''.join(chr(byte) for byte in output_data if byte) - return result + # Tighter buffer estimate: header(32) + palette(2048) + rle(~n/2) + base64 expansion(4/3) + # Worst case RLE is 2 bytes per pixel, but typical is much less + buffer_size = max(8192, int(img_h * img_w * 3)) + output_data = bytearray(buffer_size) -def ColPic_EncodeStr(fromcolor16, picw, pich, outputdata: bytearray, outputmaxtsize, colorsmax): - qty = ColPicEncode(fromcolor16, picw, pich, outputdata, outputmaxtsize, colorsmax) + encoded_len = _colpic_encode_str(color16, img_w, img_h, output_data, len(output_data), 1024) + + if encoded_len <= 0: + return "" + return output_data[:encoded_len].decode("ascii") + + +def _colpic_encode_str( + fromcolor16: np.ndarray, + picw: int, + pich: int, + outputdata: bytearray, + outputmaxsize: int, + colorsmax: int, +) -> int: + qty = _colpic_encode(fromcolor16, picw, pich, outputdata, outputmaxsize, colorsmax) if qty == 0: return 0 - # Ensure the data length is a multiple of 3 for encoding padding = (3 - qty % 3) % 3 + if qty + padding > outputmaxsize: + return 0 + for i in range(padding): + outputdata[qty + i] = 0 qty += padding - outputdata.extend([0] * padding) - hexindex = qty - strindex = qty * 4 // 3 - TempBytes = bytearray(4) + final_len = (qty * 4) // 3 + if final_len >= outputmaxsize: + return 0 + + # Vectorized base64-like encoding using LUT + raw = np.frombuffer(outputdata, dtype=np.uint8, count=qty) + + # Reshape to groups of 3 bytes + raw_padded = raw.reshape(-1, 3) - while hexindex > 0: - hexindex -= 3 - strindex -= 4 - TempBytes[0] = outputdata[hexindex] >> 2 - TempBytes[1] = (outputdata[hexindex] & 0x03) << 4 | outputdata[hexindex + 1] >> 4 - TempBytes[2] = (outputdata[hexindex + 1] & 0x0F) << 2 | outputdata[hexindex + 2] >> 6 - TempBytes[3] = outputdata[hexindex + 2] & 0x3F + # Extract 6-bit chunks: 3 bytes → 4 values + c0 = raw_padded[:, 0] >> 2 + c1 = ((raw_padded[:, 0] & 0x03) << 4) | (raw_padded[:, 1] >> 4) + c2 = ((raw_padded[:, 1] & 0x0F) << 2) | (raw_padded[:, 2] >> 6) + c3 = raw_padded[:, 2] & 0x3F - for k in range(4): - TempBytes[k] += 48 - if TempBytes[k] == ord('\\'): - TempBytes[k] = 126 + # Apply LUT and interleave + encoded = np.column_stack([ + _ENCODE_LUT[c0], + _ENCODE_LUT[c1], + _ENCODE_LUT[c2], + _ENCODE_LUT[c3], + ]).ravel() - outputdata[int(strindex):int(strindex) + 4] = TempBytes + outputdata[:final_len] = encoded.tobytes() + outputdata[final_len] = 0 + return final_len - outputdata[int(qty * 4 // 3)] = 0 - return qty * 4 // 3 -def ColPicEncode(fromcolor16, picw, pich, outputdata: bytearray, outputmaxtsize, colorsmax): - Head0 = ColPicHead3() +def _colpic_encode( + fromcolor16: np.ndarray, + picw: int, + pich: int, + outputdata: bytearray, + outputmaxsize: int, + colorsmax: int, +) -> int: + HEADER_SIZE = 32 + + # Unconditional defensive copy: palette reduction mutates `pixels`. + pixels = np.array(fromcolor16, dtype=np.uint16, copy=True) - dotsqty = picw * pich colorsmax = min(colorsmax, 1024) - # Use NumPy to count unique colors and their frequencies - unique_colors, counts = np.unique(fromcolor16, return_counts=True) - Listu16 = np.array([U16HEAD() for _ in range(len(unique_colors))]) - - for i, (color, qty) in enumerate(zip(unique_colors, counts, strict=True)): - Listu16[i].colo16 = color - Listu16[i].qty = qty - Listu16[i].A0 = (color >> 11) & 31 - Listu16[i].A1 = (color >> 5) & 63 - Listu16[i].A2 = color & 31 - - # Sort the color list by frequency (descending) - Listu16 = sorted(Listu16, key=lambda x: x.qty, reverse=True) - - # Reduce color list to `colorsmax` by merging similar colors - while len(Listu16) > colorsmax: - l0 = Listu16.pop() - cha = np.array([ - abs(l0.A0 - u16.A0) + abs(l0.A1 - u16.A1) + abs(l0.A2 - u16.A2) - for u16 in Listu16 - ]) - fid = np.argmin(cha) - replacement_color = Listu16[fid].colo16 - fromcolor16 = np.where(fromcolor16 == l0.colo16, replacement_color, fromcolor16) - - # Clear the output data - outputdata[:] = bytearray(outputmaxtsize) - - # Set up header - Head0.encodever = 3 - Head0.mark = 98419516 - Head0.ListDataSize = len(Listu16) * 2 - - # Write header information + unique_colors, counts = np.unique(pixels, return_counts=True) + + palette = np.zeros(len(unique_colors), dtype=_PALETTE_DTYPE) + palette['colo16'] = unique_colors + palette['qty'] = counts + palette['A0'] = (unique_colors >> 11) & 0x1F + palette['A1'] = (unique_colors >> 5) & 0x3F + palette['A2'] = unique_colors & 0x1F + + sort_order = np.argsort(palette['qty'])[::-1] + palette = palette[sort_order] + + # Optimized palette reduction: batch distance calc + LUT remap + if len(palette) > colorsmax: + keep = palette[:colorsmax] + discard = palette[colorsmax:] + + # Vectorized distance: (num_discard, num_keep) matrix + # Use int16 to avoid overflow on subtraction + d_a0 = np.abs( + discard['A0'].astype(np.int16)[:, np.newaxis] - + keep['A0'].astype(np.int16) + ) + d_a1 = np.abs( + discard['A1'].astype(np.int16)[:, np.newaxis] - + keep['A1'].astype(np.int16) + ) + d_a2 = np.abs( + discard['A2'].astype(np.int16)[:, np.newaxis] - + keep['A2'].astype(np.int16) + ) + distances = d_a0 + d_a1 + d_a2 + + nearest_indices = np.argmin(distances, axis=1) + + # Build 65536-entry LUT for O(1) pixel remapping + remap_lut = np.arange(65536, dtype=np.uint16) + remap_lut[discard['colo16']] = keep['colo16'][nearest_indices] + + # Single-pass remap - cache-friendly sequential access + pixels = remap_lut[pixels] + palette = keep + + color_to_idx = {int(c): i for i, c in enumerate(palette['colo16'])} + list_data_size = len(palette) * 2 + + if HEADER_SIZE + list_data_size + 2 > outputmaxsize: + return 0 + + outputdata[:outputmaxsize] = b"\x00" * outputmaxsize + outputdata[0] = 3 - outputdata[12:16] = [60, 195, 221, 5] - outputdata[16:20] = Head0.ListDataSize.to_bytes(4, 'little') - - sizeofColPicHead3 = 32 - - # Convert the Listu16 color data to bytes - for i in range(len(Listu16)): - color_bytes = np.array([Listu16[i].colo16], dtype=np.uint16).view(np.uint8) - outputdata[sizeofColPicHead3 + i * 2: sizeofColPicHead3 + i * 2 + 2] = list(color_bytes) # Convert to list of ints - - enqty = Byte8bitEncode( - fromcolor16, - sizeofColPicHead3, - Head0.ListDataSize >> 1, - dotsqty, - outputdata, - sizeofColPicHead3 + Head0.ListDataSize, - outputmaxtsize - sizeofColPicHead3 - Head0.ListDataSize, - ) - - # Finalize header with encoding details - Head0.ColorDataSize = enqty - Head0.PicW = picw - Head0.PicH = pich - - outputdata[4:8] = picw.to_bytes(4, 'little') - outputdata[8:12] = pich.to_bytes(4, 'little') - outputdata[20:24] = enqty.to_bytes(4, 'little') - - return sizeofColPicHead3 + Head0.ListDataSize + Head0.ColorDataSize - -def ADList0(val, Listu16, ListQty, maxqty): - if ListQty >= maxqty: - return ListQty - - for i in range(ListQty): - if Listu16[i].colo16 == val: - Listu16[i].qty += 1 - return ListQty - - A0 = (val >> 11) & 31 - A1 = (val >> 5) & 63 - A2 = val & 31 - Listu16[ListQty].colo16 = val - Listu16[ListQty].A0 = A0 - Listu16[ListQty].A1 = A1 - Listu16[ListQty].A2 = A2 - Listu16[ListQty].qty = 1 - return ListQty + 1 - -def Byte8bitEncode( - fromcolor16, - listu16Index, - listqty, - dotsqty, - outputdata: bytearray, - outputdataIndex, - decMaxBytesize, -): - listu16 = outputdata - dots = 0 - srcindex = 0 - decindex = 0 - lastid = 0 - - while dotsqty > 0: - dots = min(255, next((i + 1 for i in range(dotsqty - 1) if fromcolor16[srcindex + i] != fromcolor16[srcindex + i + 1]), dotsqty)) - - temp = next((i for i in range(listqty) if listu16[i * 2 + 1 + listu16Index] << 8 | listu16[i * 2 + listu16Index] == fromcolor16[srcindex]), 0) - tid = temp % 32 - sid = temp // 32 - - if lastid != sid: - if decindex >= decMaxBytesize: - break - outputdata[decindex + outputdataIndex] = 7 << 5 | sid - decindex += 1 - lastid = sid - - if dots <= 6: - if decindex >= decMaxBytesize: - break - outputdata[decindex + outputdataIndex] = dots << 5 | tid - decindex += 1 - else: - if decindex >= decMaxBytesize: - break - outputdata[decindex + outputdataIndex] = tid - decindex += 1 - if decindex >= decMaxBytesize: - break - outputdata[decindex + outputdataIndex] = dots - decindex += 1 - - srcindex += dots - dotsqty -= dots - - return decindex - -class U16HEAD: - def __init__(self): - self.colo16 = 0 - self.A0 = 0 - self.A1 = 0 - self.A2 = 0 - self.res0 = 0 - self.res1 = 0 - self.qty = 0 - -class ColPicHead3: - def __init__(self): - self.encodever = 0 - self.res0 = 0 - self.oncelistqty = 0 - self.PicW = 0 - self.PicH = 0 - self.mark = 0 - self.ListDataSize = 0 - self.ColorDataSize = 0 - self.res1 = 0 + outputdata[4:8] = int(picw).to_bytes(4, 'little') + outputdata[8:12] = int(pich).to_bytes(4, 'little') + outputdata[12:16] = (98419516).to_bytes(4, 'little') + outputdata[16:20] = list_data_size.to_bytes(4, 'little') + + palette_bytes = palette['colo16'].astype(' int: + """ + RLE encode with NumPy-accelerated run detection. + Falls back to pure Python for the encoding step but minimizes iterations. + """ + n = len(pixels) + if n == 0: + return 0 + + # Find run boundaries using NumPy - O(n) but vectorized + # diff_mask[i] is True where a new run starts + diff_mask = np.empty(n, dtype=np.bool_) + diff_mask[0] = True + np.not_equal(pixels[1:], pixels[:-1], out=diff_mask[1:]) + + run_starts = np.flatnonzero(diff_mask) + run_ends = np.concatenate([run_starts[1:], [n]]) + run_lengths = run_ends - run_starts + run_colors = pixels[run_starts] + + # Batch lookup palette indices + palette_indices = np.array([color_to_idx.get(int(c), 0) for c in run_colors], dtype=np.uint16) + tids = (palette_indices & 0x1F).astype(np.uint8) + sids = ((palette_indices >> 5) & 0x1F).astype(np.uint8) + + # Encode runs - still need Python loop but iterating over runs, not pixels + dst = 0 + last_sid = 0 + + for i in range(len(run_lengths)): + length = int(run_lengths[i]) + tid = tids[i] + sid = sids[i] + + # Handle runs > 255 by splitting + while length > 0: + chunk = min(255, length) + + # Segment switch + if sid != last_sid: + if dst >= max_size: + return -1 + output[output_offset + dst] = (7 << 5) | sid + dst += 1 + last_sid = sid + + # Encode run + if chunk <= 6: + if dst >= max_size: + return -1 + output[output_offset + dst] = (chunk << 5) | tid + dst += 1 + else: + if dst + 1 >= max_size: + return -1 + output[output_offset + dst] = tid + output[output_offset + dst + 1] = chunk + dst += 2 + + length -= chunk + + return dst