diff --git a/.gitignore b/.gitignore index 9c1a3a1..36a4e8a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ lzstring.pro.user +/old/ +/build/ diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 0000000..12fd2fe --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +qt_lzstring \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..0b76fe5 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..1dc577c --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/qt-lzstring.iml b/.idea/qt-lzstring.iml new file mode 100644 index 0000000..f08604b --- /dev/null +++ b/.idea/qt-lzstring.iml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..4e0795a --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,71 @@ +cmake_minimum_required(VERSION 3.16) +project(qt_lzstring LANGUAGES CXX) + +include(CTest) +enable_testing() + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Enable Qt's automoc so test target with Q_OBJECT works when built (only affects Qt test target). +set(CMAKE_AUTOMOC ON) + +# Core sources reused by main executable and (optionally) tests (Qt sources kept separate so non-Qt builds don't require Qt headers). +set(LZSTRING_SOURCES + src/lzstring.cpp + src/LZCore.cpp + src/StdLZPlatform.cpp +) +set(LZSTRING_HEADERS + src/ILZPlatform.h + src/LZCore.h + src/StdLZPlatform.h + src/lzstring.h +) + +# Build the CLI without Qt dependency (forces use of std platform implementation) +add_executable(lzstring + ${LZSTRING_SOURCES} + ${LZSTRING_HEADERS} + src/main.cpp +) +# Only the CLI gets this define; tests (if enabled) can use Qt implementation. 
+target_compile_definitions(lzstring PRIVATE LZSTRING_NO_QT) + +# Organize sources in IDEs (portable) +source_group(TREE ${CMAKE_SOURCE_DIR}/src PREFIX "Source" FILES ${LZSTRING_SOURCES} ${LZSTRING_HEADERS}) + +# ------------------------------------------------------------- +# Optional Qt tests (converted from original qmake project) +# ------------------------------------------------------------- +option(BUILD_QT_TESTS "Build Qt-based unit and benchmark tests" ON) +if(BUILD_QT_TESTS) + find_package(QT NAMES Qt6 Qt5 COMPONENTS Core Test QUIET) + if(QT_FOUND) + find_package(Qt${QT_VERSION_MAJOR} COMPONENTS Core Test REQUIRED) + # Flag to let subdirectory know Qt is available. + set(LZSTRING_QT_AVAILABLE ON CACHE INTERNAL "Qt available for tests") + add_subdirectory(tests) + else() + message(STATUS "Qt not found; skipping BUILD_QT_TESTS") + endif() +endif() + +# Non-Qt round-trip test exercising StdLZPlatform (always available, no Qt dependency) +add_executable(lzstring_std_test + tests/lzstring_test/std_roundtrip_test.cpp + ${LZSTRING_SOURCES} + ${LZSTRING_HEADERS} +) + +target_compile_definitions(lzstring_std_test PRIVATE LZSTRING_NO_QT) + +target_include_directories(lzstring_std_test PRIVATE ${CMAKE_SOURCE_DIR}/src) + +add_test(NAME lzstring_std_test COMMAND lzstring_std_test) + +# Register Qt test if it exists +if(BUILD_QT_TESTS AND TARGET lzstring_test) + add_test(NAME lzstring_test COMMAND lzstring_test) + set_tests_properties(lzstring_test PROPERTIES WORKING_DIRECTORY $) +endif() diff --git a/README.md b/README.md index 4d6d382..3bcaf38 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,130 @@ +# qt-lzstring -

Qt implementation of LZ-String, version 1.4.4.

-

-Based on the LZ-String compression algorithm found here:
-http://pieroxy.net/blog/pages/lz-string/index.html -

-

-WTFPL Licence
-http://www.wtfpl.net/ -

-Implemented functions:
- +Qt implementation of LZ-String with a platform-agnostic core and a simple CLI. + +Refactor highlights +- LZ core extracted to `src/LZCore.{h,cpp}`. It contains the compression/decompression logic and depends only on an abstract platform interface. +- New platform interface `src/ILZPlatform.h` encapsulates string/character operations the core needs. +- Two platform implementations: + - `src/QtLZPlatform.{h,cpp}` for Qt builds (uses QString/QChar/QHash). + - `src/StdLZPlatform.{h,cpp}` for non-Qt builds (uses std::string and simple containers). +- `src/lzstring.cpp` is a thin facade that selects the platform at compile time and delegates to `LZCore`. +- Build files (CMake and qmake .pri) updated to include the new sources. + +Public API +- `LZString` public static methods are unchanged: `compress`, `compressToUTF16`, `compressToBase64`, and corresponding `decompress*` variants. + +Notes +- The core does not depend on Qt; only the platform layer bridges to Qt or std. This improves separation of concerns and makes the LZ implementation reusable. +- Non-Qt mode (CMake default) is intended for simple CLI usage. For full Unicode fidelity and UTF‑16/Base64 behavior identical to the original Qt implementation, build/run the Qt test target which uses `QString` code units. + +Installation (Linux) +- Prerequisites: a C++ compiler and either Qt (for the full-featured test suite) or just CMake (for the minimal non‑Qt CLI + std round‑trip tests). + +Debian/Ubuntu +```bash +sudo apt-get update +sudo apt-get install -y build-essential cmake +# For Qt tests (optional) +sudo apt-get install -y qtbase5-dev qtbase5-dev-tools +``` + +Fedora +```bash +sudo dnf install -y cmake make gcc-c++ +# For Qt tests (optional) +sudo dnf install -y qt5-qtbase-devel +``` + +Build (CMake) +- Default (non-Qt CLI only): + - Windows (cmd.exe): + ```cmd + cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release -DBUILD_QT_TESTS=OFF + cmake --build build --config Release + ``` + - Linux/macOS: + ```bash + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_QT_TESTS=OFF + cmake --build build --config Release + ``` + - Binary: `lzstring` / `lzstring.exe` in the build tree. + +- With Qt tests (auto-detect Qt5/Qt6): + ```bash + cmake -S . -B build -DBUILD_QT_TESTS=ON + cmake --build build --target lzstring_test + ctest --test-dir build -V -R lzstring_test + ``` + If Qt isn't found, a status message is printed and the normal CLI plus non-Qt tests still build. + +Tests (CMake) +- Always built: `lzstring_std_test` (non-Qt, exercises `StdLZPlatform`). + ```bash + cmake -S . -B build -DBUILD_QT_TESTS=OFF + cmake --build build --target lzstring_std_test + ctest --test-dir build -V -R lzstring_std_test + ``` +- Optionally (with Qt): `lzstring_test` (original Qt/QString test + benchmarks). Data file `data.json` is auto-copied beside the test binary and the working directory set via CTest so relative open succeeds. + +Build (qmake, legacy / optional) +- The `.pri` pulls in all sources; tests are under `tests/`. +- Linux/macOS: + ```bash + qmake + make -j + ``` +- Windows (from a Qt command prompt): + ```cmd + qmake + nmake + ``` + +Getting the sources +```bash +git clone <repository-url> +cd qt-lzstring +``` + +Usage (CLI) +- The CLI reads from a file or stdin (`-`) and writes to a file or stdout (`-`). 
+- Binary name: `lzstring` (`.exe` on Windows) +- Operations: + - `--compress` + - `--compressToUTF16` + - `--compressToBase64` + - `--decompress` + - `--decompressFromUTF16` + - `--decompressFromBase64` + - `--test` (round‑trip checks for all codecs on the provided input; prints boolean results and exits) + +Examples (Linux/macOS) +```bash +./lzstring --compress input.txt output.lz +./lzstring --compressToUTF16 input.txt output.lz16 +./lzstring --compressToBase64 input.txt output.lz64 +./lzstring --decompress input.lz output.txt +./lzstring --decompressFromUTF16 input.lz16 output.txt +./lzstring --decompressFromBase64 input.lz64 output.txt +./lzstring --test input.txt +cat input.txt | ./lzstring --compress - - +cat input.lz | ./lzstring --decompress - - +``` + +Examples (Windows cmd.exe) +```cmd +lzstring.exe --compress input.txt output.lz +lzstring.exe --decompress input.lz output.txt +lzstring.exe --test input.txt +:: Pipes +type input.txt | lzstring.exe --compress - - +type input.lz | lzstring.exe --decompress - - +``` + +Extending tests +- Add more corpus cases: modify `tests/lzstring_test/std_roundtrip_test.cpp` or the Qt test's `test_data()` method. +- For performance comparisons, run the Qt benchmarks (they use `QBENCHMARK`). +- To disable the std test (e.g., in a packaging build), you can wrap its target creation in an option similar to `BUILD_QT_TESTS`. 
+ +License +- This project is licensed under the WTFPL: http://www.wtfpl.net/ diff --git a/lzstring.pro b/lzstring.pro index 8515aa2..9a379a3 100644 --- a/lzstring.pro +++ b/lzstring.pro @@ -1,5 +1,13 @@ - -TEMPLATE = subdirs +TEMPLATE = app SUBDIRS += tests +#EDIT to fit your configuration +LIBS += -L/usr/lib/x86_64-linux-gnu -lQt5Core -lQt5Widgets +INCLUDEPATH += /usr/include/x86_64-linux-gnu/qt5/ +QT += core widgets + include(src/lzstring.pri) + +TARGET = lzstring + +CONFIG = release console diff --git a/src/ILZPlatform.h b/src/ILZPlatform.h new file mode 100644 index 0000000..dc5e326 --- /dev/null +++ b/src/ILZPlatform.h @@ -0,0 +1,32 @@ +#ifndef ILZPLATFORM_H +#define ILZPLATFORM_H + +#include "lzstring.h" + +// Interface abstracting platform-specific string and character utilities +class ILZPlatform { +public: + virtual ~ILZPlatform() = default; + + // String operations + virtual int length(const QString& s) const = 0; + virtual bool isEmpty(const QString& s) const = 0; + virtual QString slice(const QString& s, int pos, int len) const = 0; + virtual QString concat(const QString& a, const QString& b) const = 0; + virtual void append(QString& dst, const QString& src) const = 0; + + // Character/code unit operations + virtual int charCodeAt(const QString& s, int index) const = 0; // 0..65535 (Qt) or 0..255 (std) + virtual QString charFromCode(int code) const = 0; // single-code-unit string + + // Output mappers for compression writers + virtual void appendFromInt_Normal(QString& out, int code) const = 0; // 16-bit path uses code as-is + virtual void appendFromInt_UTF16(QString& out, int code) const = 0; // code + 32 + virtual void appendFromInt_Base64(QString& out, int code) const = 0; // map using Base64 alphabet + + // Reverse mapping for Base64 during decompression + virtual int base64ReverseIndex(int charCode) const = 0; // -1 if not found +}; + +#endif // ILZPLATFORM_H + diff --git a/src/LZCore.cpp b/src/LZCore.cpp new file mode 100644 index 0000000..c1066c7 
--- /dev/null +++ b/src/LZCore.cpp @@ -0,0 +1,491 @@ +#include +#include +#include +#include "lzstring.h" +#include "LZCore.h" + +// Helper struct for bitstream during decompression +struct LZDecompressData { + int val; + int position; + int index; +}; + +QString LZCore::compress(const QString& uncompressed) const { +#ifdef LZSTRING_NO_QT + // In non-Qt mode we only have 8-bit code units, so flush every 8 bits + return compressImpl(uncompressed, 8, &ILZPlatform::appendFromInt_Normal); +#else + return compressImpl(uncompressed, 16, &ILZPlatform::appendFromInt_Normal); +#endif +} + +QString LZCore::compressToUTF16(const QString& uncompressed) const { + if (m_platform.isEmpty(uncompressed)) return QString(); +#ifdef LZSTRING_NO_QT + // With 8-bit storage, use 7 bits + 32 offset to stay within printable range + QString result = compressImpl(uncompressed, 7, &ILZPlatform::appendFromInt_UTF16); +#else + QString result = compressImpl(uncompressed, 15, &ILZPlatform::appendFromInt_UTF16); +#endif + m_platform.append(result, m_platform.charFromCode(' ')); + return result; +} + +QString LZCore::compressToBase64(const QString& uncompressed) const { + if (m_platform.isEmpty(uncompressed)) return QString(); + QString res = compressImpl(uncompressed, 6, &ILZPlatform::appendFromInt_Base64); + int mod = m_platform.length(res) % 4; + if (mod == 0) return res; + if (mod == 1) { m_platform.append(res, m_platform.charFromCode('=')); m_platform.append(res, m_platform.charFromCode('=')); m_platform.append(res, m_platform.charFromCode('=')); } + else if (mod == 2) { m_platform.append(res, m_platform.charFromCode('=')); m_platform.append(res, m_platform.charFromCode('=')); } + else { m_platform.append(res, m_platform.charFromCode('=')); } + return res; +} + +QString LZCore::decompress(const QString& compressed) const { + if (m_platform.isEmpty(compressed)) return QString(); + auto getNext = [&](int index) { + return m_platform.charCodeAt(compressed, index); + }; +#ifdef LZSTRING_NO_QT + 
return decompressImpl(m_platform.length(compressed), 128, getNext); +#else + return decompressImpl(m_platform.length(compressed), 32768, getNext); +#endif +} + +QString LZCore::decompressFromUTF16(const QString& compressed) const { + if (m_platform.isEmpty(compressed)) return QString(); + auto getNext = [&](int index) { + return m_platform.charCodeAt(compressed, index) - 32; + }; +#ifdef LZSTRING_NO_QT + return decompressImpl(m_platform.length(compressed), 64, getNext); +#else + return decompressImpl(m_platform.length(compressed), 16384, getNext); +#endif +} + +QString LZCore::decompressFromBase64(const QString& compressed) const { + if (m_platform.isEmpty(compressed)) return QString(); + auto getNext = [&](int index) { + return m_platform.base64ReverseIndex(m_platform.charCodeAt(compressed, index)); + }; + // Base64 path always uses 6 bits per char; resetValue per original algorithm is 32 (2^5) + return decompressImpl(m_platform.length(compressed), 32, getNext); +} + +QString LZCore::compressImpl(const QString& uncompressed, int bitsPerChar, + void (ILZPlatform::*appendFromInt)(QString&, int) const) const +{ + if (m_platform.isEmpty(uncompressed)) return QString(); + + int i = 0; + int value = 0; + + std::vector context_c_dictionary(65536, false); + std::map context_dictionary; + std::map context_dictionaryToCreate; + + QString context_c; + QString context_wc; + QString context_w; + QString context_data; + + int context_dictSize = 3; + int context_numBits = 2; + int context_data_val = 0; + int context_data_position = 0; + int context_enlargeIn = 2; + + // Pre-reserve some capacity + context_data.reserve(std::max(255, m_platform.length(uncompressed) / 5)); + + auto writeBit = [&](int bit) { + context_data_val = (context_data_val << 1) | (bit & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + }; + + auto 
writeNBits = [&](int n, int v) { + for (i = 0; i < n; ++i) { + writeBit(v & 1); + v >>= 1; + } + }; + + for (int ii = 0, size = m_platform.length(uncompressed); ii < size; ++ii) { + context_c = m_platform.slice(uncompressed, ii, 1); + int c_val = m_platform.charCodeAt(uncompressed, ii); + if (!context_c_dictionary[static_cast(c_val)]) { + context_c_dictionary[static_cast(c_val)] = true; + context_dictionary[context_c] = context_dictSize++; + context_dictionaryToCreate[context_c] = true; + } + + context_wc = m_platform.concat(context_w, context_c); + if (context_dictionary.count(context_wc)) { + context_w = context_wc; + } else { + auto itCreate = context_dictionaryToCreate.find(context_w); + if (itCreate != context_dictionaryToCreate.end() && !m_platform.isEmpty(context_w)) { + int w0 = m_platform.charCodeAt(context_w, 0); + if (w0 < 256) { + // Write context_numBits zeros first + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + } + // Then write 8 bits of the character value + value = w0; + for (i = 0; i < 8; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + } else { + // Write 1 followed by zeros + value = 1; + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1) | value; + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = 0; + } + // Then write 16 bits of the character value + 
value = w0; + for (i = 0; i < 16; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + } + if (--context_enlargeIn == 0) { + context_enlargeIn = 1 << context_numBits; + ++context_numBits; + } + context_dictionaryToCreate.erase(itCreate); + } else { + value = context_dictionary[context_w]; + writeNBits(context_numBits, value); + } + + if (--context_enlargeIn == 0) { + context_enlargeIn = 1 << context_numBits; + ++context_numBits; + } + + // Add wc to the dictionary + context_dictionary[context_wc] = context_dictSize++; + context_w = context_c; + } + } + + // Output the code for w + if (!m_platform.isEmpty(context_w)) { + auto itCreate = context_dictionaryToCreate.find(context_w); + if (itCreate != context_dictionaryToCreate.end()) { + int w0 = m_platform.charCodeAt(context_w, 0); + if (w0 < 256) { + // Write context_numBits zeros first + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + } + // Then write 8 bits of the character value + value = w0; + for (i = 0; i < 8; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + } else { + // Write 1 followed by zeros + value = 1; + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1) | value; + if (context_data_position == bitsPerChar - 1) { + 
context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = 0; + } + // Then write 16 bits of the character value + value = w0; + for (i = 0; i < 16; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + } + if (--context_enlargeIn == 0) { + context_enlargeIn = 1 << context_numBits; + ++context_numBits; + } + context_dictionaryToCreate.erase(itCreate); + } else { + value = context_dictionary[context_w]; + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + } + + if (--context_enlargeIn == 0) { + context_enlargeIn = 1 << context_numBits; + ++context_numBits; + } + } + + // Mark the end of the stream + value = 2; + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + + // Flush the last char + while (true) { + context_data_val = (context_data_val << 1); + if (context_data_position == bitsPerChar - 1) { + (m_platform.*appendFromInt)(context_data, context_data_val); + break; + } else { + ++context_data_position; + } + } + + return context_data; +} + +// Decompression core + +template +QString LZCore::decompressImpl(int length, int resetValue, GetNextValue 
getNextValue) const { + std::vector dictionary; + int next = 0; + int enlargeIn = 4; + int dictSize = 4; + int numBits = 3; + QString entry; + QString result; + QString w; + + LZDecompressData data{ getNextValue(0), resetValue, 1 }; + + result.reserve(length * 5); + + // Dictionary indexes 0-2 are never read, push placeholders + for (int i = 0; i < 3; ++i) { + dictionary.push_back(m_platform.charFromCode(i)); + } + + auto readBit = [&]() { + int resb = data.val & data.position; + data.position >>= 1; + if (data.position == 0) { + data.position = resetValue; + data.val = getNextValue(data.index++); + } + return resb > 0 ? 1 : 0; + }; + + auto readNBits = [&](int n) { + int bits = 0; + int power = 1; + for (int i = 0; i < n; ++i) { + bits |= readBit() * power; + power <<= 1; + } + return bits; + }; + + next = readNBits(2); + QString c; + switch (next) { + case 0: { + int bits = readNBits(8); + c = m_platform.charFromCode(bits); + break; + } + case 1: { + int bits = readNBits(16); + c = m_platform.charFromCode(bits); + break; + } + case 2: + return QString(); + } + + dictionary.push_back(c); + w = c; + m_platform.append(result, c); + + while (true) { + if (data.index > length) return QString(); + + int cc = readNBits(numBits); + switch (cc) { + case 0: { + int bits = readNBits(8); + dictionary.push_back(m_platform.charFromCode(bits)); + cc = dictSize++; + --enlargeIn; + break; + } + case 1: { + int bits = readNBits(16); + dictionary.push_back(m_platform.charFromCode(bits)); + cc = dictSize++; + --enlargeIn; + break; + } + case 2: + return result; + } + + if (enlargeIn == 0) { + enlargeIn = 1 << numBits; + ++numBits; + } + + if (cc < dictSize) { + entry = dictionary[static_cast(cc)]; + } else if (cc == dictSize) { + entry = m_platform.concat(w, m_platform.charFromCode(m_platform.charCodeAt(w, 0))); + } else { + return QString(); + } + + m_platform.append(result, entry); + + // Add w + entry[0] + dictionary.push_back(m_platform.concat(w, 
m_platform.charFromCode(m_platform.charCodeAt(entry, 0)))); + ++dictSize; + --enlargeIn; + + w = entry; + + if (enlargeIn == 0) { + enlargeIn = 1 << numBits; + ++numBits; + } + } +} + +// Explicit instantiations for common lambda types aren't needed as it's header-only template used locally + +/* +------------------------------------------------------------------------------- +High-level overview of the compression / decompression algorithm implemented: + +This is a variant of the LZ-based dictionary encoder popularly known from the +JavaScript LZ-String project. It is closest in spirit to LZW / LZ78 but with a +few notable twists to optimize for short textual payloads: + +1. Dynamic Dictionary: + - The dictionary starts with 3 reserved entries (0,1,2) used for control / end. + - New sequences are added on-the-fly: when encountering a new concatenation + (w + c) not yet in the dictionary, it is assigned the next incremental code. + +2. Variable Code Width Growth: + - The number of bits used to emit dictionary codes (context_numBits / numBits) + grows as the dictionary size crosses powers of two. + - A countdown (enlargeIn) tracks when to increase the code width. + +3. Literal Emission Strategy: + - Raw characters not yet seen are first queued for creation. When flushed, + they are emitted in one of two literal forms: + * 0 + 8 data bits (for char codes < 256) + * 1 + 16 data bits (for char codes >= 256) + - This mirrors the original LZ-String design to support full Unicode ranges + (or limited ranges when compiled without Qt depending on platform build). + +4. Bit Packing Abstraction: + - Output is packed into an integer accumulator (context_data_val) and flushed + once bitsPerChar bits are filled. Different front-end functions choose + bitsPerChar and a corresponding ILZPlatform::appendFromInt_* method to map + the packed value into a storage character (normal 16-bit, UTF-16-friendly, + Base64 alphabet, etc.). + +5. 
Termination: + - A final code (2) marks end-of-stream, after which the accumulator is padded + and flushed. + +6. Decompression Mirrors Compression: + - Rebuilds the dictionary in the same order. + - Handles the special case when a code equals the next dictionary index + (classic LZW edge case: entry = w + firstChar(w)). + - Increases bit width in lockstep using enlargeIn / numBits logic. + +7. Design Goals: + - Small, allocation-conscious (pre-reserving output & using simple containers). + - Fidelity with original LZ-String output across modes (Base64, UTF-16, raw). + - Separation of platform-specific character / string operations via ILZPlatform. + +------------------------------------------------------------------------------- +*/ diff --git a/src/LZCore.h b/src/LZCore.h new file mode 100644 index 0000000..cd14ad7 --- /dev/null +++ b/src/LZCore.h @@ -0,0 +1,30 @@ +#ifndef LZCORE_H +#define LZCORE_H + +#include "lzstring.h" +#include "ILZPlatform.h" + +class LZCore { +public: + explicit LZCore(const ILZPlatform& platform) : m_platform(platform) {} + + QString compress(const QString& uncompressed) const; + QString compressToUTF16(const QString& uncompressed) const; + QString compressToBase64(const QString& uncompressed) const; + + QString decompress(const QString& compressed) const; + QString decompressFromUTF16(const QString& compressed) const; + QString decompressFromBase64(const QString& compressed) const; + +private: + const ILZPlatform& m_platform; + + // Core helpers + QString compressImpl(const QString& uncompressed, int bitsPerChar, + void (ILZPlatform::*appendFromInt)(QString&, int) const) const; + + template + QString decompressImpl(int length, int resetValue, GetNextValue getNextValue) const; +}; + +#endif // LZCORE_H diff --git a/src/QtLZPlatform.cpp b/src/QtLZPlatform.cpp new file mode 100644 index 0000000..8ad8531 --- /dev/null +++ b/src/QtLZPlatform.cpp @@ -0,0 +1,18 @@ +#ifndef LZSTRING_NO_QT +#include "QtLZPlatform.h" + 
+QtLZPlatform::QtLZPlatform() + : m_keyStrBase64(QLatin1String("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")) +{ + // Precompute reverse dictionary + for (int i = 0, len = m_keyStrBase64.length(); i < len; ++i) { + m_baseReverseDic.insert(m_keyStrBase64.at(i), i); + } +} + +int QtLZPlatform::base64ReverseIndex(int charCode) const { + return m_baseReverseDic.value(QChar(charCode), -1); +} + +#endif // LZSTRING_NO_QT + diff --git a/src/QtLZPlatform.h b/src/QtLZPlatform.h new file mode 100644 index 0000000..60eb1af --- /dev/null +++ b/src/QtLZPlatform.h @@ -0,0 +1,36 @@ +#ifndef QTLZPLATFORM_H +#define QTLZPLATFORM_H + +#ifndef LZSTRING_NO_QT +#include +#include +#include "ILZPlatform.h" + +class QtLZPlatform : public ILZPlatform { +public: + QtLZPlatform(); + + int length(const QString& s) const override { return s.length(); } + bool isEmpty(const QString& s) const override { return s.isEmpty(); } + QString slice(const QString& s, int pos, int len) const override { return s.mid(pos, len); } + QString concat(const QString& a, const QString& b) const override { return a + b; } + void append(QString& dst, const QString& src) const override { dst.append(src); } + + int charCodeAt(const QString& s, int index) const override { return s.at(index).unicode(); } + QString charFromCode(int code) const override { return QString(1, QChar(code)); } + + void appendFromInt_Normal(QString& out, int code) const override { out.append(QChar(code)); } + void appendFromInt_UTF16(QString& out, int code) const override { out.append(QChar(code + 32)); } + void appendFromInt_Base64(QString& out, int code) const override { out.append(m_keyStrBase64.at(code)); } + + int base64ReverseIndex(int charCode) const override; + +private: + QString m_keyStrBase64; + QHash m_baseReverseDic; +}; + +#endif // LZSTRING_NO_QT + +#endif // QTLZPLATFORM_H + diff --git a/src/StdLZPlatform.cpp b/src/StdLZPlatform.cpp new file mode 100644 index 0000000..9bb1a0c --- /dev/null +++ 
b/src/StdLZPlatform.cpp @@ -0,0 +1,14 @@ +#ifdef LZSTRING_NO_QT +#include "StdLZPlatform.h" + +StdLZPlatform::StdLZPlatform() + : m_keyStrBase64("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=") + , m_baseReverse(256, -1) +{ + for (size_t i = 0; i < m_keyStrBase64.size(); ++i) { + m_baseReverse[static_cast(m_keyStrBase64[i])] = static_cast(i); + } +} + +#endif // LZSTRING_NO_QT + diff --git a/src/StdLZPlatform.h b/src/StdLZPlatform.h new file mode 100644 index 0000000..6589e6f --- /dev/null +++ b/src/StdLZPlatform.h @@ -0,0 +1,40 @@ +#ifndef STDLZPLATFORM_H +#define STDLZPLATFORM_H + +#ifdef LZSTRING_NO_QT +#include +#include +#include "ILZPlatform.h" + +class StdLZPlatform : public ILZPlatform { +public: + StdLZPlatform(); + + int length(const QString& s) const override { return static_cast(s.size()); } + bool isEmpty(const QString& s) const override { return s.empty(); } + QString slice(const QString& s, int pos, int len) const override { return s.substr(pos, len); } + QString concat(const QString& a, const QString& b) const override { return a + b; } + void append(QString& dst, const QString& src) const override { dst.append(src); } + + int charCodeAt(const QString& s, int index) const override { + if (index < 0 || index >= static_cast(s.size())) return 0; + return static_cast(s.at(index)); + } + QString charFromCode(int code) const override { return QString(1, static_cast(code)); } + + void appendFromInt_Normal(QString& out, int code) const override { out.push_back(static_cast(code)); } + void appendFromInt_UTF16(QString& out, int code) const override { out.push_back(static_cast(code + 32)); } + void appendFromInt_Base64(QString& out, int code) const override { out.push_back(m_keyStrBase64.at(static_cast(code))); } + + int base64ReverseIndex(int charCode) const override { + return m_baseReverse[static_cast(charCode)]; + } + +private: + QString m_keyStrBase64; + std::vector m_baseReverse; // size 256 +}; + +#endif // LZSTRING_NO_QT + +#endif 
// STDLZPLATFORM_H diff --git a/src/lzstring.cpp b/src/lzstring.cpp index 7c7f116..b980732 100644 --- a/src/lzstring.cpp +++ b/src/lzstring.cpp @@ -1,647 +1,49 @@ #include "lzstring.h" +#include "LZCore.h" +#include "ILZPlatform.h" +#ifndef LZSTRING_NO_QT +#include "QtLZPlatform.h" +#else +#include "StdLZPlatform.h" +#endif -#include -#include -#include -#include -#include -#include - -static const QString keyStrBase64 = QLatin1String("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="); -static const QString keyStrUriSafe = QLatin1String("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$"); - -static QChar compressGetCharFromInt(int a) -{ - return QChar(a); -} - -static QChar compressToUTF16GetCharFromInt(int a) -{ - return QChar(a+32); -} - -static QChar compressToBase64GetCharFromInt(int a) -{ - return keyStrBase64.at(a); -} - -QString LZString::compress(const QString &uncompressed) -{ - return _compress(uncompressed, 16, &compressGetCharFromInt); +// Provide a single access point to the platform abstraction +static const ILZPlatform* getPlatform() { +#ifndef LZSTRING_NO_QT + static QtLZPlatform platform; +#else + static StdLZPlatform platform; +#endif + return &platform; } -QString LZString::compressToUTF16(const QString &uncompressed) -{ - if (uncompressed.isEmpty()) - return ""; - - QString result = _compress(uncompressed, 15, &compressToUTF16GetCharFromInt); - result.append(QLatin1String(" ")); - return result; +// Lightweight delegations to the core algorithm +QString LZString::compress(const QString &uncompressed) { + LZCore core(*getPlatform()); + return core.compress(uncompressed); } -QString LZString::compressToBase64(const QString &uncompressed) -{ - if (uncompressed.isEmpty()) - return ""; - - QString res = _compress(uncompressed, 6, &compressToBase64GetCharFromInt); - switch (res.length() % 4) // To produce valid Base64 - { - default: // When could this happen ? 
- case 0: return res; - case 1: return res.append(QLatin1String("===")); - case 2: return res.append(QLatin1String("==")); - case 3: return res.append(QLatin1String("=")); - } - return res; +QString LZString::compressToUTF16(const QString &uncompressed) { + LZCore core(*getPlatform()); + return core.compressToUTF16(uncompressed); } -template -QString LZString::_compress(const QString &uncompressed, int bitsPerChar, GetCharFromInt getCharFromInt) -{ - int i = 0; - int value = 0; - QBitArray context_c_dictionary(65536); // All bits initialized to 0 - QHash context_dictionary; - QHash context_dictionaryToCreate; - QStringRef context_c; - QStringRef context_wc; - QStringRef context_w; - int context_enlargeIn = 2; // Compensate for the first entry which should not count - int context_dictSize = 3; - int context_numBits = 2; - QString context_data; - int context_data_val = 0; - int context_data_position = 0; - - context_data.reserve( qMax(255, uncompressed.length()/5) ); - context_w = QStringRef(&uncompressed, 0, 0); - - for (int ii=0, size=uncompressed.length(); ii::iterator context_w_it = - context_dictionaryToCreate.find(context_w); - - if (context_w_it != context_dictionaryToCreate.end()) - { - if (context_w.at(0).unicode() < 256) - { - for (i=0; i> 1; - } - } - else - { - value = 1; - for (i=0; i> 1; - } - } - context_enlargeIn--; - if (context_enlargeIn == 0) - { - context_enlargeIn = 1 << context_numBits; - context_numBits++; - } - context_dictionaryToCreate.erase(context_w_it); - } - else - { - value = context_dictionary.value(context_w); - for (i=0; i> 1; - } - } - context_enlargeIn--; - if (context_enlargeIn == 0) - { - context_enlargeIn = 1 << context_numBits; - context_numBits++; - } - - // Add wc to the dictionary. - Q_ASSERT(context_wc.length() > 1); - context_dictionary.insert(context_wc, context_dictSize++); - context_w = context_c; - } - } - - // Output the code for w. 
- if (!context_w.isEmpty()) - { - QHash::iterator context_w_it = - context_dictionaryToCreate.find(context_w); - - if (context_w_it != context_dictionaryToCreate.end()) - { - if (context_w.at(0).unicode() < 256) - { - for (i=0; i> 1; - } - } - else - { - value = 1; - for (i=0; i> 1; - } - } - context_enlargeIn--; - if (context_enlargeIn == 0) - { - context_enlargeIn = 1 << context_numBits; - context_numBits++; - } - context_dictionaryToCreate.erase(context_w_it); - } - else - { - value = context_dictionary.value(context_w); - for (i=0; i> 1; - } - } - - context_enlargeIn--; - if (context_enlargeIn == 0) - { - context_enlargeIn = 1 << context_numBits; - context_numBits++; - } - } - - // Mark the end of the stream - value = 2; - for (i=0; i> 1; - } - - // Flush the last char - while (true) - { - context_data_val = (context_data_val << 1); - if (context_data_position == bitsPerChar-1) - { - context_data.append(getCharFromInt(context_data_val)); - break; - } - else - { - context_data_position++; - } - } - - return context_data; -} - -class DecompressGetNextValue -{ -public: - DecompressGetNextValue(const QString &compressed) : - m_compressed(compressed.constData()) {} - - int operator()(int index) const - { - return m_compressed[index].unicode(); - } - -private: - const QChar *m_compressed; -}; - -class DecompressFromUTF16GetNextValue -{ -public: - DecompressFromUTF16GetNextValue(const QString &compressed) : - m_compressed(compressed.constData()) {} - - int operator()(int index) const - { - return m_compressed[index].unicode() - 32; - } - -private: - const QChar *m_compressed; -}; - -class DecompressGetBaseValue -{ -public: - DecompressGetBaseValue(const QString &compressed, const QString &alphabet) : - m_compressed(compressed.constData()), - m_alphabet(alphabet) - { - for (int i=0, len=alphabet.length(); i m_baseReverseDic; -}; - -QString LZString::decompress(const QString &compressed) -{ - if (compressed.isEmpty()) - return ""; - - return 
_decompress(compressed.length(), 32768, DecompressGetNextValue(compressed)); +QString LZString::compressToBase64(const QString &uncompressed) { + LZCore core(*getPlatform()); + return core.compressToBase64(uncompressed); } -QString LZString::decompressFromUTF16(const QString &compressed) -{ - if (compressed.isEmpty()) - return ""; - - return _decompress(compressed.length(), 16384, DecompressFromUTF16GetNextValue(compressed)); +QString LZString::decompress(const QString &compressed) { + LZCore core(*getPlatform()); + return core.decompress(compressed); } -QString LZString::decompressFromBase64(const QString &compressed) -{ - if (compressed.isEmpty()) - return ""; - - return _decompress(compressed.length(), 32, DecompressGetBaseValue(compressed, keyStrBase64)); +QString LZString::decompressFromUTF16(const QString &compressed) { + LZCore core(*getPlatform()); + return core.decompressFromUTF16(compressed); } -struct DecompressData -{ - int val; - int position; - int index; -}; - -template -QString LZString::_decompress(int length, int resetValue, GetNextValue getNextValue) -{ - QVector dictionary; - int next = 0; - int enlargeIn = 4; - int dictSize = 4; - int numBits = 3; - QString entry; - QString result; - QString w; - int bits, resb, maxpower, power; - QString c; - DecompressData data; - data.val = getNextValue(0); - data.position = resetValue; - data.index = 1; - - result.reserve(length*5); - - // Put anything at dictionary indexes 0-2 - this will never be read. 
- // See question: - // http://pieroxy.net/blog/replyToBlogEntry.action?entry=1368091620000&comment=1474381950353 - // and response: - // http://pieroxy.net/blog/replyToBlogEntry.action?entry=1368091620000&comment=1474435834165 - for (int i=0; i<3; ++i) - { - dictionary.append(QString::number(i)); - } - - bits = 0; - maxpower = 4; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - - switch (next = bits) - { - case 0: - bits = 0; - maxpower = 256; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - c = QChar(bits); - break; - - case 1: - bits = 0; - maxpower = 65536; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - c = QChar(bits); - break; - - case 2: - return ""; - } - - dictionary.append(c); // "c" may be empty string - w = c; - result.append(c); - while (true) - { - if (data.index > length) - return ""; - - bits = 0; - maxpower = 1 << numBits; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - - // Very strange here, "c" above is as char/string, but - // further "c" is a int, rename "c" in the switch as "cc". 
- int cc; - switch (cc = bits) - { - case 0: - bits = 0; - maxpower = 256; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - - cc = dictSize++; - dictionary.append(QChar(bits)); - enlargeIn--; - break; - - case 1: - bits = 0; - maxpower = 65536; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - - cc = dictSize++; - dictionary.append(QChar(bits)); - enlargeIn--; - break; - - case 2: - return result; - } - - if (enlargeIn == 0) - { - enlargeIn = 1 << numBits; - numBits++; - } - - Q_ASSERT(dictSize == dictionary.size()); - if (cc < dictSize && !dictionary.at(cc).isEmpty()) - { - entry = dictionary.at(cc); - } - else - { - if (cc == dictSize) - entry = w % w.at(0); // % - QStringBuilder - else - return QString(); - } - result.append(entry); - - // Add w+entry[0] to the dictionary. 
- dictSize++; - dictionary.append(w % entry.at(0)); // % - QStringBuilder - enlargeIn--; - - w = entry; - - if (enlargeIn == 0) - { - enlargeIn = 1 << numBits; - numBits++; - } - } +QString LZString::decompressFromBase64(const QString &compressed) { + LZCore core(*getPlatform()); + return core.decompressFromBase64(compressed); } diff --git a/src/lzstring.h b/src/lzstring.h index 8101e0c..8e04dd9 100644 --- a/src/lzstring.h +++ b/src/lzstring.h @@ -11,7 +11,14 @@ * */ -#include +// Conditional compilation for Qt vs non-Qt builds +#ifndef LZSTRING_NO_QT + #include + #define LZSTRING_USE_QT +#else + #include + typedef std::string QString; +#endif class LZString { @@ -23,13 +30,6 @@ class LZString static QString decompress(const QString &compressed); static QString decompressFromUTF16(const QString &compressed); static QString decompressFromBase64(const QString &compressed); - -private: - template - static QString _compress(const QString &uncompressed, int bitsPerChar, GetCharFromInt getCharFromInt); - - template - static QString _decompress(int length, int resetValue, GetNextValue getNextValue); }; #endif // LZSTRING_H diff --git a/src/lzstring.pri b/src/lzstring.pri index 15ee7b8..00c65b1 100644 --- a/src/lzstring.pri +++ b/src/lzstring.pri @@ -1,6 +1,33 @@ - INCLUDEPATH += $$PWD DEPENDPATH += $$PWD -HEADERS += $$PWD/lzstring.h -SOURCES += $$PWD/lzstring.cpp +HEADERS += $$PWD/lzstring.h \ + $$PWD/LZCore.h \ + $$PWD/ILZPlatform.h \ + $$PWD/QtLZPlatform.h \ + $$PWD/StdLZPlatform.h +SOURCES += $$PWD/lzstring.cpp \ + $$PWD/LZCore.cpp \ + $$PWD/QtLZPlatform.cpp \ + $$PWD/StdLZPlatform.cpp + +# Include main.cpp by default for executable builds +# Only exclude it when building as a library +!contains(CONFIG, staticlib):!contains(CONFIG, lib):!contains(CONFIG, no-main) { + SOURCES += $$PWD/main.cpp +} + +# Conditional compilation for non-Qt builds +contains(CONFIG, no-qt) { + DEFINES += LZSTRING_NO_QT + QT = +} else { + QT += core testlib +} + +# Legacy support for explicit 
CLI configuration +contains(CONFIG, lzstring-cli) { + !contains(SOURCES, $$PWD/main.cpp) { + SOURCES += $$PWD/main.cpp + } +} diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..45435e5 --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,223 @@ +#ifndef LZSTRING_NO_QT +#include +#include +#include +#include +#include +#else +#include +#include +#include +#endif + +#include "lzstring.h" + +int main(int argc, char* argv[]) { +#ifndef LZSTRING_NO_QT + QCoreApplication app(argc, argv); + QCoreApplication::setApplicationName("LZString CLI"); + QCoreApplication::setApplicationVersion("1.0"); + + QCommandLineParser parser; + parser.setApplicationDescription("LZString Compression Utility"); + + QCommandLineOption compressOption("compress", "Compress input stream"); + QCommandLineOption compressToUTF16Option("compressToUTF16", "Compress input stream to UTF16 format"); + QCommandLineOption compressToBase64Option("compressToBase64", "Compress input stream to Base64 format"); + QCommandLineOption decompressOption("decompress", "Decompress input stream"); + QCommandLineOption decompressFromUTF16Option("decompressFromUTF16", "Decompress input stream from UTF16 format"); + QCommandLineOption decompressFromBase64Option("decompressFromBase64", "Decompress input stream from Base64 format"); + QCommandLineOption testOption("test", "Run internal round-trip tests on the provided input"); + + parser.addOption(compressOption); + parser.addOption(compressToUTF16Option); + parser.addOption(compressToBase64Option); + parser.addOption(decompressOption); + parser.addOption(decompressFromUTF16Option); + parser.addOption(decompressFromBase64Option); + parser.addOption(testOption); + + parser.addPositionalArgument("[input]", "The input file to process or '-' for standard input", "[input]"); + parser.addPositionalArgument("[output]", "The output file or '-' for standard output", "[output]"); + + parser.process(app); + + const QStringList args = parser.positionalArguments(); + 
QTextStream in(stdin); + QTextStream out(stdout); + QString inputContents, outputContents; + + if (args.isEmpty() || args.first() == "-") { + inputContents = in.readAll(); + } + else { + QFile inputFile(args.first()); + if (!inputFile.open(QIODevice::ReadOnly | QIODevice::Text)) { + qCritical() << "Error: Cannot open input file for reading."; + return -1; + } + in.setDevice(&inputFile); + inputContents = in.readAll(); + inputFile.close(); + } + + // Handle test option first (ignores other operation flags if present) + if (parser.isSet(testOption)) { + QString result; + // compress / decompress + result = LZString::decompress(LZString::compress(inputContents)); + out << "compress: " << (result == inputContents ? "true" : "false") << '\n'; + // UTF16 + result = LZString::decompressFromUTF16(LZString::compressToUTF16(inputContents)); + out << "compressToUTF16: " << (result == inputContents ? "true" : "false") << '\n'; + // Base64 + result = LZString::decompressFromBase64(LZString::compressToBase64(inputContents)); + out << "compressToBase64: " << (result == inputContents ? 
"true" : "false") << '\n'; + return 0; + } + + if (parser.isSet(compressOption)) { + outputContents = LZString::compress(inputContents); + } + else if (parser.isSet(compressToUTF16Option)) { + outputContents = LZString::compressToUTF16(inputContents); + } + else if (parser.isSet(compressToBase64Option)) { + outputContents = LZString::compressToBase64(inputContents); + } + else if (parser.isSet(decompressOption)) { + outputContents = LZString::decompress(inputContents); + } + else if (parser.isSet(decompressFromUTF16Option)) { + outputContents = LZString::decompressFromUTF16(inputContents); + } + else if (parser.isSet(decompressFromBase64Option)) { + outputContents = LZString::decompressFromBase64(inputContents); + } + else { + qCritical() << "Error: No valid operation selected."; + return -1; + } + + if (args.size() < 2 || args[1] == "-") { + out << outputContents; + } + else { + QFile outputFile(args[1]); + if (!outputFile.open(QIODevice::WriteOnly | QIODevice::Text)) { + qCritical() << "Error: Cannot open output file for writing."; + return -1; + } + out.setDevice(&outputFile); + out << outputContents; + outputFile.close(); + } + + return 0; +#else + // Simple non-Qt implementation for basic functionality + auto print_usage = [argv]() { + std::cerr + << "Usage: " << argv[0] << " [input] [output]\n" + << "Operations:\n" + << " --compress Compress input\n" + << " --compressToUTF16 Compress input to UTF16\n" + << " --compressToBase64 Compress input to Base64\n" + << " --decompress Decompress input\n" + << " --decompressFromUTF16 Decompress input from UTF16\n" + << " --decompressFromBase64 Decompress input from Base64\n" + << " --test Run internal round-trip tests on the provided input\n" + << "Notes: Use '-' as [input] or [output] to read from stdin or write to stdout.\n"; + }; + + if (argc < 2 || std::string(argv[1]) == "--help" || std::string(argv[1]) == "-h") { + print_usage(); + return (argc < 2) ? 
-1 : 0; + } + + std::string operation = argv[1]; + std::string inputContents; + + // Read input + if (argc < 3 || std::string(argv[2]) == "-") { + std::string line; + while (std::getline(std::cin, line)) { + inputContents += line + "\n"; + } + } else { + std::ifstream file(argv[2], std::ios::in | std::ios::binary); + if (!file.is_open()) { + std::cerr << "Error: Cannot open input file for reading." << std::endl; + return -1; + } + std::string line; + while (std::getline(file, line)) { + inputContents += line + "\n"; + } + file.close(); + } + + std::string outputContents; + if (operation == "--compress") { + outputContents = LZString::compress(inputContents); + } else if (operation == "--compressToUTF16") { + outputContents = LZString::compressToUTF16(inputContents); + } else if (operation == "--compressToBase64") { + outputContents = LZString::compressToBase64(inputContents); + } else if (operation == "--decompress") { + outputContents = LZString::decompress(inputContents); + } else if (operation == "--decompressFromUTF16") { + outputContents = LZString::decompressFromUTF16(inputContents); + } else if (operation == "--decompressFromBase64") { + outputContents = LZString::decompressFromBase64(inputContents); + } else if (operation == "--test") { + // Test compress/decompress + outputContents = LZString::decompress(LZString::compress(inputContents)); + std::cout << (outputContents == inputContents ? "compress: true\n" : "compress: false\n"); + + // Test compressToUTF16/decompressFromUTF16 + outputContents = LZString::decompressFromUTF16(LZString::compressToUTF16(inputContents)); + std::cout << (outputContents == inputContents ? "compressToUTF16: true\n" : "compressToUTF16: false\n"); + + // Test compressToBase64/decompressFromBase64 + outputContents = LZString::decompressFromBase64(LZString::compressToBase64(inputContents)); + std::cout << (outputContents == inputContents ? 
"compressToBase64: true\n" : "compressToBase64: false\n"); + + return 0; + } else { + std::cerr << "Error: Invalid operation. Use --help for usage." << std::endl; + return -1; + } + + // Write output + if (argc < 4 || std::string(argv[3]) == "-") { + std::cout << outputContents; + } else { + std::ofstream file(argv[3], std::ios::out | std::ios::binary); + if (!file.is_open()) { + std::cerr << "Error: Cannot open output file for writing." << std::endl; + return -1; + } + file << outputContents; + file.close(); + } + + return 0; +#endif +} + +// Compress a file lzstring --compress input.txt output.lz + +// Compress a file to UTF16 lzstring --compressToUTF16 input.txt output.lz16 + +// Compress a file to Base64 +// lzstring --compressToBase64 input.txt output.lz64 + +// Decompress a file +//lzstring --decompress input.lz output.txt + +// Decompress a file from UTF16 +// lzstring --decompressFromUTF16 input.lz16 output.txt + +// Decompress a file from Base64 +// lzstring --decompressFromBase64 input.lz64 output.txt diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..3fdb4a0 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,30 @@ +# CMake test configuration converted from original qmake project. + +set(TEST_SRC + lzstring_test/lzstring_test.cpp +) + +add_executable(lzstring_test + ${TEST_SRC} + ${LZSTRING_SOURCES} + ${LZSTRING_HEADERS} + ${CMAKE_SOURCE_DIR}/src/QtLZPlatform.cpp + ${CMAKE_SOURCE_DIR}/src/QtLZPlatform.h +) + +# Link Qt modules (Core + Test) detected in parent. +target_link_libraries(lzstring_test PRIVATE Qt::Core Qt::Test) + +# Include source directory for headers like lzstring.h +target_include_directories(lzstring_test PRIVATE ${CMAKE_SOURCE_DIR}/src) + +# Copy test data (data.json) next to the test binary for runtime access. 
+add_custom_command(TARGET lzstring_test POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_CURRENT_SOURCE_DIR}/lzstring_test/data.json + $/data.json) + +# On Windows add console definition for consistency with original CONFIG += console +if(WIN32) + target_compile_definitions(lzstring_test PRIVATE _CONSOLE) +endif() diff --git a/tests/lzstring_test/std_roundtrip_test.cpp b/tests/lzstring_test/std_roundtrip_test.cpp new file mode 100644 index 0000000..3463952 --- /dev/null +++ b/tests/lzstring_test/std_roundtrip_test.cpp @@ -0,0 +1,54 @@ +#include +#include +#include +#include +#include "lzstring.h" + +// Simple assertion helper +static void check(bool cond, const std::string &msg) { + if(!cond) { + std::cerr << "[FAIL] " << msg << std::endl; + std::exit(1); + } +} + +static std::string generateSequence(int len) { + std::string s; + s.reserve(len); + for(int i=0;i((i % 126) + 1); + if(c=='\0') c = 'X'; + s.push_back(c); + } + return s; +} + +int main() { + std::vector corpus = { + "", "a", "hello", "The quick brown fox jumps over the lazy dog", + std::string(100, 'A'), + "0123456789!@#$%^&*()_+-={}|[]:'<>?,./\\\"", + }; + // Add varying length sequences including an edge near 0xD7FF but scaled down for speed + for(int len : {1,2,3,10,63,127,255,512,1024}) { + corpus.push_back(generateSequence(len)); + } + + for(const auto &plain : corpus) { + auto c1 = LZString::compress(plain); + auto d1 = LZString::decompress(c1); + check(d1 == plain, "compress/decompress mismatch"); + + auto c2 = LZString::compressToBase64(plain); + auto d2 = LZString::decompressFromBase64(c2); + check(d2 == plain, "base64 round-trip mismatch"); + + auto c3 = LZString::compressToUTF16(plain); + auto d3 = LZString::decompressFromUTF16(c3); + check(d3 == plain, "UTF16 round-trip mismatch"); + } + + std::cout << "[OK] All std::string round-trip tests passed (" << corpus.size() << " cases)." << std::endl; + return 0; +}