From 374b88078830be26354d0c50e3ef4416b61865fd Mon Sep 17 00:00:00 2001 From: vakarelov Date: Wed, 22 Nov 2023 13:20:13 -0800 Subject: [PATCH 01/10] Update lzstring.pro --- lzstring.pro | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lzstring.pro b/lzstring.pro index 8515aa2..6e05144 100644 --- a/lzstring.pro +++ b/lzstring.pro @@ -1,5 +1,8 @@ - -TEMPLATE = subdirs -SUBDIRS += tests +TEMPLATE = app +# SUBDIRS += tests include(src/lzstring.pri) + +TARGET = lzstring + +CONFIG = release console From 52859342da1062f9d5174590af7f7e1b46e9611c Mon Sep 17 00:00:00 2001 From: vakarelov Date: Wed, 22 Nov 2023 13:22:11 -0800 Subject: [PATCH 02/10] Update lzstring.h --- src/lzstring.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lzstring.h b/src/lzstring.h index 8101e0c..fb3e9c8 100644 --- a/src/lzstring.h +++ b/src/lzstring.h @@ -11,7 +11,7 @@ * */ -#include +#include class LZString { From f7be8e15226ea9051353bb5e62cb6f19fd47fd6d Mon Sep 17 00:00:00 2001 From: vakarelov Date: Wed, 22 Nov 2023 13:23:07 -0800 Subject: [PATCH 03/10] Update lzstring.cpp --- src/lzstring.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lzstring.cpp b/src/lzstring.cpp index 7c7f116..b751b9f 100644 --- a/src/lzstring.cpp +++ b/src/lzstring.cpp @@ -1,11 +1,11 @@ #include "lzstring.h" -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include static const QString keyStrBase64 = QLatin1String("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="); static const QString keyStrUriSafe = QLatin1String("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$"); From 43f1d286f692ae54fc7ab564269c332e8d6082b1 Mon Sep 17 00:00:00 2001 From: vakarelov Date: Wed, 22 Nov 2023 13:24:48 -0800 Subject: [PATCH 04/10] Update lzstring.pri --- src/lzstring.pri | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lzstring.pri 
b/src/lzstring.pri index 15ee7b8..abd9b0f 100644 --- a/src/lzstring.pri +++ b/src/lzstring.pri @@ -3,4 +3,4 @@ INCLUDEPATH += $$PWD DEPENDPATH += $$PWD HEADERS += $$PWD/lzstring.h -SOURCES += $$PWD/lzstring.cpp +SOURCES += $$PWD/main.cpp $$PWD/lzstring.cpp From 1e66f540607598f844442578033c57a5549533a7 Mon Sep 17 00:00:00 2001 From: vakarelov Date: Wed, 22 Nov 2023 13:26:21 -0800 Subject: [PATCH 05/10] Update lzstring.pro --- lzstring.pro | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lzstring.pro b/lzstring.pro index 6e05144..84002d0 100644 --- a/lzstring.pro +++ b/lzstring.pro @@ -1,5 +1,11 @@ TEMPLATE = app -# SUBDIRS += tests +SUBDIRS += tests + +LIBS += -L/usr/lib/x86_64-linux-gnu -lQt5Core -lQt5Widgets + +INCLUDEPATH += /usr/include/x86_64-linux-gnu/qt5/ + +QT += core widgets include(src/lzstring.pri) From bf25df7ebdb8606397916d48a1185e6d3e1a004c Mon Sep 17 00:00:00 2001 From: vakarelov Date: Wed, 22 Nov 2023 13:31:15 -0800 Subject: [PATCH 06/10] Create main.cpp The file defining the exec file --- src/main.cpp | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 src/main.cpp diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..a7d2c0e --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include "lzstring.h" + +int main(int argc, char* argv[]) { + QCoreApplication app(argc, argv); + QCoreApplication::setApplicationName("LZString CLI"); + QCoreApplication::setApplicationVersion("1.0"); + + QCommandLineParser parser; + parser.setApplicationDescription("LZString Compression Utility"); + + QCommandLineOption compressOption("compress", "Compress input stream"); + QCommandLineOption compressToUTF16Option("compressToUTF16", "Compress input stream to UTF16 format"); + QCommandLineOption compressToBase64Option("compressToBase64", "Compress input stream to Base64 format"); + QCommandLineOption 
decompressOption("decompress", "Decompress input stream"); + QCommandLineOption decompressFromUTF16Option("decompressFromUTF16", "Decompress input stream from UTF16 format"); + QCommandLineOption decompressFromBase64Option("decompressFromBase64", "Decompress input stream from Base64 format"); + + parser.addOption(compressOption); + parser.addOption(compressToUTF16Option); + parser.addOption(compressToBase64Option); + parser.addOption(decompressOption); + parser.addOption(decompressFromUTF16Option); + parser.addOption(decompressFromBase64Option); + + parser.addPositionalArgument("[input]", "The input file to process or '-' for standard input", "[input]"); + parser.addPositionalArgument("[output]", "The output file or '-' for standard output", "[output]"); + + parser.process(app); + + const QStringList args = parser.positionalArguments(); + QTextStream in(stdin); + QTextStream out(stdout); + QString inputContents, outputContents; + + if (args.isEmpty() || args.first() == "-") { + inputContents = in.readAll(); + } + else { + QFile inputFile(args.first()); + if (!inputFile.open(QIODevice::ReadOnly | QIODevice::Text)) { + qCritical() << "Error: Cannot open input file for reading."; + return -1; + } + in.setDevice(&inputFile); + inputContents = in.readAll(); + inputFile.close(); + } + + if (parser.isSet(compressOption)) { + outputContents = LZString::compress(inputContents); + } + else if (parser.isSet(compressToUTF16Option)) { + outputContents = LZString::compressToUTF16(inputContents); + } + else if (parser.isSet(compressToBase64Option)) { + outputContents = LZString::compressToBase64(inputContents); + } + else if (parser.isSet(decompressOption)) { + outputContents = LZString::decompress(inputContents); + } + else if (parser.isSet(decompressFromUTF16Option)) { + outputContents = LZString::decompressFromUTF16(inputContents); + } + else if (parser.isSet(decompressFromBase64Option)) { + outputContents = LZString::decompressFromBase64(inputContents); + } + else { + 
qCritical() << "Error: No valid operation selected."; + return -1; + } + + if (args.size() < 2 || args[1] == "-") { + out << outputContents; + } + else { + QFile outputFile(args[1]); + if (!outputFile.open(QIODevice::WriteOnly | QIODevice::Text)) { + qCritical() << "Error: Cannot open output file for writing."; + return -1; + } + out.setDevice(&outputFile); + out << outputContents; + outputFile.close(); + } + + return 0; +} + // Compress a file lzstring --compress input.txt output.lz + + // Compress a file to UTF16 lzstring --compressToUTF16 input.txt output.lz16 + + // Compress a file to Base64 + // lzstring --compressToBase64 input.txt output.lz64 + + // Decompress a file + //lzstring --decompress input.lz output.txt + + // Decompress a file from UTF16 + // lzstring --decompressFromUTF16 input.lz16 output.txt + + // Decompress a file from Base64 + // lzstring --decompressFromBase64 input.lz64 output.txt + + From dc3202ace83dc7a0b343cdbc50d417656bbbbbe7 Mon Sep 17 00:00:00 2001 From: vakarelov Date: Wed, 22 Nov 2023 13:32:13 -0800 Subject: [PATCH 07/10] Update lzstring.pro --- lzstring.pro | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lzstring.pro b/lzstring.pro index 84002d0..9a379a3 100644 --- a/lzstring.pro +++ b/lzstring.pro @@ -1,10 +1,9 @@ TEMPLATE = app SUBDIRS += tests +#EDIT to fit your configuration LIBS += -L/usr/lib/x86_64-linux-gnu -lQt5Core -lQt5Widgets - INCLUDEPATH += /usr/include/x86_64-linux-gnu/qt5/ - QT += core widgets include(src/lzstring.pri) From 65727a16fc559a710752cb091e35e26aeee84b4a Mon Sep 17 00:00:00 2001 From: vakarelov Date: Wed, 22 Nov 2023 13:44:34 -0800 Subject: [PATCH 08/10] Update README.md --- README.md | 128 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 112 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 4d6d382..edb5fa5 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,114 @@ -

Qt implementation of LZ-String, version 1.4.4.

-

-Based on the LZ-String compression algorithm found here:
+# LZString +Qt implementation of LZ-String as an executable + +Based on the LZ-String compression algorithm found here: http://pieroxy.net/blog/pages/lz-string/index.html -

-

-WTFPL Licence
-http://www.wtfpl.net/ -

-Implemented functions:
-
    -
  • compress() / decompress() - YES
  • -
  • compressToUTF16() / decompressFromUTF16() - YES
  • -
  • compressToBase64() / decompressFromBase64() - YES
  • -
  • compressToUint8Array() / decompressFromUint8Array() - NO
  • -
  • compressToEncodedURIComponent() / decompressFromEncodedURIComponent() - NO
  • -
+ +## Installation + +### Prerequisites + +Before installing lzstring, you need to ensure that Qt5 is installed on your system. +Use the following commands to install Qt5: + +#### For Debian/Ubuntu based systems: + +```shell +sudo apt-get update +sudo apt-get install qt5-default qtbase5-dev-tools qtchooser qt5-qmake qtbase5-dev +``` + +#### For Fedora systems: + +```shell +sudo dnf install qt5 qt5-qtbase-devel +``` + +### Compiling lzstring + +Once Qt5 is installed, you can compile lzstring with `qmake` and `make`. + +Clone the repository and navigate into the directory: + +```shell +git clone https://github.com/vakarelov/lzstring.git +cd lzstring +``` + +Use `qmake` to create the Makefile: + +```shell +qmake +``` + +Now compile the project using `make`: + +```shell +make +``` + +The `lzstring` executable will be generated in the current directory. + +## Usage + +To use `lzstring`, you can either specify input and output files or use standard input and output to work with pipes. + +### Use Cases + +_Compress a file to LZ-String format:_ + +```shell +./lzstring --compress input.txt output.lz +``` + +_Compress a file to LZ-String format with UTF-16 encoding:_ + +```shell +./lzstring --compressToUTF16 input.txt output.lz16 +``` + +_Compress a file to Base64 encoded LZ-String format:_ + +```shell +./lzstring --compressToBase64 input.txt output.lz64 +``` + +_Decompress from LZ-String format to original text:_ + +```shell +./lzstring --decompress input.lz output.txt +``` + +_Decompress from UTF-16 encoded LZ-String format:_ + +```shell +./lzstring --decompressFromUTF16 input.lz16 output.txt +``` + +_Decompress from Base64 encoded LZ-String format:_ + +```shell +./lzstring --decompressFromBase64 input.lz64 output.txt +``` + +_Working with pipes, compress to LZ-String format:_ + +```shell +cat input.txt | ./lzstring --compress - - +``` + +_Working with pipes, decompress from LZ-String format:_ + +```shell +cat input.lz | ./lzstring --decompress - - +``` + +_Note: Replace 
`-` with file paths to work with files or use `-` for both input and output to use standard input and output._ + +## License + +This project is licensed under the [WTFPL License](http://www.wtfpl.net/). +``` + +Please make sure to replace `https://github.com/vakarelov/lzstring.git` with the actual URL of your repository where the `lzstring` project is hosted From dfdbdfa37d531d01fbc226457dd524955d72f7b4 Mon Sep 17 00:00:00 2001 From: vakarelov Date: Wed, 22 Nov 2023 14:34:11 -0800 Subject: [PATCH 09/10] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index edb5fa5..b753f8e 100644 --- a/README.md +++ b/README.md @@ -111,4 +111,3 @@ _Note: Replace `-` with file paths to work with files or use `-` for both input This project is licensed under the [WTFPL License](http://www.wtfpl.net/). ``` -Please make sure to replace `https://github.com/vakarelov/lzstring.git` with the actual URL of your repository where the `lzstring` project is hosted From ab544c65c5db0a2d0f5183bd3cb847b1610fcadf Mon Sep 17 00:00:00 2001 From: vakarelov Date: Wed, 24 Sep 2025 13:31:56 -0700 Subject: [PATCH 10/10] =?UTF-8?q?This=20change=20restructures=20the=20impl?= =?UTF-8?q?ementation=20from=20a=20single=20tightly=20coupled=20block=20in?= =?UTF-8?q?to=20a=20layered,=20extensible=20architecture=20while=20preserv?= =?UTF-8?q?ing=20the=20public=20API=20(LZString::{compress,compressToUTF16?= =?UTF-8?q?,compressToBase64,decompress,decompressFromUTF16,decompressFrom?= =?UTF-8?q?Base64}).=20Highlights:=20Core=20/=20Algorithm=20Separation=20I?= =?UTF-8?q?ntroduced=20LZCore=20to=20encapsulate=20compression=20and=20dec?= =?UTF-8?q?ompression=20logic.=20LZString=20is=20now=20a=20thin=20fa=C3=A7?= =?UTF-8?q?ade=20delegating=20to=20LZCore,=20improving=20readability=20and?= =?UTF-8?q?=20testability.=20Unified=20internal=20logic=20across=20normal,?= =?UTF-8?q?=20UTF-16,=20and=20Base64=20variants=20by=20parameterizing=20bi?= 
=?UTF-8?q?t=20width=20and=20output=20mapping.=20Platform=20Abstraction=20?= =?UTF-8?q?Added=20ILZPlatform=20interface=20to=20abstract=20string=20leng?= =?UTF-8?q?th,=20slicing,=20concatenation,=20character=20code=20translatio?= =?UTF-8?q?n,=20and=20output=20symbol=20mapping.=20Implemented=20QtLZPlatf?= =?UTF-8?q?orm=20for=20QString-based=20builds.=20Added=20optional=20StdLZP?= =?UTF-8?q?latform=20(activated=20via=20LZSTRING=5FNO=5FQT)=20enabling=20u?= =?UTF-8?q?se=20without=20Qt=20by=20typedefing=20QString=20to=20std::strin?= =?UTF-8?q?g.=20Base64=20alphabet=20and=20reverse=20lookup=20moved=20into?= =?UTF-8?q?=20platform=20objects=20with=20precomputed=20reverse=20maps.=20?= =?UTF-8?q?Compression=20Path=20Improvements=20Replaced=20ad=20hoc=20state?= =?UTF-8?q?=20handling=20with=20clearer=20local=20lambdas=20(writeBit,=20w?= =?UTF-8?q?riteNBits).=20Reduced=20duplication=20in=20emitting=20character?= =?UTF-8?q?=20codes=20(8-bit=20vs=2016-bit=20paths)=20by=20passing=20a=20p?= =?UTF-8?q?latform=20callback.=20Preallocation=20heuristics=20retained=20(?= =?UTF-8?q?reserve=20based=20on=20input=20size)=20to=20minimize=20realloca?= =?UTF-8?q?tions.=20Dictionary=20and=20creation=20tracking=20now=20use=20s?= =?UTF-8?q?td=20containers=20with=20explicit=20lifecycle.=20Decompression?= =?UTF-8?q?=20Path=20Improvements=20Rewrote=20bit=20reading=20using=20comp?= =?UTF-8?q?act=20helpers=20(readBit,=20readNBits)=20for=20clarity.=20Deleg?= =?UTF-8?q?ated=20source=20symbol=20acquisition=20to=20lambdas=20instead?= =?UTF-8?q?=20of=20multiple=20helper=20classes.=20Maintains=20original=20d?= =?UTF-8?q?ictionary=20growth=20semantics=20(enlargeIn=20/=20numBits)=20an?= =?UTF-8?q?d=20special=20handling=20for=20dictSize=20edge=20cases.=20Behav?= =?UTF-8?q?ior=20&=20Compatibility=20Public=20API=20unchanged;=20return=20?= =?UTF-8?q?values=20and=20edge=20case=20handling=20(empty=20input=20=3D>?= =?UTF-8?q?=20empty=20output)=20preserved.=20Base64=20padding=20logic=20re?= 
=?UTF-8?q?tained=20(appends=20'=3D',=20'=3D=3D',=20or=20'=3D=3D=3D').=20U?= =?UTF-8?q?TF-16=20compressed=20form=20still=20appends=20a=20trailing=20sp?= =?UTF-8?q?ace=20for=20compatibility.=20Numeric=20bounds=20(reset=20values?= =?UTF-8?q?,=20bit=20widths)=20match=20established=20LZ-String=20logic;=20?= =?UTF-8?q?conditional=20adjustments=20for=208-bit=20vs=2016-bit=20environ?= =?UTF-8?q?ments=20included.=20Extensibility=20&=20Portability=20Clear=20p?= =?UTF-8?q?ath=20for=20adding=20new=20output=20encodings=20or=20URI-safe?= =?UTF-8?q?=20Base64=20variant.=20Non-Qt=20builds=20now=20feasible=20witho?= =?UTF-8?q?ut=20altering=20algorithm=20code.=20Encapsulation=20enables=20f?= =?UTF-8?q?uture=20streaming=20or=20incremental=20APIs.=20Maintainability?= =?UTF-8?q?=20Concentrated=20algorithmic=20complexity=20inside=20LZCore=20?= =?UTF-8?q?with=20a=20smaller,=20stable=20external=20surface.=20Removed=20?= =?UTF-8?q?repeated=20bit-flush=20sequences=20via=20reusable=20abstraction?= =?UTF-8?q?s.=20Improved=20naming=20and=20structure=20around=20dictionary?= =?UTF-8?q?=20operations=20and=20termination=20markers.=20Performance=20Co?= =?UTF-8?q?nsiderations=20Fewer=20temporary=20string=20fragment=20objects?= =?UTF-8?q?=20(no=20substring=20view=20objects).=20Precomputed=20Base64=20?= =?UTF-8?q?reverse=20map=20avoids=20per-call=20reconstruction.=20Structure?= =?UTF-8?q?=20now=20amenable=20to=20future=20switch=20from=20std::map=20to?= =?UTF-8?q?=20unordered=5Fmap=20if=20profiling=20justifies=20it.=20Follow-?= =?UTF-8?q?up=20opportunities=20(not=20included=20here):=20Add=20benchmark?= =?UTF-8?q?=20+=20regression=20tests=20comparing=20outputs=20against=20can?= =?UTF-8?q?onical=20vectors.=20Introduce=20URI-safe=20Base64=20variant.=20?= =?UTF-8?q?Provide=20streaming=20compression/decompression=20APIs=20for=20?= =?UTF-8?q?large=20inputs.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 + .idea/.name | 1 + .idea/misc.xml | 7 + 
.idea/modules.xml | 8 + .idea/qt-lzstring.iml | 2 + CMakeLists.txt | 71 +++ README.md | 193 +++--- src/ILZPlatform.h | 32 + src/LZCore.cpp | 491 +++++++++++++++ src/LZCore.h | 30 + src/QtLZPlatform.cpp | 18 + src/QtLZPlatform.h | 36 ++ src/StdLZPlatform.cpp | 14 + src/StdLZPlatform.h | 40 ++ src/lzstring.cpp | 666 ++------------------- src/lzstring.h | 16 +- src/lzstring.pri | 33 +- src/main.cpp | 136 ++++- tests/CMakeLists.txt | 30 + tests/lzstring_test/std_roundtrip_test.cpp | 54 ++ 20 files changed, 1138 insertions(+), 742 deletions(-) create mode 100644 .idea/.name create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/qt-lzstring.iml create mode 100644 CMakeLists.txt create mode 100644 src/ILZPlatform.h create mode 100644 src/LZCore.cpp create mode 100644 src/LZCore.h create mode 100644 src/QtLZPlatform.cpp create mode 100644 src/QtLZPlatform.h create mode 100644 src/StdLZPlatform.cpp create mode 100644 src/StdLZPlatform.h create mode 100644 tests/CMakeLists.txt create mode 100644 tests/lzstring_test/std_roundtrip_test.cpp diff --git a/.gitignore b/.gitignore index 9c1a3a1..36a4e8a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ lzstring.pro.user +/old/ +/build/ diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 0000000..12fd2fe --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +qt_lzstring \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..0b76fe5 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..1dc577c --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/qt-lzstring.iml b/.idea/qt-lzstring.iml new file mode 100644 index 0000000..f08604b --- /dev/null +++ b/.idea/qt-lzstring.iml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/CMakeLists.txt 
b/CMakeLists.txt new file mode 100644 index 0000000..4e0795a --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,71 @@ +cmake_minimum_required(VERSION 3.16) +project(qt_lzstring LANGUAGES CXX) + +include(CTest) +enable_testing() + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Enable Qt's automoc so test target with Q_OBJECT works when built (only affects Qt test target). +set(CMAKE_AUTOMOC ON) + +# Core sources reused by main executable and (optionally) tests (Qt sources kept separate so non-Qt builds don't require Qt headers). +set(LZSTRING_SOURCES + src/lzstring.cpp + src/LZCore.cpp + src/StdLZPlatform.cpp +) +set(LZSTRING_HEADERS + src/ILZPlatform.h + src/LZCore.h + src/StdLZPlatform.h + src/lzstring.h +) + +# Build the CLI without Qt dependency (forces use of std platform implementation) +add_executable(lzstring + ${LZSTRING_SOURCES} + ${LZSTRING_HEADERS} + src/main.cpp +) +# Only the CLI gets this define; tests (if enabled) can use Qt implementation. +target_compile_definitions(lzstring PRIVATE LZSTRING_NO_QT) + +# Organize sources in IDEs (portable) +source_group(TREE ${CMAKE_SOURCE_DIR}/src PREFIX "Source" FILES ${LZSTRING_SOURCES} ${LZSTRING_HEADERS}) + +# ------------------------------------------------------------- +# Optional Qt tests (converted from original qmake project) +# ------------------------------------------------------------- +option(BUILD_QT_TESTS "Build Qt-based unit and benchmark tests" ON) +if(BUILD_QT_TESTS) + find_package(QT NAMES Qt6 Qt5 COMPONENTS Core Test QUIET) + if(QT_FOUND) + find_package(Qt${QT_VERSION_MAJOR} COMPONENTS Core Test REQUIRED) + # Flag to let subdirectory know Qt is available. 
+ set(LZSTRING_QT_AVAILABLE ON CACHE INTERNAL "Qt available for tests") + add_subdirectory(tests) + else() + message(STATUS "Qt not found; skipping BUILD_QT_TESTS") + endif() +endif() + +# Non-Qt round-trip test exercising StdLZPlatform (always available, no Qt dependency) +add_executable(lzstring_std_test + tests/lzstring_test/std_roundtrip_test.cpp + ${LZSTRING_SOURCES} + ${LZSTRING_HEADERS} +) + +target_compile_definitions(lzstring_std_test PRIVATE LZSTRING_NO_QT) + +target_include_directories(lzstring_std_test PRIVATE ${CMAKE_SOURCE_DIR}/src) + +add_test(NAME lzstring_std_test COMMAND lzstring_std_test) + +# Register Qt test if it exists +if(BUILD_QT_TESTS AND TARGET lzstring_test) + add_test(NAME lzstring_test COMMAND lzstring_test) + set_tests_properties(lzstring_test PROPERTIES WORKING_DIRECTORY $) +endif() diff --git a/README.md b/README.md index b753f8e..3bcaf38 100644 --- a/README.md +++ b/README.md @@ -1,113 +1,130 @@ +# qt-lzstring -# LZString -Qt implementation of LZ-String as an executable +Qt implementation of LZ-String with a platform-agnostic core and a simple CLI. -Based on the LZ-String compression algorithm found here: -http://pieroxy.net/blog/pages/lz-string/index.html +Refactor highlights +- LZ core extracted to `src/LZCore.{h,cpp}`. It contains the compression/decompression logic and depends only on an abstract platform interface. +- New platform interface `src/ILZPlatform.h` encapsulates string/character operations the core needs. +- Two platform implementations: + - `src/QtLZPlatform.{h,cpp}` for Qt builds (uses QString/QChar/QHash). + - `src/StdLZPlatform.{h,cpp}` for non-Qt builds (uses std::string and simple containers). +- `src/lzstring.cpp` is a thin facade that selects the platform at compile time and delegates to `LZCore`. +- Build files (CMake and qmake .pri) updated to include the new sources. 
-## Installation +Public API +- `LZString` public static methods are unchanged: `compress`, `compressToUTF16`, `compressToBase64`, and corresponding `decompress*` variants. -### Prerequisites +Notes +- The core does not depend on Qt; only the platform layer bridges to Qt or std. This improves separation of concerns and makes the LZ implementation reusable. +- Non-Qt mode (CMake default) is intended for simple CLI usage. For full Unicode fidelity and UTF‑16/Base64 behavior identical to the original Qt implementation, build/run the Qt test target which uses `QString` code units. -Before installing lzstring, you need to ensure that Qt5 is installed on your system. -Use the following commands to install Qt5: +Installation (Linux) +- Prerequisites: a C++ compiler and either Qt (for the full-featured test suite) or just CMake (for the minimal non‑Qt CLI + std round‑trip tests). -#### For Debian/Ubuntu based systems: - -```shell +Debian/Ubuntu +```bash sudo apt-get update -sudo apt-get install qt5-default qtbase5-dev-tools qtchooser qt5-qmake qtbase5-dev +sudo apt-get install -y build-essential cmake +# For Qt tests (optional) +sudo apt-get install -y qtbase5-dev qtbase5-dev-tools ``` -#### For Fedora systems: - -```shell -sudo dnf install qt5 qt5-qtbase-devel -``` - -### Compiling lzstring - -Once Qt5 is installed, you can compile lzstring with `qmake` and `make`. - -Clone the repository and navigate into the directory: - -```shell -git clone https://github.com/vakarelov/lzstring.git -cd lzstring +Fedora +```bash +sudo dnf install -y cmake make gcc-c++ +# For Qt tests (optional) +sudo dnf install -y qt5-qtbase-devel ``` -Use `qmake` to create the Makefile: - -```shell -qmake +Build (CMake) +- Default (non-Qt CLI only): + - Windows (cmd.exe): + ```cmd + cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DBUILD_QT_TESTS=OFF + cmake --build build --config Release + ``` + - Linux/macOS: + ```bash + cmake -S . 
-B build -DCMAKE_BUILD_TYPE=Release -DBUILD_QT_TESTS=OFF + cmake --build build --config Release + ``` + - Binary: `lzstring` / `lzstring.exe` in the build tree. + +- With Qt tests (auto-detect Qt5/Qt6): + ```bash + cmake -S . -B build -DBUILD_QT_TESTS=ON + cmake --build build --target lzstring_test + ctest --test-dir build -V -R lzstring_test + ``` + If Qt isn't found, a status message is printed and the normal CLI plus non-Qt tests still build. + +Tests (CMake) +- Always built: `lzstring_std_test` (non-Qt, exercises `StdLZPlatform`). + ```bash + cmake -S . -B build -DBUILD_QT_TESTS=OFF + cmake --build build --target lzstring_std_test + ctest --test-dir build -V -R lzstring_std_test + ``` +- Optionally (with Qt): `lzstring_test` (original Qt/QString test + benchmarks). Data file `data.json` is auto-copied beside the test binary and the working directory set via CTest so relative open succeeds. + +Build (qmake, legacy / optional) +- The `.pri` pulls in all sources; tests are under `tests/`. +- Linux/macOS: + ```bash + qmake + make -j + ``` +- Windows (from a Qt command prompt): + ```cmd + qmake + nmake + ``` + +Getting the sources +```bash +git clone +cd qt-lzstring ``` -Now compile the project using `make`: - -```shell -make -``` - -The `lzstring` executable will be generated in the current directory. - -## Usage - -To use `lzstring`, you can either specify input and output files or use standard input and output to work with pipes. - -### Use Cases - -_Compress a file to LZ-String format:_ - -```shell +Usage (CLI) +- The CLI reads from a file or stdin (`-`) and writes to a file or stdout (`-`). 
+- Binary name: `lzstring` (`.exe` on Windows) +- Operations: + - `--compress` + - `--compressToUTF16` + - `--compressToBase64` + - `--decompress` + - `--decompressFromUTF16` + - `--decompressFromBase64` + - `--test` (round‑trip checks for all codecs on the provided input; prints boolean results and exits) + +Examples (Linux/macOS) +```bash ./lzstring --compress input.txt output.lz -``` - -_Compress a file to LZ-String format with UTF-16 encoding:_ - -```shell ./lzstring --compressToUTF16 input.txt output.lz16 -``` - -_Compress a file to Base64 encoded LZ-String format:_ - -```shell ./lzstring --compressToBase64 input.txt output.lz64 -``` - -_Decompress from LZ-String format to original text:_ - -```shell ./lzstring --decompress input.lz output.txt -``` - -_Decompress from UTF-16 encoded LZ-String format:_ - -```shell ./lzstring --decompressFromUTF16 input.lz16 output.txt -``` - -_Decompress from Base64 encoded LZ-String format:_ - -```shell ./lzstring --decompressFromBase64 input.lz64 output.txt -``` - -_Working with pipes, compress to LZ-String format:_ - -```shell +./lzstring --test input.txt cat input.txt | ./lzstring --compress - - +cat input.lz | ./lzstring --decompress - - ``` -_Working with pipes, decompress from LZ-String format:_ - -```shell -cat input.lz | ./lzstring --decompress - - +Examples (Windows cmd.exe) +```cmd +lzstring.exe --compress input.txt output.lz +lzstring.exe --decompress input.lz output.txt +lzstring.exe --test input.txt +:: Pipes +type input.txt | lzstring.exe --compress - - +type input.lz | lzstring.exe --decompress - - ``` -_Note: Replace `-` with file paths to work with files or use `-` for both input and output to use standard input and output._ - -## License - -This project is licensed under the [WTFPL License](http://www.wtfpl.net/). -``` +Extending tests +- Add more corpus cases: modify `tests/lzstring_test/std_roundtrip_test.cpp` or the Qt test's `test_data()` method. 
+- For performance comparisons, run the Qt benchmarks (they use `QBENCHMARK`). +- To disable the std test (e.g., in a packaging build), you can wrap its target creation in an option similar to `BUILD_QT_TESTS`. +License +- This project is licensed under the WTFPL: http://www.wtfpl.net/ diff --git a/src/ILZPlatform.h b/src/ILZPlatform.h new file mode 100644 index 0000000..dc5e326 --- /dev/null +++ b/src/ILZPlatform.h @@ -0,0 +1,32 @@ +#ifndef ILZPLATFORM_H +#define ILZPLATFORM_H + +#include "lzstring.h" + +// Interface abstracting platform-specific string and character utilities +class ILZPlatform { +public: + virtual ~ILZPlatform() = default; + + // String operations + virtual int length(const QString& s) const = 0; + virtual bool isEmpty(const QString& s) const = 0; + virtual QString slice(const QString& s, int pos, int len) const = 0; + virtual QString concat(const QString& a, const QString& b) const = 0; + virtual void append(QString& dst, const QString& src) const = 0; + + // Character/code unit operations + virtual int charCodeAt(const QString& s, int index) const = 0; // 0..65535 (Qt) or 0..255 (std) + virtual QString charFromCode(int code) const = 0; // single-code-unit string + + // Output mappers for compression writers + virtual void appendFromInt_Normal(QString& out, int code) const = 0; // 16-bit path uses code as-is + virtual void appendFromInt_UTF16(QString& out, int code) const = 0; // code + 32 + virtual void appendFromInt_Base64(QString& out, int code) const = 0; // map using Base64 alphabet + + // Reverse mapping for Base64 during decompression + virtual int base64ReverseIndex(int charCode) const = 0; // -1 if not found +}; + +#endif // ILZPLATFORM_H + diff --git a/src/LZCore.cpp b/src/LZCore.cpp new file mode 100644 index 0000000..c1066c7 --- /dev/null +++ b/src/LZCore.cpp @@ -0,0 +1,491 @@ +#include +#include +#include +#include "lzstring.h" +#include "LZCore.h" + +// Helper struct for bitstream during decompression +struct LZDecompressData { + 
int val; + int position; + int index; +}; + +QString LZCore::compress(const QString& uncompressed) const { +#ifdef LZSTRING_NO_QT + // In non-Qt mode we only have 8-bit code units, so flush every 8 bits + return compressImpl(uncompressed, 8, &ILZPlatform::appendFromInt_Normal); +#else + return compressImpl(uncompressed, 16, &ILZPlatform::appendFromInt_Normal); +#endif +} + +QString LZCore::compressToUTF16(const QString& uncompressed) const { + if (m_platform.isEmpty(uncompressed)) return QString(); +#ifdef LZSTRING_NO_QT + // With 8-bit storage, use 7 bits + 32 offset to stay within printable range + QString result = compressImpl(uncompressed, 7, &ILZPlatform::appendFromInt_UTF16); +#else + QString result = compressImpl(uncompressed, 15, &ILZPlatform::appendFromInt_UTF16); +#endif + m_platform.append(result, m_platform.charFromCode(' ')); + return result; +} + +QString LZCore::compressToBase64(const QString& uncompressed) const { + if (m_platform.isEmpty(uncompressed)) return QString(); + QString res = compressImpl(uncompressed, 6, &ILZPlatform::appendFromInt_Base64); + int mod = m_platform.length(res) % 4; + if (mod == 0) return res; + if (mod == 1) { m_platform.append(res, m_platform.charFromCode('=')); m_platform.append(res, m_platform.charFromCode('=')); m_platform.append(res, m_platform.charFromCode('=')); } + else if (mod == 2) { m_platform.append(res, m_platform.charFromCode('=')); m_platform.append(res, m_platform.charFromCode('=')); } + else { m_platform.append(res, m_platform.charFromCode('=')); } + return res; +} + +QString LZCore::decompress(const QString& compressed) const { + if (m_platform.isEmpty(compressed)) return QString(); + auto getNext = [&](int index) { + return m_platform.charCodeAt(compressed, index); + }; +#ifdef LZSTRING_NO_QT + return decompressImpl(m_platform.length(compressed), 128, getNext); +#else + return decompressImpl(m_platform.length(compressed), 32768, getNext); +#endif +} + +QString LZCore::decompressFromUTF16(const QString& 
compressed) const { + if (m_platform.isEmpty(compressed)) return QString(); + auto getNext = [&](int index) { + return m_platform.charCodeAt(compressed, index) - 32; + }; +#ifdef LZSTRING_NO_QT + return decompressImpl(m_platform.length(compressed), 64, getNext); +#else + return decompressImpl(m_platform.length(compressed), 16384, getNext); +#endif +} + +QString LZCore::decompressFromBase64(const QString& compressed) const { + if (m_platform.isEmpty(compressed)) return QString(); + auto getNext = [&](int index) { + return m_platform.base64ReverseIndex(m_platform.charCodeAt(compressed, index)); + }; + // Base64 path always uses 6 bits per char; resetValue per original algorithm is 32 (2^5) + return decompressImpl(m_platform.length(compressed), 32, getNext); +} + +QString LZCore::compressImpl(const QString& uncompressed, int bitsPerChar, + void (ILZPlatform::*appendFromInt)(QString&, int) const) const +{ + if (m_platform.isEmpty(uncompressed)) return QString(); + + int i = 0; + int value = 0; + + std::vector context_c_dictionary(65536, false); + std::map context_dictionary; + std::map context_dictionaryToCreate; + + QString context_c; + QString context_wc; + QString context_w; + QString context_data; + + int context_dictSize = 3; + int context_numBits = 2; + int context_data_val = 0; + int context_data_position = 0; + int context_enlargeIn = 2; + + // Pre-reserve some capacity + context_data.reserve(std::max(255, m_platform.length(uncompressed) / 5)); + + auto writeBit = [&](int bit) { + context_data_val = (context_data_val << 1) | (bit & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + }; + + auto writeNBits = [&](int n, int v) { + for (i = 0; i < n; ++i) { + writeBit(v & 1); + v >>= 1; + } + }; + + for (int ii = 0, size = m_platform.length(uncompressed); ii < size; ++ii) { + context_c = 
m_platform.slice(uncompressed, ii, 1); + int c_val = m_platform.charCodeAt(uncompressed, ii); + if (!context_c_dictionary[static_cast(c_val)]) { + context_c_dictionary[static_cast(c_val)] = true; + context_dictionary[context_c] = context_dictSize++; + context_dictionaryToCreate[context_c] = true; + } + + context_wc = m_platform.concat(context_w, context_c); + if (context_dictionary.count(context_wc)) { + context_w = context_wc; + } else { + auto itCreate = context_dictionaryToCreate.find(context_w); + if (itCreate != context_dictionaryToCreate.end() && !m_platform.isEmpty(context_w)) { + int w0 = m_platform.charCodeAt(context_w, 0); + if (w0 < 256) { + // Write context_numBits zeros first + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + } + // Then write 8 bits of the character value + value = w0; + for (i = 0; i < 8; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + } else { + // Write 1 followed by zeros + value = 1; + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1) | value; + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = 0; + } + // Then write 16 bits of the character value + value = w0; + for (i = 0; i < 16; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + 
(m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + } + if (--context_enlargeIn == 0) { + context_enlargeIn = 1 << context_numBits; + ++context_numBits; + } + context_dictionaryToCreate.erase(itCreate); + } else { + value = context_dictionary[context_w]; + writeNBits(context_numBits, value); + } + + if (--context_enlargeIn == 0) { + context_enlargeIn = 1 << context_numBits; + ++context_numBits; + } + + // Add wc to the dictionary + context_dictionary[context_wc] = context_dictSize++; + context_w = context_c; + } + } + + // Output the code for w + if (!m_platform.isEmpty(context_w)) { + auto itCreate = context_dictionaryToCreate.find(context_w); + if (itCreate != context_dictionaryToCreate.end()) { + int w0 = m_platform.charCodeAt(context_w, 0); + if (w0 < 256) { + // Write context_numBits zeros first + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + } + // Then write 8 bits of the character value + value = w0; + for (i = 0; i < 8; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + } else { + // Write 1 followed by zeros + value = 1; + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1) | value; + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = 0; + } + // Then write 16 bits of 
the character value + value = w0; + for (i = 0; i < 16; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + } + if (--context_enlargeIn == 0) { + context_enlargeIn = 1 << context_numBits; + ++context_numBits; + } + context_dictionaryToCreate.erase(itCreate); + } else { + value = context_dictionary[context_w]; + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + } + + if (--context_enlargeIn == 0) { + context_enlargeIn = 1 << context_numBits; + ++context_numBits; + } + } + + // Mark the end of the stream + value = 2; + for (i = 0; i < context_numBits; ++i) { + context_data_val = (context_data_val << 1) | (value & 1); + if (context_data_position == bitsPerChar - 1) { + context_data_position = 0; + (m_platform.*appendFromInt)(context_data, context_data_val); + context_data_val = 0; + } else { + ++context_data_position; + } + value = value >> 1; + } + + // Flush the last char + while (true) { + context_data_val = (context_data_val << 1); + if (context_data_position == bitsPerChar - 1) { + (m_platform.*appendFromInt)(context_data, context_data_val); + break; + } else { + ++context_data_position; + } + } + + return context_data; +} + +// Decompression core + +template +QString LZCore::decompressImpl(int length, int resetValue, GetNextValue getNextValue) const { + std::vector dictionary; + int next = 0; + int enlargeIn = 4; + int dictSize = 4; + int numBits = 3; + QString entry; + QString result; + QString w; + + LZDecompressData data{ 
getNextValue(0), resetValue, 1 }; + + result.reserve(length * 5); + + // Dictionary indexes 0-2 are never read, push placeholders + for (int i = 0; i < 3; ++i) { + dictionary.push_back(m_platform.charFromCode(i)); + } + + auto readBit = [&]() { + int resb = data.val & data.position; + data.position >>= 1; + if (data.position == 0) { + data.position = resetValue; + data.val = getNextValue(data.index++); + } + return resb > 0 ? 1 : 0; + }; + + auto readNBits = [&](int n) { + int bits = 0; + int power = 1; + for (int i = 0; i < n; ++i) { + bits |= readBit() * power; + power <<= 1; + } + return bits; + }; + + next = readNBits(2); + QString c; + switch (next) { + case 0: { + int bits = readNBits(8); + c = m_platform.charFromCode(bits); + break; + } + case 1: { + int bits = readNBits(16); + c = m_platform.charFromCode(bits); + break; + } + case 2: + return QString(); + } + + dictionary.push_back(c); + w = c; + m_platform.append(result, c); + + while (true) { + if (data.index > length) return QString(); + + int cc = readNBits(numBits); + switch (cc) { + case 0: { + int bits = readNBits(8); + dictionary.push_back(m_platform.charFromCode(bits)); + cc = dictSize++; + --enlargeIn; + break; + } + case 1: { + int bits = readNBits(16); + dictionary.push_back(m_platform.charFromCode(bits)); + cc = dictSize++; + --enlargeIn; + break; + } + case 2: + return result; + } + + if (enlargeIn == 0) { + enlargeIn = 1 << numBits; + ++numBits; + } + + if (cc < dictSize) { + entry = dictionary[static_cast(cc)]; + } else if (cc == dictSize) { + entry = m_platform.concat(w, m_platform.charFromCode(m_platform.charCodeAt(w, 0))); + } else { + return QString(); + } + + m_platform.append(result, entry); + + // Add w + entry[0] + dictionary.push_back(m_platform.concat(w, m_platform.charFromCode(m_platform.charCodeAt(entry, 0)))); + ++dictSize; + --enlargeIn; + + w = entry; + + if (enlargeIn == 0) { + enlargeIn = 1 << numBits; + ++numBits; + } + } +} + +// Explicit instantiations for common lambda 
types aren't needed as it's header-only template used locally + +/* +------------------------------------------------------------------------------- +High-level overview of the compression / decompression algorithm implemented: + +This is a variant of the LZ-based dictionary encoder popularly known from the +JavaScript LZ-String project. It is closest in spirit to LZW / LZ78 but with a +few notable twists to optimize for short textual payloads: + +1. Dynamic Dictionary: + - The dictionary starts with 3 reserved entries (0,1,2) used for control / end. + - New sequences are added on-the-fly: when encountering a new concatenation + (w + c) not yet in the dictionary, it is assigned the next incremental code. + +2. Variable Code Width Growth: + - The number of bits used to emit dictionary codes (context_numBits / numBits) + grows as the dictionary size crosses powers of two. + - A countdown (enlargeIn) tracks when to increase the code width. + +3. Literal Emission Strategy: + - Raw characters not yet seen are first queued for creation. When flushed, + they are emitted in one of two literal forms: + * 0 + 8 data bits (for char codes < 256) + * 1 + 16 data bits (for char codes >= 256) + - This mirrors the original LZ-String design to support full Unicode ranges + (or limited ranges when compiled without Qt depending on platform build). + +4. Bit Packing Abstraction: + - Output is packed into an integer accumulator (context_data_val) and flushed + once bitsPerChar bits are filled. Different front-end functions choose + bitsPerChar and a corresponding ILZPlatform::appendFromInt_* method to map + the packed value into a storage character (normal 16-bit, UTF-16-friendly, + Base64 alphabet, etc.). + +5. Termination: + - A final code (2) marks end-of-stream, after which the accumulator is padded + and flushed. + +6. Decompression Mirrors Compression: + - Rebuilds the dictionary in the same order. 
+ - Handles the special case when a code equals the next dictionary index + (classic LZW edge case: entry = w + firstChar(w)). + - Increases bit width in lockstep using enlargeIn / numBits logic. + +7. Design Goals: + - Small, allocation-conscious (pre-reserving output & using simple containers). + - Fidelity with original LZ-String output across modes (Base64, UTF-16, raw). + - Separation of platform-specific character / string operations via ILZPlatform. + +------------------------------------------------------------------------------- +*/ diff --git a/src/LZCore.h b/src/LZCore.h new file mode 100644 index 0000000..cd14ad7 --- /dev/null +++ b/src/LZCore.h @@ -0,0 +1,30 @@ +#ifndef LZCORE_H +#define LZCORE_H + +#include "lzstring.h" +#include "ILZPlatform.h" + +class LZCore { +public: + explicit LZCore(const ILZPlatform& platform) : m_platform(platform) {} + + QString compress(const QString& uncompressed) const; + QString compressToUTF16(const QString& uncompressed) const; + QString compressToBase64(const QString& uncompressed) const; + + QString decompress(const QString& compressed) const; + QString decompressFromUTF16(const QString& compressed) const; + QString decompressFromBase64(const QString& compressed) const; + +private: + const ILZPlatform& m_platform; + + // Core helpers + QString compressImpl(const QString& uncompressed, int bitsPerChar, + void (ILZPlatform::*appendFromInt)(QString&, int) const) const; + + template + QString decompressImpl(int length, int resetValue, GetNextValue getNextValue) const; +}; + +#endif // LZCORE_H diff --git a/src/QtLZPlatform.cpp b/src/QtLZPlatform.cpp new file mode 100644 index 0000000..8ad8531 --- /dev/null +++ b/src/QtLZPlatform.cpp @@ -0,0 +1,18 @@ +#ifndef LZSTRING_NO_QT +#include "QtLZPlatform.h" + +QtLZPlatform::QtLZPlatform() + : m_keyStrBase64(QLatin1String("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")) +{ + // Precompute reverse dictionary + for (int i = 0, len = m_keyStrBase64.length(); i < 
len; ++i) { + m_baseReverseDic.insert(m_keyStrBase64.at(i), i); + } +} + +int QtLZPlatform::base64ReverseIndex(int charCode) const { + return m_baseReverseDic.value(QChar(charCode), -1); +} + +#endif // LZSTRING_NO_QT + diff --git a/src/QtLZPlatform.h b/src/QtLZPlatform.h new file mode 100644 index 0000000..60eb1af --- /dev/null +++ b/src/QtLZPlatform.h @@ -0,0 +1,36 @@ +#ifndef QTLZPLATFORM_H +#define QTLZPLATFORM_H + +#ifndef LZSTRING_NO_QT +#include +#include +#include "ILZPlatform.h" + +class QtLZPlatform : public ILZPlatform { +public: + QtLZPlatform(); + + int length(const QString& s) const override { return s.length(); } + bool isEmpty(const QString& s) const override { return s.isEmpty(); } + QString slice(const QString& s, int pos, int len) const override { return s.mid(pos, len); } + QString concat(const QString& a, const QString& b) const override { return a + b; } + void append(QString& dst, const QString& src) const override { dst.append(src); } + + int charCodeAt(const QString& s, int index) const override { return s.at(index).unicode(); } + QString charFromCode(int code) const override { return QString(1, QChar(code)); } + + void appendFromInt_Normal(QString& out, int code) const override { out.append(QChar(code)); } + void appendFromInt_UTF16(QString& out, int code) const override { out.append(QChar(code + 32)); } + void appendFromInt_Base64(QString& out, int code) const override { out.append(m_keyStrBase64.at(code)); } + + int base64ReverseIndex(int charCode) const override; + +private: + QString m_keyStrBase64; + QHash m_baseReverseDic; +}; + +#endif // LZSTRING_NO_QT + +#endif // QTLZPLATFORM_H + diff --git a/src/StdLZPlatform.cpp b/src/StdLZPlatform.cpp new file mode 100644 index 0000000..9bb1a0c --- /dev/null +++ b/src/StdLZPlatform.cpp @@ -0,0 +1,14 @@ +#ifdef LZSTRING_NO_QT +#include "StdLZPlatform.h" + +StdLZPlatform::StdLZPlatform() + : m_keyStrBase64("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=") + , 
m_baseReverse(256, -1) +{ + for (size_t i = 0; i < m_keyStrBase64.size(); ++i) { + m_baseReverse[static_cast(m_keyStrBase64[i])] = static_cast(i); + } +} + +#endif // LZSTRING_NO_QT + diff --git a/src/StdLZPlatform.h b/src/StdLZPlatform.h new file mode 100644 index 0000000..6589e6f --- /dev/null +++ b/src/StdLZPlatform.h @@ -0,0 +1,40 @@ +#ifndef STDLZPLATFORM_H +#define STDLZPLATFORM_H + +#ifdef LZSTRING_NO_QT +#include +#include +#include "ILZPlatform.h" + +class StdLZPlatform : public ILZPlatform { +public: + StdLZPlatform(); + + int length(const QString& s) const override { return static_cast(s.size()); } + bool isEmpty(const QString& s) const override { return s.empty(); } + QString slice(const QString& s, int pos, int len) const override { return s.substr(pos, len); } + QString concat(const QString& a, const QString& b) const override { return a + b; } + void append(QString& dst, const QString& src) const override { dst.append(src); } + + int charCodeAt(const QString& s, int index) const override { + if (index < 0 || index >= static_cast(s.size())) return 0; + return static_cast(s.at(index)); + } + QString charFromCode(int code) const override { return QString(1, static_cast(code)); } + + void appendFromInt_Normal(QString& out, int code) const override { out.push_back(static_cast(code)); } + void appendFromInt_UTF16(QString& out, int code) const override { out.push_back(static_cast(code + 32)); } + void appendFromInt_Base64(QString& out, int code) const override { out.push_back(m_keyStrBase64.at(static_cast(code))); } + + int base64ReverseIndex(int charCode) const override { + return m_baseReverse[static_cast(charCode)]; + } + +private: + QString m_keyStrBase64; + std::vector m_baseReverse; // size 256 +}; + +#endif // LZSTRING_NO_QT + +#endif // STDLZPLATFORM_H diff --git a/src/lzstring.cpp b/src/lzstring.cpp index b751b9f..b980732 100644 --- a/src/lzstring.cpp +++ b/src/lzstring.cpp @@ -1,647 +1,49 @@ #include "lzstring.h" +#include "LZCore.h" +#include 
"ILZPlatform.h" +#ifndef LZSTRING_NO_QT +#include "QtLZPlatform.h" +#else +#include "StdLZPlatform.h" +#endif -#include -#include -#include -#include -#include -#include - -static const QString keyStrBase64 = QLatin1String("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="); -static const QString keyStrUriSafe = QLatin1String("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-$"); - -static QChar compressGetCharFromInt(int a) -{ - return QChar(a); -} - -static QChar compressToUTF16GetCharFromInt(int a) -{ - return QChar(a+32); -} - -static QChar compressToBase64GetCharFromInt(int a) -{ - return keyStrBase64.at(a); -} - -QString LZString::compress(const QString &uncompressed) -{ - return _compress(uncompressed, 16, &compressGetCharFromInt); +// Provide a single access point to the platform abstraction +static const ILZPlatform* getPlatform() { +#ifndef LZSTRING_NO_QT + static QtLZPlatform platform; +#else + static StdLZPlatform platform; +#endif + return &platform; } -QString LZString::compressToUTF16(const QString &uncompressed) -{ - if (uncompressed.isEmpty()) - return ""; - - QString result = _compress(uncompressed, 15, &compressToUTF16GetCharFromInt); - result.append(QLatin1String(" ")); - return result; +// Lightweight delegations to the core algorithm +QString LZString::compress(const QString &uncompressed) { + LZCore core(*getPlatform()); + return core.compress(uncompressed); } -QString LZString::compressToBase64(const QString &uncompressed) -{ - if (uncompressed.isEmpty()) - return ""; - - QString res = _compress(uncompressed, 6, &compressToBase64GetCharFromInt); - switch (res.length() % 4) // To produce valid Base64 - { - default: // When could this happen ? 
- case 0: return res; - case 1: return res.append(QLatin1String("===")); - case 2: return res.append(QLatin1String("==")); - case 3: return res.append(QLatin1String("=")); - } - return res; +QString LZString::compressToUTF16(const QString &uncompressed) { + LZCore core(*getPlatform()); + return core.compressToUTF16(uncompressed); } -template -QString LZString::_compress(const QString &uncompressed, int bitsPerChar, GetCharFromInt getCharFromInt) -{ - int i = 0; - int value = 0; - QBitArray context_c_dictionary(65536); // All bits initialized to 0 - QHash context_dictionary; - QHash context_dictionaryToCreate; - QStringRef context_c; - QStringRef context_wc; - QStringRef context_w; - int context_enlargeIn = 2; // Compensate for the first entry which should not count - int context_dictSize = 3; - int context_numBits = 2; - QString context_data; - int context_data_val = 0; - int context_data_position = 0; - - context_data.reserve( qMax(255, uncompressed.length()/5) ); - context_w = QStringRef(&uncompressed, 0, 0); - - for (int ii=0, size=uncompressed.length(); ii::iterator context_w_it = - context_dictionaryToCreate.find(context_w); - - if (context_w_it != context_dictionaryToCreate.end()) - { - if (context_w.at(0).unicode() < 256) - { - for (i=0; i> 1; - } - } - else - { - value = 1; - for (i=0; i> 1; - } - } - context_enlargeIn--; - if (context_enlargeIn == 0) - { - context_enlargeIn = 1 << context_numBits; - context_numBits++; - } - context_dictionaryToCreate.erase(context_w_it); - } - else - { - value = context_dictionary.value(context_w); - for (i=0; i> 1; - } - } - context_enlargeIn--; - if (context_enlargeIn == 0) - { - context_enlargeIn = 1 << context_numBits; - context_numBits++; - } - - // Add wc to the dictionary. - Q_ASSERT(context_wc.length() > 1); - context_dictionary.insert(context_wc, context_dictSize++); - context_w = context_c; - } - } - - // Output the code for w. 
- if (!context_w.isEmpty()) - { - QHash::iterator context_w_it = - context_dictionaryToCreate.find(context_w); - - if (context_w_it != context_dictionaryToCreate.end()) - { - if (context_w.at(0).unicode() < 256) - { - for (i=0; i> 1; - } - } - else - { - value = 1; - for (i=0; i> 1; - } - } - context_enlargeIn--; - if (context_enlargeIn == 0) - { - context_enlargeIn = 1 << context_numBits; - context_numBits++; - } - context_dictionaryToCreate.erase(context_w_it); - } - else - { - value = context_dictionary.value(context_w); - for (i=0; i> 1; - } - } - - context_enlargeIn--; - if (context_enlargeIn == 0) - { - context_enlargeIn = 1 << context_numBits; - context_numBits++; - } - } - - // Mark the end of the stream - value = 2; - for (i=0; i> 1; - } - - // Flush the last char - while (true) - { - context_data_val = (context_data_val << 1); - if (context_data_position == bitsPerChar-1) - { - context_data.append(getCharFromInt(context_data_val)); - break; - } - else - { - context_data_position++; - } - } - - return context_data; -} - -class DecompressGetNextValue -{ -public: - DecompressGetNextValue(const QString &compressed) : - m_compressed(compressed.constData()) {} - - int operator()(int index) const - { - return m_compressed[index].unicode(); - } - -private: - const QChar *m_compressed; -}; - -class DecompressFromUTF16GetNextValue -{ -public: - DecompressFromUTF16GetNextValue(const QString &compressed) : - m_compressed(compressed.constData()) {} - - int operator()(int index) const - { - return m_compressed[index].unicode() - 32; - } - -private: - const QChar *m_compressed; -}; - -class DecompressGetBaseValue -{ -public: - DecompressGetBaseValue(const QString &compressed, const QString &alphabet) : - m_compressed(compressed.constData()), - m_alphabet(alphabet) - { - for (int i=0, len=alphabet.length(); i m_baseReverseDic; -}; - -QString LZString::decompress(const QString &compressed) -{ - if (compressed.isEmpty()) - return ""; - - return 
_decompress(compressed.length(), 32768, DecompressGetNextValue(compressed)); +QString LZString::compressToBase64(const QString &uncompressed) { + LZCore core(*getPlatform()); + return core.compressToBase64(uncompressed); } -QString LZString::decompressFromUTF16(const QString &compressed) -{ - if (compressed.isEmpty()) - return ""; - - return _decompress(compressed.length(), 16384, DecompressFromUTF16GetNextValue(compressed)); +QString LZString::decompress(const QString &compressed) { + LZCore core(*getPlatform()); + return core.decompress(compressed); } -QString LZString::decompressFromBase64(const QString &compressed) -{ - if (compressed.isEmpty()) - return ""; - - return _decompress(compressed.length(), 32, DecompressGetBaseValue(compressed, keyStrBase64)); +QString LZString::decompressFromUTF16(const QString &compressed) { + LZCore core(*getPlatform()); + return core.decompressFromUTF16(compressed); } -struct DecompressData -{ - int val; - int position; - int index; -}; - -template -QString LZString::_decompress(int length, int resetValue, GetNextValue getNextValue) -{ - QVector dictionary; - int next = 0; - int enlargeIn = 4; - int dictSize = 4; - int numBits = 3; - QString entry; - QString result; - QString w; - int bits, resb, maxpower, power; - QString c; - DecompressData data; - data.val = getNextValue(0); - data.position = resetValue; - data.index = 1; - - result.reserve(length*5); - - // Put anything at dictionary indexes 0-2 - this will never be read. 
- // See question: - // http://pieroxy.net/blog/replyToBlogEntry.action?entry=1368091620000&comment=1474381950353 - // and response: - // http://pieroxy.net/blog/replyToBlogEntry.action?entry=1368091620000&comment=1474435834165 - for (int i=0; i<3; ++i) - { - dictionary.append(QString::number(i)); - } - - bits = 0; - maxpower = 4; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - - switch (next = bits) - { - case 0: - bits = 0; - maxpower = 256; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - c = QChar(bits); - break; - - case 1: - bits = 0; - maxpower = 65536; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - c = QChar(bits); - break; - - case 2: - return ""; - } - - dictionary.append(c); // "c" may be empty string - w = c; - result.append(c); - while (true) - { - if (data.index > length) - return ""; - - bits = 0; - maxpower = 1 << numBits; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - - // Very strange here, "c" above is as char/string, but - // further "c" is a int, rename "c" in the switch as "cc". 
- int cc; - switch (cc = bits) - { - case 0: - bits = 0; - maxpower = 256; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - - cc = dictSize++; - dictionary.append(QChar(bits)); - enlargeIn--; - break; - - case 1: - bits = 0; - maxpower = 65536; - power = 1; - while (power != maxpower) - { - resb = data.val & data.position; - data.position >>= 1; - if (data.position == 0) - { - data.position = resetValue; - data.val = getNextValue(data.index++); - } - bits |= (resb>0 ? 1 : 0) * power; - power <<= 1; - } - - cc = dictSize++; - dictionary.append(QChar(bits)); - enlargeIn--; - break; - - case 2: - return result; - } - - if (enlargeIn == 0) - { - enlargeIn = 1 << numBits; - numBits++; - } - - Q_ASSERT(dictSize == dictionary.size()); - if (cc < dictSize && !dictionary.at(cc).isEmpty()) - { - entry = dictionary.at(cc); - } - else - { - if (cc == dictSize) - entry = w % w.at(0); // % - QStringBuilder - else - return QString(); - } - result.append(entry); - - // Add w+entry[0] to the dictionary. 
- dictSize++; - dictionary.append(w % entry.at(0)); // % - QStringBuilder - enlargeIn--; - - w = entry; - - if (enlargeIn == 0) - { - enlargeIn = 1 << numBits; - numBits++; - } - } +QString LZString::decompressFromBase64(const QString &compressed) { + LZCore core(*getPlatform()); + return core.decompressFromBase64(compressed); } diff --git a/src/lzstring.h b/src/lzstring.h index fb3e9c8..8e04dd9 100644 --- a/src/lzstring.h +++ b/src/lzstring.h @@ -11,7 +11,14 @@ * */ -#include +// Conditional compilation for Qt vs non-Qt builds +#ifndef LZSTRING_NO_QT + #include + #define LZSTRING_USE_QT +#else + #include + typedef std::string QString; +#endif class LZString { @@ -23,13 +30,6 @@ class LZString static QString decompress(const QString &compressed); static QString decompressFromUTF16(const QString &compressed); static QString decompressFromBase64(const QString &compressed); - -private: - template - static QString _compress(const QString &uncompressed, int bitsPerChar, GetCharFromInt getCharFromInt); - - template - static QString _decompress(int length, int resetValue, GetNextValue getNextValue); }; #endif // LZSTRING_H diff --git a/src/lzstring.pri b/src/lzstring.pri index abd9b0f..00c65b1 100644 --- a/src/lzstring.pri +++ b/src/lzstring.pri @@ -1,6 +1,33 @@ - INCLUDEPATH += $$PWD DEPENDPATH += $$PWD -HEADERS += $$PWD/lzstring.h -SOURCES += $$PWD/main.cpp $$PWD/lzstring.cpp +HEADERS += $$PWD/lzstring.h \ + $$PWD/LZCore.h \ + $$PWD/ILZPlatform.h \ + $$PWD/QtLZPlatform.h \ + $$PWD/StdLZPlatform.h +SOURCES += $$PWD/lzstring.cpp \ + $$PWD/LZCore.cpp \ + $$PWD/QtLZPlatform.cpp \ + $$PWD/StdLZPlatform.cpp + +# Include main.cpp by default for executable builds +# Only exclude it when building as a library +!contains(CONFIG, staticlib):!contains(CONFIG, lib):!contains(CONFIG, no-main) { + SOURCES += $$PWD/main.cpp +} + +# Conditional compilation for non-Qt builds +contains(CONFIG, no-qt) { + DEFINES += LZSTRING_NO_QT + QT = +} else { + QT += core testlib +} + +# Legacy 
support for explicit CLI configuration +contains(CONFIG, lzstring-cli) { + !contains(SOURCES, $$PWD/main.cpp) { + SOURCES += $$PWD/main.cpp + } +} diff --git a/src/main.cpp b/src/main.cpp index a7d2c0e..45435e5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,11 +1,19 @@ +#ifndef LZSTRING_NO_QT #include #include #include #include #include +#else +#include +#include +#include +#endif + #include "lzstring.h" int main(int argc, char* argv[]) { +#ifndef LZSTRING_NO_QT QCoreApplication app(argc, argv); QCoreApplication::setApplicationName("LZString CLI"); QCoreApplication::setApplicationVersion("1.0"); @@ -19,6 +27,7 @@ int main(int argc, char* argv[]) { QCommandLineOption decompressOption("decompress", "Decompress input stream"); QCommandLineOption decompressFromUTF16Option("decompressFromUTF16", "Decompress input stream from UTF16 format"); QCommandLineOption decompressFromBase64Option("decompressFromBase64", "Decompress input stream from Base64 format"); + QCommandLineOption testOption("test", "Run internal round-trip tests on the provided input"); parser.addOption(compressOption); parser.addOption(compressToUTF16Option); @@ -26,6 +35,7 @@ int main(int argc, char* argv[]) { parser.addOption(decompressOption); parser.addOption(decompressFromUTF16Option); parser.addOption(decompressFromBase64Option); + parser.addOption(testOption); parser.addPositionalArgument("[input]", "The input file to process or '-' for standard input", "[input]"); parser.addPositionalArgument("[output]", "The output file or '-' for standard output", "[output]"); @@ -51,6 +61,21 @@ int main(int argc, char* argv[]) { inputFile.close(); } + // Handle test option first (ignores other operation flags if present) + if (parser.isSet(testOption)) { + QString result; + // compress / decompress + result = LZString::decompress(LZString::compress(inputContents)); + out << "compress: " << (result == inputContents ? 
"true" : "false") << '\n'; + // UTF16 + result = LZString::decompressFromUTF16(LZString::compressToUTF16(inputContents)); + out << "compressToUTF16: " << (result == inputContents ? "true" : "false") << '\n'; + // Base64 + result = LZString::decompressFromBase64(LZString::compressToBase64(inputContents)); + out << "compressToBase64: " << (result == inputContents ? "true" : "false") << '\n'; + return 0; + } + if (parser.isSet(compressOption)) { outputContents = LZString::compress(inputContents); } @@ -89,21 +114,110 @@ int main(int argc, char* argv[]) { } return 0; -} - // Compress a file lzstring --compress input.txt output.lz +#else + // Simple non-Qt implementation for basic functionality + auto print_usage = [argv]() { + std::cerr + << "Usage: " << argv[0] << " [input] [output]\n" + << "Operations:\n" + << " --compress Compress input\n" + << " --compressToUTF16 Compress input to UTF16\n" + << " --compressToBase64 Compress input to Base64\n" + << " --decompress Decompress input\n" + << " --decompressFromUTF16 Decompress input from UTF16\n" + << " --decompressFromBase64 Decompress input from Base64\n" + << " --test Run internal round-trip tests on the provided input\n" + << "Notes: Use '-' as [input] or [output] to read from stdin or write to stdout.\n"; + }; + + if (argc < 2 || std::string(argv[1]) == "--help" || std::string(argv[1]) == "-h") { + print_usage(); + return (argc < 2) ? -1 : 0; + } + + std::string operation = argv[1]; + std::string inputContents; + + // Read input + if (argc < 3 || std::string(argv[2]) == "-") { + std::string line; + while (std::getline(std::cin, line)) { + inputContents += line + "\n"; + } + } else { + std::ifstream file(argv[2], std::ios::in | std::ios::binary); + if (!file.is_open()) { + std::cerr << "Error: Cannot open input file for reading." 
<< std::endl; + return -1; + } + std::string line; + while (std::getline(file, line)) { + inputContents += line + "\n"; + } + file.close(); + } - // Compress a file to UTF16 lzstring --compressToUTF16 input.txt output.lz16 + std::string outputContents; + if (operation == "--compress") { + outputContents = LZString::compress(inputContents); + } else if (operation == "--compressToUTF16") { + outputContents = LZString::compressToUTF16(inputContents); + } else if (operation == "--compressToBase64") { + outputContents = LZString::compressToBase64(inputContents); + } else if (operation == "--decompress") { + outputContents = LZString::decompress(inputContents); + } else if (operation == "--decompressFromUTF16") { + outputContents = LZString::decompressFromUTF16(inputContents); + } else if (operation == "--decompressFromBase64") { + outputContents = LZString::decompressFromBase64(inputContents); + } else if (operation == "--test") { + // Test compress/decompress + outputContents = LZString::decompress(LZString::compress(inputContents)); + std::cout << (outputContents == inputContents ? "compress: true\n" : "compress: false\n"); + + // Test compressToUTF16/decompressFromUTF16 + outputContents = LZString::decompressFromUTF16(LZString::compressToUTF16(inputContents)); + std::cout << (outputContents == inputContents ? "compressToUTF16: true\n" : "compressToUTF16: false\n"); + + // Test compressToBase64/decompressFromBase64 + outputContents = LZString::decompressFromBase64(LZString::compressToBase64(inputContents)); + std::cout << (outputContents == inputContents ? "compressToBase64: true\n" : "compressToBase64: false\n"); + + return 0; + } else { + std::cerr << "Error: Invalid operation. Use --help for usage." 
<< std::endl; + return -1; + } + + // Write output + if (argc < 4 || std::string(argv[3]) == "-") { + std::cout << outputContents; + } else { + std::ofstream file(argv[3], std::ios::out | std::ios::binary); + if (!file.is_open()) { + std::cerr << "Error: Cannot open output file for writing." << std::endl; + return -1; + } + file << outputContents; + file.close(); + } + + return 0; +#endif +} - // Compress a file to Base64 - // lzstring --compressToBase64 input.txt output.lz64 +// Compress a file lzstring --compress input.txt output.lz - // Decompress a file - //lzstring --decompress input.lz output.txt +// Compress a file to UTF16 lzstring --compressToUTF16 input.txt output.lz16 - // Decompress a file from UTF16 - // lzstring --decompressFromUTF16 input.lz16 output.txt +// Compress a file to Base64 +// lzstring --compressToBase64 input.txt output.lz64 - // Decompress a file from Base64 - // lzstring --decompressFromBase64 input.lz64 output.txt +// Decompress a file +//lzstring --decompress input.lz output.txt +// Decompress a file from UTF16 +// lzstring --decompressFromUTF16 input.lz16 output.txt +// Decompress a file from Base64 +// lzstring --decompressFromBase64 input.lz64 output.txt diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..3fdb4a0 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,30 @@ +# CMake test configuration converted from original qmake project. + +set(TEST_SRC + lzstring_test/lzstring_test.cpp +) + +add_executable(lzstring_test + ${TEST_SRC} + ${LZSTRING_SOURCES} + ${LZSTRING_HEADERS} + ${CMAKE_SOURCE_DIR}/src/QtLZPlatform.cpp + ${CMAKE_SOURCE_DIR}/src/QtLZPlatform.h +) + +# Link Qt modules (Core + Test) detected in parent. +target_link_libraries(lzstring_test PRIVATE Qt::Core Qt::Test) + +# Include source directory for headers like lzstring.h +target_include_directories(lzstring_test PRIVATE ${CMAKE_SOURCE_DIR}/src) + +# Copy test data (data.json) next to the test binary for runtime access. 
+add_custom_command(TARGET lzstring_test POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_CURRENT_SOURCE_DIR}/lzstring_test/data.json + $<TARGET_FILE_DIR:lzstring_test>/data.json) + +# On Windows add console definition for consistency with original CONFIG += console +if(WIN32) + target_compile_definitions(lzstring_test PRIVATE _CONSOLE) +endif() diff --git a/tests/lzstring_test/std_roundtrip_test.cpp b/tests/lzstring_test/std_roundtrip_test.cpp new file mode 100644 index 0000000..3463952 --- /dev/null +++ b/tests/lzstring_test/std_roundtrip_test.cpp @@ -0,0 +1,54 @@ +#include <iostream> +#include <string> +#include <vector> +#include <cstdlib> +#include "lzstring.h" + +// Simple assertion helper +static void check(bool cond, const std::string &msg) { + if(!cond) { + std::cerr << "[FAIL] " << msg << std::endl; + std::exit(1); + } +} + +static std::string generateSequence(int len) { + std::string s; + s.reserve(len); + for(int i=0;i<len;++i) { + char c = static_cast<char>((i % 126) + 1); + if(c=='\0') c = 'X'; + s.push_back(c); + } + return s; +} + +int main() { + std::vector<std::string> corpus = { + "", "a", "hello", "The quick brown fox jumps over the lazy dog", + std::string(100, 'A'), + "0123456789!@#$%^&*()_+-={}|[]:'<>?,./\\\"", + }; + // Add varying length sequences including an edge near 0xD7FF but scaled down for speed + for(int len : {1,2,3,10,63,127,255,512,1024}) { + corpus.push_back(generateSequence(len)); + } + + for(const auto &plain : corpus) { + auto c1 = LZString::compress(plain); + auto d1 = LZString::decompress(c1); + check(d1 == plain, "compress/decompress mismatch"); + + auto c2 = LZString::compressToBase64(plain); + auto d2 = LZString::decompressFromBase64(c2); + check(d2 == plain, "base64 round-trip mismatch"); + + auto c3 = LZString::compressToUTF16(plain); + auto d3 = LZString::decompressFromUTF16(c3); + check(d3 == plain, "UTF16 round-trip mismatch"); + } + + std::cout << "[OK] All std::string round-trip tests passed (" << corpus.size() << " cases)." << std::endl; + return 0; +}