From 38c4c50a52aede1c2468b79ad7b11536783fdb89 Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 10:50:19 -0500 Subject: [PATCH 01/13] various fixes to compile with latest versions of both MSVC and Clang --- .clangd | 24 ++++++++ .cspell.jsonc | 50 +++++++++++++++++ .github/workflows/build.yml | 9 ++- .github/workflows/doxygen.yml | 4 +- .github/workflows/purge.yml | 4 +- .gitignore | 3 + .vscode/ltex.dictionary.en-US.txt | 60 ++++++++++++++++++++ BUILD.bat | 2 +- CMakeLists.txt | 32 +++++++++-- CMakePresets.json | 92 +++++++++++++++++++++++-------- README.md | 89 ++++++++++++++++++++---------- cmake/ConfigFetchContent.cmake | 7 +++ cmake/utpp.cmake | 18 ++++++ examples/CMakeLists.txt | 7 +-- examples/sample.cpp | 8 +-- include/CMakeLists.txt | 17 ++++++ include/utf8/ini.h | 7 ++- include/utf8/utf8.h | 5 +- include/utf8/winutf8.h | 23 ++++---- src/CMakeLists.txt | 34 +++--------- src/ini.cpp | 71 ++++++++++++------------ src/utf8.cpp | 28 +++++----- src/win.cpp | 24 ++++---- tests/CMakeLists.txt | 20 +++++-- tests/tests_ini.cpp | 8 +-- tests/tests_utf8.cpp | 30 +++++----- tests/tests_win.cpp | 14 +++-- tests/utpp_shim.h | 11 ++++ tools/gen_casetab/CMakeLists.txt | 9 ++- tools/gen_casetab/gen_casetab.cpp | 2 +- utf8.code-workspace | 49 ++++++++++++++++ 31 files changed, 549 insertions(+), 212 deletions(-) create mode 100644 .clangd create mode 100644 .cspell.jsonc create mode 100644 .vscode/ltex.dictionary.en-US.txt create mode 100644 cmake/ConfigFetchContent.cmake create mode 100644 cmake/utpp.cmake create mode 100644 include/CMakeLists.txt create mode 100644 tests/utpp_shim.h create mode 100644 utf8.code-workspace diff --git a/.clangd b/.clangd new file mode 100644 index 0000000..ce4f571 --- /dev/null +++ b/.clangd @@ -0,0 +1,24 @@ +If: + PathMatch: .*\.in + +CompileFlags: + Remove: + - -Wall + - -Wextra + - -Wpedantic + +Diagnostics: + Suppress: + - "*" + +--- + +CompileFlags: + Add: + - -Wall + - -Wextra + - -Wpedantic + # Force .h files to be 
treated as C++ + - -xc++ + # Ensure we're using C++20 like the build system + - -std=c++20 diff --git a/.cspell.jsonc b/.cspell.jsonc new file mode 100644 index 0000000..8d0f5d7 --- /dev/null +++ b/.cspell.jsonc @@ -0,0 +1,50 @@ +{ + "version": "0.2", + "ignorePaths": [ + ".cspell.jsonc", + ".git/**", + "*.code-workspace", + ".local-gitignore", + "data/UnicodeData.txt" + ], + "dictionaryDefinitions": [ + { + "name": "project-words", + "path": ".vscode/ltex.dictionary.en-US.txt", + "addWords": true + } + ], + "dictionaries": [ + "bash", + "win32", + "scientific-terms-us", + "project-words" + ], + "ignoreWords": [ + "ăâățî", + "ĂÂȚÎ", + "BCDEFGH", + "MIRCEANEACȘUĂÂȚÎ", + "ȚEPUȘ", + "αλφάβητο", + "αξία", + "αρχείο", + "ελληνικό", + "ΚΛΕΙΔΙ", + "Հայերեն", + "पंजाबी", + "ᓀᐦᐃᔭᐍᐏᐣ", + "Ελληνικός", + "العربي", + "اللغة", + "ܐܪܡܝܐ" + ], + "ignoreRegExpList": [ + "/-W.*/", + "/-D.*/", + "/-X.*/", + "/-I.*/" + ], + "allowCompoundWords": true, + "words": [] +} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d064d24..576ae69 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,17 +11,17 @@ permissions: jobs: build: runs-on: windows-latest - - steps: + + steps: - name: Get CPM uses: neacsum/configurator@v0.0.11 with: name: cpm.exe url: https://github.com/neacsum/cpm/releases/latest/download/cpm.exe - + - name: Build libraries run: cpm -v --proto https -u https://github.com/neacsum/utf8.git -r $HOME utf8 - + - name: Build and run tests shell: cmd run: | @@ -32,4 +32,3 @@ jobs: with: name: test_results path: ~/utf8/build/exe/x64/debug/utf8_tests.xml - diff --git a/.github/workflows/doxygen.yml b/.github/workflows/doxygen.yml index 075e2bd..67f5992 100644 --- a/.github/workflows/doxygen.yml +++ b/.github/workflows/doxygen.yml @@ -26,11 +26,11 @@ jobs: - name: Generate Doxygen documentation run: doxygen tools/doxygen/Doxyfile - + - name: Upload pages uses: actions/upload-pages-artifact@v3 with: path: docs - + - name: Deploy 
pages uses: actions/deploy-pages@v4 diff --git a/.github/workflows/purge.yml b/.github/workflows/purge.yml index 32a786d..32ea94c 100644 --- a/.github/workflows/purge.yml +++ b/.github/workflows/purge.yml @@ -5,11 +5,11 @@ on: days: description: 'Number of days.' required: true - default: 30 + default: '30' minimum_runs: description: 'The minimum runs to keep for each workflow.' required: true - default: 6 + default: '6' delete_workflow_pattern: description: 'The name or filename of the workflow. if not set then it will target all workflows.' required: false diff --git a/.gitignore b/.gitignore index dc67040..ffdae9e 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,9 @@ /build /.vs* +!/.vscode/ +/.vscode/* +!/.vscode/ltex.dictionary.en-US.txt /lib /.editorconfig /utf8.cppcheck diff --git a/.vscode/ltex.dictionary.en-US.txt b/.vscode/ltex.dictionary.en-US.txt new file mode 100644 index 0000000..4c502a6 --- /dev/null +++ b/.vscode/ltex.dictionary.en-US.txt @@ -0,0 +1,60 @@ +asmx +basecvt +casecvt +codept +cppcheck +endl +exetest +fdat +gclef +icompare +ifndef +ifstream +ipch +IWYU +ized +Kaspersky +keyxx +lfont +ltex +Mattraks +mfcribbon +msbuild +msvc +NDEBUG +Neacsu +neacșu +neacsum +ofstream +opensdf +resx +sectionxx +stdcpp +SYMED +usebackq +utpp +vect +vsmsbuildcmd +VSWHERE +wcmd +wdat +wdata +wdir +wdrive +wemoji +wfile +wfname +wfull +winutf +wnam +wnew +wpath +wpfx +wptr +wrel +wrhs +wsmiley +wsubkey +wval +wvalue +wvar diff --git a/BUILD.bat b/BUILD.bat index 3db582e..e411f0a 100644 --- a/BUILD.bat +++ b/BUILD.bat @@ -23,7 +23,7 @@ echo Visual studio installation folder is: %VSInstallDir% call "%VSInstallDir%\common7\tools\vsmsbuildcmd.bat" rem -rem Build tragets. Valid targets are "lib" and "tests" +rem Build targets. 
Valid targets are "lib" and "tests" rem Default is to build all rem if "%~1"=="" (msbuild "%~dp0build.proj") else (msbuild -target:%1 "%~dp0build.proj") diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d74409..b6735cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 4.0) project(utf8) if (WIN32) @@ -7,17 +7,41 @@ else() set(pfx "") endif() -add_compile_options("$<$:/utf-8>") -add_compile_options("$<$:/utf-8>") -add_definitions(-DUNICODE -D_UNICODE) +add_compile_definitions(_UNICODE UNICODE) +if(MSVC) + add_compile_options(/utf-8) +endif() + +# https://wg21.link/p2513 +if(MSVC) + add_compile_options(/Zc:char8_t-) +else() + add_compile_options(-fno-char8_t) +endif() + +## configure CMake module search paths that depend on the project +## proj src dir is location of this file +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) +include(ConfigFetchContent) +# include dependencies +include(utpp) add_subdirectory(tools/gen_casetab) + +add_library(${PROJECT_NAME}) +set_target_properties(${PROJECT_NAME} PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib/${pfx}/$ + CXX_STANDARD 17 +) add_subdirectory(src) +add_subdirectory(include) +set(BUILD_TESTS TRUE CACHE BOOL "Build tests") if (BUILD_TESTS) add_subdirectory(tests) endif () +set(BUILD_EXAMPLES TRUE CACHE BOOL "Build examples") if (BUILD_EXAMPLES) add_subdirectory(examples) endif () diff --git a/CMakePresets.json b/CMakePresets.json index a6ee76a..556b44b 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -1,13 +1,34 @@ { - "version": 6, - "configurePresets":[ + "version": 8, + "configurePresets": [ { - "name": "x64", + "name": "clang-x86_64-pc-windows-msvc", + "displayName": "Clang (WinSDK/MSVC libs) Win x64", + "description": "Using WinSDK/MSVC libs and Clang compilers", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/x64-clang", + "environment": { + "WinSdkVersion": "10.0.26100.0", + 
"WinSdkDir": "C:/Program Files (x86)/Windows Kits/10/bin/$env{WinSdkVersion}/x64", + "PATH": "$env{WinSdkDir};$penv{PATH}", + "CMAKE_POLICY_DEFAULT_CMP0174": "NEW" + }, + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_C_COMPILER": "clang.exe", + "CMAKE_CXX_COMPILER": "clang++.exe", + "CMAKE_RC_COMPILER": "rc.exe", + "CMAKE_C_COMPILER_TARGET": "x86_64-pc-windows-msvc", + "CMAKE_CXX_COMPILER_TARGET": "x86_64-pc-windows-msvc" + } + }, + { + "name": "MSVC x64", "displayName": "x64 Config", "binaryDir": "${sourceDir}/build/x64" }, { - "name": "x86", + "name": "MSVC x86", "displayName": "x86 Config", "architecture": { "value": "win32" @@ -15,64 +36,89 @@ "binaryDir": "${sourceDir}/build/x86" } ], - "buildPresets":[ { - "name": "debug_x64", - "configurePreset": "x64", + "name": "msvc_debug_x64", + "configurePreset": "MSVC x64", "configuration": "Debug" }, { - "name": "release_x64", - "configurePreset": "x64", + "name": "msvc_release_x64", + "configurePreset": "MSVC x64", "configuration": "Release" }, { - "name": "debug_x86", - "configurePreset": "x86", + "name": "msvc_debug_x86", + "configurePreset": "MSVC x86", "configuration": "Debug" }, { - "name": "release_x86", - "configurePreset": "x86", + "name": "msvc_release_x86", + "configurePreset": "MSVC x86", + "configuration": "Release" + }, + { + "name": "clang_debug_x64", + "configurePreset": "clang-x86_64-pc-windows-msvc", + "configuration": "Debug" + }, + { + "name": "clang_release_x64", + "configurePreset": "clang-x86_64-pc-windows-msvc", "configuration": "Release" } ], - "workflowPresets":[ { - "name": "x64", + "name": "MSVC x64", + "steps": [ + { + "type": "configure", + "name": "MSVC x64" + }, + { + "type": "build", + "name": "msvc_debug_x64" + }, + { + "type": "build", + "name": "msvc_release_x64" + } + ] + }, + { + "name": "MSVC x86", "steps": [ { "type": "configure", - "name": "x64" + "name": "MSVC x86" }, { "type": "build", - "name": "debug_x64" + "name": "msvc_debug_x86" }, { "type": 
"build", - "name": "release_x64" + "name": "msvc_release_x86" } ] }, { - "name": "x86", + "name": "Clang x64", "steps": [ { "type": "configure", - "name": "x86" + "name": "clang-x86_64-pc-windows-msvc" }, { "type": "build", - "name": "debug_x86" + "name": "clang_debug_x64" }, { "type": "build", - "name": "release_x86" + "name": "clang_release_x64" } ] } ] -} \ No newline at end of file +} diff --git a/README.md b/README.md index 31e6ff4..6b72d7d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -UTF8 - Simple Library for Internationalization -============================================= +# UTF8 - Simple Library for Internationalization While most of the (computing) world has standardized on using UTF-8 encoding, Win32 has remained stuck with wide character strings (also called UTF-16 encoding). @@ -7,10 +6,13 @@ Win32 has remained stuck with wide character strings (also called UTF-16 encodin This library simplifies usage of UTF-8 encoded strings under Win32 using principles outlined in the [UTF-8 Everywhere Manifesto](http://utf8everywhere.org/). Here is an example of a function call: + ```C++ utf8::mkdir ("ελληνικό"); //create a directory with a UTF8-encoded name ``` + and another example of a C++ stream with a name and content that are not ASCII characters: + ```C++ utf8::ofstream u8strm("😃😎😛"); @@ -19,63 +21,75 @@ and another example of a C++ stream with a name and content that are not ASCII c ``` A call to Windows API functions can be written as: + ```C++ HANDLE f = CreateFile (utf8::widen ("ελληνικό").c_str (), GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); ``` ## Usage + Before using this library, please review the guidelines from the [UTF-8 Everywhere Manifesto](http://utf8everywhere.org/). In particular: -- define UNICODE or _UNICODE in your program - -- for Visual C++ users, make sure "Use Unicode Character Set" option is defined (under "Configuration Properties" > "General" > "Project Defaults" page). 
-- for Visual C++ users, add [`/utf-8`](https://docs.microsoft.com/en-us/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8) option under "C/C++" > "All Options" > "Additional Options". +- Define UNICODE or _UNICODE in your program -- use only `std::string` and `char*` variables. Assume they all contain UTF-8 encoded strings. +- For Visual C++ users, make sure "Use Unicode Character Set" option is defined (under "Configuration Properties" > "General" > "Project Defaults" page). -- for Visual C++ users, if compiling under C++20 language standard, add the [`Zc:char8_t-`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-char8-t?view=msvc-170) option under "C/C++" > "All Options" >"Additional Options" (see discussion below.) - -- use UTF-16 strings **only** in arguments to Windows API calls. +- For Visual C++ users, add [`/utf-8`](https://docs.microsoft.com/en-us/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8) option under "C/C++" > "All Options" > "Additional Options". + +- Use only `std::string` and `char*` variables. Assume they all contain UTF-8 encoded strings. + +- For Visual C++ users, if compiling under C++20 language standard, add the [`Zc:char8_t-`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-char8-t?view=msvc-170) option under "C/C++" > "All Options" >"Additional Options" (see discussion below.) + +- Use UTF-16 strings **only** in arguments to Windows API calls. All functions and classes in this library are included in the `utf8` namespace. It is a good idea **not** to have a using directive for this namespace. That makes it more evident in the code where UTF8-aware functions are used. ### Narrowing and Widening Functions + The basic conversion functions change the encoding between UTF-8, UTF-16 and UTF-32. 
`narrow()` function converts strings from UTF-16 or UTF-32 encoding to UTF-8: + ```C++ std::string utf8::narrow (const wchar_t* s, size_t nch=0); std::string utf8::narrow (const std::wstring & s); std::string utf8::narrow (const char32_t* s, size_t nch=0); -std::string utf8::narrow (const std::u32string& s); +std::string utf8::narrow (const std::u32string& s); ``` The `widen()` function converts UTF-8 to UTF-16: + ```C++ std::wstring utf8::widen (const char* s, size_t nch); std::wstring utf8::widen (const std::string& s); ``` + The `runes()` function converts UTF-8 to UTF-32: + ```C++ std::u32string runes (const char* s, size_t nch = 0); std::u32string utf8::runes (const std::string& s); ``` There are also functions for: + - character counting - string traversal - validity checking ### Case Folding Functions + Case folding (conversion between upper case and lower case) in Unicode is more complicated than traditional ASCII case conversion. This library uses standard tables published by Unicode Consortium to perform upper case to lower case conversions and case-insensitive string comparison. -- case folding - `toupper()`, `tolower()`, `make_upper()`, `make_lower()` -- case-insensitive string comparison - `icompare()` +- Case folding - `toupper()`, `tolower()`, `make_upper()`, `make_lower()` +- Case-insensitive string comparison - `icompare()` ### Common "C" Functions Wrappers + The library provides UTF-8 wrappings most frequently used C functions. Function name and arguments match their traditional C counterparts. + - Common file access operations: `utf8::fopen`, `utf8::access`, `utf8::remove`, `utf8::chmod`, `utf8::rename` - Directory operations: `utf8::mkdir`, `utf8::rmdir`, `utf8::chdir`, `utf8::getcwd` - Environment functions: `utf8::getenv`, `utf8::putenv` @@ -83,74 +97,91 @@ The library provides UTF-8 wrappings most frequently used C functions. Function - Character classification functions *is...* (`isalnum`, `isdigit`, etc.) 
### C++ File I/O Streams + C++ I/O streams (`utf8::ifstream`, `utf8::ofstream`, `utf8::fstream`) provide and easy way to create files with names that are encoded using UTF-8. Because UTF-8 strings are character strings, reading and writing from these files can be done with standard insertion and extraction operators. ### Windows-Specific Functions -- path management: `splitpath`, `makepath` -- conversion of command-line arguments: `get_argv` and `free_argv` -- popular Windows API functions: `MessageBox`, `LoadString`, `ShellExecute`, `CopyFile`, etc. + +- Path management: `splitpath`, `makepath` +- Conversion of command-line arguments: `get_argv` and `free_argv` +- Popular Windows API functions: `MessageBox`, `LoadString`, `ShellExecute`, `CopyFile`, etc. - Registry API (`RegCreateKey`, `RegOpenKey`, `RegSetValue`, `RegGetValue`, etc.) The API for Windows profile files (also called INI files) was replaced with an object `utf8::IniFile`. ### Error Handling + Invalid characters or sequences can be handled in two different ways: -- the invalid character/sequence is replaced by a `REPLACEMENT_CHARACTER` (0xFFFD) -- the functions throw an exception `utf8::exception`. The member `utf8::exception::code` indicates what has triggered the exception. + +- The invalid character/sequence is replaced by a `REPLACEMENT_CHARACTER` (0xFFFD) +- The functions throw an exception `utf8::exception`. The member `utf8::exception::code` indicates what has triggered the exception. The function `error_mode()` selects the error handling strategy. The error handling strategy is thread-safe. ## Using the library under C++20 standard + The C++20 standard has [added an additional type `char8_t`](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html), designed to keep UTF-8 encoded characters, and a string type `std::u8string`. By making it a separate type from `char` and `unsigned char`, the committee has also created a number of incompatibilities. 
For instance the following fragment will produce an error: + ```C++ std::string s {"English text"}; //this is ok s = {u8"日本語テキスト"}; //"Japaneese text" - error ``` + You would have to change it to something like: + ```C++ -std::u8string s {u8"English text"}; -s = {u8"日本語テキスト"}; +std::u8string s {u8"English text"}; +s = {u8"日本語テキスト"}; ``` -Recently (June, 2022) the committee seems to have changed position and introduced a [compatibility and portability fix - DR2513R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2513r3.html) allowing initialization of arrays of `char` or `unsigned char` with UTF-8 string literals. Until the defect report makes its way into the next standard edition, the solution for Visual C++ users who compile under C++20 standard rules is to use the [`Zc:char8_t-`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-char8-t?view=msvc-170) compiler option. -In my opinion, by introducing the `char8_t` type, the committee went against the very principles of UTF-8 encoding. The purpose of the encoding was to extend usage of the `char` type to additional Unicode code points. It has been so successful that it is now the de-facto standard used all across the Internet. Even Windows, that has been a bastion of UTF-16 encoding, is now slowly [moving toward UTF-8](https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page). +Recently (June 2022) the committee seems to have changed position and introduced a [compatibility and portability fix - DR2513R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2513r3.html) allowing initialization of arrays of `char` or `unsigned char` with UTF-8 string literals. Until the defect report makes its way into the next standard edition, the solution for Visual C++ users who compile under C++20 standard rules is to use the [`Zc:char8_t-`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-char8-t?view=msvc-170) compiler option. 
+ +In my opinion, by introducing the `char8_t` type, the committee went against the very principles of UTF-8 encoding. The purpose of the encoding was to extend usage of the `char` type to additional Unicode code points. It has been so successful that it is now the de facto standard used all across the Internet. Even Windows, that has been a bastion of UTF-16 encoding, is now slowly [moving toward UTF-8](https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page). In this context, the use of `char` data type for anything other than holding encodings of strings, seems out of place. In particular arithmetic computations with `char` or `unsigned char` entities are just a small fraction of the use cases. The standard should try to simplify usage in the most common cases leaving the infrequent ones to bear the burden of complexity. Following this principle, you would want to write: + ```C++ std::string s {"English text"}; s += " and "; s += "日本語テキスト"; ``` + with the implied assumption that all `char` strings are UTF-8 encoded character strings. ## Using the library under Linux + While the library was specifically built for Windows environment, a reduced version can be compiled and used under Linux. It has been tested under Ubuntu 22.04 with GCC. Obviously, functions that are specific to the Windows environment are not available. ## Documentation -[Doxygen](http://www.doxygen.nl/) documentation can be found at https://neacsum.github.io/utf8/ - + +[Doxygen](http://www.doxygen.nl/) documentation can be found at [https://neacsum.github.io/utf8/](https://neacsum.github.io/utf8/) + ## Building -The UTF8 library doesn't have any dependencies. The test program however uses the [UTTP library](https://github.com/neacsum/utpp). + +The UTF8 library doesn't have any dependencies. The test program however uses the [UTPP library](https://github.com/neacsum/utpp). 
The preferred method is to use the [CPM - C/C++ Package Manager](https://github.com/neacsum/cpm) to fetch all dependent packages and build them. Download the [CPM program](https://github.com/neacsum/cpm/releases/latest/download/cpm.exe) and, from the root of the development tree, issue the `cpm` command: -``` + +```bash cpm -u https://github.com/neacsum/utf8.git utf8 ``` The Visual C++ projects are set to compile under C++17 rules and can also be compiled under C++20 rules. If you are using C++20 rules, you have to add the [`Zc:char8_t-`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-char8-t?view=msvc-170) option as discussed above. -You can build the library using CMake. From the _utf8_ directory: -``` +You can build the library using CMake. From the `utf8` directory: + +```bash cmake -S . -B build cmake --build build ``` + Alternatively, `BUILD.bat` script will build the libraries and test programs. Under Linux, the library can be build using `CPM` as explained before, or with `cmake` using the same commands shown above. 
- ## License + [The MIT License](https://github.com/neacsum/utf8/blob/master/LICENSE) diff --git a/cmake/ConfigFetchContent.cmake b/cmake/ConfigFetchContent.cmake new file mode 100644 index 0000000..d061213 --- /dev/null +++ b/cmake/ConfigFetchContent.cmake @@ -0,0 +1,7 @@ +set(FETCHCONTENT_QUIET FALSE CACHE BOOL "Suppress output from FetchContent" FORCE) + +# Cache FetchContent downloads outside build directory +set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/../.dep-cache" CACHE PATH "FetchContent cache directory") + +# Include FetchContent for external dependencies +include(FetchContent) diff --git a/cmake/utpp.cmake b/cmake/utpp.cmake new file mode 100644 index 0000000..1221fe8 --- /dev/null +++ b/cmake/utpp.cmake @@ -0,0 +1,18 @@ +FetchContent_Declare( + utpp-repo + GIT_REPOSITORY https://github.com/neacsum/utpp.git + GIT_SHALLOW TRUE + GIT_PROGRESS TRUE + SOURCE_SUBDIR "" # Prevent any build system from being processed + CONFIGURE_COMMAND "echo" +) +FetchContent_MakeAvailable(utpp-repo) + +# Header-only interface target that depends on the fetched repo +add_library(utpp INTERFACE) + +target_include_directories(utpp INTERFACE ${utpp-repo_SOURCE_DIR}/include) +target_sources(utpp INTERFACE ${utpp-repo_SOURCE_DIR}/include/utpp/utpp.h) + +# target_include_directories(utpp INTERFACE ${PROJECT_SOURCE_DIR}/include) +# target_sources(utpp INTERFACE ${PROJECT_SOURCE_DIR}/include/utpp_shim.h) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5ed06e6..172fa44 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,8 +1,8 @@ add_executable(sample sample.cpp) -set_target_properties(sample PROPERTIES - CXX_STANDARD 17 - ) +set_target_properties(sample PROPERTIES + CXX_STANDARD 20 +) # All link directories are subfolders of ./lib if (WIN32) @@ -13,4 +13,3 @@ endif() # Add dependent libraries target_link_libraries (sample PRIVATE utf8) - diff --git a/examples/sample.cpp b/examples/sample.cpp index 97dcf8f..f016ca8 100644 --- 
a/examples/sample.cpp +++ b/examples/sample.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +// #include #include using namespace std; @@ -32,7 +32,7 @@ int main (int /*unused*/, char ** /*unused*/) ofstream fout; if (GetACP () != 65001) { - cout << "Windows ACP is not UTF-8. Output will be sent to " << FNAME + cout << "Windows ACP is not UTF-8. Output will be sent to " << FNAME << endl << endl; fout.open (FNAME); } @@ -104,7 +104,7 @@ int main (int /*unused*/, char ** /*unused*/) #ifdef _WIN32 //Set an environment variable and retrieve its value utf8::putenv ("Punjabi=पंजाबी"); - out << "The environment variable Punjabi is " + out << "The environment variable Punjabi is " << utf8::getenv ("Punjabi") << endl; #endif @@ -143,4 +143,4 @@ int main (int /*unused*/, char ** /*unused*/) utf8::free_argv (argc, argv); #endif return 0; -} \ No newline at end of file +} diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt new file mode 100644 index 0000000..3732649 --- /dev/null +++ b/include/CMakeLists.txt @@ -0,0 +1,17 @@ +# target_include_directories(${PROJECT_NAME} PUBLIC utf8) +target_include_directories(${PROJECT_NAME} PUBLIC .) +target_sources(${PROJECT_NAME} PUBLIC + utf8/ini.h + utf8/utf8.h +) +# Windows specific stuff +if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + target_sources(${PROJECT_NAME} PUBLIC + utf8/winutf8.h + ) +endif () + +target_sources(${PROJECT_NAME} PRIVATE + uppertab.h + lowertab.h +) diff --git a/include/utf8/ini.h b/include/utf8/ini.h index c83692b..b19d8b5 100644 --- a/include/utf8/ini.h +++ b/include/utf8/ini.h @@ -4,11 +4,14 @@ */ /// \file ini.h Definition of IniFile class -/// This file should not be included directly. It is included by utf8.h header. +/// This file should not be included directly. It is included by utf8.h header. 
#pragma once #include #include +#ifdef _WIN32 + #include +#endif namespace utf8 { @@ -37,7 +40,7 @@ class IniFile /// Set the file name associated with this object void File (const std::string& filename); - + ///Get a string key size_t GetString (char *value, size_t len, const std::string& key, const std::string& section, const std::string& defval = std::string()) const; diff --git a/include/utf8/utf8.h b/include/utf8/utf8.h index c8e4ab8..f548d7a 100644 --- a/include/utf8/utf8.h +++ b/include/utf8/utf8.h @@ -7,7 +7,6 @@ #pragma once #include -#include #include // ------------- Global configuration options --------------------------------- @@ -268,7 +267,7 @@ bool is_valid (const char* p) inline bool is_valid (std::string::const_iterator p, const std::string::const_iterator last) { - auto len = last - p; + // auto len = last - p; auto prev_mode = error_mode (action::replace); bool valid = (next (p, last) != REPLACEMENT_CHARACTER); error_mode (prev_mode); @@ -744,7 +743,7 @@ bool rename (const std::string& oldname, const std::string& newname) } /// \copydoc utf8::rename() -inline +inline bool rename (const char* oldname, const char* newname) { #if UTF8_USE_WINDOWS_API diff --git a/include/utf8/winutf8.h b/include/utf8/winutf8.h index c0575c7..6f2f3d7 100644 --- a/include/utf8/winutf8.h +++ b/include/utf8/winutf8.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include #undef MessageBox #undef CopyFile @@ -97,7 +99,7 @@ std::string GetFullPathName (const std::string& rel_path); bool GetModuleFileName (HMODULE hModule, std::string& filename); std::string GetModuleFileName (HMODULE hModule = NULL); -/// File enumeration structure used by find_first() and find_next() functions +/// File enumeration structure used by find_first() and find_next() functions struct find_data { find_data () ///< Initializes the structure : handle{ INVALID_HANDLE_VALUE } @@ -112,7 +114,7 @@ struct find_data { FILETIME creation_time; ///< file creation time FILETIME access_time; ///< 
file last access time FILETIME write_time; ///< file last write time - __int64 size; ///< file size + int64_t size; ///< file size std::string filename; ///< file name std::string short_name; ///< 8.3 file name }; @@ -147,16 +149,16 @@ class file_enumerator : protected find_data operator bool () const; - find_data::attributes; - find_data::creation_time; - find_data::access_time; - find_data::write_time; - find_data::size; - find_data::filename; - find_data::short_name; + using find_data::attributes; + using find_data::creation_time; + using find_data::access_time; + using find_data::write_time; + using find_data::size; + using find_data::filename; + using find_data::short_name; }; -/// A simple buffer for caching values returned by Windows API +/// A simple buffer for caching values returned by Windows API class buffer { public: explicit buffer (size_t size_); @@ -343,4 +345,3 @@ inline DWORD } } //end namespace - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f04d071..8c3f020 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,33 +1,13 @@ -add_library(${PROJECT_NAME}) - -set_target_properties(${PROJECT_NAME} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib/${pfx}/$ - CXX_STANDARD 17 -) - -target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include) - -add_custom_command( - OUTPUT ${PROJECT_SOURCE_DIR}/include/uppertab.h ${PROJECT_SOURCE_DIR}/include/lowertab.h - COMMAND $ ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt ${PROJECT_SOURCE_DIR}/include - MAIN_DEPENDENCY ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt - DEPENDS gen_casetab - VERBATIM -) -target_sources(${PROJECT_NAME} - PRIVATE ${PROJECT_SOURCE_DIR}/include/uppertab.h ${PROJECT_SOURCE_DIR}/include/lowertab.h -) - -target_sources(${PROJECT_NAME} PRIVATE - casecvt.cpp +target_sources(${PROJECT_NAME} PRIVATE + casecvt.cpp ini.cpp - utf8.cpp + utf8.cpp ) # Windows specific stuff if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") -target_sources(${PROJECT_NAME} 
PRIVATE - buffer.cpp - win.cpp -) + target_sources(${PROJECT_NAME} PRIVATE + buffer.cpp + win.cpp + ) endif () diff --git a/src/ini.cpp b/src/ini.cpp index b0433b7..9481fa9 100644 --- a/src/ini.cpp +++ b/src/ini.cpp @@ -29,7 +29,7 @@ namespace utf8 { \defgroup inifile INI File Replacement API An object-oriented replacement for working with INI files - The basic Windows API functions for reading and writing INI files, + The basic Windows API functions for reading and writing INI files, [GetPrivateProfileStringW] (https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-getprivateprofilestringw) and [WritePrivateProfileStringW] @@ -120,7 +120,7 @@ static char *trimtrailing (char *str) } //----------------------------------------------------------------------------- -// File manipulation functions +// File manipulation functions inline static FILE *openread (const std::string& fname) @@ -161,30 +161,30 @@ static std::string tempname (const std::string& source) \ingroup inifile */ -/// Constructor -IniFile::IniFile (const std::string& file) - : temp_file {false} +/// Constructor +IniFile::IniFile (const std::string& file): /* get the fully qualified path name in case current directory changes after creation */ -#ifdef _WIN32 -# if UTF8_USE_WINDOWS_API - , filename { utf8::fullpath (file) } -# else - , filename{ narrow (std::filesystem::absolute (widen (file))) } -# endif -#else - , filename{ std::filesystem::absolute (file) } -#endif + #ifdef _WIN32 + #if UTF8_USE_WINDOWS_API + filename { utf8::fullpath (file) }, + #else + filename{ narrow (std::filesystem::absolute (widen (file))) }, + #endif + #else + filename{ std::filesystem::absolute (file) }, + #endif + temp_file {false} { } /// Creates a temporary file as filename. 
-IniFile::IniFile () - : temp_file {true} -#if UTF8_USE_WINDOWS_API - , filename (utf8::GetTempFileName(".", "INI", 0)) -#else - , filename (tmpnam(NULL)) -#endif +IniFile::IniFile (): + #if UTF8_USE_WINDOWS_API + filename (utf8::GetTempFileName(".", "INI", 0)), + #else + filename (tmpnam(NULL)), + #endif + temp_file {true} { } @@ -205,7 +205,7 @@ IniFile::~IniFile() /*! Changes the file associated with this object. If previous one was a temporary file, it is deleted now (loosing all settings in the process). - + \param fname New file name. If empty it creates a temporary file. */ void IniFile::File (const std::string& fname) @@ -418,7 +418,7 @@ HFONT IniFile::GetFont (const std::string& key, const std::string& section, HFON } /*! - Color is assumed to be in the same format as written by PutColor i.e. + Color is assumed to be in the same format as written by PutColor i.e. R G B numbers separated by spaces. \param key key name @@ -502,8 +502,8 @@ bool IniFile::GetBool (const std::string& key, const std::string& section, bool if (!GetString (buffer, sizeof(buffer), key, section)) return defval; - return (!icompare (buffer, "on") - || !icompare (buffer, "yes") + return (!icompare (buffer, "on") + || !icompare (buffer, "yes") || !icompare (buffer, "true") || (atoi (buffer) == 1)); } @@ -672,7 +672,7 @@ int IniFile::GetKeys (char *keys, size_t sz, const std::string& section) int cnt = 0; sz -= 2; //leave space for terminating NULL - auto f = [&keys, &sz] (const char *k) + auto f = [&keys, &sz] (const char *k) { size_t l = min (strlen(k), sz); strncpy (keys, k, sz); @@ -725,7 +725,7 @@ size_t IniFile::GetString (char *value, size_t len, const std::string& key, cons if (!value || !len) return 0; fp = openread (filename); - if (fp) + if (fp) { found = getkey (fp, section.c_str(), key.c_str(), value, len); fclose(fp); @@ -771,7 +771,7 @@ bool IniFile::PutString (const std::string& key, const std::string& value, const } /*! 
- Section names are returned as null-terminated strings followed by one + Section names are returned as null-terminated strings followed by one final null. \param sects buffer for returned keys @@ -913,7 +913,7 @@ static bool findsection (const char *section, FILE *rf, FILE *wf, char *buffer, if (*sp == '[' && strchr(buffer, ']')) { sp = skipleading (sp + 1); - + if (!icomparen (sp, section, len)) return true; } @@ -946,10 +946,10 @@ static bool putkey (const char *key, const char *value, const char *section, con assert (section); - if (!(rfp = openread(filename))) + if (!(rfp = openread(filename))) { /* If the .ini file doesn't exist, make a new file */ - if (key && value) + if (key && value) { if (!(wfp = openwrite (filename))) return false; @@ -975,7 +975,7 @@ static bool putkey (const char *key, const char *value, const char *section, con // key not found, or different value -> proceed (but rewind the input file first) fseek (rfp, 0, SEEK_SET); - if (!(wfp = openwrite(tempname(filename)))) + if (!(wfp = openwrite(tempname(filename)))) { fclose (rfp); return false; @@ -1010,7 +1010,7 @@ static bool putkey (const char *key, const char *value, const char *section, con } else { - //deleting the section -> skip all entries until next section or end of file + //deleting the section -> skip all entries until next section or end of file while ((sp = fgets (buffer, sizeof (buffer), rfp)) && *(sp = skipleading (buffer)) != '[') ; } @@ -1058,7 +1058,7 @@ static bool getkey(FILE *fp, const char *section, const char *key, char *val, si assert (fp); assert (section); assert (key); - + // Move through file 1 line at a time until the section is matched or EOF. 
if (!findsection (section, fp, NULL, buffer, sizeof(buffer))) return false; @@ -1068,7 +1068,7 @@ static bool getkey(FILE *fp, const char *section, const char *key, char *val, si key = skipleading (key); len = trimmed_len (key); bool found = false; - do + do { if (!fgets (buffer, sizeof (buffer), fp) || *(sp = skipleading (buffer)) == '[') return false; @@ -1158,4 +1158,3 @@ static bool same_file (const std::string& f1, const std::string& f2) } } - diff --git a/src/utf8.cpp b/src/utf8.cpp index 965e18d..9c96239 100644 --- a/src/utf8.cpp +++ b/src/utf8.cpp @@ -13,7 +13,7 @@ using namespace std; namespace utf8 { -static thread_local action ermode{action::replace}; +static thread_local action errmode{action::replace}; /*! \param mode new error handling mode @@ -21,8 +21,8 @@ static thread_local action ermode{action::replace}; */ action error_mode (action mode) { - auto prev = ermode; - ermode = mode; + auto prev = errmode; + errmode = mode; return prev; } @@ -31,7 +31,7 @@ static void encode (char32_t c, std::string& s); inline char32_t throw_or_replace (exception::cause err) { - if (ermode == action::except) + if (errmode == action::except) throw exception (err); else return REPLACEMENT_CHARACTER; @@ -369,7 +369,7 @@ bool valid_str (const char *s, size_t nch) Decodes a UTF-8 encoded character and advances iterator to next code point \param ptr Reference to iterator to be advanced - \param last Iterator pointing to the end of range + \param last Iterator pointing to the end of range \return decoded character If the iterator points to an invalid UTF-8 encoding or is at end, the function @@ -454,7 +454,7 @@ char32_t next (std::string::const_iterator& ptr, const std::string::const_iterat \param ptr Reference to character pointer to be advanced \return decoded character - If the string contains an invalid UTF-8 encoding, the function throws an + If the string contains an invalid UTF-8 encoding, the function throws an exception or returns utf8::REPLACEMENT_CHARACTER 
(0xfffd) depending on error handling mode. In any case, the pointer is advanced to beginning of next character or end of string. @@ -533,7 +533,7 @@ char32_t next (const char*& ptr) \param ptr Reference to character pointer to be decremented \return previous UTF-8 encoded character - If the string contains an invalid UTF-8 encoding, the function throws an + If the string contains an invalid UTF-8 encoding, the function throws an exception or returns utf8::REPLACEMENT_CHARACTER (0xfffd) depending on error handling mode. In this case the pointer remains unchanged. */ @@ -727,7 +727,7 @@ void encode (char32_t c, std::string& s) else if (c <= 0x7ff) { s.push_back (0xC0 | c >> 6); - s.push_back (0x80 | c & 0x3f); + s.push_back (0x80 | (c & 0x3f)); } else if (c <= 0xFFFF) { @@ -735,17 +735,17 @@ void encode (char32_t c, std::string& s) c= throw_or_replace(exception::cause::invalid_char32); s.push_back (0xE0 | c >> 12); - s.push_back (0x80 | c >> 6 & 0x3f); - s.push_back (0x80 | c & 0x3f); + s.push_back (0x80 | (c >> 6 & 0x3f)); + s.push_back (0x80 | (c & 0x3f)); } else if (c <= 0x10ffff) { s.push_back (0xF0 | c >> 18); - s.push_back (0x80 | c >> 12 & 0x3f); - s.push_back (0x80 | c >> 6 & 0x3f); - s.push_back (0x80 | c & 0x3f); + s.push_back (0x80 | (c >> 12 & 0x3f)); + s.push_back (0x80 | (c >> 6 & 0x3f)); + s.push_back (0x80 | (c & 0x3f)); } - else if (ermode == action::except) + else if (errmode == action::except) throw exception (exception::cause::invalid_char32); else s.append ("\xEF\xBF\xBD"); //append replacement character diff --git a/src/win.cpp b/src/win.cpp index c2a62ca..529876c 100644 --- a/src/win.cpp +++ b/src/win.cpp @@ -3,7 +3,7 @@ This is part of UTF8 project. See LICENSE file for full license terms. 
*/ -/// \file win.cpp Wrappers for common Windows functions +/// \file win.cpp Wrappers for common Windows functions #include #include @@ -18,7 +18,7 @@ static void copy_fdat (WIN32_FIND_DATAW& wfd, find_data& fdat) fdat.creation_time = wfd.ftCreationTime; fdat.access_time = wfd.ftLastAccessTime; fdat.write_time = wfd.ftLastWriteTime; - fdat.size = ((__int64)wfd.nFileSizeHigh << 32) | (wfd.nFileSizeLow); + fdat.size = ((int64_t)wfd.nFileSizeHigh << 32) | (wfd.nFileSizeLow); fdat.filename = narrow (wfd.cFileName); fdat.short_name = narrow (wfd.cAlternateFileName); } @@ -57,7 +57,7 @@ bool find_first (const std::string& name, find_data& fdat) \note Wrapper for [FindNextFileW](https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-findnextfilew) Windows API function. - + If there are no more files, the function returns _false_ and GetLastError function returns __ERROR_NO_MORE_FILES__ */ @@ -458,13 +458,13 @@ std::vector get_argv () //============================================================================= -/*! +/*! \defgroup reg Registry Functions Wrappers for Windows registry functions. For all these functions wide character strings arguments are replaced with UTF-8 encoded C++ strings. -@{ +@{ */ /*! @@ -562,7 +562,7 @@ LSTATUS RegDeleteTree (HKEY key, const std::string& subkey) } /*! 
- Wrapper for + Wrapper for [RegRenameKey](https://learn.microsoft.com/en-us/windows/win32/api/winreg/nf-winreg-regrenamekey) \param key handle to an open registry key @@ -644,7 +644,7 @@ LSTATUS RegSetValue (HKEY key, const std::string& value, const std::vector(ptr - buf) == key_size); auto ret = RegSetValue (key, value, REG_MULTI_SZ, buf, (DWORD)key_size*sizeof(wchar_t)); delete []buf; return ret; @@ -680,7 +680,7 @@ LSTATUS RegQueryValue (HKEY key, const std::string& value, DWORD* type, void* da \param size pointer to size data size (in bytes) \param type pointer to type of data */ -LSTATUS RegGetValue (HKEY key, const std::string& subkey, const std::string& value, +LSTATUS RegGetValue (HKEY key, const std::string& subkey, const std::string& value, DWORD flags, void* data, DWORD* size, DWORD* type) { auto wsubkey = widen (subkey); @@ -707,7 +707,7 @@ LSTATUS RegGetValue (HKEY key, const std::string& subkey, const std::string& val auto wvalue = widen (value); DWORD sz = 0; const DWORD flags = RRF_RT_REG_SZ | RRF_RT_REG_EXPAND_SZ | (expand ? 
0 :RRF_NOEXPAND); - auto ret = RegGetValueW (key, wsubkey.c_str (), wvalue.c_str (), + auto ret = RegGetValueW (key, wsubkey.c_str (), wvalue.c_str (), flags, NULL, NULL, &sz); if (ret == ERROR_SUCCESS) { @@ -716,7 +716,7 @@ LSTATUS RegGetValue (HKEY key, const std::string& subkey, const std::string& val https://stackoverflow.com/questions/29223180/successive-calls-to-reggetvalue-return-two-different-sizes-for-the-same-string */ wchar_t *wdat = new wchar_t[sz / sizeof (wchar_t)]; - ret = RegGetValueW (key, wsubkey.c_str (), wvalue.c_str (), flags, NULL, + ret = RegGetValueW (key, wsubkey.c_str (), wvalue.c_str (), flags, NULL, wdat, &sz); if (ret == ERROR_SUCCESS) data = narrow (wdat); @@ -808,7 +808,7 @@ LSTATUS RegEnumKey (HKEY key, std::vector& names) if (ret != ERROR_SUCCESS) return ret; maxlen++; //for terminating NULL - + wchar_t* wnam = new wchar_t[maxlen]; DWORD index = 0; names.clear (); @@ -898,4 +898,4 @@ LSTATUS RegEnumValue (HKEY key, std::vector& values) } -} //end namespace utf8 \ No newline at end of file +} //end namespace utf8 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 912f686..7d7c778 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,13 +2,21 @@ add_executable(tests tests_ini.cpp tests_win.cpp tests_utf8.cpp tests.rc ) +target_include_directories(tests PRIVATE .) 
+set_property(TARGET tests PROPERTY CXX_STANDARD 20) +set_property(DIRECTORY PROPERTY VS_STARTUP_PROJECT tests) +set_target_properties(tests PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests" + # RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_BINARY_DIR}/tests" + # RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_BINARY_DIR}/tests" +) + +# Install the tests executable (helps with CMake extension discovery) +install(TARGETS tests + RUNTIME DESTINATION tests +) -target_include_directories(tests PUBLIC ${PROJECT_SOURCE_DIR}/include) -set_property(TARGET tests PROPERTY CXX_STANDARD 17) +target_link_libraries (tests PRIVATE utf8 utpp) # All link directories are subfolders of ./lib target_link_directories (tests PUBLIC ${PROJECT_SOURCE_DIR}/lib/${pfx}/$) - -# Add dependent libraries -add_dependencies(tests utf8) -target_link_libraries (tests PRIVATE utf8) diff --git a/tests/tests_ini.cpp b/tests/tests_ini.cpp index 7ff4aae..ec00dec 100644 --- a/tests/tests_ini.cpp +++ b/tests/tests_ini.cpp @@ -5,7 +5,7 @@ #define _CRT_SECURE_NO_WARNINGS #include -#include +#include #include #include @@ -247,7 +247,7 @@ SUITE (IniTests) utf8::IniFile ini{ "test.ini" }; char val[80]; ini.PutString (" key00 ", " value00 ", " section0 "); - GetPrivateProfileStringA ("section0", "key00", "bad", val, + GetPrivateProfileStringA ("section0", "key00", "bad", val, sizeof(val), ".\\test.ini"); CHECK_EQUAL ("value00", val); remove ("test.ini"); @@ -466,7 +466,7 @@ SUITE (IniTests) utf8::IniFile f2 ("test2.ini"); f2.CopySection (f1, "section0", "section1"); - + CHECK_EQUAL ("value00", f2.GetString ("key0", "section1")); CHECK_EQUAL ("value01", f2.GetString ("key1", "section1")); @@ -508,7 +508,7 @@ SUITE (IniTests) f2.CopySection (f1, "section1"); deque keys; - + //previous content of section1 was erased CHECK_EQUAL (2, f2.GetKeys (keys, "section1")); diff --git a/tests/tests_utf8.cpp b/tests/tests_utf8.cpp index fd1fb21..0c49ac3 100644 --- a/tests/tests_utf8.cpp +++ b/tests/tests_utf8.cpp @@ -2,7 
+2,7 @@ Copyright (c) Mircea Neacsu (2014-2024) Licensed under MIT License. This is part of UTF8 project. See LICENSE file for full license terms. */ -#include +#include #include #include #include @@ -23,7 +23,7 @@ using namespace utf8; TEST_MAIN (int argc, char **argv) { - const char* suite_under_test = nullptr; + // const char* suite_under_test = nullptr; std::cerr << "Running " << *argv++ << endl << "working directory is: " << getcwd () << endl; --argc; @@ -45,7 +45,7 @@ TEST_MAIN (int argc, char **argv) std::filesystem::path xml_filename(*argv); std::ofstream xml_stream (xml_filename); UnitTest::ReporterXml xml(xml_stream); - std::cerr << "Output sent to " + std::cerr << "Output sent to " << std::filesystem::absolute (xml_filename) << endl; return RunAllTests (xml); } @@ -237,7 +237,7 @@ TEST (next_ptr) TEST (next_non_const) { char emojis[20]; - strcpy (emojis, u8"😃😎😛" ); + strcpy_s(emojis, sizeof(emojis), u8"😃😎😛"); int i = 0; char* ptr = emojis; while (utf8::next (ptr)) @@ -487,7 +487,7 @@ TEST (invalid_utf8) TEST (throw_invalid_char32) { auto prev_mode = utf8::error_mode (action::except); - bool thrown = false; + // bool thrown = false; CHECK_THROW (narrow (0xd800), utf8::exception); CHECK_THROW (narrow (0xdbff), utf8::exception); utf8::error_mode (prev_mode); @@ -525,7 +525,7 @@ TEST (dir) //Path returned by getcwd should end in our Greek string string cwd = getcwd (); - + //find last path separator #ifdef _WIN32 size_t idx = cwd.rfind ("\\"); @@ -537,7 +537,7 @@ TEST (dir) //Move out of directory and remove it utf8::chdir (".."); - CHECK (utf8::rmdir (dirname)); //rmdir returrs true for success + CHECK (utf8::rmdir (dirname)); //rmdir returns true for success } @@ -598,14 +598,14 @@ TEST (char_class) temp[1] = 0; char tst[80]; snprintf (tst, sizeof(tst), "testing char %d", i); - CHECK_EQUAL_EX ((bool)isalpha (chartab[i]), utf8::isalpha (temp), tst); - CHECK_EQUAL_EX ((bool)isalnum (chartab[i]), utf8::isalnum (temp), tst); - CHECK_EQUAL_EX ((bool)(isdigit) 
(chartab[i]), utf8::isdigit (temp), tst); - CHECK_EQUAL_EX ((bool)(isspace) (chartab[i]), utf8::isspace (temp), tst); - CHECK_EQUAL_EX ((bool)(isblank)(chartab[i]), utf8::isblank (temp), tst); - CHECK_EQUAL_EX ((bool)(isxdigit) (chartab[i]), utf8::isxdigit (temp), tst); - CHECK_EQUAL_EX ((bool)isupper (chartab[i]), utf8::isupper (temp), tst); - CHECK_EQUAL_EX ((bool)islower (chartab[i]), utf8::islower (temp), tst); + CHECK_EQUAL_EX ((bool)isalpha (chartab[i]), utf8::isalpha (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)isalnum (chartab[i]), utf8::isalnum (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)(isdigit) (chartab[i]), utf8::isdigit (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)(isspace) (chartab[i]), utf8::isspace (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)(isblank)(chartab[i]), utf8::isblank (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)(isxdigit) (chartab[i]), utf8::isxdigit (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)isupper (chartab[i]), utf8::isupper (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)islower (chartab[i]), utf8::islower (temp), "%s", tst); } } diff --git a/tests/tests_win.cpp b/tests/tests_win.cpp index 031c7e7..d70e566 100644 --- a/tests/tests_win.cpp +++ b/tests/tests_win.cpp @@ -2,7 +2,7 @@ Copyright (c) Mircea Neacsu (2014-2024) Licensed under MIT License. This is part of UTF8 project. See LICENSE file for full license terms. 
*/ -#include +#include #include #if UTF8_USE_WINDOWS_API @@ -82,7 +82,9 @@ SUITE (MS_Windows) TEST (full_path) { const char* fname = "file.txt"; - FILE* f = ::fopen (fname, "w"); + FILE* f = nullptr; + if (fopen_s(&f, fname, "w") != 0 || !f) + ABORT_EX(true, "Failed to open file"); fclose (f); char full[_MAX_PATH]; @@ -140,7 +142,7 @@ SUITE (MS_Windows) buf = tmp; CHECK_EQUAL (tmp, (string)buf); - // size doesn't shrink when assigning a string + // size doesn't shrink when assigning a string CHECK_EQUAL (_MAX_PATH, buf.size ()); //Copy ctor @@ -313,7 +315,7 @@ SUITE (MS_Windows) SUITE (Registry) { - + const string key_name{ u8"αρχείο" };//Greek for "registry" according to Google TEST (create_open) @@ -379,7 +381,7 @@ TEST (enum_keys) { HKEY key; utf8::RegCreateKey (HKEY_CURRENT_USER, key_name, key); - + vectorin_name{ u8"α1", u8"β2", u8"γ3", u8"😃😎😛"}; vectorout_name(4); @@ -425,4 +427,4 @@ TEST (enum_values) } } //end suite -#endif \ No newline at end of file +#endif diff --git a/tests/utpp_shim.h b/tests/utpp_shim.h new file mode 100644 index 0000000..e01de1f --- /dev/null +++ b/tests/utpp_shim.h @@ -0,0 +1,11 @@ +#pragma once + +// Work around Visual Studio 2022's deleted char32_t stream operator +// This allows utpp's CheckEqual to print char32_t values properly +// Must be defined BEFORE including utpp to ensure it's found by ADL +#include // IWYU pragma: keep +inline std::ostream& operator<<(std::ostream& os, char32_t c) { + return os << static_cast(c); +} + +#include // IWYU pragma: keep diff --git a/tools/gen_casetab/CMakeLists.txt b/tools/gen_casetab/CMakeLists.txt index 5e9d84a..b0cc6a3 100644 --- a/tools/gen_casetab/CMakeLists.txt +++ b/tools/gen_casetab/CMakeLists.txt @@ -1,2 +1,9 @@ add_executable(gen_casetab gen_casetab.cpp) -set_property(TARGET gen_casetab PROPERTY CXX_STANDARD 20) \ No newline at end of file +set_property(TARGET gen_casetab PROPERTY CXX_STANDARD 20) +add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/include/uppertab.h 
${PROJECT_SOURCE_DIR}/include/lowertab.h + COMMAND $ ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt ${PROJECT_SOURCE_DIR}/include + MAIN_DEPENDENCY ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt + DEPENDS gen_casetab + VERBATIM +) diff --git a/tools/gen_casetab/gen_casetab.cpp b/tools/gen_casetab/gen_casetab.cpp index a78340f..342fa75 100644 --- a/tools/gen_casetab/gen_casetab.cpp +++ b/tools/gen_casetab/gen_casetab.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +// #include #include using namespace std; diff --git a/utf8.code-workspace b/utf8.code-workspace new file mode 100644 index 0000000..2557242 --- /dev/null +++ b/utf8.code-workspace @@ -0,0 +1,49 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "files.watcherExclude": { + "build/**": true, + "**/.git/**": true, + }, + "files.insertFinalNewline": true, + "clangd.arguments": [ + "--compile-commands-dir=${workspaceFolder}/build/x64-clang" + ], + "cmake.configureOnOpen": true, + "cmake.configureOnEdit": true, + "cmake.buildBeforeRun": true, + "cmake.launchBehavior": "breakAndReuseTerminal", + "lldb.verboseLogging": true, + "lldb.useNativePDBReader": true, + // "cmake.copyCompileCommands": ${workspaceFolder}/build + }, + "extensions": { + // See http://go.microsoft.com/fwlink/?LinkId=827846 + // for the documentation about the extensions.json format + "recommendations": [ + "streetsidesoftware.code-spell-checker", + "streetsidesoftware.code-spell-checker-scientific-terms", + "streetsidesoftware.code-spell-checker-win32", + "ltex-plus.vscode-ltex-plus", + "vadimcn.vscode-lldb" + ] + }, + "launch": { + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Launch", + "program": "${command:cmake.launchTargetPath}", + "args": [], + "cwd": "${command:cmake.getLaunchTargetDirectory}", + "console": "integratedTerminal" + } + ] + }, +} From 01c3104c7ab944192d683945de29b52ce1dfbff6 Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 11:09:37 
-0500 Subject: [PATCH 02/13] add explicit dependency between utf8 and gen_casetab --- .gitignore | 1 + .vscode/ltex.hiddenFalsePositives.en-US.txt | 2 ++ src/CMakeLists.txt | 6 ++++++ 3 files changed, 9 insertions(+) create mode 100644 .vscode/ltex.hiddenFalsePositives.en-US.txt diff --git a/.gitignore b/.gitignore index ffdae9e..2e37759 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ !/.vscode/ /.vscode/* !/.vscode/ltex.dictionary.en-US.txt +!/.vscode/ltex.hiddenFalsePositives.en-US.txt /lib /.editorconfig /utf8.cppcheck diff --git a/.vscode/ltex.hiddenFalsePositives.en-US.txt b/.vscode/ltex.hiddenFalsePositives.en-US.txt new file mode 100644 index 0000000..99514bc --- /dev/null +++ b/.vscode/ltex.hiddenFalsePositives.en-US.txt @@ -0,0 +1,2 @@ +{"rule":"ADD_AN_ADDITIONAL","sentence":"^\\QThe C++20 standard has added an additional type \\E(?:Dummy|Ina|Jimmy-|Dummy-|Maniquí-|Maniquíes-)[0-9]+\\Q, designed to keep UTF-8 encoded characters, and a string type \\E(?:Dummy|Ina|Jimmy-|Dummy-|Maniquí-|Maniquíes-)[0-9]+\\Q.\\E$"} +{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qwith the implied assumption that all \\E(?:Dummy|Ina|Jimmy-|Dummy-|Maniquí-|Maniquíes-)[0-9]+\\Q strings are UTF-8 encoded character strings.\\E$"} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c3f020..578cdc5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,6 +4,12 @@ target_sources(${PROJECT_NAME} PRIVATE utf8.cpp ) +# Set up dependency for casecvt.cpp on generated headers +add_dependencies(${PROJECT_NAME} gen_casetab) +set_source_files_properties(casecvt.cpp PROPERTIES + OBJECT_DEPENDS "${PROJECT_SOURCE_DIR}/include/uppertab.h;${PROJECT_SOURCE_DIR}/include/lowertab.h" +) + # Windows specific stuff if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") target_sources(${PROJECT_NAME} PRIVATE From b95314084cb4ff03f64c30bb39615109d2d91e39 Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 11:19:02 -0500 Subject: [PATCH 03/13] disable build tests and 
examples by default --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b6735cf..3a01169 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,12 +36,12 @@ set_target_properties(${PROJECT_NAME} PROPERTIES add_subdirectory(src) add_subdirectory(include) -set(BUILD_TESTS TRUE CACHE BOOL "Build tests") +set(BUILD_TESTS FALSE CACHE BOOL "Build tests") if (BUILD_TESTS) add_subdirectory(tests) endif () -set(BUILD_EXAMPLES TRUE CACHE BOOL "Build examples") +set(BUILD_EXAMPLES FALSE CACHE BOOL "Build examples") if (BUILD_EXAMPLES) add_subdirectory(examples) endif () From f11b811cda1881302515f011526a58288014cf70 Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 13:17:39 -0500 Subject: [PATCH 04/13] resolve CI errors, add better clangd defaults --- .clangd | 20 +++++++++++++++++--- .gitignore | 2 ++ .vscode/ltex.dictionary.en-US.txt | 2 ++ CMakeLists.txt | 1 + cmake/CompileCommands.cmake | 2 ++ compile_flags.txt | 0 tests/tests.vcxproj | 11 ++++++----- tests/utpp_shim.h | 18 ++++++++++++++---- utf8.code-workspace | 4 ++-- 9 files changed, 46 insertions(+), 14 deletions(-) create mode 100644 cmake/CompileCommands.cmake create mode 100644 compile_flags.txt diff --git a/.clangd b/.clangd index ce4f571..97521e2 100644 --- a/.clangd +++ b/.clangd @@ -18,7 +18,21 @@ CompileFlags: - -Wall - -Wextra - -Wpedantic - # Force .h files to be treated as C++ - - -xc++ - # Ensure we're using C++20 like the build system + - -Ibuild/.dep-cache/utpp-repo-src/include/. + - -Iinclude/. + - -Itests/. 
+ - -xc++ # Force .h files to be treated as C++ + # - -std=gnu++17 - -std=c++20 + - --target=x86_64-pc-windows-msvc + - -DUNICODE + - -D_UNICODE + - -D_DEBUG + - -D_DLL + - -D_MT + - -Xclang + - --dependent-lib=msvcrtd + - -g + - -Xclang + - -gcodeview + - -fno-char8_t diff --git a/.gitignore b/.gitignore index 2e37759..bd2305f 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,5 @@ /utf8.cppcheck /docs /.DS_Store +.cache/ +/compile_commands.json diff --git a/.vscode/ltex.dictionary.en-US.txt b/.vscode/ltex.dictionary.en-US.txt index 4c502a6..63e8b7f 100644 --- a/.vscode/ltex.dictionary.en-US.txt +++ b/.vscode/ltex.dictionary.en-US.txt @@ -7,6 +7,7 @@ endl exetest fdat gclef +gcodeview icompare ifndef ifstream @@ -21,6 +22,7 @@ Mattraks mfcribbon msbuild msvc +msvcrtd NDEBUG Neacsu neacșu diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a01169..82dcda8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,7 @@ endif() ## proj src dir is location of this file list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) include(ConfigFetchContent) +include(CompileCommands) # include dependencies include(utpp) diff --git a/cmake/CompileCommands.cmake b/cmake/CompileCommands.cmake new file mode 100644 index 0000000..00b0793 --- /dev/null +++ b/cmake/CompileCommands.cmake @@ -0,0 +1,2 @@ +set(CMAKE_EXPORT_COMPILE_COMMANDS TRUE CACHE BOOL "Generate compile_commands.json" FORCE) +mark_as_advanced(CMAKE_EXPORT_COMPILE_COMMANDS) diff --git a/compile_flags.txt b/compile_flags.txt new file mode 100644 index 0000000..e69de29 diff --git a/tests/tests.vcxproj b/tests/tests.vcxproj index 32b756b..6ae3391 100644 --- a/tests/tests.vcxproj +++ b/tests/tests.vcxproj @@ -96,7 +96,7 @@ Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true - $(SolutionDir)include + $(SolutionDir)include;$(SolutionDir)tests;$(SolutionDir)build\.dep-cache\utpp-repo-src\include /utf-8 stdcpp17 @@ -112,7 +112,7 @@ Level3 Disabled _DEBUG;_CONSOLE;%(PreprocessorDefinitions) - 
$(SolutionDir)include + $(SolutionDir)include;$(SolutionDir)tests;$(SolutionDir)build\.dep-cache\utpp-repo-src\include /utf-8 true stdcpp17 @@ -132,7 +132,7 @@ true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true - $(SolutionDir)include + $(SolutionDir)include;$(SolutionDir)tests;$(SolutionDir)build\.dep-cache\utpp-repo-src\include /utf-8 stdcpp17 @@ -152,7 +152,7 @@ true true NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(SolutionDir)include + $(SolutionDir)include;$(SolutionDir)tests;$(SolutionDir)build\.dep-cache\utpp-repo-src\include /utf-8 true stdcpp17 @@ -173,6 +173,7 @@ + @@ -180,4 +181,4 @@ - \ No newline at end of file + diff --git a/tests/utpp_shim.h b/tests/utpp_shim.h index e01de1f..fe44e23 100644 --- a/tests/utpp_shim.h +++ b/tests/utpp_shim.h @@ -1,11 +1,21 @@ #pragma once -// Work around Visual Studio 2022's deleted char32_t stream operator +// Work around missing/deleted char32_t stream operator // This allows utpp's CheckEqual to print char32_t values properly // Must be defined BEFORE including utpp to ensure it's found by ADL + #include // IWYU pragma: keep -inline std::ostream& operator<<(std::ostream& os, char32_t c) { - return os << static_cast(c); -} + +// Provide the operator for compilers/standards that need it: +// - MSVC 2022+ deletes the char32_t stream operator +// - Clang with C++20 also seems to need this fix +#if (defined(_MSC_VER) && _MSC_VER >= 1930) || \ + (defined(__clang__) && __cplusplus >= 202002L) + + inline std::ostream& operator<<(std::ostream& os, char32_t c) { + return os << static_cast(c); + } + +#endif #include // IWYU pragma: keep diff --git a/utf8.code-workspace b/utf8.code-workspace index 2557242..a4005d4 100644 --- a/utf8.code-workspace +++ b/utf8.code-workspace @@ -11,7 +11,7 @@ }, "files.insertFinalNewline": true, "clangd.arguments": [ - "--compile-commands-dir=${workspaceFolder}/build/x64-clang" + // "--compile-commands-dir=${workspaceFolder}" ], "cmake.configureOnOpen": true, "cmake.configureOnEdit": 
true, @@ -19,7 +19,7 @@ "cmake.launchBehavior": "breakAndReuseTerminal", "lldb.verboseLogging": true, "lldb.useNativePDBReader": true, - // "cmake.copyCompileCommands": ${workspaceFolder}/build + "cmake.copyCompileCommands": "${workspaceFolder}/compile_commands.json" }, "extensions": { // See http://go.microsoft.com/fwlink/?LinkId=827846 From 792cb5154ab9d223c16795f9f422a7bc763abf29 Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 13:34:22 -0500 Subject: [PATCH 05/13] more c++17 vs c++20 fixes --- .clangd | 5 ++++ .vscode/ltex.dictionary.en-US.txt | 1 + tests/utpp_shim.h | 46 +++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/.clangd b/.clangd index 97521e2..7324f06 100644 --- a/.clangd +++ b/.clangd @@ -18,9 +18,14 @@ CompileFlags: - -Wall - -Wextra - -Wpedantic + + # The empty compile_flags.txt in the root directory causes clangd to treat + # these paths as relative to the root of the project when compile_commands.json + # is not present. - -Ibuild/.dep-cache/utpp-repo-src/include/. - -Iinclude/. - -Itests/. 
+ - -xc++ # Force .h files to be treated as C++ # - -std=gnu++17 - -std=c++20 diff --git a/.vscode/ltex.dictionary.en-US.txt b/.vscode/ltex.dictionary.en-US.txt index 63e8b7f..03079e0 100644 --- a/.vscode/ltex.dictionary.en-US.txt +++ b/.vscode/ltex.dictionary.en-US.txt @@ -1,6 +1,7 @@ asmx basecvt casecvt +clangd codept cppcheck endl diff --git a/tests/utpp_shim.h b/tests/utpp_shim.h index fe44e23..2302964 100644 --- a/tests/utpp_shim.h +++ b/tests/utpp_shim.h @@ -5,6 +5,9 @@ // Must be defined BEFORE including utpp to ensure it's found by ADL #include // IWYU pragma: keep +#include // IWYU pragma: keep +#include // IWYU pragma: keep +#include // IWYU pragma: keep // Provide the operator for compilers/standards that need it: // - MSVC 2022+ deletes the char32_t stream operator @@ -18,4 +21,47 @@ #endif +// C++17 compatibility shims for utpp +#if __cplusplus < 202002L + + // Add std::chrono::milliseconds stream operator for C++17 + namespace std { + namespace chrono { + inline std::ostream& operator<<(std::ostream& os, const milliseconds& ms) { + return os << ms.count() << "ms"; + } + + inline std::ostream& operator<<(std::ostream& os, const duration& dur) { + return os << dur.count() << "s"; + } + + // Provide utc_clock as alias to system_clock for C++17 + using utc_clock = system_clock; + } + + // Simple format replacement for C++17 + template + inline std::string format(const std::string& fmt, Args&&... 
args) { + std::ostringstream oss; + // For time formatting, just return a simple ISO-like format + if (fmt.find(":%Y-%m-%dT%H:%M:%S") != std::string::npos) { + auto now = std::chrono::system_clock::now(); + auto time_t = std::chrono::system_clock::to_time_t(now); + +#ifdef _WIN32 + struct tm tm_buf; + gmtime_s(&tm_buf, &time_t); + oss << std::put_time(&tm_buf, "%Y-%m-%dT%H:%M:%SZ"); +#else + oss << std::put_time(std::gmtime(&time_t), "%Y-%m-%dT%H:%M:%SZ"); +#endif + } else { + oss << "formatted_output"; + } + return oss.str(); + } + } + +#endif + #include // IWYU pragma: keep From cb6b093709601885be66d25af7657b84817710b7 Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 13:54:46 -0500 Subject: [PATCH 06/13] adjust build.yml to work with current branch --- .github/workflows/build.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 576ae69..d445d3f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,22 +13,25 @@ jobs: runs-on: windows-latest steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Get CPM uses: neacsum/configurator@v0.0.11 with: name: cpm.exe url: https://github.com/neacsum/cpm/releases/latest/download/cpm.exe - - name: Build libraries - run: cpm -v --proto https -u https://github.com/neacsum/utf8.git -r $HOME utf8 + - name: Build dependencies + run: cpm -v --proto https -r $HOME - name: Build and run tests shell: cmd run: | - %USERPROFILE%\utf8\build.bat tests + build.bat tests - name: Save tests result uses: actions/upload-artifact@v4 with: name: test_results - path: ~/utf8/build/exe/x64/debug/utf8_tests.xml + path: build/exe/x64/debug/utf8_tests.xml From 7ef83b23a8801005bd914d0cc97efe3b2bc59c7e Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 14:12:11 -0500 Subject: [PATCH 07/13] adjust paths for cpm --- .github/workflows/build.yml | 7 ++++++- cpm.json | 4 ++-- 2 files changed, 8 
insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d445d3f..97a9732 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,12 @@ jobs: url: https://github.com/neacsum/cpm/releases/latest/download/cpm.exe - name: Build dependencies - run: cpm -v --proto https -r $HOME + run: cpm -v --proto https -r build\.dep-cache + + - name: Create symlink for utpp + shell: cmd + run: | + mklink /D build\.dep-cache\utpp-repo-src build\.dep-cache\utpp - name: Build and run tests shell: cmd diff --git a/cpm.json b/cpm.json index cbbf555..68bf394 100644 --- a/cpm.json +++ b/cpm.json @@ -4,8 +4,8 @@ "https": "https://github.com/neacsum/utf8.git", "build": [ {"os": "windows", "cmd": "build.bat", "args": ["lib"]}, - {"os": "linux darwin", "cmd": "cmake", "args": ["--workflow", "--preset", "x64"]} - ], + {"os": "linux darwin", "cmd": "cmake", "args": ["--workflow", "--preset", "MSVC x64"]} + ], "depends": [ { "name": "utpp", From a887e73c1618d64ed79af3207d09919fea10e07d Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 14:57:25 -0500 Subject: [PATCH 08/13] create missing directory for test code --- .github/workflows/build.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 97a9732..aa38a35 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -22,6 +22,11 @@ jobs: name: cpm.exe url: https://github.com/neacsum/cpm/releases/latest/download/cpm.exe + - name: Create dependencies directory + shell: cmd + run: | + if not exist "build\.dep-cache" mkdir "build\.dep-cache" + - name: Build dependencies run: cpm -v --proto https -r build\.dep-cache From ef7c2e15a962262ccd9f16834c3cd32e22a10c01 Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 15:02:16 -0500 Subject: [PATCH 09/13] debugging the failed include path --- .github/workflows/build.yml | 7 +++++++ 1 file changed, 7 
insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index aa38a35..bafeee6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,6 +30,13 @@ jobs: - name: Build dependencies run: cpm -v --proto https -r build\.dep-cache + - name: Debug directory structure + shell: cmd + run: | + dir build\.dep-cache\utpp + dir build\.dep-cache\utpp\include + if exist "build\.dep-cache\utpp\include\utpp\utpp.h" (echo Found utpp.h) else (echo utpp.h not found) + - name: Create symlink for utpp shell: cmd run: | From 2caee07e235944f93f97c80a789b01cb5cdd164e Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 15:06:30 -0500 Subject: [PATCH 10/13] more ci debugging --- .github/workflows/build.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bafeee6..07c9545 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -47,6 +47,21 @@ jobs: run: | build.bat tests + - name: Debug filesystem state after build failure + if: failure() + shell: cmd + run: | + echo "=== Checking symlink status ===" + dir build\.dep-cache\utpp-repo-src + echo "=== Checking include directory ===" + dir build\.dep-cache\utpp-repo-src\include + echo "=== Checking utpp subdirectory ===" + dir build\.dep-cache\utpp-repo-src\include\utpp + echo "=== Checking for utpp.h file ===" + if exist "build\.dep-cache\utpp-repo-src\include\utpp\utpp.h" (echo Found via symlink) else (echo NOT found via symlink) + echo "=== Checking original utpp directory ===" + dir build\.dep-cache\utpp\include\utpp + - name: Save tests result uses: actions/upload-artifact@v4 with: From 2978d37192949bef0282e0025c20462be3623ba9 Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 15:10:08 -0500 Subject: [PATCH 11/13] do xcopy instead of symlink for utpp --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git
a/.github/workflows/build.yml b/.github/workflows/build.yml index 07c9545..b7714fd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,10 +37,10 @@ jobs: dir build\.dep-cache\utpp\include if exist "build\.dep-cache\utpp\include\utpp\utpp.h" (echo Found utpp.h) else (echo utpp.h not found) - - name: Create symlink for utpp + - name: Copy utpp directory (symlink gets deleted by build system) shell: cmd run: | - mklink /D build\.dep-cache\utpp-repo-src build\.dep-cache\utpp + xcopy /E /I build\.dep-cache\utpp build\.dep-cache\utpp-repo-src - name: Build and run tests shell: cmd From be091726cc99fb5212bad05e777e6bffd644e6cd Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 15:17:25 -0500 Subject: [PATCH 12/13] build library locally rather than via CPM --- .github/workflows/build.yml | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b7714fd..7b44d5d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,41 +27,24 @@ jobs: run: | if not exist "build\.dep-cache" mkdir "build\.dep-cache" - - name: Build dependencies + - name: Download dependencies run: cpm -v --proto https -r build\.dep-cache - - name: Debug directory structure + - name: Copy utpp directory shell: cmd run: | - dir build\.dep-cache\utpp - dir build\.dep-cache\utpp\include - if exist "build\.dep-cache\utpp\include\utpp\utpp.h" (echo Found utpp.h) else (echo utpp.h not found) + xcopy /E /I build\.dep-cache\utpp build\.dep-cache\utpp-repo-src - - name: Copy utpp directory (symlink gets deleted by build system) + - name: Build library (local version, not from CPM) shell: cmd run: | - xcopy /E /I build\.dep-cache\utpp build\.dep-cache\utpp-repo-src + build.bat lib - name: Build and run tests shell: cmd run: | build.bat tests - - name: Debug filesystem state after build failure - if: failure() - shell: cmd - run: | - echo "===
Checking symlink status ===" - dir build\.dep-cache\utpp-repo-src - echo "=== Checking include directory ===" - dir build\.dep-cache\utpp-repo-src\include - echo "=== Checking utpp subdirectory ===" - dir build\.dep-cache\utpp-repo-src\include\utpp - echo "=== Checking for utpp.h file ===" - if exist "build\.dep-cache\utpp-repo-src\include\utpp\utpp.h" (echo Found via symlink) else (echo NOT found via symlink) - echo "=== Checking original utpp directory ===" - dir build\.dep-cache\utpp\include\utpp - - name: Save tests result uses: actions/upload-artifact@v4 with: From 844022c3c4b4b89bf0517fbb2e3735a60c21feff Mon Sep 17 00:00:00 2001 From: Randy Eckman Date: Sun, 28 Sep 2025 15:36:12 -0500 Subject: [PATCH 13/13] add separate build and run targets for gen_casetab --- .github/workflows/build.yml | 2 +- .gitignore | 1 + .vscode/ltex.dictionary.en-US.txt | 1 + src/CMakeLists.txt | 2 +- tools/gen_casetab/CMakeLists.txt | 17 +++++++++++++---- 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7b44d5d..d5966bb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,7 +28,7 @@ jobs: if not exist "build\.dep-cache" mkdir "build\.dep-cache" - name: Download dependencies - run: cpm -v --proto https -r build\.dep-cache + run: cpm -v -f --proto https -r build\.dep-cache - name: Copy utpp directory shell: cmd diff --git a/.gitignore b/.gitignore index bd2305f..3313c0f 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ /.DS_Store .cache/ /compile_commands.json +.dep-cache/ diff --git a/.vscode/ltex.dictionary.en-US.txt b/.vscode/ltex.dictionary.en-US.txt index 03079e0..f9c5927 100644 --- a/.vscode/ltex.dictionary.en-US.txt +++ b/.vscode/ltex.dictionary.en-US.txt @@ -61,3 +61,4 @@ wsubkey wval wvalue wvar +xcopy diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 578cdc5..afd62da 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,7 +5,7 @@ 
target_sources(${PROJECT_NAME} PRIVATE ) # Set up dependency for casecvt.cpp on generated headers -add_dependencies(${PROJECT_NAME} gen_casetab) +add_dependencies(${PROJECT_NAME} run_gen_casetab) set_source_files_properties(casecvt.cpp PROPERTIES OBJECT_DEPENDS "${PROJECT_SOURCE_DIR}/include/uppertab.h;${PROJECT_SOURCE_DIR}/include/lowertab.h" ) diff --git a/tools/gen_casetab/CMakeLists.txt b/tools/gen_casetab/CMakeLists.txt index b0cc6a3..39884e0 100644 --- a/tools/gen_casetab/CMakeLists.txt +++ b/tools/gen_casetab/CMakeLists.txt @@ -1,9 +1,18 @@ -add_executable(gen_casetab gen_casetab.cpp) -set_property(TARGET gen_casetab PROPERTY CXX_STANDARD 20) +add_executable(build_gen_casetab gen_casetab.cpp) +set_property(TARGET build_gen_casetab PROPERTY CXX_STANDARD 20) + +# Custom command to generate the header files add_custom_command( OUTPUT ${PROJECT_SOURCE_DIR}/include/uppertab.h ${PROJECT_SOURCE_DIR}/include/lowertab.h - COMMAND $<TARGET_FILE:gen_casetab> ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt ${PROJECT_SOURCE_DIR}/include + COMMAND $<TARGET_FILE:build_gen_casetab> ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt ${PROJECT_SOURCE_DIR}/include MAIN_DEPENDENCY ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt - DEPENDS gen_casetab + DEPENDS build_gen_casetab VERBATIM + COMMENT "Generating Unicode case conversion tables" +) + +# Custom target that depends on the generated files +add_custom_target(run_gen_casetab DEPENDS + ${PROJECT_SOURCE_DIR}/include/uppertab.h + ${PROJECT_SOURCE_DIR}/include/lowertab.h )