diff --git a/.clangd b/.clangd new file mode 100644 index 0000000..7324f06 --- /dev/null +++ b/.clangd @@ -0,0 +1,43 @@ +If: + PathMatch: .*\.in + +CompileFlags: + Remove: + - -Wall + - -Wextra + - -Wpedantic + +Diagnostics: + Suppress: + - "*" + +--- + +CompileFlags: + Add: + - -Wall + - -Wextra + - -Wpedantic + + # The empty compile_flags.txt in the root directory causes clangd to treat + # these paths as relative to the root of the project when compile_commands.json + # is not present. + - -Ibuild/.dep-cache/utpp-repo-src/include/. + - -Iinclude/. + - -Itests/. + + - -xc++ # Force .h files to be treated as C++ + # - -std=gnu++17 + - -std=c++20 + - --target=x86_64-pc-windows-msvc + - -DUNICODE + - -D_UNICODE + - -D_DEBUG + - -D_DLL + - -D_MT + - -Xclang + - --dependent-lib=msvcrtd + - -g + - -Xclang + - -gcodeview + - -fno-char8_t diff --git a/.cspell.jsonc b/.cspell.jsonc new file mode 100644 index 0000000..8d0f5d7 --- /dev/null +++ b/.cspell.jsonc @@ -0,0 +1,50 @@ +{ + "version": "0.2", + "ignorePaths": [ + ".cspell.jsonc", + ".git/**", + "*.code-workspace", + ".local-gitignore", + "data/UnicodeData.txt" + ], + "dictionaryDefinitions": [ + { + "name": "project-words", + "path": ".vscode/ltex.dictionary.en-US.txt", + "addWords": true + } + ], + "dictionaries": [ + "bash", + "win32", + "scientific-terms-us", + "project-words" + ], + "ignoreWords": [ + "ăâățî", + "ĂÂȚÎ", + "BCDEFGH", + "MIRCEANEACȘUĂÂȚÎ", + "ȚEPUȘ", + "αλφάβητο", + "αξία", + "αρχείο", + "ελληνικό", + "ΚΛΕΙΔΙ", + "Հայերեն", + "पंजाबी", + "ᓀᐦᐃᔭᐍᐏᐣ", + "Ελληνικός", + "العربي", + "اللغة", + "ܐܪܡܝܐ" + ], + "ignoreRegExpList": [ + "/-W.*/", + "/-D.*/", + "/-X.*/", + "/-I.*/" + ], + "allowCompoundWords": true, + "words": [] +} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d064d24..d5966bb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,25 +11,42 @@ permissions: jobs: build: runs-on: windows-latest - - steps: + + steps: + - name: Checkout 
code + uses: actions/checkout@v4 + - name: Get CPM uses: neacsum/configurator@v0.0.11 with: name: cpm.exe url: https://github.com/neacsum/cpm/releases/latest/download/cpm.exe - - - name: Build libraries - run: cpm -v --proto https -u https://github.com/neacsum/utf8.git -r $HOME utf8 - + + - name: Create dependencies directory + shell: cmd + run: | + if not exist "build\.dep-cache" mkdir "build\.dep-cache" + + - name: Download dependencies + run: cpm -v -f --proto https -r build\.dep-cache + + - name: Copy utpp directory + shell: cmd + run: | + xcopy /E /I build\.dep-cache\utpp build\.dep-cache\utpp-repo-src + + - name: Build library (local version, not from CPM) + shell: cmd + run: | + build.bat lib + - name: Build and run tests shell: cmd run: | - %USERPROFILE%\utf8\build.bat tests + build.bat tests - name: Save tests result uses: actions/upload-artifact@v4 with: name: test_results - path: ~/utf8/build/exe/x64/debug/utf8_tests.xml - + path: build/exe/x64/debug/utf8_tests.xml diff --git a/.github/workflows/doxygen.yml b/.github/workflows/doxygen.yml index 075e2bd..67f5992 100644 --- a/.github/workflows/doxygen.yml +++ b/.github/workflows/doxygen.yml @@ -26,11 +26,11 @@ jobs: - name: Generate Doxygen documentation run: doxygen tools/doxygen/Doxyfile - + - name: Upload pages uses: actions/upload-pages-artifact@v3 with: path: docs - + - name: Deploy pages uses: actions/deploy-pages@v4 diff --git a/.github/workflows/purge.yml b/.github/workflows/purge.yml index 32a786d..32ea94c 100644 --- a/.github/workflows/purge.yml +++ b/.github/workflows/purge.yml @@ -5,11 +5,11 @@ on: days: description: 'Number of days.' required: true - default: 30 + default: '30' minimum_runs: description: 'The minimum runs to keep for each workflow.' required: true - default: 6 + default: '6' delete_workflow_pattern: description: 'The name or filename of the workflow. if not set then it will target all workflows.' 
required: false diff --git a/.gitignore b/.gitignore index dc67040..3313c0f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,8 +10,15 @@ /build /.vs* +!/.vscode/ +/.vscode/* +!/.vscode/ltex.dictionary.en-US.txt +!/.vscode/ltex.hiddenFalsePositives.en-US.txt /lib /.editorconfig /utf8.cppcheck /docs /.DS_Store +.cache/ +/compile_commands.json +.dep-cache/ diff --git a/.vscode/ltex.dictionary.en-US.txt b/.vscode/ltex.dictionary.en-US.txt new file mode 100644 index 0000000..f9c5927 --- /dev/null +++ b/.vscode/ltex.dictionary.en-US.txt @@ -0,0 +1,64 @@ +asmx +basecvt +casecvt +clangd +codept +cppcheck +endl +exetest +fdat +gclef +gcodeview +icompare +ifndef +ifstream +ipch +IWYU +ized +Kaspersky +keyxx +lfont +ltex +Mattraks +mfcribbon +msbuild +msvc +msvcrtd +NDEBUG +Neacsu +neacșu +neacsum +ofstream +opensdf +resx +sectionxx +stdcpp +SYMED +usebackq +utpp +vect +vsmsbuildcmd +VSWHERE +wcmd +wdat +wdata +wdir +wdrive +wemoji +wfile +wfname +wfull +winutf +wnam +wnew +wpath +wpfx +wptr +wrel +wrhs +wsmiley +wsubkey +wval +wvalue +wvar +xcopy diff --git a/.vscode/ltex.hiddenFalsePositives.en-US.txt b/.vscode/ltex.hiddenFalsePositives.en-US.txt new file mode 100644 index 0000000..99514bc --- /dev/null +++ b/.vscode/ltex.hiddenFalsePositives.en-US.txt @@ -0,0 +1,2 @@ +{"rule":"ADD_AN_ADDITIONAL","sentence":"^\\QThe C++20 standard has added an additional type \\E(?:Dummy|Ina|Jimmy-|Dummy-|Maniquí-|Maniquíes-)[0-9]+\\Q, designed to keep UTF-8 encoded characters, and a string type \\E(?:Dummy|Ina|Jimmy-|Dummy-|Maniquí-|Maniquíes-)[0-9]+\\Q.\\E$"} +{"rule":"UPPERCASE_SENTENCE_START","sentence":"^\\Qwith the implied assumption that all \\E(?:Dummy|Ina|Jimmy-|Dummy-|Maniquí-|Maniquíes-)[0-9]+\\Q strings are UTF-8 encoded character strings.\\E$"} diff --git a/BUILD.bat b/BUILD.bat index 3db582e..e411f0a 100644 --- a/BUILD.bat +++ b/BUILD.bat @@ -23,7 +23,7 @@ echo Visual studio installation folder is: %VSInstallDir% call "%VSInstallDir%\common7\tools\vsmsbuildcmd.bat" rem -rem 
Build tragets. Valid targets are "lib" and "tests" +rem Build targets. Valid targets are "lib" and "tests" rem Default is to build all rem if "%~1"=="" (msbuild "%~dp0build.proj") else (msbuild -target:%1 "%~dp0build.proj") diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d74409..82dcda8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 4.0) project(utf8) if (WIN32) @@ -7,17 +7,42 @@ else() set(pfx "") endif() -add_compile_options("$<$:/utf-8>") -add_compile_options("$<$:/utf-8>") -add_definitions(-DUNICODE -D_UNICODE) +add_compile_definitions(_UNICODE UNICODE) +if(MSVC) + add_compile_options(/utf-8) +endif() + +# https://wg21.link/p2513 +if(MSVC) + add_compile_options(/Zc:char8_t-) +else() + add_compile_options(-fno-char8_t) +endif() + +## configure CMake module search paths that depend on the project +## proj src dir is location of this file +list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) +include(ConfigFetchContent) +include(CompileCommands) +# include dependencies +include(utpp) add_subdirectory(tools/gen_casetab) + +add_library(${PROJECT_NAME}) +set_target_properties(${PROJECT_NAME} PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib/${pfx}/$ + CXX_STANDARD 17 +) add_subdirectory(src) +add_subdirectory(include) +set(BUILD_TESTS FALSE CACHE BOOL "Build tests") if (BUILD_TESTS) add_subdirectory(tests) endif () +set(BUILD_EXAMPLES FALSE CACHE BOOL "Build examples") if (BUILD_EXAMPLES) add_subdirectory(examples) endif () diff --git a/CMakePresets.json b/CMakePresets.json index a6ee76a..556b44b 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -1,13 +1,34 @@ { - "version": 6, - "configurePresets":[ + "version": 8, + "configurePresets": [ { - "name": "x64", + "name": "clang-x86_64-pc-windows-msvc", + "displayName": "Clang (WinSDK/MSVC libs) Win x64", + "description": "Using WinSDK/MSVC libs and Clang compilers", + "generator": "Ninja", + 
"binaryDir": "${sourceDir}/build/x64-clang", + "environment": { + "WinSdkVersion": "10.0.26100.0", + "WinSdkDir": "C:/Program Files (x86)/Windows Kits/10/bin/$env{WinSdkVersion}/x64", + "PATH": "$env{WinSdkDir};$penv{PATH}", + "CMAKE_POLICY_DEFAULT_CMP0174": "NEW" + }, + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_C_COMPILER": "clang.exe", + "CMAKE_CXX_COMPILER": "clang++.exe", + "CMAKE_RC_COMPILER": "rc.exe", + "CMAKE_C_COMPILER_TARGET": "x86_64-pc-windows-msvc", + "CMAKE_CXX_COMPILER_TARGET": "x86_64-pc-windows-msvc" + } + }, + { + "name": "MSVC x64", "displayName": "x64 Config", "binaryDir": "${sourceDir}/build/x64" }, { - "name": "x86", + "name": "MSVC x86", "displayName": "x86 Config", "architecture": { "value": "win32" @@ -15,64 +36,89 @@ "binaryDir": "${sourceDir}/build/x86" } ], - "buildPresets":[ { - "name": "debug_x64", - "configurePreset": "x64", + "name": "msvc_debug_x64", + "configurePreset": "MSVC x64", "configuration": "Debug" }, { - "name": "release_x64", - "configurePreset": "x64", + "name": "msvc_release_x64", + "configurePreset": "MSVC x64", "configuration": "Release" }, { - "name": "debug_x86", - "configurePreset": "x86", + "name": "msvc_debug_x86", + "configurePreset": "MSVC x86", "configuration": "Debug" }, { - "name": "release_x86", - "configurePreset": "x86", + "name": "msvc_release_x86", + "configurePreset": "MSVC x86", + "configuration": "Release" + }, + { + "name": "clang_debug_x64", + "configurePreset": "clang-x86_64-pc-windows-msvc", + "configuration": "Debug" + }, + { + "name": "clang_release_x64", + "configurePreset": "clang-x86_64-pc-windows-msvc", "configuration": "Release" } ], - "workflowPresets":[ { - "name": "x64", + "name": "MSVC x64", + "steps": [ + { + "type": "configure", + "name": "MSVC x64" + }, + { + "type": "build", + "name": "msvc_debug_x64" + }, + { + "type": "build", + "name": "msvc_release_x64" + } + ] + }, + { + "name": "MSVC x86", "steps": [ { "type": "configure", - "name": "x64" + "name": 
"MSVC x86" }, { "type": "build", - "name": "debug_x64" + "name": "msvc_debug_x86" }, { "type": "build", - "name": "release_x64" + "name": "msvc_release_x86" } ] }, { - "name": "x86", + "name": "Clang x64", "steps": [ { "type": "configure", - "name": "x86" + "name": "clang-x86_64-pc-windows-msvc" }, { "type": "build", - "name": "debug_x86" + "name": "clang_debug_x64" }, { "type": "build", - "name": "release_x86" + "name": "clang_release_x64" } ] } ] -} \ No newline at end of file +} diff --git a/README.md b/README.md index 31e6ff4..6b72d7d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -UTF8 - Simple Library for Internationalization -============================================= +# UTF8 - Simple Library for Internationalization While most of the (computing) world has standardized on using UTF-8 encoding, Win32 has remained stuck with wide character strings (also called UTF-16 encoding). @@ -7,10 +6,13 @@ Win32 has remained stuck with wide character strings (also called UTF-16 encodin This library simplifies usage of UTF-8 encoded strings under Win32 using principles outlined in the [UTF-8 Everywhere Manifesto](http://utf8everywhere.org/). Here is an example of a function call: + ```C++ utf8::mkdir ("ελληνικό"); //create a directory with a UTF8-encoded name ``` + and another example of a C++ stream with a name and content that are not ASCII characters: + ```C++ utf8::ofstream u8strm("😃😎😛"); @@ -19,63 +21,75 @@ and another example of a C++ stream with a name and content that are not ASCII c ``` A call to Windows API functions can be written as: + ```C++ HANDLE f = CreateFile (utf8::widen ("ελληνικό").c_str (), GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); ``` ## Usage + Before using this library, please review the guidelines from the [UTF-8 Everywhere Manifesto](http://utf8everywhere.org/). 
In particular: -- define UNICODE or _UNICODE in your program - -- for Visual C++ users, make sure "Use Unicode Character Set" option is defined (under "Configuration Properties" > "General" > "Project Defaults" page). -- for Visual C++ users, add [`/utf-8`](https://docs.microsoft.com/en-us/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8) option under "C/C++" > "All Options" > "Additional Options". +- Define UNICODE or _UNICODE in your program -- use only `std::string` and `char*` variables. Assume they all contain UTF-8 encoded strings. +- For Visual C++ users, make sure "Use Unicode Character Set" option is defined (under "Configuration Properties" > "General" > "Project Defaults" page). -- for Visual C++ users, if compiling under C++20 language standard, add the [`Zc:char8_t-`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-char8-t?view=msvc-170) option under "C/C++" > "All Options" >"Additional Options" (see discussion below.) - -- use UTF-16 strings **only** in arguments to Windows API calls. +- For Visual C++ users, add [`/utf-8`](https://docs.microsoft.com/en-us/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8) option under "C/C++" > "All Options" > "Additional Options". + +- Use only `std::string` and `char*` variables. Assume they all contain UTF-8 encoded strings. + +- For Visual C++ users, if compiling under C++20 language standard, add the [`Zc:char8_t-`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-char8-t?view=msvc-170) option under "C/C++" > "All Options" >"Additional Options" (see discussion below.) + +- Use UTF-16 strings **only** in arguments to Windows API calls. All functions and classes in this library are included in the `utf8` namespace. It is a good idea **not** to have a using directive for this namespace. That makes it more evident in the code where UTF8-aware functions are used. 
### Narrowing and Widening Functions + The basic conversion functions change the encoding between UTF-8, UTF-16 and UTF-32. `narrow()` function converts strings from UTF-16 or UTF-32 encoding to UTF-8: + ```C++ std::string utf8::narrow (const wchar_t* s, size_t nch=0); std::string utf8::narrow (const std::wstring & s); std::string utf8::narrow (const char32_t* s, size_t nch=0); -std::string utf8::narrow (const std::u32string& s); +std::string utf8::narrow (const std::u32string& s); ``` The `widen()` function converts UTF-8 to UTF-16: + ```C++ std::wstring utf8::widen (const char* s, size_t nch); std::wstring utf8::widen (const std::string& s); ``` + The `runes()` function converts UTF-8 to UTF-32: + ```C++ std::u32string runes (const char* s, size_t nch = 0); std::u32string utf8::runes (const std::string& s); ``` There are also functions for: + - character counting - string traversal - validity checking ### Case Folding Functions + Case folding (conversion between upper case and lower case) in Unicode is more complicated than traditional ASCII case conversion. This library uses standard tables published by Unicode Consortium to perform upper case to lower case conversions and case-insensitive string comparison. -- case folding - `toupper()`, `tolower()`, `make_upper()`, `make_lower()` -- case-insensitive string comparison - `icompare()` +- Case folding - `toupper()`, `tolower()`, `make_upper()`, `make_lower()` +- Case-insensitive string comparison - `icompare()` ### Common "C" Functions Wrappers + The library provides UTF-8 wrappings most frequently used C functions. Function name and arguments match their traditional C counterparts. 
+ - Common file access operations: `utf8::fopen`, `utf8::access`, `utf8::remove`, `utf8::chmod`, `utf8::rename` - Directory operations: `utf8::mkdir`, `utf8::rmdir`, `utf8::chdir`, `utf8::getcwd` - Environment functions: `utf8::getenv`, `utf8::putenv` @@ -83,74 +97,91 @@ The library provides UTF-8 wrappings most frequently used C functions. Function - Character classification functions *is...* (`isalnum`, `isdigit`, etc.) ### C++ File I/O Streams + C++ I/O streams (`utf8::ifstream`, `utf8::ofstream`, `utf8::fstream`) provide an easy way to create files with names that are encoded using UTF-8. Because UTF-8 strings are character strings, reading and writing from these files can be done with standard insertion and extraction operators. ### Windows-Specific Functions -- path management: `splitpath`, `makepath` -- conversion of command-line arguments: `get_argv` and `free_argv` -- popular Windows API functions: `MessageBox`, `LoadString`, `ShellExecute`, `CopyFile`, etc. + +- Path management: `splitpath`, `makepath` +- Conversion of command-line arguments: `get_argv` and `free_argv` +- Popular Windows API functions: `MessageBox`, `LoadString`, `ShellExecute`, `CopyFile`, etc. - Registry API (`RegCreateKey`, `RegOpenKey`, `RegSetValue`, `RegGetValue`, etc.) The API for Windows profile files (also called INI files) was replaced with an object `utf8::IniFile`. ### Error Handling + Invalid characters or sequences can be handled in two different ways: -- the invalid character/sequence is replaced by a `REPLACEMENT_CHARACTER` (0xFFFD) -- the functions throw an exception `utf8::exception`. The member `utf8::exception::code` indicates what has triggered the exception. + +- The invalid character/sequence is replaced by a `REPLACEMENT_CHARACTER` (0xFFFD) +- The functions throw an exception `utf8::exception`. The member `utf8::exception::code` indicates what has triggered the exception. The function `error_mode()` selects the error handling strategy. 
The error handling strategy is thread-safe. ## Using the library under C++20 standard + The C++20 standard has [added an additional type `char8_t`](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html), designed to keep UTF-8 encoded characters, and a string type `std::u8string`. By making it a separate type from `char` and `unsigned char`, the committee has also created a number of incompatibilities. For instance the following fragment will produce an error: + ```C++ std::string s {"English text"}; //this is ok s = {u8"日本語テキスト"}; //"Japanese text" - error ``` + You would have to change it to something like: + ```C++ -std::u8string s {u8"English text"}; -s = {u8"日本語テキスト"}; +std::u8string s {u8"English text"}; +s = {u8"日本語テキスト"}; ``` -Recently (June, 2022) the committee seems to have changed position and introduced a [compatibility and portability fix - DR2513R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2513r3.html) allowing initialization of arrays of `char` or `unsigned char` with UTF-8 string literals. Until the defect report makes its way into the next standard edition, the solution for Visual C++ users who compile under C++20 standard rules is to use the [`Zc:char8_t-`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-char8-t?view=msvc-170) compiler option. -In my opinion, by introducing the `char8_t` type, the committee went against the very principles of UTF-8 encoding. The purpose of the encoding was to extend usage of the `char` type to additional Unicode code points. It has been so successful that it is now the de-facto standard used all across the Internet. Even Windows, that has been a bastion of UTF-16 encoding, is now slowly [moving toward UTF-8](https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page). 
+Recently (June 2022) the committee seems to have changed position and introduced a [compatibility and portability fix - DR2513R3](https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2513r3.html) allowing initialization of arrays of `char` or `unsigned char` with UTF-8 string literals. Until the defect report makes its way into the next standard edition, the solution for Visual C++ users who compile under C++20 standard rules is to use the [`Zc:char8_t-`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-char8-t?view=msvc-170) compiler option. + +In my opinion, by introducing the `char8_t` type, the committee went against the very principles of UTF-8 encoding. The purpose of the encoding was to extend usage of the `char` type to additional Unicode code points. It has been so successful that it is now the de facto standard used all across the Internet. Even Windows, that has been a bastion of UTF-16 encoding, is now slowly [moving toward UTF-8](https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page). In this context, the use of `char` data type for anything other than holding encodings of strings, seems out of place. In particular arithmetic computations with `char` or `unsigned char` entities are just a small fraction of the use cases. The standard should try to simplify usage in the most common cases leaving the infrequent ones to bear the burden of complexity. Following this principle, you would want to write: + ```C++ std::string s {"English text"}; s += " and "; s += "日本語テキスト"; ``` + with the implied assumption that all `char` strings are UTF-8 encoded character strings. ## Using the library under Linux + While the library was specifically built for Windows environment, a reduced version can be compiled and used under Linux. It has been tested under Ubuntu 22.04 with GCC. Obviously, functions that are specific to the Windows environment are not available. 
## Documentation -[Doxygen](http://www.doxygen.nl/) documentation can be found at https://neacsum.github.io/utf8/ - + +[Doxygen](http://www.doxygen.nl/) documentation can be found at [https://neacsum.github.io/utf8/](https://neacsum.github.io/utf8/) + ## Building -The UTF8 library doesn't have any dependencies. The test program however uses the [UTTP library](https://github.com/neacsum/utpp). + +The UTF8 library doesn't have any dependencies. The test program however uses the [UTPP library](https://github.com/neacsum/utpp). The preferred method is to use the [CPM - C/C++ Package Manager](https://github.com/neacsum/cpm) to fetch all dependent packages and build them. Download the [CPM program](https://github.com/neacsum/cpm/releases/latest/download/cpm.exe) and, from the root of the development tree, issue the `cpm` command: -``` + +```bash cpm -u https://github.com/neacsum/utf8.git utf8 ``` The Visual C++ projects are set to compile under C++17 rules and can also be compiled under C++20 rules. If you are using C++20 rules, you have to add the [`Zc:char8_t-`](https://learn.microsoft.com/en-us/cpp/build/reference/zc-char8-t?view=msvc-170) option as discussed above. -You can build the library using CMake. From the _utf8_ directory: -``` +You can build the library using CMake. From the `utf8` directory: + +```bash cmake -S . -B build cmake --build build ``` + Alternatively, `BUILD.bat` script will build the libraries and test programs. Under Linux, the library can be built using `CPM` as explained before, or with `cmake` using the same commands shown above. 
- ## License + [The MIT License](https://github.com/neacsum/utf8/blob/master/LICENSE) diff --git a/cmake/CompileCommands.cmake b/cmake/CompileCommands.cmake new file mode 100644 index 0000000..00b0793 --- /dev/null +++ b/cmake/CompileCommands.cmake @@ -0,0 +1,2 @@ +set(CMAKE_EXPORT_COMPILE_COMMANDS TRUE CACHE BOOL "Generate compile_commands.json" FORCE) +mark_as_advanced(CMAKE_EXPORT_COMPILE_COMMANDS) diff --git a/cmake/ConfigFetchContent.cmake b/cmake/ConfigFetchContent.cmake new file mode 100644 index 0000000..d061213 --- /dev/null +++ b/cmake/ConfigFetchContent.cmake @@ -0,0 +1,7 @@ +set(FETCHCONTENT_QUIET FALSE CACHE BOOL "Suppress output from FetchContent" FORCE) + +# Cache FetchContent downloads outside build directory +set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/../.dep-cache" CACHE PATH "FetchContent cache directory") + +# Include FetchContent for external dependencies +include(FetchContent) diff --git a/cmake/utpp.cmake b/cmake/utpp.cmake new file mode 100644 index 0000000..1221fe8 --- /dev/null +++ b/cmake/utpp.cmake @@ -0,0 +1,18 @@ +FetchContent_Declare( + utpp-repo + GIT_REPOSITORY https://github.com/neacsum/utpp.git + GIT_SHALLOW TRUE + GIT_PROGRESS TRUE + SOURCE_SUBDIR "" # Prevent any build system from being processed + CONFIGURE_COMMAND "echo" +) +FetchContent_MakeAvailable(utpp-repo) + +# Header-only interface target that depends on the fetched repo +add_library(utpp INTERFACE) + +target_include_directories(utpp INTERFACE ${utpp-repo_SOURCE_DIR}/include) +target_sources(utpp INTERFACE ${utpp-repo_SOURCE_DIR}/include/utpp/utpp.h) + +# target_include_directories(utpp INTERFACE ${PROJECT_SOURCE_DIR}/include) +# target_sources(utpp INTERFACE ${PROJECT_SOURCE_DIR}/include/utpp_shim.h) diff --git a/compile_flags.txt b/compile_flags.txt new file mode 100644 index 0000000..e69de29 diff --git a/cpm.json b/cpm.json index cbbf555..68bf394 100644 --- a/cpm.json +++ b/cpm.json @@ -4,8 +4,8 @@ "https": "https://github.com/neacsum/utf8.git", "build": [ {"os": 
"windows", "cmd": "build.bat", "args": ["lib"]}, - {"os": "linux darwin", "cmd": "cmake", "args": ["--workflow", "--preset", "x64"]} - ], + {"os": "linux darwin", "cmd": "cmake", "args": ["--workflow", "--preset", "MSVC x64"]} + ], "depends": [ { "name": "utpp", diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 5ed06e6..172fa44 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,8 +1,8 @@ add_executable(sample sample.cpp) -set_target_properties(sample PROPERTIES - CXX_STANDARD 17 - ) +set_target_properties(sample PROPERTIES + CXX_STANDARD 20 +) # All link directories are subfolders of ./lib if (WIN32) @@ -13,4 +13,3 @@ endif() # Add dependent libraries target_link_libraries (sample PRIVATE utf8) - diff --git a/examples/sample.cpp b/examples/sample.cpp index 97dcf8f..f016ca8 100644 --- a/examples/sample.cpp +++ b/examples/sample.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +// #include #include using namespace std; @@ -32,7 +32,7 @@ int main (int /*unused*/, char ** /*unused*/) ofstream fout; if (GetACP () != 65001) { - cout << "Windows ACP is not UTF-8. Output will be sent to " << FNAME + cout << "Windows ACP is not UTF-8. Output will be sent to " << FNAME << endl << endl; fout.open (FNAME); } @@ -104,7 +104,7 @@ int main (int /*unused*/, char ** /*unused*/) #ifdef _WIN32 //Set an environment variable and retrieve its value utf8::putenv ("Punjabi=पंजाबी"); - out << "The environment variable Punjabi is " + out << "The environment variable Punjabi is " << utf8::getenv ("Punjabi") << endl; #endif @@ -143,4 +143,4 @@ int main (int /*unused*/, char ** /*unused*/) utf8::free_argv (argc, argv); #endif return 0; -} \ No newline at end of file +} diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt new file mode 100644 index 0000000..3732649 --- /dev/null +++ b/include/CMakeLists.txt @@ -0,0 +1,17 @@ +# target_include_directories(${PROJECT_NAME} PUBLIC utf8) +target_include_directories(${PROJECT_NAME} PUBLIC .) 
+target_sources(${PROJECT_NAME} PUBLIC + utf8/ini.h + utf8/utf8.h +) +# Windows specific stuff +if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + target_sources(${PROJECT_NAME} PUBLIC + utf8/winutf8.h + ) +endif () + +target_sources(${PROJECT_NAME} PRIVATE + uppertab.h + lowertab.h +) diff --git a/include/utf8/ini.h b/include/utf8/ini.h index c83692b..b19d8b5 100644 --- a/include/utf8/ini.h +++ b/include/utf8/ini.h @@ -4,11 +4,14 @@ */ /// \file ini.h Definition of IniFile class -/// This file should not be included directly. It is included by utf8.h header. +/// This file should not be included directly. It is included by utf8.h header. #pragma once #include #include +#ifdef _WIN32 + #include +#endif namespace utf8 { @@ -37,7 +40,7 @@ class IniFile /// Set the file name associated with this object void File (const std::string& filename); - + ///Get a string key size_t GetString (char *value, size_t len, const std::string& key, const std::string& section, const std::string& defval = std::string()) const; diff --git a/include/utf8/utf8.h b/include/utf8/utf8.h index c8e4ab8..f548d7a 100644 --- a/include/utf8/utf8.h +++ b/include/utf8/utf8.h @@ -7,7 +7,6 @@ #pragma once #include -#include #include // ------------- Global configuration options --------------------------------- @@ -268,7 +267,7 @@ bool is_valid (const char* p) inline bool is_valid (std::string::const_iterator p, const std::string::const_iterator last) { - auto len = last - p; + // auto len = last - p; auto prev_mode = error_mode (action::replace); bool valid = (next (p, last) != REPLACEMENT_CHARACTER); error_mode (prev_mode); @@ -744,7 +743,7 @@ bool rename (const std::string& oldname, const std::string& newname) } /// \copydoc utf8::rename() -inline +inline bool rename (const char* oldname, const char* newname) { #if UTF8_USE_WINDOWS_API diff --git a/include/utf8/winutf8.h b/include/utf8/winutf8.h index c0575c7..6f2f3d7 100644 --- a/include/utf8/winutf8.h +++ b/include/utf8/winutf8.h @@ -9,6 +9,8 @@ 
#include #include #include +#include +#include #undef MessageBox #undef CopyFile @@ -97,7 +99,7 @@ std::string GetFullPathName (const std::string& rel_path); bool GetModuleFileName (HMODULE hModule, std::string& filename); std::string GetModuleFileName (HMODULE hModule = NULL); -/// File enumeration structure used by find_first() and find_next() functions +/// File enumeration structure used by find_first() and find_next() functions struct find_data { find_data () ///< Initializes the structure : handle{ INVALID_HANDLE_VALUE } @@ -112,7 +114,7 @@ struct find_data { FILETIME creation_time; ///< file creation time FILETIME access_time; ///< file last access time FILETIME write_time; ///< file last write time - __int64 size; ///< file size + int64_t size; ///< file size std::string filename; ///< file name std::string short_name; ///< 8.3 file name }; @@ -147,16 +149,16 @@ class file_enumerator : protected find_data operator bool () const; - find_data::attributes; - find_data::creation_time; - find_data::access_time; - find_data::write_time; - find_data::size; - find_data::filename; - find_data::short_name; + using find_data::attributes; + using find_data::creation_time; + using find_data::access_time; + using find_data::write_time; + using find_data::size; + using find_data::filename; + using find_data::short_name; }; -/// A simple buffer for caching values returned by Windows API +/// A simple buffer for caching values returned by Windows API class buffer { public: explicit buffer (size_t size_); @@ -343,4 +345,3 @@ inline DWORD } } //end namespace - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f04d071..afd62da 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,33 +1,19 @@ -add_library(${PROJECT_NAME}) - -set_target_properties(${PROJECT_NAME} PROPERTIES - ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib/${pfx}/$ - CXX_STANDARD 17 -) - -target_include_directories(${PROJECT_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/include) - -add_custom_command( 
- OUTPUT ${PROJECT_SOURCE_DIR}/include/uppertab.h ${PROJECT_SOURCE_DIR}/include/lowertab.h - COMMAND $ ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt ${PROJECT_SOURCE_DIR}/include - MAIN_DEPENDENCY ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt - DEPENDS gen_casetab - VERBATIM -) -target_sources(${PROJECT_NAME} - PRIVATE ${PROJECT_SOURCE_DIR}/include/uppertab.h ${PROJECT_SOURCE_DIR}/include/lowertab.h +target_sources(${PROJECT_NAME} PRIVATE + casecvt.cpp + ini.cpp + utf8.cpp ) -target_sources(${PROJECT_NAME} PRIVATE - casecvt.cpp - ini.cpp - utf8.cpp +# Set up dependency for casecvt.cpp on generated headers +add_dependencies(${PROJECT_NAME} run_gen_casetab) +set_source_files_properties(casecvt.cpp PROPERTIES + OBJECT_DEPENDS "${PROJECT_SOURCE_DIR}/include/uppertab.h;${PROJECT_SOURCE_DIR}/include/lowertab.h" ) # Windows specific stuff if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") -target_sources(${PROJECT_NAME} PRIVATE - buffer.cpp - win.cpp -) + target_sources(${PROJECT_NAME} PRIVATE + buffer.cpp + win.cpp + ) endif () diff --git a/src/ini.cpp b/src/ini.cpp index b0433b7..9481fa9 100644 --- a/src/ini.cpp +++ b/src/ini.cpp @@ -29,7 +29,7 @@ namespace utf8 { \defgroup inifile INI File Replacement API An object-oriented replacement for working with INI files - The basic Windows API functions for reading and writing INI files, + The basic Windows API functions for reading and writing INI files, [GetPrivateProfileStringW] (https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-getprivateprofilestringw) and [WritePrivateProfileStringW] @@ -120,7 +120,7 @@ static char *trimtrailing (char *str) } //----------------------------------------------------------------------------- -// File manipulation functions +// File manipulation functions inline static FILE *openread (const std::string& fname) @@ -161,30 +161,30 @@ static std::string tempname (const std::string& source) \ingroup inifile */ -/// Constructor -IniFile::IniFile (const std::string& file) - : temp_file 
{false} +/// Constructor +IniFile::IniFile (const std::string& file): /* get the fully qualified path name in case current directory changes after creation */ -#ifdef _WIN32 -# if UTF8_USE_WINDOWS_API - , filename { utf8::fullpath (file) } -# else - , filename{ narrow (std::filesystem::absolute (widen (file))) } -# endif -#else - , filename{ std::filesystem::absolute (file) } -#endif + #ifdef _WIN32 + #if UTF8_USE_WINDOWS_API + filename { utf8::fullpath (file) }, + #else + filename{ narrow (std::filesystem::absolute (widen (file))) }, + #endif + #else + filename{ std::filesystem::absolute (file) }, + #endif + temp_file {false} { } /// Creates a temporary file as filename. -IniFile::IniFile () - : temp_file {true} -#if UTF8_USE_WINDOWS_API - , filename (utf8::GetTempFileName(".", "INI", 0)) -#else - , filename (tmpnam(NULL)) -#endif +IniFile::IniFile (): + #if UTF8_USE_WINDOWS_API + filename (utf8::GetTempFileName(".", "INI", 0)), + #else + filename (tmpnam(NULL)), + #endif + temp_file {true} { } @@ -205,7 +205,7 @@ IniFile::~IniFile() /*! Changes the file associated with this object. If previous one was a temporary file, it is deleted now (loosing all settings in the process). - + \param fname New file name. If empty it creates a temporary file. */ void IniFile::File (const std::string& fname) @@ -418,7 +418,7 @@ HFONT IniFile::GetFont (const std::string& key, const std::string& section, HFON } /*! - Color is assumed to be in the same format as written by PutColor i.e. + Color is assumed to be in the same format as written by PutColor i.e. R G B numbers separated by spaces. 
\param key key name @@ -502,8 +502,8 @@ bool IniFile::GetBool (const std::string& key, const std::string& section, bool if (!GetString (buffer, sizeof(buffer), key, section)) return defval; - return (!icompare (buffer, "on") - || !icompare (buffer, "yes") + return (!icompare (buffer, "on") + || !icompare (buffer, "yes") || !icompare (buffer, "true") || (atoi (buffer) == 1)); } @@ -672,7 +672,7 @@ int IniFile::GetKeys (char *keys, size_t sz, const std::string& section) int cnt = 0; sz -= 2; //leave space for terminating NULL - auto f = [&keys, &sz] (const char *k) + auto f = [&keys, &sz] (const char *k) { size_t l = min (strlen(k), sz); strncpy (keys, k, sz); @@ -725,7 +725,7 @@ size_t IniFile::GetString (char *value, size_t len, const std::string& key, cons if (!value || !len) return 0; fp = openread (filename); - if (fp) + if (fp) { found = getkey (fp, section.c_str(), key.c_str(), value, len); fclose(fp); @@ -771,7 +771,7 @@ bool IniFile::PutString (const std::string& key, const std::string& value, const } /*! - Section names are returned as null-terminated strings followed by one + Section names are returned as null-terminated strings followed by one final null. 
\param sects buffer for returned keys @@ -913,7 +913,7 @@ static bool findsection (const char *section, FILE *rf, FILE *wf, char *buffer, if (*sp == '[' && strchr(buffer, ']')) { sp = skipleading (sp + 1); - + if (!icomparen (sp, section, len)) return true; } @@ -946,10 +946,10 @@ static bool putkey (const char *key, const char *value, const char *section, con assert (section); - if (!(rfp = openread(filename))) + if (!(rfp = openread(filename))) { /* If the .ini file doesn't exist, make a new file */ - if (key && value) + if (key && value) { if (!(wfp = openwrite (filename))) return false; @@ -975,7 +975,7 @@ static bool putkey (const char *key, const char *value, const char *section, con // key not found, or different value -> proceed (but rewind the input file first) fseek (rfp, 0, SEEK_SET); - if (!(wfp = openwrite(tempname(filename)))) + if (!(wfp = openwrite(tempname(filename)))) { fclose (rfp); return false; @@ -1010,7 +1010,7 @@ static bool putkey (const char *key, const char *value, const char *section, con } else { - //deleting the section -> skip all entries until next section or end of file + //deleting the section -> skip all entries until next section or end of file while ((sp = fgets (buffer, sizeof (buffer), rfp)) && *(sp = skipleading (buffer)) != '[') ; } @@ -1058,7 +1058,7 @@ static bool getkey(FILE *fp, const char *section, const char *key, char *val, si assert (fp); assert (section); assert (key); - + // Move through file 1 line at a time until the section is matched or EOF. 
if (!findsection (section, fp, NULL, buffer, sizeof(buffer))) return false; @@ -1068,7 +1068,7 @@ static bool getkey(FILE *fp, const char *section, const char *key, char *val, si key = skipleading (key); len = trimmed_len (key); bool found = false; - do + do { if (!fgets (buffer, sizeof (buffer), fp) || *(sp = skipleading (buffer)) == '[') return false; @@ -1158,4 +1158,3 @@ static bool same_file (const std::string& f1, const std::string& f2) } } - diff --git a/src/utf8.cpp b/src/utf8.cpp index 965e18d..9c96239 100644 --- a/src/utf8.cpp +++ b/src/utf8.cpp @@ -13,7 +13,7 @@ using namespace std; namespace utf8 { -static thread_local action ermode{action::replace}; +static thread_local action errmode{action::replace}; /*! \param mode new error handling mode @@ -21,8 +21,8 @@ static thread_local action ermode{action::replace}; */ action error_mode (action mode) { - auto prev = ermode; - ermode = mode; + auto prev = errmode; + errmode = mode; return prev; } @@ -31,7 +31,7 @@ static void encode (char32_t c, std::string& s); inline char32_t throw_or_replace (exception::cause err) { - if (ermode == action::except) + if (errmode == action::except) throw exception (err); else return REPLACEMENT_CHARACTER; @@ -369,7 +369,7 @@ bool valid_str (const char *s, size_t nch) Decodes a UTF-8 encoded character and advances iterator to next code point \param ptr Reference to iterator to be advanced - \param last Iterator pointing to the end of range + \param last Iterator pointing to the end of range \return decoded character If the iterator points to an invalid UTF-8 encoding or is at end, the function @@ -454,7 +454,7 @@ char32_t next (std::string::const_iterator& ptr, const std::string::const_iterat \param ptr Reference to character pointer to be advanced \return decoded character - If the string contains an invalid UTF-8 encoding, the function throws an + If the string contains an invalid UTF-8 encoding, the function throws an exception or returns utf8::REPLACEMENT_CHARACTER 
(0xfffd) depending on error handling mode. In any case, the pointer is advanced to beginning of next character or end of string. @@ -533,7 +533,7 @@ char32_t next (const char*& ptr) \param ptr Reference to character pointer to be decremented \return previous UTF-8 encoded character - If the string contains an invalid UTF-8 encoding, the function throws an + If the string contains an invalid UTF-8 encoding, the function throws an exception or returns utf8::REPLACEMENT_CHARACTER (0xfffd) depending on error handling mode. In this case the pointer remains unchanged. */ @@ -727,7 +727,7 @@ void encode (char32_t c, std::string& s) else if (c <= 0x7ff) { s.push_back (0xC0 | c >> 6); - s.push_back (0x80 | c & 0x3f); + s.push_back (0x80 | (c & 0x3f)); } else if (c <= 0xFFFF) { @@ -735,17 +735,17 @@ void encode (char32_t c, std::string& s) c= throw_or_replace(exception::cause::invalid_char32); s.push_back (0xE0 | c >> 12); - s.push_back (0x80 | c >> 6 & 0x3f); - s.push_back (0x80 | c & 0x3f); + s.push_back (0x80 | (c >> 6 & 0x3f)); + s.push_back (0x80 | (c & 0x3f)); } else if (c <= 0x10ffff) { s.push_back (0xF0 | c >> 18); - s.push_back (0x80 | c >> 12 & 0x3f); - s.push_back (0x80 | c >> 6 & 0x3f); - s.push_back (0x80 | c & 0x3f); + s.push_back (0x80 | (c >> 12 & 0x3f)); + s.push_back (0x80 | (c >> 6 & 0x3f)); + s.push_back (0x80 | (c & 0x3f)); } - else if (ermode == action::except) + else if (errmode == action::except) throw exception (exception::cause::invalid_char32); else s.append ("\xEF\xBF\xBD"); //append replacement character diff --git a/src/win.cpp b/src/win.cpp index c2a62ca..529876c 100644 --- a/src/win.cpp +++ b/src/win.cpp @@ -3,7 +3,7 @@ This is part of UTF8 project. See LICENSE file for full license terms. 
*/ -/// \file win.cpp Wrappers for common Windows functions +/// \file win.cpp Wrappers for common Windows functions #include #include @@ -18,7 +18,7 @@ static void copy_fdat (WIN32_FIND_DATAW& wfd, find_data& fdat) fdat.creation_time = wfd.ftCreationTime; fdat.access_time = wfd.ftLastAccessTime; fdat.write_time = wfd.ftLastWriteTime; - fdat.size = ((__int64)wfd.nFileSizeHigh << 32) | (wfd.nFileSizeLow); + fdat.size = ((int64_t)wfd.nFileSizeHigh << 32) | (wfd.nFileSizeLow); fdat.filename = narrow (wfd.cFileName); fdat.short_name = narrow (wfd.cAlternateFileName); } @@ -57,7 +57,7 @@ bool find_first (const std::string& name, find_data& fdat) \note Wrapper for [FindNextFileW](https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-findnextfilew) Windows API function. - + If there are no more files, the function returns _false_ and GetLastError function returns __ERROR_NO_MORE_FILES__ */ @@ -458,13 +458,13 @@ std::vector get_argv () //============================================================================= -/*! +/*! \defgroup reg Registry Functions Wrappers for Windows registry functions. For all these functions wide character strings arguments are replaced with UTF-8 encoded C++ strings. -@{ +@{ */ /*! @@ -562,7 +562,7 @@ LSTATUS RegDeleteTree (HKEY key, const std::string& subkey) } /*! 
- Wrapper for + Wrapper for [RegRenameKey](https://learn.microsoft.com/en-us/windows/win32/api/winreg/nf-winreg-regrenamekey) \param key handle to an open registry key @@ -644,7 +644,7 @@ LSTATUS RegSetValue (HKEY key, const std::string& value, const std::vector(ptr - buf) == key_size); auto ret = RegSetValue (key, value, REG_MULTI_SZ, buf, (DWORD)key_size*sizeof(wchar_t)); delete []buf; return ret; @@ -680,7 +680,7 @@ LSTATUS RegQueryValue (HKEY key, const std::string& value, DWORD* type, void* da \param size pointer to size data size (in bytes) \param type pointer to type of data */ -LSTATUS RegGetValue (HKEY key, const std::string& subkey, const std::string& value, +LSTATUS RegGetValue (HKEY key, const std::string& subkey, const std::string& value, DWORD flags, void* data, DWORD* size, DWORD* type) { auto wsubkey = widen (subkey); @@ -707,7 +707,7 @@ LSTATUS RegGetValue (HKEY key, const std::string& subkey, const std::string& val auto wvalue = widen (value); DWORD sz = 0; const DWORD flags = RRF_RT_REG_SZ | RRF_RT_REG_EXPAND_SZ | (expand ? 
0 :RRF_NOEXPAND); - auto ret = RegGetValueW (key, wsubkey.c_str (), wvalue.c_str (), + auto ret = RegGetValueW (key, wsubkey.c_str (), wvalue.c_str (), flags, NULL, NULL, &sz); if (ret == ERROR_SUCCESS) { @@ -716,7 +716,7 @@ LSTATUS RegGetValue (HKEY key, const std::string& subkey, const std::string& val https://stackoverflow.com/questions/29223180/successive-calls-to-reggetvalue-return-two-different-sizes-for-the-same-string */ wchar_t *wdat = new wchar_t[sz / sizeof (wchar_t)]; - ret = RegGetValueW (key, wsubkey.c_str (), wvalue.c_str (), flags, NULL, + ret = RegGetValueW (key, wsubkey.c_str (), wvalue.c_str (), flags, NULL, wdat, &sz); if (ret == ERROR_SUCCESS) data = narrow (wdat); @@ -808,7 +808,7 @@ LSTATUS RegEnumKey (HKEY key, std::vector& names) if (ret != ERROR_SUCCESS) return ret; maxlen++; //for terminating NULL - + wchar_t* wnam = new wchar_t[maxlen]; DWORD index = 0; names.clear (); @@ -898,4 +898,4 @@ LSTATUS RegEnumValue (HKEY key, std::vector& values) } -} //end namespace utf8 \ No newline at end of file +} //end namespace utf8 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 912f686..7d7c778 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,13 +2,21 @@ add_executable(tests tests_ini.cpp tests_win.cpp tests_utf8.cpp tests.rc ) +target_include_directories(tests PRIVATE .) 
+set_property(TARGET tests PROPERTY CXX_STANDARD 20) +set_property(DIRECTORY PROPERTY VS_STARTUP_PROJECT tests) +set_target_properties(tests PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests" + # RUNTIME_OUTPUT_DIRECTORY_DEBUG "${CMAKE_BINARY_DIR}/tests" + # RUNTIME_OUTPUT_DIRECTORY_RELEASE "${CMAKE_BINARY_DIR}/tests" +) + +# Install the tests executable (helps with CMake extension discovery) +install(TARGETS tests + RUNTIME DESTINATION tests +) -target_include_directories(tests PUBLIC ${PROJECT_SOURCE_DIR}/include) -set_property(TARGET tests PROPERTY CXX_STANDARD 17) +target_link_libraries (tests PRIVATE utf8 utpp) # All link directories are subfolders of ./lib target_link_directories (tests PUBLIC ${PROJECT_SOURCE_DIR}/lib/${pfx}/$) - -# Add dependent libraries -add_dependencies(tests utf8) -target_link_libraries (tests PRIVATE utf8) diff --git a/tests/tests.vcxproj b/tests/tests.vcxproj index 32b756b..6ae3391 100644 --- a/tests/tests.vcxproj +++ b/tests/tests.vcxproj @@ -96,7 +96,7 @@ Disabled WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true - $(SolutionDir)include + $(SolutionDir)include;$(SolutionDir)tests;$(SolutionDir)build\.dep-cache\utpp-repo-src\include /utf-8 stdcpp17 @@ -112,7 +112,7 @@ Level3 Disabled _DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(SolutionDir)include + $(SolutionDir)include;$(SolutionDir)tests;$(SolutionDir)build\.dep-cache\utpp-repo-src\include /utf-8 true stdcpp17 @@ -132,7 +132,7 @@ true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true - $(SolutionDir)include + $(SolutionDir)include;$(SolutionDir)tests;$(SolutionDir)build\.dep-cache\utpp-repo-src\include /utf-8 stdcpp17 @@ -152,7 +152,7 @@ true true NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(SolutionDir)include + $(SolutionDir)include;$(SolutionDir)tests;$(SolutionDir)build\.dep-cache\utpp-repo-src\include /utf-8 true stdcpp17 @@ -173,6 +173,7 @@ + @@ -180,4 +181,4 @@ - \ No newline at end of file + diff --git a/tests/tests_ini.cpp b/tests/tests_ini.cpp 
index 7ff4aae..ec00dec 100644 --- a/tests/tests_ini.cpp +++ b/tests/tests_ini.cpp @@ -5,7 +5,7 @@ #define _CRT_SECURE_NO_WARNINGS #include -#include +#include #include #include @@ -247,7 +247,7 @@ SUITE (IniTests) utf8::IniFile ini{ "test.ini" }; char val[80]; ini.PutString (" key00 ", " value00 ", " section0 "); - GetPrivateProfileStringA ("section0", "key00", "bad", val, + GetPrivateProfileStringA ("section0", "key00", "bad", val, sizeof(val), ".\\test.ini"); CHECK_EQUAL ("value00", val); remove ("test.ini"); @@ -466,7 +466,7 @@ SUITE (IniTests) utf8::IniFile f2 ("test2.ini"); f2.CopySection (f1, "section0", "section1"); - + CHECK_EQUAL ("value00", f2.GetString ("key0", "section1")); CHECK_EQUAL ("value01", f2.GetString ("key1", "section1")); @@ -508,7 +508,7 @@ SUITE (IniTests) f2.CopySection (f1, "section1"); deque keys; - + //previous content of section1 was erased CHECK_EQUAL (2, f2.GetKeys (keys, "section1")); diff --git a/tests/tests_utf8.cpp b/tests/tests_utf8.cpp index fd1fb21..0c49ac3 100644 --- a/tests/tests_utf8.cpp +++ b/tests/tests_utf8.cpp @@ -2,7 +2,7 @@ Copyright (c) Mircea Neacsu (2014-2024) Licensed under MIT License. This is part of UTF8 project. See LICENSE file for full license terms. 
*/ -#include +#include #include #include #include @@ -23,7 +23,7 @@ using namespace utf8; TEST_MAIN (int argc, char **argv) { - const char* suite_under_test = nullptr; + // const char* suite_under_test = nullptr; std::cerr << "Running " << *argv++ << endl << "working directory is: " << getcwd () << endl; --argc; @@ -45,7 +45,7 @@ TEST_MAIN (int argc, char **argv) std::filesystem::path xml_filename(*argv); std::ofstream xml_stream (xml_filename); UnitTest::ReporterXml xml(xml_stream); - std::cerr << "Output sent to " + std::cerr << "Output sent to " << std::filesystem::absolute (xml_filename) << endl; return RunAllTests (xml); } @@ -237,7 +237,7 @@ TEST (next_ptr) TEST (next_non_const) { char emojis[20]; - strcpy (emojis, u8"😃😎😛" ); + strcpy_s(emojis, sizeof(emojis), u8"😃😎😛"); int i = 0; char* ptr = emojis; while (utf8::next (ptr)) @@ -487,7 +487,7 @@ TEST (invalid_utf8) TEST (throw_invalid_char32) { auto prev_mode = utf8::error_mode (action::except); - bool thrown = false; + // bool thrown = false; CHECK_THROW (narrow (0xd800), utf8::exception); CHECK_THROW (narrow (0xdbff), utf8::exception); utf8::error_mode (prev_mode); @@ -525,7 +525,7 @@ TEST (dir) //Path returned by getcwd should end in our Greek string string cwd = getcwd (); - + //find last path separator #ifdef _WIN32 size_t idx = cwd.rfind ("\\"); @@ -537,7 +537,7 @@ TEST (dir) //Move out of directory and remove it utf8::chdir (".."); - CHECK (utf8::rmdir (dirname)); //rmdir returrs true for success + CHECK (utf8::rmdir (dirname)); //rmdir returns true for success } @@ -598,14 +598,14 @@ TEST (char_class) temp[1] = 0; char tst[80]; snprintf (tst, sizeof(tst), "testing char %d", i); - CHECK_EQUAL_EX ((bool)isalpha (chartab[i]), utf8::isalpha (temp), tst); - CHECK_EQUAL_EX ((bool)isalnum (chartab[i]), utf8::isalnum (temp), tst); - CHECK_EQUAL_EX ((bool)(isdigit) (chartab[i]), utf8::isdigit (temp), tst); - CHECK_EQUAL_EX ((bool)(isspace) (chartab[i]), utf8::isspace (temp), tst); - CHECK_EQUAL_EX 
((bool)(isblank)(chartab[i]), utf8::isblank (temp), tst); - CHECK_EQUAL_EX ((bool)(isxdigit) (chartab[i]), utf8::isxdigit (temp), tst); - CHECK_EQUAL_EX ((bool)isupper (chartab[i]), utf8::isupper (temp), tst); - CHECK_EQUAL_EX ((bool)islower (chartab[i]), utf8::islower (temp), tst); + CHECK_EQUAL_EX ((bool)isalpha (chartab[i]), utf8::isalpha (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)isalnum (chartab[i]), utf8::isalnum (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)(isdigit) (chartab[i]), utf8::isdigit (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)(isspace) (chartab[i]), utf8::isspace (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)(isblank)(chartab[i]), utf8::isblank (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)(isxdigit) (chartab[i]), utf8::isxdigit (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)isupper (chartab[i]), utf8::isupper (temp), "%s", tst); + CHECK_EQUAL_EX ((bool)islower (chartab[i]), utf8::islower (temp), "%s", tst); } } diff --git a/tests/tests_win.cpp b/tests/tests_win.cpp index 031c7e7..d70e566 100644 --- a/tests/tests_win.cpp +++ b/tests/tests_win.cpp @@ -2,7 +2,7 @@ Copyright (c) Mircea Neacsu (2014-2024) Licensed under MIT License. This is part of UTF8 project. See LICENSE file for full license terms. 
*/ -#include +#include #include #if UTF8_USE_WINDOWS_API @@ -82,7 +82,9 @@ SUITE (MS_Windows) TEST (full_path) { const char* fname = "file.txt"; - FILE* f = ::fopen (fname, "w"); + FILE* f = nullptr; + if (fopen_s(&f, fname, "w") != 0 || !f) + ABORT_EX(true, "Failed to open file"); fclose (f); char full[_MAX_PATH]; @@ -140,7 +142,7 @@ SUITE (MS_Windows) buf = tmp; CHECK_EQUAL (tmp, (string)buf); - // size doesn't shrink when assigning a string + // size doesn't shrink when assigning a string CHECK_EQUAL (_MAX_PATH, buf.size ()); //Copy ctor @@ -313,7 +315,7 @@ SUITE (MS_Windows) SUITE (Registry) { - + const string key_name{ u8"αρχείο" };//Greek for "registry" according to Google TEST (create_open) @@ -379,7 +381,7 @@ TEST (enum_keys) { HKEY key; utf8::RegCreateKey (HKEY_CURRENT_USER, key_name, key); - + vectorin_name{ u8"α1", u8"β2", u8"γ3", u8"😃😎😛"}; vectorout_name(4); @@ -425,4 +427,4 @@ TEST (enum_values) } } //end suite -#endif \ No newline at end of file +#endif diff --git a/tests/utpp_shim.h b/tests/utpp_shim.h new file mode 100644 index 0000000..2302964 --- /dev/null +++ b/tests/utpp_shim.h @@ -0,0 +1,67 @@ +#pragma once + +// Work around missing/deleted char32_t stream operator +// This allows utpp's CheckEqual to print char32_t values properly +// Must be defined BEFORE including utpp to ensure it's found by ADL + +#include // IWYU pragma: keep +#include // IWYU pragma: keep +#include // IWYU pragma: keep +#include // IWYU pragma: keep + +// Provide the operator for compilers/standards that need it: +// - MSVC 2022+ deletes the char32_t stream operator +// - Clang with C++20 also seems to need this fix +#if (defined(_MSC_VER) && _MSC_VER >= 1930) || \ + (defined(__clang__) && __cplusplus >= 202002L) + + inline std::ostream& operator<<(std::ostream& os, char32_t c) { + return os << static_cast(c); + } + +#endif + +// C++17 compatibility shims for utpp +#if __cplusplus < 202002L + + // Add std::chrono::milliseconds stream operator for C++17 + namespace std { 
+ namespace chrono { + inline std::ostream& operator<<(std::ostream& os, const milliseconds& ms) { + return os << ms.count() << "ms"; + } + + inline std::ostream& operator<<(std::ostream& os, const duration& dur) { + return os << dur.count() << "s"; + } + + // Provide utc_clock as alias to system_clock for C++17 + using utc_clock = system_clock; + } + + // Simple format replacement for C++17 + template + inline std::string format(const std::string& fmt, Args&&... args) { + std::ostringstream oss; + // For time formatting, just return a simple ISO-like format + if (fmt.find(":%Y-%m-%dT%H:%M:%S") != std::string::npos) { + auto now = std::chrono::system_clock::now(); + auto time_t = std::chrono::system_clock::to_time_t(now); + +#ifdef _WIN32 + struct tm tm_buf; + gmtime_s(&tm_buf, &time_t); + oss << std::put_time(&tm_buf, "%Y-%m-%dT%H:%M:%SZ"); +#else + oss << std::put_time(std::gmtime(&time_t), "%Y-%m-%dT%H:%M:%SZ"); +#endif + } else { + oss << "formatted_output"; + } + return oss.str(); + } + } + +#endif + +#include // IWYU pragma: keep diff --git a/tools/gen_casetab/CMakeLists.txt b/tools/gen_casetab/CMakeLists.txt index 5e9d84a..39884e0 100644 --- a/tools/gen_casetab/CMakeLists.txt +++ b/tools/gen_casetab/CMakeLists.txt @@ -1,2 +1,18 @@ -add_executable(gen_casetab gen_casetab.cpp) -set_property(TARGET gen_casetab PROPERTY CXX_STANDARD 20) \ No newline at end of file +add_executable(build_gen_casetab gen_casetab.cpp) +set_property(TARGET build_gen_casetab PROPERTY CXX_STANDARD 20) + +# Custom command to generate the header files +add_custom_command( + OUTPUT ${PROJECT_SOURCE_DIR}/include/uppertab.h ${PROJECT_SOURCE_DIR}/include/lowertab.h + COMMAND $ ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt ${PROJECT_SOURCE_DIR}/include + MAIN_DEPENDENCY ${PROJECT_SOURCE_DIR}/data/UnicodeData.txt + DEPENDS build_gen_casetab + VERBATIM + COMMENT "Generating Unicode case conversion tables" +) + +# Custom target that depends on the generated files +add_custom_target(run_gen_casetab 
DEPENDS + ${PROJECT_SOURCE_DIR}/include/uppertab.h + ${PROJECT_SOURCE_DIR}/include/lowertab.h +) diff --git a/tools/gen_casetab/gen_casetab.cpp b/tools/gen_casetab/gen_casetab.cpp index a78340f..342fa75 100644 --- a/tools/gen_casetab/gen_casetab.cpp +++ b/tools/gen_casetab/gen_casetab.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +// #include #include using namespace std; diff --git a/utf8.code-workspace b/utf8.code-workspace new file mode 100644 index 0000000..a4005d4 --- /dev/null +++ b/utf8.code-workspace @@ -0,0 +1,49 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "files.watcherExclude": { + "build/**": true, + "**/.git/**": true, + }, + "files.insertFinalNewline": true, + "clangd.arguments": [ + // "--compile-commands-dir=${workspaceFolder}" + ], + "cmake.configureOnOpen": true, + "cmake.configureOnEdit": true, + "cmake.buildBeforeRun": true, + "cmake.launchBehavior": "breakAndReuseTerminal", + "lldb.verboseLogging": true, + "lldb.useNativePDBReader": true, + "cmake.copyCompileCommands": "${workspaceFolder}/compile_commands.json" + }, + "extensions": { + // See http://go.microsoft.com/fwlink/?LinkId=827846 + // for the documentation about the extensions.json format + "recommendations": [ + "streetsidesoftware.code-spell-checker", + "streetsidesoftware.code-spell-checker-scientific-terms", + "streetsidesoftware.code-spell-checker-win32", + "ltex-plus.vscode-ltex-plus", + "vadimcn.vscode-lldb" + ] + }, + "launch": { + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Launch", + "program": "${command:cmake.launchTargetPath}", + "args": [], + "cwd": "${command:cmake.getLaunchTargetDirectory}", + "console": "integratedTerminal" + } + ] + }, +}