Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions ionc/ion_writer_text.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include <decNumber/decNumber.h>

#if defined(_MSC_VER)
Expand All @@ -36,6 +37,42 @@

#define LOCAL_INT_CHAR_BUFFER_LENGTH 257

// Helper function to format floating point numbers in a locale-independent way
// Always uses "." as decimal point regardless of system locale
static iERR _ion_writer_vsnprintf_double(char *buffer, size_t buffer_size, const char *format, ...)
{
va_list args;
va_start(args, format);

int snprintf_result = vsnprintf(buffer, buffer_size, format, args);

va_end(args);

// Check for formatting errors
if (snprintf_result < 0) {
return IERR_INVALID_STATE; // Formatting error
}

// Post-process: replace any comma decimal separators with periods
// Note: it is tempting to try to optimize this by only checking if a non-standard
// locale is used. However, that approach has thread-safety issues because the
// global locale could have been changed between when we check it and when it
// is used to format the float.
char *cp = buffer;
while (*cp) {
if (*cp == ',') {
*cp = '.'; // Convert comma decimal separator to period
break; // No float will contain more than one decimal point
Comment on lines +63 to +65
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will break if the provided format string specifies to provide thousands' grouping characters. For instance:

int main(int argc, char **argv) {
   setlocale(LC_ALL, "it_IT.UTF-8");
   printf("%'f\n", M_PI * 10000);
}

Prints:

% ./test-float
31.415,926536

Since _ion_writer_vsnprintf_double is static no users would be able to actually use it, but we may want to make a note in the description that any attempt to add separators to the output could interfere with the function.

Would be awesome to get the time to visit #112 and support locales.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... I had not considered that separators might be added elsewhere depending on locale. This isn't valid Ion, so in order to be correct we'd need to prevent that too.

A locale-independent implementation might be best. Maybe Ryu (or similar) would be suitable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear, it's not added by the locale. It's added by the apostrophe in the format string.

I don't think a user would be able to use the function or influence the inclusion of separators with just the locale. It would just be up to us to ensure that we don't inadvertently expose the format capability to the user, and ensure we don't use the format to add separators, without changing this function.

} else if (*cp == '.') {
// The decimal point already uses the correct character
break; // No float will contain more than one decimal point
}
cp++;
}

return IERR_OK;
}

iERR _ion_writer_text_initialize(ION_WRITER *pwriter)
{
iENTER;
Expand Down Expand Up @@ -585,7 +622,8 @@ iERR _ion_writer_text_write_double(ION_WRITER *pwriter, double value)
// the final result must match the original number."
// (https://en.wikipedia.org/wiki/Double-precision_floating-point_format)
// Leaving room for '.', '+'/'-', and 'e', we get 17 + 1 + 1 +1 = 20
sprintf(image, "%.20g", value);
// Use locale-independent formatting to ensure '.' decimal point
IONCHECK(_ion_writer_vsnprintf_double(image, sizeof(image), "%.20g", value));
assert(strlen(image) < sizeof(image));

mark = strchr(image, 'e');
Expand Down Expand Up @@ -648,7 +686,8 @@ iERR _ion_writer_text_write_double_json(ION_WRITER *pwriter, double value) {
// DBL_DIG contains the number of decimal digits that are guaranteed to be preserved
// in a text to double roundtrip without change due to rounding or overflow. We subtract
// one, since the precision is digits right of the decimal point, and DBL_DIG is total digits.
snprintf(image, sizeof(image), "%.*g", DBL_DIG - 1, value);
// Use locale-independent formatting to ensure '.' decimal point
IONCHECK(_ion_writer_vsnprintf_double(image, sizeof(image), "%.*g", DBL_DIG - 1, value));

for (mark = image; *mark == ' '; ) mark++; // strip leading spaces
IONCHECK(_ion_writer_text_append_ascii_cstr(pwriter->output, mark));
Expand Down
89 changes: 89 additions & 0 deletions test/test_ion_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "ion_event_stream.h"
#include "ion_helpers.h"
#include "ion_test_util.h"
#include <locale.h>

class WriterTest : public ::testing::Test {
protected:
Expand Down Expand Up @@ -106,3 +107,91 @@ TEST_F(WriterTest, TextWriterCloseMustFlushStream) {

ASSERT_EQ(file_size, 4);
}

TEST_F(WriterTest, TextWriterUsesCorrectDecimalPointRegardlessOfLocale) {
hWRITER writer = NULL;
ION_WRITER_OPTIONS options;
BYTE buffer[1024];
SIZE bytes_written;
iERR err = IERR_OK;
std::string output; // Declare early to avoid C++ goto issues
char original_locale_copy[256] = {0};
char locale_before_writer_copy[256] = {0};
const char* test_locale = nullptr;
char *original_locale = nullptr;
char *locale_before_writer = nullptr;
char *locale_after_writer = nullptr;

// Save original locale
original_locale = setlocale(LC_ALL, NULL);
if (original_locale && strlen(original_locale) < sizeof(original_locale_copy)) {
strcpy(original_locale_copy, original_locale);
}

// Try multiple common locales that use comma as decimal separator
const char* comma_locales[] = {
"uk_UA.UTF-8", // Ukrainian
"de_DE.UTF-8", // German
"fr_FR.UTF-8", // French
"es_ES.UTF-8", // Spanish
"it_IT.UTF-8", // Italian
"ru_RU.UTF-8", // Russian
nullptr
};

test_locale = nullptr;
for (int i = 0; comma_locales[i] != nullptr; i++) {
test_locale = setlocale(LC_ALL, comma_locales[i]);
if (test_locale) {
break; // Successfully set a locale with comma as decimal separator
}
}

// If no locale with comma as decimal separator is available, skip this test
if (!test_locale) {
GTEST_SKIP() << "No locale with comma as decimal separator available - cannot test locale independence";
}

// Store locale to verify later that the writer does not change it
locale_before_writer = setlocale(LC_ALL, NULL);
if (locale_before_writer && strlen(locale_before_writer) < sizeof(locale_before_writer_copy)) {
strcpy(locale_before_writer_copy, locale_before_writer);
}

memset(&options, 0, sizeof(ION_WRITER_OPTIONS));
options.output_as_binary = FALSE;
options.pretty_print = FALSE;

memset(buffer, 0, sizeof(buffer));
IONCHECK(ion_writer_open_buffer(&writer, buffer, sizeof(buffer), &options));
IONCHECK(ion_writer_write_float(writer, 1.5f));
IONCHECK(ion_writer_finish(writer, &bytes_written));
IONCHECK(ion_writer_close(writer));

// Verify global locale is unchanged after writer operations
locale_after_writer = setlocale(LC_ALL, NULL);
EXPECT_STREQ(locale_before_writer_copy, locale_after_writer)
<< "Global locale changed during writer operation - before: " << locale_before_writer_copy
<< ", after: " << locale_after_writer;

// Convert to string for verification
output.assign((char*)buffer, bytes_written);

// Should contain "1.5" and NOT contain any commas as decimal separators
EXPECT_NE(std::string::npos, output.find("1.5"))
<< "Expected '1.5' in output, got: " << output;
EXPECT_EQ(std::string::npos, output.find("1,5"))
<< "Found incorrect comma decimal separator in output: " << output;

writer = NULL;

fail:
// Restore original locale
if (original_locale_copy[0]) {
setlocale(LC_ALL, original_locale_copy);
}
if (writer) {
ion_writer_close(writer);
}
ASSERT_EQ(IERR_OK, err);
}