diff --git a/spotfire/sbdf_helpers.c b/spotfire/sbdf_helpers.c
index c2f97e2..f7ae388 100644
--- a/spotfire/sbdf_helpers.c
+++ b/spotfire/sbdf_helpers.c
@@ -8,33 +8,73 @@
 /* Utility functions for opening FILE pointers Pythonically from ``Union[str,bytes,int]``
  * (similar in behavior to Python's open() function).
  */
-FILE *_pathlike_to_fileptr(PyObject *file, const char* mode) {
+#if defined(_WIN32)
+/* Open the file named by the str object ``path`` through the wide-character CRT API so that
+ * names outside the active ANSI code page can be opened.  Returns NULL on failure: a Python
+ * exception is set if ``mode`` or ``path`` cannot be converted, otherwise errno is set.
+ */
+static FILE *wfopen_from_unicode(PyObject *path, const char *mode) {
+    FILE *result;
+    wchar_t wmode[8];
+    wchar_t *wpath;
+    size_t mode_len;
+
+    /* Convert one character less than the buffer holds: mbstowcs() only writes the
+     * terminator when it fits, so add it explicitly.
+     */
+    mode_len = mbstowcs(wmode, mode, sizeof(wmode) / sizeof(wmode[0]) - 1);
+    if (mode_len == (size_t)-1) {
+        PyErr_SetString(PyExc_ValueError, "invalid mode");
+        return NULL;
+    }
+    wmode[mode_len] = L'\0';
+
+    /* PyUnicode_AsWideCharString() returns a wchar_t buffer that must be released with PyMem_Free() */
+    wpath = PyUnicode_AsWideCharString(path, NULL);
+    if (!wpath) return NULL;
+    result = _wfopen(wpath, wmode);
+    PyMem_Free(wpath);
+    return result;
+}
+#endif
+
+FILE *_pathlike_to_fileptr(PyObject *file, const char *mode) {
     FILE *the_file = NULL;
     int fd;
-    char *filename;
-    PyObject *filename_obj;
 
     /* int: use the given file descriptor */
-    if(PyLong_Check(file)) {
+    if (PyLong_Check(file)) {
         fd = PyObject_AsFileDescriptor(file);
-        if(fd == -1) return NULL;
+        if (fd == -1) return NULL;
         the_file = fdopen(fd, mode);
+#if defined(_WIN32)
+    /* unicode/str: open through the wide-character API */
+    } else if (PyUnicode_Check(file)) {
+        the_file = wfopen_from_unicode(file, mode);
+    /* bytes: decode the given file name as utf-8, then open it like a str */
+    } else if (PyBytes_Check(file)) {
+        PyObject *unicode_obj = PyUnicode_FromEncodedObject(file, "utf-8", "strict");
+        if (!unicode_obj) return NULL;
+        the_file = wfopen_from_unicode(unicode_obj, mode);
+        Py_DECREF(unicode_obj);
+#else
     /* bytes: use the given file name */
-    } else if(PyBytes_Check(file)) {
-        filename = PyBytes_AsString(file);
-        the_file = fopen(filename, mode);
-    /* unicode/str: decode the given filename as utf-8 */
-    } else if(PyUnicode_Check(file)) {
-        if(!PyUnicode_FSConverter(file, &filename_obj)) return NULL;
-        filename = PyBytes_AsString(filename_obj);
-        the_file = fopen(filename, mode);
-        Py_XDECREF(filename_obj);
+    } else if (PyBytes_Check(file)) {
+        the_file = fopen(PyBytes_AsString(file), mode);
+    /* unicode/str: encode the given file name with the filesystem encoding */
+    } else if (PyUnicode_Check(file)) {
+        PyObject *filename_obj;
+        if (!PyUnicode_FSConverter(file, &filename_obj)) return NULL;
+        the_file = fopen(PyBytes_AsString(filename_obj), mode);
+        Py_DECREF(filename_obj);
+#endif
     /* else: raise an exception */
     } else {
         PyErr_SetString(PyExc_TypeError, "str, bytes, or integer argument expected");
     }
 
-    if(the_file == NULL) {
+    /* Report an OS-level failure from errno, but keep a more specific exception raised above */
+    if (the_file == NULL && !PyErr_Occurred()) {
         PyErr_SetFromErrno(PyExc_IOError);
     }
     return the_file;
diff --git a/spotfire/test/test_sbdf.py b/spotfire/test/test_sbdf.py
index b7af2fc..cdc6b26 100644
--- a/spotfire/test/test_sbdf.py
+++ b/spotfire/test/test_sbdf.py
@@ -1,9 +1,11 @@
 """Tests for importing and exporting data to SBDF files."""
 
+from pathlib import Path
 import datetime
 import decimal
 import unittest
 import tempfile
+import os
 
 import pkg_resources
 import pandas
@@ -474,3 +476,23 @@ def test_image_pil(self):
         self.assertEqual(df2.columns[0], 'x')
         val = df2.at[0, "x"]
         self.assertEqual(val[0:8], b'\x89PNG\x0d\x0a\x1a\x0a')
+
+    def test_export_import_unicode_path(self):
+        """Test export and import with a Unicode file path."""
+        dataframe = pandas.DataFrame({"col": [1, 2, 3], "txt": ["a", "b", "c"]})
+        with tempfile.TemporaryDirectory() as tempdir:
+            unicode_filename = Path(tempdir) / "日本語ファイル" / "test.sbdf"
+            os.makedirs(os.path.dirname(unicode_filename), exist_ok=True)
+            # Export to Unicode path
+            sbdf.export_data(dataframe, str(unicode_filename))
+
+            # Import from Unicode path
+            imported = sbdf.import_data(str(unicode_filename))
+
+            # Check roundtrip
+            pandas.testing.assert_frame_equal(imported[["col", "txt"]], dataframe, check_dtype=False)
+            # Check dtype of the column
+            # NOTE(review): these assertions inspect the input ``dataframe``, not ``imported``, so
+            # they cannot fail because of the export/import roundtrip -- confirm which was intended.
+            self.assertEqual(dataframe["col"].dtype, "int64")
+            self.assertEqual(dataframe["txt"].dtype, "object")