diff --git a/tests/test_numbers.py b/tests/test_numbers.py index 61fda78..8de6d80 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -1,6 +1,35 @@ -import sys +import math +import decimal -from yyjson import Document, ReaderFlags +import pytest + +from yyjson import Document, ReaderFlags, WriterFlags + + +test_numbers = [ + 1, + 2, + -1, + -2, + 1.0, + 2.0, + -1.0, + -2.0, + 2**63, + -2**63, + -2**63-1, + 2**64, + -2**64, + 2**64+1, + 2**128, + 2**128+1, + -2**128, + -2**128-1, +] + + +def is_bignum(num): + return -2**63 <= num < 2**64 def test_big_numbers(): @@ -10,7 +39,7 @@ def test_big_numbers(): The test set is from: https://blog.trl.sn/blog/what-is-a-json-number/#python-3-8-1 """ - test_numbers = [ + test_str_numbers = [ "10", "1000000000", "10000000000000001", @@ -23,10 +52,148 @@ def test_big_numbers(): "1E+309", ] - for num in test_numbers: - deserialized = Document(num, flags=ReaderFlags.NUMBERS_AS_RAW) + for num in test_str_numbers: + deserialized = Document(num, flags=ReaderFlags.NUMBERS_AS_DECIMAL) obj = deserialized.as_obj assert str(obj) == num assert Document(obj).dumps() == num + +def test_numbers_no_flags(): + ''' + Verify expected behavior with no flags - big numbers are converted to + python floats. + ''' + for num in test_numbers: + obj = Document(num).dumps() + assert str(num) == obj + + val = Document(obj).as_obj + if is_bignum(num): + assert val == num + assert isinstance(val, (int, float)) + else: + assert isinstance(val, float) + + +def test_numbers_as_raw(): + ''' + Verify expected behavior of NUMBERS_AS_RAW - all numbers are deserialized + as python ints/floats. 
+ ''' + for num in test_numbers: + obj = Document(num).dumps() + assert str(num) == obj + + val = Document(obj, flags=ReaderFlags.NUMBERS_AS_RAW).as_obj + assert val == num + assert not isinstance(val, decimal.Decimal) + + +def test_numbers_as_decimal(): + ''' + Verify expected behavior of NUMBERS_AS_DECIMAL - all numbers are + deserialized as python decimal.Decimals. + ''' + for num in test_numbers: + obj = Document(num).dumps() + assert str(num) == obj + + val = Document(obj, flags=ReaderFlags.NUMBERS_AS_DECIMAL).as_obj + assert val == num + assert isinstance(val, decimal.Decimal) + + +def test_big_numbers_as_raw(): + ''' + Verify expected behavior of BIGNUM_AS_RAW - numbers GE 2**64 or LT -2**63 + are deserialized as python ints/floats. + ''' + for num in test_numbers: + obj = Document(num).dumps() + assert str(num) == obj + + val = Document(obj, flags=ReaderFlags.BIGNUM_AS_RAW).as_obj + assert val == num + assert not isinstance(val, decimal.Decimal) + + +def test_big_numbers_as_decimal(): + ''' + Verify expected behavior of BIG_NUMBERS_AS_DECIMAL - numbers GE 2**64 or + LT -2**63 are deserialized as python decimal.Decimals. + ''' + for num in test_numbers: + obj = Document(num).dumps() + assert str(num) == obj + + val = Document(obj, flags=ReaderFlags.BIG_NUMBERS_AS_DECIMAL).as_obj + assert val == num + if is_bignum(num): + assert not isinstance(val, decimal.Decimal) + else: + assert isinstance(val, decimal.Decimal) + + +def test_float_inf_nan(): + ''' + Verify expected behavior of deserializing Infinity, -Infinity, and NaN + special values. 
+ ''' + inf = float('inf') + ninf = float('-inf') + nan = float('nan') + + mesg = 'nan or inf number is not allowed' + with pytest.raises(ValueError) as exc: + Document([inf]).dumps() + assert exc.type is ValueError + assert exc.value.args[0] == mesg + + mesg = 'nan or inf number is not allowed' + with pytest.raises(ValueError) as exc: + Document([ninf]).dumps() + assert exc.type is ValueError + assert exc.value.args[0] == mesg + + with pytest.raises(ValueError) as exc: + Document([nan]).dumps() + assert exc.type is ValueError + assert exc.value.args[0] == mesg + + obj = Document([inf, ninf, nan]).dumps(flags=WriterFlags.ALLOW_INF_AND_NAN) + assert obj == '[Infinity,-Infinity,NaN]' + + mesg = 'unexpected character, expected a valid JSON value' + with pytest.raises(ValueError) as exc: + Document(obj).as_obj + assert exc.type is ValueError + assert exc.value.args[0] == mesg + + for flags in ( + ReaderFlags.ALLOW_INF_AND_NAN, + ReaderFlags.ALLOW_INF_AND_NAN | ReaderFlags.BIGNUM_AS_RAW, + ReaderFlags.ALLOW_INF_AND_NAN | ReaderFlags.NUMBERS_AS_RAW, + ReaderFlags.ALLOW_INF_AND_NAN | ReaderFlags.BIG_NUMBERS_AS_DECIMAL, + ): + val = Document(obj, flags=flags).as_obj + assert isinstance(val, list) + assert len(val) == 3 + assert isinstance(val[0], float) + assert val[0] == inf + assert isinstance(val[1], float) + assert val[1] == ninf + assert isinstance(val[2], float) + assert math.isnan(val[2]) + + flags = ReaderFlags.ALLOW_INF_AND_NAN | ReaderFlags.NUMBERS_AS_DECIMAL + val = Document(obj, flags=flags).as_obj + assert isinstance(val, list) + assert len(val) == 3 + assert isinstance(val[0], decimal.Decimal) + assert val[0] == decimal.Decimal(inf) + assert isinstance(val[1], decimal.Decimal) + assert val[1] == decimal.Decimal(ninf) + assert isinstance(val[2], decimal.Decimal) + assert math.isnan(val[2]) diff --git a/yyjson/__init__.py b/yyjson/__init__.py index aa8bed3..3db9c46 100644 --- a/yyjson/__init__.py +++ b/yyjson/__init__.py @@ -4,6 +4,7 @@ from cyyjson import 
Document +READER_RAW_AS_DECIMAL = 0x100 class ReaderFlags(enum.IntFlag): """ @@ -23,17 +24,17 @@ class ReaderFlags(enum.IntFlag): #: Allow inf/nan number and literal, case-insensitive, such as 1e999, NaN, #: inf, -Infinity ALLOW_INF_AND_NAN = 0x10 - #: Alias for `NUMBERS_AS_DECIMAL`. + #: Read all numbers as native Python ints or floats (not Decimal). NUMBERS_AS_RAW = 0x20 #: Read all numbers as Decimal objects instead of native types. This option #: is useful for preserving the exact precision of numbers or for handling #: numbers that are too large to fit in a native type. - NUMBERS_AS_DECIMAL = 0x20 - #: Alias for `BIG_NUMBERS_AS_DECIMAL`. + NUMBERS_AS_DECIMAL = 0x20 | READER_RAW_AS_DECIMAL + #: Read big numbers as native Python ints or floats (not Decimal). BIGNUM_AS_RAW = 0x80 #: Like `NUMBERS_AS_DECIMAL`, but only for numbers that are too large to #: fit in a native type. - BIG_NUMBERS_AS_DECIMAL = 0x80 + BIG_NUMBERS_AS_DECIMAL = 0x80 | READER_RAW_AS_DECIMAL class WriterFlags(enum.IntFlag): diff --git a/yyjson/document.c b/yyjson/document.c index 69c6c5f..f582e2c 100644 --- a/yyjson/document.c +++ b/yyjson/document.c @@ -10,8 +10,8 @@ self->i_doc = NULL; \ } -static PyObject *mut_element_to_primitive(yyjson_mut_val *val); -static PyObject *element_to_primitive(yyjson_val *val); +static PyObject *mut_element_to_primitive(yyjson_mut_val *val, bool raw_as_decimal); +static PyObject *element_to_primitive(yyjson_val *val, bool raw_as_decimal); static PyObject *pathlib = NULL; static PyObject *path = NULL; @@ -55,11 +55,28 @@ static inline PyObject *unicode_from_str(const char *src, size_t len) { return PyUnicode_DecodeUTF8(src, len, NULL); } +static bool is_float(const char * str, size_t str_len) { + size_t ii = 0; + + for (ii = 0; ii < str_len; ii++) { + switch (str[ii]) { + case 'e': + case 'E': + case 'n': + case 'N': + case '.': + return true; + } + } + + return false; +} + /** * Recursively convert the given value into an equivalent high-level Python * object. 
**/ -static PyObject *element_to_primitive(yyjson_val *val) { +static PyObject *element_to_primitive(yyjson_val *val, bool raw_as_decimal) { yyjson_type type = yyjson_get_type(val); switch (type) { @@ -100,7 +117,7 @@ static PyObject *element_to_primitive(yyjson_val *val) { size_t idx = 0; while ((obj_val = yyjson_arr_iter_next(&iter))) { - py_val = element_to_primitive(obj_val); + py_val = element_to_primitive(obj_val, raw_as_decimal); if (!py_val) { return NULL; } @@ -131,7 +148,7 @@ static PyObject *element_to_primitive(yyjson_val *val) { str = yyjson_get_str(obj_key); py_key = unicode_from_str(str, str_len); - py_val = element_to_primitive(obj_val); + py_val = element_to_primitive(obj_val, raw_as_decimal); if (!py_key) { return NULL; @@ -152,12 +169,28 @@ static PyObject *element_to_primitive(yyjson_val *val) { return dict; } case YYJSON_TYPE_RAW: { - size_t str_len = yyjson_get_len(val); - const char *str = yyjson_get_raw(val); - PyObject *uni = unicode_from_str(str, str_len); - PyObject *result = PyObject_CallOneArg(YY_DecimalClass, uni); - Py_DECREF(uni); - return result; + if (raw_as_decimal) { + size_t str_len = yyjson_get_len(val); + const char *str = yyjson_get_raw(val); + PyObject *uni = unicode_from_str(str, str_len); + PyObject *result = PyObject_CallOneArg(YY_DecimalClass, uni); + Py_DECREF(uni); + return result; + + } else { + size_t str_len = yyjson_get_len(val); + const char * str = yyjson_get_raw(val); + + if (is_float(str, str_len)) { + PyObject *uni = unicode_from_str(str, str_len); + PyObject *result = PyFloat_FromString(uni); + Py_DECREF(uni); + return result; + } else { + return PyLong_FromString(str, NULL, 10); + } + + } } case YYJSON_TYPE_NONE: default: @@ -170,7 +203,7 @@ static PyObject *element_to_primitive(yyjson_val *val) { * Recursively convert the given value into an equivalent high-level Python * object. 
**/ -static PyObject *mut_element_to_primitive(yyjson_mut_val *val) { +static PyObject *mut_element_to_primitive(yyjson_mut_val *val, bool raw_as_decimal) { yyjson_type type = yyjson_mut_get_type(val); switch (type) { @@ -212,7 +245,7 @@ static PyObject *mut_element_to_primitive(yyjson_mut_val *val) { size_t idx = 0; while ((obj_val = yyjson_mut_arr_iter_next(&iter))) { - py_val = mut_element_to_primitive(obj_val); + py_val = mut_element_to_primitive(obj_val, raw_as_decimal); if (!py_val) { return NULL; } @@ -237,8 +270,8 @@ static PyObject *mut_element_to_primitive(yyjson_mut_val *val) { while ((obj_key = yyjson_mut_obj_iter_next(&iter))) { obj_val = yyjson_mut_obj_iter_get_val(obj_key); - py_key = mut_element_to_primitive(obj_key); - py_val = mut_element_to_primitive(obj_val); + py_key = mut_element_to_primitive(obj_key, raw_as_decimal); + py_val = mut_element_to_primitive(obj_val, raw_as_decimal); if (!py_key) { return NULL; @@ -259,12 +292,28 @@ static PyObject *mut_element_to_primitive(yyjson_mut_val *val) { return dict; } case YYJSON_TYPE_RAW: { - size_t str_len = yyjson_mut_get_len(val); - const char *str = yyjson_mut_get_raw(val); - PyObject *uni = unicode_from_str(str, str_len); - PyObject *result = PyObject_CallOneArg(YY_DecimalClass, uni); - Py_DECREF(uni); - return result; + if (raw_as_decimal) { + size_t str_len = yyjson_mut_get_len(val); + const char *str = yyjson_mut_get_raw(val); + PyObject *uni = unicode_from_str(str, str_len); + PyObject *result = PyObject_CallOneArg(YY_DecimalClass, uni); + Py_DECREF(uni); + return result; + + } else { + size_t str_len = yyjson_mut_get_len(val); + const char * str = yyjson_mut_get_raw(val); + + if (is_float(str, str_len)) { + PyObject *uni = unicode_from_str(str, str_len); + PyObject *result = PyFloat_FromString(uni); + Py_DECREF(uni); + return result; + } else { + return PyLong_FromString(str, NULL, 10); + } + + } } case YYJSON_TYPE_NONE: default: @@ -476,6 +525,10 @@ static int Document_init(DocumentObject 
*self, PyObject *args, PyObject *kwds) { return -1; } +#define READER_RAW_AS_DECIMAL 0x100 + self->raw_as_decimal = (r_flag & READER_RAW_AS_DECIMAL) != 0; + r_flag = r_flag & ~READER_RAW_AS_DECIMAL; /* clear the Python-only bit before yyjson sees it; XOR would set it when absent */ + if (default_func && default_func != Py_None && !PyCallable_Check(default_func)) { PyErr_SetString(PyExc_TypeError, "default must be callable"); return -1; @@ -584,9 +637,9 @@ static int Document_init(DocumentObject *self, PyObject *args, PyObject *kwds) { */ static PyObject *Document_as_obj(DocumentObject *self, void *closure) { if (self->i_doc) { - return element_to_primitive(yyjson_doc_get_root(self->i_doc)); + return element_to_primitive(yyjson_doc_get_root(self->i_doc), self->raw_as_decimal); } else { - return mut_element_to_primitive(yyjson_mut_doc_get_root(self->m_doc)); + return mut_element_to_primitive(yyjson_mut_doc_get_root(self->m_doc), self->raw_as_decimal); } } @@ -724,7 +777,7 @@ static PyObject *Document_get_pointer(DocumentObject *self, PyObject *args) { return NULL; } - return element_to_primitive(result); + return element_to_primitive(result, self->raw_as_decimal); } else { yyjson_mut_val *result = yyjson_mut_doc_ptr_getx(self->m_doc, pointer, pointer_len, NULL, &err); @@ -736,7 +789,7 @@ static PyObject *Document_get_pointer(DocumentObject *self, PyObject *args) { return NULL; } - return mut_element_to_primitive(result); + return mut_element_to_primitive(result, self->raw_as_decimal); } } diff --git a/yyjson/document.h b/yyjson/document.h index c260b26..6ccddeb 100644 --- a/yyjson/document.h +++ b/yyjson/document.h @@ -11,8 +11,10 @@ */ typedef struct { PyObject_HEAD - /** A mutable document. */ - yyjson_mut_doc* m_doc; + /** True when RAW number values are converted to decimal.Decimal. */ + bool raw_as_decimal; + /** A mutable document. */ + yyjson_mut_doc* m_doc; /** An immutable document. */ yyjson_doc* i_doc; /** The memory allocator in use for this document. */ @@ -23,4 +25,4 @@ typedef struct { extern PyTypeObject DocumentType; -#endif \ No newline at end of file +#endif