From 0fd27eece1d27b6218d61bb79a8557dffacfdd74 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 3 Apr 2025 13:08:09 +0700 Subject: [PATCH 1/7] Make shallow copy of data_ --- quasardb/numpy/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/quasardb/numpy/__init__.py b/quasardb/numpy/__init__.py index 29aa9ec0..d40e35c5 100644 --- a/quasardb/numpy/__init__.py +++ b/quasardb/numpy/__init__.py @@ -787,6 +787,12 @@ def write_arrays( assert len(dtype) is len(cinfos) if index is None and isinstance(data_, dict) and "$timestamp" in data_: + # Create shallow copy of `data_` so that we don't modify the reference, i.e. + # delete keys. + # + # This ensures that the user can call the same function multiple times without + # side-effects. + data_ = data_.copy() index_ = data_.pop("$timestamp") assert "$timestamp" not in data_ elif index is not None: From 2b2c2f131d9376aa870f4be2bc8150e60a643bac Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Thu, 3 Apr 2025 13:11:41 +0700 Subject: [PATCH 2/7] Fix formatting --- quasardb/numpy/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quasardb/numpy/__init__.py b/quasardb/numpy/__init__.py index d40e35c5..f86453d0 100644 --- a/quasardb/numpy/__init__.py +++ b/quasardb/numpy/__init__.py @@ -792,7 +792,7 @@ def write_arrays( # # This ensures that the user can call the same function multiple times without # side-effects. - data_ = data_.copy() + data_ = data_.copy() index_ = data_.pop("$timestamp") assert "$timestamp" not in data_ elif index is not None: From b3b967326004a52d726b2e2e879877ad0c04137e Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 7 Apr 2025 09:40:18 +0700 Subject: [PATCH 3/7] Use proper env-based shell --- scripts/teamcity/20.test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/teamcity/20.test.sh b/scripts/teamcity/20.test.sh index a3b052f2..c3d5d2aa 100644 --- a/scripts/teamcity/20.test.sh +++ b/scripts/teamcity/20.test.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash SCRIPT_DIR="$(cd "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)" From 0ff0bd80f6491ee2bb535ec40643940428a0627e Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 7 Apr 2025 09:43:29 +0700 Subject: [PATCH 4/7] Add test case that tests we can use `$timestamp` as dict key --- tests/test_numpy.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/test_numpy.py b/tests/test_numpy.py index c3c2f761..78a1a134 100644 --- a/tests/test_numpy.py +++ b/tests/test_numpy.py @@ -141,6 +141,30 @@ def test_arrays_read_write_data_as_dict(array_with_index_and_table, qdbd_connect assert_indexed_arrays_equal((index, data), res) +@conftest.override_cdtypes("native") +def test_provide_index_as_dict(array_with_index_and_table, qdbd_connection): + """ + * qdbnp.write_arrays() + * => pinned writer + * => no conversion + """ + (ctype, dtype, data, index, table) = array_with_index_and_table + + col = table.column_id_by_index(0) + qdbnp.write_arrays( + {"$timestamp": index, col: data}, + qdbd_connection, + table, + dtype=dtype, + infer_types=False, + truncate=True, + ) + + res = qdbnp.read_array(table, col) + + assert_indexed_arrays_equal((index, data), res) + + ###### # # Arrays tests From 51b6de94953645bea68644567ab3b7fedf519843 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 7 Apr 2025 09:51:48 +0700 Subject: [PATCH 5/7] Fix comment, cleanup code --- tests/test_numpy.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_numpy.py b/tests/test_numpy.py index 78a1a134..69dd3c70 100644 --- a/tests/test_numpy.py +++ b/tests/test_numpy.py @@ -144,15 +144,16 @@ def test_arrays_read_write_data_as_dict(array_with_index_and_table, qdbd_connect @conftest.override_cdtypes("native") def test_provide_index_as_dict(array_with_index_and_table, qdbd_connection): """ - * qdbnp.write_arrays() - * => pinned writer - * => no conversion + For convenience, we allow the `$timestamp` index also to provided as a dict + key. """ (ctype, dtype, data, index, table) = array_with_index_and_table col = table.column_id_by_index(0) + dict_ = {"$timestamp": index, col: data} + qdbnp.write_arrays( - {"$timestamp": index, col: data}, + dict_, qdbd_connection, table, dtype=dtype, From 6dff6b17149f96c7b3de44484edd0934e54f80af Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 7 Apr 2025 09:52:01 +0700 Subject: [PATCH 6/7] Add test case for QDB-16279 to protect against regressions --- tests/test_numpy.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/test_numpy.py b/tests/test_numpy.py index 69dd3c70..ed4f8347 100644 --- a/tests/test_numpy.py +++ b/tests/test_numpy.py @@ -165,6 +165,33 @@ def test_provide_index_as_dict(array_with_index_and_table, qdbd_connection): assert_indexed_arrays_equal((index, data), res) +@conftest.override_cdtypes("native") +def test_provide_index_as_dict_has_no_side_effects_sc16279(array_with_index_and_table, qdbd_connection): + """ + In earlier versions of the API, we `pop`'ed the $timestamp from the provided dict without making a + shallow copy of the dict. This would cause re-invocations of the same function (e.g. in case of an + error) to not work, as the original dict had been modified. + + The test below has been confirmed to trigger the original bug described in QDB-16279, and was fixed + afterwards. + """ + (ctype, dtype, data, index, table) = array_with_index_and_table + + col = table.column_id_by_index(0) + + dict_ = {"$timestamp": index, col: data} + qdbnp.write_arrays( + dict_, + qdbd_connection, + table, + dtype=dtype, + infer_types=False, + truncate=True, + ) + + assert '$timestamp' in dict_ + + ###### # From 483ef20829e7cad306ddc9ec20cad8340c2f97a1 Mon Sep 17 00:00:00 2001 From: Leon Mergen Date: Mon, 7 Apr 2025 09:57:32 +0700 Subject: [PATCH 7/7] Fix formatting --- tests/test_numpy.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_numpy.py b/tests/test_numpy.py index ed4f8347..86dda992 100644 --- a/tests/test_numpy.py +++ b/tests/test_numpy.py @@ -165,8 +165,11 @@ def test_provide_index_as_dict(array_with_index_and_table, qdbd_connection): assert_indexed_arrays_equal((index, data), res) + @conftest.override_cdtypes("native") -def test_provide_index_as_dict_has_no_side_effects_sc16279(array_with_index_and_table, qdbd_connection): +def test_provide_index_as_dict_has_no_side_effects_sc16279( + array_with_index_and_table, qdbd_connection +): """ In earlier versions of the API, we `pop`'ed the $timestamp from the provided dict without making a shallow copy of the dict. This would cause re-invocations of the same function (e.g. in case of an @@ -189,8 +192,7 @@ def test_provide_index_as_dict_has_no_side_effects_sc16279(array_with_index_and_ truncate=True, ) - assert '$timestamp' in dict_ - + assert "$timestamp" in dict_ ######