From bd09c31119ef04e4edaf234fdc3a22b3cae1e8bb Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Mon, 1 Sep 2025 12:01:53 +0800 Subject: [PATCH 1/8] [CPP/C] implement default encoding/compression configuration interface --- cpp/src/common/global.h | 68 ++++++++------- .../table_view/tsfile_writer_table_test.cc | 85 +++++++++++++++++++ 2 files changed, 122 insertions(+), 31 deletions(-) diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h index 50ca8c8a8..e9fc98329 100644 --- a/cpp/src/common/global.h +++ b/cpp/src/common/global.h @@ -58,51 +58,53 @@ FORCE_INLINE int set_global_time_compression(uint8_t compression) { } FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) { - TSDataType dtype = static_cast(data_type); + const TSDataType dtype = static_cast(data_type); + const TSEncoding encoding_type = static_cast(encoding); + + // Validate input parameters ASSERT(dtype >= BOOLEAN && dtype <= STRING); - TSEncoding encoding_type = static_cast(encoding); - ASSERT(encoding >= PLAIN && encoding <= FREQ); + ASSERT(encoding >= PLAIN && encoding <= SPRINTZ); + + // Check encoding support for each data type switch (dtype) { case BOOLEAN: - if (encoding_type != PLAIN) { - return E_NOT_SUPPORT; - } + if (encoding_type != PLAIN) return E_NOT_SUPPORT; g_config_value_.boolean_encoding_type_ = encoding_type; break; + case INT32: - if (encoding_type != PLAIN && encoding_type != TS_2DIFF && - encoding_type != GORILLA) { - return E_NOT_SUPPORT; - } - g_config_value_.int32_encoding_type_ = encoding_type; - break; case INT64: - if (encoding_type != PLAIN && encoding_type != TS_2DIFF && - encoding_type != GORILLA) { - return E_NOT_SUPPORT; - } - g_config_value_.int64_encoding_type_ = encoding_type; - break; - case STRING: - if (encoding_type != PLAIN) { + if (encoding_type != PLAIN && + encoding_type != TS_2DIFF && + encoding_type != GORILLA && + encoding_type != ZIGZAG && + encoding_type != RLE && + encoding_type != SPRINTZ) { return E_NOT_SUPPORT; - } - g_config_value_.string_encoding_type_ = encoding_type; + } + dtype == INT32 ? g_config_value_.int32_encoding_type_ = encoding_type + : g_config_value_.int64_encoding_type_ = encoding_type; break; + case FLOAT: - if (encoding_type != PLAIN && encoding_type != TS_2DIFF && - encoding_type != GORILLA) { + case DOUBLE: + if (encoding_type != PLAIN && + encoding_type != TS_2DIFF && + encoding_type != GORILLA && + encoding_type != SPRINTZ) { return E_NOT_SUPPORT; - } - g_config_value_.float_encoding_type_ = encoding_type; + } + dtype == FLOAT ? g_config_value_.float_encoding_type_ = encoding_type + : g_config_value_.double_encoding_type_ = encoding_type; break; - case DOUBLE: - if (encoding_type != PLAIN && encoding_type != TS_2DIFF && - encoding_type != GORILLA) { + + case STRING: + if (encoding_type != PLAIN && encoding_type != DICTIONARY) { return E_NOT_SUPPORT; } - g_config_value_.double_encoding_type_ = encoding_type; + g_config_value_.string_encoding_type_ = encoding_type; break; + default: break; } @@ -111,7 +113,11 @@ FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) { FORCE_INLINE int set_global_compression(uint8_t compression) { ASSERT(compression >= UNCOMPRESSED && compression <= LZ4); - if (compression != UNCOMPRESSED && compression != LZ4) { + if (compression != UNCOMPRESSED && + compression != SNAPPY && + compression != GZIP && + compression != LZO && + compression != LZ4) { return E_NOT_SUPPORT; } g_config_value_.default_compression_type_ = diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc b/cpp/test/writer/table_view/tsfile_writer_table_test.cc index 2bc9fd9af..2ef08468d 100644 --- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc +++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc @@ -987,3 +987,88 @@ TEST_F(TsFileWriterTableTest, DiffCodecTypes) { ASSERT_EQ(reader.close(), common::E_OK); delete[] literal; } + +TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { + // 1. Test setting global compression type + ASSERT_EQ(E_OK, set_global_compression(SNAPPY)); + + // 2. Test setting encoding types for different data types + ASSERT_EQ(E_OK, set_datatype_encoding(INT32, SPRINTZ)); + ASSERT_EQ(E_OK, set_datatype_encoding(INT64, TS_2DIFF)); + ASSERT_EQ(E_OK, set_datatype_encoding(FLOAT, GORILLA)); + ASSERT_EQ(E_OK, set_datatype_encoding(DOUBLE, GORILLA)); + ASSERT_EQ(E_OK, set_datatype_encoding(STRING, DICTIONARY)); + + // 3. Create schema using these configurations + std::vector measurement_schemas; + std::vector column_categories; + + std::vector measurement_names = { + "int32_sprintz", "int64_ts2diff", "float_gorilla", + "double_gorilla", "string_dict" + }; + + std::vector data_types = { + INT32, INT64, FLOAT, DOUBLE, STRING + }; + + std::vector encodings = { + SPRINTZ, TS_2DIFF, GORILLA, GORILLA, DICTIONARY + }; + + // Create measurement schemas with configured encodings and compression + for (int i = 0; i < measurement_names.size(); i++) { + measurement_schemas.emplace_back(new MeasurementSchema( + measurement_names[i], data_types[i], encodings[i], SNAPPY)); + column_categories.emplace_back(ColumnCategory::FIELD); + } + + // 4. Write and verify data + auto table_schema = new TableSchema("configTestTable", measurement_schemas, column_categories); + auto tsfile_table_writer = std::make_shared(&write_file_, table_schema); + + // Create test data tablet + Tablet tablet(table_schema->get_measurement_names(), table_schema->get_data_types(), 10); + char* literal = new char[std::strlen("test_str") + 1]; + std::strcpy(literal, "test_str"); + String literal_str(literal, std::strlen("test_str")); + + // Fill tablet with test values + for (int i = 0; i < 10; i++) { + tablet.add_timestamp(i, static_cast(i)); + tablet.add_value(i, 0, (int32_t)32); // INT32 with SPRINTZ encoding + tablet.add_value(i, 1, (int64_t)64); // INT64 with TS_2DIFF encoding + tablet.add_value(i, 2, (float)1.0); // FLOAT with GORILLA encoding + tablet.add_value(i, 3, (double)2.0); // DOUBLE with GORILLA encoding + tablet.add_value(i, 4, literal_str); // STRING with DICTIONARY encoding + } + + // Write and flush data + ASSERT_EQ(tsfile_table_writer->write_table(tablet), E_OK); + ASSERT_EQ(tsfile_table_writer->flush(), E_OK); + ASSERT_EQ(tsfile_table_writer->close(), E_OK); + + // 5. Verify read data matches what was written + auto reader = TsFileReader(); + reader.open(write_file_.get_file_path()); + ResultSet* ret = nullptr; + int ret_value = reader.query("configTestTable", measurement_names, 0, 10, ret); + ASSERT_EQ(common::E_OK, ret_value); + + auto table_result_set = (TableResultSet*)ret; + bool has_next = false; + while (IS_SUCC(table_result_set->next(has_next)) && has_next) { + // Verify all values were correctly encoded/decoded + ASSERT_EQ(table_result_set->get_value(2), 32); // INT32 + ASSERT_EQ(table_result_set->get_value(3), 64); // INT64 + ASSERT_FLOAT_EQ(table_result_set->get_value(4), 1.0f); // FLOAT + ASSERT_DOUBLE_EQ(table_result_set->get_value(5), 2.0); // DOUBLE + ASSERT_EQ(table_result_set->get_value(6)->compare(literal_str), 0); // STRING + } + + // 6. Clean up resources + reader.destroy_query_data_set(table_result_set); + ASSERT_EQ(reader.close(), common::E_OK); + delete[] literal; + delete table_schema; +} \ No newline at end of file From e8beec9f7227c3ab6882ae5e64ff8b8dfdec5b16 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Mon, 1 Sep 2025 12:03:58 +0800 Subject: [PATCH 2/8] [CPP/C] implement default encoding/compression configuration interface(c demo) --- cpp/examples/examples.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/examples/examples.cc b/cpp/examples/examples.cc index edbd819a0..a4daa34b1 100644 --- a/cpp/examples/examples.cc +++ b/cpp/examples/examples.cc @@ -23,6 +23,8 @@ int main() { // C++ examples // std::cout << "begin write and read tsfile by cpp" << std::endl; + common::set_global_compression(TS_COMPRESSION_LZ4); + common::set_datatype_encoding(TS_DATATYPE_INT32, TS_ENCODING_TS_2DIFF); demo_write(); demo_read(); std::cout << "begin write and read tsfile by c" << std::endl; From 3b3cf064426283a3421be85e3d538fa865cf404c Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Mon, 1 Sep 2025 12:24:45 +0800 Subject: [PATCH 3/8] [CPP/C] implement default encoding/compression configuration interface(c demo) --- cpp/examples/c_examples/demo_write.c | 2 ++ cpp/examples/examples.cc | 2 -- cpp/src/common/global.h | 17 ++++++++--- cpp/src/cwrapper/tsfile_cwrapper.cc | 20 +++++++++++++ cpp/src/cwrapper/tsfile_cwrapper.h | 45 ++++++++++++++++++++++++++++ 5 files changed, 80 insertions(+), 6 deletions(-) diff --git a/cpp/examples/c_examples/demo_write.c b/cpp/examples/c_examples/demo_write.c index 444cbe662..927289802 100644 --- a/cpp/examples/c_examples/demo_write.c +++ b/cpp/examples/c_examples/demo_write.c @@ -26,6 +26,8 @@ // This example shows you how to write tsfile. ERRNO write_tsfile() { + set_global_compression(TS_COMPRESSION_LZ4); + set_datatype_encoding(TS_DATATYPE_INT32, TS_ENCODING_TS_2DIFF); ERRNO code = 0; char* table_name = "table1"; diff --git a/cpp/examples/examples.cc b/cpp/examples/examples.cc index a4daa34b1..edbd819a0 100644 --- a/cpp/examples/examples.cc +++ b/cpp/examples/examples.cc @@ -23,8 +23,6 @@ int main() { // C++ examples // std::cout << "begin write and read tsfile by cpp" << std::endl; - common::set_global_compression(TS_COMPRESSION_LZ4); - common::set_datatype_encoding(TS_DATATYPE_INT32, TS_ENCODING_TS_2DIFF); demo_write(); demo_read(); std::cout << "begin write and read tsfile by c" << std::endl; diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h index e9fc98329..41561e88b 100644 --- a/cpp/src/common/global.h +++ b/cpp/src/common/global.h @@ -40,18 +40,27 @@ FORCE_INLINE int set_global_time_data_type(uint8_t data_type) { FORCE_INLINE int set_global_time_encoding(uint8_t encoding) { ASSERT(encoding >= PLAIN && encoding <= FREQ); - if (encoding != TS_2DIFF && encoding != PLAIN) { + if (encoding != TS_2DIFF && + encoding != PLAIN && + encoding != GORILLA && + encoding != ZIGZAG && + encoding != RLE && + encoding != SPRINTZ) { return E_NOT_SUPPORT; - } + } g_config_value_.time_encoding_type_ = static_cast(encoding); return E_OK; } FORCE_INLINE int set_global_time_compression(uint8_t compression) { ASSERT(compression >= UNCOMPRESSED && compression <= LZ4); - if (compression != UNCOMPRESSED && compression != LZ4) { + if (compression != UNCOMPRESSED && + compression != SNAPPY && + compression != GZIP && + compression != LZO && + compression != LZ4) { return E_NOT_SUPPORT; - } + } g_config_value_.time_compress_type_ = static_cast(compression); return E_OK; diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 7b09f26d7..3d082fa4b 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -42,6 +42,26 @@ void init_tsfile_config() { } } +int set_global_time_data_type(uint8_t data_type) { + return common::set_global_time_data_type(data_type); +} + +int set_global_time_encoding(uint8_t encoding) { + return common::set_global_time_encoding(encoding); +} + +int set_global_time_compression(uint8_t compression) { + return common::set_global_time_compression(compression); +} + +int set_datatype_encoding(uint8_t data_type, uint8_t encoding) { + return common::set_datatype_encoding(data_type, encoding); +} + +int set_global_compression(uint8_t compression) { + return common::set_global_compression(compression); +} + WriteFile write_file_new(const char *pathname, ERRNO *err_code) { int ret; init_tsfile_config(); diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 1f651f5d1..454ec47ec 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -119,6 +119,51 @@ typedef void* ResultSet; typedef int32_t ERRNO; typedef int64_t Timestamp; +/** + * @brief Set the global time column data type + * @param data_type Time column data type (must be INT64) + * @return E_OK if success, E_NOT_SUPPORT if data_type is not INT64 + * @note Only INT64 is supported for time column data type + */ +int set_global_time_data_type(uint8_t data_type); + +/** + * @brief Set the global time column encoding type + * @param encoding Time column encoding type (must be TS_2DIFF or PLAIN) + * @return E_OK if success, E_NOT_SUPPORT if encoding is not supported + * @note Supported encodings: TS_2DIFF, PLAIN + */ +int set_global_time_encoding(uint8_t encoding); + +/** + * @brief Set the global time column compression type + * @param compression Time column compression type (must be UNCOMPRESSED or LZ4) + * @return E_OK if success, E_NOT_SUPPORT if compression is not supported + * @note Supported compressions: UNCOMPRESSED, LZ4 + */ +int set_global_time_compression(uint8_t compression); + +/** + * @brief Set encoding type for specific data type + * @param data_type The data type to configure + * @param encoding The encoding type to set + * @return E_OK if success, E_NOT_SUPPORT if encoding is not supported for the data type + * @note Supported encodings per data type: + * - BOOLEAN: PLAIN only + * - INT32/INT64: PLAIN, TS_2DIFF, GORILLA, ZIGZAG, RLE, SPRINTZ + * - FLOAT/DOUBLE: PLAIN, TS_2DIFF, GORILLA, SPRINTZ + * - STRING: PLAIN, DICTIONARY + */ +int set_datatype_encoding(uint8_t data_type, uint8_t encoding); + +/** + * @brief Set the global default compression type + * @param compression Compression type to set + * @return E_OK if success, E_NOT_SUPPORT if compression is not supported + * @note Supported compressions: UNCOMPRESSED, SNAPPY, GZIP, LZO, LZ4 + */ +int set_global_compression(uint8_t compression); + /*--------------------------TsFile Reader and Writer------------------------ */ /** From 9f9b6952322907959dcd72bad7e747758955016f Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Mon, 1 Sep 2025 12:30:57 +0800 Subject: [PATCH 4/8] [CPP/C] implement default encoding/compression configuration interface(c demo) --- cpp/src/common/global.h | 2 ++ .../table_view/tsfile_writer_table_test.cc | 36 +++++++++++++++---- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h index 41561e88b..b200df109 100644 --- a/cpp/src/common/global.h +++ b/cpp/src/common/global.h @@ -82,6 +82,7 @@ FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) { break; case INT32: + case DATE: case INT64: if (encoding_type != PLAIN && encoding_type != TS_2DIFF && @@ -108,6 +109,7 @@ FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) { break; case STRING: + case TEXT: if (encoding_type != PLAIN && encoding_type != DICTIONARY) { return E_NOT_SUPPORT; } diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc b/cpp/test/writer/table_view/tsfile_writer_table_test.cc index 2ef08468d..bdf27b70c 100644 --- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc +++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc @@ -998,6 +998,8 @@ TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { ASSERT_EQ(E_OK, set_datatype_encoding(FLOAT, GORILLA)); ASSERT_EQ(E_OK, set_datatype_encoding(DOUBLE, GORILLA)); ASSERT_EQ(E_OK, set_datatype_encoding(STRING, DICTIONARY)); + ASSERT_EQ(E_OK, set_datatype_encoding(DATE, PLAIN)); // Added DATE support + ASSERT_EQ(E_OK, set_datatype_encoding(TEXT, DICTIONARY)); // Added TEXT support // 3. Create schema using these configurations std::vector measurement_schemas; @@ -1005,15 +1007,15 @@ TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { std::vector measurement_names = { "int32_sprintz", "int64_ts2diff", "float_gorilla", - "double_gorilla", "string_dict" + "double_gorilla", "string_dict", "date_plain", "text_dict" }; std::vector data_types = { - INT32, INT64, FLOAT, DOUBLE, STRING + INT32, INT64, FLOAT, DOUBLE, STRING, DATE, TEXT }; std::vector encodings = { - SPRINTZ, TS_2DIFF, GORILLA, GORILLA, DICTIONARY + SPRINTZ, TS_2DIFF, GORILLA, GORILLA, DICTIONARY, PLAIN, DICTIONARY }; // Create measurement schemas with configured encodings and compression @@ -1033,6 +1035,17 @@ TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { std::strcpy(literal, "test_str"); String literal_str(literal, std::strlen("test_str")); + // Prepare DATE and TEXT values + std::time_t now = std::time(nullptr); + std::tm *local_time = std::localtime(&now); + std::tm today = {}; + today.tm_year = local_time->tm_year; + today.tm_mon = local_time->tm_mon; + today.tm_mday = local_time->tm_mday; + char* text_literal = new char[std::strlen("sample_text") + 1]; + std::strcpy(text_literal, "sample_text"); + String text_str(text_literal, std::strlen("sample_text")); + // Fill tablet with test values for (int i = 0; i < 10; i++) { tablet.add_timestamp(i, static_cast(i)); @@ -1041,6 +1054,8 @@ TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { tablet.add_value(i, 2, (float)1.0); // FLOAT with GORILLA encoding tablet.add_value(i, 3, (double)2.0); // DOUBLE with GORILLA encoding tablet.add_value(i, 4, literal_str); // STRING with DICTIONARY encoding + tablet.add_value(i, 5, today); // DATE with PLAIN encoding (added) + tablet.add_value(i, 6, text_str); // TEXT with DICTIONARY encoding (added) } // Write and flush data @@ -1059,16 +1074,25 @@ TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { bool has_next = false; while (IS_SUCC(table_result_set->next(has_next)) && has_next) { // Verify all values were correctly encoded/decoded - ASSERT_EQ(table_result_set->get_value(2), 32); // INT32 - ASSERT_EQ(table_result_set->get_value(3), 64); // INT64 + ASSERT_EQ(table_result_set->get_value(2), 32); // INT32 + ASSERT_EQ(table_result_set->get_value(3), 64); // INT64 ASSERT_FLOAT_EQ(table_result_set->get_value(4), 1.0f); // FLOAT ASSERT_DOUBLE_EQ(table_result_set->get_value(5), 2.0); // DOUBLE - ASSERT_EQ(table_result_set->get_value(6)->compare(literal_str), 0); // STRING + ASSERT_EQ( + table_result_set->get_value(6)->compare(literal_str + ), 0); // STRING + ASSERT_TRUE( + DateConverter::is_tm_ymd_equal(table_result_set->get_value( + 7), today)); + ASSERT_EQ( + table_result_set->get_value(8)->compare(text_str), + 0); // TEXT (added) } // 6. Clean up resources reader.destroy_query_data_set(table_result_set); ASSERT_EQ(reader.close(), common::E_OK); delete[] literal; + delete[] text_literal; delete table_schema; } \ No newline at end of file From e6bd0909f23d0504f8aabca28e3afd13b256714d Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Mon, 1 Sep 2025 12:36:02 +0800 Subject: [PATCH 5/8] mvn spotless:apply -P with-cpp --- cpp/src/common/global.h | 55 +++++++--------- cpp/src/cwrapper/tsfile_cwrapper.h | 3 +- .../table_view/tsfile_writer_table_test.cc | 63 ++++++++++--------- 3 files changed, 56 insertions(+), 65 deletions(-) diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h index b200df109..3ecb59216 100644 --- a/cpp/src/common/global.h +++ b/cpp/src/common/global.h @@ -40,27 +40,20 @@ FORCE_INLINE int set_global_time_data_type(uint8_t data_type) { FORCE_INLINE int set_global_time_encoding(uint8_t encoding) { ASSERT(encoding >= PLAIN && encoding <= FREQ); - if (encoding != TS_2DIFF && - encoding != PLAIN && - encoding != GORILLA && - encoding != ZIGZAG && - encoding != RLE && - encoding != SPRINTZ) { + if (encoding != TS_2DIFF && encoding != PLAIN && encoding != GORILLA && + encoding != ZIGZAG && encoding != RLE && encoding != SPRINTZ) { return E_NOT_SUPPORT; - } + } g_config_value_.time_encoding_type_ = static_cast(encoding); return E_OK; } FORCE_INLINE int set_global_time_compression(uint8_t compression) { ASSERT(compression >= UNCOMPRESSED && compression <= LZ4); - if (compression != UNCOMPRESSED && - compression != SNAPPY && - compression != GZIP && - compression != LZO && - compression != LZ4) { + if (compression != UNCOMPRESSED && compression != SNAPPY && + compression != GZIP && compression != LZO && compression != LZ4) { return E_NOT_SUPPORT; - } + } g_config_value_.time_compress_type_ = static_cast(compression); return E_OK; @@ -84,28 +77,25 @@ FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) { case INT32: case DATE: case INT64: - if (encoding_type != PLAIN && - encoding_type != TS_2DIFF && - encoding_type != GORILLA && - encoding_type != ZIGZAG && - encoding_type != RLE && - encoding_type != SPRINTZ) { + if (encoding_type != PLAIN && encoding_type != TS_2DIFF && + encoding_type != GORILLA && encoding_type != ZIGZAG && + encoding_type != RLE && encoding_type != SPRINTZ) { return E_NOT_SUPPORT; - } - dtype == INT32 ? g_config_value_.int32_encoding_type_ = encoding_type - : g_config_value_.int64_encoding_type_ = encoding_type; + } + dtype == INT32 + ? g_config_value_.int32_encoding_type_ = encoding_type + : g_config_value_.int64_encoding_type_ = encoding_type; break; case FLOAT: case DOUBLE: - if (encoding_type != PLAIN && - encoding_type != TS_2DIFF && - encoding_type != GORILLA && - encoding_type != SPRINTZ) { + if (encoding_type != PLAIN && encoding_type != TS_2DIFF && + encoding_type != GORILLA && encoding_type != SPRINTZ) { return E_NOT_SUPPORT; - } - dtype == FLOAT ? g_config_value_.float_encoding_type_ = encoding_type - : g_config_value_.double_encoding_type_ = encoding_type; + } + dtype == FLOAT + ? g_config_value_.float_encoding_type_ = encoding_type + : g_config_value_.double_encoding_type_ = encoding_type; break; case STRING: @@ -124,11 +114,8 @@ FORCE_INLINE int set_datatype_encoding(uint8_t data_type, uint8_t encoding) { FORCE_INLINE int set_global_compression(uint8_t compression) { ASSERT(compression >= UNCOMPRESSED && compression <= LZ4); - if (compression != UNCOMPRESSED && - compression != SNAPPY && - compression != GZIP && - compression != LZO && - compression != LZ4) { + if (compression != UNCOMPRESSED && compression != SNAPPY && + compression != GZIP && compression != LZO && compression != LZ4) { return E_NOT_SUPPORT; } g_config_value_.default_compression_type_ = diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 454ec47ec..ae123a7ab 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -147,7 +147,8 @@ int set_global_time_compression(uint8_t compression); * @brief Set encoding type for specific data type * @param data_type The data type to configure * @param encoding The encoding type to set - * @return E_OK if success, E_NOT_SUPPORT if encoding is not supported for the data type + * @return E_OK if success, E_NOT_SUPPORT if encoding is not supported for the + * data type * @note Supported encodings per data type: * - BOOLEAN: PLAIN only * - INT32/INT64: PLAIN, TS_2DIFF, GORILLA, ZIGZAG, RLE, SPRINTZ diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc b/cpp/test/writer/table_view/tsfile_writer_table_test.cc index bdf27b70c..8c373a3cb 100644 --- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc +++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc @@ -998,25 +998,23 @@ TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { ASSERT_EQ(E_OK, set_datatype_encoding(FLOAT, GORILLA)); ASSERT_EQ(E_OK, set_datatype_encoding(DOUBLE, GORILLA)); ASSERT_EQ(E_OK, set_datatype_encoding(STRING, DICTIONARY)); - ASSERT_EQ(E_OK, set_datatype_encoding(DATE, PLAIN)); // Added DATE support - ASSERT_EQ(E_OK, set_datatype_encoding(TEXT, DICTIONARY)); // Added TEXT support + ASSERT_EQ(E_OK, set_datatype_encoding(DATE, PLAIN)); // Added DATE support + ASSERT_EQ(E_OK, + set_datatype_encoding(TEXT, DICTIONARY)); // Added TEXT support // 3. Create schema using these configurations std::vector measurement_schemas; std::vector column_categories; std::vector measurement_names = { - "int32_sprintz", "int64_ts2diff", "float_gorilla", - "double_gorilla", "string_dict", "date_plain", "text_dict" - }; + "int32_sprintz", "int64_ts2diff", "float_gorilla", "double_gorilla", + "string_dict", "date_plain", "text_dict"}; - std::vector data_types = { - INT32, INT64, FLOAT, DOUBLE, STRING, DATE, TEXT - }; + std::vector data_types = {INT32, INT64, FLOAT, DOUBLE, + STRING, DATE, TEXT}; std::vector encodings = { - SPRINTZ, TS_2DIFF, GORILLA, GORILLA, DICTIONARY, PLAIN, DICTIONARY - }; + SPRINTZ, TS_2DIFF, GORILLA, GORILLA, DICTIONARY, PLAIN, DICTIONARY}; // Create measurement schemas with configured encodings and compression for (int i = 0; i < measurement_names.size(); i++) { @@ -1026,18 +1024,21 @@ TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { } // 4. Write and verify data - auto table_schema = new TableSchema("configTestTable", measurement_schemas, column_categories); - auto tsfile_table_writer = std::make_shared(&write_file_, table_schema); + auto table_schema = new TableSchema("configTestTable", measurement_schemas, + column_categories); + auto tsfile_table_writer = + std::make_shared(&write_file_, table_schema); // Create test data tablet - Tablet tablet(table_schema->get_measurement_names(), table_schema->get_data_types(), 10); + Tablet tablet(table_schema->get_measurement_names(), + table_schema->get_data_types(), 10); char* literal = new char[std::strlen("test_str") + 1]; std::strcpy(literal, "test_str"); String literal_str(literal, std::strlen("test_str")); // Prepare DATE and TEXT values std::time_t now = std::time(nullptr); - std::tm *local_time = std::localtime(&now); + std::tm* local_time = std::localtime(&now); std::tm today = {}; today.tm_year = local_time->tm_year; today.tm_mon = local_time->tm_mon; @@ -1049,13 +1050,14 @@ TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { // Fill tablet with test values for (int i = 0; i < 10; i++) { tablet.add_timestamp(i, static_cast(i)); - tablet.add_value(i, 0, (int32_t)32); // INT32 with SPRINTZ encoding - tablet.add_value(i, 1, (int64_t)64); // INT64 with TS_2DIFF encoding - tablet.add_value(i, 2, (float)1.0); // FLOAT with GORILLA encoding - tablet.add_value(i, 3, (double)2.0); // DOUBLE with GORILLA encoding - tablet.add_value(i, 4, literal_str); // STRING with DICTIONARY encoding - tablet.add_value(i, 5, today); // DATE with PLAIN encoding (added) - tablet.add_value(i, 6, text_str); // TEXT with DICTIONARY encoding (added) + tablet.add_value(i, 0, (int32_t)32); // INT32 with SPRINTZ encoding + tablet.add_value(i, 1, (int64_t)64); // INT64 with TS_2DIFF encoding + tablet.add_value(i, 2, (float)1.0); // FLOAT with GORILLA encoding + tablet.add_value(i, 3, (double)2.0); // DOUBLE with GORILLA encoding + tablet.add_value(i, 4, literal_str); // STRING with DICTIONARY encoding + tablet.add_value(i, 5, today); // DATE with PLAIN encoding (added) + tablet.add_value(i, 6, + text_str); // TEXT with DICTIONARY encoding (added) } // Write and flush data @@ -1067,7 +1069,8 @@ TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { auto reader = TsFileReader(); reader.open(write_file_.get_file_path()); ResultSet* ret = nullptr; - int ret_value = reader.query("configTestTable", measurement_names, 0, 10, ret); + int ret_value = + reader.query("configTestTable", measurement_names, 0, 10, ret); ASSERT_EQ(common::E_OK, ret_value); auto table_result_set = (TableResultSet*)ret; @@ -1077,16 +1080,16 @@ TEST_F(TsFileWriterTableTest, EncodingConfigIntegration) { ASSERT_EQ(table_result_set->get_value(2), 32); // INT32 ASSERT_EQ(table_result_set->get_value(3), 64); // INT64 ASSERT_FLOAT_EQ(table_result_set->get_value(4), 1.0f); // FLOAT - ASSERT_DOUBLE_EQ(table_result_set->get_value(5), 2.0); // DOUBLE - ASSERT_EQ( - table_result_set->get_value(6)->compare(literal_str - ), 0); // STRING - ASSERT_TRUE( - DateConverter::is_tm_ymd_equal(table_result_set->get_value( - 7), today)); + ASSERT_DOUBLE_EQ(table_result_set->get_value(5), + 2.0); // DOUBLE + ASSERT_EQ(table_result_set->get_value(6)->compare( + literal_str), + 0); // STRING + ASSERT_TRUE(DateConverter::is_tm_ymd_equal( + table_result_set->get_value(7), today)); ASSERT_EQ( table_result_set->get_value(8)->compare(text_str), - 0); // TEXT (added) + 0); // TEXT (added) } // 6. Clean up resources From eadafff122890afb2f1582486ad6c3a696e1b9b9 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Mon, 1 Sep 2025 16:01:58 +0800 Subject: [PATCH 6/8] [CPP/C] implement get configuration interface --- cpp/examples/c_examples/demo_write.c | 10 ++++-- cpp/src/common/global.h | 40 +++++++++++++++++++++ cpp/src/cwrapper/tsfile_cwrapper.cc | 14 ++++++-- cpp/src/cwrapper/tsfile_cwrapper.h | 53 +++++++++++++++++++++------- 4 files changed, 100 insertions(+), 17 deletions(-) diff --git a/cpp/examples/c_examples/demo_write.c b/cpp/examples/c_examples/demo_write.c index 927289802..326cfdcf9 100644 --- a/cpp/examples/c_examples/demo_write.c +++ b/cpp/examples/c_examples/demo_write.c @@ -26,9 +26,15 @@ // This example shows you how to write tsfile. ERRNO write_tsfile() { - set_global_compression(TS_COMPRESSION_LZ4); - set_datatype_encoding(TS_DATATYPE_INT32, TS_ENCODING_TS_2DIFF); ERRNO code = 0; + code = set_global_compression(TS_COMPRESSION_LZ4); + if (code != RET_OK) { + return code; + } + code = set_datatype_encoding(TS_DATATYPE_INT32, TS_ENCODING_TS_2DIFF); + if (code != RET_OK) { + return code; + } char* table_name = "table1"; // Create table schema to describe a table in a tsfile. diff --git a/cpp/src/common/global.h b/cpp/src/common/global.h index 3ecb59216..564f30c66 100644 --- a/cpp/src/common/global.h +++ b/cpp/src/common/global.h @@ -123,6 +123,46 @@ FORCE_INLINE int set_global_compression(uint8_t compression) { return E_OK; } +FORCE_INLINE uint8_t get_global_time_encoding() { + return static_cast(g_config_value_.time_encoding_type_); +} + +FORCE_INLINE uint8_t get_global_time_compression() { + return static_cast(g_config_value_.time_compress_type_); +} + +FORCE_INLINE uint8_t get_datatype_encoding(uint8_t data_type) { + const TSDataType dtype = static_cast(data_type); + + // Validate input parameter + ASSERT(dtype >= BOOLEAN && dtype <= STRING); + + switch (dtype) { + case BOOLEAN: + return static_cast(g_config_value_.boolean_encoding_type_); + case INT32: + return static_cast(g_config_value_.int32_encoding_type_); + case INT64: + return static_cast(g_config_value_.int64_encoding_type_); + case FLOAT: + return static_cast(g_config_value_.float_encoding_type_); + case DOUBLE: + return static_cast(g_config_value_.double_encoding_type_); + case STRING: + case TEXT: + return static_cast(g_config_value_.string_encoding_type_); + case DATE: + return static_cast(g_config_value_.int64_encoding_type_); + default: + return static_cast( + PLAIN); // Return default encoding for unknown types + } +} + +FORCE_INLINE uint8_t get_global_compression() { + return static_cast(g_config_value_.default_compression_type_); +} + extern int init_common(); extern bool is_timestamp_column_name(const char *time_col_name); extern void cols_to_json(ByteStream *byte_stream, diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 3d082fa4b..e6e15dd48 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -42,10 +42,20 @@ void init_tsfile_config() { } } -int set_global_time_data_type(uint8_t data_type) { - return common::set_global_time_data_type(data_type); +uint8_t get_global_time_encoding() { + return common::get_global_time_encoding(); } +uint8_t get_global_time_compression() { + return common::get_global_time_compression(); +} + +uint8_t get_datatype_encoding(uint8_t data_type) { + return common::get_datatype_encoding(data_type); +} + +uint8_t get_global_compression() { return common::get_global_compression(); } + int set_global_time_encoding(uint8_t encoding) { return common::set_global_time_encoding(encoding); } diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index ae123a7ab..75dc03643 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -120,26 +120,53 @@ typedef int32_t ERRNO; typedef int64_t Timestamp; /** - * @brief Set the global time column data type - * @param data_type Time column data type (must be INT64) - * @return E_OK if success, E_NOT_SUPPORT if data_type is not INT64 - * @note Only INT64 is supported for time column data type + * @brief Get the encoding type for global time column + * + * @return uint8_t Time encoding type enum value (cast to uint8_t) + */ +uint8_t get_global_time_encoding(); + +/** + * @brief Get the compression type for global time column + * + * @return uint8_t Time compression type enum value (cast to uint8_t) + */ +uint8_t get_global_time_compression(); + +/** + * @brief Get the encoding type for specified data type + * + * @param data_type The data type to query encoding for + * @return uint8_t Encoding type enum value (cast to uint8_t) */ -int set_global_time_data_type(uint8_t data_type); +uint8_t get_datatype_encoding(uint8_t data_type); /** - * @brief Set the global time column encoding type - * @param encoding Time column encoding type (must be TS_2DIFF or PLAIN) - * @return E_OK if success, E_NOT_SUPPORT if encoding is not supported - * @note Supported encodings: TS_2DIFF, PLAIN + * @brief Get the global default compression type + * + * @return uint8_t Compression type enum value (cast to uint8_t) + */ +uint8_t get_global_compression(); + +/** + * @brief Sets the global time column encoding method + * + * Validates and sets the encoding type for time series timestamps. + * Supported encodings: TS_2DIFF, PLAIN, GORILLA, ZIGZAG, RLE, SPRINTZ + * + * @param encoding The encoding type to set (as uint8_t) + * @return int E_OK on success, E_NOT_SUPPORT for invalid encoding */ int set_global_time_encoding(uint8_t encoding); /** - * @brief Set the global time column compression type - * @param compression Time column compression type (must be UNCOMPRESSED or LZ4) - * @return E_OK if success, E_NOT_SUPPORT if compression is not supported - * @note Supported compressions: UNCOMPRESSED, LZ4 + * @brief Sets the global time column compression method + * + * Validates and sets the compression type for time series timestamps. + * Supported compressions: UNCOMPRESSED, SNAPPY, GZIP, LZO, LZ4 + * + * @param compression The compression type to set (as uint8_t) + * @return int E_OK on success, E_NOT_SUPPORT for invalid compression */ int set_global_time_compression(uint8_t compression); From 388f1e9b07954694d84950555613767d95ef265b Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Tue, 2 Sep 2025 11:42:50 +0800 Subject: [PATCH 7/8] [Python] implement default compressor/encoder configuration interface --- python/tests/test_write_and_read.py | 51 +++++++++++++++++++ python/tsfile/__init__.py | 10 ++++ python/tsfile/tsfile_cpp.pxd | 12 +++-- python/tsfile/tsfile_py_cpp.pyx | 77 +++++++++++++++++++++++++++-- 4 files changed, 142 insertions(+), 8 deletions(-) diff --git a/python/tests/test_write_and_read.py b/python/tests/test_write_and_read.py index 787318995..2705ee6cb 100644 --- a/python/tests/test_write_and_read.py +++ b/python/tests/test_write_and_read.py @@ -268,3 +268,54 @@ def test_tsfile_config(): set_tsfile_config({"float_encoding_type_": TSEncoding.BITMAP}) with pytest.raises(NotSupportedError): set_tsfile_config({"time_compress_type_": Compressor.PAA}) + +def test_configuration_manager(): + """Test TSFile configuration getter and setter functions""" + from tsfile.tsfile_py_cpp import ( + tsconf_get_global_time_encoding, + tsconf_set_global_time_encoding, + tsconf_get_global_time_compression, + tsconf_set_global_time_compression, + tsconf_get_datatype_encoding, + tsconf_set_datatype_encoding, + tsconf_get_global_compression, + tsconf_set_global_compression, + ) + from tsfile.constants import TSDataType, TSEncoding, Compressor + + def test_config(getter, setter, test_value, original_value): + assert setter(test_value) == 0 + assert getter() == test_value + assert setter(original_value) == 0 + assert getter() == original_value + + # Test global configurations + test_config(tsconf_get_global_time_encoding, tsconf_set_global_time_encoding, + TSEncoding.PLAIN, tsconf_get_global_time_encoding()) + test_config(tsconf_get_global_time_compression, tsconf_set_global_time_compression, + Compressor.UNCOMPRESSED, tsconf_get_global_time_compression()) + test_config(tsconf_get_global_compression, tsconf_set_global_compression, + Compressor.SNAPPY, tsconf_get_global_compression()) + + # Test datatype encodings + test_cases = [ + (TSDataType.BOOLEAN, TSEncoding.PLAIN), + (TSDataType.INT32, TSEncoding.TS_2DIFF), + (TSDataType.FLOAT, TSEncoding.GORILLA), + (TSDataType.TEXT, TSEncoding.DICTIONARY), + ] + + for dtype, enc in test_cases: + orig = tsconf_get_datatype_encoding(dtype) + assert tsconf_set_datatype_encoding(dtype, enc) == 0 + assert tsconf_get_datatype_encoding(dtype) == enc + assert tsconf_set_datatype_encoding(dtype, orig) == 0 + + # Test error cases + invalid_dtype = 255 + for func in [tsconf_get_datatype_encoding, tsconf_set_datatype_encoding]: + try: + func(invalid_dtype, TSEncoding.PLAIN) if func.__name__ == 'tsconf_set_datatype_encoding' else func(invalid_dtype) + assert False, f"{func.__name__} should raise error for invalid dtype" + except Exception: + pass \ No newline at end of file diff --git a/python/tsfile/__init__.py b/python/tsfile/__init__.py index 0c5081fa8..f5a927e61 100644 --- a/python/tsfile/__init__.py +++ b/python/tsfile/__init__.py @@ -33,4 +33,14 @@ from .tsfile_reader import TsFileReaderPy as TsFileReader, ResultSetPy as ResultSet from .tsfile_writer import TsFileWriterPy as TsFileWriter from .tsfile_py_cpp import get_tsfile_config, set_tsfile_config +from .tsfile_py_cpp import ( + tsconf_set_datatype_encoding, + tsconf_get_datatype_encoding, + tsconf_get_global_time_encoding, + tsconf_get_global_time_compression, + tsconf_get_global_compression, + tsconf_set_global_compression, + tsconf_set_global_time_encoding, + tsconf_set_global_time_compression +) from .tsfile_table_writer import TsFileTableWriter \ No newline at end of file diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd index 1b04051c9..2aa378354 100644 --- a/python/tsfile/tsfile_cpp.pxd +++ b/python/tsfile/tsfile_cpp.pxd @@ -220,8 +220,14 @@ cdef extern from "./common/config/config.h" namespace "common": cdef extern from "./common/global.h" namespace "common": ConfigValue g_config_value_ + # Getter functions + uint8_t get_global_time_encoding() + uint8_t get_global_time_compression() + uint8_t get_datatype_encoding(uint8_t data_type) + uint8_t get_global_compression() + + # Setter functions int set_datatype_encoding(uint8_t data_type, uint8_t encoding) int set_global_compression(uint8_t compression) - int set_global_time_data_type(uint8_t data_type); - int set_global_time_encoding(uint8_t encoding); - int set_global_time_compression(uint8_t compression); + int set_global_time_encoding(uint8_t encoding) + int set_global_time_compression(uint8_t compression) diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx index e17430399..be4fdd36a 100644 --- a/python/tsfile/tsfile_py_cpp.pyx +++ b/python/tsfile/tsfile_py_cpp.pyx @@ -396,11 +396,6 @@ cpdef void set_tsfile_config(dict new_config): raise TypeError(f"Unsupported TSEncoding: {new_config['time_encoding_type_']}") code = set_global_time_encoding((new_config["time_encoding_type_"].value)) check_error(code) - if "time_data_type_" in new_config: - if not isinstance(new_config["time_data_type_"], TSDataTypePy): - raise TypeError(f"Unsupported TSDataType: {new_config['time_data_type_']}") - code = set_global_time_data_type((new_config["time_data_type_"].value)) - check_error(code) if "time_compress_type_" in new_config: if not isinstance(new_config["time_compress_type_"], CompressorPy): raise TypeError(f"Unsupported Compressor: {new_config['time_compress_type_']}") @@ -562,3 +557,75 @@ cdef object get_all_table_schema(TsFileReader reader): free(schemas) return table_schemas +# Getter functions to retrieve configuration values +cpdef int tsconf_get_global_time_encoding(): + """Get the global time encoding type""" + return get_global_time_encoding() + +cpdef int tsconf_get_global_time_compression(): + """Get the global time compression type""" + return get_global_time_compression() + +cpdef int tsconf_get_datatype_encoding(uint8_t data_type): + """Get the encoding type for a specific data type + + Args: + data_type: The TSDataType to query encoding for + Returns: + The encoding type for the specified data type + """ + return get_datatype_encoding(data_type) + +cpdef int tsconf_get_global_compression(): + """Get the global compression type""" + return get_global_compression() + +# Setter functions to modify configuration values +cpdef ErrorCode tsconf_set_datatype_encoding(uint8_t data_type, uint8_t encoding) except -1: + """Set the encoding type for a specific data type + + Args: + data_type: The TSDataType to configure + encoding: The encoding type to set + Returns: + ErrorCode indicating success or failure + """ + cdef ErrorCode errno = set_datatype_encoding(data_type, encoding) + check_error(errno) + return errno + +cpdef ErrorCode tsconf_set_global_compression(uint8_t compression) except -1: + """Set the global compression type + + Args: + compression: The compression type to set + Returns: + ErrorCode indicating success or failure + """ + cdef ErrorCode errno = set_global_compression(compression) + check_error(errno) + return errno + +cpdef ErrorCode tsconf_set_global_time_encoding(uint8_t encoding) except -1: + """Set the global time encoding type + + Args: + encoding: The encoding type to set + Returns: + ErrorCode indicating success or failure + """ + cdef ErrorCode errno = set_global_time_encoding(encoding) + check_error(errno) + return errno + +cpdef ErrorCode tsconf_set_global_time_compression(uint8_t compression) except -1: + """Set the global time compression type + + Args: + compression: The compression type to set + Returns: + ErrorCode indicating success or failure + """ + cdef ErrorCode errno = set_global_time_compression(compression) + check_error(errno) + return errno From 4794ad08786e70bd757f66e9166bea88501330c7 Mon Sep 17 00:00:00 2001 From: 761417898 <761417898@qq.com> Date: Tue, 2 Sep 2025 14:36:54 +0800 Subject: [PATCH 8/8] [Python] implement default compressor/encoder configuration interface --- python/tests/test_write_and_read.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/python/tests/test_write_and_read.py b/python/tests/test_write_and_read.py index 2705ee6cb..9e7cdc18d 100644 --- a/python/tests/test_write_and_read.py +++ b/python/tests/test_write_and_read.py @@ -310,12 +310,3 @@ def test_config(getter, setter, test_value, original_value): assert tsconf_set_datatype_encoding(dtype, enc) == 0 assert tsconf_get_datatype_encoding(dtype) == enc assert tsconf_set_datatype_encoding(dtype, orig) == 0 - - # Test error cases - invalid_dtype = 255 - for func in [tsconf_get_datatype_encoding, tsconf_set_datatype_encoding]: - try: - func(invalid_dtype, TSEncoding.PLAIN) if func.__name__ == 'tsconf_set_datatype_encoding' else func(invalid_dtype) - assert False, f"{func.__name__} should raise error for invalid dtype" - except Exception: - pass \ No newline at end of file