From a115abf7e3cbdb97d898f347ecdf9e09a6979180 Mon Sep 17 00:00:00 2001 From: Arsen Mukuchyan Date: Sat, 26 Jul 2025 09:36:14 +0100 Subject: [PATCH 1/2] Add Hash output format --- src/Formats/registerFormats.cpp | 2 + .../Formats/Impl/HashOutputFormat.cpp | 47 +++++++++++++++++++ .../Formats/Impl/HashOutputFormat.h | 22 +++++++++ .../0_stateless/03577_hash_format.reference | 3 ++ .../queries/0_stateless/03577_hash_format.sql | 3 ++ 5 files changed, 77 insertions(+) create mode 100644 src/Processors/Formats/Impl/HashOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/HashOutputFormat.h create mode 100644 tests/queries/0_stateless/03577_hash_format.reference create mode 100644 tests/queries/0_stateless/03577_hash_format.sql diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 768ba4a6edbb..20f4cb6523df 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -94,6 +94,7 @@ void registerOutputFormatMarkdown(FormatFactory & factory); void registerOutputFormatPostgreSQLWire(FormatFactory & factory); void registerOutputFormatPrometheus(FormatFactory & factory); void registerOutputFormatSQLInsert(FormatFactory & factory); +void registerOutputFormatHash(FormatFactory & factory); /// Input only formats. @@ -240,6 +241,7 @@ void registerFormats() registerOutputFormatCapnProto(factory); registerOutputFormatPrometheus(factory); registerOutputFormatSQLInsert(factory); + registerOutputFormatHash(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); diff --git a/src/Processors/Formats/Impl/HashOutputFormat.cpp b/src/Processors/Formats/Impl/HashOutputFormat.cpp new file mode 100644 index 000000000000..b12dfa0f07a2 --- /dev/null +++ b/src/Processors/Formats/Impl/HashOutputFormat.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +HashOutputFormat::HashOutputFormat(WriteBuffer & out_, SharedHeader header_) + : IOutputFormat(header_, out_) +{ +} + +String HashOutputFormat::getName() const +{ + return "HashOutputFormat"; +} + +void HashOutputFormat::consume(Chunk chunk) +{ + for (const auto & column : chunk.getColumns()) + { + for (size_t i = 0; i < column->size(); ++i) + column->updateHashWithValue(i, hash); + } +} + +void HashOutputFormat::finalizeImpl() +{ + std::string hashString = getSipHash128AsHexString(hash); + out.write(hashString.data(), hashString.size()); + out.write("\n", 1); + out.next(); +} + +void registerOutputFormatHash(FormatFactory & factory) +{ + factory.registerOutputFormat("Hash", + [](WriteBuffer & buf, const Block & header, const FormatSettings &) + { + return std::make_shared(buf, std::make_shared(header)); + }); +} + +} diff --git a/src/Processors/Formats/Impl/HashOutputFormat.h b/src/Processors/Formats/Impl/HashOutputFormat.h new file mode 100644 index 000000000000..322d1f811ecf --- /dev/null +++ b/src/Processors/Formats/Impl/HashOutputFormat.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class HashOutputFormat final : public IOutputFormat +{ +public: + HashOutputFormat(WriteBuffer & out_, SharedHeader header_); + String getName() const override; + +private: + void consume(Chunk chunk) override; + void finalizeImpl() override; + + SipHash hash; +}; + +} diff --git a/tests/queries/0_stateless/03577_hash_format.reference b/tests/queries/0_stateless/03577_hash_format.reference new file mode 100644 index 000000000000..7f2336a0083a --- /dev/null +++ b/tests/queries/0_stateless/03577_hash_format.reference @@ -0,0 +1,3 @@ +75b419a3aa739a211291e7cc119bd3c9 +d3b90098d049660862d6dc53ac7505e5 +92d47ccfb2950a8e10ac9ddf4314f1bf diff --git a/tests/queries/0_stateless/03577_hash_format.sql b/tests/queries/0_stateless/03577_hash_format.sql new file mode 100644 index 000000000000..1b21ac2ad3a9 --- /dev/null +++ b/tests/queries/0_stateless/03577_hash_format.sql @@ -0,0 +1,3 @@ +SELECT number FROM system.numbers LIMIT 1 FORMAT Hash; +SELECT number FROM system.numbers LIMIT 20 FORMAT Hash; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 20 FORMAT Hash; From 8f6976ccdf281c0c0475b838a4ab72923e8db509 Mon Sep 17 00:00:00 2001 From: Arsen Mukuchyan Date: Sun, 27 Jul 2025 11:33:43 +0100 Subject: [PATCH 2/2] Add Hash output format with customization --- src/Core/FormatFactorySettings.h | 3 + src/Core/Settings.h | 1 + src/Core/SettingsEnums.cpp | 9 ++ src/Core/SettingsEnums.h | 2 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 16 ++++ .../Formats/Impl/HashOutputFormat.cpp | 85 +++++++++++++++++-- .../Formats/Impl/HashOutputFormat.h | 16 +++- src/Storages/FileLog/FileLogSettings.h | 1 + src/Storages/Hive/HiveSettings.h | 1 + src/Storages/Kafka/KafkaSettings.h | 3 +- src/Storages/NATS/NATSSettings.h | 1 + .../DataLakes/DataLakeStorageSettings.h | 1 + .../StorageObjectStorageSettings.h | 1 + .../ObjectStorageQueueSettings.h | 1 + src/Storages/RabbitMQ/RabbitMQSettings.h | 1 + src/Storages/SetSettings.h | 1 + .../0_stateless/03577_hash_format.reference | 7 ++ .../queries/0_stateless/03577_hash_format.sql | 21 +++++ 19 files changed, 161 insertions(+), 11 deletions(-) diff --git a/src/Core/FormatFactorySettings.h b/src/Core/FormatFactorySettings.h index eccfc655ba13..dfd290a73ce5 100644 --- a/src/Core/FormatFactorySettings.h +++ b/src/Core/FormatFactorySettings.h @@ -231,6 +231,9 @@ The number of columns in inserted MsgPack data. Used for automatic schema infere )", 0) \ DECLARE(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, R"( The way how to output UUID in MsgPack format. +)", 0) \ + DECLARE(HashEnumFunction, output_format_hash_function, FormatSettings::HashEnumFunction::SIP_HASH_128, R"( +Hash function for Hash output format. Supported: sipHash128 (default), sipHash64, cityHash128, cityHash64, murmurHash3_128, murmurHash3_64, xxHash64 )", 0) \ DECLARE(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, R"( The maximum rows of data to read for automatic schema inference. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c93a1589c44d..507fc0172e3b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -80,6 +80,7 @@ class WriteBuffer; M(CLASS_NAME, MaxThreads) \ M(CLASS_NAME, Milliseconds) \ M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, HashEnumFunction) \ M(CLASS_NAME, MySQLDataTypesSupport) \ M(CLASS_NAME, NonZeroUInt64) \ M(CLASS_NAME, ORCCompression) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 228d08e293d7..60842e41d84e 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -173,6 +173,15 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation, ErrorCodes::BAD_ARGUMENTS, {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) +IMPLEMENT_SETTING_ENUM(HashEnumFunction, ErrorCodes::BAD_ARGUMENTS, + {{"sipHash128", FormatSettings::HashEnumFunction::SIP_HASH_128}, + {"sipHash64", FormatSettings::HashEnumFunction::SIP_HASH_64}, + {"cityHash128", FormatSettings::HashEnumFunction::CITY_HASH_128}, + {"cityHash64", FormatSettings::HashEnumFunction::CITY_HASH_64}, + {"murmurHash3_128", FormatSettings::HashEnumFunction::MURMUR_HASH3_128}, + {"murmurHash3_64", FormatSettings::HashEnumFunction::MURMUR_HASH3_64}, + {"xxHash64", FormatSettings::HashEnumFunction::XX_HASH_64}}) + IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, {"kusto", Dialect::kusto}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index a030c7fb741e..c242fb0d2160 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -283,6 +283,8 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation) +DECLARE_SETTING_ENUM_WITH_RENAME(HashEnumFunction, FormatSettings::HashEnumFunction) + DECLARE_SETTING_ENUM_WITH_RENAME(ParquetCompression, FormatSettings::ParquetCompression) DECLARE_SETTING_ENUM_WITH_RENAME(ArrowCompression, FormatSettings::ArrowCompression) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 56b06c5e6e25..0dbe725bb67c 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -340,6 +340,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.native.decode_types_in_binary_format = settings[Setting::input_format_native_decode_types_in_binary_format]; format_settings.native.write_json_as_string = settings[Setting::output_format_native_write_json_as_string]; format_settings.native.use_flattened_dynamic_and_json_serialization = settings[Setting::output_format_native_use_flattened_dynamic_and_json_serialization]; + format_settings.hash.function = settings[Setting::output_format_hash_function]; format_settings.max_parser_depth = settings[Setting::max_parser_depth]; format_settings.date_time_overflow_behavior = settings[Setting::date_time_overflow_behavior]; format_settings.try_infer_variant = settings[Setting::input_format_try_infer_variants]; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 38d8ae44ce27..6ac69e239606 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -527,6 +527,22 @@ struct FormatSettings { bool escape_special_characters = false; } markdown{}; + + enum class HashEnumFunction : uint8_t + { + SIP_HASH_128, + SIP_HASH_64, + CITY_HASH_128, + CITY_HASH_64, + MURMUR_HASH3_128, + MURMUR_HASH3_64, + XX_HASH_64, + }; + + struct + { + HashEnumFunction function = HashEnumFunction::SIP_HASH_128; + } hash{}; }; } diff --git a/src/Processors/Formats/Impl/HashOutputFormat.cpp b/src/Processors/Formats/Impl/HashOutputFormat.cpp index b12dfa0f07a2..8fc8421fdfa3 100644 --- a/src/Processors/Formats/Impl/HashOutputFormat.cpp +++ b/src/Processors/Formats/Impl/HashOutputFormat.cpp @@ -1,18 +1,25 @@ #include #include #include -#include #include +#include +#include +#include namespace DB { -HashOutputFormat::HashOutputFormat(WriteBuffer & out_, SharedHeader header_) - : IOutputFormat(header_, out_) +HashOutputFormat::HashOutputFormat(WriteBuffer & out_, SharedHeader header_, const FormatSettings & format_settings_) + : IOutputFormat(header_, out_), format_settings(format_settings_) { } +HashOutputFormat::~HashOutputFormat() +{ + buf.cancel(); +} + String HashOutputFormat::getName() const { return "HashOutputFormat"; @@ -20,16 +27,80 @@ String HashOutputFormat::getName() const void HashOutputFormat::consume(Chunk chunk) { + // TODO - hide arena declarion inside of non-SIP scope + Arena arena; + const char * begin = nullptr; + for (const auto & column : chunk.getColumns()) { for (size_t i = 0; i < column->size(); ++i) - column->updateHashWithValue(i, hash); + + switch (format_settings.hash.function) { + + case FormatSettings::HashEnumFunction::SIP_HASH_64: + case FormatSettings::HashEnumFunction::SIP_HASH_128: + column->updateHashWithValue(i, sip); + break; + + default: + { + StringRef ref = column->serializeValueIntoArena(i, arena, begin); + buf.write(ref.data, ref.size); + } + } } } void HashOutputFormat::finalizeImpl() { - std::string hashString = getSipHash128AsHexString(hash); + std::string hashString; + switch (format_settings.hash.function) { + + case FormatSettings::HashEnumFunction::SIP_HASH_128: + hashString = getSipHash128AsHexString(sip); + break; + + case FormatSettings::HashEnumFunction::SIP_HASH_64: + hashString = getHexUIntLowercase(sip.get64()); + break; + + case FormatSettings::HashEnumFunction::CITY_HASH_128: + { + CityHash_v1_0_2::uint128 hash = CityHash_v1_0_2::CityHash128(buf.buffer().begin(), buf.buffer().size()); + hashString += getHexUIntLowercase(hash.high64); + hashString += getHexUIntLowercase(hash.low64); + break; + } + + case FormatSettings::HashEnumFunction::CITY_HASH_64: + hashString = getHexUIntLowercase(CityHash_v1_0_2::CityHash64(buf.buffer().begin(), buf.buffer().size())); + break; + + case FormatSettings::HashEnumFunction::MURMUR_HASH3_128: + { + HashState bytes; + MurmurHash3_x64_128(buf.buffer().begin(), buf.buffer().size(), 0, bytes); + hashString += getHexUIntLowercase(bytes.h1); + hashString += getHexUIntLowercase(bytes.h2); + break; + } + + case FormatSettings::HashEnumFunction::MURMUR_HASH3_64: + { + HashState bytes; + MurmurHash3_x64_128(buf.buffer().begin(), buf.buffer().size(), 0, bytes); + hashString = getHexUIntLowercase(bytes.h1 ^ bytes.h2); + break; + } + + case FormatSettings::HashEnumFunction::XX_HASH_64: + { + UInt64 hash = XXH64(buf.buffer().begin(), buf.buffer().size(), 0); + hashString = getHexUIntLowercase(hash); + break; + } + } + out.write(hashString.data(), hashString.size()); out.write("\n", 1); out.next(); @@ -38,9 +109,9 @@ void HashOutputFormat::finalizeImpl() void registerOutputFormatHash(FormatFactory & factory) { factory.registerOutputFormat("Hash", - [](WriteBuffer & buf, const Block & header, const FormatSettings &) + [](WriteBuffer & buf, const Block & header, const FormatSettings & format_settings) { - return std::make_shared(buf, std::make_shared(header)); + return std::make_shared(buf, std::make_shared(header), format_settings); }); } diff --git a/src/Processors/Formats/Impl/HashOutputFormat.h b/src/Processors/Formats/Impl/HashOutputFormat.h index 322d1f811ecf..67b37a9ebe4a 100644 --- a/src/Processors/Formats/Impl/HashOutputFormat.h +++ b/src/Processors/Formats/Impl/HashOutputFormat.h @@ -1,22 +1,32 @@ #pragma once #include +#include +#include +#include #include namespace DB { -class HashOutputFormat final : public IOutputFormat +class HashOutputFormat final : public IOutputFormat { public: - HashOutputFormat(WriteBuffer & out_, SharedHeader header_); + HashOutputFormat(WriteBuffer & out_, SharedHeader header_, const FormatSettings & format_settings_); + ~HashOutputFormat() override; + String getName() const override; private: void consume(Chunk chunk) override; void finalizeImpl() override; - SipHash hash; + // TODO - check whether global format settings object can mutate during lifecyle, is it safe to keep a reference instead of copy? + const FormatSettings & format_settings; + + // TODO - make as union, as we don't need them all at once + SipHash sip; + BufferWithOwnMemory buf; }; } diff --git a/src/Storages/FileLog/FileLogSettings.h b/src/Storages/FileLog/FileLogSettings.h index 49964a2b86a5..35ade3526f2c 100644 --- a/src/Storages/FileLog/FileLogSettings.h +++ b/src/Storages/FileLog/FileLogSettings.h @@ -28,6 +28,7 @@ struct FileLogSettingsImpl; M(CLASS_NAME, MaxThreads) \ M(CLASS_NAME, Milliseconds) \ M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, HashEnumFunction) \ M(CLASS_NAME, ORCCompression) \ M(CLASS_NAME, ParquetCompression) \ M(CLASS_NAME, ParquetVersion) \ diff --git a/src/Storages/Hive/HiveSettings.h b/src/Storages/Hive/HiveSettings.h index 0dedf0d2a598..cf549ce0f4da 100644 --- a/src/Storages/Hive/HiveSettings.h +++ b/src/Storages/Hive/HiveSettings.h @@ -35,6 +35,7 @@ struct HiveSettingsImpl; M(CLASS_NAME, Int64) \ M(CLASS_NAME, IntervalOutputFormat) \ M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, HashEnumFunction) \ M(CLASS_NAME, ORCCompression) \ M(CLASS_NAME, ParquetCompression) \ M(CLASS_NAME, ParquetVersion) \ diff --git a/src/Storages/Kafka/KafkaSettings.h b/src/Storages/Kafka/KafkaSettings.h index a7362c8ff44c..a20d748cf43a 100644 --- a/src/Storages/Kafka/KafkaSettings.h +++ b/src/Storages/Kafka/KafkaSettings.h @@ -17,7 +17,7 @@ const auto KAFKA_MAX_THREAD_WORK_DURATION_MS = 60000; // 10min const auto KAFKA_CONSUMERS_POOL_TTL_MS_MAX = 600'000; -/// List of available types supported in RabbitMQSettings object +/// List of available types supported in KafkaSettings object #define KAFKA_SETTINGS_SUPPORTED_TYPES(CLASS_NAME, M) \ M(CLASS_NAME, ArrowCompression) \ M(CLASS_NAME, Bool) \ @@ -35,6 +35,7 @@ const auto KAFKA_CONSUMERS_POOL_TTL_MS_MAX = 600'000; M(CLASS_NAME, IntervalOutputFormat) \ M(CLASS_NAME, Milliseconds) \ M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, HashEnumFunction) \ M(CLASS_NAME, ORCCompression) \ M(CLASS_NAME, ParquetCompression) \ M(CLASS_NAME, ParquetVersion) \ diff --git a/src/Storages/NATS/NATSSettings.h b/src/Storages/NATS/NATSSettings.h index ba2f526c992f..6cde75fd2646 100644 --- a/src/Storages/NATS/NATSSettings.h +++ b/src/Storages/NATS/NATSSettings.h @@ -28,6 +28,7 @@ struct NATSSettingsImpl; M(CLASS_NAME, Int64) \ M(CLASS_NAME, IntervalOutputFormat) \ M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, HashEnumFunction) \ M(CLASS_NAME, Milliseconds) \ M(CLASS_NAME, ORCCompression) \ M(CLASS_NAME, ParquetCompression) \ diff --git a/src/Storages/ObjectStorage/DataLakes/DataLakeStorageSettings.h b/src/Storages/ObjectStorage/DataLakes/DataLakeStorageSettings.h index bca3d5e110c1..345499a88ad5 100644 --- a/src/Storages/ObjectStorage/DataLakes/DataLakeStorageSettings.h +++ b/src/Storages/ObjectStorage/DataLakes/DataLakeStorageSettings.h @@ -31,6 +31,7 @@ class SettingsChanges; M(CLASS_NAME, Int64) \ M(CLASS_NAME, IntervalOutputFormat) \ M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, HashEnumFunction) \ M(CLASS_NAME, ORCCompression) \ M(CLASS_NAME, ParquetCompression) \ M(CLASS_NAME, ParquetVersion) \ diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSettings.h b/src/Storages/ObjectStorage/StorageObjectStorageSettings.h index 1314b7d87c3d..99d978c7f95d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSettings.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSettings.h @@ -30,6 +30,7 @@ class SettingsChanges; M(CLASS_NAME, Int64) \ M(CLASS_NAME, IntervalOutputFormat) \ M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, HashEnumFunction) \ M(CLASS_NAME, ObjectStorageQueueAction) \ M(CLASS_NAME, ObjectStorageQueueMode) \ M(CLASS_NAME, ORCCompression) \ diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h index 2e4ccb86f70c..f86b24bbea46 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSettings.h @@ -31,6 +31,7 @@ struct StorageID; M(CLASS_NAME, Int64) \ M(CLASS_NAME, IntervalOutputFormat) \ M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, HashEnumFunction) \ M(CLASS_NAME, ObjectStorageQueueAction) \ M(CLASS_NAME, ObjectStorageQueueMode) \ M(CLASS_NAME, ORCCompression) \ diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index 491dbe3bbf15..a488c53468e9 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -28,6 +28,7 @@ struct RabbitMQSettingsImpl; M(CLASS_NAME, Int64) \ M(CLASS_NAME, IntervalOutputFormat) \ M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, HashEnumFunction) \ M(CLASS_NAME, ORCCompression) \ M(CLASS_NAME, ParquetCompression) \ M(CLASS_NAME, ParquetVersion) \ diff --git a/src/Storages/SetSettings.h b/src/Storages/SetSettings.h index 552a4c08e51f..335d149d0f78 100644 --- a/src/Storages/SetSettings.h +++ b/src/Storages/SetSettings.h @@ -26,6 +26,7 @@ struct SetSettingsImpl; M(CLASS_NAME, Int64) \ M(CLASS_NAME, IntervalOutputFormat) \ M(CLASS_NAME, MsgPackUUIDRepresentation) \ + M(CLASS_NAME, HashEnumFunction) \ M(CLASS_NAME, ORCCompression) \ M(CLASS_NAME, ParquetCompression) \ M(CLASS_NAME, ParquetVersion) \ diff --git a/tests/queries/0_stateless/03577_hash_format.reference b/tests/queries/0_stateless/03577_hash_format.reference index 7f2336a0083a..9b4e5c98d068 100644 --- a/tests/queries/0_stateless/03577_hash_format.reference +++ b/tests/queries/0_stateless/03577_hash_format.reference @@ -1,3 +1,10 @@ 75b419a3aa739a211291e7cc119bd3c9 d3b90098d049660862d6dc53ac7505e5 92d47ccfb2950a8e10ac9ddf4314f1bf +92d47ccfb2950a8e10ac9ddf4314f1bf +31fb81f110e17882 +2a9f9daff6924303859b32e3c35adbeb +5901aa2f330b940a +27f838a79558f7139d53e0496ef2b454 +baabd8eefbaa4347 +c71d18d5553075ef diff --git a/tests/queries/0_stateless/03577_hash_format.sql b/tests/queries/0_stateless/03577_hash_format.sql index 1b21ac2ad3a9..20d74ab7be88 100644 --- a/tests/queries/0_stateless/03577_hash_format.sql +++ b/tests/queries/0_stateless/03577_hash_format.sql @@ -1,3 +1,24 @@ SELECT number FROM system.numbers LIMIT 1 FORMAT Hash; SELECT number FROM system.numbers LIMIT 20 FORMAT Hash; SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 20 FORMAT Hash; + +SET output_format_hash_function = 'sipHash128'; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 20 FORMAT Hash; + +SET output_format_hash_function = 'sipHash64'; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 20 FORMAT Hash; + +SET output_format_hash_function = 'cityHash128'; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 20 FORMAT Hash; + +SET output_format_hash_function = 'cityHash64'; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 20 FORMAT Hash; + +SET output_format_hash_function = 'murmurHash3_128'; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 20 FORMAT Hash; + +SET output_format_hash_function = 'murmurHash3_64'; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 20 FORMAT Hash; + +SET output_format_hash_function = 'xxHash64'; +SELECT number AS hello, toString(number) AS world, (hello, world) AS tuple, nullIf(hello % 3, 0) AS sometimes_nulls FROM system.numbers LIMIT 20 FORMAT Hash;