diff --git a/CHANGELOG.md b/CHANGELOG.md index 435d6af6dd..ab483d621b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ #### Bug Fixes - Fixed a bug where using parameter bindings for `CALL` queries issued through `session.sql` would raise an error. +- Fixed a bug where `StringType` columns from Iceberg tables were not recognized as max-size strings. ## 1.50.0 (2026-04-23) diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 60558d0080..dd816a6ba6 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -278,7 +278,11 @@ def convert_sf_to_sp_type( ) if column_type_name == "TEXT": if internal_size > 0: - return StringType(internal_size, internal_size == max_string_size) + return StringType( + internal_size, + internal_size == max_string_size + or internal_size == _MAX_ICEBERG_STRING_SIZE, + ) elif internal_size == 0: return StringType() raise ValueError("Negative value is not a valid input for StringType") diff --git a/tests/integ/test_xml_reader_row_tag.py b/tests/integ/test_xml_reader_row_tag.py index fe9785e144..9f3f345d76 100644 --- a/tests/integ/test_xml_reader_row_tag.py +++ b/tests/integ/test_xml_reader_row_tag.py @@ -232,9 +232,11 @@ def test_read_xml_row_tag( session, file, row_tag, expected_row_count, expected_column_count ): df = session.read.option("rowTag", row_tag).xml(f"@{tmp_stage_name}/{file}") - result = df.collect() - assert len(result) == expected_row_count - assert len(result[0]) == expected_column_count + # Use count() + len(df.columns) instead of collect() to avoid materializing + # large result sets (e.g. 740 rows) that trigger paginated download URLs + # unsupported by StoredProcRestfulSession inside a stored procedure. + assert df.count() == expected_row_count + assert len(df.columns) == expected_column_count def test_read_xml_no_xxe(session): diff --git a/tests/unit/test_types.py b/tests/unit/test_types.py index d6b08d7792..0f9a064ad7 100644 --- a/tests/unit/test_types.py +++ b/tests/unit/test_types.py @@ -53,6 +53,7 @@ split_top_level_comma_fields, type_string_to_type_object, find_top_level_colon, + _MAX_ICEBERG_STRING_SIZE, ) from snowflake.snowpark.types import ( ArrayType, @@ -1044,6 +1045,25 @@ def test_convert_sf_to_sp_type_internal_size(): assert snowpark_type.length == 16777216 assert snowpark_type._is_max_size + # Iceberg deployments report internal_size=134217728 for max-length strings, + # which differs from the regular max_string_size (16777216). This must still + # be recognized as a max-size string so that StringType(134217728) == StringType(). + snowpark_type = convert_sf_to_sp_type( + "TEXT", 0, 0, _MAX_ICEBERG_STRING_SIZE, 16777216 + ) + assert isinstance(snowpark_type, StringType) + assert snowpark_type.length == _MAX_ICEBERG_STRING_SIZE + assert snowpark_type._is_max_size + assert snowpark_type == StringType() + + snowpark_type = convert_sf_to_sp_type( + "TEXT", 0, 0, _MAX_ICEBERG_STRING_SIZE, _MAX_ICEBERG_STRING_SIZE + ) + assert isinstance(snowpark_type, StringType) + assert snowpark_type.length == _MAX_ICEBERG_STRING_SIZE + assert snowpark_type._is_max_size + assert snowpark_type == StringType() + with pytest.raises( ValueError, match="Negative value is not a valid input for StringType" ):