Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@

- Added `artifact_repository` support to `udtf_configs` in `session.read.dbapi()`, enabling users to specify a custom artifact repository (e.g. PyPI) for packages used by the internal UDTF during distributed ingestion.

#### Improvements

- When `Session.reduce_describe_query_enabled` is enabled, fewer DESCRIBE queries are issued when the outer query only projects or renames columns from an inner subquery whose column types are already known.

#### Bug Fixes

- Fixed a bug where the `TRY_CAST` reader option was ignored when calling `DataFrameReader.schema().csv()`.
Expand Down
69 changes: 1 addition & 68 deletions src/snowflake/snowpark/_internal/analyzer/select_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,8 @@
has_invalid_projection_merge_functions,
)
from snowflake.snowpark._internal.utils import (
ExprAliasUpdateDict,
is_sql_select_statement,
quote_name,
ExprAliasUpdateDict,
)
import snowflake.snowpark.context as context

Expand Down Expand Up @@ -1593,72 +1592,6 @@ def select(self, cols: List[Expression]) -> "SelectStatement":
)
)

# When describe reduction is on and the inner select already has resolved
# attributes, infer new.attributes for this outer select by reusing datatype and
# nullable from the subquery: (0) skip if parent column names collide, (1) index
# attributes by quote_name (Snowflake identifier rules; invalid delimited forms
# raise), (2) walk new.projection, (3) only handle plain columns or Alias(column),
# (4) resolve source via the same quote_name key lookup, (5) assign only if every
# output column was inferred (length matches projection).
if self._session.reduce_describe_query_enabled and self.attributes is not None:
parent_attributes = self.attributes
projection = new.projection
inferred_attributes: Optional[List[Attribute]] = None
# Skip: no projection to walk (do not assert; leave new.attributes unchanged).
if projection is not None:
# Skip: duplicate output names on the parent — dict/lookup would be ambiguous.
attributes_by_normalized: Dict[str, Attribute] = {}
collision = False
for attr in parent_attributes:
key = quote_name(attr.name)
existing = attributes_by_normalized.get(key)
# Skip: two parent columns map to the same quote_name key.
if existing is not None and existing is not attr:
collision = True
break
attributes_by_normalized[key] = attr
if not collision:
inferred_attributes = []
for expr in projection:
source_column_name: Optional[str] = None
projected_column_name: Optional[str] = None
if isinstance(expr, (Attribute, UnresolvedAttribute)):
source_column_name = expr.name
projected_column_name = expr.name
elif isinstance(expr, Alias) and isinstance(
expr.child, (Attribute, UnresolvedAttribute)
):
source_column_name = expr.child.name
projected_column_name = expr.name
else:
# Skip: not a plain column or Alias(Attribute|UnresolvedAttribute).
inferred_attributes = []
break

if source_column_name is None or projected_column_name is None:
# Skip: missing projected output name.
inferred_attributes = []
break
source_attr = attributes_by_normalized.get(
quote_name(source_column_name)
)
# Skip: no parent column for this source name.
if source_attr is None:
inferred_attributes = []
break
inferred_attributes.append(
Attribute(
projected_column_name,
source_attr.datatype,
source_attr.nullable,
)
)
if len(inferred_attributes) != len(projection):
# Skip: incomplete inference (includes defensive mismatch).
inferred_attributes = None
if inferred_attributes is not None:
new.attributes = inferred_attributes

new.flatten_disabled = disable_next_level_flatten
assert new.projection is not None
new._column_states = derive_column_states_from_subquery(
Expand Down
2 changes: 1 addition & 1 deletion tests/integ/test_cte.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def test_binary(session, type, action):

def test_join_with_alias_dataframe(session):
expected_describe_count = (
2
3
if (session.reduce_describe_query_enabled and session.sql_simplifier_enabled)
else 4
)
Expand Down
Loading
Loading