From 14ee4f8e62bf59466b8f68c4d8671e707ffb6fd6 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 13 Nov 2025 11:28:49 +0800 Subject: [PATCH 1/4] test --- python/pyspark/sql/connect/dataframe.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index 65dea5cf1a57..ee75bcf63d3f 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -1737,7 +1737,10 @@ def __getattr__(self, name: str) -> "Column": errorClass="JVM_ATTRIBUTE_NOT_SUPPORTED", messageParameters={"attr_name": name} ) - if name not in self.columns: + if ( + os.environ.get("PYSPARK_VALIDATE_COLUMN_NAME_LEGACY") == "1" + and name not in self.columns + ): raise PySparkAttributeError( errorClass="ATTRIBUTE_NOT_SUPPORTED", messageParameters={"attr_name": name} ) From 54dcf0daffb3b483a82a96a317b4a6e3e5e0cb60 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 14 Nov 2025 13:47:35 +0800 Subject: [PATCH 2/4] test --- python/pyspark/sql/connect/dataframe.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index ee75bcf63d3f..1211fe10de89 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -1737,9 +1737,8 @@ def __getattr__(self, name: str) -> "Column": errorClass="JVM_ATTRIBUTE_NOT_SUPPORTED", messageParameters={"attr_name": name} ) - if ( - os.environ.get("PYSPARK_VALIDATE_COLUMN_NAME_LEGACY") == "1" - and name not in self.columns + if os.environ.get("PYSPARK_VALIDATE_COLUMN_NAME_LEGACY", "1") == "1" and not any( + field.name == name for field in self._schema ): raise PySparkAttributeError( errorClass="ATTRIBUTE_NOT_SUPPORTED", messageParameters={"attr_name": name} From d40722614e5baa280522de12cb884578c4ea9e9b Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 14 Nov 2025 15:41:59 +0800 Subject: [PATCH 
3/4] doc --- python/docs/source/migration_guide/pyspark_upgrade.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst b/python/docs/source/migration_guide/pyspark_upgrade.rst index c6cf69dadc93..d588e73ef919 100644 --- a/python/docs/source/migration_guide/pyspark_upgrade.rst +++ b/python/docs/source/migration_guide/pyspark_upgrade.rst @@ -25,7 +25,7 @@ Upgrading from PySpark 4.0 to 4.1 * In Spark 4.1, Python 3.9 support was dropped in PySpark. * In Spark 4.1, the minimum supported version for PyArrow has been raised from 11.0.0 to 15.0.0 in PySpark. * In Spark 4.1, the minimum supported version for Pandas has been raised from 2.0.0 to 2.2.0 in PySpark. -* In Spark 4.1, ``DataFrame['name']`` on Spark Connect Python Client no longer eagerly validate the column name. To restore the legacy behavior, set ``PYSPARK_VALIDATE_COLUMN_NAME_LEGACY`` environment variable to ``1``. +* In Spark 4.1, ``DataFrame['name']`` and ``DataFrame.name`` on the Spark Connect Python Client no longer eagerly validate the column name. To restore the legacy behavior, set the ``PYSPARK_VALIDATE_COLUMN_NAME_LEGACY`` environment variable to ``1``. * In Spark 4.1, Arrow-optimized Python UDF supports UDT input / output instead of falling back to the regular UDF. To restore the legacy behavior, set ``spark.sql.execution.pythonUDF.arrow.legacy.fallbackOnUDT`` to ``true``. * In Spark 4.1, unnecessary conversion to pandas instances is removed when ``spark.sql.execution.pythonUDF.arrow.enabled`` is enabled. As a result, the type coercion changes when the produced output has a schema different from the specified schema. To restore the previous behavior, enable ``spark.sql.legacy.execution.pythonUDF.pandas.conversion.enabled``. * In Spark 4.1, unnecessary conversion to pandas instances is removed when ``spark.sql.execution.pythonUDTF.arrow.enabled`` is enabled. 
As a result, the type coercion changes when the produced output has a schema different from the specified schema. To restore the previous behavior, enable ``spark.sql.legacy.execution.pythonUDTF.pandas.conversion.enabled``. From 79b54c8ad7e18d6efbff8367746233d9cf100bc6 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 14 Nov 2025 15:52:38 +0800 Subject: [PATCH 4/4] fix --- python/pyspark/sql/connect/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index 1211fe10de89..c4f54f77de60 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -1737,7 +1737,7 @@ def __getattr__(self, name: str) -> "Column": errorClass="JVM_ATTRIBUTE_NOT_SUPPORTED", messageParameters={"attr_name": name} ) - if os.environ.get("PYSPARK_VALIDATE_COLUMN_NAME_LEGACY", "1") == "1" and not any( + if os.environ.get("PYSPARK_VALIDATE_COLUMN_NAME_LEGACY") == "1" and not any( field.name == name for field in self._schema ): raise PySparkAttributeError(