From d5cf2fdaf20ca435721c63e5751c551df7c39d84 Mon Sep 17 00:00:00 2001 From: Felix He Date: Mon, 27 Apr 2026 11:08:06 -0700 Subject: [PATCH 1/3] SNOW-3385961: improve INFER_SCHEMA zero-row FileNotFoundError message When INFER_SCHEMA returns zero rows, the reader used to raise: FileNotFoundError: Given path: '{path}' could not be found or is empty. Zero rows can also mean file-format options silently filtered every row/header (e.g. PARSE_HEADER on a file with a leading blank line, SKIP_HEADER exceeding row count, or ON_ERROR=CONTINUE dropping bad rows). The old message sends users chasing a path-not-found issue that isn't real. Keep the exception type (FileNotFoundError) for back-compat, but rewrite the message to surface both possibilities and append any applied PARSE_HEADER / SKIP_HEADER / ON_ERROR values that are already in scope (no extra round-trip). Update the existing integ assertions in test_filepath_not_exist_or_empty to match the new wording and to access the message via str(ex_info.value) so the full (longer) message is compared. --- src/snowflake/snowpark/dataframe_reader.py | 25 ++++++++++++++++++- .../scala/test_dataframe_reader_suite.py | 13 +++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/snowflake/snowpark/dataframe_reader.py b/src/snowflake/snowpark/dataframe_reader.py index b13d3c33b2..21b63d056f 100644 --- a/src/snowflake/snowpark/dataframe_reader.py +++ b/src/snowflake/snowpark/dataframe_reader.py @@ -1338,8 +1338,31 @@ def _infer_schema_for_file_format( raise e if len(results) == 0: + # Zero rows from INFER_SCHEMA can mean the path is empty/missing, + # or that file format options silently filtered every row/header + # (e.g. PARSE_HEADER on a file with a leading blank line, + # SKIP_HEADER exceeding row count, or ON_ERROR=CONTINUE dropping + # bad rows). Surface both possibilities so callers don't chase a + # phantom path-not-found issue when the real cause is a format + # option. + option_hints = [] + for key in ("PARSE_HEADER", "SKIP_HEADER", "ON_ERROR"): + if key in format_type_options: + option_hints.append(f"{key}={format_type_options[key]}") + elif key in infer_schema_options: + option_hints.append(f"{key}={infer_schema_options[key]}") + hint_suffix = ( + f" Applied file format options: {', '.join(option_hints)}." + if option_hints + else "" + ) raise FileNotFoundError( - f"Given path: '{path}' could not be found or is empty." + f"Given path: '{path}' returned no results from INFER_SCHEMA. " + "The path may be empty or missing, or file format options may " + "have filtered every row/header (e.g. PARSE_HEADER on a file " + "with a leading blank line, SKIP_HEADER exceeding row count, " + "or ON_ERROR=CONTINUE silently dropping bad rows). Check the " + "file contents and file format options." + hint_suffix ) new_schema = [] schema_to_cast = [] diff --git a/tests/integ/scala/test_dataframe_reader_suite.py b/tests/integ/scala/test_dataframe_reader_suite.py index 6f0cae72e5..4d62e6a16a 100644 --- a/tests/integ/scala/test_dataframe_reader_suite.py +++ b/tests/integ/scala/test_dataframe_reader_suite.py @@ -2119,18 +2119,23 @@ def test_filepath_not_exist_or_empty(session): session.read.option("PARSE_HEADER", True).option("INFER_SCHEMA", True).csv( empty_file_path ) - assert f"Given path: '{empty_file_path}' could not be found or is empty." in str( - ex_info + assert ( + f"Given path: '{empty_file_path}' returned no results from INFER_SCHEMA." + in str(ex_info.value) ) + # Message should mention file format options so users don't chase a phantom + # path-not-found when the real cause is PARSE_HEADER/SKIP_HEADER/ON_ERROR. + assert "file format options" in str(ex_info.value) with pytest.raises(FileNotFoundError) as ex_info: session.read.option("PARSE_HEADER", True).option("INFER_SCHEMA", True).csv( not_exist_file_path ) assert ( - f"Given path: '{not_exist_file_path}' could not be found or is empty." - in str(ex_info) + f"Given path: '{not_exist_file_path}' returned no results from INFER_SCHEMA." + in str(ex_info.value) ) + assert "file format options" in str(ex_info.value) @pytest.mark.skipif( From 854b7d4684470a029828299b3b5282ceeaf643e6 Mon Sep 17 00:00:00 2001 From: Felix He Date: Mon, 27 Apr 2026 11:18:23 -0700 Subject: [PATCH 2/3] SNOW-3385961: simplify INFER_SCHEMA zero-row error message Trim the multi-line comment to a single 3-line note, replace the manual option-hint loop with a list comprehension, and shorten the error message without losing the "path missing or format-option-filtered" information. --- src/snowflake/snowpark/dataframe_reader.py | 35 ++++++++-------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/src/snowflake/snowpark/dataframe_reader.py b/src/snowflake/snowpark/dataframe_reader.py index 21b63d056f..f435e9a1b4 100644 --- a/src/snowflake/snowpark/dataframe_reader.py +++ b/src/snowflake/snowpark/dataframe_reader.py @@ -1338,31 +1338,20 @@ def _infer_schema_for_file_format( raise e if len(results) == 0: - # Zero rows from INFER_SCHEMA can mean the path is empty/missing, - # or that file format options silently filtered every row/header - # (e.g. PARSE_HEADER on a file with a leading blank line, - # SKIP_HEADER exceeding row count, or ON_ERROR=CONTINUE dropping - # bad rows). Surface both possibilities so callers don't chase a - # phantom path-not-found issue when the real cause is a format - # option. - option_hints = [] - for key in ("PARSE_HEADER", "SKIP_HEADER", "ON_ERROR"): - if key in format_type_options: - option_hints.append(f"{key}={format_type_options[key]}") - elif key in infer_schema_options: - option_hints.append(f"{key}={infer_schema_options[key]}") - hint_suffix = ( - f" Applied file format options: {', '.join(option_hints)}." - if option_hints - else "" - ) + # Zero rows can mean the path is empty/missing, or that file + # format options (PARSE_HEADER, SKIP_HEADER, ON_ERROR=CONTINUE) + # silently filtered everything out. + hints = [ + f"{k}={format_type_options.get(k, infer_schema_options.get(k))}" + for k in ("PARSE_HEADER", "SKIP_HEADER", "ON_ERROR") + if k in format_type_options or k in infer_schema_options + ] + suffix = f" Applied options: {', '.join(hints)}." if hints else "" raise FileNotFoundError( f"Given path: '{path}' returned no results from INFER_SCHEMA. " - "The path may be empty or missing, or file format options may " - "have filtered every row/header (e.g. PARSE_HEADER on a file " - "with a leading blank line, SKIP_HEADER exceeding row count, " - "or ON_ERROR=CONTINUE silently dropping bad rows). Check the " - "file contents and file format options." + hint_suffix + "The path may be empty/missing, or file format options may " + "have filtered every row/header. Check the file contents and " + "file format options." + suffix ) new_schema = [] schema_to_cast = [] From e5a0dee58cd62b7010d057ec685d1b1a80de351a Mon Sep 17 00:00:00 2001 From: Felix He Date: Wed, 29 Apr 2026 14:19:17 -0700 Subject: [PATCH 3/3] SNOW-3385961: update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 435d6af6dd..9b902b13ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ #### Bug Fixes - Fixed a bug where using parameter bindings for `CALL` queries issued through `session.sql` would raise an error. +- Improved the `FileNotFoundError` message raised when `INFER_SCHEMA` returns zero rows so it also points to file format options (`PARSE_HEADER`, `SKIP_HEADER`, `ON_ERROR=CONTINUE`) that can silently filter everything out, instead of only suggesting a missing path. ## 1.50.0 (2026-04-23)