From 41347509fe79d2aac4c51ac67519392e28993a05 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 5 Feb 2026 22:29:58 +0000 Subject: [PATCH 1/7] feat: Disable progress bars in Anywidget mode to reduce notebook clutter --- bigframes/display/anywidget.py | 39 +-- bigframes/display/html.py | 3 +- notebooks/dataframes/anywidget_mode.ipynb | 336 +++++++--------------- 3 files changed, 128 insertions(+), 250 deletions(-) diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index be0d2b45d0..9bd20fce43 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -133,25 +133,26 @@ def _initial_load(self) -> None: # obtain the row counts # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()` # before we get here so that the count might already be cached. - self._reset_batches_for_new_page_size() + with bigframes.option_context("display.progress_bar", None): + self._reset_batches_for_new_page_size() - if self._batches is None: - self._error_message = ( - "Could not retrieve data batches. Data might be unavailable or " - "an error occurred." - ) - self.row_count = None - elif self._batches.total_rows is None: - # Total rows is unknown, this is an expected state. - # TODO(b/461536343): Cheaply discover if we have exactly 1 page. - # There are cases where total rows is not set, but there are no additional - # pages. We could disable the "next" button in these cases. - self.row_count = None - else: - self.row_count = self._batches.total_rows + if self._batches is None: + self._error_message = ( + "Could not retrieve data batches. Data might be unavailable or " + "an error occurred." + ) + self.row_count = None + elif self._batches.total_rows is None: + # Total rows is unknown, this is an expected state. + # TODO(b/461536343): Cheaply discover if we have exactly 1 page. + # There are cases where total rows is not set, but there are no additional + # pages. We could disable the "next" button in these cases. 
+ self.row_count = None + else: + self.row_count = self._batches.total_rows - # get the initial page - self._set_table_html() + # get the initial page + self._set_table_html() @traitlets.observe("_initial_load_complete") def _on_initial_load_complete(self, change: dict[str, Any]): @@ -281,7 +282,9 @@ def _reset_batches_for_new_page_size(self) -> None: def _set_table_html(self) -> None: """Sets the current html data based on the current page and page size.""" new_page = None - with self._setting_html_lock: + with self._setting_html_lock, bigframes.option_context( + "display.progress_bar", None + ): if self._error_message: self.table_html = ( f"
" diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 6102d1512c..2bf847a259 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -363,7 +363,8 @@ def repr_mimebundle( if opts.repr_mode == "anywidget": try: - return get_anywidget_bundle(obj, include=include, exclude=exclude) + with bigframes.option_context("display.progress_bar", None): + return get_anywidget_bundle(obj, include=include, exclude=exclude) except ImportError: # Anywidget is an optional dependency, so warn rather than fail. # TODO(shuowei): When Anywidget becomes the default for all repr modes, diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index e9491610ac..3bc12617fc 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "d10bfca4", "metadata": {}, "outputs": [], @@ -91,7 +91,9 @@ "outputs": [ { "data": { - "text/html": [], + "text/html": [ + "Starting." 
+ ], "text/plain": [ "" ] @@ -117,17 +119,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "state gender year name number\n", - " AL F 1910 Lillian 99\n", - " AL F 1910 Ruby 204\n", - " AL F 1910 Helen 76\n", - " AL F 1910 Eunice 41\n", - " AR F 1910 Dora 42\n", - " CA F 1910 Edna 62\n", - " CA F 1910 Helen 239\n", - " CO F 1910 Alice 46\n", - " FL F 1910 Willie 71\n", - " FL F 1910 Thelma 65\n", + "state gender year name number\n", + " AL F 1910 Annie 482\n", + " AL F 1910 Myrtle 104\n", + " AR F 1910 Lillian 56\n", + " CT F 1910 Anne 38\n", + " CT F 1910 Frances 45\n", + " FL F 1910 Margaret 53\n", + " GA F 1910 Mae 73\n", + " GA F 1910 Beatrice 96\n", + " GA F 1910 Lola 47\n", + " IA F 1910 Viola 49\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -145,30 +147,10 @@ "id": "220340b0", "metadata": {}, "outputs": [ - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "6fb22be7f21f4d1dacd76dc62a1a7818", + "model_id": "c74c3719ba43489890185b5c9880acfc", "version_major": 2, "version_minor": 1 }, @@ -204,80 +186,80 @@ " AL\n", " F\n", " 1910\n", - " Lillian\n", - " 99\n", + " Hazel\n", + " 51\n", " \n", " \n", " 1\n", " AL\n", " F\n", " 1910\n", - " Ruby\n", - " 204\n", + " Lucy\n", + " 76\n", " \n", " \n", " 2\n", - " AL\n", + " AR\n", " F\n", " 1910\n", - " Helen\n", - " 76\n", + " Nellie\n", + " 39\n", " \n", " \n", " 3\n", - " AL\n", + " AR\n", " F\n", " 1910\n", - " Eunice\n", - " 41\n", + " Lena\n", + " 40\n", " \n", " \n", " 4\n", - " AR\n", + " CO\n", " F\n", " 1910\n", - " Dora\n", - " 42\n", + " Thelma\n", + " 36\n", " \n", " \n", " 5\n", - " CA\n", + " CO\n", " F\n", " 1910\n", - " Edna\n", - " 62\n", + " Ruth\n", + " 68\n", " \n", " \n", " 6\n", - " CA\n", + " CT\n", " F\n", " 
1910\n", - " Helen\n", - " 239\n", + " Elizabeth\n", + " 86\n", " \n", " \n", " 7\n", - " CO\n", + " DC\n", " F\n", " 1910\n", - " Alice\n", - " 46\n", + " Mary\n", + " 80\n", " \n", " \n", " 8\n", " FL\n", " F\n", " 1910\n", - " Willie\n", - " 71\n", + " Annie\n", + " 101\n", " \n", " \n", " 9\n", " FL\n", " F\n", " 1910\n", - " Thelma\n", - " 65\n", + " Alma\n", + " 39\n", " \n", " \n", "\n", @@ -285,17 +267,17 @@ "
[5552452 rows x 5 columns in total]" ], "text/plain": [ - "state gender year name number\n", - " AL F 1910 Lillian 99\n", - " AL F 1910 Ruby 204\n", - " AL F 1910 Helen 76\n", - " AL F 1910 Eunice 41\n", - " AR F 1910 Dora 42\n", - " CA F 1910 Edna 62\n", - " CA F 1910 Helen 239\n", - " CO F 1910 Alice 46\n", - " FL F 1910 Willie 71\n", - " FL F 1910 Thelma 65\n", + "state gender year name number\n", + " AL F 1910 Hazel 51\n", + " AL F 1910 Lucy 76\n", + " AR F 1910 Nellie 39\n", + " AR F 1910 Lena 40\n", + " CO F 1910 Thelma 36\n", + " CO F 1910 Ruth 68\n", + " CT F 1910 Elizabeth 86\n", + " DC F 1910 Mary 80\n", + " FL F 1910 Annie 101\n", + " FL F 1910 Alma 39\n", "...\n", "\n", "[5552452 rows x 5 columns]" @@ -329,7 +311,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 171.4 MB in 41 seconds of slot time. [Job bigframes-dev:US.492b5260-9f44-495c-be09-2ae1324a986c details]\n", + " Query processed 171.4 MB in 35 seconds of slot time. [Job bigframes-dev:US.e15f1b34-e414-42d2-857b-926ea25947c4 details]\n", " " ], "text/plain": [ @@ -355,7 +337,9 @@ }, { "data": { - "text/html": [], + "text/html": [ + "Starting." + ], "text/plain": [ "" ] @@ -404,38 +388,10 @@ "id": "da23e0f3", "metadata": {}, "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 88.8 MB in 2 seconds of slot time. [Job bigframes-dev:US.job_gsx0h2jHoOSYwqGKUS3lAYLf_qi3 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 88.8 MB in 3 seconds of slot time. 
[Job bigframes-dev:US.job_1VivAJ2InPdg5RXjWfvAJ1B0oxO3 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "7d82208e7e5e40dd9dbf64c4c561cab3", + "model_id": "2ad9004bda464950ab6eda63b1b86a3a", "version_major": 2, "version_minor": 1 }, @@ -533,34 +489,6 @@ "id": "6920d49b", "metadata": {}, "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 215.9 MB in 10 seconds of slot time. [Job bigframes-dev:US.job_cmNyG5sJ1IDCyFINx7teExQOZ6UQ details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 215.9 MB in 8 seconds of slot time. [Job bigframes-dev:US.job_aQvP3Sn04Ss4flSLaLhm0sKzFvrd details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "name": "stdout", "output_type": "stream", @@ -571,12 +499,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "52d11291ba1d42e6b544acbd86eef6cf", + "model_id": "c1b84125429c4fbc90a22e1adbeea901", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -648,34 +576,6 @@ "id": "a9d5d13a", "metadata": {}, "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 215.9 MB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. 
\n", - " Query processed 215.9 MB in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "name": "stdout", "output_type": "stream", @@ -686,12 +586,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "32c61c84740d45a0ac37202a76c7c14e", + "model_id": "23fc730e004b4eec807d2829bffa3ce0", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -752,33 +652,7 @@ "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", @@ -792,7 +666,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9d60a47296214553bb10c434b5ee8330", + "model_id": "a3bf6021c6fd44299d317d3b44213b50", "version_major": 2, "version_minor": 1 }, @@ -839,24 +713,6 @@ " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", - " 29.08.018\n", - " E04H 6/12\n", - " <NA>\n", - " 18157874.1\n", - " 
21.02.2018\n", - " 22.02.2017\n", - " Liedtke & Partner Patentanw√§lte\n", - " SHB Hebezeugbau GmbH\n", - " VOLGER, Alexander\n", - " STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\n", - " EP 3 366 869 A1\n", - " \n", - " \n", - " 1\n", - " {'application_number': None, 'class_internatio...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", " 03.10.2018\n", " H05B 6/12\n", " <NA>\n", @@ -870,7 +726,7 @@ " EP 3 383 141 A2\n", " \n", " \n", - " 2\n", + " 1\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -888,7 +744,7 @@ " EP 3 382 744 A1\n", " \n", " \n", - " 3\n", + " 2\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -906,7 +762,7 @@ " EP 3 382 553 A1\n", " \n", " \n", - " 4\n", + " 3\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -923,6 +779,24 @@ " MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\n", " EP 3 381 276 A1\n", " \n", + " \n", + " 4\n", + " {'application_number': None, 'class_internatio...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", + " 29.08.018\n", + " E04H 6/12\n", + " <NA>\n", + " 18157874.1\n", + " 21.02.2018\n", + " 22.02.2017\n", + " Liedtke & Partner Patentanw√§lte\n", + " SHB Hebezeugbau GmbH\n", + " VOLGER, Alexander\n", + " STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\n", + " EP 3 366 869 A1\n", + " \n", " \n", "\n", "

5 rows × 15 columns

\n", @@ -944,32 +818,32 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 29.08.018 E04H 6/12 18157874.1 \n", - "1 03.10.2018 H05B 6/12 18165514.3 \n", - "2 03.10.2018 H01L 21/20 18166536.5 \n", - "3 03.10.2018 G06F 11/30 18157347.8 \n", - "4 03.10.2018 A01K 31/00 18171005.4 \n", + "0 03.10.2018 H05B 6/12 18165514.3 \n", + "1 03.10.2018 H01L 21/20 18166536.5 \n", + "2 03.10.2018 G06F 11/30 18157347.8 \n", + "3 03.10.2018 A01K 31/00 18171005.4 \n", + "4 29.08.018 E04H 6/12 18157874.1 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", - "1 03.04.2018 30.03.2017 \n", - "2 16.02.2016 Scheider, Sascha et al \n", - "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", - "4 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", + "0 03.04.2018 30.03.2017 \n", + "1 16.02.2016 Scheider, Sascha et al \n", + "2 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "3 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", + "4 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", "\n", " applicant_line_1 inventor_line_1 \\\n", - "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", - "1 BSH Hausger√§te GmbH Acero Acero, Jesus \n", - "2 EV Group E. Thallner GmbH Kurz, Florian \n", - "3 FUJITSU LIMITED Kukihara, Kensuke \n", - "4 Linco Food Systems A/S Thrane, Uffe \n", + "0 BSH Hausger√§te GmbH Acero Acero, Jesus \n", + "1 EV Group E. Thallner GmbH Kurz, Florian \n", + "2 FUJITSU LIMITED Kukihara, Kensuke \n", + "3 Linco Food Systems A/S Thrane, Uffe \n", + "4 SHB Hebezeugbau GmbH VOLGER, Alexander \n", "\n", " title_line_1 number \n", - "0 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", - "1 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", - "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", - "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... 
EP 3 382 553 A1 \n", - "4 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "0 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", + "1 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "2 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "3 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "4 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", "\n", "[5 rows x 15 columns]" ] From 3b7ce8e4785f1d2c185d78f92997062938192d2a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 6 Feb 2026 01:15:01 +0000 Subject: [PATCH 2/7] feat: guard polar engine --- bigframes/core/compile/polars/compiler.py | 8 +++++++- bigframes/core/compile/polars/lowering.py | 2 +- bigframes/operations/json_ops.py | 1 + bigframes/session/polars_executor.py | 2 ++ 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py index 1f0ca592e5..b07d2489a1 100644 --- a/bigframes/core/compile/polars/compiler.py +++ b/bigframes/core/compile/polars/compiler.py @@ -429,7 +429,13 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr: @compile_op.register(json_ops.JSONDecode) def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr: assert isinstance(op, json_ops.JSONDecode) - return input.str.json_decode(_DTYPE_MAPPING[op.to_type]) + if op.safe: + # Polars does not support safe JSON decoding (returning null on failure). + # Fallback to BigQuery execution. + raise NotImplementedError( + "Safe JSON decoding is not supported in Polars executor." 
+ ) + return input.str.json_decode(_bigframes_dtype_to_polars_dtype(op.to_type)) @compile_op.register(arr_ops.ToArrayOp) def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr: diff --git a/bigframes/core/compile/polars/lowering.py b/bigframes/core/compile/polars/lowering.py index bf617d6879..5f80904b3b 100644 --- a/bigframes/core/compile/polars/lowering.py +++ b/bigframes/core/compile/polars/lowering.py @@ -391,7 +391,7 @@ def _lower_cast(cast_op: ops.AsTypeOp, arg: expression.Expression): return arg if arg.output_type == dtypes.JSON_DTYPE: - return json_ops.JSONDecode(cast_op.to_type).as_expr(arg) + return json_ops.JSONDecode(cast_op.to_type, safe=cast_op.safe).as_expr(arg) if ( arg.output_type == dtypes.STRING_DTYPE and cast_op.to_type == dtypes.DATETIME_DTYPE diff --git a/bigframes/operations/json_ops.py b/bigframes/operations/json_ops.py index 7260a79223..3d3ccfef11 100644 --- a/bigframes/operations/json_ops.py +++ b/bigframes/operations/json_ops.py @@ -220,6 +220,7 @@ def output_type(self, *input_types): class JSONDecode(base_ops.UnaryOp): name: typing.ClassVar[str] = "json_decode" to_type: dtypes.Dtype + safe: bool = False def output_type(self, *input_types): input_type = input_types[0] diff --git a/bigframes/session/polars_executor.py b/bigframes/session/polars_executor.py index 575beff8fc..ead0b0591b 100644 --- a/bigframes/session/polars_executor.py +++ b/bigframes/session/polars_executor.py @@ -34,6 +34,7 @@ numeric_ops, string_ops, ) +import bigframes.operations.json_ops as json_ops from bigframes.session import executor, semi_executor if TYPE_CHECKING: @@ -94,6 +95,7 @@ string_ops.EndsWithOp, string_ops.StrContainsOp, string_ops.StrContainsRegexOp, + json_ops.JSONDecode, ) _COMPATIBLE_AGG_OPS = ( agg_ops.SizeOp, From 196c11197945f990452e5d05489521b8d240b18d Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 6 Feb 2026 02:13:10 +0000 Subject: [PATCH 3/7] fix: Implement safe JSON decoding in Polars compiler --- 
bigframes/core/compile/polars/compiler.py | 66 +++++++++++++++++++++-- tests/unit/test_series_polars.py | 1 - 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py index b07d2489a1..a9fd492f98 100644 --- a/bigframes/core/compile/polars/compiler.py +++ b/bigframes/core/compile/polars/compiler.py @@ -16,6 +16,7 @@ import dataclasses import functools import itertools +import json from typing import cast, Literal, Optional, Sequence, Tuple, Type, TYPE_CHECKING import pandas as pd @@ -429,13 +430,68 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr: @compile_op.register(json_ops.JSONDecode) def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr: assert isinstance(op, json_ops.JSONDecode) + target_dtype = _bigframes_dtype_to_polars_dtype(op.to_type) if op.safe: # Polars does not support safe JSON decoding (returning null on failure). - # Fallback to BigQuery execution. - raise NotImplementedError( - "Safe JSON decoding is not supported in Polars executor." - ) - return input.str.json_decode(_bigframes_dtype_to_polars_dtype(op.to_type)) + # We use map_elements to provide safe JSON decoding. 
+ def safe_decode(val): + if val is None: + return None + try: + decoded = json.loads(val) + except Exception: + return None + + if decoded is None: + return None + + if op.to_type == bigframes.dtypes.INT_DTYPE: + if type(decoded) is bool: + return None + if isinstance(decoded, int): + return decoded + if isinstance(decoded, float): + if decoded.is_integer(): + return int(decoded) + if isinstance(decoded, str): + try: + return int(decoded) + except Exception: + pass + return None + + if op.to_type == bigframes.dtypes.FLOAT_DTYPE: + if type(decoded) is bool: + return None + if isinstance(decoded, (int, float)): + return float(decoded) + if isinstance(decoded, str): + try: + return float(decoded) + except Exception: + pass + return None + + if op.to_type == bigframes.dtypes.BOOL_DTYPE: + if isinstance(decoded, bool): + return decoded + if isinstance(decoded, str): + if decoded.lower() == "true": + return True + if decoded.lower() == "false": + return False + return None + + if op.to_type == bigframes.dtypes.STRING_DTYPE: + if isinstance(decoded, str): + return decoded + return None + + return decoded + + return input.map_elements(safe_decode, return_dtype=target_dtype) + + return input.str.json_decode(target_dtype) @compile_op.register(arr_ops.ToArrayOp) def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr: diff --git a/tests/unit/test_series_polars.py b/tests/unit/test_series_polars.py index 516a46d4dd..6f04631264 100644 --- a/tests/unit/test_series_polars.py +++ b/tests/unit/test_series_polars.py @@ -4142,7 +4142,6 @@ def test_json_astype_others_raise_error(data, to_type): bf_series.astype(to_type, errors="raise").to_pandas() -@pytest.mark.skip(reason="AssertionError: Series NA mask are different") @pytest.mark.parametrize( ("data", "to_type"), [ From 4740708447367bc8ccd464912e72b69ada80f81a Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 6 Feb 2026 20:07:33 +0000 Subject: [PATCH 4/7] revert: Roll back to state at 41347509f --- 
bigframes/core/compile/polars/compiler.py | 64 +---------------------- bigframes/core/compile/polars/lowering.py | 2 +- bigframes/ml/base.py | 9 ++-- bigframes/ml/compose.py | 6 +-- bigframes/ml/core.py | 5 +- bigframes/ml/imported.py | 15 +++--- bigframes/ml/llm.py | 20 +++---- bigframes/ml/model_selection.py | 7 ++- bigframes/ml/preprocessing.py | 4 +- bigframes/operations/json_ops.py | 1 - bigframes/session/polars_executor.py | 2 - tests/unit/test_series_polars.py | 1 + 12 files changed, 32 insertions(+), 104 deletions(-) diff --git a/bigframes/core/compile/polars/compiler.py b/bigframes/core/compile/polars/compiler.py index a9fd492f98..1f0ca592e5 100644 --- a/bigframes/core/compile/polars/compiler.py +++ b/bigframes/core/compile/polars/compiler.py @@ -16,7 +16,6 @@ import dataclasses import functools import itertools -import json from typing import cast, Literal, Optional, Sequence, Tuple, Type, TYPE_CHECKING import pandas as pd @@ -430,68 +429,7 @@ def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr: @compile_op.register(json_ops.JSONDecode) def _(self, op: ops.ScalarOp, input: pl.Expr) -> pl.Expr: assert isinstance(op, json_ops.JSONDecode) - target_dtype = _bigframes_dtype_to_polars_dtype(op.to_type) - if op.safe: - # Polars does not support safe JSON decoding (returning null on failure). - # We use map_elements to provide safe JSON decoding. 
- def safe_decode(val): - if val is None: - return None - try: - decoded = json.loads(val) - except Exception: - return None - - if decoded is None: - return None - - if op.to_type == bigframes.dtypes.INT_DTYPE: - if type(decoded) is bool: - return None - if isinstance(decoded, int): - return decoded - if isinstance(decoded, float): - if decoded.is_integer(): - return int(decoded) - if isinstance(decoded, str): - try: - return int(decoded) - except Exception: - pass - return None - - if op.to_type == bigframes.dtypes.FLOAT_DTYPE: - if type(decoded) is bool: - return None - if isinstance(decoded, (int, float)): - return float(decoded) - if isinstance(decoded, str): - try: - return float(decoded) - except Exception: - pass - return None - - if op.to_type == bigframes.dtypes.BOOL_DTYPE: - if isinstance(decoded, bool): - return decoded - if isinstance(decoded, str): - if decoded.lower() == "true": - return True - if decoded.lower() == "false": - return False - return None - - if op.to_type == bigframes.dtypes.STRING_DTYPE: - if isinstance(decoded, str): - return decoded - return None - - return decoded - - return input.map_elements(safe_decode, return_dtype=target_dtype) - - return input.str.json_decode(target_dtype) + return input.str.json_decode(_DTYPE_MAPPING[op.to_type]) @compile_op.register(arr_ops.ToArrayOp) def _(self, op: ops.ToArrayOp, *inputs: pl.Expr) -> pl.Expr: diff --git a/bigframes/core/compile/polars/lowering.py b/bigframes/core/compile/polars/lowering.py index 5f80904b3b..bf617d6879 100644 --- a/bigframes/core/compile/polars/lowering.py +++ b/bigframes/core/compile/polars/lowering.py @@ -391,7 +391,7 @@ def _lower_cast(cast_op: ops.AsTypeOp, arg: expression.Expression): return arg if arg.output_type == dtypes.JSON_DTYPE: - return json_ops.JSONDecode(cast_op.to_type, safe=cast_op.safe).as_expr(arg) + return json_ops.JSONDecode(cast_op.to_type).as_expr(arg) if ( arg.output_type == dtypes.STRING_DTYPE and cast_op.to_type == dtypes.DATETIME_DTYPE diff 
--git a/bigframes/ml/base.py b/bigframes/ml/base.py index 3f6ccecaa2..9b38702cce 100644 --- a/bigframes/ml/base.py +++ b/bigframes/ml/base.py @@ -24,8 +24,7 @@ """ import abc -import typing -from typing import Optional, TypeVar, Union +from typing import cast, Optional, TypeVar, Union import warnings import bigframes_vendored.sklearn.base @@ -134,7 +133,7 @@ def register(self: _T, vertex_ai_model_id: Optional[str] = None) -> _T: self._bqml_model = self._create_bqml_model() # type: ignore except AttributeError: raise RuntimeError("A model must be trained before register.") - self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) + self._bqml_model = cast(core.BqmlModel, self._bqml_model) self._bqml_model.register(vertex_ai_model_id) return self @@ -287,7 +286,7 @@ def _predict_and_retry( bpd.concat([df_result, df_succ]) if df_result is not None else df_succ ) - df_result = typing.cast( + df_result = cast( bpd.DataFrame, bpd.concat([df_result, df_fail]) if df_result is not None else df_fail, ) @@ -307,7 +306,7 @@ def _extract_output_names(self): output_names = [] for transform_col in self._bqml_model._model._properties["transformColumns"]: - transform_col_dict = typing.cast(dict, transform_col) + transform_col_dict = cast(dict, transform_col) # pass the columns that are not transformed if "transformSql" not in transform_col_dict: continue diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index f8244fb0d8..d638e026e4 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -21,7 +21,7 @@ import re import types import typing -from typing import Iterable, List, Optional, Set, Tuple, Union +from typing import cast, Iterable, List, Optional, Set, Tuple, Union from bigframes_vendored import constants import bigframes_vendored.sklearn.compose._column_transformer @@ -218,7 +218,7 @@ def camel_to_snake(name): output_names = [] for transform_col in bq_model._properties["transformColumns"]: - transform_col_dict = typing.cast(dict, 
transform_col) + transform_col_dict = cast(dict, transform_col) # pass the columns that are not transformed if "transformSql" not in transform_col_dict: continue @@ -282,7 +282,7 @@ def _merge( return self # SQLScalarColumnTransformer only work inside ColumnTransformer feature_columns_sorted = sorted( [ - typing.cast(str, feature_column.name) + cast(str, feature_column.name) for feature_column in bq_model.feature_columns ] ) diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py index 620843fb6e..4dbc1a5fa3 100644 --- a/bigframes/ml/core.py +++ b/bigframes/ml/core.py @@ -18,8 +18,7 @@ import dataclasses import datetime -import typing -from typing import Callable, Iterable, Mapping, Optional, Union +from typing import Callable, cast, Iterable, Mapping, Optional, Union import uuid from google.cloud import bigquery @@ -377,7 +376,7 @@ def copy(self, new_model_name: str, replace: bool = False) -> BqmlModel: def register(self, vertex_ai_model_id: Optional[str] = None) -> BqmlModel: if vertex_ai_model_id is None: # vertex id needs to start with letters. https://cloud.google.com/vertex-ai/docs/general/resource-naming - vertex_ai_model_id = "bigframes_" + typing.cast(str, self._model.model_id) + vertex_ai_model_id = "bigframes_" + cast(str, self._model.model_id) # truncate as Vertex ID only accepts 63 characters, easily exceeding the limit for temp models. # The possibility of conflicts should be low. 
diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index 56b5d6735c..295649ed7f 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -16,8 +16,7 @@ from __future__ import annotations -import typing -from typing import Mapping, Optional +from typing import cast, Mapping, Optional from google.cloud import bigquery @@ -79,7 +78,7 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) + self._bqml_model = cast(core.BqmlModel, self._bqml_model) (X,) = utils.batch_convert_to_dataframe(X) @@ -100,7 +99,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> TensorFlowModel: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) + self._bqml_model = cast(core.BqmlModel, self._bqml_model) new_model = self._bqml_model.copy(model_name, replace) return new_model.session.read_gbq_model(model_name) @@ -158,7 +157,7 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) + self._bqml_model = cast(core.BqmlModel, self._bqml_model) (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) @@ -179,7 +178,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> ONNXModel: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) + self._bqml_model = cast(core.BqmlModel, self._bqml_model) new_model = self._bqml_model.copy(model_name, replace) return 
new_model.session.read_gbq_model(model_name) @@ -277,7 +276,7 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) + self._bqml_model = cast(core.BqmlModel, self._bqml_model) (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) @@ -298,7 +297,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBoostModel: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) + self._bqml_model = cast(core.BqmlModel, self._bqml_model) new_model = self._bqml_model.copy(model_name, replace) return new_model.session.read_gbq_model(model_name) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 585599c9b6..f4e60f3f9d 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -16,8 +16,7 @@ from __future__ import annotations -import typing -from typing import Iterable, Literal, Mapping, Optional, Union +from typing import cast, Iterable, Literal, Mapping, Optional, Union import warnings import bigframes_vendored.constants as constants @@ -253,7 +252,7 @@ def predict( if len(X.columns) == 1: # BQML identified the column by name - col_label = typing.cast(blocks.Label, X.columns[0]) + col_label = cast(blocks.Label, X.columns[0]) X = X.rename(columns={col_label: "content"}) options: dict = {} @@ -392,7 +391,7 @@ def predict( if len(X.columns) == 1: # BQML identified the column by name - col_label = typing.cast(blocks.Label, X.columns[0]) + col_label = cast(blocks.Label, X.columns[0]) X = X.rename(columns={col_label: "content"}) # TODO(garrettwu): remove transform to ObjRefRuntime when BQML supports ObjRef as input @@ -605,10 +604,7 @@ def fit( options["prompt_col"] = X.columns.tolist()[0] self._bqml_model = 
self._bqml_model_factory.create_llm_remote_model( - X, - y, - options=options, - connection_name=typing.cast(str, self.connection_name), + X, y, options=options, connection_name=cast(str, self.connection_name) ) return self @@ -739,7 +735,7 @@ def predict( if len(X.columns) == 1: # BQML identified the column by name - col_label = typing.cast(blocks.Label, X.columns[0]) + col_label = cast(blocks.Label, X.columns[0]) X = X.rename(columns={col_label: "prompt"}) options: dict = { @@ -824,8 +820,8 @@ def score( ) # BQML identified the column by name - X_col_label = typing.cast(blocks.Label, X.columns[0]) - y_col_label = typing.cast(blocks.Label, y.columns[0]) + X_col_label = cast(blocks.Label, X.columns[0]) + y_col_label = cast(blocks.Label, y.columns[0]) X = X.rename(columns={X_col_label: "input_text"}) y = y.rename(columns={y_col_label: "output_text"}) @@ -1037,7 +1033,7 @@ def predict( if len(X.columns) == 1: # BQML identified the column by name - col_label = typing.cast(blocks.Label, X.columns[0]) + col_label = cast(blocks.Label, X.columns[0]) X = X.rename(columns={col_label: "prompt"}) options = { diff --git a/bigframes/ml/model_selection.py b/bigframes/ml/model_selection.py index 3d23fbf568..5adfb03b7f 100644 --- a/bigframes/ml/model_selection.py +++ b/bigframes/ml/model_selection.py @@ -20,8 +20,7 @@ import inspect from itertools import chain import time -import typing -from typing import Generator, List, Optional, Union +from typing import cast, Generator, List, Optional, Union import bigframes_vendored.sklearn.model_selection._split as vendored_model_selection_split import bigframes_vendored.sklearn.model_selection._validation as vendored_model_selection_validation @@ -100,10 +99,10 @@ def _stratify_split(df: bpd.DataFrame, stratify: bpd.Series) -> List[bpd.DataFra train_dfs.append(train) test_dfs.append(test) - train_df = typing.cast( + train_df = cast( bpd.DataFrame, bpd.concat(train_dfs).drop(columns="bigframes_stratify_col") ) - test_df = typing.cast( + 
test_df = cast( bpd.DataFrame, bpd.concat(test_dfs).drop(columns="bigframes_stratify_col") ) return [train_df, test_df] diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index 22a3e7e222..8bf89b0838 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -18,7 +18,7 @@ from __future__ import annotations import typing -from typing import Iterable, List, Literal, Optional, Union +from typing import cast, Iterable, List, Literal, Optional, Union import bigframes_vendored.sklearn.preprocessing._data import bigframes_vendored.sklearn.preprocessing._discretization @@ -470,7 +470,7 @@ def _parse_from_sql(cls, sql: str) -> tuple[OneHotEncoder, str]: s = sql[sql.find("(") + 1 : sql.find(")")] col_label, drop_str, top_k, frequency_threshold = s.split(", ") drop = ( - typing.cast(Literal["most_frequent"], "most_frequent") + cast(Literal["most_frequent"], "most_frequent") if drop_str.lower() == "'most_frequent'" else None ) diff --git a/bigframes/operations/json_ops.py b/bigframes/operations/json_ops.py index 3d3ccfef11..7260a79223 100644 --- a/bigframes/operations/json_ops.py +++ b/bigframes/operations/json_ops.py @@ -220,7 +220,6 @@ def output_type(self, *input_types): class JSONDecode(base_ops.UnaryOp): name: typing.ClassVar[str] = "json_decode" to_type: dtypes.Dtype - safe: bool = False def output_type(self, *input_types): input_type = input_types[0] diff --git a/bigframes/session/polars_executor.py b/bigframes/session/polars_executor.py index ead0b0591b..575beff8fc 100644 --- a/bigframes/session/polars_executor.py +++ b/bigframes/session/polars_executor.py @@ -34,7 +34,6 @@ numeric_ops, string_ops, ) -import bigframes.operations.json_ops as json_ops from bigframes.session import executor, semi_executor if TYPE_CHECKING: @@ -95,7 +94,6 @@ string_ops.EndsWithOp, string_ops.StrContainsOp, string_ops.StrContainsRegexOp, - json_ops.JSONDecode, ) _COMPATIBLE_AGG_OPS = ( agg_ops.SizeOp, diff --git 
a/tests/unit/test_series_polars.py b/tests/unit/test_series_polars.py index 6f04631264..516a46d4dd 100644 --- a/tests/unit/test_series_polars.py +++ b/tests/unit/test_series_polars.py @@ -4142,6 +4142,7 @@ def test_json_astype_others_raise_error(data, to_type): bf_series.astype(to_type, errors="raise").to_pandas() +@pytest.mark.skip(reason="AssertionError: Series NA mask are different") @pytest.mark.parametrize( ("data", "to_type"), [ From 1e5f52ae1a0a1d0b7f5f963a8293fc9054765455 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 6 Feb 2026 20:30:50 +0000 Subject: [PATCH 5/7] test: pass the session in --- tests/system/small/test_series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index a95c9623e5..0fdfb6415e 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -4099,8 +4099,8 @@ def test_json_astype_others_raise_error(data, to_type): pytest.param(["true", None], dtypes.STRING_DTYPE, id="to_string"), ], ) -def test_json_astype_others_in_safe_mode(data, to_type): - bf_series = series.Series(data, dtype=dtypes.JSON_DTYPE) +def test_json_astype_others_in_safe_mode(data, to_type, session): + bf_series = series.Series(data, dtype=dtypes.JSON_DTYPE, session=session) bf_result = bf_series.astype(to_type, errors="null") assert bf_result.dtype == to_type From f162040420493a8019c16d3be2113b6c2fd5401e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 6 Feb 2026 20:51:59 +0000 Subject: [PATCH 6/7] chore: remove all unrelated changes --- bigframes/ml/base.py | 9 +- bigframes/ml/compose.py | 6 +- bigframes/ml/core.py | 5 +- bigframes/ml/imported.py | 15 +- bigframes/ml/llm.py | 20 +- bigframes/ml/model_selection.py | 7 +- bigframes/ml/preprocessing.py | 4 +- notebooks/dataframes/anywidget_mode.ipynb | 336 +++++++++++++++------- 8 files changed, 268 insertions(+), 134 deletions(-) diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py index 
9b38702cce..3f6ccecaa2 100644 --- a/bigframes/ml/base.py +++ b/bigframes/ml/base.py @@ -24,7 +24,8 @@ """ import abc -from typing import cast, Optional, TypeVar, Union +import typing +from typing import Optional, TypeVar, Union import warnings import bigframes_vendored.sklearn.base @@ -133,7 +134,7 @@ def register(self: _T, vertex_ai_model_id: Optional[str] = None) -> _T: self._bqml_model = self._create_bqml_model() # type: ignore except AttributeError: raise RuntimeError("A model must be trained before register.") - self._bqml_model = cast(core.BqmlModel, self._bqml_model) + self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) self._bqml_model.register(vertex_ai_model_id) return self @@ -286,7 +287,7 @@ def _predict_and_retry( bpd.concat([df_result, df_succ]) if df_result is not None else df_succ ) - df_result = cast( + df_result = typing.cast( bpd.DataFrame, bpd.concat([df_result, df_fail]) if df_result is not None else df_fail, ) @@ -306,7 +307,7 @@ def _extract_output_names(self): output_names = [] for transform_col in self._bqml_model._model._properties["transformColumns"]: - transform_col_dict = cast(dict, transform_col) + transform_col_dict = typing.cast(dict, transform_col) # pass the columns that are not transformed if "transformSql" not in transform_col_dict: continue diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index d638e026e4..f8244fb0d8 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -21,7 +21,7 @@ import re import types import typing -from typing import cast, Iterable, List, Optional, Set, Tuple, Union +from typing import Iterable, List, Optional, Set, Tuple, Union from bigframes_vendored import constants import bigframes_vendored.sklearn.compose._column_transformer @@ -218,7 +218,7 @@ def camel_to_snake(name): output_names = [] for transform_col in bq_model._properties["transformColumns"]: - transform_col_dict = cast(dict, transform_col) + transform_col_dict = typing.cast(dict, transform_col) # 
pass the columns that are not transformed if "transformSql" not in transform_col_dict: continue @@ -282,7 +282,7 @@ def _merge( return self # SQLScalarColumnTransformer only work inside ColumnTransformer feature_columns_sorted = sorted( [ - cast(str, feature_column.name) + typing.cast(str, feature_column.name) for feature_column in bq_model.feature_columns ] ) diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py index 4dbc1a5fa3..620843fb6e 100644 --- a/bigframes/ml/core.py +++ b/bigframes/ml/core.py @@ -18,7 +18,8 @@ import dataclasses import datetime -from typing import Callable, cast, Iterable, Mapping, Optional, Union +import typing +from typing import Callable, Iterable, Mapping, Optional, Union import uuid from google.cloud import bigquery @@ -376,7 +377,7 @@ def copy(self, new_model_name: str, replace: bool = False) -> BqmlModel: def register(self, vertex_ai_model_id: Optional[str] = None) -> BqmlModel: if vertex_ai_model_id is None: # vertex id needs to start with letters. https://cloud.google.com/vertex-ai/docs/general/resource-naming - vertex_ai_model_id = "bigframes_" + cast(str, self._model.model_id) + vertex_ai_model_id = "bigframes_" + typing.cast(str, self._model.model_id) # truncate as Vertex ID only accepts 63 characters, easily exceeding the limit for temp models. # The possibility of conflicts should be low. 
diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index 295649ed7f..56b5d6735c 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -16,7 +16,8 @@ from __future__ import annotations -from typing import cast, Mapping, Optional +import typing +from typing import Mapping, Optional from google.cloud import bigquery @@ -78,7 +79,7 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = cast(core.BqmlModel, self._bqml_model) + self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) (X,) = utils.batch_convert_to_dataframe(X) @@ -99,7 +100,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> TensorFlowModel: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = cast(core.BqmlModel, self._bqml_model) + self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) new_model = self._bqml_model.copy(model_name, replace) return new_model.session.read_gbq_model(model_name) @@ -157,7 +158,7 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = cast(core.BqmlModel, self._bqml_model) + self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) @@ -178,7 +179,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> ONNXModel: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = cast(core.BqmlModel, self._bqml_model) + self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) new_model = self._bqml_model.copy(model_name, replace) return 
new_model.session.read_gbq_model(model_name) @@ -276,7 +277,7 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = cast(core.BqmlModel, self._bqml_model) + self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) (X,) = utils.batch_convert_to_dataframe(X, session=self._bqml_model.session) @@ -297,7 +298,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBoostModel: if self.model_path is None: raise ValueError("Model GCS path must be provided.") self._bqml_model = self._create_bqml_model() - self._bqml_model = cast(core.BqmlModel, self._bqml_model) + self._bqml_model = typing.cast(core.BqmlModel, self._bqml_model) new_model = self._bqml_model.copy(model_name, replace) return new_model.session.read_gbq_model(model_name) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index f4e60f3f9d..585599c9b6 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -16,7 +16,8 @@ from __future__ import annotations -from typing import cast, Iterable, Literal, Mapping, Optional, Union +import typing +from typing import Iterable, Literal, Mapping, Optional, Union import warnings import bigframes_vendored.constants as constants @@ -252,7 +253,7 @@ def predict( if len(X.columns) == 1: # BQML identified the column by name - col_label = cast(blocks.Label, X.columns[0]) + col_label = typing.cast(blocks.Label, X.columns[0]) X = X.rename(columns={col_label: "content"}) options: dict = {} @@ -391,7 +392,7 @@ def predict( if len(X.columns) == 1: # BQML identified the column by name - col_label = cast(blocks.Label, X.columns[0]) + col_label = typing.cast(blocks.Label, X.columns[0]) X = X.rename(columns={col_label: "content"}) # TODO(garrettwu): remove transform to ObjRefRuntime when BQML supports ObjRef as input @@ -604,7 +605,10 @@ def fit( options["prompt_col"] = X.columns.tolist()[0] self._bqml_model = 
self._bqml_model_factory.create_llm_remote_model( - X, y, options=options, connection_name=cast(str, self.connection_name) + X, + y, + options=options, + connection_name=typing.cast(str, self.connection_name), ) return self @@ -735,7 +739,7 @@ def predict( if len(X.columns) == 1: # BQML identified the column by name - col_label = cast(blocks.Label, X.columns[0]) + col_label = typing.cast(blocks.Label, X.columns[0]) X = X.rename(columns={col_label: "prompt"}) options: dict = { @@ -820,8 +824,8 @@ def score( ) # BQML identified the column by name - X_col_label = cast(blocks.Label, X.columns[0]) - y_col_label = cast(blocks.Label, y.columns[0]) + X_col_label = typing.cast(blocks.Label, X.columns[0]) + y_col_label = typing.cast(blocks.Label, y.columns[0]) X = X.rename(columns={X_col_label: "input_text"}) y = y.rename(columns={y_col_label: "output_text"}) @@ -1033,7 +1037,7 @@ def predict( if len(X.columns) == 1: # BQML identified the column by name - col_label = cast(blocks.Label, X.columns[0]) + col_label = typing.cast(blocks.Label, X.columns[0]) X = X.rename(columns={col_label: "prompt"}) options = { diff --git a/bigframes/ml/model_selection.py b/bigframes/ml/model_selection.py index 5adfb03b7f..3d23fbf568 100644 --- a/bigframes/ml/model_selection.py +++ b/bigframes/ml/model_selection.py @@ -20,7 +20,8 @@ import inspect from itertools import chain import time -from typing import cast, Generator, List, Optional, Union +import typing +from typing import Generator, List, Optional, Union import bigframes_vendored.sklearn.model_selection._split as vendored_model_selection_split import bigframes_vendored.sklearn.model_selection._validation as vendored_model_selection_validation @@ -99,10 +100,10 @@ def _stratify_split(df: bpd.DataFrame, stratify: bpd.Series) -> List[bpd.DataFra train_dfs.append(train) test_dfs.append(test) - train_df = cast( + train_df = typing.cast( bpd.DataFrame, bpd.concat(train_dfs).drop(columns="bigframes_stratify_col") ) - test_df = cast( + test_df = 
typing.cast( bpd.DataFrame, bpd.concat(test_dfs).drop(columns="bigframes_stratify_col") ) return [train_df, test_df] diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index 8bf89b0838..22a3e7e222 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -18,7 +18,7 @@ from __future__ import annotations import typing -from typing import cast, Iterable, List, Literal, Optional, Union +from typing import Iterable, List, Literal, Optional, Union import bigframes_vendored.sklearn.preprocessing._data import bigframes_vendored.sklearn.preprocessing._discretization @@ -470,7 +470,7 @@ def _parse_from_sql(cls, sql: str) -> tuple[OneHotEncoder, str]: s = sql[sql.find("(") + 1 : sql.find(")")] col_label, drop_str, top_k, frequency_threshold = s.split(", ") drop = ( - cast(Literal["most_frequent"], "most_frequent") + typing.cast(Literal["most_frequent"], "most_frequent") if drop_str.lower() == "'most_frequent'" else None ) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 3bc12617fc..e9491610ac 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "d10bfca4", "metadata": {}, "outputs": [], @@ -91,9 +91,7 @@ "outputs": [ { "data": { - "text/html": [ - "Starting." 
- ], + "text/html": [], "text/plain": [ "" ] @@ -119,17 +117,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "state gender year name number\n", - " AL F 1910 Annie 482\n", - " AL F 1910 Myrtle 104\n", - " AR F 1910 Lillian 56\n", - " CT F 1910 Anne 38\n", - " CT F 1910 Frances 45\n", - " FL F 1910 Margaret 53\n", - " GA F 1910 Mae 73\n", - " GA F 1910 Beatrice 96\n", - " GA F 1910 Lola 47\n", - " IA F 1910 Viola 49\n", + "state gender year name number\n", + " AL F 1910 Lillian 99\n", + " AL F 1910 Ruby 204\n", + " AL F 1910 Helen 76\n", + " AL F 1910 Eunice 41\n", + " AR F 1910 Dora 42\n", + " CA F 1910 Edna 62\n", + " CA F 1910 Helen 239\n", + " CO F 1910 Alice 46\n", + " FL F 1910 Willie 71\n", + " FL F 1910 Thelma 65\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -147,10 +145,30 @@ "id": "220340b0", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c74c3719ba43489890185b5c9880acfc", + "model_id": "6fb22be7f21f4d1dacd76dc62a1a7818", "version_major": 2, "version_minor": 1 }, @@ -186,80 +204,80 @@ " AL\n", " F\n", " 1910\n", - " Hazel\n", - " 51\n", + " Lillian\n", + " 99\n", " \n", " \n", " 1\n", " AL\n", " F\n", " 1910\n", - " Lucy\n", - " 76\n", + " Ruby\n", + " 204\n", " \n", " \n", " 2\n", - " AR\n", + " AL\n", " F\n", " 1910\n", - " Nellie\n", - " 39\n", + " Helen\n", + " 76\n", " \n", " \n", " 3\n", - " AR\n", + " AL\n", " F\n", " 1910\n", - " Lena\n", - " 40\n", + " Eunice\n", + " 41\n", " \n", " \n", " 4\n", - " CO\n", + " AR\n", " F\n", " 1910\n", - " Thelma\n", - " 36\n", + " Dora\n", + " 42\n", " \n", " \n", " 5\n", - " CO\n", + " CA\n", " F\n", " 1910\n", - " Ruth\n", - " 68\n", + " Edna\n", + " 62\n", " \n", " \n", " 6\n", - " CT\n", + " 
CA\n", " F\n", " 1910\n", - " Elizabeth\n", - " 86\n", + " Helen\n", + " 239\n", " \n", " \n", " 7\n", - " DC\n", + " CO\n", " F\n", " 1910\n", - " Mary\n", - " 80\n", + " Alice\n", + " 46\n", " \n", " \n", " 8\n", " FL\n", " F\n", " 1910\n", - " Annie\n", - " 101\n", + " Willie\n", + " 71\n", " \n", " \n", " 9\n", " FL\n", " F\n", " 1910\n", - " Alma\n", - " 39\n", + " Thelma\n", + " 65\n", " \n", " \n", "\n", @@ -267,17 +285,17 @@ "[5552452 rows x 5 columns in total]" ], "text/plain": [ - "state gender year name number\n", - " AL F 1910 Hazel 51\n", - " AL F 1910 Lucy 76\n", - " AR F 1910 Nellie 39\n", - " AR F 1910 Lena 40\n", - " CO F 1910 Thelma 36\n", - " CO F 1910 Ruth 68\n", - " CT F 1910 Elizabeth 86\n", - " DC F 1910 Mary 80\n", - " FL F 1910 Annie 101\n", - " FL F 1910 Alma 39\n", + "state gender year name number\n", + " AL F 1910 Lillian 99\n", + " AL F 1910 Ruby 204\n", + " AL F 1910 Helen 76\n", + " AL F 1910 Eunice 41\n", + " AR F 1910 Dora 42\n", + " CA F 1910 Edna 62\n", + " CA F 1910 Helen 239\n", + " CO F 1910 Alice 46\n", + " FL F 1910 Willie 71\n", + " FL F 1910 Thelma 65\n", "...\n", "\n", "[5552452 rows x 5 columns]" @@ -311,7 +329,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 171.4 MB in 35 seconds of slot time. [Job bigframes-dev:US.e15f1b34-e414-42d2-857b-926ea25947c4 details]\n", + " Query processed 171.4 MB in 41 seconds of slot time. [Job bigframes-dev:US.492b5260-9f44-495c-be09-2ae1324a986c details]\n", " " ], "text/plain": [ @@ -337,9 +355,7 @@ }, { "data": { - "text/html": [ - "Starting." - ], + "text/html": [], "text/plain": [ "" ] @@ -388,10 +404,38 @@ "id": "da23e0f3", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 88.8 MB in 2 seconds of slot time. 
[Job bigframes-dev:US.job_gsx0h2jHoOSYwqGKUS3lAYLf_qi3 details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 88.8 MB in 3 seconds of slot time. [Job bigframes-dev:US.job_1VivAJ2InPdg5RXjWfvAJ1B0oxO3 details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2ad9004bda464950ab6eda63b1b86a3a", + "model_id": "7d82208e7e5e40dd9dbf64c4c561cab3", "version_major": 2, "version_minor": 1 }, @@ -489,6 +533,34 @@ "id": "6920d49b", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 215.9 MB in 10 seconds of slot time. [Job bigframes-dev:US.job_cmNyG5sJ1IDCyFINx7teExQOZ6UQ details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 215.9 MB in 8 seconds of slot time. [Job bigframes-dev:US.job_aQvP3Sn04Ss4flSLaLhm0sKzFvrd details]\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", @@ -499,12 +571,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c1b84125429c4fbc90a22e1adbeea901", + "model_id": "52d11291ba1d42e6b544acbd86eef6cf", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 8, @@ -576,6 +648,34 @@ "id": "a9d5d13a", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 215.9 MB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. 
\n", + " Query processed 215.9 MB in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", @@ -586,12 +686,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "23fc730e004b4eec807d2829bffa3ce0", + "model_id": "32c61c84740d45a0ac37202a76c7c14e", "version_major": 2, "version_minor": 1 }, "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -652,7 +752,33 @@ "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, + { + "data": { + "text/html": [], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", @@ -666,7 +792,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a3bf6021c6fd44299d317d3b44213b50", + "model_id": "9d60a47296214553bb10c434b5ee8330", "version_major": 2, "version_minor": 1 }, @@ -713,6 +839,24 @@ " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", + " 29.08.018\n", + " E04H 6/12\n", + " <NA>\n", + " 18157874.1\n", + " 
21.02.2018\n", + " 22.02.2017\n", + " Liedtke & Partner Patentanw√§lte\n", + " SHB Hebezeugbau GmbH\n", + " VOLGER, Alexander\n", + " STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\n", + " EP 3 366 869 A1\n", + " \n", + " \n", + " 1\n", + " {'application_number': None, 'class_internatio...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", " 03.10.2018\n", " H05B 6/12\n", " <NA>\n", @@ -726,7 +870,7 @@ " EP 3 383 141 A2\n", " \n", " \n", - " 1\n", + " 2\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -744,7 +888,7 @@ " EP 3 382 744 A1\n", " \n", " \n", - " 2\n", + " 3\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -762,7 +906,7 @@ " EP 3 382 553 A1\n", " \n", " \n", - " 3\n", + " 4\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -779,24 +923,6 @@ " MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\n", " EP 3 381 276 A1\n", " \n", - " \n", - " 4\n", - " {'application_number': None, 'class_internatio...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", - " 29.08.018\n", - " E04H 6/12\n", - " <NA>\n", - " 18157874.1\n", - " 21.02.2018\n", - " 22.02.2017\n", - " Liedtke & Partner Patentanw√§lte\n", - " SHB Hebezeugbau GmbH\n", - " VOLGER, Alexander\n", - " STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER\n", - " EP 3 366 869 A1\n", - " \n", " \n", "\n", "

5 rows × 15 columns

\n", @@ -818,32 +944,32 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 03.10.2018 H05B 6/12 18165514.3 \n", - "1 03.10.2018 H01L 21/20 18166536.5 \n", - "2 03.10.2018 G06F 11/30 18157347.8 \n", - "3 03.10.2018 A01K 31/00 18171005.4 \n", - "4 29.08.018 E04H 6/12 18157874.1 \n", + "0 29.08.018 E04H 6/12 18157874.1 \n", + "1 03.10.2018 H05B 6/12 18165514.3 \n", + "2 03.10.2018 H01L 21/20 18166536.5 \n", + "3 03.10.2018 G06F 11/30 18157347.8 \n", + "4 03.10.2018 A01K 31/00 18171005.4 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 03.04.2018 30.03.2017 \n", - "1 16.02.2016 Scheider, Sascha et al \n", - "2 19.02.2018 31.03.2017 Hoffmann Eitle \n", - "3 05.02.2015 05.02.2014 Stork Bamberger Patentanwälte \n", - "4 21.02.2018 22.02.2017 Liedtke & Partner Patentanwälte \n", + "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanwälte \n", + "1 03.04.2018 30.03.2017 \n", + "2 16.02.2016 Scheider, Sascha et al \n", + "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "4 05.02.2015 05.02.2014 Stork Bamberger Patentanwälte \n", "\n", " applicant_line_1 inventor_line_1 \\\n", - "0 BSH Hausgeräte GmbH Acero Acero, Jesus \n", - "1 EV Group E. Thallner GmbH Kurz, Florian \n", - "2 FUJITSU LIMITED Kukihara, Kensuke \n", - "3 Linco Food Systems A/S Thrane, Uffe \n", - "4 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "1 BSH Hausgeräte GmbH Acero Acero, Jesus \n", + "2 EV Group E. Thallner GmbH Kurz, Florian \n", + "3 FUJITSU LIMITED Kukihara, Kensuke \n", + "4 Linco Food Systems A/S Thrane, Uffe \n", "\n", " title_line_1 number \n", - "0 VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG EP 3 383 141 A2 \n", - "1 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", - "2 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", - "3 MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E... 
EP 3 381 276 A1 \n", - "4 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n", + "0 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n", + "1 VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG EP 3 383 141 A2 \n", + "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "4 MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", "\n", "[5 rows x 15 columns]" ] From 4ce97d5267be22a3bcee7fbdc4ee8ab2890b2363 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 6 Feb 2026 21:01:23 +0000 Subject: [PATCH 7/7] chore: move the place to take fflag --- bigframes/display/anywidget.py | 3 ++- bigframes/display/html.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py index 9bd20fce43..eca435e960 100644 --- a/bigframes/display/anywidget.py +++ b/bigframes/display/anywidget.py @@ -275,7 +275,8 @@ def _reset_batch_cache(self) -> None: def _reset_batches_for_new_page_size(self) -> None: """Reset the batch iterator when page size changes.""" - self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size) + with bigframes.option_context("display.progress_bar", None): + self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size) self._reset_batch_cache() diff --git a/bigframes/display/html.py b/bigframes/display/html.py index 2bf847a259..6102d1512c 100644 --- a/bigframes/display/html.py +++ b/bigframes/display/html.py @@ -363,8 +363,7 @@ def repr_mimebundle( if opts.repr_mode == "anywidget": try: - with bigframes.option_context("display.progress_bar", None): - return get_anywidget_bundle(obj, include=include, exclude=exclude) + return get_anywidget_bundle(obj, include=include, exclude=exclude) except ImportError: # Anywidget is an optional dependency, so warn rather than fail. # TODO(shuowei): When Anywidget becomes the default for all repr modes,