diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py index 17af5dd5cf..bc2ab8dd20 100644 --- a/bigframes/bigquery/_operations/ai.py +++ b/bigframes/bigquery/_operations/ai.py @@ -26,6 +26,7 @@ from bigframes import clients, dataframe, dtypes from bigframes import pandas as bpd from bigframes import series, session +from bigframes.bigquery._operations import utils as bq_utils from bigframes.core import convert from bigframes.core.logging import log_adapter import bigframes.core.sql.literals @@ -391,7 +392,7 @@ def generate_double( @log_adapter.method_logger(custom_base_name="bigquery_ai") def generate_embedding( - model_name: str, + model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series], data: Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series], *, output_dimensionality: Optional[int] = None, @@ -415,9 +416,8 @@ def generate_embedding( ... ) # doctest: +SKIP Args: - model_name (str): - The name of a remote model from Vertex AI, such as the - multimodalembedding@001 model. + model (bigframes.ml.base.BaseEstimator or str): + The model to use for text embedding. data (bigframes.pandas.DataFrame or bigframes.pandas.Series): The data to generate embeddings for. If a Series is provided, it is treated as the 'content' column. If a DataFrame is provided, it @@ -454,20 +454,9 @@ def generate_embedding( `_ for details. 
""" - if isinstance(data, (pd.DataFrame, pd.Series)): - data = bpd.read_pandas(data) - - if isinstance(data, series.Series): - data = data.copy() - data.name = "content" - data_df = data.to_frame() - elif isinstance(data, dataframe.DataFrame): - data_df = data - else: - raise ValueError(f"Unsupported data type: {type(data)}") - - # We need to get the SQL for the input data to pass as a subquery to the TVF - source_sql = data_df.sql + data = _to_dataframe(data, series_rename="content") + model_name, session = bq_utils.get_model_name_and_session(model, data) + table_sql = bq_utils.to_sql(data) struct_fields: Dict[str, bigframes.core.sql.literals.STRUCT_VALUES] = {} if output_dimensionality is not None: @@ -488,12 +477,128 @@ def generate_embedding( SELECT * FROM AI.GENERATE_EMBEDDING( MODEL `{model_name}`, - ({source_sql}), - {bigframes.core.sql.literals.struct_literal(struct_fields)}) + ({table_sql}), + {bigframes.core.sql.literals.struct_literal(struct_fields)} ) """ - return data_df._session.read_gbq(query) + if session is None: + return bpd.read_gbq_query(query) + else: + return session.read_gbq_query(query) + + +@log_adapter.method_logger(custom_base_name="bigquery_ai") +def generate_text( + model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series], + data: Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series], + *, + temperature: Optional[float] = None, + max_output_tokens: Optional[int] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + stop_sequences: Optional[List[str]] = None, + ground_with_google_search: Optional[bool] = None, + request_type: Optional[str] = None, +) -> dataframe.DataFrame: + """ + Generates text using a BigQuery ML model. + + See the `BigQuery ML GENERATE_TEXT function syntax + `_ + for additional reference. 
+ + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> df = bpd.DataFrame({"prompt": ["write a poem about apples"]}) + >>> bbq.ai.generate_text( + ... "project.dataset.model_name", + ... df + ... ) # doctest: +SKIP + + Args: + model (bigframes.ml.base.BaseEstimator or str): + The model to use for text generation. + data (bigframes.pandas.DataFrame or bigframes.pandas.Series): + The data to generate text for. If a Series is provided, it is + treated as the 'prompt' column. If a DataFrame is provided, it + must contain a 'prompt' column, or you must rename the column + that contains the prompt to 'prompt'. + temperature (float, optional): + A FLOAT64 value that is used for sampling during response + generation. The value + must be in the range ``[0.0, 1.0]``. A lower temperature works well + for prompts that expect a more deterministic and less open-ended + or creative response, while a higher temperature can lead to more + diverse or creative results. A temperature of ``0`` is + deterministic, meaning that the highest probability response is + always selected. + max_output_tokens (int, optional): + An INT64 value that sets the maximum number of tokens in the + generated text. + top_k (int, optional): + An INT64 value that changes how the model selects tokens for + output. A ``top_k`` of ``1`` means the next selected token is the + most probable among all tokens in the model's vocabulary. A + ``top_k`` of ``3`` means that the next token is selected from + among the three most probable tokens by using temperature. The + default value is ``40``. + top_p (float, optional): + A FLOAT64 value that changes how the model selects tokens for + output. Tokens are selected from most probable to least probable + until the sum of their probabilities equals the ``top_p`` value. 
+ For example, if tokens A, B, and C have a probability of 0.3, 0.2, + and 0.1 and the ``top_p`` value is ``0.5``, then the model will + select either A or B as the next token by using temperature. The + default value is ``0.95``. + stop_sequences (List[str], optional): + An ARRAY value that contains the stop sequences for the model. + ground_with_google_search (bool, optional): + A BOOL value that determines whether to ground the model with Google Search. + request_type (str, optional): + A STRING value that contains the request type for the model. + + Returns: + bigframes.pandas.DataFrame: + The generated text. + """ + data = _to_dataframe(data, series_rename="prompt") + model_name, session = bq_utils.get_model_name_and_session(model, data) + table_sql = bq_utils.to_sql(data) + + struct_fields: Dict[ + str, + Union[str, int, float, bool, Mapping[str, str], List[str], Mapping[str, Any]], + ] = {} + if temperature is not None: + struct_fields["TEMPERATURE"] = temperature + if max_output_tokens is not None: + struct_fields["MAX_OUTPUT_TOKENS"] = max_output_tokens + if top_k is not None: + struct_fields["TOP_K"] = top_k + if top_p is not None: + struct_fields["TOP_P"] = top_p + if stop_sequences is not None: + struct_fields["STOP_SEQUENCES"] = stop_sequences + if ground_with_google_search is not None: + struct_fields["GROUND_WITH_GOOGLE_SEARCH"] = ground_with_google_search + if request_type is not None: + struct_fields["REQUEST_TYPE"] = request_type + + query = f""" + SELECT * + FROM AI.GENERATE_TEXT( + MODEL `{model_name}`, + ({table_sql}), + {bigframes.core.sql.literals.struct_literal(struct_fields)} + ) + """ + + if session is None: + return bpd.read_gbq_query(query) + else: + return session.read_gbq_query(query) @@ -811,3 +916,20 @@ def _resolve_connection_id(series: series.Series, connection_id: str | None): series._session._project, series._session._location, ) + + +def _to_dataframe( + data: 
Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series], + series_rename: str, +) -> dataframe.DataFrame: + if isinstance(data, (pd.DataFrame, pd.Series)): + data = bpd.read_pandas(data) + + if isinstance(data, series.Series): + data = data.copy() + data.name = series_rename + return data.to_frame() + elif isinstance(data, dataframe.DataFrame): + return data + + raise ValueError(f"Unsupported data type: {type(data)}") diff --git a/bigframes/bigquery/_operations/ml.py b/bigframes/bigquery/_operations/ml.py index cc5a961af7..d5b1786b25 100644 --- a/bigframes/bigquery/_operations/ml.py +++ b/bigframes/bigquery/_operations/ml.py @@ -14,12 +14,13 @@ from __future__ import annotations -from typing import cast, List, Mapping, Optional, Union +from typing import List, Mapping, Optional, Union import bigframes_vendored.constants import google.cloud.bigquery import pandas as pd +from bigframes.bigquery._operations import utils import bigframes.core.logging.log_adapter as log_adapter import bigframes.core.sql.ml import bigframes.dataframe as dataframe @@ -27,53 +28,6 @@ import bigframes.session -# Helper to convert DataFrame to SQL string -def _to_sql(df_or_sql: Union[pd.DataFrame, dataframe.DataFrame, str]) -> str: - import bigframes.pandas as bpd - - if isinstance(df_or_sql, str): - return df_or_sql - - if isinstance(df_or_sql, pd.DataFrame): - bf_df = bpd.read_pandas(df_or_sql) - else: - bf_df = cast(dataframe.DataFrame, df_or_sql) - - # Cache dataframes to make sure base table is not a snapshot. - # Cached dataframe creates a full copy, never uses snapshot. - # This is a workaround for internal issue b/310266666. 
- bf_df.cache() - sql, _, _ = bf_df._to_sql_query(include_index=False) - return sql - - -def _get_model_name_and_session( - model: Union[bigframes.ml.base.BaseEstimator, str, pd.Series], - # Other dataframe arguments to extract session from - *dataframes: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]], -) -> tuple[str, Optional[bigframes.session.Session]]: - if isinstance(model, pd.Series): - try: - model_ref = model["modelReference"] - model_name = f"{model_ref['projectId']}.{model_ref['datasetId']}.{model_ref['modelId']}" # type: ignore - except KeyError: - raise ValueError("modelReference must be present in the pandas Series.") - elif isinstance(model, str): - model_name = model - else: - if model._bqml_model is None: - raise ValueError("Model must be fitted to be used in ML operations.") - return model._bqml_model.model_name, model._bqml_model.session - - session = None - for df in dataframes: - if isinstance(df, dataframe.DataFrame): - session = df._session - break - - return model_name, session - - def _get_model_metadata( *, bqclient: google.cloud.bigquery.Client, @@ -143,8 +97,12 @@ def create_model( """ import bigframes.pandas as bpd - training_data_sql = _to_sql(training_data) if training_data is not None else None - custom_holiday_sql = _to_sql(custom_holiday) if custom_holiday is not None else None + training_data_sql = ( + utils.to_sql(training_data) if training_data is not None else None + ) + custom_holiday_sql = ( + utils.to_sql(custom_holiday) if custom_holiday is not None else None + ) # Determine session from DataFrames if not provided if session is None: @@ -227,8 +185,8 @@ def evaluate( """ import bigframes.pandas as bpd - model_name, session = _get_model_name_and_session(model, input_) - table_sql = _to_sql(input_) if input_ is not None else None + model_name, session = utils.get_model_name_and_session(model, input_) + table_sql = utils.to_sql(input_) if input_ is not None else None sql = bigframes.core.sql.ml.evaluate( 
model_name=model_name, @@ -281,8 +239,8 @@ def predict( """ import bigframes.pandas as bpd - model_name, session = _get_model_name_and_session(model, input_) - table_sql = _to_sql(input_) + model_name, session = utils.get_model_name_and_session(model, input_) + table_sql = utils.to_sql(input_) sql = bigframes.core.sql.ml.predict( model_name=model_name, @@ -340,8 +298,8 @@ def explain_predict( """ import bigframes.pandas as bpd - model_name, session = _get_model_name_and_session(model, input_) - table_sql = _to_sql(input_) + model_name, session = utils.get_model_name_and_session(model, input_) + table_sql = utils.to_sql(input_) sql = bigframes.core.sql.ml.explain_predict( model_name=model_name, @@ -383,7 +341,7 @@ def global_explain( """ import bigframes.pandas as bpd - model_name, session = _get_model_name_and_session(model) + model_name, session = utils.get_model_name_and_session(model) sql = bigframes.core.sql.ml.global_explain( model_name=model_name, class_level_explain=class_level_explain, @@ -419,8 +377,8 @@ def transform( """ import bigframes.pandas as bpd - model_name, session = _get_model_name_and_session(model, input_) - table_sql = _to_sql(input_) + model_name, session = utils.get_model_name_and_session(model, input_) + table_sql = utils.to_sql(input_) sql = bigframes.core.sql.ml.transform( model_name=model_name, @@ -500,8 +458,8 @@ def generate_text( """ import bigframes.pandas as bpd - model_name, session = _get_model_name_and_session(model, input_) - table_sql = _to_sql(input_) + model_name, session = utils.get_model_name_and_session(model, input_) + table_sql = utils.to_sql(input_) sql = bigframes.core.sql.ml.generate_text( model_name=model_name, @@ -565,8 +523,8 @@ def generate_embedding( """ import bigframes.pandas as bpd - model_name, session = _get_model_name_and_session(model, input_) - table_sql = _to_sql(input_) + model_name, session = utils.get_model_name_and_session(model, input_) + table_sql = utils.to_sql(input_) sql = 
bigframes.core.sql.ml.generate_embedding( model_name=model_name, diff --git a/bigframes/bigquery/_operations/utils.py b/bigframes/bigquery/_operations/utils.py new file mode 100644 index 0000000000..f94616786e --- /dev/null +++ b/bigframes/bigquery/_operations/utils.py @@ -0,0 +1,70 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import cast, Optional, Union + +import pandas as pd + +import bigframes +from bigframes import dataframe +from bigframes.ml import base as ml_base + + +def get_model_name_and_session( + model: Union[ml_base.BaseEstimator, str, pd.Series], + # Other dataframe arguments to extract session from + *dataframes: Optional[Union[pd.DataFrame, dataframe.DataFrame, str]], +) -> tuple[str, Optional[bigframes.session.Session]]: + if isinstance(model, pd.Series): + try: + model_ref = model["modelReference"] + model_name = f"{model_ref['projectId']}.{model_ref['datasetId']}.{model_ref['modelId']}" # type: ignore + except KeyError: + raise ValueError("modelReference must be present in the pandas Series.") + elif isinstance(model, str): + model_name = model + else: + if model._bqml_model is None: + raise ValueError("Model must be fitted to be used in ML operations.") + return model._bqml_model.model_name, model._bqml_model.session + + session = None + for df in dataframes: + if isinstance(df, dataframe.DataFrame): + session = df._session + break + + return model_name, session + + +def to_sql(df_or_sql: 
Union[pd.DataFrame, dataframe.DataFrame, str]) -> str: + """ + Helper to convert DataFrame to SQL string + """ + import bigframes.pandas as bpd + + if isinstance(df_or_sql, str): + return df_or_sql + + if isinstance(df_or_sql, pd.DataFrame): + bf_df = bpd.read_pandas(df_or_sql) + else: + bf_df = cast(dataframe.DataFrame, df_or_sql) + + # Cache dataframes to make sure base table is not a snapshot. + # Cached dataframe creates a full copy, never uses snapshot. + # This is a workaround for internal issue b/310266666. + bf_df.cache() + sql, _, _ = bf_df._to_sql_query(include_index=False) + return sql diff --git a/bigframes/bigquery/ai.py b/bigframes/bigquery/ai.py index b0d9b62f9b..053ee7352a 100644 --- a/bigframes/bigquery/ai.py +++ b/bigframes/bigquery/ai.py @@ -24,6 +24,7 @@ generate_double, generate_embedding, generate_int, + generate_text, if_, score, ) @@ -36,6 +37,7 @@ "generate_double", "generate_embedding", "generate_int", + "generate_text", "if_", "score", ] diff --git a/tests/system/large/bigquery/test_ai.py b/tests/system/large/bigquery/test_ai.py new file mode 100644 index 0000000000..e318a8a720 --- /dev/null +++ b/tests/system/large/bigquery/test_ai.py @@ -0,0 +1,96 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from bigframes.bigquery import ai, ml +import bigframes.pandas as bpd + + +@pytest.fixture(scope="session") +def embedding_model(bq_connection, dataset_id): + model_name = f"{dataset_id}.embedding_model" + return ml.create_model( + model_name=model_name, + options={"endpoint": "gemini-embedding-001"}, + connection_name=bq_connection, + ) + + +@pytest.fixture(scope="session") +def text_model(bq_connection, dataset_id): + model_name = f"{dataset_id}.text_model" + return ml.create_model( + model_name=model_name, + options={"endpoint": "gemini-2.5-flash"}, + connection_name=bq_connection, + ) + + +def test_generate_embedding(embedding_model): + df = bpd.DataFrame( + { + "content": [ + "What is BigQuery?", + "What is BQML?", + ] + } + ) + + result = ai.generate_embedding(embedding_model, df) + + assert len(result) == 2 + assert "embedding" in result.columns + assert "statistics" in result.columns + assert "status" in result.columns + + +def test_generate_embedding_with_options(embedding_model): + df = bpd.DataFrame( + { + "content": [ + "What is BigQuery?", + "What is BQML?", + ] + } + ) + + result = ai.generate_embedding( + embedding_model, df, task_type="RETRIEVAL_DOCUMENT", output_dimensionality=256 + ) + + assert len(result) == 2 + embedding = result["embedding"].to_pandas() + assert len(embedding[0]) == 256 + + +def test_generate_text(text_model): + df = bpd.DataFrame({"prompt": ["Dog", "Cat"]}) + + result = ai.generate_text(text_model, df) + + assert len(result) == 2 + assert "result" in result.columns + assert "statistics" in result.columns + assert "full_response" in result.columns + assert "status" in result.columns + + +def test_generate_text_with_options(text_model): + df = bpd.DataFrame({"prompt": ["Dog", "Cat"]}) + + result = ai.generate_text(text_model, df, max_output_tokens=1) + + # It basically asserts that the results are still returned. 
+ assert len(result) == 2 diff --git a/tests/unit/bigquery/test_ai.py b/tests/unit/bigquery/test_ai.py index 0f9df6cc26..0be32b9e8a 100644 --- a/tests/unit/bigquery/test_ai.py +++ b/tests/unit/bigquery/test_ai.py @@ -33,17 +33,41 @@ def mock_dataframe(mock_session): df = mock.create_autospec(spec=bigframes.dataframe.DataFrame) df._session = mock_session df.sql = "SELECT * FROM my_table" + df._to_sql_query.return_value = ("SELECT * FROM my_table", None, None) return df @pytest.fixture -def mock_series(mock_session): +def mock_embedding_series(mock_session): series = mock.create_autospec(spec=bigframes.series.Series) series._session = mock_session # Mock to_frame to return a mock dataframe df = mock.create_autospec(spec=bigframes.dataframe.DataFrame) df._session = mock_session df.sql = "SELECT my_col AS content FROM my_table" + df._to_sql_query.return_value = ( + "SELECT my_col AS content FROM my_table", + None, + None, + ) + series.copy.return_value = series + series.to_frame.return_value = df + return series + + +@pytest.fixture +def mock_text_series(mock_session): + series = mock.create_autospec(spec=bigframes.series.Series) + series._session = mock_session + # Mock to_frame to return a mock dataframe + df = mock.create_autospec(spec=bigframes.dataframe.DataFrame) + df._session = mock_session + df.sql = "SELECT my_col AS prompt FROM my_table" + df._to_sql_query.return_value = ( + "SELECT my_col AS prompt FROM my_table", + None, + None, + ) series.copy.return_value = series series.to_frame.return_value = df return series @@ -58,8 +82,8 @@ def test_generate_embedding_with_dataframe(mock_dataframe, mock_session): output_dimensionality=256, ) - mock_session.read_gbq.assert_called_once() - query = mock_session.read_gbq.call_args[0][0] + mock_session.read_gbq_query.assert_called_once() + query = mock_session.read_gbq_query.call_args[0][0] # Normalize whitespace for comparison query = " ".join(query.split()) @@ -75,15 +99,19 @@ def 
test_generate_embedding_with_dataframe(mock_dataframe, mock_session): assert expected_part_4 in query -def test_generate_embedding_with_series(mock_series, mock_session): +def test_generate_embedding_with_series(mock_embedding_series, mock_session): model_name = "project.dataset.model" bbq.ai.generate_embedding( - model_name, mock_series, start_second=0.0, end_second=10.0, interval_seconds=5.0 + model_name, + mock_embedding_series, + start_second=0.0, + end_second=10.0, + interval_seconds=5.0, ) - mock_session.read_gbq.assert_called_once() - query = mock_session.read_gbq.call_args[0][0] + mock_session.read_gbq_query.assert_called_once() + query = mock_session.read_gbq_query.call_args[0][0] query = " ".join(query.split()) assert f"MODEL `{model_name}`" in query @@ -102,8 +130,8 @@ def test_generate_embedding_defaults(mock_dataframe, mock_session): mock_dataframe, ) - mock_session.read_gbq.assert_called_once() - query = mock_session.read_gbq.call_args[0][0] + mock_session.read_gbq_query.assert_called_once() + query = mock_session.read_gbq_query.call_args[0][0] query = " ".join(query.split()) assert f"MODEL `{model_name}`" in query @@ -131,4 +159,86 @@ def test_generate_embedding_with_pandas_dataframe( # Check that read_pandas was called with something (the pandas df) assert read_pandas_mock.call_args[0][0] is pandas_df - mock_session.read_gbq.assert_called_once() + mock_session.read_gbq_query.assert_called_once() + + +def test_generate_text_with_dataframe(mock_dataframe, mock_session): + model_name = "project.dataset.model" + + bbq.ai.generate_text( + model_name, + mock_dataframe, + max_output_tokens=256, + ) + + mock_session.read_gbq_query.assert_called_once() + query = mock_session.read_gbq_query.call_args[0][0] + + # Normalize whitespace for comparison + query = " ".join(query.split()) + + expected_part_1 = "SELECT * FROM AI.GENERATE_TEXT(" + expected_part_2 = f"MODEL `{model_name}`," + expected_part_3 = "(SELECT * FROM my_table)," + expected_part_4 = "STRUCT(256 
AS MAX_OUTPUT_TOKENS)" + + assert expected_part_1 in query + assert expected_part_2 in query + assert expected_part_3 in query + assert expected_part_4 in query + + +def test_generate_text_with_series(mock_text_series, mock_session): + model_name = "project.dataset.model" + + bbq.ai.generate_text( + model_name, + mock_text_series, + ) + + mock_session.read_gbq_query.assert_called_once() + query = mock_session.read_gbq_query.call_args[0][0] + query = " ".join(query.split()) + + assert f"MODEL `{model_name}`" in query + assert "(SELECT my_col AS prompt FROM my_table)" in query + + +def test_generate_text_defaults(mock_dataframe, mock_session): + model_name = "project.dataset.model" + + bbq.ai.generate_text( + model_name, + mock_dataframe, + ) + + mock_session.read_gbq_query.assert_called_once() + query = mock_session.read_gbq_query.call_args[0][0] + query = " ".join(query.split()) + + assert f"MODEL `{model_name}`" in query + assert "STRUCT()" in query + + +@mock.patch("bigframes.pandas.read_pandas") +def test_generate_text_with_pandas_dataframe( + read_pandas_mock, mock_dataframe, mock_session +): + # This tests that pandas input path works and calls read_pandas + model_name = "project.dataset.model" + + # Mock return value of read_pandas to be a BigFrames DataFrame + read_pandas_mock.return_value = mock_dataframe + + pandas_df = pd.DataFrame({"content": ["test"]}) + + bbq.ai.generate_text( + model_name, + pandas_df, + ) + + read_pandas_mock.assert_called_once() + # Check that read_pandas was called with something (the pandas df) + assert read_pandas_mock.call_args[0][0] is pandas_df + + mock_session.read_gbq_query.assert_called_once() diff --git a/tests/unit/bigquery/test_ml.py b/tests/unit/bigquery/test_ml.py index fd77469152..e5c957767b 100644 --- a/tests/unit/bigquery/test_ml.py +++ b/tests/unit/bigquery/test_ml.py @@ -40,31 +40,6 @@ def mock_session(): MODEL_NAME = "test-project.test-dataset.test-model" -def 
test_get_model_name_and_session_with_pandas_series_model_input(): - model_name, _ = ml_ops._get_model_name_and_session(MODEL_SERIES) - assert model_name == MODEL_NAME - - -def test_get_model_name_and_session_with_pandas_series_model_input_missing_model_reference(): - model_series = pd.Series({"some_other_key": "value"}) - with pytest.raises( - ValueError, match="modelReference must be present in the pandas Series" - ): - ml_ops._get_model_name_and_session(model_series) - - -@mock.patch("bigframes.pandas.read_pandas") -def test_to_sql_with_pandas_dataframe(read_pandas_mock): - df = pd.DataFrame({"col1": [1, 2, 3]}) - read_pandas_mock.return_value._to_sql_query.return_value = ( - "SELECT * FROM `pandas_df`", - [], - [], - ) - ml_ops._to_sql(df) - read_pandas_mock.assert_called_once() - - @mock.patch("bigframes.bigquery._operations.ml._get_model_metadata") @mock.patch("bigframes.pandas.read_pandas") def test_create_model_with_pandas_dataframe(