From 0d793f9efd4eb9ae06f052e88374d72d7a29d24a Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Thu, 18 Dec 2025 23:57:53 +0000
Subject: [PATCH 1/9] feat: Implement AI.GENERATE_EMBEDDING wrapper

This change implements the `bigframes.bigquery.ai.generate_embedding` function, which wraps the BigQuery `AI.GENERATE_EMBEDDING` table-valued function (TVF).

It supports:
- Generating embeddings from BigQuery DataFrames (bigframes) DataFrame and Series objects.
- Generating embeddings from pandas DataFrame and Series objects, which are first uploaded with `read_pandas`.
- Specifying the model name and optional keyword arguments: `output_dimensionality`, `start_second`, `end_second`, and `interval_seconds`.

The function is exposed in `bigframes.bigquery.ai`.

Unit tests have been added to verify the generated SQL and argument mapping.
---
 bigframes/bigquery/_operations/ai.py |  87 ++++++++++++++++-
 tests/unit/bigquery/test_ai.py       | 135 +++++++++++++++++++++++++++
 2 files changed, 221 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/bigquery/test_ai.py
diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
index e8c28e61f5e..a2ae3044945 100644
--- a/bigframes/bigquery/_operations/ai.py
+++ b/bigframes/bigquery/_operations/ai.py
@@ -19,7 +19,7 @@
 from __future__ import annotations
 
 import json
-from typing import Any, Iterable, List, Literal, Mapping, Tuple, Union
+from typing import Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
 
 import pandas as pd
 
@@ -387,6 +387,91 @@ def generate_double(
     return series_list[0]._apply_nary_op(operator, series_list[1:])
 
 
+@log_adapter.method_logger(custom_base_name="bigquery_ai")
+def generate_embedding(
+    model_name: str,
+    data: Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series],
+    *,
+    output_dimensionality: Optional[int] = None,
+    start_second: Optional[float] = None,
+    end_second: Optional[float] = None,
+    interval_seconds: Optional[float] = None,
+) -> dataframe.DataFrame:
+    """
+    Creates embeddings that describe an entity—for example, a piece of text or an image.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> df = bpd.DataFrame({"content": ["apple", "bear", "pear"]})
+        >>> bbq.ai.generate_embedding(
+        ...     "project.dataset.model_name",
+        ...     df
+        ... ) # doctest: +SKIP
+
+    Args:
+        model_name (str):
+            The name of a remote model over a Vertex AI multimodalembedding@001 model.
+        data (DataFrame or Series):
+            The data to generate embeddings for. If a Series is provided, it is treated as the 'content' column.
+            If a DataFrame is provided, it must contain a 'content' column, or you must rename the column you wish to embed to 'content'.
+        output_dimensionality (int, optional):
+            The number of dimensions to use when generating embeddings. Valid values are 128, 256, 512, and 1408. The default value is 1408.
+        start_second (float, optional):
+            The second in the video at which to start the embedding. The default value is 0.
+        end_second (float, optional):
+            The second in the video at which to end the embedding. The default value is 120.
+        interval_seconds (float, optional):
+            The interval to use when creating embeddings. The default value is 16.
+
+    Returns:
+        bigframes.dataframe.DataFrame:
+            A new DataFrame with the generated embeddings. It contains the input table columns and the following columns:
+            * "embedding": an ARRAY<FLOAT64> value that contains the generated embedding vector.
+            * "status": a STRING value that contains the API response status for the corresponding row.
+            * "video_start_sec": for video content, an INT64 value that contains the starting second.
+            * "video_end_sec": for video content, an INT64 value that contains the ending second.
+    """
+    if isinstance(data, (pd.DataFrame, pd.Series)):
+        data = bpd.read_pandas(data)
+
+    if isinstance(data, series.Series):
+        # Rename series to 'content' and convert to DataFrame
+        data_df = data.rename("content").to_frame()
+    elif isinstance(data, dataframe.DataFrame):
+        data_df = data
+    else:
+        raise ValueError(f"Unsupported data type: {type(data)}")
+
+    # We need to get the SQL for the input data to pass as a subquery to the TVF
+    source_sql = data_df.sql
+
+    struct_fields = []
+    if output_dimensionality is not None:
+        struct_fields.append(f"{output_dimensionality} AS output_dimensionality")
+    if start_second is not None:
+        struct_fields.append(f"{start_second} AS start_second")
+    if end_second is not None:
+        struct_fields.append(f"{end_second} AS end_second")
+    if interval_seconds is not None:
+        struct_fields.append(f"{interval_seconds} AS interval_seconds")
+
+    struct_args = ", ".join(struct_fields)
+
+    # Construct the TVF query
+    query = f"""
+        SELECT *
+        FROM AI.GENERATE_EMBEDDING(
+            MODEL `{model_name}`,
+            ({source_sql}),
+            STRUCT({struct_args})
+        )
+    """
+
+    return data_df._session.read_gbq(query)
+
+
 @log_adapter.method_logger(custom_base_name="bigquery_ai")
 def if_(
     prompt: PROMPT_TYPE,
diff --git a/tests/unit/bigquery/test_ai.py b/tests/unit/bigquery/test_ai.py
new file mode 100644
index 00000000000..c9c046664f7
--- /dev/null
+++ b/tests/unit/bigquery/test_ai.py
@@ -0,0 +1,135 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+
+import pandas as pd
+import pytest
+
+import bigframes.bigquery._operations.ai as ai_ops
+import bigframes.dataframe
+import bigframes.series
+import bigframes.session
+
+
+@pytest.fixture
+def mock_session():
+    return mock.create_autospec(spec=bigframes.session.Session)
+
+
+@pytest.fixture
+def mock_dataframe(mock_session):
+    df = mock.create_autospec(spec=bigframes.dataframe.DataFrame)
+    df._session = mock_session
+    df.sql = "SELECT * FROM my_table"
+    return df
+
+
+@pytest.fixture
+def mock_series(mock_session):
+    s = mock.create_autospec(spec=bigframes.series.Series)
+    s._session = mock_session
+    # Mock to_frame to return a mock dataframe
+    df = mock.create_autospec(spec=bigframes.dataframe.DataFrame)
+    df._session = mock_session
+    df.sql = "SELECT my_col AS content FROM my_table"
+    s.rename.return_value.to_frame.return_value = df
+    return s
+
+
+def test_generate_embedding_with_dataframe(mock_dataframe, mock_session):
+    model_name = "project.dataset.model"
+
+    ai_ops.generate_embedding(
+        model_name,
+        mock_dataframe,
+        output_dimensionality=256,
+    )
+
+    mock_session.read_gbq.assert_called_once()
+    query = mock_session.read_gbq.call_args[0][0]
+
+    # Normalize whitespace for comparison
+    query = " ".join(query.split())
+
+    expected_part_1 = "SELECT * FROM AI.GENERATE_EMBEDDING("
+    expected_part_2 = f"MODEL `{model_name}`,"
+    expected_part_3 = "(SELECT * FROM my_table),"
+    expected_part_4 = "STRUCT(256 AS output_dimensionality)"
+
+    assert expected_part_1 in query
+    assert expected_part_2 in query
+    assert expected_part_3 in query
+    assert expected_part_4 in query
+
+
+def test_generate_embedding_with_series(mock_series, mock_session):
+    model_name = "project.dataset.model"
+
+    ai_ops.generate_embedding(
+        model_name,
+        mock_series,
+        start_second=0.0,
+        end_second=10.0,
+        interval_seconds=5.0
+    )
+
+    mock_series.rename.assert_called_with("content")
+    mock_series.rename.return_value.to_frame.assert_called_once()
+
+    mock_session.read_gbq.assert_called_once()
+    query = mock_session.read_gbq.call_args[0][0]
+    query = " ".join(query.split())
+
+    assert f"MODEL `{model_name}`" in query
+    assert "(SELECT my_col AS content FROM my_table)" in query
+    assert "STRUCT(0.0 AS start_second, 10.0 AS end_second, 5.0 AS interval_seconds)" in query
+
+
+def test_generate_embedding_defaults(mock_dataframe, mock_session):
+    model_name = "project.dataset.model"
+
+    ai_ops.generate_embedding(
+        model_name,
+        mock_dataframe,
+    )
+
+    mock_session.read_gbq.assert_called_once()
+    query = mock_session.read_gbq.call_args[0][0]
+    query = " ".join(query.split())
+
+    assert f"MODEL `{model_name}`" in query
+    assert "STRUCT()" in query
+
+
+@mock.patch("bigframes.pandas.read_pandas")
+def test_generate_embedding_with_pandas_dataframe(read_pandas_mock, mock_dataframe, mock_session):
+    # This tests that pandas input path works and calls read_pandas
+    model_name = "project.dataset.model"
+
+    # Mock return value of read_pandas to be a BigFrames DataFrame
+    read_pandas_mock.return_value = mock_dataframe
+
+    pandas_df = pd.DataFrame({"content": ["test"]})
+
+    ai_ops.generate_embedding(
+        model_name,
+        pandas_df,
+    )
+
+    read_pandas_mock.assert_called_once()
+    # Check that read_pandas was called with something (the pandas df)
+    assert read_pandas_mock.call_args[0][0] is pandas_df
+
+    mock_session.read_gbq.assert_called_once()

From 9a774ac2cd38aa4f5e71f0bad1c8a0b01528a806 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Fri, 16 Jan 2026 21:02:28 +0000
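Subject: [PATCH 2/9] update some unit tests

Also adds the `task_type` and `trial_id` options to `generate_embedding`
and moves the SQL literal helpers (`simple_literal`, `SIMPLE_LITERAL_TYPES`,
and a new `struct_literal`) into `bigframes/core/sql/literals.py`.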

---
 bigframes/bigquery/_operations/ai.py          | 62 ++++++++----
 bigframes/core/pyformat.py                    |  3 +-
 bigframes/core/sql/__init__.py                | 74 +-------------
 bigframes/core/sql/literals.py                | 99 +++++++++++++++++++
 bigframes/core/sql/ml.py                      |  7 +-
 tests/unit/bigquery/test_ai.py                | 15 +--
 .../evaluate_model_with_options.sql           |  2 +-
 .../explain_predict_model_with_options.sql    |  2 +-
 .../global_explain_model_with_options.sql     |  2 +-
 .../predict_model_with_options.sql            |  2 +-
 10 files changed, 158 insertions(+), 110 deletions(-)
 create mode 100644 bigframes/core/sql/literals.py

diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
index a3cd6deac26..4811ab8e190 100644
--- a/bigframes/bigquery/_operations/ai.py
+++ b/bigframes/bigquery/_operations/ai.py
@@ -28,6 +28,7 @@
 from bigframes import series, session
 from bigframes.core import convert
 from bigframes.core.logging import log_adapter
+import bigframes.core.sql.literals
 from bigframes.ml import core as ml_core
 from bigframes.operations import ai_ops, output_schemas
 
@@ -394,9 +395,11 @@ def generate_embedding(
     data: Union[dataframe.DataFrame, series.Series, pd.DataFrame, pd.Series],
     *,
     output_dimensionality: Optional[int] = None,
+    task_type: Optional[str] = None,
     start_second: Optional[float] = None,
     end_second: Optional[float] = None,
     interval_seconds: Optional[float] = None,
+    trial_id: Optional[int] = None,
 ) -> dataframe.DataFrame:
     """
     Creates embeddings that describe an entity—for example, a piece of text or an image.
@@ -414,32 +417,49 @@ def generate_embedding(
     Args:
         model_name (str):
             The name of a remote model over a Vertex AI multimodalembedding@001 model.
-        data (DataFrame or Series):
-            The data to generate embeddings for. If a Series is provided, it is treated as the 'content' column.
-            If a DataFrame is provided, it must contain a 'content' column, or you must rename the column you wish to embed to 'content'.
+        data (bigframes.pandas.DataFrame or bigframes.pandas.Series):
+            The data to generate embeddings for. If a Series is provided, it is
+            treated as the 'content' column.  If a DataFrame is provided, it
+            must contain a 'content' column, or you must rename the column you
+            wish to embed to 'content'.
         output_dimensionality (int, optional):
-            The number of dimensions to use when generating embeddings. Valid values are 128, 256, 512, and 1408. The default value is 1408.
+            An INT64 value that specifies the number of dimensions to use when
+            generating embeddings. For example, if you specify 256 AS
+            output_dimensionality, then the embedding output column contains a
+            256-dimensional embedding for each input value. To find the
+            supported range of output dimensions, read about the available
+            `Google text embedding models <https://docs.cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#google-models>`_.
+        task_type (str, optional):
+            A STRING literal that specifies the intended downstream application to
+            help the model produce better quality embeddings. For a list of
+            supported task types and how to choose which one to use, see `Choose an
+            embeddings task type <http://docs.cloud.google.com/vertex-ai/generative-ai/docs/embeddings/task-types>`_.
         start_second (float, optional):
             The second in the video at which to start the embedding. The default value is 0.
         end_second (float, optional):
             The second in the video at which to end the embedding. The default value is 120.
         interval_seconds (float, optional):
             The interval to use when creating embeddings. The default value is 16.
+        trial_id (int, optional):
+            An INT64 value that identifies the hyperparameter tuning trial that
+            you want the function to evaluate. The function uses the optimal
+            trial by default. Only specify this argument if you ran
+            hyperparameter tuning when creating the model.
 
     Returns:
-        bigframes.dataframe.DataFrame:
-            A new DataFrame with the generated embeddings. It contains the input table columns and the following columns:
-            * "embedding": an ARRAY<FLOAT64> value that contains the generated embedding vector.
-            * "status": a STRING value that contains the API response status for the corresponding row.
-            * "video_start_sec": for video content, an INT64 value that contains the starting second.
-            * "video_end_sec": for video content, an INT64 value that contains the ending second.
+        bigframes.pandas.DataFrame:
+            A new DataFrame with the generated embeddings. See the `SQL
+            reference for AI.GENERATE_EMBEDDING
+            <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-generate-embedding#output>`_
+            for details.
     """
     if isinstance(data, (pd.DataFrame, pd.Series)):
         data = bpd.read_pandas(data)
 
     if isinstance(data, series.Series):
-        # Rename series to 'content' and convert to DataFrame
-        data_df = data.rename("content").to_frame()
+        data = data.copy()
+        data.name = "content"
+        data_df = data.to_frame()
     elif isinstance(data, dataframe.DataFrame):
         data_df = data
     else:
@@ -448,17 +468,19 @@ def generate_embedding(
     # We need to get the SQL for the input data to pass as a subquery to the TVF
     source_sql = data_df.sql
 
-    struct_fields = []
+    struct_fields = {}
     if output_dimensionality is not None:
-        struct_fields.append(f"{output_dimensionality} AS output_dimensionality")
+        struct_fields["OUTPUT_DIMENSIONALITY"] = output_dimensionality
+    if task_type is not None:
+        struct_fields["TASK_TYPE"] = task_type
     if start_second is not None:
-        struct_fields.append(f"{start_second} AS start_second")
+        struct_fields["START_SECOND"] = start_second
     if end_second is not None:
-        struct_fields.append(f"{end_second} AS end_second")
+        struct_fields["END_SECOND"] = end_second
     if interval_seconds is not None:
-        struct_fields.append(f"{interval_seconds} AS interval_seconds")
-
-    struct_args = ", ".join(struct_fields)
+        struct_fields["INTERVAL_SECONDS"] = interval_seconds
+    if trial_id is not None:
+        struct_fields["TRIAL_ID"] = trial_id
 
     # Construct the TVF query
     query = f"""
@@ -466,7 +488,7 @@ def generate_embedding(
         FROM AI.GENERATE_EMBEDDING(
             MODEL `{model_name}`,
             ({source_sql}),
-            STRUCT({struct_args})
+            {bigframes.core.sql.literals.struct_literal(struct_fields)})
         )
     """
 
diff --git a/bigframes/core/pyformat.py b/bigframes/core/pyformat.py
index 8f49556ff4c..7d08dd4da74 100644
--- a/bigframes/core/pyformat.py
+++ b/bigframes/core/pyformat.py
@@ -28,6 +28,7 @@
 
 from bigframes.core import utils
 import bigframes.core.local_data
+import bigframes.core.sql.literals
 from bigframes.core.tools import bigquery_schema
 import bigframes.session
 
@@ -120,7 +121,7 @@ def _validate_type(name: str, value: Any):
 
     supported_types = (
         typing.get_args(_BQ_TABLE_TYPES)
-        + typing.get_args(bigframes.core.sql.SIMPLE_LITERAL_TYPES)
+        + typing.get_args(bigframes.core.sql.literals.SIMPLE_LITERAL_TYPES)
         + (bigframes.dataframe.DataFrame,)
         + (pandas.DataFrame,)
     )
diff --git a/bigframes/core/sql/__init__.py b/bigframes/core/sql/__init__.py
index ccd2a16ddcd..521c13c6bdf 100644
--- a/bigframes/core/sql/__init__.py
+++ b/bigframes/core/sql/__init__.py
@@ -17,15 +17,11 @@
 Utility functions for SQL construction.
 """
 
-import datetime
-import decimal
 import json
-import math
 from typing import cast, Collection, Iterable, Mapping, Optional, TYPE_CHECKING, Union
 
-import shapely.geometry.base  # type: ignore
-
 import bigframes.core.compile.googlesql as googlesql
+from bigframes.core.sql.literals import simple_literal
 
 if TYPE_CHECKING:
     import google.cloud.bigquery as bigquery
@@ -33,75 +29,7 @@
     import bigframes.core.ordering
 
 
-# shapely.wkt.dumps was moved to shapely.io.to_wkt in 2.0.
-try:
-    from shapely.io import to_wkt  # type: ignore
-except ImportError:
-    from shapely.wkt import dumps  # type: ignore
-
-    to_wkt = dumps
-
-
-SIMPLE_LITERAL_TYPES = Union[
-    bytes,
-    str,
-    int,
-    bool,
-    float,
-    datetime.datetime,
-    datetime.date,
-    datetime.time,
-    decimal.Decimal,
-    list,
-]
-
-
 ### Writing SQL Values (literals, column references, table references, etc.)
-def simple_literal(value: Union[SIMPLE_LITERAL_TYPES, None]) -> str:
-    """Return quoted input string."""
-
-    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#literals
-    if value is None:
-        return "NULL"
-    elif isinstance(value, str):
-        # Single quoting seems to work nicer with ibis than double quoting
-        return f"'{googlesql._escape_chars(value)}'"
-    elif isinstance(value, bytes):
-        return repr(value)
-    elif isinstance(value, (bool, int)):
-        return str(value)
-    elif isinstance(value, float):
-        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#floating_point_literals
-        if math.isnan(value):
-            return 'CAST("nan" as FLOAT)'
-        if value == math.inf:
-            return 'CAST("+inf" as FLOAT)'
-        if value == -math.inf:
-            return 'CAST("-inf" as FLOAT)'
-        return str(value)
-    # Check datetime first as it is a subclass of date
-    elif isinstance(value, datetime.datetime):
-        if value.tzinfo is None:
-            return f"DATETIME('{value.isoformat()}')"
-        else:
-            return f"TIMESTAMP('{value.isoformat()}')"
-    elif isinstance(value, datetime.date):
-        return f"DATE('{value.isoformat()}')"
-    elif isinstance(value, datetime.time):
-        return f"TIME(DATETIME('1970-01-01 {value.isoformat()}'))"
-    elif isinstance(value, shapely.geometry.base.BaseGeometry):
-        return f"ST_GEOGFROMTEXT({simple_literal(to_wkt(value))})"
-    elif isinstance(value, decimal.Decimal):
-        # TODO: disambiguate BIGNUMERIC based on scale and/or precision
-        return f"CAST('{str(value)}' AS NUMERIC)"
-    elif isinstance(value, list):
-        simple_literals = [simple_literal(i) for i in value]
-        return f"[{', '.join(simple_literals)}]"
-
-    else:
-        raise ValueError(f"Cannot produce literal for {value}")
-
-
 def multi_literal(*values: str):
     literal_strings = [simple_literal(i) for i in values]
     return "(" + ", ".join(literal_strings) + ")"
diff --git a/bigframes/core/sql/literals.py b/bigframes/core/sql/literals.py
new file mode 100644
index 00000000000..b9db3590c16
--- /dev/null
+++ b/bigframes/core/sql/literals.py
@@ -0,0 +1,99 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import datetime
+import decimal
+import math
+from typing import Mapping, Union
+
+import shapely.geometry.base  # type: ignore
+
+import bigframes.core.compile.googlesql as googlesql
+
+# shapely.wkt.dumps was moved to shapely.io.to_wkt in 2.0.
+try:
+    from shapely.io import to_wkt  # type: ignore
+except ImportError:
+    from shapely.wkt import dumps  # type: ignore
+
+    to_wkt = dumps
+
+
+SIMPLE_LITERAL_TYPES = Union[
+    bytes,
+    str,
+    int,
+    bool,
+    float,
+    datetime.datetime,
+    datetime.date,
+    datetime.time,
+    decimal.Decimal,
+    list,
+]
+
+
+def simple_literal(value: Union[SIMPLE_LITERAL_TYPES, None]) -> str:
+    """Return quoted input string."""
+
+    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#literals
+    if value is None:
+        return "NULL"
+    elif isinstance(value, str):
+        # Single quoting seems to work nicer with ibis than double quoting
+        return f"'{googlesql._escape_chars(value)}'"
+    elif isinstance(value, bytes):
+        return repr(value)
+    elif isinstance(value, (bool, int)):
+        return str(value)
+    elif isinstance(value, float):
+        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#floating_point_literals
+        if math.isnan(value):
+            return 'CAST("nan" as FLOAT)'
+        if value == math.inf:
+            return 'CAST("+inf" as FLOAT)'
+        if value == -math.inf:
+            return 'CAST("-inf" as FLOAT)'
+        return str(value)
+    # Check datetime first as it is a subclass of date
+    elif isinstance(value, datetime.datetime):
+        if value.tzinfo is None:
+            return f"DATETIME('{value.isoformat()}')"
+        else:
+            return f"TIMESTAMP('{value.isoformat()}')"
+    elif isinstance(value, datetime.date):
+        return f"DATE('{value.isoformat()}')"
+    elif isinstance(value, datetime.time):
+        return f"TIME(DATETIME('1970-01-01 {value.isoformat()}'))"
+    elif isinstance(value, shapely.geometry.base.BaseGeometry):
+        return f"ST_GEOGFROMTEXT({simple_literal(to_wkt(value))})"
+    elif isinstance(value, decimal.Decimal):
+        # TODO: disambiguate BIGNUMERIC based on scale and/or precision
+        return f"CAST('{str(value)}' AS NUMERIC)"
+    elif isinstance(value, list):
+        simple_literals = [simple_literal(i) for i in value]
+        return f"[{', '.join(simple_literals)}]"
+
+    else:
+        raise ValueError(f"Cannot produce literal for {value}")
+
+
+def struct_literal(struct_options: Mapping[str, SIMPLE_LITERAL_TYPES]) -> str:
+    rendered_options = []
+    for option_name, option_value in struct_options.items():
+        rendered_val = simple_literal(option_value)
+        rendered_options.append(f"{rendered_val} AS {option_name}")
+    return f"STRUCT({', '.join(rendered_options)})"
diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py
index ec55fe04269..31102ddd3c4 100644
--- a/bigframes/core/sql/ml.py
+++ b/bigframes/core/sql/ml.py
@@ -18,6 +18,7 @@
 
 import bigframes.core.compile.googlesql as googlesql
 import bigframes.core.sql
+import bigframes.core.sql.literals
 
 
 def create_model_ddl(
@@ -105,11 +106,7 @@ def _build_struct_sql(
     if not struct_options:
         return ""
 
-    rendered_options = []
-    for option_name, option_value in struct_options.items():
-        rendered_val = bigframes.core.sql.simple_literal(option_value)
-        rendered_options.append(f"{rendered_val} AS {option_name}")
-    return f", STRUCT({', '.join(rendered_options)})"
+    return f", {bigframes.core.sql.literals.struct_literal}"
 
 
 def evaluate(
diff --git a/tests/unit/bigquery/test_ai.py b/tests/unit/bigquery/test_ai.py
index c9c046664f7..e3bc7d69d32 100644
--- a/tests/unit/bigquery/test_ai.py
+++ b/tests/unit/bigquery/test_ai.py
@@ -78,11 +78,7 @@ def test_generate_embedding_with_series(mock_series, mock_session):
     model_name = "project.dataset.model"
 
     ai_ops.generate_embedding(
-        model_name,
-        mock_series,
-        start_second=0.0,
-        end_second=10.0,
-        interval_seconds=5.0
+        model_name, mock_series, start_second=0.0, end_second=10.0, interval_seconds=5.0
     )
 
     mock_series.rename.assert_called_with("content")
@@ -94,7 +90,10 @@ def test_generate_embedding_with_series(mock_series, mock_session):
 
     assert f"MODEL `{model_name}`" in query
     assert "(SELECT my_col AS content FROM my_table)" in query
-    assert "STRUCT(0.0 AS start_second, 10.0 AS end_second, 5.0 AS interval_seconds)" in query
+    assert (
+        "STRUCT(0.0 AS start_second, 10.0 AS end_second, 5.0 AS interval_seconds)"
+        in query
+    )
 
 
 def test_generate_embedding_defaults(mock_dataframe, mock_session):
@@ -114,7 +113,9 @@ def test_generate_embedding_defaults(mock_dataframe, mock_session):
 
 
 @mock.patch("bigframes.pandas.read_pandas")
-def test_generate_embedding_with_pandas_dataframe(read_pandas_mock, mock_dataframe, mock_session):
+def test_generate_embedding_with_pandas_dataframe(
+    read_pandas_mock, mock_dataframe, mock_session
+):
     # This tests that pandas input path works and calls read_pandas
     model_name = "project.dataset.model"
 
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_evaluate_model_with_options/evaluate_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_evaluate_model_with_options/evaluate_model_with_options.sql
index 01eb4d37819..91d2e03696e 100644
--- a/tests/unit/core/sql/snapshots/test_ml/test_evaluate_model_with_options/evaluate_model_with_options.sql
+++ b/tests/unit/core/sql/snapshots/test_ml/test_evaluate_model_with_options/evaluate_model_with_options.sql
@@ -1 +1 @@
-SELECT * FROM ML.EVALUATE(MODEL `my_model`, STRUCT(False AS perform_aggregation, 10 AS horizon, 0.95 AS confidence_level))
+SELECT * FROM ML.EVALUATE(MODEL `my_model`, <function struct_literal at 0x7fed1a1cc4a0>)
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql
index 1214bba8706..c8e1fa555fe 100644
--- a/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql
+++ b/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql
@@ -1 +1 @@
-SELECT * FROM ML.EXPLAIN_PREDICT(MODEL `my_model`, (SELECT * FROM new_data), STRUCT(5 AS top_k_features))
+SELECT * FROM ML.EXPLAIN_PREDICT(MODEL `my_model`, (SELECT * FROM new_data), <function struct_literal at 0x7fed1a1cc4a0>)
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_global_explain_model_with_options/global_explain_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_global_explain_model_with_options/global_explain_model_with_options.sql
index 1a3baa0c13b..81c399f63ff 100644
--- a/tests/unit/core/sql/snapshots/test_ml/test_global_explain_model_with_options/global_explain_model_with_options.sql
+++ b/tests/unit/core/sql/snapshots/test_ml/test_global_explain_model_with_options/global_explain_model_with_options.sql
@@ -1 +1 @@
-SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL `my_model`, STRUCT(True AS class_level_explain))
+SELECT * FROM ML.GLOBAL_EXPLAIN(MODEL `my_model`, <function struct_literal at 0x7fed1a1cc4a0>)
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_predict_model_with_options/predict_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_predict_model_with_options/predict_model_with_options.sql
index 96c8074e4c1..267815415b7 100644
--- a/tests/unit/core/sql/snapshots/test_ml/test_predict_model_with_options/predict_model_with_options.sql
+++ b/tests/unit/core/sql/snapshots/test_ml/test_predict_model_with_options/predict_model_with_options.sql
@@ -1 +1 @@
-SELECT * FROM ML.PREDICT(MODEL `my_model`, (SELECT * FROM new_data), STRUCT(True AS keep_original_columns))
+SELECT * FROM ML.PREDICT(MODEL `my_model`, (SELECT * FROM new_data), <function struct_literal at 0x7fed1a1cc4a0>)

From 26201e4603b8a76a1f2c1cc654396783047e62e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Tue, 3 Feb 2026 16:53:06 +0000
Subject: [PATCH 3/9] revert move to literals submodule

---
 bigframes/core/pyformat.py                    |  3 +-
 bigframes/core/sql/__init__.py                | 74 +++++++++++++-
 bigframes/core/sql/literals.py                | 99 -------------------
 bigframes/core/sql/ml.py                      |  1 -
 .../explain_predict_model_with_options.sql    |  2 +-
 5 files changed, 75 insertions(+), 104 deletions(-)
 delete mode 100644 bigframes/core/sql/literals.py

diff --git a/bigframes/core/pyformat.py b/bigframes/core/pyformat.py
index 7d08dd4da74..8f49556ff4c 100644
--- a/bigframes/core/pyformat.py
+++ b/bigframes/core/pyformat.py
@@ -28,7 +28,6 @@
 
 from bigframes.core import utils
 import bigframes.core.local_data
-import bigframes.core.sql.literals
 from bigframes.core.tools import bigquery_schema
 import bigframes.session
 
@@ -121,7 +120,7 @@ def _validate_type(name: str, value: Any):
 
     supported_types = (
         typing.get_args(_BQ_TABLE_TYPES)
-        + typing.get_args(bigframes.core.sql.literals.SIMPLE_LITERAL_TYPES)
+        + typing.get_args(bigframes.core.sql.SIMPLE_LITERAL_TYPES)
         + (bigframes.dataframe.DataFrame,)
         + (pandas.DataFrame,)
     )
diff --git a/bigframes/core/sql/__init__.py b/bigframes/core/sql/__init__.py
index 521c13c6bdf..ccd2a16ddcd 100644
--- a/bigframes/core/sql/__init__.py
+++ b/bigframes/core/sql/__init__.py
@@ -17,11 +17,15 @@
 Utility functions for SQL construction.
 """
 
+import datetime
+import decimal
 import json
+import math
 from typing import cast, Collection, Iterable, Mapping, Optional, TYPE_CHECKING, Union
 
+import shapely.geometry.base  # type: ignore
+
 import bigframes.core.compile.googlesql as googlesql
-from bigframes.core.sql.literals import simple_literal
 
 if TYPE_CHECKING:
     import google.cloud.bigquery as bigquery
@@ -29,7 +33,75 @@
     import bigframes.core.ordering
 
 
+# shapely.wkt.dumps was moved to shapely.io.to_wkt in 2.0.
+try:
+    from shapely.io import to_wkt  # type: ignore
+except ImportError:
+    from shapely.wkt import dumps  # type: ignore
+
+    to_wkt = dumps
+
+
+SIMPLE_LITERAL_TYPES = Union[
+    bytes,
+    str,
+    int,
+    bool,
+    float,
+    datetime.datetime,
+    datetime.date,
+    datetime.time,
+    decimal.Decimal,
+    list,
+]
+
+
 ### Writing SQL Values (literals, column references, table references, etc.)
+def simple_literal(value: Union[SIMPLE_LITERAL_TYPES, None]) -> str:
+    """Render a Python value as a BigQuery (GoogleSQL) literal expression."""
+
+    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#literals
+    if value is None:
+        return "NULL"
+    elif isinstance(value, str):
+        # Single quoting seems to work nicer with ibis than double quoting
+        return f"'{googlesql._escape_chars(value)}'"
+    elif isinstance(value, bytes):
+        return repr(value)
+    elif isinstance(value, (bool, int)):
+        return str(value)
+    elif isinstance(value, float):
+        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#floating_point_literals
+        if math.isnan(value):
+            return 'CAST("nan" as FLOAT)'
+        if value == math.inf:
+            return 'CAST("+inf" as FLOAT)'
+        if value == -math.inf:
+            return 'CAST("-inf" as FLOAT)'
+        return str(value)
+    # Check datetime first as it is a subclass of date
+    elif isinstance(value, datetime.datetime):
+        if value.tzinfo is None:
+            return f"DATETIME('{value.isoformat()}')"
+        else:
+            return f"TIMESTAMP('{value.isoformat()}')"
+    elif isinstance(value, datetime.date):
+        return f"DATE('{value.isoformat()}')"
+    elif isinstance(value, datetime.time):
+        return f"TIME(DATETIME('1970-01-01 {value.isoformat()}'))"
+    elif isinstance(value, shapely.geometry.base.BaseGeometry):
+        return f"ST_GEOGFROMTEXT({simple_literal(to_wkt(value))})"
+    elif isinstance(value, decimal.Decimal):
+        # TODO: disambiguate BIGNUMERIC based on scale and/or precision
+        return f"CAST('{str(value)}' AS NUMERIC)"
+    elif isinstance(value, list):
+        simple_literals = [simple_literal(i) for i in value]
+        return f"[{', '.join(simple_literals)}]"
+
+    else:
+        raise ValueError(f"Cannot produce literal for {value}")
+
+
 def multi_literal(*values: str):
     literal_strings = [simple_literal(i) for i in values]
     return "(" + ", ".join(literal_strings) + ")"
diff --git a/bigframes/core/sql/literals.py b/bigframes/core/sql/literals.py
deleted file mode 100644
index b9db3590c16..00000000000
--- a/bigframes/core/sql/literals.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright 2026 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import annotations
-
-import datetime
-import decimal
-import math
-from typing import Mapping, Union
-
-import shapely.geometry.base  # type: ignore
-
-import bigframes.core.compile.googlesql as googlesql
-
-# shapely.wkt.dumps was moved to shapely.io.to_wkt in 2.0.
-try:
-    from shapely.io import to_wkt  # type: ignore
-except ImportError:
-    from shapely.wkt import dumps  # type: ignore
-
-    to_wkt = dumps
-
-
-SIMPLE_LITERAL_TYPES = Union[
-    bytes,
-    str,
-    int,
-    bool,
-    float,
-    datetime.datetime,
-    datetime.date,
-    datetime.time,
-    decimal.Decimal,
-    list,
-]
-
-
-def simple_literal(value: Union[SIMPLE_LITERAL_TYPES, None]) -> str:
-    """Return quoted input string."""
-
-    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#literals
-    if value is None:
-        return "NULL"
-    elif isinstance(value, str):
-        # Single quoting seems to work nicer with ibis than double quoting
-        return f"'{googlesql._escape_chars(value)}'"
-    elif isinstance(value, bytes):
-        return repr(value)
-    elif isinstance(value, (bool, int)):
-        return str(value)
-    elif isinstance(value, float):
-        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#floating_point_literals
-        if math.isnan(value):
-            return 'CAST("nan" as FLOAT)'
-        if value == math.inf:
-            return 'CAST("+inf" as FLOAT)'
-        if value == -math.inf:
-            return 'CAST("-inf" as FLOAT)'
-        return str(value)
-    # Check datetime first as it is a subclass of date
-    elif isinstance(value, datetime.datetime):
-        if value.tzinfo is None:
-            return f"DATETIME('{value.isoformat()}')"
-        else:
-            return f"TIMESTAMP('{value.isoformat()}')"
-    elif isinstance(value, datetime.date):
-        return f"DATE('{value.isoformat()}')"
-    elif isinstance(value, datetime.time):
-        return f"TIME(DATETIME('1970-01-01 {value.isoformat()}'))"
-    elif isinstance(value, shapely.geometry.base.BaseGeometry):
-        return f"ST_GEOGFROMTEXT({simple_literal(to_wkt(value))})"
-    elif isinstance(value, decimal.Decimal):
-        # TODO: disambiguate BIGNUMERIC based on scale and/or precision
-        return f"CAST('{str(value)}' AS NUMERIC)"
-    elif isinstance(value, list):
-        simple_literals = [simple_literal(i) for i in value]
-        return f"[{', '.join(simple_literals)}]"
-
-    else:
-        raise ValueError(f"Cannot produce literal for {value}")
-
-
-def struct_literal(struct_options: Mapping[str, SIMPLE_LITERAL_TYPES]) -> str:
-    rendered_options = []
-    for option_name, option_value in struct_options.items():
-        rendered_val = simple_literal(option_value)
-        rendered_options.append(f"{rendered_val} AS {option_name}")
-    return f"STRUCT({', '.join(rendered_options)})"
diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py
index 1fc89e8becf..d77c5aa4a0b 100644
--- a/bigframes/core/sql/ml.py
+++ b/bigframes/core/sql/ml.py
@@ -20,7 +20,6 @@
 
 import bigframes.core.compile.googlesql as googlesql
 import bigframes.core.sql
-import bigframes.core.sql.literals
 
 
 def create_model_ddl(
diff --git a/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql b/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql
index c8e1fa555fe..1214bba8706 100644
--- a/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql
+++ b/tests/unit/core/sql/snapshots/test_ml/test_explain_predict_model_with_options/explain_predict_model_with_options.sql
@@ -1 +1 @@
-SELECT * FROM ML.EXPLAIN_PREDICT(MODEL `my_model`, (SELECT * FROM new_data), <function struct_literal at 0x7fed1a1cc4a0>)
+SELECT * FROM ML.EXPLAIN_PREDICT(MODEL `my_model`, (SELECT * FROM new_data), STRUCT(5 AS top_k_features))

From 7056f4c124018c803101987a73d799aac14f03b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Tue, 3 Feb 2026 16:55:42 +0000
Subject: [PATCH 4/9] fix missing import

---
 bigframes/bigquery/_operations/ai.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
index 720d6dc5b7b..7917c5c1730 100644
--- a/bigframes/bigquery/_operations/ai.py
+++ b/bigframes/bigquery/_operations/ai.py
@@ -28,7 +28,7 @@
 from bigframes import series, session
 from bigframes.core import convert
 from bigframes.core.logging import log_adapter
-import bigframes.core.sql.literals
+import bigframes.core.sql
 from bigframes.ml import core as ml_core
 from bigframes.operations import ai_ops, output_schemas
 
@@ -488,7 +488,7 @@ def generate_embedding(
         FROM AI.GENERATE_EMBEDDING(
             MODEL `{model_name}`,
             ({source_sql}),
-            {bigframes.core.sql.literals.struct_literal(struct_fields)})
+            {bigframes.core.sql.struct_literal(struct_fields)})
         )
     """
 

From fae425deff9b58b7c694c498b64cde79b02fcfc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Tue, 3 Feb 2026 17:06:31 +0000
Subject: [PATCH 5/9] move struct_literal into bigframes.core.sql.literals

---
 bigframes/bigquery/_operations/ai.py |  4 +-
 bigframes/core/sql/literals.py       | 59 ++++++++++++++++++++++++++++
 bigframes/core/sql/ml.py             | 32 +--------------
 3 files changed, 63 insertions(+), 32 deletions(-)
 create mode 100644 bigframes/core/sql/literals.py

diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
index 7917c5c1730..720d6dc5b7b 100644
--- a/bigframes/bigquery/_operations/ai.py
+++ b/bigframes/bigquery/_operations/ai.py
@@ -28,7 +28,7 @@
 from bigframes import series, session
 from bigframes.core import convert
 from bigframes.core.logging import log_adapter
-import bigframes.core.sql
+import bigframes.core.sql.literals
 from bigframes.ml import core as ml_core
 from bigframes.operations import ai_ops, output_schemas
 
@@ -488,7 +488,7 @@ def generate_embedding(
         FROM AI.GENERATE_EMBEDDING(
             MODEL `{model_name}`,
             ({source_sql}),
-            {bigframes.core.sql.struct_literal(struct_fields)})
+            {bigframes.core.sql.literals.struct_literal(struct_fields)})
         )
     """
 
diff --git a/bigframes/core/sql/literals.py b/bigframes/core/sql/literals.py
new file mode 100644
index 00000000000..693c9d629c2
--- /dev/null
+++ b/bigframes/core/sql/literals.py
@@ -0,0 +1,59 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import collections.abc
+import json
+from typing import Any, Dict, List, Mapping, Optional, Union
+
+import bigframes.core.compile.googlesql as googlesql
+import bigframes.core.sql
+
+
+def struct_literal(
+    struct_options: Mapping[
+        str,
+        Union[str, int, float, bool, Mapping[str, str], List[str], Mapping[str, Any]],
+    ]
+) -> str:
+    rendered_options = []
+    for option_name, option_value in struct_options.items():
+        if option_name == "model_params":
+            json_str = json.dumps(option_value)
+            # Escape single quotes for SQL string literal
+            sql_json_str = json_str.replace("'", "''")
+            rendered_val = f"JSON'{sql_json_str}'"
+        elif isinstance(option_value, collections.abc.Mapping):
+            struct_body = ", ".join(
+                [
+                    f"{bigframes.core.sql.simple_literal(v)} AS {k}"
+                    for k, v in option_value.items()
+                ]
+            )
+            rendered_val = f"STRUCT({struct_body})"
+        elif isinstance(option_value, list):
+            rendered_val = (
+                "["
+                + ", ".join(
+                    [bigframes.core.sql.simple_literal(v) for v in option_value]
+                )
+                + "]"
+            )
+        elif isinstance(option_value, bool):
+            rendered_val = str(option_value).lower()
+        else:
+            rendered_val = bigframes.core.sql.simple_literal(option_value)
+        rendered_options.append(f"{rendered_val} AS {option_name}")
+    return f"STRUCT({', '.join(rendered_options)})"
diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py
index d77c5aa4a0b..5b05572b174 100644
--- a/bigframes/core/sql/ml.py
+++ b/bigframes/core/sql/ml.py
@@ -20,6 +20,7 @@
 
 import bigframes.core.compile.googlesql as googlesql
 import bigframes.core.sql
+import bigframes.core.sql.literals
 
 
 def create_model_ddl(
@@ -109,36 +110,7 @@ def _build_struct_sql(
 ) -> str:
     if not struct_options:
         return ""
-
-    rendered_options = []
-    for option_name, option_value in struct_options.items():
-        if option_name == "model_params":
-            json_str = json.dumps(option_value)
-            # Escape single quotes for SQL string literal
-            sql_json_str = json_str.replace("'", "''")
-            rendered_val = f"JSON'{sql_json_str}'"
-        elif isinstance(option_value, collections.abc.Mapping):
-            struct_body = ", ".join(
-                [
-                    f"{bigframes.core.sql.simple_literal(v)} AS {k}"
-                    for k, v in option_value.items()
-                ]
-            )
-            rendered_val = f"STRUCT({struct_body})"
-        elif isinstance(option_value, list):
-            rendered_val = (
-                "["
-                + ", ".join(
-                    [bigframes.core.sql.simple_literal(v) for v in option_value]
-                )
-                + "]"
-            )
-        elif isinstance(option_value, bool):
-            rendered_val = str(option_value).lower()
-        else:
-            rendered_val = bigframes.core.sql.simple_literal(option_value)
-        rendered_options.append(f"{rendered_val} AS {option_name}")
-    return f", STRUCT({', '.join(rendered_options)})"
+    return f", {bigframes.core.sql.literals.struct_literal(struct_options)}"
 
 
 def evaluate(

From 2624a78d8fefd485980b3e184c122fec78753ba9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Tue, 3 Feb 2026 17:28:56 +0000
Subject: [PATCH 6/9] fix tests

---
 tests/unit/bigquery/test_ai.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/tests/unit/bigquery/test_ai.py b/tests/unit/bigquery/test_ai.py
index e3bc7d69d32..a1507997b60 100644
--- a/tests/unit/bigquery/test_ai.py
+++ b/tests/unit/bigquery/test_ai.py
@@ -38,14 +38,15 @@ def mock_dataframe(mock_session):
 
 @pytest.fixture
 def mock_series(mock_session):
-    s = mock.create_autospec(spec=bigframes.series.Series)
-    s._session = mock_session
+    series = mock.create_autospec(spec=bigframes.series.Series)
+    series._session = mock_session
     # Mock to_frame to return a mock dataframe
     df = mock.create_autospec(spec=bigframes.dataframe.DataFrame)
     df._session = mock_session
     df.sql = "SELECT my_col AS content FROM my_table"
-    s.rename.return_value.to_frame.return_value = df
-    return s
+    series.copy.return_value = series
+    series.to_frame.return_value = df
+    return series
 
 
 def test_generate_embedding_with_dataframe(mock_dataframe, mock_session):
@@ -66,7 +67,7 @@ def test_generate_embedding_with_dataframe(mock_dataframe, mock_session):
     expected_part_1 = "SELECT * FROM AI.GENERATE_EMBEDDING("
     expected_part_2 = f"MODEL `{model_name}`,"
     expected_part_3 = "(SELECT * FROM my_table),"
-    expected_part_4 = "STRUCT(256 AS output_dimensionality)"
+    expected_part_4 = "STRUCT(256 AS OUTPUT_DIMENSIONALITY)"
 
     assert expected_part_1 in query
     assert expected_part_2 in query
@@ -81,9 +82,6 @@ def test_generate_embedding_with_series(mock_series, mock_session):
         model_name, mock_series, start_second=0.0, end_second=10.0, interval_seconds=5.0
     )
 
-    mock_series.rename.assert_called_with("content")
-    mock_series.rename.return_value.to_frame.assert_called_once()
-
     mock_session.read_gbq.assert_called_once()
     query = mock_session.read_gbq.call_args[0][0]
     query = " ".join(query.split())
@@ -91,7 +89,7 @@ def test_generate_embedding_with_series(mock_series, mock_session):
     assert f"MODEL `{model_name}`" in query
     assert "(SELECT my_col AS content FROM my_table)" in query
     assert (
-        "STRUCT(0.0 AS start_second, 10.0 AS end_second, 5.0 AS interval_seconds)"
+        "STRUCT(0.0 AS START_SECOND, 10.0 AS END_SECOND, 5.0 AS INTERVAL_SECONDS)"
         in query
     )
 

From 93e92d92cd5ca8f186a534d3af1fcedff7ac8ad0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Tue, 3 Feb 2026 17:30:42 +0000
Subject: [PATCH 7/9] fix docs

---
 bigframes/bigquery/_operations/ai.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
index 720d6dc5b7b..96f4d667431 100644
--- a/bigframes/bigquery/_operations/ai.py
+++ b/bigframes/bigquery/_operations/ai.py
@@ -416,7 +416,8 @@ def generate_embedding(
 
     Args:
         model_name (str):
-            The name of a remote model over a Vertex AI multimodalembedding@001 model.
+            The name of a remote model from Vertex AI, such as the
+            multimodalembedding@001 model.
         data (bigframes.pandas.DataFrame or bigframes.pandas.Series):
             The data to generate embeddings for. If a Series is provided, it is
             treated as the 'content' column.  If a DataFrame is provided, it

From f09cac6163e91bd4987c2b1cc711d3de45bbfa7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Tue, 3 Feb 2026 18:45:01 +0000
Subject: [PATCH 8/9] fix lint and add imports

---
 bigframes/bigquery/_operations/ai.py |  2 +-
 bigframes/bigquery/ai.py             |  2 ++
 bigframes/core/sql/literals.py       | 15 +++++++--------
 bigframes/core/sql/ml.py             |  2 --
 tests/unit/bigquery/test_ai.py       | 10 +++++-----
 5 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
index 96f4d667431..20ec60c5b8d 100644
--- a/bigframes/bigquery/_operations/ai.py
+++ b/bigframes/bigquery/_operations/ai.py
@@ -469,7 +469,7 @@ def generate_embedding(
     # We need to get the SQL for the input data to pass as a subquery to the TVF
     source_sql = data_df.sql
 
-    struct_fields = {}
+    struct_fields: bigframes.core.sql.literals.STRUCT_TYPE = {}
     if output_dimensionality is not None:
         struct_fields["OUTPUT_DIMENSIONALITY"] = output_dimensionality
     if task_type is not None:
diff --git a/bigframes/bigquery/ai.py b/bigframes/bigquery/ai.py
index 3af52205a65..b0d9b62f9be 100644
--- a/bigframes/bigquery/ai.py
+++ b/bigframes/bigquery/ai.py
@@ -22,6 +22,7 @@
     generate,
     generate_bool,
     generate_double,
+    generate_embedding,
     generate_int,
     if_,
     score,
@@ -33,6 +34,7 @@
     "generate",
     "generate_bool",
     "generate_double",
+    "generate_embedding",
     "generate_int",
     "if_",
     "score",
diff --git a/bigframes/core/sql/literals.py b/bigframes/core/sql/literals.py
index 693c9d629c2..0c8c78a3d92 100644
--- a/bigframes/core/sql/literals.py
+++ b/bigframes/core/sql/literals.py
@@ -16,18 +16,17 @@
 
 import collections.abc
 import json
-from typing import Any, Dict, List, Mapping, Optional, Union
+from typing import Any, List, Mapping, Union
 
-import bigframes.core.compile.googlesql as googlesql
 import bigframes.core.sql
 
+STRUCT_TYPE = Mapping[
+    str,
+    Union[str, int, float, bool, Mapping[str, str], List[str], Mapping[str, Any]],
+]
 
-def struct_literal(
-    struct_options: Mapping[
-        str,
-        Union[str, int, float, bool, Mapping[str, str], List[str], Mapping[str, Any]],
-    ]
-) -> str:
+
+def struct_literal(struct_options: STRUCT_TYPE) -> str:
     rendered_options = []
     for option_name, option_value in struct_options.items():
         if option_name == "model_params":
diff --git a/bigframes/core/sql/ml.py b/bigframes/core/sql/ml.py
index 5b05572b174..a2a4d32ae84 100644
--- a/bigframes/core/sql/ml.py
+++ b/bigframes/core/sql/ml.py
@@ -14,8 +14,6 @@
 
 from __future__ import annotations
 
-import collections.abc
-import json
 from typing import Any, Dict, List, Mapping, Optional, Union
 
 import bigframes.core.compile.googlesql as googlesql
diff --git a/tests/unit/bigquery/test_ai.py b/tests/unit/bigquery/test_ai.py
index a1507997b60..0f9df6cc268 100644
--- a/tests/unit/bigquery/test_ai.py
+++ b/tests/unit/bigquery/test_ai.py
@@ -17,7 +17,7 @@
 import pandas as pd
 import pytest
 
-import bigframes.bigquery._operations.ai as ai_ops
+import bigframes.bigquery as bbq
 import bigframes.dataframe
 import bigframes.series
 import bigframes.session
@@ -52,7 +52,7 @@ def mock_series(mock_session):
 def test_generate_embedding_with_dataframe(mock_dataframe, mock_session):
     model_name = "project.dataset.model"
 
-    ai_ops.generate_embedding(
+    bbq.ai.generate_embedding(
         model_name,
         mock_dataframe,
         output_dimensionality=256,
@@ -78,7 +78,7 @@ def test_generate_embedding_with_dataframe(mock_dataframe, mock_session):
 def test_generate_embedding_with_series(mock_series, mock_session):
     model_name = "project.dataset.model"
 
-    ai_ops.generate_embedding(
+    bbq.ai.generate_embedding(
         model_name, mock_series, start_second=0.0, end_second=10.0, interval_seconds=5.0
     )
 
@@ -97,7 +97,7 @@ def test_generate_embedding_with_series(mock_series, mock_session):
 def test_generate_embedding_defaults(mock_dataframe, mock_session):
     model_name = "project.dataset.model"
 
-    ai_ops.generate_embedding(
+    bbq.ai.generate_embedding(
         model_name,
         mock_dataframe,
     )
@@ -122,7 +122,7 @@ def test_generate_embedding_with_pandas_dataframe(
 
     pandas_df = pd.DataFrame({"content": ["test"]})
 
-    ai_ops.generate_embedding(
+    bbq.ai.generate_embedding(
         model_name,
         pandas_df,
     )

From e0cd6cb64f2baedb6c794d4207bf5fb7b5e3463d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Tue, 3 Feb 2026 18:48:04 +0000
Subject: [PATCH 9/9] add STRUCT_VALUES alias and fix struct_fields typing

---
 bigframes/bigquery/_operations/ai.py | 4 ++--
 bigframes/core/sql/literals.py       | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/bigframes/bigquery/_operations/ai.py b/bigframes/bigquery/_operations/ai.py
index 20ec60c5b8d..17af5dd5cfb 100644
--- a/bigframes/bigquery/_operations/ai.py
+++ b/bigframes/bigquery/_operations/ai.py
@@ -19,7 +19,7 @@
 from __future__ import annotations
 
 import json
-from typing import Any, Iterable, List, Literal, Mapping, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Literal, Mapping, Optional, Tuple, Union
 
 import pandas as pd
 
@@ -469,7 +469,7 @@ def generate_embedding(
     # We need to get the SQL for the input data to pass as a subquery to the TVF
     source_sql = data_df.sql
 
-    struct_fields: bigframes.core.sql.literals.STRUCT_TYPE = {}
+    struct_fields: Dict[str, bigframes.core.sql.literals.STRUCT_VALUES] = {}
     if output_dimensionality is not None:
         struct_fields["OUTPUT_DIMENSIONALITY"] = output_dimensionality
     if task_type is not None:
diff --git a/bigframes/core/sql/literals.py b/bigframes/core/sql/literals.py
index 0c8c78a3d92..59c81977315 100644
--- a/bigframes/core/sql/literals.py
+++ b/bigframes/core/sql/literals.py
@@ -20,10 +20,10 @@
 
 import bigframes.core.sql
 
-STRUCT_TYPE = Mapping[
-    str,
-    Union[str, int, float, bool, Mapping[str, str], List[str], Mapping[str, Any]],
+STRUCT_VALUES = Union[
+    str, int, float, bool, Mapping[str, str], List[str], Mapping[str, Any]
 ]
+STRUCT_TYPE = Mapping[str, STRUCT_VALUES]
 
 
 def struct_literal(struct_options: STRUCT_TYPE) -> str: