bc2/core/common/openai.py (31 changes: 29 additions & 2 deletions)
@@ -6,7 +6,7 @@
from abc import abstractmethod
from dataclasses import dataclass
from functools import cached_property
-from typing import Literal, Sequence, TypeAlias, cast
+from typing import Any, Literal, Sequence, TypeAlias, cast

from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
from openai.types.chat import (
@@ -28,7 +28,7 @@
    ChatCompletionUserMessageParam as _OpenAIChatCompletionUserMessageParam,
)
from openai.types.chat.chat_completion_content_part_image_param import ImageURL
-from pydantic import BaseModel, Field, PositiveInt
+from pydantic import BaseModel, Field, PositiveInt, SerializationInfo, model_serializer

from .datafile import DataType, load_data_file, load_data_file_from_path
from .image import ImageUrl
@@ -214,6 +214,33 @@ def prompt_value(self) -> str: ...
    @abstractmethod
    def examples_value(self) -> list[dict[str, str]] | None: ...

+    @model_serializer()
+    def serialize_prompt(self, info: SerializationInfo | None) -> dict[str, Any]:
+        """Serialize the prompt.
+
+        When context.freeze is True, this serializes as an Inline prompt, so
+        that all the information needed to re-run the pipeline exactly as
+        before is preserved.
+
+        Otherwise, it serializes with the subclass's own declared fields
+        (e.g. a BuiltIn prompt keeps its prompt_id). If the external prompt
+        file later changes, the pipeline results could then change, even if
+        the serialized pipeline itself is unchanged.
+
+        Args:
+            info: The serialization info.
+
+        Returns:
+            The serialized prompt.
+        """
+        if info and info.context and info.context.get("freeze", False):
+            return {
+                "engine": self.engine,
+                "prompt": self.prompt_value,
+                "examples": self.examples_value,
+            }
+        else:
+            return {k: getattr(self, k) for k in getattr(self, "model_fields", {})}
+
    def format(
        self, input: AnyChatInput | Sequence[AnyChatInput], **kwargs
    ) -> list[OpenAIChatTurn]:
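For orientation, a minimal sketch of what the new serializer enables (not part of the diff; the rehydration step assumes OpenAIChatPromptInline accepts the same field names the frozen dump produces, which the serialize tests below suggest):

from bc2.core.common.openai import OpenAIChatPromptBuiltIn, OpenAIChatPromptInline

prompt = OpenAIChatPromptBuiltIn(prompt_id="redact")

# Default dump: a lightweight reference to the external prompt file.
by_reference = prompt.model_dump()  # keeps prompt_id, not the prompt text

# Frozen dump: the resolved prompt text and examples are inlined, so a run
# can be reproduced even if data/prompts/redact.txt changes later.
frozen = prompt.model_dump(context={"freeze": True})

# A frozen payload carries everything needed to rebuild the prompt inline
# (assumption: field names line up, as the tests indicate).
replay = OpenAIChatPromptInline(**frozen)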
bc2/core/common/test_openai.py (51 changes: 51 additions & 0 deletions)
@@ -10,6 +10,57 @@
)


+def test_chat_prompt_builtin_serialize():
+    c = OpenAIChatPromptBuiltIn(prompt_id="redact")
+    d = c.model_dump()
+    assert d == {
+        "engine": "string",
+        "prompt_id": c.prompt_id,
+        "examples_id": c.examples_id,
+    }
+    d2 = c.model_dump(context={"freeze": True})
+    assert d2 == {
+        "engine": "string",
+        "prompt": c.prompt_value,
+        "examples": c.examples_value,
+    }
+
+
+def test_chat_prompt_inline_serialize():
+    c = OpenAIChatPromptInline(prompt="Hello, {alias}!", engine="string")
+    d = c.model_dump()
+    assert d == {
+        "engine": "string",
+        "prompt": c.prompt,
+        "examples": c.examples,
+    }
+    d2 = c.model_dump(context={"freeze": True})
+    assert d2 == {
+        "engine": "string",
+        "prompt": c.prompt,
+        "examples": c.examples,
+    }
+
+
+def test_chat_prompt_file_serialize():
+    with tempfile.NamedTemporaryFile(mode="w") as f:
+        f.write("Hello, {alias}!")
+        f.flush()
+        c = OpenAIChatPromptFile(prompt_file=f.name, engine="string")
+        d = c.model_dump()
+        assert d == {
+            "engine": "string",
+            "prompt_file": c.prompt_file,
+            "examples_file": c.examples_file,
+        }
+        d2 = c.model_dump(context={"freeze": True})
+        assert d2 == {
+            "engine": "string",
+            "prompt": c.prompt_value,
+            "examples": c.examples_value,
+        }
+
+
def test_chat_prompt_builtin():
    c = OpenAIChatPromptBuiltIn(prompt_id="redact")
    # Real file is in ../../../data/prompts/redact.txt
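Because pydantic passes the serialization context through to nested models, freezing composes: dumping an enclosing pipeline model with context={"freeze": True} freezes every prompt inside it. A sketch under that assumption; PipelineStep is a hypothetical container, not part of this PR:

from pydantic import BaseModel

from bc2.core.common.openai import OpenAIChatPromptBuiltIn


class PipelineStep(BaseModel):  # hypothetical, for illustration only
    name: str
    prompt: OpenAIChatPromptBuiltIn


step = PipelineStep(name="redact", prompt=OpenAIChatPromptBuiltIn(prompt_id="redact"))

# The context reaches the nested prompt's serialize_prompt(), so the dump
# below inlines the resolved prompt text instead of just prompt_id.
snapshot = step.model_dump(context={"freeze": True})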