From f33155365977766ff2e3c32270d2a1c16b6884d6 Mon Sep 17 00:00:00 2001
From: Woojin Son
Date: Mon, 23 Feb 2026 23:47:18 +0900
Subject: [PATCH 1/2] =?UTF-8?q?=E2=9C=A8feat(resume=5Fingestor):=20add=20o?=
 =?UTF-8?q?llama=20support?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .env.example                            |  7 +++-
 app/main.py                             |  3 ++
 casts/resume_ingestor/modules/models.py | 52 ++++++++++++++++++++++---
 casts/resume_ingestor/modules/nodes.py  | 46 ++++++++++++++++------
 casts/resume_ingestor/modules/state.py  |  2 +
 pyproject.toml                          |  1 +
 uv.lock                                 | 28 +++++++++++++
 7 files changed, 121 insertions(+), 18 deletions(-)

diff --git a/.env.example b/.env.example
index 7e4453a..c0bea2f 100644
--- a/.env.example
+++ b/.env.example
@@ -6,11 +6,16 @@ LANGSMITH_PROJECT=interviewgraph
 LANGSMITH_API_KEY=
 
 # LLM generation runtime config
-# Supported providers in this repo: openai, anthropic
+# Supported providers in this repo: openai, anthropic, ollama
 INTERVIEWGRAPH_LLM_PROVIDER=openai
 INTERVIEWGRAPH_LLM_MODEL=gpt-4o-mini
 INTERVIEWGRAPH_LLM_TEMPERATURE=0.2
 
+# Ollama (local inference)
+# INTERVIEWGRAPH_LLM_PROVIDER=ollama
+# INTERVIEWGRAPH_OLLAMA_MODEL=llama3.1:8b
+# INTERVIEWGRAPH_OLLAMA_BASE_URL=http://127.0.0.1:11434
+
 # Provider API keys (set the one matching provider)
 OPENAI_API_KEY=
 ANTHROPIC_API_KEY=
diff --git a/app/main.py b/app/main.py
index 7b6b89e..b4f0f2b 100644
--- a/app/main.py
+++ b/app/main.py
@@ -39,6 +39,7 @@ class GenerateResponse(BaseModel):
     markdown: str
     errors: list[dict[str, object]]
     generation_mode: str = "fallback"
+    generation_reason: str = "unknown"
 
 
 @app.get("/health")
@@ -62,6 +63,7 @@ def generate_interview_questions(payload: GenerateRequest) -> GenerateResponse:
         markdown=result.get("markdown", ""),
         errors=result.get("errors", []),
         generation_mode=str(result.get("generation_mode", "fallback")),
+        generation_reason=str(result.get("generation_reason", "unknown")),
     )
 
 
@@ -96,4 +98,5 @@ async def generate_from_pdf(file: Annotated[UploadFile, File(...)]) -> GenerateR
         markdown=result.get("markdown", ""),
         errors=result.get("errors", []),
         generation_mode=str(result.get("generation_mode", "fallback")),
+        generation_reason=str(result.get("generation_reason", "unknown")),
     )
diff --git a/casts/resume_ingestor/modules/models.py b/casts/resume_ingestor/modules/models.py
index 7f2c4f7..d53d5c4 100644
--- a/casts/resume_ingestor/modules/models.py
+++ b/casts/resume_ingestor/modules/models.py
@@ -17,6 +17,11 @@
 
 
 def get_generation_model() -> Any | None:
+    model, _reason = get_generation_model_with_reason()
+    return model
+
+
+def get_generation_model_with_reason() -> tuple[Any | None, str]:
     """Returns a configured LangChain chat model, or None if unavailable.
 
     Configuration (all optional):
@@ -25,28 +30,61 @@ def get_generation_model() -> Any | None:
     - INTERVIEWGRAPH_LLM_TEMPERATURE (default: 0.2)
     """
 
-    provider = os.getenv("INTERVIEWGRAPH_LLM_PROVIDER", "openai").strip().lower()
-    model = os.getenv("INTERVIEWGRAPH_LLM_MODEL", "gpt-4o-mini").strip()
+    provider = _provider_name()
     temp_raw = os.getenv("INTERVIEWGRAPH_LLM_TEMPERATURE", "0.2").strip()
 
     if not _has_provider_credentials(provider):
-        return None
+        return None, "missing_credentials"
 
     try:
         temperature = float(temp_raw)
     except ValueError:
         temperature = 0.2
 
+    if provider == "ollama":
+        ollama_model = _ollama_model_name()
+        base_url = os.getenv("INTERVIEWGRAPH_OLLAMA_BASE_URL", "").strip()
+        try:
+            from langchain_ollama import ChatOllama
+
+            kwargs: dict[str, object] = {
+                "model": ollama_model,
+                "temperature": temperature,
+            }
+            if base_url:
+                kwargs["base_url"] = base_url
+            return ChatOllama(**kwargs), "ready"
+        except Exception as exc:
+            return None, f"ollama_init_error:{type(exc).__name__}"
+
+    model = os.getenv("INTERVIEWGRAPH_LLM_MODEL", "gpt-4o-mini").strip()
     try:
         from langchain.chat_models import init_chat_model
 
-        return init_chat_model(
+        model_obj = init_chat_model(
             model=model,
             model_provider=provider,
             temperature=temperature,
         )
-    except Exception:
-        return None
+        return model_obj, "ready"
+    except Exception as exc:
+        return None, f"provider_init_error:{type(exc).__name__}"
+
+
+def _provider_name() -> str:
+    return os.getenv("INTERVIEWGRAPH_LLM_PROVIDER", "openai").strip().lower()
+
+
+def _ollama_model_name() -> str:
+    explicit = os.getenv("INTERVIEWGRAPH_OLLAMA_MODEL", "").strip()
+    if explicit:
+        return explicit
+
+    generic = os.getenv("INTERVIEWGRAPH_LLM_MODEL", "").strip()
+    if generic:
+        return generic
+
+    return "llama3.1:8b"
 
 
 def _has_provider_credentials(provider: str) -> bool:
@@ -54,4 +92,6 @@ def _has_provider_credentials(provider: str) -> bool:
         return bool(os.getenv("OPENAI_API_KEY", "").strip())
     if provider == "anthropic":
         return bool(os.getenv("ANTHROPIC_API_KEY", "").strip())
+    if provider == "ollama":
+        return True
     return False
diff --git a/casts/resume_ingestor/modules/nodes.py b/casts/resume_ingestor/modules/nodes.py
index f6ce11d..9a2ce17 100644
--- a/casts/resume_ingestor/modules/nodes.py
+++ b/casts/resume_ingestor/modules/nodes.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 
 from casts.base_node import BaseNode
-from casts.resume_ingestor.modules.models import get_generation_model
+from casts.resume_ingestor.modules.models import get_generation_model_with_reason
 from casts.resume_ingestor.modules.prompts import build_question_generation_messages
 
 
@@ -50,6 +50,7 @@ def execute(self, state):
             "markdown": "",
             "errors": [],
             "generation_mode": "fallback",
+            "generation_reason": "not_generated_yet",
         }
 
         if isinstance(resume_path, str) and resume_path.strip():
@@ -75,6 +76,7 @@ def execute(self, state):
                     )
                 ],
                 "generation_mode": "fallback",
+                "generation_reason": "extract_text_file_not_found",
             }
 
         try:
@@ -100,6 +102,7 @@ def execute(self, state):
                     )
                 ],
                 "generation_mode": "fallback",
+                "generation_reason": "extract_text_read_failed",
             }
 
         if not loaded_text:
@@ -123,6 +126,7 @@ def execute(self, state):
                     )
                 ],
                 "generation_mode": "fallback",
+                "generation_reason": "extract_text_empty_text",
             }
 
         return {
@@ -138,6 +142,7 @@ def execute(self, state):
             "markdown": "",
             "errors": [],
             "generation_mode": "fallback",
+            "generation_reason": "not_generated_yet",
         }
 
         return {
@@ -160,6 +165,7 @@ def execute(self, state):
                     )
                 ],
                 "generation_mode": "fallback",
"extract_text_missing_input", } @@ -375,7 +381,11 @@ class GenerateQuestionsNode(BaseNode): def execute(self, state): existing_errors = list(state.get("errors", [])) if existing_errors: - return {"questions": [], "generation_mode": "fallback"} + return { + "questions": [], + "generation_mode": "fallback", + "generation_reason": "upstream_errors", + } sections = state.get("sections") signals = state.get("signals") @@ -398,7 +408,7 @@ def execute(self, state): keywords = self._as_list(signals.get("keywords")) evidence = self._as_list(signals.get("evidence")) - llm_questions = self._generate_questions_with_llm( + llm_questions, llm_reason = self._generate_questions_with_llm( raw_text=str(state.get("raw_text", "")), sections=sections, signals={ @@ -409,14 +419,22 @@ def execute(self, state): }, ) if llm_questions is not None: - return {"questions": llm_questions, "generation_mode": "llm"} + return { + "questions": llm_questions, + "generation_mode": "llm", + "generation_reason": llm_reason, + } prompts = self._build_prompt_seeds(skills, projects, keywords, evidence) questions = [ self._make_question(index=idx + 1, seed=seed) for idx, seed in enumerate(prompts[:15]) ] - return {"questions": questions, "generation_mode": "fallback"} + return { + "questions": questions, + "generation_mode": "fallback", + "generation_reason": llm_reason, + } def _generate_questions_with_llm( self, @@ -424,10 +442,10 @@ def _generate_questions_with_llm( raw_text: str, sections: dict[str, object], signals: dict[str, object], - ) -> list[dict[str, object]] | None: - model = get_generation_model() + ) -> tuple[list[dict[str, object]] | None, str]: + model, model_reason = get_generation_model_with_reason() if model is None: - return None + return None, model_reason try: messages = build_question_generation_messages( @@ -436,9 +454,12 @@ def _generate_questions_with_llm( signals=signals, ) response = model.invoke(messages) - return self._parse_llm_questions(response) - except Exception: - return None + parsed = self._parse_llm_questions(response) + if parsed is None: + return None, "llm_parse_failed" + return parsed, "llm_success" + except Exception as exc: + return None, f"llm_invoke_error:{type(exc).__name__}" def _parse_llm_questions(self, response: object) -> list[dict[str, object]] | None: content = getattr(response, "content", response) @@ -909,6 +930,7 @@ def execute(self, state): questions = state.get("questions") errors = state.get("errors") generation_mode = state.get("generation_mode", "fallback") + generation_reason = state.get("generation_reason", "unknown") if not isinstance(errors, list): errors = [] @@ -918,6 +940,7 @@ def execute(self, state): "questions": [], "markdown": "", "generation_mode": "fallback", + "generation_reason": "format_output_invalid_questions", "errors": errors + [ _error_item( @@ -934,6 +957,7 @@ def execute(self, state): "questions": questions, "markdown": markdown, "generation_mode": str(generation_mode), + "generation_reason": str(generation_reason), } def _render_markdown(self, questions: list[object]) -> str: diff --git a/casts/resume_ingestor/modules/state.py b/casts/resume_ingestor/modules/state.py index 461b75d..3fba8e1 100644 --- a/casts/resume_ingestor/modules/state.py +++ b/casts/resume_ingestor/modules/state.py @@ -61,6 +61,7 @@ class OutputState(TypedDict): markdown: str errors: list[ErrorItem] generation_mode: str + generation_reason: str class State(MessagesState): @@ -75,3 +76,4 @@ class State(MessagesState): markdown: str errors: list[ErrorItem] generation_mode: str 
+    generation_reason: str
diff --git a/pyproject.toml b/pyproject.toml
index 33e5cb6..36f8981 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,6 +8,7 @@ dependencies = [
     "fastapi>=0.116.1",
     "langchain-anthropic>=0.3.0",
     "langchain>=1.0.0",
+    "langchain-ollama>=0.3.0",
     "langchain-openai>=0.3.0",
     "langgraph>=1.0.0",
     "pypdf>=6.0.0",
diff --git a/uv.lock b/uv.lock
index d80082a..5ed3724 100644
--- a/uv.lock
+++ b/uv.lock
@@ -599,6 +599,7 @@ dependencies = [
     { name = "fastapi" },
     { name = "langchain" },
     { name = "langchain-anthropic" },
+    { name = "langchain-ollama" },
     { name = "langchain-openai" },
     { name = "langgraph" },
     { name = "pypdf" },
@@ -629,6 +630,7 @@ requires-dist = [
     { name = "fastapi", specifier = ">=0.116.1" },
     { name = "langchain", specifier = ">=1.0.0" },
     { name = "langchain-anthropic", specifier = ">=0.3.0" },
+    { name = "langchain-ollama", specifier = ">=0.3.0" },
     { name = "langchain-openai", specifier = ">=0.3.0" },
     { name = "langgraph", specifier = ">=1.0.0" },
     { name = "pypdf", specifier = ">=6.0.0" },
@@ -823,6 +825,19 @@
 wheels = [
     { url = "https://files.pythonhosted.org/packages/71/41/fe6ae9065b866b1397adbfc98db5e1648e8dcd78126b8e1266fcbe2d6395/langchain_core-1.2.14-py3-none-any.whl", hash = "sha256:b349ca28c057ac1f9b5280ea091bddb057db24d0f1c3c89bbb590713e1715838", size = 501411, upload-time = "2026-02-19T14:22:32.013Z" },
 ]
 
+[[package]]
+name = "langchain-ollama"
+version = "1.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "langchain-core" },
+    { name = "ollama" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/73/51/72cd04d74278f3575f921084f34280e2f837211dc008c9671c268c578afe/langchain_ollama-1.0.1.tar.gz", hash = "sha256:e37880c2f41cdb0895e863b1cfd0c2c840a117868b3f32e44fef42569e367443", size = 153850, upload-time = "2025-12-12T21:48:28.68Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e3/46/f2907da16dc5a5a6c679f83b7de21176178afad8d2ca635a581429580ef6/langchain_ollama-1.0.1-py3-none-any.whl", hash = "sha256:37eb939a4718a0255fe31e19fbb0def044746c717b01b97d397606ebc3e9b440", size = 29207, upload-time = "2025-12-12T21:48:27.832Z" },
+]
+
 [[package]]
 name = "langchain-openai"
 version = "1.1.10"
@@ -1078,6 +1093,19 @@
 wheels = [
     { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" },
 ]
 
+[[package]]
+name = "ollama"
+version = "0.6.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "httpx" },
+    { name = "pydantic" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9d/5a/652dac4b7affc2b37b95386f8ae78f22808af09d720689e3d7a86b6ed98e/ollama-0.6.1.tar.gz", hash = "sha256:478c67546836430034b415ed64fa890fd3d1ff91781a9d548b3325274e69d7c6", size = 51620, upload-time = "2025-11-13T23:02:17.416Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/47/4f/4a617ee93d8208d2bcf26b2d8b9402ceaed03e3853c754940e2290fed063/ollama-0.6.1-py3-none-any.whl", hash = "sha256:fc4c984b345735c5486faeee67d8a265214a31cbb828167782dc642ce0a2bf8c", size = 14354, upload-time = "2025-11-13T23:02:16.292Z" },
+]
+
 [[package]]
 name = "openai"
 version = "2.21.0"

From b4a7d10907f4bd9c41e3a4ea70745a1ac1d8e1d8 Mon Sep 17 00:00:00 2001
From: Woojin Son
Date: Mon, 23 Feb 2026 23:48:18 +0900
Subject: [PATCH 2/2] =?UTF-8?q?=E2=9C=85test:=20add=20tests=20for=20loadi?=
 =?UTF-8?q?ng=20llm=20models?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 scripts/staging_quality_check.py | 21 ++++++++++++++++++-
 tests/node_tests/test_models.py  | 36 ++++++++++++++++++++++++++++++++
 tests/node_tests/test_node.py    |  8 +++----
 3 files changed, 60 insertions(+), 5 deletions(-)
 create mode 100644 tests/node_tests/test_models.py

diff --git a/scripts/staging_quality_check.py b/scripts/staging_quality_check.py
index 073711e..83f004b 100644
--- a/scripts/staging_quality_check.py
+++ b/scripts/staging_quality_check.py
@@ -3,13 +3,26 @@
 import argparse
 import json
 import os
-import sys
 from pathlib import Path
 
 from casts.resume_ingestor.graph import resume_ingestor_graph
 from casts.resume_ingestor.modules.models import get_generation_model
 
 
+def _load_runtime_env() -> None:
+    try:
+        from dotenv import load_dotenv
+    except Exception:
+        return
+
+    root = Path(__file__).resolve().parents[1]
+    load_dotenv(root / ".env", override=False)
+    load_dotenv(root / ".env.local", override=False)
+
+
+_load_runtime_env()
+
+
 def _parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(description="Run staging quality check")
     parser.add_argument(
@@ -35,6 +48,8 @@ def _has_provider_key() -> bool:
         return bool(os.getenv("OPENAI_API_KEY", "").strip())
     if provider == "anthropic":
         return bool(os.getenv("ANTHROPIC_API_KEY", "").strip())
+    if provider == "ollama":
+        return True
     return False
 
 
@@ -42,6 +57,7 @@ def _quality_report(result: dict[str, object]) -> dict[str, object]:
     questions = result.get("questions", [])
     errors = result.get("errors", [])
     generation_mode = str(result.get("generation_mode", "fallback"))
+    generation_reason = str(result.get("generation_reason", "unknown"))
 
     if not isinstance(questions, list):
         questions = []
@@ -67,6 +83,7 @@
     unique_categories = sorted(set(categories))
     return {
         "generation_mode": generation_mode,
+        "generation_reason": generation_reason,
         "question_count": len(questions),
         "unique_categories": unique_categories,
         "category_count": len(unique_categories),
@@ -115,6 +132,7 @@ def main() -> int:
 
     category_count = _as_int(report.get("category_count"), 0)
     generation_mode = str(report.get("generation_mode", "fallback"))
+    generation_reason = str(report.get("generation_reason", "unknown"))
 
     if question_count != 15:
         return 1
@@ -124,6 +142,7 @@ def main() -> int:
         return 1
 
     if llm_ready and generation_mode != "llm":
+        print(f"LLM ready but fallback occurred: reason={generation_reason}")
         return 1
 
     return 0
diff --git a/tests/node_tests/test_models.py b/tests/node_tests/test_models.py
new file mode 100644
index 0000000..0f7378c
--- /dev/null
+++ b/tests/node_tests/test_models.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+import pytest
+
+from casts.resume_ingestor.modules import models
+
+
+def test_openai_provider_requires_key(monkeypatch) -> None:
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    assert models._has_provider_credentials("openai") is False
+
+
+def test_ollama_provider_does_not_require_key() -> None:
+    assert models._has_provider_credentials("ollama") is True
+
+
+def test_ollama_model_name_prefers_dedicated_env(monkeypatch) -> None:
+    monkeypatch.setenv("INTERVIEWGRAPH_OLLAMA_MODEL", "llama3.2:latest")
+    monkeypatch.setenv("INTERVIEWGRAPH_LLM_MODEL", "ignored-model")
+    assert models._ollama_model_name() == "llama3.2:latest"
+
+
+def test_generation_model_returns_none_without_required_key(monkeypatch) -> None:
+    monkeypatch.setenv("INTERVIEWGRAPH_LLM_PROVIDER", "openai")
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    assert models.get_generation_model() is None
+
+
+def test_generation_model_builds_ollama_instance(monkeypatch) -> None:
+    pytest.importorskip("langchain_ollama")
+    monkeypatch.setenv("INTERVIEWGRAPH_LLM_PROVIDER", "ollama")
+    monkeypatch.setenv("INTERVIEWGRAPH_OLLAMA_MODEL", "llama3.1:8b")
+    monkeypatch.setenv("INTERVIEWGRAPH_OLLAMA_BASE_URL", "http://127.0.0.1:11434")
+
+    model = models.get_generation_model()
+    assert model is not None
diff --git a/tests/node_tests/test_node.py b/tests/node_tests/test_node.py
index 5af62b8..3cb39d4 100644
--- a/tests/node_tests/test_node.py
+++ b/tests/node_tests/test_node.py
@@ -94,8 +94,8 @@ def test_extract_signals_node_returns_error_without_sections() -> None:
 
 def test_generate_questions_node_creates_15_structured_items(monkeypatch) -> None:
     monkeypatch.setattr(
-        "casts.resume_ingestor.modules.nodes.get_generation_model",
-        lambda: None,
+        "casts.resume_ingestor.modules.nodes.get_generation_model_with_reason",
+        lambda: (None, "missing_credentials"),
     )
     node = GenerateQuestionsNode()
     result = node(
@@ -141,8 +141,8 @@ def invoke(self, _messages):
             return FakeResponse()
 
     monkeypatch.setattr(
-        "casts.resume_ingestor.modules.nodes.get_generation_model",
-        lambda: FakeModel(),
+        "casts.resume_ingestor.modules.nodes.get_generation_model_with_reason",
+        lambda: (FakeModel(), "ready"),
     )
     result = node(
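
For local verification of the new Ollama path end to end, a minimal sketch: the script below is hypothetical (it ships with neither patch) and assumes an Ollama server is already running at the default address with the model pulled (e.g. `ollama pull llama3.1:8b`).

    # smoke_ollama.py -- hypothetical helper, not part of the patches above.
    import os

    from casts.resume_ingestor.modules.models import get_generation_model_with_reason

    # Mirror the commented-out Ollama block from .env.example.
    os.environ["INTERVIEWGRAPH_LLM_PROVIDER"] = "ollama"
    os.environ["INTERVIEWGRAPH_OLLAMA_MODEL"] = "llama3.1:8b"
    os.environ["INTERVIEWGRAPH_OLLAMA_BASE_URL"] = "http://127.0.0.1:11434"

    # The resolver returns (model, reason); "ready" means ChatOllama was built.
    model, reason = get_generation_model_with_reason()
    print(f"reason={reason}")  # "ready", or "ollama_init_error:<ExceptionName>"

    if model is not None:
        # ChatOllama is constructed without contacting the server, so the first
        # real round-trip happens on invoke; connection failures surface here.
        reply = model.invoke("Reply with the single word: pong")
        print(getattr(reply, "content", reply))

Because `_has_provider_credentials` treats ollama as always credentialed, a wrong model name or an unreachable server is reported via the `ollama_init_error`/`llm_invoke_error` reasons rather than `missing_credentials`, which is exactly what the new `generation_reason` field exposes in the API responses.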