From 0075289c6601c1601fd1dc55cbda5fb9867b06d9 Mon Sep 17 00:00:00 2001 From: ehddnr301 Date: Sat, 13 Sep 2025 16:18:19 +0900 Subject: [PATCH 1/3] =?UTF-8?q?=EC=A7=88=EB=AC=B8=20=EC=A0=81=ED=95=A9?= =?UTF-8?q?=EC=84=B1=20=ED=8C=90=EB=B3=84=20=EA=B8=B0=EB=8A=A5=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80=20=EB=B0=8F=20=EA=B4=80=EB=A0=A8=20=EB=AA=A8=EB=93=88?= =?UTF-8?q?=20=EC=97=85=EB=8D=B0=EC=9D=B4=ED=8A=B8.=20-=20=EC=83=88?= =?UTF-8?q?=EB=A1=9C=EC=9A=B4=20=EC=A7=88=EB=AC=B8=20=EA=B2=8C=EC=9D=B4?= =?UTF-8?q?=ED=8A=B8=20=EC=B2=B4=EC=9D=B8=EA=B3=BC=20=EC=B6=9C=EB=A0=A5=20?= =?UTF-8?q?=EB=AA=A8=EB=8D=B8=EC=9D=84=20=EA=B5=AC=ED=98=84=20-=20UI?= =?UTF-8?q?=EC=97=90=EC=84=9C=20=EA=B2=B0=EA=B3=BC=EB=A5=BC=20=ED=91=9C?= =?UTF-8?q?=EC=8B=9C=ED=95=98=EB=8F=84=EB=A1=9D=20=EC=88=98=EC=A0=95?= =?UTF-8?q?=ED=95=A8.=20-=20=EC=A7=88=EB=AC=B8=20=EA=B2=8C=EC=9D=B4?= =?UTF-8?q?=ED=8A=B8=20=EA=B2=B0=EA=B3=BC=EB=A5=BC=20=EC=B2=98=EB=A6=AC?= =?UTF-8?q?=ED=95=98=EB=8A=94=20=EB=85=B8=EB=93=9C=20=EB=B0=8F=20=EA=B7=B8?= =?UTF-8?q?=EB=9E=98=ED=94=84=20=EA=B5=AC=EC=84=B1=EB=8F=84=20=ED=8F=AC?= =?UTF-8?q?=ED=95=A8=EB=90=A8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- interface/lang2sql.py | 16 +++++ llm_utils/chains.py | 68 +++++++++++++++++-- llm_utils/graph_utils/base.py | 31 +++++++-- llm_utils/graph_utils/basic_graph.py | 17 ++++- llm_utils/graph_utils/enriched_graph.py | 17 ++++- llm_utils/output_parser/__init__.py | 6 ++ .../output_parser/question_suitability.py | 25 +++++++ prompt/question_gate_prompt.md | 19 ++++++ 8 files changed, 189 insertions(+), 10 deletions(-) create mode 100644 llm_utils/output_parser/__init__.py create mode 100644 llm_utils/output_parser/question_suitability.py create mode 100644 prompt/question_gate_prompt.md diff --git a/interface/lang2sql.py b/interface/lang2sql.py index 3a3cbe1..0a4e875 100644 --- a/interface/lang2sql.py +++ b/interface/lang2sql.py @@ -30,6 +30,7 @@ "show_sql": "Show SQL", 
"show_question_reinterpreted_by_ai": "Show User Question Reinterpreted by AI", "show_referenced_tables": "Show List of Referenced Tables", + "show_question_gate_result": "Show Question Gate Result", "show_table": "Show Table", "show_chart": "Show Chart", } @@ -103,8 +104,23 @@ def should_show(_key: str) -> bool: show_sql_section = has_query and should_show("show_sql") show_result_desc = has_query and should_show("show_result_description") show_reinterpreted = has_query and should_show("show_question_reinterpreted_by_ai") + show_gate_result = should_show("show_question_gate_result") show_table_section = has_query and should_show("show_table") show_chart_section = has_query and should_show("show_chart") + if show_gate_result and ("question_gate_result" in res): + st.markdown("---") + st.markdown("**Question Gate 결과:**") + details = res.get("question_gate_result") + if details: + passed = details.get("is_sql_like") + if passed is not None: + st.write(f"적합성 통과 여부: `{passed}`") + try: + import json as _json + st.code(_json.dumps(details, ensure_ascii=False, indent=2), language="json") + except Exception: + st.write(details) + if should_show("show_token_usage"): st.markdown("---") diff --git a/llm_utils/chains.py b/llm_utils/chains.py index 72121ba..475f91c 100644 --- a/llm_utils/chains.py +++ b/llm_utils/chains.py @@ -1,10 +1,19 @@ +""" +LLM 체인 생성 모듈. + +이 모듈은 Lang2SQL에서 사용하는 다양한 LangChain 기반 체인을 정의합니다. +- Query Maker +- Query Enrichment +- Profile Extraction +- Question Gate (SQL 적합성 분류) +""" import os from langchain_core.prompts import ( ChatPromptTemplate, - MessagesPlaceholder, SystemMessagePromptTemplate, ) from pydantic import BaseModel, Field +from llm_utils.output_parser.question_suitability import QuestionSuitability from llm_utils.llm import get_llm @@ -15,6 +24,11 @@ class QuestionProfile(BaseModel): + """ + 자연어 질문의 특징을 구조화해 표현하는 프로파일 모델. + + 이 프로파일은 이후 컨텍스트 보강 및 SQL 생성 시 힌트로 사용됩니다. 
+ """ is_timeseries: bool = Field(description="시계열 분석 필요 여부") is_aggregation: bool = Field(description="집계 함수 필요 여부") has_filter: bool = Field(description="조건 필터 필요 여부") @@ -26,6 +40,15 @@ class QuestionProfile(BaseModel): # QueryMakerChain def create_query_maker_chain(llm): + """ + SQL 쿼리 생성을 위한 체인을 생성합니다. + + Args: + llm: LangChain 호환 LLM 인스턴스 + + Returns: + Runnable: 입력 프롬프트를 받아 SQL을 생성하는 체인 + """ prompt = get_prompt_template("query_maker_prompt") query_maker_prompt = ChatPromptTemplate.from_messages( [ @@ -36,6 +59,15 @@ def create_query_maker_chain(llm): def create_query_enrichment_chain(llm): + """ + 사용자 질문을 메타데이터로 보강하기 위한 체인을 생성합니다. + + Args: + llm: LangChain 호환 LLM 인스턴스 + + Returns: + Runnable: 보강된 질문 텍스트를 반환하는 체인 + """ prompt = get_prompt_template("query_enrichment_prompt") enrichment_prompt = ChatPromptTemplate.from_messages( @@ -49,6 +81,15 @@ def create_query_enrichment_chain(llm): def create_profile_extraction_chain(llm): + """ + 질문으로부터 `QuestionProfile`을 추출하는 체인을 생성합니다. + + Args: + llm: LangChain 호환 LLM 인스턴스 + + Returns: + Runnable: `QuestionProfile` 구조화 출력을 반환하는 체인 + """ prompt = get_prompt_template("profile_extraction_prompt") profile_prompt = ChatPromptTemplate.from_messages( @@ -61,9 +102,28 @@ def create_profile_extraction_chain(llm): return chain +def create_question_gate_chain(llm): + """ + 질문 적합성(Question Gate) 체인을 생성합니다. + + ChatPromptTemplate(SystemMessage) + LLM 구조화 출력으로 + `QuestionSuitability`를 반환합니다. 
+ + Args: + llm: LangChain 호환 LLM 인스턴스 + + Returns: + Runnable: invoke({"question": str}) -> QuestionSuitability + """ + + prompt = get_prompt_template("question_gate_prompt") + gate_prompt = ChatPromptTemplate.from_messages( + [SystemMessagePromptTemplate.from_template(prompt)] + ) + return gate_prompt | llm.with_structured_output(QuestionSuitability) + + query_maker_chain = create_query_maker_chain(llm) profile_extraction_chain = create_profile_extraction_chain(llm) query_enrichment_chain = create_query_enrichment_chain(llm) - -if __name__ == "__main__": - pass +question_gate_chain = create_question_gate_chain(llm) \ No newline at end of file diff --git a/llm_utils/graph_utils/base.py b/llm_utils/graph_utils/base.py index 9358877..532ccf5 100644 --- a/llm_utils/graph_utils/base.py +++ b/llm_utils/graph_utils/base.py @@ -1,8 +1,6 @@ -import os import json from typing_extensions import TypedDict, Annotated -from langgraph.graph import END, StateGraph from langgraph.graph.message import add_messages @@ -10,13 +8,13 @@ query_maker_chain, profile_extraction_chain, query_enrichment_chain, + question_gate_chain, ) -from llm_utils.tools import get_info_from_db from llm_utils.retrieval import search_tables -from llm_utils.graph_utils.profile_utils import profile_to_text # 노드 식별자 정의 +QUESTION_GATE = "question_gate" GET_TABLE_INFO = "get_table_info" TOOL = "tool" TABLE_FILTER = "table_filter" @@ -36,6 +34,31 @@ class QueryMakerState(TypedDict): retriever_name: str top_n: int device: str + question_gate_result: dict + +# 노드 함수: QUESTION_GATE 노드 +def question_gate_node(state: QueryMakerState): + """ + 사용자의 질문이 SQL로 답변 가능한지 판별하고, 구조화된 결과를 반환하는 게이트 노드입니다. + + - question_gate_chain 으로 적합성을 판정하여 + `question_gate_result`를 설정합니다. 
+ + Args: + state (QueryMakerState): 그래프 상태 + + Returns: + QueryMakerState: 게이트 판정 결과가 반영된 상태 + """ + + question_text = state["messages"][0].content + suitability = question_gate_chain.invoke({"question": question_text}) + state["question_gate_result"] = { + "reason": getattr(suitability, "reason", ""), + "missing_entities": getattr(suitability, "missing_entities", []), + "requires_data_science": getattr(suitability, "requires_data_science", False), + } + return state # 노드 함수: PROFILE_EXTRACTION 노드 diff --git a/llm_utils/graph_utils/basic_graph.py b/llm_utils/graph_utils/basic_graph.py index 911b11d..0708b9b 100644 --- a/llm_utils/graph_utils/basic_graph.py +++ b/llm_utils/graph_utils/basic_graph.py @@ -3,8 +3,10 @@ from langgraph.graph import StateGraph, END from llm_utils.graph_utils.base import ( QueryMakerState, + QUESTION_GATE, GET_TABLE_INFO, QUERY_MAKER, + question_gate_node, get_table_info_node, query_maker_node, ) @@ -16,12 +18,25 @@ # StateGraph 생성 및 구성 builder = StateGraph(QueryMakerState) -builder.set_entry_point(GET_TABLE_INFO) +builder.set_entry_point(QUESTION_GATE) # 노드 추가 +builder.add_node(QUESTION_GATE, question_gate_node) builder.add_node(GET_TABLE_INFO, get_table_info_node) builder.add_node(QUERY_MAKER, query_maker_node) +def _route_after_gate(state: QueryMakerState): + return GET_TABLE_INFO + +builder.add_conditional_edges( + QUESTION_GATE, + _route_after_gate, + { + GET_TABLE_INFO: GET_TABLE_INFO, + END: END, + }, +) + # 기본 엣지 설정 builder.add_edge(GET_TABLE_INFO, QUERY_MAKER) diff --git a/llm_utils/graph_utils/enriched_graph.py b/llm_utils/graph_utils/enriched_graph.py index 636384b..884ffcd 100644 --- a/llm_utils/graph_utils/enriched_graph.py +++ b/llm_utils/graph_utils/enriched_graph.py @@ -3,10 +3,12 @@ from langgraph.graph import StateGraph, END from llm_utils.graph_utils.base import ( QueryMakerState, + QUESTION_GATE, GET_TABLE_INFO, PROFILE_EXTRACTION, CONTEXT_ENRICHMENT, QUERY_MAKER, + question_gate_node, get_table_info_node, 
profile_extraction_node, context_enrichment_node, @@ -20,14 +22,27 @@ # StateGraph 생성 및 구성 builder = StateGraph(QueryMakerState) -builder.set_entry_point(GET_TABLE_INFO) +builder.set_entry_point(QUESTION_GATE) # 노드 추가 +builder.add_node(QUESTION_GATE, question_gate_node) builder.add_node(GET_TABLE_INFO, get_table_info_node) builder.add_node(PROFILE_EXTRACTION, profile_extraction_node) builder.add_node(CONTEXT_ENRICHMENT, context_enrichment_node) builder.add_node(QUERY_MAKER, query_maker_node) +def _route_after_gate(state: QueryMakerState): + return GET_TABLE_INFO + +builder.add_conditional_edges( + QUESTION_GATE, + _route_after_gate, + { + GET_TABLE_INFO: GET_TABLE_INFO, + END: END, + }, +) + # 기본 엣지 설정 builder.add_edge(GET_TABLE_INFO, PROFILE_EXTRACTION) builder.add_edge(PROFILE_EXTRACTION, CONTEXT_ENRICHMENT) diff --git a/llm_utils/output_parser/__init__.py b/llm_utils/output_parser/__init__.py new file mode 100644 index 0000000..f5896dd --- /dev/null +++ b/llm_utils/output_parser/__init__.py @@ -0,0 +1,6 @@ +""" +출력 파서 모듈 패키지 초기화. + +이 패키지는 LLM의 구조화 출력 모델과 파서들을 포함합니다. +""" + diff --git a/llm_utils/output_parser/question_suitability.py b/llm_utils/output_parser/question_suitability.py new file mode 100644 index 0000000..11cf46f --- /dev/null +++ b/llm_utils/output_parser/question_suitability.py @@ -0,0 +1,25 @@ +""" +QuestionSuitability 출력 모델. + +LLM 구조화 출력으로부터 SQL 적합성 판단 결과를 표현하는 Pydantic 모델입니다. +""" + +from pydantic import BaseModel, Field + + +class QuestionSuitability(BaseModel): + """ + SQL 생성 적합성 결과 모델. + + LLM 구조화 출력으로 직렬화 가능한 필드를 정의합니다. 
+ """ + + reason: str = Field(description="보완/설명 사유 요약") + missing_entities: list[str] = Field( + default_factory=list, description="질문에서 누락된 핵심 엔터티/기간 등" + ) + requires_data_science: bool = Field( + default=False, description="SQL을 넘어 ML/통계 분석이 필요한지 여부" + ) + + diff --git a/prompt/question_gate_prompt.md b/prompt/question_gate_prompt.md new file mode 100644 index 0000000..aea3865 --- /dev/null +++ b/prompt/question_gate_prompt.md @@ -0,0 +1,19 @@ +당신은 데이터 분석 도우미입니다. 아래 사용자 질문이 SQL로 답변 가능한지 판별하고, 구조화된 결과를 반환하세요. + +요건: +- reason: 한 줄 설명(어떤 보완이 필요한지 요약) +- missing_entities: 기간, 대상 엔터티, 측정값 등 누락된 핵심 요소 리스트(없으면 빈 리스트) +- requires_data_science: 통계/ML 분석이 필요한지 여부(Boolean) + +언어/출력 형식: +- 모든 텍스트 값은 한국어로 작성하세요. (reason는 한국어 문장, missing_entities 항목은 한국어 명사구) +- Boolean 값은 JSON의 true/false로 표기하세요. + +주의: +- 데이터 분석 맥락에서 SQL 집계/필터/조인으로 해결 가능한지 판단합니다. +- 정책/운영/가이드/설치/권한/오류 해결 등은 SQL 부적합으로 간주합니다. + +입력: {question} + +출력은 반드시 지정된 스키마의 JSON으로만 반환하세요. + From 3008455d16f810a4f198b89e3c6e466f8cb2a299 Mon Sep 17 00:00:00 2001 From: ehddnr301 Date: Sat, 13 Sep 2025 16:59:44 +0900 Subject: [PATCH 2/3] =?UTF-8?q?pre-commit=20=EC=9C=BC=EB=A1=9C=20=EC=BD=94?= =?UTF-8?q?=EB=93=9C=20=EC=A0=95=EB=A6=AC=20=EB=B0=8F=20=ED=8F=AC=EB=A7=B7?= =?UTF-8?q?=ED=8C=85=20=EA=B0=9C=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- interface/lang2sql.py | 6 ++++-- llm_utils/chains.py | 4 +++- llm_utils/graph_utils/base.py | 1 + llm_utils/graph_utils/basic_graph.py | 2 ++ llm_utils/graph_utils/enriched_graph.py | 2 ++ llm_utils/output_parser/__init__.py | 1 - llm_utils/output_parser/question_suitability.py | 2 -- 7 files changed, 12 insertions(+), 6 deletions(-) diff --git a/interface/lang2sql.py b/interface/lang2sql.py index 0a4e875..34d593f 100644 --- a/interface/lang2sql.py +++ b/interface/lang2sql.py @@ -117,11 +117,13 @@ def should_show(_key: str) -> bool: st.write(f"적합성 통과 여부: `{passed}`") try: import json as _json - st.code(_json.dumps(details, 
ensure_ascii=False, indent=2), language="json") + + st.code( + _json.dumps(details, ensure_ascii=False, indent=2), language="json" + ) except Exception: st.write(details) - if should_show("show_token_usage"): st.markdown("---") token_summary = TokenUtils.get_token_usage_summary(data=res["messages"]) diff --git a/llm_utils/chains.py b/llm_utils/chains.py index 475f91c..b4d90fb 100644 --- a/llm_utils/chains.py +++ b/llm_utils/chains.py @@ -7,6 +7,7 @@ - Profile Extraction - Question Gate (SQL 적합성 분류) """ + import os from langchain_core.prompts import ( ChatPromptTemplate, @@ -29,6 +30,7 @@ class QuestionProfile(BaseModel): 이 프로파일은 이후 컨텍스트 보강 및 SQL 생성 시 힌트로 사용됩니다. """ + is_timeseries: bool = Field(description="시계열 분석 필요 여부") is_aggregation: bool = Field(description="집계 함수 필요 여부") has_filter: bool = Field(description="조건 필터 필요 여부") @@ -126,4 +128,4 @@ def create_question_gate_chain(llm): query_maker_chain = create_query_maker_chain(llm) profile_extraction_chain = create_profile_extraction_chain(llm) query_enrichment_chain = create_query_enrichment_chain(llm) -question_gate_chain = create_question_gate_chain(llm) \ No newline at end of file +question_gate_chain = create_question_gate_chain(llm) diff --git a/llm_utils/graph_utils/base.py b/llm_utils/graph_utils/base.py index 532ccf5..d199b7d 100644 --- a/llm_utils/graph_utils/base.py +++ b/llm_utils/graph_utils/base.py @@ -36,6 +36,7 @@ class QueryMakerState(TypedDict): device: str question_gate_result: dict + # 노드 함수: QUESTION_GATE 노드 def question_gate_node(state: QueryMakerState): """ diff --git a/llm_utils/graph_utils/basic_graph.py b/llm_utils/graph_utils/basic_graph.py index 0708b9b..62714f5 100644 --- a/llm_utils/graph_utils/basic_graph.py +++ b/llm_utils/graph_utils/basic_graph.py @@ -25,9 +25,11 @@ builder.add_node(GET_TABLE_INFO, get_table_info_node) builder.add_node(QUERY_MAKER, query_maker_node) + def _route_after_gate(state: QueryMakerState): return GET_TABLE_INFO + builder.add_conditional_edges( 
QUESTION_GATE, _route_after_gate, diff --git a/llm_utils/graph_utils/enriched_graph.py b/llm_utils/graph_utils/enriched_graph.py index 884ffcd..5e61552 100644 --- a/llm_utils/graph_utils/enriched_graph.py +++ b/llm_utils/graph_utils/enriched_graph.py @@ -31,9 +31,11 @@ builder.add_node(CONTEXT_ENRICHMENT, context_enrichment_node) builder.add_node(QUERY_MAKER, query_maker_node) + def _route_after_gate(state: QueryMakerState): return GET_TABLE_INFO + builder.add_conditional_edges( QUESTION_GATE, _route_after_gate, diff --git a/llm_utils/output_parser/__init__.py b/llm_utils/output_parser/__init__.py index f5896dd..5a4f0b0 100644 --- a/llm_utils/output_parser/__init__.py +++ b/llm_utils/output_parser/__init__.py @@ -3,4 +3,3 @@ 이 패키지는 LLM의 구조화 출력 모델과 파서들을 포함합니다. """ - diff --git a/llm_utils/output_parser/question_suitability.py b/llm_utils/output_parser/question_suitability.py index 11cf46f..210a307 100644 --- a/llm_utils/output_parser/question_suitability.py +++ b/llm_utils/output_parser/question_suitability.py @@ -21,5 +21,3 @@ class QuestionSuitability(BaseModel): requires_data_science: bool = Field( default=False, description="SQL을 넘어 ML/통계 분석이 필요한지 여부" ) - - From 827db804c65d301da6f9adec2b7ccc84dbc81f01 Mon Sep 17 00:00:00 2001 From: ehddnr301 Date: Sun, 14 Sep 2025 12:33:44 +0900 Subject: [PATCH 3/3] =?UTF-8?q?=EB=AC=B8=EC=84=9C=20=EC=A0=81=ED=95=A9?= =?UTF-8?q?=EC=84=B1=20=ED=8F=89=EA=B0=80=20=EA=B8=B0=EB=8A=A5=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80=20=EB=B0=8F=20=EA=B4=80=EB=A0=A8=20=EB=AA=A8=EB=93=88?= =?UTF-8?q?=20=EC=97=85=EB=8D=B0=EC=9D=B4=ED=8A=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 새로운 문서 적합성 체인 및 출력 모델 구현 - UI에서 문서 적합성 결과를 표시하도록 수정 - 그래프 빌더에 문서 적합성 노드 추가 --- interface/lang2sql.py | 38 +++++++++-- llm_utils/chains.py | 22 ++++++ llm_utils/graph_utils/base.py | 67 +++++++++++++++++++ llm_utils/graph_utils/basic_graph.py | 6 +- llm_utils/graph_utils/enriched_graph.py | 6 +- 
.../output_parser/document_suitability.py | 35 ++++++++++ prompt/document_suitability_prompt.md | 47 +++++++++++++ 7 files changed, 215 insertions(+), 6 deletions(-) create mode 100644 llm_utils/output_parser/document_suitability.py create mode 100644 prompt/document_suitability_prompt.md diff --git a/interface/lang2sql.py b/interface/lang2sql.py index 34d593f..dcff652 100644 --- a/interface/lang2sql.py +++ b/interface/lang2sql.py @@ -13,7 +13,6 @@ from db_utils import get_db_connector from db_utils.base_connector import BaseConnector -from infra.db.connect_db import ConnectDB from viz.display_chart import DisplayChart from engine.query_executor import execute_query as execute_query_common from llm_utils.llm_response_parser import LLMResponseParser @@ -31,6 +30,7 @@ "show_question_reinterpreted_by_ai": "Show User Question Reinterpreted by AI", "show_referenced_tables": "Show List of Referenced Tables", "show_question_gate_result": "Show Question Gate Result", + "show_document_suitability": "Show Document Suitability", "show_table": "Show Table", "show_chart": "Show Chart", } @@ -105,6 +105,7 @@ def should_show(_key: str) -> bool: show_result_desc = has_query and should_show("show_result_description") show_reinterpreted = has_query and should_show("show_question_reinterpreted_by_ai") show_gate_result = should_show("show_question_gate_result") + show_doc_suitability = should_show("show_document_suitability") show_table_section = has_query and should_show("show_table") show_chart_section = has_query and should_show("show_chart") if show_gate_result and ("question_gate_result" in res): @@ -112,9 +113,6 @@ def should_show(_key: str) -> bool: st.markdown("**Question Gate 결과:**") details = res.get("question_gate_result") if details: - passed = details.get("is_sql_like") - if passed is not None: - st.write(f"적합성 통과 여부: `{passed}`") try: import json as _json @@ -124,6 +122,38 @@ def should_show(_key: str) -> bool: except Exception: st.write(details) + if 
show_doc_suitability and ("document_suitability" in res): + st.markdown("---") + st.markdown("**문서 적합성 평가:**") + ds = res.get("document_suitability") + if not isinstance(ds, dict): + st.write(ds) + else: + + def _as_float(value): + try: + return float(value) + except Exception: + return -1.0 + + rows = [ + { + "table": table_name, + "score": _as_float(info.get("score", -1)), + "matched_columns": ", ".join(info.get("matched_columns", [])), + "missing_entities": ", ".join(info.get("missing_entities", [])), + "reason": info.get("reason", ""), + } + for table_name, info in ds.items() + if isinstance(info, dict) + ] + + rows.sort(key=lambda r: r["score"], reverse=True) + if rows: + st.dataframe(rows, use_container_width=True) + else: + st.info("문서 적합성 평가 결과가 비어 있습니다.") + if should_show("show_token_usage"): st.markdown("---") token_summary = TokenUtils.get_token_usage_summary(data=res["messages"]) diff --git a/llm_utils/chains.py b/llm_utils/chains.py index b4d90fb..ac0a854 100644 --- a/llm_utils/chains.py +++ b/llm_utils/chains.py @@ -15,6 +15,9 @@ ) from pydantic import BaseModel, Field from llm_utils.output_parser.question_suitability import QuestionSuitability +from llm_utils.output_parser.document_suitability import ( + DocumentSuitabilityList, +) from llm_utils.llm import get_llm @@ -125,7 +128,26 @@ def create_question_gate_chain(llm): return gate_prompt | llm.with_structured_output(QuestionSuitability) +def create_document_suitability_chain(llm): + """ + 문서 적합성 평가 체인을 생성합니다. + + 질문(question)과 검색 결과(tables)를 입력으로 받아 + 테이블별 적합도 점수를 포함한 JSON 딕셔너리를 반환합니다. 
+ + Returns: + Runnable: invoke({"question": str, "tables": dict}) -> {"results": DocumentSuitability[]} + """ + + prompt = get_prompt_template("document_suitability_prompt") + doc_prompt = ChatPromptTemplate.from_messages( + [SystemMessagePromptTemplate.from_template(prompt)] + ) + return doc_prompt | llm.with_structured_output(DocumentSuitabilityList) + + query_maker_chain = create_query_maker_chain(llm) profile_extraction_chain = create_profile_extraction_chain(llm) query_enrichment_chain = create_query_enrichment_chain(llm) question_gate_chain = create_question_gate_chain(llm) +document_suitability_chain = create_document_suitability_chain(llm) diff --git a/llm_utils/graph_utils/base.py b/llm_utils/graph_utils/base.py index d199b7d..e3b8cdc 100644 --- a/llm_utils/graph_utils/base.py +++ b/llm_utils/graph_utils/base.py @@ -9,12 +9,14 @@ profile_extraction_chain, query_enrichment_chain, question_gate_chain, + document_suitability_chain, ) from llm_utils.retrieval import search_tables # 노드 식별자 정의 QUESTION_GATE = "question_gate" +EVALUATE_DOCUMENT_SUITABILITY = "evaluate_document_suitability" GET_TABLE_INFO = "get_table_info" TOOL = "tool" TABLE_FILTER = "table_filter" @@ -28,6 +30,7 @@ class QueryMakerState(TypedDict): messages: Annotated[list, add_messages] user_database_env: str searched_tables: dict[str, dict[str, str]] + document_suitability: dict best_practice_query: str question_profile: dict generated_query: str @@ -156,6 +159,70 @@ def get_table_info_node(state: QueryMakerState): return state +# 노드 함수: DOCUMENT_SUITABILITY 노드 +def document_suitability_node(state: QueryMakerState): + """ + GET_TABLE_INFO에서 수집된 테이블 후보들에 대해 문서 적합성 점수를 계산하는 노드입니다. + + 질문(`messages[0].content`)과 `searched_tables`(테이블→칼럼 설명 맵)를 입력으로 + 프롬프트 체인(`document_suitability_chain`)을 호출하고, 결과 딕셔너리를 + `document_suitability` 상태 키에 저장합니다. 
+ + Returns: + QueryMakerState: 문서 적합성 평가 결과가 포함된 상태 + """ + + # 관련 테이블이 없으면 즉시 반환 + if not state.get("searched_tables"): + state["document_suitability"] = {} + return state + + res = document_suitability_chain.invoke( + { + "question": state["messages"][0].content, + "tables": state["searched_tables"], + } + ) + + items = ( + res.get("results", []) + if isinstance(res, dict) + else getattr(res, "results", None) + or (res.model_dump().get("results", []) if hasattr(res, "model_dump") else []) + ) + + normalized = {} + for x in items: + d = ( + x.model_dump() + if hasattr(x, "model_dump") + else ( + x + if isinstance(x, dict) + else { + "table_name": getattr(x, "table_name", ""), + "score": getattr(x, "score", 0), + "reason": getattr(x, "reason", ""), + "matched_columns": getattr(x, "matched_columns", []), + "missing_entities": getattr(x, "missing_entities", []), + } + ) + ) + t = d.get("table_name") + if not t: + continue + normalized[t] = { + "score": float(d.get("score", 0)), + "reason": d.get("reason", ""), + "matched_columns": d.get("matched_columns", []), + "missing_entities": d.get("missing_entities", []), + } + + state["document_suitability"] = normalized + + return state + + # 노드 함수: QUERY_MAKER 노드 def query_maker_node(state: QueryMakerState): # 사용자 원 질문 + (있다면) 컨텍스트 보강 결과를 하나의 문자열로 결합 diff --git a/llm_utils/graph_utils/basic_graph.py b/llm_utils/graph_utils/basic_graph.py index 62714f5..0a1b35f 100644 --- a/llm_utils/graph_utils/basic_graph.py +++ b/llm_utils/graph_utils/basic_graph.py @@ -5,9 +5,11 @@ QueryMakerState, QUESTION_GATE, GET_TABLE_INFO, + EVALUATE_DOCUMENT_SUITABILITY, QUERY_MAKER, question_gate_node, get_table_info_node, + document_suitability_node, query_maker_node, ) @@ -23,6 +25,7 @@ # 노드 추가 builder.add_node(QUESTION_GATE, question_gate_node) builder.add_node(GET_TABLE_INFO, get_table_info_node) +builder.add_node(EVALUATE_DOCUMENT_SUITABILITY, document_suitability_node) builder.add_node(QUERY_MAKER, query_maker_node) @@ -40,7 +43,8 @@ def 
_route_after_gate(state: QueryMakerState): ) # 기본 엣지 설정 -builder.add_edge(GET_TABLE_INFO, QUERY_MAKER) +builder.add_edge(GET_TABLE_INFO, EVALUATE_DOCUMENT_SUITABILITY) +builder.add_edge(EVALUATE_DOCUMENT_SUITABILITY, QUERY_MAKER) # QUERY_MAKER 노드 후 종료 builder.add_edge(QUERY_MAKER, END) diff --git a/llm_utils/graph_utils/enriched_graph.py b/llm_utils/graph_utils/enriched_graph.py index 5e61552..17e9f36 100644 --- a/llm_utils/graph_utils/enriched_graph.py +++ b/llm_utils/graph_utils/enriched_graph.py @@ -5,11 +5,13 @@ QueryMakerState, QUESTION_GATE, GET_TABLE_INFO, + EVALUATE_DOCUMENT_SUITABILITY, PROFILE_EXTRACTION, CONTEXT_ENRICHMENT, QUERY_MAKER, question_gate_node, get_table_info_node, + document_suitability_node, profile_extraction_node, context_enrichment_node, query_maker_node, @@ -27,6 +29,7 @@ # 노드 추가 builder.add_node(QUESTION_GATE, question_gate_node) builder.add_node(GET_TABLE_INFO, get_table_info_node) +builder.add_node(EVALUATE_DOCUMENT_SUITABILITY, document_suitability_node) builder.add_node(PROFILE_EXTRACTION, profile_extraction_node) builder.add_node(CONTEXT_ENRICHMENT, context_enrichment_node) builder.add_node(QUERY_MAKER, query_maker_node) @@ -46,7 +49,8 @@ def _route_after_gate(state: QueryMakerState): ) # 기본 엣지 설정 -builder.add_edge(GET_TABLE_INFO, PROFILE_EXTRACTION) +builder.add_edge(GET_TABLE_INFO, EVALUATE_DOCUMENT_SUITABILITY) +builder.add_edge(EVALUATE_DOCUMENT_SUITABILITY, PROFILE_EXTRACTION) builder.add_edge(PROFILE_EXTRACTION, CONTEXT_ENRICHMENT) builder.add_edge(CONTEXT_ENRICHMENT, QUERY_MAKER) diff --git a/llm_utils/output_parser/document_suitability.py b/llm_utils/output_parser/document_suitability.py new file mode 100644 index 0000000..dacdb6e --- /dev/null +++ b/llm_utils/output_parser/document_suitability.py @@ -0,0 +1,35 @@ +""" +DocumentSuitability 출력 모델. + +LLM 구조화 출력으로부터 테이블별 적합성 평가 결과를 표현하는 Pydantic 모델입니다. +최상위는 테이블별 평가 객체 목록을 `results` 키에 담는 래퍼 모델(DocumentSuitabilityList)입니다. 
+""" + +from typing import Dict, List +from pydantic import BaseModel, Field + + +class DocumentSuitability(BaseModel): + """ + 단일 테이블에 대한 적합성 평가 결과. + """ + + table_name: str = Field(description="테이블명") + score: float = Field(description="0.0~1.0 사이의 적합도 점수") + reason: str = Field(description="한국어 한두 문장 근거") + matched_columns: List[str] = Field( + default_factory=list, description="질문과 직접 연관된 컬럼명 목록" + ) + missing_entities: List[str] = Field( + default_factory=list, description="부족한 엔티티/지표/기간 등" + ) + + +class DocumentSuitabilityList(BaseModel): + """ + 문서 적합성 평가 결과 리스트 래퍼. + + OpenAI Structured Outputs 호환을 위해 명시적 최상위 키(`results`)를 둡니다. + """ + + results: List[DocumentSuitability] = Field(description="평가 결과 목록") diff --git a/prompt/document_suitability_prompt.md b/prompt/document_suitability_prompt.md new file mode 100644 index 0000000..e8bcbd1 --- /dev/null +++ b/prompt/document_suitability_prompt.md @@ -0,0 +1,47 @@ +## 문서 적합성 평가 프롬프트 (Table Search 재랭킹) + +당신은 데이터 카탈로그 평가자입니다. 주어진 사용자 질문과 검색 결과(테이블 → 칼럼 설명 맵)를 바탕으로, 각 테이블이 질문에 얼마나 적합한지 0~1 사이의 실수 점수로 평가하세요. + +### 입력 +- **question**: {question} +- **tables**: {tables} + +### 과업 +1. **핵심 신호 추출**: 질문에서 엔터티/지표/시간/필터/그룹화 단서를 추출합니다. +2. **테이블별 점수화**: 각 테이블의 칼럼·설명과의 연관성으로 적합도를 점수화합니다(0~1, 소수 셋째 자리 반올림). +3. **근거와 보완점 제시**: 매칭된 칼럼과 부족한 요소(엔터티/지표/기간 등)를 한국어로 설명합니다. +4. **정렬**: 결과를 점수 내림차순으로 정렬해 반환합니다. + +### 평가 규칙(가이드) +- **0.90~1.00**: 필요한 엔터티, 기간/시간 컬럼, 핵심 지표/측정 칼럼이 모두 존재. 직접 조회/집계만으로 답 가능. +- **0.60~0.89**: 주요 신호 매칭, 일부 보완(기간/그룹 키/보조 칼럼) 필요. 조인 없이 근사 가능. +- **0.30~0.59**: 일부만 매칭. 외부 컨텍스트나 조인 없이는 부정확/제한적. +- **0.00~0.29**: 연관성 낮음. 스키마/도메인 불일치 또는 정책/운영성 테이블. + +### 주의 +- 칼럼 이름/설명에 실제로 존재하지 않는 항목을 매칭하지 마세요(환각 금지). +- 시간 요구(특정 날짜/기간)가 있으면 timestamp/date/created_at 등 시간 계열 키를 중시하세요. +- 엔티티 키(예: id, user_id, product_id)의 존재 여부를 가산점으로 반영하세요. +- 키 이름은 정확히 입력 맵의 키만 사용하세요(자유 추측 금지). + +### 언어/출력 형식 +- 모든 텍스트 값은 한국어로 작성하세요. +- 결과는 반드시 아래 JSON 스키마로만 반환하세요(추가/누락 키 금지). 
+ +### 출력(JSON 스키마) +{{ + "results": [ + {{ + "table_name": string, + "score": number, // 0.0~1.0, 소수 셋째 자리 반올림 + "reason": string, // 한국어 한두 문장 근거 + "matched_columns": string[], + "missing_entities": string[] + }} + ] +}} + +### 검증 규칙 +- score는 [0, 1] 범위로 클램핑하고 소수 셋째 자리까지 반올림하세요. +- matched_columns는 해당 테이블 객체의 실제 키만 포함하세요(단, table_description 제외). +- reason 및 missing_entities는 한국어로 작성하세요. \ No newline at end of file