Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""add regulation chunk search tsvector

Revision ID: 20260504_0013
Revises: 20260429_0012
Create Date: 2026-05-04 00:00:00.000000
"""

from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql


# Revision identifiers read by Alembic to place this migration in the history.
revision = "20260504_0013"
# Parent revision; matches the "Revises" entry in the module docstring.
down_revision = "20260429_0012"
# No branch labels or cross-revision dependencies are declared for this migration.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add, backfill and index ``regulation_chunk.search_tsvector``.

    Steps, in order:
      1. Add a nullable ``TSVECTOR`` column to ``regulation_chunk``.
      2. Fill it for every chunk that joins to a ``regulation_document`` row,
         using the ``simple`` text-search configuration over the chunk text,
         the document content and the chunk keywords cast to text.
      3. Create a GIN index on the new column.
    """
    target_table = "regulation_chunk"

    # Step 1: the column is nullable, so existing rows stay valid until backfilled.
    tsvector_column = sa.Column("search_tsvector", postgresql.TSVECTOR(), nullable=True)
    op.add_column(target_table, tsvector_column)

    # Step 2: backfill. COALESCE keeps a NULL part from nulling the whole
    # concatenation. Chunks with no matching document row are left untouched.
    backfill_statement = """
        UPDATE regulation_chunk AS rc
        SET search_tsvector = to_tsvector(
            'simple',
            COALESCE(rc.chunk_text, '') || ' ' ||
            COALESCE(rd.content, '') || ' ' ||
            COALESCE(rc.keywords::text, '')
        )
        FROM regulation_document AS rd
        WHERE rd.regulation_document_id = rc.regulation_document_id
        """
    op.execute(backfill_statement)

    # Step 3: the index is built after the backfill, once the data is in place.
    indexed_columns = ["search_tsvector"]
    op.create_index(
        "idx_regulation_chunk_search_tsvector",
        target_table,
        indexed_columns,
        postgresql_using="gin",
    )
Comment thread
kimssirr marked this conversation as resolved.


def downgrade() -> None:
    """Undo ``upgrade``: drop the GIN index, then the ``search_tsvector`` column."""
    target_table = "regulation_chunk"

    # The index goes first, then the column it covers.
    op.drop_index("idx_regulation_chunk_search_tsvector", table_name=target_table)
    op.drop_column(target_table, "search_tsvector")
3 changes: 3 additions & 0 deletions app/db/models/regulation_chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pgvector.sqlalchemy import Vector
from sqlalchemy import BigInteger, Boolean, DateTime, ForeignKey, Index, Integer, String, Text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.postgresql import TSVECTOR
from sqlalchemy import func
from sqlalchemy import text
from sqlalchemy.orm import Mapped, mapped_column
Expand All @@ -20,6 +21,7 @@ class RegulationChunk(Base):
Index("idx_regulation_chunk_document_version", "document_version"),
Index("idx_regulation_chunk_chunk_id", "chunk_id"),
Index("idx_regulation_chunk_is_active", "is_active"),
Index("idx_regulation_chunk_search_tsvector", "search_tsvector", postgresql_using="gin"),
)

regulation_chunk_id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
Expand All @@ -33,6 +35,7 @@ class RegulationChunk(Base):
chunk_index: Mapped[int] = mapped_column(Integer, nullable=False)
chunk_text: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
keywords: Mapped[Optional[list[str]]] = mapped_column(JSONB, nullable=True)
search_tsvector: Mapped[Optional[str]] = mapped_column(TSVECTOR, nullable=True)
embedding: Mapped[Optional[List[float]]] = mapped_column(Vector(1536), nullable=True)
chunk_hash: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
embedding_model: Mapped[Optional[str]] = mapped_column(String(100), nullable=True)
Expand Down
55 changes: 37 additions & 18 deletions app/repositories/regulation_chunk_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,45 @@ def create_regulation_chunks_for_document(
created_chunks.append(regulation_chunk)

db.flush()
refresh_search_vectors_for_chunks(
db,
[
regulation_chunk.regulation_chunk_id
for regulation_chunk in created_chunks
if regulation_chunk.regulation_chunk_id is not None
],
)
Comment thread
kimssirr marked this conversation as resolved.
for regulation_chunk in created_chunks:
db.refresh(regulation_chunk)
return created_chunks


def refresh_search_vectors_for_chunks(db: Session, regulation_chunk_ids: list[int]) -> int:
    """Recompute ``search_tsvector`` for the given regulation chunks.

    The vector is built with the ``simple`` text-search configuration from the
    chunk text, the parent document's content and the chunk keywords cast to
    text. Only chunks that join to a ``regulation_document`` row are updated.
    The statement is executed on ``db``; this function does not commit.

    Args:
        db: Session used to execute the UPDATE.
        regulation_chunk_ids: Primary keys of the chunks to refresh. Any
            iterable is accepted; ``None`` entries are ignored.

    Returns:
        The row count reported by the database, or 0 when there is nothing to
        refresh.
    """
    # Materialise the ids before testing for emptiness: a generator is always
    # truthy, and some drivers (e.g. psycopg2) only adapt a Python *list* to a
    # PostgreSQL array, which is what ``= ANY(:param)`` expects.
    chunk_ids = [
        chunk_id
        for chunk_id in (regulation_chunk_ids or ())
        if chunk_id is not None
    ]
    if not chunk_ids:
        return 0

    result = db.execute(
        text(
            """
            UPDATE regulation_chunk AS rc
            SET search_tsvector = to_tsvector(
                'simple',
                COALESCE(rc.chunk_text, '') || ' ' ||
                COALESCE(rd.content, '') || ' ' ||
                COALESCE(rc.keywords::text, '')
            )
            FROM regulation_document AS rd
            WHERE rd.regulation_document_id = rc.regulation_document_id
              AND rc.regulation_chunk_id = ANY(:regulation_chunk_ids)
            """
        ),
        {"regulation_chunk_ids": chunk_ids},
    )
    # Fall back to 0 if the driver reports no row count.
    return result.rowcount or 0


def deactivate_chunks_for_document(db: Session, regulation_document_id: int) -> int:
statement = (
update(RegulationChunk)
Expand Down Expand Up @@ -392,23 +426,13 @@ def _search_hybrid_chunks(
rd.dormitory,
1 - (rc.embedding <=> CAST(:embedding AS vector)) AS vector_similarity,
ts_rank_cd(
to_tsvector(
'simple',
COALESCE(rc.chunk_text, '') || ' ' ||
COALESCE(rd.content, '') || ' ' ||
COALESCE(rc.keywords::text, '')
),
rc.search_tsvector,
websearch_to_tsquery('simple', :query_text)
) AS keyword_score,
NULL::bigint AS vector_rank,
ROW_NUMBER() OVER (
ORDER BY ts_rank_cd(
to_tsvector(
'simple',
COALESCE(rc.chunk_text, '') || ' ' ||
COALESCE(rd.content, '') || ' ' ||
COALESCE(rc.keywords::text, '')
),
rc.search_tsvector,
websearch_to_tsquery('simple', :query_text)
) DESC
) AS keyword_rank
Expand All @@ -419,12 +443,7 @@ def _search_hybrid_chunks(
AND rc.is_active = TRUE
AND rd.is_active = TRUE
AND rc.embedding IS NOT NULL
AND to_tsvector(
'simple',
COALESCE(rc.chunk_text, '') || ' ' ||
COALESCE(rd.content, '') || ' ' ||
COALESCE(rc.keywords::text, '')
) @@ websearch_to_tsquery('simple', :query_text)
AND rc.search_tsvector @@ websearch_to_tsquery('simple', :query_text)
ORDER BY keyword_score DESC
LIMIT :candidate_k
),
Expand Down
26 changes: 17 additions & 9 deletions app/services/chat_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@
from app.repositories.regulation_chunk_repository import search_hybrid_chunks
from app.repositories.regulation_chunk_repository import search_hybrid_chunks_all_dormitories
from app.repositories.regulation_chunk_repository import search_hybrid_chunks_for_dormitories
from app.repositories.regulation_chunk_repository import search_similar_chunks
from app.repositories.regulation_chunk_repository import search_similar_chunks_for_dormitories
from app.repositories.regulation_chunk_repository import search_similar_chunks_all_dormitories
from app.schemas.chat import ChatRequest
from app.schemas.chat import ChatResponse
from app.services.embeddings import create_query_embedding
from app.services.generator import AnswerGenerationResult
from app.services.generator import generate_answer
from app.services.validator import validate_question
from app.services.query_rewriter import expand_query_for_retrieval
from app.repositories.regulation_chunk_repository import search_similar_chunks_all_dormitories
from app.services.room_floor_resolver import resolve_room_floor_question

ERROR_TYPE_TIMEOUT = "TIMEOUT"
Expand Down Expand Up @@ -274,10 +276,13 @@ def _answer_single_dormitory_chat(
# 전체 생활관 fallback까지 했는데도 답변을 못 만들면
# LLM query expansion으로 검색용 질의를 확장한 뒤 재검색
if _is_no_answer(answer_result.answer):
expanded_query = expand_query_for_retrieval(
question=question,
dormitory=dormitory,
)
if rewritten_query != question:
expanded_query = rewritten_query
else:
expanded_query = expand_query_for_retrieval(
question=question,
dormitory=dormitory,
)
Comment thread
kimssirr marked this conversation as resolved.

if expanded_query != question:
expanded_query_embedding = create_query_embedding(expanded_query)
Expand Down Expand Up @@ -442,10 +447,13 @@ def _answer_unspecified_dormitory_chat(
# 비로그인/생활관 미지정 상태에서 원문 검색으로 답변을 못 만들면
# query expansion으로 검색용 질의를 확장한 뒤 전체 생활관 대상으로 재검색
if _is_no_answer(answer_result.answer):
expanded_query = expand_query_for_retrieval(
question=question,
dormitory=None,
)
if rewritten_query != question:
expanded_query = rewritten_query
else:
expanded_query = expand_query_for_retrieval(
question=question,
dormitory=None,
)
Comment thread
kimssirr marked this conversation as resolved.


if expanded_query != question:
Expand Down
Loading
Loading