diff --git a/app/api/v1/endpoints/jobs.py b/app/api/v1/endpoints/jobs.py index 4581db1..e0f1f78 100644 --- a/app/api/v1/endpoints/jobs.py +++ b/app/api/v1/endpoints/jobs.py @@ -131,6 +131,8 @@ async def get_job_result( caption=result.caption if result else None, instagram_meta=result.instagram_meta if result else None, extraction_result=result.extraction_result if result else None, + place_candidates=result.place_candidates if result else [], + selected_place=result.selected_place if result else None, error_message=job.error_message, updated_at=job.updated_at, ) diff --git a/app/core/config.py b/app/core/config.py index aebefbc..2cdd569 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -90,6 +90,7 @@ class Settings(BaseSettings): kakao_base_url: str = "https://dapi.kakao.com" kakao_timeout_seconds: int = 5 kakao_max_places_per_candidate: int = 5 + kakao_min_place_confidence: float = 0.7 hf_extraction_endpoint_url: str = "" hf_extraction_api_token: str = "" diff --git a/app/domain/job/model.py b/app/domain/job/model.py index ff9f040..8b43186 100644 --- a/app/domain/job/model.py +++ b/app/domain/job/model.py @@ -46,6 +46,8 @@ class JobResultRecord: caption: str | None instagram_meta: dict[str, Any] | None extraction_result: dict[str, Any] | None + place_candidates: list[dict[str, Any]] + selected_place: dict[str, Any] | None created_at: datetime updated_at: datetime @@ -60,11 +62,17 @@ class ExtractedCandidate: @dataclass(slots=True) class PlaceCandidate: - place_name: str - road_address: str | None - address: str | None - category: str | None kakao_place_id: str + place_name: str + category_name: str | None + category_group_code: str | None + category_group_name: str | None + phone: str | None + address_name: str | None + road_address_name: str | None + x: str | None + y: str | None + place_url: str | None confidence: float source_keyword: str source_sentence: str @@ -83,11 +91,17 @@ class CrawlArtifact: def as_place_dict(place: PlaceCandidate) -> dict[str, Any]: return { - "place_name": place.place_name, - "road_address": place.road_address, - "address": place.address, - "category": place.category, "kakao_place_id": place.kakao_place_id, + "place_name": place.place_name, + "category_name": place.category_name, + "category_group_code": place.category_group_code, + "category_group_name": place.category_group_name, + "phone": place.phone, + "address_name": place.address_name, + "road_address_name": place.road_address_name, + "x": place.x, + "y": place.y, + "place_url": place.place_url, "confidence": round(place.confidence, 4), "source_keyword": place.source_keyword, "source_sentence": place.source_sentence, diff --git a/app/infra/db/repository.py b/app/infra/db/repository.py index ae77a0c..51fce43 100644 --- a/app/infra/db/repository.py +++ b/app/infra/db/repository.py @@ -106,17 +106,21 @@ async def upsert_job_result( caption: str | None, instagram_meta: dict[str, Any] | None, extraction_result: dict[str, Any] | None = None, + place_candidates: list[dict[str, Any]] | None = None, + selected_place: dict[str, Any] | None = None, ) -> JobResultRecord: sql = f""" INSERT INTO {self._results_table} - (job_id, caption, instagram_meta, extraction_result) + (job_id, caption, instagram_meta, extraction_result, place_candidates, selected_place) VALUES - ($1, $2, $3::jsonb, $4::jsonb) + ($1, $2, $3::jsonb, $4::jsonb, $5::jsonb, $6::jsonb) ON CONFLICT (job_id) DO UPDATE SET caption = EXCLUDED.caption, instagram_meta = EXCLUDED.instagram_meta, extraction_result = EXCLUDED.extraction_result, + place_candidates = EXCLUDED.place_candidates, + selected_place = EXCLUDED.selected_place, updated_at = NOW() RETURNING * """ @@ -126,6 +130,8 @@ async def upsert_job_result( caption, json.dumps(instagram_meta or {}), json.dumps(extraction_result) if extraction_result is not None else None, + json.dumps(place_candidates or []), + json.dumps(selected_place) if selected_place is not None else None, ) if row is None: raise RuntimeError("Failed to upsert job result") @@ -148,6 +154,8 @@ def _to_job_result_record(self, row: asyncpg.Record) -> JobResultRecord: caption=row["caption"], instagram_meta=self._json_to_dict(row["instagram_meta"]), extraction_result=self._json_to_dict(row["extraction_result"]), + place_candidates=self._json_to_list(row["place_candidates"]), + selected_place=self._json_to_dict(row["selected_place"]), created_at=row["created_at"], updated_at=row["updated_at"], ) @@ -161,3 +169,13 @@ def _json_to_dict(value: Any) -> dict[str, Any] | None: if isinstance(value, dict): return value return dict(value) + + @staticmethod + def _json_to_list(value: Any) -> list[dict[str, Any]]: + if value is None: + return [] + if isinstance(value, str): + value = json.loads(value) + if isinstance(value, list): + return value + return list(value) diff --git a/app/infra/kakao/client.py b/app/infra/kakao/client.py index ba653f2..3bec02f 100644 --- a/app/infra/kakao/client.py +++ b/app/infra/kakao/client.py @@ -28,9 +28,15 @@ class KakaoSearchResult: class KakaoLocalClient: - def __init__(self, settings: Settings) -> None: + def __init__( + self, + settings: Settings, + *, + transport: httpx.AsyncBaseTransport | None = None, + ) -> None: self._settings = settings self._headers = {"Authorization": f"KakaoAK {settings.kakao_rest_api_key}"} + self._transport = transport async def search_places( self, @@ -51,7 +57,7 @@ async def search_places( url = f"{self._settings.kakao_base_url}/v2/local/search/keyword.json" try: - async with httpx.AsyncClient(timeout=timeout) as client: + async with httpx.AsyncClient(timeout=timeout, transport=self._transport) as client: response = await client.get(url, params=params, headers=self._headers) except (httpx.TimeoutException, httpx.NetworkError) as exc: raise KakaoRetryableError(str(exc)) from exc @@ -88,11 +94,17 @@ def _to_places( confidence = self._score_place(candidate.keyword, place_name, idx, doc, location_hints) places.append( PlaceCandidate( - place_name=place_name, - road_address=(doc.get("road_address_name") or "").strip() or None, - address=(doc.get("address_name") or "").strip() or None, - category=(doc.get("category_name") or "").strip() or None, kakao_place_id=str(doc.get("id") or ""), + place_name=place_name, + category_name=(doc.get("category_name") or "").strip() or None, + category_group_code=(doc.get("category_group_code") or "").strip() or None, + category_group_name=(doc.get("category_group_name") or "").strip() or None, + phone=(doc.get("phone") or "").strip() or None, + address_name=(doc.get("address_name") or "").strip() or None, + road_address_name=(doc.get("road_address_name") or "").strip() or None, + x=(doc.get("x") or "").strip() or None, + y=(doc.get("y") or "").strip() or None, + place_url=(doc.get("place_url") or "").strip() or None, confidence=confidence, source_keyword=candidate.source_keyword, source_sentence=candidate.source_sentence, @@ -110,7 +122,7 @@ def _score_place( location_hints: list[str], ) -> float: score = 0.35 - if keyword.lower() in place_name.lower(): + if _normalize_place_text(keyword) in _normalize_place_text(place_name): score += 0.3 if rank == 0: score += 0.2 @@ -129,3 +141,7 @@ def _score_place( score += 0.1 return max(0.0, min(0.99, score)) + + +def _normalize_place_text(value: str) -> str: + return "".join((value or "").lower().split()) diff --git a/app/schemas/jobs.py b/app/schemas/jobs.py index ff7420e..e989443 100644 --- a/app/schemas/jobs.py +++ b/app/schemas/jobs.py @@ -17,6 +17,24 @@ class ExtractionResultResponse(BaseModel): certainty: Literal["high", "medium", "low"] +class PlaceCandidateResponse(BaseModel): + kakao_place_id: str + place_name: str + category_name: str | None = None + category_group_code: str | None = None + category_group_name: str | None = None + phone: str | None = None + address_name: str | None = None + road_address_name: str | None = None + x: str | None = None + y: str | None = None + place_url: str | None = None + confidence: float + source_keyword: str | None = None + source_sentence: str | None = None + raw_candidate: str | None = None + + class CreateJobRequest(BaseModel): url: HttpUrl = Field(..., examples=["https://www.instagram.com/reel/abcde/"]) room_id: UUID @@ -49,6 +67,8 @@ class JobResultResponse(BaseModel): caption: str | None instagram_meta: dict[str, object] | None extraction_result: ExtractionResultResponse | None = None + place_candidates: list[PlaceCandidateResponse] = Field(default_factory=list) + selected_place: PlaceCandidateResponse | None = None error_message: str | None updated_at: datetime diff --git a/app/worker/processor.py b/app/worker/processor.py index 0720a89..119e8ab 100644 --- a/app/worker/processor.py +++ b/app/worker/processor.py @@ -2,6 +2,7 @@ import asyncio import logging +import re import time from dataclasses import dataclass from typing import Protocol @@ -11,10 +12,14 @@ from app.domain.crawl import crawl_and_parse from app.domain.job import ( CrawlArtifact, + ExtractedCandidate, ExtractionResult, JobRecord, + PlaceCandidate, as_extraction_result_dict, + as_place_dict, ) +from app.infra.kakao import KakaoNonRetryableError logger = logging.getLogger("processing.worker.processor") @@ -47,6 +52,18 @@ async def extract( ) -> ExtractionResult | None: ... +class PlaceSearchResultPort(Protocol): + places: list[PlaceCandidate] + + +class PlaceSearchPort(Protocol): + async def search_places( + self, + candidate: ExtractedCandidate, + location_hints: list[str], + ) -> PlaceSearchResultPort: ... + + class JobProcessor: def __init__( self, @@ -54,10 +71,12 @@ def __init__( repository: JobRepositoryPort, settings: Settings, extraction_client: ExtractionPort | None = None, + place_search_client: PlaceSearchPort | None = None, ) -> None: self._repository = repository self._settings = settings self._extraction_client = extraction_client + self._place_search_client = place_search_client async def process_job(self, job_id: UUID) -> JobProcessOutcome: started = time.monotonic() @@ -75,11 +94,15 @@ async def process_job(self, job_id: UUID) -> JobProcessOutcome: try: crawl_artifact = await crawl_and_parse(job.source_url, self._settings) extraction_result = await self._extract_result(job.source_url, crawl_artifact) - # TODO(kakao): Add Kakao Local enrichment and final place ranking in next migration step. + place_candidates, selected_place = await self._enrich_place( + extraction_result, + crawl_artifact, + ) logger.info( - "job crawl completed job_id=%s caption_len=%s", + "job crawl completed job_id=%s caption_len=%s place_candidates=%s", job.job_id, len(crawl_artifact.caption or ""), + len(place_candidates), ) await self._repository.upsert_job_result( @@ -89,6 +112,8 @@ async def process_job(self, job_id: UUID) -> JobProcessOutcome: extraction_result=( as_extraction_result_dict(extraction_result) if extraction_result else None ), + place_candidates=place_candidates, + selected_place=selected_place, ) await self._repository.mark_succeeded(job.job_id) elapsed_ms = int((time.monotonic() - started) * 1000) @@ -130,3 +155,134 @@ async def _extract_result( except Exception: logger.exception("extraction failed source_url=%s", source_url) return None + + async def _enrich_place( + self, + extraction_result: ExtractionResult | None, + crawl_artifact: CrawlArtifact, + ) -> tuple[list[dict[str, object]], dict[str, object] | None]: + if not self._place_search_client or not extraction_result: + return [], None + + store_name = (extraction_result.store_name or "").strip() + if not store_name: + return [], None + + candidate = ExtractedCandidate( + keyword=store_name, + source_keyword=store_name, + source_sentence=( + extraction_result.store_name_evidence + or extraction_result.address_evidence + or crawl_artifact.caption + or "" + ), + raw_candidate=store_name, + ) + location_hints = self._build_location_hints(extraction_result.address) + + try: + places = await self._search_places_by_hints(candidate, location_hints) + except KakaoNonRetryableError: + logger.error("kakao enrichment non-retryable failure", exc_info=True) + return [], None + except Exception: + logger.exception("kakao enrichment failed") + return [], None + + places = sorted(places, key=lambda place: place.confidence, reverse=True) + places = [ + place + for place in places + if place.confidence >= self._settings.kakao_min_place_confidence + ] + place_candidates = [as_place_dict(place) for place in places] + selected_place = place_candidates[0] if place_candidates else None + return place_candidates, selected_place + + async def _search_places( + self, + candidate: ExtractedCandidate, + location_hints: list[str], + ) -> list[PlaceCandidate]: + if not self._place_search_client: + return [] + result = await self._place_search_client.search_places(candidate, location_hints) + return result.places + + async def _search_places_by_hints( + self, + candidate: ExtractedCandidate, + location_hints: list[str], + ) -> list[PlaceCandidate]: + for hint in location_hints: + places = await self._search_places(candidate, [hint]) + qualified = self._qualified_places(places) + if qualified: + return qualified + + places = await self._search_places(candidate, []) + return self._qualified_places(places) + + def _qualified_places(self, places: list[PlaceCandidate]) -> list[PlaceCandidate]: + return [ + place + for place in places + if place.confidence >= self._settings.kakao_min_place_confidence + ] + + @staticmethod + def _build_location_hints(address: str | None) -> list[str]: + raw = (address or "").strip() + if not raw: + return [] + + tokens = [token.strip(",") for token in re.split(r"\s+", raw) if token.strip(",")] + hints = [raw] + + district_suffixes = ("\uad6c", "\uad70") + locality_suffixes = ("\ub3d9", "\uc74d", "\uba74", "\ub9ac", "\uac00") + + district_idx = next( + (idx for idx, token in enumerate(tokens) if token.endswith(district_suffixes)), + None, + ) + if district_idx is not None: + hints.append(" ".join(tokens[: district_idx + 1])) + + locality_idx = next( + ( + idx + for idx in range(district_idx + 1, len(tokens)) + if tokens[idx].endswith(locality_suffixes) + ), + None, + ) + if locality_idx is not None: + hints.append(" ".join(tokens[: locality_idx + 1])) + + road_hint = JobProcessor._build_road_hint(tokens, district_idx) + if road_hint: + hints.append(road_hint) + + deduped: list[str] = [] + for hint in hints: + if hint and hint not in deduped: + deduped.append(hint) + return deduped + + @staticmethod + def _build_road_hint(tokens: list[str], district_idx: int) -> str | None: + prefix = tokens[: district_idx + 1] + rest = tokens[district_idx + 1 :] + if not prefix or not rest: + return None + + for idx, token in enumerate(rest): + if token.endswith("\uae38"): + return " ".join(prefix + [token]) + if token.endswith("\ub85c"): + if idx + 1 < len(rest) and re.fullmatch(r"\d+\uae38", rest[idx + 1]): + return " ".join(prefix + [f"{token}{rest[idx + 1]}"]) + return " ".join(prefix + [token]) + return None diff --git a/app/worker/runner.py b/app/worker/runner.py index 624626a..7612e63 100644 --- a/app/worker/runner.py +++ b/app/worker/runner.py @@ -8,6 +8,7 @@ from app.core.config import get_settings from app.infra.db import JobRepository, create_db_pool +from app.infra.kakao import KakaoLocalClient from app.infra.llm import HFExtractionClient from app.infra.queue import RedisJobQueue from app.services.crawler.playwright_service import prewarm_crawler_runtime, shutdown_crawler_runtime @@ -92,6 +93,13 @@ def build_extraction_client(settings) -> ExtractionPort | None: return HFExtractionClient(settings) +def build_place_search_client(settings): + if not settings.kakao_rest_api_key: + logger.warning("worker kakao client disabled (KAKAO_REST_API_KEY is empty)") + return None + return KakaoLocalClient(settings) + + async def run_worker() -> None: settings = get_settings() pool = await create_db_pool(settings) @@ -103,6 +111,7 @@ async def run_worker() -> None: repository=repository, settings=settings, extraction_client=build_extraction_client(settings), + place_search_client=build_place_search_client(settings), ) if settings.worker_prewarm_browser: diff --git a/migrations/002_add_kakao_place_results_to_job_results.sql b/migrations/002_add_kakao_place_results_to_job_results.sql new file mode 100644 index 0000000..6524a64 --- /dev/null +++ b/migrations/002_add_kakao_place_results_to_job_results.sql @@ -0,0 +1,3 @@ +ALTER TABLE processing.job_results +ADD COLUMN IF NOT EXISTS place_candidates JSONB NOT NULL DEFAULT '[]'::jsonb, +ADD COLUMN IF NOT EXISTS selected_place JSONB; diff --git a/tests/test_job_repository.py b/tests/test_job_repository.py index 82e9e8a..c8b8e4a 100644 --- a/tests/test_job_repository.py +++ b/tests/test_job_repository.py @@ -50,6 +50,8 @@ async def fetchrow(self, sql: str, *args): "caption": args[1], "instagram_meta": args[2], "extraction_result": args[3], + "place_candidates": args[4], + "selected_place": args[5], "created_at": now, "updated_at": now, } @@ -67,6 +69,23 @@ def test_upsert_job_result_persists_extraction_result() -> None: "address_evidence": "1-102 Sinmunro 2-ga, Jongno-gu, Seoul", "certainty": "high", } + selected_place = { + "kakao_place_id": "123", + "place_name": "Common Mansion", + "category_name": "음식점 > 카페", + "category_group_code": "CE7", + "category_group_name": "카페", + "phone": "02-0000-0000", + "address_name": "서울 종로구 신문로2가 1-102", + "road_address_name": "서울 종로구 새문안로 1", + "x": "126.970000", + "y": "37.570000", + "place_url": "https://place.map.kakao.com/123", + "confidence": 0.95, + "source_keyword": "Common Mansion", + "source_sentence": "Common Mansion 1-102 Sinmunro 2-ga", + "raw_candidate": "Common Mansion", + } record = _run( repository.upsert_job_result( @@ -74,6 +93,8 @@ def test_upsert_job_result_persists_extraction_result() -> None: caption="Common Mansion review", instagram_meta={"media_type": "reel"}, extraction_result=extraction_result, + place_candidates=[selected_place], + selected_place=selected_place, ) ) @@ -85,8 +106,12 @@ def test_upsert_job_result_persists_extraction_result() -> None: "Common Mansion review", json.dumps({"media_type": "reel"}), json.dumps(extraction_result), + json.dumps([selected_place]), + json.dumps(selected_place), ) assert record.extraction_result == extraction_result + assert record.place_candidates == [selected_place] + assert record.selected_place == selected_place @pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment") @@ -106,6 +131,8 @@ def test_get_job_result_maps_extraction_result() -> None: "caption": "caption", "instagram_meta": json.dumps({"caption": "caption"}), "extraction_result": json.dumps(extraction_result), + "place_candidates": json.dumps([]), + "selected_place": None, "created_at": now, "updated_at": now, } @@ -116,3 +143,5 @@ def test_get_job_result_maps_extraction_result() -> None: assert record is not None assert record.extraction_result == extraction_result + assert record.place_candidates == [] + assert record.selected_place is None diff --git a/tests/test_job_result_schema.py b/tests/test_job_result_schema.py index e22c3a6..66da15c 100644 --- a/tests/test_job_result_schema.py +++ b/tests/test_job_result_schema.py @@ -45,3 +45,45 @@ def test_job_result_response_allows_missing_extraction_result() -> None: ) assert response.extraction_result is None + assert response.place_candidates == [] + assert response.selected_place is None + + +def test_job_result_response_accepts_kakao_place_result() -> None: + selected_place = { + "kakao_place_id": "123", + "place_name": "커먼맨션", + "category_name": "음식점 > 카페", + "category_group_code": "CE7", + "category_group_name": "카페", + "address_name": "서울 종로구 신문로2가 1-102", + "road_address_name": "서울 종로구 새문안로 1", + "x": "126.970000", + "y": "37.570000", + "place_url": "https://place.map.kakao.com/123", + "phone": None, + "confidence": 0.95, + "source_keyword": "커먼맨션", + "source_sentence": "브런치 맛집 커먼맨션 입니다", + "raw_candidate": "커먼맨션", + } + + response = JobResultResponse( + job_id=uuid4(), + source_url="https://www.instagram.com/reel/example/", + source="instagram", + status=JobStatus.SUCCEEDED, + caption="caption", + instagram_meta=None, + extraction_result=None, + place_candidates=[selected_place], + selected_place=selected_place, + error_message=None, + updated_at=datetime.now(timezone.utc), + ) + + dumped = response.model_dump() + + assert dumped["selected_place"]["place_name"] == "커먼맨션" + assert dumped["selected_place"]["category_group_code"] == "CE7" + assert dumped["place_candidates"][0]["road_address_name"] == "서울 종로구 새문안로 1" diff --git a/tests/test_jobs_api_result.py b/tests/test_jobs_api_result.py index 251ddef..c271f16 100644 --- a/tests/test_jobs_api_result.py +++ b/tests/test_jobs_api_result.py @@ -40,6 +40,23 @@ def test_get_job_result_returns_extraction_result() -> None: "address_evidence": "1-102 Sinmunro 2-ga, Jongno-gu, Seoul", "certainty": "high", } + selected_place = { + "kakao_place_id": "123", + "place_name": "Common Mansion", + "category_name": "음식점 > 카페", + "category_group_code": "CE7", + "category_group_name": "카페", + "phone": None, + "address_name": "서울 종로구 신문로2가 1-102", + "road_address_name": "서울 종로구 새문안로 1", + "x": "126.970000", + "y": "37.570000", + "place_url": "https://place.map.kakao.com/123", + "confidence": 0.95, + "source_keyword": "Common Mansion", + "source_sentence": "Common Mansion 1-102 Sinmunro 2-ga", + "raw_candidate": "Common Mansion", + } app = FastAPI() app.include_router(router) app.state.job_service = FakeJobService( @@ -59,6 +76,8 @@ def test_get_job_result_returns_extraction_result() -> None: caption="Common Mansion review", instagram_meta={"media_type": "reel"}, extraction_result=extraction_result, + place_candidates=[selected_place], + selected_place=selected_place, created_at=now, updated_at=now, ) @@ -72,3 +91,5 @@ def test_get_job_result_returns_extraction_result() -> None: assert response.status_code == 200 assert response.json()["extraction_result"] == extraction_result + assert response.json()["place_candidates"] == [selected_place] + assert response.json()["selected_place"] == selected_place diff --git a/tests/test_kakao_client.py b/tests/test_kakao_client.py new file mode 100644 index 0000000..ff49538 --- /dev/null +++ b/tests/test_kakao_client.py @@ -0,0 +1,114 @@ +from __future__ import annotations + +import asyncio + +import httpx +import pytest + +from app.core.config import Settings +from app.domain.job import ExtractedCandidate +from app.infra.kakao import KakaoLocalClient, KakaoNonRetryableError + +if hasattr(asyncio, "WindowsSelectorEventLoopPolicy"): + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + + +def _can_create_event_loop() -> bool: + try: + loop = asyncio.new_event_loop() + loop.close() + return True + except OSError: + return False + + +EVENT_LOOP_AVAILABLE = _can_create_event_loop() + + +def _run(coro): + try: + return asyncio.run(coro) + except OSError as exc: + pytest.skip(f"Event loop creation is blocked in this environment: {exc}") + + +def _settings() -> Settings: + return Settings( + kakao_rest_api_key="test-kakao-key", + kakao_base_url="https://dapi.kakao.com", + kakao_max_places_per_candidate=5, + ) + + +def _candidate() -> ExtractedCandidate: + return ExtractedCandidate( + keyword="커먼맨션", + source_keyword="커먼맨션", + source_sentence="브런치 맛집 커먼맨션 입니다", + raw_candidate="커먼맨션", + ) + + +@pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment") +def test_kakao_local_client_maps_place_fields() -> None: + seen_requests: list[httpx.Request] = [] + + async def handler(request: httpx.Request) -> httpx.Response: + seen_requests.append(request) + return httpx.Response( + 200, + json={ + "documents": [ + { + "id": "123", + "place_name": "커먼맨션", + "category_name": "음식점 > 카페", + "category_group_code": "CE7", + "category_group_name": "카페", + "phone": "02-0000-0000", + "address_name": "서울 종로구 신문로2가 1-102", + "road_address_name": "서울 종로구 새문안로 1", + "x": "126.970000", + "y": "37.570000", + "place_url": "https://place.map.kakao.com/123", + } + ], + "meta": {"total_count": 1}, + }, + ) + + client = KakaoLocalClient( + _settings(), + transport=httpx.MockTransport(handler), + ) + + result = _run( + client.search_places( + _candidate(), + location_hints=["서울 종로구 신문로2가 1-102"], + ) + ) + + assert seen_requests[0].headers["Authorization"] == "KakaoAK test-kakao-key" + assert seen_requests[0].url.params["query"] == "서울 종로구 신문로2가 1-102 커먼맨션" + assert seen_requests[0].url.params["size"] == "5" + place = result.places[0] + assert place.kakao_place_id == "123" + assert place.place_name == "커먼맨션" + assert place.category_name == "음식점 > 카페" + assert place.category_group_code == "CE7" + assert place.category_group_name == "카페" + assert place.address_name == "서울 종로구 신문로2가 1-102" + assert place.road_address_name == "서울 종로구 새문안로 1" + assert place.x == "126.970000" + assert place.y == "37.570000" + assert place.place_url == "https://place.map.kakao.com/123" + assert place.confidence > 0 + + +@pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment") +def test_kakao_local_client_requires_api_key() -> None: + client = KakaoLocalClient(Settings(kakao_rest_api_key="")) + + with pytest.raises(KakaoNonRetryableError): + _run(client.search_places(_candidate(), [])) diff --git a/tests/test_worker_processor.py b/tests/test_worker_processor.py index 980cd82..7c9308f 100644 --- a/tests/test_worker_processor.py +++ b/tests/test_worker_processor.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +from dataclasses import dataclass from datetime import datetime, timezone from uuid import UUID, uuid4 @@ -13,6 +14,7 @@ ExtractionResult, JobRecord, JobStatus, + PlaceCandidate, ) from app.worker.processor import JobProcessor @@ -97,6 +99,42 @@ async def extract( raise RuntimeError("endpoint unavailable") +@dataclass +class FakePlaceSearchResult: + places: list[PlaceCandidate] + + +class FakePlaceSearchClient: + def __init__(self, places: list[PlaceCandidate]) -> None: + self.places = places + self.calls: list[dict[str, object]] = [] + + async def search_places(self, candidate, location_hints: list[str]) -> FakePlaceSearchResult: + self.calls.append( + { + "keyword": candidate.keyword, + "source_keyword": candidate.source_keyword, + "location_hints": location_hints, + } + ) + return FakePlaceSearchResult(self.places) + + +class HintAwarePlaceSearchClient: + def __init__(self, places_by_hint: dict[tuple[str, ...], list[PlaceCandidate]]) -> None: + self.places_by_hint = places_by_hint + self.calls: list[list[str]] = [] + + async def search_places(self, candidate, location_hints: list[str]) -> FakePlaceSearchResult: + self.calls.append(location_hints) + return FakePlaceSearchResult(self.places_by_hint.get(tuple(location_hints), [])) + + +class FailingPlaceSearchClient: + async def search_places(self, candidate, location_hints: list[str]) -> FakePlaceSearchResult: + raise RuntimeError("kakao unavailable") + + def _new_job() -> JobRecord: now = datetime.now(timezone.utc) return JobRecord( @@ -110,6 +148,26 @@ def _new_job() -> JobRecord: ) +def _place_candidate(*, confidence: float = 0.95) -> PlaceCandidate: + return PlaceCandidate( + kakao_place_id="123", + place_name="Common Mansion", + category_name="Food > Cafe", + category_group_code="CE7", + category_group_name="Cafe", + phone="02-0000-0000", + address_name="Seoul Jongno-gu Sinmunro 2-ga 1-102", + road_address_name="Seoul Jongno-gu Saemunan-ro 1", + x="126.970000", + y="37.570000", + place_url="https://place.map.kakao.com/123", + confidence=confidence, + source_keyword="Common Mansion", + source_sentence="Common Mansion 1-102 Sinmunro 2-ga", + raw_candidate="Common Mansion", + ) + + @pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment") def test_processor_success(monkeypatch) -> None: job = _new_job() @@ -196,6 +254,213 @@ async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact: assert repo.failed is None +@pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment") +def test_processor_tries_broader_location_hints_before_keyword_only(monkeypatch) -> None: + job = _new_job() + repo = FakeRepository(job) + settings = Settings(kakao_min_place_confidence=0.7) + extractor = FakeExtractionClient( + ExtractionResult( + store_name="Geumdonok", + address="서울 서초구 방배로 23길 31-6", + store_name_evidence="Geumdonok", + address_evidence="서울 서초구 방배로 23길 31-6", + certainty=ExtractionCertainty.HIGH, + ) + ) + place = _place_candidate(confidence=0.95) + place_search = HintAwarePlaceSearchClient( + { + ("서울 서초구",): [place], + tuple(): [_place_candidate(confidence=0.85)], + } + ) + + async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact: + return CrawlArtifact( + url=url, + html=None, + text="Geumdonok 서울 서초구 방배로 23길 31-6", + media_type="reel", + caption="Geumdonok 서울 서초구 방배로 23길 31-6", + instagram_meta=None, + ) + + monkeypatch.setattr("app.worker.processor.crawl_and_parse", fake_crawl) + + processor = JobProcessor( + repository=repo, + settings=settings, + extraction_client=extractor, + place_search_client=place_search, + ) + + _run(processor.process_job(job.job_id)) + + assert place_search.calls == [ + ["서울 서초구 방배로 23길 31-6"], + ["서울 서초구"], + ] + assert repo.saved_result is not None + assert repo.saved_result["selected_place"]["confidence"] == 0.95 + + +def test_build_location_hints_from_korean_address() -> None: + assert JobProcessor._build_location_hints("서울 서초구 방배로 23길 31-6") == [ + "서울 서초구 방배로 23길 31-6", + "서울 서초구", + "서울 서초구 방배로23길", + ] + assert JobProcessor._build_location_hints("서울 강남구 도곡동 954-17") == [ + "서울 강남구 도곡동 954-17", + "서울 강남구", + "서울 강남구 도곡동", + ] + + +@pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment") +def test_processor_enriches_place_from_extraction_result(monkeypatch) -> None: + job = _new_job() + repo = FakeRepository(job) + settings = Settings() + extractor = FakeExtractionClient( + ExtractionResult( + store_name="Common Mansion", + address="1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + store_name_evidence="Common Mansion", + address_evidence="1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + certainty=ExtractionCertainty.HIGH, + ) + ) + place_search = FakePlaceSearchClient( + [ + _place_candidate(confidence=0.75), + _place_candidate(confidence=0.95), + ] + ) + + async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact: + return CrawlArtifact( + url=url, + html=None, + text="Common Mansion 1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + media_type="reel", + caption="Common Mansion 1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + instagram_meta=None, + ) + + monkeypatch.setattr("app.worker.processor.crawl_and_parse", fake_crawl) + + processor = JobProcessor( + repository=repo, + settings=settings, + extraction_client=extractor, + place_search_client=place_search, + ) + + _run(processor.process_job(job.job_id)) + + assert place_search.calls == [ + { + "keyword": "Common Mansion", + "source_keyword": "Common Mansion", + "location_hints": ["1-102 Sinmunro 2-ga, Jongno-gu, Seoul"], + } + ] + assert repo.succeeded is True + assert repo.saved_result is not None + assert len(repo.saved_result["place_candidates"]) == 2 + assert repo.saved_result["selected_place"]["confidence"] == 0.95 + assert repo.saved_result["selected_place"]["kakao_place_id"] == "123" + assert repo.failed is None + + +@pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment") +def test_processor_succeeds_when_place_search_fails(monkeypatch) -> None: + job = _new_job() + repo = FakeRepository(job) + settings = Settings() + extractor = FakeExtractionClient( + ExtractionResult( + store_name="Common Mansion", + address="1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + store_name_evidence="Common Mansion", + address_evidence="1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + certainty=ExtractionCertainty.HIGH, + ) + ) + + async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact: + return CrawlArtifact( + url=url, + html=None, + text="Common Mansion 1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + media_type="reel", + caption="Common Mansion 1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + instagram_meta=None, + ) + + monkeypatch.setattr("app.worker.processor.crawl_and_parse", fake_crawl) + + processor = JobProcessor( + repository=repo, + settings=settings, + extraction_client=extractor, + place_search_client=FailingPlaceSearchClient(), + ) + + _run(processor.process_job(job.job_id)) + + assert repo.succeeded is True + assert repo.saved_result is not None + assert repo.saved_result["place_candidates"] == [] + assert repo.saved_result["selected_place"] is None + assert repo.failed is None + + +@pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment") +def test_processor_drops_low_confidence_place_candidates(monkeypatch) -> None: + job = _new_job() + repo = FakeRepository(job) + settings = Settings(kakao_min_place_confidence=0.7) + extractor = FakeExtractionClient( + ExtractionResult( + store_name="Common Mansion", + address="1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + store_name_evidence="Common Mansion", + address_evidence="1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + certainty=ExtractionCertainty.HIGH, + ) + ) + + async def fake_crawl(url: str, _settings: Settings) -> CrawlArtifact: + return CrawlArtifact( + url=url, + html=None, + text="Common Mansion 1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + media_type="reel", + caption="Common Mansion 1-102 Sinmunro 2-ga, Jongno-gu, Seoul", + instagram_meta=None, + ) + + monkeypatch.setattr("app.worker.processor.crawl_and_parse", fake_crawl) + + processor = JobProcessor( + repository=repo, + settings=settings, + extraction_client=extractor, + place_search_client=FakePlaceSearchClient([_place_candidate(confidence=0.55)]), + ) + + _run(processor.process_job(job.job_id)) + + assert repo.succeeded is True + assert repo.saved_result is not None + assert repo.saved_result["place_candidates"] == [] + assert repo.saved_result["selected_place"] is None + assert repo.failed is None + + @pytest.mark.skipif(not EVENT_LOOP_AVAILABLE, reason="Event loop creation is blocked in this environment") def test_processor_succeeds_when_extraction_client_fails(monkeypatch) -> None: job = _new_job() diff --git a/tests/test_worker_runner.py b/tests/test_worker_runner.py index 0c38e45..cbd1e11 100644 --- a/tests/test_worker_runner.py +++ b/tests/test_worker_runner.py @@ -1,8 +1,9 @@ from __future__ import annotations from app.core.config import Settings +from app.infra.kakao import KakaoLocalClient from app.infra.llm import HFExtractionClient -from app.worker.runner import build_extraction_client +from app.worker.runner import build_extraction_client, build_place_search_client def test_build_extraction_client_returns_none_without_endpoint() -> None: @@ -30,3 +31,15 @@ def test_build_extraction_client_returns_hf_client_when_configured() -> None: ) assert isinstance(build_extraction_client(settings), HFExtractionClient) + + +def test_build_place_search_client_returns_none_without_key() -> None: + settings = Settings(kakao_rest_api_key="") + + assert build_place_search_client(settings) is None + + +def test_build_place_search_client_returns_kakao_client_when_configured() -> None: + settings = Settings(kakao_rest_api_key="test-kakao-key") + + assert isinstance(build_place_search_client(settings), KakaoLocalClient)