Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions app/api/v1/endpoints/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ async def get_job_result(
caption=result.caption if result else None,
instagram_meta=result.instagram_meta if result else None,
extraction_result=result.extraction_result if result else None,
place_candidates=result.place_candidates if result else [],
selected_place=result.selected_place if result else None,
error_message=job.error_message,
updated_at=job.updated_at,
)
1 change: 1 addition & 0 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class Settings(BaseSettings):
kakao_base_url: str = "https://dapi.kakao.com"
kakao_timeout_seconds: int = 5
kakao_max_places_per_candidate: int = 5
kakao_min_place_confidence: float = 0.7

hf_extraction_endpoint_url: str = ""
hf_extraction_api_token: str = ""
Expand Down
30 changes: 22 additions & 8 deletions app/domain/job/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class JobResultRecord:
caption: str | None
instagram_meta: dict[str, Any] | None
extraction_result: dict[str, Any] | None
place_candidates: list[dict[str, Any]]
selected_place: dict[str, Any] | None
created_at: datetime
updated_at: datetime

Expand All @@ -60,11 +62,17 @@ class ExtractedCandidate:

@dataclass(slots=True)
class PlaceCandidate:
place_name: str
road_address: str | None
address: str | None
category: str | None
kakao_place_id: str
place_name: str
category_name: str | None
category_group_code: str | None
category_group_name: str | None
phone: str | None
address_name: str | None
road_address_name: str | None
x: str | None
y: str | None
place_url: str | None
confidence: float
source_keyword: str
source_sentence: str
Expand All @@ -83,11 +91,17 @@ class CrawlArtifact:

def as_place_dict(place: PlaceCandidate) -> dict[str, Any]:
return {
"place_name": place.place_name,
"road_address": place.road_address,
"address": place.address,
"category": place.category,
"kakao_place_id": place.kakao_place_id,
"place_name": place.place_name,
"category_name": place.category_name,
"category_group_code": place.category_group_code,
"category_group_name": place.category_group_name,
"phone": place.phone,
"address_name": place.address_name,
"road_address_name": place.road_address_name,
"x": place.x,
"y": place.y,
"place_url": place.place_url,
"confidence": round(place.confidence, 4),
"source_keyword": place.source_keyword,
"source_sentence": place.source_sentence,
Expand Down
22 changes: 20 additions & 2 deletions app/infra/db/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,17 +106,21 @@ async def upsert_job_result(
caption: str | None,
instagram_meta: dict[str, Any] | None,
extraction_result: dict[str, Any] | None = None,
place_candidates: list[dict[str, Any]] | None = None,
selected_place: dict[str, Any] | None = None,
) -> JobResultRecord:
sql = f"""
INSERT INTO {self._results_table}
(job_id, caption, instagram_meta, extraction_result)
(job_id, caption, instagram_meta, extraction_result, place_candidates, selected_place)
VALUES
($1, $2, $3::jsonb, $4::jsonb)
($1, $2, $3::jsonb, $4::jsonb, $5::jsonb, $6::jsonb)
ON CONFLICT (job_id)
DO UPDATE SET
caption = EXCLUDED.caption,
instagram_meta = EXCLUDED.instagram_meta,
extraction_result = EXCLUDED.extraction_result,
place_candidates = EXCLUDED.place_candidates,
selected_place = EXCLUDED.selected_place,
updated_at = NOW()
RETURNING *
"""
Expand All @@ -126,6 +130,8 @@ async def upsert_job_result(
caption,
json.dumps(instagram_meta or {}),
json.dumps(extraction_result) if extraction_result is not None else None,
json.dumps(place_candidates or []),
json.dumps(selected_place) if selected_place is not None else None,
)
if row is None:
raise RuntimeError("Failed to upsert job result")
Expand All @@ -148,6 +154,8 @@ def _to_job_result_record(self, row: asyncpg.Record) -> JobResultRecord:
caption=row["caption"],
instagram_meta=self._json_to_dict(row["instagram_meta"]),
extraction_result=self._json_to_dict(row["extraction_result"]),
place_candidates=self._json_to_list(row["place_candidates"]),
selected_place=self._json_to_dict(row["selected_place"]),
created_at=row["created_at"],
updated_at=row["updated_at"],
)
Expand All @@ -161,3 +169,13 @@ def _json_to_dict(value: Any) -> dict[str, Any] | None:
if isinstance(value, dict):
return value
return dict(value)

@staticmethod
def _json_to_list(value: Any) -> list[dict[str, Any]]:
if value is None:
return []
if isinstance(value, str):
value = json.loads(value)
if isinstance(value, list):
return value
return list(value)
30 changes: 23 additions & 7 deletions app/infra/kakao/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,15 @@ class KakaoSearchResult:


class KakaoLocalClient:
def __init__(self, settings: Settings) -> None:
def __init__(
    self,
    settings: Settings,
    *,
    transport: httpx.AsyncBaseTransport | None = None,
) -> None:
    """Keep the settings and transport, and build the Kakao auth header.

    Args:
        settings: Application settings; ``kakao_rest_api_key`` is read here
            to build the ``Authorization`` header.
        transport: Optional httpx transport, stored so it can be supplied
            to the ``httpx.AsyncClient`` opened when searching places.
    """
    api_key = settings.kakao_rest_api_key
    self._transport = transport
    self._headers = {"Authorization": f"KakaoAK {api_key}"}
    self._settings = settings

async def search_places(
self,
Expand All @@ -51,7 +57,7 @@ async def search_places(
url = f"{self._settings.kakao_base_url}/v2/local/search/keyword.json"

try:
async with httpx.AsyncClient(timeout=timeout) as client:
async with httpx.AsyncClient(timeout=timeout, transport=self._transport) as client:
response = await client.get(url, params=params, headers=self._headers)
except (httpx.TimeoutException, httpx.NetworkError) as exc:
raise KakaoRetryableError(str(exc)) from exc
Expand Down Expand Up @@ -88,11 +94,17 @@ def _to_places(
confidence = self._score_place(candidate.keyword, place_name, idx, doc, location_hints)
places.append(
PlaceCandidate(
place_name=place_name,
road_address=(doc.get("road_address_name") or "").strip() or None,
address=(doc.get("address_name") or "").strip() or None,
category=(doc.get("category_name") or "").strip() or None,
kakao_place_id=str(doc.get("id") or ""),
place_name=place_name,
category_name=(doc.get("category_name") or "").strip() or None,
category_group_code=(doc.get("category_group_code") or "").strip() or None,
category_group_name=(doc.get("category_group_name") or "").strip() or None,
phone=(doc.get("phone") or "").strip() or None,
address_name=(doc.get("address_name") or "").strip() or None,
road_address_name=(doc.get("road_address_name") or "").strip() or None,
x=(doc.get("x") or "").strip() or None,
y=(doc.get("y") or "").strip() or None,
place_url=(doc.get("place_url") or "").strip() or None,
confidence=confidence,
source_keyword=candidate.source_keyword,
source_sentence=candidate.source_sentence,
Expand All @@ -110,7 +122,7 @@ def _score_place(
location_hints: list[str],
) -> float:
score = 0.35
if keyword.lower() in place_name.lower():
if _normalize_place_text(keyword) in _normalize_place_text(place_name):
score += 0.3
if rank == 0:
score += 0.2
Expand All @@ -129,3 +141,7 @@ def _score_place(
score += 0.1

return max(0.0, min(0.99, score))


def _normalize_place_text(value: str) -> str:
return "".join((value or "").lower().split())
20 changes: 20 additions & 0 deletions app/schemas/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,24 @@ class ExtractionResultResponse(BaseModel):
certainty: Literal["high", "medium", "low"]


# API schema for a single place match. JobResultResponse uses it both for the
# items of ``place_candidates`` and for ``selected_place``.
# Field names follow the keys visible in ``as_place_dict``
# (app/domain/job/model.py); only the two identifying fields and
# ``confidence`` are required, everything else defaults to None.
class PlaceCandidateResponse(BaseModel):
    # Identity of the place.
    kakao_place_id: str
    place_name: str
    # Category information copied from the Kakao search document.
    category_name: str | None = None
    category_group_code: str | None = None
    category_group_name: str | None = None
    # Contact and address details; None when Kakao returned no value.
    phone: str | None = None
    address_name: str | None = None
    road_address_name: str | None = None
    # Coordinates kept as strings, as read from the Kakao document.
    # NOTE(review): presumably x = longitude, y = latitude — confirm against
    # the Kakao Local API reference.
    x: str | None = None
    y: str | None = None
    place_url: str | None = None
    # Match score; presumably the value produced by
    # KakaoLocalClient._score_place (clamped to 0.0–0.99) — verify.
    confidence: float
    # Provenance of the match. Optional here although the PlaceCandidate
    # dataclass declares source_keyword/source_sentence as plain ``str``.
    source_keyword: str | None = None
    source_sentence: str | None = None
    # NOTE(review): not among the visible keys of as_place_dict (that hunk is
    # truncated) — confirm where this value is populated.
    raw_candidate: str | None = None


class CreateJobRequest(BaseModel):
url: HttpUrl = Field(..., examples=["https://www.instagram.com/reel/abcde/"])
room_id: UUID
Expand Down Expand Up @@ -49,6 +67,8 @@ class JobResultResponse(BaseModel):
caption: str | None
instagram_meta: dict[str, object] | None
extraction_result: ExtractionResultResponse | None = None
place_candidates: list[PlaceCandidateResponse] = Field(default_factory=list)
selected_place: PlaceCandidateResponse | None = None
error_message: str | None
updated_at: datetime

Expand Down
Loading
Loading