Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
8 changes: 7 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.11-slim
FROM python:3.11

WORKDIR /app

Expand All @@ -8,4 +8,10 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY . .

RUN python -m leftovers.domain.recommend.service.train

# 스레드 수 조정
ENV OMP_NUM_THREADS=4
ENV OPENBLAS_NUM_THREADS=4
ENV MKL_NUM_THREADS=4

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
19 changes: 17 additions & 2 deletions leftovers/domain/recommend/api/recommend_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,35 @@
from leftovers.domain.recommend.schemas.recommend_response import RecommendRes
from leftovers.domain.recommend.service import evaluator, loader

import time

router = APIRouter(prefix="/menus")

@router.post("/recommend", response_model=Envelope[RecommendRes])
def recommend(req: RecommendReq):
start = time.time()
if not loader._DB_ROWS: # DB, 모델이 안 불러와졌으면 500 에러
return fail(500, {"message": "DB/모델이 비어있습니다."}).model_dump()
if req.concept not in loader.CONCEPTS: # 컨셉명이 올바르지 않으면 400 에러
return fail(400, {"message": f"알 수 없는 컨셉: {req.concept}"}).model_dump()

after_load = time.time()

# 요청 메뉴를 돌면서 evaluate_items 호출
items = evaluator.evaluate_items(req.concept, req.items)

after_evaluate = time.time()

# 요청 메뉴를 돌면서 evaluate_item 호출
items = [evaluator.evaluate_item(req.concept, a) for a in req.items]
ranked = [r for r in items if r.matched_name] # 이름이 매칭되지 않으면 제외
ranked.sort(key=lambda r: (r.suitability, r.similarity), reverse=True) # 적합도와 유사도가 높은 순으로 정렬
topn = ranked[: max(1, int(req.count))] # 요청한 개수만큼만 반환
after_ranked = time.time()

res = RecommendRes(concept=req.concept, count=len(topn), items=topn)

print(f"[DEBUG] 0. start {(start):.3f}s")
print(f"[DEBUG] 1. Loader check took {(after_load):.3f}s")
print(f"[DEBUG] 2. Evaluation took {(after_evaluate):.3f}s")
print(f"[DEBUG] 3. Ranking took {(after_ranked):.3f}s")

return ok(res)
81 changes: 48 additions & 33 deletions leftovers/domain/recommend/service/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,41 +17,56 @@ def to_feat(n: dict) -> np.ndarray:
return np.array([kcal, protein, fat, carbs, sugar, fiber, sodium, sat_fat, netcarb], dtype=float)

# 데이터에서 유사한 메뉴 찾아 점수를 계산하여 반환
def evaluate_item(concept: str, menu: str):
idx, b_name, sim = matcher.match_top1(menu) # 입력 메뉴명과 가장 유사한 메뉴 찾기
def evaluate_items(concept: str, menus: list[str]) -> list[MatchItem]:
matched = []
features = []

if idx < 0:
return MatchItem(input_menu=menu, note="매칭 실패")
for menu in menus:
idx, b_name, sim = matcher.match_top1(menu) # 메뉴 매칭
if idx < 0:
matched.append(MatchItem(input_menu=menu, note="매칭 실패"))
features.append(None)
continue

b_row = dict(loader._DB_ROWS[idx]) # 음식 데이터에서 해당 행을 딕셔너리 형태로 가져옴
b_row["name"] = b_name

b_row = dict(loader._DB_ROWS[idx]) # 음식 데이터에서 해당 행을 딕셔너리 형태로 가져옴
b_row["name"] = b_name
x_num = to_feat(b_row) # 피쳐 추출
matched.append((menu, b_name, sim, b_row, idx)) # 후처리용
features.append(x_num)

valid_idx = [i for i, f in enumerate(features) if f is not None] # 매칭 실패 제거하고 batch 변환

if not valid_idx:
return matched # 전부 실패면 그대로 반환

X = loader._DB_FEATS[[m[-1] for m in matched if not isinstance(m, MatchItem)]] # dict -> numpy 변환 대신 캐시된 _DB_FEATS 사용

x_num = to_feat(b_row).reshape(1, -1) # 영양성분을 숫자 벡터로 변환
x_imp = loader._IMPUTER.transform(x_num) # 결측치 보간
x_scaled = loader._SCALER.transform(x_imp) # 모델 학습 범위에 맞게 정규화
X = loader._IMPUTER.transform(X) # 결측치 보간
X = loader._SCALER.transform(X) # 모델 학습 범위에 맞게 정규화

model = loader._MODELS[concept]
pred = float(model.predict(x_scaled)[0]) # 모델이 예측한 적합도
rule = float(compute_score(concept, b_row, loader._CALIB or None)) # 규칙 기반 점수
fused = 0.3 * pred + 0.7 * rule
score_int = int(round(fused)) # 최종 점수

def nz(v):
try:
x = float(v) # NaN, inf 같은 값 들어오면 0.0 보정
return 0.0 if not np.isfinite(x) else x
except Exception:
return 0.0

return MatchItem(
input_menu=menu,
matched_name=b_name,
similarity=round(sim, 3),
suitability=score_int
# detail=NutritionDetail( # 개발 시에만 반환
# kcal=nz(b_row.get("kcal")), protein=nz(b_row.get("protein")), fat=nz(b_row.get("fat")),
# carbs=nz(b_row.get("carbs")), sugar=nz(b_row.get("sugar")), fiber=nz(b_row.get("fiber")),
# sodium=nz(b_row.get("sodium")), sat_fat=nz(b_row.get("sat_fat", b_row.get("fat", 0.0)))
# ),
# note=None
)
preds = model.predict(X) # 모델 배치 예측

results = []
pred_i = 0
for m in matched:
if isinstance(m, MatchItem): # 매칭 실패
results.append(m)
else:
menu, b_name, sim, b_row, idx = m
pred = float(preds[pred_i])
rule = float(compute_score(concept, b_row, loader._CALIB or None))
fused = 0.3 * pred + 0.7 * rule
score_int = int(round(fused))
pred_i += 1

results.append(
MatchItem(
input_menu=menu,
matched_name=b_name,
similarity=round(sim, 3),
suitability=score_int,
)
)
return results
43 changes: 39 additions & 4 deletions leftovers/domain/recommend/service/loader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from typing import List
import joblib
import numpy as np
import hnswlib
from sklearn.decomposition import TruncatedSVD

from leftovers.domain.recommend.service.food_kfda_loader import load_kfda_excels
from leftovers.domain.recommend.service.evaluator import to_feat

FOOD_FILES = ["leftovers/domain/recommend/data/foodData1.xlsx", "leftovers/domain/recommend/data/foodData2.xlsx"]
MODEL_DIR = "leftovers/domain/recommend/model_store"
Expand All @@ -11,32 +15,63 @@

# 캐시 이용 -> 서버 시작 시, 메모리에 로딩해두고 API 요청마다 바로 쓰게
_DB_ROWS: List[dict] = [] # 음식 데이터
_DB_FEATS = None # numpy 캐시
_NAME_LIST: List[str] = [] # 음식 이름 리스트
_NAME_VEC = None # 음식 이름 벡터화(음식 이름 문자열을 숫자 벡터로 변환)
_NAME_MAT = None # 벡터화 결과 저장소(매트릭스)
_NAME_MAT = None # 벡터화 결과 저장소(매트릭스) : 유사도 계산 전체 돌릴 때 사용
_NAME_LOOKUP = {} # 벡터화 결과 딕셔너리 : 메뉴가 DB에 있는 경우, 그 벡터만 필요할 때 사용
_IMPUTER = None # 결측치를 적절한 값으로 채워주는 보간기
_SCALER = None # 값들의 크기를 일정한 값으로 맞춰주는 도구
_MODELS = {} # 컨셉별 ML 모델
_CALIB = None # 점수 보정기

_HNSW_INDEX = None # ANN 인덱스

# 캐시에 DB와 모델 전부 로딩
def load_all():
global _DB_ROWS, _NAME_LIST, _NAME_VEC, _NAME_MAT, _IMPUTER, _SCALER, _MODELS, _CALIB

global _DB_ROWS, _DB_FEATS
global _NAME_LIST, _NAME_VEC, _NAME_MAT, _NAME_LOOKUP
global _IMPUTER, _SCALER, _MODELS, _CALIB, _HNSW_INDEX

_DB_ROWS = load_kfda_excels(FOOD_FILES, sheet_name=None)

feats = [to_feat(row) for row in _DB_ROWS] # dict -> numpy 변환을 미리해두기
_DB_FEATS = np.vstack(feats).astype(np.float32) # float32로 메모리 최적화
print("1차 진입")

# 이름 벡터 관련
_NAME_VEC = joblib.load(f"{MODEL_DIR}/name_vectorizer.joblib")
_NAME_MAT = joblib.load(f"{MODEL_DIR}/name_matrix.joblib")
_NAME_LIST = joblib.load(f"{MODEL_DIR}/name_list.joblib")

# sparse -> dense float32 변환
svd = TruncatedSVD(n_components=256, random_state=42)
_NAME_MAT_DENSE = svd.fit_transform(_NAME_MAT).astype(np.float32)

# dense 기반 이름 벡터 캐시
_NAME_LOOKUP = {name: vec for name, vec in zip(_NAME_LIST, _NAME_MAT_DENSE)}

print(f"[DEBUG] load_all: _HNSW_INDEX before = {id(_HNSW_INDEX)}")

# HNSW 인덱스 구축
dim = _NAME_MAT_DENSE.shape[1]
_HNSW_INDEX = hnswlib.Index(space='cosine', dim=dim)
_HNSW_INDEX.init_index(max_elements=_NAME_MAT.shape[0],
ef_construction=200, M=16)
_HNSW_INDEX.add_items(_NAME_MAT_DENSE)
_HNSW_INDEX.set_ef(50)

print(f"[DEBUG] load_all: _HNSW_INDEX after = {id(_HNSW_INDEX)}")

# 영양성분 전처리기
_IMPUTER = joblib.load(f"{MODEL_DIR}/nutrition_imputer.joblib")
_SCALER = joblib.load(f"{MODEL_DIR}/nutrition_scaler.joblib")

# ML 모델
_MODELS = {c: joblib.load(f"{MODEL_DIR}/concept_model_{c}.joblib") for c in CONCEPTS}
print("마지막 진입")

try:
_CALIB = joblib.load(f"{MODEL_DIR}/calibration.joblib")
_CALIB = joblib.load(f"{MODEL_DIR}/calibration.joblib") # calibration 로딩
except Exception:
_CALIB = None
15 changes: 9 additions & 6 deletions leftovers/domain/recommend/service/matcher.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from leftovers.domain.recommend.service import loader

# 메뉴 이름이 유사한 것 찾기
def match_top1(query: str):
if not loader._NAME_LIST: # 로딩된 메뉴가 없을 경우
if not loader._NAME_LIST or loader._HNSW_INDEX is None : # 로딩된 메뉴가 없을 경우, ANN 인덱스가 없을 경우
return (-1, "", 0.0) # 매칭 실패

query_vector = loader._NAME_VEC.transform([str(query)]) # 문자열을 벡터로 변환
similarity_list = cosine_similarity(query_vector, loader._NAME_MAT).ravel() # 코사인 유사도(두 벡터 간의 유사도) 측정
idx = int(np.argmax(similarity_list)) # 가장 높은 유사도를 가진 인덱스
similarity = float(similarity_list[idx])
if query in loader._NAME_LOOKUP: # 입력이 DB에 존재하면 변환 스킵하고 캐싱된 벡터 사용
query_vector = loader._NAME_LOOKUP[query].astype(np.float32).reshape(1, -1)
else:
query_vector = loader._NAME_VEC.transform([str(query)]).toarray().astype(np.float32)

labels, distances = loader._HNSW_INDEX.knn_query(query_vector, k=1) # ANN(HNSW) 검색
idx = int(labels[0][0])
similarity = 1 - float(distances[0][0]) # 코사인 거리 -> 유사도 변환 (1 - distance)

if not np.isfinite(similarity): # NaN이나 inf 나오면
similarity = 0.0 # 0.0으로 보정
Expand Down
1 change: 1 addition & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

@asynccontextmanager
async def lifespan(app: FastAPI):
print("loop 진입")
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, loader.load_all)
print("모델/DB 로딩 완료")
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ python-dotenv==1.0.1
scikit-learn==1.5.1
joblib==1.4.2
scipy==1.13.1
openai>=1.50.2
openai>=1.50.2
hnswlib==0.8.0