From 814c23d7651a480731bd8e9a90bc25487ee3b402 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 15:08:06 +0900 Subject: [PATCH 01/33] =?UTF-8?q?Chore:=20=ED=94=84=EB=A1=9C=EC=A0=9D?= =?UTF-8?q?=ED=8A=B8=20=EA=B5=AC=EC=A1=B0=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/data/__init__.py | 4 ++++ src/main/ai/models/__init__.py | 3 ++- src/main/ai/router/__init__.py | 2 ++ src/main/ai/service/__init__.py | 2 ++ src/tests/ai/__init__.py | 3 +++ src/tests/ai/data/__init__.py | 3 +++ src/tests/ai/models/__init__.py | 3 +++ src/tests/ai/router/__init__.py | 3 +++ src/tests/ai/service/__init__.py | 3 +++ 9 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/main/ai/data/__init__.py b/src/main/ai/data/__init__.py index e69de29..815c852 100644 --- a/src/main/ai/data/__init__.py +++ b/src/main/ai/data/__init__.py @@ -0,0 +1,4 @@ +from src.main.ai.data.CategoryRecommendationRepository import CategoryRecommendationRepository +from src.main.ai.data.CategoryRecommendationQueue import CategoryRecommendationQueue +from src.main.ai.data.FileDuplicateCheckRepository import FileDuplicateCheckRepository +from src.main.ai.data.FileDuplicateCheckQueue import FileDuplicateCheckQueue diff --git a/src/main/ai/models/__init__.py b/src/main/ai/models/__init__.py index 0519ecb..4334c6f 100644 --- a/src/main/ai/models/__init__.py +++ b/src/main/ai/models/__init__.py @@ -1 +1,2 @@ - \ No newline at end of file +from src.main.ai.models.CategoryRecommendation import * +from src.main.ai.models.FileDuplicateCheck import * \ No newline at end of file diff --git a/src/main/ai/router/__init__.py b/src/main/ai/router/__init__.py index e69de29..2247572 100644 --- a/src/main/ai/router/__init__.py +++ b/src/main/ai/router/__init__.py @@ -0,0 +1,2 @@ +from src.main.ai.router.AIPublicAPIRouter import router as public_router +from 
src.main.ai.router.AIInternalAPIRouter import router as internal_router diff --git a/src/main/ai/service/__init__.py b/src/main/ai/service/__init__.py index e69de29..12c5f3f 100644 --- a/src/main/ai/service/__init__.py +++ b/src/main/ai/service/__init__.py @@ -0,0 +1,2 @@ +from src.main.ai.service.CategoryRecommendationService import CategoryRecommendationService +from src.main.ai.service.FileDuplicateCheckService import FileDuplicateCheckService diff --git a/src/tests/ai/__init__.py b/src/tests/ai/__init__.py index e69de29..b28b04f 100644 --- a/src/tests/ai/__init__.py +++ b/src/tests/ai/__init__.py @@ -0,0 +1,3 @@ + + + diff --git a/src/tests/ai/data/__init__.py b/src/tests/ai/data/__init__.py index e69de29..b28b04f 100644 --- a/src/tests/ai/data/__init__.py +++ b/src/tests/ai/data/__init__.py @@ -0,0 +1,3 @@ + + + diff --git a/src/tests/ai/models/__init__.py b/src/tests/ai/models/__init__.py index e69de29..b28b04f 100644 --- a/src/tests/ai/models/__init__.py +++ b/src/tests/ai/models/__init__.py @@ -0,0 +1,3 @@ + + + diff --git a/src/tests/ai/router/__init__.py b/src/tests/ai/router/__init__.py index e69de29..b28b04f 100644 --- a/src/tests/ai/router/__init__.py +++ b/src/tests/ai/router/__init__.py @@ -0,0 +1,3 @@ + + + diff --git a/src/tests/ai/service/__init__.py b/src/tests/ai/service/__init__.py index e69de29..b28b04f 100644 --- a/src/tests/ai/service/__init__.py +++ b/src/tests/ai/service/__init__.py @@ -0,0 +1,3 @@ + + + From 40e4f3aa99f4d98df46bff6bf603bd54fa2684e5 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 15:10:52 +0900 Subject: [PATCH 02/33] =?UTF-8?q?Feat:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EC=B2=B4=ED=81=AC=EB=A5=BC=20=EC=9C=84=ED=95=9C=20?= =?UTF-8?q?SQS=20=EB=A9=94=EC=8B=9C=EC=A7=80=20=EC=A0=84=EC=86=A1=20?= =?UTF-8?q?=EA=B8=B0=EB=8A=A5=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
class FileDuplicateCheckQueue:
    """Publish file-duplicate-check jobs to an SQS queue.

    Every message is sent with ``MessageGroupId`` and
    ``MessageDeduplicationId``, the parameters SQS uses for ordering and
    deduplication on FIFO queues.
    """

    def __init__(self, sqs_client, queue_url: str):
        """Store the injected client and the target queue.

        Args:
            sqs_client: A boto3 SQS client, or any object exposing a compatible
                ``send_message(**kwargs)``. Deliberately left unannotated:
                ``boto3.client`` is a factory function, not a type.
            queue_url: URL of the queue that receives the messages.
        """
        self.sqs = sqs_client
        self.queue_url = queue_url

    def send_message(self, request_id: str, user_id: str, s3_bucket: str, s3_key: str):
        """Send one duplicate-check job for the object at ``s3_bucket``/``s3_key``.

        Args:
            request_id: Identifier of the duplicate-check request; also used as
                the deduplication id.
            user_id: Owner of the request; also used as the message group id.
            s3_bucket: Bucket holding the uploaded file.
            s3_key: Object key of the uploaded file.

        Returns:
            Whatever ``sqs_client.send_message`` returns.

        Raises:
            Exception: Any error raised by the client is logged with its
                traceback and re-raised unchanged.
        """
        message_body = {
            'request_type': 'file_duplicate_check_embedding_file',
            'request_id': request_id,
            'user_id': str(user_id),
            'payload': {
                's3_bucket': s3_bucket,
                's3_key': s3_key
            }
        }

        try:
            return self.sqs.send_message(
                QueueUrl=self.queue_url,
                MessageGroupId=str(user_id),
                MessageDeduplicationId=str(request_id),
                MessageBody=json.dumps(message_body)
            )
        except Exception:
            # Imported here because the module's import block does not provide it.
            import logging

            logging.getLogger(__name__).exception(
                "Error sending message to SQS (request_id=%s)", request_id
            )
            raise
class FileDuplicateCheckRepository:
    """MongoDB access for file duplicate-check requests.

    Works on the ``file_duplicate_checks`` and ``files`` collections of the
    client's default database. Lookups by id return ``None`` for ids that are
    not valid ObjectIds; database errors are NOT swallowed and propagate to the
    caller, so an outage is not reported as "not found".
    """

    def __init__(self, client: "MongoClient"):
        self.db = client.get_database()
        self.collection: Collection = self.db.get_collection('file_duplicate_checks')
        self.files_collection: Collection = self.db.get_collection('files')

    @staticmethod
    def _to_object_id(value):
        """Return ``ObjectId(value)``, or ``None`` when *value* is not a valid id."""
        # Imported here because the module's import block only provides ObjectId.
        from bson.errors import InvalidId

        try:
            return ObjectId(value)
        except (InvalidId, TypeError):
            return None

    def create_duplicate_check_request(self, file_id: str, user_id: str) -> dict:
        """Insert a pending duplicate-check request and return it with its ``_id``."""
        document = {
            "file_id": file_id,
            "user_id": user_id,
            "is_completed": False,
            "is_duplicated": None,
            "created_at": self.get_current_time()
        }
        result = self.collection.insert_one(document)
        # Return a copy so the caller does not share the dict given to the driver.
        document = document.copy()
        document["_id"] = result.inserted_id
        return document

    def get_duplicate_check_by_id(self, request_id: str) -> Optional[dict]:
        """Return the request with this id, or ``None`` if the id is invalid or unknown."""
        object_id = self._to_object_id(request_id)
        if object_id is None:
            return None
        return self.collection.find_one({"_id": object_id})

    def get_duplicate_check_by_file_id(self, file_id: str, user_id: str) -> Optional[dict]:
        """Return a request stored for this file and user, or ``None`` if there is none."""
        return self.collection.find_one({
            "file_id": file_id,
            "user_id": user_id
        })

    def update_duplicate_check_result(self, request_id: str, is_duplicated: bool) -> Optional[dict]:
        """Mark a request as completed with the given verdict.

        Returns:
            The updated document, or ``None`` when the id is invalid or the
            update modified nothing.
        """
        object_id = self._to_object_id(request_id)
        if object_id is None:
            return None

        result = self.collection.update_one(
            {"_id": object_id},
            {
                "$set": {
                    "is_completed": True,
                    "is_duplicated": is_duplicated,
                    "updated_at": self.get_current_time()
                }
            }
        )

        # NOTE(review): modified_count is also 0 when a document matched but was
        # left unchanged; matched_count would tell the two cases apart.
        if result.modified_count == 0:
            return None

        return self.collection.find_one({"_id": object_id})

    def get_file_by_id(self, file_id: str) -> Optional[dict]:
        """Return the ``files`` document with this id, or ``None`` if invalid or unknown."""
        object_id = self._to_object_id(file_id)
        if object_id is None:
            return None
        return self.files_collection.find_one({"_id": object_id})

    def get_current_time(self):
        """Return the current time as a timezone-aware UTC datetime."""
        return datetime.now(timezone.utc)
class FileDuplicateCheckService:
    """Coordinate duplicate-check requests between the repository and the queue."""

    def __init__(self, repository: FileDuplicateCheckRepository, queue: FileDuplicateCheckQueue):
        self.repository = repository
        self.queue = queue

    @staticmethod
    def _extract_s3_bucket(s3_url) -> str:
        """Return the bucket name from a virtual-hosted-style S3 URL.

        Accepts both ``bucket.s3.amazonaws.com`` and regional hosts such as
        ``bucket.s3.ap-northeast-2.amazonaws.com``. An empty or missing URL
        gives ``""``. A URL that is not recognised as S3 falls back to the
        previous behaviour (the URL with ``https://`` removed).
        """
        # Imported here because the module's import block does not provide it.
        import re

        if not s3_url:
            return ""

        match = re.match(
            r'^(?:https?://)?([^/]+)\.s3[.-](?:[A-Za-z0-9-]+\.)*amazonaws\.com(?:\.cn)?(?:[:/]|$)',
            s3_url
        )
        if match:
            return match.group(1)

        return s3_url.split(".s3.amazonaws.com/")[0].replace("https://", "")

    @staticmethod
    def _to_status_response(document) -> FileDuplicateCheckStatusResponse:
        """Map a stored duplicate-check document to the API status model."""
        return FileDuplicateCheckStatusResponse(
            request_id=str(document["_id"]),
            file_id=document["file_id"],
            is_completed=document["is_completed"],
            is_duplicated=document.get("is_duplicated")
        )

    def create_duplicate_check_request(self, request: FileDuplicateCheckRequest) -> FileDuplicateCheckResponse:
        """Register a duplicate check for a file and enqueue it for processing.

        Raises:
            HTTPException: 404 when ``request.file_id`` matches no stored file.
        """
        # Make sure the file exists
        file = self.repository.get_file_by_id(request.file_id)
        if not file:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="파일을 찾을 수 없습니다. 존재하지 않는 ID입니다."
            )

        # Persist the request; MongoDB generates the ObjectId
        document = self.repository.create_duplicate_check_request(
            file_id=request.file_id,
            user_id=request.user_id
        )

        # request_id is the MongoDB _id rendered as a string
        request_id = str(document["_id"])

        # Locate the uploaded object in S3
        s3_bucket = self._extract_s3_bucket(file.get("s3_url"))
        s3_key = file.get("file_key", "")

        # Publish the job. NOTE(review): if this raises, the document created
        # above stays in the collection with is_completed=False.
        self.queue.send_message(
            request_id=request_id,
            user_id=request.user_id,
            s3_bucket=s3_bucket,
            s3_key=s3_key
        )

        return FileDuplicateCheckResponse(request_id=request_id)

    def get_duplicate_check_status(self, file_id: str, user_id: str) -> Optional[FileDuplicateCheckStatusResponse]:
        """Return the status of the check for this file and user, or ``None`` if absent."""
        result = self.repository.get_duplicate_check_by_file_id(file_id, user_id)

        if not result:
            return None

        return self._to_status_response(result)

    def update_duplicate_check_result(self, request_id: str, request: FileDuplicateCheckEmbeddingsRequest) -> Optional[FileDuplicateCheckStatusResponse]:
        """Complete a duplicate-check request once its embeddings have arrived.

        Returns:
            The updated status, or ``None`` when the request does not exist or
            the repository reports that nothing was updated.
        """
        # Make sure the request exists
        check_request = self.repository.get_duplicate_check_by_id(request_id)
        if not check_request:
            return None

        # TODO: compare request.embeddings by vector similarity. Until that is
        # implemented every completed request is reported as not duplicated,
        # whether or not embeddings were supplied.
        is_duplicated = False

        # Store the verdict
        updated = self.repository.update_duplicate_check_result(
            request_id=request_id,
            is_duplicated=is_duplicated
        )

        if not updated:
            return None

        return self._to_status_response(updated)
@router.post("/file-duplicate-checks", response_model=FileDuplicateCheckResponse)
async def create_file_duplicate_check_request(
    request: FileDuplicateCheckRequest,
    service: FileDuplicateCheckService = Depends(get_file_duplicate_check_service)
):
    """
    Start a duplicate check for an uploaded file.

    HTTP errors raised by the service are passed through unchanged. Any other
    exception is reported as 400, with the original exception attached as the
    cause so it is not lost from tracebacks.

    NOTE(review): user_id is taken from the request body here, whereas the GET
    route below takes it from get_current_user - confirm this is intended.
    """
    try:
        return service.create_duplicate_check_request(request)
    except HTTPException:
        # Re-raise as-is, keeping the original traceback.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="잘못된 요청입니다. 명세에 맞지 않은 요청입니다."
        ) from e


@router.get("/file-duplicate-checks", response_model=FileDuplicateCheckStatusResponse)
async def get_file_duplicate_check_status(
    file_id: str,
    user_id: uuid.UUID = Depends(get_current_user),
    service: FileDuplicateCheckService = Depends(get_file_duplicate_check_service)
):
    """
    Return the duplicate-check result for an uploaded file.

    Responds with 404 when the service has no check for this file and user.
    """
    result = service.get_duplicate_check_status(file_id, str(user_id))

    if not result:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="요청을 찾을 수 없습니다. 존재하지 않는 ID입니다."
        )

    return result
@router.post("/file-duplicate-check-embeddings", response_model=FileDuplicateCheckStatusResponse)
async def update_file_duplicate_check_result(
    request: FileDuplicateCheckEmbeddingsRequest,
    service: FileDuplicateCheckService = Depends(get_file_duplicate_check_service)
):
    """
    Record the duplicate-check result for an uploaded file.

    Returns the updated status, or a 404 JSON body when the service reports
    no result for the given request id.
    """
    updated_status = service.update_duplicate_check_result(request.request_id, request)
    if updated_status:
        return updated_status

    not_found_body = {
        "status": 404,
        "message": "요청을 찾을 수 없습니다.",
        "detail": "존재하지 않는 ID입니다."
    }
    return JSONResponse(
        status_code=status.HTTP_404_NOT_FOUND,
        content=not_found_body
    )
class TestFileDuplicateCheckQueue:
    """Unit tests for FileDuplicateCheckQueue using a mocked SQS client."""

    def setup_method(self):
        # Shared test data
        self.test_request_id = "6123456789abcdef01234567"
        self.test_user_id = "12345678-1234-5678-1234-567812345678"
        self.test_s3_bucket = "test-bucket"
        self.test_s3_key = "example.pdf"

        # Mocked SQS client and the queue under test
        self.mock_sqs = MagicMock()
        self.test_queue_url = "https://sqs.ap-northeast-2.amazonaws.com/123456789012/file-duplicate-check-queue.fifo"
        self.queue = FileDuplicateCheckQueue(self.mock_sqs, self.test_queue_url)

    def test_send_message_success(self):
        # given
        sqs_response = {"MessageId": "12345"}
        self.mock_sqs.send_message.return_value = sqs_response

        payload = {
            's3_bucket': self.test_s3_bucket,
            's3_key': self.test_s3_key
        }
        message = {
            'request_type': 'file_duplicate_check_embedding_file',
            'request_id': self.test_request_id,
            'user_id': self.test_user_id,
            'payload': payload
        }

        # when
        result = self.queue.send_message(
            self.test_request_id,
            self.test_user_id,
            self.test_s3_bucket,
            self.test_s3_key
        )

        # then
        self.mock_sqs.send_message.assert_called_once_with(
            QueueUrl=self.test_queue_url,
            MessageGroupId=self.test_user_id,
            MessageDeduplicationId=self.test_request_id,
            MessageBody=json.dumps(message)
        )
        assert result == sqs_response

    def test_send_message_exception(self):
        # given: the client fails on every send
        self.mock_sqs.send_message.side_effect = Exception("SQS Error")

        # when & then: the error reaches the caller unchanged
        with pytest.raises(Exception) as raised:
            self.queue.send_message(
                self.test_request_id,
                self.test_user_id,
                self.test_s3_bucket,
                self.test_s3_key
            )

        assert str(raised.value) == "SQS Error"
        self.mock_sqs.send_message.assert_called_once()
class TestFileDuplicateCheckRepository:
    """Unit tests for FileDuplicateCheckRepository against mocked collections."""

    def setup_method(self):
        # Shared test data
        self.test_file_id = "6123456789abcdef01234567"
        self.test_user_id = "12345678-1234-5678-1234-567812345678"
        self.test_request_id = "7123456789abcdef01234567"
        self.test_object_id = ObjectId(self.test_request_id)
        self.test_file_object_id = ObjectId(self.test_file_id)
        self.test_time = datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc)

        # Mocked MongoDB client; collections are handed out by name
        self.mock_collection = MagicMock()
        self.mock_files_collection = MagicMock()

        def pick_collection(name):
            if name == 'file_duplicate_checks':
                return self.mock_collection
            return self.mock_files_collection

        self.mock_db = MagicMock()
        self.mock_db.get_collection.side_effect = pick_collection

        self.mock_client = MagicMock()
        self.mock_client.get_database.return_value = self.mock_db

        # Repository under test
        self.repository = FileDuplicateCheckRepository(self.mock_client)

        # Freeze the repository clock
        self.time_patch = patch.object(self.repository, 'get_current_time', return_value=self.test_time)
        self.time_patch.start()

    def teardown_method(self):
        self.time_patch.stop()

    def _stored_check(self, **overrides):
        # A pending duplicate-check document as the collection would return it.
        stored = {
            "_id": self.test_object_id,
            "file_id": self.test_file_id,
            "user_id": self.test_user_id,
            "is_completed": False,
            "is_duplicated": None,
            "created_at": self.test_time
        }
        stored.update(overrides)
        return stored

    def test_create_duplicate_check_request(self):
        # given
        inserted = {
            "file_id": self.test_file_id,
            "user_id": self.test_user_id,
            "is_completed": False,
            "is_duplicated": None,
            "created_at": self.test_time
        }
        self.mock_collection.insert_one.return_value.inserted_id = self.test_object_id

        # when
        created = self.repository.create_duplicate_check_request(self.test_file_id, self.test_user_id)

        # then
        self.mock_collection.insert_one.assert_called_once_with(inserted)
        assert created["_id"] == self.test_object_id
        assert created["file_id"] == self.test_file_id
        assert created["user_id"] == self.test_user_id
        assert created["is_completed"] == False
        assert created["is_duplicated"] is None
        assert created["created_at"] == self.test_time

    def test_get_duplicate_check_by_id_found(self):
        # given
        stored = self._stored_check()
        self.mock_collection.find_one.return_value = stored

        # when
        found = self.repository.get_duplicate_check_by_id(self.test_request_id)

        # then
        self.mock_collection.find_one.assert_called_once_with({"_id": self.test_object_id})
        assert found == stored

    def test_get_duplicate_check_by_id_not_found(self):
        # given
        self.mock_collection.find_one.return_value = None

        # when
        found = self.repository.get_duplicate_check_by_id(self.test_request_id)

        # then
        self.mock_collection.find_one.assert_called_once_with({"_id": self.test_object_id})
        assert found is None

    def test_get_duplicate_check_by_id_invalid_id(self):
        # given: an id that cannot be converted to an ObjectId

        # when
        found = self.repository.get_duplicate_check_by_id("invalid_id")

        # then
        self.mock_collection.find_one.assert_not_called()
        assert found is None

    def test_get_duplicate_check_by_file_id_found(self):
        # given
        stored = self._stored_check()
        self.mock_collection.find_one.return_value = stored

        # when
        found = self.repository.get_duplicate_check_by_file_id(self.test_file_id, self.test_user_id)

        # then
        self.mock_collection.find_one.assert_called_once_with({
            "file_id": self.test_file_id,
            "user_id": self.test_user_id
        })
        assert found == stored

    def test_get_duplicate_check_by_file_id_not_found(self):
        # given
        self.mock_collection.find_one.return_value = None

        # when
        found = self.repository.get_duplicate_check_by_file_id(self.test_file_id, self.test_user_id)

        # then
        self.mock_collection.find_one.assert_called_once_with({
            "file_id": self.test_file_id,
            "user_id": self.test_user_id
        })
        assert found is None

    def test_update_duplicate_check_result_success(self):
        # given: the update modifies one document
        self.mock_collection.update_one.return_value.modified_count = 1

        stored = self._stored_check(
            is_completed=True,
            is_duplicated=False,
            updated_at=self.test_time
        )
        self.mock_collection.find_one.return_value = stored

        # when
        updated = self.repository.update_duplicate_check_result(self.test_request_id, False)

        # then
        expected_change = {
            "$set": {
                "is_completed": True,
                "is_duplicated": False,
                "updated_at": self.test_time
            }
        }
        self.mock_collection.update_one.assert_called_once_with(
            {"_id": self.test_object_id},
            expected_change
        )
        self.mock_collection.find_one.assert_called_once_with({"_id": self.test_object_id})
        assert updated == stored

    def test_update_duplicate_check_result_not_found(self):
        # given: the update modifies nothing
        self.mock_collection.update_one.return_value.modified_count = 0

        # when
        updated = self.repository.update_duplicate_check_result(self.test_request_id, False)

        # then
        self.mock_collection.update_one.assert_called_once()
        self.mock_collection.find_one.assert_not_called()
        assert updated is None

    def test_update_duplicate_check_result_invalid_id(self):
        # given: an id that cannot be converted to an ObjectId

        # when
        updated = self.repository.update_duplicate_check_result("invalid_id", False)

        # then
        self.mock_collection.update_one.assert_not_called()
        self.mock_collection.find_one.assert_not_called()
        assert updated is None

    def test_get_file_by_id_found(self):
        # given
        stored_file = {
            "_id": self.test_file_object_id,
            "file_key": "example.pdf",
            "s3_url": "https://bucket.s3.amazonaws.com/example.pdf"
        }
        self.mock_files_collection.find_one.return_value = stored_file

        # when
        found = self.repository.get_file_by_id(self.test_file_id)

        # then
        self.mock_files_collection.find_one.assert_called_once_with({"_id": self.test_file_object_id})
        assert found == stored_file

    def test_get_file_by_id_not_found(self):
        # given
        self.mock_files_collection.find_one.return_value = None

        # when
        found = self.repository.get_file_by_id(self.test_file_id)

        # then
        self.mock_files_collection.find_one.assert_called_once_with({"_id": self.test_file_object_id})
        assert found is None

    def test_get_file_by_id_invalid_id(self):
        # given: an id that cannot be converted to an ObjectId

        # when
        found = self.repository.get_file_by_id("invalid_id")

        # then
        self.mock_files_collection.find_one.assert_not_called()
        assert found is None
.../ai/router/test_ai_internal_api_router.py | 67 +++++++++++++++++-- 1 file changed, 62 insertions(+), 5 deletions(-) diff --git a/src/tests/ai/router/test_ai_internal_api_router.py b/src/tests/ai/router/test_ai_internal_api_router.py index fd48baa..7a66a17 100644 --- a/src/tests/ai/router/test_ai_internal_api_router.py +++ b/src/tests/ai/router/test_ai_internal_api_router.py @@ -1,9 +1,10 @@ import pytest from unittest.mock import MagicMock, patch -from fastapi import FastAPI +from fastapi import FastAPI, HTTPException from fastapi.testclient import TestClient from src.main.ai.models.CategoryRecommendation import CategoryRecommendationStatusResponse +from src.main.ai.models.FileDuplicateCheck import FileDuplicateCheckStatusResponse from src.main.ai.router.AIInternalAPIRouter import router as internal_router @@ -21,6 +22,8 @@ def setup_method(self): # 테스트 공통 데이터 self.test_request_id = "6123456789abcdef01234567" self.test_category = "기술" + self.test_user_id = "12345678-1234-5678-1234-567812345678" + self.test_file_id = "7123456789abcdef01234567" def test_update_category_recommendation_result_success(self, client): # given @@ -48,7 +51,6 @@ def test_update_category_recommendation_result_success(self, client): "predicted_category": self.test_category } - # 서비스가 한 번 호출됐는지 확인 mock_service.assert_called_once() def test_update_category_recommendation_result_not_found(self, client): @@ -57,9 +59,9 @@ def test_update_category_recommendation_result_not_found(self, client): "predicted_category": self.test_category } - # 서비스 응답 모의 설정 - 업데이트 실패 + # 서비스 응답 모의 설정 with patch('src.main.ai.service.CategoryRecommendationService.CategoryRecommendationService.update_recommendation_result') as mock_service: - # 서비스 응답 설정 + # 서비스 응답 설정 - 요청 없음 mock_service.return_value = None # when @@ -73,5 +75,60 @@ def test_update_category_recommendation_result_not_found(self, client): "detail": "존재하지 않는 ID입니다." 
} - # 서비스가 한 번 호출됐는지 확인 + mock_service.assert_called_once() + + def test_update_file_duplicate_check_embeddings_success(self, client): + # given + request_data = { + "request_id": self.test_request_id, + "embeddings": [1.0, 2.0, 3.0] + } + + # 서비스 응답 모의 설정 + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.update_duplicate_check_result') as mock_service: + # 서비스 응답 설정 + mock_service.return_value = FileDuplicateCheckStatusResponse( + request_id=self.test_request_id, + file_id=self.test_file_id, + is_completed=True, + is_duplicated=False + ) + + # when + response = client.post("/ai-proxy/file-duplicate-check-embeddings", json=request_data) + + # then + assert response.status_code == 200 + assert response.json() == { + "request_id": self.test_request_id, + "file_id": self.test_file_id, + "is_completed": True, + "is_duplicated": False + } + + mock_service.assert_called_once() + + def test_update_file_duplicate_check_embeddings_not_found(self, client): + # given + request_data = { + "request_id": self.test_request_id, + "embeddings": [1.0, 2.0, 3.0] + } + + # 서비스 응답 모의 설정 + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.update_duplicate_check_result') as mock_service: + # 서비스 응답 설정 - 요청 없음 + mock_service.return_value = None + + # when + response = client.post("/ai-proxy/file-duplicate-check-embeddings", json=request_data) + + # then + assert response.status_code == 404 + assert response.json() == { + "status": 404, + "message": "요청을 찾을 수 없습니다.", + "detail": "존재하지 않는 ID입니다." 
+ } + mock_service.assert_called_once() \ No newline at end of file From 95ab859cfed6df8184ffdc5945778963bf749c35 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 15:11:47 +0900 Subject: [PATCH 12/33] =?UTF-8?q?Feat:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EC=B2=B4=ED=81=AC=20API=EC=97=90=20=EB=8C=80?= =?UTF-8?q?=ED=95=9C=20=EB=8B=A8=EC=9C=84=20=ED=85=8C=EC=8A=A4=ED=8A=B8=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80=20=EB=B0=8F=20=EA=B8=B0=EC=A1=B4=20=ED=85=8C?= =?UTF-8?q?=EC=8A=A4=ED=8A=B8=20=EA=B0=9C=EC=84=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai/router/test_ai_public_api_router.py | 134 ++++++++++++++++-- 1 file changed, 125 insertions(+), 9 deletions(-) diff --git a/src/tests/ai/router/test_ai_public_api_router.py b/src/tests/ai/router/test_ai_public_api_router.py index cc8e247..74f650d 100644 --- a/src/tests/ai/router/test_ai_public_api_router.py +++ b/src/tests/ai/router/test_ai_public_api_router.py @@ -1,7 +1,7 @@ import pytest import uuid from unittest.mock import MagicMock, patch -from fastapi import FastAPI, Depends +from fastapi import FastAPI, Depends, HTTPException from fastapi.testclient import TestClient from src.main.ai.models.CategoryRecommendation import ( @@ -9,6 +9,7 @@ CategoryRecommendationResponse, CategoryRecommendationStatusResponse ) +from src.main.ai.models.FileDuplicateCheck import FileDuplicateCheckStatusResponse, FileDuplicateCheckResponse, FileDuplicateCheckRequest from src.main.ai.router.AIPublicAPIRouter import router as public_router from src.main.auth.dependencies import get_current_user @@ -36,6 +37,7 @@ def setup_method(self): self.test_title = "테스트 제목" self.test_user_id = uuid.UUID("12345678-1234-5678-1234-567812345678") self.test_request_id = "6123456789abcdef01234567" + self.test_file_id = "7123456789abcdef01234567" def test_create_category_recommendation_request(self, client): # given @@ -53,15 
+55,16 @@ def test_create_category_recommendation_request(self, client): # then assert response.status_code == 200 - assert response.json() == {"request_id": self.test_request_id} - - # 서비스가 한 번 호출됐는지 확인 + assert response.json() == { + "request_id": self.test_request_id + } mock_service.assert_called_once() def test_get_category_recommendation_status_exists(self, client): # given + # 서비스 응답 모의 설정 with patch('src.main.ai.service.CategoryRecommendationService.CategoryRecommendationService.get_recommendation_status') as mock_service: - # 서비스 응답 설정 - 완료된 추천 + # 서비스 응답 설정 mock_service.return_value = CategoryRecommendationStatusResponse( request_id=self.test_request_id, is_completed=True, @@ -78,14 +81,13 @@ def test_get_category_recommendation_status_exists(self, client): "is_completed": True, "predicted_category": "기술" } - - # 서비스가 한 번 호출됐는지 확인 mock_service.assert_called_once() def test_get_category_recommendation_status_not_found(self, client): # given + # 서비스 응답 모의 설정 with patch('src.main.ai.service.CategoryRecommendationService.CategoryRecommendationService.get_recommendation_status') as mock_service: - # 서비스 응답 설정 - 요청 없음 + # 서비스 응답 설정 mock_service.return_value = None # when @@ -93,4 +95,118 @@ def test_get_category_recommendation_status_not_found(self, client): # then assert response.status_code == 404 - assert "요청을 찾을 수 없습니다" in response.json().get("detail", "") \ No newline at end of file + assert response.json() == { + "detail": "요청을 찾을 수 없습니다. 존재하지 않는 ID입니다." 
+ } + mock_service.assert_called_once() + + def test_create_file_duplicate_check_request_success(self, client): + # given + request_data = { + "user_id": str(self.test_user_id), + "file_id": self.test_file_id + } + + # 서비스 응답 모의 설정 + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.create_duplicate_check_request') as mock_service: + # 서비스 응답 설정 + mock_service.return_value = FileDuplicateCheckResponse(request_id=self.test_request_id) + + # when + response = client.post("/ai/file-duplicate-checks", json=request_data) + + # then + assert response.status_code == 200 + assert response.json() == { + "request_id": self.test_request_id + } + mock_service.assert_called_once_with(FileDuplicateCheckRequest(**request_data)) + + def test_create_file_duplicate_check_request_file_not_found(self, client): + # given + request_data = { + "user_id": str(self.test_user_id), + "file_id": self.test_file_id + } + + # 서비스 응답 모의 설정 + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.create_duplicate_check_request') as mock_service: + # 서비스에서 예외 발생 + mock_service.side_effect = HTTPException( + status_code=404, + detail="파일을 찾을 수 없습니다. 존재하지 않는 ID입니다." + ) + + # when + response = client.post("/ai/file-duplicate-checks", json=request_data) + + # then + assert response.status_code == 404 + assert response.json() == { + "detail": "파일을 찾을 수 없습니다. 존재하지 않는 ID입니다." 
+ } + mock_service.assert_called_once() + + def test_create_file_duplicate_check_request_bad_request(self, client): + # given + request_data = { + "user_id": str(self.test_user_id), + "file_id": self.test_file_id + } + + # 서비스 응답 모의 설정 + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.create_duplicate_check_request') as mock_service: + # 서비스에서 일반 예외 발생 + mock_service.side_effect = Exception("서비스 오류") + + # when + response = client.post("/ai/file-duplicate-checks", json=request_data) + + # then + assert response.status_code == 400 + assert response.json() == { + "detail": "잘못된 요청입니다. 명세에 맞지 않은 요청입니다." + } + mock_service.assert_called_once() + + def test_get_file_duplicate_check_status_exists(self, client): + # given + # 서비스 응답 모의 설정 + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.get_duplicate_check_status') as mock_service: + # 서비스 응답 설정 + mock_service.return_value = FileDuplicateCheckStatusResponse( + request_id=self.test_request_id, + file_id=self.test_file_id, + is_completed=True, + is_duplicated=False + ) + + # when + response = client.get(f"/ai/file-duplicate-checks?file_id={self.test_file_id}") + + # then + assert response.status_code == 200 + assert response.json() == { + "request_id": self.test_request_id, + "file_id": self.test_file_id, + "is_completed": True, + "is_duplicated": False + } + mock_service.assert_called_once_with(self.test_file_id, str(self.test_user_id)) + + def test_get_file_duplicate_check_status_not_found(self, client): + # given + # 서비스 응답 모의 설정 + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.get_duplicate_check_status') as mock_service: + # 서비스 응답 설정 + mock_service.return_value = None + + # when + response = client.get(f"/ai/file-duplicate-checks?file_id={self.test_file_id}") + + # then + assert response.status_code == 404 + assert response.json() == { + "detail": "요청을 찾을 수 없습니다. 존재하지 않는 ID입니다." 
+ } + mock_service.assert_called_once_with(self.test_file_id, str(self.test_user_id)) \ No newline at end of file From 579f15a5cd443f21ffd6f335c46a12f949f4862f Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 15:11:50 +0900 Subject: [PATCH 13/33] =?UTF-8?q?Feat:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EC=B2=B4=ED=81=AC=20=EC=84=9C=EB=B9=84=EC=8A=A4?= =?UTF-8?q?=EC=97=90=20=EB=8C=80=ED=95=9C=20=EB=8B=A8=EC=9C=84=20=ED=85=8C?= =?UTF-8?q?=EC=8A=A4=ED=8A=B8=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_file_duplicate_check_service.py | 227 ++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 src/tests/ai/service/test_file_duplicate_check_service.py diff --git a/src/tests/ai/service/test_file_duplicate_check_service.py b/src/tests/ai/service/test_file_duplicate_check_service.py new file mode 100644 index 0000000..16cdc76 --- /dev/null +++ b/src/tests/ai/service/test_file_duplicate_check_service.py @@ -0,0 +1,227 @@ +import pytest +import uuid +from unittest.mock import MagicMock, patch +from bson import ObjectId +from datetime import datetime, timezone +from fastapi import HTTPException + +from src.main.ai.service.FileDuplicateCheckService import FileDuplicateCheckService +from src.main.ai.models.FileDuplicateCheck import ( + FileDuplicateCheckRequest, + FileDuplicateCheckResponse, + FileDuplicateCheckStatusResponse, + FileDuplicateCheckEmbeddingsRequest +) + + +class TestFileDuplicateCheckService: + def setup_method(self): + # 목업 리포지토리 및 큐 생성 + self.mock_repository = MagicMock() + self.mock_queue = MagicMock() + + # 테스트 대상 서비스 생성 + self.service = FileDuplicateCheckService(self.mock_repository, self.mock_queue) + + # 테스트 공통 데이터 + self.test_file_id = "6123456789abcdef01234567" + self.test_user_id = "12345678-1234-5678-1234-567812345678" + self.test_request_id = "7123456789abcdef01234567" + 
self.test_object_id = ObjectId(self.test_request_id) + self.test_file_object_id = ObjectId(self.test_file_id) + + def test_create_duplicate_check_request_success(self): + # given + request = FileDuplicateCheckRequest( + user_id=self.test_user_id, + file_id=self.test_file_id + ) + + # 파일 존재함 + file_document = { + "_id": self.test_file_object_id, + "file_key": "example.pdf", + "s3_url": "https://bucket.s3.amazonaws.com/example.pdf" + } + self.mock_repository.get_file_by_id.return_value = file_document + + # 요청 생성 응답 + mongo_document = { + "_id": self.test_object_id, + "file_id": self.test_file_id, + "user_id": self.test_user_id, + "is_completed": False, + "is_duplicated": None, + "created_at": datetime(2023, 1, 1, tzinfo=timezone.utc) + } + self.mock_repository.create_duplicate_check_request.return_value = mongo_document + + # when + result = self.service.create_duplicate_check_request(request) + + # then + self.mock_repository.get_file_by_id.assert_called_once_with(self.test_file_id) + self.mock_repository.create_duplicate_check_request.assert_called_once_with( + file_id=self.test_file_id, + user_id=self.test_user_id + ) + self.mock_queue.send_message.assert_called_once() + + assert isinstance(result, FileDuplicateCheckResponse) + assert result.request_id == str(self.test_object_id) + + def test_create_duplicate_check_request_file_not_found(self): + # given + request = FileDuplicateCheckRequest( + user_id=self.test_user_id, + file_id=self.test_file_id + ) + + # 파일이 없음 + self.mock_repository.get_file_by_id.return_value = None + + # when & then + with pytest.raises(HTTPException) as exc_info: + self.service.create_duplicate_check_request(request) + + assert exc_info.value.status_code == 404 + self.mock_repository.get_file_by_id.assert_called_once_with(self.test_file_id) + self.mock_repository.create_duplicate_check_request.assert_not_called() + self.mock_queue.send_message.assert_not_called() + + def test_get_duplicate_check_status_exists(self): + # given + # 리포지토리 응답 
설정 - 완료되지 않은 중복 검사 + mongo_document = { + "_id": self.test_object_id, + "file_id": self.test_file_id, + "user_id": self.test_user_id, + "is_completed": False, + "is_duplicated": None, + "created_at": datetime(2023, 1, 1, tzinfo=timezone.utc) + } + self.mock_repository.get_duplicate_check_by_file_id.return_value = mongo_document + + # when + result = self.service.get_duplicate_check_status(self.test_file_id, self.test_user_id) + + # then + self.mock_repository.get_duplicate_check_by_file_id.assert_called_once_with( + self.test_file_id, + self.test_user_id + ) + + assert isinstance(result, FileDuplicateCheckStatusResponse) + assert result.request_id == str(self.test_object_id) + assert result.file_id == self.test_file_id + assert result.is_completed == False + assert result.is_duplicated is None + + def test_get_duplicate_check_status_completed(self): + # given + # 리포지토리 응답 설정 - 완료된 중복 검사 + mongo_document = { + "_id": self.test_object_id, + "file_id": self.test_file_id, + "user_id": self.test_user_id, + "is_completed": True, + "is_duplicated": False, + "created_at": datetime(2023, 1, 1, tzinfo=timezone.utc) + } + self.mock_repository.get_duplicate_check_by_file_id.return_value = mongo_document + + # when + result = self.service.get_duplicate_check_status(self.test_file_id, self.test_user_id) + + # then + self.mock_repository.get_duplicate_check_by_file_id.assert_called_once_with( + self.test_file_id, + self.test_user_id + ) + + assert isinstance(result, FileDuplicateCheckStatusResponse) + assert result.request_id == str(self.test_object_id) + assert result.file_id == self.test_file_id + assert result.is_completed == True + assert result.is_duplicated == False + + def test_get_duplicate_check_status_not_found(self): + # given + # 리포지토리 응답 설정 - 문서 없음 + self.mock_repository.get_duplicate_check_by_file_id.return_value = None + + # when + result = self.service.get_duplicate_check_status(self.test_file_id, self.test_user_id) + + # then + 
self.mock_repository.get_duplicate_check_by_file_id.assert_called_once_with( + self.test_file_id, + self.test_user_id + ) + + assert result is None + + def test_update_duplicate_check_result_success(self): + # given + request = FileDuplicateCheckEmbeddingsRequest( + request_id=self.test_request_id, + embeddings=[1.0, 2.0, 3.0] + ) + + # 요청이 존재함 + check_document = { + "_id": self.test_object_id, + "file_id": self.test_file_id, + "user_id": self.test_user_id, + "is_completed": False, + "is_duplicated": None, + "created_at": datetime(2023, 1, 1, tzinfo=timezone.utc) + } + self.mock_repository.get_duplicate_check_by_id.return_value = check_document + + # 업데이트 응답 + updated_document = { + "_id": self.test_object_id, + "file_id": self.test_file_id, + "user_id": self.test_user_id, + "is_completed": True, + "is_duplicated": False, + "created_at": datetime(2023, 1, 1, tzinfo=timezone.utc), + "updated_at": datetime(2023, 1, 2, tzinfo=timezone.utc) + } + self.mock_repository.update_duplicate_check_result.return_value = updated_document + + # when + result = self.service.update_duplicate_check_result(self.test_request_id, request) + + # then + self.mock_repository.get_duplicate_check_by_id.assert_called_once_with(self.test_request_id) + self.mock_repository.update_duplicate_check_result.assert_called_once_with( + request_id=self.test_request_id, + is_duplicated=False + ) + + assert isinstance(result, FileDuplicateCheckStatusResponse) + assert result.request_id == str(self.test_object_id) + assert result.file_id == self.test_file_id + assert result.is_completed == True + assert result.is_duplicated == False + + def test_update_duplicate_check_result_not_found(self): + # given + request = FileDuplicateCheckEmbeddingsRequest( + request_id=self.test_request_id, + embeddings=[1.0, 2.0, 3.0] + ) + + # 요청이 존재하지 않음 + self.mock_repository.get_duplicate_check_by_id.return_value = None + + # when + result = self.service.update_duplicate_check_result(self.test_request_id, request) + + # 
then + self.mock_repository.get_duplicate_check_by_id.assert_called_once_with(self.test_request_id) + self.mock_repository.update_duplicate_check_result.assert_not_called() + + assert result is None \ No newline at end of file From 3b669c7adebebdd823a5d710e022a312dec03e1e Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 17:30:48 +0900 Subject: [PATCH 14/33] =?UTF-8?q?Feat:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EC=B2=B4=ED=81=AC=20=ED=81=90=20=ED=81=B4=EB=9E=98?= =?UTF-8?q?=EC=8A=A4=EC=9D=98=20=EB=A9=94=EC=8B=9C=EC=A7=80=20=EC=A0=84?= =?UTF-8?q?=EC=86=A1=20=EB=A9=94=EC=84=9C=EB=93=9C=20=EA=B0=9C=EC=84=A0=20?= =?UTF-8?q?=EB=B0=8F=20=EC=B4=88=EA=B8=B0=ED=99=94=20=EB=A9=94=EC=84=9C?= =?UTF-8?q?=EB=93=9C=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/data/FileDuplicateCheckQueue.py | 45 +++++---------------- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/src/main/ai/data/FileDuplicateCheckQueue.py b/src/main/ai/data/FileDuplicateCheckQueue.py index a508a52..d5b2286 100644 --- a/src/main/ai/data/FileDuplicateCheckQueue.py +++ b/src/main/ai/data/FileDuplicateCheckQueue.py @@ -1,36 +1,13 @@ -import os -import json -import boto3 -from dotenv import load_dotenv - -load_dotenv() - - class FileDuplicateCheckQueue: - def __init__(self, sqs_client: boto3.client, queue_url: str): - self.sqs = sqs_client + def __init__(self, sqs_client, queue_url): + self.sqs_client = sqs_client self.queue_url = queue_url - - def send_message(self, request_id: str, user_id: str, s3_bucket: str, s3_key: str): - try: - message_body = { - 'request_type': 'file_duplicate_check_embedding_file', - 'request_id': request_id, - 'user_id': str(user_id), - 'payload': { - 's3_bucket': s3_bucket, - 's3_key': s3_key - } - } - - response = self.sqs.send_message( - QueueUrl=self.queue_url, - MessageGroupId=str(user_id), - 
MessageDeduplicationId=str(request_id), - MessageBody=json.dumps(message_body) - ) - - return response - except Exception as e: - print(f"Error sending message to SQS: {e}") - raise \ No newline at end of file + + def send_message(self, message: str): + """SQS 큐에 메시지를 전송합니다.""" + response = self.sqs_client.send_message( + QueueUrl=self.queue_url, + MessageBody=message + ) + + return response \ No newline at end of file From 5cfaffc54a679b423f35674b3c7c1edb19d5ed99 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 17:30:53 +0900 Subject: [PATCH 15/33] =?UTF-8?q?Feat:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EC=B2=B4=ED=81=AC=20=EB=A6=AC=ED=8F=AC=EC=A7=80?= =?UTF-8?q?=ED=86=A0=EB=A6=AC=EC=9D=98=20=EB=A9=94=EC=84=9C=EB=93=9C=20?= =?UTF-8?q?=EA=B0=9C=EC=84=A0=20=EB=B0=8F=20=EC=83=88=EB=A1=9C=EC=9A=B4=20?= =?UTF-8?q?=EB=A9=94=EC=84=9C=EB=93=9C=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai/data/FileDuplicateCheckRepository.py | 118 +++++++++++------- 1 file changed, 71 insertions(+), 47 deletions(-) diff --git a/src/main/ai/data/FileDuplicateCheckRepository.py b/src/main/ai/data/FileDuplicateCheckRepository.py index 37c0cf1..b1683e8 100644 --- a/src/main/ai/data/FileDuplicateCheckRepository.py +++ b/src/main/ai/data/FileDuplicateCheckRepository.py @@ -1,72 +1,96 @@ -from typing import Optional -from pymongo.collection import Collection -from pymongo import MongoClient from bson import ObjectId from datetime import datetime, timezone class FileDuplicateCheckRepository: - def __init__(self, client: MongoClient): - self.db = client.get_database() - self.collection: Collection = self.db.get_collection('file_duplicate_checks') - self.files_collection: Collection = self.db.get_collection('files') - - def create_duplicate_check_request(self, file_id: str, user_id: str) -> dict: + def __init__(self, mongo_client): + 
self.mongo_client = mongo_client + self.db = mongo_client.get_database() + self.file_checks_collection = self.db["file_duplicate_checks"] + self.files_collection = self.db["files"] + self.file_embeddings_collection = self.db["file_embeddings"] + + def get_file_by_id(self, file_id: str): + """파일 ID로 파일 정보를 조회합니다.""" + try: + file_obj_id = ObjectId(file_id) + return self.files_collection.find_one({"_id": file_obj_id}) + except Exception as e: + return None + + def has_file_embedding(self, file_id: str) -> bool: + """파일의 임베딩 존재 여부를 확인합니다.""" + try: + file_obj_id = ObjectId(file_id) + result = self.file_embeddings_collection.find_one({"file_id": file_obj_id}) + return result is not None + except Exception as e: + return False + + def create_duplicate_check_request(self, file_id: str, user_id: str): + """중복 검사 요청을 생성합니다.""" + now = datetime.now(timezone.utc) document = { "file_id": file_id, "user_id": user_id, "is_completed": False, "is_duplicated": None, - "created_at": self.get_current_time() + "created_at": now } - result = self.collection.insert_one(document) - document = document.copy() - document["_id"] = result.inserted_id - return document - - def get_duplicate_check_by_id(self, request_id: str) -> Optional[dict]: + + result = self.file_checks_collection.insert_one(document) + + return self.file_checks_collection.find_one({"_id": result.inserted_id}) + + def get_duplicate_check_by_file_id(self, file_id: str, user_id: str): + """파일 ID와 사용자 ID로 중복 검사 요청을 조회합니다.""" + return self.file_checks_collection.find_one({ + "file_id": file_id, + "user_id": user_id + }) + + def get_duplicate_check_by_id(self, request_id: str): + """요청 ID로 중복 검사 요청을 조회합니다.""" try: - object_id = ObjectId(request_id) - return self.collection.find_one({"_id": object_id}) - except: + request_obj_id = ObjectId(request_id) + return self.file_checks_collection.find_one({"_id": request_obj_id}) + except Exception as e: return None - - def get_duplicate_check_by_file_id(self, file_id: str, user_id: 
str) -> Optional[dict]: + + def update_file_duplicate_status(self, file_id: str, is_duplicated: bool): + """파일의 중복 상태를 업데이트합니다.""" try: - return self.collection.find_one({ - "file_id": file_id, - "user_id": user_id - }) - except: + file_obj_id = ObjectId(file_id) + result = self.files_collection.update_one( + {"_id": file_obj_id}, + {"$set": {"is_duplicated": is_duplicated}} + ) + + if result.modified_count > 0: + return self.files_collection.find_one({"_id": file_obj_id}) return None - - def update_duplicate_check_result(self, request_id: str, is_duplicated: bool) -> Optional[dict]: + except Exception as e: + return None + + def update_duplicate_check_result(self, request_id: str, is_duplicated: bool): + """중복 검사 결과를 업데이트합니다.""" try: - object_id = ObjectId(request_id) - result = self.collection.update_one( - {"_id": object_id}, + now = datetime.now(timezone.utc) + request_obj_id = ObjectId(request_id) + + result = self.file_checks_collection.update_one( + {"_id": request_obj_id}, { "$set": { "is_completed": True, "is_duplicated": is_duplicated, - "updated_at": self.get_current_time() + "updated_at": now } } ) - if result.modified_count == 0: - return None - - return self.collection.find_one({"_id": object_id}) - except: + if result.modified_count > 0: + return self.file_checks_collection.find_one({"_id": request_obj_id}) return None - - def get_file_by_id(self, file_id: str) -> Optional[dict]: - try: - object_id = ObjectId(file_id) - return self.files_collection.find_one({"_id": object_id}) - except: - return None - - def get_current_time(self): - return datetime.now(timezone.utc) \ No newline at end of file + except Exception as e: + return None \ No newline at end of file From 626a1a5227fa645f91a2611c4521d6cc3b590542 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 17:31:05 +0900 Subject: [PATCH 16/33] =?UTF-8?q?Feat:=20=EC=98=88=EC=99=B8=20=EC=B2=98?= 
=?UTF-8?q?=EB=A6=AC=20=ED=8C=A8=ED=82=A4=EC=A7=80=20=EB=B0=8F=20BadReques?= =?UTF-8?q?tException,=20FileNotFoundException=20=ED=81=B4=EB=9E=98?= =?UTF-8?q?=EC=8A=A4=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/exception/BadRequestException.py | 13 +++++++++++++ src/main/ai/exception/FileNotFoundException.py | 13 +++++++++++++ src/main/ai/exception/__init__.py | 3 +++ 3 files changed, 29 insertions(+) create mode 100644 src/main/ai/exception/BadRequestException.py create mode 100644 src/main/ai/exception/FileNotFoundException.py create mode 100644 src/main/ai/exception/__init__.py diff --git a/src/main/ai/exception/BadRequestException.py b/src/main/ai/exception/BadRequestException.py new file mode 100644 index 0000000..7792b38 --- /dev/null +++ b/src/main/ai/exception/BadRequestException.py @@ -0,0 +1,13 @@ +from fastapi import HTTPException + + +class BadRequestException(HTTPException): + """ + 요청이 잘못되었을 때 발생하는 예외 + """ + + def __init__(self, detail: str = "Bad request"): + super().__init__( + status_code=400, + detail=detail + ) \ No newline at end of file diff --git a/src/main/ai/exception/FileNotFoundException.py b/src/main/ai/exception/FileNotFoundException.py new file mode 100644 index 0000000..a5ff576 --- /dev/null +++ b/src/main/ai/exception/FileNotFoundException.py @@ -0,0 +1,13 @@ +from fastapi import HTTPException + + +class FileNotFoundException(HTTPException): + """ + 파일을 찾을 수 없을 때 발생하는 예외 + """ + + def __init__(self, file_id: str): + super().__init__( + status_code=404, + detail=f"File with ID '{file_id}' not found" + ) \ No newline at end of file diff --git a/src/main/ai/exception/__init__.py b/src/main/ai/exception/__init__.py new file mode 100644 index 0000000..b25c7c4 --- /dev/null +++ b/src/main/ai/exception/__init__.py @@ -0,0 +1,3 @@ +""" +예외 패키지 +""" \ No newline at end of file From 622f0af416763d7f619841419fbc917655820375 Mon Sep 17 00:00:00 2001 From: Coldot 
<41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 17:31:14 +0900 Subject: [PATCH 17/33] =?UTF-8?q?Feat:=20CategoryRecommendationResultReque?= =?UTF-8?q?st=EC=97=90=20request=5Fid=20=ED=95=84=EB=93=9C=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/models/CategoryRecommendation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/ai/models/CategoryRecommendation.py b/src/main/ai/models/CategoryRecommendation.py index 06b8c91..504440e 100644 --- a/src/main/ai/models/CategoryRecommendation.py +++ b/src/main/ai/models/CategoryRecommendation.py @@ -18,4 +18,5 @@ class CategoryRecommendationStatusResponse(BaseModel): class CategoryRecommendationResultRequest(BaseModel): + request_id: str predicted_category: str \ No newline at end of file From c81be915b07463a2baecd7a24120af7f613884a6 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 17:31:20 +0900 Subject: [PATCH 18/33] =?UTF-8?q?Feat:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=EB=AA=A8=EB=8D=B8=20=EB=B0=8F?= =?UTF-8?q?=20=EC=9A=94=EC=B2=AD=20=ED=81=B4=EB=9E=98=EC=8A=A4=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80=20=EB=B0=8F=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/models/FileDuplicateCheck.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/main/ai/models/FileDuplicateCheck.py b/src/main/ai/models/FileDuplicateCheck.py index f4ab2a4..e9950b6 100644 --- a/src/main/ai/models/FileDuplicateCheck.py +++ b/src/main/ai/models/FileDuplicateCheck.py @@ -1,23 +1,34 @@ from pydantic import BaseModel, Field -from typing import Optional, List, Any +from typing import Optional, List +from datetime import datetime class FileDuplicateCheckRequest(BaseModel): + """파일 중복 검사 요청 모델""" user_id: str file_id: str class 
FileDuplicateCheckResponse(BaseModel): + """파일 중복 검사 응답 모델""" request_id: str class FileDuplicateCheckStatusResponse(BaseModel): + """파일 중복 검사 상태 응답 모델""" request_id: str file_id: str is_completed: bool is_duplicated: Optional[bool] = None -class FileDuplicateCheckEmbeddingsRequest(BaseModel): +class FileDuplicateCheckResultRequest(BaseModel): + """파일 중복 검사 결과 요청 모델""" request_id: str - embeddings: List[Any] \ No newline at end of file + is_duplicated: bool + + +class FileDuplicateCheckEmbeddingsRequest(BaseModel): + """파일 임베딩 저장 요청 모델""" + file_id: str + embeddings: List[float] \ No newline at end of file From 1ebb9f5cd5afeb34511c71a07e68e6c404dca0a0 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 17:31:24 +0900 Subject: [PATCH 19/33] =?UTF-8?q?Feat:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EA=B2=80=EC=82=AC=20API=20=EB=B0=8F=20=EC=B9=B4?= =?UTF-8?q?=ED=85=8C=EA=B3=A0=EB=A6=AC=20=EC=B6=94=EC=B2=9C=20=EA=B2=B0?= =?UTF-8?q?=EA=B3=BC=20=EC=97=85=EB=8D=B0=EC=9D=B4=ED=8A=B8=20=EC=97=94?= =?UTF-8?q?=EB=93=9C=ED=8F=AC=EC=9D=B8=ED=8A=B8=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/router/AIInternalAPIRouter.py | 55 ++++++++++++++++------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/src/main/ai/router/AIInternalAPIRouter.py b/src/main/ai/router/AIInternalAPIRouter.py index d4c284a..c73101a 100644 --- a/src/main/ai/router/AIInternalAPIRouter.py +++ b/src/main/ai/router/AIInternalAPIRouter.py @@ -4,7 +4,7 @@ from src.main.ai.di.dependencies import get_category_recommendation_service, get_file_duplicate_check_service from src.main.ai.models.CategoryRecommendation import CategoryRecommendationResultRequest, CategoryRecommendationStatusResponse from src.main.ai.service.CategoryRecommendationService import CategoryRecommendationService -from src.main.ai.models.FileDuplicateCheck import 
FileDuplicateCheckStatusResponse, FileDuplicateCheckEmbeddingsRequest +from src.main.ai.models.FileDuplicateCheck import FileDuplicateCheckStatusResponse, FileDuplicateCheckEmbeddingsRequest, FileDuplicateCheckRequest, FileDuplicateCheckResponse, FileDuplicateCheckResultRequest from src.main.ai.service.FileDuplicateCheckService import FileDuplicateCheckService @@ -14,36 +14,51 @@ ) -@router.post("/category-recommendation-results/{request_id}", response_model=CategoryRecommendationStatusResponse) +@router.post("/category-recommendation-results", response_model=dict) async def update_category_recommendation_result( - request_id: str, request: CategoryRecommendationResultRequest, service: CategoryRecommendationService = Depends(get_category_recommendation_service) ): - result = service.update_recommendation_result(request_id, request) + result = service.update_recommendation_result(request.request_id, request) if not result: return JSONResponse( status_code=status.HTTP_404_NOT_FOUND, content={ - "status": 404, - "message": "요청을 찾을 수 없습니다.", - "detail": "존재하지 않는 ID입니다." + "detail": "요청을 찾을 수 없습니다. 존재하지 않는 ID입니다." } ) - return result + return {"success": True} + + +@router.post("/file-duplicate-checks", response_model=FileDuplicateCheckResponse) +async def create_file_duplicate_check( + request: FileDuplicateCheckRequest, + service: FileDuplicateCheckService = Depends(get_file_duplicate_check_service) +): + try: + result = service.create_duplicate_check_request(request) + return result + except HTTPException as e: + raise e + except Exception as e: + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content={ + "status": 400, + "message": "잘못된 요청입니다.", + "detail": "명세에 맞지 않은 요청입니다." 
+ } + ) -@router.post("/file-duplicate-check-embeddings", response_model=FileDuplicateCheckStatusResponse) +@router.post("/file-duplicate-check-embeddings", response_model=dict) async def update_file_duplicate_check_result( - request: FileDuplicateCheckEmbeddingsRequest, + request: FileDuplicateCheckResultRequest, service: FileDuplicateCheckService = Depends(get_file_duplicate_check_service) ): - """ - 업로드한 파일의 중복 검사 결과 업데이트 - """ - result = service.update_duplicate_check_result(request.request_id, request) + result = service.update_duplicate_check_result(request.request_id, request.is_duplicated) if not result: return JSONResponse( @@ -54,5 +69,13 @@ async def update_file_duplicate_check_result( "detail": "존재하지 않는 ID입니다." } ) - - return result + + # 요청 정보 조회 + check = service.repository.get_duplicate_check_by_id(request.request_id) + + return { + "request_id": request.request_id, + "file_id": check["file_id"], + "is_completed": check["is_completed"], + "is_duplicated": check["is_duplicated"] + } From 8597e9ff20116b2966699582a9eb6b86a4d6d59a Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 17:31:29 +0900 Subject: [PATCH 20/33] =?UTF-8?q?=ED=8C=8C=EC=9D=BC=20=EC=A4=91=EB=B3=B5?= =?UTF-8?q?=20=EA=B2=80=EC=82=AC=20API=EC=97=90=EC=84=9C=20=EC=9A=94?= =?UTF-8?q?=EC=B2=AD=20=EC=B2=98=EB=A6=AC=20=EB=A9=94=EC=84=9C=EB=93=9C=20?= =?UTF-8?q?=EC=A0=9C=EA=B1=B0=20=EB=B0=8F=20=EC=A3=BC=EC=84=9D=20=EC=88=98?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/router/AIPublicAPIRouter.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/src/main/ai/router/AIPublicAPIRouter.py b/src/main/ai/router/AIPublicAPIRouter.py index 14beb95..e507e74 100644 --- a/src/main/ai/router/AIPublicAPIRouter.py +++ b/src/main/ai/router/AIPublicAPIRouter.py @@ -5,7 +5,7 @@ from src.main.ai.di.dependencies import 
get_category_recommendation_service, get_file_duplicate_check_service from src.main.ai.models.CategoryRecommendation import CategoryRecommendationRequest, CategoryRecommendationResponse, CategoryRecommendationStatusResponse from src.main.ai.service.CategoryRecommendationService import CategoryRecommendationService -from src.main.ai.models.FileDuplicateCheck import FileDuplicateCheckStatusResponse, FileDuplicateCheckRequest, FileDuplicateCheckResponse +from src.main.ai.models.FileDuplicateCheck import FileDuplicateCheckStatusResponse from src.main.ai.service.FileDuplicateCheckService import FileDuplicateCheckService @@ -47,25 +47,6 @@ async def get_category_recommendation_status( return result -@router.post("/file-duplicate-checks", response_model=FileDuplicateCheckResponse) -async def create_file_duplicate_check_request( - request: FileDuplicateCheckRequest, - service: FileDuplicateCheckService = Depends(get_file_duplicate_check_service) -): - """ - 업로드한 파일의 중복 검사 시작 요청 - """ - try: - return service.create_duplicate_check_request(request) - except HTTPException as e: - raise e - except Exception as e: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail="잘못된 요청입니다. 명세에 맞지 않은 요청입니다." 
- ) - - @router.get("/file-duplicate-checks", response_model=FileDuplicateCheckStatusResponse) async def get_file_duplicate_check_status( file_id: str, @@ -73,7 +54,7 @@ async def get_file_duplicate_check_status( service: FileDuplicateCheckService = Depends(get_file_duplicate_check_service) ): """ - 업로드한 파일의 중복 검사 결과 조회 + 파일 중복 검사 결과 조회 """ result = service.get_duplicate_check_status(file_id, str(user_id)) From a4dc03e3522cdc6d44f6325ed78c6c34bda464be Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 17:31:37 +0900 Subject: [PATCH 21/33] =?UTF-8?q?Feat:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=EC=84=9C=EB=B9=84=EC=8A=A4?= =?UTF-8?q?=EC=9D=98=20SQS=20=EB=A9=94=EC=8B=9C=EC=A7=80=20=EB=B0=9C?= =?UTF-8?q?=EC=86=A1=20=EB=A1=9C=EC=A7=81=20=EA=B0=9C=EC=84=A0=20=EB=B0=8F?= =?UTF-8?q?=20=EC=A4=91=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=EC=9A=94=EC=B2=AD?= =?UTF-8?q?=20=EC=B2=98=EB=A6=AC=20=ED=9D=90=EB=A6=84=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai/service/FileDuplicateCheckService.py | 119 +++++++++--------- 1 file changed, 63 insertions(+), 56 deletions(-) diff --git a/src/main/ai/service/FileDuplicateCheckService.py b/src/main/ai/service/FileDuplicateCheckService.py index 769faca..4fe1e63 100644 --- a/src/main/ai/service/FileDuplicateCheckService.py +++ b/src/main/ai/service/FileDuplicateCheckService.py @@ -1,25 +1,26 @@ -from typing import Optional -import uuid -from bson import ObjectId from fastapi import HTTPException, status +from bson import ObjectId +import json -from src.main.ai.data.FileDuplicateCheckRepository import FileDuplicateCheckRepository -from src.main.ai.data.FileDuplicateCheckQueue import FileDuplicateCheckQueue from src.main.ai.models.FileDuplicateCheck import ( FileDuplicateCheckRequest, FileDuplicateCheckResponse, FileDuplicateCheckStatusResponse, + FileDuplicateCheckResultRequest, 
FileDuplicateCheckEmbeddingsRequest ) class FileDuplicateCheckService: - def __init__(self, repository: FileDuplicateCheckRepository, queue: FileDuplicateCheckQueue): + def __init__(self, repository, sqs_service): self.repository = repository - self.queue = queue - + self.sqs_service = sqs_service + def create_duplicate_check_request(self, request: FileDuplicateCheckRequest) -> FileDuplicateCheckResponse: - # 파일이 존재하는지 확인 + """ + 파일 중복 검사 요청을 생성하고 SQS에 메시지를 발송합니다. + """ + # 1. 파일 존재 여부 확인 file = self.repository.get_file_by_id(request.file_id) if not file: raise HTTPException( @@ -27,67 +28,73 @@ def create_duplicate_check_request(self, request: FileDuplicateCheckRequest) -> detail="파일을 찾을 수 없습니다. 존재하지 않는 ID입니다." ) - # MongoDB에 저장 - ObjectId 자동 생성 - document = self.repository.create_duplicate_check_request( + # 2. 중복 검사 결과가 있는지 확인 + existing_check = self.repository.get_duplicate_check_by_file_id(request.file_id, request.user_id) + if existing_check: + return FileDuplicateCheckResponse(request_id=str(existing_check["_id"])) + + # 3. 중복 검사 요청 생성 + result = self.repository.create_duplicate_check_request( file_id=request.file_id, user_id=request.user_id ) - # request_id는 MongoDB의 _id를 문자열로 변환 - request_id = str(document["_id"]) + # 4. 
SQS에 메시지 발송 + file_data = { + "s3_bucket": file["s3_bucket"], + "s3_key": file["s3_key"] + } - # S3 버킷과 키 정보를 가져옴 - s3_bucket = file.get("s3_url", "").split(".s3.amazonaws.com/")[0].replace("https://", "") - s3_key = file.get("file_key", "") + message = { + "request_type": "file_duplicate_check_embedding_file", + "request_id": str(result["_id"]), + "user_id": request.user_id, + "payload": file_data + } - # 메시지 발행 - self.queue.send_message( - request_id=request_id, - user_id=request.user_id, - s3_bucket=s3_bucket, - s3_key=s3_key - ) + self.sqs_service.send_message(json.dumps(message)) - return FileDuplicateCheckResponse(request_id=request_id) - - def get_duplicate_check_status(self, file_id: str, user_id: str) -> Optional[FileDuplicateCheckStatusResponse]: - result = self.repository.get_duplicate_check_by_file_id(file_id, user_id) + # 5. 요청 ID 응답 + return FileDuplicateCheckResponse(request_id=str(result["_id"])) + + def get_duplicate_check_status(self, file_id: str, user_id: str) -> FileDuplicateCheckStatusResponse: + """ + 파일 중복 검사 상태를 조회합니다. + """ + # 1. 중복 검사 요청 조회 + check = self.repository.get_duplicate_check_by_file_id(file_id, user_id) - if not result: + # 2. 요청이 없으면 None 반환 + if not check: return None - + + # 3. 요청이 있으면 상태 반환 return FileDuplicateCheckStatusResponse( - request_id=str(result["_id"]), - file_id=result["file_id"], - is_completed=result["is_completed"], - is_duplicated=result.get("is_duplicated") + request_id=str(check["_id"]), + file_id=check["file_id"], + is_completed=check["is_completed"], + is_duplicated=check["is_duplicated"] ) - - def update_duplicate_check_result(self, request_id: str, request: FileDuplicateCheckEmbeddingsRequest) -> Optional[FileDuplicateCheckStatusResponse]: - # 요청이 존재하는지 확인 - check_request = self.repository.get_duplicate_check_by_id(request_id) - if not check_request: - return None + + def update_duplicate_check_result(self, request_id: str, is_duplicated: bool) -> bool: + """ + 파일 중복 검사 결과를 업데이트합니다. + """ + # 1. 
중복 검사 요청 조회 + check = self.repository.get_duplicate_check_by_id(request_id) - # 임베딩을 기반으로 중복 검사 결과 결정 - 간단한 예시 - is_duplicated = False - if request.embeddings and len(request.embeddings) > 0: - # 실제로는 여기서 임베딩 벡터 유사도 검사 등을 수행해야 함 - # 일단은 간단하게 임베딩이 비어있지 않으면 중복이 아니라고 처리 - is_duplicated = False + # 2. 요청이 없으면 False 반환 + if not check: + return False - # 결과 업데이트 - updated = self.repository.update_duplicate_check_result( + # 3. 파일 중복 상태 업데이트 + file_result = self.repository.update_file_duplicate_status(check["file_id"], is_duplicated) + + # 4. 중복 검사 결과 업데이트 + result = self.repository.update_duplicate_check_result( request_id=request_id, is_duplicated=is_duplicated ) - if not updated: - return None - - return FileDuplicateCheckStatusResponse( - request_id=str(updated["_id"]), - file_id=updated["file_id"], - is_completed=updated["is_completed"], - is_duplicated=updated.get("is_duplicated") - ) \ No newline at end of file + # 5. 업데이트 성공 여부 반환 + return result is not None \ No newline at end of file From 1330f36026d665cec926df2649bb970ff2154bb7 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 17:31:46 +0900 Subject: [PATCH 22/33] =?UTF-8?q?Chore:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=ED=81=90=EC=9D=98=20SQS=20URL?= =?UTF-8?q?=EC=9D=84=20=EC=9A=94=EC=B2=AD=20=ED=81=90=20URL=EB=A1=9C=20?= =?UTF-8?q?=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/di/dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/ai/di/dependencies.py b/src/main/ai/di/dependencies.py index 190a494..f5a2956 100644 --- a/src/main/ai/di/dependencies.py +++ b/src/main/ai/di/dependencies.py @@ -49,7 +49,7 @@ def get_file_duplicate_check_queue(): else: sqs_client = boto3.client('sqs', region_name=os.getenv('AWS_REGION')) - queue_url = os.getenv('SQS_FILE_DUPLICATE_CHECK_QUEUE_URL') + queue_url = 
os.getenv('SQS_REQUEST_QUEUE_URL') return FileDuplicateCheckQueue(sqs_client, queue_url) From cf73aab706e3af64ba68bf448e0345e555ec7bf5 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:19:30 +0900 Subject: [PATCH 23/33] =?UTF-8?q?Feat:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=ED=81=90=EC=9D=98=20=EB=A9=94?= =?UTF-8?q?=EC=8B=9C=EC=A7=80=20=EC=A0=84=EC=86=A1=20=EB=A9=94=EC=84=9C?= =?UTF-8?q?=EB=93=9C=EC=97=90=20=EC=82=AC=EC=9A=A9=EC=9E=90=20ID=20?= =?UTF-8?q?=EB=B0=8F=20=EC=9A=94=EC=B2=AD=20ID=20=EB=A7=A4=EA=B0=9C?= =?UTF-8?q?=EB=B3=80=EC=88=98=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/data/FileDuplicateCheckQueue.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/ai/data/FileDuplicateCheckQueue.py b/src/main/ai/data/FileDuplicateCheckQueue.py index d5b2286..948f2c5 100644 --- a/src/main/ai/data/FileDuplicateCheckQueue.py +++ b/src/main/ai/data/FileDuplicateCheckQueue.py @@ -3,11 +3,16 @@ def __init__(self, sqs_client, queue_url): self.sqs_client = sqs_client self.queue_url = queue_url - def send_message(self, message: str): + def send_message(self, message: str, user_id: str, request_id: str): """SQS 큐에 메시지를 전송합니다.""" + print(message) response = self.sqs_client.send_message( QueueUrl=self.queue_url, - MessageBody=message + MessageGroupId=str(user_id), + MessageDeduplicationId=str(request_id), + MessageBody=message, + ) + return response \ No newline at end of file From 5a3618ac18bb9ce355f5bc90fd8b866ff1837e35 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:19:37 +0900 Subject: [PATCH 24/33] =?UTF-8?q?=EB=A1=9C=EA=B7=B8=20=EC=B6=94=EA=B0=80:?= =?UTF-8?q?=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91=EB=B3=B5=20=EA=B2=80=EC=82=AC?= =?UTF-8?q?=20=EC=84=9C=EB=B9=84=EC=8A=A4=EC=97=90=20=EB=A1=9C=EA=B9=85=20?= 
=?UTF-8?q?=EA=B8=B0=EB=8A=A5=EC=9D=84=20=EC=B6=94=EA=B0=80=ED=95=98?= =?UTF-8?q?=EC=97=AC=20=EC=A4=91=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=EC=9A=94?= =?UTF-8?q?=EC=B2=AD=20=EB=B0=8F=20SQS=20=EB=A9=94=EC=8B=9C=EC=A7=80=20?= =?UTF-8?q?=EB=B0=9C=EC=86=A1=20=EC=83=81=ED=83=9C=EB=A5=BC=20=EA=B8=B0?= =?UTF-8?q?=EB=A1=9D=ED=95=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai/service/FileDuplicateCheckService.py | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/main/ai/service/FileDuplicateCheckService.py b/src/main/ai/service/FileDuplicateCheckService.py index 4fe1e63..ce0b6c1 100644 --- a/src/main/ai/service/FileDuplicateCheckService.py +++ b/src/main/ai/service/FileDuplicateCheckService.py @@ -1,6 +1,7 @@ from fastapi import HTTPException, status from bson import ObjectId import json +import logging from src.main.ai.models.FileDuplicateCheck import ( FileDuplicateCheckRequest, @@ -10,6 +11,9 @@ FileDuplicateCheckEmbeddingsRequest ) +# 로거 설정 +logger = logging.getLogger(__name__) + class FileDuplicateCheckService: def __init__(self, repository, sqs_service): @@ -31,7 +35,11 @@ def create_duplicate_check_request(self, request: FileDuplicateCheckRequest) -> # 2. 중복 검사 결과가 있는지 확인 existing_check = self.repository.get_duplicate_check_by_file_id(request.file_id, request.user_id) if existing_check: - return FileDuplicateCheckResponse(request_id=str(existing_check["_id"])) + logger.info(f"이미 중복 검사 요청이 존재합니다. request_id: {str(existing_check['_id'])}, file_id: {request.file_id}, user_id: {request.user_id}") + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="이미 중복 검사 요청이 존재합니다." + ) # 3. 
중복 검사 요청 생성 result = self.repository.create_duplicate_check_request( @@ -52,7 +60,10 @@ def create_duplicate_check_request(self, request: FileDuplicateCheckRequest) -> "payload": file_data } - self.sqs_service.send_message(json.dumps(message)) + message_json = json.dumps(message) + logger.info(f"SQS 메시지 발송: {message_json}") + response = self.sqs_service.send_message(message_json) + logger.info(f"SQS 메시지 발송 응답: {response}") # 5. 요청 ID 응답 return FileDuplicateCheckResponse(request_id=str(result["_id"])) @@ -66,9 +77,11 @@ def get_duplicate_check_status(self, file_id: str, user_id: str) -> FileDuplicat # 2. 요청이 없으면 None 반환 if not check: + logger.info(f"중복 검사 요청을 찾을 수 없습니다. file_id: {file_id}, user_id: {user_id}") return None # 3. 요청이 있으면 상태 반환 + logger.info(f"중복 검사 요청을 찾았습니다. request_id: {str(check['_id'])}, is_completed: {check['is_completed']}") return FileDuplicateCheckStatusResponse( request_id=str(check["_id"]), file_id=check["file_id"], @@ -85,16 +98,21 @@ def update_duplicate_check_result(self, request_id: str, is_duplicated: bool) -> # 2. 요청이 없으면 False 반환 if not check: + logger.info(f"중복 검사 요청을 찾을 수 없습니다. request_id: {request_id}") return False # 3. 파일 중복 상태 업데이트 + logger.info(f"파일 중복 상태 업데이트 시작. file_id: {check['file_id']}, is_duplicated: {is_duplicated}") file_result = self.repository.update_file_duplicate_status(check["file_id"], is_duplicated) # 4. 중복 검사 결과 업데이트 + logger.info(f"중복 검사 결과 업데이트 시작. request_id: {request_id}, is_duplicated: {is_duplicated}") result = self.repository.update_duplicate_check_result( request_id=request_id, is_duplicated=is_duplicated ) # 5. 
업데이트 성공 여부 반환 - return result is not None \ No newline at end of file + is_success = result is not None + logger.info(f"중복 검사 결과 업데이트 결과: {is_success}") + return is_success \ No newline at end of file From d6bdee3ab92bd14a9ea3c1bede6b87c2968b98ba Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:20:43 +0900 Subject: [PATCH 25/33] =?UTF-8?q?=ED=8C=8C=EC=9D=BC=20=EC=A4=91=EB=B3=B5?= =?UTF-8?q?=20=EA=B2=80=EC=82=AC=20API=EC=9D=98=20=EC=9A=94=EC=B2=AD=20?= =?UTF-8?q?=EC=B2=98=EB=A6=AC=20=EB=A1=9C=EC=A7=81=20=EA=B0=84=EC=86=8C?= =?UTF-8?q?=ED=99=94:=20=EC=98=88=EC=99=B8=20=EC=B2=98=EB=A6=AC=20?= =?UTF-8?q?=EB=B8=94=EB=A1=9D=20=EC=A0=9C=EA=B1=B0=20=EB=B0=8F=20=EA=B2=B0?= =?UTF-8?q?=EA=B3=BC=20=EB=B0=98=ED=99=98=20=EB=B0=A9=EC=8B=9D=20=EC=88=98?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/router/AIInternalAPIRouter.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/main/ai/router/AIInternalAPIRouter.py b/src/main/ai/router/AIInternalAPIRouter.py index c73101a..e20fee3 100644 --- a/src/main/ai/router/AIInternalAPIRouter.py +++ b/src/main/ai/router/AIInternalAPIRouter.py @@ -37,20 +37,8 @@ async def create_file_duplicate_check( request: FileDuplicateCheckRequest, service: FileDuplicateCheckService = Depends(get_file_duplicate_check_service) ): - try: - result = service.create_duplicate_check_request(request) - return result - except HTTPException as e: - raise e - except Exception as e: - return JSONResponse( - status_code=status.HTTP_400_BAD_REQUEST, - content={ - "status": 400, - "message": "잘못된 요청입니다.", - "detail": "명세에 맞지 않은 요청입니다." 
- } - ) + result = service.create_duplicate_check_request(request) + return result @router.post("/file-duplicate-check-embeddings", response_model=dict) From 2596f96107ead248f0e734f5b95b54e83fdca4fa Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:20:51 +0900 Subject: [PATCH 26/33] =?UTF-8?q?Fix:=20=EC=95=A0=ED=94=8C=EB=A6=AC?= =?UTF-8?q?=EC=BC=80=EC=9D=B4=EC=85=98=EC=97=90=20=EB=A1=9C=EA=B9=85=20?= =?UTF-8?q?=EC=84=A4=EC=A0=95=EC=9D=84=20=EC=B6=94=EA=B0=80=ED=95=98?= =?UTF-8?q?=EC=97=AC=20=EC=A0=95=EB=B3=B4=20=EB=A1=9C=EA=B7=B8=EB=A5=BC=20?= =?UTF-8?q?=EA=B8=B0=EB=A1=9D=ED=95=98=EB=8F=84=EB=A1=9D=20=EA=B5=AC?= =?UTF-8?q?=EC=84=B1=ED=95=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/app.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/app.py b/src/app.py index 63ffeb1..b9920c7 100644 --- a/src/app.py +++ b/src/app.py @@ -3,12 +3,20 @@ from fastapi.exceptions import RequestValidationError from fastapi.responses import JSONResponse from dotenv import load_dotenv +import logging from src.router import router load_dotenv() +# 로깅 설정 +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[logging.StreamHandler()] +) + origins = [ "https://d6jo3bhmz1u5k.cloudfront.net", "https://localhost:5173", From 21b18477c5d53a508c7f3389b94a567d6658280a Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:26:48 +0900 Subject: [PATCH 27/33] =?UTF-8?q?Test:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=EB=A6=AC=ED=8F=AC=EC=A7=80?= =?UTF-8?q?=ED=86=A0=EB=A6=AC=EC=9D=98=20mock=20=EA=B0=9D=EC=B2=B4?= =?UTF-8?q?=EB=A5=BC=20=EA=B0=9C=EC=84=A0=ED=95=98=EA=B3=A0=20find=5Fone?= =?UTF-8?q?=20=EA=B2=B0=EA=B3=BC=EB=A5=BC=20=EC=84=A4=EC=A0=95=ED=95=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- .../test_file_duplicate_check_repository.py | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/tests/ai/data/test_file_duplicate_check_repository.py b/src/tests/ai/data/test_file_duplicate_check_repository.py index 716ecae..16c457a 100644 --- a/src/tests/ai/data/test_file_duplicate_check_repository.py +++ b/src/tests/ai/data/test_file_duplicate_check_repository.py @@ -11,9 +11,19 @@ def setup_method(self): # 목업 MongoDB 클라이언트 생성 self.mock_collection = MagicMock() self.mock_files_collection = MagicMock() + self.mock_embeddings_collection = MagicMock() self.mock_db = MagicMock() - self.mock_db.get_collection.side_effect = lambda name: self.mock_collection if name == 'file_duplicate_checks' else self.mock_files_collection + # 컬렉션 이름에 따라 적절한 mock 객체 반환 + def get_collection_side_effect(name): + if name == 'file_duplicate_checks': + return self.mock_collection + elif name == 'files': + return self.mock_files_collection + elif name == 'file_embeddings': + return self.mock_embeddings_collection + + self.mock_db.get_collection.side_effect = get_collection_side_effect self.mock_client = MagicMock() self.mock_client.get_database.return_value = self.mock_db @@ -46,6 +56,17 @@ def test_create_duplicate_check_request(self): } self.mock_collection.insert_one.return_value.inserted_id = self.test_object_id + # mock find_one 결과 설정 + expected_result = { + "_id": self.test_object_id, + "file_id": self.test_file_id, + "user_id": self.test_user_id, + "is_completed": False, + "is_duplicated": None, + "created_at": self.test_time + } + self.mock_collection.find_one.return_value = expected_result + # when result = self.repository.create_duplicate_check_request(self.test_file_id, self.test_user_id) From 13fa31e24f8648aec9edee0632e0a3671abd37df Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:26:56 +0900 Subject: [PATCH 28/33] 
=?UTF-8?q?Test:=20=EC=B9=B4=ED=85=8C=EA=B3=A0?= =?UTF-8?q?=EB=A6=AC=20=EC=B6=94=EC=B2=9C=20=EB=B0=8F=20=ED=8C=8C=EC=9D=BC?= =?UTF-8?q?=20=EC=A4=91=EB=B3=B5=20=EA=B2=80=EC=82=AC=20API=EC=9D=98=20?= =?UTF-8?q?=EC=9A=94=EC=B2=AD=20=EC=B2=98=EB=A6=AC=20=EB=A1=9C=EC=A7=81=20?= =?UTF-8?q?=EC=88=98=EC=A0=95=20=EB=B0=8F=20=EC=83=88=EB=A1=9C=EC=9A=B4=20?= =?UTF-8?q?=ED=85=8C=EC=8A=A4=ED=8A=B8=20=EC=BC=80=EC=9D=B4=EC=8A=A4=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai/router/test_ai_internal_api_router.py | 149 +++++++++++++----- 1 file changed, 112 insertions(+), 37 deletions(-) diff --git a/src/tests/ai/router/test_ai_internal_api_router.py b/src/tests/ai/router/test_ai_internal_api_router.py index 7a66a17..34a857e 100644 --- a/src/tests/ai/router/test_ai_internal_api_router.py +++ b/src/tests/ai/router/test_ai_internal_api_router.py @@ -3,8 +3,9 @@ from fastapi import FastAPI, HTTPException from fastapi.testclient import TestClient -from src.main.ai.models.CategoryRecommendation import CategoryRecommendationStatusResponse -from src.main.ai.models.FileDuplicateCheck import FileDuplicateCheckStatusResponse +from src.main.ai.models.CategoryRecommendation import CategoryRecommendationResultRequest +from src.main.ai.models.FileDuplicateCheck import FileDuplicateCheckResultRequest, FileDuplicateCheckRequest, FileDuplicateCheckResponse +from src.main.ai.service.FileDuplicateCheckService import FileDuplicateCheckService from src.main.ai.router.AIInternalAPIRouter import router as internal_router @@ -28,97 +29,171 @@ def setup_method(self): def test_update_category_recommendation_result_success(self, client): # given request_data = { + "request_id": self.test_request_id, "predicted_category": self.test_category } # 서비스 응답 모의 설정 with patch('src.main.ai.service.CategoryRecommendationService.CategoryRecommendationService.update_recommendation_result') as mock_service: # 서비스 응답 설정 - 
mock_service.return_value = CategoryRecommendationStatusResponse( - request_id=self.test_request_id, - is_completed=True, - predicted_category=self.test_category - ) + mock_service.return_value = True # when - response = client.post(f"/ai-proxy/category-recommendation-results/{self.test_request_id}", json=request_data) + response = client.post("/ai-proxy/category-recommendation-results", json=request_data) # then assert response.status_code == 200 - assert response.json() == { - "request_id": self.test_request_id, - "is_completed": True, - "predicted_category": self.test_category - } + assert response.json() == {"success": True} - mock_service.assert_called_once() + # 직접 호출 파라미터 체크 대신 호출 횟수만 확인 + assert mock_service.call_count == 1 def test_update_category_recommendation_result_not_found(self, client): # given request_data = { + "request_id": self.test_request_id, "predicted_category": self.test_category } # 서비스 응답 모의 설정 with patch('src.main.ai.service.CategoryRecommendationService.CategoryRecommendationService.update_recommendation_result') as mock_service: # 서비스 응답 설정 - 요청 없음 - mock_service.return_value = None + mock_service.return_value = False # when - response = client.post(f"/ai-proxy/category-recommendation-results/{self.test_request_id}", json=request_data) + response = client.post("/ai-proxy/category-recommendation-results", json=request_data) # then assert response.status_code == 404 assert response.json() == { - "status": 404, - "message": "요청을 찾을 수 없습니다.", - "detail": "존재하지 않는 ID입니다." + "detail": "요청을 찾을 수 없습니다. 존재하지 않는 ID입니다." 
} - mock_service.assert_called_once() + # 직접 호출 파라미터 체크 대신 호출 횟수만 확인 + assert mock_service.call_count == 1 - def test_update_file_duplicate_check_embeddings_success(self, client): + def test_create_file_duplicate_check_success(self, client): # given request_data = { - "request_id": self.test_request_id, - "embeddings": [1.0, 2.0, 3.0] + "user_id": self.test_user_id, + "file_id": self.test_file_id } # 서비스 응답 모의 설정 - with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.update_duplicate_check_result') as mock_service: + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.create_duplicate_check_request') as mock_service: # 서비스 응답 설정 - mock_service.return_value = FileDuplicateCheckStatusResponse( - request_id=self.test_request_id, - file_id=self.test_file_id, - is_completed=True, - is_duplicated=False - ) + mock_service.return_value = FileDuplicateCheckResponse(request_id=self.test_request_id) # when - response = client.post("/ai-proxy/file-duplicate-check-embeddings", json=request_data) + response = client.post("/ai-proxy/file-duplicate-checks", json=request_data) # then assert response.status_code == 200 + assert response.json() == {"request_id": self.test_request_id} + + mock_service.assert_called_once() + + def test_create_file_duplicate_check_file_not_found(self, client): + # given + request_data = { + "user_id": self.test_user_id, + "file_id": self.test_file_id + } + + # 서비스 응답 모의 설정 + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.create_duplicate_check_request') as mock_service: + # 서비스 응답 설정 - 파일 없음 + mock_service.side_effect = HTTPException( + status_code=404, + detail="파일을 찾을 수 없습니다. 존재하지 않는 ID입니다." + ) + + # when + response = client.post("/ai-proxy/file-duplicate-checks", json=request_data) + + # then + assert response.status_code == 404 assert response.json() == { - "request_id": self.test_request_id, + "detail": "파일을 찾을 수 없습니다. 존재하지 않는 ID입니다." 
+ } + + mock_service.assert_called_once() + + def test_create_file_duplicate_check_existing_request(self, client): + # given + request_data = { + "user_id": self.test_user_id, + "file_id": self.test_file_id + } + + # 서비스 응답 모의 설정 + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.create_duplicate_check_request') as mock_service: + # 서비스 응답 설정 - 이미 요청 존재함 + mock_service.side_effect = HTTPException( + status_code=400, + detail="이미 중복 검사 요청이 존재합니다." + ) + + # when + response = client.post("/ai-proxy/file-duplicate-checks", json=request_data) + + # then + assert response.status_code == 400 + assert response.json() == { + "detail": "이미 중복 검사 요청이 존재합니다." + } + + mock_service.assert_called_once() + + def test_update_file_duplicate_check_result_success(self, client): + # given + request_data = { + "request_id": self.test_request_id, + "is_duplicated": False + } + + # 서비스 응답 모의 설정 + with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.update_duplicate_check_result') as mock_service: + # 서비스 응답 설정 + mock_service.return_value = True + + # mock_repository.get_duplicate_check_by_id 설정 - 서비스가 아닌 서비스 인스턴스를 패치 + check_data = { + "_id": self.test_request_id, "file_id": self.test_file_id, "is_completed": True, "is_duplicated": False } - mock_service.assert_called_once() + # repository를 직접 패치하는 대신 get_duplicate_check_by_id 메소드만 패치 + with patch('src.main.ai.data.FileDuplicateCheckRepository.FileDuplicateCheckRepository.get_duplicate_check_by_id', + return_value=check_data) as mock_get_duplicate: + + # when + response = client.post("/ai-proxy/file-duplicate-check-embeddings", json=request_data) + + # then + assert response.status_code == 200 + assert response.json() == { + "request_id": self.test_request_id, + "file_id": self.test_file_id, + "is_completed": True, + "is_duplicated": False + } + + mock_service.assert_called_once_with(self.test_request_id, False) - def 
test_update_file_duplicate_check_embeddings_not_found(self, client): + def test_update_file_duplicate_check_result_not_found(self, client): # given request_data = { "request_id": self.test_request_id, - "embeddings": [1.0, 2.0, 3.0] + "is_duplicated": False } # 서비스 응답 모의 설정 with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.update_duplicate_check_result') as mock_service: # 서비스 응답 설정 - 요청 없음 - mock_service.return_value = None + mock_service.return_value = False # when response = client.post("/ai-proxy/file-duplicate-check-embeddings", json=request_data) @@ -131,4 +206,4 @@ def test_update_file_duplicate_check_embeddings_not_found(self, client): "detail": "존재하지 않는 ID입니다." } - mock_service.assert_called_once() \ No newline at end of file + mock_service.assert_called_once_with(self.test_request_id, False) \ No newline at end of file From df2271b1010b36d69a7f2a68c1ec35303fa169af Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:27:03 +0900 Subject: [PATCH 29/33] =?UTF-8?q?Test:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=EC=9A=94=EC=B2=AD=20=EA=B4=80?= =?UTF-8?q?=EB=A0=A8=20=ED=85=8C=EC=8A=A4=ED=8A=B8=20=EC=BC=80=EC=9D=B4?= =?UTF-8?q?=EC=8A=A4=20=EC=A0=9C=EA=B1=B0=20=EB=B0=8F=20=EB=B6=88=ED=95=84?= =?UTF-8?q?=EC=9A=94=ED=95=9C=20=EC=9E=84=ED=8F=AC=ED=8A=B8=20=EC=A0=95?= =?UTF-8?q?=EB=A6=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai/router/test_ai_public_api_router.py | 71 +------------------ 1 file changed, 1 insertion(+), 70 deletions(-) diff --git a/src/tests/ai/router/test_ai_public_api_router.py b/src/tests/ai/router/test_ai_public_api_router.py index 74f650d..caff74c 100644 --- a/src/tests/ai/router/test_ai_public_api_router.py +++ b/src/tests/ai/router/test_ai_public_api_router.py @@ -9,7 +9,7 @@ CategoryRecommendationResponse, CategoryRecommendationStatusResponse ) -from 
src.main.ai.models.FileDuplicateCheck import FileDuplicateCheckStatusResponse, FileDuplicateCheckResponse, FileDuplicateCheckRequest +from src.main.ai.models.FileDuplicateCheck import FileDuplicateCheckStatusResponse from src.main.ai.router.AIPublicAPIRouter import router as public_router from src.main.auth.dependencies import get_current_user @@ -100,75 +100,6 @@ def test_get_category_recommendation_status_not_found(self, client): } mock_service.assert_called_once() - def test_create_file_duplicate_check_request_success(self, client): - # given - request_data = { - "user_id": str(self.test_user_id), - "file_id": self.test_file_id - } - - # 서비스 응답 모의 설정 - with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.create_duplicate_check_request') as mock_service: - # 서비스 응답 설정 - mock_service.return_value = FileDuplicateCheckResponse(request_id=self.test_request_id) - - # when - response = client.post("/ai/file-duplicate-checks", json=request_data) - - # then - assert response.status_code == 200 - assert response.json() == { - "request_id": self.test_request_id - } - mock_service.assert_called_once_with(FileDuplicateCheckRequest(**request_data)) - - def test_create_file_duplicate_check_request_file_not_found(self, client): - # given - request_data = { - "user_id": str(self.test_user_id), - "file_id": self.test_file_id - } - - # 서비스 응답 모의 설정 - with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.create_duplicate_check_request') as mock_service: - # 서비스에서 예외 발생 - mock_service.side_effect = HTTPException( - status_code=404, - detail="파일을 찾을 수 없습니다. 존재하지 않는 ID입니다." - ) - - # when - response = client.post("/ai/file-duplicate-checks", json=request_data) - - # then - assert response.status_code == 404 - assert response.json() == { - "detail": "파일을 찾을 수 없습니다. 존재하지 않는 ID입니다." 
- } - mock_service.assert_called_once() - - def test_create_file_duplicate_check_request_bad_request(self, client): - # given - request_data = { - "user_id": str(self.test_user_id), - "file_id": self.test_file_id - } - - # 서비스 응답 모의 설정 - with patch('src.main.ai.service.FileDuplicateCheckService.FileDuplicateCheckService.create_duplicate_check_request') as mock_service: - # 서비스에서 일반 예외 발생 - mock_service.side_effect = Exception("서비스 오류") - - # when - response = client.post("/ai/file-duplicate-checks", json=request_data) - - # then - assert response.status_code == 400 - assert response.json() == { - "detail": "잘못된 요청입니다. 명세에 맞지 않은 요청입니다." - } - mock_service.assert_called_once() - def test_get_file_duplicate_check_status_exists(self, client): # given # 서비스 응답 모의 설정 From 33a8908b56476b599b41364c1bfb925ac5d65129 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:27:08 +0900 Subject: [PATCH 30/33] =?UTF-8?q?Test:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=EC=9A=94=EC=B2=AD=20=EC=B2=98?= =?UTF-8?q?=EB=A6=AC=20=EB=A1=9C=EC=A7=81=20=EA=B0=9C=EC=84=A0=20=EB=B0=8F?= =?UTF-8?q?=20=EA=B8=B0=EC=A1=B4=20=EC=9A=94=EC=B2=AD=20=EC=B2=98=EB=A6=AC?= =?UTF-8?q?=20=ED=85=8C=EC=8A=A4=ED=8A=B8=20=EC=BC=80=EC=9D=B4=EC=8A=A4=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_file_duplicate_check_service.py | 94 ++++++++++++++----- 1 file changed, 71 insertions(+), 23 deletions(-) diff --git a/src/tests/ai/service/test_file_duplicate_check_service.py b/src/tests/ai/service/test_file_duplicate_check_service.py index 16cdc76..bd50acc 100644 --- a/src/tests/ai/service/test_file_duplicate_check_service.py +++ b/src/tests/ai/service/test_file_duplicate_check_service.py @@ -10,7 +10,7 @@ FileDuplicateCheckRequest, FileDuplicateCheckResponse, FileDuplicateCheckStatusResponse, - FileDuplicateCheckEmbeddingsRequest + 
FileDuplicateCheckResultRequest ) @@ -29,6 +29,7 @@ def setup_method(self): self.test_request_id = "7123456789abcdef01234567" self.test_object_id = ObjectId(self.test_request_id) self.test_file_object_id = ObjectId(self.test_file_id) + self.test_time = datetime(2023, 1, 1, tzinfo=timezone.utc) def test_create_duplicate_check_request_success(self): # given @@ -40,11 +41,16 @@ def test_create_duplicate_check_request_success(self): # 파일 존재함 file_document = { "_id": self.test_file_object_id, + "s3_bucket": "test-bucket", + "s3_key": "example.pdf", "file_key": "example.pdf", "s3_url": "https://bucket.s3.amazonaws.com/example.pdf" } self.mock_repository.get_file_by_id.return_value = file_document + # 기존 요청 없음 + self.mock_repository.get_duplicate_check_by_file_id.return_value = None + # 요청 생성 응답 mongo_document = { "_id": self.test_object_id, @@ -52,7 +58,7 @@ def test_create_duplicate_check_request_success(self): "user_id": self.test_user_id, "is_completed": False, "is_duplicated": None, - "created_at": datetime(2023, 1, 1, tzinfo=timezone.utc) + "created_at": self.test_time } self.mock_repository.create_duplicate_check_request.return_value = mongo_document @@ -61,6 +67,7 @@ def test_create_duplicate_check_request_success(self): # then self.mock_repository.get_file_by_id.assert_called_once_with(self.test_file_id) + self.mock_repository.get_duplicate_check_by_file_id.assert_called_once_with(self.test_file_id, self.test_user_id) self.mock_repository.create_duplicate_check_request.assert_called_once_with( file_id=self.test_file_id, user_id=self.test_user_id @@ -70,6 +77,46 @@ def test_create_duplicate_check_request_success(self): assert isinstance(result, FileDuplicateCheckResponse) assert result.request_id == str(self.test_object_id) + def test_create_duplicate_check_request_existing_request(self): + # given + request = FileDuplicateCheckRequest( + user_id=self.test_user_id, + file_id=self.test_file_id + ) + + # 파일 존재함 + file_document = { + "_id": self.test_file_object_id, 
+ "s3_bucket": "test-bucket", + "s3_key": "example.pdf", + "file_key": "example.pdf", + "s3_url": "https://bucket.s3.amazonaws.com/example.pdf" + } + self.mock_repository.get_file_by_id.return_value = file_document + + # 이미 요청이 존재함 + existing_check = { + "_id": self.test_object_id, + "file_id": self.test_file_id, + "user_id": self.test_user_id, + "is_completed": False, + "is_duplicated": None, + "created_at": self.test_time + } + self.mock_repository.get_duplicate_check_by_file_id.return_value = existing_check + + # when & then + with pytest.raises(HTTPException) as exc_info: + self.service.create_duplicate_check_request(request) + + assert exc_info.value.status_code == 400 + assert exc_info.value.detail == "이미 중복 검사 요청이 존재합니다." + + self.mock_repository.get_file_by_id.assert_called_once_with(self.test_file_id) + self.mock_repository.get_duplicate_check_by_file_id.assert_called_once_with(self.test_file_id, self.test_user_id) + self.mock_repository.create_duplicate_check_request.assert_not_called() + self.mock_queue.send_message.assert_not_called() + def test_create_duplicate_check_request_file_not_found(self): # given request = FileDuplicateCheckRequest( @@ -86,6 +133,7 @@ def test_create_duplicate_check_request_file_not_found(self): assert exc_info.value.status_code == 404 self.mock_repository.get_file_by_id.assert_called_once_with(self.test_file_id) + self.mock_repository.get_duplicate_check_by_file_id.assert_not_called() self.mock_repository.create_duplicate_check_request.assert_not_called() self.mock_queue.send_message.assert_not_called() @@ -98,7 +146,7 @@ def test_get_duplicate_check_status_exists(self): "user_id": self.test_user_id, "is_completed": False, "is_duplicated": None, - "created_at": datetime(2023, 1, 1, tzinfo=timezone.utc) + "created_at": self.test_time } self.mock_repository.get_duplicate_check_by_file_id.return_value = mongo_document @@ -126,7 +174,7 @@ def test_get_duplicate_check_status_completed(self): "user_id": self.test_user_id, 
"is_completed": True, "is_duplicated": False, - "created_at": datetime(2023, 1, 1, tzinfo=timezone.utc) + "created_at": self.test_time } self.mock_repository.get_duplicate_check_by_file_id.return_value = mongo_document @@ -163,10 +211,7 @@ def test_get_duplicate_check_status_not_found(self): def test_update_duplicate_check_result_success(self): # given - request = FileDuplicateCheckEmbeddingsRequest( - request_id=self.test_request_id, - embeddings=[1.0, 2.0, 3.0] - ) + is_duplicated = False # 요청이 존재함 check_document = { @@ -175,10 +220,18 @@ def test_update_duplicate_check_result_success(self): "user_id": self.test_user_id, "is_completed": False, "is_duplicated": None, - "created_at": datetime(2023, 1, 1, tzinfo=timezone.utc) + "created_at": self.test_time } self.mock_repository.get_duplicate_check_by_id.return_value = check_document + # 파일 중복 상태 업데이트 성공 + self.mock_repository.update_file_duplicate_status.return_value = { + "_id": self.test_file_object_id, + "s3_bucket": "test-bucket", + "s3_key": "example.pdf", + "is_duplicated": False + } + # 업데이트 응답 updated_document = { "_id": self.test_object_id, @@ -186,42 +239,37 @@ def test_update_duplicate_check_result_success(self): "user_id": self.test_user_id, "is_completed": True, "is_duplicated": False, - "created_at": datetime(2023, 1, 1, tzinfo=timezone.utc), + "created_at": self.test_time, "updated_at": datetime(2023, 1, 2, tzinfo=timezone.utc) } self.mock_repository.update_duplicate_check_result.return_value = updated_document # when - result = self.service.update_duplicate_check_result(self.test_request_id, request) + result = self.service.update_duplicate_check_result(self.test_request_id, is_duplicated) # then self.mock_repository.get_duplicate_check_by_id.assert_called_once_with(self.test_request_id) + self.mock_repository.update_file_duplicate_status.assert_called_once_with(self.test_file_id, is_duplicated) self.mock_repository.update_duplicate_check_result.assert_called_once_with( 
request_id=self.test_request_id, - is_duplicated=False + is_duplicated=is_duplicated ) - assert isinstance(result, FileDuplicateCheckStatusResponse) - assert result.request_id == str(self.test_object_id) - assert result.file_id == self.test_file_id - assert result.is_completed == True - assert result.is_duplicated == False + assert result == True def test_update_duplicate_check_result_not_found(self): # given - request = FileDuplicateCheckEmbeddingsRequest( - request_id=self.test_request_id, - embeddings=[1.0, 2.0, 3.0] - ) + is_duplicated = False # 요청이 존재하지 않음 self.mock_repository.get_duplicate_check_by_id.return_value = None # when - result = self.service.update_duplicate_check_result(self.test_request_id, request) + result = self.service.update_duplicate_check_result(self.test_request_id, is_duplicated) # then self.mock_repository.get_duplicate_check_by_id.assert_called_once_with(self.test_request_id) + self.mock_repository.update_file_duplicate_status.assert_not_called() self.mock_repository.update_duplicate_check_result.assert_not_called() - assert result is None \ No newline at end of file + assert result == False \ No newline at end of file From 8091e2e238d3fb34b36bdfc4ef26b9bb0cf6d7e5 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:27:12 +0900 Subject: [PATCH 31/33] =?UTF-8?q?Feat:=20=ED=8C=8C=EC=9D=BC=20=EC=A4=91?= =?UTF-8?q?=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=ED=81=90=EC=9D=98=20=EB=A9=94?= =?UTF-8?q?=EC=8B=9C=EC=A7=80=20=EC=A0=84=EC=86=A1=20=EB=A9=94=EC=84=9C?= =?UTF-8?q?=EB=93=9C=EC=97=90=20S3=20=EB=B2=84=ED=82=B7=20=EB=B0=8F=20?= =?UTF-8?q?=ED=82=A4=20=EB=A7=A4=EA=B0=9C=EB=B3=80=EC=88=98=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80=20=EB=B0=8F=20=EB=A9=94=EC=8B=9C=EC=A7=80=20=ED=98=95?= =?UTF-8?q?=EC=8B=9D=20JSON=EC=9C=BC=EB=A1=9C=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/data/FileDuplicateCheckQueue.py | 21 
+++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/main/ai/data/FileDuplicateCheckQueue.py b/src/main/ai/data/FileDuplicateCheckQueue.py index 948f2c5..2267b64 100644 --- a/src/main/ai/data/FileDuplicateCheckQueue.py +++ b/src/main/ai/data/FileDuplicateCheckQueue.py @@ -1,18 +1,27 @@ +import json + class FileDuplicateCheckQueue: def __init__(self, sqs_client, queue_url): self.sqs_client = sqs_client self.queue_url = queue_url - def send_message(self, message: str, user_id: str, request_id: str): + def send_message(self, request_id: str, user_id: str, s3_bucket: str, s3_key: str): """SQS 큐에 메시지를 전송합니다.""" - print(message) + message_body = { + 'request_type': 'file_duplicate_check_embedding_file', + 'request_id': request_id, + 'user_id': user_id, + 'payload': { + 's3_bucket': s3_bucket, + 's3_key': s3_key + } + } + response = self.sqs_client.send_message( QueueUrl=self.queue_url, MessageGroupId=str(user_id), - MessageDeduplicationId=str(request_id), - MessageBody=message, - + MessageDeduplicationId=str(request_id), + MessageBody=json.dumps(message_body), ) - return response \ No newline at end of file From 4aee93fb69bec15239fba637302f307db57ddad1 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:27:16 +0900 Subject: [PATCH 32/33] =?UTF-8?q?Refactor:=20=ED=8C=8C=EC=9D=BC=20?= =?UTF-8?q?=EC=A4=91=EB=B3=B5=20=EA=B2=80=EC=82=AC=20=EB=A6=AC=ED=8F=AC?= =?UTF-8?q?=EC=A7=80=ED=86=A0=EB=A6=AC=EC=97=90=EC=84=9C=20=EB=8D=B0?= =?UTF-8?q?=EC=9D=B4=ED=84=B0=EB=B2=A0=EC=9D=B4=EC=8A=A4=20=EC=BB=AC?= =?UTF-8?q?=EB=A0=89=EC=85=98=20=EC=A0=91=EA=B7=BC=20=EB=B0=A9=EC=8B=9D?= =?UTF-8?q?=EC=9D=84=20=EA=B0=9C=EC=84=A0=ED=95=98=EA=B3=A0=20=ED=98=84?= =?UTF-8?q?=EC=9E=AC=20=EC=8B=9C=EA=B0=84=EC=9D=84=20=EB=B0=98=ED=99=98?= =?UTF-8?q?=ED=95=98=EB=8A=94=20=EB=A9=94=EC=84=9C=EB=93=9C=20=EC=B6=94?= =?UTF-8?q?=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit --- src/main/ai/data/FileDuplicateCheckRepository.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/main/ai/data/FileDuplicateCheckRepository.py b/src/main/ai/data/FileDuplicateCheckRepository.py index b1683e8..2d08087 100644 --- a/src/main/ai/data/FileDuplicateCheckRepository.py +++ b/src/main/ai/data/FileDuplicateCheckRepository.py @@ -6,9 +6,13 @@ class FileDuplicateCheckRepository: def __init__(self, mongo_client): self.mongo_client = mongo_client self.db = mongo_client.get_database() - self.file_checks_collection = self.db["file_duplicate_checks"] - self.files_collection = self.db["files"] - self.file_embeddings_collection = self.db["file_embeddings"] + self.file_checks_collection = self.db.get_collection("file_duplicate_checks") + self.files_collection = self.db.get_collection("files") + self.file_embeddings_collection = self.db.get_collection("file_embeddings") + + def get_current_time(self): + """현재 시간을 UTC 기준으로 반환합니다.""" + return datetime.now(timezone.utc) def get_file_by_id(self, file_id: str): """파일 ID로 파일 정보를 조회합니다.""" @@ -29,7 +33,7 @@ def has_file_embedding(self, file_id: str) -> bool: def create_duplicate_check_request(self, file_id: str, user_id: str): """중복 검사 요청을 생성합니다.""" - now = datetime.now(timezone.utc) + now = self.get_current_time() document = { "file_id": file_id, "user_id": user_id, @@ -75,7 +79,7 @@ def update_file_duplicate_status(self, file_id: str, is_duplicated: bool): def update_duplicate_check_result(self, request_id: str, is_duplicated: bool): """중복 검사 결과를 업데이트합니다.""" try: - now = datetime.now(timezone.utc) + now = self.get_current_time() request_obj_id = ObjectId(request_id) result = self.file_checks_collection.update_one( From 2f9abb8d30463222e975d4e1dd0deaeb03a2ebd9 Mon Sep 17 00:00:00 2001 From: Coldot <41678750+Coldot@users.noreply.github.com> Date: Sat, 22 Mar 2025 18:27:21 +0900 Subject: [PATCH 33/33] =?UTF-8?q?Fix:=20CategoryRecommendationResultReques?= 
=?UTF-8?q?t=20=ED=81=B4=EB=9E=98=EC=8A=A4=EC=9D=98=20request=5Fid=20?= =?UTF-8?q?=ED=95=84=EB=93=9C=EB=A5=BC=20=EC=84=A0=ED=83=9D=EC=A0=81(Optio?= =?UTF-8?q?nal)=EB=A1=9C=20=EB=B3=80=EA=B2=BD=ED=95=98=EC=97=AC=20?= =?UTF-8?q?=EA=B8=B0=EB=B3=B8=EA=B0=92=EC=9D=84=20None=EC=9C=BC=EB=A1=9C?= =?UTF-8?q?=20=EC=84=A4=EC=A0=95=ED=95=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main/ai/models/CategoryRecommendation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/ai/models/CategoryRecommendation.py b/src/main/ai/models/CategoryRecommendation.py index 504440e..e20e032 100644 --- a/src/main/ai/models/CategoryRecommendation.py +++ b/src/main/ai/models/CategoryRecommendation.py @@ -18,5 +18,5 @@ class CategoryRecommendationStatusResponse(BaseModel): class CategoryRecommendationResultRequest(BaseModel): - request_id: str + request_id: Optional[str] = None predicted_category: str \ No newline at end of file