From cda214cf253426788e7285f7e01abfd0a36986e2 Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Tue, 13 May 2025 12:31:34 +0900 Subject: [PATCH 01/28] delete api_request delete api_request --- api_request/reliefweb.py | 56 ---------------------------------------- 1 file changed, 56 deletions(-) delete mode 100644 api_request/reliefweb.py diff --git a/api_request/reliefweb.py b/api_request/reliefweb.py deleted file mode 100644 index 1bcbd7f..0000000 --- a/api_request/reliefweb.py +++ /dev/null @@ -1,56 +0,0 @@ -import requests -import json - -# API 엔드포인트 -api_url = "https://api.reliefweb.int/v1/jobs?limit=10&offset=1120" -# API링크를 저장하기 위한 배열 -description_endpoint=[] -# API요청을 보내 -while api_url: # 다음으로 참고할 데이터가 없을 경우 조건문 종료 - response = requests.get(api_url) - if response.status_code==200: - data=response.json() - - links = data.get('links', {}) - if links: - next_link = links.get('next', None) - api_url = next_link.get('href', None) if next_link else None - else: - api_url = None # 'next'가 없으면 종료 조건으로 설정 - - - print(api_url) # 디버깅용 - jobs=data.get("data", []) - description_endpoint.append([job['href'] for job in jobs]) - - # for job in jobs: - # href = job.get("href", "No Link") - - # description_endpoint.append({"href": href}) - - - else: - print("API 요청 실패:", response.status_code, response.text) - break - -# print(description_endpoint) - -job_list=[] -flattened_data = [item for sublist in description_endpoint for item in sublist] - -# print(flattened_data) - -for info in flattened_data: - - response=requests.get(info) - - if response.status_code==200: - data=response.json() - jobs=data.get("data", []) - for job in jobs: - fields=job.get("fields", {}) - title=fields.get("title", "No title") - body=fields.get("body", "No body") - job_list.append({"title": title, "body": body}) - -print(json.dumps(job_list, indent=4, ensure_ascii=False)) \ No newline at end of file
From c13d73d33a44f3687ba47b98e480dd7420be364c Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Tue, 13 May 2025 20:23:35 +0900 Subject: [PATCH 03/28] add docstring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docstring 추가 및 가장 높은 키워드 반환하도록 수정+O클래스 제거 --- custom_keyword/ext.py | 10 +++++++++- ocr/__init__.py | 9 ++++----- ocr/o.py | 19 +++++++++++++++++-- summarization/sum_translate.py | 7 +++++-- 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/custom_keyword/ext.py b/custom_keyword/ext.py index aa88bcd..5dedb70 100644 --- a/custom_keyword/ext.py +++ b/custom_keyword/ext.py @@ -16,10 +16,18 @@ def calculate_cosine_similarity(vec1, vec2): return cosine_similarity(vec1, vec2) def extract_keywords(question: str): + """ + 텍스트로부터 키워드를 추출하는 함수 + + Args: + question (str): 키워드를 추출하고자 하는 문자열 + Returns: + str: 가장 유사도가 높은 키워드 + """ sentence_embedding = get_embeddings(question) domain_embeddings = [get_embeddings(keyword) for keyword in domain_keywords] similarities = [ (keyword, calculate_cosine_similarity(sentence_embedding, embedding)[0][0]) for keyword, embedding in zip(domain_keywords, domain_embeddings) ] - return sorted(similarities, key=lambda x: x[1], reverse=True) \ No newline at end of file + return max(similarities, key=lambda x: x[1])[0] diff --git a/ocr/__init__.py b/ocr/__init__.py index 588b198..d02d56a 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -1,7 +1,6 @@ -from flask import Blueprint, request, jsonify -from utils import confirm_no_data +from flask import Blueprint, jsonify from server.logger import logger -from .o import O +from .o import extract_text, compare_texts ocr_bp = Blueprint('ocr', __name__, url_prefix='/ocr') @@ -13,10 +12,10 @@ def evaluate_image(review_id): compare_text = "" # OCR 실행 - extracted_text = O.extract_text(img_path) + extracted_text = extract_text(img_path) # 비교 실행 - result = O.compare_texts(extracted_text, compare_text) + result = compare_texts(extracted_text, compare_text) try: return jsonify({"llm_validation": result, diff --git a/ocr/o.py b/ocr/o.py index a505897..526293b 100644 --- a/ocr/o.py +++ b/ocr/o.py @@ -8,12 +8,27 @@ ocr = PaddleOCR(lang="korean") def extract_text(img_path): - """ 이미지에서 텍스트 추출 """ + """ + 이미지에서 텍스트 추출 + + Args: + img_path (str): 이미지의 경로(url) + Returns: + str : ocr이미지에서 추출한 문자열 반환 + """ results = ocr.ocr(img_path, cls=True) return " ".join(text for result in results for _, (text, _) in result) def compare_texts(text1, text2): - """ 두 텍스트 간의 관계 분석 """ + """ + ocr로 추출한 텍스트와 활동 제목 간의 관계 분석 + + Args: + text1 (str): 이미지에서 추출한 문자열 + text2 (str): 활동 제목에서의 문자열 + Returns: + str: 관련이 있다 판단 시 True / 없다 판단 시 False를 반환 + """ prompt = f""" Analyze the relationship between the following two texts. Determine whether they are conceptually or contextually related. If they are related, return True; otherwise, return False without additional explanation diff --git a/summarization/sum_translate.py b/summarization/sum_translate.py index d9900bc..5c79249 100644 --- a/summarization/sum_translate.py +++ b/summarization/sum_translate.py @@ -5,8 +5,11 @@ def summarize_translate_en_to_ko(text: str) -> str: """ 영어 텍스트를 한국어로 번역하고 요약.
- :param text: 번역 및 요약할 영어 문장 - :return: 요약된 한국어 번역 결과 + + Args: + text (str): 번역하고자 하는 원문(영어) 텍스트 + Returns: + str: 요약된 한국어 번역 결과 """ prompt = f""" Translate and summarize the following English text **into Korean** in **one or two sentences only**. From 98a25f7ef7b8d304f6db6c9a43e60ac691323c28 Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Wed, 14 May 2025 12:17:54 +0900 Subject: [PATCH 04/28] add databaseconnection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. ocr에서 데이터베이스와 연결해 데이터를 가져오도록 코드 작성 2. api키를 env파일에서 가져오도록 변경 --- ocr/__init__.py | 11 ++++++++--- ocr/o.py | 7 ++++++- summarization/sum_translate.py | 9 ++++++++- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/ocr/__init__.py b/ocr/__init__.py index d02d56a..99c3114 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -1,4 +1,5 @@ from flask import Blueprint, jsonify +from server.db import run_query from server.logger import logger from .o import extract_text, compare_texts @@ -7,9 +8,13 @@ @ocr_bp.route('/', methods=['GET']) def evaluate_image(review_id): - # 데이터베이스에서 review_id를를 토대로 데이터를 가져옴(미구현) - img_path="" - compare_text = "" + img_query="""SELECT ri.image_urls + FROM reviews r + JOIN review_image_urls ri ON r.review_id = ri.review_id + WHERE r.review_id = '%s';""" + img_path = run_query(img_query, (review_id,)) + compare_query="SELECT activity_name FROM reviews WHERE review_id='%s';" + compare_text = run_query(compare_query, (review_id,)) # OCR 실행 extracted_text = extract_text(img_path) diff --git a/ocr/o.py b/ocr/o.py index 526293b..97e7894 100644 --- a/ocr/o.py +++ b/ocr/o.py @@ -1,9 +1,14 @@ import torch +import os +from dotenv import load_dotenv from paddleocr import PaddleOCR from openai import OpenAI +# .env파일 로드 +load_dotenv() -client = OpenAI(api_key="") # 나중에 api키 교체 +# 환경 변수에서 API 키 가져오기 +client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) model = "gpt-4" ocr = PaddleOCR(lang="korean") diff --git a/summarization/sum_translate.py b/summarization/sum_translate.py index 5c79249..54efe88 100644 --- a/summarization/sum_translate.py +++ b/summarization/sum_translate.py @@ -1,5 +1,12 @@ from openai import OpenAI -client = OpenAI(api_key="") # 나중에 api_key 교체 +import os +from dotenv import load_dotenv + +# .env파일 로드 +load_dotenv() + +# 환경 변수에서 API 키 가져오기 +client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) model = "gpt-4" def summarize_translate_en_to_ko(text: str) -> str: From 2582e7f4cd46469b3a645296e5b334366ade5efd Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Thu, 15 May 2025 11:43:28 +0900 Subject: [PATCH 05/28] add swagger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 스웨거 라이브러리 추가 2. ocr 이미지분석 api에 대한 docstring추가 3. 
ocr이미지분석 중 True/False이외의 값으로 반환할 경우 False로 반환하도록 설정 --- ocr/__init__.py | 32 +++++++++++++++++++++++++++++++- requirements.in | 1 + requirements.txt | 33 ++++++++++++++++++++++++++++----- 3 files changed, 60 insertions(+), 6 deletions(-) diff --git a/ocr/__init__.py b/ocr/__init__.py index 99c3114..93d8a9d 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -7,7 +7,29 @@ @ocr_bp.route('/', methods=['GET']) def evaluate_image(review_id): - + """ + 이미지 평가 API + --- + parameters: + - name: review_id + in: path + type: string + required: true + description: 리뷰 ID + responses: + 200: + description: 성공적으로 평가됨 + schema: + type: object + properties: + llm_validation: + type: boolean + review_id: + type: string + 500: + description: 서버 오류 발생 + + """ img_query="""SELECT ri.image_urls FROM reviews r JOIN review_image_urls ri ON r.review_id = ri.review_id @@ -22,6 +44,14 @@ def evaluate_image(review_id): # 비교 실행 result = compare_texts(extracted_text, compare_text) + # 문자열 "True" 또는 "False"를 실제 Boolean 값으로 변환 + if result == "True": + result = True + elif result == "False": + result = False + else: + result = False # 예상치 못한 값이면 False로 처리 + try: return jsonify({"llm_validation": result, "review_id": review_id}), 200 diff --git a/requirements.in b/requirements.in index 523814a..3cca7c0 100644 --- a/requirements.in +++ b/requirements.in @@ -5,6 +5,7 @@ flask flask-cors requests python-dotenv +flasgger # Bert 임베딩 기반 유사도 추정 torch diff --git a/requirements.txt b/requirements.txt index 335edfc..b5cb415 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile +# pip-compile requirements.in # albucore==0.0.24 # via @@ -16,6 +16,10 @@ anyio==4.9.0 # via # httpx # openai +attrs==25.3.0 + # via + # jsonschema + # referencing beautifulsoup4==4.13.4 # via paddleocr blinker==1.9.0 @@ -44,9 +48,12 @@ filelock==3.18.0 # transformers fire==0.7.0 # via paddleocr +flasgger==0.9.7.1 + # via -r requirements.in flask==3.1.0 # via # -r requirements.in + # flasgger # flask-cors flask-cors==5.0.1 # via -r requirements.in @@ -83,6 +90,10 @@ jiter==0.9.0 # via openai joblib==1.5.0 # via scikit-learn +jsonschema==4.23.0 + # via flasgger +jsonschema-specifications==2025.4.1 + # via jsonschema lazy-loader==0.4 # via scikit-image lmdb==1.6.2 @@ -93,6 +104,8 @@ markupsafe==3.0.2 # via # jinja2 # werkzeug +mistune==3.1.3 + # via flasgger mpmath==1.3.0 # via sympy mysql-connector-python==9.3.0 @@ -128,6 +141,7 @@ opencv-python-headless==4.11.0.86 # albumentations packaging==25.0 # via + # flasgger # huggingface-hub # lazy-loader # scikit-image @@ -154,11 +168,16 @@ python-dotenv==1.1.0 pyyaml==6.0.2 # via # albumentations + # flasgger # huggingface-hub # paddleocr # transformers rapidfuzz==3.13.0 # via paddleocr +referencing==0.36.2 + # via + # jsonschema + # jsonschema-specifications regex==2024.11.6 # via transformers requests==2.32.3 @@ -167,6 +186,10 @@ requests==2.32.3 # huggingface-hub # paddleocr # transformers +rpds-py==0.24.0 + # via + # jsonschema + # referencing safetensors==0.5.3 # via transformers scikit-image==0.25.2 @@ -182,6 +205,8 @@ shapely==2.1.0 # via paddleocr simsimd==6.2.1 # via albucore +six==1.17.0 + # via flasgger sniffio==1.3.1 # via # anyio @@ -219,6 +244,7 @@ typing-extensions==4.13.2 # pydantic # pydantic-core # python-docx + # referencing # torch # typing-inspection typing-inspection==0.4.0 @@ -229,6 +255,3 @@ werkzeug==3.1.3 
# via # flask # flask-cors - -# The following packages are considered to be unsafe in a requirements file: -# setuptools From f15330ff89ab2e6527f59d9db02c40a8d25e6069 Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Fri, 16 May 2025 10:50:14 +0900 Subject: [PATCH 06/28] add paddlepaddle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. paddlepaddle을 requirements.in에 추가 2.flasgger을 적용하기 위한 코드 추가 --- app.py | 3 +++ ocr/__init__.py | 4 ++-- requirements.in | 1 + requirements.txt | 19 ++++++++++++++++++- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/app.py b/app.py index b7ca66e..e1ba0c4 100644 --- a/app.py +++ b/app.py @@ -3,6 +3,7 @@ from flask import Flask from flask_cors import CORS from dotenv import load_dotenv +from flasgger import Swagger from server.logger import logger @@ -23,6 +24,8 @@ app = Flask(__name__) CORS(app, resources={r"/*": {"origins": "*"}}) +swagger=Swagger(app) + # 모든 Blueprint 등록 from chat import chat_bp app.register_blueprint(chat_bp) diff --git a/ocr/__init__.py b/ocr/__init__.py index 93d8a9d..e37074d 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -13,7 +13,7 @@ def evaluate_image(review_id): parameters: - name: review_id in: path - type: string + type: "string" required: true description: 리뷰 ID responses: @@ -25,7 +25,7 @@ def evaluate_image(review_id): llm_validation: type: boolean review_id: - type: string + type: "string" 500: description: 서버 오류 발생 diff --git a/requirements.in b/requirements.in index 3cca7c0..9c29df6 100644 --- a/requirements.in +++ b/requirements.in @@ -17,6 +17,7 @@ openai # OCR paddleocr +paddlepaddle # MySQL mysql-connector-python diff --git a/requirements.txt b/requirements.txt index b5cb415..26bc0d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,8 @@ anyio==4.9.0 # via # httpx # openai +astor==0.8.1 + # via paddlepaddle attrs==25.3.0 # via # jsonschema @@ -39,6 +41,8 @@ colorama==0.4.6 # tqdm cython==3.1.0 # via paddleocr +decorator==5.2.1 + # via paddlepaddle distro==1.9.0 # via openai filelock==3.18.0 @@ -68,7 +72,9 @@ h11==0.16.0 httpcore==1.0.9 # via httpx httpx==0.28.1 - # via openai + # via + # openai + # paddlepaddle huggingface-hub==0.31.1 # via # tokenizers @@ -112,6 +118,7 @@ mysql-connector-python==9.3.0 # via -r requirements.in networkx==3.4.2 # via + # paddlepaddle # scikit-image # torch numpy==2.2.5 @@ -122,7 +129,9 @@ numpy==2.2.5 # opencv-contrib-python # opencv-python # opencv-python-headless + # opt-einsum # paddleocr + # paddlepaddle # scikit-image # scikit-learn # scipy @@ -139,6 +148,8 @@ opencv-python-headless==4.11.0.86 # via # albucore # albumentations +opt-einsum==3.3.0 + # via paddlepaddle packaging==25.0 # via # flasgger @@ -148,11 +159,16 @@ packaging==25.0 # transformers paddleocr==2.10.0 # via -r requirements.in +paddlepaddle==3.0.0 + # via -r requirements.in pillow==11.2.1 # via # imageio # paddleocr + # paddlepaddle # scikit-image +protobuf==6.31.0 + # via paddlepaddle pyclipper==1.3.0.post6 # via paddleocr pydantic==2.11.4 @@ -241,6 +257,7 @@ typing-extensions==4.13.2 # beautifulsoup4 # huggingface-hub # openai + # paddlepaddle # pydantic # pydantic-core # python-docx From e0fb1278251495f32815a1ad3e11ad639833c67c Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Sat, 24 May 2025 18:42:41 +0900 Subject: [PATCH 07/28] include imagestream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.api반환에서 ocr결과를 인증기록과 수상기록ocr결과 두 개로 나누어 전송하도록 바꾸었습니다. 이를 반영하여 스웨거 또한 변경하였습니다. 
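(예시 응답 형태(가정): {"ocr_result": "True", "award_ocr_result": "None", "review_id": "<리뷰 ID>"}; 수상기록 이미지가 없으면 award_ocr_result는 "None"으로 내려갑니다.)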
2.s3에서 이미지를 가져오기 위해 boto3패키지를 추가하였습니다 3.이미지 스트림을 통해 이미지를 로컬에 저장하도록 하였습니다. --- ocr/__init__.py | 57 +++++++++++++++++++++++++--------------- ocr/o.py | 68 +++++++++++++++++++++++++++++++++++++++++++----- requirements.in | 2 ++ requirements.txt | 22 ++++++++++++++-- 4 files changed, 120 insertions(+), 29 deletions(-) diff --git a/ocr/__init__.py b/ocr/__init__.py index e37074d..6da36ef 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -1,7 +1,7 @@ from flask import Blueprint, jsonify from server.db import run_query from server.logger import logger -from .o import extract_text, compare_texts +from .o import download_image, extract_text, compare_texts ocr_bp = Blueprint('ocr', __name__, url_prefix='/ocr') @@ -22,38 +22,53 @@ def evaluate_image(review_id): schema: type: object properties: - llm_validation: - type: boolean + ocr_result: + type: "string" + enum: ["True", "False"] + award_ocr_result: + type: "string" + enum: ["True", "False", "None"] review_id: type: "string" 500: description: 서버 오류 발생 """ - img_query="""SELECT ri.image_urls - FROM reviews r - JOIN review_image_urls ri ON r.review_id = ri.review_id - WHERE r.review_id = '%s';""" - img_path = run_query(img_query, (review_id,)) - compare_query="SELECT activity_name FROM reviews WHERE review_id='%s';" + # review_img_query="""SELECT ri.image_urls + # FROM reviews r + # JOIN review_image_urls ri ON r.review_id = ri.review_id + # WHERE r.review_id = %s;""" + review_img_query="""SELECT image_urls + FROM review_image_urls + WHERE HEX(review_id)=%s""" + review_img_path = run_query(review_img_query, (review_id,)) + + award_img_query = "SELECT award_image_url FROM reviews WHERE hex(review_id) = %s;" + award_img_path=run_query(award_img_query, (review_id, )) + + compare_query="SELECT activity_name FROM reviews WHERE hex(review_id)=%s" compare_text = run_query(compare_query, (review_id,)) # OCR 실행 - extracted_text = extract_text(img_path) - - # 비교 실행 - result = compare_texts(extracted_text, compare_text) - - # 문자열 "True" 또는 "False"를 실제 Boolean 값으로 변환 - if result == "True": - result = True - elif result == "False": - result = False + if review_img_path: + # ocr결과의 기본값은 False + ocr_result = "False" + for img_url in review_img_path: + image_stream = download_image(img_url) + extracted_text = extract_text(image_stream) + ocr_result = compare_texts(extracted_text, compare_text[0]) + if ocr_result == "True": + break + if award_img_path[0][0]: + award_image_stream = download_image(award_img_path[0]) + award_text = extract_text(award_image_stream) + award_ocr_result = compare_texts(award_text, compare_text[0]) else: - result = False # 예상치 못한 값이면 False로 처리 + award_ocr_result = "None" try: - return jsonify({"llm_validation": result, + return jsonify({"ocr_result": ocr_result, + "award_ocr_result": award_ocr_result, "review_id": review_id}), 200 except Exception as e: logger.error(e) diff --git a/ocr/o.py b/ocr/o.py index 97e7894..f7271fc 100644 --- a/ocr/o.py +++ b/ocr/o.py @@ -1,8 +1,12 @@ import torch import os +import boto3 +import numpy as np +import cv2 from dotenv import load_dotenv from paddleocr import PaddleOCR from openai import OpenAI +from io import BytesIO # .env파일 로드 load_dotenv() @@ -12,18 +16,70 @@ model = "gpt-4" ocr = PaddleOCR(lang="korean") -def extract_text(img_path): - """ - 이미지에서 텍스트 추출 +def download_image(img_path): + """ + s3에서 이미지 다운로드 후 벡터db나 tmp폴더에 저장 + + Args: + img_path (str): s3상에 이미지 경로 + Returns: + BytesIO: 이미지 데이터의 바이트스트림 객체 + """ + s3 = boto3.client( + 's3', + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + 
aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + region_name=os.getenv("AWS_REGION") # 원하는 리전 + ) + bucket_name='trendist' + file_name = img_path[0] + # file_name = "award/0EEC67FEECF943B980D60BF3430FB213" + image_stream = BytesIO() + print(file_name) + s3.download_fileobj(bucket_name, file_name, image_stream) + image_stream.seek(0) + + return image_stream + + +# def extract_text(img_path): +# """ +# 이미지에서 텍스트 추출 + +# Args: +# img_path (str): 이미지의 로컬경로(url) +# Returns: +# str : ocr이미지에서 추출한 문자열 반환 +# """ +# results = ocr.ocr(img_path, cls=True) +# return " ".join(text for result in results for _, (text, _) in result) + +def extract_text(image_stream): + """ + BytesIO 객체의 이미지를 대상으로 OCR 수행 Args: - img_path (str): 이미지의 경로(url) + image_stream (BytesIO): 메모리에 저장된 이미지 데이터 Returns: - str : ocr이미지에서 추출한 문자열 반환 + list: OCR 결과 """ - results = ocr.ocr(img_path, cls=True) + # 스트림을 numpy 배열로 변환 + image_stream.seek(0) # 읽기 위치 초기화 + file_bytes = np.frombuffer(image_stream.getvalue(), dtype=np.uint8) + img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) # OpenCV를 사용하여 이미지 디코딩 + print("ocr수행전전") + # OCR 수행 + ocr = PaddleOCR(lang='korean') # 언어 설정 가능 + results = ocr.ocr(img, cls=True) + print("ocr수행후") return " ".join(text for result in results for _, (text, _) in result) +# 예제 사용법 +# with open("sample.jpg", "rb") as f: +# image_stream = BytesIO(f.read()) +# ocr_result = perform_ocr(image_stream) +# print(ocr_result) + def compare_texts(text1, text2): """ ocr로 추출한 텍스트와 활동 제목 간의 관계 분석 diff --git a/requirements.in b/requirements.in index 9c29df6..f4c73c7 100644 --- a/requirements.in +++ b/requirements.in @@ -22,6 +22,8 @@ paddlepaddle # MySQL mysql-connector-python +#boto3 +boto3 # 새로운 패키지 # package-name diff --git a/requirements.txt b/requirements.txt index 26bc0d3..ee748ff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,6 +26,12 @@ beautifulsoup4==4.13.4 # via paddleocr blinker==1.9.0 # via flask +boto3==1.38.23 + # via -r requirements.in +botocore==1.38.23 + # via + # boto3 + # s3transfer certifi==2025.1.31 # via # httpcore @@ -94,6 +100,10 @@ jinja2==3.1.6 # torch jiter==0.9.0 # via openai +jmespath==1.0.1 + # via + # boto3 + # botocore joblib==1.5.0 # via scikit-learn jsonschema==4.23.0 @@ -177,6 +187,8 @@ pydantic==2.11.4 # openai pydantic-core==2.33.2 # via pydantic +python-dateutil==2.9.0.post0 + # via botocore python-docx==1.1.2 # via paddleocr python-dotenv==1.1.0 @@ -206,6 +218,8 @@ rpds-py==0.24.0 # via # jsonschema # referencing +s3transfer==0.13.0 + # via boto3 safetensors==0.5.3 # via transformers scikit-image==0.25.2 @@ -222,7 +236,9 @@ shapely==2.1.0 simsimd==6.2.1 # via albucore six==1.17.0 - # via flasgger + # via + # flasgger + # python-dateutil sniffio==1.3.1 # via # anyio @@ -267,7 +283,9 @@ typing-extensions==4.13.2 typing-inspection==0.4.0 # via pydantic urllib3==2.3.0 - # via requests + # via + # botocore + # requests werkzeug==3.1.3 # via # flask From 1183181da491e956bc7c17148a64b9682c6bf88b Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 26 May 2025 19:32:03 +0900 Subject: [PATCH 08/28] =?UTF-8?q?ocr=20api=EC=A1=B0=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 백엔드측의 요청으로 api를 변경하였습니다 1.get방식->post 데이터베이스에 직접 접근 대신 body부분에 url을 전달하는 방식으로 변경 2.return값 변수명 변경 및 review_id제거 3.변경에 따른 변수 및 스웨거 조정 --- ocr/__init__.py | 116 ++++++++++++++++++++++++++++-------------------- ocr/o.py | 26 +---------- 2 files changed, 70 insertions(+), 72 deletions(-) diff --git a/ocr/__init__.py b/ocr/__init__.py index 
6da36ef..7d5dd84 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -1,54 +1,75 @@ -from flask import Blueprint, jsonify +from flask import Blueprint, jsonify, request +from flasgger import Swagger, swag_from from server.db import run_query from server.logger import logger from .o import download_image, extract_text, compare_texts ocr_bp = Blueprint('ocr', __name__, url_prefix='/ocr') -@ocr_bp.route('/', methods=['GET']) -def evaluate_image(review_id): - """ - 이미지 평가 API - --- - parameters: - - name: review_id - in: path - type: "string" - required: true - description: 리뷰 ID - responses: - 200: - description: 성공적으로 평가됨 - schema: - type: object - properties: - ocr_result: - type: "string" - enum: ["True", "False"] - award_ocr_result: - type: "string" - enum: ["True", "False", "None"] - review_id: - type: "string" - 500: - description: 서버 오류 발생 +@ocr_bp.route('/', methods=['POST']) +@swag_from({ + 'summary': 'OCR 이미지 비교 API', + 'description': '이미지에서 텍스트를 추출하고 비교하는 API', + 'parameters': [ + { + 'name': 'body', + 'in': 'body', + 'required': True, + 'schema': { + 'type': 'object', + 'properties': { + 'image_urls': { + 'type': 'array', + 'items': {'type': 'string'}, + 'description': '검토할 이미지 URL 리스트' + }, + 'award_img_urls': { + 'type': 'string', + 'description': '수상 이미지의 URL' + }, + 'title': { + 'type': 'string', + 'description': '비교할 기준 텍스트' + } + } + } + } + ], + 'responses': { + 200: { + 'description': 'OCR 결과 반환', + 'schema': { + 'type': 'object', + 'properties': { + 'ocrResult': {'type': 'string', 'description': 'OCR 비교 결과'}, + 'awardOcrResult': {'type': 'string', 'description': '수상 이미지 OCR 비교 결과'} + } + } + }, + 500: { + 'description': '서버 에러 발생', + 'schema': { + 'type': 'object', + 'properties': { + 'answer': {'type': 'string', 'description': '에러 메시지'} + } + } + } + } +}) - """ - # review_img_query="""SELECT ri.image_urls - # FROM reviews r - # JOIN review_image_urls ri ON r.review_id = ri.review_id - # WHERE r.review_id = %s;""" - review_img_query="""SELECT image_urls - FROM review_image_urls - WHERE HEX(review_id)=%s""" - review_img_path = run_query(review_img_query, (review_id,)) +def evaluate_image(): - award_img_query = "SELECT award_image_url FROM reviews WHERE hex(review_id) = %s;" - award_img_path=run_query(award_img_query, (review_id, )) - compare_query="SELECT activity_name FROM reviews WHERE hex(review_id)=%s" - compare_text = run_query(compare_query, (review_id,)) + data=request.get_json() + review_img_path=data.get("image_urls") + award_img_path=data.get("award_img_urls") + compare_text=data.get("title") + + print(review_img_path) + print(award_img_path) + print(compare_text) # OCR 실행 if review_img_path: # ocr결과의 기본값은 False @@ -56,20 +77,19 @@ def evaluate_image(review_id): for img_url in review_img_path: image_stream = download_image(img_url) extracted_text = extract_text(image_stream) - ocr_result = compare_texts(extracted_text, compare_text[0]) + ocr_result = compare_texts(extracted_text, compare_text) if ocr_result == "True": break - if award_img_path[0][0]: - award_image_stream = download_image(award_img_path[0]) + if award_img_path != None: + award_image_stream = download_image(award_img_path) award_text = extract_text(award_image_stream) - award_ocr_result = compare_texts(award_text, compare_text[0]) + award_ocr_result = compare_texts(award_text, compare_text) else: award_ocr_result = "None" try: - return jsonify({"ocr_result": ocr_result, - "award_ocr_result": award_ocr_result, - "review_id": review_id}), 200 + return jsonify({"ocrResult": ocr_result, + 
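# compare_texts가 반환한 "True"/"False" 문자열을 그대로 전달(백엔드 요청에 따른 형식) +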
"awardOcrResult": award_ocr_result}), 200 except Exception as e: logger.error(e) return jsonify({"answer": f"죄송합니다. 에러가 발생했습니다."}), 500 \ No newline at end of file diff --git a/ocr/o.py b/ocr/o.py index f7271fc..fecffc0 100644 --- a/ocr/o.py +++ b/ocr/o.py @@ -18,7 +18,7 @@ def download_image(img_path): """ - s3에서 이미지 다운로드 후 벡터db나 tmp폴더에 저장 + s3에서 이미지 다운로드 후 저장 Args: img_path (str): s3상에 이미지 경로 @@ -32,28 +32,14 @@ def download_image(img_path): region_name=os.getenv("AWS_REGION") # 원하는 리전 ) bucket_name='trendist' - file_name = img_path[0] - # file_name = "award/0EEC67FEECF943B980D60BF3430FB213" + file_name = img_path image_stream = BytesIO() - print(file_name) s3.download_fileobj(bucket_name, file_name, image_stream) image_stream.seek(0) return image_stream -# def extract_text(img_path): -# """ -# 이미지에서 텍스트 추출 - -# Args: -# img_path (str): 이미지의 로컬경로(url) -# Returns: -# str : ocr이미지에서 추출한 문자열 반환 -# """ -# results = ocr.ocr(img_path, cls=True) -# return " ".join(text for result in results for _, (text, _) in result) - def extract_text(image_stream): """ BytesIO 객체의 이미지를 대상으로 OCR 수행 @@ -67,19 +53,11 @@ def extract_text(image_stream): image_stream.seek(0) # 읽기 위치 초기화 file_bytes = np.frombuffer(image_stream.getvalue(), dtype=np.uint8) img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) # OpenCV를 사용하여 이미지 디코딩 - print("ocr수행전전") # OCR 수행 ocr = PaddleOCR(lang='korean') # 언어 설정 가능 results = ocr.ocr(img, cls=True) - print("ocr수행후") return " ".join(text for result in results for _, (text, _) in result) -# 예제 사용법 -# with open("sample.jpg", "rb") as f: -# image_stream = BytesIO(f.read()) -# ocr_result = perform_ocr(image_stream) -# print(ocr_result) - def compare_texts(text1, text2): """ ocr로 추출한 텍스트와 활동 제목 간의 관계 분석 From 075df9d8b86d23b74ec9fda3cba96d6a947e61b1 Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Thu, 29 May 2025 12:00:13 +0900 Subject: [PATCH 09/28] =?UTF-8?q?=EC=9D=B4=EB=AF=B8=EC=A7=80=20=EB=8B=A4?= =?UTF-8?q?=EC=9A=B4=EB=A1=9C=EB=93=9C=20=EB=B0=A9=EC=8B=9D=20=EB=B3=80?= =?UTF-8?q?=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit s3에서 boto객체를 만드는 것 대신 presignedurl을 이용하도록 변경 --- ocr/__init__.py | 9 +++++---- ocr/o.py | 24 +++++++++++------------- requirements.in | 4 ++-- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/ocr/__init__.py b/ocr/__init__.py index 7d5dd84..6426c86 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -1,6 +1,6 @@ from flask import Blueprint, jsonify, request from flasgger import Swagger, swag_from -from server.db import run_query + from server.logger import logger from .o import download_image, extract_text, compare_texts @@ -63,8 +63,8 @@ def evaluate_image(): data=request.get_json() - review_img_path=data.get("image_urls") - award_img_path=data.get("award_img_urls") + review_img_path=data.get("imageUrls") + award_img_path=data.get("awardImgUrl") compare_text=data.get("title") print(review_img_path) @@ -77,6 +77,7 @@ def evaluate_image(): for img_url in review_img_path: image_stream = download_image(img_url) extracted_text = extract_text(image_stream) + print(extracted_text) ocr_result = compare_texts(extracted_text, compare_text) if ocr_result == "True": break @@ -85,7 +86,7 @@ def evaluate_image(): award_text = extract_text(award_image_stream) award_ocr_result = compare_texts(award_text, compare_text) else: - award_ocr_result = "None" + award_ocr_result = "False" try: return jsonify({"ocrResult": ocr_result, diff --git a/ocr/o.py b/ocr/o.py index fecffc0..ee89ee3 100644 --- a/ocr/o.py +++ 
b/ocr/o.py @@ -1,8 +1,8 @@ import torch import os -import boto3 import numpy as np import cv2 +import requests from dotenv import load_dotenv from paddleocr import PaddleOCR from openai import OpenAI @@ -18,24 +18,22 @@ def download_image(img_path): """ - s3에서 이미지 다운로드 후 저장 + s3에서 이미지 다운로드 후 바이트스트림에 저장 Args: img_path (str): s3상에 이미지 경로 Returns: BytesIO: 이미지 데이터의 바이트스트림 객체 """ - s3 = boto3.client( - 's3', - aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), - aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), - region_name=os.getenv("AWS_REGION") # 원하는 리전 - ) - bucket_name='trendist' - file_name = img_path - image_stream = BytesIO() - s3.download_fileobj(bucket_name, file_name, image_stream) - image_stream.seek(0) + + presigned_url = img_path + print("presigned_url", presigned_url) + # 이미지 다운로드 (바이너리 형태) + response = requests.get(presigned_url) + + # 응답 확인(다운로드 실패 시 HTTPError 발생) 후 메모리에 저장 + response.raise_for_status() + image_stream = BytesIO(response.content) return image_stream diff --git a/requirements.in b/requirements.in index f4c73c7..d663ace 100644 --- a/requirements.in +++ b/requirements.in @@ -18,12 +18,12 @@ openai # OCR paddleocr paddlepaddle +# gpu 사용할 경우 해당 패키지 포함 +#paddlepaddle-gpu==2.5.0.post118 # MySQL mysql-connector-python -#boto3 -boto3 # 새로운 패키지 # package-name From c65ef565ea76fa9bea70879dea2c6d0ceb1bbdae Mon Sep 17 00:00:00 2001 From: urusekai Date: Fri, 30 May 2025 12:49:38 +0900 Subject: [PATCH 10/28] =?UTF-8?q?=ED=81=AC=EB=A1=A4=EB=9F=AC=EC=B6=94?= =?UTF-8?q?=EA=B0=80=20=EB=B0=8F=20db.py,=20ext.py,=20sum=5Ftranslate.py?= =?UTF-8?q?=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crawler/bbc_crawler.py | 128 ++++++++++++++++++++++ crawler/idealist_crawler.py | 146 +++++++++++++++++++++++++ crawler/keyword_extractor.py | 84 +++++++++++++++ crawler/main_crawler.py | 23 ++++ crawler/save_to_db.py | 84 +++++++++++++++ crawler/unv_crawler.py | 112 +++++++++++++++++++ crawler/v1365_crawler.py | 173 ++++++++++++++++++++++++++++++ crawler/wevity_crawler.py | 190 +++++++++++++++++++++++++++++++++ custom_keyword/ext.py | 12 ++- server/db.py | 25 ++++- summarization/sum_translate.py | 15 ++- 11 files changed, 974 insertions(+), 18 deletions(-) create mode 100644 crawler/bbc_crawler.py create mode 100644 crawler/idealist_crawler.py create mode 100644 crawler/keyword_extractor.py create mode 100644 crawler/main_crawler.py create mode 100644 crawler/save_to_db.py create mode 100644 crawler/unv_crawler.py create mode 100644 crawler/v1365_crawler.py create mode 100644 crawler/wevity_crawler.py diff --git a/crawler/bbc_crawler.py b/crawler/bbc_crawler.py new file mode 100644 index 0000000..f254e29 --- /dev/null +++ b/crawler/bbc_crawler.py @@ -0,0 +1,128 @@ +import requests +from crawler.keyword_extractor import extract_keyword +from summarization.sum_translate import translate_en_to_ko +from crawler.save_to_db import save_issues +from bs4 import BeautifulSoup +from datetime import datetime +from server.db import run_query + +BASE_URL = 'https://web-cdn.api.bbci.co.uk/xd/content-collection/' +COLLECTIONS = { + 'natural-wonders' : '9f0b9075-b620-4859-abdc-ed042dd9ee66', + 'weather-science' : '696fca43-ec53-418d-a42c-067cb0449ba9', + 'climate-solutions' : '5fa7bbe8-5ea3-4bc6-ac7e-546d0dc4a16b', +} +HEADERS = { + 'User-Agent': 'Mozilla/5.0' +} +SIZE = 9 + +def get_last_issue_date(): + sql = """ + SELECT MAX(issue_date) + FROM issues; + """ + result = run_query(sql) + + if result and result[0][0]: + dt = result[0][0] +
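# run_query가 돌려준 MAX(issue_date)는 datetime 객체라는 가정 하에, is_end 비교에 쓰는 문자열 형식으로 변환 +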
latest_issue_date = dt.strftime("%Y-%m-%d %H:%M:%S.%f") + return latest_issue_date + else: + return None + +def is_end(date, end_time): + date_dt = datetime.strptime(date, "%Y-%m-%d %H:%M:%S.%f") + end_time_dt = datetime.strptime(end_time, "%Y-%m-%d %H:%M:%S.%f") + return date_dt <= end_time_dt + +def get_datetime(time): + dt = datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%fZ") + return dt.strftime("%Y-%m-%d %H:%M:%S.%f") + +def get_content(url): + response = requests.get(url) + soup = BeautifulSoup(response.content, "html.parser") + content_divs = soup.find_all('div', attrs={'data-component': 'text-block'}) + contents = [div.get_text(strip=True) for div in content_divs] + full_content = '\n'.join(contents) if contents else "No Content" + + return full_content + +def get_articles(page, collection_id, end_time): + params = { + 'page': page, + 'size': SIZE, + } + + response = requests.get(BASE_URL + collection_id, params=params, headers=HEADERS) + + if not response: + return [] + + datas = response.json().get('data') + articles = [] + + for data in datas: + date = get_datetime(data['firstPublishedAt']) + + if end_time: + if is_end(date, end_time): + break + + title = translate_en_to_ko(data['title']) + keyword = extract_keyword(data['summary']) + summary = translate_en_to_ko(data['summary']) + url = "https://www.bbc.com" + data['path'] + image = data['indexImage']['model']['blocks']['src'] or None + + articles.append( + { + 'content': summary, + 'image_url': image, + 'issue_date': date, + 'keyword': keyword, + 'site_url': url, + 'title': title, + } + ) + print(f"[BBC] 크롤링 완료 : {title}") + + return articles + +def crawl(): + print("[BBC] 크롤링 시작") + results = [] + last_issue_date = get_last_issue_date() + + if last_issue_date: + print(f"[BBC] DB의 마지막 이슈 이후 데이터만 크롤링 시작 (DATE : {last_issue_date})") + else: + print(f"[BBC] DB에 이슈 없음, 모든 데이터 크롤링 시작") + + for category, collection_id in COLLECTIONS.items(): + # print(f"[BBC] 카테고리 {category} :") + page = 0 + + while True: + articles = get_articles(page, collection_id, last_issue_date) + + if not articles: + break + + results.extend(articles) + page += 1 + + if results: + print(f"[BBC] 크롤링 완료 : {len(results)}개의 이슈를 크롤링했습니다.") + save_issues(results) + else: + print("[BBC] 크롤링 완료 : 새로운 이슈가 없습니다.") + + + +def main(): + crawl() + +if __name__ == '__main__': + main() diff --git a/crawler/idealist_crawler.py b/crawler/idealist_crawler.py new file mode 100644 index 0000000..7313333 --- /dev/null +++ b/crawler/idealist_crawler.py @@ -0,0 +1,146 @@ +import requests +import json +from datetime import datetime, timedelta, timezone +from crawler.keyword_extractor import extract_keyword +from crawler.save_to_db import save_activities +from server.db import run_query + +ENDPOINT = "https://nsv3auess7-dsn.algolia.net/1/indexes/*/queries" +HEADERS = { + "Content-Type": "application/json", + "x-algolia-agent": "Algolia for JavaScript (5.20.0); Search (5.20.0); Browser", + "x-algolia-api-key": "c2730ea10ab82787f2f3cc961e8c1e06", + "x-algolia-application-id": "NSV3AUESS7" +} +DEFAULT_IMAGE_URL = "https://www.idealist.org/assets/417d88fd628db1c1ac861f3ea8db58c1a159d52a/images/icons/action-opps/action-opps-volunteermatch.svg" + +def get_last_timestamp(): + sql = """ + SELECT start_date + FROM activities + WHERE activity_site = 'IDEALIST' + ORDER BY start_date DESC + LIMIT 1; + """ + last_timestamp = run_query(sql) + + if last_timestamp: + dt = last_timestamp[0][0].replace(tzinfo=timezone.utc) + return int(dt.timestamp()) + else: + return 0 + +def build_payload(page, 
type='volunteer', timestamp=0): + if type == 'volunteer': + filters = f"actionType:'VOLOP' AND published > {timestamp}" + index_name = "idealist7-production-action-opps" + else: + filters = f"type:'INTERNSHIP' AND published > {timestamp}" + index_name = "idealist7-production" + + return { + "requests": [ + { + "indexName": index_name, + "facets": ["*"], + "hitsPerPage": 100, + "attributesToSnippet": ["description:20"], + "attributesToRetrieve": ["*"], + "filters": filters, + "removeStopWords": True, + "ignorePlurals": True, + "advancedSyntax": True, + "queryLanguages": ["en"], + "page": page, + "query": "", + "getRankingInfo": True, + "clickAnalytics": True, + "analytics": True + } + ] + } + +def get_url(item): + url = item.get("url") + if isinstance(url, str): + return url + elif isinstance(url, dict): + return "https://www.idealist.org" + next(iter(url.values()), "") + return "" + +def get_image(item): + img = item.get("imageUrl") or DEFAULT_IMAGE_URL + return img + +def get_published(item): + timestamp = item.get("published") + return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S.%f') + +def get_activities(page, timestamp, type): + payload = build_payload(page, type, timestamp) + response = requests.post(ENDPOINT, headers=HEADERS, json=payload) + + try: + data = response.json()["results"][0]["hits"] + except Exception as e: + print(f"[!] JSON 파싱 에러: {e}") + return None + + result = [] + + if data: + for item in data: + activity_type = "VOLUNTEER" if type=='volunteer' else 'INTERNSHIP' + activity_content = item.get("description") + activity_name = item.get("name") + activity_image_url = get_image(item) + activity_url = get_url(item) + start_date = get_published(item) + end_date = None + keyword = extract_keyword(activity_content) + + result.append( + { + "activity_site": "IDEALIST", + "activity_type": activity_type, + "activity_content": activity_content, + "end_date": end_date, + "activity_image_url": activity_image_url, + "keyword": keyword, + "activity_name": activity_name, + "site_url": activity_url, + "start_date": start_date + } + ) + print(f"[IDEALIST] 크롤링 완료 : {item.get('name', '')}") + return result + else: + return None + +def crawl(): + print("[IDEALIST] 크롤링 시작") + crawled_activities = [] + last_timestamp = get_last_timestamp() + + if last_timestamp > 0: + print(f"[IDEALIST] DB의 마지막 활동 이후 데이터만 크롤링 시작 (TIMESTAMP: {last_timestamp})") + else: + print(f"[IDEALIST] DB에 활동 없음, 모든 데이터 크롤링 시작") + + for type in ['volunteer', 'internship']: + page = 0 + while True: + activities = get_activities(page, last_timestamp, type) + if not activities: + break + crawled_activities.extend(activities) + page += 1 + + if crawled_activities: + print(f"[IDEALIST] 크롤링 완료 : {len(crawled_activities)}개의 활동을 크롤링했습니다.") + save_activities(crawled_activities) + else: + print("[IDEALIST] 크롤링 완료 : 새로운 활동이 없습니다.") + +if __name__ == "__main__": + crawl() diff --git a/crawler/keyword_extractor.py b/crawler/keyword_extractor.py new file mode 100644 index 0000000..8d817a1 --- /dev/null +++ b/crawler/keyword_extractor.py @@ -0,0 +1,84 @@ +import requests +import os +from dotenv import load_dotenv + +# .env 파일에서 환경변수 로드 +load_dotenv() + +# 키워드 후보 +KEYWORDS = ['Economy','Environment','PeopleAndSociety','Technology'] +MODEL = 'gemini-2.0-flash-lite' + +def extract_keyword(text: str) -> str: + """ + 봉사활동 내용을 입력받아 적절한 키워드를 반환합니다.
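+ Gemini 호출이 실패하거나 응답이 후보 목록에 없으면 기본값으로 KEYWORDS[0]을 반환합니다.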
+ + Parameters: + text (str): 봉사활동 내용 + + Returns: + str: 봉사활동 내용에 맞는 키워드 + """ + + # Gemini API 키 가져오기 + api_key = os.getenv('GEMINI_API_KEY') + if not api_key: + raise ValueError("GEMINI_API_KEY 환경변수가 설정되지 않았습니다.") + + # API 엔드포인트 URL + url = f"https://generativelanguage.googleapis.com/v1beta/models/{MODEL}:generateContent?key={api_key}" + + # 프롬프트 작성 + prompt = f""" +Read the following volunteer activity description and choose the **most appropriate keyword** from the provided list. + +Only output **one keyword**, exactly as it appears in the list. Do not add any extra words or punctuation. + +Volunteer Description: +{text} + +Keyword List: +{', '.join(KEYWORDS)} +""" + + # API 요청 데이터 준비 + payload = { + "contents": [{ + "parts": [{ + "text": prompt + }] + }] + } + + # API 호출 + headers = {'Content-Type': 'application/json'} + try: + response = requests.post(url, headers=headers, json=payload) + response.raise_for_status() # HTTP 에러 체크 + + # 응답 파싱 + result = response.json() + if 'candidates' in result and len(result['candidates']) > 0: + generated_text = result['candidates'][0]['content']['parts'][0]['text'] + # 응답에서 키워드만 추출 (앞뒤 공백 제거) + keyword = generated_text.strip() + + # 추출된 키워드가 후보 목록에 있는지 확인 + if keyword in KEYWORDS: + return keyword + else: + return KEYWORDS[0] # 기본값으로 첫 번째 키워드 반환 + + except Exception as e: + print(f"API 호출 중 오류 발생: {e}") + return KEYWORDS[0] # 오류 발생 시 기본값으로 첫 번째 키워드 반환 + + return KEYWORDS[0] # 기본값으로 첫 번째 키워드 반환 + +if __name__ == "__main__": + # 테스트용 예시 + text = """ + 'Are you passionate about creating a positive change in society? Our CBS-featured non-profit wants you to join us in making a difference. About Us: Bright Mind is an award-winning non-profit organization recognized for our innovative initiatives such as Wellness Week and Street Care. Our outreach has reached up to 60 million people and has been featured on CBS, Politico, ABC, and Newsweek. We are looking for a passionate and versatile volunteer to join our team. If you have a desire to make a positive impact in the lives of those experiencing homelessness, we would love to hear from you! Position Overview ● Bright Mind is seeking dedicated and compassionate individuals to join our Street Care team as Homelessness Volunteers. ● In this role, you will have the opportunity to make a tangible difference in the lives of those experiencing homelessness. ● You will work closely with our Community Outreach team to provide support, resources, and advocacy for homeless individuals and families. ● We have decades of experience providing aid to homeless and highly at risk people, and our program always places safety first. ● We have a variety of openings, whether you’re interested in going out on the street or looking to help in other ways. Key Responsibilities ● Direct Support: ○ Engage with homeless individuals and families to assess their needs and provide appropriate support. ○ Distribute essential items such as food, clothing, hygiene products, and blankets. ● Resource Connection: ○ Connect individuals with local services, including housing, medical care, job training, and mental health support. ○ Provide information about available resources and help individuals navigate the social services system. ● Advocacy and Education: ○ Participate in community education programs to inform the public about homelessness issues and how they can help. ○ Work with local businesses and organizations to secure support (notably in-kind, such as food and clothing) and collaborate on our homeless initiatives. 
● Event Coordination: ○ Assist in organizing and executing events such as donation drives, community meals, and health fairs. ○ Support the planning and logistics of outreach activities and special programs. ● Data Collection and Reporting: ○ Maintain accurate records of interactions and services provided to homeless individuals. ○ Assist with data collection and reporting to help track the impact of Bright Mind’s homelessness programs. Qualifications ● Skills and Competencies: ○ Strong interpersonal and communication skills. ○ Empathy, patience, and a non-judgmental attitude towards individuals experiencing homelessness. ○ Ability to work independently and as part of a team. ○ Flexibility and adaptability in a dynamic work environment. ○ Basic knowledge of social services and resources available for homeless individuals (preferred but not required). ● Experience: ○ Previous volunteer experience, especially in community outreach or working with vulnerable populations, is preferred but not required. ○ Experience in event coordination, advocacy, or data collection is a plus. ● Education: ○ Relevant coursework or training in social work, psychology, or a related field is welcomed. Benefits ● Opportunity to make a meaningful impact in the community. ● Hands-on experience in community outreach and social services. ● Professional development and training opportunities. ● Flexible volunteer schedules to accommodate your availability. Note: This is an unpaid position. Contact Us Please reach out to us at info@brightmindenrichment.org. To apply for this position, email your resume to hr@brightmindenrichment.org. Learn more about our initiatives at Street Care (https://streetcare.us/) and Bright Mind (https://brightmindenrichment.org/). Bright Mind is a federally-recognized 501(c)(3) wellness education non-profit and recipient of awards and certifications in recognition of our achievements.' 
+ """ + keyword = extract_keyword(text) + print(f"선택된 키워드: {keyword}") \ No newline at end of file diff --git a/crawler/main_crawler.py b/crawler/main_crawler.py new file mode 100644 index 0000000..8fc4910 --- /dev/null +++ b/crawler/main_crawler.py @@ -0,0 +1,23 @@ +from crawler.bbc_crawler import crawl as bbc_crawler +from crawler.wevity_crawler import crawl as wevity_crawler +from crawler.idealist_crawler import crawl as idealist_crawler +from crawler.unv_crawler import crawl as unv_crawler +from crawler.v1365_crawler import crawl as v1365_crawler + +if __name__ == "__main__": + # BBC News + # bbc_crawler() + + # WEVITY + wevity_crawler() + + # 1365 + v1365_crawler() + + # IDEALIST + # idealist_crawler() + + # UNVOLUNTEERS + # unv_crawler() + + \ No newline at end of file diff --git a/crawler/save_to_db.py b/crawler/save_to_db.py new file mode 100644 index 0000000..94ea4cc --- /dev/null +++ b/crawler/save_to_db.py @@ -0,0 +1,84 @@ +from server.db import run_query +import uuid + +def save_issues(issues): + if not issues: + print("[DB] 저장할 이슈가 없습니다.") + return + + print("[DB] 크롤링한 이슈 DB 저장 중...") + + sql = """ + INSERT IGNORE INTO issues ( + issue_id, + created_at, + content, + image_url, + issue_date, + keyword, + site_url, + title + ) VALUES (%s, UTC_TIMESTAMP(6), %s, %s, %s, %s, %s, %s) + """ + + values = [ + ( + uuid.uuid4().bytes, + issue['content'], + issue['image_url'], + issue['issue_date'], + issue['keyword'], + issue['site_url'], + issue['title'] + ) + for issue in issues + ] + + if values: + saved_rows = run_query(sql, values) + print(f"[DB] {saved_rows}개의 이슈가 저장되었습니다.") + +def save_activities(activities): + if not activities: + print("[DB] 저장할 활동이 없습니다.") + return + + print("[DB] 크롤링한 활동 DB 저장 중...") + + sql = """ + INSERT IGNORE INTO activities ( + created_at, + end_date, + start_date, + activity_id, + activity_image_url, + activity_name, + site_url, + activity_content, + activity_site, + activity_type, + keyword + ) VALUES (UTC_TIMESTAMP(6), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + """ + + values = [ + ( + activity['end_date'], + activity['start_date'], + uuid.uuid4().bytes, + activity['activity_image_url'], + activity['activity_name'], + activity['site_url'], + activity['activity_content'], + activity['activity_site'], + activity['activity_type'], + activity['keyword'] + ) + for activity in activities + ] + + if values: + saved_rows = run_query(sql, values) + print(f"[DB] {saved_rows}개의 활동이 저장되었습니다.") + + diff --git a/crawler/unv_crawler.py b/crawler/unv_crawler.py new file mode 100644 index 0000000..beea769 --- /dev/null +++ b/crawler/unv_crawler.py @@ -0,0 +1,112 @@ +import requests +from datetime import datetime, timezone +from crawler.keyword_extractor import extract_keyword +from crawler.save_to_db import save_activities +from server.db import run_query + +PAGE_ENDPOINT = "https://app.unv.org/api/doa/doa/SearchDoaAsyncByAzureCognitive" +DETAIL_ENDPOINT = "https://app.unv.org/api/doa/doa/" +URL_BASE = "https://app.unv.org/opportunities/" +HEADERS = { + "User-Agent": "Mozilla/5.0" +} +DEFAULT_IMAGE_URL = "https://www.unv.org/sites/default/files/unvol.png" + +def get_latest_activity_id(): + query = """ + SELECT CAST(SUBSTRING_INDEX(site_url, '/', -1) AS UNSIGNED) as activity_id + FROM activities + WHERE activity_site = "UNVOLUNTEERS" + ORDER BY activity_id DESC + LIMIT 1 + """ + result = run_query(query) + + return int(result[0][0]) if result else 0 + +def get_total_count(): + payload = { + "take": 1, + "skip": 0 + } + response = requests.post(PAGE_ENDPOINT, 
headers=HEADERS, json=payload) + data = response.json() + total_count = data["value"]["total"] + + return total_count + +def iso_to_utc(date_str): + if not date_str: + return None + + return datetime.fromisoformat(date_str) + +def fetch_activity_id_list(): + latest_activity_id = get_latest_activity_id() + total_count = get_total_count() + + # API 요청 + response = requests.post( + PAGE_ENDPOINT, + headers=HEADERS, + json={"skip": 0, "take": total_count} + ) + activities = response.json()["value"]["result"] + + # 마지막 활동이 있으면 그 이후의 데이터만, 없으면 전체 데이터를 가져옴 + if latest_activity_id > 0: + print(f"[UNV] DB의 마지막 활동 이후 데이터만 크롤링 시작 (ID : {latest_activity_id})") + return [activity["id"] for activity in activities if activity["id"] > latest_activity_id] + else: + print(f"[UNV] DB에 활동 없음, 모든 데이터 크롤링 시작") + return [activity["id"] for activity in activities] + +def fetch_activity_detail(activity_id_list): + activities = [] + + for activity_id in activity_id_list: + response = requests.get(DETAIL_ENDPOINT + str(activity_id), headers=HEADERS) + data = response.json()['value'] + + activity_content = ( + f"[Mission and objectives] : {data.get('organizationMission', '')}" + f"[Context] : {data.get('context', '')}" + f"[Task description] : {data.get('taskDescription', '')}" + f"[Required experience]: {data.get('requiredSkillExperience', '')}" + ) + activity_name = data.get("name") + start_date = iso_to_utc(data.get("publishDate")) + end_date = data.get("sourcingEndDate") + site_url = URL_BASE + str(activity_id) + keyword = extract_keyword(data.get('organizationMission') or activity_name) + + activities.append( + { + "activity_site": "UNVOLUNTEERS", + "activity_type": "VOLUNTEER", + "activity_content": activity_content, + "end_date": end_date, + "site_url": site_url, + "activity_image_url": DEFAULT_IMAGE_URL, + "keyword": keyword, + "activity_name": activity_name, + "start_date": start_date + }) + + print(f"[UNV] 활동 크롤링 완료 : {activity_name}") + + return activities + +def crawl(): + print("[UNV] 크롤링 시작") + activity_id_list = fetch_activity_id_list() + + if activity_id_list: + activities = fetch_activity_detail(activity_id_list) + print(f"[UNV] 크롤링 완료 : {len(activity_id_list)}개의 활동을 크롤링했습니다.") + save_activities(activities) + else: + print("[UNV] 크롤링 완료 : 새로운 활동이 없습니다.") + +if __name__ == "__main__": + crawl() \ No newline at end of file diff --git a/crawler/v1365_crawler.py b/crawler/v1365_crawler.py new file mode 100644 index 0000000..780fd56 --- /dev/null +++ b/crawler/v1365_crawler.py @@ -0,0 +1,173 @@ +import re +import httpx +import asyncio +import requests +from bs4 import BeautifulSoup +from server.db import run_query +from crawler.save_to_db import save_activities +from crawler.keyword_extractor import extract_keyword +from itertools import chain + +LIST_ENDPOINT = "https://www.1365.go.kr/vols/1572247904127/partcptn/timeCptn.do" +DETAIL_ENDPOINT = "https://www.1365.go.kr/vols/1572247904127/partcptn/timeCptn.do?type=show&progrmRegistNo=" +HEADERS = { + "User-Agent": "Mozilla/5.0" +} +DEFAULT_IMAGE_URL = "https://play-lh.googleusercontent.com/9Kheg_iekobkZlP9XzKtwv_j_YL88oVzHCtHe4_hIL3JcQabCL3FFEw4vKzL1XQc8GE" +BATCH_SIZE = 5 # 한번에 BATCH_SIZE개의 HTTP 요청을 보냄 +MAX_CRAWL_PAGE = 10 # 크롤링할 페이지 수 + +async def get_soup(url, params=None): + """URL에 GET 요청을 보내고 BeautifulSoup 객체를 반환""" + async with httpx.AsyncClient() as client: + response = await client.get(url, params=params, headers=HEADERS) + return BeautifulSoup(response.text, "html.parser") + +def get_exist_ids(): + """DB에서 이미 존재하는 모든 활동 ID들을 리스트로 반환""" + sql = """ 
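+ -- site_url에서 'progrmRegistNo=' 뒤의 숫자를 정수 ID로 파싱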
+ SELECT CAST( + SUBSTRING_INDEX(site_url, 'progrmRegistNo=', -1) AS UNSIGNED) AS id + FROM activities + WHERE activity_site = "KRVOLUNTEERS" + ORDER BY id DESC + """ + result = run_query(sql) + return [int(row[0]) for row in result] if result else [] + +def get_last_page(): + """1365사이트의 마지막 페이지 번호를 반환""" + params = { + "requstSe": "N", + "adultPosblAt": "Y", + "yngbgsPosblAt": "Y", + } + response = requests.get(LIST_ENDPOINT, params=params, headers=HEADERS) + soup = BeautifulSoup(response.text, "html.parser") + + btn_last = soup.find('a', class_='btn_last') + last_page = btn_last.get('href').split('=')[-1] + + return int(last_page) + +def extract_name(soup): + """상세 페이지에서 활동 이름을 추출""" + name = soup.select_one('h3.tit_board_view input').get('value') + return name if name else None + +def extract_dates(soup): + """상세 페이지에서 봉사기간 시작일, 종료일을 추출""" + period = soup.find('dt', string='봉사기간') + if period: + period = period.find_next('dd').text + start_date, end_date = period.split(' ~ ') + return start_date.replace('.', '-'), end_date.replace('.', '-') + return None, None + +def extract_content(soup): + """상세 페이지에서 활동 내용을 추출""" + pre_tag = soup.find('pre') + if pre_tag: + return re.sub(r'[\r\n]+', ' ', pre_tag.get_text(separator="\n", strip=True)) + return "" + +async def extract_ids(page): + """해당 페이지의 활동 ID들을 리스트 형태로 반환""" + params = { + "cPage": page, + "requstSe": "N", + "adultPosblAt": "Y", + "yngbgsPosblAt": "Y", + } + soup = await get_soup(LIST_ENDPOINT, params=params) + + id_list = [] + ul = soup.select_one("ul.list_wrap.wrap2") + if ul: + a_tags = ul.find_all("a", href=True) + for a in a_tags: + href = a['href'] + match = re.search(r'show\((\d+)\)', href) + if match: + id = int(match.group(1)) + id_list.append(id) + + return id_list + +async def fetch_detail(id): + """해당 ID에 해당하는 활동의 상세정보를 추출""" + url = f"{DETAIL_ENDPOINT}{id}" + soup = await get_soup(url) + if not soup: + return None + + start_date, end_date = extract_dates(soup) + activity_content = extract_content(soup) + keyword = extract_keyword(activity_content) + activity_name = extract_name(soup) + + return { + "activity_site": "KRVOLUNTEERS", + "activity_type": "VOLUNTEER", + "activity_content": activity_content, + "end_date": end_date, + "activity_image_url": DEFAULT_IMAGE_URL, + "keyword": keyword, + "activity_name": activity_name, + "site_url": url, + "start_date": start_date + } + +async def crawl_async(): + """비동기적으로 1365 자원봉사 사이트에서 활동 정보를 수집""" + last_page = get_last_page() + start_page = max(last_page - MAX_CRAWL_PAGE, 1) # 시작할 페이지 계산 + exist_ids = get_exist_ids() + id_list = [] + activities = [] + print(f"[1365] 최근 {MAX_CRAWL_PAGE} 개의 페이지 ({start_page} ~ {last_page}) 에서 ID 수집중... 
") + + # ID 수집 (start_page부터 last_page까지 BATCH_SIZE씩 증가) + for start in range(start_page, last_page + 1, BATCH_SIZE): + tasks = [] + end = min(start + BATCH_SIZE, last_page + 1) + + for current_page in range(start, end): + tasks.append(extract_ids(current_page)) + + result = await asyncio.gather(*tasks) + id_list.extend(chain.from_iterable(result)) + + # DB와 비교하여 새로운 ID만 남김 + filtered_id_list = list(set(id_list) - set(exist_ids)) + if not filtered_id_list: + return [] + print(f"[1365] {len(filtered_id_list)} 개의 새로운 활동 ID 수집 완료") + + # DB에 없는 새로운 활동의 상세정보 수집 (BATCH_SIZE 단위로) + for i in range(0, len(filtered_id_list), BATCH_SIZE): + batch = filtered_id_list[i:i + BATCH_SIZE] + detail_tasks = [fetch_detail(id) for id in batch] + try: + print(f"[1365] {len(filtered_id_list)} 개의 활동 중 {i+1} ~ {i+BATCH_SIZE} 의 상세정보 수집 중...") + results = await asyncio.gather(*detail_tasks) + # None이 아닌 결과만 추가 + activities.extend([r for r in results if r is not None]) + except Exception as e: + print(f"Error processing batch {i}: {e}") + continue + + return activities + +def crawl(): + """외부 호출용 크롤링 함수""" + print("[1365] 크롤링 시작") + activities = asyncio.run(crawl_async()) + if activities: + print(f"[1365] 크롤링 완료 : {len(activities)}개의 활동을 크롤링했습니다.") + save_activities(activities) + else: + print("[1365] 크롤링 완료 : 새로운 활동이 없습니다.") + +if __name__ == '__main__': + crawl() \ No newline at end of file diff --git a/crawler/wevity_crawler.py b/crawler/wevity_crawler.py new file mode 100644 index 0000000..c682a4d --- /dev/null +++ b/crawler/wevity_crawler.py @@ -0,0 +1,190 @@ +import requests +import time +from bs4 import BeautifulSoup +import re +from datetime import datetime +from urllib.parse import urlparse, parse_qs +from server.db import run_query +from crawler.save_to_db import save_activities +from crawler.keyword_extractor import extract_keyword + +BASE_URL = "https://www.wevity.com" +FILE_NAME = "data/wevity_data.json" +HEADERS = { + "User-Agent": "Mozilla/5.0" +} +MAX_CRAWL_PAGE = 10 + +def get_soup(url): + """웹 페이지를 요청하고 BeautifulSoup 객체 반환""" + response = requests.get(url, headers=HEADERS) + response.raise_for_status() + return BeautifulSoup(response.text, "html.parser") + +def is_special_activity(a_tag): + """SPECIAL 게시물인지 확인""" + return bool(a_tag.select_one("span.stat.spec")) + +def get_latest_activity_id(): + """DB에서 가장 마지막 활동을 조회""" + sql = """ + SELECT CAST( + SUBSTRING_INDEX( + SUBSTRING_INDEX(site_url, 'ix=', -1), + '&', 1 + ) AS UNSIGNED + ) as activity_id + FROM activities + WHERE activity_site = "WEVITY" + ORDER BY activity_id DESC + LIMIT 1; + """ + result = run_query(sql) + + if result and result[0][0]: + return int(result[0][0]) + else: + return 0 + +def get_image_url(soup): + """썸네일 이미지 URL 추출""" + img_tag = soup.select_one("div.thumb img") + if not img_tag or not img_tag.has_attr("src"): + return "" + img_src = img_tag["src"] + return BASE_URL + img_src if img_src.startswith("/") else img_src + +def get_date_range(soup): + """접수기간 추출""" + for li in soup.select("li"): + if "접수기간" in li.get_text(): + match = re.search(r'(\d{4}-\d{2}-\d{2})\s*~\s*(\d{4}-\d{2}-\d{2})', li.get_text()) + if match: + try: + start_date = datetime.strptime(match.group(1), "%Y-%m-%d").replace(hour=0, minute=0, second=0, microsecond=0).isoformat() + end_date = datetime.strptime(match.group(2), "%Y-%m-%d").replace(hour=23, minute=59, second=59, microsecond=999999).isoformat() + return start_date, end_date + except ValueError: + pass + return None, None + +def get_activity_type(soup): + """활동 유형(카테고리) 결정""" + li_tag = 
soup.select_one("ul.cd-info-list li") + + if li_tag: + span_tag = li_tag.find("span", class_="tit") + span_tag.decompose() + category_text = li_tag.get_text(strip=True) + + if category_text == "대외활동/서포터즈": + return "SUPPORTERS" + elif category_text == "봉사활동": + return "VOLUNTEER" + else: + return "CONTEST" + +def get_activity_urls(list_url, last_activity_id): + """활동 목록 페이지에서 새로운 활동 URL들을 수집""" + activity_urls = [] + soup = get_soup(list_url) + activity_items = soup.select("ul.list li") + + for item in activity_items: + # 진행 중인 게시물만 처리 + if item.select_one("span.dday.end"): + continue + + link_tag = item.select_one("a") + if not link_tag: + continue + + activity_url = BASE_URL + link_tag['href'] + # URL에서 활동 ID 추출 + parsed = urlparse(activity_url) + query_params = parse_qs(parsed.query) + current_activity_id = int(query_params.get('ix', ['0'])[0]) + + # ID값이 ID의 마지막 활동 id보다 크면 추가 + if current_activity_id > last_activity_id: + activity_urls.append(activity_url) + # 특별 게시물이 아닌 경우에 ix 값이 작거나 같으면 더 이상 새로운 게시물이 없으므로 종료 + elif not is_special_activity(link_tag): + return activity_urls + + return activity_urls + +def get_activity_detail(url): + """활동 상세 페이지에서 데이터 추출""" + try: + soup = get_soup(url) + + activity_type = get_activity_type(soup) + activity_content = soup.select_one("#viewContents").get_text(strip=True) or None + activity_name = soup.select_one("h6.tit").get_text(strip=True) or None + start_date, end_date = get_date_range(soup) + activity_image_url = get_image_url(soup) + keyword = extract_keyword(activity_content) + + return { + "activity_site": "WEVITY", + "activity_type": activity_type, + "activity_content": activity_content, + "end_date": end_date, + "activity_image_url": activity_image_url, + "keyword": keyword, + "activity_name": activity_name, + "site_url": url, + "start_date": start_date + } + + except Exception as e: + print(f"[ERROR] {url} 에서 오류 발생: {e}") + return None + +def crawl(): + """위비티 활동 크롤링 실행""" + print("[WEVITY] 크롤링 시작") + + last_activity_id = get_latest_activity_id() + + if last_activity_id > 0: + print(f"[WEVITY] DB의 마지막 활동 이후 데이터만 크롤링 시작 (ID : {last_activity_id})") + else: + print(f"[WEVITY] DB에 활동 없음, 모든 데이터 크롤링 시작") + + collected_urls = [] + page = 1 + + print("[WEVITY] 페이지별 활동 링크 수집 중...") + while True and page <= MAX_CRAWL_PAGE: + paged_url = f"{BASE_URL}/?c=find&s=1&gp={str(page)}" + try: + new_urls = get_activity_urls(paged_url, last_activity_id) + if not new_urls: + break + collected_urls.extend(new_urls) + page += 1 + except Exception as e: + print(f"[ERROR] 목록 페이지 {paged_url} 에서 오류 발생: {e}") + break + + crawled_activities = [] + if collected_urls: + print(f"[WEVITY] {len(collected_urls)}개의 활동 링크 수집 완료") + print("[WEVITY] 활동 상세내용 크롤링 중...") + for url in collected_urls: + activity_data = get_activity_detail(url) + time.sleep(1.1) # LLM API 요청 간 시간 간격을 두기 위해 1.1초 대기 + if activity_data: + crawled_activities.append(activity_data) + print(f"[WEVITY] 활동 크롤링 완료 : {activity_data['activity_name']}") + + if crawled_activities: + print(f"[WEVITY] 크롤링 완료 : {len(crawled_activities)}개의 활동을 크롤링했습니다.") + save_activities(crawled_activities) + else: + print("[WEVITY] 크롤링 완료 : 새로운 활동이 없습니다.") + +if __name__ == "__main__": + crawl() diff --git a/custom_keyword/ext.py b/custom_keyword/ext.py index 5dedb70..d413803 100644 --- a/custom_keyword/ext.py +++ b/custom_keyword/ext.py @@ -1,11 +1,9 @@ from transformers import BertTokenizer, BertModel from sklearn.metrics.pairwise import cosine_similarity -import torch - tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') 
model = BertModel.from_pretrained('bert-base-uncased') -domain_keywords = ["environment", "Society", "Economic", "technology"] +domain_keywords = ["Economy", "Environment", "Technology", "People", "Society"] def get_embeddings(text: str): inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512) @@ -22,7 +20,7 @@ def extract_keywords(question: str): Args: question (str): 키워드를 추출하고자 하는 문자열 Returns: - str: 가장 유사도가 높은 키워드 + str: 가장 유사도가 높은 키워드 (DB enum 형식) """ sentence_embedding = get_embeddings(question) domain_embeddings = [get_embeddings(keyword) for keyword in domain_keywords] @@ -30,4 +28,8 @@ def extract_keywords(question: str): (keyword, calculate_cosine_similarity(sentence_embedding, embedding)[0][0]) for keyword, embedding in zip(domain_keywords, domain_embeddings) ] - return max(similarities, key=lambda x: x[1])[0] + extracted_keyword = max(similarities, key=lambda x: x[1])[0] + if extracted_keyword.lower() in ["people", "society"]: + return "PeopleAndSociety" + else: + return extracted_keyword diff --git a/server/db.py b/server/db.py index 8bb8d5b..eb8ff68 100644 --- a/server/db.py +++ b/server/db.py @@ -16,8 +16,23 @@ def run_query(query: str, params=None): conn = pool.get_connection() cursor = conn.cursor() - cursor.execute(query, params) - results = cursor.fetchall() - cursor.close() - conn.close() # 풀로 반환 - return results + try: + affected_rows = 0 + + # 여러 레코드 삽입인 경우 + if params and isinstance(params, list) and isinstance(params[0], tuple): + cursor.executemany(query, params) + affected_rows = cursor.rowcount + else: + cursor.execute(query, params) + affected_rows = cursor.rowcount + + # SELECT 처리 + if query.strip().lower().startswith("select"): + return cursor.fetchall() + else: + conn.commit() + return affected_rows + finally: + cursor.close() + conn.close() \ No newline at end of file diff --git a/summarization/sum_translate.py b/summarization/sum_translate.py index 54efe88..db9092c 100644 --- a/summarization/sum_translate.py +++ b/summarization/sum_translate.py @@ -9,31 +9,30 @@ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) model = "gpt-4" -def summarize_translate_en_to_ko(text: str) -> str: +def translate_en_to_ko(text: str) -> str: """ - 영어 텍스트를 한국어로 번역하고 요약. + 영어 텍스트를 한국어로 번역. Args: text (str): 번역하고자 하는 원문(영어) 텍스트 Returns: - str: 요약된 한국어 번역 결과 + str: 한국어 번역 결과 """ prompt = f""" - Translate and summarize the following English text **into Korean** in **one or two sentences only**. - Focus on capturing the key message, and write naturally in Korean. + Translate the following English text **into Korean**. + Maintain the original tone and context as accurately as possible. 
-
     Text: {text}
     """

     response = client.chat.completions.create(
         model=model,
         messages=[
-            {"role": "system", "content": "You are a professional translator and summarizer."},
+            {"role": "system", "content": "You are a professional translator."},
             {"role": "user", "content": prompt}
         ],
         temperature=0.3,
         max_tokens=600
     )

-    return response.choices[0].message.content.strip()
\ No newline at end of file
+    return response.choices[0].message.content.strip()

From cd8dfe39601afdd6b52754a86ae9da1d24a33aff Mon Sep 17 00:00:00 2001
From: seominjae1
Date: Sat, 31 May 2025 11:15:34 +0900
Subject: [PATCH 11/28] =?UTF-8?q?paddleocr=20=EB=8C=80=EC=8B=A0=20gemini?=
 =?UTF-8?q?=EB=A1=9C=20=EB=B3=80=EA=B2=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switched the OCR step from PaddleOCR to Gemini to reduce processing time.

---
 ocr/__init__.py  |  27 +++------
 ocr/o.py         | 126 ++++++++++++++++++---------------
 requirements.in  |   7 +--
 requirements.txt | 142 ++++++++--------------------------------------
 4 files changed, 91 insertions(+), 211 deletions(-)

diff --git a/ocr/__init__.py b/ocr/__init__.py
index 6426c86..1cf6402 100644
--- a/ocr/__init__.py
+++ b/ocr/__init__.py
@@ -2,7 +2,7 @@
 from flasgger import Swagger, swag_from
 
 from server.logger import logger
-from .o import download_image, extract_text, compare_texts
+from .o import is_review_valid
 
 ocr_bp = Blueprint('ocr', __name__, url_prefix='/ocr')
 
@@ -63,28 +63,17 @@ def evaluate_image():
 
     data=request.get_json()
 
-    review_img_path=data.get("imageUrls")
-    award_img_path=data.get("awardImgUrl")
-    compare_text=data.get("title")
+    imageUrls=data.get("imageUrls")
+    awardImgUrl=data.get("awardImgUrl")
+    title=data.get("title")
 
-    print(review_img_path)
-    print(award_img_path)
-    print(compare_text)
     # OCR 실행
-    if review_img_path:
+    if imageUrls:
         # ocr결과의 기본값은 False
         ocr_result = "False"
-        for img_url in review_img_path:
-            image_stream = download_image(img_url)
-            extracted_text = extract_text(image_stream)
-            print(extracted_text)
-            ocr_result = compare_texts(extracted_text, compare_text)
-            if ocr_result == "True":
-                break
-    if award_img_path != None:
-        award_image_stream = download_image(award_img_path)
-        award_text = extract_text(award_image_stream)
-        award_ocr_result = compare_texts(award_text, compare_text)
+        ocr_result=is_review_valid(title, imageUrls)
+    if awardImgUrl != None:
+        award_ocr_result=is_review_valid(title,awardImgUrl)
     else:
         award_ocr_result = "False"
 
diff --git a/ocr/o.py b/ocr/o.py
index ee89ee3..4dc288f 100644
--- a/ocr/o.py
+++ b/ocr/o.py
@@ -1,89 +1,79 @@
-import torch
 import os
-import numpy as np
-import cv2
 import requests
+from google import genai
+from google.genai import types
 from dotenv import load_dotenv
-from paddleocr import PaddleOCR
-from openai import OpenAI
-from io import BytesIO
 
 # .env파일 로드
 load_dotenv()
 
-# 환경 변수에서 API 키 가져오기
-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-model = "gpt-4"
-ocr = PaddleOCR(lang="korean")
+api_key = os.getenv("GEMINI_API_KEY")
+MODEL_NAME = "gemini-1.5-flash"
 
-def download_image(img_path):
+def is_review_valid(title: str, image_urls: list[str]) -> bool:
     """
-    s3에서 이미지 다운로드 후 바이트스트림에 저장
+    리뷰 제목과 이미지들을 기반으로 리뷰가 유효한지 판단합니다.
Args: - img_path (str): s3상에 이미지 경로 + title (str): 리뷰 제목 + image_urls (list[str]): 이미지 URL 리스트 (최대 5개 권장) + Returns: - BytesIO: 이미지 데이터의 바이트스트림 객체 + bool: 리뷰가 유효하면 True, 그렇지 않으면 False """ + + image_parts = [] - presigned_url = img_path - print("presigned_url", presigned_url) - # 이미지 다운로드 (바이너리 형태) - response = requests.get(presigned_url) - -# 응답 확인 및 메모리에 저장 - if response.status_code == 200: - image_stream = BytesIO(response.content) - - return image_stream + # 각 이미지 URL을 순회하며 이미지 바이트를 가져와 types.Part 객체로 변환 + for url in image_urls[:5]: # 최대 5개 이미지만 처리 + if url: # URL이 비어있지 않은지 확인 + try: + response = requests.get(url, timeout=5) # 타임아웃 추가 + response.raise_for_status() # HTTP 오류 (4xx, 5xx) 발생 시 예외 발생 + + # MIME 타입 확인 (없으면 기본값 사용) + content_type = response.headers.get('Content-Type', 'image/jpeg') + + # 이미지 바이트를 types.Part 객체로 변환하고 리스트에 추가 + image_part = types.Part.from_bytes(data=response.content, mime_type=content_type) + image_parts.append(image_part) + except requests.exceptions.RequestException as e: + print(f"경고: 이미지를 가져오거나 처리하는 데 실패했습니다. URL: {url}, 오류: {e}") + continue + except Exception as e: + print(f"경고: 이미지 {url} 처리 중 예상치 못한 오류 발생: {e}") + continue + if not image_parts: + print("경고: 유효한 이미지를 찾거나 가져오지 못했습니다. False를 반환합니다.") + return False # 이미지가 없거나 모두 실패하면 유효하지 않다고 판단 -def extract_text(image_stream): - """ - BytesIO 객체의 이미지를 대상으로 OCR 수행 - - Args: - image_stream (BytesIO): 메모리에 저장된 이미지 데이터 - Returns: - list: OCR 결과 + prompt = f"""리뷰 제목: "{title}" + 리뷰의 제목과, 이미지들에 포함된 텍스트를 하나씩 비교합니다. + 하나라도 맞는 경우 문자열 True를 모두 아닐 경우 False를 반환합니다. """ - # 스트림을 numpy 배열로 변환 - image_stream.seek(0) # 읽기 위치 초기화 - file_bytes = np.frombuffer(image_stream.getvalue(), dtype=np.uint8) - img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) # OpenCV를 사용하여 이미지 디코딩 - # OCR 수행 - ocr = PaddleOCR(lang='korean') # 언어 설정 가능 - results = ocr.ocr(img, cls=True) - return " ".join(text for result in results for _, (text, _) in result) - -def compare_texts(text1, text2): - """ - ocr로 추출한 텍스트와 활동 제목 간의 관계 분석 - Args: - text1 (str): 이미지에서 추출한 문자열 - text2 (str): 활동 제목에서의 문자열 - Returns: - str: 관련이 있다 판단 시 True / 없다 판단 시 False를 반환 - """ - prompt = f""" - Analyze the relationship between the following two texts. Determine whether they are conceptually or contextually related. - If they are related, return True; otherwise, return False without additional explanation + # 텍스트 프롬프트와 모든 이미지 파트를 contents 리스트로 결합 + contents = [prompt] + image_parts - Text 1: - {text1} - Text 2: - {text2} - """ + client = genai.Client(api_key=api_key) + try: + response = client.models.generate_content( + model=MODEL_NAME, + contents=contents, + config={ + # 응답형식을 True, False로 제한 + 'response_mime_type': 'text/x.enum', + 'response_schema': { + "type": "STRING", + "enum": ["True", "False"] + } + } + ) - response = client.chat.completions.create( - model=model, - messages=[ - {"role": "system", "content": "You are an objective analyst. 
Compare the following two texts and determine their relationship strictly based on content."}, - {"role": "user", "content": prompt} - ], - temperature=0, - max_tokens=600 - ) + print("responsetext: ",response.text) + return response.text.strip() == "True" - return response.choices[0].message.content.strip() \ No newline at end of file + except Exception as e: + print(f"API 호출 실패: {e}") + return False # API 호출 실패 시 유효하지 않다고 판단 \ No newline at end of file diff --git a/requirements.in b/requirements.in index d663ace..80f725d 100644 --- a/requirements.in +++ b/requirements.in @@ -14,12 +14,7 @@ scikit-learn # Generative AI openai - -# OCR -paddleocr -paddlepaddle -# gpu 사용할 경우 해당 패키지 포함 -#paddlepaddle-gpu==2.5.0.post118 +google-genai>=0.1.0 # MySQL mysql-connector-python diff --git a/requirements.txt b/requirements.txt index ee748ff..b02b51a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,34 +4,21 @@ # # pip-compile requirements.in # -albucore==0.0.24 - # via - # albumentations - # paddleocr -albumentations==2.0.6 - # via paddleocr annotated-types==0.7.0 # via pydantic anyio==4.9.0 # via + # google-genai # httpx # openai -astor==0.8.1 - # via paddlepaddle attrs==25.3.0 # via # jsonschema # referencing -beautifulsoup4==4.13.4 - # via paddleocr blinker==1.9.0 # via flask -boto3==1.38.23 - # via -r requirements.in -botocore==1.38.23 - # via - # boto3 - # s3transfer +cachetools==5.5.2 + # via google-auth certifi==2025.1.31 # via # httpcore @@ -45,10 +32,6 @@ colorama==0.4.6 # via # click # tqdm -cython==3.1.0 - # via paddleocr -decorator==5.2.1 - # via paddlepaddle distro==1.9.0 # via openai filelock==3.18.0 @@ -56,8 +39,6 @@ filelock==3.18.0 # huggingface-hub # torch # transformers -fire==0.7.0 - # via paddleocr flasgger==0.9.7.1 # via -r requirements.in flask==3.1.0 @@ -67,20 +48,22 @@ flask==3.1.0 # flask-cors flask-cors==5.0.1 # via -r requirements.in -fonttools==4.58.0 - # via paddleocr fsspec==2025.3.2 # via # huggingface-hub # torch +google-auth==2.40.2 + # via google-genai +google-genai==1.18.0 + # via -r requirements.in h11==0.16.0 # via httpcore httpcore==1.0.9 # via httpx httpx==0.28.1 # via + # google-genai # openai - # paddlepaddle huggingface-hub==0.31.1 # via # tokenizers @@ -90,8 +73,6 @@ idna==3.10 # anyio # httpx # requests -imageio==2.37.0 - # via scikit-image itsdangerous==2.2.0 # via flask jinja2==3.1.6 @@ -100,22 +81,12 @@ jinja2==3.1.6 # torch jiter==0.9.0 # via openai -jmespath==1.0.1 - # via - # boto3 - # botocore joblib==1.5.0 # via scikit-learn jsonschema==4.23.0 # via flasgger jsonschema-specifications==2025.4.1 # via jsonschema -lazy-loader==0.4 - # via scikit-image -lmdb==1.6.2 - # via paddleocr -lxml==5.4.0 - # via python-docx markupsafe==3.0.2 # via # jinja2 @@ -127,81 +98,38 @@ mpmath==1.3.0 mysql-connector-python==9.3.0 # via -r requirements.in networkx==3.4.2 - # via - # paddlepaddle - # scikit-image - # torch + # via torch numpy==2.2.5 # via - # albucore - # albumentations - # imageio - # opencv-contrib-python - # opencv-python - # opencv-python-headless - # opt-einsum - # paddleocr - # paddlepaddle - # scikit-image # scikit-learn # scipy - # shapely - # tifffile # transformers openai==1.78.1 # via -r requirements.in -opencv-contrib-python==4.11.0.86 - # via paddleocr -opencv-python==4.11.0.86 - # via paddleocr -opencv-python-headless==4.11.0.86 - # via - # albucore - # albumentations -opt-einsum==3.3.0 - # via paddlepaddle packaging==25.0 # via # flasgger # huggingface-hub - # lazy-loader - # scikit-image # transformers -paddleocr==2.10.0 - # via -r 
requirements.in -paddlepaddle==3.0.0 - # via -r requirements.in -pillow==11.2.1 +pyasn1==0.6.1 # via - # imageio - # paddleocr - # paddlepaddle - # scikit-image -protobuf==6.31.0 - # via paddlepaddle -pyclipper==1.3.0.post6 - # via paddleocr + # pyasn1-modules + # rsa +pyasn1-modules==0.4.2 + # via google-auth pydantic==2.11.4 # via - # albumentations + # google-genai # openai pydantic-core==2.33.2 # via pydantic -python-dateutil==2.9.0.post0 - # via botocore -python-docx==1.1.2 - # via paddleocr python-dotenv==1.1.0 # via -r requirements.in pyyaml==6.0.2 # via - # albumentations # flasgger # huggingface-hub - # paddleocr # transformers -rapidfuzz==3.13.0 - # via paddleocr referencing==0.36.2 # via # jsonschema @@ -211,50 +139,31 @@ regex==2024.11.6 requests==2.32.3 # via # -r requirements.in + # google-genai # huggingface-hub - # paddleocr # transformers rpds-py==0.24.0 # via # jsonschema # referencing -s3transfer==0.13.0 - # via boto3 +rsa==4.9.1 + # via google-auth safetensors==0.5.3 # via transformers -scikit-image==0.25.2 - # via paddleocr scikit-learn==1.6.1 # via -r requirements.in scipy==1.15.3 - # via - # albumentations - # scikit-image - # scikit-learn -shapely==2.1.0 - # via paddleocr -simsimd==6.2.1 - # via albucore + # via scikit-learn six==1.17.0 - # via - # flasgger - # python-dateutil + # via flasgger sniffio==1.3.1 # via # anyio # openai -soupsieve==2.7 - # via beautifulsoup4 -stringzilla==3.12.5 - # via albucore sympy==1.14.0 # via torch -termcolor==3.1.0 - # via fire threadpoolctl==3.6.0 # via scikit-learn -tifffile==2025.5.10 - # via scikit-image tokenizers==0.21.1 # via transformers torch==2.7.0 @@ -263,29 +172,26 @@ tqdm==4.67.1 # via # huggingface-hub # openai - # paddleocr # transformers transformers==4.51.3 # via -r requirements.in typing-extensions==4.13.2 # via # anyio - # beautifulsoup4 + # google-genai # huggingface-hub # openai - # paddlepaddle # pydantic # pydantic-core - # python-docx # referencing # torch # typing-inspection typing-inspection==0.4.0 # via pydantic urllib3==2.3.0 - # via - # botocore - # requests + # via requests +websockets==15.0.1 + # via google-genai werkzeug==3.1.3 # via # flask From abca923a3308e88280b450f408b65c7be411de1b Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Sun, 1 Jun 2025 11:44:11 +0900 Subject: [PATCH 12/28] =?UTF-8?q?api=EC=98=A4=EB=A5=98=20=EC=88=98?= =?UTF-8?q?=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit awardImgUrl->awardImageUrl로 받아오도록 수정 --- ocr/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ocr/__init__.py b/ocr/__init__.py index 1cf6402..2f7b449 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -18,12 +18,12 @@ 'schema': { 'type': 'object', 'properties': { - 'image_urls': { + 'imageUrls': { 'type': 'array', 'items': {'type': 'string'}, 'description': '검토할 이미지 URL 리스트' }, - 'award_img_urls': { + 'awardImageUrl': { 'type': 'string', 'description': '수상 이미지의 URL' }, @@ -64,7 +64,7 @@ def evaluate_image(): data=request.get_json() imageUrls=data.get("imageUrls") - awardImgUrl=data.get("awardImgUrl") + awardImageUrl=data.get("awardImageUrl") title=data.get("title") # OCR 실행 @@ -72,8 +72,8 @@ def evaluate_image(): # ocr결과의 기본값은 False ocr_result = "False" ocr_result=is_review_valid(title, imageUrls) - if awardImgUrl != None: - award_ocr_result=is_review_valid(title,awardImgUrl) + if awardImageUrl != None: + award_ocr_result=is_review_valid(title,awardImageUrl) else: award_ocr_result = "False" From 
217814838e769e807c65708d35d6c218cecc925c Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Sun, 1 Jun 2025 13:33:14 +0900 Subject: [PATCH 13/28] =?UTF-8?q?ocr=EC=98=A4=EB=A5=98=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit awardocrimage가 잘못 인식되어 사진 체크가 되지 않음을 수정 --- ocr/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ocr/__init__.py b/ocr/__init__.py index 2f7b449..e9875a8 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -71,9 +71,11 @@ def evaluate_image(): if imageUrls: # ocr결과의 기본값은 False ocr_result = "False" - ocr_result=is_review_valid(title, imageUrls) + ocr_result=is_review_valid(title, imageUrls) if awardImageUrl != None: - award_ocr_result=is_review_valid(title,awardImageUrl) + awardImageUrlList=[awardImageUrl] + print("awardImgUrl",awardImageUrlList) + award_ocr_result=is_review_valid(title,awardImageUrlList) else: award_ocr_result = "False" From 32803bb44b8d99b950af5d86bf229cb72280413b Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 13:18:22 +0900 Subject: [PATCH 14/28] =?UTF-8?q?=EA=B9=83=ED=97=88=EB=B8=8C=20=EC=95=A1?= =?UTF-8?q?=EC=85=98=EC=9D=84=20=EC=9D=B4=EC=9A=A9=ED=95=9C=20ci/cd?= =?UTF-8?q?=EA=B5=AC=EC=B6=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.서버에 접속해서 git pull origin main실행 2.패키지 업데이트 및 설치 3.애플리케이션 실행 위의 과정이 순차로 이루어집니다 --- .github/workflows/deploy.yml | 19 +++++++++++++++++++ deploy_script.sh | 10 ++++++++++ 2 files changed, 29 insertions(+) create mode 100644 .github/workflows/deploy.yml create mode 100644 deploy_script.sh diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000..d39308b --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,19 @@ +name: Deploy with GitHub Actions + +on: + push: + branches: + - main + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Setup SSH Key + run: | + echo "${{ secrets.SSH_PRIVATE_KEY }}" > id_rsa.pem + chmod 600 id_rsa.pem + ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd HIGHFIVE-AI/ && git pull origin main && ./deploy_script.sh" \ No newline at end of file diff --git a/deploy_script.sh b/deploy_script.sh new file mode 100644 index 0000000..a113942 --- /dev/null +++ b/deploy_script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +cd HIGHFIVE-AI/ || exit + +# 패키지 업데이트 및 설치 +pip-compile requirements.in +pip install -r requirements.txt + +# 애플리케이션 실행 +python app.py \ No newline at end of file From 5704ad8f5b107800d23d5c052f6edae861dc0afb Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 13:22:06 +0900 Subject: [PATCH 15/28] =?UTF-8?q?ci/cd=ED=85=8C=EC=8A=A4=ED=8A=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index e1ba0c4..8ec10be 100644 --- a/app.py +++ b/app.py @@ -6,7 +6,7 @@ from flasgger import Swagger from server.logger import logger - +#test # 현재 app.py 파일의 디렉토리 경로를 sys.path에 추가 current_dir = os.path.dirname(os.path.abspath(__file__)) if current_dir not in sys.path: From 14de5314fac912acf3a97f3c221de332b81872ab Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 13:25:15 +0900 Subject: [PATCH 16/28] =?UTF-8?q?ci/cd=ED=85=8C=EC=8A=A4=ED=8A=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- ocr/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ocr/__init__.py b/ocr/__init__.py index e9875a8..aa3b6f0 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -74,7 +74,6 @@ def evaluate_image(): ocr_result=is_review_valid(title, imageUrls) if awardImageUrl != None: awardImageUrlList=[awardImageUrl] - print("awardImgUrl",awardImageUrlList) award_ocr_result=is_review_valid(title,awardImageUrlList) else: award_ocr_result = "False" From 7c775e041e2827f723344914630d03968b2db76f Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 13:36:51 +0900 Subject: [PATCH 17/28] =?UTF-8?q?ci/cd=ED=85=8C=EC=8A=A4=ED=8A=B82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocr/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ocr/__init__.py b/ocr/__init__.py index aa3b6f0..afae7a1 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -73,6 +73,7 @@ def evaluate_image(): ocr_result = "False" ocr_result=is_review_valid(title, imageUrls) if awardImageUrl != None: + print(awardImageUrl) awardImageUrlList=[awardImageUrl] award_ocr_result=is_review_valid(title,awardImageUrlList) else: From 3584e1bd98bc260d09f6d96882b72f2611a43c0e Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 13:41:52 +0900 Subject: [PATCH 18/28] =?UTF-8?q?ci/cd=EC=98=A4=EB=A5=98=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy.yml | 2 +- deploy_script.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index d39308b..279ce4a 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -16,4 +16,4 @@ jobs: run: | echo "${{ secrets.SSH_PRIVATE_KEY }}" > id_rsa.pem chmod 600 id_rsa.pem - ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd HIGHFIVE-AI/ && git pull origin main && ./deploy_script.sh" \ No newline at end of file + ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd ~/HIGHFIVE-AI/ && git pull origin main && ./deploy_script.sh" \ No newline at end of file diff --git a/deploy_script.sh b/deploy_script.sh index a113942..ee11dba 100644 --- a/deploy_script.sh +++ b/deploy_script.sh @@ -1,6 +1,6 @@ #!/bin/bash -cd HIGHFIVE-AI/ || exit +cd ~/HIGHFIVE-AI/ || exit # 패키지 업데이트 및 설치 pip-compile requirements.in From 57525217ec464864f438695bee68555eff258f86 Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 13:48:47 +0900 Subject: [PATCH 19/28] =?UTF-8?q?ci/cd=ED=85=8C=EC=8A=A4=ED=8A=B83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocr/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ocr/__init__.py b/ocr/__init__.py index afae7a1..aa3b6f0 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -73,7 +73,6 @@ def evaluate_image(): ocr_result = "False" ocr_result=is_review_valid(title, imageUrls) if awardImageUrl != None: - print(awardImageUrl) awardImageUrlList=[awardImageUrl] award_ocr_result=is_review_valid(title,awardImageUrlList) else: From 1135d6cb5209fa6f8563f007882b9c6fea165a23 Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 13:54:07 +0900 Subject: [PATCH 20/28] =?UTF-8?q?ci/cd=EC=98=A4=EB=A5=98=EC=88=98=EC=A0=95?= =?UTF-8?q?2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
.github/workflows/deploy.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 279ce4a..6b802c5 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -11,6 +11,10 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v3 + + - name: Set execute permissions for deploy script + run: chmod +x deploy_script.sh + - name: Setup SSH Key run: | From 7cbc655edb887a477bff12305f8ae4549bb1e46e Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 14:03:12 +0900 Subject: [PATCH 21/28] =?UTF-8?q?cicd=ED=85=8C=EC=8A=A4=ED=8A=B84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- deploy_script.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/deploy_script.sh b/deploy_script.sh index ee11dba..5a6df14 100644 --- a/deploy_script.sh +++ b/deploy_script.sh @@ -7,4 +7,5 @@ pip-compile requirements.in pip install -r requirements.txt # 애플리케이션 실행 -python app.py \ No newline at end of file +screen -dmS cicd python app.py +echo "Flask is running in a screen session." \ No newline at end of file From 5314425291be163374647f0b5ed51b8eb5a3f91d Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 14:06:25 +0900 Subject: [PATCH 22/28] =?UTF-8?q?cdcd=EC=88=98=EC=A0=953?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 6b802c5..b353452 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -11,7 +11,7 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v3 - + - name: Set execute permissions for deploy script run: chmod +x deploy_script.sh @@ -20,4 +20,4 @@ jobs: run: | echo "${{ secrets.SSH_PRIVATE_KEY }}" > id_rsa.pem chmod 600 id_rsa.pem - ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd ~/HIGHFIVE-AI/ && git pull origin main && ./deploy_script.sh" \ No newline at end of file + ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd ~/HIGHFIVE-AI/ && git reset --hard origin/main && pull origin main && ./deploy_script.sh" \ No newline at end of file From 3053e4b3e4076ac6f7842922abea1261d2276a7b Mon Sep 17 00:00:00 2001 From: seominjae1 <153708875+seominjae1@users.noreply.github.com> Date: Mon, 2 Jun 2025 14:08:23 +0900 Subject: [PATCH 23/28] Update deploy.yml --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index b353452..2205596 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -20,4 +20,4 @@ jobs: run: | echo "${{ secrets.SSH_PRIVATE_KEY }}" > id_rsa.pem chmod 600 id_rsa.pem - ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd ~/HIGHFIVE-AI/ && git reset --hard origin/main && pull origin main && ./deploy_script.sh" \ No newline at end of file + ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd ~/HIGHFIVE-AI/ && git reset --hard origin/main && git pull origin main && ./deploy_script.sh" From d13319f2a68e2602dafd2eb2628a934151b9c535 Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 14:16:09 +0900 Subject: [PATCH 24/28] 
=?UTF-8?q?cicd=EC=98=A4=EB=A5=98=EC=88=98=EC=A0=954?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index b353452..764d10c 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,11 +13,13 @@ jobs: uses: actions/checkout@v3 - name: Set execute permissions for deploy script - run: chmod +x deploy_script.sh + run: | + cd ~/HIGHFIVE-AI/ + chmod +x deploy_script.sh - name: Setup SSH Key run: | echo "${{ secrets.SSH_PRIVATE_KEY }}" > id_rsa.pem chmod 600 id_rsa.pem - ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd ~/HIGHFIVE-AI/ && git reset --hard origin/main && pull origin main && ./deploy_script.sh" \ No newline at end of file + ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd ~/HIGHFIVE-AI/ && git reset --hard origin/main && git pull origin main && ./deploy_script.sh" \ No newline at end of file From 846ac013693e8d29f92e959e74c88115bfb00aff Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 14:22:00 +0900 Subject: [PATCH 25/28] =?UTF-8?q?cicd=EC=98=A4=EB=A5=98=EC=88=98=EC=A0=955?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 764d10c..ee26687 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,9 +13,7 @@ jobs: uses: actions/checkout@v3 - name: Set execute permissions for deploy script - run: | - cd ~/HIGHFIVE-AI/ - chmod +x deploy_script.sh + run: chmod +x ${{ github.workspace }}/deploy_script.sh - name: Setup SSH Key From 26bd37b637f53652e543a634204514716e47ee1e Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 14:32:52 +0900 Subject: [PATCH 26/28] =?UTF-8?q?cicd=EC=88=98=EC=A0=956?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 642dc73..befd977 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -12,8 +12,11 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 + - name: Check workspace directory + run: echo "current workspace${{ github.workspace }}" + - name: Set execute permissions for deploy script - run: chmod +x ${{ github.workspace }}/deploy_script.sh + run: chmod +x ${{ github.workspace }}/HIGHFIVE-AI/deploy_script.sh - name: Setup SSH Key From 8d1f93bfd23c0277a2276000c709337d1586e31a Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 14:40:36 +0900 Subject: [PATCH 27/28] =?UTF-8?q?cdcd=EC=88=98=EC=A0=957?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/deploy.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index befd977..e29498f 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -12,11 +12,8 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - - name: Check workspace directory - run: echo "current workspace${{ github.workspace }}" - - 
name: Set execute permissions for deploy script - run: chmod +x ${{ github.workspace }}/HIGHFIVE-AI/deploy_script.sh + run: chmod +x ${{ github.workspace }}/deploy_script.sh - name: Setup SSH Key @@ -24,4 +21,4 @@ jobs: echo "${{ secrets.SSH_PRIVATE_KEY }}" > id_rsa.pem chmod 600 id_rsa.pem - ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd ~/HIGHFIVE-AI/ && git reset --hard origin/main && git pull origin main && ./deploy_script.sh" \ No newline at end of file + ssh -i id_rsa.pem -o StrictHostKeyChecking=no elicer@central-01.tcp.tunnel.elice.io -p 50735 "cd ~/HIGHFIVE-AI/ && git reset --hard origin/main && git pull origin main && chmod +x deploy_script.sh && ./deploy_script.sh" \ No newline at end of file From c28196c1a86604f3975dab2a1d37b9871d8edf8f Mon Sep 17 00:00:00 2001 From: seominjae1 Date: Mon, 2 Jun 2025 15:00:26 +0900 Subject: [PATCH 28/28] =?UTF-8?q?cicd=EC=88=98=EC=A0=958?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- deploy_script.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/deploy_script.sh b/deploy_script.sh index 5a6df14..bc0ae1d 100644 --- a/deploy_script.sh +++ b/deploy_script.sh @@ -6,6 +6,7 @@ cd ~/HIGHFIVE-AI/ || exit pip-compile requirements.in pip install -r requirements.txt -# 애플리케이션 실행 -screen -dmS cicd python app.py +# 기존의 스크린 삭제 후 재실행 +screen -S flask-server -X quit +screen -dmS flask-server bash -c "cd ~/HIGHFIVE-AI && python app.py" echo "Flask is running in a screen session." \ No newline at end of file
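
For reference, a minimal runner sketch tying the two crawlers in this series together. Each module exposes a synchronous crawl() entry point that prints its own progress and persists results through save_activities(), so a scheduler only has to call them in turn. The 1365 crawler's module name below is an assumption (wevity_crawler is the committed name):

# Minimal runner sketch. crawler.krvolunteers_crawler is a hypothetical
# module name for the 1365 crawler; crawler.wevity_crawler is as committed.
from crawler import wevity_crawler
from crawler import krvolunteers_crawler  # hypothetical module name

def run_all_crawlers():
    # Each crawl() handles its own logging and DB writes via save_activities().
    krvolunteers_crawler.crawl()
    wevity_crawler.crawl()

if __name__ == "__main__":
    run_all_crawlers()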
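
The reworked extract_keywords() in custom_keyword/ext.py now returns a single DB enum value instead of a ranked list, folding "People" and "Society" into "PeopleAndSociety". A usage sketch, assuming the module is importable as custom_keyword.ext and the bert-base-uncased weights are already cached locally:

from custom_keyword.ext import extract_keywords

# Returns one of "Economy", "Environment", "Technology", or "PeopleAndSociety";
# "People" and "Society" are collapsed into the single enum value before returning.
keyword = extract_keywords("Volunteers wanted for a neighborhood cleanup campaign")
print(keyword)  # whichever domain embedding scores the highest cosine similarity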
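
The reworked run_query() in server/db.py commits writes and routes a list of tuples through cursor.executemany(), which is what lets the crawlers batch-insert. A sketch of that write path follows; the column names mirror the crawler payload keys, and the actual INSERT used by save_activities() (not shown in this series) may differ:

from server.db import run_query

# Column names mirror the crawler payload keys; treat this statement as
# illustrative rather than the committed save_activities() query.
sql = """
    INSERT INTO activities
        (activity_site, activity_type, activity_content, end_date,
         activity_image_url, keyword, activity_name, site_url, start_date)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
"""
rows = [
    ("WEVITY", "CONTEST", "sample content", "2025-06-30T23:59:59.999999",
     "https://www.wevity.com/img/sample.jpg", "Technology",
     "Sample contest", "https://www.wevity.com/?c=find&s=1&ix=99999",
     "2025-06-01T00:00:00"),
]

# A list of tuples triggers the executemany() branch; since the statement is
# not a SELECT, run_query() commits and returns the affected row count.
inserted = run_query(sql, rows)
print(f"{inserted} rows inserted")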
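
Finally, a request sketch for manually exercising the Gemini-backed review check behind the /ocr blueprint. Only the /ocr prefix and the imageUrls/awardImageUrl/title request fields are pinned down by the patches above; the exact route suffix, port, and response keys are assumptions:

import requests

payload = {
    "title": "Riverside cleanup volunteer review",
    "imageUrls": ["https://example-bucket.s3.amazonaws.com/review-photo-1.jpg"],
    "awardImageUrl": None,  # optional; a single URL is wrapped in a list server-side
}

# "/ocr/evaluate" and port 5000 are assumptions; adjust to the deployed route.
resp = requests.post("http://localhost:5000/ocr/evaluate", json=payload, timeout=60)
print(resp.status_code, resp.json())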