From 59c4413ba53354c9e1560bfba1808bc24a7204bf Mon Sep 17 00:00:00 2001 From: Hyeongjun Ham Date: Fri, 14 Nov 2025 17:18:45 +0900 Subject: [PATCH 1/6] =?UTF-8?q?chore:=20html=EC=97=90=EC=84=9C=20=EB=A7=88?= =?UTF-8?q?=ED=81=AC=EB=8B=A4=EC=9A=B4=EC=9C=BC=EB=A1=9C=EC=9D=98=20?= =?UTF-8?q?=EB=B3=80=ED=99=98=EC=9D=84=20=EC=9C=84=ED=95=9C=20markdownify?= =?UTF-8?q?=20=EC=9D=98=EC=A1=B4=EC=84=B1=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1db657b..8f97f3e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -boto3 \ No newline at end of file +boto3 +markdownify \ No newline at end of file From 30674129582526ce5925bdd75a601c0ce6ab6563 Mon Sep 17 00:00:00 2001 From: Hyeongjun Ham Date: Fri, 14 Nov 2025 17:22:49 +0900 Subject: [PATCH 2/6] =?UTF-8?q?feat:=20=EC=A0=84=EB=8B=AC=EB=90=9C=20?= =?UTF-8?q?=EC=9A=94=EC=B2=AD=EC=97=90=20=ED=8F=AC=ED=95=A8=EB=90=9C=20htm?= =?UTF-8?q?l=EC=9D=84=20=EC=9A=94=EC=95=BD=20=EC=9D=B4=EC=A0=84=EC=97=90?= =?UTF-8?q?=20=EB=A7=88=ED=81=AC=EB=8B=A4=EC=9A=B4=ED=99=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lambda_function.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/lambda_function.py b/src/lambda_function.py index d8a3955..0f8100f 100644 --- a/src/lambda_function.py +++ b/src/lambda_function.py @@ -1,4 +1,5 @@ import json +from markdownify import markdownify as md from tos_summarize import tos_summarize from tos_evaluate import tos_evaluate @@ -26,12 +27,15 @@ def lambda_handler(event, context): } url = event['queryStringParameters']['url'] - text_html = event['body'] + tos_content = md(event['body']) + + print('markdownified TOS content:') + print(tos_content) # TODO: 기존 URL 기반 캐싱 로직 구현 # text_html 문자열에서 중요 조항 위주로 약관 요약 - summarized_tos = tos_summarize(text_html) + summarized_tos = tos_summarize(tos_content) # 약관 조항에 대해 분석 수행 evaluation_result = tos_evaluate(summarized_tos) From 34d248e79652219fa6287ffa3767e65b2d6454df Mon Sep 17 00:00:00 2001 From: Hyeongjun Ham Date: Fri, 14 Nov 2025 17:25:18 +0900 Subject: [PATCH 3/6] =?UTF-8?q?refactor:=20=ED=8C=8C=EB=9D=BC=EB=AF=B8?= =?UTF-8?q?=ED=84=B0=20=EC=9D=B4=EB=A6=84=EC=9D=84=20text=5Fhtml=EC=97=90?= =?UTF-8?q?=EC=84=9C=20tos=5Fcontent=EB=A1=9C=20=EB=B3=80=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/tos_summarize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tos_summarize.py b/src/tos_summarize.py index 892bed2..8263f67 100644 --- a/src/tos_summarize.py +++ b/src/tos_summarize.py @@ -1,9 +1,9 @@ import boto3 -def tos_summarize(text_html): +def tos_summarize(tos_content): system_instruction=[{"text": """ 당신은 약관 분석 전문가입니다. -주어진 html 페이지에서 주요 약관 내용을 요약합니다. +주어진 텍스트에서 주요 약관 내용을 요약합니다. 한국어로 응답합니다. """}] @@ -16,7 +16,7 @@ def tos_summarize(text_html): messages = [{ "role": "user", "content": [ - {"text": text_html} + {"text": tos_content} ] }] From d57dac14c98d9e29759d0ecf7d012bc1b4fb5242 Mon Sep 17 00:00:00 2001 From: Hyeongjun Ham Date: Fri, 14 Nov 2025 18:15:28 +0900 Subject: [PATCH 4/6] =?UTF-8?q?feat:=20=EC=A0=84=EC=B2=B4=20=EB=A7=88?= =?UTF-8?q?=ED=81=AC=EB=8B=A4=EC=9A=B4=20=ED=8C=8C=EC=8B=B1=20=EA=B2=B0?= =?UTF-8?q?=EA=B3=BC=EB=A5=BC=20=EC=B6=9C=EB=A0=A5=ED=95=98=EB=8A=94=20?= =?UTF-8?q?=EB=8C=80=EC=8B=A0,=20=EC=9B=90=EB=B3=B8=20html,=20=EB=A7=88?= =?UTF-8?q?=ED=81=AC=EB=8B=A4=EC=9A=B4=EC=9D=98=20=EA=B8=B8=EC=9D=B4=20?= =?UTF-8?q?=EB=B0=8F=20=EA=B0=90=EC=86=8C=EC=9C=A8=EC=9D=84=20=EC=B6=9C?= =?UTF-8?q?=EB=A0=A5=ED=95=98=EB=8F=84=EB=A1=9D=20=EB=B3=80=EA=B2=BD=20?= =?UTF-8?q?=EC=9D=B4=EC=A0=84=20=EB=B2=84=EC=A0=84=EC=9D=80=20cloudwatch?= =?UTF-8?q?=EC=97=90=EC=84=9C=20=EA=B0=80=EB=8F=85=EC=84=B1=EC=9D=B4=20?= =?UTF-8?q?=EC=A7=80=EB=82=98=EC=B9=98=EA=B2=8C=20=EB=96=A8=EC=96=B4?= =?UTF-8?q?=EC=A0=B8=20=EB=B3=80=EA=B2=BD=ED=96=88=EC=8A=B5=EB=8B=88?= =?UTF-8?q?=EB=8B=A4.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lambda_function.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/lambda_function.py b/src/lambda_function.py index 0f8100f..f9c7c5e 100644 --- a/src/lambda_function.py +++ b/src/lambda_function.py @@ -29,8 +29,14 @@ def lambda_handler(event, context): url = event['queryStringParameters']['url'] tos_content = md(event['body']) - print('markdownified TOS content:') - print(tos_content) + # 바이트 기준으로 길이 및 감소율 계산 + original_length = len(event['body'].encode('utf-8')) + markdown_length = len(tos_content.encode('utf-8')) + reduction = (original_length - markdown_length) / original_length * 100 + + print(f"원본 html 길이: {original_length} bytes") + print(f"markdown 길이: {markdown_length} bytes") + print(f"감소율: {reduction:.4f}%") # TODO: 기존 URL 기반 캐싱 로직 구현 From e41ee31c22de18b0f91b79a9c348110fc87302ae Mon Sep 17 00:00:00 2001 From: Hyeongjun Ham <94438522+hjham0856@users.noreply.github.com> Date: Fri, 14 Nov 2025 18:49:15 +0900 Subject: [PATCH 5/6] =?UTF-8?q?chore:=20=EC=A3=BC=EC=84=9D=EC=9D=98=20?= =?UTF-8?q?=EC=9D=B4=EC=A0=84=20=EB=B3=80=EC=88=98=EB=AA=85=20text=5Fhtml?= =?UTF-8?q?=EC=9D=84=20=ED=98=84=EC=9E=AC=20=EC=82=AC=EC=9A=A9=ED=95=98?= =?UTF-8?q?=EB=8A=94=20=EC=9D=B4=EB=A6=84=EC=9D=B8=20tos=5Fcontent?= =?UTF-8?q?=EB=A1=9C=20=EC=88=98=EC=A0=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/lambda_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lambda_function.py b/src/lambda_function.py index f9c7c5e..53ff6ac 100644 --- a/src/lambda_function.py +++ b/src/lambda_function.py @@ -40,7 +40,7 @@ def lambda_handler(event, context): # TODO: 기존 URL 기반 캐싱 로직 구현 - # text_html 문자열에서 중요 조항 위주로 약관 요약 + # tos_content 문자열에서 중요 조항 위주로 약관 요약 summarized_tos = tos_summarize(tos_content) # 약관 조항에 대해 분석 수행 From 3850d97dc68a9693bec36695b2df7ceaa2d95cec Mon Sep 17 00:00:00 2001 From: Hyeongjun Ham <94438522+hjham0856@users.noreply.github.com> Date: Fri, 14 Nov 2025 18:51:09 +0900 Subject: [PATCH 6/6] =?UTF-8?q?feat:=20=EC=A7=80=EB=82=98=EC=B9=9C=20?= =?UTF-8?q?=EC=86=8C=EC=88=98=EC=A0=90=20=EC=9E=90=EB=A6=AC=EC=88=98?= =?UTF-8?q?=EB=A5=BC=20=EC=A0=9C=ED=95=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 4자리는 일반적이지 않아 2자리로 수정합니다. Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/lambda_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lambda_function.py b/src/lambda_function.py index 53ff6ac..bbaec90 100644 --- a/src/lambda_function.py +++ b/src/lambda_function.py @@ -36,7 +36,7 @@ def lambda_handler(event, context): print(f"원본 html 길이: {original_length} bytes") print(f"markdown 길이: {markdown_length} bytes") - print(f"감소율: {reduction:.4f}%") + print(f"감소율: {reduction:.2f}%") # TODO: 기존 URL 기반 캐싱 로직 구현