diff --git a/.github/workflows/api.yaml b/.github/workflows/api.yaml new file mode 100644 index 0000000..275a0ec --- /dev/null +++ b/.github/workflows/api.yaml @@ -0,0 +1,31 @@ +name: Deploy API Gateway + +on: + push: + paths: + - 'api/*' + +jobs: + deploy-api: + runs-on: ubuntu-latest + + steps: + - name: Checkout repo + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install boto3 + - name: Run API deployment script + env: + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_REGION: us-east-1 # or your region + run: | + python3 api/deploy-api.py \ No newline at end of file diff --git a/.github/workflows/lambda.yaml b/.github/workflows/lambda.yaml new file mode 100644 index 0000000..45cd394 --- /dev/null +++ b/.github/workflows/lambda.yaml @@ -0,0 +1,95 @@ +name: Deploy Lambdas + +on: + push: + branches: + - main + - backend_changes + +env: + LAMBDA_DIR: backend + DEPLOY_BUCKET: hackathon-lambda-ap-ai-cyberark + +jobs: + deploy: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Install AWS CLI + run: pip install awscli + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v2 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + + - name: Deploy Lambda Functions (No dependencies) + run: | + LAYER_ARN=$(aws lambda list-layer-versions \ + --layer-name "my-python-layer" \ + --query 'LayerVersions[0].LayerVersionArn' \ + --output text) + + echo "Using latest layer version: $LAYER_ARN" + for dir in "$LAMBDA_DIR"/*/; do + dir=${dir%/} + function_name=$(basename "$dir") + 
entry_point="$dir/${function_name}.py" + + if [ ! -f "$entry_point" ]; then + echo "Skipping $function_name: $entry_point not found." + continue + fi + + echo "Packaging Lambda: $function_name" + + build_dir="/tmp/${function_name}_build" + zip_file="/tmp/${function_name}.zip" + + rm -rf "$build_dir" + mkdir -p "$build_dir" + + # Copy Lambda source file + cp -r "$dir"/* "$build_dir/" + + # Copy all top-level shared files from backend (excluding directories and requirements.txt) + find "$LAMBDA_DIR" -maxdepth 1 -type f ! -name "requirements.txt" -exec cp {} "$build_dir/" \; + + # Zip build directory + cd "$build_dir" + zip -r "$zip_file" . > /dev/null + cd - + + # Upload zip to S3 + aws s3 cp "$zip_file" "s3://${DEPLOY_BUCKET}/${function_name}.zip" + + # Deploy Lambda + if aws lambda get-function --function-name "$function_name" > /dev/null 2>&1; then + echo "Updating Lambda: $function_name" + aws lambda update-function-code \ + --function-name "$function_name" \ + --s3-bucket "$DEPLOY_BUCKET" \ + --s3-key "${function_name}.zip" + else + echo "Creating Lambda: $function_name" + aws lambda create-function \ + --function-name "$function_name" \ + --runtime python3.12 \ + --role "arn:aws:iam::269854564686:role/hackathon-lambda-role" \ + --handler "${function_name}.lambda_handler" \ + --code S3Bucket="$DEPLOY_BUCKET",S3Key="${function_name}.zip" \ + --timeout 900 \ + --vpc-config SubnetIds=subnet-02e62e34308bb07d5,subnet-0534b99dd34e646f1,SecurityGroupIds=sg-0b9a6b812b30a1107 \ + --layers "$LAYER_ARN" + fi + done diff --git a/.github/workflows/layer.yaml b/.github/workflows/layer.yaml new file mode 100644 index 0000000..299a7f9 --- /dev/null +++ b/.github/workflows/layer.yaml @@ -0,0 +1,88 @@ +name: Build Lambda Layer + +on: + push: + paths: + - 'backend/requirements.txt' + workflow_dispatch: + +jobs: + build-and-publish-layer: + runs-on: ubuntu-latest + + env: + LAYER_NAME: my-python-layer + PYTHON_VERSION: python3.12 + S3_KEY: layers/layer.zip + + steps: + - name: 
Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies to python/ + run: | + mkdir -p python + pip install -r backend/requirements.txt --platform manylinux2014_x86_64 --only-binary=:all: -t python/ + + - name: Clean up unnecessary files + run: | + find python/ -type d -name "__pycache__" -exec rm -rf {} + + find python/ -type d -name "tests" -exec rm -rf {} + + find python/ -type f -name "*.pyc" -delete + + - name: Zip the layer + run: zip -r layer.zip python + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-1 + + - name: Upload zip to S3 + run: | + aws s3 cp layer.zip s3://hackathon-lambda-ap-ai-cyberark/${{ env.S3_KEY }} + + - name: Publish Lambda Layer from S3 + run: | + aws lambda publish-layer-version \ + --layer-name ${{ env.LAYER_NAME }} \ + --description "Dependencies from backend/requirements.txt" \ + --content S3Bucket=hackathon-lambda-ap-ai-cyberark,S3Key=${{ env.S3_KEY }} \ + --compatible-runtimes ${{ env.PYTHON_VERSION }} + + - name: Upload artifact (optional) + uses: actions/upload-artifact@v4 + with: + name: lambda-layer + path: layer.zip + + - name: Get latest layer version ARN + id: get-layer-version + run: | + LAYER_ARN=$(aws lambda list-layer-versions --layer-name ${{ env.LAYER_NAME }} \ + --query 'LayerVersions[0].LayerVersionArn' --output text) + echo "layer_arn=$LAYER_ARN" >> "$GITHUB_OUTPUT" + + - name: List functions using the layer + id: list-functions + run: | + FUNCTIONS=$(aws lambda list-functions --query \ + "Functions[?Layers && contains(join(',', Layers[].Arn), '${{ env.LAYER_NAME }}')].FunctionName" \ + --output text) + echo "functions=$FUNCTIONS" >> "$GITHUB_OUTPUT" + + - name: Update functions to use latest layer version + run: | + for function in ${{ 
steps.list-functions.outputs.functions }}; do + echo "Updating $function..." + aws lambda update-function-configuration \ + --function-name "$function" \ + --layers ${{ steps.get-layer-version.outputs.layer_arn }} + done diff --git a/.gitignore b/.gitignore index fd3a82d..1b02c6d 100644 --- a/.gitignore +++ b/.gitignore @@ -193,3 +193,6 @@ cython_debug/ .cursorignore .cursorindexingignore .DS_Store + +node_modules +package-lock.json \ No newline at end of file diff --git a/amplify.py b/amplify.py index 03fc414..b48f7dc 100644 --- a/amplify.py +++ b/amplify.py @@ -11,7 +11,6 @@ branch = sys.argv[2] repo_owner, repo_name = repo_full.split('/') app_name = f"amplify-{repo_name}" - client = boto3.client('amplify','us-east-1') print(GITHUB_PAT) diff --git a/api/deploy-api.py b/api/deploy-api.py new file mode 100644 index 0000000..5f0acc3 --- /dev/null +++ b/api/deploy-api.py @@ -0,0 +1,144 @@ +import boto3 +import json +import os + +REGION = "us-east-1" +STAGE = "prod" +ACCOUNT_ID = "269854564686" + +apigateway = boto3.client("apigateway", region_name=REGION) +lambda_client = boto3.client("lambda", region_name=REGION) + +def get_or_create_api(api_name): + apis = apigateway.get_rest_apis()["items"] + for api in apis: + if api["name"] == api_name: + print(f"Found API: {api_name}") + return api["id"] + + print(f"Creating API: {api_name}") + response = apigateway.create_rest_api(name=api_name) + return response["id"] + +def get_or_create_resource(api_id, full_path): + # Normalize and split nested path: "users/{userId}" => ["users", "{userId}"] + parts = [p for p in full_path.strip("/").split("/") if p] + resources = apigateway.get_resources(restApiId=api_id)["items"] + + # Build a path-to-id map + path_map = {res["path"]: res["id"] for res in resources} + parent_path = "" + parent_id = path_map["/"] # root path + + for part in parts: + current_path = f"{parent_path}/{part}" if parent_path else f"/{part}" + if current_path in path_map: + parent_id = path_map[current_path] + 
else: + print(f"Creating resource: {current_path}") + response = apigateway.create_resource( + restApiId=api_id, + parentId=parent_id, + pathPart=part + ) + parent_id = response["id"] + path_map[current_path] = parent_id + parent_path = current_path + + return parent_id + + +def method_exists(api_id, resource_id, http_method): + try: + apigateway.get_method( + restApiId=api_id, + resourceId=resource_id, + httpMethod=http_method + ) + return True + except apigateway.exceptions.NotFoundException: + return False + +def add_lambda_permission(lambda_name, api_id, method, path): + statement_id = f"{lambda_name.lower()}-{method.lower()}" + try: + lambda_client.add_permission( + FunctionName=lambda_name, + StatementId=statement_id, + Action="lambda:InvokeFunction", + Principal="apigateway.amazonaws.com", + SourceArn=f"arn:aws:execute-api:{REGION}:{ACCOUNT_ID}:{api_id}/*/{method}/{path}" + ) + print(f"Added permission to Lambda {lambda_name} for method {method} /{path}") + except lambda_client.exceptions.ResourceConflictException: + # Permission already exists + print(f"Permission already exists for Lambda {lambda_name} and method {method} /{path}") + +def setup_method(api_id, resource_id, method_def, path): + method = method_def["httpMethod"].upper() + lambda_name = method_def["lambdaFunctionName"] + auth_type = method_def.get("authorizationType", "NONE") + lambda_arn = f"arn:aws:lambda:{REGION}:{ACCOUNT_ID}:function:{lambda_name}" + + if method_exists(api_id, resource_id, method): + print(f"Method {method} already exists for /{path}, skipping method creation.") + else: + print(f"Creating method {method} for /{path}") + apigateway.put_method( + restApiId=api_id, + resourceId=resource_id, + httpMethod=method, + authorizationType=auth_type + ) + + print(f"Setting integration for {method} /{path}") + apigateway.put_integration( + restApiId=api_id, + resourceId=resource_id, + httpMethod=method, + type="AWS_PROXY", + integrationHttpMethod="POST", + 
uri=f"arn:aws:apigateway:{REGION}:lambda:path/2015-03-31/functions/{lambda_arn}/invocations" + ) + + add_lambda_permission(lambda_name, api_id, method, path) + +def deploy_api(api_id): + print(f"Deploying API {api_id} to stage: {STAGE}") + apigateway.create_deployment( + restApiId=api_id, + stageName=STAGE + ) + +def main(): + # Use script folder as working directory to find JSON files + script_dir = os.path.dirname(os.path.abspath(__file__)) + + deploy_apis = set() + + # Loop all JSON files in current folder + for file in os.listdir(script_dir): + if not file.endswith(".json"): + continue + + json_path = os.path.join(script_dir, file) + with open(json_path) as f: + config = json.load(f) + + api_name = config["apiName"] + resource_path = config["resourcePath"] + method_def = config["method"] + should_deploy = config.get("deploy", False) + + api_id = get_or_create_api(api_name) + resource_id = get_or_create_resource(api_id, resource_path) + setup_method(api_id, resource_id, method_def, resource_path) + + if should_deploy: + deploy_apis.add(api_id) + + for api_id in deploy_apis: + deploy_api(api_id) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/api/get-value.json b/api/get-value.json new file mode 100644 index 0000000..04ffc52 --- /dev/null +++ b/api/get-value.json @@ -0,0 +1,10 @@ +{ + "apiName": "hackathon", + "resourcePath": "get-value", + "method": { + "httpMethod": "GET", + "authorizationType": "NONE", + "lambdaFunctionName": "Submit" + }, + "deploy": true +} \ No newline at end of file diff --git a/api/get_feed.json b/api/get_feed.json new file mode 100644 index 0000000..52f75ce --- /dev/null +++ b/api/get_feed.json @@ -0,0 +1,10 @@ +{ + "apiName": "hackathon", + "resourcePath": "getfeed", + "method": { + "httpMethod": "GET", + "authorizationType": "NONE", + "lambdaFunctionName": "get_feed" + }, + "deploy": true +} \ No newline at end of file diff --git a/api/query-agent.json b/api/query-agent.json new file mode 100644 index 
0000000..8dd8ff2 --- /dev/null +++ b/api/query-agent.json @@ -0,0 +1,10 @@ +{ + "apiName": "hackathon", + "resourcePath": "queryagent", + "method": { + "httpMethod": "POST", + "authorizationType": "NONE", + "lambdaFunctionName": "web_api" + }, + "deploy": true +} \ No newline at end of file diff --git a/api/query-test.json b/api/query-test.json new file mode 100644 index 0000000..22c5660 --- /dev/null +++ b/api/query-test.json @@ -0,0 +1,10 @@ +{ + "apiName": "hackathon", + "resourcePath": "test/query", + "method": { + "httpMethod": "POST", + "authorizationType": "NONE", + "lambdaFunctionName": "web_api" + }, + "deploy": true +} \ No newline at end of file diff --git a/api/upload.json b/api/upload.json new file mode 100644 index 0000000..c67b2cb --- /dev/null +++ b/api/upload.json @@ -0,0 +1,10 @@ +{ + "apiName": "hackathon", + "resourcePath": "upload", + "method": { + "httpMethod": "POST", + "authorizationType": "NONE", + "lambdaFunctionName": "Submit" + }, + "deploy": true +} \ No newline at end of file diff --git a/api/web-api.json b/api/web-api.json new file mode 100644 index 0000000..da394f3 --- /dev/null +++ b/api/web-api.json @@ -0,0 +1,10 @@ +{ + "apiName": "hackathon", + "resourcePath": "articles", + "method": { + "httpMethod": "GET", + "authorizationType": "NONE", + "lambdaFunctionName": "web_api" + }, + "deploy": true +} \ No newline at end of file diff --git a/backend/Backup/Submit.py b/backend/Backup/Submit.py deleted file mode 100644 index 2620638..0000000 --- a/backend/Backup/Submit.py +++ /dev/null @@ -1,92 +0,0 @@ -import io -import pandas as pd -import boto3 -import time -import uuid -from Utils import get_postgresql_connection - -comprehend = boto3.client('comprehend') - -input_s3_uri = 's3://awstraindata/input.csv' -role_arn = 'arn:aws:iam::269854564686:role/hackathon-comprehend-role' - -s3 = boto3.client('s3') -# Download the file object -input_csv_object = s3.get_object(Bucket='awstraindata', Key='input.csv') - -# Read CSV into DataFrame -conn 
= get_postgresql_connection() -cursor = conn.cursor() -cursor.execute("drop table if exists articles") -cursor.execute("""CREATE TABLE IF NOT EXISTS articles ( - articles_id TEXT, - title TEXT, - body TEXT, - source TEXT, - published_date TEXT, - location_mentions TEXT, - officials_involved TEXT, - relevance_category TEXT, - sentiment TEXT - )""") -input_csv = pd.read_csv(io.BytesIO(input_csv_object['Body'].read())) -for index, row in input_csv.iterrows(): - print(f"Processing row {index}: {row}") - cursor.execute(""" - INSERT INTO articles (articles_id, title, body, source, published_date) - VALUES (%s, %s, %s, %s, %s)""", (row[0], row[1], row[2], row[3], row[4])) -conn.commit() -cursor.close() -entities_job = comprehend.start_entities_detection_job( - InputDataConfig={'S3Uri': input_s3_uri, 'InputFormat': 'ONE_DOC_PER_LINE'}, - OutputDataConfig={'S3Uri': 's3://awstraindata/output/entities/'}, - DataAccessRoleArn=role_arn, - LanguageCode='en', - JobName='MyEntityDetectionJob_' + str(int(time.time())), -) -result = comprehend.describe_entities_detection_job(JobId=entities_job['JobId']) -entities_output = result['EntitiesDetectionJobProperties']['OutputDataConfig']['S3Uri'] - -# SENTIMENT detection job -sentiment_job = comprehend.start_sentiment_detection_job( - InputDataConfig={'S3Uri': input_s3_uri, 'InputFormat': 'ONE_DOC_PER_LINE'}, - OutputDataConfig={'S3Uri': 's3://awstraindata/output/sentiment/'}, - DataAccessRoleArn=role_arn, - LanguageCode='en', - JobName='MySentimentDetectionJob_' + str(int(time.time())), -) -res = comprehend.describe_sentiment_detection_job(JobId=sentiment_job['JobId']) -sentiment_output = res['SentimentDetectionJobProperties']['OutputDataConfig']['S3Uri'] - -# KEY PHRASES detection job -phrases_job = comprehend.start_key_phrases_detection_job( - InputDataConfig={'S3Uri': input_s3_uri, 'InputFormat': 'ONE_DOC_PER_LINE'}, - OutputDataConfig={'S3Uri': 's3://awstraindata/output/keyphrases/'}, - DataAccessRoleArn=role_arn, - 
LanguageCode='en', - JobName='MyKeyPhrasesDetectionJob_' + str(int(time.time())), -) -res = comprehend.describe_key_phrases_detection_job(JobId=phrases_job['JobId']) -key_phrases_output = res['KeyPhrasesDetectionJobProperties']['OutputDataConfig']['S3Uri'] -print("Entities Job Response:", entities_job) -print("Sentiment Job Response:", sentiment_job) -print("Key Phrases Job Response:", phrases_job) -conn = get_postgresql_connection() -if conn: - cursor = conn.cursor() - cursor.execute(""" - CREATE TABLE IF NOT EXISTS comprehend_jobs ( - batch_id TEXT, - input_s3_uri TEXT, - entities_job JSONB, - sentiment_job JSONB, - key_phrases_job JSONB - ) - """) - cursor.execute(""" - INSERT INTO comprehend_jobs (batch_id, input_s3_uri, entities_path, sentiment_path, key_phrases_path) - VALUES (%s, %s, %s, %s, %s)""", (str(uuid.uuid4()), input_s3_uri, entities_output.replace('s3://awstraindata/', ''), sentiment_output.replace('s3://awstraindata/', ''), key_phrases_output.replace('s3://awstraindata/', ''))) - conn.commit() - cursor.close() - conn.close() - diff --git a/backend/Backup/Transform.py b/backend/Backup/Transform.py deleted file mode 100644 index 2db246e..0000000 --- a/backend/Backup/Transform.py +++ /dev/null @@ -1,64 +0,0 @@ -from turtle import pd -import boto3 -import tarfile -import json -import psycopg2 -import io - -from EntityRecog.Utils import get_postgresql_connection -def lambda_handler(event, context): - for record in event['Records']: - print(f"New record: {record}") - bucket = record['s3']['bucket']['name'] - key = record['s3']['object']['key'] - conn = get_postgresql_connection() - s3 = boto3.client('s3') - obj = s3.get_object(Bucket=bucket, Key=key) - tar_bytes = io.BytesIO(obj['Body'].read()) - - # Extract .json inside the tar.gz - with tarfile.open(fileobj=tar_bytes, mode='r:gz') as tar: - for member in tar.getmembers(): - if member.name == "output" and member.isfile(): - file = tar.extractfile(member) - results = json.load(file) - print(f"Extracted 
JSON: {results}") - break - - if not results: - folderSplit = key.split('/') - type = folderSplit[0] - cursor = conn.cursor() - query = "SELECT * FROM comprehend_jobs WHERE entities_path = %s or sentiment_path = %s or key_phrases_path = %s" - cursor.execute(query, (key, key, key)) - row = cursor.fetchone() - if row: - # Download the file object - response = s3.get_object(Bucket=bucket, Key=row['input_s3_uri']) - - # Read CSV into DataFrame - input_csv = pd.read_csv(io.BytesIO(response['Body'].read())) - for row in results: - if type == 'entities': - location_mentions = ', '.join([entity['Text'] for entity in row['Entities'] if entity['Type'] == 'LOCATION']) - officials_involved = ', '.join([entity['Text'] for entity in row['Entities'] if entity['Type'] == 'PERSON']) - relevance_category = ', '.join([entity['Text'] for entity in row['Entities'] if entity['Type'] == 'TITLE']) - if not location_mentions: - cursor.execute("""update articles set location_mentions = %s where articles_id = %s""", (location_mentions, input_article['articles_id'])) - if not officials_involved: - cursor.execute("""update articles set officials_involved = %s where articles_id = %s""", (officials_involved, input_article['articles_id'])) - if not relevance_category: - cursor.execute("""update articles set relevance_category = %s where articles_id = %s""", (relevance_category, input_article['articles_id'])) - elif type == 'sentiment': - sentiment = row.get('Sentiment', 'NEUTRAL') - if not sentiment: - cursor.execute("""update articles set sentiment = %s where articles_id = %s""", (sentiment, input_article['articles_id'])) - elif type == 'keyphrases': - key_phrases = ', '.join(row.get('KeyPhrases', [])) - if not key_phrases: - cursor.execute("""update articles set key_phrases = %s where articles_id = %s""", (key_phrases, input_article['articles_id'])) - line_number = row['Line'] - input_article = input_csv[line_number] - - cursor.close() - conn.close() \ No newline at end of file diff --git 
a/backend/JsontoCSV.py b/backend/JsontoCSV.py deleted file mode 100644 index b510dec..0000000 --- a/backend/JsontoCSV.py +++ /dev/null @@ -1,14 +0,0 @@ -import json -import pandas as pd - -# Load the JSON file -with open("input_feed.json", "r", encoding="utf-8") as f: - data = json.load(f) - -# Normalize and convert lists to strings -df = pd.json_normalize(data) -df["extractedLocations"] = df["extractedLocations"].apply(lambda x: ", ".join(x)) -df["tags"] = df["tags"].apply(lambda x: ", ".join(x)) - -# Export to CSV -df.to_csv("news_feed_converted.csv", index=False) \ No newline at end of file diff --git a/backend/Sample.csv b/backend/Sample.csv deleted file mode 100644 index 7a84ed8..0000000 --- a/backend/Sample.csv +++ /dev/null @@ -1,19 +0,0 @@ -Text,Type -"Accident","MISHAP" -"Argument","MISHAP" -"fight","MISHAP" -"quarrel","MISHAP" -"Burn","MISHAP" -"Snatch","MISHAP" -"Murder","CRIME" -"RoadRage","CRIME" -"Rash driving","CRIME" -"Theft","CRIME" -"Burglary","CRIME" -"Cheat","CRIME" -"Stab","CRIME" -"Kill","CRIME" -"Hate Speech","CRIME" -"Hijack","CRIME" -"Beat","CRIME" -"Threat","CRIME" \ No newline at end of file diff --git a/backend/Submit/Submit.py b/backend/Submit/Submit.py new file mode 100644 index 0000000..31d88a0 --- /dev/null +++ b/backend/Submit/Submit.py @@ -0,0 +1,62 @@ +import base64 +import json +from requests_toolbelt.multipart import decoder +import uuid +from Utils import get_postgresql_connection +from fastapi import FastAPI +from docx import Document +import csv +import io +import re +import boto3 +import traceback + +app = FastAPI() + +s3 = boto3.client('s3') +BUCKET_NAME = 'awstraindata' + +def lambda_handler(event, context): + try: + # Decode base64-encoded body (API Gateway encodes binary automatically) + print(f"Received event") + if event.get("isBase64Encoded", False): + body = base64.b64decode(event['body']) + else: + body = event['body'].encode("utf-8") + print(f"Decoded body length: {len(body)} bytes") + # Get content-type header + 
content_type = event['headers'].get('Content-Type') or event['headers'].get('content-type') + if not content_type: + return {"statusCode": 400, "body": "Missing Content-Type header"} + + # Parse multipart form + multipart_data = decoder.MultipartDecoder(body, content_type) + print(f"Multipart data parts: {len(multipart_data.parts)}") + conn = get_postgresql_connection() + cursor = conn.cursor() + for part in multipart_data.parts: + print(f"Processing part: {part.headers.get(b'Content-Disposition')}") + file_stream = io.BytesIO(part.content) + file_stream.seek(0) + file_id = str(uuid.uuid4()) + s3_key = f"raw_data/{file_id}" + # Upload to S3 + s3.put_object( + Bucket=BUCKET_NAME, + Key=s3_key, + Body=file_stream, + ContentType='application/vnd.openxmlformats-officedocument.wordprocessingml.document' + ) + return { + "statusCode": 200, + "headers": { + "Content-Type": "application/json" + }, + "body": json.dumps({ + "status": "success", + }) + } + except Exception as e: + traceback.print_exc() + return {"statusCode": 500, "body": f"Error: {str(e)}"} \ No newline at end of file diff --git a/backend/Transform.py b/backend/Transform.py deleted file mode 100644 index 14a6976..0000000 --- a/backend/Transform.py +++ /dev/null @@ -1,57 +0,0 @@ -from turtle import pd -import boto3 -import tarfile -import json -import psycopg2 -import io - -from EntityRecog.Utils import get_postgresql_connection -def lambda_handler(event, context): - for record in event['Records']: - print(f"New record: {record}") - bucket = record['s3']['bucket']['name'] - key = record['s3']['object']['key'] - conn = get_postgresql_connection() - s3 = boto3.client('s3') - obj = s3.get_object(Bucket=bucket, Key=key) - tar_bytes = io.BytesIO(obj['Body'].read()) - - # Extract .json inside the tar.gz - with tarfile.open(fileobj=tar_bytes, mode='r:gz') as tar: - for member in tar.getmembers(): - if member.name == "output" and member.isfile(): - file = tar.extractfile(member) - results = json.load(file) - 
print(f"Extracted JSON: {results}") - break - - if not results: - folderSplit = key.split('/') - type = folderSplit[0] - cursor = conn.cursor() - query = "SELECT * FROM comprehend_jobs WHERE entities_path = %s or sentiment_path = %s or key_phrases_path = %s" - cursor.execute(query, (key, key, key)) - row = cursor.fetchone() - if row: - article_id = row['article_id'] - for result in results: - if type == 'entities': - location_mentions = ', '.join([entity['Text'] for entity in result['Entities'] if entity['Type'] == 'LOCATION']) - officials_involved = ', '.join([entity['Text'] for entity in result['Entities'] if entity['Type'] == 'PERSON']) - relevance_category = ', '.join([entity['Text'] for entity in result['Entities'] if entity['Type'] == 'TITLE']) - if not location_mentions: - cursor.execute("""update articles set location_mentions = %s where articles_id = %s""", (location_mentions, article_id)) - if not officials_involved: - cursor.execute("""update articles set officials_involved = %s where articles_id = %s""", (officials_involved, article_id)) - if not relevance_category: - cursor.execute("""update articles set relevance_category = %s where articles_id = %s""", (relevance_category, article_id)) - elif type == 'sentiment': - sentiment = row.get('Sentiment', 'NEUTRAL') - if not sentiment: - cursor.execute("""update articles set sentiment = %s where articles_id = %s""", (sentiment, article_id)) - elif type == 'keyphrases': - key_phrases = ', '.join(row.get('KeyPhrases', [])) - if not key_phrases: - cursor.execute("""update articles set key_phrases = %s where articles_id = %s""", (key_phrases, article_id)) - cursor.close() - conn.close() \ No newline at end of file diff --git a/backend/Utils.py b/backend/Utils.py index 371a1dc..41ed2b4 100644 --- a/backend/Utils.py +++ b/backend/Utils.py @@ -1,7 +1,5 @@ import json import psycopg2 -from psycopg2 import sql - def get_postgresql_connection(): '''get the creds from local config''' diff --git 
a/backend/annotations.json b/backend/annotations.json deleted file mode 100644 index a6625b0..0000000 --- a/backend/annotations.json +++ /dev/null @@ -1,962 +0,0 @@ -[ - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 30, - "label": "PLACE" - }, - { - "beginOffset": 47, - "endOffset": 53, - "label": "TOPIC" - }, - { - "beginOffset": 55, - "endOffset": 63, - "label": "TOPIC" - }, - { - "beginOffset": 69, - "endOffset": 80, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 26, - "label": "PLACE" - }, - { - "beginOffset": 43, - "endOffset": 50, - "label": "TOPIC" - }, - { - "beginOffset": 52, - "endOffset": 62, - "label": "TOPIC" - }, - { - "beginOffset": 68, - "endOffset": 74, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 29, - "label": "PLACE" - }, - { - "beginOffset": 46, - "endOffset": 52, - "label": "TOPIC" - }, - { - "beginOffset": 54, - "endOffset": 60, - "label": "TOPIC" - }, - { - "beginOffset": 66, - "endOffset": 74, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 23, - "label": "PLACE" - }, - { - "beginOffset": 40, - "endOffset": 49, - "label": "TOPIC" - }, - { - "beginOffset": 51, - "endOffset": 57, - "label": "TOPIC" - }, - { - "beginOffset": 63, - "endOffset": 73, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 25, - "label": "PLACE" - }, - { - "beginOffset": 42, - "endOffset": 51, - "label": "TOPIC" - }, - { - "beginOffset": 53, - "endOffset": 62, - "label": "TOPIC" - }, - { - "beginOffset": 68, - "endOffset": 74, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 29, - "label": "PLACE" - }, - { - "beginOffset": 46, - "endOffset": 52, - "label": "TOPIC" - }, - { - "beginOffset": 54, - "endOffset": 60, - "label": "TOPIC" - }, - { - "beginOffset": 66, - "endOffset": 73, - "label": "TOPIC" - } - ] - }, - { - 
"annotations": [ - { - "beginOffset": 17, - "endOffset": 27, - "label": "PLACE" - }, - { - "beginOffset": 44, - "endOffset": 51, - "label": "TOPIC" - }, - { - "beginOffset": 53, - "endOffset": 59, - "label": "TOPIC" - }, - { - "beginOffset": 65, - "endOffset": 79, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 24, - "label": "PLACE" - }, - { - "beginOffset": 41, - "endOffset": 51, - "label": "TOPIC" - }, - { - "beginOffset": 53, - "endOffset": 61, - "label": "TOPIC" - }, - { - "beginOffset": 67, - "endOffset": 72, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 27, - "label": "PLACE" - }, - { - "beginOffset": 44, - "endOffset": 52, - "label": "TOPIC" - }, - { - "beginOffset": 54, - "endOffset": 65, - "label": "TOPIC" - }, - { - "beginOffset": 71, - "endOffset": 76, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 24, - "label": "PLACE" - }, - { - "beginOffset": 41, - "endOffset": 50, - "label": "TOPIC" - }, - { - "beginOffset": 52, - "endOffset": 57, - "label": "TOPIC" - }, - { - "beginOffset": 63, - "endOffset": 70, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 23, - "label": "PLACE" - }, - { - "beginOffset": 40, - "endOffset": 49, - "label": "TOPIC" - }, - { - "beginOffset": 51, - "endOffset": 57, - "label": "TOPIC" - }, - { - "beginOffset": 63, - "endOffset": 69, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 23, - "label": "PLACE" - }, - { - "beginOffset": 40, - "endOffset": 54, - "label": "TOPIC" - }, - { - "beginOffset": 56, - "endOffset": 67, - "label": "TOPIC" - }, - { - "beginOffset": 73, - "endOffset": 82, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 23, - "label": "PLACE" - }, - { - "beginOffset": 40, - "endOffset": 50, - "label": "TOPIC" - }, - { - "beginOffset": 52, - 
"endOffset": 58, - "label": "TOPIC" - }, - { - "beginOffset": 64, - "endOffset": 73, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 27, - "label": "PLACE" - }, - { - "beginOffset": 44, - "endOffset": 55, - "label": "TOPIC" - }, - { - "beginOffset": 57, - "endOffset": 66, - "label": "TOPIC" - }, - { - "beginOffset": 72, - "endOffset": 82, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 23, - "label": "PLACE" - }, - { - "beginOffset": 40, - "endOffset": 45, - "label": "TOPIC" - }, - { - "beginOffset": 47, - "endOffset": 57, - "label": "TOPIC" - }, - { - "beginOffset": 63, - "endOffset": 70, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 27, - "label": "PLACE" - }, - { - "beginOffset": 44, - "endOffset": 54, - "label": "TOPIC" - }, - { - "beginOffset": 56, - "endOffset": 65, - "label": "TOPIC" - }, - { - "beginOffset": 71, - "endOffset": 80, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 24, - "label": "PLACE" - }, - { - "beginOffset": 41, - "endOffset": 48, - "label": "TOPIC" - }, - { - "beginOffset": 50, - "endOffset": 60, - "label": "TOPIC" - }, - { - "beginOffset": 66, - "endOffset": 80, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 26, - "label": "PLACE" - }, - { - "beginOffset": 43, - "endOffset": 49, - "label": "TOPIC" - }, - { - "beginOffset": 51, - "endOffset": 57, - "label": "TOPIC" - }, - { - "beginOffset": 63, - "endOffset": 70, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 29, - "label": "PLACE" - }, - { - "beginOffset": 46, - "endOffset": 54, - "label": "TOPIC" - }, - { - "beginOffset": 56, - "endOffset": 65, - "label": "TOPIC" - }, - { - "beginOffset": 71, - "endOffset": 80, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 
25, - "label": "PLACE" - }, - { - "beginOffset": 42, - "endOffset": 50, - "label": "TOPIC" - }, - { - "beginOffset": 52, - "endOffset": 59, - "label": "TOPIC" - }, - { - "beginOffset": 65, - "endOffset": 74, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 24, - "label": "PLACE" - }, - { - "beginOffset": 41, - "endOffset": 48, - "label": "TOPIC" - }, - { - "beginOffset": 50, - "endOffset": 56, - "label": "TOPIC" - }, - { - "beginOffset": 62, - "endOffset": 68, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 29, - "label": "PLACE" - }, - { - "beginOffset": 46, - "endOffset": 56, - "label": "TOPIC" - }, - { - "beginOffset": 58, - "endOffset": 64, - "label": "TOPIC" - }, - { - "beginOffset": 70, - "endOffset": 79, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 23, - "label": "PLACE" - }, - { - "beginOffset": 40, - "endOffset": 46, - "label": "TOPIC" - }, - { - "beginOffset": 48, - "endOffset": 55, - "label": "TOPIC" - }, - { - "beginOffset": 61, - "endOffset": 71, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 25, - "label": "PLACE" - }, - { - "beginOffset": 42, - "endOffset": 49, - "label": "TOPIC" - }, - { - "beginOffset": 51, - "endOffset": 58, - "label": "TOPIC" - }, - { - "beginOffset": 64, - "endOffset": 73, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 26, - "label": "PLACE" - }, - { - "beginOffset": 43, - "endOffset": 53, - "label": "TOPIC" - }, - { - "beginOffset": 55, - "endOffset": 62, - "label": "TOPIC" - }, - { - "beginOffset": 68, - "endOffset": 77, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 24, - "label": "PLACE" - }, - { - "beginOffset": 41, - "endOffset": 55, - "label": "TOPIC" - }, - { - "beginOffset": 57, - "endOffset": 67, - "label": "TOPIC" - }, - { - 
"beginOffset": 73, - "endOffset": 80, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 24, - "label": "PLACE" - }, - { - "beginOffset": 41, - "endOffset": 47, - "label": "TOPIC" - }, - { - "beginOffset": 49, - "endOffset": 56, - "label": "TOPIC" - }, - { - "beginOffset": 62, - "endOffset": 71, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 25, - "label": "PLACE" - }, - { - "beginOffset": 42, - "endOffset": 52, - "label": "TOPIC" - }, - { - "beginOffset": 54, - "endOffset": 64, - "label": "TOPIC" - }, - { - "beginOffset": 70, - "endOffset": 77, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 25, - "label": "PLACE" - }, - { - "beginOffset": 42, - "endOffset": 49, - "label": "TOPIC" - }, - { - "beginOffset": 51, - "endOffset": 65, - "label": "TOPIC" - }, - { - "beginOffset": 71, - "endOffset": 80, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 25, - "label": "PLACE" - }, - { - "beginOffset": 42, - "endOffset": 48, - "label": "TOPIC" - }, - { - "beginOffset": 50, - "endOffset": 59, - "label": "TOPIC" - }, - { - "beginOffset": 65, - "endOffset": 74, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 23, - "label": "PLACE" - }, - { - "beginOffset": 40, - "endOffset": 47, - "label": "TOPIC" - }, - { - "beginOffset": 49, - "endOffset": 55, - "label": "TOPIC" - }, - { - "beginOffset": 61, - "endOffset": 71, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 23, - "label": "PLACE" - }, - { - "beginOffset": 40, - "endOffset": 46, - "label": "TOPIC" - }, - { - "beginOffset": 48, - "endOffset": 55, - "label": "TOPIC" - }, - { - "beginOffset": 61, - "endOffset": 70, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 29, - "label": "PLACE" - }, - { - "beginOffset": 
46, - "endOffset": 53, - "label": "TOPIC" - }, - { - "beginOffset": 55, - "endOffset": 60, - "label": "TOPIC" - }, - { - "beginOffset": 66, - "endOffset": 76, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 25, - "label": "PLACE" - }, - { - "beginOffset": 42, - "endOffset": 49, - "label": "TOPIC" - }, - { - "beginOffset": 51, - "endOffset": 61, - "label": "TOPIC" - }, - { - "beginOffset": 67, - "endOffset": 76, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 24, - "label": "PLACE" - }, - { - "beginOffset": 41, - "endOffset": 52, - "label": "TOPIC" - }, - { - "beginOffset": 54, - "endOffset": 63, - "label": "TOPIC" - }, - { - "beginOffset": 69, - "endOffset": 79, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 30, - "label": "PLACE" - }, - { - "beginOffset": 47, - "endOffset": 57, - "label": "TOPIC" - }, - { - "beginOffset": 59, - "endOffset": 64, - "label": "TOPIC" - }, - { - "beginOffset": 70, - "endOffset": 75, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 25, - "label": "PLACE" - }, - { - "beginOffset": 42, - "endOffset": 51, - "label": "TOPIC" - }, - { - "beginOffset": 53, - "endOffset": 60, - "label": "TOPIC" - }, - { - "beginOffset": 66, - "endOffset": 75, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 27, - "label": "PLACE" - }, - { - "beginOffset": 44, - "endOffset": 50, - "label": "TOPIC" - }, - { - "beginOffset": 52, - "endOffset": 58, - "label": "TOPIC" - }, - { - "beginOffset": 64, - "endOffset": 71, - "label": "TOPIC" - } - ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 23, - "label": "PLACE" - }, - { - "beginOffset": 40, - "endOffset": 54, - "label": "TOPIC" - }, - { - "beginOffset": 56, - "endOffset": 67, - "label": "TOPIC" - }, - { - "beginOffset": 73, - "endOffset": 80, - "label": "TOPIC" - } 
- ] - }, - { - "annotations": [ - { - "beginOffset": 17, - "endOffset": 23, - "label": "PLACE" - }, - { - "beginOffset": 40, - "endOffset": 46, - "label": "TOPIC" - }, - { - "beginOffset": 48, - "endOffset": 58, - "label": "TOPIC" - }, - { - "beginOffset": 64, - "endOffset": 71, - "label": "TOPIC" - } - ] - } -] \ No newline at end of file diff --git a/backend/clustering_service/clustering_service.py b/backend/clustering_service/clustering_service.py new file mode 100644 index 0000000..e9ac3de --- /dev/null +++ b/backend/clustering_service/clustering_service.py @@ -0,0 +1,327 @@ +from Utils import get_postgresql_connection + +import numpy as np +from sentence_transformers import SentenceTransformer +import faiss +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import cosine_similarity +import networkx as nx +from collections import defaultdict +import pickle +import json + +class NewsArticleIndexer: + """ + Multi-algorithm news article indexer with various similarity detection methods + """ + + def __init__(self, embedding_model='all-MiniLM-L6-v2', use_gpu=False): + # Initialize embedding model + self.embedding_model = SentenceTransformer(embedding_model) + if use_gpu: + self.embedding_model = self.embedding_model.cuda() + + # Initialize various indexing structures + self.articles = [] + self.embeddings = None + self.faiss_index = None + self.tfidf_vectorizer = None + self.tfidf_matrix = None + self.graph = None + + def add_articles(self, articles): + """Add articles to the indexer""" + self.articles.extend(articles) + self._build_indices() + + def preprocess_text(self, article): + """Enhanced preprocessing for better relevance""" + # Combine title (weighted more heavily) and content + title_weight = 2 # Give title more importance + return f"{' '.join([article['title']] * title_weight)} {article.get('location_mention', '')} {article.get('officals_involved', '')} {article.get('relevance_category', '')}" + + def 
_build_indices(self): + """Build all indexing structures""" + texts = [self.preprocess_text(article) for article in self.articles] + + # 1. SEMANTIC EMBEDDINGS (Best for semantic similarity) + print("Building semantic embeddings...") + self.embeddings = self.embedding_model.encode(texts, convert_to_tensor=True) + + # 2. FAISS INDEX (Best for large-scale retrieval) + print("Building FAISS index...") + self._build_faiss_index() + + # 3. TF-IDF (Best for keyword-based similarity) + print("Building TF-IDF index...") + self._build_tfidf_index(texts) + + # 4. GRAPH-BASED (Best for discovering article networks) + print("Building article graph...") + self._build_article_graph() + + def _build_faiss_index(self): + """Build FAISS index for fast similarity search""" + d = self.embeddings.shape[1] + + # For small datasets: IndexFlatL2 (exact search) + # For large datasets: IndexIVFFlat (approximate search) + if len(self.articles) < 10000: + self.faiss_index = faiss.IndexFlatL2(d) + else: + # Use IVF for larger datasets + nlist = min(100, len(self.articles) // 10) # number of clusters + quantizer = faiss.IndexFlatL2(d) + self.faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist) + # Train the index + self.faiss_index.train(self.embeddings.cpu().numpy()) + + self.faiss_index.add(self.embeddings.cpu().numpy()) + + def _build_tfidf_index(self, texts): + """Build TF-IDF index for keyword-based similarity""" + self.tfidf_vectorizer = TfidfVectorizer( + max_features=5000, + stop_words='english', + ngram_range=(1, 2), # Include bigrams + min_df=1, + max_df=0.95 + ) + self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(texts) + + def _build_article_graph(self, similarity_threshold=0.3): + """Build graph of related articles""" + self.graph = nx.Graph() + + # Add all articles as nodes + for i, article in enumerate(self.articles): + self.graph.add_node(i, **article) + + # Add edges based on similarity + for i in range(len(self.articles)): + similar_articles = 
self.find_similar_semantic(i, k=5, return_scores=True) + for j, score in similar_articles: + if i != j and score > similarity_threshold: + self.graph.add_edge(i, j, weight=score) + + # ===== SIMILARITY SEARCH METHODS ===== + + def find_similar_semantic(self, query_idx, k=5, return_scores=False): + """Find similar articles using semantic embeddings (BEST for meaning)""" + query_embedding = self.embeddings[query_idx].cpu().numpy().reshape(1, -1) + + # Search using FAISS + distances, indices = self.faiss_index.search(query_embedding, k + 1) + + # Remove the query article itself + results = [] + for i, (dist, idx) in enumerate(zip(distances[0], indices[0])): + if idx != query_idx: + similarity_score = 1 / (1 + dist) # Convert distance to similarity + if return_scores: + results.append((idx, similarity_score)) + else: + results.append(idx) + if len(results) >= k: + break + + return results + + def find_similar_tfidf(self, query_idx, k=5, return_scores=False): + """Find similar articles using TF-IDF (BEST for keywords)""" + query_vector = self.tfidf_matrix[query_idx] + similarities = cosine_similarity(query_vector, self.tfidf_matrix).flatten() + + # Get top-k similar articles (excluding self) + similar_indices = similarities.argsort()[::-1] + results = [] + + for idx in similar_indices: + if idx != query_idx and len(results) < k: + if return_scores: + results.append((idx, similarities[idx])) + else: + results.append(idx) + + return results + + def find_similar_hybrid(self, query_idx, k=5, semantic_weight=0.7, return_scores=False): + """Hybrid approach combining semantic and TF-IDF (BEST overall)""" + # Get semantic similarities + semantic_results = self.find_similar_semantic(query_idx, k=k*2, return_scores=True) + tfidf_results = self.find_similar_tfidf(query_idx, k=k*2, return_scores=True) + + # Combine scores + combined_scores = defaultdict(float) + + for idx, score in semantic_results: + combined_scores[idx] += semantic_weight * score + + for idx, score in 
tfidf_results: + combined_scores[idx] += (1 - semantic_weight) * score + + # Sort by combined score + sorted_results = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True) + + if return_scores: + return sorted_results[:k] + else: + return [idx for idx, _ in sorted_results[:k]] + + def find_similar_graph(self, query_idx, k=5): + """Find similar articles using graph-based methods""" + if not self.graph.has_node(query_idx): + return [] + + # Get neighbors sorted by edge weight + neighbors = list(self.graph.neighbors(query_idx)) + neighbor_weights = [(n, self.graph[query_idx][n]['weight']) for n in neighbors] + neighbor_weights.sort(key=lambda x: x[1], reverse=True) + + return [idx for idx, _ in neighbor_weights[:k]] + + # ===== ADVANCED ANALYSIS METHODS ===== + + def detect_article_clusters(self, method='semantic', n_clusters=None): + """Detect clusters of related articles""" + from sklearn.cluster import KMeans, DBSCAN + + if method == 'semantic': + features = self.embeddings.cpu().numpy() + elif method == 'tfidf': + features = self.tfidf_matrix.toarray() + + if n_clusters: + clusterer = KMeans(n_clusters=n_clusters, random_state=42) + else: + clusterer = DBSCAN(eps=0.5, min_samples=2) + + cluster_labels = clusterer.fit_predict(features) + + # Group articles by cluster + clusters = defaultdict(list) + for i, label in enumerate(cluster_labels): + clusters[label].append(i) + + return dict(clusters) + + def find_trending_topics(self, time_window_hours=24): + """Find trending topics (if articles have timestamps)""" + # This would require timestamp information in articles + # Implementation would filter recent articles and cluster them + pass + + def get_article_importance_scores(self): + """Calculate importance scores using PageRank on article graph""" + if self.graph is None: + return {} + + pagerank_scores = nx.pagerank(self.graph, weight='weight') + return pagerank_scores + + def save_index(self, filepath): + """Save the entire index to disk""" + 
index_data = { + 'articles': self.articles, + 'embeddings': self.embeddings.cpu().numpy() if self.embeddings is not None else None, + 'tfidf_vectorizer': self.tfidf_vectorizer, + 'tfidf_matrix': self.tfidf_matrix, + 'graph': self.graph + } + + with open(filepath, 'wb') as f: + pickle.dump(index_data, f) + + # Save FAISS index separately + if self.faiss_index is not None: + faiss.write_index(self.faiss_index, f"{filepath}.faiss") + + def load_index(self, filepath): + """Load index from disk""" + with open(filepath, 'rb') as f: + index_data = pickle.load(f) + + self.articles = index_data['articles'] + self.embeddings = index_data['embeddings'] + self.tfidf_vectorizer = index_data['tfidf_vectorizer'] + self.tfidf_matrix = index_data['tfidf_matrix'] + self.graph = index_data['graph'] + + # Load FAISS index + try: + self.faiss_index = faiss.read_index(f"{filepath}.faiss") + except: + print("Could not load FAISS index, rebuilding...") + self._build_faiss_index() + +# ===== USAGE EXAMPLE ===== + +def main(): + # Sample articles + conn = get_postgresql_connection() + cursor = conn.cursor() + query = "SELECT * FROM articles order by article_id asc" + cursor.execute(query) + articles_db = cursor.fetchall() + articles = [{} for _ in range(len(articles_db))] + for i, article in enumerate(articles_db): + articles[i] = { + "article_id": article[0], + "title": article[1], + # "content": article[2], + "location_mention": article[5], + "officals_involved": article[6], + "relevance_category": article[7], + } + + # Initialize indexer + indexer = NewsArticleIndexer() + indexer.add_articles(articles) + + # Test different similarity methods + # "President Signs New Trade Deal" + for i, article in enumerate(articles): + query_idx = i + print(f"\nšŸ” i: '{i}'") + print(f"\nšŸ” Finding articles similar to: '{articles[query_idx]['title']}'") + linked_id = [] + print("\n1. 
Semantic Similarity (Best for meaning):") + semantic_results = indexer.find_similar_semantic(query_idx, k=3) + for idx in semantic_results: + linked_id.append(articles[idx]['article_id']) + print(f" - {articles[idx]['title']}") + + # print("\n2. TF-IDF Similarity (Best for keywords):") + # tfidf_results = indexer.find_similar_tfidf(query_idx, k=3) + # for idx in tfidf_results: + # print(f" - {articles[idx]['title']}") + + print("\n3. Hybrid Similarity (Best overall):") + hybrid_results = indexer.find_similar_hybrid(query_idx, k=3) + for idx in hybrid_results: + print(f" - {articles[idx]['title']}") + cursor.execute("UPDATE articles SET linked_id = %s WHERE article_id = %s", (linked_id, articles[query_idx]['article_id'])) + conn.commit() + # print("\n4. Graph-based Similarity:") + # graph_results = indexer.find_similar_graph(query_idx, k=3) + # for idx in graph_results: + # print(f" - {articles[idx]['title']}") + + # # Detect clusters + # print("\n Article Clusters:") + # clusters = indexer.detect_article_clusters(method='semantic') + # for cluster_id, article_indices in clusters.items(): + # if cluster_id != -1: # Ignore noise cluster + # print(f"Cluster {cluster_id}:") + # for idx in article_indices: + # print(f" - {articles[idx]['title']}") + + # Calculate importance scores + # print("\n Article Importance Scores:") + # importance_scores = indexer.get_article_importance_scores() + # sorted_importance = sorted(importance_scores.items(), key=lambda x: x[1], reverse=True) + # for idx, score in sorted_importance[:3]: + # print(f" {score:.3f} - {articles[idx]['title']}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/backend/get_feed/get_feed.py b/backend/get_feed/get_feed.py new file mode 100644 index 0000000..7616578 --- /dev/null +++ b/backend/get_feed/get_feed.py @@ -0,0 +1,20 @@ + +import json +from Utils import get_postgresql_connection + + +def lambda_handler(event, context): + conn = get_postgresql_connection() + cursor = 
conn.cursor() + query = "SELECT * FROM articles order by article_id asc" + cursor.execute(query) + columns = [desc[0] for desc in cursor.description] + rows = cursor.fetchall() + result = [dict(zip(columns, row)) for row in rows] + return { + "statusCode": 200, + "headers": { + "Content-Type": "application/json" + }, + "body": json.dumps(result) + } \ No newline at end of file diff --git a/backend/input.csv b/backend/input.csv deleted file mode 100644 index ae4e274..0000000 --- a/backend/input.csv +++ /dev/null @@ -1,41 +0,0 @@ -article_id,title,body,source,publishedDate -1,"Visakhapatnam news update: Policy, heritage, environment","Recent events in Visakhapatnam have focused on policy, heritage, and environment. Authorities are responding accordingly to ensure public welfare.",New Indian Express,01/06/2025 -2,"Hyderabad news update: Weather, cybercrime, policy","Recent events in Hyderabad have focused on weather, cybercrime, and policy. Authorities are responding accordingly to ensure public welfare.",Eenadu,29/06/2025 -3,"Vizianagaram news update: Policy, sports, heritage","Recent events in Vizianagaram have focused on policy, sports, and heritage. Authorities are responding accordingly to ensure public welfare.",The Hindu,03/06/2025 -4,"Ongole news update: Education, policy, employment","Recent events in Ongole have focused on education, policy, and employment. Authorities are responding accordingly to ensure public welfare.",Andhra Jyothy,25/06/2025 -5,"Tirupati news update: Transport, elections, policy","Recent events in Tirupati have focused on transport, elections, and policy. Authorities are responding accordingly to ensure public welfare.",The Hindu,10/06/2025 -6,"Vizianagaram news update: Policy, sports, startup","Recent events in Vizianagaram have focused on policy, sports, and startup. 
Authorities are responding accordingly to ensure public welfare.",Deccan Chronicle,15/06/2025 -7,"Vijayawada news update: Startup, sports, infrastructure","Recent events in Vijayawada have focused on startup, sports, and infrastructure. Authorities are responding accordingly to ensure public welfare.",New Indian Express,25/06/2025 -8,"Nellore news update: Cybercrime, heritage, flood","Recent events in Nellore have focused on cybercrime, heritage, and flood. Authorities are responding accordingly to ensure public welfare.",Deccan Chronicle,16/06/2025 -9,"Vijayawada news update: Heritage, environment, flood","Recent events in Vijayawada have focused on heritage, environment, and flood. Authorities are responding accordingly to ensure public welfare.",New Indian Express,07/06/2025 -10,"Kurnool news update: Transport, crime, culture","Recent events in Kurnool have focused on transport, crime, and culture. Authorities are responding accordingly to ensure public welfare.",The Hindu,29/06/2025 -11,"Guntur news update: Elections, policy, health","Recent events in Guntur have focused on elections, policy, and health. Authorities are responding accordingly to ensure public welfare.",Sakshi,13/06/2025 -12,"Guntur news update: Infrastructure, environment, transport","Recent events in Guntur have focused on infrastructure, environment, and transport. Authorities are responding accordingly to ensure public welfare.",Deccan Chronicle,06/06/2025 -13,"Guntur news update: Cybercrime, energy, transport","Recent events in Guntur have focused on cybercrime, energy, and transport. Authorities are responding accordingly to ensure public welfare.",Sakshi,14/06/2025 -14,"Srikakulam news update: Environment, elections, government","Recent events in Srikakulam have focused on environment, elections, and government. 
Authorities are responding accordingly to ensure public welfare.",Times of India,06/06/2025 -15,"Ongole news update: Flood, government, weather","Recent events in Ongole have focused on flood, government, and weather. Authorities are responding accordingly to ensure public welfare.",Times of India,06/06/2025 -16,"Srikakulam news update: Government, transport, education","Recent events in Srikakulam have focused on government, transport, and education. Authorities are responding accordingly to ensure public welfare.",Sakshi,07/06/2025 -17,"Kurnool news update: Weather, government, infrastructure","Recent events in Kurnool have focused on weather, government, and infrastructure. Authorities are responding accordingly to ensure public welfare.",The Hindu,11/06/2025 -18,"Anantapur news update: Policy, health, weather","Recent events in Anantapur have focused on policy, health, and weather. Authorities are responding accordingly to ensure public welfare.",New Indian Express,15/06/2025 -19,"Vizianagaram news update: Heritage, transport, education","Recent events in Vizianagaram have focused on heritage, transport, and education. Authorities are responding accordingly to ensure public welfare.",Deccan Chronicle,08/06/2025 -20,"Tirupati news update: Heritage, weather, education","Recent events in Tirupati have focused on heritage, weather, and education. Authorities are responding accordingly to ensure public welfare.",Deccan Chronicle,18/06/2025 -21,"Kurnool news update: Startup, sports, policy","Recent events in Kurnool have focused on startup, sports, and policy. Authorities are responding accordingly to ensure public welfare.",The Hindu,08/06/2025 -22,"Vizianagaram news update: Technology, health, education","Recent events in Vizianagaram have focused on technology, health, and education. 
Authorities are responding accordingly to ensure public welfare.",The Hindu,03/06/2025 -23,"Kadapa news update: Health, weather, employment","Recent events in Kadapa have focused on health, weather, and employment. Authorities are responding accordingly to ensure public welfare.",New Indian Express,19/06/2025 -24,"Tirupati news update: Culture, startup, transport","Recent events in Tirupati have focused on culture, startup, and transport. Authorities are responding accordingly to ensure public welfare.",Eenadu,04/06/2025 -25,"Anantapur news update: Employment, culture, education","Recent events in Anantapur have focused on employment, culture, and education. Authorities are responding accordingly to ensure public welfare.",Eenadu,22/06/2025 -26,"Kurnool news update: Infrastructure, cybercrime, culture","Recent events in Kurnool have focused on infrastructure, cybercrime, and culture. Authorities are responding accordingly to ensure public welfare.",The Hindu,14/06/2025 -27,"Kurnool news update: Policy, culture, education","Recent events in Kurnool have focused on policy, culture, and education. Authorities are responding accordingly to ensure public welfare.",Eenadu,08/06/2025 -28,"Tirupati news update: Technology, employment, weather","Recent events in Tirupati have focused on technology, employment, and weather. Authorities are responding accordingly to ensure public welfare.",Sakshi,17/06/2025 -29,"Tirupati news update: Weather, infrastructure, elections","Recent events in Tirupati have focused on weather, infrastructure, and elections. Authorities are responding accordingly to ensure public welfare.",Times of India,21/06/2025 -30,"Tirupati news update: Sports, education, transport","Recent events in Tirupati have focused on sports, education, and transport. 
Authorities are responding accordingly to ensure public welfare.",Times of India,02/06/2025 -31,"Ongole news update: Weather, health, employment","Recent events in Ongole have focused on weather, health, and employment. Authorities are responding accordingly to ensure public welfare.",Deccan Chronicle,21/06/2025 -32,"Kadapa news update: Policy, culture, elections","Recent events in Kadapa have focused on policy, culture, and elections. Authorities are responding accordingly to ensure public welfare.",Sakshi,12/06/2025 -33,"Vizianagaram news update: Weather, crime, cybercrime","Recent events in Vizianagaram have focused on weather, crime, and cybercrime. Authorities are responding accordingly to ensure public welfare.",Sakshi,01/06/2025 -34,"Tirupati news update: Startup, employment, transport","Recent events in Tirupati have focused on startup, employment, and transport. Authorities are responding accordingly to ensure public welfare.",The Hindu,21/06/2025 -35,"Nellore news update: Environment, transport, technology","Recent events in Nellore have focused on environment, transport, and technology. Authorities are responding accordingly to ensure public welfare.",Sakshi,07/06/2025 -36,"Visakhapatnam news update: Technology, flood, crime","Recent events in Visakhapatnam have focused on technology, flood, and crime. Authorities are responding accordingly to ensure public welfare.",Andhra Jyothy,14/06/2025 -37,"Chittoor news update: Transport, weather, education","Recent events in Chittoor have focused on transport, weather, and education. Authorities are responding accordingly to ensure public welfare.",Eenadu,08/06/2025 -38,"Vijayawada news update: Health, policy, culture","Recent events in Vijayawada have focused on health, policy, and culture. 
Authorities are responding accordingly to ensure public welfare.",Eenadu,12/06/2025 -39,"Kadapa news update: Infrastructure, environment, weather","Recent events in Kadapa have focused on infrastructure, environment, and weather. Authorities are responding accordingly to ensure public welfare.",New Indian Express,09/06/2025 -40,"Kadapa news update: Health, government, weather","Recent events in Kadapa have focused on health, government, and weather. Authorities are responding accordingly to ensure public welfare.",The Hindu,02/06/2025 \ No newline at end of file diff --git a/backend/input_feed.json b/backend/input_feed.json deleted file mode 100644 index ac56db2..0000000 --- a/backend/input_feed.json +++ /dev/null @@ -1,602 +0,0 @@ -[ - { - "title": "Visakhapatnam news update: Policy, heritage, environment", - "body": "Recent events in Visakhapatnam have focused on policy, heritage, and environment. Authorities are responding accordingly to ensure public welfare.", - "source": "New Indian Express", - "publishedDate": "01/06/2025", - "extractedLocations": [ - "Visakhapatnam" - ], - "districtMapping": "Visakhapatnam", - "tags": [ - "policy", - "heritage", - "environment" - ] - }, - { - "title": "Hyderabad news update: Weather, cybercrime, policy", - "body": "Recent events in Hyderabad have focused on weather, cybercrime, and policy. Authorities are responding accordingly to ensure public welfare.", - "source": "Eenadu", - "publishedDate": "29/06/2025", - "extractedLocations": [ - "Hyderabad" - ], - "districtMapping": "Hyderabad", - "tags": [ - "weather", - "cybercrime", - "policy" - ] - }, - { - "title": "Vizianagaram news update: Policy, sports, heritage", - "body": "Recent events in Vizianagaram have focused on policy, sports, and heritage. 
Authorities are responding accordingly to ensure public welfare.", - "source": "The Hindu", - "publishedDate": "03/06/2025", - "extractedLocations": [ - "Vizianagaram" - ], - "districtMapping": "Vizianagaram", - "tags": [ - "policy", - "sports", - "heritage" - ] - }, - { - "title": "Ongole news update: Education, policy, employment", - "body": "Recent events in Ongole have focused on education, policy, and employment. Authorities are responding accordingly to ensure public welfare.", - "source": "Andhra Jyothy", - "publishedDate": "25/06/2025", - "extractedLocations": [ - "Ongole" - ], - "districtMapping": "Prakasam", - "tags": [ - "education", - "policy", - "employment" - ] - }, - { - "title": "Tirupati news update: Transport, elections, policy", - "body": "Recent events in Tirupati have focused on transport, elections, and policy. Authorities are responding accordingly to ensure public welfare.", - "source": "The Hindu", - "publishedDate": "10/06/2025", - "extractedLocations": [ - "Tirupati" - ], - "districtMapping": "Tirupati", - "tags": [ - "transport", - "elections", - "policy" - ] - }, - { - "title": "Vizianagaram news update: Policy, sports, startup", - "body": "Recent events in Vizianagaram have focused on policy, sports, and startup. Authorities are responding accordingly to ensure public welfare.", - "source": "Deccan Chronicle", - "publishedDate": "15/06/2025", - "extractedLocations": [ - "Vizianagaram" - ], - "districtMapping": "Vizianagaram", - "tags": [ - "policy", - "sports", - "startup" - ] - }, - { - "title": "Vijayawada news update: Startup, sports, infrastructure", - "body": "Recent events in Vijayawada have focused on startup, sports, and infrastructure. 
Authorities are responding accordingly to ensure public welfare.", - "source": "New Indian Express", - "publishedDate": "25/06/2025", - "extractedLocations": [ - "Vijayawada" - ], - "districtMapping": "Krishna", - "tags": [ - "startup", - "sports", - "infrastructure" - ] - }, - { - "title": "Nellore news update: Cybercrime, heritage, flood", - "body": "Recent events in Nellore have focused on cybercrime, heritage, and flood. Authorities are responding accordingly to ensure public welfare.", - "source": "Deccan Chronicle", - "publishedDate": "16/06/2025", - "extractedLocations": [ - "Nellore" - ], - "districtMapping": "Nellore", - "tags": [ - "cybercrime", - "heritage", - "flood" - ] - }, - { - "title": "Vijayawada news update: Heritage, environment, flood", - "body": "Recent events in Vijayawada have focused on heritage, environment, and flood. Authorities are responding accordingly to ensure public welfare.", - "source": "New Indian Express", - "publishedDate": "07/06/2025", - "extractedLocations": [ - "Vijayawada" - ], - "districtMapping": "Krishna", - "tags": [ - "heritage", - "environment", - "flood" - ] - }, - { - "title": "Kurnool news update: Transport, crime, culture", - "body": "Recent events in Kurnool have focused on transport, crime, and culture. Authorities are responding accordingly to ensure public welfare.", - "source": "The Hindu", - "publishedDate": "29/06/2025", - "extractedLocations": [ - "Kurnool" - ], - "districtMapping": "Kurnool", - "tags": [ - "transport", - "crime", - "culture" - ] - }, - { - "title": "Guntur news update: Elections, policy, health", - "body": "Recent events in Guntur have focused on elections, policy, and health. 
Authorities are responding accordingly to ensure public welfare.", - "source": "Sakshi", - "publishedDate": "13/06/2025", - "extractedLocations": [ - "Guntur" - ], - "districtMapping": "Guntur", - "tags": [ - "elections", - "policy", - "health" - ] - }, - { - "title": "Guntur news update: Infrastructure, environment, transport", - "body": "Recent events in Guntur have focused on infrastructure, environment, and transport. Authorities are responding accordingly to ensure public welfare.", - "source": "Deccan Chronicle", - "publishedDate": "06/06/2025", - "extractedLocations": [ - "Guntur" - ], - "districtMapping": "Guntur", - "tags": [ - "infrastructure", - "environment", - "transport" - ] - }, - { - "title": "Guntur news update: Cybercrime, energy, transport", - "body": "Recent events in Guntur have focused on cybercrime, energy, and transport. Authorities are responding accordingly to ensure public welfare.", - "source": "Sakshi", - "publishedDate": "14/06/2025", - "extractedLocations": [ - "Guntur" - ], - "districtMapping": "Guntur", - "tags": [ - "cybercrime", - "energy", - "transport" - ] - }, - { - "title": "Srikakulam news update: Environment, elections, government", - "body": "Recent events in Srikakulam have focused on environment, elections, and government. Authorities are responding accordingly to ensure public welfare.", - "source": "Times of India", - "publishedDate": "06/06/2025", - "extractedLocations": [ - "Srikakulam" - ], - "districtMapping": "Srikakulam", - "tags": [ - "environment", - "elections", - "government" - ] - }, - { - "title": "Ongole news update: Flood, government, weather", - "body": "Recent events in Ongole have focused on flood, government, and weather. 
Authorities are responding accordingly to ensure public welfare.", - "source": "Times of India", - "publishedDate": "06/06/2025", - "extractedLocations": [ - "Ongole" - ], - "districtMapping": "Prakasam", - "tags": [ - "flood", - "government", - "weather" - ] - }, - { - "title": "Srikakulam news update: Government, transport, education", - "body": "Recent events in Srikakulam have focused on government, transport, and education. Authorities are responding accordingly to ensure public welfare.", - "source": "Sakshi", - "publishedDate": "07/06/2025", - "extractedLocations": [ - "Srikakulam" - ], - "districtMapping": "Srikakulam", - "tags": [ - "government", - "transport", - "education" - ] - }, - { - "title": "Kurnool news update: Weather, government, infrastructure", - "body": "Recent events in Kurnool have focused on weather, government, and infrastructure. Authorities are responding accordingly to ensure public welfare.", - "source": "The Hindu", - "publishedDate": "11/06/2025", - "extractedLocations": [ - "Kurnool" - ], - "districtMapping": "Kurnool", - "tags": [ - "weather", - "government", - "infrastructure" - ] - }, - { - "title": "Anantapur news update: Policy, health, weather", - "body": "Recent events in Anantapur have focused on policy, health, and weather. Authorities are responding accordingly to ensure public welfare.", - "source": "New Indian Express", - "publishedDate": "15/06/2025", - "extractedLocations": [ - "Anantapur" - ], - "districtMapping": "Anantapur", - "tags": [ - "policy", - "health", - "weather" - ] - }, - { - "title": "Vizianagaram news update: Heritage, transport, education", - "body": "Recent events in Vizianagaram have focused on heritage, transport, and education. 
Authorities are responding accordingly to ensure public welfare.", - "source": "Deccan Chronicle", - "publishedDate": "08/06/2025", - "extractedLocations": [ - "Vizianagaram" - ], - "districtMapping": "Vizianagaram", - "tags": [ - "heritage", - "transport", - "education" - ] - }, - { - "title": "Tirupati news update: Heritage, weather, education", - "body": "Recent events in Tirupati have focused on heritage, weather, and education. Authorities are responding accordingly to ensure public welfare.", - "source": "Deccan Chronicle", - "publishedDate": "18/06/2025", - "extractedLocations": [ - "Tirupati" - ], - "districtMapping": "Tirupati", - "tags": [ - "heritage", - "weather", - "education" - ] - }, - { - "title": "Kurnool news update: Startup, sports, policy", - "body": "Recent events in Kurnool have focused on startup, sports, and policy. Authorities are responding accordingly to ensure public welfare.", - "source": "The Hindu", - "publishedDate": "08/06/2025", - "extractedLocations": [ - "Kurnool" - ], - "districtMapping": "Kurnool", - "tags": [ - "startup", - "sports", - "policy" - ] - }, - { - "title": "Vizianagaram news update: Technology, health, education", - "body": "Recent events in Vizianagaram have focused on technology, health, and education. Authorities are responding accordingly to ensure public welfare.", - "source": "The Hindu", - "publishedDate": "03/06/2025", - "extractedLocations": [ - "Vizianagaram" - ], - "districtMapping": "Vizianagaram", - "tags": [ - "technology", - "health", - "education" - ] - }, - { - "title": "Kadapa news update: Health, weather, employment", - "body": "Recent events in Kadapa have focused on health, weather, and employment. 
Authorities are responding accordingly to ensure public welfare.", - "source": "New Indian Express", - "publishedDate": "19/06/2025", - "extractedLocations": [ - "Kadapa" - ], - "districtMapping": "Kadapa", - "tags": [ - "health", - "weather", - "employment" - ] - }, - { - "title": "Tirupati news update: Culture, startup, transport", - "body": "Recent events in Tirupati have focused on culture, startup, and transport. Authorities are responding accordingly to ensure public welfare.", - "source": "Eenadu", - "publishedDate": "04/06/2025", - "extractedLocations": [ - "Tirupati" - ], - "districtMapping": "Tirupati", - "tags": [ - "culture", - "startup", - "transport" - ] - }, - { - "title": "Anantapur news update: Employment, culture, education", - "body": "Recent events in Anantapur have focused on employment, culture, and education. Authorities are responding accordingly to ensure public welfare.", - "source": "Eenadu", - "publishedDate": "22/06/2025", - "extractedLocations": [ - "Anantapur" - ], - "districtMapping": "Anantapur", - "tags": [ - "employment", - "culture", - "education" - ] - }, - { - "title": "Kurnool news update: Infrastructure, cybercrime, culture", - "body": "Recent events in Kurnool have focused on infrastructure, cybercrime, and culture. Authorities are responding accordingly to ensure public welfare.", - "source": "The Hindu", - "publishedDate": "14/06/2025", - "extractedLocations": [ - "Kurnool" - ], - "districtMapping": "Kurnool", - "tags": [ - "infrastructure", - "cybercrime", - "culture" - ] - }, - { - "title": "Kurnool news update: Policy, culture, education", - "body": "Recent events in Kurnool have focused on policy, culture, and education. 
Authorities are responding accordingly to ensure public welfare.", - "source": "Eenadu", - "publishedDate": "08/06/2025", - "extractedLocations": [ - "Kurnool" - ], - "districtMapping": "Kurnool", - "tags": [ - "policy", - "culture", - "education" - ] - }, - { - "title": "Tirupati news update: Technology, employment, weather", - "body": "Recent events in Tirupati have focused on technology, employment, and weather. Authorities are responding accordingly to ensure public welfare.", - "source": "Sakshi", - "publishedDate": "17/06/2025", - "extractedLocations": [ - "Tirupati" - ], - "districtMapping": "Tirupati", - "tags": [ - "technology", - "employment", - "weather" - ] - }, - { - "title": "Tirupati news update: Weather, infrastructure, elections", - "body": "Recent events in Tirupati have focused on weather, infrastructure, and elections. Authorities are responding accordingly to ensure public welfare.", - "source": "Times of India", - "publishedDate": "21/06/2025", - "extractedLocations": [ - "Tirupati" - ], - "districtMapping": "Tirupati", - "tags": [ - "weather", - "infrastructure", - "elections" - ] - }, - { - "title": "Tirupati news update: Sports, education, transport", - "body": "Recent events in Tirupati have focused on sports, education, and transport. Authorities are responding accordingly to ensure public welfare.", - "source": "Times of India", - "publishedDate": "02/06/2025", - "extractedLocations": [ - "Tirupati" - ], - "districtMapping": "Tirupati", - "tags": [ - "sports", - "education", - "transport" - ] - }, - { - "title": "Ongole news update: Weather, health, employment", - "body": "Recent events in Ongole have focused on weather, health, and employment. 
Authorities are responding accordingly to ensure public welfare.", - "source": "Deccan Chronicle", - "publishedDate": "21/06/2025", - "extractedLocations": [ - "Ongole" - ], - "districtMapping": "Prakasam", - "tags": [ - "weather", - "health", - "employment" - ] - }, - { - "title": "Kadapa news update: Policy, culture, elections", - "body": "Recent events in Kadapa have focused on policy, culture, and elections. Authorities are responding accordingly to ensure public welfare.", - "source": "Sakshi", - "publishedDate": "12/06/2025", - "extractedLocations": [ - "Kadapa" - ], - "districtMapping": "Kadapa", - "tags": [ - "policy", - "culture", - "elections" - ] - }, - { - "title": "Vizianagaram news update: Weather, crime, cybercrime", - "body": "Recent events in Vizianagaram have focused on weather, crime, and cybercrime. Authorities are responding accordingly to ensure public welfare.", - "source": "Sakshi", - "publishedDate": "01/06/2025", - "extractedLocations": [ - "Vizianagaram" - ], - "districtMapping": "Vizianagaram", - "tags": [ - "weather", - "crime", - "cybercrime" - ] - }, - { - "title": "Tirupati news update: Startup, employment, transport", - "body": "Recent events in Tirupati have focused on startup, employment, and transport. Authorities are responding accordingly to ensure public welfare.", - "source": "The Hindu", - "publishedDate": "21/06/2025", - "extractedLocations": [ - "Tirupati" - ], - "districtMapping": "Tirupati", - "tags": [ - "startup", - "employment", - "transport" - ] - }, - { - "title": "Nellore news update: Environment, transport, technology", - "body": "Recent events in Nellore have focused on environment, transport, and technology. 
Authorities are responding accordingly to ensure public welfare.", - "source": "Sakshi", - "publishedDate": "07/06/2025", - "extractedLocations": [ - "Nellore" - ], - "districtMapping": "Nellore", - "tags": [ - "environment", - "transport", - "technology" - ] - }, - { - "title": "Visakhapatnam news update: Technology, flood, crime", - "body": "Recent events in Visakhapatnam have focused on technology, flood, and crime. Authorities are responding accordingly to ensure public welfare.", - "source": "Andhra Jyothy", - "publishedDate": "14/06/2025", - "extractedLocations": [ - "Visakhapatnam" - ], - "districtMapping": "Visakhapatnam", - "tags": [ - "technology", - "flood", - "crime" - ] - }, - { - "title": "Chittoor news update: Transport, weather, education", - "body": "Recent events in Chittoor have focused on transport, weather, and education. Authorities are responding accordingly to ensure public welfare.", - "source": "Eenadu", - "publishedDate": "08/06/2025", - "extractedLocations": [ - "Chittoor" - ], - "districtMapping": "Chittoor", - "tags": [ - "transport", - "weather", - "education" - ] - }, - { - "title": "Vijayawada news update: Health, policy, culture", - "body": "Recent events in Vijayawada have focused on health, policy, and culture. Authorities are responding accordingly to ensure public welfare.", - "source": "Eenadu", - "publishedDate": "12/06/2025", - "extractedLocations": [ - "Vijayawada" - ], - "districtMapping": "Krishna", - "tags": [ - "health", - "policy", - "culture" - ] - }, - { - "title": "Kadapa news update: Infrastructure, environment, weather", - "body": "Recent events in Kadapa have focused on infrastructure, environment, and weather. 
Authorities are responding accordingly to ensure public welfare.", - "source": "New Indian Express", - "publishedDate": "09/06/2025", - "extractedLocations": [ - "Kadapa" - ], - "districtMapping": "Kadapa", - "tags": [ - "infrastructure", - "environment", - "weather" - ] - }, - { - "title": "Kadapa news update: Health, government, weather", - "body": "Recent events in Kadapa have focused on health, government, and weather. Authorities are responding accordingly to ensure public welfare.", - "source": "The Hindu", - "publishedDate": "02/06/2025", - "extractedLocations": [ - "Kadapa" - ], - "districtMapping": "Kadapa", - "tags": [ - "health", - "government", - "weather" - ] - } -] \ No newline at end of file diff --git a/backend/input_handler/input_handler.py b/backend/input_handler/input_handler.py new file mode 100644 index 0000000..50addfe --- /dev/null +++ b/backend/input_handler/input_handler.py @@ -0,0 +1,90 @@ +import csv +import io +import pandas as pd +import boto3 +import time +import uuid +from ..Utils import get_postgresql_connection +def lambda_handler(event, context): + comprehend = boto3.client('comprehend') + s3 = boto3.client('s3') + role_arn = 'arn:aws:iam::269854564686:role/hackathon-comprehend-role' + bucket_name = 'awstraindata' + conn = get_postgresql_connection() + cursor = conn.cursor() + for record in event['Records']: + print(f"New record: {record}") + bucket = record['s3']['bucket']['name'] + key = record['s3']['object']['key'] + # Download the file object + input_csv_object = s3.get_object(Bucket=bucket_name, Key=key) + cursor.execute(""" + CREATE TABLE IF NOT EXISTS comprehend_jobs ( + article_id TEXT, + input_s3_uri TEXT, + entities_path TEXT, + sentiment_path TEXT, + key_phrases_path TEXT + ) + """) + input_csv = pd.read_csv(io.BytesIO(input_csv_object['Body'].read())) + for index, row in input_csv.iterrows(): + print(f"Processing row {index}: {row}") + articles_id = str(uuid.uuid4()) # Generate a unique ID for each article + 
cursor.execute(""" + INSERT INTO articles (articles_id, title, body, source, published_date) + VALUES (%s, %s, %s, %s, %s)""", (articles_id, row[1], row[2], row[3], row[4])) + # Convert to CSV in-memory + csv_buffer = io.StringIO() + writer = csv.writer(csv_buffer) + # writer.writerow(row.headers) # Write header + writer.writerow(row) + s3_path = 'input/' + articles_id + '.csv' + s3_uri = 's3://' + bucket_name + '/' + s3_path + s3.put_object( + Bucket=bucket_name, + Key=s3_path, # adjust as needed + Body=csv_buffer.getvalue(), + ContentType='text/csv' + ) + entities_job = comprehend.start_entities_detection_job( + InputDataConfig={'S3Uri': s3_uri, 'InputFormat': 'ONE_DOC_PER_LINE'}, + OutputDataConfig={'S3Uri': 's3://awstraindata/output/entities/'}, + DataAccessRoleArn=role_arn, + LanguageCode='en', + JobName='MyEntityDetectionJob_'+ articles_id + '_' + str(int(time.time())) + ) + result = comprehend.describe_entities_detection_job(JobId=entities_job['JobId']) + entities_output = result['EntitiesDetectionJobProperties']['OutputDataConfig']['S3Uri'] + + # SENTIMENT detection job + sentiment_job = comprehend.start_sentiment_detection_job( + InputDataConfig={'S3Uri': s3_uri, 'InputFormat': 'ONE_DOC_PER_LINE'}, + OutputDataConfig={'S3Uri': 's3://awstraindata/output/sentiment/'}, + DataAccessRoleArn=role_arn, + LanguageCode='en', + JobName='MySentimentDetectionJob_' + articles_id + '_' + str(int(time.time())) + ) + res = comprehend.describe_sentiment_detection_job(JobId=sentiment_job['JobId']) + sentiment_output = res['SentimentDetectionJobProperties']['OutputDataConfig']['S3Uri'] + + # KEY PHRASES detection job + phrases_job = comprehend.start_key_phrases_detection_job( + InputDataConfig={'S3Uri': s3_uri, 'InputFormat': 'ONE_DOC_PER_LINE'}, + OutputDataConfig={'S3Uri': 's3://awstraindata/output/keyphrases/'}, + DataAccessRoleArn=role_arn, + LanguageCode='en', + JobName='MyKeyPhrasesDetectionJob_' + articles_id + '_' + str(int(time.time())) + ) + res = 
comprehend.describe_key_phrases_detection_job(JobId=phrases_job['JobId']) + key_phrases_output = res['KeyPhrasesDetectionJobProperties']['OutputDataConfig']['S3Uri'] + print("Entities Job Response:", entities_output) + print("Sentiment Job Response:", sentiment_output) + print("Key Phrases Job Response:", key_phrases_output) + print("Inserting into comprehend_jobs table") + cursor.execute(""" + INSERT INTO comprehend_jobs (article_id, input_s3_uri, entities_path, sentiment_path, key_phrases_path) + VALUES (%s, %s, %s, %s, %s)""", (articles_id, s3_uri, entities_output.replace('s3://awstraindata/', ''), sentiment_output.replace('s3://awstraindata/', ''), key_phrases_output.replace('s3://awstraindata/', ''))) + conn.commit() + cursor.close() + conn.close() \ No newline at end of file diff --git a/backend/Submit.py b/backend/input_handler/input_handler_test.py similarity index 70% rename from backend/Submit.py rename to backend/input_handler/input_handler_test.py index 5cdf6db..5c837a8 100644 --- a/backend/Submit.py +++ b/backend/input_handler/input_handler_test.py @@ -8,19 +8,18 @@ comprehend = boto3.client('comprehend') -input_s3_uri = 's3://awstraindata/input.csv' role_arn = 'arn:aws:iam::269854564686:role/hackathon-comprehend-role' bucket_name = 'awstraindata' s3 = boto3.client('s3') # Download the file object -input_csv_object = s3.get_object(Bucket=bucket_name, Key='input.csv') +input_csv_object = s3.get_object(Bucket=bucket_name, Key='input_small.csv') # Read CSV into DataFrame conn = get_postgresql_connection() cursor = conn.cursor() cursor.execute("drop table if exists articles") cursor.execute("""CREATE TABLE IF NOT EXISTS articles ( - articles_id TEXT, + article_id TEXT, title TEXT, body TEXT, source TEXT, @@ -30,19 +29,31 @@ relevance_category TEXT, sentiment TEXT )""") +cursor.execute(""" + drop table if exists comprehend_jobs +""") +cursor.execute(""" + CREATE TABLE IF NOT EXISTS comprehend_jobs ( + article_id TEXT, + input_s3_uri TEXT, + entities_path 
TEXT, + sentiment_path TEXT, + key_phrases_path TEXT + ) + """) input_csv = pd.read_csv(io.BytesIO(input_csv_object['Body'].read())) for index, row in input_csv.iterrows(): print(f"Processing row {index}: {row}") - articles_id = str(uuid.uuid4()) # Generate a unique ID for each article + article_id = str(uuid.uuid4()) # Generate a unique ID for each article cursor.execute(""" - INSERT INTO articles (articles_id, title, body, source, published_date) - VALUES (%s, %s, %s, %s, %s)""", (articles_id, row[1], row[2], row[3], row[4])) + INSERT INTO articles (article_id, title, body, source, published_date) + VALUES (%s, %s, %s, %s, %s)""", (article_id, row[1], row[2], row[3], row[4])) # Convert to CSV in-memory csv_buffer = io.StringIO() writer = csv.writer(csv_buffer) # writer.writerow(row.headers) # Write header writer.writerow(row) - s3_path = 'input/' + articles_id + '.csv' + s3_path = 'input/' + article_id + '.csv' s3_uri = 's3://' + bucket_name + '/' + s3_path s3.put_object( Bucket=bucket_name, @@ -55,7 +66,7 @@ OutputDataConfig={'S3Uri': 's3://awstraindata/output/entities/'}, DataAccessRoleArn=role_arn, LanguageCode='en', - JobName='MyEntityDetectionJob_' + str(int(time.time())), + JobName='MyEntityDetectionJob_'+ article_id + '_' + str(int(time.time())) ) result = comprehend.describe_entities_detection_job(JobId=entities_job['JobId']) entities_output = result['EntitiesDetectionJobProperties']['OutputDataConfig']['S3Uri'] @@ -66,7 +77,7 @@ OutputDataConfig={'S3Uri': 's3://awstraindata/output/sentiment/'}, DataAccessRoleArn=role_arn, LanguageCode='en', - JobName='MySentimentDetectionJob_' + str(int(time.time())), + JobName='MySentimentDetectionJob_' + article_id + '_' + str(int(time.time())) ) res = comprehend.describe_sentiment_detection_job(JobId=sentiment_job['JobId']) sentiment_output = res['SentimentDetectionJobProperties']['OutputDataConfig']['S3Uri'] @@ -77,28 +88,17 @@ OutputDataConfig={'S3Uri': 's3://awstraindata/output/keyphrases/'}, 
DataAccessRoleArn=role_arn, LanguageCode='en', - JobName='MyKeyPhrasesDetectionJob_' + str(int(time.time())), + JobName='MyKeyPhrasesDetectionJob_' + article_id + '_' + str(int(time.time())) ) res = comprehend.describe_key_phrases_detection_job(JobId=phrases_job['JobId']) key_phrases_output = res['KeyPhrasesDetectionJobProperties']['OutputDataConfig']['S3Uri'] print("Entities Job Response:", entities_output) print("Sentiment Job Response:", sentiment_output) print("Key Phrases Job Response:", key_phrases_output) - cursor.execute(""" - drop table if exists comprehend_jobs - """) - cursor.execute(""" - CREATE TABLE IF NOT EXISTS comprehend_jobs ( - article_id TEXT, - input_s3_uri TEXT, - entities_path TEXT, - sentiment_path TEXT, - key_phrases_path TEXT - ) - """) + print("Inserting into comprehend_jobs table") cursor.execute(""" INSERT INTO comprehend_jobs (article_id, input_s3_uri, entities_path, sentiment_path, key_phrases_path) - VALUES (%s, %s, %s, %s, %s)""", (articles_id, s3_uri, entities_output.replace('s3://awstraindata/', ''), sentiment_output.replace('s3://awstraindata/', ''), key_phrases_output.replace('s3://awstraindata/', ''))) + VALUES (%s, %s, %s, %s, %s)""", (article_id, s3_uri, entities_output.replace('s3://awstraindata/', ''), sentiment_output.replace('s3://awstraindata/', ''), key_phrases_output.replace('s3://awstraindata/', ''))) conn.commit() cursor.close() conn.close() \ No newline at end of file diff --git a/backend/output_handler/output_handler.py b/backend/output_handler/output_handler.py new file mode 100644 index 0000000..8669089 --- /dev/null +++ b/backend/output_handler/output_handler.py @@ -0,0 +1,148 @@ +import boto3 +import tarfile +import json +import io +from Utils import get_postgresql_connection +import datetime + + + +def lambda_handler(event, context): + try: + for record in event['Records']: + print(f"New record: {record}") + bucket = record['s3']['bucket']['name'] + key = record['s3']['object']['key'] + print(f"Processing file 
from bucket: {bucket}, key: {key}") + conn = get_postgresql_connection() + s3 = boto3.client('s3') + print(f"Connecting to S3 bucket: {bucket}") + obj = s3.get_object(Bucket=bucket, Key=key) + print(f"Downloaded object from S3: {key}") + tar_bytes = io.BytesIO(obj['Body'].read()) + print(f"Processing file: {key}") + # Extract .json inside the tar.gz + with tarfile.open(fileobj=tar_bytes, mode='r:gz') as tar: + print(f"Extracting files from tar: {key}") + for member in tar.getmembers(): + print(f"Found member: {member.name}") + if member.name == "output" and member.isfile(): + print(f"Extracting JSON file: {member.name}") + file = tar.extractfile(member) + if not file: + print(f"File {member.name} not found in tar.") + continue + result = json.load(file) + print(f"Extracted JSON: {result}") + break + print(f"Results: {result}") + if result: + print(f"Results found in the file: {key}") + folderSplit = key.split('/') + type = folderSplit[1] + cursor = conn.cursor() + query = "SELECT * FROM comprehend_jobs WHERE entities_path = %s or sentiment_path = %s or key_phrases_path = %s" + cursor.execute(query, (key, key, key)) + row = cursor.fetchone() + print(f"Row found: {row}") + print(f"Type of analysis: {type}") + if row: + article_id = row[0] + print(f"Article ID: {article_id}") + if type == 'entities': + entity_array = result['Entities'] + if entity_array: + ## get the entities from the entities table + add_entities_to_article(conn, cursor, article_id, entity_array) + elif type == 'keyphrases': + keyPhrases_array = result['KeyPhrases'] + if keyPhrases_array: + for keyPhrase in keyPhrases_array: + keyPhrase['Type'] = 'KeyPhrase' + add_entities_to_article(conn, cursor, article_id, keyPhrases_array) + elif type == 'sentiment': + sentiment = result.get('Sentiment', 'NEUTRAL') + if sentiment: + cursor.execute("""update articles set sentiment = %s where article_id = %s""", (sentiment, article_id)) + conn.commit() + cursor.close() + ## delete the s3 object + # 
s3.delete_object(Bucket=bucket, Key=result['input_s3_uri']) + conn.close() + except Exception as e: + print(f"Error processing record: {e}") + return { + 'statusCode': 500, + 'body': json.dumps({'error': str(e)}) + } + +def add_entities_to_article(conn, cursor, article_id, entities): + entities_text = [entity['Text'] for entity in entities] + print(f"Entities to be added: {entities_text}") + cursor.execute("SELECT * FROM entities WHERE entity in %s", (tuple(entities_text),)) + entity_db_array = cursor.fetchall() + print(f"Entities in DB: {entity_db_array}") + location_mentions = [] + officials_involved = [] + relevance_category = [] + print(f"article_id: {article_id}") + + print(f"Relevance category: {relevance_category}") + for entity in entities: + print(f"Processing entity: {entity}") + entity_in_db = [db_entity for db_entity in entity_db_array if db_entity[3].lower() == entity['Text'].lower()] + print(f"Entity in DB: {entity_in_db}") + if not entity_in_db: + current_time = datetime.datetime.utcnow() + cursor.execute("INSERT INTO entities (create_time,entity,type) VALUES (%s, %s, %s) RETURNING id", (current_time, entity['Text'], entity['Type'])) + conn.commit() + db_entity = cursor.fetchone() + print(f"Inserted new entity: {db_entity}") + if entity['Type'] == 'LOCATION': + location_mentions.append(db_entity[0]) + elif entity['Type'] == 'PERSON': + officials_involved.append(db_entity[0]) + else: + relevance_category.append(db_entity[0]) + else: + print(f"Entity already exists in DB: {entity_in_db}") + if entity['Type'] == 'LOCATION': + location_mentions.append(entity_in_db[0][0]) + elif entity['Type'] == 'PERSON': + officials_involved.append(entity_in_db[0][0]) + else: + relevance_category.append(entity_in_db[0][0]) + if location_mentions: + location_mentions = ','.join(map(str, location_mentions)) + cursor.execute("""update articles set location_mentions = %s where article_id = %s""", (location_mentions, article_id)) + + if officials_involved: + 
officials_involved = ','.join(map(str, officials_involved)) + cursor.execute("""update articles set officials_involved = %s where article_id = %s""", (officials_involved, article_id)) + + if relevance_category: + cursor.execute("SELECT relevance_category FROM articles WHERE article_id = %s", (article_id,)) + existing = cursor.fetchone() + relevance_category = ','.join(map(str, relevance_category)) + if existing[0] is not None: + print(f"Existing relevance category: {existing[0]}") + relevance_category = relevance_category + ',' + existing[0] + cursor.execute("""update articles set relevance_category = %s where article_id = %s""", (relevance_category, article_id)) + + +# events = [ +# { +# "s3": { +# "bucket": { +# "name": "awstraindata" +# }, +# "object": { +# "key": "output/entities/269854564686-NER-7b5218ec8e556761890504a59e10da02/output/output.tar.gz" +# } +# } +# } +# ] +# obj= { +# "Records": events +# } +# lambda_handler(obj, None) \ No newline at end of file diff --git a/backend/pg_config.json b/backend/pg_config.json index 596e176..64ca955 100644 --- a/backend/pg_config.json +++ b/backend/pg_config.json @@ -1,7 +1,7 @@ { - "host": "ap-ai-hackathon.cluster-cqt08oi8i1b6.us-east-1.rds.amazonaws.com", + "host": "hackathon-ai-ap.cluster-cqt08oi8i1b6.us-east-1.rds.amazonaws.com", "database": "postgres", "user": "postgres", - "password": "AIHackathon", + "password": "3D6[~771pd5|pkF03dBeL.5#IZ5?", "port": 5432 } \ No newline at end of file diff --git a/backend/raw_data_handler/raw_data_handler.py b/backend/raw_data_handler/raw_data_handler.py new file mode 100644 index 0000000..9c13cb3 --- /dev/null +++ b/backend/raw_data_handler/raw_data_handler.py @@ -0,0 +1,150 @@ +import base64 +import datetime +import json +import time +import uuid +from Utils import get_postgresql_connection +from fastapi import FastAPI +from docx import Document +import csv +import io +import re +import boto3 +import traceback +from test_agent import is_relevance + +BUCKET_NAME = 
'awstraindata' +role = 'arn:aws:iam::269854564686:role/hackathon-comprehend-role' +def lambda_handler(event, context): + try: + conn = get_postgresql_connection() + cursor = conn.cursor() + comprehend = boto3.client('comprehend', region_name='us-east-1') + for record in event['Records']: + print(f"New record: {record}") + bucket = record['s3']['bucket']['name'] + key = record['s3']['object']['key'] + print(f"Processing file from bucket: {bucket}, key: {key}") + s3 = boto3.client('s3') + print(f"Connecting to S3 bucket: {bucket}") + obj = s3.get_object(Bucket=bucket, Key=key) + stream = io.BytesIO(obj['Body'].read()) + articles = extract_articles(stream) + print(f"Extracted {len(articles)} articles from part") + for article in articles: + print(f"Processing article: {article['Title']}") + # Check if article is relevant + is_relevant = is_relevance(article) + if not is_relevant: + print(f"Article {article['Title']} is not relevant, skipping") + continue + output_csv = io.StringIO() + writer = csv.DictWriter(output_csv, fieldnames=["Title", "Source", "Date", "Content"]) + # writer.writeheader() + writer.writerow(article) + article_id = str(uuid.uuid4()) + # Generate unique filename + csv_filename = f"input/articles-{article_id}.csv" + cursor.execute(""" + INSERT INTO articles (article_id, title, body, source, published_date) + VALUES (%s, %s, %s, %s, %s)""", (article_id, article['Title'], article['Content'], article['Source'], article['Date'])) + # Upload to S3 + print(f"Uploading CSV to S3: {csv_filename}") + conn.commit() + get_data_inline(output_csv.getvalue(), article_id, article['Date'], comprehend, cursor, conn) + cursor.close() + conn.close() + lambda_client = boto3.client('lambda') + response = lambda_client.invoke( + FunctionName='clustering_service', + InvocationType='Event' + ) + print(f"Second Lambda function invoked: {response}") + except Exception as e: + traceback.print_exc() + print(f"Error processing event: {e}") + +def get_data_inline(data, 
articles_id, article_date, comprehend, cursor, conn): + print(f"Processing data for article ID: {articles_id}") + entities_response = comprehend.detect_entities( + Text=data, + # DataAccessRoleArn=role_arn, + LanguageCode='en' + ) + print(f"Entities detected: {entities_response['Entities']}") + add_entities_to_article(conn, cursor, articles_id, entities_response['Entities']) + response = comprehend.detect_key_phrases( + Text=data, + # DataAccessRoleArn=role_arn, + LanguageCode='en' + ) + print(f"Key phrases detected: {response['KeyPhrases']}") + for keyPhrase in response['KeyPhrases']: + keyPhrase['Type'] = 'KeyPhrase' + add_keyphrase_to_article(conn, cursor, articles_id, article_date, response['KeyPhrases']) + sentiment_response = comprehend.detect_sentiment( + Text=data, + # DataAccessRoleArn=role_arn, + LanguageCode='en' + ) + print(f"Sentiment detected: {sentiment_response['Sentiment']}") + sentiment = sentiment_response['Sentiment'] + if sentiment: + cursor.execute("""update articles set sentiment = %s where article_id = %s""", (sentiment, articles_id)) + +def extract_articles(file_stream): + print(f"Extracting articles from file stream") + doc = Document(file_stream) + print(f"Document loaded with {len(doc.paragraphs)} paragraphs") + text = "\n".join(p.text for p in doc.paragraphs) + pattern = re.compile( + r'Title:\s*(.*?)\s*Source:\s*(.*?)\s*Date:\s*(.*?)\s*(?=(?:\d{1,2}\)|Title:)|\Z)', + re.DOTALL + ) + matches = pattern.findall(text) + print(f"Found {len(matches)} matches in the document") + articles = [] + for match in matches: + print(f"Processing match: {match}") + title = match[0].strip() + source = match[1].strip() + date_parts = match[2].strip().split("\n", 1) + date = date_parts[0].strip() + content = date_parts[1].strip() if len(date_parts) > 1 else "" + print(f"Extracted article - Title: {title}, Source: {source}, Date: {date}, Content length: {len(content)}") + articles.append({ + "Title": title, + "Source": source, + "Date": date, + "Content": 
content + }) + return articles + + +def add_keyphrase_to_article(conn, cursor, article_id, article_date, entities): + entities_text = [entity['Text'] for entity in entities] + print(f"Entities to be added: {entities_text}") + print(f"article_id: {article_id}") + if entities_text: + relevance_category = ','.join(map(str, entities_text)) + cursor.execute("""update articles set relevance_category = %s where article_id = %s""", (relevance_category, article_id)) + +def add_entities_to_article(conn, cursor, article_id, entities): + entities_text = [entity['Text'] for entity in entities] + print(f"Entities to be added: {entities_text}") + location_mentions = [] + officials_involved = [] + print(f"article_id: {article_id}") + for entity in entities: + if entity['Type'] == 'LOCATION': + location_mentions.append(entity['Text'].lower()) + elif entity['Type'] == 'PERSON' or entity['Type'] == 'ORGANIZATION': + officials_involved.append(entity['Text'].lower()) + print(f"Processing entity: {entity}") + if location_mentions: + location_mentions = ','.join(map(str, location_mentions)) + cursor.execute("""update articles set location_mentions = %s where article_id = %s""", (location_mentions, article_id)) + + if officials_involved: + officials_involved = ','.join(map(str, officials_involved)) + cursor.execute("""update articles set officials_involved = %s where article_id = %s""", (officials_involved, article_id)) \ No newline at end of file diff --git a/backend/requirements.txt b/backend/requirements.txt index e8f021d..0415b7b 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,5 +1,14 @@ -fastapi +numpy +pandas +boto3 +python-docx +fastapi[all] +uvicorn[standard] pydantic -pydantic-core -typing-extensions +python-dotenv +requests_toolbelt +sqlmodel +psycopg2-binary==2.9.9 mangum +# psycopg[binary] # The modern v3 driver +# psycopg-pool # The connection pool for v3 diff --git a/backend/test_agent.py b/backend/test_agent.py new file mode 100644 index 
0000000..c09a98f --- /dev/null +++ b/backend/test_agent.py @@ -0,0 +1,54 @@ +import re +import traceback +import boto3 +import json + +def is_relevance(article): + is_relevant = True + try: + # Initialize Bedrock Agent Runtime client + bedrock_agent = boto3.client("bedrock-agent-runtime", region_name="us-east-1") # replace with your region + + # Agent identifiers (get these from Bedrock console) + agent_id = "IKXDLL0K7W" + agent_alias_id = "DY9KWQNAGM" + + # Your dynamic user message (e.g., relationship analysis prompt) + user_input = article['Content'] + + # Call the agent + response = bedrock_agent.invoke_agent( + agentId=agent_id, + agentAliasId=agent_alias_id, + sessionId="news-analysis-session-001", + inputText=user_input + ) + + # Read the response stream + + print("Response from Bedrock Agent:") + print(response) + for event in response["completion"]: + print("Event:", event) + if "chunk" in event: + print("Processing chunk...") + print("Chunk ID:", event["chunk"]) + payload = event["chunk"]["bytes"] + chunk_str = payload.decode("utf-8") + match = re.search(r"\{.*\}", chunk_str, re.DOTALL) + if match: + print("Found JSON block in chunk") + print("JSON Block:", match.group(0)) + json_block = match.group(0) + parsed_json = json.loads(json_block) + print("Parsed JSON:", parsed_json) + is_relevant = parsed_json.get("relevance_score", 0) > 0.5 + print("Is relevant:", is_relevant) + print(parsed_json.get("relevance_score", "No content found")) + else: + print("āŒ No JSON found in response") + except Exception as e: + print("Error processing response:", e) + pass + # traceback.print_exc() + return is_relevant diff --git a/backend/web_api/bedrock_agent.py b/backend/web_api/bedrock_agent.py new file mode 100644 index 0000000..8967dbf --- /dev/null +++ b/backend/web_api/bedrock_agent.py @@ -0,0 +1,89 @@ +# bedrock_agent.py +import boto3 +import json + +# It's a good practice to create the client once and reuse it. 
+# Ensure your AWS credentials are configured (e.g., via `aws configure`) +# and you have selected a region where the model is available. +bedrock_runtime = boto3.client( + service_name="bedrock-runtime", + region_name="us-east-1" # e.g., us-east-1 +) + +# Choose a model. Claude 3 Sonnet is a great choice for this task. +# You can also use "anthropic.claude-v2:1", "anthropic.claude-instant-v1", etc. +MODEL_ID = "anthropic.claude-3-sonnet-20240229-v1:0" + +def generate_sql_from_prompt(user_question: str, table_schema: str) -> str: + """ + Uses Bedrock to generate a SQL query from a user's natural language question. + + Args: + user_question: The question from the user. + table_schema: The CREATE TABLE statement for the relevant table. + + Returns: + A SQL query string. + """ + + # This prompt engineering is the most critical part of the process. + # It gives the model context, instructions, and constraints. + prompt = f""" +Human: You are a PostgreSQL expert. Your task is to generate a SQL query based on a user's question. +You will be given the database schema and a question. +You MUST follow these rules: +1. ONLY generate a SQL `SELECT` query. Do not generate any other type of query (INSERT, UPDATE, DELETE, etc.). +2. Do not include any text, explanation, or markdown formatting before or after the SQL query. Your entire response must be only the SQL query itself. +3. The query should be for a PostgreSQL database. 
+ +Here is the table schema: + +{table_schema} + + +Here is the user's question: + +{user_question} + + +Assistant: +""" + + # Prepare the payload for the Bedrock API + body = json.dumps({ + "anthropic_version": "bedrock-2023-05-31", + "max_tokens": 1000, + "temperature": 0.0, # Use 0 for deterministic, factual responses + "messages": [ + { + "role": "user", + "content": [{"type": "text", "text": prompt}], + } + ], + }) + + try: + # Invoke the model + response = bedrock_runtime.invoke_model( + body=body, + modelId=MODEL_ID, + accept='application/json', + contentType='application/json' + ) + + # Parse the response body + response_body = json.loads(response.get('body').read()) + + # Extract the generated text + generated_sql = response_body.get('content')[0].get('text') + + # Clean up the response (remove potential leading/trailing whitespace or markdown) + cleaned_sql = generated_sql.strip().replace("```sql", "").replace("```", "").strip() + + print(f"Bedrock generated SQL: {cleaned_sql}") + return cleaned_sql + + except Exception as e: + print(f"Error invoking Bedrock model: {e}") + # In a real app, you'd want more robust error handling + raise \ No newline at end of file diff --git a/backend/web_api/bedrock_agent_invoke.py b/backend/web_api/bedrock_agent_invoke.py new file mode 100644 index 0000000..7867aa1 --- /dev/null +++ b/backend/web_api/bedrock_agent_invoke.py @@ -0,0 +1,157 @@ +# bedrock_agent_invoke.py +import boto3 +import json +from typing import Optional + +# Use the 'bedrock-agent-runtime' client for invoking agents +bedrock_agent_runtime = boto3.client( + service_name="bedrock-agent-runtime", + region_name="us-east-1" # Use the region where your agent is deployed +) + +def invoke_bedrock_agent_to_get_sql( + question: str, + agent_id: str, + agent_alias_id: str, + session_id: str +) -> Optional[str]: + """ + Invokes a pre-configured Bedrock Agent and extracts the generated SQL query + from its response trace. 
+ + Args: + question: The user's natural language question. + agent_id: The ID of your Bedrock Agent. + agent_alias_id: The alias ID for the agent version you want to use. + session_id: A unique identifier for the conversation session. + + Returns: + The generated SQL query string, or None if not found. + """ + prompt = f""" + convert the natural language query into a pgsql query. Return ONLY the sql command as an answer. + Here is the table schema: + + CREATE TABLE articles( + articles_id text, + title text, + body text, + "source" text, + published_date text, + entities text, + sentiment text + ); + + Here is the user's question: + + {question} + + while creating query ensure that it can ignore case sensitive. + Give me the stream response as json in an API call + + """ + try: + # The invoke_agent API returns a streaming response. + response = bedrock_agent_runtime.invoke_agent( + agentId=agent_id, + agentAliasId=agent_alias_id, + sessionId=session_id, + inputText=prompt, + streamingConfigurations = { + "applyGuardrailInterval" : 20, + "streamFinalResponse" : False + } + ) + + event_stream = response['completion'] + final_sql_query = None + all_events = [] + + # Collect all events for post-processing + # for event in event_stream: + # all_events.append(event) + # if 'trace' in event: + # trace_part = event['trace']['trace'] + # if 'observation' in trace_part: + # observation = trace_part['observation'] + # # If observation is a list, iterate through it + # if isinstance(observation, list): + # for obs in observation: + # if 'finalResponse' in obs: + # final_response = obs['finalResponse'] + # text = final_response.get('text', '') + # # Extract SQL from markdown code block if present + # if '```sql' in text: + # sql_start = text.find('```sql') + len('```sql') + # sql_end = text.find('```', sql_start) + # final_sql_query = text[sql_start:sql_end].strip() + # else: + # final_sql_query = text.strip() + # print(f"Extracted SQL from finalResponse: {final_sql_query}") + 
# break + # # If observation is a dict, handle as before + # elif isinstance(observation, dict): + # if 'finalResponse' in observation: + # final_response = observation['finalResponse'] + # text = final_response.get('text', '') + # if '```sql' in text: + # sql_start = text.find('```sql') + len('```sql') + # sql_end = text.find('```', sql_start) + # final_sql_query = text[sql_start:sql_end].strip() + # else: + # final_sql_query = text.strip() + # print(f"Extracted SQL from finalResponse: {final_sql_query}") + # break + # if 'actionGroupInvocationOutput' in observation: + # output_str = observation['actionGroupInvocationOutput']['text'] + # try: + # output_json = json.loads(output_str) + # if 'generatedQuery' in output_json: + # final_sql_query = output_json['generatedQuery'] + # print(f"Extracted SQL from Agent trace: {final_sql_query}") + # break + # except json.JSONDecodeError: + # print(f"Could not decode observation output: {output_str}") + + # Fallback: check for chunk events if not found + for event in all_events: + if 'chunk' in event: + raw_bytes = event['chunk']['bytes'] + print(f"Raw chunk bytes: {raw_bytes!r}") + if raw_bytes: + try: + # Try to decode as JSON, but if it fails, treat as plain text + try: + data = json.loads(raw_bytes.decode()) + if data.get('type') == 'finalResponse': + text = data.get('text', '') + if '```sql' in text: + sql_start = text.find('```sql') + len('```sql') + sql_end = text.find('```', sql_start) + final_sql_query = text[sql_start:sql_end].strip() + else: + final_sql_query = text.strip() + print(f"Extracted SQL from chunk finalResponse: {final_sql_query}") + break + except json.JSONDecodeError: + # Not JSON, treat as plain text + text = raw_bytes.decode() + if '```sql' in text: + sql_start = text.find('```sql') + len('```sql') + sql_end = text.find('```', sql_start) + final_sql_query = text[sql_start:sql_end].strip() + else: + final_sql_query = text.strip() + print(f"Extracted SQL from plain text chunk: {final_sql_query}") + 
break + except Exception as e: + print(f"Error decoding chunk: {e}") + else: + print("Chunk bytes are empty, skipping.") + # if not final_sql_query: + + return final_sql_query + + except Exception as e: + print(f"Error invoking Bedrock Agent: {e}") + raise \ No newline at end of file diff --git a/backend/web_api/database.py b/backend/web_api/database.py new file mode 100644 index 0000000..4f7b9fa --- /dev/null +++ b/backend/web_api/database.py @@ -0,0 +1,23 @@ +# database.py +import os +from sqlmodel import create_engine, SQLModel, Session + +# Use a real database URL in production +DATABASE_URL = os.environ.get( + "DATABASE_URL", + "postgresql://your_user:your_password@your_aurora_endpoint/myappdb" +).replace("postgresql://", "postgresql+psycopg2://") + +# The 'connect_args' is needed for SQLite, but not for PostgreSQL. +# For PostgreSQL, you can remove it. +engine = create_engine(DATABASE_URL, echo=True) + +# def create_db_and_tables(): +# # This function creates all tables defined by SQLModel models +# # that are subclasses of SQLModel. It's good to run this once at startup. 
+# SQLModel.metadata.create_all(engine) + +# Dependency function to get a database session +def get_session(): + with Session(engine) as session: + yield session \ No newline at end of file diff --git a/backend/web_api/models.py b/backend/web_api/models.py new file mode 100644 index 0000000..206906f --- /dev/null +++ b/backend/web_api/models.py @@ -0,0 +1,57 @@ +# models.py +from typing import Optional +from sqlmodel import Field, SQLModel +import datetime + +class Articles(SQLModel, table=True): + article_id: str = Field(primary_key=True) + title: Optional[str] = None + body: Optional[str] = None + source: Optional[str] = Field(default=None, alias="source") + published_date: Optional[str] = None + location_mentions: Optional[str] = None + officials_involved: Optional[str] = None + relevance_category: Optional[str] = None + sentiment: Optional[str] = None + +class ArticleCreate(SQLModel): + title: Optional[str] = None + body: Optional[str] = None + source: Optional[str] = None + published_date: Optional[str] = None + location_mentions: Optional[str] = None + officials_involved: Optional[str] = None + relevance_category: Optional[str] = None + sentiment: Optional[str] = None + +class ArticleRead(SQLModel): + article_id: str + title: Optional[str] = None + body: Optional[str] = None + source: Optional[str] = None + published_date: Optional[str] = None + location_mentions: Optional[str] = None + officials_involved: Optional[str] = None + relevance_category: Optional[str] = None + sentiment: Optional[str] = None + +class Clusters(SQLModel, table=True): + id: int = Field(default=None, primary_key=True) + title: Optional[datetime.date] = None + linkedarticles: Optional[str] = None + startdate: Optional[str] = None + enddate: Optional[str] = None + +class ClusterCreate(SQLModel): + title: Optional[datetime.date] = None + linkedarticles: Optional[str] = None + startdate: Optional[str] = None + enddate: Optional[str] = None + +class ClusterRead(SQLModel): + id: int + title: 
Optional[datetime.date] = None + linkedarticles: Optional[str] = None + startdate: Optional[str] = None + enddate: Optional[str] = None + diff --git a/backend/web_api/web_api.py b/backend/web_api/web_api.py new file mode 100644 index 0000000..4fcab05 --- /dev/null +++ b/backend/web_api/web_api.py @@ -0,0 +1,186 @@ +# main.py +import os +from typing import List +import uuid +from fastapi import FastAPI, Depends, HTTPException, Response +from sqlmodel import Session, select + +from database import get_session +from models import Articles, ArticleCreate, ArticleRead, Clusters, ClusterCreate, ClusterRead +from typing import List, Dict, Any + +from contextlib import asynccontextmanager, closing +# from typing import List, Dict, Any + +# from fastapi import FastAPI, HTTPException, Depends +from pydantic import BaseModel, Field +from psycopg2 import ProgrammingError +import psycopg2 +from contextlib import closing +from mangum import Mangum + +# # Import our new Bedrock agent function +# from bedrock_agent import generate_sql_from_prompt +# namasthe +# Import our new agent invoker function +from bedrock_agent_invoke import invoke_bedrock_agent_to_get_sql + +# Load environment variables from .env file +from dotenv import load_dotenv +load_dotenv() + +# --- Agent Configuration --- +# Load from environment variables for security and flexibility +AGENT_ID = os.environ.get("BEDROCK_AGENT_ID") +AGENT_ALIAS_ID = os.environ.get("BEDROCK_AGENT_ALIAS_ID", "TSTALIASID") # TSTALIASID is a common default + +app = FastAPI( + title="FastAPI with Bedrock Agents", + redirect_slashes=True, +) + +@asynccontextmanager +async def lifespan(app: FastAPI): + print("Application startup...") + yield + print("Application shutdown...") + # pool.close() + + +# This event handler runs once when the application starts. 
+# @app.on_event("startup") +# def on_startup(): +# create_db_and_tables() + +@app.post("/articles/", response_model=ArticleRead) +def create_article(hero: ArticleCreate, session: Session = Depends(get_session)): + db_article = Articles.model_validate(hero) + session.add(db_article) + session.commit() + session.refresh(db_article) + return db_article + +@app.get("/articles/", response_model=List[ArticleRead]) +def read_articles(skip: int = 0, limit: int = 100, session: Session = Depends(get_session)): + heroes = session.exec(select(Articles).offset(skip).limit(limit)).all() + return heroes + +@app.get("/articles/{hero_id}", response_model=ArticleRead) +def read_article(hero_id: int, session: Session = Depends(get_session)): + article = session.get(Articles, hero_id) + if not article: + raise HTTPException(status_code=404, detail="Article not found") + return article + +@app.post("/clusters/", response_model=ClusterRead) +def create_cluster(cluster: ClusterCreate, session: Session = Depends(get_session)): + db_cluster = Clusters.model_validate(cluster) + session.add(db_cluster) + session.commit() + session.refresh(db_cluster) + return db_cluster + +@app.get("/clusters/", response_model=List[ClusterRead]) +def read_clusters(skip: int = 0, limit: int = 100, session: Session = Depends(get_session)): + clusters = session.exec(select(Clusters).offset(skip).limit(limit)).all() + return clusters + +@app.get("/clusters/{cluster_id}", response_model=ClusterRead) +def read_cluster(cluster_id: str, session: Session = Depends(get_session)): + cluster = session.get(Clusters, cluster_id) + if not cluster: + raise HTTPException(status_code=404, detail="Cluster not found") + return cluster + +@app.get("/groupedClusters/") +def grouped_clusters(session: Session = Depends(get_session)): + clusters = session.exec(select(Clusters)).all() + articles = session.exec(select(Articles)).all() + # Build a mapping from cluster id to articles + cluster_map = {cluster.id: [] for cluster in 
clusters} + for article in articles: + # Assuming 'linkedarticles' in Clusters is a comma-separated list of article ids + for cluster in clusters: + if cluster.linkedarticles: + linked_ids = [x.strip() for x in cluster.linkedarticles.split(",") if x.strip()] + if article.articles_id in linked_ids: + cluster_map[cluster.id].append(article) + # Build the response + result = [] + for cluster in clusters: + cluster_dict = cluster.dict() + cluster_dict["articles"] = cluster_map[cluster.id] + result.append(cluster_dict) + return result + + +# --- Pydantic Models --- +class NaturalLanguageQuery(BaseModel): + question: str = Field(..., example="How many heroes are there?") + session_id: str | None = Field(default=None, description="Conversation session ID. A new one is generated if not provided.") + +# --- Database Connection --- +# IMPORTANT: Use a read-only user for the database connection. +DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://your_user:your_password@your_aurora_endpoint/myappdb") + +def get_db_connection(): + conn = psycopg2.connect(DATABASE_URL) + try: + yield conn + finally: + conn.close() + +@app.post("/queryagent", response_model=List[Dict[str, Any]]) +def query_with_bedrock_agent(query: NaturalLanguageQuery, conn=Depends(get_db_connection)): + """ + Takes a natural language question, sends it to a pre-configured Bedrock Agent, + executes the returned SQL, and returns the results. + """ + session_id = query.session_id or str(uuid.uuid4()) + print(f"Invoking agent for question: '{query.question}' with session_id: {session_id}") + + # 1. 
Invoke the agent to get the SQL query + try: + generated_sql = invoke_bedrock_agent_to_get_sql( + question=query.question, + agent_id=AGENT_ID, + agent_alias_id=AGENT_ALIAS_ID, + session_id=session_id + ) + print("Generated SQL from agent:", generated_sql) # Debug print + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to invoke Bedrock Agent: {e}") + + if not generated_sql: + raise HTTPException(status_code=404, detail="Agent did not return a SQL query.") + + # 2. *** CRITICAL SECURITY CHECK *** + if not generated_sql.strip().upper().startswith("SELECT"): + raise HTTPException( + status_code=400, + detail="Agent returned a non-SELECT query. Execution aborted." + ) + + # 3. Execute the SQL from the agent + try: + with conn.cursor() as cur: + cur.execute(generated_sql) + if cur.description is None: + return [] + + column_names = [desc[0] for desc in cur.description] + results = cur.fetchall() + return [dict(zip(column_names, row)) for row in results] + + except ProgrammingError as e: + raise HTTPException(status_code=400, detail=f"Invalid SQL Query from Agent: {e}") + except Exception as e: + raise HTTPException(status_code=500, detail=f"Database execution error: {e}") + +handler = Mangum(app) + +def lambda_handler(event, context): + """ + AWS Lambda handler for FastAPI app using Mangum adapter. + """ + return handler(event, context)