Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions api/routes/migrate_media.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
"""
One-time migration route to replace S3 URLs with Cloudinary URLs in the database.

Reads the mapping from scripts/url_mapping.json (generated by scripts/upload_to_cloudinary.py)
and updates all image/file URL columns that contain S3 references.

This route should be called once after deployment, then removed.
"""

import json
import logging
from pathlib import Path

from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.orm import Session

from db.database import get_db
from db.models.announcements import Announcement
from db.models.feeds import Feed
from db.models.skills import Skill
from db.models.users import User
from utils.permissions import is_admin

logger = logging.getLogger(__name__)

# Router for the one-shot media migration endpoint; mounted under the API prefix.
migrate_media_route = APIRouter(tags=["Migration"], prefix="/migrate")

# S3 URL pattern to match — covers both virtual-hosted and path-style URLs
# (generic host plus the regional host variants this project has used).
S3_MARKERS = (".s3.amazonaws.com", ".s3.us-east", ".s3.eu-west", ".s3.af-south")

# Tables and their URL columns to scan
# Each entry is (SQLAlchemy model, list of column attribute names holding media URLs).
URL_COLUMNS = [
(User, ["profile_pic_url"]),
(Feed, ["feed_pic_url"]),
(Skill, ["image_url"]),
(Announcement, ["image_url"]),
]

# Path to the mapping produced by scripts/upload_to_cloudinary.py:
# three parents up from this file (api/routes/ -> repo root), then scripts/.
MAPPING_FILE = Path(__file__).resolve().parent.parent.parent / "scripts" / "url_mapping.json"


def _load_mapping() -> dict[str, str]:
    """Load the S3-path → Cloudinary-URL mapping from disk.

    Returns:
        Mapping of relative S3 object paths to Cloudinary URLs.

    Raises:
        HTTPException: 500 if the mapping file is missing or is not valid
            JSON; 400 if it parses but contains no entries.
    """
    if not MAPPING_FILE.exists():
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Mapping file not found at {MAPPING_FILE}. Run scripts/upload_to_cloudinary.py first.",
        )
    try:
        with open(MAPPING_FILE, encoding="utf-8") as f:
            mapping = json.load(f)
    except json.JSONDecodeError as exc:
        # A half-written or corrupted mapping file should produce a clear
        # error, not an unhandled JSONDecodeError surfacing as a bare 500.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Mapping file at {MAPPING_FILE} is not valid JSON: {exc}",
        ) from exc
    if not mapping:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Mapping file is empty. Run scripts/upload_to_cloudinary.py first.",
        )
    return mapping


def _is_s3_url(url: str | None) -> bool:
    """Return True if *url* is a non-empty string containing an S3 host marker."""
    if url:
        for marker in S3_MARKERS:
            if marker in url:
                return True
    return False


def _find_cloudinary_url(s3_url: str, mapping: dict[str, str]) -> str | None:
    """Try to match an S3 URL to a Cloudinary URL via the mapping.

    The mapping keys are relative file paths (e.g. "slightlytechie/profile/20240101-12-00-00").
    The S3 URL looks like "https://<bucket>.s3.amazonaws.com/<path>".
    We extract the path portion and look it up — first verbatim, then with
    any query string stripped, then percent-decoded (S3 URL-encodes object
    keys, e.g. spaces become %20, while the mapping stores raw file paths).

    Returns:
        The Cloudinary URL, or None if no mapping entry matches.
    """
    from urllib.parse import unquote

    for marker in S3_MARKERS:
        idx = s3_url.find(marker)
        if idx == -1:
            continue
        # The object key starts after the first "/" following the host
        # (this also skips any regional suffix after the marker).
        path_start = s3_url.find("/", idx)
        if path_start == -1:
            continue
        s3_path = s3_url[path_start + 1:]  # strip leading "/"
        clean_path = s3_path.split("?", 1)[0]  # drop any query string
        # Try progressively normalized forms; first hit wins.
        for candidate in (s3_path, clean_path, unquote(clean_path)):
            if candidate in mapping:
                return mapping[candidate]
    return None


@migrate_media_route.post(
    "/s3-to-cloudinary",
    summary="Replace S3 URLs with Cloudinary URLs in the database",
)
def migrate_s3_to_cloudinary(
    dry_run: bool = True,
    db: Session = Depends(get_db),
    current_user=Depends(is_admin),
):
    """Scan all URL columns for S3 references and replace them with Cloudinary URLs.

    - **dry_run=true** (default): Report what would change without modifying the DB.
    - **dry_run=false**: Apply the changes and commit.
    """
    mapping = _load_mapping()

    results = {
        "scanned": 0,
        "s3_urls_found": 0,
        "replaced": 0,
        "not_in_mapping": [],
        "changes": [],
    }

    # Walk every configured (model, columns) pair and inspect each cell.
    for model, columns in URL_COLUMNS:
        table = model.__tablename__
        for record in db.query(model).all():
            for column in columns:
                results["scanned"] += 1
                current = getattr(record, column, None)

                if not _is_s3_url(current):
                    continue
                results["s3_urls_found"] += 1

                replacement = _find_cloudinary_url(current, mapping)
                if replacement is None:
                    # S3 URL with no Cloudinary counterpart — report, don't touch.
                    results["not_in_mapping"].append({
                        "table": table,
                        "id": record.id,
                        "column": column,
                        "s3_url": current,
                    })
                    continue

                results["changes"].append({
                    "table": table,
                    "id": record.id,
                    "column": column,
                    "old_url": current,
                    "new_url": replacement,
                })

                # Only mutate the row when actually applying the migration.
                if not dry_run:
                    setattr(record, column, replacement)
                    results["replaced"] += 1

    # Single commit at the end so the migration is all-or-nothing.
    if not dry_run and results["replaced"] > 0:
        db.commit()
        logger.info("Migrated %d S3 URLs to Cloudinary", results["replaced"])

    results["dry_run"] = dry_run
    if dry_run:
        results["message"] = (
            f"Dry run complete. {len(results['changes'])} URL(s) would be replaced. "
            f"{len(results['not_in_mapping'])} S3 URL(s) have no mapping. "
            f"Call with dry_run=false to apply."
        )
    else:
        results["message"] = (
            f"Migration complete. {results['replaced']} URL(s) replaced. "
            f"{len(results['not_in_mapping'])} S3 URL(s) had no mapping and were skipped."
        )

    return results
2 changes: 2 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from fastapi_pagination import add_pagination
from api.routes.endpoints import endpoints_route
from api.routes.users import users_route
from api.routes.migrate_media import migrate_media_route
from utils.endpoints_status import create_signup_endpoint

# Base.metadata.create_all(bind=engine)
Expand Down Expand Up @@ -76,6 +77,7 @@ async def startup_event():
app.include_router(email_templates_route, prefix=v1_prefix)
app.include_router(endpoints_route, prefix=v1_prefix)
app.include_router(users_route, prefix=v1_prefix)
app.include_router(migrate_media_route, prefix=v1_prefix)

add_pagination(app)

Expand Down
5 changes: 1 addition & 4 deletions db/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,7 @@

def set_up_db(production_env) -> tuple:
if production_env:
engine = create_engine(
settings.DATABASE_URL,
connect_args={"sslmode": "require"}
)
engine = create_engine(settings.DATABASE_URL)
else:
engine = create_engine(settings.DATABASE_URL)
SessionLocal = sessionmaker(
Expand Down
Loading
Loading