Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 76 additions & 20 deletions cloud_storage/cloud_storage/overrides/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,11 @@ def validate(self) -> None:
PATH: frappe/core/doctype/file/file.py
METHOD: validate
"""
self.associate_files()
if self.flags.cloud_storage or self.flags.ignore_file_validate:
return

self.associate_files()

if not self.is_remote_file:
self.custom_validate()
else:
Expand Down Expand Up @@ -211,7 +213,13 @@ def associate_files(
"file_association",
add_child_file_association(attached_to_doctype, attached_to_name),
)
existing_file.save()
existing_file.flags.ignore_file_validate = True

if self.flags.ignore_links: existing_file.flags.ignore_links = True
if self.flags.ignore_permissions: existing_file.flags.ignore_permissions = True
if self.flags.ignore_validate: existing_file.flags.ignore_validate = True

existing_file.save(ignore_permissions=True)
else:
if self.file_association:
already_linked = any(
Expand Down Expand Up @@ -395,10 +403,15 @@ def has_permission(doc, ptype: str | None = None, user: str | None = None) -> bo
if doc.owner == user:
has_access = True
elif doc.attached_to_doctype and doc.attached_to_name: # type: ignore
reference_doc = frappe.get_doc(doc.attached_to_doctype, doc.attached_to_name) # type: ignore
has_access = reference_doc.has_permission()
if not has_access:
has_access = has_user_permission(doc, user)
try:
reference_doc = frappe.get_doc(doc.attached_to_doctype, doc.attached_to_name) # type: ignore
has_access = reference_doc.has_permission()
if not has_access:
has_access = has_user_permission(doc, user)
except frappe.DoesNotExistError:
# If attached document doesn't exist, check permission on the file itself
has_access = bool(frappe.has_permission(doc.doctype, ptype, user=user))

# elif True:
# Check "shared with" including parent 'folder' to allow access
# ...
Expand Down Expand Up @@ -439,11 +452,31 @@ def strip_special_chars(file_name: str) -> str:
return regex.sub("", file_name)


def get_cloud_storage_config() -> dict:
    """Return the cloud storage configuration for the current site.

    Prefers the nested ``cloud_storage_settings`` dict from site config when
    it is present and carries at least an ``access_key``. Otherwise assembles
    an equivalent dict from individual top-level site-config keys
    (``s3_access_key``, ``s3_secret_key``, ``s3_bucket``, ...).

    Returns:
        dict: configuration with keys ``access_key``, ``secret``, ``bucket``,
        ``region``, ``endpoint_url``, ``folder``, ``use_local`` and
        ``use_legacy_paths`` (the latter defaults to True in the fallback).
    """
    nested = frappe.conf.get("cloud_storage_settings", {})

    # A populated nested block (identified by a non-empty access_key) wins.
    if nested and nested.get("access_key"):
        return nested

    # Fall back to flat, top-level site-config keys.
    conf = frappe.conf
    return {
        "access_key": conf.get("s3_access_key"),
        "secret": conf.get("s3_secret_key"),
        "bucket": conf.get("s3_bucket"),
        "region": conf.get("region"),
        "endpoint_url": conf.get("endpoint_url"),
        "folder": conf.get("s3_folder"),
        "use_local": conf.get("use_local"),
        "use_legacy_paths": conf.get("use_legacy_paths", True),
    }


@frappe.whitelist()
def get_cloud_storage_client():
validate_config()

config: dict = frappe.conf.cloud_storage_settings
config: dict = get_cloud_storage_config()
session = Session(
aws_access_key_id=config.get("access_key"),
aws_secret_access_key=config.get("secret"),
Expand All @@ -462,7 +495,7 @@ def get_cloud_storage_client():


def validate_config() -> None:
config: dict = frappe.conf.cloud_storage_settings
config: dict = get_cloud_storage_config()

if not config:
frappe.throw(
Expand Down Expand Up @@ -560,14 +593,23 @@ def get_file_path(file: File, folder: str | None = None) -> str:
except Exception as e:
frappe.log_error(f"Custom path generator failed: {str(e)}", "Cloud Storage Path Error")

config = frappe.conf.get("cloud_storage_settings", {})
config = get_cloud_storage_config()
if config.get("use_legacy_paths", True):
return _legacy_get_file_path(file, folder)
# Verify if this is a fresh install or if we want to enforce new paths even with legacy flag
# For Hygient/Zerodiscount: We enforce strict site segregation.
pass

# Standard Logic
path = file.file_name
if folder:
return f"{folder}/{file.file_name}"
path = f"{folder}/{file.file_name}"

return file.file_name
# Enforce Site Segregation
# e.g. "site1.local/folder/filename.jpg"
if hasattr(frappe.local, "site") and frappe.local.site:
return f"{frappe.local.site}/{path}"

return path


def _legacy_get_file_path(file: File, folder: str | None = None) -> str:
Expand Down Expand Up @@ -599,9 +641,8 @@ def get_file_content_hash(content, content_type):

@frappe.whitelist()
def write_file(file: File, remove_spaces_in_file_name: bool = True) -> File:
if not frappe.conf.cloud_storage_settings or frappe.conf.cloud_storage_settings.get(
"use_local", False
):
config = get_cloud_storage_config()
if not config or config.get("use_local", False):
file.save_file_on_filesystem()
return file

Expand All @@ -612,14 +653,24 @@ def write_file(file: File, remove_spaces_in_file_name: bool = True) -> File:
# if a hash-conflict is found, update the existing document with a new file association
existing_file_hashes = frappe.get_all(
"File",
filters={"name": ["!=", file.name], "content_hash": file.content_hash},
filters={
"name": ["!=", file.name],
"content_hash": file.content_hash,
"s3_key": ["is", "set"],
},
pluck="name",
)

if existing_file_hashes:
file_doc: File = frappe.get_doc("File", existing_file_hashes[0])

# Propagate flags
if file.flags.ignore_links: file_doc.flags.ignore_links = True
if file.flags.ignore_permissions: file_doc.flags.ignore_permissions = True
if file.flags.ignore_validate: file_doc.flags.ignore_validate = True

file_doc.associate_files(file.attached_to_doctype, file.attached_to_name)
file_doc.save()
file_doc.save(ignore_permissions=True)
return file_doc

# if a filename-conflict is found, update the existing document with a new version instead
Expand All @@ -636,6 +687,12 @@ def write_file(file: File, remove_spaces_in_file_name: bool = True) -> File:
"content_type": file.content_type,
}
)

# Propagate flags
if file.flags.ignore_links: file_doc.flags.ignore_links = True
if file.flags.ignore_permissions: file_doc.flags.ignore_permissions = True
if file.flags.ignore_validate: file_doc.flags.ignore_validate = True

file_doc.associate_files(file.attached_to_doctype, file.attached_to_name)
file = file_doc

Expand All @@ -649,9 +706,8 @@ def write_file(file: File, remove_spaces_in_file_name: bool = True) -> File:

@frappe.whitelist()
def delete_file(file: File, **kwargs) -> File:
if not frappe.conf.cloud_storage_settings or frappe.conf.cloud_storage_settings.get(
"use_local", False
):
config = get_cloud_storage_config()
if not config or config.get("use_local", False):
file.delete_file_from_filesystem()
return file

Expand Down
39 changes: 37 additions & 2 deletions cloud_storage/migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
get_file_path,
validate_config,
write_file,
get_cloud_storage_config,
FILE_URL,
)


Expand Down Expand Up @@ -41,7 +43,7 @@ def migrate_files(
"""
validate_config()

config = frappe.conf.get("cloud_storage_settings")
config = get_cloud_storage_config()
if config.get("use_local"):
frappe.throw(
"Cloud Storage is not enabled. Please set 'use_local' to 0 in 'cloud_storage_settings'."
Expand Down Expand Up @@ -109,6 +111,11 @@ def migrate_files(
for file_data in batch:
try:
file_doc = frappe.get_doc("File", file_data.name)
if not file_doc.file_name:
print(f"⚠️ SKIP: {file_doc.name} - Missing file_name")
stats["skipped"] += 1
continue

if not file_doc.is_private:
file_path = frappe.get_site_path("public", file_doc.file_url.lstrip("/"))
else:
Expand Down Expand Up @@ -136,13 +143,39 @@ def migrate_files(
print(f" Attached to: {attached_to}")
print(f" Local path: {file_doc.file_url}")

if not dry_run:
client = get_cloud_storage_client()
config = get_cloud_storage_config()
expected_s3_key = get_file_path(file_doc, config.get("folder"))

try:
client.head_object(Bucket=client.bucket, Key=expected_s3_key)
# If we reach here, file exists in S3. Sync DB and Skip Upload.
file_doc.db_set("s3_key", expected_s3_key)
file_doc.db_set("file_url", FILE_URL.format(path=expected_s3_key))
print(f" ✅ Synced Record (Found in Cloud): {expected_s3_key}")
stats["migrated"] += 1

if remove_local and os.path.exists(file_path):
os.remove(file_path)
print(" 🗑️ Deleted local file (Synced)")

continue
except ClientError:
# File not found in S3, proceed with upload
pass

if not dry_run:
with open(file_path, "rb") as f:
file_content = f.read()

file_doc.content = file_content
content_type, _ = mimetypes.guess_type(file_doc.file_name)
file_doc.content_type = content_type or "application/octet-stream"

file_doc.flags.ignore_links = True
file_doc.flags.ignore_permissions = True
file_doc.flags.ignore_validate = True

new_file = write_file(file_doc)
new_file.reload()
Expand All @@ -164,7 +197,9 @@ def migrate_files(
print()

except Exception as e:
import traceback
print(f"❌ FAILED: {file_data.name} - {str(e)}")
traceback.print_exc()
frappe.log_error(
title=f"Cloud Storage Migration Failed: {file_data.name}", message=frappe.get_traceback()
)
Expand Down Expand Up @@ -194,7 +229,7 @@ def migrate_files(
def migrate_paths(dry_run=False, limit=None, batch_size=100):
"""Migrate files from legacy paths to new path strategy."""
client = get_cloud_storage_client()
config = frappe.conf.get("cloud_storage_settings", {})
config = get_cloud_storage_config()

if config.get("use_legacy_paths", True):
print("⚠️ Legacy paths are still enabled in cloud_storage_settings.")
Expand Down
91 changes: 91 additions & 0 deletions cloud_storage/verify_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@

import frappe
import os
from cloud_storage.cloud_storage.overrides.file import get_cloud_storage_client, get_cloud_storage_config
from botocore.exceptions import ClientError

def execute():
    """Audit File records against the local filesystem and the cloud bucket.

    For every non-folder File document this checks:
      * whether the file exists on local disk (derived from ``file_url`` when
        it is a local path, else the legacy ``files/<file_name>`` layout), and
      * for records carrying an ``s3_key`` (i.e. marked as migrated), whether
        the object actually exists in the configured bucket via ``head_object``.

    Prints a summary and flags the critical failure mode: records the DB
    claims are migrated but which are missing from cloud storage.
    """
    print(f"\n{'='*50}")
    print(f"Storage Verification for Site: {frappe.local.site}")
    print(f"{'='*50}")

    # 1. Get all non-folder File records from the DB
    files = frappe.get_all(
        "File",
        fields=["name", "file_name", "file_url", "is_private", "s3_key", "is_folder"],
        filters={"is_folder": 0},
    )
    total_db_files = len(files)
    print(f"Total Files in DB: {total_db_files}")

    config = get_cloud_storage_config()
    if not config:
        print("Cloud Storage not configured for this site.")
        return

    try:
        client = get_cloud_storage_client()
        bucket = client.bucket
    except Exception as e:
        # Generic wording: the backend may be S3, MinIO, Spaces, Backblaze, etc.
        print(f"Failed to initialize cloud storage client: {e}")
        return

    local_count = 0
    cloud_count = 0
    missing_local = []
    missing_cloud = []

    print("\nScanning files...")

    for f in files:
        # --- Check local disk ---
        # Prefer file_url: public files are not always under public/files/
        # (they may live in other paths). Fall back to the legacy
        # files/<file_name> layout when file_url is absent or non-local.
        if f.file_url and f.file_url.startswith("/private/files/"):
            local_path = frappe.get_site_path(f.file_url.lstrip("/"))
        elif f.file_url and f.file_url.startswith("/files/"):
            local_path = frappe.get_site_path("public", f.file_url.lstrip("/"))
        elif f.file_name and f.is_private:
            local_path = frappe.get_site_path("private", "files", f.file_name)
        elif f.file_name:
            local_path = frappe.get_site_path("public", "files", f.file_name)
        else:
            local_path = None

        # Use the doc name as a fallback label so reports never show None.
        label = f.file_name or f.name

        if local_path and os.path.exists(local_path):
            local_count += 1
        else:
            missing_local.append(label)

        # --- Check cloud object (only records marked as migrated) ---
        if f.s3_key:
            try:
                client.head_object(Bucket=bucket, Key=f.s3_key)
                cloud_count += 1
            except ClientError:
                missing_cloud.append(label)

    marked_as_migrated = len([f for f in files if f.s3_key])

    print(f"\n{'='*50}")
    print(f"Verification Summary for {frappe.local.site}")
    print(f"{'='*50}")
    print(f"Total Records in DB: {total_db_files}")
    print(f"Found Locally: {local_count}")
    print(f"Found in Cloud (Verified): {cloud_count}")
    print(f"Marked as Migrated (DB): {marked_as_migrated}")
    print(f"{'='*50}")

    if missing_local:
        print(f"\n[WARNING] Missing Local Files ({len(missing_local)}):")
        print(", ".join(missing_local[:10]) + ("..." if len(missing_local) > 10 else ""))

    if missing_cloud:
        print(f"\n[ERROR] Missing in Cloud (DB claims migrated) ({len(missing_cloud)}):")
        print(", ".join(missing_cloud[:10]) + ("..." if len(missing_cloud) > 10 else ""))

    # The critical invariant: every record the DB marks as migrated must
    # actually exist in the bucket. Comparing local vs cloud counts is
    # misleading — they legitimately diverge during a partial migration or
    # after remove_local, so we do not treat that as an error.
    if marked_as_migrated != cloud_count:
        print(
            f"\n[ERROR] Sync Issue: DB claims {marked_as_migrated} files migrated, "
            f"but only {cloud_count} found in cloud storage!"
        )
    else:
        print("\n[SUCCESS] All records marked as migrated were verified in cloud storage.")

    print(f"{'='*50}\n")
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
"dependencies": {
"docx-preview": "^0.3.0",
"jszip": "^3.10.1",
"madr": "^3.0.0"
"madr": "^3.0.0",
"vue": "^3.3.4"
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the PR body you wrote: "Vue Dependency: Added Vue to package.json to resolve bench build failures on v15."
Could you share the exact error message?

Frappe v15 already includes Vue 3 in core, so it shouldn't throw an error. Adding Vue here might cause version conflicts or bundle duplication.

},
"release": {
"branches": [
Expand All @@ -19,4 +20,4 @@
"access": "restricted"
},
"private": true
}
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [

[tool.poetry]
name = "cloud_storage"
version = "15.9.1"
version = "1.1.1"
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The version change from 15.9.1 to 1.1.1 breaks the existing versioning convention. Please keep the major version aligned with the Frappe version (e.g., 15.9.1) for clarity and to indicate compatibility.

description = "Frappe App for integrating with cloud storage applications"
authors = ["AgriTheory <support@agritheory.dev>"]
readme = "README.md"
Expand Down
Loading
Loading