Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,17 @@ BATCH_SIZE=100
# Comma-separated chat IDs to process FIRST in all operations
# PRIORITY_CHAT_IDS=

# Skip media downloads for specific chats (comma-separated IDs)
# Messages are still backed up with text, but media files are not downloaded
# Useful for high-volume media chats where you only need text content
# Example: SKIP_MEDIA_CHAT_IDS=-1001234567890,-1009876543210
# SKIP_MEDIA_CHAT_IDS=

# Delete existing media files and DB records for chats in SKIP_MEDIA_CHAT_IDS
# When enabled (default), reclaims storage by removing already-downloaded media
# WARNING: deletion is permanent and runs the next time each listed chat is backed up
# Set to false to keep existing media but skip future downloads
SKIP_MEDIA_DELETE_EXISTING=true

# Hour (0-23) to recalculate backup statistics daily
# STATS_CALCULATION_HOUR=3

Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v6.0.0
hooks:
- id: check-yaml
- id: check-toml
Expand All @@ -12,7 +12,7 @@ repos:
args: [--maxkb=500]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.6
rev: v0.15.1
hooks:
- id: ruff
args: [--fix]
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,8 @@ The **Scope** column shows whether each variable applies to the backup scheduler
| `VERIFY_MEDIA` | `false` | B | Re-download missing or corrupted media files |
| `STATS_CALCULATION_HOUR` | `3` | B | Hour (0-23) to recalculate backup statistics daily |
| `PRIORITY_CHAT_IDS` | - | B | Comma-separated chat IDs to process first in all operations |
| `SKIP_MEDIA_CHAT_IDS` | - | B | Comma-separated chat IDs whose media is not downloaded (message text is still backed up) |
| `SKIP_MEDIA_DELETE_EXISTING` | `true` | B | Delete already-downloaded media files and their DB records for chats in `SKIP_MEDIA_CHAT_IDS` to reclaim storage; set to `false` to keep existing media |
| `LOG_LEVEL` | `INFO` | B/V | Logging verbosity: `DEBUG`, `INFO`, `WARNING`/`WARN`, `ERROR` |
| **Chat Filtering** | | | See [Chat Filtering](#chat-filtering) below |
| `CHAT_IDS` | - | B | **Whitelist mode**: backup ONLY these chats (ignores all other filters) |
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ services:
BATCH_SIZE: ${BATCH_SIZE:-100}
# DEDUPLICATE_MEDIA: ${DEDUPLICATE_MEDIA:-true}
# PRIORITY_CHAT_IDS: ${PRIORITY_CHAT_IDS:-}
# SKIP_MEDIA_CHAT_IDS: ${SKIP_MEDIA_CHAT_IDS:-}
# SKIP_MEDIA_DELETE_EXISTING: ${SKIP_MEDIA_DELETE_EXISTING:-true}
# STATS_CALCULATION_HOUR: ${STATS_CALCULATION_HOUR:-3}

# =======================================================================
Expand Down
28 changes: 28 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,11 @@ def __init__(self):
# Useful for ensuring important chats are always backed up first
self.priority_chat_ids = self._parse_id_list(os.getenv("PRIORITY_CHAT_IDS", ""))

# Skip media downloads for specific chats (but still backup message text)
self.skip_media_chat_ids = self._parse_id_list(os.getenv("SKIP_MEDIA_CHAT_IDS", ""))
# Delete existing media files and records for chats in skip list (reclaim storage)
self.skip_media_delete_existing = os.getenv("SKIP_MEDIA_DELETE_EXISTING", "true").lower() == "true"

# Session configuration
self.session_name = os.getenv("SESSION_NAME", "telegram_backup")

Expand Down Expand Up @@ -266,6 +271,9 @@ def __init__(self):
)
if self.display_chat_ids:
logger.info(f"Display mode: Viewer restricted to chat IDs {self.display_chat_ids}")
if self.skip_media_chat_ids:
cleanup_status = "will delete existing media" if self.skip_media_delete_existing else "keeps existing media"
logger.info(f"Media downloads skipped for chat IDs: {self.skip_media_chat_ids} ({cleanup_status})")

def _parse_id_list(self, id_str: str) -> set:
"""Parse comma-separated ID string into a set of integers."""
Expand Down Expand Up @@ -412,6 +420,26 @@ def get_max_media_size_bytes(self) -> int:
"""Get maximum media file size in bytes."""
return self.max_media_size_mb * 1024 * 1024

def should_download_media_for_chat(self, chat_id: int) -> bool:
    """
    Tell whether media files should be fetched for the given chat.

    Media is fetched only when the global download switch is on and the
    chat is not listed in the per-chat skip set.

    Args:
        chat_id: Telegram chat ID (marked format)

    Returns:
        True if media should be downloaded, False if skipped
    """
    # The global switch is checked first; the skip set is consulted only when it is on.
    return bool(self.download_media) and chat_id not in self.skip_media_chat_ids

def validate_credentials(self):
"""Ensure Telegram credentials are present."""
if not all([self.api_id, self.api_hash, self.phone]):
Expand Down
45 changes: 45 additions & 0 deletions src/db/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,51 @@ async def insert_media(self, media_data: dict[str, Any]) -> None:
await session.execute(stmt)
await session.commit()

async def get_media_for_chat(self, chat_id: int) -> list[dict[str, Any]]:
    """
    Fetch every media row that belongs to one chat as plain dictionaries.

    Args:
        chat_id: Chat identifier

    Returns:
        One dict per Media row with the keys id, message_id, chat_id,
        type, file_path, file_size and downloaded
    """
    # Columns copied from each Media row, in the order they appear in the result dicts.
    exported_fields = (
        "id",
        "message_id",
        "chat_id",
        "type",
        "file_path",
        "file_size",
        "downloaded",
    )
    query = select(Media).where(Media.chat_id == chat_id)

    async with self.db_manager.async_session_factory() as session:
        rows = (await session.execute(query)).scalars().all()
        # Build the dicts while the session is still open.
        return [{field: getattr(row, field) for field in exported_fields} for row in rows]

async def delete_media_for_chat(self, chat_id: int) -> int:
    """
    Remove every media row that belongs to one chat.
    Message rows and the chat row itself are left untouched.

    Args:
        chat_id: Chat identifier

    Returns:
        Row count reported by the DELETE statement
    """
    purge = delete(Media).where(Media.chat_id == chat_id)

    async with self.db_manager.async_session_factory() as session:
        outcome = await session.execute(purge)
        await session.commit()
        return outcome.rowcount

async def get_media_for_verification(self) -> list[dict[str, Any]]:
"""
Get all media records that should have files on disk.
Expand Down
2 changes: 1 addition & 1 deletion src/listener.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,7 @@ async def on_new_message(event: events.NewMessage.Event) -> None:
# v6.0.0: Handle media - create Media record AFTER message exists
if media_type:
# Download media immediately if enabled
if self.config.listen_new_messages_media and self.config.download_media:
if self.config.listen_new_messages_media and self.config.should_download_media_for_chat(chat_id):
try:
media_path = await self._download_media(message, chat_id)
if media_path:
Expand Down
59 changes: 58 additions & 1 deletion src/telegram_backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,11 @@ async def _verify_and_redownload_media(self) -> None:
failed = 0

for chat_id, records in by_chat.items():
# Skip media verification for chats in skip list
if chat_id in self.config.skip_media_chat_ids:
logger.debug(f"Skipping media verification for chat {chat_id} (in SKIP_MEDIA_CHAT_IDS)")
continue

try:
# Get message IDs to fetch
message_ids = [r["message_id"] for r in records if r.get("message_id")]
Expand Down Expand Up @@ -587,6 +592,10 @@ async def _backup_dialog(self, dialog, is_archived: bool = False) -> int:
chat_data = self._extract_chat_data(entity, is_archived=is_archived)
await self.db.upsert_chat(chat_data)

# Clean up existing media if this chat is in the skip list
if chat_id in self.config.skip_media_chat_ids and self.config.skip_media_delete_existing:
await self._cleanup_existing_media(chat_id)

# Ensure profile photos for users and groups/channels are backed up.
# This runs on every dialog backup but only downloads new files when
# Telegram reports a different profile photo.
Expand Down Expand Up @@ -951,7 +960,7 @@ async def _process_message(self, message: Message, chat_id: int) -> dict:
"results": results_data,
}

elif self.config.download_media:
elif self.config.should_download_media_for_chat(chat_id):
# v6.0.0: Download media and store data for later insertion
# (media is inserted AFTER message to satisfy FK constraint)
media_result = await self._process_media(message, chat_id)
Expand Down Expand Up @@ -1039,6 +1048,54 @@ async def _ensure_profile_photo(self, entity, marked_id: int = None) -> None:
except Exception as e:
logger.warning(f"Failed to download avatar for {file_id}: {e}")

async def _cleanup_existing_media(self, chat_id: int) -> None:
    """
    Delete existing media files and database records for a chat.
    Used when a chat is added to SKIP_MEDIA_CHAT_IDS to reclaim storage.

    Files referenced by the chat's media records are removed from disk
    first; afterwards all media records for the chat are deleted from the
    database in one call. A file that cannot be removed is logged as a
    warning and does not stop the cleanup. Any other failure is logged as
    an error and is not re-raised.

    Args:
        chat_id: Chat identifier
    """
    try:
        # Get all media records for this chat
        media_records = await self.db.get_media_for_chat(chat_id)
        if not media_records:
            logger.debug(f"No existing media found for chat {chat_id}")
            return

        deleted_files = 0
        freed_bytes = 0

        for record in media_records:
            file_path = record.get("file_path")
            # lexists() is also True for dangling symlinks (e.g. deduplicated
            # media whose target is already gone); exists() would skip them
            # and leave the broken link behind.
            if not file_path or not os.path.lexists(file_path):
                continue

            # NOTE(review): if deduplicated media in other chats is a symlink
            # pointing at a regular file removed here, those links will
            # dangle - confirm against the deduplication layout.
            try:
                # lstat() does not follow symlinks, so removing a link is
                # counted as the link's own size rather than its target's.
                file_size = os.lstat(file_path).st_size
                # os.remove() and os.unlink() are the same operation and
                # handle regular files and symlinks alike.
                os.remove(file_path)
            except OSError as e:
                logger.warning(f"Failed to delete media file {file_path}: {e}")
            else:
                deleted_files += 1
                freed_bytes += file_size

        # Delete all media records from database for this chat
        deleted_records = await self.db.delete_media_for_chat(chat_id)

        if deleted_files > 0 or deleted_records > 0:
            freed_mb = freed_bytes / (1024 * 1024)
            logger.info(
                f"Cleaned up existing media for chat {chat_id}: "
                f"{deleted_files} files ({freed_mb:.1f} MB), {deleted_records} DB records"
            )

    except Exception as e:
        # Best-effort: the failure is logged here and not propagated to the caller.
        logger.error(f"Error cleaning up existing media for chat {chat_id}: {e}", exc_info=True)

async def _process_media(self, message: Message, chat_id: int) -> dict | None:
"""
Process and download media from a message.
Expand Down
122 changes: 122 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,5 +180,127 @@ def test_database_dir_custom(self):
self.assertTrue(config.database_path.startswith("/data/ssd"))


class TestSkipMediaChatIds(unittest.TestCase):
    """Exercise SKIP_MEDIA_CHAT_IDS / SKIP_MEDIA_DELETE_EXISTING parsing and the per-chat media gate."""

    def setUp(self):
        # Each test gets its own scratch backup directory, removed afterwards.
        self.temp_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)

    def _build_config(self, **overrides):
        """Create a Config from a clean environment holding the base variables plus overrides."""
        environment = {"CHAT_TYPES": "private", "BACKUP_PATH": self.temp_dir}
        environment.update(overrides)
        with patch.dict(os.environ, environment, clear=True):
            return Config()

    def test_skip_media_chat_ids_empty(self):
        """With the variable unset, the skip set is empty."""
        config = self._build_config()
        self.assertEqual(config.skip_media_chat_ids, set())

    def test_skip_media_chat_ids_single(self):
        """A single chat ID is parsed into a one-element set."""
        config = self._build_config(SKIP_MEDIA_CHAT_IDS="-1001234567890")
        self.assertEqual(config.skip_media_chat_ids, {-1001234567890})

    def test_skip_media_chat_ids_multiple(self):
        """Several comma-separated chat IDs are all parsed."""
        config = self._build_config(SKIP_MEDIA_CHAT_IDS="-1001234567890,-1009876543210,123456")
        self.assertEqual(config.skip_media_chat_ids, {-1001234567890, -1009876543210, 123456})

    def test_should_download_media_for_chat_normal(self):
        """Chats outside the skip list still get their media downloaded."""
        config = self._build_config(DOWNLOAD_MEDIA="true", SKIP_MEDIA_CHAT_IDS="-1001234567890")
        for unlisted_chat in (123456, -1009999999):
            self.assertTrue(config.should_download_media_for_chat(unlisted_chat))

    def test_should_download_media_for_chat_skipped(self):
        """Chats inside the skip list do not get their media downloaded."""
        config = self._build_config(
            DOWNLOAD_MEDIA="true",
            SKIP_MEDIA_CHAT_IDS="-1001234567890,-1009876543210",
        )
        for listed_chat in (-1001234567890, -1009876543210):
            self.assertFalse(config.should_download_media_for_chat(listed_chat))

    def test_should_download_media_respects_global_flag(self):
        """DOWNLOAD_MEDIA=false disables media for every chat, listed or not."""
        config = self._build_config(DOWNLOAD_MEDIA="false", SKIP_MEDIA_CHAT_IDS="-1001234567890")
        for any_chat in (123456, -1009999999, -1001234567890):
            self.assertFalse(config.should_download_media_for_chat(any_chat))

    def test_skip_media_chat_ids_whitespace_handling(self):
        """Whitespace around the IDs and commas is ignored."""
        config = self._build_config(SKIP_MEDIA_CHAT_IDS=" -1001234567890 , -1009876543210 , 123456 ")
        self.assertEqual(config.skip_media_chat_ids, {-1001234567890, -1009876543210, 123456})

    def test_skip_media_delete_existing_defaults_true(self):
        """With the variable unset, deletion of existing media is enabled."""
        config = self._build_config()
        self.assertTrue(config.skip_media_delete_existing)

    def test_skip_media_delete_existing_can_be_disabled(self):
        """Setting the variable to "false" keeps existing media."""
        config = self._build_config(SKIP_MEDIA_DELETE_EXISTING="false")
        self.assertFalse(config.skip_media_delete_existing)

    def test_skip_media_delete_existing_explicit_true(self):
        """Setting the variable to "true" enables deletion explicitly."""
        config = self._build_config(SKIP_MEDIA_DELETE_EXISTING="true")
        self.assertTrue(config.skip_media_delete_existing)


if __name__ == "__main__":
unittest.main()
Loading