-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgmailLabelStorage.py
More file actions
353 lines (296 loc) · 11.9 KB
/
gmailLabelStorage.py
File metadata and controls
353 lines (296 loc) · 11.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
#!/usr/bin/env python3
"""
Gmail Label Storage Analyzer
This script uses IMAP to calculate the storage space used by emails
associated with specific labels or all labels in your Gmail account.
"""
import imaplib
import email
import os
import sys
import argparse
from email.header import decode_header
def load_credentials(env_path=None):
    """Load Gmail credentials from a .env file and/or environment variables.

    When the .env file exists, every ``KEY=VALUE`` line in it is copied into
    ``os.environ`` (replacing any value already set there). Blank lines,
    ``#`` comment lines, lines without ``=`` and lines with an empty key are
    skipped. One pair of matching quotes around a value is removed, so
    ``GMAIL_PASSWORD="abcd efgh"`` yields ``abcd efgh`` instead of a password
    that contains literal quote characters.

    Args:
        env_path: Path of the .env file to read. Defaults to ``.env`` in the
            directory that contains this script.

    Returns:
        Tuple ``(email_addr, password)`` taken from the ``GMAIL_EMAIL`` and
        ``GMAIL_PASSWORD`` environment variables.

    Exits the process with status 1, after printing setup instructions, when
    either variable is missing or empty.
    """
    if env_path is None:
        env_path = os.path.join(os.path.dirname(__file__), '.env')

    if os.path.exists(env_path):
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        # 'utf-8-sig' also drops a leading BOM so the first key stays clean.
        with open(env_path, 'r', encoding='utf-8-sig') as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith('#') or '=' not in line:
                    continue
                key, value = line.split('=', 1)
                key = key.strip()
                value = value.strip()
                # Strip one pair of matching surrounding quotes.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in '"\'':
                    value = value[1:-1]
                # An empty key (a line such as "=x") is not a valid variable name.
                if key:
                    os.environ[key] = value

    # Get credentials
    email_addr = os.getenv('GMAIL_EMAIL')
    password = os.getenv('GMAIL_PASSWORD')
    if not email_addr or not password:
        print("❌ Error: Missing credentials")
        print("\nPlease set GMAIL_EMAIL and GMAIL_PASSWORD")
        print("Either in a .env file or as environment variables")
        print("\nFor .env file, create it with:")
        print(" GMAIL_EMAIL=your.email@gmail.com")
        print(" GMAIL_PASSWORD=your-app-specific-password")
        print("\nNote: Use an app-specific password, not your regular password")
        print("Generate one at: https://myaccount.google.com/apppasswords")
        sys.exit(1)
    return email_addr, password
def connect_imap(email_addr, password):
    """Open an SSL IMAP session to imap.gmail.com and log in.

    Args:
        email_addr: Account name passed to ``login``.
        password: Password passed to ``login``.

    Returns:
        The logged-in ``imaplib.IMAP4_SSL`` object.

    Exits the process with status 1 when the server rejects the session
    (``imaplib.IMAP4.error``) or when the server cannot be reached at all
    (``OSError``, which covers DNS, socket and TLS failures).
    """
    print(f"Connecting to {email_addr}...")
    try:
        imap = imaplib.IMAP4_SSL('imap.gmail.com')
        imap.login(email_addr, password)
        print("✅ Connected successfully\n")
        return imap
    except imaplib.IMAP4.error as e:
        print(f"❌ Connection failed: {e}")
        print("\nPossible issues:")
        print(" - Incorrect password (use app-specific password)")
        print(" - 2-Step Verification not enabled")
        print(" - IMAP not enabled in Gmail settings")
        print("\nTo enable IMAP: Gmail Settings → Forwarding and POP/IMAP")
        print("To create app password: https://myaccount.google.com/apppasswords")
        sys.exit(1)
    except OSError as e:
        # Network-level failure: the account hints above would be misleading,
        # and an unhandled traceback is unfriendly for a command-line tool.
        print(f"❌ Connection failed: {e}")
        print("\nCould not reach imap.gmail.com - check your network connection")
        sys.exit(1)
def get_all_labels(imap):
    """Return the sorted names of the labels worth analysing.

    Each LIST response line has the form ``(\\Flags) "/" "FolderName"``; the
    name is taken as the last double-quoted field on the line.

    Skipped entries:
      * items that are not bytes/str, and lines with no quoted name;
      * folders flagged ``\\Noselect`` (for example the ``[Gmail]`` container),
        because they cannot be opened with SELECT;
      * every ``[Gmail]/...`` system folder except ``[Gmail]/Sent Mail``.

    Args:
        imap: Object exposing ``list()`` in the style of ``imaplib.IMAP4``.

    Returns:
        Sorted list of folder names; ``[]`` when LIST does not return 'OK'
        or when any exception is raised (the error is printed).
    """
    try:
        # List all folders/labels
        status, folders = imap.list()
        if status != 'OK':
            return []

        labels = []
        for folder in folders:
            if isinstance(folder, bytes):
                folder = folder.decode('utf-8')
            if not isinstance(folder, str):
                # e.g. None for an empty listing: nothing to parse here, and
                # one odd item should not discard the whole listing.
                continue

            # Extract folder name (handle quoted names)
            parts = folder.split('"')
            if len(parts) < 3:
                continue

            # parts[0] is the text before the first quote, i.e. the flag list.
            if '\\noselect' in parts[0].lower():
                continue

            folder_name = parts[-2]
            # Keep user labels; of the system folders keep only Sent Mail
            # (All Mail and Drafts are excluded as too large/problematic).
            if not folder_name.startswith('[Gmail]/') or folder_name == '[Gmail]/Sent Mail':
                labels.append(folder_name)
        return sorted(labels)
    except Exception as e:
        print(f"Error listing labels: {e}")
        return []
def calculate_label_storage(imap, label_name=None):
    """
    Report storage for one label, or for every label when none is named.

    Args:
        imap: IMAP connection
        label_name: Label to analyze; a falsy value means "all labels"

    Returns:
        With a label name: whatever calculate_single_label returns for it.
        Without one: a list of the truthy per-label results ([] when no
        labels are found). None when an exception is raised along the way.
    """
    try:
        # Single-label mode: delegate and return immediately.
        if label_name:
            return calculate_single_label(imap, label_name)

        every_label = get_all_labels(imap)
        if not every_label:
            print("No labels found")
            return []

        label_total = len(every_label)
        print(f"Analyzing {label_total} labels...\n")

        results = []
        for position, current in enumerate(every_label, start=1):
            print(f"[{position}/{label_total}] Processing: {current}")
            entry = calculate_single_label(imap, current)
            # Labels that could not be processed come back falsy; leave them out.
            if entry:
                results.append(entry)
        return results
    except Exception as exc:
        print(f"Error calculating storage: {exc}")
        return None
def _label_summary(label_name, message_count, total_bytes):
    """Build the result dict reported for one label."""
    return {
        'label': label_name,
        'message_count': message_count,
        'total_bytes': total_bytes,
        'total_mb': total_bytes / (1024 * 1024),
        'total_gb': total_bytes / (1024 * 1024 * 1024),
    }


def calculate_single_label(imap, label_name):
    """Calculate storage for a single label.

    Opens the label read-only, runs ``SEARCH ALL`` to get its message
    numbers, then fetches ``RFC822.SIZE`` for them in batches of 100 and sums
    the sizes.

    Args:
        imap: Object exposing ``select``, ``search`` and ``fetch`` in the
            style of ``imaplib.IMAP4``.
        label_name: Folder/label name to open.

    Returns:
        Dict with keys ``label``, ``message_count``, ``total_bytes``,
        ``total_mb`` and ``total_gb``; or None when the label cannot be
        selected or searched, or when an unexpected exception is raised.
        Batches that fail to fetch are skipped, so ``total_bytes`` can be an
        undercount; a warning is printed when that happens.
    """
    try:
        # The name goes inside an IMAP quoted string, where backslash and
        # double quote must be backslash-escaped.
        quoted = label_name.replace('\\', '\\\\').replace('"', '\\"')
        status, _ = imap.select(f'"{quoted}"', readonly=True)
        if status != 'OK':
            print(f" ⚠️ Could not access label: {label_name}")
            return None

        # Get all message IDs
        try:
            status, messages = imap.search(None, 'ALL')
            if status != 'OK':
                print(f" ⚠️ Search failed for label: {label_name}")
                return None
        except imaplib.IMAP4.error as e:
            # Handle "got more than X bytes" error
            if 'got more than' in str(e):
                print(f" ⚠️ Label too large to process efficiently: {label_name}")
                print(f" (IMAP response size limit exceeded)")
            else:
                print(f" ⚠️ IMAP error for {label_name}: {e}")
            return None

        message_ids = messages[0].split() if messages and messages[0] else []
        message_count = len(message_ids)
        if message_count == 0:
            print(f" No messages in this label")
            return _label_summary(label_name, 0, 0)

        print(f" Calculating storage for {message_count} messages...")
        total_size = 0
        failed_batches = 0
        batch_size = 100
        for start in range(0, message_count, batch_size):
            batch = message_ids[start:start + batch_size]
            batch_str = b','.join(batch).decode('ascii')
            failure = None
            try:
                # Fetch RFC822.SIZE (message size) for each message
                status, msg_data = imap.fetch(batch_str, '(RFC822.SIZE)')
                if status != 'OK':
                    failure = f"FETCH returned {status}"
                else:
                    for item in msg_data or ():
                        if not isinstance(item, bytes):
                            continue
                        # Response items look like: b'1 (RFC822.SIZE 12345)'
                        text = item.decode('ascii', errors='ignore')
                        _, marker, tail = text.partition('RFC822.SIZE')
                        if not marker:
                            continue
                        tokens = tail.replace(')', ' ').split()
                        if tokens and tokens[0].isdigit():
                            total_size += int(tokens[0])
            except Exception as e:
                failure = e

            if failure is not None:
                failed_batches += 1
                # Report the first failure whichever batch it occurs in, then
                # stay quiet so a broken label does not flood the output.
                if failed_batches == 1:
                    print(f" ⚠️ Error fetching some messages: {failure}")

            # Progress indicator
            progress = min(start + batch_size, message_count)
            if message_count > 100:
                print(f" Progress: {progress}/{message_count}", end='\r')

        if message_count > 100:
            print()  # New line after progress
        if failed_batches:
            print(f" ⚠️ {failed_batches} batch(es) could not be fetched; size may be incomplete")

        return _label_summary(label_name, message_count, total_size)
    except Exception as e:
        print(f" Error processing {label_name}: {e}")
        return None
def format_size(bytes_value):
    """
    Format a byte count with one decimal place in the smallest unit that
    keeps fewer than four digits before the decimal point.

    Units step by 1024 (B, KB, MB, GB, TB). A value that would print as
    1000.0 or more in one unit is shown in the next unit up instead, so
    1023 KB is rendered as "1.0 MB" rather than "1023.0 KB". TB is the
    largest unit, so only there can the number grow past 999.9.

    Examples:
        2,173,902,848 bytes -> "2.0 GB"
        648,863,744 bytes -> "618.8 MB"
        1,867,776 bytes -> "1.8 MB"
        51,200 bytes -> "50.0 KB"
        512 bytes -> "512.0 B"
        0 bytes -> "0.0 KB"
    """
    if bytes_value == 0:
        return "0.0 KB"

    units = ("B", "KB", "MB", "GB", "TB")
    value = float(bytes_value)
    index = 0
    # Compare the value as it will be printed (one decimal): 999.96 prints as
    # "1000.0", so it must already move up to the next unit.
    while index < len(units) - 1 and round(abs(value), 1) >= 1000.0:
        value /= 1024
        index += 1
    return f"{value:.1f} {units[index]}"
def format_output(storage_info, label_name=None):
    """Format and print storage information.

    Args:
        storage_info: With ``label_name`` set, one result dict (keys
            ``label``, ``message_count``, ``total_bytes``). Otherwise a list
            of such dicts. A falsy value prints nothing.
        label_name: Truthy selects the single-label report; falsy selects the
            all-labels table.

    The all-labels table is ordered by size, largest first, followed by a
    TOTAL row. The caller's list is not reordered.
    """
    if not storage_info:
        return

    if label_name:
        # Single label output
        size_str = format_size(storage_info['total_bytes'])
        print(f"\n{'='*60}")
        print(f"Storage Analysis for Label: {storage_info['label']}")
        print(f"{'='*60}")
        print(f"Message Count: {storage_info['message_count']:,}")
        print(f"Total Size (Bytes): {storage_info['total_bytes']:,}")
        print(f"Total Size: {size_str}")
        print(f"{'='*60}\n")
        return

    # All labels output. sorted() leaves the caller's list untouched; a print
    # routine should not reorder its argument as a side effect.
    rows = sorted(storage_info, key=lambda x: x['total_bytes'], reverse=True)
    print(f"\n{'='*70}")
    print("Storage Analysis for All Labels")
    print(f"{'='*70}")
    print(f"{'Label':<40} {'Messages':>10} {'Size':>15}")
    print(f"{'-'*70}")
    for info in rows:
        # Names longer than the 40-character column are cut to 38 plus '..'.
        label_display = info['label'][:38] + '..' if len(info['label']) > 40 else info['label']
        size_str = format_size(info['total_bytes'])
        print(f"{label_display:<40} {info['message_count']:>10,} {size_str:>15}")

    # Calculate totals
    # NOTE(review): a message carrying several labels is presumably counted
    # once per label, so TOTAL may exceed real mailbox usage - confirm.
    total_messages = sum(info['message_count'] for info in rows)
    total_bytes = sum(info['total_bytes'] for info in rows)
    total_size_str = format_size(total_bytes)
    print(f"{'-'*70}")
    print(f"{'TOTAL':<40} {total_messages:>10,} {total_size_str:>15}")
    print(f"{'='*70}\n")
def main():
    """Command-line entry point.

    Parses ``--label`` and ``--list-labels``, loads credentials, connects,
    then either lists the available labels or computes and prints the storage
    report. The IMAP session is logged out afterwards in all cases.
    """
    parser = argparse.ArgumentParser(
        description='Analyze Gmail storage usage by label using IMAP'
    )
    parser.add_argument(
        '--label',
        type=str,
        help='Specific label name to analyze (leave empty for all labels)',
        default=None
    )
    parser.add_argument(
        '--list-labels',
        action='store_true',
        help='List all available labels and exit'
    )
    args = parser.parse_args()

    # Load credentials
    email_addr, password = load_credentials()

    # Connect via IMAP
    imap = connect_imap(email_addr, password)
    try:
        if args.list_labels:
            # List all labels
            labels = get_all_labels(imap)
            print(f"Available Labels ({len(labels)}):")
            print("-" * 40)
            for label in labels:
                print(f" {label}")
            print()
        else:
            # Calculate storage
            storage_info = calculate_label_storage(imap, args.label)
            # Display results
            if storage_info:
                format_output(storage_info, args.label)
    finally:
        # Clean disconnect. Logout is best-effort, but only protocol and
        # socket errors are ignored: a bare except would also swallow
        # KeyboardInterrupt and SystemExit.
        try:
            imap.logout()
        except (imaplib.IMAP4.error, OSError):
            pass
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()