From 3a7885a81fce28616b239cfa69f713b306a8fa21 Mon Sep 17 00:00:00 2001 From: Curtis Doty Date: Tue, 31 Oct 2017 15:35:25 -0700 Subject: [PATCH 1/6] Preserve Gmail labels in the mbox as sub-labels of the main mbox label. - Also ignore messages in Trash/Spam. - Don't bother creating Unread label. --- import-mailbox-to-gmail.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/import-mailbox-to-gmail.py b/import-mailbox-to-gmail.py index ec1659e..258c677 100755 --- a/import-mailbox-to-gmail.py +++ b/import-mailbox-to-gmail.py @@ -25,6 +25,7 @@ import logging import logging.handlers import mailbox +from csv import reader import os import sys @@ -223,6 +224,30 @@ def process_mbox_files(username, service, labels): if index < args.from_message: continue logging.info("Processing message %d in label '%s'", index, labelname) + + if 'X-Gmail-Labels' in message: + gmail_labels= next(reader([message['X-Gmail-Labels']])) + #logging.info('Found Gmail Labels: %s', gmail_labels) + + if 'Spam' in gmail_labels: + logging.info("Skipped Spam message %d in label '%s'", index, labelname) + continue + if 'Trash' in gmail_labels: + logging.info("Skipped Trash message %d in label '%s'", index, labelname) + continue + if 'Unread' in gmail_labels: + gmail_labels.remove('Unread') + + label_ids= [label_id] + for sublabel in gmail_labels: + sublabel= "%s/%s" %(labelname, sublabel) + label_ids.append(get_label_id_from_name(service, username, labels, sublabel)) + metadata_object= {'labelIds': label_ids} + + # TODO: test/handle when there is no labels header? 
+ else: + metadata_object= {'labelIds': [label_id]} + try: if (args.replace_quoted_printable and 'Content-Type' in message and @@ -235,6 +260,7 @@ def process_mbox_files(username, service, labels): logging.exception( 'Failed to replace text/quoted-printable with text/plain ' 'in Content-Type header') + try: if args.fix_msgid and 'Message-ID' in message: msgid = message['Message-ID'] @@ -247,7 +273,7 @@ def process_mbox_files(username, service, labels): message.replace_header('Message-ID', msgid) except Exception: logging.exception('Failed to fix brackets in Message-ID header') - metadata_object = {'labelIds': [label_id]} + try: # Use media upload to allow messages more than 5mb. # See https://developers.google.com/api-client-library/python/guide/media_upload @@ -272,6 +298,7 @@ def process_mbox_files(username, service, labels): except Exception: number_of_failures_in_label += 1 logging.exception('Failed to import mbox message') + logging.info("Finished processing '%s'. %d messages imported " "successfully, %d messages failed.", full_filename, From cfa0c78f087a07c321279165ce5d00bc09118f26 Mon Sep 17 00:00:00 2001 From: Curtis Doty Date: Tue, 31 Oct 2017 16:33:13 -0700 Subject: [PATCH 2/6] Add option to disable import of Gmail labels. 
--- import-mailbox-to-gmail.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/import-mailbox-to-gmail.py b/import-mailbox-to-gmail.py index 258c677..1c594ee 100755 --- a/import-mailbox-to-gmail.py +++ b/import-mailbox-to-gmail.py @@ -88,6 +88,14 @@ help= "Replace 'Content-Type: text/quoted-printable' with text/plain (default: " "replace it)") +parser.add_argument( + '--with-labels', + dest='with_labels', + required=False, + action='store_false', + help= + "Preserve Gmail labels from the imported mbox as sublabels " + "(default: keep them)") parser.add_argument( '--num_retries', default=10, @@ -114,6 +122,7 @@ 'Message number to resume from, affects ALL users and ALL ' 'mbox files (default: 0)') parser.set_defaults(fix_msgid=True, replace_quoted_printable=True, + with_labels=True, logging_level='INFO') args = parser.parse_args() @@ -225,7 +234,7 @@ def process_mbox_files(username, service, labels): continue logging.info("Processing message %d in label '%s'", index, labelname) - if 'X-Gmail-Labels' in message: + if args.with_labels and 'X-Gmail-Labels' in message: gmail_labels= next(reader([message['X-Gmail-Labels']])) #logging.info('Found Gmail Labels: %s', gmail_labels) From c356b82bc8b676bea59d7ced9186ac1e631f2038 Mon Sep 17 00:00:00 2001 From: Curtis Doty Date: Mon, 6 Nov 2017 15:40:45 -0800 Subject: [PATCH 3/6] Attempting to address feedback from: https://github.com/google/import-mailbox-to-gmail/pull/32#pullrequestreview-74489932 --- import-mailbox-to-gmail.py | 92 +++++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 31 deletions(-) diff --git a/import-mailbox-to-gmail.py b/import-mailbox-to-gmail.py index 1c594ee..8774bce 100755 --- a/import-mailbox-to-gmail.py +++ b/import-mailbox-to-gmail.py @@ -20,12 +20,12 @@ import argparse import base64 +from csv import reader import io import json import logging import logging.handlers import mailbox -from csv import reader import os import sys @@ -89,13 +89,32 @@ 
"Replace 'Content-Type: text/quoted-printable' with text/plain (default: " "replace it)") parser.add_argument( - '--with-labels', - dest='with_labels', + '--takeout-labels', + dest='takeout_labels', + choices=[True, False, 'sublabels'], required=False, - action='store_false', + default=True, + #action='store_false', help= - "Preserve Gmail labels from the imported mbox as sublabels " + "Preserve Gmail labels from Takeout mbox files. " + "Or optionally, transform them into sublabels. " "(default: keep them)") +parser.add_argument( + '--takeout-no-unread', + dest='takeout_no_unread', + required=False, + action='store_false', + help= + "Preserve read/unread state from Takeout mbox files " + "(default: keep it)") +parser.add_argument( + '--takeout-spam-trash', + dest='takeout_spam_trash', + required=False, + action='store_true', + help= + "Import messages from Spam/Trash labels in Takeout mbox files " + "(default: skip them)") parser.add_argument( '--num_retries', default=10, @@ -122,7 +141,6 @@ 'Message number to resume from, affects ALL users and ALL ' 'mbox files (default: 0)') parser.set_defaults(fix_msgid=True, replace_quoted_printable=True, - with_labels=True, logging_level='INFO') args = parser.parse_args() @@ -147,6 +165,8 @@ def get_label_id_from_name(service, username, labels, labelname): if labelname.endswith('.mbox'): # Strip .mbox suffix from folder names labelname = labelname[:-5] + if labelname.upper() == 'UNREAD': + return u'UNREAD' # magic label always there for label in labels: if label['name'] == labelname: return label['id'] @@ -169,6 +189,39 @@ def get_label_id_from_name(service, username, labels, labelname): raise +# TODO: instead of passing mbox_label_name as param, can't we look it up from id? +def get_metadata(service, username, labels, msg, mbox_label_id, mbox_label_name): + """Find Gmail labels and preserve them for import. + + Returns: + A metadata object. 
+ """ + label_ids = [mbox_label_id] + + if args.takeout_labels and 'X-Gmail-Labels' in msg: + gmail_labels = next(reader([msg['X-Gmail-Labels'].replace('\r\n', '')])) + logging.info('Found Gmail Labels: %s', gmail_labels) + + if args.takeout_spam_trash: + # TODO: test if we are importing spam/trash without renaming to sublabels + if 'Spam' in gmail_labels: + logging.info("Skipped Spam message %d in mbox '%s'", index, mbox_label_name) + return False + if 'Trash' in gmail_labels: + logging.info("Skipped Trash message %d in mbox '%s'", index, mbox_label_name) + return False + + if not args.takeout_no_unread and 'Unread' in gmail_labels: + gmail_labels.remove('Unread') + + for gmail_label in gmail_labels: + if args.takeout_labels == 'sublabels' and gmail_label != 'Unread': + gmail_label = "%s/%s" %(mbox_label_name, gmail_label) + label_ids.append(get_label_id_from_name(service, username, labels, gmail_label)) + + return {'labelIds': label_ids} + + def process_mbox_files(username, service, labels): """Iterates over the mbox files found in the user's subdir and imports them. 
@@ -233,30 +286,6 @@ def process_mbox_files(username, service, labels): if index < args.from_message: continue logging.info("Processing message %d in label '%s'", index, labelname) - - if args.with_labels and 'X-Gmail-Labels' in message: - gmail_labels= next(reader([message['X-Gmail-Labels']])) - #logging.info('Found Gmail Labels: %s', gmail_labels) - - if 'Spam' in gmail_labels: - logging.info("Skipped Spam message %d in label '%s'", index, labelname) - continue - if 'Trash' in gmail_labels: - logging.info("Skipped Trash message %d in label '%s'", index, labelname) - continue - if 'Unread' in gmail_labels: - gmail_labels.remove('Unread') - - label_ids= [label_id] - for sublabel in gmail_labels: - sublabel= "%s/%s" %(labelname, sublabel) - label_ids.append(get_label_id_from_name(service, username, labels, sublabel)) - metadata_object= {'labelIds': label_ids} - - # TODO: test/handle when there is no labels header? - else: - metadata_object= {'labelIds': [label_id]} - try: if (args.replace_quoted_printable and 'Content-Type' in message and @@ -282,7 +311,8 @@ def process_mbox_files(username, service, labels): message.replace_header('Message-ID', msgid) except Exception: logging.exception('Failed to fix brackets in Message-ID header') - + metadata_object = get_metadata(service, username, labels, message, label_id, labelname) + if not metadata_object: continue try: # Use media upload to allow messages more than 5mb. 
# See https://developers.google.com/api-client-library/python/guide/media_upload From 5560c21ebb529fe87323e2b6d641c6b09e08942f Mon Sep 17 00:00:00 2001 From: Curtis Doty Date: Mon, 6 Nov 2017 15:44:26 -0800 Subject: [PATCH 4/6] tyop --- import-mailbox-to-gmail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/import-mailbox-to-gmail.py b/import-mailbox-to-gmail.py index 8774bce..5b88f57 100755 --- a/import-mailbox-to-gmail.py +++ b/import-mailbox-to-gmail.py @@ -105,7 +105,7 @@ required=False, action='store_false', help= - "Preserve read/unread state from Takeout mbox files " + "Don't preserve read/unread state from Takeout mbox files " "(default: keep it)") parser.add_argument( '--takeout-spam-trash', From f2e186ce452079a7e532679d0f00959054ecc3fd Mon Sep 17 00:00:00 2001 From: Curtis Doty Date: Mon, 6 Nov 2017 15:52:40 -0800 Subject: [PATCH 5/6] Eradicate vestiges of accidental whitespace invasion. --- import-mailbox-to-gmail.py | 1 - 1 file changed, 1 deletion(-) diff --git a/import-mailbox-to-gmail.py b/import-mailbox-to-gmail.py index 5b88f57..d7271a9 100755 --- a/import-mailbox-to-gmail.py +++ b/import-mailbox-to-gmail.py @@ -337,7 +337,6 @@ def process_mbox_files(username, service, labels): except Exception: number_of_failures_in_label += 1 logging.exception('Failed to import mbox message') - logging.info("Finished processing '%s'. %d messages imported " "successfully, %d messages failed.", full_filename, From 4e4d39efa032d04f9ee761e793105b5765386e6a Mon Sep 17 00:00:00 2001 From: Curtis Doty Date: Mon, 6 Nov 2017 15:56:29 -0800 Subject: [PATCH 6/6] Oh drat. I'm blind. 
--- import-mailbox-to-gmail.py | 1 - 1 file changed, 1 deletion(-) diff --git a/import-mailbox-to-gmail.py b/import-mailbox-to-gmail.py index d7271a9..48d86a3 100755 --- a/import-mailbox-to-gmail.py +++ b/import-mailbox-to-gmail.py @@ -298,7 +298,6 @@ def process_mbox_files(username, service, labels): logging.exception( 'Failed to replace text/quoted-printable with text/plain ' 'in Content-Type header') - try: if args.fix_msgid and 'Message-ID' in message: msgid = message['Message-ID']