Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 34 additions & 30 deletions lib/eml2html.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,24 +78,22 @@ def __get_formatted_header_info(self):
for header in FORMATTED_HEADERS_TO_INCLUDE:
if self.eml[header]:
decoded_string = self.__get_utf8_header(self.eml[header])
header_info = header_info + '<b>' + header + '</b>: '\
+ decoded_string + '<br/>'
header_info = f'{header_info}<b>{header}</b>: {decoded_string}<br/>'

return header_info + '<br/>'
return f'{header_info}<br/>'
Comment on lines -81 to +83
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

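Function EmailtoHtml.__get_formatted_header_info refactored with the following changes:

  • Replace string concatenation with f-strings, both when appending each header and in the return value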


def __get_utf8_header(self, header):
# There is a simpler way of doing this here:
# http://stackoverflow.com/a/21715870/27641. However, it doesn't
# seem to work, as it inserts a space between certain elements
# in the string that's not warranted/correct.
decoded_header = decode_header(header)
hdr = ""
for element in decoded_header:
if isinstance(element[0], bytes):
hdr += str(element[0], element[1] or 'ASCII')
else:
hdr += element[0]
return hdr
return "".join(
str(element[0], element[1] or 'ASCII')
if isinstance(element[0], bytes)
else element[0]
for element in decoded_header
)
Comment on lines -92 to +96
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

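Function EmailtoHtml.__get_utf8_header refactored with the following changes:

  • Build the decoded header with "".join() over a generator expression instead of appending to a string in a for-loop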


def __cid_replace(self, matchobj):
cid = matchobj.group(1)
Expand All @@ -110,7 +108,7 @@ def __cid_replace(self, matchobj):
image_base64 = re.sub("[\r\n\t]", "", image_base64)
image_decoded = image_part.get_payload(decode=True)
mime_type = self.__get_mime_type(image_decoded)
return "data:" + mime_type + ";base64," + image_base64
return f"data:{mime_type};base64,{image_base64}"
Comment on lines -113 to +111
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function EmailtoHtml.__cid_replace refactored with the following changes:

  • Replace string concatenation with an f-string when building the data URI

# else:
# raise FatalException(
# "Could not find image cid " + cid + " in email content.")
Expand Down Expand Up @@ -141,16 +139,24 @@ def __find_part_by_content_type_name(self, message, content_type_name):
return None

def __find_part_by_content_id(self, message, content_id):
for part in message.walk():
if part['Content-ID'] in (content_id, '<' + content_id + '>'):
return part
return None
return next(
(
part
for part in message.walk()
if part['Content-ID'] in (content_id, f'<{content_id}>')
),
None,
)
Comment on lines -144 to +149
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

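Function EmailtoHtml.__find_part_by_content_id refactored with the following changes:

  • Use the built-in function next instead of a for-loop (use-next)
  • Replace string concatenation with an f-string when building the bracketed Content-ID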


def __part_by_content_type(self, message, content_type):
for part in message.walk():
if part.get_content_type() == content_type:
return part
return None
return next(
(
part
for part in message.walk()
if part.get_content_type() == content_type
),
None,
)
Comment on lines -150 to +159
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function EmailtoHtml.__part_by_content_type refactored with the following changes:

  • Use the built-in function next instead of a for-loop (use-next)


def __remove_invalid_urls(self, payload):
soup = BeautifulSoup(payload, "html5lib")
Expand All @@ -162,16 +168,14 @@ def __remove_invalid_urls(self, payload):
if lower_src == 'broken':
del img['src']
elif not lower_src.startswith('data'):
found_blacklist = False

for image_load_blacklist_item in IMAGE_LOAD_BLACKLIST:
if image_load_blacklist_item in lower_src:
found_blacklist = True

if not found_blacklist:
if not utils.can_url_fetch(src):
del img['src']
else:
found_blacklist = any(
image_load_blacklist_item in lower_src
for image_load_blacklist_item in IMAGE_LOAD_BLACKLIST
)
if (
not found_blacklist
and not utils.can_url_fetch(src)
or found_blacklist
):
del img['src']

Comment on lines -165 to -176
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

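Function EmailtoHtml.__remove_invalid_urls refactored with the following changes:

  • Use the built-in function any instead of a for-loop to compute found_blacklist
  • Merge the nested if/else branches that delete img['src'] into a single condition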

return str(soup)
6 changes: 2 additions & 4 deletions lib/html2img.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,7 @@ def __get_unique_version(self, filename):
counter = 1
file_name_parts = os.path.splitext(filename)
while os.path.isfile(filename):
filename = "%s_%s%s" % (file_name_parts[0],
'_' + str(counter),
file_name_parts[1])
filename = f"{file_name_parts[0]}__{str(counter)}{file_name_parts[1]}"
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function HtmltoImage.__get_unique_version refactored with the following changes:

  • Replace %-style string formatting with an f-string

counter += 1
return filename

Expand All @@ -61,7 +59,7 @@ def __process_errors(self, ret_code, error):
original_error = str(error, 'utf-8').rstrip()
stripped_error = stripped_error.rstrip()

if ret_code > 0 and original_error == '':
if ret_code > 0 and not original_error:
Comment on lines -64 to +62
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

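Function HtmltoImage.__process_errors refactored with the following changes:

  • Replace the comparison against an empty string (original_error == '') with a truthiness check (not original_error)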

raise FatalException("wkhtmltoimage failed with exit code " +
str(ret_code) +
", no error output.")
Expand Down
6 changes: 2 additions & 4 deletions lib/html2pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ def __get_unique_version(self, filename):
counter = 1
file_name_parts = os.path.splitext(filename)
while os.path.isfile(filename):
filename = "%s_%s%s" % (file_name_parts[0],
'_' + str(counter),
file_name_parts[1])
filename = f"{file_name_parts[0]}__{str(counter)}{file_name_parts[1]}"
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function HtmltoPdf.__get_unique_version refactored with the following changes:

  • Replace %-style string formatting with an f-string

counter += 1
return filename

Expand All @@ -59,7 +57,7 @@ def __process_errors(self, ret_code, error):
original_error = str(error, 'utf-8').rstrip()
stripped_error = stripped_error.rstrip()

if ret_code > 0 and original_error == '':
if ret_code > 0 and not original_error:
Comment on lines -62 to +60
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

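Function HtmltoPdf.__process_errors refactored with the following changes:

  • Replace the comparison against an empty string (original_error == '') with a truthiness check (not original_error)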

raise FatalException("wkhtmltopdf failed with exit code " +
str(ret_code) +
", no error output.")
Expand Down
7 changes: 1 addition & 6 deletions lib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@ def can_url_fetch(src):
try:
req = Request(src)
urlopen(req)
except HTTPError:
except (HTTPError, URLError, ValueError):
return False
except URLError:
return False
except ValueError:
return False

Comment on lines -9 to -15
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

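Function can_url_fetch refactored with the following changes:

  • Merge the three except clauses that each return False into a single except (HTTPError, URLError, ValueError) clause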

return True
10 changes: 4 additions & 6 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,13 @@ def __init__(self, IMAP_SERVER, EMAIL_ADDRESS,
self.mail.select()

def get_emails(self):
uids = self.mail.uid('SEARCH', 'ALL')[1][0].split()
return uids
return self.mail.uid('SEARCH', 'ALL')[1][0].split()
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Function EmailHelper.get_emails refactored with the following changes:

  • Return the search result directly instead of first assigning it to the local variable uids


def get_email_message(self, email_id):
_, data = self.mail.uid('FETCH', email_id, '(RFC822)')
raw_email = data[0][1]
raw_email_string = raw_email.decode('utf-8')
email_message = email.message_from_string(raw_email_string)
return email_message
return email.message_from_string(raw_email_string)
Comment on lines -30 to +29
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

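Function EmailHelper.get_email_message refactored with the following changes:

  • Return the parsed message directly instead of first assigning it to the local variable email_message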



email_helper = EmailHelper(IMAP_SERVER, EMAIL_ADDRESS,
Expand All @@ -45,12 +43,12 @@ def get_email_message(self, email_id):
email_message = email_helper.get_email_message(uid)
html = email_to_html_convertor.convert(email_message)

filename = uid.decode() + ".jpg"
filename = f"{uid.decode()}.jpg"
img_path = html_to_img_convertor.save_img(
html.encode(), output_dir, filename)
print(img_path)

filename = uid.decode() + ".pdf"
filename = f"{uid.decode()}.pdf"
Comment on lines -48 to +51
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines 48-53 refactored with the following changes:

  • Replace string concatenation with f-strings when building the .jpg and .pdf filenames

pdf_path = html_to_pdf_convertor.save_pdf(
html.encode(), output_dir, filename)
print(pdf_path)