diff --git a/lib/eml2html.py b/lib/eml2html.py index ed65727..ac23b4a 100644 --- a/lib/eml2html.py +++ b/lib/eml2html.py @@ -78,10 +78,9 @@ def __get_formatted_header_info(self): for header in FORMATTED_HEADERS_TO_INCLUDE: if self.eml[header]: decoded_string = self.__get_utf8_header(self.eml[header]) - header_info = header_info + '' + header + ': '\ - + decoded_string + '
' + header_info = f'{header_info}{header}: {decoded_string}
' - return header_info + '
' + return f'{header_info}
' def __get_utf8_header(self, header): # There is a simpler way of doing this here: @@ -89,13 +88,12 @@ def __get_utf8_header(self, header): # seem to work, as it inserts a space between certain elements # in the string that's not warranted/correct. decoded_header = decode_header(header) - hdr = "" - for element in decoded_header: - if isinstance(element[0], bytes): - hdr += str(element[0], element[1] or 'ASCII') - else: - hdr += element[0] - return hdr + return "".join( + str(element[0], element[1] or 'ASCII') + if isinstance(element[0], bytes) + else element[0] + for element in decoded_header + ) def __cid_replace(self, matchobj): cid = matchobj.group(1) @@ -110,7 +108,7 @@ def __cid_replace(self, matchobj): image_base64 = re.sub("[\r\n\t]", "", image_base64) image_decoded = image_part.get_payload(decode=True) mime_type = self.__get_mime_type(image_decoded) - return "data:" + mime_type + ";base64," + image_base64 + return f"data:{mime_type};base64,{image_base64}" # else: # raise FatalException( # "Could not find image cid " + cid + " in email content.") @@ -141,16 +139,24 @@ def __find_part_by_content_type_name(self, message, content_type_name): return None def __find_part_by_content_id(self, message, content_id): - for part in message.walk(): - if part['Content-ID'] in (content_id, '<' + content_id + '>'): - return part - return None + return next( + ( + part + for part in message.walk() + if part['Content-ID'] in (content_id, f'<{content_id}>') + ), + None, + ) def __part_by_content_type(self, message, content_type): - for part in message.walk(): - if part.get_content_type() == content_type: - return part - return None + return next( + ( + part + for part in message.walk() + if part.get_content_type() == content_type + ), + None, + ) def __remove_invalid_urls(self, payload): soup = BeautifulSoup(payload, "html5lib") @@ -162,16 +168,14 @@ def __remove_invalid_urls(self, payload): if lower_src == 'broken': del img['src'] elif not lower_src.startswith('data'): - found_blacklist = False - - for image_load_blacklist_item in IMAGE_LOAD_BLACKLIST: - if image_load_blacklist_item in lower_src: - found_blacklist = True - - if not found_blacklist: - if not utils.can_url_fetch(src): - del img['src'] - else: + found_blacklist = any( + image_load_blacklist_item in lower_src + for image_load_blacklist_item in IMAGE_LOAD_BLACKLIST + ) + if ( + not found_blacklist + and not utils.can_url_fetch(src) + or found_blacklist + ): del img['src'] - return str(soup) diff --git a/lib/html2img.py b/lib/html2img.py index 06e3005..39d0fa8 100644 --- a/lib/html2img.py +++ b/lib/html2img.py @@ -45,9 +45,7 @@ def __get_unique_version(self, filename): counter = 1 file_name_parts = os.path.splitext(filename) while os.path.isfile(filename): - filename = "%s_%s%s" % (file_name_parts[0], - '_' + str(counter), - file_name_parts[1]) + filename = f"{file_name_parts[0]}__{str(counter)}{file_name_parts[1]}" counter += 1 return filename @@ -61,7 +59,7 @@ def __process_errors(self, ret_code, error): original_error = str(error, 'utf-8').rstrip() stripped_error = stripped_error.rstrip() - if ret_code > 0 and original_error == '': + if ret_code > 0 and not original_error: raise FatalException("wkhtmltoimage failed with exit code " + str(ret_code) + ", no error output.") diff --git a/lib/html2pdf.py b/lib/html2pdf.py index b260a7d..77a108d 100644 --- a/lib/html2pdf.py +++ b/lib/html2pdf.py @@ -43,9 +43,7 @@ def __get_unique_version(self, filename): counter = 1 file_name_parts = os.path.splitext(filename) while os.path.isfile(filename): - filename = "%s_%s%s" % (file_name_parts[0], - '_' + str(counter), - file_name_parts[1]) + filename = f"{file_name_parts[0]}__{str(counter)}{file_name_parts[1]}" counter += 1 return filename @@ -59,7 +57,7 @@ def __process_errors(self, ret_code, error): original_error = str(error, 'utf-8').rstrip() stripped_error = stripped_error.rstrip() - if ret_code > 0 and original_error == '': + if ret_code > 0 and not original_error: raise FatalException("wkhtmltopdf failed with exit code " + str(ret_code) + ", no error output.") diff --git a/lib/utils.py b/lib/utils.py index b53bf65..5485fae 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -6,11 +6,6 @@ def can_url_fetch(src): try: req = Request(src) urlopen(req) - except HTTPError: + except (HTTPError, URLError, ValueError): return False - except URLError: - return False - except ValueError: - return False - return True diff --git a/test.py b/test.py index 6d85dd3..6a5089e 100644 --- a/test.py +++ b/test.py @@ -20,15 +20,13 @@ def __init__(self, IMAP_SERVER, EMAIL_ADDRESS, self.mail.select() def get_emails(self): - uids = self.mail.uid('SEARCH', 'ALL')[1][0].split() - return uids + return self.mail.uid('SEARCH', 'ALL')[1][0].split() def get_email_message(self, email_id): _, data = self.mail.uid('FETCH', email_id, '(RFC822)') raw_email = data[0][1] raw_email_string = raw_email.decode('utf-8') - email_message = email.message_from_string(raw_email_string) - return email_message + return email.message_from_string(raw_email_string) email_helper = EmailHelper(IMAP_SERVER, EMAIL_ADDRESS, @@ -45,12 +43,12 @@ def get_email_message(self, email_id): email_message = email_helper.get_email_message(uid) html = email_to_html_convertor.convert(email_message) - filename = uid.decode() + ".jpg" + filename = f"{uid.decode()}.jpg" img_path = html_to_img_convertor.save_img( html.encode(), output_dir, filename) print(img_path) - filename = uid.decode() + ".pdf" + filename = f"{uid.decode()}.pdf" pdf_path = html_to_pdf_convertor.save_pdf( html.encode(), output_dir, filename) print(pdf_path)