-
Notifications
You must be signed in to change notification settings - Fork 0
Sourcery refactored master branch #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -78,24 +78,22 @@ def __get_formatted_header_info(self): | |
| for header in FORMATTED_HEADERS_TO_INCLUDE: | ||
| if self.eml[header]: | ||
| decoded_string = self.__get_utf8_header(self.eml[header]) | ||
| header_info = header_info + '<b>' + header + '</b>: '\ | ||
| + decoded_string + '<br/>' | ||
| header_info = f'{header_info}<b>{header}</b>: {decoded_string}<br/>' | ||
|
|
||
| return header_info + '<br/>' | ||
| return f'{header_info}<br/>' | ||
|
|
||
| def __get_utf8_header(self, header): | ||
| # There is a simpler way of doing this here: | ||
| # http://stackoverflow.com/a/21715870/27641. However, it doesn't | ||
| # seem to work, as it inserts a space between certain elements | ||
| # in the string that's not warranted/correct. | ||
| decoded_header = decode_header(header) | ||
| hdr = "" | ||
| for element in decoded_header: | ||
| if isinstance(element[0], bytes): | ||
| hdr += str(element[0], element[1] or 'ASCII') | ||
| else: | ||
| hdr += element[0] | ||
| return hdr | ||
| return "".join( | ||
| str(element[0], element[1] or 'ASCII') | ||
| if isinstance(element[0], bytes) | ||
| else element[0] | ||
| for element in decoded_header | ||
| ) | ||
|
Comment on lines
-92
to
+96
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| def __cid_replace(self, matchobj): | ||
| cid = matchobj.group(1) | ||
|
|
@@ -110,7 +108,7 @@ def __cid_replace(self, matchobj): | |
| image_base64 = re.sub("[\r\n\t]", "", image_base64) | ||
| image_decoded = image_part.get_payload(decode=True) | ||
| mime_type = self.__get_mime_type(image_decoded) | ||
| return "data:" + mime_type + ";base64," + image_base64 | ||
| return f"data:{mime_type};base64,{image_base64}" | ||
|
Comment on lines
-113
to
+111
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| # else: | ||
| # raise FatalException( | ||
| # "Could not find image cid " + cid + " in email content.") | ||
|
|
@@ -141,16 +139,24 @@ def __find_part_by_content_type_name(self, message, content_type_name): | |
| return None | ||
|
|
||
| def __find_part_by_content_id(self, message, content_id): | ||
| for part in message.walk(): | ||
| if part['Content-ID'] in (content_id, '<' + content_id + '>'): | ||
| return part | ||
| return None | ||
| return next( | ||
| ( | ||
| part | ||
| for part in message.walk() | ||
| if part['Content-ID'] in (content_id, f'<{content_id}>') | ||
| ), | ||
| None, | ||
| ) | ||
|
Comment on lines
-144
to
+149
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| def __part_by_content_type(self, message, content_type): | ||
| for part in message.walk(): | ||
| if part.get_content_type() == content_type: | ||
| return part | ||
| return None | ||
| return next( | ||
| ( | ||
| part | ||
| for part in message.walk() | ||
| if part.get_content_type() == content_type | ||
| ), | ||
| None, | ||
| ) | ||
|
Comment on lines
-150
to
+159
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| def __remove_invalid_urls(self, payload): | ||
| soup = BeautifulSoup(payload, "html5lib") | ||
|
|
@@ -162,16 +168,14 @@ def __remove_invalid_urls(self, payload): | |
| if lower_src == 'broken': | ||
| del img['src'] | ||
| elif not lower_src.startswith('data'): | ||
| found_blacklist = False | ||
|
|
||
| for image_load_blacklist_item in IMAGE_LOAD_BLACKLIST: | ||
| if image_load_blacklist_item in lower_src: | ||
| found_blacklist = True | ||
|
|
||
| if not found_blacklist: | ||
| if not utils.can_url_fetch(src): | ||
| del img['src'] | ||
| else: | ||
| found_blacklist = any( | ||
| image_load_blacklist_item in lower_src | ||
| for image_load_blacklist_item in IMAGE_LOAD_BLACKLIST | ||
| ) | ||
| if ( | ||
| not found_blacklist | ||
| and not utils.can_url_fetch(src) | ||
| or found_blacklist | ||
| ): | ||
| del img['src'] | ||
|
|
||
|
Comment on lines
-165
to
-176
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| return str(soup) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,9 +45,7 @@ def __get_unique_version(self, filename): | |
| counter = 1 | ||
| file_name_parts = os.path.splitext(filename) | ||
| while os.path.isfile(filename): | ||
| filename = "%s_%s%s" % (file_name_parts[0], | ||
| '_' + str(counter), | ||
| file_name_parts[1]) | ||
| filename = f"{file_name_parts[0]}__{str(counter)}{file_name_parts[1]}" | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| counter += 1 | ||
| return filename | ||
|
|
||
|
|
@@ -61,7 +59,7 @@ def __process_errors(self, ret_code, error): | |
| original_error = str(error, 'utf-8').rstrip() | ||
| stripped_error = stripped_error.rstrip() | ||
|
|
||
| if ret_code > 0 and original_error == '': | ||
| if ret_code > 0 and not original_error: | ||
|
Comment on lines
-64
to
+62
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| raise FatalException("wkhtmltoimage failed with exit code " + | ||
| str(ret_code) + | ||
| ", no error output.") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,9 +43,7 @@ def __get_unique_version(self, filename): | |
| counter = 1 | ||
| file_name_parts = os.path.splitext(filename) | ||
| while os.path.isfile(filename): | ||
| filename = "%s_%s%s" % (file_name_parts[0], | ||
| '_' + str(counter), | ||
| file_name_parts[1]) | ||
| filename = f"{file_name_parts[0]}__{str(counter)}{file_name_parts[1]}" | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| counter += 1 | ||
| return filename | ||
|
|
||
|
|
@@ -59,7 +57,7 @@ def __process_errors(self, ret_code, error): | |
| original_error = str(error, 'utf-8').rstrip() | ||
| stripped_error = stripped_error.rstrip() | ||
|
|
||
| if ret_code > 0 and original_error == '': | ||
| if ret_code > 0 and not original_error: | ||
|
Comment on lines
-62
to
+60
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| raise FatalException("wkhtmltopdf failed with exit code " + | ||
| str(ret_code) + | ||
| ", no error output.") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,11 +6,6 @@ def can_url_fetch(src): | |
| try: | ||
| req = Request(src) | ||
| urlopen(req) | ||
| except HTTPError: | ||
| except (HTTPError, URLError, ValueError): | ||
| return False | ||
| except URLError: | ||
| return False | ||
| except ValueError: | ||
| return False | ||
|
|
||
|
Comment on lines
-9
to
-15
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
| return True | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,15 +20,13 @@ def __init__(self, IMAP_SERVER, EMAIL_ADDRESS, | |
| self.mail.select() | ||
|
|
||
| def get_emails(self): | ||
| uids = self.mail.uid('SEARCH', 'ALL')[1][0].split() | ||
| return uids | ||
| return self.mail.uid('SEARCH', 'ALL')[1][0].split() | ||
|
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
| def get_email_message(self, email_id): | ||
| _, data = self.mail.uid('FETCH', email_id, '(RFC822)') | ||
| raw_email = data[0][1] | ||
| raw_email_string = raw_email.decode('utf-8') | ||
| email_message = email.message_from_string(raw_email_string) | ||
| return email_message | ||
| return email.message_from_string(raw_email_string) | ||
|
Comment on lines
-30
to
+29
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Function
|
||
|
|
||
|
|
||
| email_helper = EmailHelper(IMAP_SERVER, EMAIL_ADDRESS, | ||
|
|
@@ -45,12 +43,12 @@ def get_email_message(self, email_id): | |
| email_message = email_helper.get_email_message(uid) | ||
| html = email_to_html_convertor.convert(email_message) | ||
|
|
||
| filename = uid.decode() + ".jpg" | ||
| filename = f"{uid.decode()}.jpg" | ||
| img_path = html_to_img_convertor.save_img( | ||
| html.encode(), output_dir, filename) | ||
| print(img_path) | ||
|
|
||
| filename = uid.decode() + ".pdf" | ||
| filename = f"{uid.decode()}.pdf" | ||
|
Comment on lines
-48
to
+51
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Lines
|
||
| pdf_path = html_to_pdf_convertor.save_pdf( | ||
| html.encode(), output_dir, filename) | ||
| print(pdf_path) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Function
`EmailtoHtml.__get_formatted_header_info` refactored with the following changes: use f-strings instead of string concatenation (`use-fstring-for-concatenation`)