code4storm · sourcery-ai · Apr 24, 2023 · sourcery-ai · Apr 24, 2023 · sourcery-ai
diff --git a/lib/eml2html.py b/lib/eml2html.py
@@ -78,24 +78,22 @@ def __get_formatted_header_info(self):
         for header in FORMATTED_HEADERS_TO_INCLUDE:
             if self.eml[header]:
                 decoded_string = self.__get_utf8_header(self.eml[header])
-                header_info = header_info + '<b>' + header + '</b>: '\
-                    + decoded_string + '<br/>'
+                header_info += f'<b>{header}</b>: {decoded_string}<br/>'
 
-        return header_info + '<br/>'
+        return f'{header_info}<br/>'
 
     def __get_utf8_header(self, header):
         # There is a simpler way of doing this here:
         # http://stackoverflow.com/a/21715870/27641. However, it doesn't
         # seem to work, as it inserts a space between certain elements
         # in the string that's not warranted/correct.
         decoded_header = decode_header(header)
-        hdr = ""
-        for element in decoded_header:
-            if isinstance(element[0], bytes):
-                hdr += str(element[0], element[1] or 'ASCII')
-            else:
-                hdr += element[0]
-        return hdr
+        return "".join(
+            str(element[0], element[1] or 'ASCII')
+            if isinstance(element[0], bytes)
+            else element[0]
+            for element in decoded_header
+        )
 
     def __cid_replace(self, matchobj):
         cid = matchobj.group(1)
@@ -110,7 +108,7 @@ def __cid_replace(self, matchobj):
             image_base64 = re.sub("[\r\n\t]", "", image_base64)
             image_decoded = image_part.get_payload(decode=True)
             mime_type = self.__get_mime_type(image_decoded)
-            return "data:" + mime_type + ";base64," + image_base64
+            return f"data:{mime_type};base64,{image_base64}"
         # else:
         #     raise FatalException(
         #         "Could not find image cid " + cid + " in email content.")
@@ -141,16 +139,24 @@ def __find_part_by_content_type_name(self, message, content_type_name):
         return None
 
     def __find_part_by_content_id(self, message, content_id):
-        for part in message.walk():
-            if part['Content-ID'] in (content_id, '<' + content_id + '>'):
-                return part
-        return None
+        return next(
+            (
+                part
+                for part in message.walk()
+                if part['Content-ID'] in (content_id, f'<{content_id}>')
+            ),
+            None,
+        )
 
     def __part_by_content_type(self, message, content_type):
-        for part in message.walk():
-            if part.get_content_type() == content_type:
-                return part
-        return None
+        return next(
+            (
+                part
+                for part in message.walk()
+                if part.get_content_type() == content_type
+            ),
+            None,
+        )
 
     def __remove_invalid_urls(self, payload):
         soup = BeautifulSoup(payload, "html5lib")
@@ -162,16 +168,12 @@ def __remove_invalid_urls(self, payload):
                 if lower_src == 'broken':
                     del img['src']
                 elif not lower_src.startswith('data'):
-                    found_blacklist = False
-
-                    for image_load_blacklist_item in IMAGE_LOAD_BLACKLIST:
-                        if image_load_blacklist_item in lower_src:
-                            found_blacklist = True
-
-                    if not found_blacklist:
-                        if not utils.can_url_fetch(src):
-                            del img['src']
-                    else:
+                    found_blacklist = any(
+                        image_load_blacklist_item in lower_src
+                        for image_load_blacklist_item in IMAGE_LOAD_BLACKLIST
+                    )
+                    # Check the blacklist first so that a blacklisted URL
+                    # is dropped without ever being requested.
+                    if found_blacklist or not utils.can_url_fetch(src):
                         del img['src']
-
         return str(soup)
diff --git a/lib/html2img.py b/lib/html2img.py
@@ -45,9 +45,7 @@ def __get_unique_version(self, filename):
         counter = 1
         file_name_parts = os.path.splitext(filename)
         while os.path.isfile(filename):
-            filename = "%s_%s%s" % (file_name_parts[0],
-                                    '_' + str(counter),
-                                    file_name_parts[1])
+            filename = f"{file_name_parts[0]}__{counter}{file_name_parts[1]}"
             counter += 1
         return filename
 
@@ -61,7 +59,7 @@ def __process_errors(self, ret_code, error):
         original_error = str(error, 'utf-8').rstrip()
         stripped_error = stripped_error.rstrip()
 
-        if ret_code > 0 and original_error == '':
+        if ret_code > 0 and not original_error:
             raise FatalException("wkhtmltoimage failed with exit code " +
                                  str(ret_code) +
                                  ", no error output.")

diff --git a/lib/html2pdf.py b/lib/html2pdf.py
@@ -43,9 +43,7 @@ def __get_unique_version(self, filename):
         counter = 1
         file_name_parts = os.path.splitext(filename)
         while os.path.isfile(filename):
-            filename = "%s_%s%s" % (file_name_parts[0],
-                                    '_' + str(counter),
-                                    file_name_parts[1])
+            filename = f"{file_name_parts[0]}__{counter}{file_name_parts[1]}"
             counter += 1
         return filename
 
@@ -59,7 +57,7 @@ def __process_errors(self, ret_code, error):
         original_error = str(error, 'utf-8').rstrip()
         stripped_error = stripped_error.rstrip()
 
-        if ret_code > 0 and original_error == '':
+        if ret_code > 0 and not original_error:
             raise FatalException("wkhtmltopdf failed with exit code " +
                                  str(ret_code) +
                                  ", no error output.")

diff --git a/lib/utils.py b/lib/utils.py
@@ -6,11 +6,6 @@ def can_url_fetch(src):
     try:
         req = Request(src)
         urlopen(req)
-    except HTTPError:
+    except (HTTPError, URLError, ValueError):
         return False
-    except URLError:
-        return False
-    except ValueError:
-        return False
-
     return True
diff --git a/test.py b/test.py
@@ -20,15 +20,13 @@ def __init__(self, IMAP_SERVER, EMAIL_ADDRESS,
         self.mail.select()
 
     def get_emails(self):
-        uids = self.mail.uid('SEARCH', 'ALL')[1][0].split()
-        return uids
+        return self.mail.uid('SEARCH', 'ALL')[1][0].split()
 
     def get_email_message(self, email_id):
         _, data = self.mail.uid('FETCH', email_id, '(RFC822)')
         raw_email = data[0][1]
         raw_email_string = raw_email.decode('utf-8')
-        email_message = email.message_from_string(raw_email_string)
-        return email_message
+        return email.message_from_string(raw_email_string)
 
 
 email_helper = EmailHelper(IMAP_SERVER, EMAIL_ADDRESS,
@@ -45,12 +43,12 @@ def get_email_message(self, email_id):
     email_message = email_helper.get_email_message(uid)
     html = email_to_html_convertor.convert(email_message)
 
-    filename = uid.decode() + ".jpg"
+    filename = f"{uid.decode()}.jpg"
     img_path = html_to_img_convertor.save_img(
         html.encode(), output_dir, filename)
     print(img_path)
 
-    filename = uid.decode() + ".pdf"
+    filename = f"{uid.decode()}.pdf"
     pdf_path = html_to_pdf_convertor.save_pdf(
         html.encode(), output_dir, filename)
     print(pdf_path)