@@ -54,6 +54,7 @@ def test_anything(self):
5454from selenium.webdriver.remote.remote_connection import LOGGER
5555from seleniumbase import config as sb_config
5656from seleniumbase.config import settings
57+ from seleniumbase.core import download_helper
5758from seleniumbase.core import log_helper
5859from seleniumbase.fixtures import constants
5960from seleniumbase.fixtures import css_to_xpath
@@ -4561,10 +4562,17 @@ def get_unique_links(self):
45614562 links = page_utils._get_unique_links(page_url, soup)
45624563 return links
45634564
4564- def get_link_status_code(self, link, allow_redirects=False, timeout=5):
4565+ def get_link_status_code(
4566+ self,
4567+ link,
4568+ allow_redirects=False,
4569+ timeout=5,
4570+ verify=False,
4571+ ):
45654572 """Get the status code of a link.
45664573 If the timeout is set to less than 1, it becomes 1.
45674574 If the timeout is exceeded by requests.get(), it will return a 404.
4575+ If "verify" is False, will ignore certificate errors.
45684576 For a list of available status codes, see:
45694577 https://en.wikipedia.org/wiki/List_of_HTTP_status_codes
45704578 """
@@ -4573,7 +4581,10 @@ def get_link_status_code(self, link, allow_redirects=False, timeout=5):
45734581 if timeout < 1:
45744582 timeout = 1
45754583 status_code = page_utils._get_link_status_code(
4576- link, allow_redirects=allow_redirects, timeout=timeout
4584+ link,
4585+ allow_redirects=allow_redirects,
4586+ timeout=timeout,
4587+ verify=verify,
45774588 )
45784589 return status_code
45794590
@@ -4604,10 +4615,12 @@ def assert_no_404_errors(self, multithreaded=True, timeout=None):
46044615 links = []
46054616 for link in all_links:
46064617 if (
4607- "javascript :" not in link
4618+ "data :" not in link
46084619 and "mailto:" not in link
4609- and "data :" not in link
4620+ and "javascript :" not in link
46104621 and "://fonts.gstatic.com" not in link
4622+ and "://fonts.googleapis.com" not in link
4623+ and "://googleads.g.doubleclick.net" not in link
46114624 ):
46124625 links.append(link)
46134626 if timeout:
@@ -4634,6 +4647,7 @@ def assert_no_404_errors(self, multithreaded=True, timeout=None):
46344647 broken_links.append(link)
46354648 self.__requests_timeout = None # Reset the requests.get() timeout
46364649 if len(broken_links) > 0:
4650+ broken_links = sorted(broken_links)
46374651 bad_links_str = "\n".join(broken_links)
46384652 if len(broken_links) == 1:
46394653 self.fail("Broken link detected:\n%s" % bad_links_str)
@@ -4681,6 +4695,7 @@ def get_pdf_text(
46814695 wrap=False,
46824696 nav=False,
46834697 override=False,
4698+ caching=True,
46844699 ):
46854700 """Gets text from a PDF file.
46864701 PDF can be either a URL or a file path on the local file system.
@@ -4702,7 +4717,8 @@ def get_pdf_text(
47024717 (Not needed because the PDF will be downloaded anyway.)
47034718 override - If the PDF file to be downloaded already exists in the
47044719 downloaded_files/ folder, that PDF will be used
4705- instead of downloading it again."""
4720+ instead of downloading it again.
4721+ caching - If resources should be cached via pdfminer."""
47064722 import warnings
47074723
47084724 with warnings.catch_warnings():
@@ -4716,8 +4732,6 @@ def get_pdf_text(
47164732 raise Exception("%s is not a PDF file! (Expecting a .pdf)" % pdf)
47174733 file_path = None
47184734 if page_utils.is_valid_url(pdf):
4719- from seleniumbase.core import download_helper
4720-
47214735 downloads_folder = download_helper.get_downloads_folder()
47224736 if nav:
47234737 if self.get_current_url() != pdf:
@@ -4750,7 +4764,7 @@ def get_pdf_text(
47504764 password="",
47514765 page_numbers=page_search,
47524766 maxpages=maxpages,
4753- caching=False ,
4767+ caching=caching ,
47544768 codec=codec,
47554769 )
47564770 pdf_text = self.__fix_unicode_conversion(pdf_text)
@@ -4996,8 +5010,6 @@ def get_downloads_folder(self):
49965010 any clicks that download files will also use this folder
49975011 rather than using the browser's default "downloads/" path."""
49985012 self.__check_scope()
4999- from seleniumbase.core import download_helper
5000-
50015013 return download_helper.get_downloads_folder()
50025014
50035015 def get_browser_downloads_folder(self):
@@ -5020,8 +5032,6 @@ def get_browser_downloads_folder(self):
50205032 ):
50215033 return os.path.join(os.path.expanduser("~"), "downloads")
50225034 else:
5023- from seleniumbase.core import download_helper
5024-
50255035 return download_helper.get_downloads_folder()
50265036 return os.path.join(os.path.expanduser("~"), "downloads")
50275037
0 commit comments