From 0530617678e0042d0fc41177d3cd4231f15dfcf6 Mon Sep 17 00:00:00 2001 From: Felix Behne Date: Sun, 16 Apr 2023 23:31:56 +0200 Subject: [PATCH 1/6] refactor: refactor bundesanzeiger module and config to work concurectly (performance gain of up to 300%), add utility functions and increase overall performance and fault tolerancy of code --- pyproject.toml | 4 + .../bundesanzeiger/bundesanzeiger.py | 426 +++++++++++++++--- src/deutschland/bundesanzeiger/model.py | 48 +- src/deutschland/config.py | 18 +- 4 files changed, 396 insertions(+), 100 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9141c82..f6670ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,10 @@ more-itertools = "^8.10.0" onnxruntime = "^1.10.0" numpy = "^1.19.0" protobuf= ">=3.0,<4.0" +tqdm = "^4.64.0" +matplotlib = "^3.5.1" +coloredlogs = "^15.0.1" + # Those are packages which were included in previous release so not optional de-autobahn = {version = "^1.0.4"} diff --git a/src/deutschland/bundesanzeiger/bundesanzeiger.py b/src/deutschland/bundesanzeiger/bundesanzeiger.py index 3b0eecf..c4f1aec 100644 --- a/src/deutschland/bundesanzeiger/bundesanzeiger.py +++ b/src/deutschland/bundesanzeiger/bundesanzeiger.py @@ -1,81 +1,83 @@ +import hashlib +import json +import logging +import re +import time +from concurrent.futures import ThreadPoolExecutor +from datetime import datetime from io import BytesIO +from typing import Optional +import coloredlogs import dateparser +import matplotlib.pyplot as plt import numpy as np import requests from bs4 import BeautifulSoup -import hashlib -import json +from model import Model +from tqdm import tqdm from deutschland.config import Config, module_config +# Get logger +logger = logging.getLogger(__name__) + +# Install coloredlogs +coloredlogs.install( + level="WARNING", + logger=logger, + fmt="%(levelname)s - %(message)s", +) -class Report: - __slots__ = ["date", "name", "content_url", "company", "report"] - def __init__(self, date, name, content_url, company, report=None): - self.date = date +class Report: + def __init__( + self, report_date: datetime, name: str, content_url: str, company: str + ): + self.report_date = report_date self.name = name self.content_url = content_url self.company = company - self.report = report + self.report_content: Optional[str] = None - def to_dict(self): + def to_dict(self) -> dict: return { - "date": self.date, + "report_date": self.report_date.isoformat(), "name": self.name, "company": self.company, - "report": self.report, + "report_content": self.report_content, } - def to_hash(self): - """MD5 hash of a the report.""" + def to_hash(self) -> str: + entry = self.to_dict() + encoded = json.dumps(entry, sort_keys=True).encode("utf-8") + dhash = hashlib.md5(encoded, usedforsecurity=False) + return dhash.hexdigest() - dhash = hashlib.md5() - - entry = { - "date": self.date.isoformat(), - "name": self.name, - "company": self.company, - "report": self.report, - } - - encoded = json.dumps(entry, sort_keys=True).encode('utf-8') - dhash.update(encoded) - - return dhash.hexdigest() + def set_content(self, content: str) -> None: + self.report_content = content class Bundesanzeiger: __slots__ = ["session", "model", "captcha_callback", "_config"] - def __init__(self, on_captach_callback=None, config: Config = None): - if config is None: - self._config = module_config - else: - self._config = config - + def __init__(self, on_captcha_callback=None, config: Optional[Config] = None): + self._config = config or module_config self.session = requests.Session() if self._config.proxy_config is not None: self.session.proxies.update(self._config.proxy_config) - if on_captach_callback: - self.callback = on_captach_callback + if on_captcha_callback: + self.captcha_callback = on_captcha_callback else: - import deutschland.bundesanzeiger.model - - self.model = deutschland.bundesanzeiger.model.load_model() + self.model = Model().session self.captcha_callback = self.__solve_captcha def __solve_captcha(self, image_data: bytes): - import deutschland.bundesanzeiger.model - image = BytesIO(image_data) - image_arr = deutschland.bundesanzeiger.model.load_image_arr(image) + image_arr = Model.load_image_arr(image) image_arr = image_arr.reshape((1, 50, 250, 1)).astype(np.float32) - prediction = self.model.run(None, {"captcha": image_arr})[0][0] - prediction_str = deutschland.bundesanzeiger.model.prediction_to_str(prediction) - + prediction_str = Model.prediction_to_str(prediction) return prediction_str def __is_captcha_needed(self, entry_content: str): @@ -110,19 +112,76 @@ def __find_all_entries_on_page(self, page_content: str): company_name = company_name_element.contents[0].strip() - yield Report(date, entry_name, entry_link, company_name) + yield Report(date, entry_name, entry_link, company_name) # type: ignore - def __generate_result(self, content: str): - """iterate trough all results and try to fetch single reports""" + def __generate_result( + self, + content: str, + company_name: str, + show_progress_bar: bool, + disable_manual_input: bool = False, + ): result = {} - for element in self.__find_all_entries_on_page(content): - get_element_response = self.session.get(element.content_url) - - if self.__is_captcha_needed(get_element_response.text): - soup = BeautifulSoup(get_element_response.text, "html.parser") - captcha_image_src = soup.find("div", {"class": "captcha_wrapper"}).find( - "img" - )["src"] + entries = list(self.__find_all_entries_on_page(content)) + found_companies = set() + for entry in entries: + found_companies.add(entry.company) + + selected_company_name = company_name + selected_option = len(found_companies) + 1 + + if len(found_companies) > 1 and not disable_manual_input: + logger.warning( + f"Found {len(found_companies)} companies for {company_name}:" + ) + for idx, company in enumerate(found_companies, start=1): + print(f"{idx}. {company}") + print(f"{len(found_companies) + 1}. All") + + selected_option = 0 + while selected_option < 1 or selected_option > len(found_companies) + 1: + try: + selected_option = int( + input("Please select the correct company (enter the number): ") + ) + except ValueError: + print("Invalid input. Please enter a number.") + + if selected_option != len(found_companies) + 1: + selected_company_name = list(found_companies)[selected_option - 1] + + with ThreadPoolExecutor(max_workers=4) as executor: + futures = [] + for element in entries: + # Filter entries based on the selected company name + if ( + element.company == selected_company_name + or selected_option == len(found_companies) + 1 + ): + futures.append(executor.submit(self.__process_entry, element)) + + for future in tqdm( + futures, + desc="Processing entries", + unit="entry", + colour="green", + disable=not show_progress_bar, + ): + entry_hash, entry_dict = future.result() + if entry_hash and entry_dict: + result[entry_hash] = entry_dict + + return result + + def __process_entry(self, element: Report): + get_element_response = self.session.get(element.content_url) + + if self.__is_captcha_needed(get_element_response.text): + soup = BeautifulSoup(get_element_response.text, "lxml") + captcha_wrapper = soup.find("div", {"class": "captcha_wrapper"}) + + if captcha_wrapper is not None: + captcha_image_src = captcha_wrapper.find("img")["src"] img_response = self.session.get(captcha_image_src) captcha_result = self.captcha_callback(img_response.content) captcha_endpoint_url = soup.find_all("form")[1]["action"] @@ -131,27 +190,67 @@ def __generate_result(self, content: str): data={"solution": captcha_result, "confirm-button": "OK"}, ) - content_soup = BeautifulSoup(get_element_response.text, "html.parser") - content_element = content_soup.find( - "div", {"class": "publication_container"} - ) + content_soup = BeautifulSoup(get_element_response.text, "lxml") + content_element = content_soup.find("div", {"class": "publication_container"}) - if not content_element: - continue + if not content_element: + return None, None - element.report = content_element.text + element.set_content(content_element.text) + return element.to_hash(), element.to_dict() - result[element.to_hash()] = element.to_dict() - + def __deduplicate_reports(self, reports: dict) -> dict: + """ + Deduplicates financial reports based on report name and company name, keeping the latest report. - return result + Args: + reports (dict): A dictionary containing the fetched reports, with their hash as keys and report details as values. - def get_reports(self, company_name: str): + Returns: + dict: A dictionary containing the deduplicated reports, with their hash as keys and report details as values. """ - fetch all reports for this company name - :param company_name: - :return" : "Dict of all reports + unique_reports = {} + for report_hash, report in reports.items(): + key = (report["name"], report["company"]) + if key not in unique_reports: + unique_reports[key] = report + else: + existing_report = unique_reports[key] + if dateparser.parse(report["report_date"]) > dateparser.parse( # type: ignore + existing_report["report_date"] + ): + unique_reports[key] = report + + # Convert back to the original format with hash as keys + deduplicated_reports = { + hashlib.md5( + json.dumps(report, sort_keys=True).encode("utf-8"), + usedforsecurity=False, + ).hexdigest(): report + for report in unique_reports.values() + } + return deduplicated_reports + + def get_reports( + self, + company_name: str, + deduplicate: bool = False, + show_progress_bar: bool = True, + disable_manual_input: bool = False, + ): + """ + Fetches financial reports for a given company from the Bundesanzeiger website. + + Args: + company_name (str): The name of the company for which to fetch reports. + deduplicate (bool, optional): Whether to deduplicate the reports based on the report_name and report_date, keeping only the most recent report. + Defaults to False. + disable_manual_input (bool, optional): Whether to disable manual input for selecting the correct company if multiple companies are found for the given company name. + show_progress_bar (bool, optional): Whether to display a progress bar during the process. Defaults to True. + + Returns: + dict: A dictionary containing the fetched reports, with their hash as keys and report details as values. """ self.session.cookies["cc"] = "1628606977-805e172265bfdbde-10" self.session.headers.update( @@ -182,10 +281,197 @@ def get_reports(self, company_name: str): response = self.session.get( f"https://www.bundesanzeiger.de/pub/de/start?0-2.-top%7Econtent%7Epanel-left%7Ecard-form=&fulltext={company_name}&area_select=&search_button=Suchen" ) - return self.__generate_result(response.text) + if response.status_code != 200: + raise Exception("Could not fetch reports") + + if deduplicate: + return self.__deduplicate_reports( + self.__generate_result( + response.text, company_name, show_progress_bar, disable_manual_input + ) + ) + + return self.__generate_result( + response.text, company_name, show_progress_bar, disable_manual_input + ) + + def get_reports_by_date_range( + self, + company_name: str, + start_date: str, + end_date: str, + deduplicate: bool = False, + show_progress_bar: bool = True, + disable_manual_input: bool = False, + ): + """ + Fetches financial reports for a given company within a specified date range from the Bundesanzeiger website. + + Args: + company_name (str): The name of the company for which to fetch reports. + start_date (str): The start date of the date range in the format 'YYYY-MM-DD'. + end_date (str): The end date of the date range in the format 'YYYY-MM-DD'. + show_progress_bar (bool, optional): Whether to display a progress bar during the process. Defaults to True. + disable_manual_input (bool, optional): Whether to disable manual input for selecting the correct company if multiple companies are found for the given company name. + + Returns: + dict: A dictionary containing the fetched reports, with their hash as keys and report details as values. + """ + # Set up session cookies and headers + self.session.cookies["cc"] = "1628606977-805e172265bfdbde-10" + self.session.headers.update( + # ... (headers) + ) + # Get the jsessionid cookie + response = self.session.get("https://www.bundesanzeiger.de") + # Go to the start page + response = self.session.get("https://www.bundesanzeiger.de/pub/de/start?0") + # Perform the search within the specified date range + response = self.session.get( + f"https://www.bundesanzeiger.de/pub/de/start?0-2.-top%7Econtent%7Epanel-left%7Ecard-form=&fulltext={company_name}&area_select=&search_button=Suchen&date_start={start_date}&date_end={end_date}" + ) + if response.status_code != 200: + raise Exception("Could not fetch reports") + + if deduplicate: + return self.__deduplicate_reports( + self.__generate_result( + response.text, company_name, show_progress_bar, disable_manual_input + ) + ) + + return self.__generate_result( + response.text, company_name, show_progress_bar, disable_manual_input + ) + + +def extract_kpis(reports: dict) -> dict: + """ + Extracts Key Performance Indicators (KPIs) from the financial reports. + + Args: + reports (dict): A dictionary containing the financial reports with their hash as keys and report details as values. + + Returns: + dict: A dictionary containing the extracted KPIs with their report hash as keys and KPIs as values. + """ + + kpis = {} + + # Define KPI patterns to search for + kpi_patterns = { + "revenue": r"(?:revenue|umsatz|erlöse)[:\s]*([\d,.]+[mmb]?)", + "net_income": r"(?:net income|jahresüberschuss|nettoeinkommen)[:\s]*([\d,.]+[mmb]?)", + "ebit": r"(?:ebit|operating income)[:\s]*([\d,.]+[mmb]?)", + "ebitda": r"(?:ebitda)[:\s]*([\d,.]+[mmb]?)", + "gross_profit": r"(?:gross profit|bruttogewinn)[:\s]*([\d,.]+[mmb]?)", + "operating_profit": r"(?:operating profit|betriebsgewinn)[:\s]*([\d,.]+[mmb]?)", + "assets": r"(?:total assets|bilanzsumme)[:\s]*([\d,.]+[mmb]?)", + "liabilities": r"(?:total liabilities|gesamtverbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", + "equity": r"(?:shareholders'? equity|eigenkapital)[:\s]*([\d,.]+[mmb]?)", + "current_assets": r"(?:current assets|umlaufvermögen)[:\s]*([\d,.]+[mmb]?)", + "current_liabilities": r"(?:current liabilities|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", + "long_term_debt": r"(?:long[-\s]?term debt|langfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", + "short_term_debt": r"(?:short[-\s]?term debt|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)", + "cash_and_cash_equivalents": r"(?:cash (?:and cash equivalents)?|barmittel)[:\s]*([\d,.]+[mmb]?)", + "dividends": r"(?:dividends?|dividende)[:\s]*([\d,.]+[mmb]?)", + "cash_flow": r"(?:cash flow|cashflow|cash flow from operating activities)[:\s]*([\d,.]+[mmb]?)", + } + + for report_hash, report in reports.items(): + report_kpis = {} + report_content = report["report_content"] + + for kpi, pattern in kpi_patterns.items(): + match = re.search(pattern, report_content, flags=re.IGNORECASE | re.UNICODE) + if match: + value = match.group(1) + + # Clean and validate the extracted number + try: + if not value: # Check if value is empty + cleaned_value = None + else: + multiplier = 1 + if value[-1].lower() == "m": + value = value[:-1] + multiplier = 1_000_000 + elif value[-1].lower() == "b": + value = value[:-1] + multiplier = 1_000_000_000 + + # Remove commas after checking for multipliers + value = value.replace(".", "").replace(",", ".").strip() + cleaned_value = float(value) * multiplier + except ValueError: + cleaned_value = None + + if cleaned_value is not None: + report_kpis[kpi] = cleaned_value + + kpis[report_hash] = report_kpis + + return kpis + + +def visualize_kpis(kpis: dict, reports: dict): + """ + Visualizes the extracted KPIs using bar charts. + + Args: + kpis (dict): A dictionary containing the extracted KPIs with their report hash as keys and KPIs as values. + reports (dict): A dictionary containing the financial reports with their hash as keys and Report objects as values. + """ + + kpi_data: dict = {} + for report_hash, report_kpis in kpis.items(): + report = reports[report_hash] + report_title = report["name"] + + for kpi, value in report_kpis.items(): + if kpi not in kpi_data: + kpi_data[kpi] = {"titles": [], "values": []} + + kpi_data[kpi]["titles"].append(report_title) + kpi_data[kpi]["values"].append(value) + + # Create bar charts for each KPI + for kpi, data in kpi_data.items(): + plt.figure() + plt.bar(data["titles"], data["values"]) + plt.title(f"{kpi.capitalize()} over Time") + plt.ylabel(kpi.capitalize()) + plt.xticks(rotation=90) + plt.gcf().autofmt_xdate() + plt.tight_layout() + plt.show() if __name__ == "__main__": ba = Bundesanzeiger() - reports = ba.get_reports("Deutsche Bahn AG") - print(reports.keys(), len(reports)) + start_time = time.time() + reports = ba.get_reports( + "Siemke & Co. Brücken- und Ingenieurbau GmbH", + deduplicate=True, + show_progress_bar=True, + disable_manual_input=True, + ) + end_time = time.time() + elapsed_time = end_time - start_time + print(f"Time taken to fetch reports: {elapsed_time:.2f} seconds") + print(f"Found {len(reports)} reports") + print() + + kpis = extract_kpis(reports) + + for i in reports.keys(): + report = reports[i] + kpi = kpis[i] + print(f"Report name: {report['name']}") + report_date = datetime.strptime(report["report_date"], "%Y-%m-%dT%H:%M:%S") + print( + f"Company name: {report['company']} (date: {report_date.strftime('%d.%m.%Y')})" + ) + print(f"KPIs: {kpi}") + print() + visualize_kpis(kpis, {hash_: report for hash_, report in reports.items()}) diff --git a/src/deutschland/bundesanzeiger/model.py b/src/deutschland/bundesanzeiger/model.py index 5d5141a..4acdaf9 100644 --- a/src/deutschland/bundesanzeiger/model.py +++ b/src/deutschland/bundesanzeiger/model.py @@ -5,25 +5,29 @@ from PIL import Image -def load_image_arr(fp): - image = Image.open(fp).convert("L") - image = np.array(image) - image = image / 255 * 2 - image = image - 1 - return image - - -def character_indexes_to_str(character_indexes): - ALPHABET = list("abcdefghijklmnopqrstuvwxyz0123456789") - characters = np.array(ALPHABET)[character_indexes] - return "".join(list(characters)).upper() - - -def prediction_to_str(label): - character_indexes = np.argmax(label, axis=1) - return character_indexes_to_str(character_indexes) - - -def load_model(): - filepath = Path(__file__).parent / "assets" / "model.onnx" - return InferenceSession(str(filepath)) +class Model: + def __init__(self): + self.session = self.load_model() + + def load_model(self): + filepath = Path(__file__).parent / "assets" / "model.onnx" + return InferenceSession(str(filepath)) + + @staticmethod + def load_image_arr(fp): + image = Image.open(fp).convert("L") + image = np.array(image) + image = image / 255 * 2 + image = image - 1 + return image + + @staticmethod + def character_indexes_to_str(character_indexes): + ALPHABET = list("abcdefghijklmnopqrstuvwxyz0123456789") + characters = np.array(ALPHABET)[character_indexes] + return "".join(list(characters)).upper() + + @staticmethod + def prediction_to_str(label): + character_indexes = np.argmax(label, axis=1) + return Model.character_indexes_to_str(character_indexes) diff --git a/src/deutschland/config.py b/src/deutschland/config.py index 381a62d..e4e80c5 100644 --- a/src/deutschland/config.py +++ b/src/deutschland/config.py @@ -2,16 +2,18 @@ class Config: - proxy_config = None - - def __init__(self, proxies: Dict[str, str] = None): - if proxies is not None and isinstance(proxies, dict): - self.proxy_config = proxies + def __init__(self, proxies: Dict[str, str]): + self.proxy_config = proxies or {} def set_proxy(self, http_proxy: str, https_proxy: str): - if self.proxy_config is None: - self.proxy_config = {} + """ + Sets the HTTP and HTTPS proxies to use. + + Args: + http_proxy (str): The HTTP proxy to use. + https_proxy (str): The HTTPS proxy to use. + """ self.proxy_config.update({"http": http_proxy, "https": https_proxy}) -module_config = Config() +module_config = Config({}) From 0e68f2cfe696d61deb0f3f6e45fc953d2e5bf218 Mon Sep 17 00:00:00 2001 From: Felix Behne Date: Sun, 16 Apr 2023 23:42:44 +0200 Subject: [PATCH 2/6] fix: remove filter function --- .../bundesanzeiger/bundesanzeiger.py | 51 ------------------- 1 file changed, 51 deletions(-) diff --git a/src/deutschland/bundesanzeiger/bundesanzeiger.py b/src/deutschland/bundesanzeiger/bundesanzeiger.py index c4f1aec..893330f 100644 --- a/src/deutschland/bundesanzeiger/bundesanzeiger.py +++ b/src/deutschland/bundesanzeiger/bundesanzeiger.py @@ -295,55 +295,6 @@ def get_reports( response.text, company_name, show_progress_bar, disable_manual_input ) - def get_reports_by_date_range( - self, - company_name: str, - start_date: str, - end_date: str, - deduplicate: bool = False, - show_progress_bar: bool = True, - disable_manual_input: bool = False, - ): - """ - Fetches financial reports for a given company within a specified date range from the Bundesanzeiger website. - - Args: - company_name (str): The name of the company for which to fetch reports. - start_date (str): The start date of the date range in the format 'YYYY-MM-DD'. - end_date (str): The end date of the date range in the format 'YYYY-MM-DD'. - show_progress_bar (bool, optional): Whether to display a progress bar during the process. Defaults to True. - disable_manual_input (bool, optional): Whether to disable manual input for selecting the correct company if multiple companies are found for the given company name. - - Returns: - dict: A dictionary containing the fetched reports, with their hash as keys and report details as values. - """ - # Set up session cookies and headers - self.session.cookies["cc"] = "1628606977-805e172265bfdbde-10" - self.session.headers.update( - # ... (headers) - ) - # Get the jsessionid cookie - response = self.session.get("https://www.bundesanzeiger.de") - # Go to the start page - response = self.session.get("https://www.bundesanzeiger.de/pub/de/start?0") - # Perform the search within the specified date range - response = self.session.get( - f"https://www.bundesanzeiger.de/pub/de/start?0-2.-top%7Econtent%7Epanel-left%7Ecard-form=&fulltext={company_name}&area_select=&search_button=Suchen&date_start={start_date}&date_end={end_date}" - ) - if response.status_code != 200: - raise Exception("Could not fetch reports") - - if deduplicate: - return self.__deduplicate_reports( - self.__generate_result( - response.text, company_name, show_progress_bar, disable_manual_input - ) - ) - - return self.__generate_result( - response.text, company_name, show_progress_bar, disable_manual_input - ) - def extract_kpis(reports: dict) -> dict: """ @@ -453,8 +404,6 @@ def visualize_kpis(kpis: dict, reports: dict): reports = ba.get_reports( "Siemke & Co. Brücken- und Ingenieurbau GmbH", deduplicate=True, - show_progress_bar=True, - disable_manual_input=True, ) end_time = time.time() elapsed_time = end_time - start_time From d6573823bb6326f3b961fb5212c294d95ec70d71 Mon Sep 17 00:00:00 2001 From: wirthual Date: Thu, 6 Jul 2023 14:47:05 -0700 Subject: [PATCH 3/6] black and isort --- .../bundesanzeiger/bundesanzeiger.py | 23 +++++++++++-------- tests/bundesanzeiger/test_results.py | 6 +++-- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/deutschland/bundesanzeiger/bundesanzeiger.py b/src/deutschland/bundesanzeiger/bundesanzeiger.py index b83af87..cd91137 100644 --- a/src/deutschland/bundesanzeiger/bundesanzeiger.py +++ b/src/deutschland/bundesanzeiger/bundesanzeiger.py @@ -1,3 +1,8 @@ +import hashlib +import json +import logging +from concurrent.futures import ThreadPoolExecutor +from datetime import datetime from io import BytesIO from typing import Optional @@ -7,16 +12,10 @@ import numpy as np import requests from bs4 import BeautifulSoup -import hashlib -import json -import logging -from datetime import datetime - from tqdm import tqdm -from concurrent.futures import ThreadPoolExecutor -from deutschland.config import Config, module_config from deutschland.bundesanzeiger.model import Model +from deutschland.config import Config, module_config # Get logger logger = logging.getLogger(__name__) @@ -31,7 +30,12 @@ class Report: def __init__( - self, report_date: datetime, name: str, content_url: str, company: str, raw_report: str + self, + report_date: datetime, + name: str, + content_url: str, + company: str, + raw_report: str, ): self.report_date = report_date self.name = name @@ -114,8 +118,7 @@ def __find_all_entries_on_page(self, page_content: str): company_name = company_name_element.contents[0].strip() raw_report = row.prettify() - - yield Report(date, entry_name, entry_link, company_name,raw_report) # type: ignore + yield Report(date, entry_name, entry_link, company_name, raw_report) # type: ignore def __generate_result( self, diff --git a/tests/bundesanzeiger/test_results.py b/tests/bundesanzeiger/test_results.py index 5717feb..472651a 100644 --- a/tests/bundesanzeiger/test_results.py +++ b/tests/bundesanzeiger/test_results.py @@ -3,12 +3,14 @@ def test_results_not_empty(): ba = Bundesanzeiger() - reports = ba.get_reports("Deutsches Zentrum für Luft- und Raumfahrt",disable_manual_input=True) + reports = ba.get_reports( + "Deutsches Zentrum für Luft- und Raumfahrt", disable_manual_input=True + ) assert len(reports) > 0 def test_multiple_entries(): ba = Bundesanzeiger() - reports = ba.get_reports("DE000A0TGJ55",disable_manual_input=True) + reports = ba.get_reports("DE000A0TGJ55", disable_manual_input=True) assert len(reports) > 1 From 3fb3fdac8c5b0edc4e296e754bd75070b63a61ba Mon Sep 17 00:00:00 2001 From: wirthual Date: Thu, 6 Jul 2023 14:48:19 -0700 Subject: [PATCH 4/6] remove 3.6 and 3.7. Add 3.11 --- .github/workflows/runtests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml index 07f0304..7882c6e 100644 --- a/.github/workflows/runtests.yml +++ b/.github/workflows/runtests.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ["3.9.13","3.8.13","3.7.13","3.6.12","3.10"] + python-version: ["3.9.13","3.8.13","3.10","3.11"] fail-fast: false steps: From a7b7c65c037f30f00bb40269d6b3bd522afb0338 Mon Sep 17 00:00:00 2001 From: wirthual Date: Thu, 6 Jul 2023 14:58:28 -0700 Subject: [PATCH 5/6] hashlib backwards support for 3.8 --- src/deutschland/bundesanzeiger/bundesanzeiger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/deutschland/bundesanzeiger/bundesanzeiger.py b/src/deutschland/bundesanzeiger/bundesanzeiger.py index cd91137..95f62a0 100644 --- a/src/deutschland/bundesanzeiger/bundesanzeiger.py +++ b/src/deutschland/bundesanzeiger/bundesanzeiger.py @@ -55,7 +55,7 @@ def to_dict(self): def to_hash(self) -> str: entry = self.to_dict() encoded = json.dumps(entry, sort_keys=True).encode("utf-8") - dhash = hashlib.md5(encoded, usedforsecurity=False) + dhash = hashlib.new("md5",encoded, usedforsecurity=False) #If 3.8 support is dropped, use hashlib.md5() return dhash.hexdigest() def set_content(self, content: str) -> None: From 638f61e027177835f26cc4fac9344388632a1941 Mon Sep 17 00:00:00 2001 From: wirthual Date: Thu, 6 Jul 2023 15:00:14 -0700 Subject: [PATCH 6/6] black formated --- src/deutschland/bundesanzeiger/bundesanzeiger.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/deutschland/bundesanzeiger/bundesanzeiger.py b/src/deutschland/bundesanzeiger/bundesanzeiger.py index 95f62a0..7f1f576 100644 --- a/src/deutschland/bundesanzeiger/bundesanzeiger.py +++ b/src/deutschland/bundesanzeiger/bundesanzeiger.py @@ -55,7 +55,9 @@ def to_dict(self): def to_hash(self) -> str: entry = self.to_dict() encoded = json.dumps(entry, sort_keys=True).encode("utf-8") - dhash = hashlib.new("md5",encoded, usedforsecurity=False) #If 3.8 support is dropped, use hashlib.md5() + dhash = hashlib.new( + "md5", encoded, usedforsecurity=False + ) # If 3.8 support is dropped, use hashlib.md5() return dhash.hexdigest() def set_content(self, content: str) -> None: