From 0530617678e0042d0fc41177d3cd4231f15dfcf6 Mon Sep 17 00:00:00 2001
From: Felix Behne <felixbehne20@gmail.com>
Date: Sun, 16 Apr 2023 23:31:56 +0200
Subject: [PATCH 1/6] refactor: refactor bundesanzeiger module and config to
 work concurrently (performance gain of up to 300%), add utility functions and
 increase overall performance and fault tolerance of code

---
 pyproject.toml                                |   4 +
 .../bundesanzeiger/bundesanzeiger.py          | 426 +++++++++++++++---
 src/deutschland/bundesanzeiger/model.py       |  48 +-
 src/deutschland/config.py                     |  18 +-
 4 files changed, 396 insertions(+), 100 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9141c82..f6670ea 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,10 @@ more-itertools = "^8.10.0"
 onnxruntime = "^1.10.0"
 numpy = "^1.19.0"
 protobuf= ">=3.0,<4.0"
+tqdm = "^4.64.0"
+matplotlib = "^3.5.1"
+coloredlogs = "^15.0.1"
+
 
 # Those are packages which were included in previous release so not optional
 de-autobahn = {version = "^1.0.4"}
diff --git a/src/deutschland/bundesanzeiger/bundesanzeiger.py b/src/deutschland/bundesanzeiger/bundesanzeiger.py
index 3b0eecf..c4f1aec 100644
--- a/src/deutschland/bundesanzeiger/bundesanzeiger.py
+++ b/src/deutschland/bundesanzeiger/bundesanzeiger.py
@@ -1,81 +1,83 @@
+import hashlib
+import json
+import logging
+import re
+import time
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
 from io import BytesIO
+from typing import Optional
 
+import coloredlogs
 import dateparser
+import matplotlib.pyplot as plt
 import numpy as np
 import requests
 from bs4 import BeautifulSoup
-import hashlib
-import json
+from model import Model
+from tqdm import tqdm
 
 from deutschland.config import Config, module_config
 
+# Get logger
+logger = logging.getLogger(__name__)
+
+# Install coloredlogs
+coloredlogs.install(
+    level="WARNING",
+    logger=logger,
+    fmt="%(levelname)s - %(message)s",
+)
 
-class Report:
-    __slots__ = ["date", "name", "content_url", "company", "report"]
 
-    def __init__(self, date, name, content_url, company, report=None):
-        self.date = date
+class Report:
+    def __init__(
+        self, report_date: datetime, name: str, content_url: str, company: str
+    ):
+        self.report_date = report_date
         self.name = name
         self.content_url = content_url
         self.company = company
-        self.report = report
+        self.report_content: Optional[str] = None
 
-    def to_dict(self):
+    def to_dict(self) -> dict:
         return {
-            "date": self.date,
+            "report_date": self.report_date.isoformat(),
             "name": self.name,
             "company": self.company,
-            "report": self.report,
+            "report_content": self.report_content,
         }
 
-    def to_hash(self):
-            """MD5 hash of a the report."""
+    def to_hash(self) -> str:
+        entry = self.to_dict()
+        encoded = json.dumps(entry, sort_keys=True).encode("utf-8")
+        dhash = hashlib.md5(encoded, usedforsecurity=False)
+        return dhash.hexdigest()
 
-            dhash = hashlib.md5()
-
-            entry = {
-                "date": self.date.isoformat(),
-                "name": self.name,
-                "company": self.company,
-                "report": self.report,
-            }
-
-            encoded = json.dumps(entry, sort_keys=True).encode('utf-8')
-            dhash.update(encoded)
-
-            return dhash.hexdigest()
+    def set_content(self, content: str) -> None:
+        self.report_content = content
 
 
 class Bundesanzeiger:
     __slots__ = ["session", "model", "captcha_callback", "_config"]
 
-    def __init__(self, on_captach_callback=None, config: Config = None):
-        if config is None:
-            self._config = module_config
-        else:
-            self._config = config
-
+    def __init__(self, on_captcha_callback=None, config: Optional[Config] = None):
+        self._config = config or module_config
         self.session = requests.Session()
         if self._config.proxy_config is not None:
             self.session.proxies.update(self._config.proxy_config)
-        if on_captach_callback:
-            self.callback = on_captach_callback
+        if on_captcha_callback:
+            self.captcha_callback = on_captcha_callback
         else:
-            import deutschland.bundesanzeiger.model
-
-            self.model = deutschland.bundesanzeiger.model.load_model()
+            self.model = Model().session
             self.captcha_callback = self.__solve_captcha
 
     def __solve_captcha(self, image_data: bytes):
-        import deutschland.bundesanzeiger.model
-
         image = BytesIO(image_data)
-        image_arr = deutschland.bundesanzeiger.model.load_image_arr(image)
+        image_arr = Model.load_image_arr(image)
         image_arr = image_arr.reshape((1, 50, 250, 1)).astype(np.float32)
-
         prediction = self.model.run(None, {"captcha": image_arr})[0][0]
-        prediction_str = deutschland.bundesanzeiger.model.prediction_to_str(prediction)
-
+        prediction_str = Model.prediction_to_str(prediction)
         return prediction_str
 
     def __is_captcha_needed(self, entry_content: str):
@@ -110,19 +112,76 @@ def __find_all_entries_on_page(self, page_content: str):
 
             company_name = company_name_element.contents[0].strip()
 
-            yield Report(date, entry_name, entry_link, company_name)
+            yield Report(date, entry_name, entry_link, company_name)  # type: ignore
 
-    def __generate_result(self, content: str):
-        """iterate trough all results and try to fetch single reports"""
+    def __generate_result(
+        self,
+        content: str,
+        company_name: str,
+        show_progress_bar: bool,
+        disable_manual_input: bool = False,
+    ):
         result = {}
-        for element in self.__find_all_entries_on_page(content):
-            get_element_response = self.session.get(element.content_url)
-
-            if self.__is_captcha_needed(get_element_response.text):
-                soup = BeautifulSoup(get_element_response.text, "html.parser")
-                captcha_image_src = soup.find("div", {"class": "captcha_wrapper"}).find(
-                    "img"
-                )["src"]
+        entries = list(self.__find_all_entries_on_page(content))
+        found_companies = set()
+        for entry in entries:
+            found_companies.add(entry.company)
+
+        selected_company_name = company_name
+        selected_option = len(found_companies) + 1
+
+        if len(found_companies) > 1 and not disable_manual_input:
+            logger.warning(
+                f"Found {len(found_companies)} companies for {company_name}:"
+            )
+            for idx, company in enumerate(found_companies, start=1):
+                print(f"{idx}. {company}")
+            print(f"{len(found_companies) + 1}. All")
+
+            selected_option = 0
+            while selected_option < 1 or selected_option > len(found_companies) + 1:
+                try:
+                    selected_option = int(
+                        input("Please select the correct company (enter the number): ")
+                    )
+                except ValueError:
+                    print("Invalid input. Please enter a number.")
+
+            if selected_option != len(found_companies) + 1:
+                selected_company_name = list(found_companies)[selected_option - 1]
+
+        with ThreadPoolExecutor(max_workers=4) as executor:
+            futures = []
+            for element in entries:
+                # Filter entries based on the selected company name
+                if (
+                    element.company == selected_company_name
+                    or selected_option == len(found_companies) + 1
+                ):
+                    futures.append(executor.submit(self.__process_entry, element))
+
+            for future in tqdm(
+                futures,
+                desc="Processing entries",
+                unit="entry",
+                colour="green",
+                disable=not show_progress_bar,
+            ):
+                entry_hash, entry_dict = future.result()
+                if entry_hash and entry_dict:
+                    result[entry_hash] = entry_dict
+
+        return result
+
+    def __process_entry(self, element: Report):
+        get_element_response = self.session.get(element.content_url)
+
+        if self.__is_captcha_needed(get_element_response.text):
+            soup = BeautifulSoup(get_element_response.text, "lxml")
+            captcha_wrapper = soup.find("div", {"class": "captcha_wrapper"})
+
+            if captcha_wrapper is not None:
+                captcha_image_src = captcha_wrapper.find("img")["src"]
                 img_response = self.session.get(captcha_image_src)
                 captcha_result = self.captcha_callback(img_response.content)
                 captcha_endpoint_url = soup.find_all("form")[1]["action"]
@@ -131,27 +190,67 @@ def __generate_result(self, content: str):
                     data={"solution": captcha_result, "confirm-button": "OK"},
                 )
 
-            content_soup = BeautifulSoup(get_element_response.text, "html.parser")
-            content_element = content_soup.find(
-                "div", {"class": "publication_container"}
-            )
+        content_soup = BeautifulSoup(get_element_response.text, "lxml")
+        content_element = content_soup.find("div", {"class": "publication_container"})
 
-            if not content_element:
-                continue
+        if not content_element:
+            return None, None
 
-            element.report = content_element.text
+        element.set_content(content_element.text)
 
+        return element.to_hash(), element.to_dict()
 
-            result[element.to_hash()] = element.to_dict()
-
+    def __deduplicate_reports(self, reports: dict) -> dict:
+        """
+        Deduplicates financial reports based on report name and company name, keeping the latest report.
 
-        return result
+        Args:
+            reports (dict): A dictionary containing the fetched reports, with their hash as keys and report details as values.
 
-    def get_reports(self, company_name: str):
+        Returns:
+            dict: A dictionary containing the deduplicated reports, with their hash as keys and report details as values.
         """
-        fetch all reports for this company name
-        :param company_name:
-        :return" : "Dict of all reports
+        unique_reports = {}
+        for report_hash, report in reports.items():
+            key = (report["name"], report["company"])
+            if key not in unique_reports:
+                unique_reports[key] = report
+            else:
+                existing_report = unique_reports[key]
+                if dateparser.parse(report["report_date"]) > dateparser.parse(  # type: ignore
+                    existing_report["report_date"]
+                ):
+                    unique_reports[key] = report
+
+        # Convert back to the original format with hash as keys
+        deduplicated_reports = {
+            hashlib.md5(
+                json.dumps(report, sort_keys=True).encode("utf-8"),
+                usedforsecurity=False,
+            ).hexdigest(): report
+            for report in unique_reports.values()
+        }
+        return deduplicated_reports
+
+    def get_reports(
+        self,
+        company_name: str,
+        deduplicate: bool = False,
+        show_progress_bar: bool = True,
+        disable_manual_input: bool = False,
+    ):
+        """
+        Fetches financial reports for a given company from the Bundesanzeiger website.
+
+        Args:
+            company_name (str): The name of the company for which to fetch reports.
+            deduplicate (bool, optional): Whether to deduplicate the reports based on report name and company name, keeping only the most recent report.
+                Defaults to False.
+            disable_manual_input (bool, optional): Whether to disable manual input for selecting the correct company if multiple companies are found for the given company name.
+            show_progress_bar (bool, optional): Whether to display a progress bar during the process. Defaults to True.
+
+        Returns:
+            dict: A dictionary containing the fetched reports, with their hash as keys and report details as values.
         """
         self.session.cookies["cc"] = "1628606977-805e172265bfdbde-10"
         self.session.headers.update(
@@ -182,10 +281,197 @@ def get_reports(self, company_name: str):
         response = self.session.get(
             f"https://www.bundesanzeiger.de/pub/de/start?0-2.-top%7Econtent%7Epanel-left%7Ecard-form=&fulltext={company_name}&area_select=&search_button=Suchen"
         )
-        return self.__generate_result(response.text)
+        if response.status_code != 200:
+            raise Exception("Could not fetch reports")
+
+        if deduplicate:
+            return self.__deduplicate_reports(
+                self.__generate_result(
+                    response.text, company_name, show_progress_bar, disable_manual_input
+                )
+            )
+
+        return self.__generate_result(
+            response.text, company_name, show_progress_bar, disable_manual_input
+        )
+
+    def get_reports_by_date_range(
+        self,
+        company_name: str,
+        start_date: str,
+        end_date: str,
+        deduplicate: bool = False,
+        show_progress_bar: bool = True,
+        disable_manual_input: bool = False,
+    ):
+        """
+        Fetches financial reports for a given company within a specified date range from the Bundesanzeiger website.
+
+        Args:
+            company_name (str): The name of the company for which to fetch reports.
+            start_date (str): The start date of the date range in the format 'YYYY-MM-DD'.
+            end_date (str): The end date of the date range in the format 'YYYY-MM-DD'.
+            show_progress_bar (bool, optional): Whether to display a progress bar during the process. Defaults to True.
+            disable_manual_input (bool, optional): Whether to disable manual input for selecting the correct company if multiple companies are found for the given company name.
+
+        Returns:
+            dict: A dictionary containing the fetched reports, with their hash as keys and report details as values.
+        """
+        # Set up session cookies and headers
+        self.session.cookies["cc"] = "1628606977-805e172265bfdbde-10"
+        self.session.headers.update(
+            # ... (headers)
+        )
+        # Get the jsessionid cookie
+        response = self.session.get("https://www.bundesanzeiger.de")
+        # Go to the start page
+        response = self.session.get("https://www.bundesanzeiger.de/pub/de/start?0")
+        # Perform the search within the specified date range
+        response = self.session.get(
+            f"https://www.bundesanzeiger.de/pub/de/start?0-2.-top%7Econtent%7Epanel-left%7Ecard-form=&fulltext={company_name}&area_select=&search_button=Suchen&date_start={start_date}&date_end={end_date}"
+        )
+        if response.status_code != 200:
+            raise Exception("Could not fetch reports")
+
+        if deduplicate:
+            return self.__deduplicate_reports(
+                self.__generate_result(
+                    response.text, company_name, show_progress_bar, disable_manual_input
+                )
+            )
+
+        return self.__generate_result(
+            response.text, company_name, show_progress_bar, disable_manual_input
+        )
+
+
+def extract_kpis(reports: dict) -> dict:
+    """
+    Extracts Key Performance Indicators (KPIs) from the financial reports.
+
+    Args:
+        reports (dict): A dictionary containing the financial reports with their hash as keys and report details as values.
+
+    Returns:
+        dict: A dictionary containing the extracted KPIs with their report hash as keys and KPIs as values.
+    """
+
+    kpis = {}
+
+    # Define KPI patterns to search for
+    kpi_patterns = {
+        "revenue": r"(?:revenue|umsatz|erlöse)[:\s]*([\d,.]+[mmb]?)",
+        "net_income": r"(?:net income|jahresüberschuss|nettoeinkommen)[:\s]*([\d,.]+[mmb]?)",
+        "ebit": r"(?:ebit|operating income)[:\s]*([\d,.]+[mmb]?)",
+        "ebitda": r"(?:ebitda)[:\s]*([\d,.]+[mmb]?)",
+        "gross_profit": r"(?:gross profit|bruttogewinn)[:\s]*([\d,.]+[mmb]?)",
+        "operating_profit": r"(?:operating profit|betriebsgewinn)[:\s]*([\d,.]+[mmb]?)",
+        "assets": r"(?:total assets|bilanzsumme)[:\s]*([\d,.]+[mmb]?)",
+        "liabilities": r"(?:total liabilities|gesamtverbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
+        "equity": r"(?:shareholders'? equity|eigenkapital)[:\s]*([\d,.]+[mmb]?)",
+        "current_assets": r"(?:current assets|umlaufvermögen)[:\s]*([\d,.]+[mmb]?)",
+        "current_liabilities": r"(?:current liabilities|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
+        "long_term_debt": r"(?:long[-\s]?term debt|langfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
+        "short_term_debt": r"(?:short[-\s]?term debt|kurzfristige verbindlichkeiten)[:\s]*([\d,.]+[mmb]?)",
+        "cash_and_cash_equivalents": r"(?:cash (?:and cash equivalents)?|barmittel)[:\s]*([\d,.]+[mmb]?)",
+        "dividends": r"(?:dividends?|dividende)[:\s]*([\d,.]+[mmb]?)",
+        "cash_flow": r"(?:cash flow|cashflow|cash flow from operating activities)[:\s]*([\d,.]+[mmb]?)",
+    }
+
+    for report_hash, report in reports.items():
+        report_kpis = {}
+        report_content = report["report_content"]
+
+        for kpi, pattern in kpi_patterns.items():
+            match = re.search(pattern, report_content, flags=re.IGNORECASE | re.UNICODE)
+            if match:
+                value = match.group(1)
+
+                # Clean and validate the extracted number
+                try:
+                    if not value:  # Check if value is empty
+                        cleaned_value = None
+                    else:
+                        multiplier = 1
+                        if value[-1].lower() == "m":
+                            value = value[:-1]
+                            multiplier = 1_000_000
+                        elif value[-1].lower() == "b":
+                            value = value[:-1]
+                            multiplier = 1_000_000_000
+
+                        # Drop "." separators and turn "," into the decimal point after handling multipliers
+                        value = value.replace(".", "").replace(",", ".").strip()
+                        cleaned_value = float(value) * multiplier
+                except ValueError:
+                    cleaned_value = None
+
+                if cleaned_value is not None:
+                    report_kpis[kpi] = cleaned_value
+
+        kpis[report_hash] = report_kpis
+
+    return kpis
+
+
+def visualize_kpis(kpis: dict, reports: dict):
+    """
+    Visualizes the extracted KPIs using bar charts.
+
+    Args:
+        kpis (dict): A dictionary containing the extracted KPIs with their report hash as keys and KPIs as values.
+        reports (dict): A dictionary containing the financial reports with their hash as keys and Report objects as values.
+    """
+
+    kpi_data: dict = {}
+    for report_hash, report_kpis in kpis.items():
+        report = reports[report_hash]
+        report_title = report["name"]
+
+        for kpi, value in report_kpis.items():
+            if kpi not in kpi_data:
+                kpi_data[kpi] = {"titles": [], "values": []}
+
+            kpi_data[kpi]["titles"].append(report_title)
+            kpi_data[kpi]["values"].append(value)
+
+    # Create bar charts for each KPI
+    for kpi, data in kpi_data.items():
+        plt.figure()
+        plt.bar(data["titles"], data["values"])
+        plt.title(f"{kpi.capitalize()} over Time")
+        plt.ylabel(kpi.capitalize())
+        plt.xticks(rotation=90)
+        plt.gcf().autofmt_xdate()
+        plt.tight_layout()
+        plt.show()
 
 
 if __name__ == "__main__":
     ba = Bundesanzeiger()
-    reports = ba.get_reports("Deutsche Bahn AG")
-    print(reports.keys(), len(reports))
+    start_time = time.time()
+    reports = ba.get_reports(
+        "Siemke & Co. Brücken- und Ingenieurbau GmbH",
+        deduplicate=True,
+        show_progress_bar=True,
+        disable_manual_input=True,
+    )
+    end_time = time.time()
+    elapsed_time = end_time - start_time
+    print(f"Time taken to fetch reports: {elapsed_time:.2f} seconds")
+    print(f"Found {len(reports)} reports")
+    print()
+
+    kpis = extract_kpis(reports)
+
+    for i in reports.keys():
+        report = reports[i]
+        kpi = kpis[i]
+        print(f"Report name: {report['name']}")
+        report_date = datetime.strptime(report["report_date"], "%Y-%m-%dT%H:%M:%S")
+        print(
+            f"Company name: {report['company']} (date: {report_date.strftime('%d.%m.%Y')})"
+        )
+        print(f"KPIs: {kpi}")
+        print()
+    visualize_kpis(kpis, {hash_: report for hash_, report in reports.items()})
diff --git a/src/deutschland/bundesanzeiger/model.py b/src/deutschland/bundesanzeiger/model.py
index 5d5141a..4acdaf9 100644
--- a/src/deutschland/bundesanzeiger/model.py
+++ b/src/deutschland/bundesanzeiger/model.py
@@ -5,25 +5,29 @@
 from PIL import Image
 
 
-def load_image_arr(fp):
-    image = Image.open(fp).convert("L")
-    image = np.array(image)
-    image = image / 255 * 2
-    image = image - 1
-    return image
-
-
-def character_indexes_to_str(character_indexes):
-    ALPHABET = list("abcdefghijklmnopqrstuvwxyz0123456789")
-    characters = np.array(ALPHABET)[character_indexes]
-    return "".join(list(characters)).upper()
-
-
-def prediction_to_str(label):
-    character_indexes = np.argmax(label, axis=1)
-    return character_indexes_to_str(character_indexes)
-
-
-def load_model():
-    filepath = Path(__file__).parent / "assets" / "model.onnx"
-    return InferenceSession(str(filepath))
+class Model:
+    def __init__(self):
+        self.session = self.load_model()
+
+    def load_model(self):
+        filepath = Path(__file__).parent / "assets" / "model.onnx"
+        return InferenceSession(str(filepath))
+
+    @staticmethod
+    def load_image_arr(fp):
+        image = Image.open(fp).convert("L")
+        image = np.array(image)
+        image = image / 255 * 2
+        image = image - 1
+        return image
+
+    @staticmethod
+    def character_indexes_to_str(character_indexes):
+        ALPHABET = list("abcdefghijklmnopqrstuvwxyz0123456789")
+        characters = np.array(ALPHABET)[character_indexes]
+        return "".join(list(characters)).upper()
+
+    @staticmethod
+    def prediction_to_str(label):
+        character_indexes = np.argmax(label, axis=1)
+        return Model.character_indexes_to_str(character_indexes)
diff --git a/src/deutschland/config.py b/src/deutschland/config.py
index 381a62d..e4e80c5 100644
--- a/src/deutschland/config.py
+++ b/src/deutschland/config.py
@@ -2,16 +2,18 @@
 
 
 class Config:
-    proxy_config = None
-
-    def __init__(self, proxies: Dict[str, str] = None):
-        if proxies is not None and isinstance(proxies, dict):
-            self.proxy_config = proxies
+    def __init__(self, proxies: Dict[str, str]):
+        self.proxy_config = proxies or {}
 
     def set_proxy(self, http_proxy: str, https_proxy: str):
-        if self.proxy_config is None:
-            self.proxy_config = {}
+        """
+        Sets the HTTP and HTTPS proxies to use.
+
+        Args:
+            http_proxy (str): The HTTP proxy to use.
+            https_proxy (str): The HTTPS proxy to use.
+        """
         self.proxy_config.update({"http": http_proxy, "https": https_proxy})
 
 
-module_config = Config()
+module_config = Config({})

From 0e68f2cfe696d61deb0f3f6e45fc953d2e5bf218 Mon Sep 17 00:00:00 2001
From: Felix Behne <felixbehne20@gmail.com>
Date: Sun, 16 Apr 2023 23:42:44 +0200
Subject: [PATCH 2/6] fix: remove filter function

---
 .../bundesanzeiger/bundesanzeiger.py          | 51 -------------------
 1 file changed, 51 deletions(-)

diff --git a/src/deutschland/bundesanzeiger/bundesanzeiger.py b/src/deutschland/bundesanzeiger/bundesanzeiger.py
index c4f1aec..893330f 100644
--- a/src/deutschland/bundesanzeiger/bundesanzeiger.py
+++ b/src/deutschland/bundesanzeiger/bundesanzeiger.py
@@ -295,55 +295,6 @@ def get_reports(
             response.text, company_name, show_progress_bar, disable_manual_input
         )
 
-    def get_reports_by_date_range(
-        self,
-        company_name: str,
-        start_date: str,
-        end_date: str,
-        deduplicate: bool = False,
-        show_progress_bar: bool = True,
-        disable_manual_input: bool = False,
-    ):
-        """
-        Fetches financial reports for a given company within a specified date range from the Bundesanzeiger website.
-
-        Args:
-            company_name (str): The name of the company for which to fetch reports.
-            start_date (str): The start date of the date range in the format 'YYYY-MM-DD'.
-            end_date (str): The end date of the date range in the format 'YYYY-MM-DD'.
-            show_progress_bar (bool, optional): Whether to display a progress bar during the process. Defaults to True.
-            disable_manual_input (bool, optional): Whether to disable manual input for selecting the correct company if multiple companies are found for the given company name.
-
-        Returns:
-            dict: A dictionary containing the fetched reports, with their hash as keys and report details as values.
-        """
-        # Set up session cookies and headers
-        self.session.cookies["cc"] = "1628606977-805e172265bfdbde-10"
-        self.session.headers.update(
-            # ... (headers)
-        )
-        # Get the jsessionid cookie
-        response = self.session.get("https://www.bundesanzeiger.de")
-        # Go to the start page
-        response = self.session.get("https://www.bundesanzeiger.de/pub/de/start?0")
-        # Perform the search within the specified date range
-        response = self.session.get(
-            f"https://www.bundesanzeiger.de/pub/de/start?0-2.-top%7Econtent%7Epanel-left%7Ecard-form=&fulltext={company_name}&area_select=&search_button=Suchen&date_start={start_date}&date_end={end_date}"
-        )
-        if response.status_code != 200:
-            raise Exception("Could not fetch reports")
-
-        if deduplicate:
-            return self.__deduplicate_reports(
-                self.__generate_result(
-                    response.text, company_name, show_progress_bar, disable_manual_input
-                )
-            )
-
-        return self.__generate_result(
-            response.text, company_name, show_progress_bar, disable_manual_input
-        )
-
 
 def extract_kpis(reports: dict) -> dict:
     """
@@ -453,8 +404,6 @@ def visualize_kpis(kpis: dict, reports: dict):
     reports = ba.get_reports(
         "Siemke & Co. Brücken- und Ingenieurbau GmbH",
         deduplicate=True,
-        show_progress_bar=True,
-        disable_manual_input=True,
     )
     end_time = time.time()
     elapsed_time = end_time - start_time

From d6573823bb6326f3b961fb5212c294d95ec70d71 Mon Sep 17 00:00:00 2001
From: wirthual <wirthra@gmail.com>
Date: Thu, 6 Jul 2023 14:47:05 -0700
Subject: [PATCH 3/6] black and isort

---
 .../bundesanzeiger/bundesanzeiger.py          | 23 +++++++++++--------
 tests/bundesanzeiger/test_results.py          |  6 +++--
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/src/deutschland/bundesanzeiger/bundesanzeiger.py b/src/deutschland/bundesanzeiger/bundesanzeiger.py
index b83af87..cd91137 100644
--- a/src/deutschland/bundesanzeiger/bundesanzeiger.py
+++ b/src/deutschland/bundesanzeiger/bundesanzeiger.py
@@ -1,3 +1,8 @@
+import hashlib
+import json
+import logging
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
 from io import BytesIO
 from typing import Optional
 
@@ -7,16 +12,10 @@
 import numpy as np
 import requests
 from bs4 import BeautifulSoup
-import hashlib
-import json
-import logging
-from datetime import datetime
-
 from tqdm import tqdm
-from concurrent.futures import ThreadPoolExecutor
 
-from deutschland.config import Config, module_config
 from deutschland.bundesanzeiger.model import Model
+from deutschland.config import Config, module_config
 
 # Get logger
 logger = logging.getLogger(__name__)
@@ -31,7 +30,12 @@
 
 class Report:
     def __init__(
-        self, report_date: datetime, name: str, content_url: str, company: str, raw_report: str
+        self,
+        report_date: datetime,
+        name: str,
+        content_url: str,
+        company: str,
+        raw_report: str,
     ):
         self.report_date = report_date
         self.name = name
@@ -114,8 +118,7 @@ def __find_all_entries_on_page(self, page_content: str):
             company_name = company_name_element.contents[0].strip()
             raw_report = row.prettify()
 
-
-            yield Report(date, entry_name, entry_link, company_name,raw_report)  # type: ignore
+            yield Report(date, entry_name, entry_link, company_name, raw_report)  # type: ignore
 
     def __generate_result(
         self,
diff --git a/tests/bundesanzeiger/test_results.py b/tests/bundesanzeiger/test_results.py
index 5717feb..472651a 100644
--- a/tests/bundesanzeiger/test_results.py
+++ b/tests/bundesanzeiger/test_results.py
@@ -3,12 +3,14 @@
 
 def test_results_not_empty():
     ba = Bundesanzeiger()
-    reports = ba.get_reports("Deutsches Zentrum für Luft- und Raumfahrt",disable_manual_input=True)
+    reports = ba.get_reports(
+        "Deutsches Zentrum für Luft- und Raumfahrt", disable_manual_input=True
+    )
     assert len(reports) > 0
 
 
 def test_multiple_entries():
     ba = Bundesanzeiger()
-    reports = ba.get_reports("DE000A0TGJ55",disable_manual_input=True)
+    reports = ba.get_reports("DE000A0TGJ55", disable_manual_input=True)
 
     assert len(reports) > 1

From 3fb3fdac8c5b0edc4e296e754bd75070b63a61ba Mon Sep 17 00:00:00 2001
From: wirthual <wirthra@gmail.com>
Date: Thu, 6 Jul 2023 14:48:19 -0700
Subject: [PATCH 4/6] remove 3.6 and 3.7. Add 3.11

---
 .github/workflows/runtests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml
index 07f0304..7882c6e 100644
--- a/.github/workflows/runtests.yml
+++ b/.github/workflows/runtests.yml
@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        python-version: ["3.9.13","3.8.13","3.7.13","3.6.12","3.10"]
+        python-version: ["3.9.13","3.8.13","3.10","3.11"]
       fail-fast: false
 
     steps:

From a7b7c65c037f30f00bb40269d6b3bd522afb0338 Mon Sep 17 00:00:00 2001
From: wirthual <wirthra@gmail.com>
Date: Thu, 6 Jul 2023 14:58:28 -0700
Subject: [PATCH 5/6] hashlib backwards support for 3.8

---
 src/deutschland/bundesanzeiger/bundesanzeiger.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/deutschland/bundesanzeiger/bundesanzeiger.py b/src/deutschland/bundesanzeiger/bundesanzeiger.py
index cd91137..95f62a0 100644
--- a/src/deutschland/bundesanzeiger/bundesanzeiger.py
+++ b/src/deutschland/bundesanzeiger/bundesanzeiger.py
@@ -55,7 +55,7 @@ def to_dict(self):
     def to_hash(self) -> str:
         entry = self.to_dict()
         encoded = json.dumps(entry, sort_keys=True).encode("utf-8")
-        dhash = hashlib.md5(encoded, usedforsecurity=False)
+        dhash = hashlib.new("md5",encoded, usedforsecurity=False) #If 3.8 support is dropped, use hashlib.md5()
         return dhash.hexdigest()
 
     def set_content(self, content: str) -> None:

From 638f61e027177835f26cc4fac9344388632a1941 Mon Sep 17 00:00:00 2001
From: wirthual <wirthra@gmail.com>
Date: Thu, 6 Jul 2023 15:00:14 -0700
Subject: [PATCH 6/6] black formatted

---
 src/deutschland/bundesanzeiger/bundesanzeiger.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/deutschland/bundesanzeiger/bundesanzeiger.py b/src/deutschland/bundesanzeiger/bundesanzeiger.py
index 95f62a0..7f1f576 100644
--- a/src/deutschland/bundesanzeiger/bundesanzeiger.py
+++ b/src/deutschland/bundesanzeiger/bundesanzeiger.py
@@ -55,7 +55,9 @@ def to_dict(self):
     def to_hash(self) -> str:
         entry = self.to_dict()
         encoded = json.dumps(entry, sort_keys=True).encode("utf-8")
-        dhash = hashlib.new("md5",encoded, usedforsecurity=False) #If 3.8 support is dropped, use hashlib.md5()
+        dhash = hashlib.new(
+            "md5", encoded, usedforsecurity=False
+        )  # If 3.8 support is dropped, use hashlib.md5()
         return dhash.hexdigest()
 
     def set_content(self, content: str) -> None: