From 79e54d3a04a903bad8853519bc8e1e24b1a9e474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Thu, 10 Apr 2025 12:52:01 +0200 Subject: [PATCH 01/53] Minimal viable product: partial bulk download --- open_mastr/mastr.py | 14 ++- open_mastr/utils/helpers.py | 12 ++- .../xml_download/utils_download_bulk.py | 94 +++++++++++++++++++ 3 files changed, 112 insertions(+), 8 deletions(-) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index 646a50b1..1ce558c6 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -2,7 +2,10 @@ from sqlalchemy import inspect, create_engine # import xml dependencies -from open_mastr.xml_download.utils_download_bulk import download_xml_Mastr +from open_mastr.xml_download.utils_download_bulk import ( + download_xml_Mastr, + download_xml_Mastr_partial +) from open_mastr.xml_download.utils_write_to_database import ( write_mastr_xml_to_database, ) @@ -224,7 +227,7 @@ def download( date = transform_date_parameter(self, method, date, **kwargs) - if method == "bulk": + if method == "bulk" or method == 'partial bulk': # Find the name of the zipped xml folder bulk_download_date = parse_date_string(date) xml_folder_path = os.path.join(self.output_dir, "data", "xml_download") @@ -233,7 +236,10 @@ def download( xml_folder_path, f"Gesamtdatenexport_{bulk_download_date}.zip", ) - download_xml_Mastr(zipped_xml_file_path, date, xml_folder_path) + if method == 'bulk': + download_xml_Mastr(zipped_xml_file_path, date, xml_folder_path) + else: + download_xml_Mastr_partial(zipped_xml_file_path, date, data, xml_folder_path) print( f"\nWould you like to speed up the bulk download?\n" @@ -248,7 +254,7 @@ def download( bulk_cleansing=bulk_cleansing, bulk_download_date=bulk_download_date, ) - + if method == "API": validate_api_credentials() diff --git a/open_mastr/utils/helpers.py b/open_mastr/utils/helpers.py index ad4f4dd8..f08a7049 100644 --- a/open_mastr/utils/helpers.py +++ b/open_mastr/utils/helpers.py @@ -122,8 +122,8 @@ def 
validate_parameter_format_for_download_method( def validate_parameter_method(method) -> None: - if method not in ["bulk", "API"]: - raise ValueError("parameter method has to be either 'bulk' or 'API'.") + if method not in ["bulk", "partial bulk", "API"]: + raise ValueError("parameter method has to be either 'bulk', 'partial bulk' or 'API'.") def validate_parameter_api_location_types(api_location_types) -> None: @@ -172,7 +172,7 @@ def validate_parameter_api_limit(api_limit) -> None: def validate_parameter_date(method, date) -> None: if date is None: # default return - if method == "bulk": + if method == "bulk" or method == "partial bulk": if date not in ["today", "existing"]: try: _ = parse(date) @@ -216,6 +216,10 @@ def validate_parameter_data(method, data) -> None: raise ValueError( f"Allowed values for parameter data with bulk method are {BULK_DATA}" ) + if method == "partial bulk" and value not in BULK_DATA: + raise ValueError( + f"Allowed values for parameter data with bulk method are {BULK_DATA}" + ) if method == "API" and value not in API_DATA: raise ValueError( f"Allowed values for parameter data with API method are {API_DATA}" @@ -298,7 +302,7 @@ def transform_data_parameter( def transform_date_parameter(self, method, date, **kwargs): - if method == "bulk": + if method == "bulk" or method == "partial bulk": date = kwargs.get("bulk_date", date) date = "today" if date is None else date if date == "existing": diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index 02e69e84..42861cef 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -3,13 +3,17 @@ import time from importlib.metadata import PackageNotFoundError, version from zipfile import BadZipfile, ZipFile +import shutil +from pathlib import Path import numpy as np import requests from tqdm import tqdm +import unzip_http # setup logger from open_mastr.utils.config import setup_logger 
+from open_mastr.utils.constants import BULK_INCLUDE_TABLES_MAP try: USER_AGENT = ( @@ -203,3 +207,93 @@ def download_xml_Mastr( time_b = time.perf_counter() print(f"Download is finished. It took {int(np.around(time_b - time_a))} seconds.") print(f"MaStR was successfully downloaded to {xml_folder_path}.") + + +def download_xml_Mastr_partial( + save_path: str, bulk_date_string: str, bulk_data_list: list, xml_folder_path: str +) -> None: + """Downloads the zipped MaStR. + + Parameters + ----------- + save_path: str + The path where the downloaded MaStR zipped folder will be saved. + """ + + if os.path.exists(save_path): + try: + _ = ZipFile(save_path) + except BadZipfile: + log.info(f"Bad Zip file is deleted: {save_path}") + os.remove(save_path) + else: + print("MaStR already downloaded.") + return None + + if bulk_date_string != "today": + raise OSError( + "There exists no file for given date. MaStR can only be downloaded " + "from the website if today's date is given." + ) + shutil.rmtree(xml_folder_path, ignore_errors=True) + os.makedirs(xml_folder_path, exist_ok=True) + + print_message = ( + "Download has started, this can take several minutes." + "The download bar is only a rough estimate." + ) + warning_message = ( + "Warning: The servers from MaStR restrict the download speed." + " You may want to download it another time." + ) + print(print_message) + + now = time.localtime() + url = gen_url(now) + + time_a = time.perf_counter() + r = requests.get(url, stream=True, headers={"User-Agent": USER_AGENT}) + if r.status_code == 404: + log.warning( + "Download file was not found. Assuming that the new file was not published yet and retrying with yesterday." 
+ ) + now = time.localtime( + time.mktime(now) - (24 * 60 * 60) + ) # subtract 1 day from the date + url = gen_url(now) + r = requests.get(url, stream=True, headers={"User-Agent": USER_AGENT}) + if r.status_code == 404: + url = gen_url(now, use_version="before") # Use lower MaStR Version + log.warning( + f"Download file was not found. Assuming that the version of MaStR has changed and retrying with download link: {url}" + ) + r = requests.get(url, stream=True, headers={"User-Agent": USER_AGENT}) + if r.status_code == 404: + url = gen_url(now, use_version="after") # Use higher MaStR Version + log.warning( + f"Download file was not found. Assuming that the version of MaStR has changed and retrying with download link: {url}" + ) + r = requests.get(url, stream=True, headers={"User-Agent": USER_AGENT}) + + if r.status_code == 404: + log.error("Could not download file: download URL not found") + return + + remote_zip_file = unzip_http.RemoteZipFile(url) + remote_zip_names = [remote_zip_name.lower().split('_')[0].split('.')[0] for remote_zip_name in remote_zip_file.namelist()] + + remote_index_list = [] + for bulk_data_name in bulk_data_list: + for bulk_file_name in BULK_INCLUDE_TABLES_MAP[bulk_data_name]: + remote_index_list = [remote_index for remote_index, remote_zip_name in enumerate(remote_zip_names) if remote_zip_name == bulk_file_name] + for remote_index in remote_index_list: + remote_zip_file.extract(remote_zip_file.namelist()[remote_index],path=Path(save_path[:-4])) + + remote_zip_file.extract('Katalogwerte.xml',path=Path(save_path[:-4])) + + shutil.make_archive(save_path[:-4], 'zip', save_path[:-4]) + shutil.rmtree(save_path[:-4]) + + time_b = time.perf_counter() + print(f"Download is finished. 
It took {int(np.around(time_b - time_a))} seconds.") + print(f"MaStR was successfully downloaded to {xml_folder_path}.") From 6a86ac533a80f3dc1cfbc404685ddfa9ca266f5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Fri, 25 Apr 2025 12:14:28 +0200 Subject: [PATCH 02/53] Remove "partial-bulk" from helpers functions --- open_mastr/utils/helpers.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/open_mastr/utils/helpers.py b/open_mastr/utils/helpers.py index f08a7049..1543e222 100644 --- a/open_mastr/utils/helpers.py +++ b/open_mastr/utils/helpers.py @@ -122,8 +122,8 @@ def validate_parameter_format_for_download_method( def validate_parameter_method(method) -> None: - if method not in ["bulk", "partial bulk", "API"]: - raise ValueError("parameter method has to be either 'bulk', 'partial bulk' or 'API'.") + if method not in ["bulk", "API"]: + raise ValueError("parameter method has to be either 'bulk', or 'API'.") def validate_parameter_api_location_types(api_location_types) -> None: @@ -172,7 +172,7 @@ def validate_parameter_api_limit(api_limit) -> None: def validate_parameter_date(method, date) -> None: if date is None: # default return - if method == "bulk" or method == "partial bulk": + if method == "bulk": if date not in ["today", "existing"]: try: _ = parse(date) @@ -216,10 +216,6 @@ def validate_parameter_data(method, data) -> None: raise ValueError( f"Allowed values for parameter data with bulk method are {BULK_DATA}" ) - if method == "partial bulk" and value not in BULK_DATA: - raise ValueError( - f"Allowed values for parameter data with bulk method are {BULK_DATA}" - ) if method == "API" and value not in API_DATA: raise ValueError( f"Allowed values for parameter data with API method are {API_DATA}" @@ -302,7 +298,7 @@ def transform_data_parameter( def transform_date_parameter(self, method, date, **kwargs): - if method == "bulk" or method == "partial bulk": + if method == "bulk": date = kwargs.get("bulk_date", 
date) date = "today" if date is None else date if date == "existing": From 3580bfd2096bb04cf198b74716e80363bdef52f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Fri, 25 Apr 2025 12:17:08 +0200 Subject: [PATCH 03/53] Remove "partial bulk" from Mastr.download function --- open_mastr/mastr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index 1ce558c6..03165375 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -227,7 +227,7 @@ def download( date = transform_date_parameter(self, method, date, **kwargs) - if method == "bulk" or method == 'partial bulk': + if method == "bulk": # Find the name of the zipped xml folder bulk_download_date = parse_date_string(date) xml_folder_path = os.path.join(self.output_dir, "data", "xml_download") @@ -236,7 +236,7 @@ def download( xml_folder_path, f"Gesamtdatenexport_{bulk_download_date}.zip", ) - if method == 'bulk': + if data is None: download_xml_Mastr(zipped_xml_file_path, date, xml_folder_path) else: download_xml_Mastr_partial(zipped_xml_file_path, date, data, xml_folder_path) From 6c5a052201cc6a4e5c9aca541b5128e5fdbe072a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Fri, 25 Apr 2025 14:12:07 +0200 Subject: [PATCH 04/53] Add download completeness check, add sequential download functionality --- .../xml_download/utils_download_bulk.py | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index 42861cef..1a8af87d 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -208,6 +208,21 @@ def download_xml_Mastr( print(f"Download is finished. 
It took {int(np.around(time_b - time_a))} seconds.") print(f"MaStR was successfully downloaded to {xml_folder_path}.") +def check_download_completeness( + save_path: str,bulk_data_list: list +) -> list: + """Checks if an existing download contains the xml-files corresponding to the bulk_data_list. + """ + with ZipFile(save_path, 'r') as zip_ref: + existing_files = [zip_name.lower().split('_')[0].split('.')[0] for zip_name in zip_ref.namelist()] + + missing_data_set = set() + for bulk_data_name in bulk_data_list: + for bulk_file_name in BULK_INCLUDE_TABLES_MAP[bulk_data_name]: + if bulk_file_name not in existing_files: + missing_data_set.add(bulk_data_name) + return list(missing_data_set) + def download_xml_Mastr_partial( save_path: str, bulk_date_string: str, bulk_data_list: list, xml_folder_path: str @@ -227,8 +242,12 @@ def download_xml_Mastr_partial( log.info(f"Bad Zip file is deleted: {save_path}") os.remove(save_path) else: - print("MaStR already downloaded.") - return None + bulk_data_list = check_download_completeness(save_path,bulk_data_list) + if bool(bulk_data_list): + print(f"MaStR is missing the following data: {bulk_data_list}") + else: + print("MaStR already downloaded.") + return None if bulk_date_string != "today": raise OSError( From 17e8347767343736d1a94bc47923c5039ff933d1 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 29 Apr 2025 06:02:18 +0200 Subject: [PATCH 05/53] Remove default branch for test pypi publication Branch was always set on workflow trigger and manually selected branch ignored --- .github/workflows/test-pypi-publish.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test-pypi-publish.yml b/.github/workflows/test-pypi-publish.yml index 5fae8627..83c19222 100644 --- a/.github/workflows/test-pypi-publish.yml +++ b/.github/workflows/test-pypi-publish.yml @@ -13,8 +13,6 @@ jobs: environment: pypi-publish steps: - uses: actions/checkout@v4 - with: - ref: release - name: Set up Python 3.10 uses: 
actions/setup-python@v3 with: From c00d6057256b9bcf6c36a1a4923bb503753616ff Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 29 Apr 2025 06:21:22 +0200 Subject: [PATCH 06/53] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1dbe5e5b..44934ba5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ ## [v0.XX.X] unreleased - 202X-XX-XX ### Added ### Changed +- Fix package publication workflow + [#636](https://github.com/OpenEnergyPlatform/open-MaStR/pull/636) ### Removed From aa4bafc91abba7b05dde75a7a0fa5b0296e5867c Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 29 Apr 2025 06:24:54 +0200 Subject: [PATCH 07/53] Remove trailing white space in changelog to trigger tests --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44934ba5..53591623 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,7 +37,7 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ [#621](https://github.com/OpenEnergyPlatform/open-MaStR/pull/621) ### Removed - Moved old code artefacts from `scripts` folder to paper specific - [repository](https://github.com/FlorianK13/verify-marktstammdaten) + [repository](https://github.com/FlorianK13/verify-marktstammdaten) [#561](https://github.com/OpenEnergyPlatform/open-MaStR/pull/561) - Remove old dependencies and broken README links [#619](https://github.com/OpenEnergyPlatform/open-MaStR/pull/619) From d18fafe7b1cc1c0ef80166a939ed9a16a55457ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Mon, 5 May 2025 16:20:30 +0200 Subject: [PATCH 08/53] Add unzip_http as own function instead of install and import --- open_mastr/utils/unzip_http.py | 408 +++++++++++++++++++++++++++++++++ 1 file changed, 408 insertions(+) create mode 100644 open_mastr/utils/unzip_http.py diff --git 
a/open_mastr/utils/unzip_http.py b/open_mastr/utils/unzip_http.py new file mode 100644 index 00000000..548a7890 --- /dev/null +++ b/open_mastr/utils/unzip_http.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2022 Saul Pwanson +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# Originally from +# https://github.com/saulpw/unzip-http +# Adjusted for our use case + +""" +usage: unzip_http [-h] [-l] [-f] [-o] url [files ...] + +Extract individual files from .zip files over http without downloading the +entire archive. HTTP server must send `Accept-Ranges: bytes` and +`Content-Length` in headers. + +positional arguments: + url URL of the remote zip file + files Files to extract. If no filenames given, displays .zip + contents (filenames and sizes). Each filename can be a + wildcard glob. 
+ +options: + -h, --help show this help message and exit + -l, --list List files in the remote zip file + -f, --full-filepaths Recreate folder structure from zip file when extracting + (instead of extracting the files to the current + directory) + -o, --stdout Write files to stdout (if multiple files: concatenate + them to stdout, in zipfile order) +""" + +import sys +import os +import io +import math +import time +import zlib +import struct +import fnmatch +import argparse +import pathlib +import urllib.parse +import zipfile + + +__version__ = '0.6' + + +def error(s): + raise Exception(s) + +def warning(s): + print(s, file=sys.stderr) + +def get_bits(val:int, *args): + 'Generate bitfields (one for each arg) from LSB to MSB.' + for n in args: + x = val & (2**n-1) + val >>= n + yield x + + +class RemoteZipInfo: + def __init__(self, filename:str='', + date_time:int = 0, + header_offset:int = 0, + compress_type:int = 0, + compress_size:int = 0, + file_size:int = 0): + self.filename = filename + self.header_offset = header_offset + self.compress_type = compress_type + self.compress_size = compress_size + self.file_size = file_size + + sec, mins, hour, day, mon, year = get_bits(date_time, 5, 6, 5, 5, 4, 7) + self.date_time = (year+1980, mon, day, hour, mins, sec) + + def is_dir(self): + return self.filename.endswith('/') + + def parse_extra(self, extra): + i = 0 + while i < len(extra): + fieldid, fieldsz = struct.unpack_from('= 0: + magic, eocd_sz, create_ver, min_ver, disk_num, disk_start, disk_num_records, total_num_records, \ + cdir_bytes, cdir_start = struct.unpack_from(self.fmt_eocd64, resp.data, offset=i) + else: + i = resp.data.rfind(self.magic_eocd) + if i >= 0: + magic, \ + disk_num, disk_start, disk_num_records, total_num_records, \ + cdir_bytes, cdir_start, comment_len = struct.unpack_from(self.fmt_eocd, resp.data, offset=i) + + if cdir_start < 0 or cdir_start >= self.zip_size: + error('cannot find central directory') + + if self.zip_size <= 65536: + 
filehdr_index = cdir_start + else: + filehdr_index = 65536 - (self.zip_size - cdir_start) + + if filehdr_index < 0: + resp = self.get_range(cdir_start, self.zip_size - cdir_start) + filehdr_index = 0 + + cdir_end = filehdr_index + cdir_bytes + while filehdr_index < cdir_end: + sizeof_cdirentry = struct.calcsize(self.fmt_cdirentry) + + magic, ver, ver_needed, flags, method, date_time, crc, \ + complen, uncomplen, fnlen, extralen, commentlen, \ + disknum_start, internal_attr, external_attr, local_header_ofs = \ + struct.unpack_from(self.fmt_cdirentry, resp.data, offset=filehdr_index) + + filehdr_index += sizeof_cdirentry + + filename = resp.data[filehdr_index:filehdr_index+fnlen] + filehdr_index += fnlen + + extra = resp.data[filehdr_index:filehdr_index+extralen] + filehdr_index += extralen + + # comment = resp.data[filehdr_index:filehdr_index+commentlen] + filehdr_index += commentlen + + rzi = RemoteZipInfo(filename.decode(), date_time, local_header_ofs, method, complen, uncomplen) + + rzi.parse_extra(extra) + yield rzi + + def extract(self, member, path=None, pwd=None): + if pwd: + raise NotImplementedError('Passwords not supported yet') + + path = path or pathlib.Path('.') + + outpath = path/member + os.makedirs(outpath.parent, exist_ok=True) + with self.open(member) as fpin: + with open(path/member, mode='wb') as fpout: + while True: + r = fpin.read(65536) + if not r: + break + fpout.write(r) + + + def extractzip(self, member, path=None, pwd=None): + if pwd: + raise NotImplementedError('Passwords not supported yet') + + path = path or pathlib.Path('.') + outpath = path + os.makedirs(outpath.parent, exist_ok=True) + with self.open(member) as fpin: + with zipfile.ZipFile(outpath, 'a', zipfile.ZIP_DEFLATED) as zout: + with zout.open(member,'w') as fpout: + while True: + r = fpin.read(65536) + if not r: + break + fpout.write(r) + + + def extractall(self, path=None, members=None, pwd=None): + for fn in members or self.namelist(): + self.extract(fn, path, pwd=pwd) + + 
def get_range(self, start, n): + return self.http.request('GET', self.url, headers={'Range': f'bytes={start}-{start+n-1}'}, preload_content=False) + + def matching_files(self, *globs): + for f in self.files.values(): + if any(fnmatch.fnmatch(f.filename, g) for g in globs): + yield f + + def open(self, fn): + if isinstance(fn, str): + f = list(self.matching_files(fn)) + if not f: + error(f'no files matching {fn}') + f = f[0] + else: + f = fn + + sizeof_localhdr = struct.calcsize(self.fmt_localhdr) + r = self.get_range(f.header_offset, sizeof_localhdr) + localhdr = struct.unpack_from(self.fmt_localhdr, r.data) + magic, ver, flags, method, dos_datetime, _, _, uncomplen, fnlen, extralen = localhdr + if method == 0: # none + return self.get_range(f.header_offset + sizeof_localhdr + fnlen + extralen, f.compress_size) + elif method == 8: # DEFLATE + resp = self.get_range(f.header_offset + sizeof_localhdr + fnlen + extralen, f.compress_size) + return io.BufferedReader(RemoteZipStream(resp, f)) + else: + error(f'unknown compression method {method}') + + def open_text(self, fn): + return io.TextIOWrapper(self.open(fn)) + + +class RemoteZipStream(io.RawIOBase): + def __init__(self, fp, info): + super().__init__() + self.raw = fp + self._decompressor = zlib.decompressobj(-15) + self._buffer = bytes() + + def readable(self): + return True + + def readinto(self, b): + r = self.read(len(b)) + b[:len(r)] = r + return len(r) + + def read(self, n): + while n > len(self._buffer): + r = self.raw.read(2**18) + if not r: + self._buffer += self._decompressor.flush() + break + self._buffer += self._decompressor.decompress(r) + + ret = self._buffer[:n] + self._buffer = self._buffer[n:] + + return ret + + + ### script start + +class StreamProgress: + def __init__(self, fp, name='', total=0): + self.name = name + self.fp = fp + self.total = total + self.start_time = time.time() + self.last_update = 0 + self.amtread = 0 + + def read(self, n): + r = self.fp.read(n) + self.amtread += len(r) + 
now = time.time() + if now - self.last_update > 0.1: + self.last_update = now + + elapsed_s = now - self.start_time + sys.stderr.write(f'\r{elapsed_s:.0f}s {self.amtread/10**6:.02f}/{self.total/10**6:.02f}MB ({self.amtread/10**6/elapsed_s:.02f} MB/s) {self.name}') + + if not r: + sys.stderr.write('\n') + + return r + + +def list_files(rzf): + def safelog(x): + return 1 if x == 0 else math.ceil(math.log10(x)) + + digits_compr = max(safelog(f.compress_size) for f in rzf.infolist()) + digits_plain = max(safelog(f.file_size ) for f in rzf.infolist()) + fmtstr = f'%{digits_compr}d -> %{digits_plain}d\t%s' + for f in rzf.infolist(): + print(fmtstr % (f.compress_size, f.file_size, f.filename), file=sys.stderr) + + +def extract_one(outfile, rzf, f, ofname): + print(f'Extracting {f.filename} to {ofname}...', file=sys.stderr) + + fp = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size) + while r := fp.read(2**18): + outfile.write(r) + + +def download_file(f, rzf, args): + if not any(fnmatch.fnmatch(f.filename, g) for g in args.files): + return + + if args.stdout: + extract_one(sys.stdout.buffer, rzf, f, "stdout") + else: + path = pathlib.Path(f.filename) + if args.full_filepaths: + path.parent.mkdir(parents=True, exist_ok=True) + else: + path = path.name + + with open(str(path), 'wb') as of: + extract_one(of, rzf, f, str(path)) + + +def main(): + parser = argparse.ArgumentParser(prog='unzip-http', \ + description="Extract individual files from .zip files over http without downloading the entire archive. 
HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.") + + parser.add_argument('-l', '--list', action='store_true', default=False, + help="List files in the remote zip file") + parser.add_argument('-f', '--full-filepaths', action='store_true', default=False, + help="Recreate folder structure from zip file when extracting (instead of extracting the files to the current directory)") + parser.add_argument('-o', '--stdout', action='store_true', default=False, + help="Write files to stdout (if multiple files: concatenate them to stdout, in zipfile order)") + + parser.add_argument("url", nargs=1, help="URL of the remote zip file") + parser.add_argument("files", nargs='*', help="Files to extract. If no filenames given, displays .zip contents (filenames and sizes). Each filename can be a wildcard glob.") + + args = parser.parse_args() + + rzf = RemoteZipFile(args.url[0]) + if args.list or len(args.files) == 0: + list_files(rzf) + else: + for f in rzf.infolist(): + download_file(f, rzf, args) + + + +if __name__ == '__main__': + main() From c579ab3efd9b70dd83f5d0d781e39a5103653355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Mon, 5 May 2025 16:22:48 +0200 Subject: [PATCH 09/53] Add katalogwerte_bool and fit code to new utils.unzip_http --- open_mastr/mastr.py | 2 +- .../xml_download/utils_download_bulk.py | 26 +++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index 03165375..79917a7c 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -239,7 +239,7 @@ def download( if data is None: download_xml_Mastr(zipped_xml_file_path, date, xml_folder_path) else: - download_xml_Mastr_partial(zipped_xml_file_path, date, data, xml_folder_path) + data = download_xml_Mastr_partial(zipped_xml_file_path, date, data, xml_folder_path) print( f"\nWould you like to speed up the bulk download?\n" diff --git a/open_mastr/xml_download/utils_download_bulk.py 
b/open_mastr/xml_download/utils_download_bulk.py index 1a8af87d..51883dcc 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -3,17 +3,16 @@ import time from importlib.metadata import PackageNotFoundError, version from zipfile import BadZipfile, ZipFile -import shutil from pathlib import Path import numpy as np import requests from tqdm import tqdm -import unzip_http # setup logger from open_mastr.utils.config import setup_logger from open_mastr.utils.constants import BULK_INCLUDE_TABLES_MAP +from open_mastr.utils import unzip_http try: USER_AGENT = ( @@ -208,9 +207,10 @@ def download_xml_Mastr( print(f"Download is finished. It took {int(np.around(time_b - time_a))} seconds.") print(f"MaStR was successfully downloaded to {xml_folder_path}.") + def check_download_completeness( save_path: str,bulk_data_list: list -) -> list: +) -> (list, bool): """Checks if an existing download contains the xml-files corresponding to the bulk_data_list. """ with ZipFile(save_path, 'r') as zip_ref: @@ -221,12 +221,16 @@ def check_download_completeness( for bulk_file_name in BULK_INCLUDE_TABLES_MAP[bulk_data_name]: if bulk_file_name not in existing_files: missing_data_set.add(bulk_data_name) - return list(missing_data_set) + + katalogwerte_bool = 0 + if 'katalogwerte' in existing_files: + katalogwerte_bool = True + return list(missing_data_set), katalogwerte_bool def download_xml_Mastr_partial( save_path: str, bulk_date_string: str, bulk_data_list: list, xml_folder_path: str -) -> None: +) -> list: """Downloads the zipped MaStR. Parameters @@ -235,6 +239,7 @@ def download_xml_Mastr_partial( The path where the downloaded MaStR zipped folder will be saved. 
""" + katalogwerte_bool = False if os.path.exists(save_path): try: _ = ZipFile(save_path) @@ -242,7 +247,7 @@ def download_xml_Mastr_partial( log.info(f"Bad Zip file is deleted: {save_path}") os.remove(save_path) else: - bulk_data_list = check_download_completeness(save_path,bulk_data_list) + bulk_data_list, katalogwerte_bool = check_download_completeness(save_path,bulk_data_list) if bool(bulk_data_list): print(f"MaStR is missing the following data: {bulk_data_list}") else: @@ -306,13 +311,12 @@ def download_xml_Mastr_partial( for bulk_file_name in BULK_INCLUDE_TABLES_MAP[bulk_data_name]: remote_index_list = [remote_index for remote_index, remote_zip_name in enumerate(remote_zip_names) if remote_zip_name == bulk_file_name] for remote_index in remote_index_list: - remote_zip_file.extract(remote_zip_file.namelist()[remote_index],path=Path(save_path[:-4])) - - remote_zip_file.extract('Katalogwerte.xml',path=Path(save_path[:-4])) + remote_zip_file.extractzip(remote_zip_file.namelist()[remote_index],path=Path(save_path)) - shutil.make_archive(save_path[:-4], 'zip', save_path[:-4]) - shutil.rmtree(save_path[:-4]) + if not katalogwerte_bool: + remote_zip_file.extractzip('Katalogwerte.xml',path=Path(save_path)) time_b = time.perf_counter() print(f"Download is finished. 
It took {int(np.around(time_b - time_a))} seconds.") print(f"MaStR was successfully downloaded to {xml_folder_path}.") + return bulk_data_list From 4fe0a0af971af4a2ab0f77ac91965a61d97ad9f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Mon, 12 May 2025 09:41:38 +0200 Subject: [PATCH 10/53] Remove unnecessary imports from unzip_http --- open_mastr/utils/unzip_http.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/open_mastr/utils/unzip_http.py b/open_mastr/utils/unzip_http.py index 548a7890..ef4140f8 100644 --- a/open_mastr/utils/unzip_http.py +++ b/open_mastr/utils/unzip_http.py @@ -55,7 +55,6 @@ import zlib import struct import fnmatch -import argparse import pathlib import urllib.parse import zipfile @@ -378,31 +377,3 @@ def download_file(f, rzf, args): with open(str(path), 'wb') as of: extract_one(of, rzf, f, str(path)) - -def main(): - parser = argparse.ArgumentParser(prog='unzip-http', \ - description="Extract individual files from .zip files over http without downloading the entire archive. HTTP server must send `Accept-Ranges: bytes` and `Content-Length` in headers.") - - parser.add_argument('-l', '--list', action='store_true', default=False, - help="List files in the remote zip file") - parser.add_argument('-f', '--full-filepaths', action='store_true', default=False, - help="Recreate folder structure from zip file when extracting (instead of extracting the files to the current directory)") - parser.add_argument('-o', '--stdout', action='store_true', default=False, - help="Write files to stdout (if multiple files: concatenate them to stdout, in zipfile order)") - - parser.add_argument("url", nargs=1, help="URL of the remote zip file") - parser.add_argument("files", nargs='*', help="Files to extract. If no filenames given, displays .zip contents (filenames and sizes). 
Each filename can be a wildcard glob.") - - args = parser.parse_args() - - rzf = RemoteZipFile(args.url[0]) - if args.list or len(args.files) == 0: - list_files(rzf) - else: - for f in rzf.infolist(): - download_file(f, rzf, args) - - - -if __name__ == '__main__': - main() From e3d3b3d644f7848c6c139cf0edd004bad41e1676 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Wed, 16 Jul 2025 16:36:34 +0200 Subject: [PATCH 11/53] Prepare metadata file creation --- open_mastr/mastr.py | 2 ++ open_mastr/utils/helpers.py | 14 ++++++++++++++ open_mastr/xml_download/utils_download_bulk.py | 3 +-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index 79917a7c..0a9cc0a4 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -26,6 +26,7 @@ create_db_query, db_query_to_csv, reverse_fill_basic_units, + create_metadata_file ) from open_mastr.utils.config import ( create_data_dir, @@ -254,6 +255,7 @@ def download( bulk_cleansing=bulk_cleansing, bulk_download_date=bulk_download_date, ) + create_metadata_file(self, date, data) if method == "API": validate_api_credentials() diff --git a/open_mastr/utils/helpers.py b/open_mastr/utils/helpers.py index 1543e222..a38a6467 100644 --- a/open_mastr/utils/helpers.py +++ b/open_mastr/utils/helpers.py @@ -4,6 +4,7 @@ from contextlib import contextmanager from datetime import date, datetime from warnings import warn +import csv import dateutil import sqlalchemy @@ -322,6 +323,19 @@ def transform_date_parameter(self, method, date, **kwargs): return date +# def create_metadata_file(self, date, data): +# log_file = os.path.join(self.output_dir, "data", "metadata_log_file.csv") +# if not os.path.isfile(log_file): +# with open(log_file, "w", newline="") as file: +# writer = csv.writer(file) +# writer.writerow(["date", "date_input", "data_tables"]) +# if date == "today": +# actual_date = datetime.today().strftime("%Y%m%d") +# with open(log_file, "a", newline="") as file: +# 
writer = csv.writer(file) +# writer.writerow([actual_date, date, data]) + + @contextmanager def session_scope(engine): """Provide a transactional scope around a series of operations.""" diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index 51883dcc..9210665f 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -313,8 +313,7 @@ def download_xml_Mastr_partial( for remote_index in remote_index_list: remote_zip_file.extractzip(remote_zip_file.namelist()[remote_index],path=Path(save_path)) - if not katalogwerte_bool: - remote_zip_file.extractzip('Katalogwerte.xml',path=Path(save_path)) + remote_zip_file.extractzip('Katalogwerte.xml',path=Path(save_path)) time_b = time.perf_counter() print(f"Download is finished. It took {int(np.around(time_b - time_a))} seconds.") From 84b8647c2a738a867bc478af51d79dee1aa558c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Fri, 18 Jul 2025 12:17:18 +0200 Subject: [PATCH 12/53] Moving two check functions outside download_xml function #616 --- open_mastr/mastr.py | 20 ++--- open_mastr/utils/helpers.py | 16 +++- .../xml_download/utils_download_bulk.py | 75 ++++++++----------- 3 files changed, 56 insertions(+), 55 deletions(-) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index 0a9cc0a4..021bc143 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -4,7 +4,8 @@ # import xml dependencies from open_mastr.xml_download.utils_download_bulk import ( download_xml_Mastr, - download_xml_Mastr_partial + download_xml_Mastr_partial, + delete_xml_files_not_from_given_date ) from open_mastr.xml_download.utils_write_to_database import ( write_mastr_xml_to_database, @@ -26,7 +27,10 @@ create_db_query, db_query_to_csv, reverse_fill_basic_units, - create_metadata_file + delete_zip_file_if_corrupted, + create_database_engine, + rename_table, + create_translated_database_engine, ) from 
open_mastr.utils.config import ( create_data_dir, @@ -37,13 +41,6 @@ ) import open_mastr.utils.orm as orm -# import initialize_database dependencies -from open_mastr.utils.helpers import ( - create_database_engine, - rename_table, - create_translated_database_engine, -) - # constants from open_mastr.utils.constants import TECHNOLOGIES, ADDITIONAL_TABLES @@ -237,6 +234,10 @@ def download( xml_folder_path, f"Gesamtdatenexport_{bulk_download_date}.zip", ) + + delete_zip_file_if_corrupted(zipped_xml_file_path) + delete_xml_files_not_from_given_date(zipped_xml_file_path, xml_folder_path) + if data is None: download_xml_Mastr(zipped_xml_file_path, date, xml_folder_path) else: @@ -255,7 +256,6 @@ def download( bulk_cleansing=bulk_cleansing, bulk_download_date=bulk_download_date, ) - create_metadata_file(self, date, data) if method == "API": validate_api_credentials() diff --git a/open_mastr/utils/helpers.py b/open_mastr/utils/helpers.py index a38a6467..8af859f3 100644 --- a/open_mastr/utils/helpers.py +++ b/open_mastr/utils/helpers.py @@ -4,7 +4,7 @@ from contextlib import contextmanager from datetime import date, datetime from warnings import warn -import csv +from zipfile import BadZipfile, ZipFile import dateutil import sqlalchemy @@ -824,3 +824,17 @@ def create_translated_database_engine(engine, folder_path) -> sqlalchemy.engine. ) return create_engine(f"sqlite:///{db_path}") + + +def delete_zip_file_if_corrupted(save_path: str): + """ + Check if existing zip file is corrupted and if yes, delete it, if no, zipfile exists. 
+ """ + if os.path.exists(save_path): + try: + with ZipFile(save_path) as _: + pass + except BadZipfile: + log.info(f"Bad Zip file is deleted: {save_path}") + os.remove(save_path) + \ No newline at end of file diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index 9210665f..a9f0f286 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -2,7 +2,7 @@ import shutil import time from importlib.metadata import PackageNotFoundError, version -from zipfile import BadZipfile, ZipFile +from zipfile import ZipFile from pathlib import Path import numpy as np @@ -125,24 +125,7 @@ def download_xml_Mastr( save_path: str The path where the downloaded MaStR zipped folder will be saved. """ - - if os.path.exists(save_path): - try: - _ = ZipFile(save_path) - except BadZipfile: - log.info(f"Bad Zip file is deleted: {save_path}") - os.remove(save_path) - else: - print("MaStR already downloaded.") - return None - - if bulk_date_string != "today": - raise OSError( - "There exists no file for given date. MaStR can only be downloaded " - "from the website if today's date is given." - ) - shutil.rmtree(xml_folder_path, ignore_errors=True) - os.makedirs(xml_folder_path, exist_ok=True) + print_message = ( "Download has started, this can take several minutes." @@ -210,7 +193,7 @@ def download_xml_Mastr( def check_download_completeness( save_path: str,bulk_data_list: list -) -> (list, bool): +) -> tuple[list, bool]: """Checks if an existing download contains the xml-files corresponding to the bulk_data_list. 
""" with ZipFile(save_path, 'r') as zip_ref: @@ -222,10 +205,10 @@ def check_download_completeness( if bulk_file_name not in existing_files: missing_data_set.add(bulk_data_name) - katalogwerte_bool = 0 + is_katalogwerte_existing = False if 'katalogwerte' in existing_files: - katalogwerte_bool = True - return list(missing_data_set), katalogwerte_bool + is_katalogwerte_existing = True + return list(missing_data_set), is_katalogwerte_existing def download_xml_Mastr_partial( @@ -239,28 +222,14 @@ def download_xml_Mastr_partial( The path where the downloaded MaStR zipped folder will be saved. """ - katalogwerte_bool = False + is_katalogwerte_existing = False if os.path.exists(save_path): - try: - _ = ZipFile(save_path) - except BadZipfile: - log.info(f"Bad Zip file is deleted: {save_path}") - os.remove(save_path) + bulk_data_list, is_katalogwerte_existing = check_download_completeness(save_path,bulk_data_list) + if bool(bulk_data_list): + print(f"MaStR is missing the following data: {bulk_data_list}") else: - bulk_data_list, katalogwerte_bool = check_download_completeness(save_path,bulk_data_list) - if bool(bulk_data_list): - print(f"MaStR is missing the following data: {bulk_data_list}") - else: - print("MaStR already downloaded.") - return None - - if bulk_date_string != "today": - raise OSError( - "There exists no file for given date. MaStR can only be downloaded " - "from the website if today's date is given." - ) - shutil.rmtree(xml_folder_path, ignore_errors=True) - os.makedirs(xml_folder_path, exist_ok=True) + print("MaStR already downloaded.") + return None print_message = ( "Download has started, this can take several minutes." 
@@ -308,14 +277,32 @@ def download_xml_Mastr_partial( remote_index_list = [] for bulk_data_name in bulk_data_list: + # Example: ['wind','solar'] for bulk_file_name in BULK_INCLUDE_TABLES_MAP[bulk_data_name]: + # Example: From "wind" we get ["anlageneegwind", "einheitenwind"], and from "solar" we get ["anlageneegsolar", "einheitensolar"] + # and we have to find the corresponding index in the remote_zip_file list in order to fetch the correct file remote_index_list = [remote_index for remote_index, remote_zip_name in enumerate(remote_zip_names) if remote_zip_name == bulk_file_name] + # for remote_index in tqdm(remote_index_list): for remote_index in remote_index_list: + # Example: remote_zip_file.namelist()[remote_index] corresponds to e.g. 'AnlagenEegSolar_1.xml' remote_zip_file.extractzip(remote_zip_file.namelist()[remote_index],path=Path(save_path)) - remote_zip_file.extractzip('Katalogwerte.xml',path=Path(save_path)) + if not is_katalogwerte_existing: + remote_zip_file.extractzip('Katalogwerte.xml',path=Path(save_path)) time_b = time.perf_counter() print(f"Download is finished. It took {int(np.around(time_b - time_a))} seconds.") print(f"MaStR was successfully downloaded to {xml_folder_path}.") return bulk_data_list + + +def delete_xml_files_not_from_given_date(save_path: str, xml_folder_path: str): + """ + Delete xml files that are not corresponding to the given date. + Assumes that the xml folder only contains one zipfile. 
+ """ + if os.path.exists(save_path): + return + else: + shutil.rmtree(xml_folder_path) + os.makedirs(xml_folder_path) From 2d681cfd1d7ae61273226e9992f8222d1efa03d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Fri, 18 Jul 2025 15:46:02 +0200 Subject: [PATCH 13/53] Deprecation of date='existing', merging of partial and full download in existing download() function #616 --- environment.yml | 2 +- open_mastr/mastr.py | 14 +- open_mastr/utils/helpers.py | 65 +++---- .../xml_download/utils_download_bulk.py | 174 ++++++++---------- 4 files changed, 101 insertions(+), 154 deletions(-) diff --git a/environment.yml b/environment.yml index 104130fe..66f2bd03 100644 --- a/environment.yml +++ b/environment.yml @@ -3,4 +3,4 @@ channels: - conda-forge - defaults dependencies: - - python=3.10 + - python=3.11 diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index 021bc143..ada4311e 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -4,8 +4,7 @@ # import xml dependencies from open_mastr.xml_download.utils_download_bulk import ( download_xml_Mastr, - download_xml_Mastr_partial, - delete_xml_files_not_from_given_date + delete_xml_files_not_from_given_date, ) from open_mastr.xml_download.utils_write_to_database import ( write_mastr_xml_to_database, @@ -158,7 +157,7 @@ def download( |-----------------------|------|------| | "today" | latest files are downloaded from marktstammdatenregister.de | - | | "20230101" | If file from this date exists locally, it is used. 
Otherwise it throws an error (You can only receive todays data from the server) | - | - | "existing" | Use latest downloaded zipped xml files, throws an error if the bulk download folder is empty | - | + | "existing" | Deprecated since 0.16, see [#616](https://github.com/OpenEnergyPlatform/open-MaStR/issues/616#issuecomment-3089377062)mkdo | - | | "latest" | - | Retrieve data that is newer than the newest data already in the table | | datetime.datetime(2020, 11, 27) | - | Retrieve data that is newer than this time stamp | | None | set date="today" | set date="latest" | @@ -237,11 +236,8 @@ def download( delete_zip_file_if_corrupted(zipped_xml_file_path) delete_xml_files_not_from_given_date(zipped_xml_file_path, xml_folder_path) - - if data is None: - download_xml_Mastr(zipped_xml_file_path, date, xml_folder_path) - else: - data = download_xml_Mastr_partial(zipped_xml_file_path, date, data, xml_folder_path) + + download_xml_Mastr(zipped_xml_file_path, date, data, xml_folder_path) print( f"\nWould you like to speed up the bulk download?\n" @@ -256,7 +252,7 @@ def download( bulk_cleansing=bulk_cleansing, bulk_download_date=bulk_download_date, ) - + if method == "API": validate_api_credentials() diff --git a/open_mastr/utils/helpers.py b/open_mastr/utils/helpers.py index 8af859f3..a4a1f525 100644 --- a/open_mastr/utils/helpers.py +++ b/open_mastr/utils/helpers.py @@ -245,18 +245,16 @@ def raise_warning_for_invalid_parameter_combinations( ) if method == "bulk" and ( - ( - any( - parameter is not None - for parameter in [ - api_processes, - api_data_types, - api_location_types, - ] - ) - or api_limit != 50 - or api_chunksize != 1000 + any( + parameter is not None + for parameter in [ + api_processes, + api_data_types, + api_location_types, + ] ) + or api_limit != 50 + or api_chunksize != 1000 ): warn( "For method = 'bulk', API related parameters (with prefix api_) are ignored." 
@@ -303,39 +301,23 @@ def transform_date_parameter(self, method, date, **kwargs): date = kwargs.get("bulk_date", date) date = "today" if date is None else date if date == "existing": - existing_files_list = os.listdir( - os.path.join(self.output_dir, "data", "xml_download") + log.warning( + """ + The date parameter 'existing' is deprecated and will be removed in the future. + The date parameter is set to `today`. + + If this change causes problems for you, please comment in this issue on github: + https://github.com/OpenEnergyPlatform/open-MaStR/issues/616#issuecomment-3089377062 + + """ ) - if not existing_files_list: - date = "today" - print( - "By choosing `date`='existing' you want to use an existing " - "xml download." - "However no xml_files were downloaded yet. The parameter `date` is" - "therefore set to 'today'." - ) - # we assume that there is only one file in the folder which is the - # zipped xml folder - date = existing_files_list[0].split("_")[1].split(".")[0] + date = "today" elif method == "API": date = kwargs.get("api_date", date) return date -# def create_metadata_file(self, date, data): -# log_file = os.path.join(self.output_dir, "data", "metadata_log_file.csv") -# if not os.path.isfile(log_file): -# with open(log_file, "w", newline="") as file: -# writer = csv.writer(file) -# writer.writerow(["date", "date_input", "data_tables"]) -# if date == "today": -# actual_date = datetime.today().strftime("%Y%m%d") -# with open(log_file, "a", newline="") as file: -# writer = csv.writer(file) -# writer.writerow([actual_date, date, data]) - - @contextmanager def session_scope(engine): """Provide a transactional scope around a series of operations.""" @@ -369,7 +351,7 @@ def print_api_settings( ) if "permit" in harmonisation_log: print( - f"data_types: {api_data_types}" "\033[31m", + f"data_types: {api_data_types}\033[31m", "Attention, 'permit_data' was automatically set in api_data_types, " "as you defined 'permit' in parameter data_api.", "\033[m", @@ 
-494,9 +476,7 @@ def create_db_query( unit_type_map_reversed = reverse_unit_type_map() with session_scope(engine=engine) as session: - if tech: - # Select orm tables for specified additional_data. orm_tables = { f"{dat}": getattr(orm, ORM_MAP[tech].get(dat, "KeyNotAvailable"), None) @@ -567,7 +547,6 @@ def create_db_query( return query_tech if additional_table: - orm_table = getattr(orm, ORM_MAP[additional_table], None) query_additional_tables = Query(orm_table, session=session) @@ -755,7 +734,6 @@ def db_query_to_csv(db_query, data_table: str, chunksize: int) -> None: chunk_df[col] = chunk_df[col].str.replace("\r", "") if not chunk_df.empty: - if chunk_number == 0: chunk_df.to_csv( csv_file, @@ -836,5 +814,4 @@ def delete_zip_file_if_corrupted(save_path: str): pass except BadZipfile: log.info(f"Bad Zip file is deleted: {save_path}") - os.remove(save_path) - \ No newline at end of file + os.remove(save_path) diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index a9f0f286..202f2b32 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -11,7 +11,7 @@ # setup logger from open_mastr.utils.config import setup_logger -from open_mastr.utils.constants import BULK_INCLUDE_TABLES_MAP +from open_mastr.utils.constants import BULK_INCLUDE_TABLES_MAP, BULK_DATA from open_mastr.utils import unzip_http try: @@ -116,7 +116,7 @@ def gen_url(when: time.struct_time = time.localtime(), use_version="current") -> def download_xml_Mastr( - save_path: str, bulk_date_string: str, xml_folder_path: str + save_path: str, bulk_date_string: str, bulk_data_list: list, xml_folder_path: str ) -> None: """Downloads the zipped MaStR. @@ -125,7 +125,6 @@ def download_xml_Mastr( save_path: str The path where the downloaded MaStR zipped folder will be saved. """ - print_message = ( "Download has started, this can take several minutes." 
@@ -168,141 +167,116 @@ def download_xml_Mastr( log.error("Could not download file: download URL not found") return - total_length = int(18000 * 1024 * 1024) - with ( - open(save_path, "wb") as zfile, - tqdm(desc=save_path, total=(total_length / 1024 / 1024), unit="") as bar, - ): - for chunk in r.iter_content(chunk_size=1024 * 1024): - # chunk size of 1024 * 1024 needs 9min 11 sek = 551sek - # chunk size of 1024 needs 9min 11 sek as well - if chunk: - zfile.write(chunk) - zfile.flush() - bar.update() - # if the rate falls below 100 kB/s -> prompt warning - if bar.format_dict["rate"] and bar.format_dict["rate"] < 2: - bar.set_postfix_str(s=warning_message) - else: - # remove warning - bar.set_postfix_str(s="") + if bulk_data_list == BULK_DATA: + full_download_without_unzip_http(save_path, r) + else: + try: + partial_download_with_unzip_http(save_path, url, bulk_data_list) + except Exception as e: + log.warning(f"Partial download failed, fallback to full download: {e}") + full_download_without_unzip_http(save_path, r) + time_b = time.perf_counter() print(f"Download is finished. It took {int(np.around(time_b - time_a))} seconds.") print(f"MaStR was successfully downloaded to {xml_folder_path}.") def check_download_completeness( - save_path: str,bulk_data_list: list + save_path: str, bulk_data_list: list ) -> tuple[list, bool]: - """Checks if an existing download contains the xml-files corresponding to the bulk_data_list. 
- """ - with ZipFile(save_path, 'r') as zip_ref: - existing_files = [zip_name.lower().split('_')[0].split('.')[0] for zip_name in zip_ref.namelist()] + """Checks if an existing download contains the xml-files corresponding to the bulk_data_list.""" + with ZipFile(save_path, "r") as zip_ref: + existing_files = [ + zip_name.lower().split("_")[0].split(".")[0] + for zip_name in zip_ref.namelist() + ] missing_data_set = set() for bulk_data_name in bulk_data_list: - for bulk_file_name in BULK_INCLUDE_TABLES_MAP[bulk_data_name]: - if bulk_file_name not in existing_files: - missing_data_set.add(bulk_data_name) + for bulk_file_name in BULK_INCLUDE_TABLES_MAP[bulk_data_name]: + if bulk_file_name not in existing_files: + missing_data_set.add(bulk_data_name) is_katalogwerte_existing = False - if 'katalogwerte' in existing_files: + if "katalogwerte" in existing_files: is_katalogwerte_existing = True return list(missing_data_set), is_katalogwerte_existing -def download_xml_Mastr_partial( - save_path: str, bulk_date_string: str, bulk_data_list: list, xml_folder_path: str -) -> list: - """Downloads the zipped MaStR. - - Parameters - ----------- - save_path: str - The path where the downloaded MaStR zipped folder will be saved. +def delete_xml_files_not_from_given_date(save_path: str, xml_folder_path: str): """ + Delete xml files that are not corresponding to the given date. + Assumes that the xml folder only contains one zipfile. 
+ """ + if os.path.exists(save_path): + return + else: + shutil.rmtree(xml_folder_path) + os.makedirs(xml_folder_path) + +def partial_download_with_unzip_http(save_path: str, url: str, bulk_data_list: list): is_katalogwerte_existing = False if os.path.exists(save_path): - bulk_data_list, is_katalogwerte_existing = check_download_completeness(save_path,bulk_data_list) + bulk_data_list, is_katalogwerte_existing = check_download_completeness( + save_path, bulk_data_list + ) if bool(bulk_data_list): print(f"MaStR is missing the following data: {bulk_data_list}") else: print("MaStR already downloaded.") return None - print_message = ( - "Download has started, this can take several minutes." - "The download bar is only a rough estimate." - ) - warning_message = ( - "Warning: The servers from MaStR restrict the download speed." - " You may want to download it another time." - ) - print(print_message) - - now = time.localtime() - url = gen_url(now) - - time_a = time.perf_counter() - r = requests.get(url, stream=True, headers={"User-Agent": USER_AGENT}) - if r.status_code == 404: - log.warning( - "Download file was not found. Assuming that the new file was not published yet and retrying with yesterday." - ) - now = time.localtime( - time.mktime(now) - (24 * 60 * 60) - ) # subtract 1 day from the date - url = gen_url(now) - r = requests.get(url, stream=True, headers={"User-Agent": USER_AGENT}) - if r.status_code == 404: - url = gen_url(now, use_version="before") # Use lower MaStR Version - log.warning( - f"Download file was not found. Assuming that the version of MaStR has changed and retrying with download link: {url}" - ) - r = requests.get(url, stream=True, headers={"User-Agent": USER_AGENT}) - if r.status_code == 404: - url = gen_url(now, use_version="after") # Use higher MaStR Version - log.warning( - f"Download file was not found. 
Assuming that the version of MaStR has changed and retrying with download link: {url}" - ) - r = requests.get(url, stream=True, headers={"User-Agent": USER_AGENT}) - - if r.status_code == 404: - log.error("Could not download file: download URL not found") - return - remote_zip_file = unzip_http.RemoteZipFile(url) - remote_zip_names = [remote_zip_name.lower().split('_')[0].split('.')[0] for remote_zip_name in remote_zip_file.namelist()] + remote_zip_names = [ + remote_zip_name.lower().split("_")[0].split(".")[0] + for remote_zip_name in remote_zip_file.namelist() + ] remote_index_list = [] + download_files_list = [] for bulk_data_name in bulk_data_list: # Example: ['wind','solar'] for bulk_file_name in BULK_INCLUDE_TABLES_MAP[bulk_data_name]: # Example: From "wind" we get ["anlageneegwind", "einheitenwind"], and from "solar" we get ["anlageneegsolar", "einheitensolar"] # and we have to find the corresponding index in the remote_zip_file list in order to fetch the correct file - remote_index_list = [remote_index for remote_index, remote_zip_name in enumerate(remote_zip_names) if remote_zip_name == bulk_file_name] + remote_index_list = [ + remote_index + for remote_index, remote_zip_name in enumerate(remote_zip_names) + if remote_zip_name == bulk_file_name + ] # for remote_index in tqdm(remote_index_list): for remote_index in remote_index_list: # Example: remote_zip_file.namelist()[remote_index] corresponds to e.g. 'AnlagenEegSolar_1.xml' - remote_zip_file.extractzip(remote_zip_file.namelist()[remote_index],path=Path(save_path)) + download_files_list.append(remote_zip_file.namelist()[remote_index]) - if not is_katalogwerte_existing: - remote_zip_file.extractzip('Katalogwerte.xml',path=Path(save_path)) + for zipfile_name in tqdm(download_files_list, unit=" file"): + remote_zip_file.extractzip(zipfile_name, path=Path(save_path)) - time_b = time.perf_counter() - print(f"Download is finished. 
It took {int(np.around(time_b - time_a))} seconds.") - print(f"MaStR was successfully downloaded to {xml_folder_path}.") - return bulk_data_list + if not is_katalogwerte_existing: + remote_zip_file.extractzip("Katalogwerte.xml", path=Path(save_path)) -def delete_xml_files_not_from_given_date(save_path: str, xml_folder_path: str): - """ - Delete xml files that are not corresponding to the given date. - Assumes that the xml folder only contains one zipfile. - """ - if os.path.exists(save_path): - return - else: - shutil.rmtree(xml_folder_path) - os.makedirs(xml_folder_path) +def full_download_without_unzip_http(save_path: str, r: requests.models.Response): + warning_message = ( + "Warning: The servers from MaStR restrict the download speed." + " You may want to download it another time." + ) + total_length = int(23000) + with ( + open(save_path, "wb") as zfile, + tqdm(desc=save_path, total=total_length, unit="") as bar, + ): + for chunk in r.iter_content(chunk_size=1024 * 1024): + # chunk size of 1024 * 1024 needs 9min 11 sek = 551sek + # chunk size of 1024 needs 9min 11 sek as well + if chunk: + zfile.write(chunk) + zfile.flush() + bar.update() + # if the rate falls below 100 kB/s -> prompt warning + if bar.format_dict["rate"] and bar.format_dict["rate"] < 2: + bar.set_postfix_str(s=warning_message) + else: + # remove warning + bar.set_postfix_str(s="") From 118f75070aa7446de6163f036b6e34cf7a05ee20 Mon Sep 17 00:00:00 2001 From: Florian Kotthoff Date: Mon, 21 Jul 2025 09:20:14 +0200 Subject: [PATCH 14/53] Change print statements #616 --- open_mastr/mastr.py | 6 +++--- open_mastr/xml_download/utils_download_bulk.py | 5 +---- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index ada4311e..ba0cd86e 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -240,9 +240,9 @@ def download( download_xml_Mastr(zipped_xml_file_path, date, data, xml_folder_path) print( - f"\nWould you like to speed up the bulk 
download?\n" - f"Try our new parallelized processing by setting os.environ['USE_RECOMMENDED_NUMBER_OF_PROCESSES'] = True " - f"or configure your own number of processes via os.environ['NUMBER_OF_PROCESSES'] = your_number\n" + "\nWould you like to speed up the creation of your MaStR database?\n" + "Try our new parallelized processing by setting os.environ['USE_RECOMMENDED_NUMBER_OF_PROCESSES'] = True " + "or configure your own number of processes via os.environ['NUMBER_OF_PROCESSES'] = your_number\n" ) write_mastr_xml_to_database( diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index 202f2b32..fcc604f4 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -126,10 +126,7 @@ def download_xml_Mastr( The path where the downloaded MaStR zipped folder will be saved. """ - print_message = ( - "Download has started, this can take several minutes." - "The download bar is only a rough estimate." - ) + print_message = "Starting the Download from marktstammdatenregister.de." warning_message = ( "Warning: The servers from MaStR restrict the download speed." " You may want to download it another time." 
From c8177fb92eaa6b52a6647bfd6175cdec362e62b1 Mon Sep 17 00:00:00 2001 From: Florian Kotthoff Date: Mon, 21 Jul 2025 09:46:02 +0200 Subject: [PATCH 15/53] Create test function for delete_xl_files #616 --- .../xml_download/test_utils_download_bulk.py | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/xml_download/test_utils_download_bulk.py b/tests/xml_download/test_utils_download_bulk.py index e1f60bb0..8f650933 100644 --- a/tests/xml_download/test_utils_download_bulk.py +++ b/tests/xml_download/test_utils_download_bulk.py @@ -1,5 +1,10 @@ import time -from open_mastr.xml_download.utils_download_bulk import gen_url +from open_mastr.xml_download.utils_download_bulk import ( + gen_url, + delete_xml_files_not_from_given_date, +) +import os +import shutil def test_gen_url(): @@ -84,3 +89,27 @@ def test_gen_url(): url == "https://download.marktstammdatenregister.de/Gesamtdatenexport_20240402_24.2.zip" ) + + +def test_delete_xml_files_not_from_given_date(): + xml_folder_path = os.path.join("tests", "test_utils_download") + expected_file = os.path.join(xml_folder_path, "20250102.txt") + os.makedirs(xml_folder_path) + + # Case where expected file exists + open(expected_file, "w").close() + delete_xml_files_not_from_given_date( + save_path=expected_file, xml_folder_path=xml_folder_path + ) + assert os.path.exists(expected_file) + os.remove(expected_file) + + # Case where old date is deleted + path_old_file = os.path.join(xml_folder_path, "20250101.txt") + open(path_old_file, "w").close() + delete_xml_files_not_from_given_date( + save_path=expected_file, xml_folder_path=xml_folder_path + ) + assert not os.path.exists(path_old_file) + # clean up test folder + shutil.rmtree(xml_folder_path) From e219fa1dfbb98c094af6ce1954601a1a40d91ccc Mon Sep 17 00:00:00 2001 From: Florian Kotthoff Date: Mon, 21 Jul 2025 10:24:53 +0200 Subject: [PATCH 16/53] Add test for partial download #616 --- tests/test_mastr.py | 21 ++++++++++++++++----- 1 file 
changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/test_mastr.py b/tests/test_mastr.py index 9fe8883b..df4a3a20 100644 --- a/tests/test_mastr.py +++ b/tests/test_mastr.py @@ -14,11 +14,6 @@ _xml_file_exists = True -@pytest.fixture -def db(): - return Mastr() - - @pytest.fixture def db_path(): return os.path.join( @@ -26,6 +21,11 @@ def db_path(): ) +@pytest.fixture +def db(db_path): + return Mastr(engine=sqlalchemy.create_engine(f"sqlite:///{db_path}")) + + @pytest.fixture def db_translated(db_path): engine = sqlalchemy.create_engine(f"sqlite:///{db_path}") @@ -71,3 +71,14 @@ def test_Mastr_translate(db_translated, db_path): for table in table_names: assert pd.read_sql(sql=table, con=db_empty.engine).shape[0] == 0 + + +def test_mastr_download(db): + db.download(data="wind") + df_wind = pd.read_sql("wind_extended", con=db.engine) + assert len(df_wind) > 10000 + + db.download(data="biomass") + df_biomass = pd.read_sql("biomass_extended", con=db.engine) + assert len(df_wind) > 10000 + assert len(df_biomass) > 10000 From 566c75fc5faf781d3fa405d417e152dea2caa612 Mon Sep 17 00:00:00 2001 From: Florian Kotthoff Date: Mon, 21 Jul 2025 10:31:12 +0200 Subject: [PATCH 17/53] Remove "cleansing" from print statements #644 --- open_mastr/xml_download/utils_write_to_database.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/open_mastr/xml_download/utils_write_to_database.py b/open_mastr/xml_download/utils_write_to_database.py index 4b220909..11bbe015 100644 --- a/open_mastr/xml_download/utils_write_to_database.py +++ b/open_mastr/xml_download/utils_write_to_database.py @@ -28,7 +28,7 @@ def write_mastr_xml_to_database( bulk_download_date: str, ) -> None: """Write the Mastr in xml format into a database defined by the engine parameter.""" - print("Starting bulk download and data cleansing...") + print("Starting bulk download...") include_tables = data_to_include_tables(data, mapping="write_xml") threads_data = [] @@ -71,7 +71,7 @@ def 
write_mastr_xml_to_database( for item in interleaved_files: process_xml_file(*item) - print("Bulk download and data cleansing were successful.") + print("Bulk download was successful.") def get_number_of_processes(): From 7251b3385ed67ec1c1dcafba7c774530fe6e2357 Mon Sep 17 00:00:00 2001 From: Florian Kotthoff Date: Mon, 21 Jul 2025 10:33:00 +0200 Subject: [PATCH 18/53] Update Changelog #644 --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53591623..e06dc604 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ ### Changed - Fix package publication workflow [#636](https://github.com/OpenEnergyPlatform/open-MaStR/pull/636) +- Change print statement about data cleansing + [#650](https://github.com/OpenEnergyPlatform/open-MaStR/pull/650) ### Removed From 99af202957edae5287f78c443f194cbc2d2ec17d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Wed, 23 Jul 2025 11:21:34 +0200 Subject: [PATCH 19/53] Add test for delete_zip_file_if_corrupted #616 --- tests/test_helpers.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 4a19f4fb..7779a9c8 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -7,6 +7,7 @@ from datetime import datetime import pandas as pd from open_mastr import Mastr +from zipfile import ZipFile from open_mastr.utils import orm from open_mastr.utils.constants import ( @@ -25,6 +26,7 @@ create_db_query, db_query_to_csv, reverse_unit_type_map, + delete_zip_file_if_corrupted, ) @@ -398,6 +400,18 @@ def test_db_query_to_csv(tmpdir, engine): os.rmdir(get_data_version_dir()) +def test_delete_zip_file_if_corrupted(): + test_zip_path = os.path.join("tests", "test.zip") + with ZipFile(test_zip_path, "w") as zf: + zf.writestr(os.path.join("tests", "file.txt"), "Hello, world!") + with open(test_zip_path, "wb+") as f: + 
f.seek(10) + f.write(b"\xff\xff\xff\xff") + + delete_zip_file_if_corrupted(test_zip_path) + assert not os.path.exists(test_zip_path) + + def test_save_metadata(): # FIXME: implement in #386 pass From 797486502734bd7602793f302593cbd17a2c2be8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Thu, 31 Jul 2025 11:05:12 +0200 Subject: [PATCH 20/53] =?UTF-8?q?Add=20Kevin=20Kr=C3=A4mer=20to=20CITATION?= =?UTF-8?q?.cff?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CITATION.cff | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CITATION.cff b/CITATION.cff index d2fe6752..99458ea3 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -34,6 +34,10 @@ authors: given-names: "Alexandra-Andreea" alias: "@AlexandraImbrisca" affiliation: "Technical University of Munich" + - family-names: 'Krämer' + given-names: "Kevin" + alias: "pt-kkraemer" + affiliation: "ProjectTogether gGmbH" title: "open-MaStR" type: software license: AGPL-3.0 From e3927147b31edbabfc89e6653f09241efa36213a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Thu, 31 Jul 2025 11:13:23 +0200 Subject: [PATCH 21/53] Add PR to CHANGELOG #616 --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53591623..8929b086 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ ## [v0.XX.X] unreleased - 202X-XX-XX ### Added +- Add partial bulk download + [#652](https://github.com/OpenEnergyPlatform/open-MaStR/pull/652) ### Changed - Fix package publication workflow [#636](https://github.com/OpenEnergyPlatform/open-MaStR/pull/636) From ed3f249278080c5a8b2507dd91bff744c7fa61e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Thu, 31 Jul 2025 14:01:21 +0200 Subject: [PATCH 22/53] Add "Einheittyp" to system_catalog #651 --- open_mastr/xml_download/colums_to_replace.py | 14 ++++++++++++++ 1 file 
changed, 14 insertions(+) diff --git a/open_mastr/xml_download/colums_to_replace.py b/open_mastr/xml_download/colums_to_replace.py index 8e6ead17..421ac44c 100644 --- a/open_mastr/xml_download/colums_to_replace.py +++ b/open_mastr/xml_download/colums_to_replace.py @@ -23,6 +23,20 @@ 3: "Gaserzeugungslokation", 4: "Gasverbrauchslokation", }, + "Einheittyp": { + 1: "Solareinheit", + 2: "Windeinheit", + 3: "Biomasse", + 4: "Wasser", + 5: "Geothermie", + 6: "Verbrennung", + 7: "Kernenergie", + 8: "Stromspeichereinheit", + 9: "Stromverbrauchseinheit", + 10: "Gasverbrauchseinheit", + 11: "Gaserzeugungseinheit", + 12: "Gasspeichereinheit", + }, } # columns to replace lists all columns where the entries have From 09fc84e5bb365a6bd38923ca99e9e62a429b99af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Thu, 31 Jul 2025 14:10:45 +0200 Subject: [PATCH 23/53] Add PR to changelog #651 --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 53591623..5d2ecb47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ ## [v0.XX.X] unreleased - 202X-XX-XX ### Added ### Changed +- Updates the system_catalog dict with missing Einheittyp values + [#653](https://github.com/OpenEnergyPlatform/open-MaStR/pull/653) - Fix package publication workflow [#636](https://github.com/OpenEnergyPlatform/open-MaStR/pull/636) ### Removed From 49e6c42e07b62ef49e68880fb2d9626f3098e52f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kevin=20Kr=C3=A4mer?= Date: Mon, 18 Aug 2025 12:24:56 +0200 Subject: [PATCH 24/53] Update docstring description of partial download when using "data" #616 --- open_mastr/mastr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index ba0cd86e..fd71fc32 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -126,8 +126,8 @@ def download( from
marktstammdatenregister.de, (see :ref:`Configuration `). Default to 'bulk'. data : str or list or None, optional - Determines which types of data are written to the database. If None, all data is - used. If it is a list, possible entries are listed below with respect to the download method. Missing categories are + Determines which data is partially downloaded from the bulk download and written to the database. If None, all data is downloaded and written to the database. + If it is a list, possible entries are listed below with respect to the download method. Missing categories are being developed. If only one data is of interest, this can be given as a string. Default to None, where all data is included. | Data | Bulk | API | @@ -157,7 +157,7 @@ def download( |-----------------------|------|------| | "today" | latest files are downloaded from marktstammdatenregister.de | - | | "20230101" | If file from this date exists locally, it is used. Otherwise it throws an error (You can only receive todays data from the server) | - | - | "existing" | Deprecated since 0.16, see [#616](https://github.com/OpenEnergyPlatform/open-MaStR/issues/616#issuecomment-3089377062)mkdo | - | + | "existing" | Deprecated since 0.16, see [#616](https://github.com/OpenEnergyPlatform/open-MaStR/issues/616#issuecomment-3089377062) | - | | "latest" | - | Retrieve data that is newer than the newest data already in the table | | datetime.datetime(2020, 11, 27) | - | Retrieve data that is newer than this time stamp | | None | set date="today" | set date="latest" | From 258724a5b9960c2572916fc421840d7afc29bdbf Mon Sep 17 00:00:00 2001 From: FlorianK13 Date: Tue, 19 Aug 2025 13:51:26 +0200 Subject: [PATCH 25/53] Delete unused print message #616 --- open_mastr/xml_download/utils_download_bulk.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index fcc604f4..10f60e3d 100644 --- 
a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -127,12 +127,9 @@ def download_xml_Mastr( """ print_message = "Starting the Download from marktstammdatenregister.de." - warning_message = ( - "Warning: The servers from MaStR restrict the download speed." - " You may want to download it another time." - ) print(print_message) + # TODO this should take bulk_date_string now = time.localtime() url = gen_url(now) From a2e3dc7d9f090079d1138540da15d618b38214f5 Mon Sep 17 00:00:00 2001 From: FlorianK13 Date: Tue, 19 Aug 2025 15:19:03 +0200 Subject: [PATCH 26/53] Extend docs #616 --- docs/getting_started.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 5a3dd671..891efbbe 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -35,7 +35,16 @@ db = Mastr() db.download() ``` -When a `Mastr` object is initialized, a sqlite database is created in `$HOME/.open-MaStR/data/sqlite`. With the function `Mastr.download()`, the **whole MaStR is downloaded** in the zipped xml file format. It is then read into the sqlite database and simple data cleansing functions are started. +When a `Mastr` object is initialized, a sqlite database is created in `$HOME/.open-MaStR/data/sqlite`. With the function [`Mastr.download()`][open_mastr.Mastr.download], the **whole MaStR is downloaded** in the zipped xml file format. It is then read into the sqlite database and simple data cleansing functions are started. + +If you are interested in a specific part of the dataset, you can specify this by using the `data` parameter: + +```python +from open_mastr import Mastr + +db = Mastr() +db.download(data=["wind","hydro"]) +``` More detailed information can be found in the section [bulk download](advanced.md#bulk-download). 
From d603388f092499fea63761d7f6e5712bc7017a2e Mon Sep 17 00:00:00 2001 From: nesnoj Date: Mon, 20 Oct 2025 14:06:03 +0200 Subject: [PATCH 27/53] Replace print() statements by logging #657 --- open_mastr/mastr.py | 18 +++++++------ open_mastr/soap_api/metadata/description.py | 11 +++++--- open_mastr/utils/helpers.py | 25 +++++++----------- .../xml_download/utils_download_bulk.py | 13 +++++----- .../xml_download/utils_write_to_database.py | 26 ++++++++++--------- 5 files changed, 49 insertions(+), 44 deletions(-) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index fd71fc32..f5cffd7d 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -92,7 +92,7 @@ def __init__(self, engine="sqlite", connect_to_translated_db=False) -> None: else: self.engine = create_database_engine(engine, self._sqlite_folder_path) - print( + log.info( f"Data will be written to the following database: {self.engine.url}\n" "If you run into problems, try to " "delete the database and update the package by running " @@ -239,7 +239,7 @@ def download( download_xml_Mastr(zipped_xml_file_path, date, data, xml_folder_path) - print( + log.info( "\nWould you like to speed up the creation of your MaStR database?\n" "Try our new parallelized processing by setting os.environ['USE_RECOMMENDED_NUMBER_OF_PROCESSES'] = True " "or configure your own number of processes via os.environ['NUMBER_OF_PROCESSES'] = your_number\n" @@ -259,8 +259,8 @@ def download( # Set api_processes to None in order to avoid the malfunctioning usage if api_processes: api_processes = None - print( - "Warning: The implementation of parallel processes " + log.warning( + "The implementation of parallel processes " "is currently under construction. Please let " "the argument api_processes at the default value None." 
) @@ -429,9 +429,11 @@ def translate(self) -> None: try: os.remove(new_path) except Exception as e: - print(f"An error occurred: {e}") + log.error( + f"An error occurred while removing old translated database: {e}" + ) - print("Replacing previous version of the translated database...") + log.info("Replacing previous version of the translated database...") for table in inspector.get_table_names(): rename_table(table, inspector.get_columns(table), self.engine) @@ -440,9 +442,9 @@ def translate(self) -> None: try: os.rename(old_path, new_path) - print(f"Database '{old_path}' changed to '{new_path}'") + log.info(f"Database '{old_path}' changed to '{new_path}'") except Exception as e: - print(f"An error occurred: {e}") + log.error(f"An error occurred while renaming database: {e}") self.engine = create_engine(f"sqlite:///{new_path}") self.is_translated = True diff --git a/open_mastr/soap_api/metadata/description.py b/open_mastr/soap_api/metadata/description.py index a4986959..728aec23 100644 --- a/open_mastr/soap_api/metadata/description.py +++ b/open_mastr/soap_api/metadata/description.py @@ -1,10 +1,13 @@ from io import BytesIO +import logging import re from urllib.request import urlopen from zipfile import ZipFile import xmltodict from collections import OrderedDict +log = logging.getLogger(__name__) + class DataDescription(object): """ @@ -150,9 +153,11 @@ def functions_data_documentation(self): fcn["sequence"]["element"]["@type"].split(":")[1] ]["sequence"]["element"] else: - print(type(fcn["sequence"])) - print(fcn["sequence"]) - raise ValueError + log.error(f"Unexpected sequence type: {type(fcn['sequence'])}") + log.error(f"Sequence content: {fcn['sequence']}") + raise ValueError( + f"Unexpected sequence structure in function metadata" + ) # Add data for inherited columns from base types if "@base" in fcn: diff --git a/open_mastr/utils/helpers.py b/open_mastr/utils/helpers.py index a4a1f525..9ac2492b 100644 --- a/open_mastr/utils/helpers.py +++ 
b/open_mastr/utils/helpers.py @@ -343,37 +343,32 @@ def print_api_settings( api_processes, api_location_types, ): - print( + log.info( f"Downloading with soap_API.\n\n -- API settings -- \nunits after date: " f"{date}\nunit download limit per data: " f"{api_limit}\nparallel_processes: {api_processes}\nchunksize: " f"{api_chunksize}\ndata_api: {data}" ) if "permit" in harmonisation_log: - print( - f"data_types: {api_data_types}\033[31m", + log.warning( + f"data_types: {api_data_types} - " "Attention, 'permit_data' was automatically set in api_data_types, " - "as you defined 'permit' in parameter data_api.", - "\033[m", + "as you defined 'permit' in parameter data_api." ) else: - print(f"data_types: {api_data_types}") + log.info(f"data_types: {api_data_types}") if "location" in harmonisation_log: - print( - "location_types:", - "\033[31m", - "Attention, 'location' is in parameter data. location_types are set to", - "\033[m", - f"{api_location_types}" - "\n If you want to change location_types, please remove 'location' " + log.warning( + f"location_types: {api_location_types} - " + "Attention, 'location' is in parameter data. location_types are set accordingly. " + "If you want to change location_types, please remove 'location' " "from data_api and specify api_location_types." - "\n ------------------ \n", ) else: - print( + log.info( f"location_types: {api_location_types}", "\n ------------------ \n", ) diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index 10f60e3d..785d2a3d 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -126,8 +126,7 @@ def download_xml_Mastr( The path where the downloaded MaStR zipped folder will be saved. """ - print_message = "Starting the Download from marktstammdatenregister.de." 
- print(print_message) + log.info("Starting the Download from marktstammdatenregister.de.") # TODO this should take bulk_date_string now = time.localtime() @@ -171,8 +170,10 @@ def download_xml_Mastr( full_download_without_unzip_http(save_path, r) time_b = time.perf_counter() - print(f"Download is finished. It took {int(np.around(time_b - time_a))} seconds.") - print(f"MaStR was successfully downloaded to {xml_folder_path}.") + log.info( + f"Download is finished. It took {int(np.around(time_b - time_a))} seconds." + ) + log.info(f"MaStR was successfully downloaded to {xml_folder_path}.") def check_download_completeness( @@ -216,9 +217,9 @@ def partial_download_with_unzip_http(save_path: str, url: str, bulk_data_list: l save_path, bulk_data_list ) if bool(bulk_data_list): - print(f"MaStR is missing the following data: {bulk_data_list}") + log.info(f"MaStR is missing the following data: {bulk_data_list}") else: - print("MaStR already downloaded.") + log.info("MaStR already downloaded.") return None remote_zip_file = unzip_http.RemoteZipFile(url) diff --git a/open_mastr/xml_download/utils_write_to_database.py b/open_mastr/xml_download/utils_write_to_database.py index 11bbe015..e71abc18 100644 --- a/open_mastr/xml_download/utils_write_to_database.py +++ b/open_mastr/xml_download/utils_write_to_database.py @@ -19,6 +19,8 @@ from open_mastr.utils.orm import tablename_mapping from open_mastr.xml_download.utils_cleansing_bulk import cleanse_bulk_data +log = setup_logger() + def write_mastr_xml_to_database( engine: sqlalchemy.engine.Engine, @@ -28,7 +30,7 @@ def write_mastr_xml_to_database( bulk_download_date: str, ) -> None: """Write the Mastr in xml format into a database defined by the engine parameter.""" - print("Starting bulk download...") + log.info("Starting bulk download...") include_tables = data_to_include_tables(data, mapping="write_xml") threads_data = [] @@ -71,7 +73,7 @@ def write_mastr_xml_to_database( for item in interleaved_files: process_xml_file(*item) - 
print("Bulk download was successful.") + log.info("Bulk download was successful.") def get_number_of_processes(): @@ -82,11 +84,11 @@ def get_number_of_processes(): try: number_of_processes = int(os.environ.get("NUMBER_OF_PROCESSES")) except ValueError: - print("Warning: Invalid value for NUMBER_OF_PROCESSES. Fallback to 1.") + log.warning("Invalid value for NUMBER_OF_PROCESSES. Fallback to 1.") return 1 if number_of_processes >= cpu_count(): - print( - f"Warning: Your system supports {cpu_count()} CPUs. Using " + log.warning( + f"Your system supports {cpu_count()} CPUs. Using " f"more processes than available CPUs may cause excessive " f"context-switching overhead." ) @@ -118,9 +120,9 @@ def process_xml_file( # The connection url obfuscates the password. We must replace the masked password with the actual password. engine = create_efficient_engine(connection_url) with ZipFile(zipped_xml_file_path, "r") as f: - print(f"Processing file '{file_name}'...") + log.info(f"Processing file '{file_name}'...") if is_first_file(file_name): - print(f"Creating table '{sql_table_name}'...") + log.info(f"Creating table '{sql_table_name}'...") create_database_table(engine, xml_table_name) df = read_xml_file(f, file_name) df = process_table_before_insertion( @@ -137,7 +139,7 @@ def process_xml_file( df, xml_table_name, sql_table_name, engine ) except Exception as e: - print(f"Error processing file '{file_name}': '{e}'") + log.error(f"Error processing file '{file_name}': '{e}'") def create_efficient_engine(connection_url: str) -> sqlalchemy.engine.Engine: @@ -224,7 +226,7 @@ def is_table_relevant(xml_table_name: str, include_tables: list) -> bool: tablename_mapping[xml_table_name]["__class__"] is not None ) except KeyError: - print( + log.warning( f"Table '{xml_table_name}' is not supported by your open-mastr version and " f"will be skipped." 
) @@ -451,7 +453,7 @@ def write_single_entries_until_not_unique_comes_up( labels=key_list, errors="ignore" ) # drop primary keys that already exist in the table df = df.reset_index() - print(f"{len_df_before - len(df)} entries already existed in the database.") + log.warning(f"{len_df_before - len(df)} entries already existed in the database.") return df @@ -509,7 +511,7 @@ def add_missing_columns_to_table( def delete_wrong_xml_entry(err: Error, df: pd.DataFrame) -> pd.DataFrame: delete_entry = str(err).split("«")[0].split("»")[1] - print(f"The entry {delete_entry} was deleted due to its false data type.") + log.warning(f"The entry {delete_entry} was deleted due to its false data type.") return df.replace(delete_entry, np.nan) @@ -548,7 +550,7 @@ def find_nearest_brackets(xml_string: str, position: int) -> tuple[int, int]: row_with_error[: left_bracket + 1] + row_with_error[right_bracket:] ) try: - print("One invalid xml expression was deleted.") + log.warning("One invalid xml expression was deleted.") df = pd.read_xml(StringIO("\n".join(data))) return df except lxml.etree.XMLSyntaxError as e: From a9081addd6abd3eee4aadcf3e9724846785a0284 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Mon, 20 Oct 2025 14:23:17 +0200 Subject: [PATCH 28/53] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 725f1ce6..9ef592cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ [#636](https://github.com/OpenEnergyPlatform/open-MaStR/pull/636) - Change print statement about data cleansing [#650](https://github.com/OpenEnergyPlatform/open-MaStR/pull/650) +- Improve logging + [#666](https://github.com/OpenEnergyPlatform/open-MaStR/pull/666) ### Removed From ed96ecbcbbb02b185cd7e84a1c2f543acf6f3f0d Mon Sep 17 00:00:00 2001 From: nesnoj Date: Mon, 20 Oct 2025 14:35:47 +0200 Subject: [PATCH 29/53] Replace print() statements by logging 
in unzip_http.py and apply black #657 --- open_mastr/utils/unzip_http.py | 245 +++++++++++++++++++++------------ 1 file changed, 157 insertions(+), 88 deletions(-) diff --git a/open_mastr/utils/unzip_http.py b/open_mastr/utils/unzip_http.py index ef4140f8..0674e130 100644 --- a/open_mastr/utils/unzip_http.py +++ b/open_mastr/utils/unzip_http.py @@ -58,32 +58,39 @@ import pathlib import urllib.parse import zipfile +import logging +log = logging.getLogger(__name__) -__version__ = '0.6' +__version__ = "0.6" def error(s): raise Exception(s) + def warning(s): - print(s, file=sys.stderr) + log.warning(s) + -def get_bits(val:int, *args): - 'Generate bitfields (one for each arg) from LSB to MSB.' +def get_bits(val: int, *args): + "Generate bitfields (one for each arg) from LSB to MSB." for n in args: - x = val & (2**n-1) + x = val & (2**n - 1) val >>= n yield x class RemoteZipInfo: - def __init__(self, filename:str='', - date_time:int = 0, - header_offset:int = 0, - compress_type:int = 0, - compress_size:int = 0, - file_size:int = 0): + def __init__( + self, + filename: str = "", + date_time: int = 0, + header_offset: int = 0, + compress_type: int = 0, + compress_size: int = 0, + file_size: int = 0, + ): self.filename = filename self.header_offset = header_offset self.compress_type = compress_type @@ -91,46 +98,51 @@ def __init__(self, filename:str='', self.file_size = file_size sec, mins, hour, day, mon, year = get_bits(date_time, 5, 6, 5, 5, 4, 7) - self.date_time = (year+1980, mon, day, hour, mins, sec) + self.date_time = (year + 1980, mon, day, hour, mins, sec) def is_dir(self): - return self.filename.endswith('/') + return self.filename.endswith("/") def parse_extra(self, extra): i = 0 while i < len(extra): - fieldid, fieldsz = struct.unpack_from('= 0: - magic, eocd_sz, create_ver, min_ver, disk_num, disk_start, disk_num_records, total_num_records, \ - cdir_bytes, cdir_start = struct.unpack_from(self.fmt_eocd64, resp.data, offset=i) + ( + magic, + eocd_sz, + 
create_ver, + min_ver, + disk_num, + disk_start, + disk_num_records, + total_num_records, + cdir_bytes, + cdir_start, + ) = struct.unpack_from(self.fmt_eocd64, resp.data, offset=i) else: i = resp.data.rfind(self.magic_eocd) if i >= 0: - magic, \ - disk_num, disk_start, disk_num_records, total_num_records, \ - cdir_bytes, cdir_start, comment_len = struct.unpack_from(self.fmt_eocd, resp.data, offset=i) + ( + magic, + disk_num, + disk_start, + disk_num_records, + total_num_records, + cdir_bytes, + cdir_start, + comment_len, + ) = struct.unpack_from(self.fmt_eocd, resp.data, offset=i) if cdir_start < 0 or cdir_start >= self.zip_size: - error('cannot find central directory') + error("cannot find central directory") if self.zip_size <= 65536: filehdr_index = cdir_start @@ -194,67 +222,91 @@ def infoiter(self): while filehdr_index < cdir_end: sizeof_cdirentry = struct.calcsize(self.fmt_cdirentry) - magic, ver, ver_needed, flags, method, date_time, crc, \ - complen, uncomplen, fnlen, extralen, commentlen, \ - disknum_start, internal_attr, external_attr, local_header_ofs = \ - struct.unpack_from(self.fmt_cdirentry, resp.data, offset=filehdr_index) + ( + magic, + ver, + ver_needed, + flags, + method, + date_time, + crc, + complen, + uncomplen, + fnlen, + extralen, + commentlen, + disknum_start, + internal_attr, + external_attr, + local_header_ofs, + ) = struct.unpack_from(self.fmt_cdirentry, resp.data, offset=filehdr_index) filehdr_index += sizeof_cdirentry - filename = resp.data[filehdr_index:filehdr_index+fnlen] + filename = resp.data[filehdr_index : filehdr_index + fnlen] filehdr_index += fnlen - extra = resp.data[filehdr_index:filehdr_index+extralen] + extra = resp.data[filehdr_index : filehdr_index + extralen] filehdr_index += extralen # comment = resp.data[filehdr_index:filehdr_index+commentlen] filehdr_index += commentlen - rzi = RemoteZipInfo(filename.decode(), date_time, local_header_ofs, method, complen, uncomplen) + rzi = RemoteZipInfo( + filename.decode(), + 
date_time, + local_header_ofs, + method, + complen, + uncomplen, + ) rzi.parse_extra(extra) yield rzi def extract(self, member, path=None, pwd=None): - if pwd: - raise NotImplementedError('Passwords not supported yet') + if pwd: + raise NotImplementedError("Passwords not supported yet") - path = path or pathlib.Path('.') + path = path or pathlib.Path(".") - outpath = path/member - os.makedirs(outpath.parent, exist_ok=True) - with self.open(member) as fpin: - with open(path/member, mode='wb') as fpout: - while True: - r = fpin.read(65536) - if not r: - break - fpout.write(r) + outpath = path / member + os.makedirs(outpath.parent, exist_ok=True) + with self.open(member) as fpin: + with open(path / member, mode="wb") as fpout: + while True: + r = fpin.read(65536) + if not r: + break + fpout.write(r) - def extractzip(self, member, path=None, pwd=None): if pwd: - raise NotImplementedError('Passwords not supported yet') + raise NotImplementedError("Passwords not supported yet") - path = path or pathlib.Path('.') + path = path or pathlib.Path(".") outpath = path os.makedirs(outpath.parent, exist_ok=True) with self.open(member) as fpin: - with zipfile.ZipFile(outpath, 'a', zipfile.ZIP_DEFLATED) as zout: - with zout.open(member,'w') as fpout: + with zipfile.ZipFile(outpath, "a", zipfile.ZIP_DEFLATED) as zout: + with zout.open(member, "w") as fpout: while True: r = fpin.read(65536) if not r: break fpout.write(r) - def extractall(self, path=None, members=None, pwd=None): for fn in members or self.namelist(): self.extract(fn, path, pwd=pwd) def get_range(self, start, n): - return self.http.request('GET', self.url, headers={'Range': f'bytes={start}-{start+n-1}'}, preload_content=False) + return self.http.request( + "GET", + self.url, + headers={"Range": f"bytes={start}-{start+n-1}"}, + preload_content=False, + ) def matching_files(self, *globs): for f in self.files.values(): @@ -265,7 +317,7 @@ def open(self, fn): if isinstance(fn, str): f = list(self.matching_files(fn)) if not 
f: - error(f'no files matching {fn}') + error(f"no files matching {fn}") f = f[0] else: f = fn @@ -273,14 +325,29 @@ def open(self, fn): sizeof_localhdr = struct.calcsize(self.fmt_localhdr) r = self.get_range(f.header_offset, sizeof_localhdr) localhdr = struct.unpack_from(self.fmt_localhdr, r.data) - magic, ver, flags, method, dos_datetime, _, _, uncomplen, fnlen, extralen = localhdr - if method == 0: # none - return self.get_range(f.header_offset + sizeof_localhdr + fnlen + extralen, f.compress_size) - elif method == 8: # DEFLATE - resp = self.get_range(f.header_offset + sizeof_localhdr + fnlen + extralen, f.compress_size) + ( + magic, + ver, + flags, + method, + dos_datetime, + _, + _, + uncomplen, + fnlen, + extralen, + ) = localhdr + if method == 0: # none + return self.get_range( + f.header_offset + sizeof_localhdr + fnlen + extralen, f.compress_size + ) + elif method == 8: # DEFLATE + resp = self.get_range( + f.header_offset + sizeof_localhdr + fnlen + extralen, f.compress_size + ) return io.BufferedReader(RemoteZipStream(resp, f)) else: - error(f'unknown compression method {method}') + error(f"unknown compression method {method}") def open_text(self, fn): return io.TextIOWrapper(self.open(fn)) @@ -298,7 +365,7 @@ def readable(self): def readinto(self, b): r = self.read(len(b)) - b[:len(r)] = r + b[: len(r)] = r return len(r) def read(self, n): @@ -315,10 +382,11 @@ def read(self, n): return ret - ### script start +### script start + class StreamProgress: - def __init__(self, fp, name='', total=0): + def __init__(self, fp, name="", total=0): self.name = name self.fp = fp self.total = total @@ -334,10 +402,12 @@ def read(self, n): self.last_update = now elapsed_s = now - self.start_time - sys.stderr.write(f'\r{elapsed_s:.0f}s {self.amtread/10**6:.02f}/{self.total/10**6:.02f}MB ({self.amtread/10**6/elapsed_s:.02f} MB/s) {self.name}') + sys.stderr.write( + f"\r{elapsed_s:.0f}s {self.amtread/10**6:.02f}/{self.total/10**6:.02f}MB 
({self.amtread/10**6/elapsed_s:.02f} MB/s) {self.name}" + ) if not r: - sys.stderr.write('\n') + sys.stderr.write("\n") return r @@ -347,14 +417,14 @@ def safelog(x): return 1 if x == 0 else math.ceil(math.log10(x)) digits_compr = max(safelog(f.compress_size) for f in rzf.infolist()) - digits_plain = max(safelog(f.file_size ) for f in rzf.infolist()) - fmtstr = f'%{digits_compr}d -> %{digits_plain}d\t%s' + digits_plain = max(safelog(f.file_size) for f in rzf.infolist()) + fmtstr = f"%{digits_compr}d -> %{digits_plain}d\t%s" for f in rzf.infolist(): - print(fmtstr % (f.compress_size, f.file_size, f.filename), file=sys.stderr) + log.info(fmtstr % (f.compress_size, f.file_size, f.filename)) def extract_one(outfile, rzf, f, ofname): - print(f'Extracting {f.filename} to {ofname}...', file=sys.stderr) + log.info(f"Extracting {f.filename} to {ofname}...") fp = StreamProgress(rzf.open(f), name=f.filename, total=f.compress_size) while r := fp.read(2**18): @@ -374,6 +444,5 @@ def download_file(f, rzf, args): else: path = path.name - with open(str(path), 'wb') as of: + with open(str(path), "wb") as of: extract_one(of, rzf, f, str(path)) - From e7ca8862faa6cdd9b39e21906bd29c4c2888c64c Mon Sep 17 00:00:00 2001 From: nesnoj Date: Mon, 20 Oct 2025 17:02:39 +0200 Subject: [PATCH 30/53] Logging: add formatter for debug messages #664 --- open_mastr/utils/config/logging.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/open_mastr/utils/config/logging.yml b/open_mastr/utils/config/logging.yml index 64a5ac75..67ce82b3 100644 --- a/open_mastr/utils/config/logging.yml +++ b/open_mastr/utils/config/logging.yml @@ -4,6 +4,8 @@ disable_existing_loggers: False formatters: standard: format: "%(asctime)s [%(levelname)s] %(message)s" + debug: + format: "%(asctime)s [%(levelname)s] %(name)s:%(funcName)s:%(lineno)d - %(message)s" handlers: console: From 2f7ba6c17de6349e8b5c95462b6387c38d660cbe Mon Sep 17 00:00:00 2001 From: nesnoj Date: Mon, 20 Oct 2025 17:03:49 +0200 Subject: [PATCH 31/53] 
Logging: set package log level instead of global log level #664 --- open_mastr/utils/config/logging.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/open_mastr/utils/config/logging.yml b/open_mastr/utils/config/logging.yml index 67ce82b3..64b038ec 100644 --- a/open_mastr/utils/config/logging.yml +++ b/open_mastr/utils/config/logging.yml @@ -14,14 +14,12 @@ handlers: class: "logging.StreamHandler" stream: "ext://sys.stdout" file: - class: "logging.FileHandler" level: "DEBUG" - formatter: "standard" + formatter: "debug" + class: "logging.FileHandler" mode: "a" -root: - level: "DEBUG" - loggers: open-MaStR: + level: "DEBUG" handlers: ["console", "file"] From 57a58969dc586f504493995b9d0d3af305fe62f5 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Mon, 20 Oct 2025 17:07:15 +0200 Subject: [PATCH 32/53] Logging: do not propagate messages to global logger #664 --- open_mastr/utils/config/logging.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/open_mastr/utils/config/logging.yml b/open_mastr/utils/config/logging.yml index 64b038ec..68a6999a 100644 --- a/open_mastr/utils/config/logging.yml +++ b/open_mastr/utils/config/logging.yml @@ -23,3 +23,4 @@ loggers: open-MaStR: level: "DEBUG" handlers: ["console", "file"] + propagate: no From 5d6d4d52aca92e5a48fda549fbea39206b230cff Mon Sep 17 00:00:00 2001 From: nesnoj Date: Mon, 20 Oct 2025 17:37:30 +0200 Subject: [PATCH 33/53] Logging: set default console log level to info #664 --- open_mastr/utils/config/logging.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/open_mastr/utils/config/logging.yml b/open_mastr/utils/config/logging.yml index 68a6999a..c1b4c29b 100644 --- a/open_mastr/utils/config/logging.yml +++ b/open_mastr/utils/config/logging.yml @@ -21,6 +21,6 @@ handlers: loggers: open-MaStR: - level: "DEBUG" + level: "INFO" handlers: ["console", "file"] propagate: no From bf7e46362dc29f8b1858d46b49403ec16967580c Mon Sep 17 00:00:00 2001 From: nesnoj Date: Mon, 20 Oct 2025 
17:37:52 +0200 Subject: [PATCH 34/53] Add splash screen --- open_mastr/mastr.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index f5cffd7d..0cc16f21 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -93,6 +93,9 @@ def __init__(self, engine="sqlite", connect_to_translated_db=False) -> None: self.engine = create_database_engine(engine, self._sqlite_folder_path) log.info( + "\n==================================================\n" + "---------> open-MaStR started <---------\n" + "==================================================\n" f"Data will be written to the following database: {self.engine.url}\n" "If you run into problems, try to " "delete the database and update the package by running " From 0ff6c8154b54a9250cef74a65005616fc1b26891 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Mon, 20 Oct 2025 17:49:30 +0200 Subject: [PATCH 35/53] Logging: extend instructions in docs #664 --- docs/advanced.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/advanced.md b/docs/advanced.md index ec4ffd39..b5441632 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -83,6 +83,19 @@ The project home directory is structured as follows (files and folders below `da For the download via the API, logs are stored in a single file in `/$HOME//.open-MaStR/logs/open_mastr.log`. New logging messages are appended. It is recommended to delete the log file from time to time because of its required disk space. +By default, the log level is set to `INFO`. You can increase or decrease the verbosity by either changing `logging.yml` (see above) +or adjusting it manually in your code. E.g. 
to enable `DEBUG` messages in `open_mastr.log` you can use the following snippet: + +```python + + import logging + from open_mastr import Mastr + + # Increase to DEBUG to show more details in open_mastr.log + # Must be called after importing open_mastr to have the open-MaStR logger imported + logging.getLogger("open-MaStR").setLevel(logging.DEBUG) +``` + ### Data From 2899552918cb0c621c1456497017279c66a9ac00 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Mon, 20 Oct 2025 17:52:06 +0200 Subject: [PATCH 36/53] Logging: extend instructions in docs #664 --- docs/advanced.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/advanced.md b/docs/advanced.md index b5441632..7aa5d9b7 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -63,7 +63,7 @@ The project home directory is structured as follows (files and folders below `da File names are defined here. * `logging.yml`
Logging configuration. For changing the log level to increase or decrease details of log - messages, edit the level of the handlers. + messages, edit the level of the handlers. See below for details on logging. * **data** * `dataversion-`
Contains exported data as csv files from method [`to_csv`][open_mastr.Mastr.to_csv] From ca2229cc2f1f7c8bf4e5b67fbedf6520b39bba67 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 07:05:52 +0200 Subject: [PATCH 37/53] Add option to keep old zip files on download #564 --- open_mastr/mastr.py | 9 ++++++- .../xml_download/utils_download_bulk.py | 26 ++++++++++++++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index fd71fc32..a9c9d999 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -107,6 +107,7 @@ def download( data=None, date=None, bulk_cleansing=True, + keep_old_downloads: bool = False, api_processes=None, api_limit=50, api_chunksize=1000, @@ -168,6 +169,8 @@ def download( In its original format, many entries in the MaStR are encoded with IDs. Columns like `state` or `fueltype` do not contain entries such as "Hessen" or "Braunkohle", but instead only contain IDs. Cleansing replaces these IDs with their corresponding original entries. + keep_old_downloads: bool + If set to True, prior downloaded MaStR zip files will be kept. api_processes : int or None or "max", optional Number of parallel processes used to download additional data. Defaults to `None`. 
If set to "max", the maximum number of possible processes @@ -235,7 +238,11 @@ def download( ) delete_zip_file_if_corrupted(zipped_xml_file_path) - delete_xml_files_not_from_given_date(zipped_xml_file_path, xml_folder_path) + delete_xml_files_not_from_given_date( + zipped_xml_file_path, + xml_folder_path, + keep_old_downloads, + ) download_xml_Mastr(zipped_xml_file_path, date, data, xml_folder_path) diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index 10f60e3d..aeaaccff 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -123,7 +123,11 @@ def download_xml_Mastr( Parameters ----------- save_path: str - The path where the downloaded MaStR zipped folder will be saved. + Full file path where the downloaded MaStR zip file will be saved. + bulk_date_string: str + Date for which the file should be downloaded. + xml_folder_path: str + Path where the downloaded MaStR zip file will be saved. """ print_message = "Starting the Download from marktstammdatenregister.de." @@ -197,16 +201,30 @@ def check_download_completeness( return list(missing_data_set), is_katalogwerte_existing -def delete_xml_files_not_from_given_date(save_path: str, xml_folder_path: str): +def delete_xml_files_not_from_given_date( + save_path: str, + xml_folder_path: str, + keep_old_downloads: bool = False, +) -> None: """ Delete xml files that are not corresponding to the given date. Assumes that the xml folder only contains one zipfile. + + Parameters + ---------- + save_path: str + Full file path where the downloaded MaStR zip file will be saved. + xml_folder_path: str + Path where the downloaded MaStR zip file will be saved. + keep_old_downloads: bool + If set to True, prior downloaded MaStR zip files will be kept. 
""" if os.path.exists(save_path): return else: - shutil.rmtree(xml_folder_path) - os.makedirs(xml_folder_path) + if not keep_old_downloads: + shutil.rmtree(xml_folder_path) + os.makedirs(xml_folder_path) def partial_download_with_unzip_http(save_path: str, url: str, bulk_data_list: list): From 8a6911fb930683682ed220e1c25e423469dcf628 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 09:07:47 +0200 Subject: [PATCH 38/53] Complete docstring #564 --- open_mastr/xml_download/utils_download_bulk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index aeaaccff..9069908c 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -126,6 +126,8 @@ def download_xml_Mastr( Full file path where the downloaded MaStR zip file will be saved. bulk_date_string: str Date for which the file should be downloaded. + bulk_data_list: list + List of tables/technologis to be downloaded. xml_folder_path: str Path where the downloaded MaStR zip file will be saved. 
""" From 7200b29dfa8e515120d555e419d43a6f3de4e29b Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 09:10:09 +0200 Subject: [PATCH 39/53] Add technology checks to full bulk download: do not download if data is present #668 --- .../xml_download/utils_download_bulk.py | 37 +++++++++++++++++-- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index 9069908c..2469141d 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -168,13 +168,13 @@ def download_xml_Mastr( return if bulk_data_list == BULK_DATA: - full_download_without_unzip_http(save_path, r) + full_download_without_unzip_http(save_path, r, bulk_data_list) else: try: partial_download_with_unzip_http(save_path, url, bulk_data_list) except Exception as e: log.warning(f"Partial download failed, fallback to full download: {e}") - full_download_without_unzip_http(save_path, r) + full_download_without_unzip_http(save_path, r, bulk_data_list) time_b = time.perf_counter() print(f"Download is finished. It took {int(np.around(time_b - time_a))} seconds.") @@ -271,7 +271,38 @@ def partial_download_with_unzip_http(save_path: str, url: str, bulk_data_list: l remote_zip_file.extractzip("Katalogwerte.xml", path=Path(save_path)) -def full_download_without_unzip_http(save_path: str, r: requests.models.Response): +def full_download_without_unzip_http( + save_path: str, + r: requests.models.Response, + bulk_data_list: list, +) -> None: + """ + + Parameters + ---------- + save_path: str + Full file path where the downloaded MaStR zip file will be saved. + r: requests.models.Response + Response from making a request to MaStR. + bulk_data_list: list + List of tables/technologis to be downloaded. 
+ + Returns + ------- + None + """ + if os.path.exists(save_path): + bulk_data_list, is_katalogwerte_existing = check_download_completeness( + save_path, bulk_data_list + ) + if bool(bulk_data_list): + print( + f"MaStR file already present but missing the following data: {bulk_data_list}" + ) + else: + print(f"MaStR file already present: {save_path}") + return None + warning_message = ( "Warning: The servers from MaStR restrict the download speed." " You may want to download it another time." From 33903e033079e0dcb0ba729eb746bb6abb6b2259 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 09:11:04 +0200 Subject: [PATCH 40/53] Complete docstring and adjust messages #564 --- .../xml_download/utils_download_bulk.py | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index 2469141d..a18f4111 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -230,15 +230,32 @@ def delete_xml_files_not_from_given_date( def partial_download_with_unzip_http(save_path: str, url: str, bulk_data_list: list): + """ + + Parameters + ---------- + save_path: str + Full file path where the downloaded MaStR zip file will be saved. + url: str + URL path to bulk file. + bulk_data_list: list + List of tables/technologies to be downloaded.
+ + Returns + ------- + None + """ is_katalogwerte_existing = False if os.path.exists(save_path): bulk_data_list, is_katalogwerte_existing = check_download_completeness( save_path, bulk_data_list ) if bool(bulk_data_list): - print(f"MaStR is missing the following data: {bulk_data_list}") + print( + f"MaStR file already present but missing the following data: {bulk_data_list}" + ) else: - print("MaStR already downloaded.") + print(f"MaStR file already present: {save_path}") return None remote_zip_file = unzip_http.RemoteZipFile(url) From 95f2dab2746c2ff852addde55fff6c09b274eb6e Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 09:14:44 +0200 Subject: [PATCH 41/53] Update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 725f1ce6..66ff8971 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,10 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ [#636](https://github.com/OpenEnergyPlatform/open-MaStR/pull/636) - Change print statement about data cleansing [#650](https://github.com/OpenEnergyPlatform/open-MaStR/pull/650) +- Several improvements in XML download: Support retaining old bulk XML files; + Prevent XML file deletion on full download; Add technology checks to full + bulk download + [#667](https://github.com/OpenEnergyPlatform/open-MaStR/pull/667) ### Removed From d1206f0f8a1885200229b74bdeaeb5a021c74a24 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 09:30:15 +0200 Subject: [PATCH 42/53] Adjust docs on partial downloads --- docs/advanced.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/advanced.md b/docs/advanced.md index ec4ffd39..f78aafeb 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -148,7 +148,7 @@ If needed, the tables in the database can be obtained as csv files. 
Those files === "Disadvantages" * No single tables or entries can be downloaded - * Download takes long time + * Download takes long time (you can use the partial download though, see [Getting Started](getting_started.md#bulk-download)) ## SOAP API download From b0193548e4b78e1d17ad46c8616f59e1ba046760 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 09:45:44 +0200 Subject: [PATCH 43/53] Extend docs #564 --- docs/advanced.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/advanced.md b/docs/advanced.md index f78aafeb..0cbc9ae6 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -150,6 +150,9 @@ If needed, the tables in the database can be obtained as csv files. Those files * No single tables or entries can be downloaded * Download takes long time (you can use the partial download though, see [Getting Started](getting_started.md#bulk-download)) +**Note**: By default, existing zip files in `$HOME/.open-MaStR/data/xml_download` are deleted when a new file is +downloaded. You can change this behavior by setting `keep_old_downloads`=True in +[`Mastr.download()`][open_mastr.Mastr.download]. 
## SOAP API download From c51f3791ca928ef369fcc5f984f745e2c1fe7dee Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 09:49:32 +0200 Subject: [PATCH 44/53] Update changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66ff8971..dbd12aa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,8 +17,8 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ [#636](https://github.com/OpenEnergyPlatform/open-MaStR/pull/636) - Change print statement about data cleansing [#650](https://github.com/OpenEnergyPlatform/open-MaStR/pull/650) -- Several improvements in XML download: Support retaining old bulk XML files; - Prevent XML file deletion on full download; Add technology checks to full +- Several improvements in bulk download: Support retaining old zip bulk files; + Prevent zip file deletion on full download; Add technology checks to full bulk download [#667](https://github.com/OpenEnergyPlatform/open-MaStR/pull/667) ### Removed From b5a4aacbe917d417831892d542b529c722dd385b Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 15:34:55 +0200 Subject: [PATCH 45/53] Add fixture zipped_xml_file_path to test_mastr.py --- tests/test_mastr.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_mastr.py b/tests/test_mastr.py index df4a3a20..4fe446de 100644 --- a/tests/test_mastr.py +++ b/tests/test_mastr.py @@ -14,6 +14,16 @@ _xml_file_exists = True +@pytest.fixture(scope="module") +def zipped_xml_file_path(): + zipped_xml_file_path = None + for entry in os.scandir(path=_xml_folder_path): + if "Gesamtdatenexport" in entry.name: + zipped_xml_file_path = os.path.join(_xml_folder_path, entry.name) + + return zipped_xml_file_path + + @pytest.fixture def db_path(): return os.path.join( From 9a007cabdc54e0c4b420d7815685df2bf9fa63c1 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 15:36:43 +0200 Subject: [PATCH 46/53] Add test: check if 
keeping old downloads works #564 --- tests/test_mastr.py | 17 +++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_mastr.py b/tests/test_mastr.py index 4fe446de..16f7c1f6 100644 --- a/tests/test_mastr.py +++ b/tests/test_mastr.py @@ -1,10 +1,14 @@ +import shutil + from open_mastr.mastr import Mastr import os +import re import sqlalchemy import pytest from os.path import expanduser import pandas as pd from open_mastr.utils.constants import TRANSLATIONS +from datetime import date, timedelta _xml_file_exists = False _xml_folder_path = os.path.join(expanduser("~"), ".open-MaStR", "data", "xml_download") @@ -83,6 +87,7 @@ def test_Mastr_translate(db_translated, db_path): assert pd.read_sql(sql=table, con=db_empty.engine).shape[0] == 0 +@pytest.mark.dependency(name="bulk_downloaded") def test_mastr_download(db): db.download(data="wind") df_wind = pd.read_sql("wind_extended", con=db.engine) @@ -92,3 +97,15 @@ def test_mastr_download(db): df_biomass = pd.read_sql("biomass_extended", con=db.engine) assert len(df_wind) > 10000 assert len(df_biomass) > 10000 + + +@pytest.mark.dependency(depends=["bulk_downloaded"]) +def test_mastr_download_keep_old_files(db, zipped_xml_file_path): + file_today = zipped_xml_file_path + yesterday = (date.today() - timedelta(days=1)).strftime("%Y%m%d") + file_old = re.sub(r"\d{8}", yesterday, os.path.basename(file_today)) + file_old = os.path.join(os.path.dirname(zipped_xml_file_path), file_old) + shutil.copy(file_today, file_old) + db.download(data="gsgk", keep_old_downloads=True) + + assert os.path.exists(file_old) From d6ef3308aff9823e1e9a0e94c6fb191814e1d5c1 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 16:06:21 +0200 Subject: [PATCH 47/53] Set number of parallel CI jobs to 1 This prevents being blocked by the MaStR server due to parallel HTTP requests by GH actions --- .github/workflows/ci-develop.yml | 1 + .github/workflows/ci-production.yml | 1 + 2 files changed, 2 insertions(+) diff --git
a/.github/workflows/ci-develop.yml b/.github/workflows/ci-develop.yml index 6a7a6457..5794d864 100644 --- a/.github/workflows/ci-develop.yml +++ b/.github/workflows/ci-develop.yml @@ -13,6 +13,7 @@ jobs: runs-on: ${{ matrix.os }} if: ${{ !github.event.pull_request.draft }} strategy: + max-parallel: 1 matrix: os: [macos-latest, ubuntu-latest, windows-latest] python-version: ['3.10', '3.11', '3.12'] diff --git a/.github/workflows/ci-production.yml b/.github/workflows/ci-production.yml index 16065860..5c4ffc3b 100644 --- a/.github/workflows/ci-production.yml +++ b/.github/workflows/ci-production.yml @@ -13,6 +13,7 @@ jobs: runs-on: ${{ matrix.os }} if: ${{ !github.event.pull_request.draft }} strategy: + max-parallel: 1 matrix: os: [macos-latest, ubuntu-latest, windows-latest] python-version: ['3.10', '3.11', '3.12'] From 0f53502c9b18353064009ef653694a80ece26dbc Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 21 Oct 2025 16:11:33 +0200 Subject: [PATCH 48/53] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 725f1ce6..0eb6fa2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ [#636](https://github.com/OpenEnergyPlatform/open-MaStR/pull/636) - Change print statement about data cleansing [#650](https://github.com/OpenEnergyPlatform/open-MaStR/pull/650) +- Limit number of parallel CI jobs + [#669](https://github.com/OpenEnergyPlatform/open-MaStR/pull/669) ### Removed From 846d500e436be1064e54ea00d4b077ca571ef310 Mon Sep 17 00:00:00 2001 From: nesnoj Date: Sat, 25 Oct 2025 07:02:53 +0200 Subject: [PATCH 49/53] Move check of keep_old_downloads outside of function #564 --- open_mastr/mastr.py | 10 +++++----- open_mastr/xml_download/utils_download_bulk.py | 8 ++------ 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index a9c9d999..2f0ffe9f 100644 --- 
a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -238,11 +238,11 @@ def download( ) delete_zip_file_if_corrupted(zipped_xml_file_path) - delete_xml_files_not_from_given_date( - zipped_xml_file_path, - xml_folder_path, - keep_old_downloads, - ) + if not keep_old_downloads: + delete_xml_files_not_from_given_date( + zipped_xml_file_path, + xml_folder_path, + ) download_xml_Mastr(zipped_xml_file_path, date, data, xml_folder_path) diff --git a/open_mastr/xml_download/utils_download_bulk.py b/open_mastr/xml_download/utils_download_bulk.py index a18f4111..50158117 100644 --- a/open_mastr/xml_download/utils_download_bulk.py +++ b/open_mastr/xml_download/utils_download_bulk.py @@ -206,7 +206,6 @@ def check_download_completeness( def delete_xml_files_not_from_given_date( save_path: str, xml_folder_path: str, - keep_old_downloads: bool = False, ) -> None: """ Delete xml files that are not corresponding to the given date. @@ -218,15 +217,12 @@ def delete_xml_files_not_from_given_date( Full file path where the downloaded MaStR zip file will be saved. xml_folder_path: str Path where the downloaded MaStR zip file will be saved. - keep_old_downloads: bool - If set to True, prior downloaded MaStR zip files will be kept. 
""" if os.path.exists(save_path): return else: - if not keep_old_downloads: - shutil.rmtree(xml_folder_path) - os.makedirs(xml_folder_path) + shutil.rmtree(xml_folder_path) + os.makedirs(xml_folder_path) def partial_download_with_unzip_http(save_path: str, url: str, bulk_data_list: list): From 2d1b7151ed7836cff8c60e0ecd4a1feaa59d6b08 Mon Sep 17 00:00:00 2001 From: Florian Kotthoff <74312290+FlorianK13@users.noreply.github.com> Date: Tue, 11 Nov 2025 09:49:37 +0100 Subject: [PATCH 50/53] Repair or delete broken links #679 --- README.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 9bbc4dbb..7e15968e 100644 --- a/README.rst +++ b/README.rst @@ -108,7 +108,6 @@ These projects already use open-mastr: - `Wasserstoffatlas `_ - `EE-Status App `_ - `Digiplan Anhalt `_ -- `Data Quality Assessment of the MaStR `_ - `EmPowerPlan `_ - `Goal100 Monitor `_ @@ -119,7 +118,6 @@ changes in a `Pull Request `_. - The `bundesAPI/Marktstammdaten-API `_ is another implementation to access data via an official API. Collaboration @@ -146,7 +144,7 @@ Data .. |badge_license| image:: https://img.shields.io/github/license/OpenEnergyPlatform/open-MaStR - :target: LICENSE.txt + :target: LICENSE.md :alt: License .. |badge_rtd| image:: https://readthedocs.org/projects/open-mastr/badge/?style=flat From cffe9eb130d0b728693e51bb2f75c6b55e0f18e0 Mon Sep 17 00:00:00 2001 From: Florian Kotthoff <74312290+FlorianK13@users.noreply.github.com> Date: Tue, 11 Nov 2025 09:50:40 +0100 Subject: [PATCH 51/53] Fix formatting in README #679 --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index 7e15968e..80250d3a 100644 --- a/README.rst +++ b/README.rst @@ -118,6 +118,7 @@ changes in a `Pull Request `_ is another implementation to access data via an official API. 
Collaboration From 0bce4ea51a8f0805faa02f4a65c3b9fa3520b85c Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 25 Nov 2025 09:28:12 +0100 Subject: [PATCH 52/53] Version update v0.16.0 --- .bumpversion.cfg | 2 +- .github/workflows/ci-production.yml | 2 +- CHANGELOG.md | 2 +- CITATION.cff | 4 ++-- pyproject.toml | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 6e37a578..f22ed637 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.15.0 +current_version = 0.16.0 parse = (?P\d+)\.(?P\d+)\.(?P\d+)((?P(a|na))+(?P\d+))? serialize = {major}.{minor}.{patch}{release}{build} diff --git a/.github/workflows/ci-production.yml b/.github/workflows/ci-production.yml index 5c4ffc3b..ce18c2e9 100644 --- a/.github/workflows/ci-production.yml +++ b/.github/workflows/ci-production.yml @@ -33,7 +33,7 @@ jobs: - name: create package run: python -m build --sdist - name: import open-mastr - run: python -m pip install ./dist/open_mastr-0.15.0.tar.gz + run: python -m pip install ./dist/open_mastr-0.16.0.tar.gz - name: Create credentials file env: MASTR_TOKEN: ${{ secrets.MASTR_TOKEN }} diff --git a/CHANGELOG.md b/CHANGELOG.md index e950fd7d..e9e1281a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ For each version important additions, changes and removals are listed here. The format is inspired from [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
-## [v0.XX.X] unreleased - 202X-XX-XX +## [v0.16.0] PartialPumpkinPull - 2025-11-26 ### Added - Add partial bulk download [#652](https://github.com/OpenEnergyPlatform/open-MaStR/pull/652) diff --git a/CITATION.cff b/CITATION.cff index 99458ea3..d496ecf2 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -41,7 +41,7 @@ authors: title: "open-MaStR" type: software license: AGPL-3.0 -version: 0.15.0 +version: 0.16.0 doi: -date-released: 2025-04-19 +date-released: 2025-11-26 url: "https://github.com/OpenEnergyPlatform/open-MaStR/" diff --git a/pyproject.toml b/pyproject.toml index a4fcb367..5871bfbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "open_mastr" -version = "0.15.0" +version = "0.16.0" dependencies = [ "pandas>=2.2.2", "numpy", @@ -79,4 +79,4 @@ open_mastr = [ include = ["open_mastr", "open_mastr.soap_api", "open_mastr.soap_api.metadata", "open_mastr.utils", "open_mastr.utils.config", "open_mastr.xml_download"] # package names should match these glob patterns (["*"] by default) # from setup.py - not yet included in here -# download_url="https://github.com/OpenEnergyPlatform/open-MaStR/archive""/refs/tags/v0.15.0.tar.gz", +# download_url="https://github.com/OpenEnergyPlatform/open-MaStR/archive""/refs/tags/v0.16.0.tar.gz", From dde75165584a2450ecdcf47bd7fcb0ec3e480eee Mon Sep 17 00:00:00 2001 From: nesnoj Date: Tue, 25 Nov 2025 15:02:39 +0100 Subject: [PATCH 53/53] Change release title --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9e1281a..a6382314 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ For each version important additions, changes and removals are listed here. The format is inspired from [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
-## [v0.16.0] PartialPumpkinPull - 2025-11-26 +## [v0.16.0] Partial downloads with open-MaStR PartialPumpkinPull - 2025-11-26 ### Added - Add partial bulk download [#652](https://github.com/OpenEnergyPlatform/open-MaStR/pull/652)