From 09831947f22ec40c9b1ccab6964c514903eab6de Mon Sep 17 00:00:00 2001 From: Simon Will Date: Thu, 6 Nov 2025 09:05:54 +0100 Subject: [PATCH] Add option to not create database tables #675 --- CHANGELOG.md | 2 + docs/advanced.md | 26 +++++++--- open_mastr/mastr.py | 31 +++++++++--- .../xml_download/utils_write_to_database.py | 47 +++++++++++++++---- .../test_utils_write_to_database.py | 6 ++- 5 files changed, 88 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ca83c8a..89daca36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/ ## [v0.XX.X] unreleased - 202X-XX-XX ### Added +- Add option to not create database tables + [#676](https://github.com/OpenEnergyPlatform/open-MaStR/pull/676) - Add partial bulk download [#652](https://github.com/OpenEnergyPlatform/open-MaStR/pull/652) ### Changed diff --git a/docs/advanced.md b/docs/advanced.md index 7aa5d9b7..ccaa0f3d 100644 --- a/docs/advanced.md +++ b/docs/advanced.md @@ -5,7 +5,6 @@ or the [SOAP API download](#soap-api-download). ## Configuration ### Database settings - Configure your database with the `engine` parameter of [`Mastr`][open_mastr.Mastr]. It defines the engine of the database where the MaStR is mirrored to. Default is 'sqlite'. @@ -20,14 +19,27 @@ The possible databases are: `open-mastr-db`. Make sure it exists and the user has sufficient permissions. ```python +from sqlalchemy import create_engine +from open_mastr import Mastr + +# SQLite DB +engine_sqlite = create_engine("sqlite:///path/to/sqlite/database.db") +# postgreSQL DB +engine_postgres = create_engine("postgresql+psycopg2://open-mastr:open-mastr-pw@localhost:55443/open-mastr-db") +db = Mastr(engine=engine_sqlite) +``` - from sqlalchemy import create_engine +By default, the Mastr object will create all database tables necessary for storing MaStR data. 
+If you want to prepare the database yourself and don't want Mastr to create or alter your tables, you can configure +Mastr for that. In this case, you must make sure that your tables are actually suited for storing the MaStR data. +Otherwise, storing will fail. + +```python +from sqlalchemy import create_engine +from open_mastr import Mastr - # SQLite DB - engine_sqlite = create_engine("sqlite:///path/to/sqlite/database.db") - # postgreSQL DB - engine_postgres = create_engine("postgresql+psycopg2://open-mastr:open-mastr-pw@localhost:55443/open-mastr-db") - db = Mastr(engine=engine_sqlite) +engine_postgres = create_engine("postgresql+psycopg2://open-mastr:open-mastr-pw@localhost:55443/open-mastr-db") +db = Mastr(engine=engine_postgres, create_and_alter_database_tables=False) ``` ### Project directory diff --git a/open_mastr/mastr.py b/open_mastr/mastr.py index 0cc16f21..d9c5c024 100644 --- a/open_mastr/mastr.py +++ b/open_mastr/mastr.py @@ -69,14 +69,20 @@ class Mastr: Defines the engine of the database where the MaStR is mirrored to. Default is 'sqlite'. connect_to_translated_db: boolean, optional - Allows connection to an existing translated database. Default is 'False'. - Only for 'sqlite'-type engines. - - - + Allows connection to an existing translated database. Default is 'False'. + Only for 'sqlite'-type engines. + create_and_alter_database_tables: boolean, optional + Automatically creates the database tables necessary for storing the MaStR data. + Default is 'True'. Set this to 'False' if you prepare the database for the download + yourself and don't want this class to touch your database definitions. 
""" - def __init__(self, engine="sqlite", connect_to_translated_db=False) -> None: + def __init__( + self, + engine="sqlite", + connect_to_translated_db=False, + create_and_alter_database_tables=True, + ) -> None: validate_parameter_format_for_mastr_init(engine) self.output_dir = get_output_dir() @@ -102,7 +108,9 @@ def __init__(self, engine="sqlite", connect_to_translated_db=False) -> None: "'pip install --upgrade open-mastr'\n" ) - orm.Base.metadata.create_all(self.engine) + self.create_and_alter_database_tables = create_and_alter_database_tables + if self.create_and_alter_database_tables: + orm.Base.metadata.create_all(self.engine) def download( self, @@ -254,6 +262,7 @@ def download( data=data, bulk_cleansing=bulk_cleansing, bulk_download_date=bulk_download_date, + create_and_alter_database_tables=self.create_and_alter_database_tables, ) if method == "API": @@ -417,7 +426,15 @@ def translate(self) -> None: print(df.head(10)) ``` + This method will only work with SQLite databases and if :class:`Mastr` + is constructed with :attr:`Mastr.create_and_alter_database_tables` set + to True (the default). + """ + if not self.create_and_alter_database_tables: + raise ValueError( + "Translating the database always includes altering tables." 
+ ) if "sqlite" not in self.engine.dialect.name: raise ValueError("engine has to be of type 'sqlite'") diff --git a/open_mastr/xml_download/utils_write_to_database.py b/open_mastr/xml_download/utils_write_to_database.py index e71abc18..896c25ae 100644 --- a/open_mastr/xml_download/utils_write_to_database.py +++ b/open_mastr/xml_download/utils_write_to_database.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd import sqlalchemy -from sqlalchemy import select, create_engine, inspect +from sqlalchemy import delete, select, create_engine, inspect from sqlalchemy.sql import text from sqlalchemy.sql.sqltypes import Date, DateTime @@ -28,6 +28,7 @@ def write_mastr_xml_to_database( data: list, bulk_cleansing: bool, bulk_download_date: str, + create_and_alter_database_tables: bool, ) -> None: """Write the Mastr in xml format into a database defined by the engine parameter.""" log.info("Starting bulk download...") @@ -55,6 +56,7 @@ def write_mastr_xml_to_database( zipped_xml_file_path, bulk_download_date, bulk_cleansing, + create_and_alter_database_tables, ) ) @@ -107,6 +109,7 @@ def process_xml_file( zipped_xml_file_path: str, bulk_download_date: str, bulk_cleansing: bool, + create_and_alter_database_tables: bool, ) -> None: """Process a single xml file and write it to the database.""" try: @@ -122,8 +125,12 @@ def process_xml_file( with ZipFile(zipped_xml_file_path, "r") as f: log.info(f"Processing file '{file_name}'...") if is_first_file(file_name): - log.info(f"Creating table '{sql_table_name}'...") - create_database_table(engine, xml_table_name) + if create_and_alter_database_tables: + log.info(f"Creating table '{sql_table_name}'...") + create_database_table(engine, xml_table_name) + else: + log.info(f"Deleting all data from table '{sql_table_name}'...") + delete_data_from_database_table(engine, xml_table_name) df = read_xml_file(f, file_name) df = process_table_before_insertion( df, @@ -133,10 +140,20 @@ def process_xml_file( bulk_cleansing, ) if 
engine.dialect.name == "sqlite": - add_table_to_sqlite_database(df, xml_table_name, sql_table_name, engine) + add_table_to_sqlite_database( + df, + xml_table_name, + sql_table_name, + engine, + create_and_alter_database_tables, + ) else: add_table_to_non_sqlite_database( - df, xml_table_name, sql_table_name, engine + df, + xml_table_name, + sql_table_name, + engine, + create_and_alter_database_tables, ) except Exception as e: log.error(f"Error processing file '{file_name}': '{e}'") @@ -245,6 +262,14 @@ def create_database_table( orm_class.__table__.create(engine) +def delete_data_from_database_table( + engine: sqlalchemy.engine.Engine, xml_table_name: str +) -> None: + orm_class = tablename_mapping[xml_table_name]["__class__"] + with engine.begin() as conn: + conn.execute(delete(orm_class.__table__)) + + def is_first_file(file_name: str) -> bool: """check if the file name indicates that it is the first file from the table""" return ( @@ -345,6 +370,7 @@ def add_table_to_non_sqlite_database( xml_table_name: str, sql_table_name: str, engine: sqlalchemy.engine.Engine, + add_missing_columns: bool, ) -> None: # get a dictionary for the data types table_columns_list = list( @@ -359,9 +385,10 @@ def add_table_to_non_sqlite_database( # Convert date and datetime columns into the datatype datetime. df = cast_date_columns_to_datetime(xml_table_name, df) - add_missing_columns_to_table( - engine, xml_table_name, column_list=df.columns.tolist() - ) + if add_missing_columns: + add_missing_columns_to_table( + engine, xml_table_name, column_list=df.columns.tolist() + ) for _ in range(10000): try: @@ -584,9 +611,11 @@ def add_table_to_sqlite_database( xml_table_name: str, sql_table_name: str, engine: sqlalchemy.engine.Engine, + add_missing_columns: bool, ) -> None: column_list = df.columns.tolist() - add_missing_columns_to_table(engine, xml_table_name, column_list) + if add_missing_columns: + add_missing_columns_to_table(engine, xml_table_name, column_list) # Convert NaNs to None. 
df = df.where(pd.notnull(df), None) diff --git a/tests/xml_download/test_utils_write_to_database.py b/tests/xml_download/test_utils_write_to_database.py index 75243b1a..c5af819b 100644 --- a/tests/xml_download/test_utils_write_to_database.py +++ b/tests/xml_download/test_utils_write_to_database.py @@ -390,7 +390,11 @@ def test_add_table_to_sqlite_database(engine_testdb, add_table_to_database_funct ) add_table_to_database_function( - df, "anlageneeggeothermiegrubengasdruckentspannung", "gsgk_eeg", engine_testdb + df, + "anlageneeggeothermiegrubengasdruckentspannung", + "gsgk_eeg", + engine_testdb, + True, ) with engine_testdb.connect() as con: with con.begin():