Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/

## [v0.XX.X] unreleased - 202X-XX-XX
### Added
- Add option to not create database tables
[#676](https://github.com/OpenEnergyPlatform/open-MaStR/pull/676)

### Changed

Expand Down
26 changes: 19 additions & 7 deletions docs/advanced.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ or the [SOAP API download](#soap-api-download).
## Configuration
### Database settings


Configure your database with the `engine` parameter of [`Mastr`][open_mastr.Mastr].
It defines the engine of the database where the MaStR is mirrored to. Default is 'sqlite'.

Expand All @@ -20,14 +19,27 @@ The possible databases are:
`open-mastr-db`. Make sure it exists and the user has sufficient permissions.

```python
from sqlalchemy import create_engine
from open_mastr import Mastr

# SQLite DB
engine_sqlite = create_engine("sqlite:///path/to/sqlite/database.db")
# postgreSQL DB
engine_postgres = create_engine("postgresql+psycopg2://open-mastr:open-mastr-pw@localhost:55443/open-mastr-db")
db = Mastr(engine=engine_sqlite)
```

from sqlalchemy import create_engine
By default, the Mastr object will create all database tables necessary for storing MaStR data.
If you want to prepare the database yourself and don't want Mastr to create or alter your tables, pass
`create_and_alter_database_tables=False` when constructing Mastr. In this case, you must make sure that your
tables are actually suited for storing the MaStR data. Otherwise, storing will fail.

```python
from sqlalchemy import create_engine
from open_mastr import Mastr

# SQLite DB
engine_sqlite = create_engine("sqlite:///path/to/sqlite/database.db")
# postgreSQL DB
engine_postgres = create_engine("postgresql+psycopg2://open-mastr:open-mastr-pw@localhost:55443/open-mastr-db")
db = Mastr(engine=engine_sqlite)
engine_postgres = create_engine("postgresql+psycopg2://open-mastr:open-mastr-pw@localhost:55443/open-mastr-db")
db = Mastr(engine=engine_postgres, create_and_alter_database_tables=False)
```

### Project directory
Expand Down
31 changes: 24 additions & 7 deletions open_mastr/mastr.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,20 @@ class Mastr:
Defines the engine of the database where the MaStR is mirrored to.
Default is 'sqlite'.
connect_to_translated_db: boolean, optional
Allows connection to an existing translated database. Default is 'False'.
Only for 'sqlite'-type engines.



Allows connection to an existing translated database. Default is 'False'.
Only for 'sqlite'-type engines.
create_and_alter_database_tables: boolean, optional
Automatically creates the database tables necessary for storing the MaStR data
and adds missing columns to them.
Default is 'True'. Set this to 'False' if you prepare the database for the download
yourself and don't want this class to touch your database definitions.
"""

def __init__(self, engine="sqlite", connect_to_translated_db=False) -> None:
def __init__(
self,
engine="sqlite",
connect_to_translated_db=False,
create_and_alter_database_tables=True,
) -> None:
validate_parameter_format_for_mastr_init(engine)

self.output_dir = get_output_dir()
Expand All @@ -102,7 +108,9 @@ def __init__(self, engine="sqlite", connect_to_translated_db=False) -> None:
"'pip install --upgrade open-mastr'\n"
)

orm.Base.metadata.create_all(self.engine)
self.create_and_alter_database_tables = create_and_alter_database_tables
if self.create_and_alter_database_tables:
orm.Base.metadata.create_all(self.engine)

def download(
self,
Expand Down Expand Up @@ -261,6 +269,7 @@ def download(
data=data,
bulk_cleansing=bulk_cleansing,
bulk_download_date=bulk_download_date,
create_and_alter_database_tables=self.create_and_alter_database_tables,
)

if method == "API":
Expand Down Expand Up @@ -424,7 +433,15 @@ def translate(self) -> None:
print(df.head(10))
```

This method will only work with SQLite databases and if :class:`Mastr`
is constructed with :attr:`Mastr.create_and_alter_database_tables` set
to True (the default), because translating always alters tables.
"""
if not self.create_and_alter_database_tables:
raise ValueError(
"Translating the database always includes altering tables."
" So this is incompatible with the option `create_and_alter_database_tables`."
)

if "sqlite" not in self.engine.dialect.name:
raise ValueError("engine has to be of type 'sqlite'")
Expand Down
47 changes: 38 additions & 9 deletions open_mastr/xml_download/utils_write_to_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np
import pandas as pd
import sqlalchemy
from sqlalchemy import select, create_engine, inspect
from sqlalchemy import delete, select, create_engine, inspect
from sqlalchemy.sql import text
from sqlalchemy.sql.sqltypes import Date, DateTime

Expand All @@ -28,6 +28,7 @@ def write_mastr_xml_to_database(
data: list,
bulk_cleansing: bool,
bulk_download_date: str,
create_and_alter_database_tables: bool,
) -> None:
"""Write the Mastr in xml format into a database defined by the engine parameter."""
log.info("Starting bulk download...")
Expand Down Expand Up @@ -55,6 +56,7 @@ def write_mastr_xml_to_database(
zipped_xml_file_path,
bulk_download_date,
bulk_cleansing,
create_and_alter_database_tables,
)
)

Expand Down Expand Up @@ -107,6 +109,7 @@ def process_xml_file(
zipped_xml_file_path: str,
bulk_download_date: str,
bulk_cleansing: bool,
create_and_alter_database_tables: bool,
) -> None:
"""Process a single xml file and write it to the database."""
try:
Expand All @@ -122,8 +125,12 @@ def process_xml_file(
with ZipFile(zipped_xml_file_path, "r") as f:
log.info(f"Processing file '{file_name}'...")
if is_first_file(file_name):
log.info(f"Creating table '{sql_table_name}'...")
create_database_table(engine, xml_table_name)
if create_and_alter_database_tables:
log.info(f"Creating table '{sql_table_name}'...")
create_database_table(engine, xml_table_name)
else:
log.info(f"Deleting all data from table '{sql_table_name}'...")
delete_data_from_database_table(engine, xml_table_name)
df = read_xml_file(f, file_name)
df = process_table_before_insertion(
df,
Expand All @@ -133,10 +140,20 @@ def process_xml_file(
bulk_cleansing,
)
if engine.dialect.name == "sqlite":
add_table_to_sqlite_database(df, xml_table_name, sql_table_name, engine)
add_table_to_sqlite_database(
df,
xml_table_name,
sql_table_name,
engine,
create_and_alter_database_tables,
)
else:
add_table_to_non_sqlite_database(
df, xml_table_name, sql_table_name, engine
df,
xml_table_name,
sql_table_name,
engine,
create_and_alter_database_tables,
)
except Exception as e:
log.error(f"Error processing file '{file_name}': '{e}'")
Expand Down Expand Up @@ -245,6 +262,14 @@ def create_database_table(
orm_class.__table__.create(engine)


def delete_data_from_database_table(
    engine: sqlalchemy.engine.Engine, xml_table_name: str
) -> None:
    """Delete all rows from the table that belongs to `xml_table_name`.

    The table is looked up through the ORM class registered in
    `tablename_mapping`. Only the rows are removed; no DDL is issued.
    """
    target_table = tablename_mapping[xml_table_name]["__class__"].__table__
    delete_all_rows = delete(target_table)
    # engine.begin() runs the statement inside a transaction block.
    with engine.begin() as connection:
        connection.execute(delete_all_rows)


def is_first_file(file_name: str) -> bool:
"""check if the file name indicates that it is the first file from the table"""
return (
Expand Down Expand Up @@ -345,6 +370,7 @@ def add_table_to_non_sqlite_database(
xml_table_name: str,
sql_table_name: str,
engine: sqlalchemy.engine.Engine,
add_missing_columns: bool,
) -> None:
# get a dictionary for the data types
table_columns_list = list(
Expand All @@ -359,9 +385,10 @@ def add_table_to_non_sqlite_database(
# Convert date and datetime columns into the datatype datetime.
df = cast_date_columns_to_datetime(xml_table_name, df)

add_missing_columns_to_table(
engine, xml_table_name, column_list=df.columns.tolist()
)
if add_missing_columns:
add_missing_columns_to_table(
engine, xml_table_name, column_list=df.columns.tolist()
)

for _ in range(10000):
try:
Expand Down Expand Up @@ -584,9 +611,11 @@ def add_table_to_sqlite_database(
xml_table_name: str,
sql_table_name: str,
engine: sqlalchemy.engine.Engine,
add_missing_columns: bool,
) -> None:
column_list = df.columns.tolist()
add_missing_columns_to_table(engine, xml_table_name, column_list)
if add_missing_columns:
add_missing_columns_to_table(engine, xml_table_name, column_list)

# Convert NaNs to None.
df = df.where(pd.notnull(df), None)
Expand Down
6 changes: 5 additions & 1 deletion tests/xml_download/test_utils_write_to_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,11 @@ def test_add_table_to_sqlite_database(engine_testdb, add_table_to_database_funct
)

add_table_to_database_function(
df, "anlageneeggeothermiegrubengasdruckentspannung", "gsgk_eeg", engine_testdb
df,
"anlageneeggeothermiegrubengasdruckentspannung",
"gsgk_eeg",
engine_testdb,
True,
)
with engine_testdb.connect() as con:
with con.begin():
Expand Down
Loading