Skip to content
Draft
64 changes: 23 additions & 41 deletions open_mastr/soap_api/metadata/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,9 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"id": str(uuid.uuid4()),
"description": f"Raw data download Marktstammdatenregister (MaStR) data using the webservice.\n\n{description_extra}",
"language": ["en-GB", "de-DE"],
"subject": [{"name": None, "path": None}],
"keywords": ["powerplants", "renewables"],
"created": publication_date,
"publicationDate": publication_date,
"version": data_version,
"context": {
"homepage": "https://www.marktstammdatenregister.de/MaStR/",
Expand All @@ -85,13 +86,15 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"spatial": {"location": None, "extent": "Germany", "resolution": "vector"},
"temporal": {
"referenceDate": reference_date.strftime("%Y-%m-%d %H:%M:%S"),
"timeseries": {
"timeseries": [
{
"start": None,
"end": None,
"resolution": None,
"alignment": None,
"aggregationType": None,
},
}
]
},
"sources": [
{
Expand All @@ -106,7 +109,7 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"instruction": "You are free: To Share, To Create, To Adapt; As long as you: Attribute",
"attribution": f"© Marktstammdatenregister {datetime.date.today().year} | dl-de/by-2-0",
}
],
]
},
{
"title": "RLI - open_MaStR",
Expand All @@ -120,8 +123,8 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"instruction": "You are free: To Share, To Create, To Adapt; As long as you: Attribute, Share-Alike, Keep open!",
"attribution": "open_MaStR © Reiner Lemoine Institut | AGPL-3.0",
}
],
},
]
}
],
"licenses": [
{
Expand All @@ -134,30 +137,16 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
],
"contributors": [
{
"title": "Ludee",
"title": None,
"email": None,
"path": "https://github.com/ludee",
"role": "maintainer",
"organization": "Reiner Lemoine Institut gGmbH",
},
{
"title": "Guido Pleßmann",
"email": None,
"path": "https://gplssm.de",
"role": "maintainer",
"organization": "Reiner Lemoine Institut gGmbH",
},
{
"title": "oakca",
"email": None,
"path": "https://github.com/oakca",
"role": "contributor",
"organization": "Reiner Lemoine Institut gGmbH",
},
"date": None,
"object": None,
"comment": None
}
],
"review": {"path": None, "badge": None},
"metaMetadata": {
"metadataVersion": "OEP-1.4.0",
"metadataVersion": "OEP-1.5.2",
"metadataLicense": {
"name": "CC0-1.0",
"title": "Creative Commons Zero v1.0 Universal",
Expand All @@ -172,6 +161,7 @@ def datapackag_base(reference_date, publication_date=None, statistik_flag=None):
"licenses": "License name must follow the SPDX License List (https://spdx.org/licenses/)",
"review": "Following the OEP Data Review (https://github.com/OpenEnergyPlatform/data-preprocessing/wiki)",
"null": "If not applicable use (null)",
"todo": "If a value ist not yet available, use: todo"
},
}

Expand Down Expand Up @@ -253,50 +243,44 @@ def create_datapackage_meta_json(
resource = {
"profile": "tabular-data-resource",
"name": f"bnetza_mastr_{tech}_raw",
"title": f"open-MaStR {tech} units (raw)",
"path": filenames["raw"][tech]["joined"],
"scheme": "file",
"format": "csv",
"encoding": "utf-8",
"mediatype": "text/csv",
"schema": {
"fields": raw_fields,
"primaryKey": ["EinheitMastrNummer"],
},
"dialect": {"delimiter": ","},
}

resources_meta["resources"].append(resource)
if "cleaned" in data:
resource = {
"profile": "tabular-data-resource",
"name": f"bnetza_mastr_{tech}_cleaned",
"title": f"open-MaStR {tech} units (cleaned)",
"path": filenames["cleaned"][tech],
"scheme": "file",
"format": "csv",
"encoding": "utf-8",
"mediatype": "text/csv",
"schema": {
"fields": raw_fields,
"primaryKey": ["EinheitMastrNummer"],
},
"dialect": {"delimiter": ","},
}

resources_meta["resources"].append(resource)
if "postprocessed" in data:
processed_fields = [
{
"name": "geom",
"unit": None,
"type": "str",
"desciption": "Standort der Anlage als Punktgeometrie im WKB Format",
"description": "Standort der Anlage als Punktgeometrie im WKB Format",
"examples": "0101000020e610000071fbe59315131c40a2b437f8c20e4a40",
},
{
"name": "comment",
"unit": None,
"type": "str",
"desciption": "Information about data post-processing",
"description": "Information about data post-processing",
"examples": "has_geom; outside_vg250",
},
]
Expand All @@ -306,7 +290,7 @@ def create_datapackage_meta_json(
"name": "tags",
"unit": None,
"type": "json",
"desciption": "Data insights and report about post-processing steps",
"description": "Data insights and report about post-processing steps",
"examples": {
"plz_check": False,
"processed": True,
Expand All @@ -319,18 +303,16 @@ def create_datapackage_meta_json(
"name": "geom",
"unit": None,
"type": "str",
"desciption": "Standort der Anlage als Punktgeometrie im WKB Format (EPSG 3035)",
"description": "Standort der Anlage als Punktgeometrie im WKB Format (EPSG 3035)",
"examples": "0101000020e610000071fbe59315131c40a2b437f8c20e4a40",
}
)
resource = {
"profile": "tabular-data-resource",
"name": f"bnetza_mastr_{tech}",
"title": f"open-MaStR {tech} units",
"path": filenames["postprocessed"][tech],
"scheme": "file",
"format": "csv",
"encoding": "utf-8",
"mediatype": "text/csv",
"schema": {
"fields": raw_fields + processed_fields,
"primaryKey": ["EinheitMastrNummer"],
Expand Down
70 changes: 46 additions & 24 deletions open_mastr/soap_api/metadata/description.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,19 @@ def __init__(self, xml=None):
self.xml = fh.read()
else:
# If no XML file is given, the file is read from a URL
zipurl = 'https://www.marktstammdatenregister.de/MaStRHilfe/files/' \
'webdienst/Dienstbeschreibung_1_2_39_Produktion.zip'
# update version here (the "1.2.87" part of the file name below)
zipurl = "https://www.marktstammdatenregister.de/MaStRHilfe/files/webdienst/" \
"Dienstbeschreibung_Produktion_Version" \
"1.2.87" \
".zip"

with urlopen(zipurl) as zipresp:
with ZipFile(BytesIO(zipresp.read())) as zfile:
self.xml = zfile.read('xsd/mastrbasetypes.xsd')


self.xml = zfile.read("xsd/mastrbasetypes.xsd")

# Parse XML and extract relevant data
parsed = xmltodict.parse(self.xml, process_namespaces=False)
self.complex_types = parsed['schema']["complexType"]
self.simple_types = parsed['schema']["simpleType"]
self.complex_types = parsed["schema"]["complexType"]
self.simple_types = parsed["schema"]["simpleType"]

# Prepare parsed data for documentational purposes
abstract_types, parameters, responses, types = self._filter_type_descriptions()
Expand Down Expand Up @@ -78,13 +78,17 @@ def _filter_type_descriptions(self):
raise ValueError("Ohh...")
else:
# Filter all functions
if item["@name"].startswith(("Get", "Set", "Erneute", "Verschiebe", "Delete")):
if item["@name"].startswith(
("Get", "Set", "Erneute", "Verschiebe", "Delete")
):
functions.append(item)

# Further split the list of functions into parameters and responses
if item["@name"].endswith("Parameter"):
if "complexContent" in item.keys():
parameters[item["@name"]] = item["complexContent"]["extension"]
parameters[item["@name"]] = item["complexContent"][
"extension"
]
else:
parameters[item["@name"]] = item
elif item["@name"].endswith("Antwort"):
Expand All @@ -111,12 +115,14 @@ def prepare_simple_type(self):

for simple_type in self.simple_types:
if "enumeration" in simple_type["restriction"]:
possible_values = [_["@value"] for _ in simple_type["restriction"]["enumeration"]]
possible_values = [
_["@value"] for _ in simple_type["restriction"]["enumeration"]
]
else:
possible_values = []
simple_types_doc[simple_type["@name"]] = {
"type": simple_type["restriction"]["@base"],
"values": possible_values
"values": possible_values,
}
return simple_types_doc

Expand All @@ -140,49 +146,61 @@ def functions_data_documentation(self):
if "annotation" in fcn["sequence"]["element"]:
fcn_data = [fcn["sequence"]["element"]]
else:
fcn_data = self.types[fcn["sequence"]["element"]["@type"].split(":")[1]]["sequence"]["element"]
fcn_data = self.types[
fcn["sequence"]["element"]["@type"].split(":")[1]
]["sequence"]["element"]
else:
print(type(fcn["sequence"]))
print(fcn["sequence"])
raise ValueError

# Add data for inherited columns from base types
if "@base" in fcn:
if not fcn["@base"] == 'mastr:AntwortBasis':
fcn_data = _collect_columns_of_base_type(self.types, fcn["@base"].split(":")[1], fcn_data)
if not fcn["@base"] == "mastr:AntwortBasis":
fcn_data = _collect_columns_of_base_type(
self.types, fcn["@base"].split(":")[1], fcn_data
)
function_docs[fcn_name] = {}
for column in fcn_data:
# Replace MaStR internal types with more general ones
if column["@type"].startswith("mastr:"):
try:
column_type = self.simple_types_prepared[column["@type"].split(":")[1]]["type"]
column_type = self.simple_types_prepared[
column["@type"].split(":")[1]
]["type"]
except KeyError:
column_type = column["@type"]
else:
column_type = column["@type"]

if "annotation" in column.keys():
description = column["annotation"]["documentation"].get("#text", None)
description = column["annotation"]["documentation"].get(
"#text", None
)
if description:
description = re.sub(" +", " ", description.replace("\n", ""))
description = re.sub(
" +", " ", description.replace("\n", "")
)
function_docs[fcn_name][column["@name"]] = {
"type": column_type,
"description": description,
"example": column["annotation"]["documentation"].get("m-ex", None)
"type": column_type,
"description": description,
"example": column["annotation"]["documentation"].get(
"m-ex", None
),
}
else:
function_docs[fcn_name][column["@name"]] = {
"type": column_type,
# TODO: insert information from simple type here
"description": None,
"example": None
"example": None,
}

# Hack in a description for a column that gets created after download while flattening data
function_docs["GetEinheitWind"]["HerstellerId"] = {
"type": "str",
"description": "Id des Herstellers der Einheit",
"example": 923
"example": 923,
}

return function_docs
Expand All @@ -193,7 +211,11 @@ def _collect_columns_of_base_type(base_types, base_type_name, fcn_data):
fcn_data += type_description["extension"]["sequence"]["element"]

if "@base" in type_description["extension"]:
if not type_description["extension"]["@base"] == 'mastr:AntwortBasis':
fcn_data = _collect_columns_of_base_type(base_types, type_description["extension"]["@base"].split(":")[1], fcn_data)
if not type_description["extension"]["@base"] == "mastr:AntwortBasis":
fcn_data = _collect_columns_of_base_type(
base_types,
type_description["extension"]["@base"].split(":")[1],
fcn_data,
)

return fcn_data
Loading