diff --git a/CaseStudy.py b/CaseStudy.py
index 2377ca2..87dac63 100644
--- a/CaseStudy.py
+++ b/CaseStudy.py
@@ -1,3 +1,4 @@
+import concurrent.futures
 import copy
 import os
 import warnings
@@ -40,6 +41,8 @@ def __init__(self,
                  data_folder: str | Path,
                  do_not_scale_units: bool = False,
                  do_not_merge_single_node_buses: bool = False,
+                 parallel_read: bool = True,
+                 n_jobs: int = 4,
                  global_parameters_file: str = "Global_Parameters.xlsx", dGlobal_Parameters: pd.DataFrame = None,
                  global_scenarios_file: str = "Global_Scenarios.xlsx", dGlobal_Scenarios: pd.DataFrame = None,
                  power_parameters_file: str = "Power_Parameters.xlsx", dPower_Parameters: pd.DataFrame = None,
@@ -60,6 +63,7 @@ def __init__(self,
         self.do_not_scale_units = do_not_scale_units
         self.do_not_merge_single_node_buses = do_not_merge_single_node_buses
 
+        # === SEQUENTIAL READS ===
         if dGlobal_Parameters is not None:
             self.dGlobal_Parameters = dGlobal_Parameters
         else:
@@ -87,30 +91,109 @@ def __init__(self,
             self.power_parameters_file = power_parameters_file
             self.dPower_Parameters = self.get_dPower_Parameters()
 
-        if dPower_BusInfo is not None:
-            self.dPower_BusInfo = dPower_BusInfo
+        # === PARALLEL READS ===
+        tasks = []  # List of (attribute_name, function, args_tuple)
+
+        # Define file paths
+        self.power_businfo_file = power_businfo_file
+        self.power_network_file = power_network_file
+        self.power_demand_file = power_demand_file
+        self.power_hindex_file = power_hindex_file
+        self.power_weightsk_file = power_weightsk_file
+
+        # Add independent tasks
+        if dPower_BusInfo is None:
+            tasks.append(("dPower_BusInfo", ExcelReader.get_Power_BusInfo, (self.data_folder + self.power_businfo_file,)))
         else:
-            self.power_businfo_file = power_businfo_file
-            self.dPower_BusInfo = ExcelReader.get_Power_BusInfo(self.data_folder + self.power_businfo_file)
+            self.dPower_BusInfo = dPower_BusInfo
 
-        if dPower_Network is not None:
-            self.dPower_Network = dPower_Network
+        if dPower_Network is None:
+            tasks.append(("dPower_Network", ExcelReader.get_Power_Network, (self.data_folder + self.power_network_file,)))
         else:
-            self.power_network_file = power_network_file
-            self.dPower_Network = ExcelReader.get_Power_Network(self.data_folder + self.power_network_file)
+            self.dPower_Network = dPower_Network
 
-        if dPower_Demand is not None:
-            self.dPower_Demand = dPower_Demand
+        if dPower_Demand is None:
+            tasks.append(("dPower_Demand", ExcelReader.get_Power_Demand, (self.data_folder + self.power_demand_file,)))
         else:
-            self.power_demand_file = power_demand_file
-            self.dPower_Demand = ExcelReader.get_Power_Demand(self.data_folder + self.power_demand_file)
+            self.dPower_Demand = dPower_Demand
 
-        if dPower_Hindex is not None:
+        if dPower_Hindex is None:
+            tasks.append(("dPower_Hindex", ExcelReader.get_Power_Hindex, (self.data_folder + self.power_hindex_file,)))
+        else:
             self.dPower_Hindex = dPower_Hindex
+
+        if dPower_WeightsK is None:
+            tasks.append(("dPower_WeightsK", ExcelReader.get_Power_WeightsK, (self.data_folder + self.power_weightsk_file,)))
+        else:
+            self.dPower_WeightsK = dPower_WeightsK
+
+        # Add conditional tasks (dependent on dPower_Parameters)
+        if self.dPower_Parameters["pEnableThermalGen"]:
+            self.power_thermalgen_file = power_thermalgen_file
+            if dPower_ThermalGen is None:
+                tasks.append(("dPower_ThermalGen", ExcelReader.get_Power_ThermalGen, (self.data_folder + self.power_thermalgen_file,)))
+            else:
+                self.dPower_ThermalGen = dPower_ThermalGen
+
+        if self.dPower_Parameters["pEnableVRES"]:
+            self.power_vres_file = power_vres_file
+            if dPower_VRES is None:
+                tasks.append(("dPower_VRES", ExcelReader.get_Power_VRES, (self.data_folder + self.power_vres_file,)))
+            else:
+                self.dPower_VRES = dPower_VRES
+
+            if dPower_VRESProfiles is not None:
+                self.dPower_VRESProfiles = dPower_VRESProfiles
+            elif os.path.isfile(self.data_folder + power_vresprofiles_file):  # Optional file: leave the attribute unset when absent
+                self.power_vresprofiles_file = power_vresprofiles_file
+                tasks.append(("dPower_VRESProfiles", ExcelReader.get_Power_VRESProfiles, (self.data_folder + self.power_vresprofiles_file,)))
+
+        if self.dPower_Parameters["pEnableStorage"]:
+            self.power_storage_file = power_storage_file
+            if dPower_Storage is None:
+                tasks.append(("dPower_Storage", ExcelReader.get_Power_Storage, (self.data_folder + self.power_storage_file,)))
+            else:
+                self.dPower_Storage = dPower_Storage
+
+        if self.dPower_Parameters["pEnableVRES"] or self.dPower_Parameters["pEnableStorage"]:
+            if dPower_Inflows is not None:
+                self.dPower_Inflows = dPower_Inflows
+            elif os.path.isfile(self.data_folder + power_inflows_file):  # Optional file: leave the attribute unset when absent
+                self.power_inflows_file = power_inflows_file
+                tasks.append(("dPower_Inflows", ExcelReader.get_Power_Inflows, (self.data_folder + self.power_inflows_file,)))
+
+        if self.dPower_Parameters["pEnablePowerImportExport"]:
+            self.power_importexport_file = power_importexport_file
+            if dPower_ImportExport is None:
+                tasks.append(("dPower_ImportExport", ExcelReader.get_Power_ImportExport, (self.data_folder + self.power_importexport_file,)))
+            else:
+                self.dPower_ImportExport = dPower_ImportExport
         else:
-            self.power_hindex_file = power_hindex_file
-            self.dPower_Hindex = ExcelReader.get_Power_Hindex(self.data_folder + self.power_hindex_file)
+            self.dPower_ImportExport = None
 
+        # --- Execute Tasks (Parallel or Sequential) ---
+        if parallel_read and tasks:
+            num_workers = max(1, min(n_jobs, len(tasks)))  # ThreadPoolExecutor rejects max_workers <= 0
+            with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
+                future_to_attr = {executor.submit(task[1], *task[2]): task[0] for task in tasks}
+
+                for future in concurrent.futures.as_completed(future_to_attr):
+                    attr_name = future_to_attr[future]
+                    try:
+                        result_df = future.result()
+                    except Exception as exc:
+                        printer.error(f"Error reading for '{attr_name}': {exc}")
+                        raise
+                    setattr(self, attr_name, result_df)
+        else:
+            for attr_name, func, args in tasks:
+                try:
+                    setattr(self, attr_name, func(*args))
+                except Exception as exc:
+                    printer.error(f"Error reading for '{attr_name}': {exc}")
+                    raise
+
+        # === SEQUENTIAL DEPENDENTS ===
         if dPower_WeightsRP is not None:
             self.dPower_WeightsRP = dPower_WeightsRP
         else:
@@ -153,63 +236,8 @@ def __init__(self,
                 printer.warning(f"Executing without 'Power_WeightsRP' (since no file was found at '{self.data_folder + self.power_weightsrp_file}').")
                 self.dPower_WeightsRP = dPower_WeightsRP
 
-        if dPower_WeightsK is not None:
-            self.dPower_WeightsK = dPower_WeightsK
-        else:
-            self.power_weightsk_file = power_weightsk_file
-            self.dPower_WeightsK = ExcelReader.get_Power_WeightsK(self.data_folder + self.power_weightsk_file)
-
-        if dPower_Hindex is not None:
-            self.dPower_Hindex = dPower_Hindex
-        else:
-            self.power_hindex_file = power_hindex_file
-            self.dPower_Hindex = ExcelReader.get_Power_Hindex(self.data_folder + self.power_hindex_file)
-
         self.rpTransitionMatrixAbsolute, self.rpTransitionMatrixRelativeTo, self.rpTransitionMatrixRelativeFrom = self.get_rpTransitionMatrices(clip_method=clip_method, clip_value=clip_value)
 
-        if self.dPower_Parameters["pEnableThermalGen"]:
-            if dPower_ThermalGen is not None:
-                self.dPower_ThermalGen = dPower_ThermalGen
-            else:
-                self.power_thermalgen_file = power_thermalgen_file
-                self.dPower_ThermalGen = ExcelReader.get_Power_ThermalGen(self.data_folder + self.power_thermalgen_file)
-
-        if self.dPower_Parameters["pEnableVRES"]:
-            if dPower_VRES is not None:
-                self.dPower_VRES = dPower_VRES
-            else:
-                self.power_vres_file = power_vres_file
-                self.dPower_VRES = ExcelReader.get_Power_VRES(self.data_folder + self.power_vres_file)
-
-            if dPower_VRESProfiles is not None:
-                self.dPower_VRESProfiles = dPower_VRESProfiles
-            elif os.path.isfile(self.data_folder + power_vresprofiles_file):
-                self.power_vresprofiles_file = power_vresprofiles_file
-                self.dPower_VRESProfiles = ExcelReader.get_Power_VRESProfiles(self.data_folder + self.power_vresprofiles_file)
-
-        if self.dPower_Parameters["pEnableStorage"]:
-            if dPower_Storage is not None:
-                self.dPower_Storage = dPower_Storage
-            else:
-                self.power_storage_file = power_storage_file
-                self.dPower_Storage = ExcelReader.get_Power_Storage(self.data_folder + self.power_storage_file)
-
-        if self.dPower_Parameters["pEnableVRES"] or self.dPower_Parameters["pEnableStorage"]:
-            if dPower_Inflows is not None:
-                self.dPower_Inflows = dPower_Inflows
-            elif os.path.isfile(self.data_folder + power_inflows_file):
-                self.power_inflows_file = power_inflows_file
-                self.dPower_Inflows = ExcelReader.get_Power_Inflows(self.data_folder + self.power_inflows_file)
-
-        if self.dPower_Parameters["pEnablePowerImportExport"]:
-            if dPower_ImportExport is not None:
-                self.dPower_ImportExport = dPower_ImportExport
-            else:
-                self.power_importexport_file = power_importexport_file
-                self.dPower_ImportExport = ExcelReader.get_Power_ImportExport(self.data_folder + self.power_importexport_file)
-        else:
-            self.dPower_ImportExport = None
-
         if not do_not_merge_single_node_buses:
             self.merge_single_node_buses()
 
@@ -351,8 +379,26 @@ def scale_dPower_ImportExport(self):
         self.dPower_ImportExport["ImpExpPrice"] *= self.cost_scaling_factor / self.power_scaling_factor
 
     def get_dGlobal_Parameters(self):
-        ExcelReader.check_LEGOExcel_version(self.data_folder + self.global_parameters_file, "v0.1.0", False)
-        dGlobal_Parameters = pd.read_excel(self.data_folder + self.global_parameters_file, skiprows=[0, 1])
+        file_path = self.data_folder + self.global_parameters_file
+        version_spec = "v0.1.0"
+        fail_on_wrong_version = False
+
+        try:
+            xls = pd.ExcelFile(file_path, engine="calamine")
+        except FileNotFoundError:
+            printer.error(f"File not found: {file_path}")
+            raise
+
+        # Close the workbook handle as soon as all reads are done
+        with xls:
+            # Check all sheets for version
+            for sheet in xls.sheet_names:
+                if sheet.startswith("~"):
+                    continue
+                ExcelReader.check_LEGOExcel_version(xls, sheet, version_spec, file_path, fail_on_wrong_version)
+
+            # Read global parameters from Excel
+            dGlobal_Parameters = pd.read_excel(xls, skiprows=[0, 1])
         dGlobal_Parameters = dGlobal_Parameters.drop(dGlobal_Parameters.columns[0], axis=1)
         dGlobal_Parameters = dGlobal_Parameters.set_index('Solver Options')
 
@@ -365,8 +411,25 @@ def get_dGlobal_Parameters(self):
         return dGlobal_Parameters
 
     def get_dPower_Parameters(self):
-        ExcelReader.check_LEGOExcel_version(self.data_folder + self.power_parameters_file, "v0.2.0", False)
-        dPower_Parameters = pd.read_excel(self.data_folder + self.power_parameters_file, skiprows=[0, 1])
+        file_path = self.data_folder + self.power_parameters_file
+        version_spec = "v0.2.0"
+        fail_on_wrong_version = False
+
+        try:
+            xls = pd.ExcelFile(file_path, engine="calamine")
+        except FileNotFoundError:
+            printer.error(f"File not found: {file_path}")
+            raise
+
+        # Close the workbook handle as soon as all reads are done
+        with xls:
+            # Check all sheets for version
+            for sheet in xls.sheet_names:
+                if sheet.startswith("~"):
+                    continue
+                ExcelReader.check_LEGOExcel_version(xls, sheet, version_spec, file_path, fail_on_wrong_version)
+
+            dPower_Parameters = pd.read_excel(xls, skiprows=[0, 1])
         dPower_Parameters = dPower_Parameters.drop(dPower_Parameters.columns[0], axis=1)
         dPower_Parameters = dPower_Parameters.dropna(how="all")
         dPower_Parameters = dPower_Parameters.set_index('General')
@@ -498,22 +561,25 @@ def merge_single_node_buses(self):
             self.dPower_Network = self.dPower_Network.groupby(['i', 'j']).agg(aggregation_methods_for_columns)
 
             ### Adapt dPower_ThermalGen
-            for i, row in self.dPower_ThermalGen.iterrows():
-                if row['i'] in connected_buses:
-                    row['i'] = new_bus_name
-                    self.dPower_ThermalGen.loc[i] = row
+            if getattr(self, "dPower_ThermalGen", None) is not None:
+                for i, row in self.dPower_ThermalGen.iterrows():
+                    if row['i'] in connected_buses:
+                        row['i'] = new_bus_name
+                        self.dPower_ThermalGen.loc[i] = row
 
             # Adapt dPower_VRES
-            for i, row in self.dPower_VRES.iterrows():
-                if row['i'] in connected_buses:
-                    row['i'] = new_bus_name
-                    self.dPower_VRES.loc[i] = row
+            if getattr(self, "dPower_VRES", None) is not None:
+                for i, row in self.dPower_VRES.iterrows():
+                    if row['i'] in connected_buses:
+                        row['i'] = new_bus_name
+                        self.dPower_VRES.loc[i] = row
 
             # Adapt dPower_Storage
-            for i, row in self.dPower_Storage.iterrows():
-                if row['i'] in connected_buses:
-                    row['i'] = new_bus_name
-                    self.dPower_Storage.loc[i] = row
+            if getattr(self, "dPower_Storage", None) is not None:
+                for i, row in self.dPower_Storage.iterrows():
+                    if row['i'] in connected_buses:
+                        row['i'] = new_bus_name
+                        self.dPower_Storage.loc[i] = row
 
             # Adapt dPower_Demand
             self.dPower_Demand = self.dPower_Demand.reset_index()
@@ -524,14 +590,15 @@ def merge_single_node_buses(self):
             self.dPower_Demand = self.dPower_Demand.groupby(['rp', 'i', 'k']).sum()
 
             # Adapt dPower_VRESProfiles
-            self.dPower_VRESProfiles = self.dPower_VRESProfiles.reset_index()
-            for i, row in self.dPower_VRESProfiles.iterrows():
-                if row['i'] in connected_buses:
-                    row['i'] = new_bus_name
-                    self.dPower_VRESProfiles.loc[i] = row
-
-            self.dPower_VRESProfiles = self.dPower_VRESProfiles.groupby(['rp', 'i', 'k', 'tec']).mean()  # TODO: Aggregate using more complex method (capacity * productionCapacity * ... * / Total Production Capacity)
-            self.dPower_VRESProfiles.sort_index(inplace=True)
+            if getattr(self, "dPower_VRESProfiles", None) is not None:  # May be unset or None when no profile data exists
+                self.dPower_VRESProfiles = self.dPower_VRESProfiles.reset_index()
+                for i, row in self.dPower_VRESProfiles.iterrows():
+                    if row['i'] in connected_buses:
+                        row['i'] = new_bus_name
+                        self.dPower_VRESProfiles.loc[i] = row
+
+                self.dPower_VRESProfiles = self.dPower_VRESProfiles.groupby(['rp', 'i', 'k', 'tec']).mean()  # TODO: Aggregate using more complex method (capacity * productionCapacity * ... * / Total Production Capacity)
+                self.dPower_VRESProfiles.sort_index(inplace=True)
 
     # Create transition matrix from Hindex
     def get_rpTransitionMatrices(self, clip_method: str = "none", clip_value: float = 0) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
diff --git a/ExcelReader.py b/ExcelReader.py
index eb21bad..2961941 100644
--- a/ExcelReader.py
+++ b/ExcelReader.py
@@ -10,26 +10,33 @@
 printer = Printer.getInstance()
 
 
-def check_LEGOExcel_version(excel_file_path: str, version_specifier: str, fail_on_wrong_version: bool = False):
+def check_LEGOExcel_version(xls: pd.ExcelFile, sheet_name: str, version_specifier: str, excel_file_path: str, fail_on_wrong_version: bool = False):
     """
-    Check if the Excel file has the correct version specifier.
-    :param excel_file_path: Path to the Excel file
+    Check if a specific sheet in an open Excel file has the correct version specifier.
+    :param xls: The open pandas.ExcelFile object
+    :param sheet_name: The name of the sheet to check
     :param version_specifier: Expected version specifier (e.g., "v0.1.0")
-    :param fail_on_wrong_version: If True, raise an error if the version of the Excel file does not match the expected version
-    :return: None
-    :raises ValueError: If the version specifier does not match and fail_on_wrong_version
-    """
-    # Check if the file has the correct version specifier
-    wb = openpyxl.load_workbook(excel_file_path)
-    for sheet in wb.sheetnames:
-        if sheet.startswith("~"):  # Skip sheets that start with '~'
-            continue
-        if wb[sheet].cell(row=2, column=3).value != version_specifier:
-            if fail_on_wrong_version:
-                raise ValueError(f"Excel file '{excel_file_path}' does not have the correct version specifier. Expected '{version_specifier}' but got '{wb[sheet].cell(row=2, column=3).value}'.")
-            else:
-                printer.error(f"Excel file '{excel_file_path}' does not have the correct version specifier in sheet '{sheet}'. Expected '{version_specifier}' but got '{wb[sheet].cell(row=2, column=3).value}'.")
-                printer.error(f"Trying to work with it any way, but this can have unintended consequences!")
+    :param excel_file_path: Path to the Excel file (for error logging)
+    :param fail_on_wrong_version: If True, raise an error if the version does not match
+    :raises ValueError: If the version specifier does not match (or cell C2 cannot be read) and fail_on_wrong_version is True
+    """
+    try:
+        # Read only cell C2 (row=2, col=3) from the specified sheet
+        version_cell = pd.read_excel(xls, sheet_name=sheet_name, usecols="C", skiprows=1, nrows=1, header=None).iloc[0, 0]
+    except Exception as e:
+        printer.error(f"Could not read version cell [C2] from sheet '{sheet_name}' in '{excel_file_path}'. Error: {e}")
+        version_cell = None
+
+    if version_cell != version_specifier:
+        if fail_on_wrong_version:
+            raise ValueError(
+                f"Excel file '{excel_file_path}' (sheet '{sheet_name}') does not have the correct version specifier. "
+                f"Expected '{version_specifier}' but got '{version_cell}'.")
+        else:
+            printer.error(
+                f"Excel file '{excel_file_path}' (sheet '{sheet_name}') does not have the correct version specifier. "
+                f"Expected '{version_specifier}' but got '{version_cell}'.")
+            printer.error(f"Trying to work with it any way, but this can have unintended consequences!")
     pass
 
 
@@ -45,15 +52,17 @@ def __read_non_pivoted_file(excel_file_path: str, version_specifier: str, indice
     :param fail_on_wrong_version: If True, raise an error if the version of the Excel file does not match the expected version
     :return: DataFrame containing the data from the Excel file
     """
-    check_LEGOExcel_version(excel_file_path, version_specifier, fail_on_wrong_version)
-    xls = pd.ExcelFile(excel_file_path)
+    xls = pd.ExcelFile(excel_file_path, engine="calamine")
     data = pd.DataFrame()
 
     for scenario in xls.sheet_names:  # Iterate through all sheets, i.e., through all scenarios
         if scenario.startswith("~"):
             printer.warning(f"Skipping sheet '{scenario}' from '{excel_file_path}' because it starts with '~'.")
             continue
-        df = pd.read_excel(excel_file_path, skiprows=[0, 1, 2, 4, 5, 6], sheet_name=scenario)
+
+        check_LEGOExcel_version(xls, scenario, version_specifier, excel_file_path, fail_on_wrong_version)
+
+        df = pd.read_excel(xls, skiprows=[0, 1, 2, 4, 5, 6], sheet_name=scenario)
         if has_excl_column:
             if not keep_excl_columns:
                 df = df[df["excl"].isnull()]  # Only keep rows that are not excluded (i.e., have no value in the "Excl." column)
@@ -210,13 +219,19 @@ def get_Power_ImportExport(excel_file_path: str, keep_excluded_entries: bool = F
     if keep_excluded_entries:
         printer.warning("'keep_excluded_entries' is set for 'get_Power_ImportExport', although nothing is excluded anyway - please check if this is intended.")
 
-    check_LEGOExcel_version(excel_file_path, "v0.0.1", fail_on_wrong_version)
-    xls = pd.ExcelFile(excel_file_path)
+    version_specifier = "v0.0.1"
+    xls = pd.ExcelFile(excel_file_path, engine="calamine")
     data = pd.DataFrame()
 
     for scenario in xls.sheet_names:  # Iterate through all sheets, i.e., through all scenarios
+        if scenario.startswith("~"):
+            printer.warning(f"Skipping sheet '{scenario}' from '{excel_file_path}' because it starts with '~'.")
+            continue
+
+        check_LEGOExcel_version(xls, scenario, version_specifier, excel_file_path, fail_on_wrong_version)
+
         # Read row 3 (information about hubs and nodes)
-        hub_i_df = pd.read_excel(excel_file_path, skiprows=[0, 1, 3], nrows=2, sheet_name=scenario)
+        hub_i_df = pd.read_excel(xls, skiprows=[0, 1, 3], nrows=2, sheet_name=scenario)
         hub_i = []
         hubs = []
         i = 6  # Start checking from column 7 (index 6, zero-based)
@@ -230,7 +245,7 @@ def get_Power_ImportExport(excel_file_path: str, keep_excluded_entries: bool = F
         if len(hubs) != len(set(hubs)):
             raise ValueError(f"Power_ImportExport: Found duplicate hub names in the header row. Hubs must be unique. Please check the Excel file.")
 
-        df = pd.read_excel(excel_file_path, skiprows=[0, 1, 2, 4, 5, 6], sheet_name=scenario)
+        df = pd.read_excel(xls, skiprows=[0, 1, 2, 4, 5, 6], sheet_name=scenario)
         df = df.drop(df.columns[0], axis=1)  # Drop the first column (which is empty)
 
         for i, col in enumerate(df.columns):
diff --git a/environment.yml b/environment.yml
index f6f06d3..c066c5b 100644
--- a/environment.yml
+++ b/environment.yml
@@ -11,6 +11,7 @@ dependencies:
   - pyomo=6.9.3
   - pytest=8.4.1
   - python=3.12.2
+  - python-calamine=0.5.3
   - rich=13.9.4
   - rich-argparse=1.7.1
   - tsam=2.3.9