365 changes: 365 additions & 0 deletions Utilities.py
@@ -1,4 +1,8 @@
from typing import Literal, Dict

import numpy as np
import pandas as pd
import tsam.timeseriesaggregation as tsam


def inflowsToCapacityFactors(inflows_df: pd.DataFrame, vres_df: pd.DataFrame, vresProfiles_df: pd.DataFrame) -> pd.DataFrame:
@@ -77,3 +81,364 @@ def capacityFactorsToInflows(vresProfiles_df: pd.DataFrame, vres_df: pd.DataFram
vresProfiles_df.drop(vresProfiles_df.index[mask], inplace=True)

return df.set_index(['rp', 'k', 'g']).sort_index(level="k")


def apply_kmedoids_aggregation(
case_study,
k: int,
rp_length: int = 24,
cluster_strategy: Literal["aggregated", "disaggregated"] = "aggregated",
capacity_normalization: Literal["installed", "maxInvestment"] = "maxInvestment",
sum_production: bool = False
):
"""
Apply k-medoids temporal aggregation to a CaseStudy object.
Each scenario from dGlobal_Scenarios is processed independently.

Args:
case_study: The CaseStudy object to aggregate
k: Number of representative periods to create
rp_length: Hours per representative period (e.g., 24, 48)
cluster_strategy: "aggregated" (sum across buses) or "disaggregated" (keep buses separate)
capacity_normalization: "installed" or "maxInvestment" for VRES capacity factor weighting
sum_production: If True, sum all technologies into single production column

Returns:
CaseStudy: New clustered CaseStudy object
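
    Example (illustrative; assumes `cs` is a CaseStudy already loaded elsewhere):
        clustered = apply_kmedoids_aggregation(cs, k=8, rp_length=24)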
"""

# Create a deep copy to avoid modifying the original
aggregated_case_study = case_study.copy()

# Get scenario names
scenario_names = aggregated_case_study.dGlobal_Scenarios.index.values

# Process each scenario independently
all_processed_data = {}
for scenario in scenario_names:
print(f"\n=== Processing scenario: {scenario} ===")

print(f" Step 1: Extracting data for scenario {scenario}")
scenario_clustering_data = _extract_scenario_data(case_study, scenario, capacity_normalization)

if len(scenario_clustering_data) == 0:
raise ValueError(f"No data found for scenario {scenario}")

print(f" Found {len(scenario_clustering_data)} data points for clustering")

print(f" \nStep 2: Preparing data using {cluster_strategy} strategy")
if cluster_strategy == "disaggregated":
pivot_df = _prepare_disaggregated_data(scenario_clustering_data, sum_production)
else:
pivot_df = _prepare_aggregated_data(scenario_clustering_data, sum_production)

print(f" Prepared {len(pivot_df)} time periods for clustering")

print(f" \nStep 3: Running k-medoids clustering (k={k}, rp_length={rp_length})")
aggregation_result = _run_kmedoids_clustering(pivot_df, k, rp_length)

print(f" \nStep 4: Building representative period data")
demand_data, vres_data = _build_representative_periods(
case_study, scenario, aggregation_result, rp_length
)

print(f" \nStep 5: Building weights and hour indices")
weights_rp, weights_k, hindex = _build_scenario_weights_and_indices(
aggregation_result, scenario, rp_length
)

all_processed_data[scenario] = {
'demand': demand_data,
'vres_profiles': vres_data,
'weights_rp': weights_rp,
'weights_k': weights_k,
'hindex': hindex
}
print(f"Scenario {scenario} completed successfully")

# Update CaseStudy with aggregated data
_update_casestudy_with_scenarios(aggregated_case_study, all_processed_data)

print(f"\nAll scenarios have been processed and combined successfully!")
return aggregated_case_study


def _extract_scenario_data(case_study, scenario: str, capacity_normalization: str) -> pd.DataFrame:
"""Extract and combine demand and VRES data for a single scenario - OPTIMIZED."""

# Extract demand data for this scenario
demand_df = case_study.dPower_Demand.reset_index()
demand_df = demand_df[demand_df['scenario'] == scenario].copy()

if len(demand_df) == 0:
raise ValueError(f"No demand data found for scenario {scenario}")

# Initialize with demand data
scenario_df = demand_df[['scenario', 'rp', 'i', 'k', 'value']].rename(columns={'value': 'demand'})

# Process VRES data if available
if (hasattr(case_study, 'dPower_VRES') and case_study.dPower_VRES is not None and
hasattr(case_study, 'dPower_VRESProfiles') and case_study.dPower_VRESProfiles is not None):

# Get VRES data for this scenario
vres_df = case_study.dPower_VRES.reset_index()
vres_df = vres_df[vres_df['scenario'] == scenario].copy()

# Get VRES profiles for this scenario
vres_profiles_df = case_study.dPower_VRESProfiles.reset_index()
vres_profiles_df = vres_profiles_df[vres_profiles_df['scenario'] == scenario].copy()

if len(vres_df) > 0 and len(vres_profiles_df) > 0:
            # Merge VRES unit data onto the hourly VRES profiles
vres_with_profiles = pd.merge(
vres_profiles_df,
vres_df[['g', 'tec', 'i', 'ExisUnits', 'MaxProd', 'EnableInvest', 'MaxInvest']],
on='g',
how='left'
)

# Apply capacity normalization (vectorized)
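            # "installed" weights each unit by its existing capacity only;
            # "maxInvestment" uses max(ExisUnits, EnableInvest * MaxInvest), i.e. the
            # largest unit count the plant could reach if investment is allowed.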
if capacity_normalization == "installed":
normalization_factor = vres_with_profiles['ExisUnits'].fillna(0)
else: # maxInvestment
normalization_factor = np.maximum(
vres_with_profiles['ExisUnits'].fillna(0),
vres_with_profiles['EnableInvest'].fillna(0) * vres_with_profiles['MaxInvest'].fillna(0)
)

            # Weight each profile value by unit size (MaxProd) and the normalization capacity
vres_with_profiles['weighted_cf'] = (
vres_with_profiles['value'].fillna(0) *
vres_with_profiles['MaxProd'].fillna(0) *
normalization_factor
)

# Pivot technologies as columns
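            # Result: one row per (scenario, rp, k, g, i) with one column per
            # technology holding the weighted series; the unit id 'g' is dropped
            # afterwards, keeping the bus 'i' for the merge below.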
vres_with_profiles = vres_with_profiles.pivot_table(
index=['scenario', 'rp', 'k', 'g', 'i'],
columns='tec',
values='weighted_cf',
fill_value=0
).reset_index().drop(columns=['g'])

# Merge with demand data
scenario_df = pd.merge(
scenario_df,
vres_with_profiles,
on=['scenario', 'rp', 'k', 'i'],
how='left'
)

return scenario_df


def _prepare_disaggregated_data(scenario_df: pd.DataFrame, sum_production: bool) -> pd.DataFrame:
"""Prepare data for disaggregated clustering (keeps buses separate)."""
result_df = scenario_df.copy()

if sum_production:
result_df = _sum_technology_columns(result_df)

return result_df


def _prepare_aggregated_data(scenario_df: pd.DataFrame, sum_production: bool) -> pd.DataFrame:
"""Prepare data for aggregated clustering (sum across buses)."""
grouping_cols = ['scenario', 'rp', 'k']
exclude_cols = grouping_cols + ['i']
value_cols = [col for col in scenario_df.columns if col not in exclude_cols]

# Aggregate across buses
aggregated_df = scenario_df.groupby(grouping_cols)[value_cols].sum().reset_index()

if sum_production:
aggregated_df = _sum_technology_columns(aggregated_df)

return aggregated_df


def _sum_technology_columns(df: pd.DataFrame) -> pd.DataFrame:
"""Sum all technology columns into a single 'production' column."""
result_df = df.copy()
exclude_cols = {'scenario', 'rp', 'i', 'k', 'demand'}
tech_cols = [col for col in df.columns if col not in exclude_cols]

if tech_cols:
result_df['production'] = df[tech_cols].sum(axis=1)
result_df = result_df.drop(columns=tech_cols)

return result_df


def _run_kmedoids_clustering(pivot_df: pd.DataFrame, k: int, rp_length: int):
"""Run k-medoids clustering using tsam."""

# Prepare data for tsam
pivot_df_sorted = pivot_df.sort_values('k')

# Create datetime index
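    # (tsam infers the hourly resolution from this index; the 2010 start date is
    # arbitrary and only provides a uniform datetime axis)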
pivot_df_sorted['datetime'] = pd.date_range(start='2010-01-01', periods=len(pivot_df_sorted), freq='h')

# Drop grouping columns and set datetime index
clustering_data = pivot_df_sorted.drop(columns=['scenario', 'rp', 'k']).set_index('datetime')

print(f" Running k-medoids with {k} clusters, {rp_length} hours/period, {len(clustering_data)} total hours")

# Run clustering
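    # rescaleClusterPeriods=False keeps the medoid values exactly as in the original
    # series instead of rescaling each typical period to match the original means.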
aggregation = tsam.TimeSeriesAggregation(
clustering_data,
noTypicalPeriods=k,
hoursPerPeriod=rp_length,
clusterMethod='k_medoids',
rescaleClusterPeriods=False
)

typical_periods = aggregation.createTypicalPeriods()
print(f" Clustering completed. Created {len(typical_periods)} typical periods.")
print(f" Cluster center indices (medoids): {aggregation.clusterCenterIndices}")

return aggregation


def _build_representative_periods(case_study, scenario: str, aggregation, rp_length: int):
"""Build demand and VRES profile data for representative periods."""

def _extract_numeric_and_calc_p(df, rp_length):
"""Extract numeric values from rp/k strings and calculate absolute hour."""
        df['rp_num'] = df['rp'].str.extract(r'(\d+)', expand=False).astype(int)
        df['k_num'] = df['k'].str.extract(r'(\d+)', expand=False).astype(int)
df['p'] = (df['rp_num'] - 1) * rp_length + df['k_num']
return df

# Process demand data
demand_original = case_study.dPower_Demand.reset_index()
demand_original = demand_original[demand_original['scenario'] == scenario].copy()
demand_original = _extract_numeric_and_calc_p(demand_original, rp_length)

demand_data = []
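    # For each medoid, copy its rp_length original hours into a new representative
    # period (rp01, rp02, ...), renumbering the hours within it as k01..k{rp_length}.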
for cluster_idx, medoid_period in enumerate(aggregation.clusterCenterIndices):
rp_new = f'rp{cluster_idx + 1:02d}'
medoid_hours = range(medoid_period * rp_length + 1, (medoid_period + 1) * rp_length + 1)
medoid_demand_data = demand_original[demand_original['p'].isin(medoid_hours)]

for k_offset, abs_hour in enumerate(medoid_hours, start=1):
k_new = f'k{k_offset:02d}'
hour_demand = medoid_demand_data[medoid_demand_data['p'] == abs_hour]

for _, row in hour_demand.iterrows():
demand_data.append({
'rp': rp_new,
'i': row['i'],
'k': k_new,
'scenario': scenario,
'value': row['value']
})

# Process VRES data if available
vres_data = []
if hasattr(case_study, 'dPower_VRESProfiles') and case_study.dPower_VRESProfiles is not None:
vres_original = case_study.dPower_VRESProfiles.reset_index()
vres_original = vres_original[vres_original['scenario'] == scenario].copy()
vres_original = _extract_numeric_and_calc_p(vres_original, rp_length)

for cluster_idx, medoid_period in enumerate(aggregation.clusterCenterIndices):
rp_new = f'rp{cluster_idx + 1:02d}'
medoid_hours = range(medoid_period * rp_length + 1, (medoid_period + 1) * rp_length + 1)
medoid_vres_data = vres_original[vres_original['p'].isin(medoid_hours)]

for k_offset, abs_hour in enumerate(medoid_hours, start=1):
k_new = f'k{k_offset:02d}'
hour_vres = medoid_vres_data[medoid_vres_data['p'] == abs_hour]

for _, row in hour_vres.iterrows():
vres_data.append({
'rp': rp_new,
'k': k_new,
'g': row['g'],
'scenario': scenario,
'value': row['value']
})

return demand_data, vres_data


def _build_scenario_weights_and_indices(aggregation, scenario: str, rp_length: int):
"""Build representative period weights and hour indices for a single scenario."""

# RP weights
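    # tsam's _clusterPeriodNoOccur maps each cluster index to the number of original
    # periods assigned to it; that count becomes the weight of the representative period.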
weights_rp = []
for rp_idx, weight in aggregation._clusterPeriodNoOccur.items():
weights_rp.append({
'rp': f'rp{rp_idx + 1:02d}',
'scenario': scenario,
'pWeight_rp': int(weight)
})

# K weights (all 1 for hourly resolution)
weights_k = []
for k in range(1, rp_length + 1):
weights_k.append({
'k': f'k{k:02d}',
'scenario': scenario,
'pWeight_k': 1
})

# Hindex mapping
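    # tsam's _clusterOrder lists, for each original period in sequence, the cluster it
    # was assigned to; original hour h{orig_p*rp_length + k} therefore maps to hour k
    # of that cluster's representative period.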
hindex = []
for orig_p, cluster_id in enumerate(aggregation._clusterOrder):
for k in range(1, rp_length + 1):
hindex.append({
'p': f'h{orig_p * rp_length + k:04d}',
'rp': f'rp{cluster_id + 1:02d}',
'k': f'k{k:02d}',
'scenario': scenario
})

return weights_rp, weights_k, hindex


def _update_casestudy_with_scenarios(case_study, all_processed_data: Dict):
"""Update CaseStudy with aggregated data, maintaining original index structures."""

# Collect all data across scenarios
all_demand_data = []
all_vres_data = []
all_weights_rp_data = []
all_weights_k_data = []
all_hindex_data = []

for scenario, scenario_data in all_processed_data.items():
all_demand_data.extend(scenario_data['demand'])
all_vres_data.extend(scenario_data['vres_profiles'])
all_weights_rp_data.extend(scenario_data['weights_rp'])
all_weights_k_data.extend(scenario_data['weights_k'])
all_hindex_data.extend(scenario_data['hindex'])

print(f"Updating CaseStudy with combined data:")

if all_demand_data:
demand_df = pd.DataFrame(all_demand_data)
case_study.dPower_Demand = demand_df.set_index(['rp', 'i', 'k'])
print(f" - Updated demand: {len(all_demand_data)} entries")

if all_vres_data:
vres_df = pd.DataFrame(all_vres_data)
case_study.dPower_VRESProfiles = vres_df.set_index(['rp', 'k', 'g'])
print(f" - Updated VRES profiles: {len(all_vres_data)} entries")

if all_weights_rp_data:
weights_rp_df = pd.DataFrame(all_weights_rp_data)
case_study.dPower_WeightsRP = weights_rp_df.set_index(['rp'])
print(f" - Updated RP weights: {len(all_weights_rp_data)} entries")

if all_weights_k_data:
weights_k_df = pd.DataFrame(all_weights_k_data)
case_study.dPower_WeightsK = weights_k_df.set_index(['k'])
print(f" - Updated K weights: {len(all_weights_k_data)} entries")

if all_hindex_data:
hindex_df = pd.DataFrame(all_hindex_data)
case_study.dPower_Hindex = hindex_df.set_index(['p', 'rp', 'k'])
print(f" - Updated Hindex: {len(all_hindex_data)} entries")

print("CaseStudy update completed successfully!")
2 changes: 2 additions & 0 deletions environment.yml
@@ -3,6 +3,7 @@ channels:
- defaults
- conda-forge
dependencies:
- numpy=2.3.1
- openpyxl=3.1.5
- pandas=2.3.1
- pandas-stubs=2.3.0.250703
@@ -12,5 +13,6 @@ dependencies:
- python=3.12.2
- rich=13.9.4
- rich-argparse=1.7.1
- tsam=2.3.9
- pip:
- pulp==2.9.0