From 9cfd5d7263d84057986ba50589f1fade37a019c3 Mon Sep 17 00:00:00 2001 From: Silvia Rognone Date: Fri, 25 Jul 2025 14:39:19 +0100 Subject: [PATCH 1/6] Add comprehensive internal-only parameters analysis for issue #555 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Created analyze_internal_only_parameters.py script to scan data model files - Generated table comparing with sample_config.yml instead of benchmark1.yml - Found 24 total internal-only parameters (11 in sample_config, 13 not) - Provides CSV table and markdown report for analysis - Script can be re-run as data model evolves Key findings: - 11 parameters present in sample_config.yml need review - 13 parameters are true internal state variables - Parameters span state.py (22) and model.py (2) Addresses GitHub issue #555: Identify complete list of internal-only parameters 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../analyze_internal_only_parameters.py | 313 ++++++++++++++++++ .../internal_only_parameters_report.md | 100 ++++++ .../internal_only_parameters_table.csv | 25 ++ 3 files changed, 438 insertions(+) create mode 100644 src/supy/data_model/initial_states/analyze_internal_only_parameters.py create mode 100644 src/supy/data_model/initial_states/internal_only_parameters_report.md create mode 100644 src/supy/data_model/initial_states/internal_only_parameters_table.csv diff --git a/src/supy/data_model/initial_states/analyze_internal_only_parameters.py b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py new file mode 100644 index 000000000..a1e891697 --- /dev/null +++ b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python3 +""" +Script to analyze internal-only parameters in SUEWS data model. + +This script: +1. Scans the data model Python files for parameters marked with internal_only=True +2. Checks which parameters are present in sample_config.yml +3. Generates a comprehensive table for issue #555 + +Usage: + python analyze_internal_only_parameters.py + +Output: + - internal_only_parameters_table.csv + - internal_only_parameters_report.md +""" + +import ast +import csv +import os +import yaml +from pathlib import Path +from typing import Dict, List, Set, Tuple, Any +import re + + +class InternalOnlyAnalyzer: + """Analyzer for internal-only parameters in SUEWS data model.""" + + def __init__(self): + self.script_dir = Path(__file__).parent + self.repo_root = self.script_dir.parent.parent.parent.parent + self.data_model_dir = self.script_dir.parent + self.sample_config_path = self.repo_root / "src" / "supy" / "sample_run" / "sample_config.yml" + + self.internal_params = [] + self.sample_config_data = {} + + def find_internal_only_parameters(self) -> List[Dict[str, Any]]: + """Find all parameters marked as internal_only in data model files.""" + python_files = [ + "state.py", + "model.py", + "core.py", + "human_activity.py", + "hydro.py", + "ohm.py", + "precheck.py", + "profile.py", + "site.py", + "surface.py", + "type.py" + ] + + internal_params = [] + + for py_file in python_files: + file_path = self.data_model_dir / py_file + if not file_path.exists(): + continue + + print(f"Analyzing {py_file}...") + params = self._extract_internal_params_from_file(file_path) + internal_params.extend(params) + + return internal_params + + def _extract_internal_params_from_file(self, file_path: Path) -> List[Dict[str, Any]]: + """Extract internal-only parameters from a single Python file.""" + params = [] + + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # Split content into lines for line number tracking + lines = content.split('\n') + + # Find Field definitions with internal_only=True + field_pattern = r'(\w+):\s*[^=]*=\s*Field\s*\(' + + for i, line in enumerate(lines, 1): + # Look for Field definitions + field_match = re.search(field_pattern, line) + if field_match: + param_name = field_match.group(1) + + # Look ahead to find the complete Field definition + field_def = self._extract_complete_field_definition(lines, i-1) + + if 'internal_only' in field_def and 'True' in field_def: + # Extract description + desc_match = re.search(r'description=["\'](.*?)["\']', field_def, re.DOTALL) + description = desc_match.group(1) if desc_match else "No description" + + # Extract unit from json_schema_extra + unit_match = re.search(r'"unit":\s*["\'](.*?)["\']', field_def) + unit = unit_match.group(1) if unit_match else "" + + params.append({ + 'param_name': param_name, + 'file_location': f"{file_path.name}:{i}", + 'description': description, + 'unit': unit + }) + + return params + + def _extract_complete_field_definition(self, lines: List[str], start_line: int) -> str: + """Extract complete Field definition spanning multiple lines.""" + field_def = lines[start_line] + line_idx = start_line + 1 + + # Count parentheses to find complete definition + paren_count = field_def.count('(') - field_def.count(')') + + while paren_count > 0 and line_idx < len(lines): + field_def += '\n' + lines[line_idx] + paren_count += lines[line_idx].count('(') - lines[line_idx].count(')') + line_idx += 1 + + return field_def + + def load_sample_config(self) -> Dict[str, Any]: + """Load and parse the sample_config.yml file.""" + if not self.sample_config_path.exists(): + print(f"Warning: sample_config.yml not found at {self.sample_config_path}") + return {} + + with open(self.sample_config_path, 'r', encoding='utf-8') as f: + return yaml.safe_load(f) + + def _flatten_yaml_keys(self, data: Dict[str, Any], prefix: str = "") -> Set[str]: + """Recursively flatten YAML structure to get all parameter names.""" + keys = set() + + if isinstance(data, dict): + for key, value in data.items(): + full_key = f"{prefix}.{key}" if prefix else key + keys.add(key) # Add the key itself + keys.add(full_key) # Add the full path + + if isinstance(value, (dict, list)): + keys.update(self._flatten_yaml_keys(value, full_key)) + elif isinstance(data, list): + for i, item in enumerate(data): + if isinstance(item, (dict, list)): + keys.update(self._flatten_yaml_keys(item, f"{prefix}[{i}]")) + + return keys + + def check_parameter_in_sample_config(self, param_name: str, yaml_keys: Set[str]) -> Tuple[bool, str]: + """Check if a parameter is present in sample_config.yml.""" + # Direct match + if param_name in yaml_keys: + return True, param_name + + # Check for partial matches (parameter might be nested) + matches = [key for key in yaml_keys if param_name in key] + if matches: + return True, matches[0] + + return False, "" + + def generate_table(self) -> None: + """Generate the analysis table and report.""" + print("Finding internal-only parameters...") + self.internal_params = self.find_internal_only_parameters() + + print("Loading sample_config.yml...") + self.sample_config_data = self.load_sample_config() + yaml_keys = self._flatten_yaml_keys(self.sample_config_data) + + print(f"Found {len(self.internal_params)} internal-only parameters") + print(f"Found {len(yaml_keys)} keys in sample_config.yml") + + # Enhance parameter data with sample_config presence + for param in self.internal_params: + in_sample, location = self.check_parameter_in_sample_config( + param['param_name'], yaml_keys + ) + param['in_sample_config'] = 'Yes' if in_sample else 'No' + param['sample_config_location'] = location if in_sample else '' + + # Generate CSV table + self._generate_csv_table() + + # Generate markdown report + self._generate_markdown_report() + + print("\nAnalysis complete!") + print(f"Generated: internal_only_parameters_table.csv") + print(f"Generated: internal_only_parameters_report.md") + + def _generate_csv_table(self) -> None: + """Generate CSV table of internal-only parameters.""" + csv_path = self.script_dir / "internal_only_parameters_table.csv" + + with open(csv_path, 'w', newline='', encoding='utf-8') as f: + fieldnames = [ + 'param_name', + 'file_location', + 'in_sample_config', + 'sample_config_location', + 'description', + 'unit' + ] + writer = csv.DictWriter(f, fieldnames=fieldnames) + + writer.writeheader() + for param in sorted(self.internal_params, key=lambda x: x['param_name']): + writer.writerow(param) + + def _generate_markdown_report(self) -> None: + """Generate markdown report with analysis summary.""" + md_path = self.script_dir / "internal_only_parameters_report.md" + + # Count statistics + total_params = len(self.internal_params) + in_sample = sum(1 for p in self.internal_params if p['in_sample_config'] == 'Yes') + not_in_sample = total_params - in_sample + + # Group by file + by_file = {} + for param in self.internal_params: + file_name = param['file_location'].split(':')[0] + if file_name not in by_file: + by_file[file_name] = [] + by_file[file_name].append(param) + + with open(md_path, 'w', encoding='utf-8') as f: + f.write(f"""# Internal-Only Parameters Analysis Report + +Generated for GitHub Issue #555: Identify complete list of internal-only parameters + +## Summary Statistics + +- **Total internal-only parameters**: {total_params} +- **Parameters in sample_config.yml**: {in_sample} +- **Parameters NOT in sample_config.yml**: {not_in_sample} +- **Files analyzed**: {len(by_file)} + +## Parameters by File + +""") + + for file_name, params in sorted(by_file.items()): + f.write(f"### {file_name} ({len(params)} parameters)\n\n") + f.write("| Parameter | In sample_config.yml | Description | Unit |\n") + f.write("|-----------|---------------------|-------------|------|\n") + + for param in sorted(params, key=lambda x: x['param_name']): + desc = param['description'].replace('\n', ' ').replace('|', '\\|')[:100] + if len(param['description']) > 100: + desc += "..." + f.write(f"| `{param['param_name']}` | {param['in_sample_config']} | {desc} | {param['unit']} |\n") + + f.write("\n") + + f.write(f"""## Parameters Present in sample_config.yml + +These parameters are marked as internal-only but appear in the sample configuration: + +""") + + present_params = [p for p in self.internal_params if p['in_sample_config'] == 'Yes'] + if present_params: + f.write("| Parameter | File | Location in sample_config.yml |\n") + f.write("|-----------|------|-------------------------------|\n") + for param in sorted(present_params, key=lambda x: x['param_name']): + f.write(f"| `{param['param_name']}` | {param['file_location']} | {param['sample_config_location']} |\n") + else: + f.write("*None found*\n") + + f.write("\n\n## Parameters NOT in sample_config.yml\n\n") + f.write("These parameters are internal-only and do not appear in the sample configuration:\n\n") + + absent_params = [p for p in self.internal_params if p['in_sample_config'] == 'No'] + if absent_params: + f.write("| Parameter | File | Description |\n") + f.write("|-----------|------|-------------|\n") + for param in sorted(absent_params, key=lambda x: x['param_name']): + desc = param['description'].replace('\n', ' ').replace('|', '\\|')[:80] + if len(param['description']) > 80: + desc += "..." + f.write(f"| `{param['param_name']}` | {param['file_location']} | {desc} |\n") + else: + f.write("*None found*\n") + + f.write("\n## Recommendations\n\n") + f.write("1. **Parameters in sample_config.yml**: Review whether these should remain internal-only or be exposed to users\n") + f.write("2. **Parameters NOT in sample_config.yml**: These are likely true internal state variables that should remain hidden\n") + f.write("3. **Consider advanced/developer mode**: For parameters needed in benchmark/testing scenarios\n\n") + f.write("## Usage\n\n") + f.write("This analysis was generated using:\n") + f.write("```bash\n") + f.write("python analyze_internal_only_parameters.py\n") + f.write("```\n\n") + f.write(f"Generated on: {self._get_timestamp()}\n") + + def _get_timestamp(self) -> str: + """Get current timestamp for report.""" + from datetime import datetime + return datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + +def main(): + """Main entry point.""" + analyzer = InternalOnlyAnalyzer() + analyzer.generate_table() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/supy/data_model/initial_states/internal_only_parameters_report.md b/src/supy/data_model/initial_states/internal_only_parameters_report.md new file mode 100644 index 000000000..abaa1b327 --- /dev/null +++ b/src/supy/data_model/initial_states/internal_only_parameters_report.md @@ -0,0 +1,100 @@ +# Internal-Only Parameters Analysis Report + +Generated for GitHub Issue #555: Identify complete list of internal-only parameters + +## Summary Statistics + +- **Total internal-only parameters**: 24 +- **Parameters in sample_config.yml**: 11 +- **Parameters NOT in sample_config.yml**: 13 +- **Files analyzed**: 2 + +## Parameters by File + +### model.py (2 parameters) + +| Parameter | In sample_config.yml | Description | Unit | +|-----------|---------------------|-------------|------| +| `diagnose` | Yes | Level of diagnostic output (0=none, 1=basic, 2=detailed) | | +| `kdownzen` | No | Use zenithal correction for downward shortwave radiation | | + +### state.py (22 parameters) + +| Parameter | In sample_config.yml | Description | Unit | +|-----------|---------------------|-------------|------| +| `cdd_accum` | No | Current day | degC | +| `cdd_daily` | No | Previous day | degC | +| `days_since_rain` | No | Days since rain for irrigation calculations [days] | days | +| `days_since_rain_accum` | No | Days since rain counter (current) [days] | days | +| `dqndt` | Yes | Change in net radiation | | +| `dqnsdt` | Yes | Change in net shortwave radiation | | +| `dt_since_start` | Yes | Time since start | | +| `hdd_accum` | No | Current day | degC | +| `hdd_daily` | No | Previous day | degC | +| `lenday_id` | Yes | Length of the day ID | | +| `precip_accum` | No | Current day | mm | +| `precip_daily_total` | No | Previous day | mm | +| `qn_av` | Yes | Average net radiation | | +| `qn_s_av` | Yes | Average net shortwave radiation | | +| `snowfallcum` | Yes | Cumulative snowfall | | +| `temp_5day_accum` | No | 5-day running mean temperature accumulation [degC] | degC | +| `temp_5day_mean` | No | Previous 5-day running mean temperature for QF calculations [degC] | degC | +| `temp_accum` | No | Current day | degC | +| `temp_daily_mean` | No | Previous day | degC | +| `tmax_id` | Yes | Maximum temperature ID | | +| `tmin_id` | Yes | Minimum temperature ID | | +| `tstep_prev` | Yes | Previous time step | | + +## Parameters Present in sample_config.yml + +These parameters are marked as internal-only but appear in the sample configuration: + +| Parameter | File | Location in sample_config.yml | +|-----------|------|-------------------------------| +| `diagnose` | model.py:667 | diagnose | +| `dqndt` | state.py:815 | dqndt | +| `dqnsdt` | state.py:820 | dqnsdt | +| `dt_since_start` | state.py:825 | dt_since_start | +| `lenday_id` | state.py:830 | lenday_id | +| `qn_av` | state.py:835 | qn_av | +| `qn_s_av` | state.py:843 | qn_s_av | +| `snowfallcum` | state.py:877 | snowfallcum | +| `tmax_id` | state.py:856 | tmax_id | +| `tmin_id` | state.py:864 | tmin_id | +| `tstep_prev` | state.py:872 | tstep_prev | + + +## Parameters NOT in sample_config.yml + +These parameters are internal-only and do not appear in the sample configuration: + +| Parameter | File | Description | +|-----------|------|-------------| +| `cdd_accum` | state.py:621 | Current day | +| `cdd_daily` | state.py:677 | Previous day | +| `days_since_rain` | state.py:713 | Days since rain for irrigation calculations [days] | +| `days_since_rain_accum` | state.py:657 | Days since rain counter (current) [days] | +| `hdd_accum` | state.py:612 | Current day | +| `hdd_daily` | state.py:668 | Previous day | +| `kdownzen` | model.py:657 | Use zenithal correction for downward shortwave radiation | +| `precip_accum` | state.py:648 | Current day | +| `precip_daily_total` | state.py:704 | Previous day | +| `temp_5day_accum` | state.py:639 | 5-day running mean temperature accumulation [degC] | +| `temp_5day_mean` | state.py:695 | Previous 5-day running mean temperature for QF calculations [degC] | +| `temp_accum` | state.py:630 | Current day | +| `temp_daily_mean` | state.py:686 | Previous day | + +## Recommendations + +1. **Parameters in sample_config.yml**: Review whether these should remain internal-only or be exposed to users +2. **Parameters NOT in sample_config.yml**: These are likely true internal state variables that should remain hidden +3. **Consider advanced/developer mode**: For parameters needed in benchmark/testing scenarios + +## Usage + +This analysis was generated using: +```bash +python analyze_internal_only_parameters.py +``` + +Generated on: 2025-07-25 14:38:54 diff --git a/src/supy/data_model/initial_states/internal_only_parameters_table.csv b/src/supy/data_model/initial_states/internal_only_parameters_table.csv new file mode 100644 index 000000000..b43ff4660 --- /dev/null +++ b/src/supy/data_model/initial_states/internal_only_parameters_table.csv @@ -0,0 +1,25 @@ +param_name,file_location,in_sample_config,sample_config_location,description,unit +cdd_accum,state.py:621,No,,Current day,degC +cdd_daily,state.py:677,No,,Previous day,degC +days_since_rain,state.py:713,No,,Days since rain for irrigation calculations [days],days +days_since_rain_accum,state.py:657,No,,Days since rain counter (current) [days],days +diagnose,model.py:667,Yes,diagnose,"Level of diagnostic output (0=none, 1=basic, 2=detailed)", +dqndt,state.py:815,Yes,dqndt,Change in net radiation, +dqnsdt,state.py:820,Yes,dqnsdt,Change in net shortwave radiation, +dt_since_start,state.py:825,Yes,dt_since_start,Time since start, +hdd_accum,state.py:612,No,,Current day,degC +hdd_daily,state.py:668,No,,Previous day,degC +kdownzen,model.py:657,No,,Use zenithal correction for downward shortwave radiation, +lenday_id,state.py:830,Yes,lenday_id,Length of the day ID, +precip_accum,state.py:648,No,,Current day,mm +precip_daily_total,state.py:704,No,,Previous day,mm +qn_av,state.py:835,Yes,qn_av,Average net radiation, +qn_s_av,state.py:843,Yes,qn_s_av,Average net shortwave radiation, +snowfallcum,state.py:877,Yes,snowfallcum,Cumulative snowfall, +temp_5day_accum,state.py:639,No,,5-day running mean temperature accumulation [degC],degC +temp_5day_mean,state.py:695,No,,Previous 5-day running mean temperature for QF calculations [degC],degC +temp_accum,state.py:630,No,,Current day,degC +temp_daily_mean,state.py:686,No,,Previous day,degC +tmax_id,state.py:856,Yes,tmax_id,Maximum temperature ID, +tmin_id,state.py:864,Yes,tmin_id,Minimum temperature ID, +tstep_prev,state.py:872,Yes,tstep_prev,Previous time step, From 849b9697deeaaf71b6ca3136554eb83efe81d808 Mon Sep 17 00:00:00 2001 From: Silvia Rognone Date: Fri, 25 Jul 2025 14:41:03 +0100 Subject: [PATCH 2/6] Remove description column from CSV table per user request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Updated analyze_internal_only_parameters.py to exclude description from CSV - Regenerated internal_only_parameters_table.csv with cleaner format - Kept descriptions in markdown report for full analysis - CSV now has: param_name, file_location, in_sample_config, sample_config_location, unit 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../analyze_internal_only_parameters.py | 5 +- .../internal_only_parameters_report.md | 2 +- .../internal_only_parameters_table.csv | 50 +++++++++---------- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/supy/data_model/initial_states/analyze_internal_only_parameters.py b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py index a1e891697..8b75c2950 100644 --- a/src/supy/data_model/initial_states/analyze_internal_only_parameters.py +++ b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py @@ -201,14 +201,15 @@ def _generate_csv_table(self) -> None: 'file_location', 'in_sample_config', 'sample_config_location', - 'description', 'unit' ] writer = csv.DictWriter(f, fieldnames=fieldnames) writer.writeheader() for param in sorted(self.internal_params, key=lambda x: x['param_name']): - writer.writerow(param) + # Create a copy without description for CSV + csv_param = {k: v for k, v in param.items() if k != 'description'} + writer.writerow(csv_param) def _generate_markdown_report(self) -> None: """Generate markdown report with analysis summary.""" diff --git a/src/supy/data_model/initial_states/internal_only_parameters_report.md b/src/supy/data_model/initial_states/internal_only_parameters_report.md index abaa1b327..6fdf76f77 100644 --- a/src/supy/data_model/initial_states/internal_only_parameters_report.md +++ b/src/supy/data_model/initial_states/internal_only_parameters_report.md @@ -97,4 +97,4 @@ This analysis was generated using: python analyze_internal_only_parameters.py ``` -Generated on: 2025-07-25 14:38:54 +Generated on: 2025-07-25 14:40:51 diff --git a/src/supy/data_model/initial_states/internal_only_parameters_table.csv b/src/supy/data_model/initial_states/internal_only_parameters_table.csv index b43ff4660..7b3fa6cee 100644 --- a/src/supy/data_model/initial_states/internal_only_parameters_table.csv +++ b/src/supy/data_model/initial_states/internal_only_parameters_table.csv @@ -1,25 +1,25 @@ -param_name,file_location,in_sample_config,sample_config_location,description,unit -cdd_accum,state.py:621,No,,Current day,degC -cdd_daily,state.py:677,No,,Previous day,degC -days_since_rain,state.py:713,No,,Days since rain for irrigation calculations [days],days -days_since_rain_accum,state.py:657,No,,Days since rain counter (current) [days],days -diagnose,model.py:667,Yes,diagnose,"Level of diagnostic output (0=none, 1=basic, 2=detailed)", -dqndt,state.py:815,Yes,dqndt,Change in net radiation, -dqnsdt,state.py:820,Yes,dqnsdt,Change in net shortwave radiation, -dt_since_start,state.py:825,Yes,dt_since_start,Time since start, -hdd_accum,state.py:612,No,,Current day,degC -hdd_daily,state.py:668,No,,Previous day,degC -kdownzen,model.py:657,No,,Use zenithal correction for downward shortwave radiation, -lenday_id,state.py:830,Yes,lenday_id,Length of the day ID, -precip_accum,state.py:648,No,,Current day,mm -precip_daily_total,state.py:704,No,,Previous day,mm -qn_av,state.py:835,Yes,qn_av,Average net radiation, -qn_s_av,state.py:843,Yes,qn_s_av,Average net shortwave radiation, -snowfallcum,state.py:877,Yes,snowfallcum,Cumulative snowfall, -temp_5day_accum,state.py:639,No,,5-day running mean temperature accumulation [degC],degC -temp_5day_mean,state.py:695,No,,Previous 5-day running mean temperature for QF calculations [degC],degC -temp_accum,state.py:630,No,,Current day,degC -temp_daily_mean,state.py:686,No,,Previous day,degC -tmax_id,state.py:856,Yes,tmax_id,Maximum temperature ID, -tmin_id,state.py:864,Yes,tmin_id,Minimum temperature ID, -tstep_prev,state.py:872,Yes,tstep_prev,Previous time step, +param_name,file_location,in_sample_config,sample_config_location,unit +cdd_accum,state.py:621,No,,degC +cdd_daily,state.py:677,No,,degC +days_since_rain,state.py:713,No,,days +days_since_rain_accum,state.py:657,No,,days +diagnose,model.py:667,Yes,diagnose, +dqndt,state.py:815,Yes,dqndt, +dqnsdt,state.py:820,Yes,dqnsdt, +dt_since_start,state.py:825,Yes,dt_since_start, +hdd_accum,state.py:612,No,,degC +hdd_daily,state.py:668,No,,degC +kdownzen,model.py:657,No,, +lenday_id,state.py:830,Yes,lenday_id, +precip_accum,state.py:648,No,,mm +precip_daily_total,state.py:704,No,,mm +qn_av,state.py:835,Yes,qn_av, +qn_s_av,state.py:843,Yes,qn_s_av, +snowfallcum,state.py:877,Yes,snowfallcum, +temp_5day_accum,state.py:639,No,,degC +temp_5day_mean,state.py:695,No,,degC +temp_accum,state.py:630,No,,degC +temp_daily_mean,state.py:686,No,,degC +tmax_id,state.py:856,Yes,tmax_id, +tmin_id,state.py:864,Yes,tmin_id, +tstep_prev,state.py:872,Yes,tstep_prev, From f4639c721d8af77dfefbd11c43f3c865666ac253 Mon Sep 17 00:00:00 2001 From: Silvia Rognone Date: Fri, 25 Jul 2025 14:43:25 +0100 Subject: [PATCH 3/6] Remove description column from markdown report as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Updated script to remove descriptions from all tables in markdown report - Simplified format: Parameter | In sample_config.yml | Unit - Cleaner, more focused output for both CSV and markdown - Maintains all essential information without verbose descriptions 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../analyze_internal_only_parameters.py | 18 ++-- .../internal_only_parameters_report.md | 88 +++++++++---------- 2 files changed, 50 insertions(+), 56 deletions(-) diff --git a/src/supy/data_model/initial_states/analyze_internal_only_parameters.py b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py index 8b75c2950..0282ff88e 100644 --- a/src/supy/data_model/initial_states/analyze_internal_only_parameters.py +++ b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py @@ -246,14 +246,11 @@ def _generate_markdown_report(self) -> None: for file_name, params in sorted(by_file.items()): f.write(f"### {file_name} ({len(params)} parameters)\n\n") - f.write("| Parameter | In sample_config.yml | Description | Unit |\n") - f.write("|-----------|---------------------|-------------|------|\n") + f.write("| Parameter | In sample_config.yml | Unit |\n") + f.write("|-----------|---------------------|------|\n") for param in sorted(params, key=lambda x: x['param_name']): - desc = param['description'].replace('\n', ' ').replace('|', '\\|')[:100] - if len(param['description']) > 100: - desc += "..." - f.write(f"| `{param['param_name']}` | {param['in_sample_config']} | {desc} | {param['unit']} |\n") + f.write(f"| `{param['param_name']}` | {param['in_sample_config']} | {param['unit']} |\n") f.write("\n") @@ -277,13 +274,10 @@ def _generate_markdown_report(self) -> None: absent_params = [p for p in self.internal_params if p['in_sample_config'] == 'No'] if absent_params: - f.write("| Parameter | File | Description |\n") - f.write("|-----------|------|-------------|\n") + f.write("| Parameter | File | Unit |\n") + f.write("|-----------|------|------|\n") for param in sorted(absent_params, key=lambda x: x['param_name']): - desc = param['description'].replace('\n', ' ').replace('|', '\\|')[:80] - if len(param['description']) > 80: - desc += "..." - f.write(f"| `{param['param_name']}` | {param['file_location']} | {desc} |\n") + f.write(f"| `{param['param_name']}` | {param['file_location']} | {param['unit']} |\n") else: f.write("*None found*\n") diff --git a/src/supy/data_model/initial_states/internal_only_parameters_report.md b/src/supy/data_model/initial_states/internal_only_parameters_report.md index 6fdf76f77..e1cc23111 100644 --- a/src/supy/data_model/initial_states/internal_only_parameters_report.md +++ b/src/supy/data_model/initial_states/internal_only_parameters_report.md @@ -13,37 +13,37 @@ Generated for GitHub Issue #555: Identify complete list of internal-only paramet ### model.py (2 parameters) -| Parameter | In sample_config.yml | Description | Unit | -|-----------|---------------------|-------------|------| -| `diagnose` | Yes | Level of diagnostic output (0=none, 1=basic, 2=detailed) | | -| `kdownzen` | No | Use zenithal correction for downward shortwave radiation | | +| Parameter | In sample_config.yml | Unit | +|-----------|---------------------|------| +| `diagnose` | Yes | | +| `kdownzen` | No | | ### state.py (22 parameters) -| Parameter | In sample_config.yml | Description | Unit | -|-----------|---------------------|-------------|------| -| `cdd_accum` | No | Current day | degC | -| `cdd_daily` | No | Previous day | degC | -| `days_since_rain` | No | Days since rain for irrigation calculations [days] | days | -| `days_since_rain_accum` | No | Days since rain counter (current) [days] | days | -| `dqndt` | Yes | Change in net radiation | | -| `dqnsdt` | Yes | Change in net shortwave radiation | | -| `dt_since_start` | Yes | Time since start | | -| `hdd_accum` | No | Current day | degC | -| `hdd_daily` | No | Previous day | degC | -| `lenday_id` | Yes | Length of the day ID | | -| `precip_accum` | No | Current day | mm | -| `precip_daily_total` | No | Previous day | mm | -| `qn_av` | Yes | Average net radiation | | -| `qn_s_av` | Yes | Average net shortwave radiation | | -| `snowfallcum` | Yes | Cumulative snowfall | | -| `temp_5day_accum` | No | 5-day running mean temperature accumulation [degC] | degC | -| `temp_5day_mean` | No | Previous 5-day running mean temperature for QF calculations [degC] | degC | -| `temp_accum` | No | Current day | degC | -| `temp_daily_mean` | No | Previous day | degC | -| `tmax_id` | Yes | Maximum temperature ID | | -| `tmin_id` | Yes | Minimum temperature ID | | -| `tstep_prev` | Yes | Previous time step | | +| Parameter | In sample_config.yml | Unit | +|-----------|---------------------|------| +| `cdd_accum` | No | degC | +| `cdd_daily` | No | degC | +| `days_since_rain` | No | days | +| `days_since_rain_accum` | No | days | +| `dqndt` | Yes | | +| `dqnsdt` | Yes | | +| `dt_since_start` | Yes | | +| `hdd_accum` | No | degC | +| `hdd_daily` | No | degC | +| `lenday_id` | Yes | | +| `precip_accum` | No | mm | +| `precip_daily_total` | No | mm | +| `qn_av` | Yes | | +| `qn_s_av` | Yes | | +| `snowfallcum` | Yes | | +| `temp_5day_accum` | No | degC | +| `temp_5day_mean` | No | degC | +| `temp_accum` | No | degC | +| `temp_daily_mean` | No | degC | +| `tmax_id` | Yes | | +| `tmin_id` | Yes | | +| `tstep_prev` | Yes | | ## Parameters Present in sample_config.yml @@ -68,21 +68,21 @@ These parameters are marked as internal-only but appear in the sample configurat These parameters are internal-only and do not appear in the sample configuration: -| Parameter | File | Description | -|-----------|------|-------------| -| `cdd_accum` | state.py:621 | Current day | -| `cdd_daily` | state.py:677 | Previous day | -| `days_since_rain` | state.py:713 | Days since rain for irrigation calculations [days] | -| `days_since_rain_accum` | state.py:657 | Days since rain counter (current) [days] | -| `hdd_accum` | state.py:612 | Current day | -| `hdd_daily` | state.py:668 | Previous day | -| `kdownzen` | model.py:657 | Use zenithal correction for downward shortwave radiation | -| `precip_accum` | state.py:648 | Current day | -| `precip_daily_total` | state.py:704 | Previous day | -| `temp_5day_accum` | state.py:639 | 5-day running mean temperature accumulation [degC] | -| `temp_5day_mean` | state.py:695 | Previous 5-day running mean temperature for QF calculations [degC] | -| `temp_accum` | state.py:630 | Current day | -| `temp_daily_mean` | state.py:686 | Previous day | +| Parameter | File | Unit | +|-----------|------|------| +| `cdd_accum` | state.py:621 | degC | +| `cdd_daily` | state.py:677 | degC | +| `days_since_rain` | state.py:713 | days | +| `days_since_rain_accum` | state.py:657 | days | +| `hdd_accum` | state.py:612 | degC | +| `hdd_daily` | state.py:668 | degC | +| `kdownzen` | model.py:657 | | +| `precip_accum` | state.py:648 | mm | +| `precip_daily_total` | state.py:704 | mm | +| `temp_5day_accum` | state.py:639 | degC | +| `temp_5day_mean` | state.py:695 | degC | +| `temp_accum` | state.py:630 | degC | +| `temp_daily_mean` | state.py:686 | degC | ## Recommendations @@ -97,4 +97,4 @@ This analysis was generated using: python analyze_internal_only_parameters.py ``` -Generated on: 2025-07-25 14:40:51 +Generated on: 2025-07-25 14:43:05 From 26e63c606f5efa8a3258e52f81c07c58df0247aa Mon Sep 17 00:00:00 2001 From: Silvia Rognone Date: Fri, 25 Jul 2025 14:44:58 +0100 Subject: [PATCH 4/6] Simplify markdown report: remove unit and sample_config location columns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Removed unit column from all markdown tables - Removed "Location in sample_config.yml" column from present parameters table - Simplified format: Parameter | In sample_config.yml (main tables) - Simplified format: Parameter | File (detailed tables) - Much cleaner, focused output showing only essential information 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../analyze_internal_only_parameters.py | 18 +-- .../internal_only_parameters_report.md | 114 +++++++++--------- 2 files changed, 66 insertions(+), 66 deletions(-) diff --git a/src/supy/data_model/initial_states/analyze_internal_only_parameters.py b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py index 0282ff88e..834f030c4 100644 --- a/src/supy/data_model/initial_states/analyze_internal_only_parameters.py +++ b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py @@ -246,11 +246,11 @@ def _generate_markdown_report(self) -> None: for file_name, params in sorted(by_file.items()): f.write(f"### {file_name} ({len(params)} parameters)\n\n") - f.write("| Parameter | In sample_config.yml | Unit |\n") - f.write("|-----------|---------------------|------|\n") + f.write("| Parameter | In sample_config.yml |\n") + f.write("|-----------|---------------------|\n") for param in sorted(params, key=lambda x: x['param_name']): - f.write(f"| `{param['param_name']}` | {param['in_sample_config']} | {param['unit']} |\n") + f.write(f"| `{param['param_name']}` | {param['in_sample_config']} |\n") f.write("\n") @@ -262,10 +262,10 @@ def _generate_markdown_report(self) -> None: present_params = [p for p in self.internal_params if p['in_sample_config'] == 'Yes'] if present_params: - f.write("| Parameter | File | Location in sample_config.yml |\n") - f.write("|-----------|------|-------------------------------|\n") + f.write("| Parameter | File |\n") + f.write("|-----------|------|\n") for param in sorted(present_params, key=lambda x: x['param_name']): - f.write(f"| `{param['param_name']}` | {param['file_location']} | {param['sample_config_location']} |\n") + f.write(f"| `{param['param_name']}` | {param['file_location']} |\n") else: f.write("*None found*\n") @@ -274,10 +274,10 @@ def _generate_markdown_report(self) -> None: absent_params = [p for p in self.internal_params if p['in_sample_config'] == 'No'] if absent_params: - f.write("| Parameter | File | Unit |\n") - f.write("|-----------|------|------|\n") + f.write("| Parameter | File |\n") + f.write("|-----------|------|\n") for param in sorted(absent_params, key=lambda x: x['param_name']): - f.write(f"| `{param['param_name']}` | {param['file_location']} | {param['unit']} |\n") + f.write(f"| `{param['param_name']}` | {param['file_location']} |\n") else: f.write("*None found*\n") diff --git a/src/supy/data_model/initial_states/internal_only_parameters_report.md b/src/supy/data_model/initial_states/internal_only_parameters_report.md index e1cc23111..e70694bb9 100644 --- a/src/supy/data_model/initial_states/internal_only_parameters_report.md +++ b/src/supy/data_model/initial_states/internal_only_parameters_report.md @@ -13,76 +13,76 @@ Generated for GitHub Issue #555: Identify complete list of internal-only paramet ### model.py (2 parameters) -| Parameter | In sample_config.yml | Unit | -|-----------|---------------------|------| -| `diagnose` | Yes | | -| `kdownzen` | No | | +| Parameter | In sample_config.yml | +|-----------|---------------------| +| `diagnose` | Yes | +| `kdownzen` | No | ### state.py (22 parameters) -| Parameter | In sample_config.yml | Unit | -|-----------|---------------------|------| -| `cdd_accum` | No | degC | -| `cdd_daily` | No | degC | -| `days_since_rain` | No | days | -| `days_since_rain_accum` | No | days | -| `dqndt` | Yes | | -| `dqnsdt` | Yes | | -| `dt_since_start` | Yes | | -| `hdd_accum` | No | degC | -| `hdd_daily` | No | degC | -| `lenday_id` | Yes | | -| `precip_accum` | No | mm | -| `precip_daily_total` | No | mm | -| `qn_av` | Yes | | -| `qn_s_av` | Yes | | -| `snowfallcum` | Yes | | -| `temp_5day_accum` | No | degC | -| `temp_5day_mean` | No | degC | -| `temp_accum` | No | degC | -| `temp_daily_mean` | No | degC | -| `tmax_id` | Yes | | -| `tmin_id` | Yes | | -| `tstep_prev` | Yes | | +| Parameter | In sample_config.yml | +|-----------|---------------------| +| `cdd_accum` | No | +| `cdd_daily` | No | +| `days_since_rain` | No | +| `days_since_rain_accum` | No | +| `dqndt` | Yes | +| `dqnsdt` | Yes | +| `dt_since_start` | Yes | +| `hdd_accum` | No | +| `hdd_daily` | No | +| `lenday_id` | Yes | +| `precip_accum` | No | +| `precip_daily_total` | No | +| `qn_av` | Yes | +| `qn_s_av` | Yes | +| `snowfallcum` | Yes | +| `temp_5day_accum` | No | +| `temp_5day_mean` | No | +| `temp_accum` | No | +| `temp_daily_mean` | No | +| `tmax_id` | Yes | +| `tmin_id` | Yes | +| `tstep_prev` | Yes | ## Parameters Present in sample_config.yml These parameters are marked as internal-only but appear in the sample configuration: -| Parameter | File | Location in sample_config.yml | -|-----------|------|-------------------------------| -| `diagnose` | model.py:667 | diagnose | -| `dqndt` | state.py:815 | dqndt | -| `dqnsdt` | state.py:820 | dqnsdt | -| `dt_since_start` | state.py:825 | dt_since_start | -| `lenday_id` | state.py:830 | lenday_id | -| `qn_av` | state.py:835 | qn_av | -| `qn_s_av` | state.py:843 | qn_s_av | -| `snowfallcum` | state.py:877 | snowfallcum | -| `tmax_id` | state.py:856 | tmax_id | -| `tmin_id` | state.py:864 | tmin_id | -| `tstep_prev` | state.py:872 | tstep_prev | +| Parameter | File | +|-----------|------| +| `diagnose` | model.py:667 | +| `dqndt` | state.py:815 | +| `dqnsdt` | state.py:820 | +| `dt_since_start` | state.py:825 | +| `lenday_id` | state.py:830 | +| `qn_av` | state.py:835 | +| `qn_s_av` | state.py:843 | +| `snowfallcum` | state.py:877 | +| `tmax_id` | state.py:856 | +| `tmin_id` | state.py:864 | +| `tstep_prev` | state.py:872 | ## Parameters NOT in sample_config.yml These parameters are internal-only and do not appear in the sample configuration: -| Parameter | File | Unit | -|-----------|------|------| -| `cdd_accum` | state.py:621 | degC | -| `cdd_daily` | state.py:677 | degC | -| `days_since_rain` | state.py:713 | days | -| `days_since_rain_accum` | state.py:657 | days | -| `hdd_accum` | state.py:612 | degC | -| `hdd_daily` | state.py:668 | degC | -| `kdownzen` | model.py:657 | | -| `precip_accum` | state.py:648 | mm | -| `precip_daily_total` | state.py:704 | mm | -| `temp_5day_accum` | state.py:639 | degC | -| `temp_5day_mean` | state.py:695 | degC | -| `temp_accum` | state.py:630 | degC | -| `temp_daily_mean` | state.py:686 | degC | +| Parameter | File | +|-----------|------| +| `cdd_accum` | state.py:621 | +| `cdd_daily` | state.py:677 | +| `days_since_rain` | state.py:713 | +| `days_since_rain_accum` | state.py:657 | +| `hdd_accum` | state.py:612 | +| `hdd_daily` | state.py:668 | +| `kdownzen` | model.py:657 | +| `precip_accum` | state.py:648 | +| `precip_daily_total` | state.py:704 | +| `temp_5day_accum` | state.py:639 | +| `temp_5day_mean` | state.py:695 | +| `temp_accum` | state.py:630 | +| `temp_daily_mean` | state.py:686 | ## Recommendations @@ -97,4 +97,4 @@ This analysis was generated using: python analyze_internal_only_parameters.py ``` -Generated on: 2025-07-25 14:43:05 +Generated on: 2025-07-25 14:44:46 From df8c1b46c5343fab72ed22c26e00dd93e449e506 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 25 Jul 2025 14:10:28 +0000 Subject: [PATCH 5/6] style: auto-format code with ruff and fprettify Co-authored-by: dayantur <71443948+dayantur@users.noreply.github.com> --- .claude/scripts/changelog_helper.py | 262 +++++++++--------- .../analyze_internal_only_parameters.py | 225 ++++++++------- 2 files changed, 264 insertions(+), 223 deletions(-) diff --git a/.claude/scripts/changelog_helper.py b/.claude/scripts/changelog_helper.py index bb953093f..e11ab8178 100755 --- a/.claude/scripts/changelog_helper.py +++ b/.claude/scripts/changelog_helper.py @@ -21,204 +21,210 @@ class ChangelogRestructurer: - def __init__(self, changelog_path: str = 'CHANGELOG.md'): + def __init__(self, changelog_path: str = "CHANGELOG.md"): self.changelog_path = Path(changelog_path) # Support both old format (- DATE:) and new format (### DATE) - self.date_pattern = re.compile(r'^(?:- |### )(\d{1,2} \w+ \d{4})(?::\s*)?$') - self.entry_pattern = re.compile(r'^(?: )?- \[(\w+)\] (.+)$') - + self.date_pattern = re.compile(r"^(?:- |### )(\d{1,2} \w+ \d{4})(?::\s*)?$") + self.entry_pattern = re.compile(r"^(?: )?- \[(\w+)\] (.+)$") + def parse_to_json(self) -> Dict: """Parse CHANGELOG.md into structured JSON format""" if not self.changelog_path.exists(): raise FileNotFoundError(f"{self.changelog_path} not found") - + content = self.changelog_path.read_text() - lines = content.split('\n') - + lines = content.split("\n") + # Structure to hold parsed data data = { - 'header': [], - 'entries': {}, # date_str -> list of entries - 'raw_entries': [] # For debugging + "header": [], + "entries": {}, # date_str -> list of entries + "raw_entries": [], # For debugging } - + current_date = None current_entries = [] in_header = True - + i = 0 while i < len(lines): line = lines[i] - + # Check for date line date_match = self.date_pattern.match(line) if date_match: # Save previous date's entries if current_date and current_entries: - if current_date not in data['entries']: - data['entries'][current_date] = [] - data['entries'][current_date].extend(current_entries) - data['raw_entries'].append({ - 'date': current_date, - 'entries': current_entries.copy() + if current_date not in data["entries"]: + data["entries"][current_date] = [] + data["entries"][current_date].extend(current_entries) + data["raw_entries"].append({ + "date": current_date, + "entries": current_entries.copy(), }) - + # Start new date current_date = date_match.group(1) current_entries = [] in_header = False - + elif in_header: - data['header'].append(line) - + data["header"].append(line) + elif self.entry_pattern.match(line): # This is an entry line entry_match = self.entry_pattern.match(line) if entry_match: category = entry_match.group(1) description = entry_match.group(2) - + # Start new entry current_entry = { - 'category': category, - 'description': description, - 'details': [] + "category": category, + "description": description, + "details": [], } - + # Look ahead for detail lines (starting with ' -' or ' -') j = i + 1 - while j < len(lines) and (lines[j].startswith(' -') or lines[j].startswith(' -')): + while j < len(lines) and ( + lines[j].startswith(" -") or lines[j].startswith(" -") + ): # Skip if it's another category entry if self.entry_pattern.match(lines[j]): break detail = lines[j].strip() - if detail.startswith('- '): + if detail.startswith("- "): detail = detail[2:] # Remove '- ' - current_entry['details'].append(detail) + current_entry["details"].append(detail) j += 1 - + current_entries.append(current_entry) i = j - 1 # Skip the detail lines we just processed - + i += 1 - + # Don't forget the last entry if current_date and current_entries: - if current_date not in data['entries']: - data['entries'][current_date] = [] - data['entries'][current_date].extend(current_entries) - data['raw_entries'].append({ - 'date': current_date, - 'entries': current_entries + if current_date not in data["entries"]: + data["entries"][current_date] = [] + data["entries"][current_date].extend(current_entries) + data["raw_entries"].append({ + "date": current_date, + "entries": current_entries, }) - + return data - + def clean_entries(self, data: Dict) -> Dict: """Clean duplicates and merge similar entries""" cleaned_data = data.copy() cleaned_entries = {} - + # Track seen entries to remove duplicates seen_entries = set() - - for date_str, entries in data['entries'].items(): + + for date_str, entries in data["entries"].items(): cleaned_date_entries = [] - + for entry in entries: # Create a hash of the entry to detect duplicates entry_hash = hashlib.md5( json.dumps(entry, sort_keys=True).encode() ).hexdigest() - + if entry_hash not in seen_entries: seen_entries.add(entry_hash) cleaned_date_entries.append(entry) - + if cleaned_date_entries: cleaned_entries[date_str] = cleaned_date_entries - - cleaned_data['entries'] = cleaned_entries + + cleaned_data["entries"] = cleaned_entries return cleaned_data - + def sort_entries(self, data: Dict) -> List[Tuple[str, List]]: """Sort entries by date in reverse chronological order""" date_entries = [] - - for date_str, entries in data['entries'].items(): + + for date_str, entries in data["entries"].items(): try: - date_obj = datetime.strptime(date_str, '%d %b %Y') + date_obj = datetime.strptime(date_str, "%d %b %Y") date_entries.append((date_obj, date_str, entries)) except ValueError: print(f"Warning: Could not parse date '{date_str}'") - + # Sort by date object in reverse order date_entries.sort(key=lambda x: x[0], reverse=True) - + # Return sorted list of (date_str, entries) tuples return [(date_str, entries) for _, date_str, entries in date_entries] - + def format_github_links(self, text: str) -> str: """Convert #xxx and PR #xxx to GitHub links""" # Check if already formatted (contains markdown links) - if '](https://github.com/' in text: + if "](https://github.com/" in text: return text - + # Pattern for PR mentions - pr_pattern = r'PR #(\d+)' - text = re.sub(pr_pattern, r'[PR #\1](https://github.com/UMEP-dev/SUEWS/pull/\1)', text) - + pr_pattern = r"PR #(\d+)" + text = re.sub( + pr_pattern, r"[PR #\1](https://github.com/UMEP-dev/SUEWS/pull/\1)", text + ) + # Pattern for issue mentions (but not PR mentions and not already in brackets) - issue_pattern = r'(? List[str]: """Generate table of contents by year""" years = set() for date_str, _ in sorted_entries: try: - date_obj = datetime.strptime(date_str, '%d %b %Y') + date_obj = datetime.strptime(date_str, "%d %b %Y") years.add(date_obj.year) except ValueError: pass - + toc_lines = ["## Table of Contents", ""] for year in sorted(years, reverse=True): toc_lines.append(f"- [{year}](#{year})") - + return toc_lines - + def generate_stats_table(self, sorted_entries: List[Tuple[str, List]]) -> List[str]: """Generate annual statistics table""" # Collect stats by year stats_by_year = defaultdict(lambda: defaultdict(int)) - + for date_str, entries in sorted_entries: try: - date_obj = datetime.strptime(date_str, '%d %b %Y') + date_obj = datetime.strptime(date_str, "%d %b %Y") year = date_obj.year - + for entry in entries: - category = entry['category'] + category = entry["category"] stats_by_year[year][category] += 1 - stats_by_year[year]['total'] += 1 - + stats_by_year[year]["total"] += 1 + except ValueError: pass - + # Generate table table_lines = [ "", "## Annual Statistics", "", "| Year | Features | Bugfixes | Changes | Maintenance | Docs | Total |", - "|------|----------|----------|---------|-------------|------|-------|" + "|------|----------|----------|---------|-------------|------|-------|", ] - - categories = ['feature', 'bugfix', 'change', 'maintenance', 'doc'] - + + categories = ["feature", "bugfix", "change", "maintenance", "doc"] + for year in sorted(stats_by_year.keys(), reverse=True): stats = stats_by_year[year] row = f"| {year} |" @@ -227,141 +233,147 @@ def generate_stats_table(self, sorted_entries: List[Tuple[str, List]]) -> List[s row += f" {count} |" row += f" {stats['total']} |" table_lines.append(row) - + return table_lines - def format_to_markdown(self, data: Dict, sorted_entries: List[Tuple[str, List]]) -> str: + def format_to_markdown( + self, data: Dict, sorted_entries: List[Tuple[str, List]] + ) -> str: """Format the cleaned and sorted data back to markdown""" lines = [] - + # Add header (but skip any existing TOC or stats tables) - for line in data['header']: + for line in data["header"]: # Skip lines that are part of TOC or stats table - if line.startswith('## Table of Contents') or line.startswith('## Annual Statistics'): + if line.startswith("## Table of Contents") or line.startswith( + "## Annual Statistics" + ): break - if line.startswith('| Year |') or line.startswith('|------|'): + if line.startswith("| Year |") or line.startswith("|------|"): continue - if line.startswith('- [20') and '](#20' in line: # TOC entry + if line.startswith("- [20") and "](#20" in line: # TOC entry continue lines.append(line) - + # Remove trailing empty lines from header while lines and lines[-1] == "": lines.pop() - + lines.append("") # Add single blank line after header - + # Add TOC toc = self.generate_toc(sorted_entries) lines.extend(toc) - + # Add stats table stats_table = self.generate_stats_table(sorted_entries) lines.extend(stats_table) lines.append("") # Extra blank line - + # Group entries by year entries_by_year = defaultdict(list) for date_str, entries in sorted_entries: try: - date_obj = datetime.strptime(date_str, '%d %b %Y') + date_obj = datetime.strptime(date_str, "%d %b %Y") year = date_obj.year entries_by_year[year].append((date_str, entries)) except ValueError: # If date parsing fails, put in "Unknown" year entries_by_year[0].append((date_str, entries)) - + # Add entries grouped by year for year in sorted(entries_by_year.keys(), reverse=True): if year == 0: lines.append("\n## Unknown Year") else: lines.append(f"\n## {year}") - + lines.append("") - + for date_str, entries in entries_by_year[year]: lines.append(f"### {date_str}") - + # Group entries by category for better organization by_category = {} for entry in entries: - cat = entry['category'] + cat = entry["category"] if cat not in by_category: by_category[cat] = [] by_category[cat].append(entry) - + # Output in preferred category order - category_order = ['feature', 'bugfix', 'change', 'maintenance', 'doc'] - + category_order = ["feature", "bugfix", "change", "maintenance", "doc"] + for category in category_order: if category in by_category: for entry in by_category[category]: # Format description with GitHub links - description = self.format_github_links(entry['description']) + description = self.format_github_links(entry["description"]) lines.append(f"- [{entry['category']}] {description}") - + # Format details with GitHub links - for detail in entry['details']: + for detail in entry["details"]: formatted_detail = self.format_github_links(detail) lines.append(f" - {formatted_detail}") - + lines.append("") # Blank line after each date - + # Remove trailing blank lines while lines and lines[-1] == "": lines.pop() - - return '\n'.join(lines) - - def save_json_debug(self, data: Dict, filename: str = 'changelog_debug.json'): + + return "\n".join(lines) + + def save_json_debug(self, data: Dict, filename: str = "changelog_debug.json"): """Save parsed data to JSON for debugging""" - with open(filename, 'w') as f: + with open(filename, "w") as f: # Convert datetime objects to strings for JSON serialization json_data = data.copy() json.dump(json_data, f, indent=2, default=str) print(f"Debug data saved to {filename}") - + def restructure(self): """Main method to restructure the changelog""" print("Step 1: Parsing CHANGELOG.md to JSON...") data = self.parse_to_json() print(f" Found {len(data['entries'])} unique dates") print(f" Total raw entries: {len(data['raw_entries'])}") - + # Save debug JSON self.save_json_debug(data) - + print("\nStep 2: Cleaning duplicates...") cleaned_data = self.clean_entries(data) print(f" Cleaned entries: {len(cleaned_data['entries'])} dates remain") - + print("\nStep 3: Sorting entries by date...") sorted_entries = self.sort_entries(cleaned_data) - + if sorted_entries: newest = sorted_entries[0][0] oldest = sorted_entries[-1][0] print(f" Date range: {newest} → {oldest}") - + print("\nStep 4: Formatting back to markdown...") markdown = self.format_to_markdown(cleaned_data, sorted_entries) - + # Backup original - backup_path = self.changelog_path.with_suffix('.md.backup') + backup_path = self.changelog_path.with_suffix(".md.backup") self.changelog_path.rename(backup_path) print(f"\nOriginal backed up to: {backup_path}") - + # Write new changelog self.changelog_path.write_text(markdown) print(f"✓ Restructured CHANGELOG saved to: {self.changelog_path}") - + # Summary statistics total_entries = sum(len(entries) for _, entries in sorted_entries) print(f"\nSummary:") print(f" - Total dates: {len(sorted_entries)}") print(f" - Total entries: {total_entries}") - print(f" - Average entries per date: {total_entries/len(sorted_entries):.1f}") + print( + f" - Average entries per date: {total_entries / len(sorted_entries):.1f}" + ) def main(): @@ -369,5 +381,5 @@ def main(): restructurer.restructure() -if __name__ == '__main__': - main() \ No newline at end of file +if __name__ == "__main__": + main() diff --git a/src/supy/data_model/initial_states/analyze_internal_only_parameters.py b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py index 834f030c4..6435c790f 100644 --- a/src/supy/data_model/initial_states/analyze_internal_only_parameters.py +++ b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py @@ -26,21 +26,23 @@ class InternalOnlyAnalyzer: """Analyzer for internal-only parameters in SUEWS data model.""" - + def __init__(self): self.script_dir = Path(__file__).parent self.repo_root = self.script_dir.parent.parent.parent.parent self.data_model_dir = self.script_dir.parent - self.sample_config_path = self.repo_root / "src" / "supy" / "sample_run" / "sample_config.yml" - + self.sample_config_path = ( + self.repo_root / "src" / "supy" / "sample_run" / "sample_config.yml" + ) + self.internal_params = [] self.sample_config_data = {} - + def find_internal_only_parameters(self) -> List[Dict[str, Any]]: """Find all parameters marked as internal_only in data model files.""" python_files = [ "state.py", - "model.py", + "model.py", "core.py", "human_activity.py", "hydro.py", @@ -49,186 +51,198 @@ def find_internal_only_parameters(self) -> List[Dict[str, Any]]: "profile.py", "site.py", "surface.py", - "type.py" + "type.py", ] - + internal_params = [] - + for py_file in python_files: file_path = self.data_model_dir / py_file if not file_path.exists(): continue - + print(f"Analyzing {py_file}...") params = self._extract_internal_params_from_file(file_path) internal_params.extend(params) - + return internal_params - - def _extract_internal_params_from_file(self, file_path: Path) -> List[Dict[str, Any]]: + + def _extract_internal_params_from_file( + self, file_path: Path + ) -> List[Dict[str, Any]]: """Extract internal-only parameters from a single Python file.""" params = [] - - with open(file_path, 'r', encoding='utf-8') as f: + + with open(file_path, "r", encoding="utf-8") as f: content = f.read() - + # Split content into lines for line number tracking - lines = content.split('\n') - + lines = content.split("\n") + # Find Field definitions with internal_only=True - field_pattern = r'(\w+):\s*[^=]*=\s*Field\s*\(' - + field_pattern = r"(\w+):\s*[^=]*=\s*Field\s*\(" + for i, line in enumerate(lines, 1): # Look for Field definitions field_match = re.search(field_pattern, line) if field_match: param_name = field_match.group(1) - + # Look ahead to find the complete Field definition - field_def = self._extract_complete_field_definition(lines, i-1) - - if 'internal_only' in field_def and 'True' in field_def: + field_def = self._extract_complete_field_definition(lines, i - 1) + + if "internal_only" in field_def and "True" in field_def: # Extract description - desc_match = re.search(r'description=["\'](.*?)["\']', field_def, re.DOTALL) - description = desc_match.group(1) if desc_match else "No description" - + desc_match = re.search( + r'description=["\'](.*?)["\']', field_def, re.DOTALL + ) + description = ( + desc_match.group(1) if desc_match else "No description" + ) + # Extract unit from json_schema_extra unit_match = re.search(r'"unit":\s*["\'](.*?)["\']', field_def) unit = unit_match.group(1) if unit_match else "" - + params.append({ - 'param_name': param_name, - 'file_location': f"{file_path.name}:{i}", - 'description': description, - 'unit': unit + "param_name": param_name, + "file_location": f"{file_path.name}:{i}", + "description": description, + "unit": unit, }) - + return params - - def _extract_complete_field_definition(self, lines: List[str], start_line: int) -> str: + + def _extract_complete_field_definition( + self, lines: List[str], start_line: int + ) -> str: """Extract complete Field definition spanning multiple lines.""" field_def = lines[start_line] line_idx = start_line + 1 - + # Count parentheses to find complete definition - paren_count = field_def.count('(') - field_def.count(')') - + paren_count = field_def.count("(") - field_def.count(")") + while paren_count > 0 and line_idx < len(lines): - field_def += '\n' + lines[line_idx] - paren_count += lines[line_idx].count('(') - lines[line_idx].count(')') + field_def += "\n" + lines[line_idx] + paren_count += lines[line_idx].count("(") - lines[line_idx].count(")") line_idx += 1 - + return field_def - + def load_sample_config(self) -> Dict[str, Any]: """Load and parse the sample_config.yml file.""" if not self.sample_config_path.exists(): print(f"Warning: sample_config.yml not found at {self.sample_config_path}") return {} - - with open(self.sample_config_path, 'r', encoding='utf-8') as f: + + with open(self.sample_config_path, "r", encoding="utf-8") as f: return yaml.safe_load(f) - + def _flatten_yaml_keys(self, data: Dict[str, Any], prefix: str = "") -> Set[str]: """Recursively flatten YAML structure to get all parameter names.""" keys = set() - + if isinstance(data, dict): for key, value in data.items(): full_key = f"{prefix}.{key}" if prefix else key keys.add(key) # Add the key itself keys.add(full_key) # Add the full path - + if isinstance(value, (dict, list)): keys.update(self._flatten_yaml_keys(value, full_key)) elif isinstance(data, list): for i, item in enumerate(data): if isinstance(item, (dict, list)): keys.update(self._flatten_yaml_keys(item, f"{prefix}[{i}]")) - + return keys - - def check_parameter_in_sample_config(self, param_name: str, yaml_keys: Set[str]) -> Tuple[bool, str]: + + def check_parameter_in_sample_config( + self, param_name: str, yaml_keys: Set[str] + ) -> Tuple[bool, str]: """Check if a parameter is present in sample_config.yml.""" # Direct match if param_name in yaml_keys: return True, param_name - + # Check for partial matches (parameter might be nested) matches = [key for key in yaml_keys if param_name in key] if matches: return True, matches[0] - + return False, "" - + def generate_table(self) -> None: """Generate the analysis table and report.""" print("Finding internal-only parameters...") self.internal_params = self.find_internal_only_parameters() - + print("Loading sample_config.yml...") self.sample_config_data = self.load_sample_config() yaml_keys = self._flatten_yaml_keys(self.sample_config_data) - + print(f"Found {len(self.internal_params)} internal-only parameters") print(f"Found {len(yaml_keys)} keys in sample_config.yml") - + # Enhance parameter data with sample_config presence for param in self.internal_params: in_sample, location = self.check_parameter_in_sample_config( - param['param_name'], yaml_keys + param["param_name"], yaml_keys ) - param['in_sample_config'] = 'Yes' if in_sample else 'No' - param['sample_config_location'] = location if in_sample else '' - + param["in_sample_config"] = "Yes" if in_sample else "No" + param["sample_config_location"] = location if in_sample else "" + # Generate CSV table self._generate_csv_table() - + # Generate markdown report self._generate_markdown_report() - + print("\nAnalysis complete!") print(f"Generated: internal_only_parameters_table.csv") print(f"Generated: internal_only_parameters_report.md") - + def _generate_csv_table(self) -> None: """Generate CSV table of internal-only parameters.""" csv_path = self.script_dir / "internal_only_parameters_table.csv" - - with open(csv_path, 'w', newline='', encoding='utf-8') as f: + + with open(csv_path, "w", newline="", encoding="utf-8") as f: fieldnames = [ - 'param_name', - 'file_location', - 'in_sample_config', - 'sample_config_location', - 'unit' + "param_name", + "file_location", + "in_sample_config", + "sample_config_location", + "unit", ] writer = csv.DictWriter(f, fieldnames=fieldnames) - + writer.writeheader() - for param in sorted(self.internal_params, key=lambda x: x['param_name']): + for param in sorted(self.internal_params, key=lambda x: x["param_name"]): # Create a copy without description for CSV - csv_param = {k: v for k, v in param.items() if k != 'description'} + csv_param = {k: v for k, v in param.items() if k != "description"} writer.writerow(csv_param) - + def _generate_markdown_report(self) -> None: """Generate markdown report with analysis summary.""" md_path = self.script_dir / "internal_only_parameters_report.md" - + # Count statistics total_params = len(self.internal_params) - in_sample = sum(1 for p in self.internal_params if p['in_sample_config'] == 'Yes') + in_sample = sum( + 1 for p in self.internal_params if p["in_sample_config"] == "Yes" + ) not_in_sample = total_params - in_sample - + # Group by file by_file = {} for param in self.internal_params: - file_name = param['file_location'].split(':')[0] + file_name = param["file_location"].split(":")[0] if file_name not in by_file: by_file[file_name] = [] by_file[file_name].append(param) - - with open(md_path, 'w', encoding='utf-8') as f: + + with open(md_path, "w", encoding="utf-8") as f: f.write(f"""# Internal-Only Parameters Analysis Report Generated for GitHub Issue #555: Identify complete list of internal-only parameters @@ -243,58 +257,73 @@ def _generate_markdown_report(self) -> None: ## Parameters by File """) - + for file_name, params in sorted(by_file.items()): f.write(f"### {file_name} ({len(params)} parameters)\n\n") f.write("| Parameter | In sample_config.yml |\n") f.write("|-----------|---------------------|\n") - - for param in sorted(params, key=lambda x: x['param_name']): - f.write(f"| `{param['param_name']}` | {param['in_sample_config']} |\n") - + + for param in sorted(params, key=lambda x: x["param_name"]): + f.write( + f"| `{param['param_name']}` | {param['in_sample_config']} |\n" + ) + f.write("\n") - + f.write(f"""## Parameters Present in sample_config.yml These parameters are marked as internal-only but appear in the sample configuration: """) - - present_params = [p for p in self.internal_params if p['in_sample_config'] == 'Yes'] + + present_params = [ + p for p in self.internal_params if p["in_sample_config"] == "Yes" + ] if present_params: f.write("| Parameter | File |\n") f.write("|-----------|------|\n") - for param in sorted(present_params, key=lambda x: x['param_name']): + for param in sorted(present_params, key=lambda x: x["param_name"]): f.write(f"| `{param['param_name']}` | {param['file_location']} |\n") else: f.write("*None found*\n") - + f.write("\n\n## Parameters NOT in sample_config.yml\n\n") - f.write("These parameters are internal-only and do not appear in the sample configuration:\n\n") - - absent_params = [p for p in self.internal_params if p['in_sample_config'] == 'No'] + f.write( + "These parameters are internal-only and do not appear in the sample configuration:\n\n" + ) + + absent_params = [ + p for p in self.internal_params if p["in_sample_config"] == "No" + ] if absent_params: f.write("| Parameter | File |\n") f.write("|-----------|------|\n") - for param in sorted(absent_params, key=lambda x: x['param_name']): + for param in sorted(absent_params, key=lambda x: x["param_name"]): f.write(f"| `{param['param_name']}` | {param['file_location']} |\n") else: f.write("*None found*\n") f.write("\n## Recommendations\n\n") - f.write("1. **Parameters in sample_config.yml**: Review whether these should remain internal-only or be exposed to users\n") - f.write("2. **Parameters NOT in sample_config.yml**: These are likely true internal state variables that should remain hidden\n") - f.write("3. **Consider advanced/developer mode**: For parameters needed in benchmark/testing scenarios\n\n") + f.write( + "1. **Parameters in sample_config.yml**: Review whether these should remain internal-only or be exposed to users\n" + ) + f.write( + "2. **Parameters NOT in sample_config.yml**: These are likely true internal state variables that should remain hidden\n" + ) + f.write( + "3. **Consider advanced/developer mode**: For parameters needed in benchmark/testing scenarios\n\n" + ) f.write("## Usage\n\n") f.write("This analysis was generated using:\n") f.write("```bash\n") f.write("python analyze_internal_only_parameters.py\n") f.write("```\n\n") f.write(f"Generated on: {self._get_timestamp()}\n") - + def _get_timestamp(self) -> str: """Get current timestamp for report.""" from datetime import datetime + return datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -305,4 +334,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() From 71017a9c18cca7b51f7240d52cafc20118f1846a Mon Sep 17 00:00:00 2001 From: Ting Sun Date: Wed, 6 Aug 2025 08:56:17 +0100 Subject: [PATCH 6/6] Add internal-only parameters guide with rationale and recommendations - Document criteria for identifying internal-only parameters - List all 24 currently identified internal parameters - Provide recommendations for additional parameters (tair_av, hdd_id) - Clarify which parameters should remain user-accessible (state, SPARTACUS, snowfallcum) - Flag diagnose parameter for further discussion - Include implementation notes for developers Addresses #555 --- dev-ref/internal-only-parameters.md | 111 ++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 dev-ref/internal-only-parameters.md diff --git a/dev-ref/internal-only-parameters.md b/dev-ref/internal-only-parameters.md new file mode 100644 index 000000000..2395992f0 --- /dev/null +++ b/dev-ref/internal-only-parameters.md @@ -0,0 +1,111 @@ +# Internal-Only Parameters Guide + +## Overview + +This document provides guidance on identifying and managing internal-only parameters in SUEWS/SuPy. Internal-only parameters are those that should not be exposed to end users in the YAML configuration interface, as they are either calculated internally by the model or used solely for debugging/development purposes. + +## Criteria for Internal-Only Parameters + +Parameters should be marked as `internal_only=True` when they meet one or more of these criteria: + +### 1. **Internal State Variables** +Variables that the model calculates and maintains internally during runtime: +- Derivatives and rate calculations (e.g., `dqndt`, `dqnsdt`) +- Running averages (e.g., `qn_av`, `qn_s_av`, `tair_av`) +- Timestep tracking (e.g., `dt_since_start`, `tstep_prev`) +- Accumulated values for internal calculations (e.g., `temp_accum`, `precip_accum`) + +### 2. **Diagnostic/Debug Parameters** +Parameters originally designed for developer debugging: +- `diagnose` - Legacy debugging flag from Fortran implementation +- `kdownzen` - Internal zenith angle calculations + +### 3. **Internal Indices and Identifiers** +Variables used for internal array indexing or state tracking: +- `lenday_id` - Day length identifier +- `tmax_id`, `tmin_id` - Temperature extreme tracking +- Array indices for internal calculations + +## Parameters Requiring User Access + +The following should remain user-accessible despite being related to model state: + +### 1. **Initial Conditions** +Users need to set starting conditions for simulations: +- `state` - Surface water depth (mm) for each surface type +- `snowfallcum` - Initial accumulated snowfall for mid-winter starts +- Surface temperatures and soil moisture + +### 2. **Computational Settings** +Advanced users may need to tune these for performance: +- SPARTACUS `n_stream_lw_urban`, `n_stream_sw_urban` - Radiation accuracy vs speed +- Physics method selections +- Time step configurations + +### 3. **Physical Properties** +All physical characteristics of the urban environment: +- Albedo, emissivity, conductivity values +- Building dimensions and properties +- Surface fractions and characteristics + +## Current Internal-Only Parameters + +Based on analysis of the codebase (see `src/supy/data_model/initial_states/internal_only_parameters_report.md`): + +### Parameters Already Marked Internal-Only (24 total) + +**In `model.py`:** +- `diagnose` - Developer debugging flag +- `kdownzen` - Internal calculation + +**In `state.py`:** +- Accumulation variables: `cdd_accum`, `hdd_accum`, `temp_accum`, `temp_5day_accum`, `precip_accum`, `days_since_rain_accum` +- Daily calculations: `cdd_daily`, `hdd_daily`, `temp_daily_mean`, `temp_5day_mean`, `precip_daily_total` +- Time tracking: `days_since_rain`, `dt_since_start`, `tstep_prev` +- Averaging variables: `dqndt`, `dqnsdt`, `qn_av`, `qn_s_av` +- Indices: `lenday_id`, `tmax_id`, `tmin_id` +- Physical state: `snowfallcum` + +## Recommendations for Additional Parameters + +### Should Be Made Internal-Only: +1. **`tair_av`** - Internal air temperature averaging variable +2. **`hdd_id` array** - Internal heating degree day tracking array + +### Should Remain User-Accessible: +1. **All `state` parameters** - Essential for setting initial water depths +2. **SPARTACUS numerical parameters** - Valid user tuning options +3. **`snowfallcum`** - Users may need to set initial snow conditions + +### Requires Further Discussion: +1. **`diagnose`** - Currently accessible but questionable user value + - Pro: Some users might use for troubleshooting + - Con: Legacy developer tool that may confuse users + +## Implementation Notes + +When marking parameters as internal-only in the data model: + +```python +field_name: type = Field( + default=value, + description="Description", + json_schema_extra={ + "internal_only": True, + "unit": "unit_if_applicable" + } +) +``` + +## Future Considerations + +1. **User/Developer Mode**: Consider implementing separate configuration modes +2. **Documentation**: Clearly document which parameters are available to users +3. **Migration Path**: Plan for moving parameters between internal/external status +4. **Validation**: Ensure internal parameters are properly initialized by the model + +## References + +- Issue #555: Identify complete list of internal-only parameters +- PR #556: Comprehensive internal-only parameters analysis +- Analysis script: `src/supy/data_model/initial_states/analyze_internal_only_parameters.py` \ No newline at end of file