diff --git a/.claude/scripts/changelog_helper.py b/.claude/scripts/changelog_helper.py
index bb953093f..e11ab8178 100755
--- a/.claude/scripts/changelog_helper.py
+++ b/.claude/scripts/changelog_helper.py
@@ -21,204 +21,210 @@ class ChangelogRestructurer:
-    def __init__(self, changelog_path: str = 'CHANGELOG.md'):
+    def __init__(self, changelog_path: str = "CHANGELOG.md"):
         self.changelog_path = Path(changelog_path)
         # Support both old format (- DATE:) and new format (### DATE)
-        self.date_pattern = re.compile(r'^(?:- |### )(\d{1,2} \w+ \d{4})(?::\s*)?$')
-        self.entry_pattern = re.compile(r'^(?: )?- \[(\w+)\] (.+)$')
-    
+        self.date_pattern = re.compile(r"^(?:- |### )(\d{1,2} \w+ \d{4})(?::\s*)?$")
+        self.entry_pattern = re.compile(r"^(?: )?- \[(\w+)\] (.+)$")
+
     def parse_to_json(self) -> Dict:
         """Parse CHANGELOG.md into structured JSON format"""
         if not self.changelog_path.exists():
             raise FileNotFoundError(f"{self.changelog_path} not found")
-    
+
         content = self.changelog_path.read_text()
-        lines = content.split('\n')
-    
+        lines = content.split("\n")
+
         # Structure to hold parsed data
         data = {
-            'header': [],
-            'entries': {},  # date_str -> list of entries
-            'raw_entries': []  # For debugging
+            "header": [],
+            "entries": {},  # date_str -> list of entries
+            "raw_entries": [],  # For debugging
         }
-    
+
         current_date = None
         current_entries = []
        in_header = True
-    
+
         i = 0
         while i < len(lines):
             line = lines[i]
-    
+
             # Check for date line
             date_match = self.date_pattern.match(line)
             if date_match:
                 # Save previous date's entries
                 if current_date and current_entries:
-                    if current_date not in data['entries']:
-                        data['entries'][current_date] = []
-                    data['entries'][current_date].extend(current_entries)
-                    data['raw_entries'].append({
-                        'date': current_date,
-                        'entries': current_entries.copy()
+                    if current_date not in data["entries"]:
+                        data["entries"][current_date] = []
+                    data["entries"][current_date].extend(current_entries)
+                    data["raw_entries"].append({
+                        "date": current_date,
+                        "entries": current_entries.copy(),
                     })
-    
+
                 # Start new date
                 current_date = date_match.group(1)
                 current_entries = []
                 in_header = False
-    
+
             elif in_header:
-                data['header'].append(line)
-    
+                data["header"].append(line)
+
             elif self.entry_pattern.match(line):
                 # This is an entry line
                 entry_match = self.entry_pattern.match(line)
                 if entry_match:
                     category = entry_match.group(1)
                     description = entry_match.group(2)
-    
+
                     # Start new entry
                     current_entry = {
-                        'category': category,
-                        'description': description,
-                        'details': []
+                        "category": category,
+                        "description": description,
+                        "details": [],
                     }
-    
+
                     # Look ahead for detail lines (starting with '  -' or '    -')
                     j = i + 1
-                    while j < len(lines) and (lines[j].startswith('  -') or lines[j].startswith('    -')):
+                    while j < len(lines) and (
+                        lines[j].startswith("  -") or lines[j].startswith("    -")
+                    ):
                         # Skip if it's another category entry
                         if self.entry_pattern.match(lines[j]):
                             break
                         detail = lines[j].strip()
-                        if detail.startswith('- '):
+                        if detail.startswith("- "):
                             detail = detail[2:]  # Remove '- '
-                        current_entry['details'].append(detail)
+                        current_entry["details"].append(detail)
                         j += 1
-    
+
                     current_entries.append(current_entry)
                     i = j - 1  # Skip the detail lines we just processed
-    
+
             i += 1
-    
+
         # Don't forget the last entry
         if current_date and current_entries:
-            if current_date not in data['entries']:
-                data['entries'][current_date] = []
-            data['entries'][current_date].extend(current_entries)
-            data['raw_entries'].append({
-                'date': current_date,
-                'entries': current_entries
+            if current_date not in data["entries"]:
+                data["entries"][current_date] = []
+            data["entries"][current_date].extend(current_entries)
+            data["raw_entries"].append({
+                "date": current_date,
+                "entries": current_entries,
             })
-    
+
         return data
-    
+
     def clean_entries(self, data: Dict) -> Dict:
         """Clean duplicates and merge similar entries"""
         cleaned_data = data.copy()
         cleaned_entries = {}
-    
+
         # Track seen entries to remove duplicates
         seen_entries = set()
-    
-        for date_str, entries in data['entries'].items():
+
+        for date_str, entries in data["entries"].items():
             cleaned_date_entries = []
-    
+
             for entry in entries:
                 # Create a hash of the entry to detect duplicates
                 entry_hash = hashlib.md5(
                     json.dumps(entry, sort_keys=True).encode()
                 ).hexdigest()
-    
+
                 if entry_hash not in seen_entries:
                     seen_entries.add(entry_hash)
                     cleaned_date_entries.append(entry)
-    
+
             if cleaned_date_entries:
                 cleaned_entries[date_str] = cleaned_date_entries
-    
-        cleaned_data['entries'] = cleaned_entries
+
+        cleaned_data["entries"] = cleaned_entries
         return cleaned_data
-    
+
     def sort_entries(self, data: Dict) -> List[Tuple[str, List]]:
         """Sort entries by date in reverse chronological order"""
         date_entries = []
-    
-        for date_str, entries in data['entries'].items():
+
+        for date_str, entries in data["entries"].items():
             try:
-                date_obj = datetime.strptime(date_str, '%d %b %Y')
+                date_obj = datetime.strptime(date_str, "%d %b %Y")
                 date_entries.append((date_obj, date_str, entries))
             except ValueError:
                 print(f"Warning: Could not parse date '{date_str}'")
-    
+
         # Sort by date object in reverse order
         date_entries.sort(key=lambda x: x[0], reverse=True)
-    
+
         # Return sorted list of (date_str, entries) tuples
         return [(date_str, entries) for _, date_str, entries in date_entries]
-    
+
     def format_github_links(self, text: str) -> str:
         """Convert #xxx and PR #xxx to GitHub links"""
         # Check if already formatted (contains markdown links)
-        if '](https://github.com/' in text:
+        if "](https://github.com/" in text:
             return text
-    
+
         # Pattern for PR mentions
-        pr_pattern = r'PR #(\d+)'
-        text = re.sub(pr_pattern, r'[PR #\1](https://github.com/UMEP-dev/SUEWS/pull/\1)', text)
-    
+        pr_pattern = r"PR #(\d+)"
+        text = re.sub(
+            pr_pattern, r"[PR #\1](https://github.com/UMEP-dev/SUEWS/pull/\1)", text
+        )
+
         # Pattern for issue mentions (but not PR mentions and not already in brackets)
-        issue_pattern = r'(?<!PR )#(\d+)'
-        text = re.sub(issue_pattern, r'[#\1](https://github.com/UMEP-dev/SUEWS/issues/\1)', text)
-    
+        issue_pattern = r"(?<!PR )#(\d+)"
+        text = re.sub(
+            issue_pattern, r"[#\1](https://github.com/UMEP-dev/SUEWS/issues/\1)", text
+        )
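+        # Illustrative result (with the patterns above): "Fix bug (#123, PR #456)"
+        # becomes "Fix bug ([#123](https://github.com/UMEP-dev/SUEWS/issues/123),
+        # [PR #456](https://github.com/UMEP-dev/SUEWS/pull/456))".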
+
         return text
-    
+
     def generate_toc(self, sorted_entries: List[Tuple[str, List]]) -> List[str]:
         """Generate table of contents by year"""
         years = set()
         for date_str, _ in sorted_entries:
             try:
-                date_obj = datetime.strptime(date_str, '%d %b %Y')
+                date_obj = datetime.strptime(date_str, "%d %b %Y")
                 years.add(date_obj.year)
             except ValueError:
                 pass
-    
+
         toc_lines = ["## Table of Contents", ""]
         for year in sorted(years, reverse=True):
             toc_lines.append(f"- [{year}](#{year})")
-    
+
         return toc_lines
-    
+
     def generate_stats_table(self, sorted_entries: List[Tuple[str, List]]) -> List[str]:
         """Generate annual statistics table"""
         # Collect stats by year
         stats_by_year = defaultdict(lambda: defaultdict(int))
-    
+
         for date_str, entries in sorted_entries:
             try:
-                date_obj = datetime.strptime(date_str, '%d %b %Y')
+                date_obj = datetime.strptime(date_str, "%d %b %Y")
                 year = date_obj.year
-    
+
                 for entry in entries:
-                    category = entry['category']
+                    category = entry["category"]
                     stats_by_year[year][category] += 1
-                    stats_by_year[year]['total'] += 1
-    
+                    stats_by_year[year]["total"] += 1
+
             except ValueError:
                 pass
-    
+
         # Generate table
         table_lines = [
             "",
             "## Annual Statistics",
             "",
             "| Year | Features | Bugfixes | Changes | Maintenance | Docs | Total |",
-            "|------|----------|----------|---------|-------------|------|-------|"
+            "|------|----------|----------|---------|-------------|------|-------|",
         ]
-    
-        categories = ['feature', 'bugfix', 'change', 'maintenance', 'doc']
-    
+
+        categories = ["feature", "bugfix", "change", "maintenance", "doc"]
+
         for year in sorted(stats_by_year.keys(), reverse=True):
             stats = stats_by_year[year]
             row = f"| {year} |"
@@ -227,141 +233,147 @@ def generate_stats_table(self, sorted_entries: List[Tuple[str, List]]) -> List[s
                 row += f" {count} |"
             row += f" {stats['total']} |"
             table_lines.append(row)
-    
+
         return table_lines
 
-    def format_to_markdown(self, data: Dict, sorted_entries: List[Tuple[str, List]]) -> str:
+    def format_to_markdown(
+        self, data: Dict, sorted_entries: List[Tuple[str, List]]
+    ) -> str:
         """Format the cleaned and sorted data back to markdown"""
         lines = []
-    
+
         # Add header (but skip any existing TOC or stats tables)
-        for line in data['header']:
+        for line in data["header"]:
             # Skip lines that are part of TOC or stats table
-            if line.startswith('## Table of Contents') or line.startswith('## Annual Statistics'):
+            if line.startswith("## Table of Contents") or line.startswith(
+                "## Annual Statistics"
+            ):
                 break
-            if line.startswith('| Year |') or line.startswith('|------|'):
+            if line.startswith("| Year |") or line.startswith("|------|"):
                 continue
-            if line.startswith('- [20') and '](#20' in line:  # TOC entry
+            if line.startswith("- [20") and "](#20" in line:  # TOC entry
                 continue
             lines.append(line)
-    
+
         # Remove trailing empty lines from header
         while lines and lines[-1] == "":
             lines.pop()
-    
+
         lines.append("")  # Add single blank line after header
-    
+
         # Add TOC
         toc = self.generate_toc(sorted_entries)
         lines.extend(toc)
-    
+
         # Add stats table
         stats_table = self.generate_stats_table(sorted_entries)
         lines.extend(stats_table)
         lines.append("")  # Extra blank line
-    
+
         # Group entries by year
         entries_by_year = defaultdict(list)
         for date_str, entries in sorted_entries:
             try:
-                date_obj = datetime.strptime(date_str, '%d %b %Y')
+                date_obj = datetime.strptime(date_str, "%d %b %Y")
                 year = date_obj.year
                 entries_by_year[year].append((date_str, entries))
             except ValueError:
                 # If date parsing fails, put in "Unknown" year
                 entries_by_year[0].append((date_str, entries))
-    
+
         # Add entries grouped by year
         for year in sorted(entries_by_year.keys(), reverse=True):
             if year == 0:
                 lines.append("\n## Unknown Year")
             else:
                 lines.append(f"\n## {year}")
-    
+
             lines.append("")
-    
+
             for date_str, entries in entries_by_year[year]:
                 lines.append(f"### {date_str}")
-    
+
                 # Group entries by category for better organization
                 by_category = {}
                 for entry in entries:
-                    cat = entry['category']
+                    cat = entry["category"]
                     if cat not in by_category:
                         by_category[cat] = []
                     by_category[cat].append(entry)
-    
+
                 # Output in preferred category order
-                category_order = ['feature', 'bugfix', 'change', 'maintenance', 'doc']
-    
+                category_order = ["feature", "bugfix", "change", "maintenance", "doc"]
+
                 for category in category_order:
                     if category in by_category:
                         for entry in by_category[category]:
                             # Format description with GitHub links
-                            description = self.format_github_links(entry['description'])
+                            description = self.format_github_links(entry["description"])
                             lines.append(f"- [{entry['category']}] {description}")
-    
+
                             # Format details with GitHub links
-                            for detail in entry['details']:
+                            for detail in entry["details"]:
                                 formatted_detail = self.format_github_links(detail)
                                 lines.append(f"  - {formatted_detail}")
-    
+
                 lines.append("")  # Blank line after each date
-    
+
         # Remove trailing blank lines
         while lines and lines[-1] == "":
             lines.pop()
-    
-        return '\n'.join(lines)
-    
-    def save_json_debug(self, data: Dict, filename: str = 'changelog_debug.json'):
+
+        return "\n".join(lines)
+
+    def save_json_debug(self, data: Dict, filename: str = "changelog_debug.json"):
         """Save parsed data to JSON for debugging"""
-        with open(filename, 'w') as f:
+        with open(filename, "w") as f:
             # Convert datetime objects to strings for JSON serialization
             json_data = data.copy()
             json.dump(json_data, f, indent=2, default=str)
         print(f"Debug data saved to {filename}")
-    
+
     def restructure(self):
         """Main method to restructure the changelog"""
         print("Step 1: Parsing CHANGELOG.md to JSON...")
         data = self.parse_to_json()
         print(f"  Found {len(data['entries'])} unique dates")
         print(f"  Total raw entries: {len(data['raw_entries'])}")
-    
+
         # Save debug JSON
         self.save_json_debug(data)
-    
+
         print("\nStep 2: Cleaning duplicates...")
         cleaned_data = self.clean_entries(data)
         print(f"  Cleaned entries: {len(cleaned_data['entries'])} dates remain")
-    
+
         print("\nStep 3: Sorting entries by date...")
         sorted_entries = self.sort_entries(cleaned_data)
-    
+
         if sorted_entries:
             newest = sorted_entries[0][0]
             oldest = sorted_entries[-1][0]
             print(f"  Date range: {newest} → {oldest}")
-    
+
         print("\nStep 4: Formatting back to markdown...")
         markdown = self.format_to_markdown(cleaned_data, sorted_entries)
-    
+
         # Backup original
-        backup_path = self.changelog_path.with_suffix('.md.backup')
+        backup_path = self.changelog_path.with_suffix(".md.backup")
         self.changelog_path.rename(backup_path)
         print(f"\nOriginal backed up to: {backup_path}")
-    
+
         # Write new changelog
         self.changelog_path.write_text(markdown)
         print(f"✓ Restructured CHANGELOG saved to: {self.changelog_path}")
-    
+
         # Summary statistics
         total_entries = sum(len(entries) for _, entries in sorted_entries)
         print(f"\nSummary:")
         print(f"  - Total dates: {len(sorted_entries)}")
         print(f"  - Total entries: {total_entries}")
-        print(f"  - Average entries per date: {total_entries/len(sorted_entries):.1f}")
+        print(
+            f"  - Average entries per date: {total_entries / len(sorted_entries):.1f}"
+        )
 
 
 def main():
@@ -369,5 +381,5 @@ def main():
     restructurer.restructure()
 
 
-if __name__ == '__main__':
-    main()
\ No newline at end of file
+if __name__ == "__main__":
+    main()
diff --git a/dev-ref/internal-only-parameters.md b/dev-ref/internal-only-parameters.md
new file mode 100644
index 000000000..2395992f0
--- /dev/null
+++ b/dev-ref/internal-only-parameters.md
@@ -0,0 +1,111 @@
+# Internal-Only Parameters Guide
+
+## Overview
+
+This document provides guidance on identifying and managing internal-only parameters in SUEWS/SuPy. Internal-only parameters are those that should not be exposed to end users in the YAML configuration interface, as they are either calculated internally by the model or used solely for debugging/development purposes.
+
+## Criteria for Internal-Only Parameters
+
+Parameters should be marked as `internal_only=True` when they meet one or more of these criteria:
+
+### 1. **Internal State Variables**
+Variables that the model calculates and maintains internally during runtime:
+- Derivatives and rate calculations (e.g., `dqndt`, `dqnsdt`)
+- Running averages (e.g., `qn_av`, `qn_s_av`, `tair_av`)
+- Timestep tracking (e.g., `dt_since_start`, `tstep_prev`)
+- Accumulated values for internal calculations (e.g., `temp_accum`, `precip_accum`)
+
+### 2. **Diagnostic/Debug Parameters**
+Parameters originally designed for developer debugging:
+- `diagnose` - Legacy debugging flag from Fortran implementation
+- `kdownzen` - Internal zenith angle calculations
+
+### 3. **Internal Indices and Identifiers**
+Variables used for internal array indexing or state tracking:
+- `lenday_id` - Day length identifier
+- `tmax_id`, `tmin_id` - Temperature extreme tracking
+- Array indices for internal calculations
+
+## Parameters Requiring User Access
+
+The following should remain user-accessible despite being related to model state:
+
+### 1. **Initial Conditions**
+Users need to set starting conditions for simulations:
+- `state` - Surface water depth (mm) for each surface type
+- `snowfallcum` - Initial accumulated snowfall for mid-winter starts
+- Surface temperatures and soil moisture
+
+### 2. **Computational Settings**
+Advanced users may need to tune these for performance:
+- SPARTACUS `n_stream_lw_urban`, `n_stream_sw_urban` - Radiation accuracy vs speed
+- Physics method selections
+- Time step configurations
+
+### 3. **Physical Properties**
+All physical characteristics of the urban environment:
+- Albedo, emissivity, conductivity values
+- Building dimensions and properties
+- Surface fractions and characteristics
+
+## Current Internal-Only Parameters
+
+Based on analysis of the codebase (see `src/supy/data_model/initial_states/internal_only_parameters_report.md`):
+
+### Parameters Already Marked Internal-Only (24 total)
+
+**In `model.py`:**
+- `diagnose` - Developer debugging flag
+- `kdownzen` - Internal calculation
+
+**In `state.py`:**
+- Accumulation variables: `cdd_accum`, `hdd_accum`, `temp_accum`, `temp_5day_accum`, `precip_accum`, `days_since_rain_accum`
+- Daily calculations: `cdd_daily`, `hdd_daily`, `temp_daily_mean`, `temp_5day_mean`, `precip_daily_total`
+- Time tracking: `days_since_rain`, `dt_since_start`, `tstep_prev`
+- Averaging variables: `dqndt`, `dqnsdt`, `qn_av`, `qn_s_av`
+- Indices: `lenday_id`, `tmax_id`, `tmin_id`
+- Physical state: `snowfallcum`
+
+## Recommendations for Additional Parameters
+
+### Should Be Made Internal-Only:
+1. **`tair_av`** - Internal air temperature averaging variable
+2. **`hdd_id` array** - Internal heating degree day tracking array
+
+### Should Remain User-Accessible:
+1. **All `state` parameters** - Essential for setting initial water depths
+2. **SPARTACUS numerical parameters** - Valid user tuning options
+3. **`snowfallcum`** - Users may need to set initial snow conditions
+
+### Requires Further Discussion:
+1. **`diagnose`** - Currently accessible but questionable user value
+   - Pro: Some users might use for troubleshooting
+   - Con: Legacy developer tool that may confuse users
+
+## Implementation Notes
+
+When marking parameters as internal-only in the data model:
+
+```python
+field_name: type = Field(
+    default=value,
+    description="Description",
+    json_schema_extra={
+        "internal_only": True,
+        "unit": "unit_if_applicable"
+    }
+)
+```
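+
+As a minimal sketch of how downstream code might consume this flag (a hypothetical
+`user_facing_fields` helper, assuming Pydantic v2's `model_fields` and
+`json_schema_extra` attributes):
+
+```python
+from pydantic import BaseModel
+
+
+def user_facing_fields(model_cls: type[BaseModel]) -> list[str]:
+    """Return the names of fields not flagged as internal-only (illustrative)."""
+    visible = []
+    for name, field in model_cls.model_fields.items():
+        extra = field.json_schema_extra or {}
+        if not (isinstance(extra, dict) and extra.get("internal_only")):
+            visible.append(name)
+    return visible
+```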
+
+## Future Considerations
+
+1. **User/Developer Mode**: Consider implementing separate configuration modes
+2. **Documentation**: Clearly document which parameters are available to users
+3. **Migration Path**: Plan for moving parameters between internal/external status
+4. **Validation**: Ensure internal parameters are properly initialized by the model
+
+## References
+
+- Issue #555: Identify complete list of internal-only parameters
+- PR #556: Comprehensive internal-only parameters analysis
+- Analysis script: `src/supy/data_model/initial_states/analyze_internal_only_parameters.py`
\ No newline at end of file
diff --git a/src/supy/data_model/initial_states/analyze_internal_only_parameters.py b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py
new file mode 100644
index 000000000..6435c790f
--- /dev/null
+++ b/src/supy/data_model/initial_states/analyze_internal_only_parameters.py
@@ -0,0 +1,337 @@
+#!/usr/bin/env python3
+"""
+Script to analyze internal-only parameters in SUEWS data model.
+
+This script:
+1. Scans the data model Python files for parameters marked with internal_only=True
+2. Checks which parameters are present in sample_config.yml
+3. Generates a comprehensive table for issue #555
+
+Usage:
+    python analyze_internal_only_parameters.py
+
+Output:
+    - internal_only_parameters_table.csv
+    - internal_only_parameters_report.md
+"""
+
+import ast
+import csv
+import os
+import yaml
+from pathlib import Path
+from typing import Dict, List, Set, Tuple, Any
+import re
+
+
+class InternalOnlyAnalyzer:
+    """Analyzer for internal-only parameters in SUEWS data model."""
+
+    def __init__(self):
+        self.script_dir = Path(__file__).parent
+        self.repo_root = self.script_dir.parent.parent.parent.parent
+        self.data_model_dir = self.script_dir.parent
+        self.sample_config_path = (
+            self.repo_root / "src" / "supy" / "sample_run" / "sample_config.yml"
+        )
+
+        self.internal_params = []
+        self.sample_config_data = {}
+
+    def find_internal_only_parameters(self) -> List[Dict[str, Any]]:
+        """Find all parameters marked as internal_only in data model files."""
+        python_files = [
+            "state.py",
+            "model.py",
+            "core.py",
+            "human_activity.py",
+            "hydro.py",
+            "ohm.py",
+            "precheck.py",
+            "profile.py",
+            "site.py",
+            "surface.py",
+            "type.py",
+        ]
+
+        internal_params = []
+
+        for py_file in python_files:
+            file_path = self.data_model_dir / py_file
+            if not file_path.exists():
+                continue
+
+            print(f"Analyzing {py_file}...")
+            params = self._extract_internal_params_from_file(file_path)
+            internal_params.extend(params)
+
+        return internal_params
+
+    def _extract_internal_params_from_file(
+        self, file_path: Path
+    ) -> List[Dict[str, Any]]:
+        """Extract internal-only parameters from a single Python file."""
+        params = []
+
+        with open(file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+
+        # Split content into lines for line number tracking
+        lines = content.split("\n")
+
+        # Find Field definitions with internal_only=True
+        field_pattern = r"(\w+):\s*[^=]*=\s*Field\s*\("
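+        # NOTE: this pattern assumes the annotation and the opening "Field(" share
+        # one line (e.g. `qn_av: float = Field(`); the rest of a multi-line Field
+        # call is gathered by _extract_complete_field_definition() below.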
r"(\w+):\s*[^=]*=\s*Field\s*\(" + + for i, line in enumerate(lines, 1): + # Look for Field definitions + field_match = re.search(field_pattern, line) + if field_match: + param_name = field_match.group(1) + + # Look ahead to find the complete Field definition + field_def = self._extract_complete_field_definition(lines, i - 1) + + if "internal_only" in field_def and "True" in field_def: + # Extract description + desc_match = re.search( + r'description=["\'](.*?)["\']', field_def, re.DOTALL + ) + description = ( + desc_match.group(1) if desc_match else "No description" + ) + + # Extract unit from json_schema_extra + unit_match = re.search(r'"unit":\s*["\'](.*?)["\']', field_def) + unit = unit_match.group(1) if unit_match else "" + + params.append({ + "param_name": param_name, + "file_location": f"{file_path.name}:{i}", + "description": description, + "unit": unit, + }) + + return params + + def _extract_complete_field_definition( + self, lines: List[str], start_line: int + ) -> str: + """Extract complete Field definition spanning multiple lines.""" + field_def = lines[start_line] + line_idx = start_line + 1 + + # Count parentheses to find complete definition + paren_count = field_def.count("(") - field_def.count(")") + + while paren_count > 0 and line_idx < len(lines): + field_def += "\n" + lines[line_idx] + paren_count += lines[line_idx].count("(") - lines[line_idx].count(")") + line_idx += 1 + + return field_def + + def load_sample_config(self) -> Dict[str, Any]: + """Load and parse the sample_config.yml file.""" + if not self.sample_config_path.exists(): + print(f"Warning: sample_config.yml not found at {self.sample_config_path}") + return {} + + with open(self.sample_config_path, "r", encoding="utf-8") as f: + return yaml.safe_load(f) + + def _flatten_yaml_keys(self, data: Dict[str, Any], prefix: str = "") -> Set[str]: + """Recursively flatten YAML structure to get all parameter names.""" + keys = set() + + if isinstance(data, dict): + for key, value in data.items(): + full_key = f"{prefix}.{key}" if prefix else key + keys.add(key) # Add the key itself + keys.add(full_key) # Add the full path + + if isinstance(value, (dict, list)): + keys.update(self._flatten_yaml_keys(value, full_key)) + elif isinstance(data, list): + for i, item in enumerate(data): + if isinstance(item, (dict, list)): + keys.update(self._flatten_yaml_keys(item, f"{prefix}[{i}]")) + + return keys + + def check_parameter_in_sample_config( + self, param_name: str, yaml_keys: Set[str] + ) -> Tuple[bool, str]: + """Check if a parameter is present in sample_config.yml.""" + # Direct match + if param_name in yaml_keys: + return True, param_name + + # Check for partial matches (parameter might be nested) + matches = [key for key in yaml_keys if param_name in key] + if matches: + return True, matches[0] + + return False, "" + + def generate_table(self) -> None: + """Generate the analysis table and report.""" + print("Finding internal-only parameters...") + self.internal_params = self.find_internal_only_parameters() + + print("Loading sample_config.yml...") + self.sample_config_data = self.load_sample_config() + yaml_keys = self._flatten_yaml_keys(self.sample_config_data) + + print(f"Found {len(self.internal_params)} internal-only parameters") + print(f"Found {len(yaml_keys)} keys in sample_config.yml") + + # Enhance parameter data with sample_config presence + for param in self.internal_params: + in_sample, location = self.check_parameter_in_sample_config( + param["param_name"], yaml_keys + ) + param["in_sample_config"] = "Yes" 
+        if matches:
+            return True, matches[0]
+
+        return False, ""
+
+    def generate_table(self) -> None:
+        """Generate the analysis table and report."""
+        print("Finding internal-only parameters...")
+        self.internal_params = self.find_internal_only_parameters()
+
+        print("Loading sample_config.yml...")
+        self.sample_config_data = self.load_sample_config()
+        yaml_keys = self._flatten_yaml_keys(self.sample_config_data)
+
+        print(f"Found {len(self.internal_params)} internal-only parameters")
+        print(f"Found {len(yaml_keys)} keys in sample_config.yml")
+
+        # Enhance parameter data with sample_config presence
+        for param in self.internal_params:
+            in_sample, location = self.check_parameter_in_sample_config(
+                param["param_name"], yaml_keys
+            )
+            param["in_sample_config"] = "Yes" if in_sample else "No"
+            param["sample_config_location"] = location if in_sample else ""
+
+        # Generate CSV table
+        self._generate_csv_table()
+
+        # Generate markdown report
+        self._generate_markdown_report()
+
+        print("\nAnalysis complete!")
+        print(f"Generated: internal_only_parameters_table.csv")
+        print(f"Generated: internal_only_parameters_report.md")
+
+    def _generate_csv_table(self) -> None:
+        """Generate CSV table of internal-only parameters."""
+        csv_path = self.script_dir / "internal_only_parameters_table.csv"
+
+        with open(csv_path, "w", newline="", encoding="utf-8") as f:
+            fieldnames = [
+                "param_name",
+                "file_location",
+                "in_sample_config",
+                "sample_config_location",
+                "unit",
+            ]
+            writer = csv.DictWriter(f, fieldnames=fieldnames)
+
+            writer.writeheader()
+            for param in sorted(self.internal_params, key=lambda x: x["param_name"]):
+                # Create a copy without description for CSV
+                csv_param = {k: v for k, v in param.items() if k != "description"}
+                writer.writerow(csv_param)
+
+    def _generate_markdown_report(self) -> None:
+        """Generate markdown report with analysis summary."""
+        md_path = self.script_dir / "internal_only_parameters_report.md"
+
+        # Count statistics
+        total_params = len(self.internal_params)
+        in_sample = sum(
+            1 for p in self.internal_params if p["in_sample_config"] == "Yes"
+        )
+        not_in_sample = total_params - in_sample
+
+        # Group by file
+        by_file = {}
+        for param in self.internal_params:
+            file_name = param["file_location"].split(":")[0]
+            if file_name not in by_file:
+                by_file[file_name] = []
+            by_file[file_name].append(param)
+
+        with open(md_path, "w", encoding="utf-8") as f:
+            f.write(f"""# Internal-Only Parameters Analysis Report
+
+Generated for GitHub Issue #555: Identify complete list of internal-only parameters
+
+## Summary Statistics
+
+- **Total internal-only parameters**: {total_params}
+- **Parameters in sample_config.yml**: {in_sample}
+- **Parameters NOT in sample_config.yml**: {not_in_sample}
+- **Files analyzed**: {len(by_file)}
+
+## Parameters by File
+
+""")
+
+            for file_name, params in sorted(by_file.items()):
+                f.write(f"### {file_name} ({len(params)} parameters)\n\n")
+                f.write("| Parameter | In sample_config.yml |\n")
+                f.write("|-----------|---------------------|\n")
+
+                for param in sorted(params, key=lambda x: x["param_name"]):
+                    f.write(
+                        f"| `{param['param_name']}` | {param['in_sample_config']} |\n"
+                    )
+
+                f.write("\n")
+
+            f.write(f"""## Parameters Present in sample_config.yml
+
+These parameters are marked as internal-only but appear in the sample configuration:
+
+""")
+
+            present_params = [
+                p for p in self.internal_params if p["in_sample_config"] == "Yes"
+            ]
+            if present_params:
+                f.write("| Parameter | File |\n")
+                f.write("|-----------|------|\n")
+                for param in sorted(present_params, key=lambda x: x["param_name"]):
+                    f.write(f"| `{param['param_name']}` | {param['file_location']} |\n")
+            else:
+                f.write("*None found*\n")
+
+            f.write("\n\n## Parameters NOT in sample_config.yml\n\n")
+            f.write(
+                "These parameters are internal-only and do not appear in the sample configuration:\n\n"
+            )
+
+            absent_params = [
+                p for p in self.internal_params if p["in_sample_config"] == "No"
+            ]
+            if absent_params:
+                f.write("| Parameter | File |\n")
+                f.write("|-----------|------|\n")
+                for param in sorted(absent_params, key=lambda x: x["param_name"]):
+                    f.write(f"| `{param['param_name']}` | {param['file_location']} |\n")
+            else:
+                f.write("*None found*\n")
+
+            f.write("\n## Recommendations\n\n")
+            f.write(
+                "1. **Parameters in sample_config.yml**: Review whether these should remain internal-only or be exposed to users\n"
+            )
+            f.write(
+                "2. **Parameters NOT in sample_config.yml**: These are likely true internal state variables that should remain hidden\n"
+            )
+            f.write(
+                "3. **Consider advanced/developer mode**: For parameters needed in benchmark/testing scenarios\n\n"
+            )
+            f.write("## Usage\n\n")
+            f.write("This analysis was generated using:\n")
+            f.write("```bash\n")
+            f.write("python analyze_internal_only_parameters.py\n")
+            f.write("```\n\n")
+            f.write(f"Generated on: {self._get_timestamp()}\n")
+
+    def _get_timestamp(self) -> str:
+        """Get current timestamp for report."""
+        from datetime import datetime
+
+        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+
+def main():
+    """Main entry point."""
+    analyzer = InternalOnlyAnalyzer()
+    analyzer.generate_table()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/supy/data_model/initial_states/internal_only_parameters_report.md b/src/supy/data_model/initial_states/internal_only_parameters_report.md
new file mode 100644
index 000000000..e70694bb9
--- /dev/null
+++ b/src/supy/data_model/initial_states/internal_only_parameters_report.md
@@ -0,0 +1,100 @@
+# Internal-Only Parameters Analysis Report
+
+Generated for GitHub Issue #555: Identify complete list of internal-only parameters
+
+## Summary Statistics
+
+- **Total internal-only parameters**: 24
+- **Parameters in sample_config.yml**: 11
+- **Parameters NOT in sample_config.yml**: 13
+- **Files analyzed**: 2
+
+## Parameters by File
+
+### model.py (2 parameters)
+
+| Parameter | In sample_config.yml |
+|-----------|---------------------|
+| `diagnose` | Yes |
+| `kdownzen` | No |
+
+### state.py (22 parameters)
+
+| Parameter | In sample_config.yml |
+|-----------|---------------------|
+| `cdd_accum` | No |
+| `cdd_daily` | No |
+| `days_since_rain` | No |
+| `days_since_rain_accum` | No |
+| `dqndt` | Yes |
+| `dqnsdt` | Yes |
+| `dt_since_start` | Yes |
+| `hdd_accum` | No |
+| `hdd_daily` | No |
+| `lenday_id` | Yes |
+| `precip_accum` | No |
+| `precip_daily_total` | No |
+| `qn_av` | Yes |
+| `qn_s_av` | Yes |
+| `snowfallcum` | Yes |
+| `temp_5day_accum` | No |
+| `temp_5day_mean` | No |
+| `temp_accum` | No |
+| `temp_daily_mean` | No |
+| `tmax_id` | Yes |
+| `tmin_id` | Yes |
+| `tstep_prev` | Yes |
+
+## Parameters Present in sample_config.yml
+
+These parameters are marked as internal-only but appear in the sample configuration:
+
+| Parameter | File |
+|-----------|------|
+| `diagnose` | model.py:667 |
+| `dqndt` | state.py:815 |
+| `dqnsdt` | state.py:820 |
+| `dt_since_start` | state.py:825 |
+| `lenday_id` | state.py:830 |
+| `qn_av` | state.py:835 |
+| `qn_s_av` | state.py:843 |
+| `snowfallcum` | state.py:877 |
+| `tmax_id` | state.py:856 |
+| `tmin_id` | state.py:864 |
+| `tstep_prev` | state.py:872 |
+
+
+## Parameters NOT in sample_config.yml
+
+These parameters are internal-only and do not appear in the sample configuration:
+
+| Parameter | File |
+|-----------|------|
+| `cdd_accum` | state.py:621 |
+| `cdd_daily` | state.py:677 |
+| `days_since_rain` | state.py:713 |
+| `days_since_rain_accum` | state.py:657 |
+| `hdd_accum` | state.py:612 |
+| `hdd_daily` | state.py:668 |
+| `kdownzen` | model.py:657 |
+| `precip_accum` | state.py:648 |
+| `precip_daily_total` | state.py:704 |
+| `temp_5day_accum` | state.py:639 |
+| `temp_5day_mean` | state.py:695 |
+| `temp_accum` | state.py:630 |
+| `temp_daily_mean` | state.py:686 |
+
+## Recommendations
+
+1. **Parameters in sample_config.yml**: Review whether these should remain internal-only or be exposed to users
+2. **Parameters NOT in sample_config.yml**: These are likely true internal state variables that should remain hidden
+3. **Consider advanced/developer mode**: For parameters needed in benchmark/testing scenarios
+
+## Usage
+
+This analysis was generated using:
+```bash
+python analyze_internal_only_parameters.py
+```
+
+Generated on: 2025-07-25 14:44:46
diff --git a/src/supy/data_model/initial_states/internal_only_parameters_table.csv b/src/supy/data_model/initial_states/internal_only_parameters_table.csv
new file mode 100644
index 000000000..7b3fa6cee
--- /dev/null
+++ b/src/supy/data_model/initial_states/internal_only_parameters_table.csv
@@ -0,0 +1,25 @@
+param_name,file_location,in_sample_config,sample_config_location,unit
+cdd_accum,state.py:621,No,,degC
+cdd_daily,state.py:677,No,,degC
+days_since_rain,state.py:713,No,,days
+days_since_rain_accum,state.py:657,No,,days
+diagnose,model.py:667,Yes,diagnose,
+dqndt,state.py:815,Yes,dqndt,
+dqnsdt,state.py:820,Yes,dqnsdt,
+dt_since_start,state.py:825,Yes,dt_since_start,
+hdd_accum,state.py:612,No,,degC
+hdd_daily,state.py:668,No,,degC
+kdownzen,model.py:657,No,,
+lenday_id,state.py:830,Yes,lenday_id,
+precip_accum,state.py:648,No,,mm
+precip_daily_total,state.py:704,No,,mm
+qn_av,state.py:835,Yes,qn_av,
+qn_s_av,state.py:843,Yes,qn_s_av,
+snowfallcum,state.py:877,Yes,snowfallcum,
+temp_5day_accum,state.py:639,No,,degC
+temp_5day_mean,state.py:695,No,,degC
+temp_accum,state.py:630,No,,degC
+temp_daily_mean,state.py:686,No,,degC
+tmax_id,state.py:856,Yes,tmax_id,
+tmin_id,state.py:864,Yes,tmin_id,
+tstep_prev,state.py:872,Yes,tstep_prev,