From 93cf7759906a23823a18a5a5cc274db61c6e43a0 Mon Sep 17 00:00:00 2001 From: SongshGeo Date: Wed, 7 Jan 2026 10:11:54 +0100 Subject: [PATCH 1/2] fix(experiment): :bug: Update repeat_id to run_id for consistency in experiment logging This commit refactors the `experiment.py`, `job_manager.py`, `model.py`, and related files to replace instances of `repeat_id` with `run_id`. This change enhances clarity and consistency across the codebase, particularly in logging and data collection. Additionally, it introduces backward compatibility warnings for deprecated `repeat_id` usage in the `ExperimentManager` class, ensuring a smoother transition for users. Tests are updated to verify that `run_id` is correctly propagated in outputs. --- abses/core/experiment.py | 56 +++++------ abses/core/job_manager.py | 28 +++++- abses/core/model.py | 6 +- abses/utils/datacollector.py | 28 ++++-- abses/utils/log_config.py | 14 +-- abses/utils/logging.py | 12 +-- docs/home/configuration_schema.md | 149 ++++++++++++++++++++++++++++++ tests/api/test_datacollection.py | 35 +++++++ tests/utils/test_logging.py | 32 +++---- 9 files changed, 291 insertions(+), 69 deletions(-) diff --git a/abses/core/experiment.py b/abses/core/experiment.py index 7881ba6c..1fae3256 100644 --- a/abses/core/experiment.py +++ b/abses/core/experiment.py @@ -122,10 +122,10 @@ def run_single( hooks: The hooks to run after the model is run. 
""" - job_id, repeat_id = key + job_id, run_id = key model = model_cls( parameters=cfg, - run_id=repeat_id, + run_id=run_id, seed=seed, **kwargs, ) @@ -135,7 +135,7 @@ def run_single( for hook_name, hook_func in hooks.items(): logger.info(f"Running hook {hook_name}.") _call_hook_with_optional_args( - hook_func, model, job_id=job_id, repeat_id=repeat_id + hook_func, model, job_id=job_id, run_id=run_id ) return key, seed, results @@ -370,27 +370,27 @@ def _load_hydra_cfg( return cfg - # def _get_logging_mode(self, repeat_id: Optional[int] = None) -> str | bool: + # def _get_logging_mode(self, run_id: Optional[int] = None) -> str | bool: # log_mode = self.exp_config.get("logging", "once") # if log_mode == "once": - # if repeat_id == 1: + # if run_id == 1: # logging: bool | str = self.name # else: # return False # elif bool(log_mode): - # logging = f"{self.name}_{repeat_id}" + # logging = f"{self.name}_{run_id}" # else: # logging = False # return logging # def _update_log_config( - # self, config, repeat_id: Optional[int] = None + # self, config, run_id: Optional[int] = None # ) -> bool: # """Update the log configuration.""" # if isinstance(config, dict): # config = DictConfig(config) # OmegaConf.set_struct(config, False) - # log_name = self._get_logging_mode(repeat_id=repeat_id) + # log_name = self._get_logging_mode(run_id=run_id) # if not log_name: # config["log"] = False # return config @@ -398,17 +398,17 @@ def _load_hydra_cfg( # config = OmegaConf.merge(config, logging_cfg) # return config - def _get_seed(self, repeat_id: int, job_id: Optional[int] = None) -> Optional[int]: + def _get_seed(self, run_id: int, job_id: Optional[int] = None) -> Optional[int]: """获取每次运行的随机种子 使用基础种子初始化随机数生成器,为每次运行生成唯一的随机种子。 这样可以保证: 1. 如果基础种子相同,生成的种子序列也相同 - 2. 不同的 job_id 和 repeat_id 组合会得到不同的种子 + 2. 不同的 job_id 和 run_id 组合会得到不同的种子 3. 
种子序列具有更好的随机性 Args: - repeat_id: 重复实验的ID + run_id: 重复实验的ID Returns: 如果没有设置基础种子则返回 None,否则返回生成的随机种子 @@ -419,7 +419,7 @@ def _get_seed(self, repeat_id: int, job_id: Optional[int] = None) -> Optional[in if job_id is None: job_id = self.job_id # 使用基础种子和 job_id 创建随机数生成器 - r = random.Random(self._base_seed + job_id * 1000 + repeat_id) + r = random.Random(self._base_seed + job_id * 1000 + run_id) return r.randrange(2**32) def _get_logging_mode(self) -> str: @@ -431,13 +431,13 @@ def _get_logging_mode(self) -> str: return get_log_mode(self._cfg) def _get_log_file_path( - self, log_name: str, repeat_id: int, logging_mode: str + self, log_name: str, run_id: int, logging_mode: str ) -> Optional[Path]: """Get log file path for a specific repeat. Args: log_name: Base log file name. - repeat_id: Repeat ID (1-indexed). + run_id: Repeat ID (1-indexed). logging_mode: Logging mode. Returns: @@ -449,7 +449,7 @@ def _get_log_file_path( outpath=self.outpath, log_name=log_name, logging_mode=logging_mode, - repeat_id=repeat_id, + run_id=run_id, ) def _log_experiment_info( @@ -518,18 +518,18 @@ def _batch_run_repeats( if self._is_hydra_parallel() or number_process == 1: # Hydra 并行或指定单进程时,顺序执行 disable = repeats == 1 or not display_progress - for repeat_id in tqdm( + for run_id in tqdm( range(1, repeats + 1), disable=disable, desc=f"Job {self.job_id} repeats {repeats} times.", ): # Log separator for merge mode - if logging_mode == "merge" and repeat_id > 1: + if logging_mode == "merge" and run_id > 1: # Note: Separator will be logged in model setup pass # Get log file path for this repeat - log_path = self._get_log_file_path(log_name, repeat_id, logging_mode) + log_path = self._get_log_file_path(log_name, run_id, logging_mode) # Display log file location for separate mode # This should only go to stdout, not to model run log files @@ -539,14 +539,14 @@ def _batch_run_repeats( and log_path is not None ): # Use print instead of logger to avoid writing to model run log files - print(f"Repeat 
{repeat_id}: Logging to {log_path}") + print(f"Repeat {run_id}: Logging to {log_path}") run_single( model_cls=self.model_cls, cfg=cfg, - key=(self.job_id, repeat_id), + key=(self.job_id, run_id), outpath=self.outpath, - seed=self._get_seed(repeat_id), + seed=self._get_seed(run_id), hooks=self._manager.hooks, **self._extra_kwargs, ) @@ -564,13 +564,13 @@ def _batch_run_repeats( delayed(run_single)( model_cls=self.model_cls, cfg=cfg, - key=(self.job_id, repeat_id), + key=(self.job_id, run_id), outpath=self.outpath, - seed=self._get_seed(repeat_id), + seed=self._get_seed(run_id), hooks=self._manager.hooks, **self._extra_kwargs, ) - for repeat_id in tqdm( + for run_id in tqdm( range(1, repeats + 1), disable=not display_progress, desc=f"Job {self.job_id} repeats {repeats} times, with {number_process} processes.", @@ -644,7 +644,7 @@ def _call_hook_with_optional_args( hook_func: Callable, model: MainModelProtocol, job_id: Optional[int] = None, - repeat_id: Optional[int] = None, + run_id: Optional[int] = None, ) -> Any: """根据钩子函数的参数签名动态调用函数 @@ -652,14 +652,14 @@ def _call_hook_with_optional_args( hook_func: 要调用的钩子函数 model: 模型实例 job_id: 可选的任务ID - repeat_id: 可选的重复实验ID + run_id: 可选的重复实验ID """ sig = inspect.signature(hook_func) hook_args = {} if "job_id" in sig.parameters: hook_args["job_id"] = job_id - if "repeat_id" in sig.parameters: - hook_args["repeat_id"] = repeat_id + if "run_id" in sig.parameters: + hook_args["run_id"] = run_id return hook_func(model, **hook_args) diff --git a/abses/core/job_manager.py b/abses/core/job_manager.py index cf65f5b2..079d7c25 100644 --- a/abses/core/job_manager.py +++ b/abses/core/job_manager.py @@ -7,6 +7,7 @@ from __future__ import annotations +import warnings from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Type import pandas as pd @@ -102,7 +103,7 @@ def update_result( """更新实验结果 Args: - key: (job_id, repeat_id) tuple + key: (job_id, run_id) tuple overrides: Configuration overrides for this run datasets: Row-like mapping of 
metrics/values to store seed: Random seed used for this run @@ -126,14 +127,35 @@ def get_datasets( self, seed: bool = True, ) -> pd.DataFrame: - """获取所有实验结果的 DataFrame""" + """获取所有实验结果的 DataFrame + + Note: + The ``repeat_id`` column is **deprecated** and will be removed in a + future version. Please use the ``run_id`` column instead. + """ to_concat = [] to_concat.append(self.dict_to_df(self._overrides)) if seed: seed = pd.Series(self._seeds, name="seed", index=self.index) to_concat.append(seed) to_concat.append(self.dict_to_df(self._datasets)) - return pd.concat(to_concat, axis=1).reset_index() + df = pd.concat(to_concat, axis=1).reset_index() + + # Backward compatibility: keep repeat_id, but encourage using run_id. + # Later we can drop repeat_id once users have migrated. + if "repeat_id" in df.columns: + warnings.warn( + "Column 'repeat_id' is deprecated and will be removed in a future " + "version. Please use 'run_id' instead.", + DeprecationWarning, + stacklevel=2, + ) + # If run_id is not already present (e.g. from datacollector), + # create it from repeat_id so new code can rely on run_id. 
+ if "run_id" not in df.columns: + df["run_id"] = df["repeat_id"] + + return df def add_a_hook( self, diff --git a/abses/core/model.py b/abses/core/model.py index 3e7662af..0e083d22 100644 --- a/abses/core/model.py +++ b/abses/core/model.py @@ -140,7 +140,9 @@ def __init__( tracker_backend = create_tracker(tracker_cfg, model=self) collector_cfg = prepare_collector_config(tracker_cfg) self.datacollector: ABSESpyDataCollector = ABSESpyDataCollector( - reports=collector_cfg, tracker=tracker_backend + reports=collector_cfg, + tracker=tracker_backend, + run_id=run_id, ) # Setup logging BEFORE initialize() so user logs in initialize() are captured @@ -347,7 +349,7 @@ def _setup_logger(self, log_cfg: Dict[str, Any]) -> None: rotation=rotation, retention=retention, logging_mode=logging_mode, - repeat_id=self.run_id, + run_id=self.run_id, file_level=file_level, file_format=file_format, file_datefmt=file_datefmt, diff --git a/abses/utils/datacollector.py b/abses/utils/datacollector.py index 8acfbbbd..526d3e62 100644 --- a/abses/utils/datacollector.py +++ b/abses/utils/datacollector.py @@ -116,15 +116,18 @@ def __init__( self, reports: Dict[ReportType, Dict[str, Reporter]] | None = None, tracker: Optional[TrackerProtocol] = None, + run_id: Optional[int] = None, ): """Initialize data collector. Args: reports: Reporters configuration. tracker: Optional tracker backend. + run_id: Optional run id. 
""" reports = reports or {} self.tracker = tracker + self.run_id = run_id self.model_reporters: Dict[str, Reporter] = {} self.final_reporters: Dict[str, Reporter] = {} self.agent_reporters: Dict[str, Dict[str, Reporter]] = {} @@ -162,6 +165,13 @@ def add_reporters( for name, reporter in reporters.items(): self._new_agent_reporter(breed=item, name=name, reporter=reporter) + def _add_run_id_to_data( + self, data: pd.DataFrame | Dict[str, Any] + ) -> pd.DataFrame | Dict[str, Any]: + if self.run_id is not None: + data["run_id"] = self.run_id + return data + def _new_model_reporter(self, name: str, reporter: Reporter) -> None: """Add a new model-level reporter to collect data. @@ -216,8 +226,9 @@ def get_model_vars_dataframe(self): logger.warning( "No model reporters have been definedreturning empty DataFrame." ) - - return pd.DataFrame(self.model_vars) + df = pd.DataFrame(self.model_vars) + df = self._add_run_id_to_data(df) + return df def get_agent_vars_dataframe(self, breed: Optional[str] = None) -> pd.DataFrame: """获取某种 Agents 的 DataFrame""" @@ -229,8 +240,12 @@ def get_agent_vars_dataframe(self, breed: Optional[str] = None) -> pd.DataFrame: if not self.agent_reporters: logger.warning("No agent reporters have been defined in the DataCollector.") if results := self._agent_records.get(breed): - return pd.concat([pd.DataFrame(res) for res in results]) - return pd.DataFrame() + df = pd.concat([pd.DataFrame(res) for res in results]) + else: + logger.warning(f"No agent records found for breed {breed}.") + df = pd.DataFrame() + df = self._add_run_id_to_data(data=df) + return df def get_final_vars_report(self, model: MainModel) -> Dict[str, Any]: """Report at the end of this model. @@ -239,11 +254,10 @@ def get_final_vars_report(self, model: MainModel) -> Dict[str, Any]: A dictionary mapping variable names to their computed values. """ if not self.final_reporters: - logger.warning( - "No final reporters have been defined, returning empty dict." 
- ) + logger.info("No final reporters have been defined.") return {} results = {var: func(model) for var, func in self.final_reporters.items()} + self._add_run_id_to_data(results) if self.tracker is not None: self.tracker.log_final_metrics(results) return results diff --git a/abses/utils/log_config.py b/abses/utils/log_config.py index e749af9c..f9539b23 100644 --- a/abses/utils/log_config.py +++ b/abses/utils/log_config.py @@ -229,7 +229,7 @@ def determine_log_file_path( outpath: Optional[Path], log_name: str, logging_mode: str = "once", - repeat_id: Optional[int] = None, + run_id: Optional[int] = None, ) -> Optional[Path]: """Determine log file path based on logging mode. @@ -237,7 +237,7 @@ def determine_log_file_path( outpath: Output directory for log files. log_name: Base log file name (without extension). logging_mode: Logging mode - 'once', 'separate', or 'merge'. - repeat_id: Repeat ID for the current run (1-indexed). + run_id: Repeat ID for the current run (1-indexed). Returns: Path to log file, or None if logging should be disabled. 
@@ -250,21 +250,21 @@ def determine_log_file_path( if logging_mode == "once": # Only log the first repeat - if repeat_id is None or repeat_id == 1: + if run_id is None or run_id == 1: return outpath / f"{log_name}.log" return None elif logging_mode == "separate": # Each repeat gets its own file - # In separate mode, repeat_id must be provided - if repeat_id is None: + # In separate mode, run_id must be provided + if run_id is None: return None # Don't create default file in separate mode - return outpath / f"{log_name}_{repeat_id}.log" + return outpath / f"{log_name}_{run_id}.log" elif logging_mode == "merge": # All repeats go to the same file return outpath / f"{log_name}.log" else: # Unknown mode, default to once behavior - if repeat_id is None or repeat_id == 1: + if run_id is None or run_id == 1: return outpath / f"{log_name}.log" return None diff --git a/abses/utils/logging.py b/abses/utils/logging.py index 781c0b03..6db5bde6 100644 --- a/abses/utils/logging.py +++ b/abses/utils/logging.py @@ -86,15 +86,15 @@ def setup_logger_info( logger.bind(no_format=True).info(f"Exp environment: {is_exp_env}\n") -def log_repeat_separator(repeat_id: int, total_repeats: int) -> None: +def log_repeat_separator(run_id: int, total_repeats: int) -> None: """Log a separator for a new repeat run in merge mode. Args: - repeat_id: Current repeat ID (1-indexed). + run_id: Current repeat ID (1-indexed). total_repeats: Total number of repeats. 
""" separator = "\n" + "=" * 60 + "\n" - header = f"Repeat {repeat_id}/{total_repeats}".center(60) + "\n" + header = f"Repeat {run_id}/{total_repeats}".center(60) + "\n" footer = "=" * 60 + "\n" logger.bind(no_format=True).info(separator + header + footer) @@ -111,7 +111,7 @@ def setup_model_logger( retention: Optional[str] = None, log_file_path: Optional[Path] = None, logging_mode: str = "once", - repeat_id: Optional[int] = None, + run_id: Optional[int] = None, file_level: Optional[str] = None, file_format: Optional[str] = None, file_datefmt: Optional[str] = None, @@ -136,7 +136,7 @@ def setup_model_logger( retention: Retention period (e.g., "10 days"). log_file_path: Explicit log file path (overrides automatic path determination). logging_mode: Logging mode - 'once', 'separate', or 'merge'. - repeat_id: Repeat ID for the current run (1-indexed). + run_id: Repeat ID for the current run (1-indexed). file_level: File handler level (defaults to level). file_format: File format string. file_datefmt: File date format string. @@ -156,7 +156,7 @@ def setup_model_logger( outpath=outpath, log_name=name, logging_mode=logging_mode, - repeat_id=repeat_id, + run_id=run_id, ) # Setup integrated logging diff --git a/docs/home/configuration_schema.md b/docs/home/configuration_schema.md index e0f5ea44..b43a633c 100644 --- a/docs/home/configuration_schema.md +++ b/docs/home/configuration_schema.md @@ -53,6 +53,7 @@ tracker: - [2. Experiment Configuration](#2-experiment-configuration) - [3. Model Parameters](#3-model-parameters) - [4. Tracker Configuration](#4-tracker-configuration) +- [5. Logging Configuration](#5-logging-configuration) - [Parameter Sweeps](#parameter-sweeps) - [Complete Examples](#complete-examples) - [Best Practices](#best-practices) @@ -440,6 +441,154 @@ tracker: --- +## 5. Logging Configuration + +ABSESpy provides a unified logging configuration on top of Python's standard `logging` +and Hydra's `job_logging`. 
This section summarizes the YAML fields that control +logging behaviour in ABSESpy projects. + +There are three main configuration entry points: + +1. **Unified `log` section** (recommended, new style) +2. **Experiment-level `exp.logging` flag** (simple mode, kept for compatibility) +3. **Legacy `log` shorthand in old examples** (deprecated but still supported) + +### 5.1 Unified `log` section (recommended) + +The unified `log` section is defined in the core config and can be +overridden in your experiment config (for example in `examples/fire_spread/config.yaml`). +It controls both experiment-level logging and per-run logging: + +```yaml +log: + # Logging mode for repeated runs: once | separate | merge + mode: str # "once" | "separate" | "merge" + + # Experiment-level logging (progress, high-level summary) + exp: + stdout: + enabled: bool # Enable experiment logs to console + level: str # e.g. "INFO", "DEBUG" + format: str # Log format string + datefmt: str # Time format + file: + enabled: bool # Enable experiment log file + level: str # File log level + format: str # File log format + datefmt: str # File time format + + # Run-level logging (each model execution) + run: + stdout: + enabled: bool # Enable per-run logs to console + level: str + format: str + datefmt: str + file: + enabled: bool # Enable per-run log files + level: str + format: str + datefmt: str + name: str # Base log file name (without extension) + rotation: str | null # e.g. "1 day", "100 MB", null = no rotation + retention: str | null # e.g. 
"10 days", null = default policy + mesa: + level: str | null # If null, uses run.file.level + format: str | null # If null, uses run.file.format +``` + +#### 5.1.1 `log.mode` + +Controls how repeated runs share log files: + +| Value | Behaviour | +|-------|-----------| +| `"once"` | Only the first repeat writes to the log file | +| `"separate"` | Each repeat writes to its own file with an index suffix | +| `"merge"` | All repeats write to the same log file | + +#### 5.1.2 Experiment-level logging (`log.exp.*`) + +Experiment-level logging is intended for high-level progress and summaries, not +per-step model details. + +| Field | Type | Description | +|-------|------|-------------| +| `log.exp.stdout.enabled` | bool | Enable experiment messages on console | +| `log.exp.stdout.level` | str | Console log level (e.g. `"INFO"`) | +| `log.exp.stdout.format` | str | Console log format string | +| `log.exp.stdout.datefmt` | str | Console time format | +| `log.exp.file.enabled` | bool | Enable experiment log file | +| `log.exp.file.level` | str | Experiment file log level | +| `log.exp.file.format` | str | Experiment file log format | +| `log.exp.file.datefmt` | str | Experiment file time format | + +#### 5.1.3 Run-level logging (`log.run.*`) + +Run-level logging controls logging for each single model run: + +| Field | Type | Description | +|-------|------|-------------| +| `log.run.stdout.enabled` | bool | Enable per-run console logging | +| `log.run.stdout.level` | str | Per-run console log level | +| `log.run.stdout.format` | str | Per-run console log format | +| `log.run.stdout.datefmt` | str | Per-run console time format | +| `log.run.file.enabled` | bool | Enable per-run log files | +| `log.run.file.level` | str | Per-run file log level (e.g. 
`"DEBUG"`) | +| `log.run.file.format` | str | Per-run file log format | +| `log.run.file.datefmt` | str | Per-run file time format | +| `log.run.file.name` | str | Base file name for logs (without extension) | +| `log.run.file.rotation` | str/null | Rotation policy, e.g. `"1 day"`, `"100 MB"` | +| `log.run.file.retention` | str/null | Retention policy, e.g. `"10 days"` | +| `log.run.mesa.level` | str/null | Log level for Mesa loggers; `null` uses `log.run.file.level` | +| `log.run.mesa.format` | str/null | Log format for Mesa; `null` uses `log.run.file.format` | + +> **Recommended:** For most projects, start by modifying only: +> - `log.mode` +> - `log.exp.file.enabled` +> - `log.run.file.enabled` +> - `log.run.file.level` +> and keep the default formats unless you have special formatting needs. + +### 5.2 Experiment-level `exp.logging` flag (compatibility) + +The `exp.logging` field (described in [2. Experiment Configuration](#2-experiment-configuration)) +is a simpler, older switch that controls logging behaviour at the experiment level: + +```yaml +exp: + logging: str | bool # "once" | "always" | false +``` + +| Value | Behaviour | +|-------|-----------| +| `"once"` | Log only the first repeat | +| `"always"` | Log all repeats | +| `false` | Disable logging for repeats (where supported) | + +> This flag is kept for backward compatibility. New projects should prefer the +> unified `log` section, which gives you explicit control over console/file +> logging at both experiment and run levels. + +### 5.3 Legacy `log` shorthand in examples + +Some older example configs (such as `examples/wolf_sheep/config.yaml` +and `examples/schelling/config.yaml`) use a simplified `log` section: + +```yaml +log: + name: str # Base log file name + level: str # Global log level, e.g. 
"INFO" + console: bool # Enable/disable console logging + file: bool # (optional) Enable/disable file logging +``` + +These fields are normalized internally to the new unified logging schema and +are maintained for compatibility with older projects. For new models, prefer +defining `log.mode`, `log.exp.*` and `log.run.*` instead of this shorthand. + +--- + ## Parameter Sweeps ABSESpy supports parameter sweeps for automated batch experiments using Hydra's sweeper. You can define parameter ranges that Hydra will automatically expand into all combinations. diff --git a/tests/api/test_datacollection.py b/tests/api/test_datacollection.py index 626fcd8f..9a0903c6 100644 --- a/tests/api/test_datacollection.py +++ b/tests/api/test_datacollection.py @@ -90,3 +90,38 @@ def test_agent_records( result = agent_vars[name] assert len(result) == ticks assert result.mode().item() == 1 + + def test_run_id_added_to_outputs( + self, + test_config, + testing_breeds: Dict[str, Type[Actor]], + ): + """Ensure run_id is propagated into collected outputs when provided.""" + # arrange: create model with explicit run_id + run_id = 7 + model = MainModel(parameters=test_config, seed=42, run_id=run_id) + datacollector = model.datacollector + + # create one agent for a known breed + actor = model.agents.new(testing_breeds["Actor"], singleton=True) + setattr(actor, "test", 1) + + # act + model.run_model(steps=3) + + # model-level data should contain run_id column with constant value + model_df = datacollector.get_model_vars_dataframe() + assert "run_id" in model_df.columns + assert model_df["run_id"].nunique() == 1 + assert model_df["run_id"].iloc[0] == run_id + + # agent-level data should also contain run_id column with constant value + agent_df = datacollector.get_agent_vars_dataframe("Actor") + assert "run_id" in agent_df.columns + assert agent_df["run_id"].nunique() == 1 + assert agent_df["run_id"].iloc[0] == run_id + + # final report dictionary should include run_id key + final_report = 
datacollector.get_final_vars_report(model) + assert "run_id" in final_report + assert final_report["run_id"] == run_id diff --git a/tests/utils/test_logging.py b/tests/utils/test_logging.py index a5a7c22b..d5678154 100644 --- a/tests/utils/test_logging.py +++ b/tests/utils/test_logging.py @@ -176,37 +176,37 @@ def test_determine_log_file_path_once_mode(self, tmp_path: Path) -> None: """测试确定日志文件路径(once 模式)""" # First repeat should create file path = determine_log_file_path( - outpath=tmp_path, log_name="test", logging_mode="once", repeat_id=1 + outpath=tmp_path, log_name="test", logging_mode="once", run_id=1 ) assert path == tmp_path / "test.log" # Subsequent repeats should return None path = determine_log_file_path( - outpath=tmp_path, log_name="test", logging_mode="once", repeat_id=2 + outpath=tmp_path, log_name="test", logging_mode="once", run_id=2 ) assert path is None def test_determine_log_file_path_separate_mode(self, tmp_path: Path) -> None: """测试确定日志文件路径(separate 模式)""" path1 = determine_log_file_path( - outpath=tmp_path, log_name="test", logging_mode="separate", repeat_id=1 + outpath=tmp_path, log_name="test", logging_mode="separate", run_id=1 ) assert path1 == tmp_path / "test_1.log" path2 = determine_log_file_path( - outpath=tmp_path, log_name="test", logging_mode="separate", repeat_id=2 + outpath=tmp_path, log_name="test", logging_mode="separate", run_id=2 ) assert path2 == tmp_path / "test_2.log" def test_determine_log_file_path_merge_mode(self, tmp_path: Path) -> None: """测试确定日志文件路径(merge 模式)""" path1 = determine_log_file_path( - outpath=tmp_path, log_name="test", logging_mode="merge", repeat_id=1 + outpath=tmp_path, log_name="test", logging_mode="merge", run_id=1 ) assert path1 == tmp_path / "test.log" path2 = determine_log_file_path( - outpath=tmp_path, log_name="test", logging_mode="merge", repeat_id=2 + outpath=tmp_path, log_name="test", logging_mode="merge", run_id=2 ) assert path2 == tmp_path / "test.log" # Same file @@ -344,7 +344,7 @@ def 
test_setup_model_logger_file_only(self, tmp_path: Path) -> None: outpath=tmp_path, console=False, logging_mode="once", - repeat_id=1, + run_id=1, ) assert logger.name == "abses" assert len(logger.handlers) == 1 @@ -363,7 +363,7 @@ def test_setup_model_logger_custom_format(self, tmp_path: Path) -> None: file_format="[%(asctime)s] %(message)s", file_datefmt="%H:%M:%S", logging_mode="once", - repeat_id=1, + run_id=1, ) # Check that handlers have correct formatters console_handler = next( @@ -467,7 +467,7 @@ def test_model_logging_integration(self, temp_dir: Path) -> None: outpath=temp_dir, console=True, logging_mode="once", - repeat_id=1, + run_id=1, ) try: logger.info("Model started") @@ -485,30 +485,30 @@ def test_logging_modes(self, temp_dir: Path) -> None: """测试不同的日志模式""" # Test once mode path1 = determine_log_file_path( - outpath=temp_dir, log_name="model", logging_mode="once", repeat_id=1 + outpath=temp_dir, log_name="model", logging_mode="once", run_id=1 ) path2 = determine_log_file_path( - outpath=temp_dir, log_name="model", logging_mode="once", repeat_id=2 + outpath=temp_dir, log_name="model", logging_mode="once", run_id=2 ) assert path1 == temp_dir / "model.log" assert path2 is None # Test separate mode path1 = determine_log_file_path( - outpath=temp_dir, log_name="model", logging_mode="separate", repeat_id=1 + outpath=temp_dir, log_name="model", logging_mode="separate", run_id=1 ) path2 = determine_log_file_path( - outpath=temp_dir, log_name="model", logging_mode="separate", repeat_id=2 + outpath=temp_dir, log_name="model", logging_mode="separate", run_id=2 ) assert path1 == temp_dir / "model_1.log" assert path2 == temp_dir / "model_2.log" # Test merge mode path1 = determine_log_file_path( - outpath=temp_dir, log_name="model", logging_mode="merge", repeat_id=1 + outpath=temp_dir, log_name="model", logging_mode="merge", run_id=1 ) path2 = determine_log_file_path( - outpath=temp_dir, log_name="model", logging_mode="merge", repeat_id=2 + outpath=temp_dir, 
log_name="model", logging_mode="merge", run_id=2 ) assert path1 == temp_dir / "model.log" assert path2 == temp_dir / "model.log" @@ -585,7 +585,7 @@ def test_user_module_logger_writes_to_model_log(self, temp_dir: Path) -> None: outpath=temp_dir, console=False, logging_mode="once", - repeat_id=1, + run_id=1, ) try: From aa04e97b9c1868672fe69e1747fd6f524b8344cf Mon Sep 17 00:00:00 2001 From: SongshGeo Date: Wed, 7 Jan 2026 11:23:55 +0100 Subject: [PATCH 2/2] refactor(logging): :bug: Remove deprecated repeat_id references and enhance run_id usage This commit removes commented-out code related to the deprecated `repeat_id` in the `experiment.py` file and updates the `job_manager.py` and `log_config.py` files to consistently use `run_id`. It introduces backward compatibility warnings for any legacy usage of `repeat_id` in the `ExperimentManager` class, ensuring users are informed of the transition. These changes improve clarity and consistency in logging and data handling across the codebase. 
--- abses/core/experiment.py | 28 ---------------------------- abses/core/job_manager.py | 16 +++++++--------- abses/utils/log_config.py | 2 +- 3 files changed, 8 insertions(+), 38 deletions(-) diff --git a/abses/core/experiment.py b/abses/core/experiment.py index 1fae3256..e3b67810 100644 --- a/abses/core/experiment.py +++ b/abses/core/experiment.py @@ -370,34 +370,6 @@ def _load_hydra_cfg( return cfg - # def _get_logging_mode(self, run_id: Optional[int] = None) -> str | bool: - # log_mode = self.exp_config.get("logging", "once") - # if log_mode == "once": - # if run_id == 1: - # logging: bool | str = self.name - # else: - # return False - # elif bool(log_mode): - # logging = f"{self.name}_{run_id}" - # else: - # logging = False - # return logging - - # def _update_log_config( - # self, config, run_id: Optional[int] = None - # ) -> bool: - # """Update the log configuration.""" - # if isinstance(config, dict): - # config = DictConfig(config) - # OmegaConf.set_struct(config, False) - # log_name = self._get_logging_mode(run_id=run_id) - # if not log_name: - # config["log"] = False - # return config - # logging_cfg = OmegaConf.create({"log": {"name": log_name}}) - # config = OmegaConf.merge(config, logging_cfg) - # return config - def _get_seed(self, run_id: int, job_id: Optional[int] = None) -> Optional[int]: """获取每次运行的随机种子 diff --git a/abses/core/job_manager.py b/abses/core/job_manager.py index 079d7c25..f3f5fedb 100644 --- a/abses/core/job_manager.py +++ b/abses/core/job_manager.py @@ -116,10 +116,10 @@ def dict_to_df(self, results: dict) -> pd.DataFrame: """将嵌套字典转换为 DataFrame Args: - results: 形如 {(job_id, repeat_id): {'metric': value}} 的字典 + results: 形如 {(job_id, run_id): {'metric': value}} 的字典 Returns: - 包含 job_id, repeat_id 和指标值的 DataFrame + 包含 job_id, run_id 和指标值的 DataFrame """ return pd.DataFrame(results.values(), index=self.index) @@ -141,19 +141,17 @@ def get_datasets( to_concat.append(self.dict_to_df(self._datasets)) df = pd.concat(to_concat, 
axis=1).reset_index() - # Backward compatibility: keep repeat_id, but encourage using run_id. - # Later we can drop repeat_id once users have migrated. - if "repeat_id" in df.columns: + # Backward compatibility: if legacy results contain a `repeat_id` column + # (e.g. from older versions or custom datasets), mirror it into `run_id` + # and emit a deprecation warning. New code should only rely on `run_id`. + if "repeat_id" in df.columns and "run_id" not in df.columns: warnings.warn( "Column 'repeat_id' is deprecated and will be removed in a future " "version. Please use 'run_id' instead.", DeprecationWarning, stacklevel=2, ) - # If run_id is not already present (e.g. from datacollector), - # create it from repeat_id so new code can rely on run_id. - if "run_id" not in df.columns: - df["run_id"] = df["repeat_id"] + df["run_id"] = df["repeat_id"] return df diff --git a/abses/utils/log_config.py b/abses/utils/log_config.py index f9539b23..6eb08147 100644 --- a/abses/utils/log_config.py +++ b/abses/utils/log_config.py @@ -237,7 +237,7 @@ def determine_log_file_path( outpath: Output directory for log files. log_name: Base log file name (without extension). logging_mode: Logging mode - 'once', 'separate', or 'merge'. - run_id: Repeat ID for the current run (1-indexed). + run_id: Run ID for the current run (1-indexed). Returns: Path to log file, or None if logging should be disabled.