diff --git a/pyproject.toml b/pyproject.toml index bdfecd610..206636166 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -238,9 +238,8 @@ compat = ["scitex-compat>=0.1.1"] # Config Module - Configuration management # Use: pip install scitex[config] -config = [ - "PyYAML", -] +# Real implementation lives in the standalone scitex-config package. +config = ["scitex-config>=0.1.0"] # Context Module - Context managers # Use: pip install scitex[context] diff --git a/src/scitex/config/README.md b/src/scitex/config/README.md deleted file mode 100644 index e4120033a..000000000 --- a/src/scitex/config/README.md +++ /dev/null @@ -1,313 +0,0 @@ - - -# SciTeX Configuration Module - -Centralized configuration management for the SciTeX ecosystem. - -## Overview - -This module provides: -- **ScitexConfig**: YAML-based configuration with env var substitution (Scholar pattern) -- **ScitexPaths**: Centralized path manager for all SciTeX directories -- **PriorityConfig**: Dict-based configuration resolver (for programmatic use) -- **CLI**: `scitex config list` command to view configuration - -## Priority Resolution Patterns - -### ScitexConfig (YAML-based, Recommended) - -Follows the Scholar module's `CascadeConfig` pattern: - -``` -1. Direct value (highest) - Thread-safe, explicit parameter -2. Config (YAML) - Values from default.yaml -3. Environment variable - SCITEX_*, etc. -4. Default value - Fallback (lowest) -``` - -**Key feature**: YAML can reference env vars using `${VAR:-default}` syntax. - -### PriorityConfig (Dict-based) - -For programmatic use with a dictionary: - -``` -1. Direct value (highest) - Explicit parameter -2. Config dict - From passed dictionary -3. Environment variable - SCITEX_*, etc. -4. Default value - Fallback (lowest) -``` - -## Quick Start - -### Using ScitexConfig (YAML-based) - -```python -from scitex.config import get_config - -# Load default configuration -config = get_config() - -# Resolve values with precedence: direct → config → env → default -log_level = config.resolve("logging.level", default="INFO") -debug = config.resolve("debug.enabled", default=False, type=bool) - -# Access raw config values -print(config.get("scitex_dir")) # Value from YAML - -# Load custom config -custom_config = get_config("/path/to/custom.yaml") -``` - -### Using ScitexPaths (Recommended for paths) - -```python -import scitex - -# Access via global PATHS constant -print(scitex.PATHS.logs) # ~/.scitex/logs -print(scitex.PATHS.capture) # ~/.scitex/capture -print(scitex.PATHS.scholar_library) # ~/.scitex/scholar/library - -# Or import directly -from scitex.config import get_paths - -paths = get_paths() # Cached singleton - -# Use resolve() for configurable paths in modules -cache_dir = paths.resolve("cache", user_provided_value) # direct → default -``` - -### Using get_scitex_dir() - -```python -from scitex.config import get_scitex_dir - -# Simple usage - respects SCITEX_DIR env var -base_dir = get_scitex_dir() # Returns Path - -# With direct override (thread-safe) -base_dir = get_scitex_dir("/custom/path") -``` - -### Using PriorityConfig - -```python -from scitex.config import PriorityConfig - -config = PriorityConfig( - config_dict={"port": 3000, "debug": True}, - env_prefix="SCITEX_" -) - -# Resolves: direct → env (SCITEX_PORT) → config_dict → default -port = config.resolve("port", direct_val=None, default=8000, type=int) -``` - -## Pattern for Module Implementation - -When implementing a module that needs configurable paths, use the `resolve()` method: - -```python -from typing import Optional -from scitex.config import get_paths - -class MyModule: - def __init__(self, cache_dir: Optional[str] = None): - """ - Args: - cache_dir: Custom cache directory. If None, uses default - from SCITEX_DIR environment variable. - """ - # resolve() handles: direct value → default (from SCITEX_DIR env → ~/.scitex) - self.cache_dir = get_paths().resolve("cache", cache_dir) - self.cache_dir.mkdir(parents=True, exist_ok=True) -``` - -The `resolve(path_name, direct_val)` method: -- Returns `direct_val` if provided (highest priority) -- Otherwise returns the default path from `SCITEX_DIR` environment variable -- Falls back to `~/.scitex/` if `SCITEX_DIR` is not set - -This pattern ensures: -- **Thread-safety**: Direct values allow multi-user/multi-project scenarios -- **Configurability**: Environment variables for deployment flexibility -- **Sensible defaults**: Works out-of-the-box with `~/.scitex` -- **Consistency**: Same pattern as `PriorityConfig.resolve()` - -## Directory Structure - -``` -$SCITEX_DIR/ # Default: ~/.scitex -├── browser/ # Browser module data -│ ├── screenshots/ # Browser debugging screenshots -│ ├── sessions/ # Shared browser sessions -│ └── persistent/ # Persistent browser profiles -├── cache/ # General cache -│ └── functions/ # Function cache (joblib) -├── capture/ # Screen captures -├── impact_factor_cache/ # Impact factor data -├── logs/ # Log files -├── openathens_cache/ # OpenAthens auth cache -├── rng/ # Random number generator state -├── scholar/ # Scholar module -│ ├── cache/ # Scholar-specific cache -│ └── library/ # PDF library -├── screenshots/ # General screenshots -├── test_monitor/ # Test monitoring screenshots -└── writer/ # Writer module data -``` - -## CLI Commands - -```bash -# List all configured paths -scitex config list - -# Include environment variable info -scitex config list --env - -# Show only existing directories -scitex config list --exists - -# Output as JSON -scitex config list --json - -# Initialize all directories -scitex config init -scitex config init --dry-run - -# Show a specific path -scitex config show logs -scitex config show scholar_library -``` - -## YAML Configuration - -### default.yaml Format - -The `default.yaml` file supports environment variable substitution using `${VAR:-default}` syntax: - -```yaml -# Base directory -scitex_dir: ${SCITEX_DIR:-"~/.scitex"} - -# Nested configuration -logging: - level: ${SCITEX_LOG_LEVEL:-"INFO"} - file_logging: ${SCITEX_FILE_LOGGING:-true} - -debug: - enabled: ${SCITEX_DEBUG:-false} -``` - -### Environment Variable Substitution - -| Syntax | Description | Example | -|--------|-------------|---------| -| `${VAR}` | Use env var (null if not set) | `${SCITEX_DIR}` | -| `${VAR:-default}` | Use env var or default | `${SCITEX_DIR:-"~/.scitex"}` | - -### How It Works - -1. **YAML loads with substitution**: When YAML is loaded, `${VAR:-default}` expressions are replaced with environment variable values or defaults -2. **Resolution respects priority**: When you call `config.resolve()`, it checks: direct → config (from YAML) → env → default - -Example: -```yaml -# In default.yaml -log_level: ${SCITEX_LOG_LEVEL:-"INFO"} -``` - -```python -# If SCITEX_LOG_LEVEL is not set: -config.get("logging.level") # Returns "INFO" (from YAML default) - -# If SCITEX_LOG_LEVEL=DEBUG: -config.get("logging.level") # Returns "DEBUG" (substituted at load time) -``` - -## Environment Variables - -| Variable | Description | Default | -|----------|-------------|---------| -| `SCITEX_DIR` | Base directory for all SciTeX data | `~/.scitex` | -| `SCITEX_LOG_LEVEL` | Logging level | `INFO` | -| `SCITEX_DEBUG` | Enable debug mode | `false` | - -## .env File Support - -The module automatically loads `.env` files from: -1. Current working directory (`./.env`) -2. Home directory (`~/.env`) - -Example `.env`: -```bash -SCITEX_DIR=/data/scitex -SCITEX_LOG_LEVEL=DEBUG -``` - -Note: Environment variables set in shell take precedence over `.env` values. - -## Available Paths - -Access via `scitex.PATHS.` or `get_paths().`: - -| Property | Path | -|----------|------| -| `base` | `$SCITEX_DIR` | -| `logs` | `$SCITEX_DIR/logs` | -| `cache` | `$SCITEX_DIR/cache` | -| `function_cache` | `$SCITEX_DIR/cache/functions` | -| `capture` | `$SCITEX_DIR/capture` | -| `screenshots` | `$SCITEX_DIR/screenshots` | -| `rng` | `$SCITEX_DIR/rng` | -| `browser` | `$SCITEX_DIR/browser` | -| `browser_screenshots` | `$SCITEX_DIR/browser/screenshots` | -| `browser_sessions` | `$SCITEX_DIR/browser/sessions` | -| `browser_persistent` | `$SCITEX_DIR/browser/persistent` | -| `test_monitor` | `$SCITEX_DIR/test_monitor` | -| `impact_factor_cache` | `$SCITEX_DIR/impact_factor_cache` | -| `openathens_cache` | `$SCITEX_DIR/openathens_cache` | -| `scholar` | `$SCITEX_DIR/scholar` | -| `scholar_cache` | `$SCITEX_DIR/scholar/cache` | -| `scholar_library` | `$SCITEX_DIR/scholar/library` | -| `writer` | `$SCITEX_DIR/writer` | - -## Thread-Safe Usage - -For multi-user or multi-project scenarios, pass explicit paths: - -```python -from scitex.config import ScitexPaths - -# Each user/project gets isolated paths -user1_paths = ScitexPaths(base_dir="/data/user1/.scitex") -user2_paths = ScitexPaths(base_dir="/data/user2/.scitex") - -# Use in module initialization -processor = DataProcessor(cache_dir=user1_paths.cache) -``` - -## Migration from Hardcoded Paths - -If you have code with hardcoded paths like: - -```python -# Old (hardcoded) -cache_dir = Path.home() / ".scitex" / "cache" -``` - -Replace with: - -```python -# New (configurable) -from scitex.config import get_paths -cache_dir = get_paths().cache -``` - - diff --git a/src/scitex/config/_PriorityConfig.py b/src/scitex/config/_PriorityConfig.py deleted file mode 100755 index ab86f8d8e..000000000 --- a/src/scitex/config/_PriorityConfig.py +++ /dev/null @@ -1,290 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Timestamp: "2025-12-09 (ywatanabe)" -# File: /home/ywatanabe/proj/scitex-code/src/scitex/config/PriorityConfig.py - - -""" -Priority-based configuration resolver. - -Provides clean precedence hierarchy: direct → config_dict → env → default - -Based on priority-config by ywatanabe (https://github.com/ywatanabe1989/priority-config) -Incorporated into scitex for self-contained configuration management. - -Note: config_dict values (from YAML or passed dict) take priority over -environment variables. This follows the Scholar module's CascadeConfig pattern. -""" - -import os -from pathlib import Path -from typing import Any, Dict, List, Optional, Type - - -def load_dotenv(dotenv_path: Optional[str] = None) -> bool: - """Load environment variables from .env file. - - Searches for .env file in the following order: - 1. Explicit dotenv_path if provided - 2. Current working directory - 3. User home directory - - Parameters - ---------- - dotenv_path : str, optional - Path to .env file. If None, searches default locations. - - Returns - ------- - bool - True if .env file was found and loaded, False otherwise. - """ - paths_to_try = [] - - if dotenv_path: - paths_to_try.append(Path(dotenv_path)) - else: - # Default search paths - paths_to_try.extend( - [ - Path.cwd() / ".env", - Path.home() / ".env", - ] - ) - - for path in paths_to_try: - if path.exists() and path.is_file(): - try: - with open(path, "r") as f: - for line in f: - line = line.strip() - # Skip empty lines and comments - if not line or line.startswith("#"): - continue - # Handle export prefix - if line.startswith("export "): - line = line[7:] - # Parse key=value - if "=" in line: - key, _, value = line.partition("=") - key = key.strip() - value = value.strip() - # Remove quotes if present - if (value.startswith('"') and value.endswith('"')) or ( - value.startswith("'") and value.endswith("'") - ): - value = value[1:-1] - # Only set if not already in environment (env takes precedence) - if key not in os.environ: - os.environ[key] = value - return True - except Exception: - continue - return False - - -def get_scitex_dir(direct_val: Optional[str] = None) -> Path: - """Get SCITEX_DIR with priority: direct → env → default. - - This is a convenience function for the most common use case. - - Parameters - ---------- - direct_val : str, optional - Direct value (highest precedence) - - Returns - ------- - Path - Resolved SCITEX_DIR path - """ - # Try to load .env first (won't override existing env vars) - load_dotenv() - - if direct_val is not None: - return Path(direct_val).expanduser() - - env_val = os.getenv("SCITEX_DIR") - if env_val: - return Path(env_val).expanduser() - - return Path.home() / ".scitex" - - -class PriorityConfig: - """Universal config resolver with precedence: direct → config_dict → env → default - - Config dict (from YAML or passed dict) takes priority over env variables. - This follows the Scholar module's CascadeConfig pattern. - - Examples - -------- - >>> from scitex.config import PriorityConfig - >>> config = PriorityConfig(config_dict={"port": 3000}, env_prefix="SCITEX_") - >>> port = config.resolve("port", None, default=8000, type=int) - 3000 # from config_dict (highest after direct) - >>> # With env: SCITEX_PORT=5000 python script.py - >>> port = config.resolve("port", None, default=8000, type=int) - 3000 # config_dict takes precedence over env - >>> port = config.resolve("port", 9000, default=8000, type=int) - 9000 # direct value takes highest precedence - """ - - SENSITIVE_EXPRESSIONS = [ - "API", - "PASSWORD", - "SECRET", - "TOKEN", - "KEY", - "PASS", - "AUTH", - "CREDENTIAL", - "PRIVATE", - "CERT", - ] - - def __init__( - self, - config_dict: Optional[Dict[str, Any]] = None, - env_prefix: str = "", - auto_uppercase: bool = True, - ): - """Initialize PriorityConfig. - - Parameters - ---------- - config_dict : dict, optional - Dictionary with configuration values - env_prefix : str - Prefix for environment variables (e.g., "SCITEX_") - auto_uppercase : bool - Whether to uppercase keys for env lookup - """ - self.config_dict = config_dict or {} - self.env_prefix = env_prefix - self.auto_uppercase = auto_uppercase - self.resolution_log: List[Dict[str, Any]] = [] - - def __repr__(self) -> str: - return f"PriorityConfig(prefix='{self.env_prefix}', configs={len(self.config_dict)})" - - def get(self, key: str) -> Any: - """Get value from config dict only.""" - return self.config_dict.get(key) - - def resolve( - self, - key: str, - direct_val: Any = None, - default: Any = None, - type: Type = str, - mask: Optional[bool] = None, - ) -> Any: - """Get value with precedence hierarchy. - - Precedence: direct → config_dict → env → default - - This follows the Scholar module's CascadeConfig pattern where - config dict takes higher priority than environment variables. - - Parameters - ---------- - key : str - Configuration key to resolve - direct_val : Any, optional - Direct value (highest precedence) - default : Any, optional - Default value if not found elsewhere - type : Type - Type conversion (str, int, float, bool, list) - mask : bool, optional - Override automatic masking of sensitive values - - Returns - ------- - Any - Resolved configuration value - """ - source = None - final_value = None - - # Replace dots with underscores for env key (e.g., axes.width_mm -> AXES_WIDTH_MM) - normalized_key = key.replace(".", "_") - env_key = f"{self.env_prefix}{normalized_key.upper() if self.auto_uppercase else normalized_key}" - env_val = os.getenv(env_key) - - # Priority: direct → config_dict → env → default - if direct_val is not None: - source = "direct" - final_value = direct_val - elif key in self.config_dict: - source = "config_dict" - final_value = self.config_dict[key] - elif env_val: - source = f"env:{env_key}" - final_value = self._convert_type(env_val, type) - else: - source = "default" - final_value = default - - if mask is False: - should_mask = False - else: - should_mask = self._is_sensitive(key) - - display_value = self._mask_value(final_value) if should_mask else final_value - - self.resolution_log.append( - { - "key": key, - "source": source, - "value": display_value, - "type": type.__name__, - } - ) - - return final_value - - def print_resolutions(self) -> None: - """Print how each config was resolved.""" - if not self.resolution_log: - print("No configurations resolved yet") - return - - print("Configuration Resolution Log:") - print("-" * 50) - for entry in self.resolution_log: - print(f"{entry['key']:<20} = {entry['value']:<20} ({entry['source']})") - - def clear_log(self) -> None: - """Clear resolution log.""" - self.resolution_log = [] - - def _convert_type(self, value: str, type: Type) -> Any: - """Convert string value to specified type.""" - if type == int: - return int(value) - elif type == float: - return float(value) - elif type == bool: - return value.lower() in ("true", "1", "yes") - elif type == list: - return value.split(",") - return value - - def _is_sensitive(self, key: str) -> bool: - """Check if key contains sensitive expressions.""" - key_upper = key.upper() - return any(expr in key_upper for expr in self.SENSITIVE_EXPRESSIONS) - - def _mask_value(self, value: Any) -> str: - """Mask sensitive values for display.""" - if value is None: - return None - value_str = str(value) - if len(value_str) <= 4: - return "****" - return value_str[:2] + "*" * (len(value_str) - 4) + value_str[-2:] - - -# EOF diff --git a/src/scitex/config/_ScitexConfig.py b/src/scitex/config/_ScitexConfig.py deleted file mode 100755 index 8fbbcf56b..000000000 --- a/src/scitex/config/_ScitexConfig.py +++ /dev/null @@ -1,319 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Timestamp: "2025-12-09 (ywatanabe)" -# File: /home/ywatanabe/proj/scitex-code/src/scitex/config/ScitexConfig.py - -""" -YAML-based configuration for SciTeX with environment variable substitution. - -Similar to ScholarConfig, provides: -- YAML configuration loading -- Environment variable substitution (${VAR:-default} syntax) -- Cascade resolution (direct → config → env → default) - -Usage: - from scitex.config import ScitexConfig - - # Load default configuration - config = ScitexConfig() - - # Load custom configuration - config = ScitexConfig(config_path="/path/to/config.yaml") - - # Resolve values with precedence - log_level = config.resolve("logging.level", default="INFO") -""" - -import os -import re -from pathlib import Path -from typing import Any, Dict, Optional, Type, Union - -from ._PriorityConfig import PriorityConfig, load_dotenv - - -def load_yaml(path: Path) -> dict: - """Load YAML file with environment variable substitution. - - Supports ${VAR:-default} syntax for environment variable expansion. - - Parameters - ---------- - path : Path - Path to YAML file - - Returns - ------- - dict - Parsed YAML with environment variables substituted - """ - try: - import yaml - except ImportError: - raise ImportError( - "PyYAML required for YAML config. Install with: pip install pyyaml" - ) - - try: - with open(path) as f: - content = f.read() - - def env_replacer(match): - """Replace ${VAR:-default} with environment variable or default.""" - env_expr = match.group(1) - if ":-" in env_expr: - var_name, default_value = env_expr.split(":-", 1) - value = os.getenv(var_name, default_value.strip('"')) - else: - value = os.getenv(env_expr) - - # Handle special values - if value in ["true", "false"]: - return value - elif value == "null": - return "null" - elif value and not (value.startswith('"') and value.endswith('"')): - return f'"{value}"' - else: - return value or "null" - - content = re.sub(r"\$\{([^}]+)\}", env_replacer, content) - return yaml.safe_load(content) - except Exception as e: - raise ValueError(f"Failed to load YAML config from {path}: {e}") - - -class ScitexConfig: - """YAML-based configuration manager for SciTeX. - - Loads configuration from YAML files with environment variable substitution. - Values can be resolved with priority: direct → config → env → default. - - Examples - -------- - >>> from scitex.config import ScitexConfig - >>> config = ScitexConfig() - >>> config.resolve("logging.level", default="INFO") - 'INFO' - >>> config.get("debug.enabled") - False - """ - - def __init__( - self, - config_path: Optional[Union[str, Path]] = None, - env_prefix: str = "SCITEX_", - ): - """Initialize ScitexConfig. - - Parameters - ---------- - config_path : str or Path, optional - Path to custom YAML config file. If None, uses default.yaml. - env_prefix : str - Prefix for environment variables (default: "SCITEX_") - """ - # Load .env file first - load_dotenv() - - # Load YAML configuration - if config_path and Path(config_path).exists(): - self._config_data = load_yaml(Path(config_path)) - self._config_path = Path(config_path) - else: - default_path = Path(__file__).parent / "default.yaml" - if default_path.exists(): - self._config_data = load_yaml(default_path) - else: - self._config_data = {} - self._config_path = default_path - - # Flatten nested config for easy access - self._flat_config = self._flatten_dict(self._config_data) - - # Initialize PriorityConfig for resolution - self._priority_config = PriorityConfig( - config_dict=self._flat_config, - env_prefix=env_prefix, - ) - - def _flatten_dict(self, d: dict, parent_key: str = "", sep: str = ".") -> dict: - """Flatten nested dictionary with dot notation keys. - - Parameters - ---------- - d : dict - Dictionary to flatten - parent_key : str - Parent key for recursion - sep : str - Separator for nested keys - - Returns - ------- - dict - Flattened dictionary - """ - items = [] - for k, v in d.items(): - new_key = f"{parent_key}{sep}{k}" if parent_key else k - if isinstance(v, dict): - items.extend(self._flatten_dict(v, new_key, sep).items()) - else: - items.append((new_key, v)) - return dict(items) - - def get(self, key: str, default: Any = None) -> Any: - """Get value from config directly (no precedence resolution). - - Supports dot notation for nested keys. - - Parameters - ---------- - key : str - Configuration key (e.g., "logging.level" or "debug.enabled") - default : Any - Default value if key not found - - Returns - ------- - Any - Configuration value - """ - return self._flat_config.get(key, default) - - def resolve( - self, - key: str, - direct_val: Any = None, - default: Any = None, - type: Type = str, - ) -> Any: - """Resolve value with precedence: direct → config → env → default. - - This follows the Scholar module's CascadeConfig pattern where - YAML config takes higher priority than environment variables. - - Parameters - ---------- - key : str - Configuration key (e.g., "logging.level") - direct_val : Any - Direct value (highest precedence) - default : Any - Default value (lowest precedence) - type : Type - Type conversion (str, int, float, bool, list) - - Returns - ------- - Any - Resolved value - """ - # Priority: direct → config → env → default - # (Same as Scholar's CascadeConfig pattern) - if direct_val is not None: - return direct_val - - # Config (YAML) takes priority over env - config_val = self._flat_config.get(key) - if config_val is not None: - return config_val - - # Then check environment variable - normalized_key = key.replace(".", "_") - env_key = f"SCITEX_{normalized_key.upper()}" - env_val = os.getenv(env_key) - if env_val: - return self._convert_type(env_val, type) - - return default - - def _convert_type(self, value: str, type: Type) -> Any: - """Convert string value to specified type.""" - if type == int: - return int(value) - elif type == float: - return float(value) - elif type == bool: - return value.lower() in ("true", "1", "yes") - elif type == list: - return value.split(",") - return value - - def get_nested(self, *keys: str, default: Any = None) -> Any: - """Get nested value from original config structure. - - Parameters - ---------- - *keys : str - Keys to traverse (e.g., "browser", "screenshots_dir") - default : Any - Default value if not found - - Returns - ------- - Any - Nested value - """ - current = self._config_data - for key in keys: - if isinstance(current, dict) and key in current: - current = current[key] - else: - return default - return current - - @property - def config_path(self) -> Path: - """Get the path to the loaded config file.""" - return self._config_path - - @property - def raw(self) -> dict: - """Get raw configuration data (original nested structure).""" - return self._config_data - - @property - def flat(self) -> dict: - """Get flattened configuration data.""" - return self._flat_config - - def print(self) -> None: - """Print configuration resolution log.""" - self._priority_config.print_resolutions() - - def __repr__(self) -> str: - return f"ScitexConfig(path='{self._config_path}')" - - -# Module-level convenience functions - -_default_config: Optional[ScitexConfig] = None - - -def get_config(config_path: Optional[Union[str, Path]] = None) -> ScitexConfig: - """Get ScitexConfig instance. - - Parameters - ---------- - config_path : str or Path, optional - Path to custom config. If None, returns cached default instance. - - Returns - ------- - ScitexConfig - Configuration instance - """ - global _default_config - - if config_path is not None: - return ScitexConfig(config_path) - - if _default_config is None: - _default_config = ScitexConfig() - - return _default_config - - -# EOF diff --git a/src/scitex/config/__init__.py b/src/scitex/config/__init__.py index 147215931..233e4e3d2 100755 --- a/src/scitex/config/__init__.py +++ b/src/scitex/config/__init__.py @@ -1,70 +1,20 @@ -#!/usr/bin/env python3 -# Timestamp: "2025-12-09 (ywatanabe)" -# File: ./src/scitex/config/__init__.py +"""SciTeX config — thin compatibility shim for scitex-config. -""" -SciTeX configuration module. - -Provides two configuration patterns (both use same priority order): - -1. **ScitexConfig** (YAML-based, recommended): - - Loads configuration from YAML files - - YAML supports env var substitution: ${VAR:-default} - -2. **PriorityConfig** (dict-based, for programmatic use): - - Uses a Python dictionary for configuration - -**Priority Order** (same for both): - direct → config (YAML/dict) → env → default +Aliases ``scitex.config`` to the standalone ``scitex_config`` package via +``sys.modules``. ``scitex.config is scitex_config``. -Usage: - from scitex.config import ScitexConfig, ScitexPaths, get_config, get_paths - - # YAML-based configuration (Scholar pattern) - config = get_config() - log_level = config.resolve("logging.level", default="INFO") - - # Centralized path manager - paths = get_paths() - print(paths.logs) # ~/.scitex/logs - print(paths.cache) # ~/.scitex/cache - - # Use resolve() pattern in modules - cache_dir = paths.resolve("cache", user_provided_path) +Install: ``pip install scitex[config]`` (or ``pip install scitex-config``). +See: https://github.com/ywatanabe1989/scitex-config """ -from ._env_registry import ( - ENV_REGISTRY, - EnvVar, - generate_template, - get_all_modules, - get_env_by_module, - get_env_docs, -) -from ._paths import ScitexPaths, get_paths -from ._PriorityConfig import PriorityConfig, get_scitex_dir, load_dotenv -from ._ScitexConfig import ScitexConfig, get_config, load_yaml - -__all__ = [ - # YAML-based config (Scholar pattern) - "ScitexConfig", - "get_config", - "load_yaml", - # Path management - "ScitexPaths", - "get_paths", - # Legacy/utility - "PriorityConfig", - "get_scitex_dir", - "load_dotenv", - # Environment variable registry - "ENV_REGISTRY", - "EnvVar", - "generate_template", - "get_all_modules", - "get_env_by_module", - "get_env_docs", -] +import sys as _sys +try: + import scitex_config as _real +except ImportError as _e: # pragma: no cover + raise ImportError( + "scitex.config requires the 'scitex-config' package. " + "Install with: pip install scitex[config] (or: pip install scitex-config)" + ) from _e -# EOF +_sys.modules[__name__] = _real diff --git a/src/scitex/config/_env_registry.py b/src/scitex/config/_env_registry.py deleted file mode 100755 index 29a3261aa..000000000 --- a/src/scitex/config/_env_registry.py +++ /dev/null @@ -1,256 +0,0 @@ -#!/usr/bin/env python3 -# Timestamp: 2026-01-24 -# File: src/scitex/config/_env_registry.py - -""" -Registry of all SCITEX environment variables. - -Provides documentation and template generation for environment configuration. -""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Dict, List, Optional - - -@dataclass -class EnvVar: - """Environment variable definition.""" - - name: str - description: str - module: str - default: Optional[str] = None - required: bool = False - sensitive: bool = False - - -# Registry of all SCITEX environment variables -ENV_REGISTRY: List[EnvVar] = [ - # Core - EnvVar("SCITEX_DIR", "Base directory for scitex data", "core", "~/.scitex"), - EnvVar("SCITEX_ENV_SRC", "Path to env source file/directory", "core"), - EnvVar("SCITEX_LOGGING_LEVEL", "Logging level", "logging", "INFO"), - EnvVar("SCITEX_LOGGING_FORMAT", "Log format style", "logging", "default"), - EnvVar("SCITEX_LOGGING_FORCE_COLOR", "Force colored output", "logging", "false"), - # Audio - EnvVar("SCITEX_AUDIO_MODE", "Audio mode: local/remote/auto", "audio", "auto"), - EnvVar("SCITEX_AUDIO_RELAY_URL", "Relay server URL for remote audio", "audio"), - EnvVar("SCITEX_AUDIO_RELAY_HOST", "Relay server host", "audio"), - EnvVar("SCITEX_AUDIO_RELAY_PORT", "Relay server port", "audio", "31293"), - EnvVar("SCITEX_AUDIO_PORT", "Audio server port", "audio", "31293"), - EnvVar( - "SCITEX_AUDIO_ELEVENLABS_API_KEY", "ElevenLabs API key", "audio", sensitive=True - ), - # Scholar - EnvVar("SCITEX_SCHOLAR_DIR", "Scholar library directory", "scholar"), - EnvVar("SCITEX_SCHOLAR_CROSSREF_EMAIL", "Email for Crossref API", "scholar"), - EnvVar("SCITEX_SCHOLAR_PUBMED_EMAIL", "Email for PubMed API", "scholar"), - EnvVar("SCITEX_SCHOLAR_CROSSREF_DB", "Local Crossref database path", "scholar"), - EnvVar("SCITEX_SCHOLAR_CROSSREF_API_URL", "Crossref API URL", "scholar"), - EnvVar( - "SCITEX_SCHOLAR_CROSSREF_MODE", "Crossref mode: local/api/hybrid", "scholar" - ), - EnvVar("SCITEX_SCHOLAR_OPENATHENS_EMAIL", "OpenAthens login email", "scholar"), - EnvVar( - "SCITEX_SCHOLAR_OPENATHENS_ENABLED", - "Enable OpenAthens auth", - "scholar", - "false", - ), - EnvVar( - "SCITEX_SCHOLAR_EZPROXY_URL", "EZProxy URL for institutional access", "scholar" - ), - EnvVar("SCITEX_SCHOLAR_OPENURL_RESOLVER_URL", "OpenURL resolver URL", "scholar"), - EnvVar( - "SCITEX_SCHOLAR_ZENROWS_API_KEY", - "ZenRows API key for scraping", - "scholar", - sensitive=True, - ), - # Social - EnvVar( - "SCITEX_SOCIAL_X_CONSUMER_KEY", - "Twitter/X consumer key", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_X_CONSUMER_KEY_SECRET", - "Twitter/X consumer secret", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_X_ACCESS_TOKEN", - "Twitter/X access token", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_X_ACCESS_TOKEN_SECRET", - "Twitter/X access token secret", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_X_BEARER_TOKEN", - "Twitter/X bearer token", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_LINKEDIN_CLIENT_ID", - "LinkedIn client ID", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_LINKEDIN_CLIENT_SECRET", - "LinkedIn client secret", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_LINKEDIN_ACCESS_TOKEN", - "LinkedIn access token", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_REDDIT_CLIENT_ID", - "Reddit client ID", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_REDDIT_CLIENT_SECRET", - "Reddit client secret", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_YOUTUBE_API_KEY", - "YouTube API key", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_YOUTUBE_CLIENT_SECRETS_FILE", - "YouTube client secrets file path", - "social", - ), - EnvVar( - "SCITEX_SOCIAL_GOOGLE_ANALYTICS_PROPERTY_ID", - "Google Analytics property ID", - "social", - ), - EnvVar( - "SCITEX_SOCIAL_GOOGLE_ANALYTICS_MEASUREMENT_ID", - "Google Analytics measurement ID", - "social", - ), - EnvVar( - "SCITEX_SOCIAL_GOOGLE_ANALYTICS_API_SECRET", - "Google Analytics API secret", - "social", - sensitive=True, - ), - EnvVar( - "SCITEX_SOCIAL_GOOGLE_APPLICATION_CREDENTIALS", - "Google service account credentials path", - "social", - ), - # Cloud - EnvVar("SCITEX_CLOUD_USERNAME", "Cloud username", "cloud", sensitive=True), - EnvVar("SCITEX_CLOUD_PASSWORD", "Cloud password", "cloud", sensitive=True), - EnvVar( - "SCITEX_CLOUD_CODE_WORKSPACE", "Running in cloud workspace", "cloud", "false" - ), - EnvVar("SCITEX_CLOUD_CODE_PROJECT_ROOT", "Cloud project root", "cloud"), - EnvVar("SCITEX_CLOUD_CODE_BACKEND", "Cloud backend type", "cloud"), - # UI/Notification - EnvVar("SCITEX_UI_DEFAULT_BACKEND", "Default notification backend", "ui"), - EnvVar( - "SCITEX_UI_BACKEND_PRIORITY", - "Notification backend priority (comma-sep)", - "ui", - ), - EnvVar("SCITEX_UI_INFO_BACKENDS", "Backends for info notifications", "ui"), - EnvVar("SCITEX_UI_WARNING_BACKENDS", "Backends for warning notifications", "ui"), - EnvVar("SCITEX_UI_ERROR_BACKENDS", "Backends for error notifications", "ui"), - EnvVar("SCITEX_UI_CRITICAL_BACKENDS", "Backends for critical notifications", "ui"), - EnvVar("SCITEX_UI_EMAIL_NOTIFICATION_FROM", "Email notification sender", "ui"), - EnvVar("SCITEX_UI_EMAIL_NOTIFICATION_TO", "Email notification recipient", "ui"), - EnvVar("SCITEX_UI_WEBHOOK_URL", "Webhook URL for notifications", "ui"), - # Capture - EnvVar("SCITEX_CAPTURE_DIR", "Screenshot capture directory", "capture"), - # Web - EnvVar("SCITEX_WEB_DOWNLOADS_DIR", "Web downloads directory", "web"), - # PLT - EnvVar("SCITEX_PLT_AXES_WIDTH_MM", "Default axes width in mm", "plt"), - EnvVar("SCITEX_PLT_LINES_TRACE_MM", "Default line trace width in mm", "plt"), - EnvVar("SCITEX_PLT_STYLE", "Default plot style", "plt"), - EnvVar("SCITEX_PLT_COLORS", "Color palette to use", "plt"), -] - - -def get_env_by_module(module: str) -> List[EnvVar]: - """Get all environment variables for a module.""" - return [e for e in ENV_REGISTRY if e.module == module] - - -def get_all_modules() -> List[str]: - """Get list of all modules with env vars.""" - return sorted(set(e.module for e in ENV_REGISTRY)) - - -def generate_template( - include_sensitive: bool = True, include_defaults: bool = True -) -> str: - """Generate a template .src file with all environment variables.""" - lines = [ - "#!/bin/bash", - "# SciTeX Environment Variables Template", - "# Generated by scitex.config.generate_template()", - "#", - "# Usage: source this file or set SCITEX_ENV_SRC to this path", - "", - ] - - for module in get_all_modules(): - lines.append(f"# === {module.upper()} ===") - for env in get_env_by_module(module): - if env.sensitive and not include_sensitive: - continue - - if env.description: - lines.append(f"# {env.description}") - - if env.default and include_defaults: - lines.append(f'export {env.name}="{env.default}"') - elif env.sensitive: - lines.append(f'# export {env.name}="YOUR_{env.name}_HERE"') - else: - lines.append(f"# export {env.name}=") - lines.append("") - - return "\n".join(lines) - - -def get_env_docs() -> Dict[str, Dict]: - """Get documentation for all environment variables.""" - return { - e.name: { - "description": e.description, - "module": e.module, - "default": e.default, - "required": e.required, - "sensitive": e.sensitive, - } - for e in ENV_REGISTRY - } - - -# EOF diff --git a/src/scitex/config/_paths.py b/src/scitex/config/_paths.py deleted file mode 100755 index d365cb474..000000000 --- a/src/scitex/config/_paths.py +++ /dev/null @@ -1,325 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Timestamp: "2025-12-09 (ywatanabe)" -# File: /home/ywatanabe/proj/scitex-code/src/scitex/config/paths.py - -""" -Centralized path management for SciTeX. - -Provides a single source of truth for all directory paths used across -the SciTeX ecosystem. All paths respect the SCITEX_DIR environment variable. - -Usage: - from scitex.config import ScitexPaths - - paths = ScitexPaths() - - # Method 1: Direct property access (uses default) - print(paths.logs) # ~/.scitex/logs - print(paths.cache) # ~/.scitex/cache - - # Method 2: resolve() with direct value override (recommended for modules) - cache_dir = paths.resolve("cache", direct_val=user_provided_path) - # If user_provided_path is None -> uses default from SCITEX_DIR - - # Thread-safe: pass explicit base_dir - paths = ScitexPaths(base_dir="/custom/path") -""" - -import os -from pathlib import Path -from typing import Optional, Union - -from ._PriorityConfig import get_scitex_dir, load_dotenv - - -class ScitexPaths: - """Centralized path manager for SciTeX directories. - - All paths are derived from SCITEX_DIR (default: ~/.scitex). - Priority: direct_val → SCITEX_DIR env → .env file → default - - Directory Structure: - $SCITEX_DIR/ - ├── browser/ # Browser profiles and data - │ ├── screenshots/ # Browser debugging screenshots - │ ├── sessions/ # Shared browser sessions - │ └── persistent/ # Persistent browser profiles - ├── cache/ # General cache - │ └── functions/ # Function cache (joblib) - ├── capture/ # Screen captures - ├── impact_factor_cache/ # Impact factor data cache - ├── logs/ # Log files - ├── openathens_cache/ # OpenAthens auth cache - ├── rng/ # Random number generator state - ├── scholar/ # Scholar module data - │ ├── cache/ # Scholar-specific cache - │ └── library/ # PDF library - ├── screenshots/ # General screenshots - ├── test_monitor/ # Test monitoring screenshots - └── writer/ # Writer module data - """ - - def __init__(self, base_dir: Optional[str] = None): - """Initialize ScitexPaths. - - Parameters - ---------- - base_dir : str, optional - Explicit base directory. If None, uses SCITEX_DIR env var - or falls back to ~/.scitex. - """ - self._base_dir = get_scitex_dir(base_dir) - - @property - def base(self) -> Path: - """Base SciTeX directory ($SCITEX_DIR or ~/.scitex).""" - return self._base_dir - - # ========== Core directories ========== - - @property - def logs(self) -> Path: - """Log files directory.""" - return self._base_dir / "logs" - - @property - def cache(self) -> Path: - """General cache directory.""" - return self._base_dir / "cache" - - @property - def capture(self) -> Path: - """Screen capture directory.""" - return self._base_dir / "capture" - - @property - def screenshots(self) -> Path: - """General screenshots directory.""" - return self._base_dir / "screenshots" - - @property - def rng(self) -> Path: - """Random number generator state directory.""" - return self._base_dir / "rng" - - # ========== Browser directories ========== - - @property - def browser(self) -> Path: - """Browser module base directory.""" - return self._base_dir / "browser" - - @property - def browser_screenshots(self) -> Path: - """Browser debugging screenshots.""" - return self.browser / "screenshots" - - @property - def browser_sessions(self) -> Path: - """Shared browser sessions.""" - return self.browser / "sessions" - - @property - def browser_persistent(self) -> Path: - """Persistent browser profiles.""" - return self.browser / "persistent" - - @property - def test_monitor(self) -> Path: - """Test monitoring screenshots directory.""" - return self._base_dir / "test_monitor" - - # ========== Cache directories ========== - - @property - def function_cache(self) -> Path: - """Function cache (joblib memory).""" - return self.cache / "functions" - - @property - def impact_factor_cache(self) -> Path: - """Impact factor data cache.""" - return self._base_dir / "impact_factor_cache" - - @property - def openathens_cache(self) -> Path: - """OpenAthens authentication cache.""" - return self._base_dir / "openathens_cache" - - # ========== Scholar directories ========== - - @property - def scholar(self) -> Path: - """Scholar module base directory.""" - return self._base_dir / "scholar" - - @property - def scholar_cache(self) -> Path: - """Scholar-specific cache directory.""" - return self.scholar / "cache" - - @property - def scholar_library(self) -> Path: - """Scholar PDF library directory.""" - return self.scholar / "library" - - # ========== Writer directories ========== - - @property - def writer(self) -> Path: - """Writer module directory.""" - return self._base_dir / "writer" - - # ========== Resolve method (recommended for modules) ========== - - def resolve( - self, - path_name: str, - direct_val: Optional[Union[str, Path]] = None, - ) -> Path: - """Resolve a path with priority: direct_val → default from SCITEX_DIR. - - This is the recommended method for modules that accept optional path - parameters. It follows the same pattern as PriorityConfig.resolve(). - - Parameters - ---------- - path_name : str - Name of the path property (e.g., "cache", "logs", "scholar_library") - direct_val : str or Path, optional - Direct value (highest precedence). If None, uses default. - - Returns - ------- - Path - Resolved path - - Examples - -------- - >>> paths = ScitexPaths() - >>> # User didn't provide path -> use default - >>> cache_dir = paths.resolve("cache", None) - >>> # User provided custom path -> use it - >>> cache_dir = paths.resolve("cache", "/custom/cache") - - Usage in modules: - >>> class MyModule: - ... def __init__(self, cache_dir=None): - ... self.cache_dir = get_paths().resolve("cache", cache_dir) - """ - if direct_val is not None: - return Path(direct_val).expanduser() - - # Get the default path from property - if hasattr(self, path_name): - return getattr(self, path_name) - - raise ValueError( - f"Unknown path name: {path_name}. Available: {list(self.list_all().keys())}" - ) - - # ========== Utility methods ========== - - def ensure_dir(self, path: Path) -> Path: - """Ensure directory exists, creating if necessary. - - Parameters - ---------- - path : Path - Directory path to ensure exists. - - Returns - ------- - Path - The same path, guaranteed to exist. - """ - path.mkdir(parents=True, exist_ok=True) - return path - - def ensure_all(self) -> None: - """Create all standard directories.""" - dirs = [ - self.logs, - self.cache, - self.function_cache, - self.capture, - self.screenshots, - self.rng, - self.browser, - self.browser_screenshots, - self.browser_sessions, - self.browser_persistent, - self.test_monitor, - self.impact_factor_cache, - self.openathens_cache, - self.scholar, - self.scholar_cache, - self.scholar_library, - self.writer, - ] - for d in dirs: - d.mkdir(parents=True, exist_ok=True) - - def list_all(self) -> dict: - """List all configured paths. - - Returns - ------- - dict - Dictionary of path names to Path objects. - """ - return { - "base": self.base, - "logs": self.logs, - "cache": self.cache, - "function_cache": self.function_cache, - "capture": self.capture, - "screenshots": self.screenshots, - "rng": self.rng, - "browser": self.browser, - "browser_screenshots": self.browser_screenshots, - "browser_sessions": self.browser_sessions, - "browser_persistent": self.browser_persistent, - "test_monitor": self.test_monitor, - "impact_factor_cache": self.impact_factor_cache, - "openathens_cache": self.openathens_cache, - "scholar": self.scholar, - "scholar_cache": self.scholar_cache, - "scholar_library": self.scholar_library, - "writer": self.writer, - } - - def __repr__(self) -> str: - return f"ScitexPaths(base='{self._base_dir}')" - - -# Singleton instance for convenience (uses default SCITEX_DIR) -_default_paths: Optional[ScitexPaths] = None - - -def get_paths(base_dir: Optional[str] = None) -> ScitexPaths: - """Get ScitexPaths instance. - - Parameters - ---------- - base_dir : str, optional - Explicit base directory. If None, returns cached default instance. - - Returns - ------- - ScitexPaths - Path manager instance. - """ - global _default_paths - - if base_dir is not None: - return ScitexPaths(base_dir) - - if _default_paths is None: - _default_paths = ScitexPaths() - - return _default_paths - - -# EOF diff --git a/src/scitex/config/_skills/SKILL.md b/src/scitex/config/_skills/SKILL.md deleted file mode 100644 index 2f1ade283..000000000 --- a/src/scitex/config/_skills/SKILL.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -name: stx.config -description: Configuration management for SciTeX — YAML-based config, dict-based priority resolver, centralized path manager, and environment variable registry. Both patterns use the same precedence: direct > config > env > default. ---- - -# stx.config - -Configuration management for the SciTeX ecosystem. Two complementary patterns share the same priority order: **direct > config > env > default**. - -## Sub-skills - -### Priority-based resolver (dict/programmatic) -- [priority-config.md](priority-config.md) — `PriorityConfig`, `get_scitex_dir`, `load_dotenv`: dict-backed resolver with configurable env prefix, type coercion, and sensitive-value masking - -### YAML-based configuration (recommended) -- [scitex-config.md](scitex-config.md) — `ScitexConfig`, `get_config`, `load_yaml`: YAML loader with `${VAR:-default}` substitution, dot-notation access, and cascade resolution - -### Path management -- [paths.md](paths.md) — `ScitexPaths`, `get_paths`: single source of truth for all SciTeX directories; `resolve()` pattern for configurable module paths - -### Environment variable registry -- [env-registry.md](env-registry.md) — `ENV_REGISTRY`, `EnvVar`, `generate_template`, `get_env_docs`, `get_env_by_module`, `get_all_modules`: typed registry of all `SCITEX_*` variables with documentation and template generation - ---- - -## Quick reference - -```python -import scitex as stx -from scitex.config import ( - ScitexConfig, get_config, - ScitexPaths, get_paths, - PriorityConfig, get_scitex_dir, load_dotenv, - ENV_REGISTRY, EnvVar, generate_template, get_env_docs, - get_env_by_module, get_all_modules, -) - -# YAML-based (recommended) -config = get_config() -level = config.resolve("logging.level", default="INFO") -debug = config.resolve("debug.enabled", default=False, type=bool) - -# Path manager -paths = get_paths() # or stx.PATHS -print(paths.logs) # ~/.scitex/logs -print(paths.scholar_library) # ~/.scitex/scholar/library -cache = paths.resolve("cache", user_provided_path) # direct > default pattern - -# Dict-based resolver -cfg = PriorityConfig({"port": 3000}, env_prefix="MYAPP_") -port = cfg.resolve("port", default=8000, type=int) - -# Environment variable template -print(generate_template(include_sensitive=False)) -``` diff --git a/src/scitex/config/_skills/env-registry.md b/src/scitex/config/_skills/env-registry.md deleted file mode 100644 index 7fcacae89..000000000 --- a/src/scitex/config/_skills/env-registry.md +++ /dev/null @@ -1,203 +0,0 @@ ---- -description: ENV_REGISTRY — typed registry of all SCITEX_* environment variables with module grouping, defaults, and sensitive flags. Covers EnvVar dataclass, get_env_by_module, get_all_modules, generate_template, and get_env_docs. ---- - -# Environment Variable Registry - -Typed registry of every `SCITEX_*` environment variable used across the ecosystem. Provides documentation lookup, module filtering, template generation, and `.env` file scaffolding. - -Defined in `scitex/config/_env_registry.py`. - ---- - -## EnvVar - -```python -@dataclass -class EnvVar: - name: str - description: str - module: str - default: str | None = None - required: bool = False - sensitive: bool = False -``` - -**Fields** - -| Field | Type | Description | -|-------|------|-------------| -| `name` | `str` | Full variable name, e.g. `"SCITEX_DIR"` | -| `description` | `str` | Human-readable description | -| `module` | `str` | Module that owns the variable (e.g. `"core"`, `"scholar"`, `"audio"`) | -| `default` | `str` or `None` | Default value shown in templates (string representation) | -| `required` | `bool` | Whether the variable is required (informational) | -| `sensitive` | `bool` | If `True`, the value is masked in logs and the template uses a placeholder | - ---- - -## ENV_REGISTRY - -```python -ENV_REGISTRY: list[EnvVar] -``` - -The complete list of all registered `SCITEX_*` variables. Variables are grouped by module at the source level. - -**Modules covered** (as of current version): - -| Module | Variables (count) | Example | -|--------|-------------------|---------| -| `core` | 3 | `SCITEX_DIR`, `SCITEX_ENV_SRC`, `SCITEX_LOGGING_LEVEL` | -| `audio` | 6 | `SCITEX_AUDIO_MODE`, `SCITEX_AUDIO_ELEVENLABS_API_KEY` | -| `capture` | 1 | `SCITEX_CAPTURE_DIR` | -| `cloud` | 5 | `SCITEX_CLOUD_USERNAME`, `SCITEX_CLOUD_PASSWORD` | -| `plt` | 4 | `SCITEX_PLT_STYLE`, `SCITEX_PLT_AXES_WIDTH_MM` | -| `scholar` | 10 | `SCITEX_SCHOLAR_DIR`, `SCITEX_SCHOLAR_CROSSREF_EMAIL` | -| `social` | 14 | `SCITEX_SOCIAL_X_CONSUMER_KEY`, `SCITEX_SOCIAL_LINKEDIN_*` | -| `ui` | 10 | `SCITEX_UI_DEFAULT_BACKEND`, `SCITEX_UI_WEBHOOK_URL` | -| `web` | 1 | `SCITEX_WEB_DOWNLOADS_DIR` | - ---- - -## get_env_by_module - -```python -get_env_by_module(module: str) -> list[EnvVar] -``` - -Filter `ENV_REGISTRY` to variables belonging to a specific module. - -```python -from scitex.config import get_env_by_module - -scholar_vars = get_env_by_module("scholar") -for v in scholar_vars: - print(f"{v.name}: {v.description}") -# SCITEX_SCHOLAR_DIR: Scholar library directory -# SCITEX_SCHOLAR_CROSSREF_EMAIL: Email for Crossref API -# ... -``` - ---- - -## get_all_modules - -```python -get_all_modules() -> list[str] -``` - -Return a sorted list of all distinct module names in `ENV_REGISTRY`. - -```python -from scitex.config import get_all_modules - -get_all_modules() -# ['audio', 'capture', 'cloud', 'core', 'logging', 'plt', 'scholar', 'social', 'ui', 'web'] -``` - ---- - -## generate_template - -```python -generate_template( - include_sensitive: bool = True, - include_defaults: bool = True, -) -> str -``` - -Generate a bash-sourceable `.env` / `.src` template with all `SCITEX_*` variables. - -**Parameters** - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `include_sensitive` | `bool` | `True` | Include sensitive variables. When `False`, sensitive entries are omitted entirely. | -| `include_defaults` | `bool` | `True` | Emit `export VAR="default"` for variables with a default. When `False`, defaults are commented out. | - -Output format: -- Variables with defaults: `export SCITEX_DIR="~/.scitex"` -- Sensitive variables: `# export SCITEX_SOCIAL_X_CONSUMER_KEY="YOUR_SCITEX_SOCIAL_X_CONSUMER_KEY_HERE"` -- Variables without a default: `# export SCITEX_SCHOLAR_EZPROXY_URL=` - -```python -from scitex.config import generate_template - -# Full template with all variables and their defaults -print(generate_template()) - -# Public-only template (no API keys / tokens) -print(generate_template(include_sensitive=False)) - -# Write to disk -from pathlib import Path -Path("~/.scitex.src").expanduser().write_text(generate_template()) -``` - -Sample output: - -```bash -#!/bin/bash -# SciTeX Environment Variables Template -# Generated by scitex.config.generate_template() -# -# Usage: source this file or set SCITEX_ENV_SRC to this path - -# === AUDIO === -# Audio mode: local/remote/auto -export SCITEX_AUDIO_MODE="auto" -# Relay server port -export SCITEX_AUDIO_RELAY_PORT="31293" -# ElevenLabs API key -# export SCITEX_AUDIO_ELEVENLABS_API_KEY="YOUR_SCITEX_AUDIO_ELEVENLABS_API_KEY_HERE" - -# === CORE === -# Base directory for scitex data -export SCITEX_DIR="~/.scitex" -... -``` - ---- - -## get_env_docs - -```python -get_env_docs() -> dict[str, dict] -``` - -Return a dict keyed by variable name, each value containing all metadata fields. - -```python -from scitex.config import get_env_docs - -docs = get_env_docs() -docs["SCITEX_DIR"] -# { -# "description": "Base directory for scitex data", -# "module": "core", -# "default": "~/.scitex", -# "required": False, -# "sensitive": False, -# } - -docs["SCITEX_SCHOLAR_ZENROWS_API_KEY"]["sensitive"] -# True -``` - ---- - -## Sensitive variable list - -Variables with `sensitive=True` are automatically masked by `PriorityConfig.resolve()` and emitted as placeholders by `generate_template()`. Current sensitive variables include (not exhaustive): - -- `SCITEX_AUDIO_ELEVENLABS_API_KEY` -- `SCITEX_SCHOLAR_ZENROWS_API_KEY` -- `SCITEX_SOCIAL_X_CONSUMER_KEY`, `SCITEX_SOCIAL_X_CONSUMER_KEY_SECRET` -- `SCITEX_SOCIAL_X_ACCESS_TOKEN`, `SCITEX_SOCIAL_X_ACCESS_TOKEN_SECRET` -- `SCITEX_SOCIAL_X_BEARER_TOKEN` -- `SCITEX_SOCIAL_LINKEDIN_CLIENT_ID`, `SCITEX_SOCIAL_LINKEDIN_CLIENT_SECRET`, `SCITEX_SOCIAL_LINKEDIN_ACCESS_TOKEN` -- `SCITEX_SOCIAL_REDDIT_CLIENT_ID`, `SCITEX_SOCIAL_REDDIT_CLIENT_SECRET` -- `SCITEX_SOCIAL_YOUTUBE_API_KEY` -- `SCITEX_SOCIAL_GOOGLE_ANALYTICS_API_SECRET` -- `SCITEX_CLOUD_USERNAME`, `SCITEX_CLOUD_PASSWORD` diff --git a/src/scitex/config/_skills/paths.md b/src/scitex/config/_skills/paths.md deleted file mode 100644 index 99f79b286..000000000 --- a/src/scitex/config/_skills/paths.md +++ /dev/null @@ -1,208 +0,0 @@ ---- -description: ScitexPaths — centralized path manager for all SciTeX directories. All paths derive from $SCITEX_DIR (default ~/.scitex). Covers get_paths, resolve(), ensure_dir(), ensure_all(), and list_all(). ---- - -# ScitexPaths - -Centralized path manager. Every directory used by the SciTeX ecosystem is expressed as a property of `ScitexPaths`. All paths derive from `$SCITEX_DIR` (default: `~/.scitex`). - -Defined in `scitex/config/_paths.py`. - ---- - -## ScitexPaths - -```python -ScitexPaths(base_dir: str | None = None) -``` - -**Parameters** - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `base_dir` | `str` or `None` | `None` | Explicit base directory. If `None`, resolves via `get_scitex_dir()`: `SCITEX_DIR` env var or `~/.scitex`. | - -### Path properties - -All properties return `Path` objects. They are computed on access — no directories are created on construction. - -**Core** - -| Property | Path | -|----------|------| -| `base` | `$SCITEX_DIR` | -| `logs` | `$SCITEX_DIR/logs` | -| `cache` | `$SCITEX_DIR/cache` | -| `function_cache` | `$SCITEX_DIR/cache/functions` | -| `capture` | `$SCITEX_DIR/capture` | -| `screenshots` | `$SCITEX_DIR/screenshots` | -| `rng` | `$SCITEX_DIR/rng` | - -**Browser** - -| Property | Path | -|----------|------| -| `browser` | `$SCITEX_DIR/browser` | -| `browser_screenshots` | `$SCITEX_DIR/browser/screenshots` | -| `browser_sessions` | `$SCITEX_DIR/browser/sessions` | -| `browser_persistent` | `$SCITEX_DIR/browser/persistent` | -| `test_monitor` | `$SCITEX_DIR/test_monitor` | - -**Cache specializations** - -| Property | Path | -|----------|------| -| `impact_factor_cache` | `$SCITEX_DIR/impact_factor_cache` | -| `openathens_cache` | `$SCITEX_DIR/openathens_cache` | - -**Scholar** - -| Property | Path | -|----------|------| -| `scholar` | `$SCITEX_DIR/scholar` | -| `scholar_cache` | `$SCITEX_DIR/scholar/cache` | -| `scholar_library` | `$SCITEX_DIR/scholar/library` | - -**Writer** - -| Property | Path | -|----------|------| -| `writer` | `$SCITEX_DIR/writer` | - -### resolve - -```python -resolve( - path_name: str, - direct_val: str | Path | None = None, -) -> Path -``` - -Return `direct_val` (expanded via `Path.expanduser()`) if it is not `None`; otherwise return the default path for `path_name`. - -`path_name` must match an existing property (raises `ValueError` otherwise). - -This is the **recommended pattern** for modules that accept optional path parameters: - -```python -from typing import Optional -from scitex.config import get_paths - -class MyModule: - def __init__(self, cache_dir: Optional[str] = None): - # If caller passed a path, use it. Otherwise use $SCITEX_DIR/cache. - self.cache_dir = get_paths().resolve("cache", cache_dir) - self.cache_dir.mkdir(parents=True, exist_ok=True) -``` - -### ensure_dir - -```python -ensure_dir(path: Path) -> Path -``` - -Create `path` (including parents) if it does not exist. Returns `path` unchanged. - -```python -paths = ScitexPaths() -log_dir = paths.ensure_dir(paths.logs) -``` - -### ensure_all - -```python -ensure_all() -> None -``` - -Create every standard directory (all 18 paths listed above) in a single call. Idempotent. - -```python -from scitex.config import get_paths -get_paths().ensure_all() # initialize entire ~/.scitex tree -``` - -### list_all - -```python -list_all() -> dict[str, Path] -``` - -Return a dict mapping every property name to its `Path` value. Useful for inspection and iteration. - -```python -paths = ScitexPaths() -for name, path in paths.list_all().items(): - print(f"{name}: {path}") -``` - ---- - -## get_paths - -```python -get_paths(base_dir: str | None = None) -> ScitexPaths -``` - -Module-level convenience function. Returns a cached singleton when called with no arguments; creates a new `ScitexPaths` instance when `base_dir` is given. - -```python -from scitex.config import get_paths - -paths = get_paths() # cached default (~/.scitex) -paths = get_paths("/data/project/.scitex") # new instance for custom root -``` - ---- - -## Global PATHS constant - -`scitex` exposes a pre-constructed `ScitexPaths` instance as `scitex.PATHS`: - -```python -import scitex - -print(scitex.PATHS.logs) # ~/.scitex/logs -print(scitex.PATHS.scholar_library) # ~/.scitex/scholar/library -print(scitex.PATHS.cache) # ~/.scitex/cache -``` - ---- - -## Directory structure - -``` -$SCITEX_DIR/ # default: ~/.scitex -├── browser/ -│ ├── persistent/ -│ ├── screenshots/ -│ └── sessions/ -├── cache/ -│ └── functions/ -├── capture/ -├── impact_factor_cache/ -├── logs/ -├── openathens_cache/ -├── rng/ -├── scholar/ -│ ├── cache/ -│ └── library/ -├── screenshots/ -├── test_monitor/ -└── writer/ -``` - ---- - -## Thread-safe multi-project usage - -Pass an explicit `base_dir` to isolate paths per user or project: - -```python -from scitex.config import ScitexPaths - -user_a = ScitexPaths(base_dir="/data/user_a/.scitex") -user_b = ScitexPaths(base_dir="/data/user_b/.scitex") - -proc_a = DataProcessor(cache_dir=user_a.cache) -proc_b = DataProcessor(cache_dir=user_b.cache) -``` diff --git a/src/scitex/config/_skills/priority-config.md b/src/scitex/config/_skills/priority-config.md deleted file mode 100644 index ced8f502b..000000000 --- a/src/scitex/config/_skills/priority-config.md +++ /dev/null @@ -1,156 +0,0 @@ ---- -description: PriorityConfig — dict-based configuration resolver with precedence hierarchy (direct > config_dict > env > default). Also covers load_dotenv and get_scitex_dir helpers. ---- - -# PriorityConfig - -Dict-based configuration resolver. Precedence order: direct > config_dict > env > default. - -Defined in `scitex/config/_PriorityConfig.py`. - ---- - -## PriorityConfig - -```python -PriorityConfig( - config_dict: dict | None = None, - env_prefix: str = "", - auto_uppercase: bool = True, -) -``` - -**Parameters** - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `config_dict` | `dict` or `None` | `None` | Base configuration values | -| `env_prefix` | `str` | `""` | Prefix prepended to env var names during lookup (e.g., `"SCITEX_"`) | -| `auto_uppercase` | `bool` | `True` | Uppercase the key when constructing the env var name | - -### resolve - -```python -resolve( - key: str, - direct_val: Any = None, - default: Any = None, - type: Type = str, - mask: bool | None = None, -) -> Any -``` - -Returns the first non-None value in priority order: - -1. `direct_val` (if not None) -2. `config_dict[key]` (if present) -3. Environment variable `{env_prefix}{KEY}` (if set) -4. `default` - -Type coercion via `type` applies only to values read from environment variables. The resolved key, source, and display value are appended to `resolution_log`. - -**Sensitive key auto-masking**: Keys containing `API`, `PASSWORD`, `SECRET`, `TOKEN`, `KEY`, `PASS`, `AUTH`, `CREDENTIAL`, `PRIVATE`, or `CERT` (case-insensitive) are masked in the log. Override with `mask=False`. - -**Env key construction**: dots in `key` become underscores: `"axes.width_mm"` → `SCITEX_AXES_WIDTH_MM`. - -**Type conversions** - -| `type` | Input | Output | -|--------|-------|--------| -| `int` | `"3"` | `3` | -| `float` | `"1.5"` | `1.5` | -| `bool` | `"true"`, `"1"`, `"yes"` | `True` | -| `list` | `"a,b,c"` | `["a", "b", "c"]` | -| `str` | anything | unchanged string | - -### Other methods - -```python -get(key: str) -> Any # Direct dict lookup (no env, no default) -print_resolutions() -> None # Print resolution log to stdout -clear_log() -> None # Clear resolution_log list -``` - -**Example** - -```python -from scitex.config import PriorityConfig -import os - -config = PriorityConfig( - config_dict={"port": 3000, "debug": True}, - env_prefix="MYAPP_", -) - -# direct_val wins -port = config.resolve("port", direct_val=9000, default=8000, type=int) -# -> 9000 - -# config_dict wins over env -port = config.resolve("port", default=8000, type=int) -# -> 3000 - -# env wins over default when config_dict has no entry -os.environ["MYAPP_TIMEOUT"] = "30" -timeout = config.resolve("timeout", default=10, type=int) -# -> 30 - -# default when nothing else set -retries = config.resolve("retries", default=3, type=int) -# -> 3 - -config.print_resolutions() -# Configuration Resolution Log: -# -------------------------------------------------- -# port = 9000 (direct) -# port = 3000 (config_dict) -# timeout = 30 (env:MYAPP_TIMEOUT) -# retries = 3 (default) -``` - ---- - -## get_scitex_dir - -```python -get_scitex_dir(direct_val: str | None = None) -> Path -``` - -Resolve the SciTeX base directory. Calls `load_dotenv()` first to pick up `.env` files, then applies priority: direct_val > `SCITEX_DIR` env var > `~/.scitex`. - -```python -from scitex.config import get_scitex_dir - -base = get_scitex_dir() # -> ~/.scitex (default) -base = get_scitex_dir("/data/scitex") # -> /data/scitex (direct override) -# SCITEX_DIR=/mnt/nas python script.py # -> /mnt/nas (env override) -``` - ---- - -## load_dotenv - -```python -load_dotenv(dotenv_path: str | None = None) -> bool -``` - -Load environment variables from a `.env` file. Already-set shell variables are **not** overridden (env takes precedence over `.env`). - -Search order when `dotenv_path` is `None`: -1. `./.env` (current working directory) -2. `~/.env` (home directory) - -Returns `True` if a file was found and loaded, `False` otherwise. - -Supports: -- `KEY=value` -- `export KEY=value` -- Single and double quoted values -- `#` comment lines and blank lines - -```python -from scitex.config import load_dotenv - -loaded = load_dotenv() # auto-search -loaded = load_dotenv("/etc/scitex.env") # explicit path -``` diff --git a/src/scitex/config/_skills/scitex-config.md b/src/scitex/config/_skills/scitex-config.md deleted file mode 100644 index 6fb5564a0..000000000 --- a/src/scitex/config/_skills/scitex-config.md +++ /dev/null @@ -1,201 +0,0 @@ ---- -description: ScitexConfig — YAML-based configuration manager with ${VAR:-default} environment variable substitution and priority resolution (direct > config > env > default). Covers get_config and load_yaml. ---- - -# ScitexConfig - -YAML-based configuration manager. Loads `default.yaml` (or a custom file), substitutes `${VAR:-default}` expressions at load time, then exposes values via `get()` and `resolve()`. - -Defined in `scitex/config/_ScitexConfig.py`. - -Priority order: direct > config (YAML) > env > default. - ---- - -## ScitexConfig - -```python -ScitexConfig( - config_path: str | Path | None = None, - env_prefix: str = "SCITEX_", -) -``` - -**Parameters** - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `config_path` | `str`, `Path`, or `None` | `None` | Path to a custom YAML file. If `None` or path does not exist, uses the bundled `default.yaml`. | -| `env_prefix` | `str` | `"SCITEX_"` | Prefix for environment variable lookups inside `PriorityConfig`. | - -On construction: -1. `load_dotenv()` is called (loads `.env` if present) -2. YAML is loaded with `${VAR:-default}` substitution -3. Nested keys are flattened to dot-notation (`logging.level`, `debug.enabled`, etc.) -4. A `PriorityConfig` instance is created with the flat dict - -### get - -```python -get(key: str, default: Any = None) -> Any -``` - -Direct lookup in the flattened YAML dict. No env or priority logic. Supports dot-notation keys. - -```python -config = ScitexConfig() -config.get("logging.level") # -> "INFO" (from YAML) -config.get("debug.enabled") # -> False -config.get("missing.key", "x") # -> "x" -``` - -### resolve - -```python -resolve( - key: str, - direct_val: Any = None, - default: Any = None, - type: Type = str, -) -> Any -``` - -Resolves with full precedence: - -1. `direct_val` (if not None) -2. YAML config value for `key` -3. `SCITEX_{KEY_UPPER}` environment variable (dots become underscores) -4. `default` - -```python -config = ScitexConfig() - -# From YAML default -level = config.resolve("logging.level", default="WARNING") -# -> "INFO" (YAML wins over default) - -# Direct value overrides everything -level = config.resolve("logging.level", direct_val="DEBUG") -# -> "DEBUG" - -# Type conversion on env var values -max_size = config.resolve("logging.max_file_size_mb", default=5, type=int) -# -> 10 (from YAML, already an int — no conversion needed) -``` - -### get_nested - -```python -get_nested(*keys: str, default: Any = None) -> Any -``` - -Traverse the original nested YAML structure. Useful when a key maps to a dict sub-tree rather than a scalar. - -```python -config = ScitexConfig() -config.get_nested("browser", "screenshots_dir") -# -> None (value from YAML, not yet substituted path) - -config.get_nested("ui", "level_backends", "error") -# -> ["audio", "desktop", "email"] -``` - -### Properties - -| Property | Type | Description | -|----------|------|-------------| -| `config_path` | `Path` | Path to the loaded YAML file | -| `raw` | `dict` | Original nested dict from YAML | -| `flat` | `dict` | Flattened dot-notation dict | - -### print - -```python -print() -> None -``` - -Delegates to `PriorityConfig.print_resolutions()` — shows how each key was resolved. - ---- - -## get_config - -```python -get_config(config_path: str | Path | None = None) -> ScitexConfig -``` - -Module-level convenience function. Returns a cached singleton when called with no arguments; creates a new instance when `config_path` is given. - -```python -from scitex.config import get_config - -config = get_config() # cached default (default.yaml) -config = get_config("/project/my_config.yaml") # new instance from custom file -``` - ---- - -## load_yaml - -```python -load_yaml(path: Path) -> dict -``` - -Load a YAML file with `${VAR:-default}` environment variable substitution. This is a standalone function used by `ScitexConfig.__init__`. - -Substitution rules: -- `${VAR}` — replaced by `os.getenv("VAR")` or `null` if unset -- `${VAR:-default}` — replaced by `os.getenv("VAR", "default")` -- Boolean literals `true`/`false` and `null` are preserved as YAML types - -Requires `pyyaml` (`pip install pyyaml`). - -```python -from scitex.config import load_yaml -from pathlib import Path - -data = load_yaml(Path("config/settings.yaml")) -# Returns a plain dict with all ${...} expressions substituted -``` - ---- - -## default.yaml structure - -The bundled `default.yaml` covers: - -```yaml -scitex_dir: ${SCITEX_DIR:-"~/.scitex"} - -logging: - level: ${SCITEX_LOG_LEVEL:-"INFO"} - format: ${SCITEX_LOG_FORMAT:-"%(asctime)s - %(name)s - %(levelname)s - %(message)s"} - file_logging: ${SCITEX_FILE_LOGGING:-true} - max_file_size_mb: ${SCITEX_LOG_MAX_SIZE_MB:-10} - backup_count: ${SCITEX_LOG_BACKUP_COUNT:-5} - -debug: - enabled: ${SCITEX_DEBUG:-false} - verbose: ${SCITEX_VERBOSE:-false} - capture_screenshots: ${SCITEX_CAPTURE_SCREENSHOTS:-true} - -browser: - base_dir: ${SCITEX_BROWSER_DIR:-null} - screenshots_dir: ${SCITEX_BROWSER_SCREENSHOTS_DIR:-null} - sessions_dir: ${SCITEX_BROWSER_SESSIONS_DIR:-null} - persistent_dir: ${SCITEX_BROWSER_PERSISTENT_DIR:-null} - -scholar: - base_dir: ${SCITEX_SCHOLAR_DIR:-null} - cache_dir: ${SCITEX_SCHOLAR_CACHE_DIR:-null} - library_dir: ${SCITEX_SCHOLAR_LIBRARY_DIR:-null} - -writer: - base_dir: ${SCITEX_WRITER_DIR:-null} - -ui: - default_backend: ${SCITEX_UI_DEFAULT_BACKEND:-"audio"} - # ... backend_priority list, level_backends, timeouts -``` - -Dot-notation after flattening: `logging.level`, `debug.enabled`, `browser.base_dir`, `scholar.library_dir`, etc. diff --git a/src/scitex/config/default.yaml b/src/scitex/config/default.yaml deleted file mode 100644 index 79a84ab8a..000000000 --- a/src/scitex/config/default.yaml +++ /dev/null @@ -1,112 +0,0 @@ -# Timestamp: "2025-12-09 (ywatanabe)" -# File: /home/ywatanabe/proj/scitex-code/src/scitex/config/default.yaml - -# ---------------------------------------- -# SciTeX Global Configuration -# ---------------------------------------- -# All paths can be overridden via environment variables -# Format: ${ENV_VAR:-default_value} - -# ---------------------------------------- -# Base Directory -# ---------------------------------------- -scitex_dir: ${SCITEX_DIR:-"~/.scitex"} - -# ---------------------------------------- -# Core Directories -# ---------------------------------------- -logs_dir: ${SCITEX_LOGS_DIR:-null} -cache_dir: ${SCITEX_CACHE_DIR:-null} -capture_dir: ${SCITEX_CAPTURE_DIR:-null} -screenshots_dir: ${SCITEX_SCREENSHOTS_DIR:-null} -rng_dir: ${SCITEX_RNG_DIR:-null} - -# ---------------------------------------- -# Browser Module -# ---------------------------------------- -browser: - base_dir: ${SCITEX_BROWSER_DIR:-null} - screenshots_dir: ${SCITEX_BROWSER_SCREENSHOTS_DIR:-null} - sessions_dir: ${SCITEX_BROWSER_SESSIONS_DIR:-null} - persistent_dir: ${SCITEX_BROWSER_PERSISTENT_DIR:-null} - -# ---------------------------------------- -# Test Monitoring -# ---------------------------------------- -test_monitor: - output_dir: ${SCITEX_TEST_MONITOR_DIR:-null} - interval: ${SCITEX_TEST_MONITOR_INTERVAL:-2.0} - quality: ${SCITEX_TEST_MONITOR_QUALITY:-70} - -# ---------------------------------------- -# Cache Settings -# ---------------------------------------- -cache: - function_cache_dir: ${SCITEX_FUNCTION_CACHE_DIR:-null} - impact_factor_cache_dir: ${SCITEX_IMPACT_FACTOR_CACHE_DIR:-null} - openathens_cache_dir: ${SCITEX_OPENATHENS_CACHE_DIR:-null} - -# ---------------------------------------- -# Scholar Module -# ---------------------------------------- -scholar: - base_dir: ${SCITEX_SCHOLAR_DIR:-null} - cache_dir: ${SCITEX_SCHOLAR_CACHE_DIR:-null} - library_dir: ${SCITEX_SCHOLAR_LIBRARY_DIR:-null} - -# ---------------------------------------- -# Writer Module -# ---------------------------------------- -writer: - base_dir: ${SCITEX_WRITER_DIR:-null} - -# ---------------------------------------- -# Logging Configuration -# ---------------------------------------- -logging: - level: ${SCITEX_LOG_LEVEL:-"INFO"} - format: ${SCITEX_LOG_FORMAT:-"%(asctime)s - %(name)s - %(levelname)s - %(message)s"} - file_logging: ${SCITEX_FILE_LOGGING:-true} - max_file_size_mb: ${SCITEX_LOG_MAX_SIZE_MB:-10} - backup_count: ${SCITEX_LOG_BACKUP_COUNT:-5} - -# ---------------------------------------- -# Debug Settings -# ---------------------------------------- -debug: - enabled: ${SCITEX_DEBUG:-false} - verbose: ${SCITEX_VERBOSE:-false} - capture_screenshots: ${SCITEX_CAPTURE_SCREENSHOTS:-true} - -# ---------------------------------------- -# UI Notification Settings -# ---------------------------------------- -ui: - default_backend: ${SCITEX_UI_DEFAULT_BACKEND:-"audio"} - backend_priority: - - audio - - desktop - - matplotlib - - playwright - - email - - webhook - level_backends: - info: - - audio - warning: - - audio - - desktop - error: - - audio - - desktop - - email - critical: - - audio - - desktop - - matplotlib - - email - timeouts: - matplotlib: ${SCITEX_UI_TIMEOUT_MATPLOTLIB:-5.0} - playwright: ${SCITEX_UI_TIMEOUT_PLAYWRIGHT:-5.0} - -# EOF diff --git a/tests/scitex/config/__init__.py b/tests/scitex/config/__init__.py deleted file mode 100644 index f3c27e4c0..000000000 --- a/tests/scitex/config/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Tests for scitex.config module diff --git a/tests/scitex/config/test__PriorityConfig.py b/tests/scitex/config/test__PriorityConfig.py deleted file mode 100644 index d4bd37898..000000000 --- a/tests/scitex/config/test__PriorityConfig.py +++ /dev/null @@ -1,582 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Timestamp: "2025-12-09" -# File: ./tests/scitex/config/test__PriorityConfig.py - -"""Tests for PriorityConfig class and load_dotenv, get_scitex_dir functions.""" - -import os -import tempfile -from pathlib import Path - -import pytest - -from scitex.config import PriorityConfig, get_scitex_dir, load_dotenv - - -class TestPriorityConfigBasic: - """Basic PriorityConfig functionality tests.""" - - def test_initialization(self): - """Test PriorityConfig can be initialized.""" - config = PriorityConfig() - assert config is not None - - def test_initialization_with_dict(self): - """Test initialization with config dict.""" - config = PriorityConfig(config_dict={"port": 3000}) - assert config.get("port") == 3000 - - def test_initialization_with_prefix(self): - """Test initialization with env prefix.""" - config = PriorityConfig(env_prefix="TEST_") - assert config.env_prefix == "TEST_" - - def test_repr(self): - """Test string representation.""" - config = PriorityConfig(config_dict={"a": 1, "b": 2}, env_prefix="APP_") - repr_str = repr(config) - assert "APP_" in repr_str - assert "2" in repr_str - - -class TestPriorityConfigResolution: - """Test priority resolution order: direct → config_dict → env → default.""" - - def test_direct_value_highest_priority(self): - """Test direct value takes highest priority.""" - config = PriorityConfig(config_dict={"port": 3000}, env_prefix="TEST_") - result = config.resolve("port", direct_val=9000, default=8000) - assert result == 9000 - - def test_config_dict_over_env(self): - """Test config_dict takes priority over env.""" - os.environ["TEST_PORT"] = "5000" - try: - config = PriorityConfig(config_dict={"port": 3000}, env_prefix="TEST_") - result = config.resolve("port", default=8000) - assert result == 3000 - finally: - del os.environ["TEST_PORT"] - - def test_env_over_default(self): - """Test env takes priority over default.""" - os.environ["TEST_HOST"] = "localhost" - try: - config = PriorityConfig(env_prefix="TEST_") - result = config.resolve("host", default="0.0.0.0") - assert result == "localhost" - finally: - del os.environ["TEST_HOST"] - - def test_default_fallback(self): - """Test default is used when nothing else available.""" - config = PriorityConfig(env_prefix="TEST_") - result = config.resolve("unknown_key", default="fallback") - assert result == "fallback" - - -class TestPriorityConfigTypeConversion: - """Test type conversion in resolve().""" - - def test_int_conversion(self): - """Test integer type conversion.""" - os.environ["TEST_COUNT"] = "42" - try: - config = PriorityConfig(env_prefix="TEST_") - result = config.resolve("count", default=0, type=int) - assert result == 42 - assert isinstance(result, int) - finally: - del os.environ["TEST_COUNT"] - - def test_float_conversion(self): - """Test float type conversion.""" - os.environ["TEST_RATE"] = "3.14" - try: - config = PriorityConfig(env_prefix="TEST_") - result = config.resolve("rate", default=0.0, type=float) - assert result == 3.14 - finally: - del os.environ["TEST_RATE"] - - def test_bool_conversion_true(self): - """Test boolean true conversion.""" - for true_val in ["true", "1", "yes"]: - os.environ["TEST_DEBUG"] = true_val - try: - config = PriorityConfig(env_prefix="TEST_") - result = config.resolve("debug", default=False, type=bool) - assert result is True - finally: - del os.environ["TEST_DEBUG"] - - def test_list_conversion(self): - """Test list type conversion.""" - os.environ["TEST_ITEMS"] = "a,b,c" - try: - config = PriorityConfig(env_prefix="TEST_") - result = config.resolve("items", default=[], type=list) - assert result == ["a", "b", "c"] - finally: - del os.environ["TEST_ITEMS"] - - -class TestPriorityConfigSensitiveValues: - """Test sensitive value masking.""" - - def test_sensitive_key_masked(self): - """Test sensitive keys are automatically masked.""" - config = PriorityConfig(config_dict={"api_key": "secret123"}) - config.resolve("api_key", default="") - log_entry = config.resolution_log[0] - assert log_entry["value"] != "secret123" - - def test_mask_override_false(self): - """Test mask=False overrides automatic masking.""" - config = PriorityConfig(config_dict={"api_key": "secret123"}) - config.resolve("api_key", default="", mask=False) - log_entry = config.resolution_log[0] - assert log_entry["value"] == "secret123" - - -class TestLoadDotenv: - """Test load_dotenv() function.""" - - def test_load_dotenv_from_explicit_path(self): - """Test loading .env from explicit path.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".env", delete=False) as f: - f.write("TEST_DOTENV_VAR=explicit_value\n") - temp_path = f.name - - if "TEST_DOTENV_VAR" in os.environ: - del os.environ["TEST_DOTENV_VAR"] - - try: - result = load_dotenv(temp_path) - assert result is True - assert os.environ.get("TEST_DOTENV_VAR") == "explicit_value" - finally: - os.unlink(temp_path) - if "TEST_DOTENV_VAR" in os.environ: - del os.environ["TEST_DOTENV_VAR"] - - def test_load_dotenv_returns_false_for_nonexistent(self): - """Test load_dotenv returns False for nonexistent file.""" - result = load_dotenv("/nonexistent/path/.env") - assert result is False - - def test_load_dotenv_skips_comments(self): - """Test load_dotenv skips comment lines.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".env", delete=False) as f: - f.write("# Comment\n") - f.write("TEST_COMMENT_VAR=value\n") - temp_path = f.name - - if "TEST_COMMENT_VAR" in os.environ: - del os.environ["TEST_COMMENT_VAR"] - - try: - load_dotenv(temp_path) - assert os.environ.get("TEST_COMMENT_VAR") == "value" - finally: - os.unlink(temp_path) - if "TEST_COMMENT_VAR" in os.environ: - del os.environ["TEST_COMMENT_VAR"] - - def test_load_dotenv_handles_export_prefix(self): - """Test load_dotenv handles 'export' prefix.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".env", delete=False) as f: - f.write("export TEST_EXPORT_VAR=exported_value\n") - temp_path = f.name - - if "TEST_EXPORT_VAR" in os.environ: - del os.environ["TEST_EXPORT_VAR"] - - try: - load_dotenv(temp_path) - assert os.environ.get("TEST_EXPORT_VAR") == "exported_value" - finally: - os.unlink(temp_path) - if "TEST_EXPORT_VAR" in os.environ: - del os.environ["TEST_EXPORT_VAR"] - - def test_load_dotenv_removes_quotes(self): - """Test load_dotenv removes quotes from values.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".env", delete=False) as f: - f.write('TEST_QUOTE_VAR="quoted value"\n') - temp_path = f.name - - if "TEST_QUOTE_VAR" in os.environ: - del os.environ["TEST_QUOTE_VAR"] - - try: - load_dotenv(temp_path) - assert os.environ.get("TEST_QUOTE_VAR") == "quoted value" - finally: - os.unlink(temp_path) - if "TEST_QUOTE_VAR" in os.environ: - del os.environ["TEST_QUOTE_VAR"] - - def test_load_dotenv_does_not_override_existing_env(self): - """Test load_dotenv does not override existing env vars.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".env", delete=False) as f: - f.write("TEST_EXISTING_VAR=from_dotenv\n") - temp_path = f.name - - os.environ["TEST_EXISTING_VAR"] = "from_shell" - - try: - load_dotenv(temp_path) - assert os.environ.get("TEST_EXISTING_VAR") == "from_shell" - finally: - os.unlink(temp_path) - del os.environ["TEST_EXISTING_VAR"] - - -class TestGetScitexDir: - """Test get_scitex_dir() function.""" - - def test_get_scitex_dir_default(self): - """Test get_scitex_dir returns default ~/.scitex.""" - original = os.environ.pop("SCITEX_DIR", None) - try: - result = get_scitex_dir() - assert result == Path.home() / ".scitex" - finally: - if original: - os.environ["SCITEX_DIR"] = original - - def test_get_scitex_dir_from_env(self): - """Test get_scitex_dir uses SCITEX_DIR env var.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["SCITEX_DIR"] = tmpdir - try: - result = get_scitex_dir() - assert result == Path(tmpdir) - finally: - del os.environ["SCITEX_DIR"] - - def test_get_scitex_dir_direct_value_highest(self): - """Test get_scitex_dir with direct value takes precedence.""" - with tempfile.TemporaryDirectory() as env_dir: - with tempfile.TemporaryDirectory() as direct_dir: - os.environ["SCITEX_DIR"] = env_dir - try: - result = get_scitex_dir(direct_val=direct_dir) - assert result == Path(direct_dir) - finally: - del os.environ["SCITEX_DIR"] - - def test_get_scitex_dir_expands_user(self): - """Test get_scitex_dir expands ~ in direct value.""" - result = get_scitex_dir(direct_val="~/custom_scitex") - assert "~" not in str(result) - - -if __name__ == "__main__": - import os - - import pytest - - pytest.main([os.path.abspath(__file__)]) - -# -------------------------------------------------------------------------------- -# Start of Source Code from: /home/ywatanabe/proj/scitex-code/src/scitex/config/_PriorityConfig.py -# -------------------------------------------------------------------------------- -# #!/usr/bin/env python3 -# # -*- coding: utf-8 -*- -# # Timestamp: "2025-12-09 (ywatanabe)" -# # File: /home/ywatanabe/proj/scitex-code/src/scitex/config/PriorityConfig.py -# -# -# """ -# Priority-based configuration resolver. -# -# Provides clean precedence hierarchy: direct → config_dict → env → default -# -# Based on priority-config by ywatanabe (https://github.com/ywatanabe1989/priority-config) -# Incorporated into scitex for self-contained configuration management. -# -# Note: config_dict values (from YAML or passed dict) take priority over -# environment variables. This follows the Scholar module's CascadeConfig pattern. -# """ -# -# import os -# from pathlib import Path -# from typing import Dict -# from typing import List -# from typing import Optional, Type, Any -# -# -# def load_dotenv(dotenv_path: Optional[str] = None) -> bool: -# """Load environment variables from .env file. -# -# Searches for .env file in the following order: -# 1. Explicit dotenv_path if provided -# 2. Current working directory -# 3. User home directory -# -# Parameters -# ---------- -# dotenv_path : str, optional -# Path to .env file. If None, searches default locations. -# -# Returns -# ------- -# bool -# True if .env file was found and loaded, False otherwise. -# """ -# paths_to_try = [] -# -# if dotenv_path: -# paths_to_try.append(Path(dotenv_path)) -# else: -# # Default search paths -# paths_to_try.extend( -# [ -# Path.cwd() / ".env", -# Path.home() / ".env", -# ] -# ) -# -# for path in paths_to_try: -# if path.exists() and path.is_file(): -# try: -# with open(path, "r") as f: -# for line in f: -# line = line.strip() -# # Skip empty lines and comments -# if not line or line.startswith("#"): -# continue -# # Handle export prefix -# if line.startswith("export "): -# line = line[7:] -# # Parse key=value -# if "=" in line: -# key, _, value = line.partition("=") -# key = key.strip() -# value = value.strip() -# # Remove quotes if present -# if (value.startswith('"') and value.endswith('"')) or ( -# value.startswith("'") and value.endswith("'") -# ): -# value = value[1:-1] -# # Only set if not already in environment (env takes precedence) -# if key not in os.environ: -# os.environ[key] = value -# return True -# except Exception: -# continue -# return False -# -# -# def get_scitex_dir(direct_val: Optional[str] = None) -> Path: -# """Get SCITEX_DIR with priority: direct → env → default. -# -# This is a convenience function for the most common use case. -# -# Parameters -# ---------- -# direct_val : str, optional -# Direct value (highest precedence) -# -# Returns -# ------- -# Path -# Resolved SCITEX_DIR path -# """ -# # Try to load .env first (won't override existing env vars) -# load_dotenv() -# -# if direct_val is not None: -# return Path(direct_val).expanduser() -# -# env_val = os.getenv("SCITEX_DIR") -# if env_val: -# return Path(env_val).expanduser() -# -# return Path.home() / ".scitex" -# -# -# class PriorityConfig: -# """Universal config resolver with precedence: direct → config_dict → env → default -# -# Config dict (from YAML or passed dict) takes priority over env variables. -# This follows the Scholar module's CascadeConfig pattern. -# -# Examples -# -------- -# >>> from scitex.config import PriorityConfig -# >>> config = PriorityConfig(config_dict={"port": 3000}, env_prefix="SCITEX_") -# >>> port = config.resolve("port", None, default=8000, type=int) -# 3000 # from config_dict (highest after direct) -# >>> # With env: SCITEX_PORT=5000 python script.py -# >>> port = config.resolve("port", None, default=8000, type=int) -# 3000 # config_dict takes precedence over env -# >>> port = config.resolve("port", 9000, default=8000, type=int) -# 9000 # direct value takes highest precedence -# """ -# -# SENSITIVE_EXPRESSIONS = [ -# "API", -# "PASSWORD", -# "SECRET", -# "TOKEN", -# "KEY", -# "PASS", -# "AUTH", -# "CREDENTIAL", -# "PRIVATE", -# "CERT", -# ] -# -# def __init__( -# self, -# config_dict: Optional[Dict[str, Any]] = None, -# env_prefix: str = "", -# auto_uppercase: bool = True, -# ): -# """Initialize PriorityConfig. -# -# Parameters -# ---------- -# config_dict : dict, optional -# Dictionary with configuration values -# env_prefix : str -# Prefix for environment variables (e.g., "SCITEX_") -# auto_uppercase : bool -# Whether to uppercase keys for env lookup -# """ -# self.config_dict = config_dict or {} -# self.env_prefix = env_prefix -# self.auto_uppercase = auto_uppercase -# self.resolution_log: List[Dict[str, Any]] = [] -# -# def __repr__(self) -> str: -# return f"PriorityConfig(prefix='{self.env_prefix}', configs={len(self.config_dict)})" -# -# def get(self, key: str) -> Any: -# """Get value from config dict only.""" -# return self.config_dict.get(key) -# -# def resolve( -# self, -# key: str, -# direct_val: Any = None, -# default: Any = None, -# type: Type = str, -# mask: Optional[bool] = None, -# ) -> Any: -# """Get value with precedence hierarchy. -# -# Precedence: direct → config_dict → env → default -# -# This follows the Scholar module's CascadeConfig pattern where -# config dict takes higher priority than environment variables. -# -# Parameters -# ---------- -# key : str -# Configuration key to resolve -# direct_val : Any, optional -# Direct value (highest precedence) -# default : Any, optional -# Default value if not found elsewhere -# type : Type -# Type conversion (str, int, float, bool, list) -# mask : bool, optional -# Override automatic masking of sensitive values -# -# Returns -# ------- -# Any -# Resolved configuration value -# """ -# source = None -# final_value = None -# -# # Replace dots with underscores for env key (e.g., axes.width_mm -> AXES_WIDTH_MM) -# normalized_key = key.replace(".", "_") -# env_key = f"{self.env_prefix}{normalized_key.upper() if self.auto_uppercase else normalized_key}" -# env_val = os.getenv(env_key) -# -# # Priority: direct → config_dict → env → default -# if direct_val is not None: -# source = "direct" -# final_value = direct_val -# elif key in self.config_dict: -# source = "config_dict" -# final_value = self.config_dict[key] -# elif env_val: -# source = f"env:{env_key}" -# final_value = self._convert_type(env_val, type) -# else: -# source = "default" -# final_value = default -# -# if mask is False: -# should_mask = False -# else: -# should_mask = self._is_sensitive(key) -# -# display_value = self._mask_value(final_value) if should_mask else final_value -# -# self.resolution_log.append( -# { -# "key": key, -# "source": source, -# "value": display_value, -# "type": type.__name__, -# } -# ) -# -# return final_value -# -# def print_resolutions(self) -> None: -# """Print how each config was resolved.""" -# if not self.resolution_log: -# print("No configurations resolved yet") -# return -# -# print("Configuration Resolution Log:") -# print("-" * 50) -# for entry in self.resolution_log: -# print(f"{entry['key']:<20} = {entry['value']:<20} ({entry['source']})") -# -# def clear_log(self) -> None: -# """Clear resolution log.""" -# self.resolution_log = [] -# -# def _convert_type(self, value: str, type: Type) -> Any: -# """Convert string value to specified type.""" -# if type == int: -# return int(value) -# elif type == float: -# return float(value) -# elif type == bool: -# return value.lower() in ("true", "1", "yes") -# elif type == list: -# return value.split(",") -# return value -# -# def _is_sensitive(self, key: str) -> bool: -# """Check if key contains sensitive expressions.""" -# key_upper = key.upper() -# return any(expr in key_upper for expr in self.SENSITIVE_EXPRESSIONS) -# -# def _mask_value(self, value: Any) -> str: -# """Mask sensitive values for display.""" -# if value is None: -# return None -# value_str = str(value) -# if len(value_str) <= 4: -# return "****" -# return value_str[:2] + "*" * (len(value_str) - 4) + value_str[-2:] -# -# -# # EOF - -# -------------------------------------------------------------------------------- -# End of Source Code from: /home/ywatanabe/proj/scitex-code/src/scitex/config/_PriorityConfig.py -# -------------------------------------------------------------------------------- diff --git a/tests/scitex/config/test__ScitexConfig.py b/tests/scitex/config/test__ScitexConfig.py deleted file mode 100644 index aa0d13241..000000000 --- a/tests/scitex/config/test__ScitexConfig.py +++ /dev/null @@ -1,682 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Timestamp: "2025-12-09" -# File: ./tests/scitex/config/test__ScitexConfig.py - -"""Tests for ScitexConfig class and related functions.""" - -import os -import tempfile -from pathlib import Path - -import pytest - -from scitex.config import ScitexConfig, get_config, load_yaml - - -class TestLoadYaml: - """Test load_yaml() function.""" - - def test_load_yaml_basic(self): - """Test basic YAML loading.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("key: value\nnumber: 42\n") - temp_path = f.name - - try: - result = load_yaml(Path(temp_path)) - assert result["key"] == "value" - assert result["number"] == 42 - finally: - os.unlink(temp_path) - - def test_load_yaml_nested(self): - """Test loading nested YAML structure.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("parent:\n child: value\n nested:\n deep: content\n") - temp_path = f.name - - try: - result = load_yaml(Path(temp_path)) - assert result["parent"]["child"] == "value" - assert result["parent"]["nested"]["deep"] == "content" - finally: - os.unlink(temp_path) - - def test_load_yaml_env_substitution_with_default(self): - """Test ${VAR:-default} syntax substitution.""" - # Ensure env var is not set - if "TEST_YAML_VAR" in os.environ: - del os.environ["TEST_YAML_VAR"] - - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write('value: ${TEST_YAML_VAR:-"default_value"}\n') - temp_path = f.name - - try: - result = load_yaml(Path(temp_path)) - assert result["value"] == "default_value" - finally: - os.unlink(temp_path) - - def test_load_yaml_env_substitution_with_env(self): - """Test env var substitution when var is set.""" - os.environ["TEST_YAML_VAR2"] = "from_env" - - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write('value: ${TEST_YAML_VAR2:-"default"}\n') - temp_path = f.name - - try: - result = load_yaml(Path(temp_path)) - assert result["value"] == "from_env" - finally: - os.unlink(temp_path) - del os.environ["TEST_YAML_VAR2"] - - def test_load_yaml_boolean_values(self): - """Test boolean value handling in env substitution.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("enabled: true\ndisabled: false\n") - temp_path = f.name - - try: - result = load_yaml(Path(temp_path)) - assert result["enabled"] is True - assert result["disabled"] is False - finally: - os.unlink(temp_path) - - def test_load_yaml_nonexistent_file(self): - """Test error handling for nonexistent file.""" - with pytest.raises(ValueError): - load_yaml(Path("/nonexistent/path/config.yaml")) - - -class TestScitexConfigBasic: - """Basic ScitexConfig functionality tests.""" - - def test_initialization_default(self): - """Test ScitexConfig can be initialized with defaults.""" - config = ScitexConfig() - assert config is not None - - def test_initialization_with_custom_path(self): - """Test initialization with custom config path.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("custom_key: custom_value\n") - temp_path = f.name - - try: - config = ScitexConfig(config_path=temp_path) - assert config.get("custom_key") == "custom_value" - assert config.config_path == Path(temp_path) - finally: - os.unlink(temp_path) - - def test_initialization_with_env_prefix(self): - """Test initialization with custom env prefix.""" - config = ScitexConfig(env_prefix="CUSTOM_") - assert config is not None - - def test_repr(self): - """Test string representation.""" - config = ScitexConfig() - repr_str = repr(config) - assert "ScitexConfig" in repr_str - assert "path=" in repr_str - - -class TestScitexConfigFlattenDict: - """Test dictionary flattening functionality.""" - - def test_flatten_simple(self): - """Test flattening simple nested dict.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("parent:\n child: value\n") - temp_path = f.name - - try: - config = ScitexConfig(config_path=temp_path) - assert config.get("parent.child") == "value" - finally: - os.unlink(temp_path) - - def test_flatten_deep_nesting(self): - """Test flattening deeply nested dict.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("a:\n b:\n c:\n d: deep_value\n") - temp_path = f.name - - try: - config = ScitexConfig(config_path=temp_path) - assert config.get("a.b.c.d") == "deep_value" - finally: - os.unlink(temp_path) - - -class TestScitexConfigGet: - """Test get() method.""" - - def test_get_existing_key(self): - """Test getting existing key.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("test_key: test_value\n") - temp_path = f.name - - try: - config = ScitexConfig(config_path=temp_path) - assert config.get("test_key") == "test_value" - finally: - os.unlink(temp_path) - - def test_get_nonexistent_key_returns_none(self): - """Test getting nonexistent key returns None.""" - config = ScitexConfig() - assert config.get("nonexistent_key") is None - - def test_get_with_default(self): - """Test getting nonexistent key with default.""" - config = ScitexConfig() - assert config.get("nonexistent_key", default="fallback") == "fallback" - - -class TestScitexConfigResolve: - """Test resolve() method with priority order.""" - - def test_resolve_direct_value_highest(self): - """Test direct value takes highest priority.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("test_key: from_config\n") - temp_path = f.name - - try: - config = ScitexConfig(config_path=temp_path) - result = config.resolve( - "test_key", direct_val="from_direct", default="from_default" - ) - assert result == "from_direct" - finally: - os.unlink(temp_path) - - def test_resolve_config_over_env(self): - """Test config takes priority over env.""" - os.environ["SCITEX_TEST_KEY"] = "from_env" - - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("test_key: from_config\n") - temp_path = f.name - - try: - config = ScitexConfig(config_path=temp_path) - result = config.resolve("test_key", default="from_default") - assert result == "from_config" - finally: - os.unlink(temp_path) - del os.environ["SCITEX_TEST_KEY"] - - def test_resolve_env_over_default(self): - """Test env takes priority over default.""" - os.environ["SCITEX_MISSING_KEY"] = "from_env" - - try: - config = ScitexConfig() - result = config.resolve("missing_key", default="from_default") - assert result == "from_env" - finally: - del os.environ["SCITEX_MISSING_KEY"] - - def test_resolve_default_fallback(self): - """Test default is used when nothing else available.""" - config = ScitexConfig() - result = config.resolve("totally_unknown", default="fallback_value") - assert result == "fallback_value" - - def test_resolve_type_conversion_int(self): - """Test type conversion to int.""" - os.environ["SCITEX_INT_VAL"] = "42" - - try: - config = ScitexConfig() - result = config.resolve("int_val", default=0, type=int) - assert result == 42 - assert isinstance(result, int) - finally: - del os.environ["SCITEX_INT_VAL"] - - def test_resolve_type_conversion_bool(self): - """Test type conversion to bool.""" - os.environ["SCITEX_BOOL_VAL"] = "true" - - try: - config = ScitexConfig() - result = config.resolve("bool_val", default=False, type=bool) - assert result is True - finally: - del os.environ["SCITEX_BOOL_VAL"] - - -class TestScitexConfigGetNested: - """Test get_nested() method.""" - - def test_get_nested_simple(self): - """Test getting nested value.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("parent:\n child: nested_value\n") - temp_path = f.name - - try: - config = ScitexConfig(config_path=temp_path) - result = config.get_nested("parent", "child") - assert result == "nested_value" - finally: - os.unlink(temp_path) - - def test_get_nested_deep(self): - """Test getting deeply nested value.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("a:\n b:\n c: deep\n") - temp_path = f.name - - try: - config = ScitexConfig(config_path=temp_path) - result = config.get_nested("a", "b", "c") - assert result == "deep" - finally: - os.unlink(temp_path) - - def test_get_nested_missing_returns_default(self): - """Test get_nested returns default for missing path.""" - config = ScitexConfig() - result = config.get_nested("missing", "path", default="default_val") - assert result == "default_val" - - -class TestScitexConfigProperties: - """Test ScitexConfig properties.""" - - def test_raw_property(self): - """Test raw property returns original nested dict.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("parent:\n child: value\n") - temp_path = f.name - - try: - config = ScitexConfig(config_path=temp_path) - raw = config.raw - assert isinstance(raw, dict) - assert "parent" in raw - assert raw["parent"]["child"] == "value" - finally: - os.unlink(temp_path) - - def test_flat_property(self): - """Test flat property returns flattened dict.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("parent:\n child: value\n") - temp_path = f.name - - try: - config = ScitexConfig(config_path=temp_path) - flat = config.flat - assert isinstance(flat, dict) - assert "parent.child" in flat - assert flat["parent.child"] == "value" - finally: - os.unlink(temp_path) - - -class TestGetConfig: - """Test get_config() convenience function.""" - - def test_get_config_returns_instance(self): - """Test get_config returns ScitexConfig instance.""" - config = get_config() - assert isinstance(config, ScitexConfig) - - def test_get_config_with_path(self): - """Test get_config with custom path.""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: - f.write("custom: value\n") - temp_path = f.name - - try: - config = get_config(config_path=temp_path) - assert config.get("custom") == "value" - finally: - os.unlink(temp_path) - - -if __name__ == "__main__": - import os - - import pytest - - pytest.main([os.path.abspath(__file__)]) - -# -------------------------------------------------------------------------------- -# Start of Source Code from: /home/ywatanabe/proj/scitex-code/src/scitex/config/_ScitexConfig.py -# -------------------------------------------------------------------------------- -# #!/usr/bin/env python3 -# # -*- coding: utf-8 -*- -# # Timestamp: "2025-12-09 (ywatanabe)" -# # File: /home/ywatanabe/proj/scitex-code/src/scitex/config/ScitexConfig.py -# -# """ -# YAML-based configuration for SciTeX with environment variable substitution. -# -# Similar to ScholarConfig, provides: -# - YAML configuration loading -# - Environment variable substitution (${VAR:-default} syntax) -# - Cascade resolution (direct → config → env → default) -# -# Usage: -# from scitex.config import ScitexConfig -# -# # Load default configuration -# config = ScitexConfig() -# -# # Load custom configuration -# config = ScitexConfig(config_path="/path/to/config.yaml") -# -# # Resolve values with precedence -# log_level = config.resolve("logging.level", default="INFO") -# """ -# -# import os -# import re -# from pathlib import Path -# from typing import Any, Dict, Optional, Type, Union -# -# from ._PriorityConfig import PriorityConfig, load_dotenv -# -# -# def load_yaml(path: Path) -> dict: -# """Load YAML file with environment variable substitution. -# -# Supports ${VAR:-default} syntax for environment variable expansion. -# -# Parameters -# ---------- -# path : Path -# Path to YAML file -# -# Returns -# ------- -# dict -# Parsed YAML with environment variables substituted -# """ -# try: -# import yaml -# except ImportError: -# raise ImportError( -# "PyYAML required for YAML config. Install with: pip install pyyaml" -# ) -# -# try: -# with open(path) as f: -# content = f.read() -# -# def env_replacer(match): -# """Replace ${VAR:-default} with environment variable or default.""" -# env_expr = match.group(1) -# if ":-" in env_expr: -# var_name, default_value = env_expr.split(":-", 1) -# value = os.getenv(var_name, default_value.strip('"')) -# else: -# value = os.getenv(env_expr) -# -# # Handle special values -# if value in ["true", "false"]: -# return value -# elif value == "null": -# return "null" -# elif value and not (value.startswith('"') and value.endswith('"')): -# return f'"{value}"' -# else: -# return value or "null" -# -# content = re.sub(r"\$\{([^}]+)\}", env_replacer, content) -# return yaml.safe_load(content) -# except Exception as e: -# raise ValueError(f"Failed to load YAML config from {path}: {e}") -# -# -# class ScitexConfig: -# """YAML-based configuration manager for SciTeX. -# -# Loads configuration from YAML files with environment variable substitution. -# Values can be resolved with priority: direct → config → env → default. -# -# Examples -# -------- -# >>> from scitex.config import ScitexConfig -# >>> config = ScitexConfig() -# >>> config.resolve("logging.level", default="INFO") -# 'INFO' -# >>> config.get("debug.enabled") -# False -# """ -# -# def __init__( -# self, -# config_path: Optional[Union[str, Path]] = None, -# env_prefix: str = "SCITEX_", -# ): -# """Initialize ScitexConfig. -# -# Parameters -# ---------- -# config_path : str or Path, optional -# Path to custom YAML config file. If None, uses default.yaml. -# env_prefix : str -# Prefix for environment variables (default: "SCITEX_") -# """ -# # Load .env file first -# load_dotenv() -# -# # Load YAML configuration -# if config_path and Path(config_path).exists(): -# self._config_data = load_yaml(Path(config_path)) -# self._config_path = Path(config_path) -# else: -# default_path = Path(__file__).parent / "default.yaml" -# if default_path.exists(): -# self._config_data = load_yaml(default_path) -# else: -# self._config_data = {} -# self._config_path = default_path -# -# # Flatten nested config for easy access -# self._flat_config = self._flatten_dict(self._config_data) -# -# # Initialize PriorityConfig for resolution -# self._priority_config = PriorityConfig( -# config_dict=self._flat_config, -# env_prefix=env_prefix, -# ) -# -# def _flatten_dict(self, d: dict, parent_key: str = "", sep: str = ".") -> dict: -# """Flatten nested dictionary with dot notation keys. -# -# Parameters -# ---------- -# d : dict -# Dictionary to flatten -# parent_key : str -# Parent key for recursion -# sep : str -# Separator for nested keys -# -# Returns -# ------- -# dict -# Flattened dictionary -# """ -# items = [] -# for k, v in d.items(): -# new_key = f"{parent_key}{sep}{k}" if parent_key else k -# if isinstance(v, dict): -# items.extend(self._flatten_dict(v, new_key, sep).items()) -# else: -# items.append((new_key, v)) -# return dict(items) -# -# def get(self, key: str, default: Any = None) -> Any: -# """Get value from config directly (no precedence resolution). -# -# Supports dot notation for nested keys. -# -# Parameters -# ---------- -# key : str -# Configuration key (e.g., "logging.level" or "debug.enabled") -# default : Any -# Default value if key not found -# -# Returns -# ------- -# Any -# Configuration value -# """ -# return self._flat_config.get(key, default) -# -# def resolve( -# self, -# key: str, -# direct_val: Any = None, -# default: Any = None, -# type: Type = str, -# ) -> Any: -# """Resolve value with precedence: direct → config → env → default. -# -# This follows the Scholar module's CascadeConfig pattern where -# YAML config takes higher priority than environment variables. -# -# Parameters -# ---------- -# key : str -# Configuration key (e.g., "logging.level") -# direct_val : Any -# Direct value (highest precedence) -# default : Any -# Default value (lowest precedence) -# type : Type -# Type conversion (str, int, float, bool, list) -# -# Returns -# ------- -# Any -# Resolved value -# """ -# # Priority: direct → config → env → default -# # (Same as Scholar's CascadeConfig pattern) -# if direct_val is not None: -# return direct_val -# -# # Config (YAML) takes priority over env -# config_val = self._flat_config.get(key) -# if config_val is not None: -# return config_val -# -# # Then check environment variable -# normalized_key = key.replace(".", "_") -# env_key = f"SCITEX_{normalized_key.upper()}" -# env_val = os.getenv(env_key) -# if env_val: -# return self._convert_type(env_val, type) -# -# return default -# -# def _convert_type(self, value: str, type: Type) -> Any: -# """Convert string value to specified type.""" -# if type == int: -# return int(value) -# elif type == float: -# return float(value) -# elif type == bool: -# return value.lower() in ("true", "1", "yes") -# elif type == list: -# return value.split(",") -# return value -# -# def get_nested(self, *keys: str, default: Any = None) -> Any: -# """Get nested value from original config structure. -# -# Parameters -# ---------- -# *keys : str -# Keys to traverse (e.g., "browser", "screenshots_dir") -# default : Any -# Default value if not found -# -# Returns -# ------- -# Any -# Nested value -# """ -# current = self._config_data -# for key in keys: -# if isinstance(current, dict) and key in current: -# current = current[key] -# else: -# return default -# return current -# -# @property -# def config_path(self) -> Path: -# """Get the path to the loaded config file.""" -# return self._config_path -# -# @property -# def raw(self) -> dict: -# """Get raw configuration data (original nested structure).""" -# return self._config_data -# -# @property -# def flat(self) -> dict: -# """Get flattened configuration data.""" -# return self._flat_config -# -# def print(self) -> None: -# """Print configuration resolution log.""" -# self._priority_config.print_resolutions() -# -# def __repr__(self) -> str: -# return f"ScitexConfig(path='{self._config_path}')" -# -# -# # Module-level convenience functions -# -# _default_config: Optional[ScitexConfig] = None -# -# -# def get_config(config_path: Optional[Union[str, Path]] = None) -> ScitexConfig: -# """Get ScitexConfig instance. -# -# Parameters -# ---------- -# config_path : str or Path, optional -# Path to custom config. If None, returns cached default instance. -# -# Returns -# ------- -# ScitexConfig -# Configuration instance -# """ -# global _default_config -# -# if config_path is not None: -# return ScitexConfig(config_path) -# -# if _default_config is None: -# _default_config = ScitexConfig() -# -# return _default_config -# -# -# # EOF - -# -------------------------------------------------------------------------------- -# End of Source Code from: /home/ywatanabe/proj/scitex-code/src/scitex/config/_ScitexConfig.py -# -------------------------------------------------------------------------------- diff --git a/tests/scitex/config/test__paths.py b/tests/scitex/config/test__paths.py deleted file mode 100644 index d44384e21..000000000 --- a/tests/scitex/config/test__paths.py +++ /dev/null @@ -1,682 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Timestamp: "2025-12-09" -# File: ./tests/scitex/config/test__paths.py - -"""Tests for ScitexPaths class and get_paths() function.""" - -import os -import tempfile -from pathlib import Path - -import pytest - -from scitex.config import ScitexPaths, get_paths - - -class TestScitexPathsBasic: - """Basic ScitexPaths functionality tests.""" - - def test_initialization_default(self): - """Test ScitexPaths can be initialized with defaults.""" - original = os.environ.pop("SCITEX_DIR", None) - try: - paths = ScitexPaths() - assert paths is not None - assert paths.base == Path.home() / ".scitex" - finally: - if original: - os.environ["SCITEX_DIR"] = original - - def test_initialization_with_base_dir(self): - """Test initialization with explicit base_dir.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.base == Path(tmpdir) - - def test_initialization_from_env(self): - """Test initialization uses SCITEX_DIR env var.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["SCITEX_DIR"] = tmpdir - try: - paths = ScitexPaths() - assert paths.base == Path(tmpdir) - finally: - del os.environ["SCITEX_DIR"] - - def test_repr(self): - """Test string representation.""" - paths = ScitexPaths() - repr_str = repr(paths) - assert "ScitexPaths" in repr_str - assert "base=" in repr_str - - -class TestScitexPathsCoreDirectories: - """Test core directory properties.""" - - def test_logs_path(self): - """Test logs directory path.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.logs == Path(tmpdir) / "logs" - - def test_cache_path(self): - """Test cache directory path.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.cache == Path(tmpdir) / "cache" - - def test_capture_path(self): - """Test capture directory path.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.capture == Path(tmpdir) / "capture" - - def test_screenshots_path(self): - """Test screenshots directory path.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.screenshots == Path(tmpdir) / "screenshots" - - def test_rng_path(self): - """Test rng directory path.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.rng == Path(tmpdir) / "rng" - - -class TestScitexPathsBrowserDirectories: - """Test browser-related directory properties.""" - - def test_browser_path(self): - """Test browser base directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.browser == Path(tmpdir) / "browser" - - def test_browser_screenshots_path(self): - """Test browser screenshots directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.browser_screenshots == Path(tmpdir) / "browser" / "screenshots" - - def test_browser_sessions_path(self): - """Test browser sessions directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.browser_sessions == Path(tmpdir) / "browser" / "sessions" - - def test_browser_persistent_path(self): - """Test browser persistent directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.browser_persistent == Path(tmpdir) / "browser" / "persistent" - - def test_test_monitor_path(self): - """Test test_monitor directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.test_monitor == Path(tmpdir) / "test_monitor" - - -class TestScitexPathsCacheDirectories: - """Test cache-related directory properties.""" - - def test_function_cache_path(self): - """Test function cache directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.function_cache == Path(tmpdir) / "cache" / "functions" - - def test_impact_factor_cache_path(self): - """Test impact factor cache directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.impact_factor_cache == Path(tmpdir) / "impact_factor_cache" - - def test_openathens_cache_path(self): - """Test openathens cache directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.openathens_cache == Path(tmpdir) / "openathens_cache" - - -class TestScitexPathsScholarDirectories: - """Test scholar-related directory properties.""" - - def test_scholar_path(self): - """Test scholar base directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.scholar == Path(tmpdir) / "scholar" - - def test_scholar_cache_path(self): - """Test scholar cache directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.scholar_cache == Path(tmpdir) / "scholar" / "cache" - - def test_scholar_library_path(self): - """Test scholar library directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.scholar_library == Path(tmpdir) / "scholar" / "library" - - -class TestScitexPathsWriterDirectories: - """Test writer-related directory properties.""" - - def test_writer_path(self): - """Test writer directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - assert paths.writer == Path(tmpdir) / "writer" - - -class TestScitexPathsResolve: - """Test resolve() method.""" - - def test_resolve_with_direct_value(self): - """Test resolve returns direct value when provided.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - custom_path = "/custom/cache/path" - result = paths.resolve("cache", direct_val=custom_path) - assert result == Path(custom_path) - - def test_resolve_without_direct_value(self): - """Test resolve returns default path when no direct value.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - result = paths.resolve("cache", direct_val=None) - assert result == Path(tmpdir) / "cache" - - def test_resolve_expands_user(self): - """Test resolve expands ~ in path.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - result = paths.resolve("logs", direct_val="~/custom_logs") - assert "~" not in str(result) - assert str(result).startswith(str(Path.home())) - - def test_resolve_various_paths(self): - """Test resolve works for various path names.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - path_names = ["logs", "cache", "browser", "scholar", "writer"] - for name in path_names: - result = paths.resolve(name) - assert isinstance(result, Path) - - def test_resolve_unknown_path_raises(self): - """Test resolve raises ValueError for unknown path.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - with pytest.raises(ValueError) as exc_info: - paths.resolve("unknown_path_name") - assert "Unknown path name" in str(exc_info.value) - - -class TestScitexPathsEnsureDir: - """Test ensure_dir() method.""" - - def test_ensure_dir_creates_directory(self): - """Test ensure_dir creates directory if not exists.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - new_dir = Path(tmpdir) / "new_subdir" - assert not new_dir.exists() - - result = paths.ensure_dir(new_dir) - assert new_dir.exists() - assert result == new_dir - - def test_ensure_dir_existing_directory(self): - """Test ensure_dir works on existing directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - result = paths.ensure_dir(Path(tmpdir)) - assert result == Path(tmpdir) - - def test_ensure_dir_nested(self): - """Test ensure_dir creates nested directories.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - nested_dir = Path(tmpdir) / "a" / "b" / "c" - assert not nested_dir.exists() - - result = paths.ensure_dir(nested_dir) - assert nested_dir.exists() - assert result == nested_dir - - -class TestScitexPathsEnsureAll: - """Test ensure_all() method.""" - - def test_ensure_all_creates_directories(self): - """Test ensure_all creates all standard directories.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - paths.ensure_all() - - # Check that key directories exist - assert paths.logs.exists() - assert paths.cache.exists() - assert paths.browser.exists() - assert paths.scholar.exists() - assert paths.writer.exists() - - -class TestScitexPathsListAll: - """Test list_all() method.""" - - def test_list_all_returns_dict(self): - """Test list_all returns dictionary.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - result = paths.list_all() - assert isinstance(result, dict) - - def test_list_all_contains_expected_keys(self): - """Test list_all contains expected path names.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - result = paths.list_all() - - expected_keys = [ - "base", - "logs", - "cache", - "function_cache", - "capture", - "screenshots", - "rng", - "browser", - "browser_screenshots", - "browser_sessions", - "browser_persistent", - "test_monitor", - "impact_factor_cache", - "openathens_cache", - "scholar", - "scholar_cache", - "scholar_library", - "writer", - ] - for key in expected_keys: - assert key in result, f"Missing key: {key}" - - def test_list_all_values_are_paths(self): - """Test list_all values are Path objects.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = ScitexPaths(base_dir=tmpdir) - result = paths.list_all() - - for key, value in result.items(): - assert isinstance(value, Path), f"{key} is not a Path" - - -class TestGetPaths: - """Test get_paths() convenience function.""" - - def test_get_paths_returns_instance(self): - """Test get_paths returns ScitexPaths instance.""" - paths = get_paths() - assert isinstance(paths, ScitexPaths) - - def test_get_paths_with_base_dir(self): - """Test get_paths with custom base_dir.""" - with tempfile.TemporaryDirectory() as tmpdir: - paths = get_paths(base_dir=tmpdir) - assert paths.base == Path(tmpdir) - - def test_get_paths_caches_default_instance(self): - """Test get_paths returns same instance when no args.""" - # Note: This test may be affected by other tests that call get_paths() - paths1 = get_paths() - paths2 = get_paths() - # Both should be ScitexPaths instances - assert isinstance(paths1, ScitexPaths) - assert isinstance(paths2, ScitexPaths) - - -if __name__ == "__main__": - import os - - import pytest - - pytest.main([os.path.abspath(__file__)]) - -# -------------------------------------------------------------------------------- -# Start of Source Code from: /home/ywatanabe/proj/scitex-code/src/scitex/config/_paths.py -# -------------------------------------------------------------------------------- -# #!/usr/bin/env python3 -# # -*- coding: utf-8 -*- -# # Timestamp: "2025-12-09 (ywatanabe)" -# # File: /home/ywatanabe/proj/scitex-code/src/scitex/config/paths.py -# -# """ -# Centralized path management for SciTeX. -# -# Provides a single source of truth for all directory paths used across -# the SciTeX ecosystem. All paths respect the SCITEX_DIR environment variable. -# -# Usage: -# from scitex.config import ScitexPaths -# -# paths = ScitexPaths() -# -# # Method 1: Direct property access (uses default) -# print(paths.logs) # ~/.scitex/logs -# print(paths.cache) # ~/.scitex/cache -# -# # Method 2: resolve() with direct value override (recommended for modules) -# cache_dir = paths.resolve("cache", direct_val=user_provided_path) -# # If user_provided_path is None -> uses default from SCITEX_DIR -# -# # Thread-safe: pass explicit base_dir -# paths = ScitexPaths(base_dir="/custom/path") -# """ -# -# import os -# from pathlib import Path -# from typing import Optional, Union -# -# from ._PriorityConfig import get_scitex_dir, load_dotenv -# -# -# class ScitexPaths: -# """Centralized path manager for SciTeX directories. -# -# All paths are derived from SCITEX_DIR (default: ~/.scitex). -# Priority: direct_val → SCITEX_DIR env → .env file → default -# -# Directory Structure: -# $SCITEX_DIR/ -# ├── browser/ # Browser profiles and data -# │ ├── screenshots/ # Browser debugging screenshots -# │ ├── sessions/ # Shared browser sessions -# │ └── persistent/ # Persistent browser profiles -# ├── cache/ # General cache -# │ └── functions/ # Function cache (joblib) -# ├── capture/ # Screen captures -# ├── impact_factor_cache/ # Impact factor data cache -# ├── logs/ # Log files -# ├── openathens_cache/ # OpenAthens auth cache -# ├── rng/ # Random number generator state -# ├── scholar/ # Scholar module data -# │ ├── cache/ # Scholar-specific cache -# │ └── library/ # PDF library -# ├── screenshots/ # General screenshots -# ├── test_monitor/ # Test monitoring screenshots -# └── writer/ # Writer module data -# """ -# -# def __init__(self, base_dir: Optional[str] = None): -# """Initialize ScitexPaths. -# -# Parameters -# ---------- -# base_dir : str, optional -# Explicit base directory. If None, uses SCITEX_DIR env var -# or falls back to ~/.scitex. -# """ -# self._base_dir = get_scitex_dir(base_dir) -# -# @property -# def base(self) -> Path: -# """Base SciTeX directory ($SCITEX_DIR or ~/.scitex).""" -# return self._base_dir -# -# # ========== Core directories ========== -# -# @property -# def logs(self) -> Path: -# """Log files directory.""" -# return self._base_dir / "logs" -# -# @property -# def cache(self) -> Path: -# """General cache directory.""" -# return self._base_dir / "cache" -# -# @property -# def capture(self) -> Path: -# """Screen capture directory.""" -# return self._base_dir / "capture" -# -# @property -# def screenshots(self) -> Path: -# """General screenshots directory.""" -# return self._base_dir / "screenshots" -# -# @property -# def rng(self) -> Path: -# """Random number generator state directory.""" -# return self._base_dir / "rng" -# -# # ========== Browser directories ========== -# -# @property -# def browser(self) -> Path: -# """Browser module base directory.""" -# return self._base_dir / "browser" -# -# @property -# def browser_screenshots(self) -> Path: -# """Browser debugging screenshots.""" -# return self.browser / "screenshots" -# -# @property -# def browser_sessions(self) -> Path: -# """Shared browser sessions.""" -# return self.browser / "sessions" -# -# @property -# def browser_persistent(self) -> Path: -# """Persistent browser profiles.""" -# return self.browser / "persistent" -# -# @property -# def test_monitor(self) -> Path: -# """Test monitoring screenshots directory.""" -# return self._base_dir / "test_monitor" -# -# # ========== Cache directories ========== -# -# @property -# def function_cache(self) -> Path: -# """Function cache (joblib memory).""" -# return self.cache / "functions" -# -# @property -# def impact_factor_cache(self) -> Path: -# """Impact factor data cache.""" -# return self._base_dir / "impact_factor_cache" -# -# @property -# def openathens_cache(self) -> Path: -# """OpenAthens authentication cache.""" -# return self._base_dir / "openathens_cache" -# -# # ========== Scholar directories ========== -# -# @property -# def scholar(self) -> Path: -# """Scholar module base directory.""" -# return self._base_dir / "scholar" -# -# @property -# def scholar_cache(self) -> Path: -# """Scholar-specific cache directory.""" -# return self.scholar / "cache" -# -# @property -# def scholar_library(self) -> Path: -# """Scholar PDF library directory.""" -# return self.scholar / "library" -# -# # ========== Writer directories ========== -# -# @property -# def writer(self) -> Path: -# """Writer module directory.""" -# return self._base_dir / "writer" -# -# # ========== Resolve method (recommended for modules) ========== -# -# def resolve( -# self, -# path_name: str, -# direct_val: Optional[Union[str, Path]] = None, -# ) -> Path: -# """Resolve a path with priority: direct_val → default from SCITEX_DIR. -# -# This is the recommended method for modules that accept optional path -# parameters. It follows the same pattern as PriorityConfig.resolve(). -# -# Parameters -# ---------- -# path_name : str -# Name of the path property (e.g., "cache", "logs", "scholar_library") -# direct_val : str or Path, optional -# Direct value (highest precedence). If None, uses default. -# -# Returns -# ------- -# Path -# Resolved path -# -# Examples -# -------- -# >>> paths = ScitexPaths() -# >>> # User didn't provide path -> use default -# >>> cache_dir = paths.resolve("cache", None) -# >>> # User provided custom path -> use it -# >>> cache_dir = paths.resolve("cache", "/custom/cache") -# -# Usage in modules: -# >>> class MyModule: -# ... def __init__(self, cache_dir=None): -# ... self.cache_dir = get_paths().resolve("cache", cache_dir) -# """ -# if direct_val is not None: -# return Path(direct_val).expanduser() -# -# # Get the default path from property -# if hasattr(self, path_name): -# return getattr(self, path_name) -# -# raise ValueError( -# f"Unknown path name: {path_name}. Available: {list(self.list_all().keys())}" -# ) -# -# # ========== Utility methods ========== -# -# def ensure_dir(self, path: Path) -> Path: -# """Ensure directory exists, creating if necessary. -# -# Parameters -# ---------- -# path : Path -# Directory path to ensure exists. -# -# Returns -# ------- -# Path -# The same path, guaranteed to exist. -# """ -# path.mkdir(parents=True, exist_ok=True) -# return path -# -# def ensure_all(self) -> None: -# """Create all standard directories.""" -# dirs = [ -# self.logs, -# self.cache, -# self.function_cache, -# self.capture, -# self.screenshots, -# self.rng, -# self.browser, -# self.browser_screenshots, -# self.browser_sessions, -# self.browser_persistent, -# self.test_monitor, -# self.impact_factor_cache, -# self.openathens_cache, -# self.scholar, -# self.scholar_cache, -# self.scholar_library, -# self.writer, -# ] -# for d in dirs: -# d.mkdir(parents=True, exist_ok=True) -# -# def list_all(self) -> dict: -# """List all configured paths. -# -# Returns -# ------- -# dict -# Dictionary of path names to Path objects. -# """ -# return { -# "base": self.base, -# "logs": self.logs, -# "cache": self.cache, -# "function_cache": self.function_cache, -# "capture": self.capture, -# "screenshots": self.screenshots, -# "rng": self.rng, -# "browser": self.browser, -# "browser_screenshots": self.browser_screenshots, -# "browser_sessions": self.browser_sessions, -# "browser_persistent": self.browser_persistent, -# "test_monitor": self.test_monitor, -# "impact_factor_cache": self.impact_factor_cache, -# "openathens_cache": self.openathens_cache, -# "scholar": self.scholar, -# "scholar_cache": self.scholar_cache, -# "scholar_library": self.scholar_library, -# "writer": self.writer, -# } -# -# def __repr__(self) -> str: -# return f"ScitexPaths(base='{self._base_dir}')" -# -# -# # Singleton instance for convenience (uses default SCITEX_DIR) -# _default_paths: Optional[ScitexPaths] = None -# -# -# def get_paths(base_dir: Optional[str] = None) -> ScitexPaths: -# """Get ScitexPaths instance. -# -# Parameters -# ---------- -# base_dir : str, optional -# Explicit base directory. If None, returns cached default instance. -# -# Returns -# ------- -# ScitexPaths -# Path manager instance. -# """ -# global _default_paths -# -# if base_dir is not None: -# return ScitexPaths(base_dir) -# -# if _default_paths is None: -# _default_paths = ScitexPaths() -# -# return _default_paths -# -# -# # EOF - -# -------------------------------------------------------------------------------- -# End of Source Code from: /home/ywatanabe/proj/scitex-code/src/scitex/config/_paths.py -# --------------------------------------------------------------------------------