diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 65cf15f..0000000 --- a/.coveragerc +++ /dev/null @@ -1,5 +0,0 @@ -[run] -branch = true -omit = - tests/* - _version.py diff --git a/.cursor/rules/bug_report.mdc b/.cursor/rules/bug_report.mdc new file mode 100644 index 0000000..a980ac1 --- /dev/null +++ b/.cursor/rules/bug_report.mdc @@ -0,0 +1,79 @@ +--- +description: Standards for writing clear, reproducible bug reports with severity, evidence, and environment details. +alwaysApply: false +--- + +# Bug Report Standards + +## 1. Core Components + +Every bug report MUST include: + +- **Clear title** — Describes the symptom and its location (e.g., "`Profile.from_file` raises KeyError on valid FITS header"). +- **Reproduction steps** — Numbered, minimal steps (ideally a short script) anyone can follow. +- **Expected vs. actual behavior** — Side-by-side comparison. +- **Environment** — Python version, OS, package version, relevant dependency versions. +- **Severity** — Assessed per the scale below. +- **Evidence** — Tracebacks, log output, or screenshots of incorrect results. + +## 2. Severity Scale + +| Level | Criteria | +|-------|----------| +| **Critical** | Crash, data corruption, silent wrong results, or security vulnerability. | +| **High** | Major feature broken or blocking for many users. | +| **Medium** | Non-critical feature broken or produces degraded results. | +| **Low** | Minor issue, documentation error, or cosmetic problem. | +| **Trivial** | Very minor issue with negligible user impact. | + +## 3. Report Template + +```markdown +# Bug Report: [Concise title] + +## Description +[1-2 sentences: what is broken and its impact.] + +## Environment +- **Python version**: [e.g., 3.12.4] +- **OS**: [e.g., Ubuntu 24.04, macOS 14.5, Windows 11] +- **Package version**: [e.g., rms-psfmodel 0.3.1] +- **Key dependency versions**: [e.g., numpy 2.2.1, scipy 1.14.0] +- **Installation method**: [e.g., pip install rms-psfmodel, editable install] + +## Severity +[Level] — [Brief justification] + +## Steps to Reproduce +1. Install: `pip install rms-psfmodel==0.3.1` +2. Run: + (Example) +3. Observe the error. + +## Expected Behavior +[What should happen.] + +## Actual Behavior +[What actually happens, including the full traceback.] + +## Traceback / Logs +[Paste full traceback or relevant log output here] + +## Additional Notes +[Workarounds, frequency, related issues.] + +## Possible Fix +[Optional: suspected root cause or fix direction.] +``` + +## 4. Writing Guidelines + +1. Be objective and factual — no blame or subjective language. +2. One issue per report. +3. Include exact version numbers and full tracebacks. +4. Keep reproduction steps as short as possible while remaining unambiguous. +5. Verify the bug is reproducible before submitting. + +## 5. Adaptation + +Adjust the template for the project's GitHub Issues and add project-specific fields (e.g., affected data set, mission, instrument). diff --git a/.cursor/rules/dependency_management.mdc b/.cursor/rules/dependency_management.mdc new file mode 100644 index 0000000..3b207f4 --- /dev/null +++ b/.cursor/rules/dependency_management.mdc @@ -0,0 +1,53 @@ +--- +alwaysApply: true +description: Standards for declaring, installing, and maintaining Python project dependencies. +--- + +# Dependency Management + +## 1. Single Source of Truth + +- Declare ALL dependencies in **`pyproject.toml`** under `[project]` (PEP 621). +- Do NOT maintain a separate hand-written `requirements.txt` for runtime dependencies. If a `requirements.txt` is kept, it should contain only `-e .` for backward compatibility. + +## 2. Dependency Groups + +| Group | Section | Install command | Purpose | +|-------|---------|-----------------|---------| +| **Runtime** | `[project].dependencies` | `pip install .` | Required for the package to function. | +| **Dev** | `[project.optional-dependencies].dev` | `pip install -e ".[dev]"` | Testing, linting, type-checking, coverage. | +| **Docs** | `[project.optional-dependencies].docs` | `pip install -e ".[docs]"` | Sphinx and documentation extensions. | + +## 3. Version Constraints + +- Specify **minimum** compatible versions for direct dependencies (e.g., `numpy>=2.2.0`). +- Do NOT pin exact versions (`==`) in library projects; exact pinning belongs in lock files or application deployments. +- For dev/docs dependencies, specify minimum versions to ensure consistent tool behavior across contributors. + +## 4. Adding or Updating Dependencies + +1. Add the dependency to the correct section in `pyproject.toml`. +2. Run `pip install -e ".[dev]"` (or the relevant group) to verify installation. +3. Run the full test suite and type-check to confirm compatibility. +4. Commit the `pyproject.toml` change with a `build:` commit type. + +## 5. Security and Maintenance + +- Run `pip audit` in CI to catch known vulnerabilities. +- Enable automated dependency update tooling (Dependabot, Renovate). +- Review update PRs for breaking changes before merging. +- Periodically remove unused dependencies to reduce attack surface. + +## 6. Tooling Configuration + +Consolidate all tool configuration into `pyproject.toml` where supported: + +| Tool | Section | +|------|---------| +| pytest | `[tool.pytest.ini_options]` | +| coverage | `[tool.coverage.run]`, `[tool.coverage.report]` | +| mypy | `[tool.mypy]`, `[[tool.mypy.overrides]]` | +| ruff | `[tool.ruff]`, `[tool.ruff.lint]` — use explicit `select = [...]` for E, F, W, I, UP, B, SIM, C4, A, N, PT, RUF (see python_best_practices) | +| setuptools_scm | `[tool.setuptools_scm]` | + +Do NOT create separate config files (`.coveragerc`, `.mypy.ini`, `.flake8`, `setup.cfg`) when the tool supports `pyproject.toml`. diff --git a/.cursor/rules/documentation.mdc b/.cursor/rules/documentation.mdc new file mode 100644 index 0000000..53a0a9a --- /dev/null +++ b/.cursor/rules/documentation.mdc @@ -0,0 +1,38 @@ +--- +alwaysApply: true +description: Standards for Python library documentation using Sphinx, ReadTheDocs, and docstrings. +--- + +# Documentation Standards + +## 1. Documentation System + +- Use **Sphinx** for all project documentation, hosted on **ReadTheDocs**. +- After any code or doc change, run `sphinx-build` on the full documentation tree and fix all warnings and errors before delivering. + +## 2. Documentation Standard + +- NEVER use unicode characters such as smart quotes, em-dashes, or arrows in source code files (such as .py) in docstrings or comments. They are acceptable in .rst and .md files. They are also acceptable in source code strings when needed to present special characters to the user such as degree signs or arrows at runtime. + +## 3. Required Documentation + +| Document | Contents | Keep up-to-date? | +|----------|----------|-------------------| +| **Module index** | Every module that exists or is planned (placeholders for future modules). | Yes | +| **Architecture overview** | Class hierarchy, public API surface, and interface contracts. | Yes | +| **Install guide** | `pip install` instructions, supported Python versions, optional dependencies. | Yes | +| **Usage examples** | Common workflows with code snippets and expected output. | Yes | +| **README** | Project summary, PyPI/ReadTheDocs badges, quickstart, and links to full docs. | Yes | + +## 4. Docstrings + +- EVERY class, method, function, and module MUST have a descriptive docstring. +- Follow **PEP 257** using **Google style** with `Parameters:` (not `Args:`). +- Include `Returns:` and `Raises:` only if there are return values or exceptions raised. +- Include behavioral notes sufficient to write a black-box test but do not reference the internal details of the code. +- Wrap docstring text to **90** characters. + +## 5. Change Discipline + +- Any code change MUST update the relevant docstrings and the README if affected. +- NEVER leave stale or contradictory documentation. If a feature is removed, remove its docs. diff --git a/.cursor/rules/environment_best_practices.mdc b/.cursor/rules/environment_best_practices.mdc new file mode 100644 index 0000000..490d8ac --- /dev/null +++ b/.cursor/rules/environment_best_practices.mdc @@ -0,0 +1,40 @@ +--- +description: Git, CI/CD (GitHub Actions), virtual environments, and tooling for development and publishing. +alwaysApply: true +--- + +# Environment Best Practices + +## 1. Source Control + +- ALWAYS use **git** for all source code. +- Commit early and often with meaningful messages (see `git_workflow.mdc`). + +## 2. CI/CD + +- ALWAYS use **GitHub Actions** for continuous integration and publishing. +- Every PR MUST pass lint (`ruff`), type-check (`mypy`), test (`pytest`), Markdown lint (`PyMarkdown`), and documentation build (`sphinx-build`) jobs before merge. +- Pin action versions to a major tag (e.g., `actions/checkout@v6`) to balance stability and security updates. +- Publishing to PyPI is triggered by creating a GitHub Release from a version tag on `main`. + +## 3. Environment Isolation + +- ALWAYS use `python -m venv` (or `virtualenv`) to create an isolated virtual environment. Activate it before any `pip install`. +- NEVER install project dependencies into the system Python. +- Record the supported Python version range in `pyproject.toml` via `requires-python` (e.g., `>=3.10`). +- Test across all supported Python versions in CI using a matrix strategy. + +## 4. Editor Settings (VSCode / Cursor) + +The repository includes `.vscode/settings.json` so all contributors get consistent formatting: + +- **Indent**: 4 spaces (no tabs). +- **Trailing whitespace**: Trimmed on save. +- **Final newline**: Exactly one newline at end of file; excess trailing blank lines removed on save. +- **Line length**: Rulers at 80, 90, and 100 characters (max 100 enforced by Ruff). + +## 5. Secrets and Configuration + +- NEVER commit secrets, tokens, or credentials. Use environment variables or GitHub Secrets. +- Use `.env` files for local development only; ensure `.env` is in `.gitignore`. +- Validate required environment variables at startup with clear error messages. diff --git a/.cursor/rules/git_workflow.mdc b/.cursor/rules/git_workflow.mdc new file mode 100644 index 0000000..8b0ec49 --- /dev/null +++ b/.cursor/rules/git_workflow.mdc @@ -0,0 +1,66 @@ +--- +description: Conventional commits, branching, and PR workflow for source control and code review. +alwaysApply: false +--- + +# Git Workflow + +## 1. Commit Messages + +Use the **Conventional Commits** format: + +``` +[()]: (50 chars max for subject) + +[Optional body — wrap at 72 chars. Explain *what* and *why*, not *how*.] + +[Optional footer — e.g., Closes #123, BREAKING CHANGE: description] +``` + +### Allowed types + +| Type | When to use | +|------|-------------| +| `feat` | New user-facing feature or public API addition. | +| `fix` | Bug fix. | +| `docs` | Documentation-only change. | +| `style` | Formatting, whitespace — no logic change. | +| `refactor` | Code restructure with no behavior change. | +| `perf` | Performance improvement. | +| `test` | Adding or updating tests only. | +| `build` | Build system or dependency change. | +| `ci` | CI/CD configuration change. | +| `chore` | Maintenance tasks that don't fit above. | + +### Rules + +- Subject line MUST be imperative mood ("Add X", not "Added X" or "Adds X"). +- Subject line MUST NOT exceed 50 characters. +- Do NOT end the subject line with a period. +- Separate subject from body with a blank line. +- Body lines MUST NOT exceed 72 characters. +- Reference related issues in the footer. +- Each commit MUST represent one logical change. Do NOT mix unrelated changes. + +## 2. Branching Strategy + +This project uses a simple two-tier branching model: + +- **`main`** — Always releasable. Protected; requires PR review and passing CI. Releases are created by tagging commits on `main`. +- **`feature/`** — New features or enhancements, branched from `main`. +- **`bugfix/`** — Bug fixes, branched from `main`. + +There are NO separate release, hotfix, or develop branches. All work merges back to `main` via pull request. + +## 3. Pull Requests and Merging + +- ALWAYS create a PR for merging into `main`; direct pushes are prohibited. +- PRs MUST pass all CI checks (lint, type-check, tests) before merge. +- Prefer **squash merge** to keep `main` history linear and readable. +- Delete the source branch after merge. + +## 4. Tagging and Releases + +- Tag releases on `main` with semantic versioning: `v..`. +- Let `setuptools_scm` derive the package version from tags automatically. +- Creating a GitHub Release from the tag triggers the PyPI publish workflow. diff --git a/.cursor/rules/how_to.mdc b/.cursor/rules/how_to.mdc new file mode 100644 index 0000000..61b4b77 --- /dev/null +++ b/.cursor/rules/how_to.mdc @@ -0,0 +1,74 @@ +--- +description: Guidelines for writing user-facing how-to documentation with steps, prerequisites, and troubleshooting. +alwaysApply: false +--- + +# How-To Documentation + +## 1. Audience and Tone + +- Write for **Python users** who are familiar with `pip` and the command line but may not know the library's internals. +- Use clear, direct language; define domain-specific terms on first use. +- Focus on what the user needs to do and what they should observe. + +## 2. Best Practices + +1. **Action-oriented title** — e.g., "How To Process a Cassini Image", not "Image Processing Overview". +2. **Brief introduction** — 1-3 sentences explaining purpose and value. +3. **Prerequisites** — Python version, package installation, required data or environment variables. +4. **Numbered steps** — One action per step in logical order. Include code snippets for API usage or CLI commands. +5. **Expected results** — State what the user should see after each significant step AND in a summary section at the end. Keep both consistent. +6. **Troubleshooting** — Common failures (import errors, missing data, version mismatches) and their fixes. +7. **Related features** — Mention next steps or related guides. + +## 3. Document Structure + +```markdown +# How To [Action] + +[1-3 sentence introduction explaining purpose and value.] + +## Prerequisites + +- Python >= 3.10 +- `pip install rms-` +- [Any required data, environment variables, or configuration] + +## Steps + +1. Import the module: + ```python + from package import SomeClass + ``` +2. [Action]. You should see [result]. +3. [Action]. + +## Expected Results + +[Summary of the successful end state — expected output, files created, etc.] + +## Troubleshooting + +- **[Problem]**: [Solution]. + +## Additional Information + +[Tips, performance notes, or links to related guides.] +``` + +## 4. Converting Technical Content + +When turning docstrings, test scripts, or internal notes into How-To guides: + +1. Identify the user-facing feature or workflow. +2. Determine the target audience (library user, CLI user, contributor). +3. Extract user actions from technical steps. +4. Translate internal terminology to user-friendly language. +5. Add code examples, expected output, and troubleshooting. + +## 5. Diagrams and Figures + +- **When to use**: Multi-step workflows, data pipelines, or architecture that is clearer as a visual. +- **Placement**: Inline, immediately after the relevant step or section. +- **Format**: Prefer Mermaid diagrams (e.g., rendered by Sphinx via `sphinxcontrib-mermaid`) for process flows. Use PNG/SVG for screenshots or data visualizations. +- **Naming**: Descriptive filenames (e.g., `backplane-pipeline.svg`). Include alt text for accessibility. diff --git a/.cursor/rules/pull_request.mdc b/.cursor/rules/pull_request.mdc new file mode 100644 index 0000000..66352f9 --- /dev/null +++ b/.cursor/rules/pull_request.mdc @@ -0,0 +1,37 @@ +--- +description: PR structure, purpose, implementation details, testing evidence, and review checklist. +alwaysApply: false +--- + +# Pull Request Standards + +## Scope of review + +Treat the PR as a **single unit of change**. The diff to review is the set of all commits on the current branch back to its **immediate root** (the merge-base with the target branch). Consider the net result of those commits together; do **not** comment on differences that exist only between commits within the PR (e.g. "you fixed X in a later commit" or "commit 2 undid part of commit 1"). Review the final state of the branch against the base. + +## Principles + +1. **Descriptive title** — Summarize the change in an imperative sentence (e.g., "Add caching to profile lookup"). +2. **Purpose first** — Explain *why* the change is needed before *how* it was done. +3. **Scope** — One logical change per PR. Split unrelated changes into separate PRs. +4. **Testing evidence** — Document automated and manual testing performed. +5. **Impact assessment** — Note potential effects on the public API, performance, or dependent packages (Potential Impacts section). +6. **Linked issues** — Reference related GitHub issues using `Closes #NNN` syntax. + +## Template + +The PR template is in `.github/pull_request_template.md` and is applied automatically when a new PR is opened. Fill out every section: + +- **Purpose** — Why the change is needed; link issue with `Closes #NNN`. +- **Changes / Implementation Details** — What changed and how it was implemented; technical approaches chosen and non-obvious design decisions. +- **Type of Change** — Check all that apply (bug fix, feature, breaking, refactor, docs, tests, CI/build). +- **Testing** — Check boxes for unit tests, integration tests, E2E tests run; describe new tests added and manual verification performed. +- **Potential Impacts** — Public API, backward compatibility, performance, downstream; write "None" if straightforward. +- **Checklist** — Style, mypy, docs, CHANGES.md, no debug code, no secrets/credentials, no warnings/errors, performance impact assessed, breaking changes flagged. +- **Notes** — Optional; delete only if not needed (tricky areas, follow-up work). + +## Guidance + +- **Library-specific** — Call out public API changes, deprecations, and migration notes in Potential Impacts. +- **Required reviewers** — Tag maintainers for changes to core modules. +- **Brevity vs. completeness** — Short enough that authors fill everything out; detailed enough for a reviewer with no other context. diff --git a/.cursor/rules/python_best_practices.mdc b/.cursor/rules/python_best_practices.mdc new file mode 100644 index 0000000..6faaa0c --- /dev/null +++ b/.cursor/rules/python_best_practices.mdc @@ -0,0 +1,144 @@ +--- +alwaysApply: true +description: Python coding standards for writing correct, readable, maintainable, and well-tested library code. +--- + +# Python Best Practices + +Apply these rules to ALL new and modified Python code. This project is a Python library published on PyPI and documented on ReadTheDocs. **Minimum Python version: 3.10.** + +## 1. Naming and Style + +- **Maximum line length**: 100 characters. Enforce via Ruff; use editor rulers at 80 and 90 as visual guides. +- **Functions and local variables**: Use `lowercase_with_underscores`. +- **Class names**: Use `TitleCase`. +- **Module-level constants (global variables)**: Use `ALL_CAPS_WITH_UNDERSCORES`. +- **Private names**: Prepend a single underscore for names that are not part of the public API: private attributes (e.g. `_cache`), module-private global variables, and non-public helper functions (e.g. `_parse_header`). Public API names have no leading underscore. +- **Built-in names**: Do NOT use variable or function names that shadow Python built-ins (e.g. `float`, `filter`, `id`, `list`, `type`). If you must use such a name, append a single underscore (e.g. `filter_`, `type_`). +- **Falsy checks**: Be explicit about what you are testing. Do NOT rely on truthiness when the intent could be ambiguous. Prefer: + - `if x is None:` for None checks (not `if not x:` when 0 or [] could occur). + - `if len(seq) == 0:` when you explicitly mean "empty sequence" and other falsy values (0, None) are not possible. + - For dicts: `if key in d:` then use `d[key]`; avoid `d.get(key)` when you need to distinguish "missing" from "present with a falsy value" unless that is the intent. +- **Explicit checks over exceptions**: Prefer explicit membership or presence checks over catching exceptions for control flow. Example: use `if "a" in b: x = b["a"]` (or a clear `get` with a sentinel) rather than `try: x = b["a"]` / `except KeyError: ...` for normal flow. Use exceptions for genuinely exceptional conditions. + +## 2. General Coding + +- NEVER include backwards-compatibility code unless explicitly requested. +- ALWAYS keep modules under 1000 lines. Split larger modules into a package with multiple files. +- ALWAYS write simple, clear code; avoid unnecessary complexity. +- NEVER hardcode magic constants. Define them as module-level constants, in a config module, or via environment variables. +- ALWAYS catch exceptions at the smallest granularity possible. Do NOT wrap large blocks in a single `try`/`except`. +- **Libraries:** Let exceptions propagate unless you are adding context, converting to a library-specific exception, or the exception represents a recoverable internal state. When re-raising, use `raise ... from` to preserve the full traceback for debugging. +- **Applications:** Do not allow uncaught exceptions to reach the top level; use a top-level handler (e.g. in the main loop or HTTP framework) so that failures are logged and the process stays predictable. In both cases, ALWAYS provide full exception information for debugging (e.g. traceback, `raise ... from` when re-raising). +- ALWAYS include meaningful, structured logging (use the `logging` module) that can be disabled or redirected. NEVER use bare `print()` for diagnostic output in library code. +- Avoid mutable global variables. If unavoidable, document purpose and limit scope. Prefer module-level constants (ALL_CAPS) or dependency injection. +- ALWAYS prefer comprehensions (list, dict, set, generator) over manual loops when the result is a new collection and the expression remains readable. +- ALWAYS make the minimal changes necessary. NEVER modify code outside the scope of the current task. +- ALWAYS apply DRY. NEVER duplicate code. Place reusable logic in a utility module. Search existing utilities before writing new functions. Parameterize utility functions to increase generality. +- ALWAYS place imports at the top of the file in three alphabetically-sorted groups separated by a blank line: (1) standard library, (2) third-party, (3) local project. When adding new code or tests, add new imports to the appropriate group at the top; do not place them adjacent to the new code. Inline imports are permitted only to avoid heavy optional dependencies (e.g., GUI libraries). +- Limit new functions to at most three positional parameters. Additional parameters MUST be keyword-only (after `*`). Choose a logical grouping of 0-3 positional parameters before enforcing keyword-only parameters. If there is no logical break within the first 3 parameters, make all parameters (after `self`) keyword-only. +- Use the Receive-an-Object, Return-an-Object (RORO) pattern when a function takes or returns more than a few related values: accept a dataclass or TypedDict and return one, rather than long positional tuples. +- NEVER use `getattr` just as a defensive measure if it is guaranteed that the object has the attribute. ALWAYS reference the attribute directly unless there is a specific reason to know the attribute may not be present. NEVER use getattr to reference the result of an `argparse` namespace. + +## 3. Public API Design + +- Clearly separate public API from internal implementation. Prefix internal functions, classes, and modules with `_`. +- Use `__all__` in `__init__.py` to explicitly declare the public API surface. +- Design for stability: think carefully before adding to the public API, because removing it later is a breaking change. +- Include a `py.typed` marker file so downstream users get type-checking support. + +## 4. Comments + +- ALWAYS write self-documenting code: meaningful names, simple structure, limited nesting. +- NEVER include comments that merely restate the code, reference user requests, or describe modification history. +- ALWAYS include comments that explain the **rationale** behind non-obvious or complex logic. +- ALWAYS preserve existing comments that are still accurate and relevant. Remove or update stale comments. + +## 5. Lint and Type Checking + +### Types + +- ALWAYS annotate all function/method parameters and return values, including `-> None` for functions (and `__init__`) that return nothing. +- Use modern generic syntax (`list[str]`, `dict[str, int]`, `X | None`) for Python 3.10+. + +### Mypy + +- ALWAYS run `mypy` on the full codebase (including tests) after changes. Fix all errors before delivering. +- NEVER add global type exclusions. "Global type exclusions" means: + - Module-level `# type: ignore` without a specific error code. + - `ignore_errors = True` in mypy config. + - Broad `exclude` patterns that skip entire packages. +- In exceptional, unfixable cases use a minimal line-level ignore: `# type: ignore[error-code] # `. + +### Ruff / Linting + +- ALWAYS include `mypy` and `ruff` in the project's dev dependencies (e.g. in `pyproject.toml`). +- ALWAYS run `ruff check` and `ruff format` on the full codebase after changes. Fix all errors. +- Follow PEP 8 for all formatting and naming conventions. +- Use the project's explicit Ruff rule set in `pyproject.toml` (see **Ruff rule categories** below). Do not disable categories that enforce project conventions (e.g. **A** for no builtin shadowing, **N** for naming). + +## 6. Docstrings + +- ALWAYS include a docstring for every module, class, function, and method. +- Follow **PEP 257** using **Google style**. Use `Parameters:` (not `Args:`). +- Include `Returns:`, `Raises:`, and any important behavioral notes. +- NEVER mention backwards compatibility or user requests in docstrings. +- Docstrings MUST be detailed enough to write a black-box test from the docstring alone. +- Wrap docstring text to **90** characters. +- ALWAYS update docstrings when the associated code changes. + +## 7. Testing + +### Test-driven development (TDD) + +- Use **test-driven development**: write tests first, then implement. Red → green → refactor. +- Write tests BEFORE implementation based on stated requirements. If requirements are unclear, ask. +- Run tests to confirm they fail, then implement, re-run, and fix until green. +- After implementation, review tests to strengthen coverage. + +### Framework + +- ALWAYS include type annotations on test functions. +- ALWAYS use `pytest` with `pytest-cov`. +- ALWAYS use `pytest` with `pytest-xdist` and run tests with "-n auto". +- ALWAYS write tests to be independent so that tests can be run in parallel. + +### Coverage and correctness + +- Target at least **90%** line coverage measured over the entire test suite (not a subset). Skipping hard-to-hit exception paths is acceptable. +- NEVER write tests whose sole purpose is exercising code paths without asserting correctness. +- Each `assert` MUST test exactly one condition (no `and` in assertions). +- ALWAYS test for precise expected values, not ranges or existence checks. If the expected value is only known after implementation, update the test accordingly. +- When multiple tests call the same function, use distinct inputs to maximize branch coverage, including edge cases and boundary values. +- If two tests invoke the same code path but assert on different parts of the result, combine them into one test. +- When testing exceptions, ALWAYS use `pytest.raises` as a context manager and assert on the exception **message content**, not just the exception type. + +### Test hygiene + +- If a test mutates a global, use a fixture or `try`/`finally` to restore the original value. +- NEVER write a test that passes by ignoring an incorrect result or swallowing an exception. If the code is wrong, leave the failing test and explain why. +- NEVER include line numbers, verbose rationale, or modification history in test comments. Keep comments to short (1-2 sentence) summaries useful for future maintainers. + +### Debugging + +- NEVER guess at bug causes. Use logic, stack traces, and targeted logging. If stuck in a fix loop, revert and re-approach from first principles. When necessary, ask for help. + +## 8. Ruff Rule Categories (Default Set) + +The template enables these Ruff lint categories in `pyproject.toml`. Use them as the default for new repos; add or ignore specific codes as needed. + +| Code | Source | Purpose | +|------|--------|---------| +| **E**, **W** | pycodestyle | Style and formatting (indent, whitespace, line length). | +| **F** | Pyflakes | Unused imports, undefined names, syntax issues. | +| **I** | isort | Import sorting and grouping. | +| **UP** | pyupgrade | Prefer modern Python (e.g. 3.10+ syntax). | +| **B** | flake8-bugbear | Common bugs (mutable defaults, assert, loop vars). | +| **SIM** | flake8-simplify | Simpler alternatives (e.g. `in` instead of `not x == y`). | +| **C4** | flake8-comprehensions | Prefer comprehensions over loops where clear. | +| **A** | flake8-builtins | No shadowing of builtins (`id`, `filter`, `type`, etc.). | +| **N** | pep8-naming | Class = TitleCase, functions/variables = lowercase_with_underscores. | +| **PT** | flake8-pytest-style | Pytest best practices (fixtures, parametrize, raises). | +| **RUF** | Ruff | Ruff-specific (e.g. unused noqa, deprecated). | + +Optional categories to consider adding later: **D** (pydocstyle) or **DOC** (pydoclint) for docstring linting; **PTH** (pathlib); **RET** (return simplification); **PERF** (perflint). Enable only if the team agrees to fix or ignore the resulting diagnostics. diff --git a/.cursor/rules/security.mdc b/.cursor/rules/security.mdc new file mode 100644 index 0000000..a26947d --- /dev/null +++ b/.cursor/rules/security.mdc @@ -0,0 +1,48 @@ +--- +alwaysApply: true +description: Security best practices for Python library development — secrets, dependencies, and defensive coding. +--- + +# Security Best Practices + +## 1. Secrets Management + +- NEVER commit secrets, API keys, tokens, passwords, or private keys to the repository. +- Store secrets in environment variables or a dedicated secrets manager (e.g., GitHub Secrets, GCP Secret Manager). +- Use `.env` files for local development ONLY. Ensure `.env` is listed in `.gitignore`. +- If a secret is accidentally committed, rotate it immediately — deleting the commit is NOT sufficient. + +## 2. Dependency Security + +- Specify minimum compatible versions for direct dependencies (e.g., `numpy>=2.2.0`) in `pyproject.toml`. +- Run `pip audit` regularly and in CI to detect known vulnerabilities. +- Enable GitHub Dependabot for automated dependency update PRs. +- Review changelogs and diffs before merging dependency updates. + +## 3. Input Validation + +- NEVER trust external input (function arguments from callers, file contents, environment variables, data from remote URLs). +- Validate inputs at the public API boundary of the library. Raise clear `ValueError` or `TypeError` exceptions for invalid arguments. +- For file paths, resolve to absolute paths and verify they remain within the expected directory (prevent path traversal). + +## 4. Safe Defaults + +- NEVER implement custom cryptography. Use standard algorithms via trusted libraries (`cryptography`, `hashlib`). +- When the library downloads or reads external data, verify integrity (checksums, expected schemas) where feasible. +- Do NOT embed credentials, default passwords, or example secrets in source code, tests, or documentation. + +## 5. Logging + +- NEVER log secrets, tokens, passwords, or full stack traces containing sensitive data. +- Sanitize PII (personally identifiable information) before logging. +- Use the `logging` module with appropriate levels so callers can control verbosity. + +## 6. Code Review Security Checklist + +When reviewing PRs, verify: + +- [ ] No secrets or credentials in code, config, or comments. +- [ ] Public API inputs are validated with clear error messages. +- [ ] New dependencies are from reputable sources and have no known CVEs. +- [ ] File operations guard against path traversal. +- [ ] Error messages do not leak internal file paths or sensitive data. diff --git a/.cursor/settings.json b/.cursor/settings.json new file mode 100644 index 0000000..1f96a68 --- /dev/null +++ b/.cursor/settings.json @@ -0,0 +1,9 @@ +{ + "editor.tabSize": 4, + "editor.insertSpaces": true, + "editor.detectIndentation": false, + "files.trimTrailingWhitespace": true, + "files.insertFinalNewline": true, + "files.trimFinalNewlines": true, + "editor.rulers": [80, 90, 100] +} diff --git a/.cursor/skills/critique-test-suite/SKILL.md b/.cursor/skills/critique-test-suite/SKILL.md new file mode 100644 index 0000000..0afdd93 --- /dev/null +++ b/.cursor/skills/critique-test-suite/SKILL.md @@ -0,0 +1,287 @@ +--- +name: critique-test-suite +description: Analyze the test suite for consistency, completeness, redundancy, parallel safety, and assertion quality. Produces a comprehensive report (no test modifications). Use when the user asks to critique tests, review the test suite, or generate a report for fixing tests. +--- + +# Critique Test Suite + +Analyze all tests in the project and produce a **report only**—do not modify any test files. The report is intended to be used as a prompt for an AI agent (or developer) to fix the tests later. + +## Scope + +- **Tests:** All files under `tests/` (pytest). +- **Fixtures:** Include `conftest.py` and any shared fixtures in the analysis. +- **Package:** Assume a standard Python package layout (e.g. `src/` with the package under test; tests in `tests/`). + +## Checklist for Analysis + +Apply these criteria when reviewing each test file and each test case. + +### 1. Return values and assertions + +- **Explicit values:** Assert exact expected values where known (e.g. `assert result == expected`, not just `assert result` or `assert result is not None`). +- **Dynamic values:** When the value is dynamic (IDs, timestamps), assert **type** and **format** (e.g. regex, enum membership) rather than only existence. +- **Collections:** Prefer asserting **exact length** (e.g. `assert len(items) == 2`) when the expected count is known; avoid only `assert len(items) >= 1` unless the count truly varies. +- **Shape:** For dicts or structured return values, assert expected keys or shape where the contract is defined (e.g. no extra keys, required keys present). + +### 2. Success and failure conditions + +- **Success paths:** Every behavior under test should have at least one test that asserts the happy-path result (return value or side effect). +- **Failure paths:** For each operation, consider: invalid arguments (TypeError, ValueError), missing data (KeyError, custom exceptions), domain-specific errors. Note missing failure cases in the report. +- **Edge cases:** Empty collections, None/optional values, boundary values (min/max length, zero, negative where invalid). + +### 3. Consistency + +- **Naming:** Test names should follow a consistent style (e.g. `test___` or `test__returns__when_`). +- **Structure:** Similar units (e.g. same module or class) should have similar test structure (success, validation error, edge case). +- **Fixtures:** Same concepts (e.g. "sample data", "minimal config") should be reused via fixtures; avoid duplicating setup logic. +- **Assertion style:** Prefer one logical assertion per concept; group related assertions consistently across files. + +### 4. Completeness + +- **Coverage map:** For each module or public API area, list which behaviors are tested and which are missing. +- **Parameters:** Arguments that affect behavior should have at least one test (valid and, where relevant, invalid). +- **Documentation:** If the project has a spec or docstrings that define behavior, note gaps between documented behavior and tests. + +### 5. Redundancy + +- **Duplicate coverage:** Identify tests that assert the same behavior in the same way; suggest merging or removing duplicates. +- **Overlap:** Note tests that are subsets of others (e.g. one test checks return type only, another checks return type and value for the same case). +- **Fixtures:** Flag repeated inline setup that could be a shared fixture. + +### 6. Parallel execution + +- **Isolation:** Tests must not depend on global state, shared mutable objects, or execution order. Note any use of module/class-level mutable state or singletons. +- **Resources:** Note any shared files, caches, or external services that could cause flakiness under `pytest -n auto`. +- **Database:** If the project uses a DB in tests, per-worker schema or transactional rollback should be used; note tests that commit data that could leak to other workers. + +### 7. Mocking and dependency isolation + +- **External services:** HTTP calls, file I/O to shared paths, or third-party APIs should be mocked in unit tests; note tests that make real external calls. +- **Time-sensitive logic:** Tests involving `datetime.now()`, `time.time()`, or expiration should freeze time (e.g. `freezegun`, `time_machine`) for determinism. +- **Pure logic:** Unit tests for pure business logic should not require a database or network; note functions that could be unit-tested but only have integration tests. +- **Environment variables:** Tests should not depend on real `.env` or env values; note tests that would fail with different env configs. +- **Patch target location:** `mock.patch` must target where the name is *looked up*, not where it is *defined* (e.g. `mock.patch("module_under_test.requests.get")`, not `mock.patch("requests.get")`). Note patches that target the wrong module. +- **`monkeypatch` vs `mock.patch` usage:** Prefer a consistent default per test file, but allow either tool where it is the clearer fit (e.g., env/process state with `monkeypatch`, call assertions/spies with `mock.patch`). Flag only inconsistent usage that reduces clarity. +- **Patch scope:** Decorator-level `mock.patch` applies for the whole test; context-manager form limits scope. Note patches broader than needed or too narrow (missing setup/teardown). +- **Mock return values:** Mocks that return `MagicMock()` by default can hide type bugs (a function expected to return `str` returns a `MagicMock` and downstream code doesn't fail because it's truthy). Note mocks in critical paths without explicit `return_value` or `side_effect`. + +### 8. Security and input validation + +- **Input validation:** Functions that accept user or external input should have tests for invalid input (wrong type, out-of-range, malicious patterns). Note missing validation tests. +- **Sensitive data:** Verify that tests do not log or assert on real secrets; test data should not contain real credentials. Note any exposure risk. +- **Path traversal / injection:** If the code handles paths or structured input, note missing tests for path traversal or injection where relevant. + +### 9. Parameterization and data-driven tests + +- **`@pytest.mark.parametrize`:** Similar test cases (e.g. multiple invalid inputs) should be parameterized instead of copy-pasted; note repeated test bodies that differ only in input. +- **Boundary values:** For numeric or length-sensitive fields, test min, max, and off-by-one values; note missing boundary tests. +- **Factories:** Test data should be created via factories or fixtures where it reduces duplication or collision risk; note tests with hard-coded values that could be shared. + +### 10. Async (if the project uses async) + +- **Async fixtures:** Fixtures returning async resources should use `@pytest_asyncio.fixture`; note misuse or sync fixtures in async test files. +- **Timeouts:** Long-running async operations should have explicit timeouts in tests; note tests that could hang. +- **Isolation:** For code that modifies shared state, note whether concurrent access is tested if relevant. + +### 11. Output and contract + +- **Return shape:** Where the public API defines a return type or shape (e.g. dataclass, TypedDict), tests should assert that shape or key fields; note tests that only spot-check. +- **Exceptions:** Verify that documented or expected exceptions are raised with correct types; note tests that only check "no exception" without testing failure paths. +- **Exception message contents:** When testing exceptions that have defined messages (e.g. validation errors), tests must assert on the **contents** of the exception message, not only that the exception was raised. Use `pytest.raises(SomeError) as exc_info` and assert on `str(exc_info.value)`. Note tests that only check exception type. + +### 12. Error handling and messages + +- **Error specificity:** Different error conditions should be distinguishable (e.g. by exception type or message); note tests that only check "an exception was raised" without verifying which one. +- **Exception propagation:** For unit tests of code that raises, verify that exceptions are raised with correct types and messages; note missing exception tests. +- **Message assertion:** When exceptions have defined messages, assert on message content (e.g. `pytest.raises(...) as exc_info`, then `assert "expected substring" in str(exc_info.value)`). + +### 13. State and workflow + +- **State transitions:** For code with status or lifecycle (e.g. state machine, pipeline stage), test valid and invalid transitions; note missing transition tests. +- **Idempotency:** Operations that should be idempotent should be tested for repeated calls; note missing idempotency tests. +- **Side effects:** Actions that trigger side effects (e.g. callbacks, file writes) should verify those occur; note untested side effects. + +### 14. Test data and fixtures + +- **Realistic data:** Test data should be realistic enough to catch edge cases (e.g. Unicode, long strings); note tests using only trivial data. +- **Cleanup:** Tests that create external resources (files, temp dirs) must clean up; note tests that leak state. +- **Fixture scope:** Fixtures should use the narrowest appropriate scope (`function` > `class` > `module` > `session`); note overly broad scopes that could cause isolation issues. +- **Conftest hierarchy:** Fixtures should live in the `conftest.py` closest to where they're used — a root `conftest.py` with dozens of unrelated fixtures is a smell. Note fixtures that belong in a subdirectory conftest or in the test file itself. +- **Autouse fixtures:** `@pytest.fixture(autouse=True)` hides dependencies — a test silently depends on setup it doesn't request. Note autouse fixtures and whether they're justified (e.g. DB cleanup is reasonable; injecting test data for every test is not). +- **Fixture visibility:** Note fixtures defined in a deep conftest but used only in one test file (move to the file) and fixtures duplicated across files that should be in conftest. +- **Fixture depth:** Deep fixture-depends-on-fixture chains (3+ levels) are hard to trace and debug; note such chains. + +### 15. Flakiness indicators + +- **Time-based assertions:** Tests asserting on wall-clock time are flaky; note and suggest freezing time. +- **Order dependence:** Tests that pass only when run in a specific order indicate shared state; note such patterns. +- **External dependencies:** Tests depending on network, file system state, or external services are flaky in CI; note and suggest mocking. +- **Random data:** Tests using `random` or `uuid4` for assertions without seeding are non-deterministic; note and suggest seeding or fixed values. + +### 16. Regression and documentation + +- **Bug reference:** Tests written to reproduce bugs should reference the issue in docstring or comment; note regression tests that lack context. +- **Spec alignment:** Tests should map to documented behavior (docstrings, specs); note tests for undocumented behavior or missing tests for documented behavior. +- **Deprecation warnings:** If deprecated APIs exist, tests should verify warnings are emitted using `pytest.warns(DeprecationWarning)` (or `FutureWarning`). Note deprecated APIs that lack warning-emission tests. +- **`filterwarnings` configuration:** Check whether `filterwarnings = ["error"]` (or equivalent) is set in pytest config to surface unexpected warnings as test failures. Without it, new warnings go unnoticed. Note if missing. +- **Warning noise:** Note unexpected warnings emitted during the test run that are silently swallowed. A clean run should produce no unhandled warnings. + +### 17. Other good practices + +- **Independence:** Each test should be runnable in isolation; document any hidden dependencies (e.g. "must run after X"). +- **Clarity:** Test names and docstrings should describe intent; report tests whose purpose is unclear. +- **Speed:** Note slow tests (e.g. many I/O calls, sleeps) that could be sped up with mocks or smaller scope. +- **Assertion messages:** Use clear messages where it helps (e.g. `assert x == y, f"Expected {x} to equal {y}"`); note assertions that would be hard to debug on failure. +- **Single responsibility:** Each test should verify one behavior; note tests that assert unrelated things or have multiple "acts". +- **Arrange-Act-Assert:** Tests should follow AAA pattern; note tests with interleaved setup and assertions. +- **Keep test logic minimal:** Avoid complex control flow in tests. Simple loops and branching are acceptable when they improve clarity (e.g., table-driven checks); flag only logic that obscures intent or masks failures. + +### 18. Code coverage + +- **Target:** At least 90% line coverage for the package under test (or the project's stated target). +- **Scope:** Coverage should cover almost all non-exception lines; exception branches may be excluded from the percentage but should still be tested where they represent distinct behavior. +- **Measurement:** Coverage must be checked by running the **entire test suite** (e.g. `pytest tests/ --cov=src --cov-report=term-missing`), not a subset. Note if 90% is met and whether measurement is full-suite. +- **Report:** List modules or packages below the target or with significant uncovered non-exception lines. + +### 19. Pytest markers and registration + +- **Marker registration:** All custom marks must be registered in `pyproject.toml` under `[tool.pytest.ini_options] markers = [...]`. Unregistered marks are silently ignored unless `--strict-markers` is enabled — a typo like `@pytest.mark.solw` means the mark has no effect. Note unregistered marks. +- **`--strict-markers`:** Check whether it is enabled in pytest config. If not, note that marker typos will go undetected. +- **`xfail` audit:** `@pytest.mark.xfail` should document a known issue with a linked ticket and use `strict=True` where the failure is expected to persist. Note `xfail` tests that now pass (missing `strict=True`) or that lack an issue reference — they may be masking real bugs. +- **`skip`/`skipif` audit:** Check whether skip conditions are still valid. Old `skipif` for Python 3.8 when the project requires `>=3.10` is dead code. Note stale skips. +- **Categorization marks:** Note whether `@pytest.mark.slow` or `@pytest.mark.integration` marks exist so developers can run fast subsets (`pytest -m "not slow"`). If all tests run at the same speed this is not needed, but if some tests are noticeably slower, suggest marking them. + +### 20. Test boundary (public API vs internals) + +- **Importing private names:** Tests that `from src.package._internal import _helper` are tightly coupled to implementation details and break on refactors. Note tests importing `_`-prefixed modules, classes, or functions. +- **Testing through the public API:** Prefer testing via the public surface (`__all__`, documented functions). Tests that only exercise internals give false confidence — the public API could be broken while internal tests pass. Note modules where only internals are tested. +- **Over-mocking:** Tests that mock so many internals that they're testing the mock setup, not the code. Note tests where more than half the function's collaborators are mocked, especially if the function under test is small. + +### 21. Logging assertions + +- **`caplog` usage:** Functions that log errors, warnings, or important info should have tests verifying log output via `caplog`. Note functions with `logger.error()` or `logger.warning()` calls that have no corresponding `caplog` assertion in tests. +- **Log level verification:** When testing logged output, verify the message is at the expected level (e.g. an error condition logs at `ERROR`, not `INFO`). Note tests that check message text but not level. +- **Absence of logging:** Some code paths should explicitly *not* produce warnings or errors during normal operation. Note where this is important but untested. + +### 22. Pytest configuration + +- **Config file discovery:** Pytest loads **at most one** config file for a given root directory. It scans in this **fixed precedence order** and uses the **first matching** file (the first that exists and qualifies); options are **not** merged from multiple files. Order: (1) `pytest.toml`, (2) `.pytest.toml`, (3) `pytest.ini`, (4) `.pytest.ini`, (5) `pyproject.toml` (only if it contains `[tool.pytest]` or `[tool.pytest.ini_options]`), (6) `tox.ini` (only if it contains a `[pytest]` section), (7) `setup.cfg` (only if it contains a `[tool:pytest]` section). +- **`testpaths` and discovery options:** In the active config file, check that `testpaths` is set (without it, pytest collects from the entire repo — slow and may find stray test files). Check `python_files`, `python_classes`, `python_functions` if non-standard naming is used. +- **Plugin inventory:** Note installed pytest plugins that are unused (slow startup) and useful plugins that are missing (e.g. `pytest-xdist` for parallelism, `pytest-randomly` for order-independence testing). +- **`addopts`:** Are default options sensible? Suggest `--strict-markers`, `--strict-config`, `-q`, and `-W error::DeprecationWarning` if not present. +- **Ignored duplicate configs:** If more than one qualifying file exists in the same directory, lower-precedence files are **silently ignored** (e.g. `pyproject.toml` pytest settings have no effect when `pytest.ini` or `pytest.toml` wins). Note redundant or dead config that maintainers may think is active. + +### 23. Snapshot and golden-file testing + +- **Complex output:** Functions that return large dicts, dataclass trees, serialized formats (JSON, YAML), or rendered text are hard to assert inline. Note where snapshot testing (e.g. `syrupy`) would be more maintainable than dozens of field-level assertions. +- **Golden file management:** If snapshot or golden files exist, check: are they committed to the repo? Is there a CI step to detect stale snapshots? Note missing update procedures. +- **Over-use:** Snapshot tests can become "approve and forget." Note if snapshots are used extensively but there is no evidence of intentional review on change. + +## Output: Report Format + +Produce a single markdown report with the following structure. Do **not** edit any test files; only write the report. + +```markdown +# Test Suite Critique Report + +**Generated:** [date] +**Scope:** tests/ (and conftest.py) + +## Executive summary +- Overall assessment (strengths, main gaps). +- **Coverage:** At least 90% and almost all non-exception lines; measured by running the **entire test suite**. Note if met and whether measurement is full-suite. +- **Exception messages:** When testing exceptions with defined messages, tests must assert on message contents (e.g. `pytest.raises(...) as exc_info`, `str(exc_info.value)`), not only that the exception was raised. +- High-priority fixes vs. nice-to-have. + +## 1. Return values and assertions +[Existence-only asserts; exact length vs >=; shape checks.] + +## 2. Success and failure conditions +[Per module/area: what's tested, what's missing (validation, exceptions, edge cases).] + +## 3. Consistency +[Naming, structure, fixture usage, assertion style.] + +## 4. Completeness +[Coverage map; spec/docstring gaps.] + +## 5. Redundancy +[Duplicate or overlapping tests with file:test references.] + +## 6. Parallel execution +[Global state, order dependence, shared resources.] + +## 7. Mocking and dependency isolation +[Real external calls, time-sensitive tests, env dependencies, patch targets, mock return values.] + +## 8. Security and input validation +[Missing validation tests, sensitive data, injection/traversal.] + +## 9. Parameterization +[Tests that could be parameterized; missing boundary tests.] + +## 10. Async (if applicable) +[Async fixture issues, timeouts, isolation.] + +## 11. Output and contract +[Return shape, exception types, message assertions.] + +## 12. Error handling +[Error specificity; exception message content assertions.] + +## 13. State and workflow +[Transitions, idempotency, side effects.] + +## 14. Test data and fixtures +[Realistic data, cleanup, fixture scope, conftest hierarchy, autouse, fixture depth.] + +## 15. Flakiness indicators +[Time, order, external deps, randomness.] + +## 16. Regression and documentation +[Bug references, spec alignment, deprecation warnings, filterwarnings config.] + +## 17. Other +[Clarity, speed, assertion messages, AAA, logic in tests.] + +## 18. Code coverage +[Target 90%; full-suite measurement; modules below target.] + +## 19. Pytest markers +[Unregistered marks, strict-markers, xfail audit, stale skips, categorization.] + +## 20. Test boundary +[Private imports, public API coverage, over-mocking.] + +## 21. Logging assertions +[caplog usage, log level checks, absence-of-logging tests.] + +## 22. Pytest configuration +[Config discovery precedence and first file only; testpaths; plugins; addopts; ignored duplicate configs.] + +## 23. Snapshot and golden-file testing +[Complex output candidates, golden file management, over-use.] + +## Prompt for an AI agent to fix tests + +[Self-contained prompt for an AI to apply the fixes. Include: +- Report sections as context. +- **Coverage:** Run coverage using the entire test suite; ensure at least 90% and cover almost all non-exception lines. +- **Exception messages:** When testing exceptions with defined messages, assert on message contents (e.g. `pytest.raises(...) as exc_info`, `str(exc_info.value)`). +- Instruction to fix tests according to the report without changing production code. +- Instruction to preserve existing passing behavior and only add/change assertions and test structure.] +``` + +## Execution steps + +1. **Gather:** List all test files under `tests/` and any `conftest.py`. Read pytest config from the **first matching** file in this **fixed precedence** order (only that file is applied; other qualifying files in the same directory are ignored): `pytest.toml`, `.pytest.toml`, `pytest.ini`, `.pytest.ini`, `pyproject.toml` (only if it contains `[tool.pytest]` or `[tool.pytest.ini_options]`), `tox.ini` (only if it contains `[pytest]`), `setup.cfg` (only if it contains `[tool:pytest]`). Use that file for markers and `addopts`. For plugins, check declared entry points in dependencies, the PYTEST_PLUGINS environment variable, and any pytest_plugins references in conftest.py files. +2. **Read:** For each file, read test names, docstrings, assertion patterns (focus on `assert`, return checks, fixtures, marks, `mock.patch`, `monkeypatch`, `caplog`, `pytest.warns`). +3. **Classify:** For each criterion (1–23), note specific file names, test names, and line references or short quotes. +4. **Write:** Produce the full report in the format above, including the "Prompt for an AI agent" section at the end. +5. **Do not:** Change, add, or remove any line in any test or conftest file. + +## When to use this skill + +- User asks to "critique the test suite", "review the tests", "analyze tests", or "generate a report to fix tests". +- User wants a "prompt for an AI to fix the tests" based on the current test suite. diff --git a/.cursor/skills/python-codebase-analysis/SKILL.md b/.cursor/skills/python-codebase-analysis/SKILL.md new file mode 100644 index 0000000..a4235c9 --- /dev/null +++ b/.cursor/skills/python-codebase-analysis/SKILL.md @@ -0,0 +1,189 @@ +--- +name: python-codebase-analysis +description: Analyzes a Python codebase and produces high-level recommendations for restructuring, refactoring, and alignment with modern best practices. Use when the user asks to analyze the codebase, audit code quality, suggest improvements, refactoring ideas, or assess maintainability, performance, testability, or technical debt. +--- + +# Python Codebase Analysis + +Produce a structured analysis and recommendations report. Do not implement changes unless the user asks; focus on **high-level findings and actionable suggestions**. + +## Workflow + +1. **Scope**: Confirm or infer scope (whole repo, a package, or a path). Default to the project root. +2. **Explore**: Scan layout (directories, key config files), entry points, tests, and docs. Use list_dir, grep, and semantic search; avoid reading every file. +3. **Assess**: Evaluate each dimension below. Note evidence (file paths, patterns) and severity (critical / high / medium / low). +4. **Synthesize**: Write the report using the output template. Prioritize by impact and effort; group related items. + +## Dimensions to Assess + +### 1. Structure and layout + +- Package/module boundaries: clear separation, no circular imports, src-layout vs flat. +- File and module size: modules > ~500–1000 lines; single-file "god" modules. +- Naming: consistent with language norms (e.g. Python: lowercase_with_underscores, TitleCase for classes). +- Dead or orphaned code: unused modules, commented-out blocks, unreachable branches. +- Duplication: copy-paste, similar logic that could be shared (DRY). + +**Evidence**: Paths, line counts, import graphs if available. + +### 2. Best practices alignment + +Compare against project rules when present (e.g. `.cursor/rules/python_best_practices.mdc`). Check: + +- Naming (builtin shadowing, private `_` prefix, ALL_CAPS for module-level constants). +- Explicit checks vs exception-based control flow; falsy checks (`is None`, `len(x) == 0`). +- Imports: top of file, grouped and sorted; no wildcard imports. +- Function shape: ≤3 positional args, keyword-only for the rest. Return an object rather than a tuple of many results. +- Constants: no magic numbers/strings; config or env for tunables. +- Error handling: narrow try/except; no bare except; logging over print in libraries. +- Public API: clear `__all__`, `py.typed` for typed packages, separation of public vs `_private`. +- Library hygiene: use `logging.getLogger(__name__)`, never configure the root logger, set `NullHandler` in top-level `__init__.py`. No `print()` in library code (only in explicit CLI entry points). No `sys.exit()` in library code; raise exceptions instead. +- Error message quality: exceptions include enough context to diagnose (`ValueError("x must be positive, got -3")` not `ValueError("bad value")`). Custom base exception class (e.g. `class psfmodelError(Exception)`) so callers can catch library errors specifically. Appropriate use of `warnings.warn()` with `DeprecationWarning`/`FutureWarning` for planned changes. +- Encoding and I/O: explicit `encoding='utf-8'` on `open()` calls (platform default varies). Consistent use of `pathlib.Path` over `os.path` string manipulation. Accept `str | Path` in public API. Context managers for all files and connections. + +**Evidence**: Rule name or quote, example file:line or pattern. Grep for `print(`, `sys.exit`, `sys.stdout`, `open(` without `encoding=`, `logging.basicConfig` in non-CLI code. + +### 3. Types and static checks + +- Type coverage: annotations on public API and new code; use of `Any`, untyped defs. +- Mypy (or equivalent): strictness, per-file overrides, global ignores. +- Linting: Ruff/Flake8/Pylint enabled; which rules; consistent formatting (e.g. Ruff format / Black). +- Docstrings: presence, format (e.g. Google), consistency with signatures and behavior. + +**Evidence**: Config files, sample of annotated vs unannotated code. + +### 4. Testing + +- Structure: tests colocated or in `tests/`; mirror of source layout; naming (`test_*`). +- Coverage: approximate line/branch coverage; untested modules or critical paths. +- Quality: one assertion per test; no tests that ignore results or swallow exceptions; use of parametrize/fixtures; independence and parallelizability. +- Gaps: missing edge cases, error paths, or integration tests for key flows. + +**Evidence**: `pytest.ini`/`pyproject.toml`, coverage report or commands, example test file. + +### 5. Performance and resource use + +- Hot paths: unnecessary work in loops, repeated allocations, O(n²) or worse algorithms where it matters. +- I/O: blocking calls in async code; missing timeouts; large files read into memory. +- Caching: repeated computation or lookups that could be cached or memoized. +- Dependencies: heavy or unused libraries; optional features that could be lazy-loaded. +- Concurrency and thread safety: module-level mutable state (dicts, lists, caches) without locking. Lazy-initialized globals that are not thread-safe. Whether the library documents its thread-safety guarantees (or lack thereof). Reentrancy issues in functions that modify shared state. + +**Evidence**: File:line or function name; no profiling required unless user provides data. Grep for module-level mutable assignments (e.g. `_cache = {}`, `_registry = []`). + +### 6. Maintainability and extensibility + +- Coupling: tight dependencies between modules; hard-coded dependencies instead of injection. +- Cohesion: modules/classes with a single responsibility; clear boundaries. +- Extensibility: adding features without editing many files; use of hooks, plugins, or strategy-style patterns where appropriate. +- Documentation quality: README accuracy (do install/usage instructions match the current API?). Sphinx build health (does it pass with `-W`?). Public API coverage in docs (every public class/function in `__all__` should appear in Sphinx `automodule`/`autofunction`). Broken cross-references or missing doc pages for public modules. + +**Evidence**: Import structure, example functions or classes. Compare `__all__` exports against Sphinx `.. automodule` directives. Check README examples against actual API. + +### 7. Security and robustness + +- Input validation: external input (CLI, files, env) validated at boundaries; no trust of caller data in libraries. +- Secrets: no credentials in code or logs; use of env or secret managers. +- Dependency hygiene: known vulnerable deps (`pip audit` / Dependabot); pinned or minimum versions. +- Paths and execution: path traversal risks; subprocess/shell usage and injection. + +**Evidence**: Grep for patterns (e.g. `password`, `secret`, `eval`, `subprocess` with `shell=True`). + +### 8. Dependencies and tooling + +- Declared deps: single source of truth (e.g. `pyproject.toml`); optional groups (dev, docs). +- Version policy: minimum versions, avoidance of global pins for libraries. +- Tooling: consistent formatter and linter; CI runs checks and tests; no obsolete or conflicting config (e.g. both `setup.py` and `pyproject.toml` without clear roles). +- CI/CD pipeline consistency: Python version matrix in CI matches `requires-python` in `pyproject.toml`. CI runs the same checks as the local `run-all-checks.sh` (ruff, mypy, pytest, Sphinx, PyMarkdown). Publishing workflow present and correctly triggered (tag-based, Trusted Publishers or token auth). +- Configuration consistency: tool configs in `pyproject.toml` (ruff, mypy, pytest) are consistent with each other and with project rules. No stale config sections for tools no longer used (e.g. `[tool.black]` or `[tool.isort]` when ruff handles both). Line-length and target-version settings agree across tools. + +**Evidence**: `pyproject.toml`, `requirements*.txt`, CI config (`.github/workflows/`). Compare `requires-python` against CI matrix. Grep for stale `[tool.*]` sections. + +### 9. Technical debt and risk + +- Deprecations: use of deprecated APIs (stdlib, third-party); planned removals. +- Complexity: deeply nested conditionals; long functions; high cyclomatic complexity in critical code. +- TODOs/FIXMEs: concentration in one area; unlinked or vague items. +- Compatibility: Python version support; platform assumptions (e.g. paths, encoding). + +**Evidence**: Grep for deprecation warnings, TODO/FIXME; example complex function. + +### 10. Packaging and distribution + +- Metadata completeness: `pyproject.toml` has classifiers, project URLs (`Homepage`, `Repository`, `Documentation`), license expression (PEP 639), `description`, `requires-python`. +- Version single source of truth: one canonical version (`importlib.metadata`, `setuptools-scm`, or `_version.py`); `__version__` in the package is consistent. +- Build system: correct `[build-system]` table; package installs cleanly with `pip install -e .`; no stale `setup.py`/`setup.cfg` alongside a complete `pyproject.toml`. +- Package contents: `__init__.py` exports match the public API. `py.typed` marker present for typed packages. Correct `[tool.setuptools.packages.find]` or equivalent so subpackages and data files are included. +- Distribution hygiene: no build artifacts, test data, or large files accidentally included in the sdist/wheel. `.gitignore` and/or `MANIFEST.in` configured appropriately. + +**Evidence**: `pyproject.toml` metadata fields, `pip install -e .` output, `py.typed` presence, `find_packages` config. Compare `__init__.py` exports against `__all__`. + +## Output template + +Use this structure for the report. Omit sections with no findings; keep each item concise with location and suggested direction. + +```markdown +# Codebase analysis: [project or path] + +## Summary +[2–4 sentences: overall health, top 2–3 priorities.] + +## 1. Structure and layout +- **Finding**: [what]. **Evidence**: [where]. **Suggestion**: [action]. +[Repeat as needed.] + +## 2. Best practices alignment +[Same pattern; reference project rules if present.] + +## 3. Types and static checks +... + +## 4. Testing +... + +## 5. Performance and resource use +... + +## 6. Maintainability and extensibility +... + +## 7. Security and robustness +... + +## 8. Dependencies and tooling +... + +## 9. Technical debt and risk +... + +## 10. Packaging and distribution +... + +## Recommended priorities +1. [Highest impact, feasible first step] +2. [Next] +3. [Next] +``` + +## Severity and wording + +- **Critical**: Security or data integrity risk; blocks testing or deployment; pervasive violation of a core rule. +- **High**: Significant maintainability or bug risk; large refactor needed if left as-is. +- **Medium**: Clear improvement; can be scheduled with normal work. +- **Low**: Nice to have; style or minor consistency. + +Use "Consider…", "Prefer…", "Avoid…" for suggestions. For critical/high, state the impact (e.g. "increases risk of…", "makes testing difficult because…"). + +## Project-specific rules + +If the repo contains `.cursor/rules/` (e.g. `python_best_practices.mdc`), treat those as the primary standard for "best practices alignment". Mention when a finding contradicts or reinforces a project rule. For Python repos, prefer referencing the rule file rather than repeating long rule text. + +## Reference + +For example findings and severity phrasing, see [reference.md](reference.md). + +## Scope and depth + +- Prefer breadth first: touch all dimensions, then go deeper only where impact is high or the user asks. +- For large codebases, sample by package or layer (e.g. core vs CLI vs tests) and call out areas not reviewed. +- If the user asks for "quick" or "high-level" analysis, limit to summary + 1–2 findings per dimension and a short priority list. diff --git a/.cursor/skills/python-codebase-analysis/reference.md b/.cursor/skills/python-codebase-analysis/reference.md new file mode 100644 index 0000000..d6dec01 --- /dev/null +++ b/.cursor/skills/python-codebase-analysis/reference.md @@ -0,0 +1,81 @@ +# Codebase analysis – reference + +Use this when you need concrete examples for a dimension or wording guidance. + +## Example findings (by dimension) + +**Structure** +- **Finding**: Single module `utils.py` is 1,200 lines and mixes I/O, parsing, and formatting. **Evidence**: `src/utils.py`. **Suggestion**: Split into `io.py`, `parsing.py`, `formatting.py` under `utils/` and re-export from `utils/__init__.py`. + +**Best practices** +- **Finding**: Several functions use `except Exception` and pass, hiding failures. **Evidence**: `src/loader.py` lines 45, 89. **Suggestion**: Catch specific exceptions, log with `logging.exception`, and re-raise or return a sentinel where appropriate. + +**Best practices – library hygiene** +- **Finding**: Library code uses `print()` for diagnostic output instead of logging. **Evidence**: `src/parser.py` lines 12, 78, 134. **Suggestion**: Replace with `logger.debug()`/`logger.info()` using a module-level `logger = logging.getLogger(__name__)`. +- **Finding**: Top-level `__init__.py` configures the root logger with `logging.basicConfig()`. **Evidence**: `src/rms-psfmodel/__init__.py` line 5. **Suggestion**: Remove; add `logging.getLogger(__name__).addHandler(logging.NullHandler())` instead. Libraries must not configure logging for their callers. +- **Finding**: `sys.exit(1)` called in library function on validation failure. **Evidence**: `src/validator.py` line 42. **Suggestion**: Raise a `ValueError` (or a custom exception) and let the caller decide how to handle it. + +**Best practices – error messages** +- **Finding**: Exceptions raised with no context: `raise ValueError("invalid input")`. **Evidence**: `src/converter.py` lines 30, 55. **Suggestion**: Include the actual value and constraint: `raise ValueError(f"scale must be positive, got {scale}")`. +- **Finding**: No custom exception hierarchy; all errors are bare `ValueError`/`TypeError`. **Evidence**: Grep for `raise ValueError` across `src/`. **Suggestion**: Define a `rms-psfmodelError` base class and specific subclasses so callers can catch library errors without catching unrelated `ValueError`s. + +**Best practices – encoding and I/O** +- **Finding**: `open()` calls omit `encoding`; relies on platform default. **Evidence**: `src/reader.py` lines 18, 42. **Suggestion**: Add `encoding='utf-8'` (or the appropriate encoding) to all `open()` calls in library code. +- **Finding**: Public API accepts only `str` paths; callers using `pathlib.Path` must convert. **Evidence**: `src/loader.py` `load(path: str)`. **Suggestion**: Accept `str | Path` and convert internally with `Path(path)`. + +**Types** +- **Finding**: Public API in `api.py` has no return type annotations; mypy is not run in CI. **Evidence**: `pyproject.toml` has no `[tool.mypy]`; `api.py` functions lack `->`. **Suggestion**: Add mypy to CI, enable strict mode, and annotate public functions first. + +**Testing** +- **Finding**: Coverage is ~45%; module `core/solver.py` has no direct tests. **Evidence**: `coverage report`; no `tests/test_solver.py`. **Suggestion**: Add unit tests for solver entry points and key branches; aim for ≥80% on core. + +**Performance** +- **Finding**: Config is re-read from disk inside a loop in `process_batch`. **Evidence**: `src/batch.py` `process_batch` calls `load_config()` per item. **Suggestion**: Load config once outside the loop and pass it in or use a module-level cache. + +**Performance – concurrency and thread safety** +- **Finding**: Module-level mutable cache `_cache = {}` is written from multiple functions with no locking. **Evidence**: `src/registry.py` line 8 and functions `register()`, `lookup()`. **Suggestion**: Protect with `threading.Lock`, or document that the module is not thread-safe. +- **Finding**: Lazy singleton initialization uses a plain `if _instance is None` check. **Evidence**: `src/client.py` `get_client()`. **Suggestion**: Use `threading.Lock` or a module-level instance initialized at import time. + +**Maintainability** +- **Finding**: Feature flags and environment checks are scattered across 12 files. **Evidence**: Grep for `os.getenv("FEATURE_")`. **Suggestion**: Centralize in a `config` or `features` module and inject into call sites. + +**Maintainability – documentation quality** +- **Finding**: README usage example calls `rms-psfmodel.process(data)` but the function was renamed to `rms-psfmodel.transform(data)` in v2.0. **Evidence**: `README.md` line 34 vs `src/rms-psfmodel/__init__.py`. **Suggestion**: Update README examples to match the current API; consider a CI check that runs README code blocks. +- **Finding**: Three public modules (`analysis`, `export`, `utils`) have no corresponding Sphinx `automodule` directive. **Evidence**: Compare `src/rms-psfmodel/__init__.py` `__all__` against `docs/module.rst`. **Suggestion**: Add `.. automodule::` entries for each public module. + +**Security** +- **Finding**: Subprocess is invoked with `shell=True` and user-controlled input. **Evidence**: `src/runner.py` line 67. **Suggestion**: Use list form of arguments and avoid `shell=True`; validate/sanitize input. + +**Dependencies** +- **Finding**: Runtime deps are in `requirements.txt` and `pyproject.toml` with different versions. **Evidence**: `numpy` in requirements.txt pinned, in pyproject.toml minimum. **Suggestion**: Use `pyproject.toml` as single source of truth; remove duplicate requirements.txt or generate from it. + +**Dependencies – CI/CD consistency** +- **Finding**: `pyproject.toml` declares `requires-python = ">=3.10"` but CI matrix only tests 3.12. **Evidence**: `.github/workflows/run-tests.yml` `matrix.python-version: ["3.12"]`. **Suggestion**: Add 3.10, 3.11, 3.13 to the CI matrix to match the supported range. +- **Finding**: CI does not run Sphinx build or PyMarkdown; only ruff and pytest. **Evidence**: `.github/workflows/run-tests.yml`. **Suggestion**: Add Sphinx and PyMarkdown steps to match the local `run-all-checks.sh` so documentation issues are caught before merge. + +**Dependencies – configuration consistency** +- **Finding**: Ruff is configured with `line-length = 88` but the project rule says 100. **Evidence**: `pyproject.toml` `[tool.ruff]` vs `.cursor/rules/python_best_practices.mdc`. **Suggestion**: Align `line-length` across ruff, formatter, and project rules to a single value. +- **Finding**: Stale `[tool.black]` section remains in `pyproject.toml` after migration to Ruff. **Evidence**: `pyproject.toml` line 45. **Suggestion**: Remove the `[tool.black]` section; Ruff format replaces Black. + +**Technical debt** +- **Finding**: 40+ TODO comments with no issue links or owners. **Evidence**: `grep -r TODO src`. **Suggestion**: Link TODOs to issues, or triage and remove obsolete ones; add a policy in CONTRIBUTING. + +**Packaging and distribution** +- **Finding**: `pyproject.toml` is missing `project.urls` (no Homepage, Repository, or Documentation links). **Evidence**: `pyproject.toml` `[project]` section. **Suggestion**: Add `[project.urls]` with links to GitHub, ReadTheDocs, and changelog so they appear on PyPI. +- **Finding**: `__version__` is hard-coded in both `__init__.py` and `pyproject.toml`; they disagree after the last release. **Evidence**: `src/rms-psfmodel/__init__.py` line 3 says `1.2.0`, `pyproject.toml` says `1.3.0`. **Suggestion**: Use a single source of truth (e.g. `importlib.metadata.version("rms-psfmodel")` in `__init__.py` reading from the installed package metadata). +- **Finding**: `py.typed` marker file is missing; downstream users get no type-checking benefit. **Evidence**: `src/rms-psfmodel/` has no `py.typed` file. **Suggestion**: Add an empty `src/rms-psfmodel/py.typed` and ensure it is included in the package via `[tool.setuptools.package-data]`. +- **Finding**: `tests/` directory and test fixtures are included in the sdist/wheel. **Evidence**: `pip show -f rms-psfmodel` lists `tests/`. **Suggestion**: Exclude `tests` from the package via `[tool.setuptools.packages.find]` `exclude = ["tests*"]` or equivalent. + +## Severity phrasing + +- Critical: "must be addressed before…", "exposes…", "prevents…" +- High: "significantly increases…", "will make it difficult to…" +- Medium: "recommended to…", "would improve…" +- Low: "consider…", "optional:…" +- Trivial: "may not be worth changing…" + +## When project rules exist + +- "Per project rule in `.cursor/rules/python_best_practices.mdc`, …" +- "This conflicts with the project's convention that …" +- "Align with project rule: … (see python_best_practices.mdc)." diff --git a/.cursor/skills/run-all-checks/SKILL.md b/.cursor/skills/run-all-checks/SKILL.md new file mode 100644 index 0000000..bd83107 --- /dev/null +++ b/.cursor/skills/run-all-checks/SKILL.md @@ -0,0 +1,157 @@ +--- +name: run-all-checks +description: Run all linting, type checking, tests, Markdown lint, and documentation build for the project. Check for errors and warnings, then fix any problems found. Use when the user asks to run checks, verify the build, run CI locally, or fix lint/type/test errors. +--- + +# Run All Checks + +Execute all project checks (lint, typecheck, test, Markdown lint, docs) and fix any errors found. This skill aligns with the `scripts/run-all-checks.sh` script and a standard Python package layout (e.g. `src/`, `tests/`, `docs/`). + +## Quick Start + +1. Run all checks (optionally in parallel via the script). +2. Review output for errors and warnings. +3. Fix any issues found. +4. Re-run checks to verify fixes. + +## Check Commands + +Run from **project root** with the project **virtual environment activated** (e.g. `source venv/bin/activate` or create a new venv and then `pip install -e ".[dev]"`). + +### Code (ruff, mypy, pytest) + +```bash +# Lint (ruff) +python -m ruff check src tests examples +python -m ruff format --check src tests examples + +# Type check (mypy) +python -m mypy src tests examples + +# Tests (pytest; use -n auto for parallel when tests are independent) +python -m pytest tests -q +``` + +Omit `examples` if the project has no `examples/` directory. The run-all-checks script runs these in sequence; use the script’s `-c` option to run only code checks. + +### Markdown (PyMarkdown) + +```bash +python -m pymarkdown scan docs/ .cursor/ README.md CONTRIBUTING.md +``` + +Use the script’s `-m` option to run only Markdown lint. + +### Documentation (Sphinx) + +```bash +cd docs && make clean && make html SPHINXOPTS="-W" +``` + +Warnings are treated as errors (`-W`). The script’s `-d` option runs docs build plus Markdown lint. + +## Using the Script + +From project root: + +```bash +./scripts/run-all-checks.sh +``` + +Options: + +- **Default**: Run code checks and docs (Sphinx + PyMarkdown) in parallel. +- `-c, --code`: Only ruff, mypy, pytest. +- `-d, --docs`: Only Sphinx build and PyMarkdown scan. +- `-m, --markdown`: Only PyMarkdown scan. +- `-s, --sequential`: Run code and docs sequentially (easier to read output). +- `-p, --parallel`: Run code and docs in parallel (the default). +- `-h, --help`: Show usage. + +Set `VENV` or `VENV_PATH` to point to the virtual environment if it is not at `./venv`. + +## Execution Workflow + +```markdown +Check Progress: +- [ ] Ruff check (src, tests, examples) +- [ ] Ruff format --check +- [ ] Mypy (src, tests, examples) +- [ ] Pytest (tests) +- [ ] PyMarkdown scan (docs/, .cursor/, README, CONTRIBUTING) +- [ ] Sphinx build (docs/) with SPHINXOPTS="-W" +- [ ] All errors fixed +- [ ] Re-verify all checks pass +``` + +### Step 1: Run Checks + +Use the script (recommended) or run the commands above manually. Fix any non-zero exit codes. + +### Step 2: Analyze Results + +- **Errors**: Must be fixed (non-zero exit). +- **Warnings**: Sphinx is run with `-W`, so docs warnings fail the check; fix them so the build passes. + +Common error types: + +| Check | Error pattern | Typical fix | +|---------|----------------------------|--------------------------------| +| ruff | `F401` unused import | Remove import | +| ruff | `ARG001` unused argument | Prefix with `_` or add noqa | +| mypy | `error: Name "X" not defined` | Add import or fix typo | +| pytest | `FAILED` / `ERROR` | Fix test or code under test | +| pymarkdown | Rule ID + message | Fix Markdown style/structure | +| sphinx | `WARNING: duplicate object` | Add `:no-index:` or fix refs | + +### Step 3: Fix Issues + +For each error: read the message, open the file and line, apply the fix. Re-run the failing check to confirm. + +### Step 4: Re-verify + +Run the full script again; all checks should pass (exit code 0). + +## Common Fixes Reference + +### Ruff unused argument (ARG001) + +For fixtures that are dependencies but not directly used: + +```python +def my_fixture(other_fixture: None) -> None: # noqa: ARG001 + ... +``` + +### Sphinx duplicate object warning + +Add `:no-index:` to the automodule directive where appropriate: + +```rst +.. automodule:: mypackage.module + :members: + :no-index: +``` + +### Coverage threshold + +If coverage is below the project target (e.g. 80%): add tests or, temporarily, adjust `[tool.coverage.report]` / threshold in config. Prefer adding tests. + +### Type annotation issues + +For forward reference or union syntax issues: + +```python +from __future__ import annotations # at top of file +``` + +## Success Criteria + +All checks pass when: + +- `ruff check` → All checks passed +- `ruff format --check` → Would reformat 0 files (or run `ruff format` and re-check) +- `mypy` → Success: no issues found +- `pytest` → All tests pass; coverage meets target if configured +- `pymarkdown scan` → No violations +- `make html SPHINXOPTS="-W"` (in docs/) → Build completes with exit 0 diff --git a/.flake8 b/.flake8 deleted file mode 100644 index be5decf..0000000 --- a/.flake8 +++ /dev/null @@ -1,9 +0,0 @@ -[flake8] -max-line-length: 90 -exclude: hst.py -extend-ignore = - E129,E265 - # E129 visually indented line with same indent as next logical line - # E265 block comment should start with '# ' - # E266 too many leading '#' for block comment - # E265, E266 diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..a09577f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,43 @@ +--- +name: Bug Report +about: Report a bug or unexpected behavior. +labels: bug +--- + +## Environment + +- **Package version:** +- **Python version:** +- **OS:** +- **Relevant dependency versions:** + +## Description + + + +## Steps to Reproduce + + + +1. +2. +3. + + + +```python +import psfmodel +# minimal reproduction here +``` + +## Expected Behavior + + + +## Actual Behavior + + + +## Additional Context + + diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..3ba13e0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: false diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..c2e5d4c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,26 @@ +--- +name: Feature Request +about: Suggest a new feature or enhancement. +labels: enhancement +--- + +## Problem or Motivation + + + +## Proposed Solution + + + +```python +# Example usage +result = psfmodel.new_function(arg) +``` + +## Alternatives Considered + + + +## Additional Context + + diff --git a/.github/ISSUE_TEMPLATE/other.md b/.github/ISSUE_TEMPLATE/other.md new file mode 100644 index 0000000..cfce2d8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/other.md @@ -0,0 +1,24 @@ +--- +name: Other +about: Questions, documentation issues, discussions, or anything else. +labels: question +--- + +## Category + +Check one: + +- [ ] Documentation +- [ ] Question / Usage Help +- [ ] CI / Build / Packaging +- [ ] Refactoring / Code Quality +- [ ] Discussion / Design +- [ ] Other + +## Description + + + +## Additional Context + + diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..0eaac37 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,53 @@ +# Purpose + + + +Closes # + +## Changes/Implementation Details + + + +- + +## Type of Change + +- [ ] Bug fix (non-breaking) +- [ ] New feature (non-breaking) +- [ ] Breaking change (fix or feature that alters existing behavior or public API) +- [ ] Refactor (no functional or API changes) +- [ ] Documentation +- [ ] Tests only (no production code change) +- [ ] CI / Build / Dependencies + +## Testing + +- [ ] Unit tests pass +- [ ] Integration tests pass (if applicable) +- [ ] End-to-end tests pass (if applicable) +- [ ] New or updated tests for changed code +- [ ] Tested manually (describe below if applicable) + + + +## Potential Impacts + + + +## Checklist + +- [ ] Code follows project style (`ruff check`, `ruff format`) +- [ ] Type annotations present and `mypy` passes +- [ ] No secrets or credentials committed +- [ ] No warnings or errors introduced (CI, linters, type checking, builds) or justified in Notes +- [ ] Docstrings and Sphinx docs updated (if applicable) +- [ ] CHANGES.md updated (if user-facing change) +- [ ] No temporary or debug code left in +- [ ] Performance impact assessed (see Potential Impacts above) +- [ ] Breaking changes flagged in Type of Change above + +## Notes + + diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml index ed29c96..d04bd74 100644 --- a/.github/workflows/publish_to_pypi.yml +++ b/.github/workflows/publish_to_pypi.yml @@ -8,24 +8,28 @@ on: jobs: upload_pypi: runs-on: ubuntu-latest + permissions: + contents: read steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 - name: Set up Python 3.12 - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: 3.12 - - name: Install dependencies + - name: Build run: | - python -m pip install -r requirements.txt + python -m pip install -U pip + python -m pip install --upgrade build && python -m build - - name: Build + - name: Validate package run: | - python3 -m pip install --upgrade build && python3 -m build + python -m pip install twine + python -m twine check dist/* - name: Publish package uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/publish_to_test_pypi.yml b/.github/workflows/publish_to_test_pypi.yml index 2f06ec1..399a480 100644 --- a/.github/workflows/publish_to_test_pypi.yml +++ b/.github/workflows/publish_to_test_pypi.yml @@ -4,27 +4,27 @@ run-name: Publish to Test PyPI triggered by ${{ github.ref_type }} ${{ github.re on: workflow_dispatch: +permissions: + contents: read + jobs: upload_pypi: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 - name: Set up Python 3.12 - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: 3.12 - - name: Install dependencies - run: | - python -m pip install -r requirements.txt - - name: Build run: | - python3 -m pip install --upgrade build && python3 -m build + python -m pip install -U pip + python -m pip install --upgrade build && python -m build - name: Publish package uses: pypa/gh-action-pypi-publish@release/v1 @@ -32,4 +32,4 @@ jobs: user: __token__ password: ${{ secrets.TEST_PYPI_API_TOKEN }} repository-url: https://test.pypi.org/legacy/ - verify-metadata: false + skip-existing: true diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index adfe864..3a59412 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -11,58 +11,68 @@ on: - cron: "21 11 * * 0" jobs: - flake8: - name: Lint psfmodel + lint: + name: Lint rms-psfmodel runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} + uses: actions/checkout@v6 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + - name: Set up Python 3.13 + uses: actions/setup-python@v6 with: - python-version: ${{ matrix.python-version }} + python-version: 3.13 - name: Install dependencies run: | - python -m pip install -r requirements.txt + python -m pip install -e ".[dev]" - - name: Flake8 + - name: Ruff (lint) run: | - flake8 psfmodel tests + ruff check src tests - # - name: Mypy - # run: | - # mypy psfmodel tests + - name: Ruff (format) + run: | + ruff format --check src tests + + - name: Mypy + run: | + mypy src tests + + - name: Sphinx + run: | + sphinx-build -W -b html docs docs/_build + + - name: PyMarkdown + run: | + pymarkdown scan docs/ .cursor/ README.md CONTRIBUTING.md test: - name: Test psfmodel + name: Test rms-psfmodel runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-latest] # TODO windows-latest + os: [ubuntu-latest] python-version: ['3.10', '3.11', '3.12', '3.13'] fail-fast: false + permissions: + contents: read steps: - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.head.sha }} + uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install -r requirements.txt + python -m pip install -e ".[dev]" - name: Test with coverage run: | - coverage run -m pytest + python -m pytest --cov=psfmodel -n auto tests - name: Print coverage report run: | diff --git a/.gitignore b/.gitignore index 0ef0044..7d8aea6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# git +.git # git already ignores this, but some other tools don't + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -25,8 +28,6 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST -**/_version.py -*tinytim* # PyInstaller # Usually these files are written by a python script from a template @@ -71,7 +72,7 @@ instance/ .scrapy # Sphinx documentation -docs/_build/ +docs/**/_build/ # PyBuilder .pybuilder/ @@ -108,8 +109,10 @@ ipython_config.py #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. -# https://pdm.fming.dev/#use-with-ide +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml +.pdm-python +.pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ @@ -123,9 +126,9 @@ celerybeat.pid # Environments .env -.venv +.venv* env/ -*venv*/ +venv*/ ENV/ env.bak/ venv.bak/ @@ -160,3 +163,19 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +.code_planner_cache.db +_work/ +**/_version.py +nohup.out +.DS_Store +._* +*~ +*~.* +*.bak +*.*.bak +log.txt +profile.txt +*tinytim* +# Gauss characterization results +gauss*/ diff --git a/.mypy.ini b/.mypy.ini deleted file mode 100644 index a962210..0000000 --- a/.mypy.ini +++ /dev/null @@ -1,15 +0,0 @@ -[mypy] -strict = True -exclude = hst.py - -[mypy-astropy.*] -ignore_missing_imports = True - -[mypy-numpy.*] -ignore_missing_imports = True - -[mypy-scipy.*] -ignore_missing_imports = True - -[mypy-psfmodel._version] -ignore_missing_imports = True diff --git a/.readthedocs.yaml b/.readthedocs.yaml index bc5823f..f167ea9 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,7 +7,7 @@ version: 2 # Set the OS, Python version and other tools you might need build: - os: ubuntu-22.04 + os: ubuntu-24.04 tools: python: "3.12" # You can also specify other tool versions: @@ -24,9 +24,10 @@ sphinx: # - pdf # - epub -# Optional but recommended, declare the Python requirements required -# to build your documentation -# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +# Install package with docs extra (dependencies from pyproject.toml) python: install: - - requirements: requirements.txt + - method: pip + path: . + extra_requirements: + - docs diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..1f96a68 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,9 @@ +{ + "editor.tabSize": 4, + "editor.insertSpaces": true, + "editor.detectIndentation": false, + "files.trimTrailingWhitespace": true, + "files.insertFinalNewline": true, + "files.trimFinalNewlines": true, + "editor.rulers": [80, 90, 100] +} diff --git a/CODEBASE_ANALYSIS.md b/CODEBASE_ANALYSIS.md new file mode 100644 index 0000000..449d52e --- /dev/null +++ b/CODEBASE_ANALYSIS.md @@ -0,0 +1,712 @@ +# Codebase Analysis: rms-psfmodel + +## Summary + +`rms-psfmodel` is a Python library for PSF (Point Spread Function) model fitting, +supporting analytic Gaussian PSFs and HST/TinyTim-based PSFs. The Gaussian path +(`gaussian.py`, `psf.py`) is moderately well-structured, partially typed, and has +reasonable test coverage for its core math. However, the codebase has several critical +numerical bugs, a complete absence of the `logging` module (all diagnostic output uses +bare `print()`), significant dead and commented-out code, and an `hst.py` module that is +entirely untested, uses `os.system()` for shell execution, and reads environment variables +at import time — crashing any user who does not have `TINYTIM` and `PSF_CACHE_DIR` set. +Test coverage is only ~45% overall (68% for `psf.py`, 0% for `hst.py`). + +**Top priorities:** + +1. Fix the numerical correctness bugs in `gaussian_integral_1d` and + `background_gradient_fit` (data-integrity risk). +2. Replace all `print()` debugging with `logging` throughout the library. +3. Address the `hst.py` import-time crash, `os.system()` shell injection, and `assert` + misuse. + +--- + +## 1. Algorithms and Numerical Accuracy + +### 1.1 CRITICAL — `gaussian_integral_1d` uses `np.abs()` on signed integral (array path) + +- **Evidence:** `gaussian.py` lines 267–281 +- **Issue:** The scalar path correctly computes + `erf(xmax) - erf(xmin)`, which is signed (positive when `xmax > xmin`, negative + otherwise). The array path wraps the difference in `np.abs()`: + + ```python + result = np.abs(erf(xmax_div_sqrt_2) - erf(xmin_div_sqrt_2)) + ``` + + This means the array path always returns a non-negative value regardless of the + ordering of `x_min` and `x_max`, while the scalar path preserves the sign. The two code + paths are therefore inconsistent. If a caller ever passes `x_min > x_max` (e.g. to + express a reversed integration direction), the scalar and array paths disagree. More + subtly, when `base != 0`, the `base` is added *after* the absolute value, so the result + for `scale * integral + base` differs from the scalar result even for `x_min < x_max` + when `scale` is negative. +- **Impact:** Silent numerical errors in any pipeline that relies on the array path with + non-standard argument ordering or negative scale. This is the most serious correctness + issue in the codebase. +- **Suggestion:** Remove `np.abs()` to match the scalar path. + +### 1.2 CRITICAL — `background_gradient_fit` missing f-string prefix + +- **Evidence:** `psf.py` line 284 +- **Issue:** The error message reads: + + ```python + raise ValueError('Image must be 2-D, got {image.shape}') + ``` + + This is a plain string, not an f-string. The user sees the literal text + `{image.shape}` instead of the actual shape, making the error message useless for + diagnosis. +- **Suggestion:** Change to `f'Image must be 2-D, got {image.shape}'`. + +### 1.3 HIGH — `gaussian_integral_1d` uses `assert` for input validation + +- **Evidence:** `gaussian.py` line 263 +- **Issue:** `assert sigma > 0.` is used to validate a public-API parameter. Asserts are + removed under `python -O`, silently allowing `sigma <= 0` which produces `nan`/`inf` + results. +- **Suggestion:** Replace with `if sigma <= 0: raise ValueError(...)`. + +### 1.4 HIGH — `gaussian_integral_2d` angle path uses mean-of-samples (not true integral) + +- **Evidence:** `gaussian.py` lines 330–365 +- **Issue:** When `angle != 0`, the 2-D Gaussian integral falls back to sampling the + function on a `linspace` grid of size `angle_subsample` (default 13) and returning + `np.mean(ret)`. This is a crude midpoint-rule quadrature on a 13×13 grid. For narrow + Gaussians (`sigma < 0.5`) or large integration regions, 169 samples may not capture the + peak adequately. The `np.mean()` approximation also does not correctly scale by the area + of the integration domain — it returns the average value, not the integral. For the + integral to be correct it should be multiplied by the area + `(y_max - y_min) * (x_max - x_min)`, which is not done here. + + **However**, this same function is called in the angle=0 path from `eval_pixel` where it + evaluates over a unit pixel (area = 1), so the mean equals the integral only in that + specific case. For non-unit-pixel regions or direct calls with `angle != 0`, the result + is mathematically incorrect. +- **Impact:** Quantitative errors for any rotated Gaussian integration over non-unit + regions. Even for unit pixels, accuracy is limited to ~1% for `sigma < 1`. +- **Suggestion:** Either multiply by the area `(y_max - y_min) * (x_max - x_min)`, or + switch to a proper 2-D quadrature (e.g. `scipy.integrate.dblquad` or rotate the + coordinate system analytically to separate the integral). Document the accuracy + limitation if the grid approach is intentional. + +### 1.5 MEDIUM — `background_gradient` infers order from parameter count via `sqrt` + +- **Evidence:** `psf.py` line 390 +- **Issue:** `order = int(np.sqrt(len(bkgnd_params)*2))-1` uses a floating-point square + root to recover the polynomial order from the parameter count. For the standard counts + (3, 6, 10) this works, but for unusual counts the `int()` truncation could silently pick + the wrong order. No validation is performed on the result. +- **Suggestion:** Validate that the inferred order is consistent with the actual parameter + count, or pass the order explicitly. + +### 1.6 MEDIUM — `_find_position` applies `scaled_psf = psf * scale + base` redundantly + +- **Evidence:** `psf.py` line 804 +- **Issue:** `details['scaled_psf'] = psf * scale + base`, but `psf` was already created + by `self.eval_rect(..., scale=scale, base=base, ...)` (line 790–791), meaning `psf` + already includes the scale and base. The `scaled_psf` entry therefore double-applies the + scaling: `(psf_raw * scale + base) * scale + base`. The residual computation + `sub_img_grad - details['scaled_psf']` at line 569 uses this double-scaled PSF, which is + incorrect. +- **Impact:** The bad-pixel rejection loop in `find_position` (lines 569–591) computes + residuals against a double-scaled PSF, so the `num_sigma` threshold is compared to an + incorrect residual. The final returned position is still from the optimizer, so the + position result itself is not affected, but the `scaled_psf` metadata entry and the + bad-pixel masking logic are wrong. +- **Suggestion:** Either compute `psf` with `scale=1., base=0.` and store `psf * scale + + base` in `scaled_psf`, or call `eval_rect` with the full parameters and set + `details['scaled_psf'] = psf`. + +### 1.7 LOW — Numerical stability of Gaussian evaluation for extreme sigma + +- **Evidence:** `gaussian.py` lines 123–124 +- **Issue:** `np.exp(-(x-mean)**2 / (2 * sigma**2))` underflows to 0.0 for `|x-mean|` + much larger than `sigma`, and `1 / sigma**2` overflows for extremely small sigma. The + `sigma_x_range` default of `(0.01, 10.)` keeps sigma in a safe range during fitting, + but the static methods accept arbitrary sigma. +- **Suggestion:** Document the valid range or add a guard for sigma near zero. + +--- + +## 2. Performance and Resource Use + +### 2.1 HIGH — `_eval_rect` creates flat coordinate arrays instead of using meshgrid + +- **Evidence:** `gaussian.py` lines 527–541 +- **Issue:** `_eval_rect` constructs `y_coords` via `np.repeat` and `x_coords` via + `np.tile`, then creates a `(2, N)` coords array. This allocates three large arrays where + a simple `np.meshgrid` + reshape would suffice and be clearer. More importantly, these + flat coordinate arrays are passed to `eval_pixel`, which calls + `gaussian_integral_2d` element-by-element for the `angle != 0` path (the array branch + of `gaussian_integral_2d` has an explicit Python `for` loop at lines 353–363). For a + 21×21 PSF with a non-zero angle, this means 441 separate `np.linspace` + `meshgrid` + + `gaussian_2d` calls. +- **Suggestion:** For the rotated case, vectorize the computation. Consider using a + rotated-coordinate analytic integral or at least batch the meshgrid operation. + +### 2.2 HIGH — Powell optimizer called with `maxiter = len(starting_guess) * 10000` + +- **Evidence:** `psf.py` line 752 +- **Issue:** For a typical Gaussian with 5–7 parameters, this sets `maxiter` to + 50,000–70,000. Powell's method evaluates the objective function many times per iteration. + Each evaluation calls `eval_rect`, which for a 21×21 Gaussian is fast but for an HST PSF + involves spline interpolation, reshaping, and optional convolution. The large iteration + cap can cause `find_position` to run for minutes if convergence is slow. +- **Suggestion:** Consider a more modern optimizer (e.g. `scipy.optimize.minimize` with + `method='L-BFGS-B'` for bounded problems) and add a callback or timeout mechanism. At + minimum, log the number of iterations used. + +### 2.3 MEDIUM — `_eval_rect_smeared` allocates a new array per step in the loop + +- **Evidence:** `psf.py` lines 173–189 +- **Issue:** Each step in the motion-blur loop calls `_eval_rect` which returns a new + array, then adds it to `total_rect`. For `num_steps` up to several hundred (large + motion, small granularity), this allocates many temporary arrays. +- **Suggestion:** Pre-allocate `total_rect = np.zeros(rect_size)` and accumulate in place. + +### 2.4 MEDIUM — `hst.py` `_cache_pixelation` evaluates spline per-point + +- **Evidence:** `hst.py` lines 672–681 +- **Issue:** `desired_y_indices` and `desired_x_indices` are flattened to 1-D, then + `spline.ev()` is called on them. For a 39×5 = 195 pixel subsampled PSF, this evaluates + ~38,000 points individually. `RectBivariateSpline.__call__` on a 2-D grid is much faster + because it exploits the tensor-product structure. +- **Suggestion:** Use `spline(desired_y_unique, desired_x_unique)` on the unique + coordinate arrays to get a 2-D grid evaluation. + +### 2.5 LOW — No caching of `_background_gradient_coeffs` + +- **Evidence:** `psf.py` line 326, called inside a `while True` loop at line 333 +- **Issue:** `_background_gradient_coeffs` is called once before the loop, but + `background_gradient` (line 357) calls it again inside the loop on every iteration. For + a fixed image shape and order, the result is always the same. +- **Suggestion:** Cache the coefficients or pass them into `background_gradient`. + +--- + +## 3. Debugging and Logging + +### 3.1 CRITICAL — No `logging` module usage anywhere in the library + +- **Evidence:** `grep -r "logging" src/psfmodel/` returns zero matches. +- **Issue:** The entire library uses bare `print()` for diagnostic output, controlled by + the integer `self._debug_opt` attribute. There are **50+** `print()` calls in `psf.py` + and **30+** in `hst.py`. This violates the project's own rule: "ALWAYS include + meaningful, structured logging (use the `logging` module)... NEVER use bare `print()` + for diagnostic output in library code." +- **Impact:** Users cannot selectively enable/disable debug output, redirect it to a log + file, or integrate it with their own logging configuration. The `print()` calls write + directly to stdout, polluting output in Jupyter notebooks, pipelines, and web services. +- **Suggestion:** Replace all `print()` calls with `logging.getLogger(__name__)` calls at + appropriate levels: + - `logger.debug()` for `_debug_opt > 1` messages (per-iteration detail) + - `logger.info()` for `_debug_opt == 1` messages (entry/exit, final results) + - `logger.warning()` for the unconditional `'FAIL'` message at line 760 + - `logger.error()` for fatal errors in `hst.py` + + Add `logging.getLogger(__name__).addHandler(logging.NullHandler())` in + `__init__.py` as per library best practice. + +### 3.2 HIGH — `_debug_opt` is a public mutable attribute with no API + +- **Evidence:** `psf.py` line 28 +- **Issue:** `self._debug_opt = 0` is set in `__init__`, but tests set it directly + (e.g. `psf._debug_opt = 10`). There is no method, property, or documentation for this. + The integer levels (0, 1, 2, 3) are undocumented and scattered across the code. +- **Suggestion:** Remove once `logging` is adopted. If levels are still needed, use + standard `logging` levels (`DEBUG`, `INFO`, etc.). + +### 3.3 HIGH — Debug `print()` on optimizer failure is unconditional + +- **Evidence:** `psf.py` line 760 +- **Issue:** `print('FAIL', message)` fires regardless of `_debug_opt`, meaning any + user calling `find_position` will see "FAIL ..." on stdout if the optimizer does not + converge. This should be a `logger.warning()` at minimum, or the failure should be + communicated via the return value (which it already is — `None`). + +### 3.4 MEDIUM — Unreachable `print('hi')` at end of `_find_position` + +- **Evidence:** `psf.py` line 885 +- **Issue:** `print('hi')` appears after `return offset_y, offset_x, details` at line 883. + This is dead code that will never execute but suggests leftover debugging. +- **Suggestion:** Delete. + +--- + +## 4. Structure and Layout + +### 4.1 HIGH — Dead code: `_dead_code()` function and unreachable `print('hi')` + +- **Evidence:** `psf.py` lines 885–889 +- **Issue:** A standalone function `_dead_code()` containing only `pass` exists at module + level, preceded by unreachable `print('hi')`. This is clearly leftover scaffolding. +- **Suggestion:** Delete both. + +### 4.2 HIGH — Massive amounts of commented-out code + +- **Evidence:** `psf.py` lines 55–79 (commented abstract `eval_pixel`), lines 618–644 + (commented bounds logic), lines 806–850 (commented covariance/error computation), + lines 851–854 (commented leastsq metadata). `gaussian.py` lines 127–175 (commented + `gaussian_2d_rho`). Test files also have commented-out assertions with `# TODO: Why?`. +- **Impact:** Makes the code harder to read and maintain. Version control preserves + history; commented-out code adds noise. +- **Suggestion:** Remove all commented-out code blocks. File issues for features that were + partially implemented (e.g. the covariance/error estimation). + +### 4.3 HIGH — `hst.py` excluded from Ruff and mypy + +- **Evidence:** `pyproject.toml` line 121: `exclude = ["src/psfmodel/hst.py"]`, + `.mypy.ini` line 3: `exclude = hst.py` +- **Issue:** The largest module (789 lines) is exempt from all linting and type checking. + It uses bare `assert` for control flow (20 instances), `print()` for error reporting, + `os.system()` for shell execution, and has no type annotations. +- **Suggestion:** Incrementally bring `hst.py` under lint and type checking. Start by + adding type annotations to public methods and replacing `assert False` with proper + exceptions. + +### 4.4 MEDIUM — `psf.py` header says `# psfmodel/__init__.py` + +- **Evidence:** `psf.py` line 2 +- **Issue:** Stale header comment; the module is `psf.py`, not `__init__.py`. +- **Suggestion:** Fix to `# psfmodel/psf.py`. + +### 4.5 MEDIUM — `__init__.py` does not export `HSTPSF` + +- **Evidence:** `src/psfmodel/__init__.py` +- **Issue:** `__all__` exports `PSF` and `GaussianPSF` but not `HSTPSF`. Users must do + `from psfmodel.hst import HSTPSF`. If `HSTPSF` is part of the public API, it should be + in `__all__`. If it is internal/experimental, it should be prefixed with `_` or clearly + documented as such. +- **Suggestion:** Decide on the public status of `HSTPSF` and update `__all__` and docs + accordingly. Note that importing `HSTPSF` in `__init__.py` would trigger the + import-time crash for users without `TINYTIM` set (see Security section). + +### 4.6 LOW — Duplicate mypy configuration + +- **Evidence:** Both `.mypy.ini` and `[tool.mypy]` in `pyproject.toml` exist. Per pytest + configuration discovery rules, `.mypy.ini` takes precedence over `pyproject.toml`, so + the `pyproject.toml` mypy settings (e.g. `disallow_subclassing_any = false`) may not + be applied. +- **Suggestion:** Consolidate into `pyproject.toml` and delete `.mypy.ini`. + +### 4.7 LOW — Stale `.flake8` and `setup.cfg` files + +- **Evidence:** `.flake8` configures flake8 (superseded by Ruff), `setup.cfg` contains + only `[metadata] name = rms-psfmodel` (superseded by `pyproject.toml`). +- **Suggestion:** Delete both files. + +--- + +## 5. Best Practices Alignment + +### 5.1 CRITICAL — `hst.py` reads environment variables at module import time + +- **Evidence:** `hst.py` lines 198–199 + + ```python + TINY_TIM_DIR = os.environ['TINYTIM'] + PSF_CACHE_DIR = os.environ['PSF_CACHE_DIR'] + ``` + +- **Issue:** Any `import psfmodel.hst` (or even indirect import) crashes with `KeyError` + if these environment variables are not set. This prevents the entire module from being + imported for testing, documentation generation, or by users who only need `GaussianPSF`. +- **Suggestion:** Defer the lookup to when TinyTim is actually called. Use + `os.environ.get()` with a `None` default and validate at the point of use. + +### 5.2 CRITICAL — `os.system()` used for shell execution with string concatenation + +- **Evidence:** `hst.py` lines 528–544 +- **Issue:** `os.system('./tiny1 ' + temp_filename + ' < ' + params_filename + ...)` is + vulnerable to shell injection if any of the file paths or parameters contain shell + metacharacters. `os.system()` also provides no error handling — the return code is + ignored. +- **Suggestion:** Use `subprocess.run()` with a list of arguments (no `shell=True`). + Check the return code and raise on failure. + +### 5.3 HIGH — `assert` used for control flow in `hst.py` (20 instances) + +- **Evidence:** `hst.py` lines 300, 322, 325, 329, 333, 369, 373, 436, 438, 440, 442, + 449, 473, 550, 586, 615, 616, 719, 772, 773 +- **Issue:** `assert False` is used to signal errors (e.g. unknown instrument, PSF too + small). Asserts are stripped under `python -O`, so these checks vanish in optimized + mode, leading to silent misbehavior or later crashes with confusing tracebacks. +- **Suggestion:** Replace every `assert` used for validation with `raise ValueError(...)`. + +### 5.4 HIGH — `open()` without context manager or `encoding=` + +- **Evidence:** `hst.py` line 482: `params_fp = open(params_filename, 'w')` +- **Issue:** The file is opened without a `with` statement and without specifying + `encoding='utf-8'`. If an exception occurs between `open()` and `close()` (line 522), + the file handle leaks. The default encoding depends on the platform locale. +- **Suggestion:** Use `with open(params_filename, 'w', encoding='utf-8') as params_fp:`. + +### 5.5 HIGH — `hst.py` shadows the built-in `filter` + +- **Evidence:** `hst.py` line 222: `def __init__(self, ..., filter, ...)` +- **Issue:** The parameter name `filter` shadows the Python built-in. Per project rules + (python_best_practices.mdc): "Do NOT use variable or function names that shadow Python + built-ins... append a single underscore (e.g. `filter_`)." +- **Suggestion:** Rename to `filter_` throughout `hst.py`. + +### 5.6 HIGH — `hst.py` `os.getcwd()` used to detect OS + +- **Evidence:** `hst.py` lines 201–206 + + ```python + if os.getcwd()[1] == ':': + DEV_NULL = 'NUL' + else: + DEV_NULL = '/dev/null' + ``` + +- **Issue:** Detecting Windows by checking if the second character of `cwd` is `:` is + fragile. It fails for UNC paths (`\\server\share`), and the check runs at import time + making it dependent on the cwd at that moment. +- **Suggestion:** Use `sys.platform == 'win32'` or `os.devnull` (which is the + platform-correct value). + +### 5.7 MEDIUM — `hst.py` `eval_pixel` references undefined variables `y` and `x` + +- **Evidence:** `hst.py` line 740 + + ```python + self._cache_psf(max(abs(y)*2+1, abs(x)*2+1), **kwargs) + ``` + + The parameters are `coord`, `offset`, `scale`, `base`, `**kwargs`. There are no local + variables `y` or `x` before this line. This is a `NameError` at runtime. +- **Impact:** `eval_pixel` is completely broken and would crash on any call. +- **Suggestion:** Replace with `coord[0]` and `coord[1]`. + +### 5.8 MEDIUM — `HSTPSF.__init__` passes extra positional args to `PSF.__init__` + +- **Evidence:** `hst.py` line 272: `PSF.__init__(self, movement, movement_granularity)` +- **Issue:** `PSF.__init__` accepts only `**kwargs`, not positional arguments. This would + raise a `TypeError` at runtime. The `movement` and `movement_granularity` parameters are + handled by `_eval_rect_smeared` via `eval_rect`, not by the base class. +- **Impact:** `HSTPSF` cannot be instantiated. This module is entirely non-functional in + its current state. +- **Suggestion:** Remove the extra arguments from the `PSF.__init__` call. + +### 5.9 MEDIUM — `hst.py` `_cache_pixelation` references `self.movement` (never set) + +- **Evidence:** `hst.py` lines 654–655 +- **Issue:** `self.movement` is never assigned in `HSTPSF.__init__`. The base class + `PSF.__init__` does not set it either. This would raise `AttributeError` at runtime. +- **Suggestion:** If motion blur is needed, store `self._movement` and + `self._movement_granularity` as instance attributes in `HSTPSF.__init__`. + +### 5.10 LOW — `num_sigma` checked as truthy instead of `is not None` + +- **Evidence:** `psf.py` lines 542, 566 +- **Issue:** `if num_sigma:` treats `0.0` as falsy. If a caller passes `num_sigma=0.0` + (meaning "reject no pixels"), the code skips the sigma-rejection loop, which is correct + by coincidence but not by intent. +- **Suggestion:** Use `if num_sigma is not None:` for clarity. + +--- + +## 6. Types and Static Checks + +### 6.1 HIGH — mypy is commented out of dev dependencies + +- **Evidence:** `pyproject.toml` line 69: `# "mypy>=1.0",` +- **Issue:** mypy is not installed by `pip install -e ".[dev]"`, so developers are not + running type checks locally. The CI workflow *does* run mypy, but the local dev + experience is inconsistent. +- **Suggestion:** Uncomment mypy in dev dependencies. + +### 6.2 HIGH — `_eval_rect` has `# type: ignore` on method signature + +- **Evidence:** `gaussian.py` lines 515, 545 +- **Issue:** Both `_eval_rect` and `eval_rect` have `# type: ignore` on the `def` line + because the override signatures do not match the base class. The base class defines + `_eval_rect(self, ...) -> npt.NDArray[np.float64]` while the override adds `sigma`, + `sigma_y`, `sigma_x`, and `angle` keyword arguments. +- **Suggestion:** Align the base-class signature (use `**kwargs: Any` in the base) or use + `@overload` to express the extended signatures. The `# type: ignore` suppresses + real type errors. + +### 6.3 MEDIUM — `hst.py` has zero type annotations + +- **Evidence:** All 789 lines of `hst.py` +- **Issue:** No function signatures, no return types, no variable annotations. The module + is excluded from mypy so this is not flagged. +- **Suggestion:** Add annotations incrementally, starting with the public API + (`HSTPSF.__init__`, `eval_point`, `eval_pixel`, `eval_rect`, `run_tinytim`). + +### 6.4 LOW — Inconsistent return type annotations + +- **Evidence:** `gaussian.py` `eval_point` returns `cast(float, ret)` but the actual + return could be an array; `gaussian_1d` returns `cast(float | npt.NDArray, ret)`. +- **Suggestion:** Ensure cast types match actual possible returns. + +--- + +## 7. Testing + +### 7.1 HIGH — Overall coverage is only ~45% + +- **Evidence:** pytest-cov output: `TOTAL 791 435 302 19 45%` +- **Issue:** `hst.py` is 0% covered (331 statements). `psf.py` is 68% covered — the + entire `find_position` debug-output and bad-pixel rejection paths are untested. The + coverage target in `pyproject.toml` is set to `fail_under = 40` (marked `# TODO`). +- **Suggestion:** Raise the coverage target incrementally. Add tests for + `find_position` with `num_sigma`, edge-of-image, and all-masked scenarios. Add basic + `hst.py` unit tests (mocking TinyTim). + +### 7.2 HIGH — No tests for `HSTPSF` at all + +- **Evidence:** `tests/` contains only `test_gaussian.py` and `test_psf.py`. +- **Issue:** The entire HST PSF path is untested. Given the numerous bugs identified above + (`NameError`, broken `__init__`, missing attributes), the module is likely non-functional. +- **Suggestion:** Add a test module `test_hst.py` with mocked TinyTim calls. + +### 7.3 MEDIUM — Tests do not assert on exception messages + +- **Evidence:** All `pytest.raises(ValueError)` calls in both test files check only the + exception type, not the message content (e.g. `test_gaussian.py` lines 127–138, + `test_psf.py` lines 14–23). +- **Suggestion:** Use `pytest.raises(ValueError, match="...")` or assert on + `str(exc_info.value)`. + +### 7.4 MEDIUM — Commented-out assertions with `# TODO: Why?` + +- **Evidence:** `test_gaussian.py` lines 292, 302, 310, 318 +- **Issue:** Several assertions for `scale` and `sigma` in `test_gaussian_find_position` + are commented out with `# TODO: Why?`. This suggests known fitting accuracy issues that + are not understood or tracked. +- **Suggestion:** Investigate and either fix the fitting or document the known limitation + with a linked issue. + +### 7.5 MEDIUM — No `conftest.py` or shared fixtures + +- **Evidence:** No `tests/conftest.py` exists. +- **Issue:** Test setup is duplicated across test functions (e.g. creating `GaussianPSF()` + instances, generating test images). +- **Suggestion:** Create shared fixtures for common PSF instances and test images. + +### 7.6 LOW — Tests access private `_background_gradient_coeffs` directly + +- **Evidence:** `test_psf.py` line 15: `PSF._background_gradient_coeffs((3, -1), 1)` +- **Issue:** Tests import and call `_`-prefixed methods directly, coupling them to + implementation details. +- **Suggestion:** Test through the public API (`background_gradient_fit`, + `background_gradient`) where possible. + +--- + +## 8. Security and Robustness + +### 8.1 CRITICAL — Shell injection via `os.system()` in `hst.py` + +- **Evidence:** `hst.py` lines 528–544 +- **Issue:** `os.system('./tiny1 ' + temp_filename + ' < ' + params_filename + redir)` + concatenates user-influenced strings into a shell command. If `PSF_CACHE_DIR` or a filter + name contained shell metacharacters (e.g. `; rm -rf /`), arbitrary commands could execute. +- **Suggestion:** Use `subprocess.run([...], check=True)` without `shell=True`. + +### 8.2 HIGH — No path traversal protection in `hst.py` + +- **Evidence:** `hst.py` lines 471, 474–476 +- **Issue:** `fits_filename = path_join(PSF_CACHE_DIR, fits_base)` and + `os.chdir(TINY_TIM_DIR)` use environment-variable paths with no validation. A malicious + `PSF_CACHE_DIR` could write FITS files to arbitrary locations. +- **Suggestion:** Validate that paths resolve within expected directories. + +### 8.3 MEDIUM — `hst.py` reads FITS header comments to extract diffusion matrix + +- **Evidence:** `hst.py` lines 571–578 +- **Issue:** The diffusion matrix is parsed from FITS header COMMENT lines by splitting on + spaces and calling `float()`. No validation is done on the number of values, their range, + or the comment format. A corrupted or modified FITS file could cause `IndexError`, + `ValueError`, or inject extreme values into the convolution. +- **Suggestion:** Validate the parsed matrix (shape, value ranges) before use. + +--- + +## 9. Dependencies and Tooling + +### 9.1 HIGH — Runtime dependencies have no minimum versions + +- **Evidence:** `pyproject.toml` lines 11–15 + + ```toml + dependencies = [ + "astropy", + "numpy", + "scipy" + ] + ``` + +- **Issue:** Per project rules, library dependencies should specify minimum compatible + versions (e.g. `numpy>=1.24`). Without minimums, users may install ancient versions + that lack required features. +- **Suggestion:** Add minimum versions based on the features used (e.g. `numpy>=1.24`, + `scipy>=1.10`, `astropy>=5.3`). + +### 9.2 MEDIUM — `pyproject.toml` has stale TODO markers + +- **Evidence:** `pyproject.toml` lines 23 (`keywords = ["TODO"]`), 59 + (`"TODO" = ["py.typed"]`), 88 (`#TODO = "main.psfmodel:main"`), 103 + (`fail_under = 40 # TODO`) +- **Suggestion:** Replace `keywords` with actual keywords (e.g. `"PSF"`, `"astronomy"`, + `"Gaussian"`). Fix the `py.typed` package-data key to `"psfmodel"`. Remove or fill in + the script entry point. Set `fail_under` to a meaningful target (e.g. 80). + +### 9.3 MEDIUM — Duplicate/conflicting linter configurations + +- **Evidence:** `.flake8` (max-line-length 90), `pyproject.toml` Ruff (line-length 100), + `.mypy.ini` vs `[tool.mypy]` in `pyproject.toml` +- **Issue:** Multiple config files for the same tools create confusion about which is + active. Ruff has replaced flake8 in this project. +- **Suggestion:** Delete `.flake8` and `.mypy.ini`. Consolidate everything into + `pyproject.toml`. + +### 9.4 LOW — `setup.cfg` still exists + +- **Evidence:** `setup.cfg` contains only `[metadata] name = rms-psfmodel` +- **Issue:** Redundant with `pyproject.toml`. May confuse build tools. +- **Suggestion:** Delete. + +--- + +## 10. Maintainability and Extensibility + +### 10.1 HIGH — `find_position` returns `tuple[float, float, dict[str, Any]]` + +- **Evidence:** `psf.py` line 418 +- **Issue:** The return type is `None | tuple[float, float, dict[str, Any]]`. The + `details` dict has 10+ keys with mixed types, no schema, and no documentation beyond + the docstring. Callers must remember string keys like `'subimg-gradient'` and + `'scaled_psf'`. +- **Suggestion:** Define a `@dataclass` (e.g. `FitResult`) with typed fields. This + enables IDE autocompletion, type checking, and clearer documentation. + +### 10.2 HIGH — `_additional_params` is a list of tuples with magic indices + +- **Evidence:** `gaussian.py` lines 74–82, `psf.py` lines 651–658 +- **Issue:** Each additional parameter is stored as `(min, max, name)` in a list. Code + accesses `ap[0]`, `ap[1]`, `ap[2]` with no named fields. This makes the code fragile + and hard to extend. +- **Suggestion:** Use a `NamedTuple` or `@dataclass` (e.g. + `ParamSpec(min: float, max: float, name: str)`). + +### 10.3 MEDIUM — Docs only automodule `psfmodel`, missing `gaussian` and `hst` + +- **Evidence:** `docs/module.rst` lines 1–10 +- **Issue:** The Sphinx docs only have `.. automodule:: psfmodel`. The `gaussian` and + `hst` submodules are not documented. Since `GaussianPSF` is re-exported from + `__init__.py` it may appear, but `HSTPSF`, static methods, and utility functions are + invisible in the docs. +- **Suggestion:** Add `.. automodule:: psfmodel.gaussian` and + `.. automodule:: psfmodel.psf` sections (and `psfmodel.hst` when ready). + +### 10.4 MEDIUM — README describes wrong project + +- **Evidence:** `README.md` line 33: "psfmodel is a set of classes for reading and + searching star catalogs. Currently NAIF SPICE star catalogs, the Yale Bright Star + Catalog (YBSC), and UCAC4 are supported." +- **Issue:** This is a copy-paste from a different project. The actual project is a PSF + model fitting library. +- **Suggestion:** Rewrite the introduction to describe PSF modeling and fitting. + +### 10.5 LOW — GUI program in `programs/psf_gui.py` uses wildcard import + +- **Evidence:** `psf_gui.py` line 5: `from tkinter import *` +- **Issue:** Wildcard imports pollute the namespace and make it unclear which names come + from tkinter. +- **Suggestion:** Use `import tkinter as tk` and prefix all names with `tk.`. + +--- + +## 11. Packaging and Distribution + +### 11.1 MEDIUM — `py.typed` package-data key is `"TODO"` + +- **Evidence:** `pyproject.toml` line 59: `"TODO" = ["py.typed"]` +- **Issue:** The `py.typed` marker file exists in `src/psfmodel/`, but the `pyproject.toml` + key should be `"psfmodel"`, not `"TODO"`. The marker file is not included in the built + wheel. +- **Suggestion:** Change to `"psfmodel" = ["py.typed"]`. + +### 11.2 LOW — `requirements.txt` is not present or useful + +- **Evidence:** A `requirements.txt` exists but was not read; per the dependency management + rule it should contain only `-e .` if kept. +- **Suggestion:** Verify contents or delete. + +--- + +## 12. Technical Debt and Risk + +### 12.1 HIGH — `hst.py` is effectively non-functional + +- **Evidence:** Multiple runtime errors (NameError, TypeError, AttributeError) identified + in code review, zero test coverage, excluded from all linting. +- **Issue:** This module cannot be imported without specific environment variables and + cannot be instantiated due to broken `__init__`. It appears to be legacy code that has + not been maintained alongside the refactoring of `psf.py`. +- **Suggestion:** Either invest in bringing `hst.py` up to standard (fix bugs, add types, + add tests, remove `os.system`) or mark it explicitly as experimental/unsupported and + gate the import behind a try/except with a clear message. + +### 12.2 MEDIUM — Commented-out covariance/error estimation in `_find_position` + +- **Evidence:** `psf.py` lines 806–850 +- **Issue:** The switch from `scipy.optimize.leastsq` to `scipy.optimize.minimize` (with + Powell) means covariance information is no longer available. The old code is commented + out, and the docstring still documents `leastsq_cov`, `x_err`, `y_err`, etc., that are + never populated. This is misleading to users who expect uncertainty estimates. +- **Suggestion:** Either implement uncertainty estimation (e.g. via numerical Hessian or + bootstrap) or remove the references from the docstring and file an issue to track the + feature gap. + +### 12.3 MEDIUM — `gaussian_2d_rho` commented out but referenced in documentation context + +- **Evidence:** `gaussian.py` lines 127–175 +- **Issue:** An alternative parameterization of the 2-D Gaussian using correlation `rho` + is commented out. The mathematical notes in the comments contain an undefined variable + `xcorr` (should be `rho * sigma_x * sigma_y`), suggesting the implementation was never + completed. +- **Suggestion:** Delete the commented-out code. File an issue if the `rho` + parameterization is needed in the future. + +--- + +## Recommended Priorities + +1. **Fix `gaussian_integral_1d` `np.abs()` bug** — silent numerical errors in array + integrals. One-line fix with high data-integrity impact. + +2. **Fix `background_gradient_fit` missing f-string** — users get unhelpful error + messages. One-character fix. + +3. **Fix `_find_position` double-scaling of `scaled_psf`** — incorrect bad-pixel + rejection. Straightforward logic fix. + +4. **Replace all `print()` with `logging`** — the single largest quality-of-life + improvement for library users. Systematic but not complex. + +5. **Triage `hst.py`** — decide whether to fix or deprecate. If fixing: address + import-time crash, `os.system()`, `assert` misuse, undefined variables, and missing + `__init__` arguments. If deprecating: gate the import and document. + +6. **Raise test coverage** — add tests for `find_position` edge cases, exception + messages, and `HSTPSF` (mocked). Increase `fail_under` from 40% to 80%+. + +7. **Clean up commented-out code and TODOs** — remove dead code, fix stale comments, and + resolve `pyproject.toml` TODO markers. + +8. **Consolidate configuration** — delete `.flake8`, `.mypy.ini`, `setup.cfg`. Uncomment + mypy in dev dependencies. Fix `py.typed` package-data key. + +9. **Fix README** — rewrite the introduction to describe PSF modeling instead of star + catalogs. + +10. **Add minimum dependency versions** — specify `numpy>=X`, `scipy>=Y`, `astropy>=Z` + in `pyproject.toml`. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8446c08..9726c9a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,112 +1,146 @@ -# Information for Potential Contributors +# Contributing to rms-psfmodel -First off, thanks for taking the time to contribute! +Thank you for your interest in contributing to rms-psfmodel! This document provides guidelines and instructions for contributing to the project. -This software is maintained by the [Ring-Moon Systems Node](https://pds-rings.seti.org) of NASA's [Planetary Data System](https://pds.nasa.gov). All types of contributions are encouraged and valued. See the [Table of Contents](#table-of-contents) for different ways to help and details about how this project handles them; please read the relevant section before making your contribution. +## Code of Conduct -> If you like the project, but just don't have time to contribute, there are other easy ways to support the project and show your appreciation! -> - Star the project on GitHub -> - Post about it on social media -> - Refer to this project in your project's README -> - Mention the project at conferences and workshops and tell your friends/colleagues -> - Cite the project in your papers and posters +We expect all contributors to follow our Code of Conduct, which ensures a welcoming and inclusive environment for everyone. +See [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md). +## Getting Started -## Table of Contents +1. Fork the repository on GitHub +2. Clone your fork locally: -- [Code of Conduct](#code-of-conduct) -- [I Have a Question](#i-have-a-question) -- [I Want to Report a Bug](#i-want-to-report-a-bug) -- [I Want to Suggest an Enhancement](#i-want-to-suggest-an-enhancement) -- [I Want To Contribute Code](#i-want-to-contribute-code) + ```bash + git clone https://github.com/your-username/rms-psfmodel.git + cd rms-psfmodel + ``` +3. Create a virtual environment and install the package with dev dependencies: -## Code of Conduct + ```bash + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + pip install -e ".[dev]" + ``` + +## Development Workflow + +1. Create a new branch for your feature or bugfix: + + ```bash + git checkout -b feature/your-feature-name + # or + git checkout -b bugfix/issue-number + ``` + +2. Make your changes, following our coding standards +3. Write or update tests as necessary +4. Run the tests and lint to ensure they pass: -This project and everyone participating in it are governed by the -[Code of Conduct](CODE_OF_CONDUCT.md). -By participating, you are expected to uphold this code. Please report unacceptable behavior -to . + ```bash + scripts/run-all-checks.sh + ``` +5. Commit your changes with a descriptive message: -## I Have a Question + ```bash + git commit -m "Add feature: description of your changes" + ``` -> Please read the available documentation! +6. Push your branch to your fork: -Before asking a question, you can search for existing [issues](https://github.com/SETI/rms-psfmodel/issues) that might help you. If you find a suitable issue and still need clarification, you can write your question in that issue. + ```bash + git push origin feature/your-feature-name + ``` -If you can't find an appropriate issue and still want to ask a question, we recommend the following: +7. Open a Pull Request on GitHub -- Open an [issue](https://github.com/SETI/rms-psfmodel/issues/new). -- Provide as much context and detail as you can. -- Provide project and platform versions (operating system, Python version, etc.), depending on what seems relevant. +## Coding Standards -We will try to answer your question as soon as possible. +We follow these standards for all code contributions: +* **Python Style**: Follow PEP 8 +* **Type Hints**: Use type hints for all function parameters and return values +* **Docstrings**: Document all classes and methods with docstrings following the Google style +* **Testing**: Include unit tests for new functionality +* **Compatibility**: Ensure compatibility with Python 3.10+ -## I Want to Report a Bug +Example of a well-formatted function: -### Before Submitting a Bug Report +```python +def calculate_offset(image: NDArrayFloatType, model: NDArrayFloatType) -> tuple[float, float]: + """Calculate the offset between an image and a model. -A good bug report shouldn't leave others needing to chase you for more information. Therefore, we ask you to investigate carefully and collect all appropriate information in advance. + Parameters: + image: The observed image as a NumPy array + model: The theoretical model as a NumPy array -- Make sure that you are using the latest version of this software and its supporting packages. -- Make sure that you have read the documentation. -- Determine that your bug really is a bug and not an error in your code or a misunderstanding in how to use our software. -- To see if other users have experienced (and potentially solved) the same issue you're having, check if there is an existing issue for your bug or error in the [bug tracker](https://github.com/SETI/rms-psfmodel/issues). -- Collect information about the bug: - - Stack trace (Traceback) - - OS and version (Windows/Linux/macOS), processor (x86/ARM/M1), Python version - - Detailed information on how to reproduce the bug, including function parameters, command line arguments, and input given/output received. + Returns: + A tuple containing the (u, v) offset in pixels + """ + # Implementation here + return u_offset, v_offset +``` -### How Do I Submit a Good Bug Report? +## Pull Request Process -> You must never report security-related issues, vulnerabilities, or bugs that include sensitive information to the issue tracker or elsewhere in public. Instead, sensitive bugs must be sent by email to . +1. Ensure all tests pass +2. Update documentation if necessary +3. Make sure your code is properly formatted and passes both ruff and mypy +4. Request a review from a maintainer +5. Address any feedback from reviewers -We use GitHub Issues to track bugs and errors. If you run into an issue with the project: +The maintainers will merge your PR once it meets all requirements. -- Open an [issue](https://github.com/SETI/rms-psfmodel/issues/new) with a **clear and descriptive title**. Please label the issue as `A-Bug` with no other labels. -- Explain the **behavior you would expect** and the **behavior observed**. -- Provide as much context as possible and describe the **detailed steps** that someone can follow to reproduce the problem. This usually includes providing your code; for a good bug report you should isolate the problem and create a reduced test case. -- Provide other information collected in the previous section, such as the operating system and language version. +## Testing -Once it's filed: +We use pytest for testing. To run the tests: -- The project team will label the issue accordingly. -- A team member will try to reproduce the issue with your provided steps. If there are no steps given and no obvious way to reproduce the issue, the team will ask you for clarification. -- If the team is able to reproduce the issue, it will be appropriately labeled and either assigned to a team member to fix, or left unassigned to be [implemented by someone else](#i-want-to-contribute-code). +```bash +pytest +``` +For more verbose output: -## I Want to Suggest an Enhancement +```bash +pytest -v +``` -This section guides you through submitting an enhancement, **including completely new features and minor improvements to existing functionality**. +To run a specific test file: -### Before Submitting an Enhancement +```bash +pytest tests/test_specific_file.py +``` -- Make sure that you are using the latest version of this software and its supporting packages. -- Make sure that you have read the documentation to see if the desired functionality is already provided. -- Perform a [search](https://github.com/SETI/rms-psfmodel/issues) to see if the enhancement has already been suggested. If it has, add a comment to the existing issue instead of opening a new one. -- Find out whether your idea fits within the scope and aims of the project. It's up to you to make a strong case to convince the project's developers of the merits of this feature. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on/plugin library. +## Documentation -### How Do I Submit a Good Enhancement Suggestion? +We use Sphinx for documentation. To build the docs: -We use GitHub Issues to track enhancement requests. If you want to suggest an enhancement: +```bash +cd docs +make html +``` -- Open an [issue](https://github.com/SETI/rms-psfmodel/issues/new) with a **clear and descriptive title**. Please label the issue as `A-Enhancement` with no other labels. -- Provide a **detailed** description of the suggested enhancement. -- **Explain why this enhancement would be useful** to most users. +The generated documentation will be in `docs/_build/html`. +When adding new features, please update the relevant documentation: -## I Want To Contribute Code +* Update docstrings for new functions and classes +* Add examples if appropriate +* Update the user guide or developer guide if necessary -> ### Legal Notice -> When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content, and that the content you contribute may be provided under the project license. +## Reporting Issues -We welcome all code contributions, including bug fixes, new features, and improvements to documentation. +If you find a bug or have a suggestion for improvement: -- All suggested changes must be submitted using GitHub's "Pull Request" functionality. -- All code changes must include appropriate new or updated tests to verify the changes made. -- Existing documentation, including function- and file-level docstrings, must be updated as necessary, and new features fully described. -- Code style must conform to that of the existing code; for Python this is generally a variant of PEP8 and PEP257. +1. Check if the issue already exists in the GitHub issue tracker +2. If not, create a new issue with: + * A clear, descriptive title + * A detailed description of the issue + * Steps to reproduce (for bugs) + * Your environment information (Python version, OS, etc.) + * Any relevant logs or screenshots -All submissions will be reviewed in detail by a project team member and changes may be suggested. Once the reviewer approves the changes, they will be merged into the main project branch and made a permanent part of the software. Your efforts to improve the software are greatly appreciated! +Thank you for contributing to rms-psfmodel! diff --git a/README.md b/README.md index bcaf0c4..c2e4983 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +# rms-psfmodel + + + [![GitHub release; latest by date](https://img.shields.io/github/v/release/SETI/rms-psfmodel)](https://github.com/SETI/rms-psfmodel/releases) [![GitHub Release Date](https://img.shields.io/github/release-date/SETI/rms-psfmodel)](https://github.com/SETI/rms-psfmodel/releases) [![Test Status](https://img.shields.io/github/actions/workflow/status/SETI/rms-psfmodel/run-tests.yml?branch=main)](https://github.com/SETI/rms-psfmodel/actions) @@ -21,11 +25,15 @@ ![GitHub License](https://img.shields.io/github/license/SETI/rms-psfmodel) [![Number of GitHub stars](https://img.shields.io/github/stars/SETI/rms-psfmodel)](https://github.com/SETI/rms-psfmodel/stargazers) ![GitHub forks](https://img.shields.io/github/forks/SETI/rms-psfmodel) +[![DOI](https://zenodo.org/badge/rms-psfmodel.svg)](https://zenodo.org/badge/latestdoi/rms-psfmodel) + # Introduction -`psfmodel` is a set of classes for reading and searching star catalogs. Currently NAIF SPICE -star catalogs, the Yale Bright Star Catalog (YBSC), and UCAC4 are supported. +`psfmodel` provides abstract and concrete classes for modeling point spread functions (PSFs) +and fitting them to image data. It includes an analytic 2-D Gaussian PSF with optional +rotation, pixel integration, motion smearing, and background polynomial fitting, plus helpers +for astrometric position estimation via bounded optimization. `psfmodel` is a product of the [PDS Ring-Moon Systems Node](https://pds-rings.seti.org). @@ -41,6 +49,10 @@ pip install rms-psfmodel Details of each class are available in the [module documentation](https://rms-psfmodel.readthedocs.io/en/latest/module.html). +After installation, an optional Tkinter-based PSF explorer is available as the `psf_gui` command (or `python -m psf_gui` with `src` on `PYTHONPATH`). A working Tcl/Tk installation is required (for example, install the `python3-tk` package on Debian/Ubuntu). + +A characterization tool that systematically measures Gaussian PSF fitting accuracy across a wide parameter space is available in the repository. Clone the repo, install with `pip install -e ".[characterize]"`, and run it with `python -m characterize_gauss_fit`. + # Contributing Information on contributing to this package can be found in the diff --git a/TEST_SUITE_CRITIQUE.md b/TEST_SUITE_CRITIQUE.md new file mode 100644 index 0000000..a9b6565 --- /dev/null +++ b/TEST_SUITE_CRITIQUE.md @@ -0,0 +1,577 @@ +# Test Suite Critique Report + +**Generated:** 2026-04-12 +**Scope:** tests/ (no conftest.py present) + +## Executive summary + +The test suite covers the core mathematical functions of the `psfmodel` library +(Gaussian 1-D/2-D evaluation, integration, pixel evaluation, and PSF fitting) +with reasonable numeric precision assertions. However, there are significant +gaps: + +**Strengths:** + +- Numeric assertions use `pytest.approx` and `npt.assert_array_almost_equal` + consistently, which is appropriate for floating-point comparisons. +- `test_gaussian_find_position` uses `@pytest.mark.parametrize` to cover + multiple `bkgnd_degree` and `use_angular_params` combinations (8 variants). +- Tests validate both scalar and array inputs for mathematical functions. +- Integration tests (via `scipy.integrate`) confirm normalization properties. + +**Main gaps (high priority):** + +1. **Coverage is 45%** (target: 90%). The `hst.py` module (335 statements) has + 0% coverage. Even excluding `hst.py`, `psf.py` is only at 70% with large + uncovered regions (`find_position`, `_find_position`, `_fit_psf_func`, + `_eval_rect_smeared`). +2. **No exception-message assertions.** All 25 `pytest.raises(ValueError)` calls + lack `as exc_info` and message-content checks. +3. **No `conftest.py` or shared fixtures** -- setup logic is duplicated across + tests. +4. **No tests for `PSF.__init__`**, `GaussianPSF.__init__` validation (e.g. + `angle_subsample` range), `eval_rect` validation, `find_position` edge + cases, `_eval_rect_smeared` motion blur, or `background_gradient`. +5. **No logging assertions** despite extensive `self._logger` usage in `psf.py`. +6. **`hst.py` is entirely untested** (excluded from mypy/ruff but still shipped). + +**Nice-to-have improvements:** + +- Parameterize repetitive test cases in `test_gaussian_1d`, `test_gaussian_2d`. +- Add type annotations to test functions. +- Configure `filterwarnings = ["error"]` and `--strict-markers` in pytest. + +--- + +## 1. Return values and assertions + +**Strengths:** + +- Numeric return values are checked with `pytest.approx` (explicit tolerances + where needed) and `npt.assert_array_almost_equal`. +- `test_gaussian_find_position` checks specific dictionary keys (`sigma_y`, + `sigma_x`, `scale`) with explicit values. + +**Issues:** + +- **`test_gaussian_eval_rect` (line 226-230):** Only checks + `np.sum(...) == pytest.approx(1)`. Does not assert shape, individual pixel + values, or that the center pixel is the maximum. This is a weak existence + assertion. +- **`test_gaussian_find_position`:** Several assertions are commented out with + `# TODO: Why?` (lines 349, 366-367, 384-385), meaning those return values + are not verified at all. +- **`test_background_gradient_fit` (line 84):** Checks `np.sum(img_mask) == 0` + but not the shape or dtype of `img_mask`. +- **`test_bkgnd_gradient_coeffs`:** Uses `assert np.all(ret == exp)` which + gives poor diagnostic messages on failure compared to + `npt.assert_array_equal`. + +--- + +## 2. Success and failure conditions + +### GaussianPSF + +| Method | Success tested | Failure tested | Missing | +|---|---|---|---| +| `gaussian_1d` | Yes (scalars, arrays) | No | Negative sigma, sigma=0 | +| `gaussian_2d` | Yes (scalars, arrays, rotation) | No | Invalid sigma, angle edge cases | +| `gaussian_integral_1d` | Yes (scalars, arrays, params) | No | sigma<=0 (has `assert sigma > 0`), xmin > xmax | +| `gaussian_integral_2d` | Yes (scalars, arrays) | No | Invalid inputs | +| `eval_point` | Yes | Partial (6 ValueError) | No message assertions; missing edge cases for `angle` param | +| `eval_pixel` | Yes | Partial (9 ValueError) | No message assertions | +| `eval_rect` | Minimal (sum only) | No | Odd-size validation, negative size, shape check | +| `__init__` | Implicit only | No | `angle_subsample` out of range, invalid sigma types | + +### PSF + +| Method | Success tested | Failure tested | Missing | +|---|---|---|---| +| `_background_gradient_coeffs` | Yes (orders 1-3) | Partial (5 ValueError) | No message assertions | +| `background_gradient_fit` | Yes (masked, unmasked, sigma) | Partial (4 ValueError) | No message assertions | +| `background_gradient` | Yes (indirect) | No | Invalid params | +| `find_position` | Yes (via GaussianPSF) | No | box_size validation, edge-of-image None return, optimizer failure, all-pixels-masked | +| `_eval_rect_smeared` | No | No | Motion blur (movement != None) | +| `__init__` | Implicit only | No | Logger, detailed_logging | + +--- + +## 3. Consistency + +- **Naming:** Test names follow `test__` or + `test_` consistently. However, they do not encode conditions + (e.g., `test_gaussian_1d_with_scale`), making it hard to tell which scenario + failed. +- **Structure:** `test_gaussian_1d` and `test_gaussian_2d` mix many scenarios + into a single test function (scalar, array, integration, parameter + variations) rather than separating them into focused tests. +- **Fixtures:** No fixtures are used anywhere. Common PSF objects (e.g., + `GaussianPSF()`, `GaussianPSF(sigma=(1,1))`) are recreated in each test. +- **Assertion style:** Mixed use of `assert ... == pytest.approx(...)`, + `npt.assert_array_almost_equal`, and `assert np.all(ret == exp)`. The last + form gives poor failure diagnostics. + +--- + +## 4. Completeness + +### Coverage map + +| Module | Public methods | Tested | Untested | +|---|---|---|---| +| `gaussian.py` | `__init__`, `gaussian_1d`, `gaussian_2d`, `gaussian_integral_1d`, `gaussian_integral_2d`, `eval_point`, `eval_pixel`, `eval_rect` | All partially | `__init__` validation, `eval_rect` shape/edge, `_eval_rect` directly | +| `psf.py` | `__init__`, `eval_point` (abstract), `eval_rect` (abstract), `_eval_rect_smeared`, `_background_gradient_coeffs`, `background_gradient_fit`, `background_gradient`, `find_position` | `_background_gradient_coeffs`, `background_gradient_fit`, `find_position` (partial), `background_gradient` (indirect) | `__init__`, `_eval_rect_smeared`, `find_position` edge cases, `_fit_psf_func`, `_find_position` directly | +| `hst.py` | `HSTPSF.__init__`, `run_tinytim`, `eval_point`, `eval_pixel`, `eval_rect` | None | Everything (0% coverage) | +| `__init__.py` | `__all__` exports | Yes (implicit) | N/A | + +### Docstring gaps + +- `GaussianPSF.eval_pixel` documents the `sigma` parameter as + `tuple[float, float] | None` but also accepts a scalar `float` in + `eval_point`. Tests exercise both, but `eval_pixel` does not accept scalar + sigma, which is inconsistent with `eval_point`. +- `find_position` documents many metadata keys (e.g., `x_err`, `y_err`, + `scale_err`) that are currently commented out in the code. Tests do not + verify which keys are present/absent. + +--- + +## 5. Redundancy + +- **`test_gaussian_integral_1d` lines 90-92 and 102-104:** Both assert + `gaussian_integral_1d(0.0, 1.0) == approx(integrate.quad(...))` with + identical inputs. The second is a duplicate. +- **`test_gaussian_eval_pixel` and `test_gaussian_eval_point`:** Both test + sigma-conflict `ValueError` raises in nearly identical patterns (lines + 145-156 vs 184-201). The validation logic is the same and could share a + helper or parametrize. +- **`test_gaussian_find_position`:** Tests multiple PSF configurations in a + single test function with repeated setup patterns. Breaking into separate + tests per scenario would improve isolation and diagnostics. + +--- + +## 6. Parallel execution + +- **No global mutable state:** Tests do not modify module-level variables or + singletons. Each test creates its own `GaussianPSF` instances. +- **No shared files or external resources:** All data is generated in-memory. +- **Parallel safe:** The tests should run correctly with `pytest -n auto`. + The current config uses `-n 4`. + +**No issues detected.** + +--- + +## 7. Mocking and dependency isolation + +- **No external calls:** The tested modules (`gaussian.py`, `psf.py`) do not + make HTTP requests or access the file system (other than `hst.py` which is + untested). +- **No time-sensitive logic** in tested code paths. +- **No mocks used:** The tests are integration-style, calling real scipy + integration functions. This is appropriate for numerical code. +- **`hst.py`** calls `os.system`, `os.environ`, `os.getcwd`, `pyfits.open`, + and file I/O. If/when tests are added, these will need to be mocked. + +**No issues in current tests, but `hst.py` will need heavy mocking when +tested.** + +--- + +## 8. Security and input validation + +- **Input validation:** `GaussianPSF.__init__` validates `angle_subsample` but + there is no test for it. `eval_rect` validates odd positive shape but has no + test for it. `find_position` validates `box_size` but has no test for it. +- **No sensitive data:** Tests use only numeric constants; no credential risk. +- **No path traversal risk** in tested code (`hst.py` handles paths but is + untested). + +**Missing validation tests:** + +- `GaussianPSF(angle_subsample=0)` -- should raise `ValueError` +- `GaussianPSF(angle_subsample=100)` -- should raise `ValueError` +- `GaussianPSF(angle_subsample=3.5)` -- should raise `ValueError` (not int) +- `GaussianPSF().eval_rect((4, 4))` -- even dimensions +- `GaussianPSF().eval_rect((-1, 5))` -- negative dimensions +- `PSF.find_position(...)` with bad `box_size` + +--- + +## 9. Parameterization + +**Good:** + +- `test_gaussian_find_position` uses `@pytest.mark.parametrize` for + `use_angular_params` and `bkgnd_degree`. + +**Should be parameterized:** + +- **`test_gaussian_1d` (lines 14-33):** 14 separate assertions with different + parameter combinations. These should be parametrized over + `(x, kwargs, expected)` tuples. +- **`test_gaussian_2d` (lines 37-67):** 16+ separate assertions. Same + recommendation. +- **`test_gaussian_eval_point` (lines 145-156):** Six `pytest.raises` calls + with different sigma configurations. Should be parametrized. +- **`test_gaussian_eval_pixel` (lines 184-201):** Nine `pytest.raises` calls. + Same recommendation. +- **`test_bkgnd_gradient_coeffs` (lines 14-23):** Five `pytest.raises` calls. + +**Missing boundary tests:** + +- sigma at 0 (should assert `sigma > 0` raises) +- Very large sigma values +- Very large/small `scale` and `base` values +- `angle_subsample` at boundaries (1, 99) + +--- + +## 10. Async (if applicable) + +Not applicable -- no async code in the project. + +--- + +## 11. Output and contract + +- **`find_position` return shape:** The docstring specifies a detailed + `dict[str, Any]` with many keys (`x`, `y`, `scale`, `base`, `subimg`, + `bkgnd_params`, etc.). Tests only check `sigma_y`, `sigma_x`, and `scale` + from the metadata dict. Many documented keys are never verified: + - `x`, `y`, `base`, `subimg`, `bkgnd_params`, `bkgnd_mask`, `gradient`, + `subimg-gradient`, `psf`, `scaled_psf`. +- **`eval_rect` return shape:** Not asserted (only `np.sum` is checked). +- **Exception types:** Tested (all use `pytest.raises(ValueError)`), but + exception messages are never asserted. + +--- + +## 12. Error handling + +- **25 `pytest.raises(ValueError)` calls across both files**, but **zero** + assert on message content. Every `raise ValueError(...)` in the source + includes a descriptive message string; tests should verify these. +- **Specific issues:** + - `test_gaussian_eval_point` line 145: `GaussianPSF(sigma=(1, 1)).eval_point((0, 0), sigma=5)` + should check message contains "Cannot specify both sigma". + - `test_gaussian_eval_point` line 152: `GaussianPSF(sigma=None).eval_point((0, 0))` + should check message contains "must be specified". + - `test_bkgnd_gradient_coeffs` lines 14-23: should check message contains + "odd positive shape" or "non-negative". +- **`gaussian_integral_1d` line 280:** Uses `assert sigma > 0.0` instead of + `raise ValueError`. This means invalid sigma produces an `AssertionError` + that disappears with `-O`. No test covers this. + +--- + +## 13. State and workflow + +- **No state machines or lifecycle transitions** in the tested code. +- **Idempotency:** `eval_point`, `eval_pixel`, `eval_rect` are pure functions + given the same PSF object -- no idempotency concerns. +- **Side effects:** `find_position` logs messages but tests do not verify + logging side effects (see Section 21). +- **`hst.py` caching:** `HSTPSF._cache_psf` and `_cache_pixelation` are + stateful with caching. Idempotency and cache invalidation are completely + untested. + +--- + +## 14. Test data and fixtures + +- **No `conftest.py`** exists. Common objects like `GaussianPSF()`, + `GaussianPSF(sigma=(1, 1))`, `GaussianPSF(sigma=(2.0, 3.0))` are recreated + in multiple tests. These should be fixtures. +- **Realistic data:** Test data is mathematically generated, which is + appropriate for a numerical library. Edge cases with `np.nan`, `np.inf`, or + very large arrays are not tested. +- **Cleanup:** No external resources are created; no cleanup needed. +- **Fixture scope:** N/A (no fixtures used). +- **Fixture depth:** N/A. + +**Recommended fixtures:** + +```python +@pytest.fixture +def default_psf(): + return GaussianPSF() + +@pytest.fixture +def symmetric_psf(): + return GaussianPSF(sigma=(1.0, 1.0)) + +@pytest.fixture +def asymmetric_psf(): + return GaussianPSF(sigma=(2.0, 3.0)) +``` + +--- + +## 15. Flakiness indicators + +- **No time-based assertions.** +- **No order dependence** detected -- tests do not share mutable state. +- **No external dependencies** in tested code paths. +- **No random data.** +- **`test_gaussian_find_position`** uses numerical optimization + (`scipy.optimize.minimize`) which could theoretically produce slightly + different results on different platforms. The tolerances (`abs=5e-2`, + `abs=1e-1`) are generous enough to avoid flakiness, but this is worth noting. + +**Low flakiness risk overall.** + +--- + +## 16. Regression and documentation + +- **No bug references** in test comments or docstrings. +- **Commented-out code:** `test_gaussian_find_position` has multiple + commented-out assertions (lines 349, 366-367, 384-385) with `# TODO: Why?`. + These may represent known regressions or unfinished investigations. +- **Commented-out test code:** `test_gaussian_integral_2d` has a large + commented-out block (lines 126-141) that appears to be an unfinished rotation + test. +- **`filterwarnings`:** Not configured in `pyproject.toml`. Unexpected warnings + are silently swallowed. +- **Deprecation warnings:** No `pytest.warns` usage. If numpy or scipy emit + deprecation warnings, they go unnoticed. + +--- + +## 17. Other + +- **Type annotations:** Test functions lack return type annotations and + parameter annotations (mypy is configured with `strict = false` for test + files). +- **Clarity:** Test function names describe the method under test but not the + scenario. `test_gaussian_1d` tests 14 different scenarios in one function. +- **AAA pattern:** Most tests follow Arrange-Act-Assert, but large tests like + `test_gaussian_find_position` interleave multiple arrange-act-assert cycles. +- **Single responsibility:** `test_gaussian_1d` tests scalars, arrays, + multidimensional arrays, and integration in a single function. These are + conceptually different behaviors. +- **Speed:** `test_gaussian_find_position` runs 8 parametrized variants, each + performing multiple `scipy.optimize.minimize` calls. This is the slowest + test. Consider marking it `@pytest.mark.slow`. +- **Logic in tests:** `test_gaussian_find_position` lines 387-409 contain an + `if bkgnd_degree is not None:` branch. This conditional logic in a test can + mask failures if the condition is wrong. + +--- + +## 18. Code coverage + +- **Overall coverage: 45.18%** (target: 90%). Measured by running the entire + test suite with `pytest tests/ --cov=src`. +- **`hst.py`: 0%** (335 statements). This module is excluded from mypy and + ruff but still ships as part of the package. It requires TinyTim environment + variables (`TINYTIM`, `PSF_CACHE_DIR`) at import time, making it hard to + test without mocking. +- **`gaussian.py`: 95%** Missing lines: 75, 89, 93, 97, 101, 105, 664. + - Lines 75, 89, 93, 97, 101, 105: Property accessors (`sigma_y`, `sigma_x`, + `mean_y`, `mean_x`) and `angle_subsample` validation dead branch. + - Line 664: `eval_rect` validation (odd positive shape). +- **`psf.py`: 70%** Missing 82 statements. + - `_eval_rect_smeared` (lines 187-213): Motion blur path untested. + - `find_position` edge cases (lines 532-669): box_size validation, edge of + image, optimizer failure, bad pixel masking. + - `_fit_psf_func` detailed logging (lines 739-744). + - `_find_position` detailed logging and optimizer failure (lines 818-820, + 838-839, 941-956). +- **`__init__.py`: 100%** + +**To reach 90%:** `hst.py` must be either tested or excluded from coverage +measurement. `psf.py` needs tests for `_eval_rect_smeared`, +`find_position` edge cases, and error paths. + +--- + +## 19. Pytest markers + +- **No custom markers are registered** in `pyproject.toml` (`markers` key is + absent). +- **`--strict-markers` is not enabled.** Any marker typo would be silently + ignored. +- **Markers used:** Only `@pytest.mark.parametrize` (built-in). +- **No `xfail` or `skip` markers.** +- **No `@pytest.mark.slow`** despite `test_gaussian_find_position` being + notably slower than others. + +**Recommendations:** + +- Add `markers = []` to `[tool.pytest.ini_options]`. +- Add `"--strict-markers"` and `"--strict-config"` to `addopts`. +- Consider `@pytest.mark.slow` for `test_gaussian_find_position`. + +--- + +## 20. Test boundary + +- **Private imports:** `test_psf.py` line 15 calls + `PSF._background_gradient_coeffs(...)` directly. This is a private method + (`_`-prefixed). The test is tightly coupled to the internal implementation. +- **Public API coverage:** `background_gradient_fit` and `find_position` are + tested through the public API, which is good. However, + `background_gradient` is tested only indirectly (as a helper in + `background_gradient_fit` tests). +- **Over-mocking:** No mocking is used, so this is not an issue. +- **`hst.py` public API:** `HSTPSF.eval_point`, `HSTPSF.eval_pixel`, + `HSTPSF.eval_rect` are completely untested. + +--- + +## 21. Logging assertions + +- **`psf.py` contains 43 logger calls** (mix of `.info`, `.debug`, `.warning`) + used extensively in `find_position` and `_find_position`. +- **Zero `caplog` usage** in the test suite. +- **Key untested logging:** + - `find_position`: Logs "optimizer did not succeed" at WARNING level when + optimization fails (line 838). This is the only warning-level log and + should have a test. + - `find_position`: Logs entry/exit at INFO level when `detailed_logging=True`. + - `_fit_psf_func`: Logs per-iteration diagnostics at DEBUG level. +- **Recommendation:** Add at least one test with `caplog` that verifies the + optimizer-failure warning message. + +--- + +## 22. Pytest configuration + +- **Active config file:** `pyproject.toml` (`[tool.pytest.ini_options]`). No + higher-precedence files (`pytest.toml`, `.pytest.toml`, `pytest.ini`, + `.pytest.ini`) exist. +- **`testpaths`:** Not set. Pytest collects from the entire repo. Should be set + to `["tests"]`. +- **`addopts`:** `["-n", "4", "--cov=src"]`. Missing `--strict-markers`, + `--strict-config`, and `-W error::DeprecationWarning`. +- **`filterwarnings`:** Not configured. Unexpected warnings are silently + ignored. +- **Plugins installed:** `pytest-xdist` (for `-n`), `pytest-cov`. Both are + used. +- **Missing plugins:** `pytest-randomly` (for order-independence testing) is + not listed but would be beneficial. +- **No duplicate config files** detected. + +**Recommendations:** + +```toml +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = ["-n", "4", "--cov=src", "--strict-markers", "--strict-config"] +filterwarnings = ["error"] +markers = [] +``` + +--- + +## 23. Snapshot and golden-file testing + +- **No snapshot or golden-file tests** are used. +- **Candidates:** `find_position` returns a complex dict with many keys. + The full return shape could benefit from a snapshot test rather than + cherry-picking individual keys. +- **`eval_rect` output:** The full 2-D array could be snapshot-tested for + regression, though inline `npt.assert_array_almost_equal` is adequate for + small arrays. + +**Not critical for this project, but `find_position` metadata dict is a good +candidate.** + +--- + +## Prompt for an AI agent to fix tests + +You are an AI agent tasked with improving the test suite for the `psfmodel` +Python library. The library lives in `src/psfmodel/` and tests are in `tests/`. + +**Do not modify any production code.** Only add, modify, or reorganize test +files and `conftest.py`. Preserve all existing passing behavior -- do not remove +or weaken any existing assertion. + +### Context + +The test suite currently has 45% line coverage (target: 90%). There are two test +files: `tests/test_gaussian.py` (8 test functions) and `tests/test_psf.py` +(2 test functions). There is no `conftest.py`. + +### Tasks (ordered by priority) + +1. **Exception message assertions:** All 25 `pytest.raises(ValueError)` calls + lack message assertions. Add `as exc_info` and assert on + `str(exc_info.value)` for each. Match the exact message from the source + code. + +2. **Add missing failure/validation tests:** + - `GaussianPSF.__init__`: `angle_subsample` out of range (0, 100, non-int). + - `GaussianPSF.eval_rect`: even dimensions, negative dimensions. + - `PSF.find_position`: invalid `box_size` (even, negative). + - `PSF.find_position`: starting point too close to image edge (returns + None). + - `PSF.find_position`: optimizer failure (returns None with warning). + - `GaussianPSF.gaussian_integral_1d`: sigma <= 0 (currently an `assert`). + +3. **Increase coverage for `psf.py` (currently 70%):** + - Test `_eval_rect_smeared` with non-zero movement via + `GaussianPSF.eval_rect(..., movement=(0.5, 0.3))`. + - Test `find_position` edge cases: all pixels masked, too many pixels + masked, `num_sigma` pixel rejection. + - Test `find_position` with `detailed_logging=True` and verify log output + via `caplog`. + +4. **Add logging tests:** + - Test that optimizer failure emits a WARNING containing "did not succeed". + - Test that `detailed_logging=True` emits INFO-level messages. + +5. **Create `tests/conftest.py`** with shared fixtures: + - `default_psf` -> `GaussianPSF()` + - `symmetric_psf` -> `GaussianPSF(sigma=(1.0, 1.0))` + - `asymmetric_psf` -> `GaussianPSF(sigma=(2.0, 3.0))` + +6. **Parametrize repetitive tests:** + - `test_gaussian_1d`: parametrize over `(x, kwargs, expected)` tuples. + - `test_gaussian_2d`: parametrize over `(y, x, kwargs, expected)` tuples. + - `test_gaussian_eval_point` ValueError cases: parametrize over + `(sigma_init, call_kwargs)`. + - `test_gaussian_eval_pixel` ValueError cases: same approach. + - `test_bkgnd_gradient_coeffs` ValueError cases: parametrize over + `(shape, order)`. + +7. **Improve `test_gaussian_eval_rect`:** + - Assert output shape equals `rect_size`. + - Assert center pixel is the maximum. + - Assert all values are non-negative. + - Test with different sigma, scale, base, and offset values. + +8. **Remove duplicate assertion:** `test_gaussian_integral_1d` lines 102-104 + duplicate lines 90-92. + +9. **Resolve or document TODO comments:** Lines 349, 366-367, 384-385 in + `test_gaussian_find_position` have commented-out assertions with + `# TODO: Why?`. Investigate and either fix or add `@pytest.mark.xfail` with + an issue reference. + +10. **Update pytest configuration in `pyproject.toml`:** + - Add `testpaths = ["tests"]`. + - Add `"--strict-markers"` and `"--strict-config"` to `addopts`. + - Add `filterwarnings = ["error"]`. + - Add `markers = []`. + +11. **Coverage target:** Run the full test suite with + `pytest tests/ --cov=src --cov-report=term-missing` and ensure at least + 90% line coverage for `gaussian.py` and `psf.py`. `hst.py` may be excluded + from coverage if testing it requires TinyTim; add it to `[tool.coverage.run] + omit`. + +### Constraints + +- Do not modify files in `src/`. +- Do not remove or weaken existing assertions. +- All tests must pass with `pytest -n auto`. +- Add type annotations to all new test functions. +- Follow Google-style docstrings for new test functions. +- Use `pytest.approx` for floating-point comparisons. +- Use `npt.assert_array_almost_equal` for array comparisons. diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000..f2a4752 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,8 @@ +coverage: + status: + project: + default: + target: 90% + patch: + default: + target: 90% diff --git a/docs/characterize_gauss_fit.md b/docs/characterize_gauss_fit.md new file mode 100644 index 0000000..1616729 --- /dev/null +++ b/docs/characterize_gauss_fit.md @@ -0,0 +1,698 @@ +# `characterize_gauss_fit` -- PSF Fitting Characterization Tool + +## Overview + +`characterize_gauss_fit` is a standalone command-line program that +systematically measures the accuracy of the Gaussian PSF fitter across a +large, configurable parameter space. It generates synthetic PSF images with +known ground-truth parameters, fits them, and reports how closely the fitter +recovers position, sigma, angle, and scale. + +Eight focused *studies* each vary a small set of parameters while holding +others fixed. Every study produces: + +- **PNG plots** for visual inspection (heatmaps, line plots with error bands, + grouped bar charts). +- **`trials.csv`** -- one row per trial with all input parameters and all + result metrics, loadable by pandas or any data-analysis tool. +- **`summary.json`** -- aggregate statistics per parameter group plus the + exact configuration used, formatted for AI-assisted analysis. + +## Running from the Repository + +`characterize_gauss_fit` is not an installed command-line entry point. It must +be run directly from the repository. First clone the repository and install the +extra dependencies: + +```sh +git clone https://github.com/SETI/rms-psfmodel.git +cd rms-psfmodel +pip install -e ".[characterize]" +``` + +This adds `matplotlib` and `pyyaml` to your environment. All commands below +use `python -m characterize_gauss_fit` and must be run from the repository +root (or any directory where the package is importable). + +## Quick Start + +Run all studies with default settings: + +```sh +python -m characterize_gauss_fit +``` + +Run a quick smoke test across all studies using the bundled reduced-grid +configuration (completes in roughly 30--120 seconds): + +```sh +python -m characterize_gauss_fit --copy-test-config-to test_config.yaml +python -m characterize_gauss_fit --config test_config.yaml +``` + +Run a single study: + +```sh +python -m characterize_gauss_fit --study box_vs_sigma +``` + +Run with a custom override file and parallel workers: + +```sh +python -m characterize_gauss_fit --config my_config.yaml --num-workers 8 +``` + +List all available study names: + +```sh +python -m characterize_gauss_fit --list-studies +``` + +Copy the built-in default configuration to a local file for editing: + +```sh +python -m characterize_gauss_fit --copy-default-config-to my_config.yaml +``` + +Copy the built-in reduced-grid test configuration to a local file: + +```sh +python -m characterize_gauss_fit --copy-test-config-to test_config.yaml +``` + +Copy the built-in high-resolution configuration to a local file: + +```sh +python -m characterize_gauss_fit --copy-hires-config-to hires_config.yaml +``` + +## CLI Reference + +```text +usage: python -m characterize_gauss_fit [--config FILE] [--study NAME] [--output-dir DIR] + [--num-workers N] [--list-studies] + [--copy-default-config-to FILE] + [--copy-test-config-to FILE] + [--copy-hires-config-to FILE] [--verbose] + +Options: + --config FILE Path to a YAML override file merged onto + built-in defaults. + --study NAME Run only this study (repeatable). Default: all + enabled studies. Use --list-studies to see names. + --output-dir DIR Override the output directory from the config + file. + --num-workers N Number of parallel worker processes. Default: + resolved from config file (built-in default: 1). + 1 = sequential in the main process; >1 uses + concurrent.futures.ProcessPoolExecutor. + --list-studies Print available study names and exit. + --copy-default-config-to FILE + Write the built-in default configuration to FILE + and exit. No studies are run. + --copy-test-config-to FILE Write the built-in reduced-grid test + configuration to FILE and exit. No studies are + run. + --copy-hires-config-to FILE Write the built-in high-resolution configuration + to FILE and exit. No studies are run. + --verbose, -v Enable DEBUG-level logging. +``` + +Exit code is 0 on success. Exit code 1 if any study raises an unhandled +exception; individual trial failures (fitter non-convergence) are not fatal +and are recorded as data. + +## Configuration Reference + +All parameters have built-in defaults. You can override any subset by +providing a YAML file with `--config`. The file is deep-merged onto the +defaults: scalar values and lists replace the default; nested dicts are +merged recursively. + +### Top-level keys + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `output_dir` | string (path) | `./gauss_fit_results` | Root directory for all output files. | +| `num_workers` | int | `1` | Worker processes for parallel execution. | +| `noise_samples` | int | `50` | Default noise realisations for stochastic studies. | + +### `fitting` section + +Global defaults passed to `PSF.find_position`. Any study section may include +a `fitting` subsection to override these for that study only. + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `bkgnd_degree` | int or null | `2` | Polynomial degree for background fitting. `null` = no background subtraction. | +| `bkgnd_ignore_center` | [int, int] | `[2, 2]` | Half-size of central region excluded from background fit (rows, cols). The excluded region is `(2*ny+1) x (2*nx+1)`. | +| `bkgnd_num_sigma` | float or null | `null` | Sigma-clipping threshold for background residuals. `null` = disabled. | +| `num_sigma` | float or null | `null` | Sigma-clipping threshold for PSF residuals (bad-pixel rejection). `null` = disabled. | +| `max_bad_frac` | float | `0.2` | Maximum fraction of pixels that can be masked before the fit is abandoned. | +| `allow_nonzero_base` | bool | `false` | Fit a constant base level in addition to the polynomial background. | +| `use_angular_params` | bool | `true` | Reparametrise fit variables as angles for bounded optimization. | +| `tolerance` | float | `1e-6` | Powell optimizer convergence tolerance. | +| `search_limit` | [float, float] | `[1.5, 1.5]` | Maximum allowed position offset from the starting point [y, x] in pixels. | +| `scale_limit` | float | `1000.0` | Maximum allowed PSF amplitude scale factor. | + +### `generation` section + +Controls the synthetic PSF image generator. + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `scale` | float | `1.5` | PSF amplitude scale factor applied to the normalised Gaussian. | +| `base` | float | `0.0` | Additive base level on the clean PSF before background injection. | + +### `studies` section + +Each study has an `enabled` flag and its own parameter set. Per-study +`fitting` subsections override the global defaults for that study only. + +--- + +#### `box_vs_sigma` + +Study 1: How box size relative to PSF sigma affects fitting accuracy. + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `enabled` | bool | `true` | Enable/disable this study. | +| `box_sizes` | list[int] | `[5,7,9,11,13,17,21,25,31]` | Odd box sizes to test. Each must be >= 5. | +| `sigmas` | list[float] | `[0.3,0.5,0.8,1.0,1.5,2.0,3.0,5.0]` | Symmetric PSF sigma values (pixels). | +| `offsets` | list[[float, float]] | `[[0.0,0.0],[0.25,0.25],[0.5,0.0],[0.0,0.5],[0.5,0.5]]` | List of sub-pixel [y, x] offsets. Each entry produces a separate set of heatmap plots. | +| `angle` | float | `0.0` | PSF rotation angle (radians). | +| `scale` | float | `1.0` | PSF amplitude scale factor (overrides `generation.scale`). | +| `fitting` | dict | `{bkgnd_degree: null}` | Per-study fitting overrides. | + +--- + +#### `subpixel_offset` + +Study 2: How fractional pixel position introduces systematic bias. + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `enabled` | bool | `true` | Enable/disable this study. | +| `offset_steps` | int | `11` | Number of evenly-spaced steps in each axis. | +| `offset_range` | [float, float] | `[0.0, 0.5]` | Range of offset values. Values outside [0, 0.5] are redundant by symmetry. | +| `sigmas` | list[float] | `[0.5, 1.0, 2.0]` | Sigma values used as a panel variable. | +| `box_size` | int | `21` | Fixed box size. | +| `angle` | float | `0.0` | Fixed PSF angle. | +| `fitting` | dict | `{bkgnd_degree: null}` | Per-study fitting overrides. | + +--- + +#### `min_detectable_offset` + +Study 3: Minimum offset delta reliably recoverable vs. PSF sigma and noise. + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `enabled` | bool | `true` | Enable/disable this study. | +| `delta_offsets` | list[float] | `[0.001,...,0.5]` | Offset deltas to test (pixels). | +| `sigmas` | list[float] | `[0.3,...,3.0]` | Symmetric sigma values. | +| `box_size` | int | `21` | Fixed box size. | +| `noise_samples` | int | `50` | Noise realisations per stochastic condition. | +| `snr_values` | list[float] | `[50.0, 100.0, 500.0]` | SNR values (peak / noise RMS) to test. | +| `include_noiseless` | bool | `true` | Also run a noiseless trial (numerical precision floor). | +| `fitting` | dict | `{bkgnd_degree: null}` | Per-study fitting overrides. | + +--- + +#### `sigma_asymmetry_angle` + +Study 4: Sigma asymmetry and angle recovery for elongated, rotated PSFs. + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `enabled` | bool | `true` | Enable/disable this study. | +| `sigma_ratios` | list[float] | `[0.25,...,4.0]` | Ratios sigma_y / sigma_x. | +| `angle_steps` | int | `13` | Evenly-spaced angles from 0 to pi (inclusive). Must be >= 2. | +| `sigma_x_values` | list[float] | `[0.5, 1.0, 2.0]` | sigma_x values (panel variable). | +| `box_size` | int | `25` | Fixed box size. | +| `offset` | [float, float] | `[0.25, 0.25]` | Fixed sub-pixel offset. | +| `fitting` | dict | `{bkgnd_degree: null}` | Per-study fitting overrides. | + +--- + +#### `constraint_modes` + +Study 5: Effect of fixing vs. floating sigma/angle on all output metrics. + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `enabled` | bool | `true` | Enable/disable this study. | +| `sigma_error_fractions` | list[float] | `[0.0, 0.2, 0.5]` | Fractional errors applied to sigma when fixed. 0.0 = correct value; 0.2 = fixed at 1.2x true. | +| `angle_error_rad` | float | `0.3` | Absolute angle error (radians) when angle is fixed incorrectly. | +| `psf_shapes` | list[{sigma: [y,x], angle: rad}] | 3 shapes | PSF shapes to test. Each entry must have `sigma` (list of two floats) and `angle` (float in [0, pi]). | +| `box_size` | int | `21` | Fixed box size. | +| `offset` | [float, float] | `[0.25, 0.25]` | Fixed sub-pixel offset. | +| `scale` | float | `1.5` | PSF amplitude scale factor. | + +--- + +#### `background` + +Study 6: How injected background and fitting model choice interact. + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `enabled` | bool | `true` | Enable/disable this study. | +| `background_amplitudes` | list[float] | `[0.01, 0.1, 0.5]` | Background amplitude as fraction of PSF peak. | +| `bkgnd_degrees` | list[int] | `[0, 1, 2]` | Polynomial degrees to use when fitting the background. | +| `bkgnd_degrees_with_null` | bool | `true` | Also test `bkgnd_degree=null` (no fitting). | +| `bkgnd_ignore_centers` | list[[int,int]] | `[[1,1],[2,2],[4,4]]` | `bkgnd_ignore_center` values to test. | +| `background_types` | list[str] | `[none,constant,linear,quadratic,noisy_constant]` | Background types to inject. | +| `box_size` | int | `21` | Fixed box size. | +| `sigma` | [float, float] | `[1.0, 1.0]` | Fixed PSF sigma [y, x]. | +| `offsets` | list[[float, float]] | `[[0.0,0.0],[0.25,0.25],[0.5,0.5]]` | List of sub-pixel [y, x] offsets. Each entry produces a separate set of heatmap plots. | + +Valid `background_types` values: + +| Value | Description | +|-------|-------------| +| `none` | No background injected. | +| `constant` | Flat pedestal at `amplitude * PSF_peak`. | +| `linear` | Tilted plane (linear gradient). | +| `quadratic` | Bowl-shaped quadratic surface. | +| `noisy_constant` | Flat pedestal plus Gaussian noise at 0.5x the main noise level. | + +--- + +#### `noise_sensitivity` + +Study 7: Position, sigma, and scale accuracy as a function of SNR. + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `enabled` | bool | `true` | Enable/disable this study. | +| `snr_log_range` | [float, float] | `[0.5, 3.5]` | Log10 range for SNR (min, max). | +| `snr_steps` | int | `15` | Number of log-spaced SNR points. | +| `sigmas` | list[float] | `[0.5, 1.0, 2.0]` | Sigma values (panel variable). | +| `noise_samples` | int | `50` | Noise realisations per (SNR, sigma) point. | +| `box_size` | int | `21` | Fixed box size. | + +--- + +#### `hot_pixel_rejection` + +Study 8: Effectiveness of `num_sigma` bad-pixel rejection. + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| `enabled` | bool | `true` | Enable/disable this study. | +| `num_hot_pixels` | list[int] | `[0,1,3,5,10]` | Number of hot pixels to inject. | +| `num_sigma_values` | list[float] | `[3.0, 4.0, 5.0, 6.0]` | `num_sigma` rejection thresholds to test. | +| `num_sigma_with_null` | bool | `true` | Also test `num_sigma=null` (rejection disabled). | +| `hot_amplitudes` | list[float] | `[5.0, 20.0, 100.0]` | Hot pixel amplitude as multiple of PSF peak. | +| `noise_samples` | int | `20` | Noise realisations per combination (hot pixel positions randomised). | +| `snr` | float | `100.0` | Background Gaussian noise SNR (peak / noise RMS). | +| `box_size` | int | `21` | Fixed box size. | +| `sigma` | [float, float] | `[1.0, 1.0]` | Fixed PSF sigma [y, x]. | +| `offset` | [float, float] | `[0.25, 0.25]` | Fixed sub-pixel offset. | + +--- + +## Output Format Reference + +Each study writes its results to `{output_dir}/{study_name}/`. + +### `trials.csv` + +One row per trial. Columns: + +| Column | Description | +|--------|-------------| +| `study` | Study name string. | +| `box_size` | Subimage side length (pixels). | +| `sigma_y_true`, `sigma_x_true` | True PSF sigma values. | +| `angle_true` | True rotation angle (radians). | +| `offset_y_true`, `offset_x_true` | True sub-pixel offset. | +| `scale_true` | True amplitude scale factor. | +| `fit_sigma_y`, `fit_sigma_x` | Value sigma was fixed to (empty = floated). | +| `fit_angle` | Value angle was fixed to (empty = floated). | +| `background_type` | Injected background type string. | +| `background_amplitude` | Injected background amplitude (fraction of peak). | +| `noise_rms` | Additive Gaussian noise standard deviation. | +| `num_hot_pixels` | Number of hot pixels injected. | +| `hot_pixel_amplitude` | Hot pixel amplitude (multiple of PSF peak). | +| `bkgnd_degree` | Background fitting polynomial degree (empty = null). | +| `num_sigma` | Bad-pixel rejection threshold (empty = null). | +| `bkgnd_ignore_center_y`, `bkgnd_ignore_center_x` | Ignore-center half-sizes. | +| `converged` | `true` or `false`. | +| `pos_err_y`, `pos_err_x` | Signed position errors (fitted - true). | +| `pos_err` | Euclidean position error. | +| `sigma_y_fit`, `sigma_x_fit` | Fitted sigma values (empty if fixed). | +| `angle_fit` | Fitted angle (empty if fixed). | +| `scale_fit` | Fitted scale factor. | +| `sigma_y_err`, `sigma_x_err` | Relative sigma errors (fit - true) / true. | +| `angle_err` | Absolute angle error in radians. | +| `scale_err` | Relative scale error (fit - true) / true. | + +**Conventions:** +- Non-applicable fields are empty strings (e.g. `sigma_y_fit` when sigma was fixed). +- Failed / non-converged trials have `NaN` for all error fields. +- Load with `pandas.read_csv(..., na_values=['NaN', ''])`. + +> **Naming asymmetry warning.** Two pairs of columns have similar names but +> opposite roles: +> +> - `fit_sigma_y` / `fit_sigma_x` are **inputs** (the value sigma was +> *constrained to* before fitting; empty string when sigma was left to float). +> - `sigma_y_fit` / `sigma_x_fit` are **outputs** (the sigma value *returned +> by the fitter*; empty string when sigma was fixed and not fitted). +> +> In short: `fit_*` columns describe what you told the fitter; `*_fit` columns +> describe what the fitter found. +> +> Example (Python / pandas): +> +> ```python +> # Trials where sigma_y was constrained (input column non-empty): +> constrained = df[df['fit_sigma_y'].notna()] +> # Trials where sigma_y was floated and a fitted value was returned: +> fitted = df[df['sigma_y_fit'].notna()] +> ``` + +### `summary.json` + +```text +{ + "study": "box_vs_sigma", + "total_trials": 72, + "converged_trials": 68, + "convergence_rate": 0.944, + "overall": { ... aggregate stats ... }, + "groups": [ + { + "box_size": 5, + "sigma": 0.3, + "n_trials": 1, + "n_converged": 1, + "convergence_rate": 1.0, + "pos_err_mean": 0.00123, + "pos_err_std": null, + "sigma_y_err_mean": 0.005, + "scale_err_mean": 0.002, + "angle_err_mean": null + } + ], + "config_used": { ... full config dict ... } +} +``` + +`null` in JSON corresponds to `None` in Python (not enough data to compute, +or metric not applicable). The `config_used` field contains the exact +configuration that produced these results for full reproducibility. + +### AI / automated analysis + +To load all studies for analysis: + +```python +import json +import pathlib +import pandas as pd + +results_dir = pathlib.Path('./gauss_fit_results') +dfs = [] +for csv_file in results_dir.glob('*/trials.csv'): + dfs.append(pd.read_csv(csv_file, na_values=['NaN', ''])) +all_results = pd.concat(dfs, ignore_index=True) +``` + +--- + +## Study Descriptions + +### Study 1: Box Size vs. Sigma (`box_vs_sigma`) + +**Question:** How large must the subimage be relative to the PSF width for +accurate fitting? + +Sweeps box size and PSF sigma on a 2-D grid. The PSF always fills the entire +image (`eval_rect` size == `box_size`). Background is disabled so only the +intrinsic truncation effect is measured. Sigma is left to float. + +**Key plots:** Heatmaps of log10(position error), log10(sigma error), and +log10(scale error) as functions of box size (rows) and sigma (columns). Cells +where the fitter did not converge are shown in grey. + +**Interpretation:** Expect a sharp accuracy cliff when +`box_size < 4 * sigma + 1`. Small sigmas are well-fitted even in tiny boxes; +large sigmas in small boxes truncate most of the PSF flux. + +--- + +### Study 2: Subpixel Offset (`subpixel_offset`) + +**Question:** Does the fractional pixel position of the PSF centre introduce +systematic bias? + +Sweeps offset_y and offset_x from 0 to 0.5 pixels in a grid. The range +[0, 0.5] is sufficient by symmetry. Three sigma panel values are tested. + +**Key plots:** 2-D heatmap of position error vs. (offset_y, offset_x) for +each sigma. Line plot of error vs. offset_x at fixed offset_y. + +**Interpretation:** The fitter may exhibit a small systematic oscillation at +the pixel-period scale due to aliasing. Very small sigmas show larger absolute +error because the PSF peak is narrower than a pixel. + +--- + +### Study 3: Minimum Detectable Offset (`min_detectable_offset`) + +**Question:** How small an offset delta can the fitter reliably recover as a +function of PSF size and noise? + +Tests a log-spaced grid of offset deltas, applied purely in the X direction, +for each (sigma, SNR) combination. Recovery fraction is defined as the fraction +of trials where `|pos_err| < delta/2` (within 50% of the true offset). + +**Key plots:** Log-log line plot of mean position error vs. delta for each +sigma (one panel per SNR). Recovery fraction heatmap (sigma vs. delta, one +plot per SNR level). + +**Interpretation:** The precision floor in the noiseless case reveals +numerical resolution limits. Noise raises the floor to approximately +`sigma / SNR`. Recovery fraction drops below 0.5 near the precision floor. + +--- + +### Study 4: Sigma Asymmetry and Angle (`sigma_asymmetry_angle`) + +**Question:** How well are elongated, rotated PSFs recovered? + +Sweeps sigma_ratio (sigma_y / sigma_x) and rotation angle for several sigma_x +values. All parameters float. For circular PSFs (ratio ~= 1.0), angle is +degenerate and angle error is not meaningful -- those cells show NaN. + +**Key plots:** Heatmaps of position error, angle error, and sigma_y error as +functions of (ratio, angle), one panel per sigma_x. + +**Interpretation:** Near-circular PSFs (ratio near 1) have degenerate angle; +the fitter can converge to any angle without affecting position accuracy. +Highly elongated PSFs at edge-case angles (0, pi/2, pi) may have increased +error due to the optimizer landscape. + +--- + +### Study 5: Constraint Modes (`constraint_modes`) + +**Question:** How does fixing vs. floating sigma and angle affect position, +scale, sigma, and angle accuracy? + +Tests eight constraint configurations on three PSF shapes. Reports all four +accuracy metrics. + +**Key plots:** 4-panel grouped bar chart: position error, relative scale +error, relative sigma_y error, and absolute angle error, grouped by PSF shape. + +**Interpretation:** Correctly fixing sigma reduces fitting degrees of freedom +and generally improves position accuracy at the cost of sigma recovery +information. Incorrectly fixed sigma can bias all metrics. Floating angle on +circular PSFs wastes degrees of freedom but rarely hurts position accuracy. + +--- + +### Study 6: Background Conditions (`background`) + +**Question:** How do injected background and fitting model choice interact? + +Combines five background types with four fitting-degree options and three +ignore-center sizes. + +**Key plots:** Heatmap matrix -- rows = injected background type, +columns = fitting degree. One heatmap per (amplitude, ignore-center) +combination. + +**Interpretation:** Fitting a constant background (degree 0) is generally +sufficient for constant injected backgrounds. Higher-degree backgrounds require +matching or higher fitting degrees. Using `bkgnd_degree=null` with any +non-zero background will show degraded accuracy. + +--- + +### Study 7: Noise Sensitivity (`noise_sensitivity`) + +**Question:** At what SNR does fitting accuracy degrade? + +Sweeps a log-spaced SNR range with multiple noise realisations per point. +Random offsets are used so position error reflects typical (not best-case) +accuracy. + +**Key plots:** Line plots with shaded ±1 std bands: position error, sigma_y +error, sigma_x error, and scale error vs. SNR (log scale). One line per sigma +panel value. + +**Interpretation:** All metrics improve roughly as 1/SNR in the noise-limited +regime. The saturation at high SNR reveals the floor set by optimizer +precision and pixel-integration discretisation. + +--- + +### Study 8: Hot Pixel Rejection (`hot_pixel_rejection`) + +**Question:** How effectively does `num_sigma` rejection handle hot pixels? + +Varies the number of hot pixels (0 to 10), their amplitude (5x to 100x peak), +and the `num_sigma` rejection threshold. Multiple noise realisations randomise +hot pixel positions. + +**Key plots:** Line plot per amplitude panel: X = number of hot pixels, +Y = mean position error, lines = `num_sigma` settings. + +**Interpretation:** `num_sigma=null` shows the baseline degradation from +unrejected hot pixels. Aggressive rejection (low `num_sigma`) can mask real +PSF pixels near the core. `num_sigma=3` is typically a good balance. + +--- + +## Bundled Configuration Files + +Three YAML configurations ship alongside the package source. Each can be +copied to a local file for editing with the corresponding `--copy-*-to` +option. + +### Default configuration (`defaults.yaml`) + +The primary reference configuration. All parameters are set to sensible +defaults that provide a thorough survey of each study's parameter space. +Output is written to `./gauss_fit_results/`. + +```sh +python -m characterize_gauss_fit --copy-default-config-to my_config.yaml +``` + +### Reduced-grid test configuration (`test_config.yaml`) + +Runs all eight studies with the smallest viable parameter grids so the +entire suite completes in roughly 30--120 seconds on a single core. Use it +to verify that all code paths execute after code changes. + +```sh +python -m characterize_gauss_fit --copy-test-config-to test_config.yaml +python -m characterize_gauss_fit --config test_config.yaml +``` + +Output is written to `./gauss_fit_test/` by default. + +| Study | Grid size | Approx. trials | +|-------|-----------|----------------| +| `box_vs_sigma` | 2 box sizes x 3 sigmas x 2 offsets | 12 | +| `subpixel_offset` | 3 x 3 offset grid, 1 sigma | 9 | +| `min_detectable_offset` | 3 deltas x 2 sigmas x (1 noiseless + 1 SNR) x 3 samples | ~24 | +| `sigma_asymmetry_angle` | 3 ratios x 3 angles x 1 sigma_x | 9 | +| `constraint_modes` | 5 modes x 2 PSF shapes x 2 sigma-error fractions | ~20 | +| `background` | 2 background types x 2 fitting degrees x 2 offsets | 8 | +| `noise_sensitivity` | 4 SNR points x 1 sigma x 3 samples | 12 | +| `hot_pixel_rejection` | 2 num_hot x 2 num_sigma x 1 amplitude x 3 samples | 12 | + +### High-resolution configuration (`hires_config.yaml`) + +Runs all eight studies with denser parameter grids and larger +`noise_samples` counts compared with the defaults, while staying within the +same parameter ranges. The goal is smoother heatmaps, less noisy line plots, +and more nuanced detail at intermediate parameter values. Estimated runtime +is 10--30x longer than the default configuration; use `--num-workers` to +parallelise across CPU cores. + +```sh +python -m characterize_gauss_fit --copy-hires-config-to hires_config.yaml +python -m characterize_gauss_fit --config hires_config.yaml --num-workers 8 +``` + +Output is written to `./gauss_fit_hires/` by default. + +| Study | Denser axes | Key changes vs. defaults | +|-------|-------------|--------------------------| +| `box_vs_sigma` | 12 box sizes, 13 sigmas | adds 15, 19, 41 px boxes; intermediate sigma values | +| `subpixel_offset` | 21 x 21 offset grid, 4 sigmas | 0.025 px step; adds sigma=1.5 | +| `min_detectable_offset` | 11 deltas, 4 SNR conditions, 200 samples | adds SNR=20 condition | +| `sigma_asymmetry_angle` | 10 ratios, 25 angle steps | 7.5 deg angular resolution | +| `constraint_modes` | 6 sigma-error fractions, 4 PSF shapes | adds 0.1, 0.3, 0.75 fractions | +| `background` | 7 amplitudes, degree=3, 4 ignore_center sizes | finer amplitude sweep | +| `noise_sensitivity` | 25 SNR points, 4 sigmas, 200 samples | adds sigma=1.5; 4x more samples | +| `hot_pixel_rejection` | 8 hot-pixel counts, 6 amplitudes, 4 thresholds, 50 samples | fills gaps in all axes | + +### Obtaining any bundled file + +All three copy commands write the file and exit immediately — no studies are +run. The destination path must not already exist. + +```sh +python -m characterize_gauss_fit --copy-default-config-to my_config.yaml +python -m characterize_gauss_fit --copy-test-config-to test_config.yaml +python -m characterize_gauss_fit --copy-hires-config-to hires_config.yaml +``` + +All parameters in any bundled config can be further overridden by combining +it with a second user config file or with CLI flags. For example, to run +only study 1 with the test grid: + +```sh +python -m characterize_gauss_fit --config test_config.yaml --study box_vs_sigma +``` + +--- + +## Example User Override File + +To override a subset of parameters on top of the defaults, create a YAML +file containing only the keys you want to change. The following example +runs two studies with custom grids: + +```yaml +output_dir: ./my_results + +studies: + box_vs_sigma: + box_sizes: [7, 11, 21] + sigmas: [0.5, 1.0, 2.0, 3.0] + + noise_sensitivity: + snr_steps: 10 + noise_samples: 20 + sigmas: [0.5, 1.0, 2.0] + + subpixel_offset: + enabled: false + min_detectable_offset: + enabled: false + sigma_asymmetry_angle: + enabled: false + constraint_modes: + enabled: false + background: + enabled: false + hot_pixel_rejection: + enabled: false +``` + +Run with: + +```sh +python -m characterize_gauss_fit --config my_overrides.yaml +``` diff --git a/docs/code_of_conduct.md b/docs/code_of_conduct.md new file mode 100644 index 0000000..8597151 --- /dev/null +++ b/docs/code_of_conduct.md @@ -0,0 +1,6 @@ +# Contributor Covenant Code of Conduct + +```{include} ../CODE_OF_CONDUCT.md +:relative-images: +:start-after: "# Contributor Covenant Code of Conduct" +``` diff --git a/docs/conf.py b/docs/conf.py index b59e2a8..26759cd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,32 +1,103 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + # Configuration file for the Sphinx documentation builder. -# -# For the full list of built-in configuration values, see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information +# -- Path setup -------------------------------------------------------------- +import importlib.metadata import os import sys +import warnings +sys.path.insert(0, os.path.abspath('../src')) -sys.path.insert(0, os.path.abspath('..')) +# Verify the source path exists +if not os.path.exists(os.path.abspath('../src')): + warnings.warn("Source directory '../src' not found. API documentation may be incomplete.") + +# -- Project information ----------------------------------------------------- -project = 'psfmodel' -copyright = '2025, PDS Ring-Moon Systems Node' -author = 'PDS Ring-Moon Systems Node' +project = 'rms-psfmodel' +copyright = '2026, SETI Institute' +author = 'SETI Institute' + +# The full version, including alpha/beta/rc tags +try: + release = importlib.metadata.version('rms-psfmodel') +except importlib.metadata.PackageNotFoundError: + release = '1.0.0' # fallback for development # -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ['myst_parser', 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode'] +# Add any Sphinx extension module names here, as strings +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.viewcode', + 'sphinx.ext.napoleon', + 'sphinx.ext.intersphinx', + 'sphinxcontrib.mermaid', + 'myst_parser', +] +# Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +# CONTRIBUTING.md is split in contributing.rst; the tail fragment starts at +# "## ..." so MyST reports a false-positive heading-level warning. +suppress_warnings = ['myst.header'] + +# The suffix(es) of source filenames. +source_suffix = ['.rst', '.md'] # -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output +# The theme to use for HTML and HTML Help pages. html_theme = 'sphinx_rtd_theme' -html_static_path = ['_static'] + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = ['_static'] + +add_module_names = False +autodoc_typehints_format = "short" + +# -- Extension configuration ------------------------------------------------- + +# Napoleon settings +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_init_with_doc = False +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = False +napoleon_use_admonition_for_notes = False +napoleon_use_admonition_for_references = False +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True +napoleon_preprocess_types = False +napoleon_type_aliases = None +napoleon_attr_annotations = True + +# Intersphinx settings +intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None), + 'numpy': ('https://numpy.org/doc/stable/', None), + 'matplotlib': ('https://matplotlib.org/stable/', None), +} + +# MyST-Parser settings +myst_enable_extensions = [ + "colon_fence", + "deflist", +] + +# Mermaid settings — use client-side rendering so no mmdc binary is required +# in CI or on ReadTheDocs. +mermaid_output_format = 'raw' diff --git a/docs/contributing.rst b/docs/contributing.rst new file mode 100644 index 0000000..8525ba4 --- /dev/null +++ b/docs/contributing.rst @@ -0,0 +1,13 @@ +============ +Contributing +============ + +.. include:: ../CONTRIBUTING.md + :parser: myst_parser.sphinx_ + :end-before: See [CODE_OF_CONDUCT.md] + +See the :doc:`Code of Conduct `. + +.. include:: ../CONTRIBUTING.md + :parser: myst_parser.sphinx_ + :start-after: (CODE_OF_CONDUCT.md). diff --git a/docs/index.rst b/docs/index.rst index 33328b1..847c73f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,21 +1,22 @@ -.. solar documentation master file, created by - sphinx-quickstart on Fri May 24 12:58:54 2024. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. +.. rms-psfmodel documentation master file -Welcome to ``psfmodel``'s documentation! -======================================== +Welcome to the Documentation for rms-psfmodel! +============================================== .. include:: ../README.md :parser: myst_parser.sphinx_ - :start-after: forks/SETI/rms-psfmodel) + :start-after: .. toctree:: :maxdepth: 2 :caption: Contents: + psf_gui + characterize_gauss_fit + contributing + code_of_conduct module - + performance_report/index Indices and tables ================== diff --git a/docs/make.bat b/docs/make.bat index 32bb245..b4f380c 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -1,35 +1,35 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=. -set BUILDDIR=_build - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "" goto help - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd \ No newline at end of file diff --git a/docs/module.rst b/docs/module.rst index 8348633..15a1933 100644 --- a/docs/module.rst +++ b/docs/module.rst @@ -7,4 +7,4 @@ :undoc-members: :special-members: :show-inheritance: - :exclude-members: __dict__, __hash__, __module__, __weakref__, __enter__, __exit__, __annotations__, __abstractmethods__ + :exclude-members: __dict__, __hash__, __module__, __weakref__, __annotations__, __abstractmethods__ diff --git a/docs/performance_report/Makefile b/docs/performance_report/Makefile new file mode 100644 index 0000000..3a8b4a2 --- /dev/null +++ b/docs/performance_report/Makefile @@ -0,0 +1,27 @@ +# Minimal Makefile for the standalone performance report. +# +# Usage (from this directory): +# make html -- build HTML output in _build/html +# make clean -- remove the _build directory +# make livehtml -- auto-rebuild on file changes (requires sphinx-autobuild) + +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +.PHONY: help html clean livehtml + +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) + +html: + $(SPHINXBUILD) -b html "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) + @echo + @echo "Build finished. Open $(BUILDDIR)/html/index.html to view." + +clean: + rm -rf $(BUILDDIR) + +livehtml: + sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) diff --git a/docs/performance_report/conf.py b/docs/performance_report/conf.py new file mode 100644 index 0000000..a99fb8a --- /dev/null +++ b/docs/performance_report/conf.py @@ -0,0 +1,12 @@ +"""Sphinx configuration for the standalone PSF fitter performance report.""" + +project = "rms-psfmodel Performance Report" +copyright = "2026, SETI Institute" +author = "SETI Institute" +release = "" + +extensions = [] + +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +html_theme = "sphinx_rtd_theme" diff --git a/docs/performance_report/images/background_pos_err_amp3_ic1_oy0p25_ox0p25.png b/docs/performance_report/images/background_pos_err_amp3_ic1_oy0p25_ox0p25.png new file mode 100644 index 0000000..c87d4d6 Binary files /dev/null and b/docs/performance_report/images/background_pos_err_amp3_ic1_oy0p25_ox0p25.png differ diff --git a/docs/performance_report/images/box_vs_sigma_pos_err_oy0p25_ox0p25.png b/docs/performance_report/images/box_vs_sigma_pos_err_oy0p25_ox0p25.png new file mode 100644 index 0000000..3a68685 Binary files /dev/null and b/docs/performance_report/images/box_vs_sigma_pos_err_oy0p25_ox0p25.png differ diff --git a/docs/performance_report/images/constraint_modes_summary.png b/docs/performance_report/images/constraint_modes_summary.png new file mode 100644 index 0000000..e1824a2 Binary files /dev/null and b/docs/performance_report/images/constraint_modes_summary.png differ diff --git a/docs/performance_report/images/hot_pixel_rejection_pos_err_hotamp2.png b/docs/performance_report/images/hot_pixel_rejection_pos_err_hotamp2.png new file mode 100644 index 0000000..a394dbb Binary files /dev/null and b/docs/performance_report/images/hot_pixel_rejection_pos_err_hotamp2.png differ diff --git a/docs/performance_report/images/min_detectable_offset_pos_err_noiseless.png b/docs/performance_report/images/min_detectable_offset_pos_err_noiseless.png new file mode 100644 index 0000000..7f01f30 Binary files /dev/null and b/docs/performance_report/images/min_detectable_offset_pos_err_noiseless.png differ diff --git a/docs/performance_report/images/min_detectable_offset_recovery_snr_100.png b/docs/performance_report/images/min_detectable_offset_recovery_snr_100.png new file mode 100644 index 0000000..3c6b393 Binary files /dev/null and b/docs/performance_report/images/min_detectable_offset_recovery_snr_100.png differ diff --git a/docs/performance_report/images/noise_sensitivity_pos_err_vs_snr.png b/docs/performance_report/images/noise_sensitivity_pos_err_vs_snr.png new file mode 100644 index 0000000..6d6393c Binary files /dev/null and b/docs/performance_report/images/noise_sensitivity_pos_err_vs_snr.png differ diff --git a/docs/performance_report/images/sigma_asymmetry_angle_angle_err_sx1.0.png b/docs/performance_report/images/sigma_asymmetry_angle_angle_err_sx1.0.png new file mode 100644 index 0000000..92f7609 Binary files /dev/null and b/docs/performance_report/images/sigma_asymmetry_angle_angle_err_sx1.0.png differ diff --git a/docs/performance_report/images/sigma_asymmetry_angle_pos_err_sx1.0.png b/docs/performance_report/images/sigma_asymmetry_angle_pos_err_sx1.0.png new file mode 100644 index 0000000..cfa7523 Binary files /dev/null and b/docs/performance_report/images/sigma_asymmetry_angle_pos_err_sx1.0.png differ diff --git a/docs/performance_report/images/subpixel_offset_pos_err_sigma1.0.png b/docs/performance_report/images/subpixel_offset_pos_err_sigma1.0.png new file mode 100644 index 0000000..a92dbe3 Binary files /dev/null and b/docs/performance_report/images/subpixel_offset_pos_err_sigma1.0.png differ diff --git a/docs/performance_report/index.rst b/docs/performance_report/index.rst new file mode 100644 index 0000000..0baf5d3 --- /dev/null +++ b/docs/performance_report/index.rst @@ -0,0 +1,1094 @@ +.. _performance-report: + +=========================================================================== +Gaussian PSF Fitter Performance Report -- High-Resolution Characterization +=========================================================================== + +.. contents:: Table of Contents + :depth: 3 + :local: + +Introduction +============ + +This report presents a comprehensive characterization of the ``rms-psfmodel`` +Gaussian PSF fitting library, evaluating its accuracy across a broad parameter +space. The results are produced by the ``characterize_gauss_fit`` tool using +the **high-resolution configuration** (``hires_config.yaml``), which employs +denser parameter grids and larger noise-sample counts than the default +configuration to produce smoother statistics and finer detail at intermediate +parameter values. + +Purpose and Scope +----------------- + +The ``rms-psfmodel`` library fits 2-D elliptical Gaussian point-spread +functions (PSFs) to sub-images extracted from astronomical detector data. +The fitter recovers the sub-pixel position, PSF width (sigma), orientation +angle, and amplitude scale of the source. This report quantifies how +accurately the fitter recovers these parameters under controlled conditions +by comparing fitted values to known ground truth. + +Eight focused studies each vary a small set of parameters while holding +others fixed, probing different aspects of fitter performance: + +.. list-table:: Study Overview + :header-rows: 1 + :widths: 5 30 15 10 10 + + * - # + - Study + - Question Addressed + - Trials + - Convergence + * - 1 + - Box Size vs. Sigma + - How large must the sub-image be relative to the PSF width? + - 780 + - 100% + * - 2 + - Subpixel Offset + - Does fractional pixel position introduce systematic bias? + - 1,764 + - 100% + * - 3 + - Minimum Detectable Offset + - What is the smallest recoverable positional shift? + - 61,677 + - 100% + * - 4 + - Sigma Asymmetry and Angle + - How well are elongated, rotated PSFs recovered? + - 750 + - 100% + * - 5 + - Constraint Modes + - How does fixing vs. floating sigma/angle affect accuracy? + - 44 + - 100% + * - 6 + - Background Conditions + - How do background models interact with fitting accuracy? + - 2,100 + - 100% + * - 7 + - Noise Sensitivity + - At what SNR does fitting accuracy degrade? + - 20,000 + - 100% + * - 8 + - Hot Pixel Rejection + - How effectively does sigma-clipping reject bad pixels? + - 14,400 + - 83.3% + +**Total trials: 101,515.** All studies achieve 100% convergence except +hot pixel rejection, where aggressive sigma-clipping (``num_sigma=3``) +causes systematic convergence failure. + +Methodology +----------- + +Each trial follows the same protocol: + +1. **Generate** a synthetic PSF image using ``GaussianPSF.eval_rect()`` + with known sigma, angle, sub-pixel offset, and scale. The image is a + pixel-integrated 2-D Gaussian on a square grid of side ``box_size``. + +2. **Corrupt** the image with optional additive backgrounds (constant, + linear, quadratic, or noisy), Gaussian detector noise, and/or hot + pixels. + +3. **Fit** the corrupted image using ``GaussianPSF.find_position()``, the + same production API that downstream users call. The fitter uses + Powell's method to minimize the squared residual between the model and + the data, with optional polynomial background subtraction and + sigma-clipping for outlier rejection. + +4. **Compute errors** by comparing fitted parameters to the injected + ground truth: + + - **Position error** (Euclidean): ``sqrt((fit_y - true_y)^2 + + (fit_x - true_x)^2)`` + - **Sigma error** (relative): ``(sigma_fit - sigma_true) / sigma_true`` + - **Scale error** (relative): ``(scale_fit - scale_true) / scale_true`` + - **Angle error** (absolute, radians): ``|angle_fit - angle_true|`` + wrapped to ``[-pi/2, pi/2]`` + +Error Metric Definitions +^^^^^^^^^^^^^^^^^^^^^^^^ + +Throughout this report, **position error** is the Euclidean norm of the +signed Y and X position residuals, measured in pixels. **Sigma error** and +**scale error** are dimensionless relative errors (a value of 0.01 means 1% +error). **Angle error** is an absolute difference in radians, reduced +modulo 90 degrees due to the pi-symmetry of elliptical Gaussians. + +For stochastic studies (noise, hot pixels, minimum detectable offset), each +parameter combination is repeated with 50--200 independent noise +realizations. Statistics are reported as mean +/- 1 standard deviation over +these realizations. + +Configuration Summary +^^^^^^^^^^^^^^^^^^^^^ + +The high-resolution configuration uses: + +- **200 noise samples** per stochastic condition (vs. 50 in the default) +- **12 box sizes** from 5 to 41 pixels +- **13 sigma values** from 0.3 to 5.0 pixels +- **21 x 21 subpixel offset grid** (0.025 px resolution) +- **25 SNR points** from 3.2 to 3162 (log-spaced) +- **25 angle steps** (7.5 degree resolution) + +The fitter uses ``tolerance=1e-6``, ``use_angular_params=true``, and +``search_limit=[1.5, 1.5]`` pixels throughout. + + +Study 1: Box Size vs. PSF Sigma +================================ + +**Question:** How large must the fitting sub-image be relative to the PSF +width for accurate position, sigma, and scale recovery? + +Box-Sigma Parameters +-------------------- + +.. list-table:: + :header-rows: 1 + :widths: 20 50 + + * - Parameter + - Values + * - Box sizes + - 5, 7, 9, 11, 13, 15, 17, 19, 21, 25, 31, 41 pixels + * - Sigma (symmetric) + - 0.3, 0.4, 0.5, 0.6, 0.8, 1.0, 1.25, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0 px + * - Sub-pixel offsets + - (0,0), (0.25,0.25), (0.5,0), (0,0.5), (0.5,0.5) + * - Background/noise + - None (clean pixel integrals only) + +The study sweeps a 12 x 13 grid of box sizes and sigma values at five +sub-pixel offset positions, producing 780 noiseless trials. Background +subtraction is disabled (``bkgnd_degree=null``) so that only the intrinsic +truncation effect is measured. + +Box-Sigma Results +----------------- + +All 780 trials converge. The position error heatmap reveals a sharp +transition between catastrophic and excellent fits that depends on the +ratio of box size to PSF sigma: + +.. figure:: images/box_vs_sigma_pos_err_oy0p25_ox0p25.png + :width: 100% + :alt: Heatmap of log10(position error) vs box size and sigma + + Position error (Euclidean, log10 scale) as a function of box size + (rows) and PSF sigma (columns) at offset (+0.25, +0.25). Dark cells + indicate sub-pixel precision; bright/yellow cells indicate catastrophic + failure. + +.. list-table:: Position Error Summary by Regime + :header-rows: 1 + :widths: 30 25 25 + + * - Regime + - Typical Position Error + - Sigma/Scale Error + * - box >= 4*sigma + 1 (adequate) + - 1e-7 -- 1e-14 pixels + - < 1e-6 (relative) + * - box < 4*sigma + 1 (truncated) + - 0.1 -- 2.1 pixels + - 2x -- 48x (relative) + +Box-Sigma Findings +^^^^^^^^^^^^^^^^^^ + +1. **Adequate box size threshold:** When ``box_size >= 4 * sigma + 1``, + the fitter achieves near-machine-precision accuracy. For a sigma=1.0 + PSF, a 5-pixel box suffices; for sigma=5.0, a 21-pixel box is needed. + +2. **Sharp cliff:** The transition from good to bad is abrupt. For + sigma=0.5, the fit jumps from ``pos_err ~ 1e-10`` at box=7 to + ``pos_err ~ 2.0`` pixels at box=5. There is no graceful degradation. + +3. **Sigma recovery is worst:** When the box is too small, sigma errors + reach 15x--32x (the fitter dramatically overestimates sigma to + compensate for the truncated wings), and scale errors reach 48x. + +4. **Offset sensitivity at the boundary:** At box=5 with sigma=0.3--0.5, + the offset (0,0) case produces much smaller position errors than + (0.25,0.25) or (0.5,0.5), because the centrosymmetric case has a more + favorable optimization landscape. + +**Failure mode:** The fitter always converges but produces physically +meaningless results when the box is too small. Users must ensure adequate +box size for their PSF width. + + +Study 2: Subpixel Offset Bias +============================== + +**Question:** Does the fractional pixel position of the PSF center +introduce systematic position bias? + +Offset Parameters +----------------- + +.. list-table:: + :header-rows: 1 + :widths: 20 50 + + * - Parameter + - Values + * - Offset grid + - 21 x 21 points, 0.0 to 0.5 px in each axis (0.025 px steps) + * - Sigma values + - 0.5, 1.0, 1.5, 2.0 px + * - Box size + - 21 px (fixed) + +Offset Results +-------------- + +All 1,764 trials converge with **negligible** position error across the +entire offset range: + +.. list-table:: Subpixel Offset Error Summary + :header-rows: 1 + :widths: 15 20 20 20 + + * - Sigma + - Mean pos_err + - Max pos_err + - Scale error (max) + * - 0.5 + - 2.7e-7 px + - 5.0e-7 px + - 6.8e-7 + * - 1.0 + - 1.6e-7 px + - 4.0e-7 px + - 2.0e-7 + * - 1.5 + - 1.3e-7 px + - 3.5e-7 px + - 4.5e-8 + * - 2.0 + - 1.1e-7 px + - 3.0e-7 px + - 1.8e-8 + +.. figure:: images/subpixel_offset_pos_err_sigma1.0.png + :width: 100% + :alt: Heatmap of position error vs subpixel offset for sigma=1.0 + + Position error (Euclidean, log10 scale) as a function of subpixel + offset (Y and X) for sigma=1.0. The entire grid is at the 1e-7 to + 1e-14 level with no systematic pattern. + +Offset Findings +^^^^^^^^^^^^^^^ + +1. **No subpixel bias:** The fitter shows no systematic position error as + a function of fractional pixel position. All errors are at the + optimizer convergence floor (~1e-7 pixels). + +2. **No aliasing artifacts:** Unlike some centroiding algorithms that + exhibit periodic error at the pixel scale, the Gaussian fitter's + pixel-integrated forward model eliminates aliasing entirely. + +3. **Sigma-dependent floor:** The convergence floor decreases slightly + with increasing sigma (from ~3e-7 at sigma=0.5 to ~1e-7 at sigma=2.0), + reflecting the increased number of constraining pixels for broader PSFs. + +**This is excellent performance.** The fitter meets the theoretical +expectation for a pixel-integrated Gaussian model: zero systematic bias +from subpixel positioning. + + +Study 3: Minimum Detectable Offset +==================================== + +**Question:** What is the smallest positional shift that the fitter can +reliably recover, as a function of PSF sigma and noise level? + +Min-Offset Parameters +--------------------- + +.. list-table:: + :header-rows: 1 + :widths: 20 50 + + * - Parameter + - Values + * - Offset deltas + - 0.001, 0.002, 0.004, 0.007, 0.01, 0.02, 0.04, 0.07, 0.1, 0.2, 0.5 px + * - Sigma values + - 0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 3.0 px + * - SNR conditions + - Noiseless, 20, 50, 100, 500 + * - Noise samples + - 200 per noisy condition + +All 61,677 trials converge. The offset is applied in the X direction only, +with Y fixed at zero. + +Noiseless Precision Floor +------------------------- + +.. figure:: images/min_detectable_offset_pos_err_noiseless.png + :width: 100% + :alt: Noiseless position error vs injected offset delta + + Noiseless position error (Euclidean) vs. injected X offset for each + sigma value. The error curves show the numerical precision floor of + the optimizer. + +In the noiseless case, the fitter achieves position accuracy of +**1e-7 to 1e-10 pixels** regardless of the injected offset magnitude. +Smaller sigmas (0.3, 0.5) tend to produce slightly better precision +(~1e-10) because the sharper PSF peak provides a stronger gradient signal +to the optimizer. Larger sigmas (2.0, 3.0) plateau at ~1e-7. + +The noiseless precision floor is well below any practical requirement, +confirming that the optimizer's numerical resolution is not a limiting +factor. + +Noisy Recovery +-------------- + +.. figure:: images/min_detectable_offset_recovery_snr_100.png + :width: 100% + :alt: Recovery fraction heatmap at SNR=100 + + Recovery fraction (pos_err < delta/2) at SNR=100. Rows are sigma + values; columns are injected offsets. Full recovery (1.0) appears only + at the largest offsets and smallest sigmas. + +With noise, the minimum recoverable offset is set by the noise floor: + +.. list-table:: Approximate 50% Recovery Threshold (offset in pixels) + :header-rows: 1 + :widths: 15 15 15 15 15 + + * - Sigma + - SNR=20 + - SNR=50 + - SNR=100 + - SNR=500 + * - 0.3 + - > 0.5 + - 0.2 + - 0.04 + - 0.007 + * - 0.5 + - > 0.5 + - 0.2 + - 0.07 + - 0.01 + * - 1.0 + - > 0.5 + - > 0.5 + - 0.2 + - 0.04 + * - 2.0 + - > 0.5 + - > 0.5 + - > 0.5 + - 0.2 + * - 3.0 + - > 0.5 + - > 0.5 + - > 0.5 + - > 0.5 + +Min-Offset Findings +^^^^^^^^^^^^^^^^^^^ + +1. **Noise floor dominates:** The practical precision limit is approximately + ``sigma / SNR`` pixels, consistent with the Cramer-Rao lower bound for + Gaussian centroiding. + +2. **Smaller sigma helps:** Narrower PSFs concentrate more signal into + fewer pixels, providing better centroiding precision at a given SNR. + At SNR=100, sigma=0.3 achieves 50% recovery at delta=0.04 px, while + sigma=2.0 cannot reach 50% recovery until delta > 0.5 px. + +3. **Large sigma penalty:** For sigma >= 2.0, the fitter struggles to + recover offsets below 0.2 pixels even at SNR=500. **This is a + significant limitation** for applications requiring sub-0.1-pixel + precision with broad PSFs. + +4. **Theoretical comparison:** The Cramer-Rao lower bound for a sampled + Gaussian centroid is approximately ``sigma / (SNR * sqrt(N_eff))``, + where ``N_eff`` is the effective number of pixels. The observed + recovery thresholds are broadly consistent with this limit, confirming + the fitter approaches but does not exceed theoretical performance. + + +Study 4: Sigma Asymmetry and Angle Recovery +============================================ + +**Question:** How well does the fitter recover the parameters of +elongated, rotated PSFs? + +Asymmetry Parameters +-------------------- + +.. list-table:: + :header-rows: 1 + :widths: 20 50 + + * - Parameter + - Values + * - Sigma ratios (sigma_y/sigma_x) + - 0.2, 0.33, 0.5, 0.67, 0.75, 1.0, 1.5, 2.0, 3.0, 5.0 + * - Angles + - 25 steps from 0 to 180 degrees (7.5 degree resolution) + * - Sigma_x values + - 0.5, 1.0, 2.0 px + * - Box size + - 25 px + +All 750 trials converge (noiseless). + +Asymmetry Results +----------------- + +.. figure:: images/sigma_asymmetry_angle_pos_err_sx1.0.png + :width: 100% + :alt: Position error heatmap for asymmetric PSFs at sigma_x=1.0 + + Position error (Euclidean, log10) as a function of sigma ratio (rows) + and angle (columns) for sigma_x=1.0. Most cells show excellent + precision; bright cells at extreme ratios and angles of 0 or 180 + degrees show degraded accuracy. + +.. figure:: images/sigma_asymmetry_angle_angle_err_sx1.0.png + :width: 100% + :alt: Angle error heatmap for sigma_x=1.0 + + Angle error (degrees, mod 90) for sigma_x=1.0. The ratio=1.0 row + (circular PSF) is greyed out because angle is degenerate. Near-circular + ratios (0.75, 1.5) show the largest angle errors. + +.. list-table:: Sigma Asymmetry Summary (sigma_x=1.0) + :header-rows: 1 + :widths: 15 20 20 20 + + * - Ratio Range + - Typical pos_err + - Typical angle_err + - Notes + * - 0.2 (extreme) + - 1e-2 -- 3e-2 px + - < 0.01 degrees + - Poor position, good angle + * - 0.5 -- 0.75 + - 1e-6 -- 1e-8 px + - < 0.01 degrees + - Excellent overall + * - 1.0 (circular) + - 1e-7 px + - N/A (degenerate) + - Angle undefined + * - 1.5 -- 2.0 + - 1e-6 -- 1e-8 px + - < 0.01 degrees + - Excellent overall + * - 5.0 (extreme) + - 1e-3 -- 1e-5 px + - < 0.1 degrees + - Modest degradation + +Asymmetry Findings +^^^^^^^^^^^^^^^^^^ + +1. **Moderate asymmetry is handled well:** For sigma ratios between 0.5 + and 2.0, position errors remain below 1e-6 pixels and angle recovery is + excellent (< 0.01 degrees). + +2. **Extreme asymmetry degrades position:** At ratio=0.2 (sigma_y = 0.2 + pixels when sigma_x = 1.0), position errors rise to 0.01--0.03 pixels. + **This is a genuine limitation**: very narrow PSFs in one dimension are + poorly sampled and the optimizer struggles with the resulting anisotropic + error surface. + +3. **Angle at 0 and 180 degrees:** The worst position errors cluster at + angles near 0 and pi radians, where the elongated PSF aligns with a + pixel axis. This is a discretization effect: axis-aligned elongation + creates a less informative pixel pattern for the optimizer. + +4. **Near-circular angle degeneracy:** Ratios near 1.0 (0.75, 1.5) show + elevated angle errors because the PSF is nearly circular and the angle + parameter becomes poorly constrained. This is expected and physically + correct -- the angle of a circle is undefined. + +5. **Angle recovery breaks at sigma_x=0.5:** When the reference sigma is + only 0.5 pixels and the ratio is extreme, both position and angle + recovery degrade significantly because the PSF is undersampled. + + +Study 5: Constraint Modes +========================== + +**Question:** How does fixing vs. floating the sigma and angle parameters +affect fitting accuracy? + +Constraint Parameters +--------------------- + +Eleven constraint configurations are tested on four PSF shapes: + +.. list-table:: PSF Shapes Tested + :header-rows: 1 + :widths: 10 15 15 15 + + * - Shape + - Sigma (Y, X) + - Angle + - Description + * - S1 + - (1.0, 1.0) + - 0 degrees + - Circular + * - S2 + - (0.5, 1.5) + - 45 degrees + - Elongated, tilted + * - S3 + - (1.0, 2.0) + - 60 degrees + - Moderately elongated + * - S4 + - (0.7, 1.4) + - 30 degrees + - Mildly elongated + +Constraint modes include: all parameters floating, sigma fixed at correct +value, sigma fixed with 10%--75% error, angle fixed (correct and incorrect), +and all parameters fixed. Six sigma-error fractions are tested: 0%, 10%, +20%, 30%, 50%, and 75%. + +Constraint Results +------------------ + +.. figure:: images/constraint_modes_summary.png + :width: 100% + :alt: Constraint modes summary bar chart + + Six-panel summary of fitting accuracy across constraint modes and PSF + shapes. Top row: position error (Euclidean, Y, X). Bottom row: scale + error, sigma_y error, angle error. + +.. list-table:: Position Error by Constraint Strategy (mean across shapes) + :header-rows: 1 + :widths: 40 20 20 + + * - Constraint Mode + - Mean pos_err (px) + - Mean abs(scale_err) + * - Sigma fixed (correct) + angle fixed (correct) + - 2.5e-4 + - 0.004 + * - Sigma fixed (correct), angle floated + - 2.6e-4 + - 0.004 + * - All float + - 2.5e-4 + - 0.027 + * - Sigma fixed (10% error) + - 3.8e-4 + - 0.075 + * - Sigma fixed (20% error) + - 4.0e-4 + - 0.143 + * - Sigma fixed (50% error) + - 5.4e-4 + - 0.310 + * - Sigma fixed (75% error) + - 6.3e-4 + - 0.437 + +Constraint Findings +^^^^^^^^^^^^^^^^^^^ + +1. **Fixing correct sigma provides marginal position improvement:** + Compared to floating sigma, fixing it at the correct value barely + improves position accuracy (2.5e-4 vs. 2.5e-4 px). The primary + benefit is in scale accuracy. + +2. **Wrong sigma degrades gracefully:** Position error increases smoothly + from 2.5e-4 to 6.3e-4 pixels as sigma error grows from 0% to 75%. + Position is relatively robust to sigma mismatch. + +3. **Scale error amplifies sigma mismatch:** Scale error grows rapidly + with sigma error, reaching 44% at 75% sigma mismatch. **If accurate + scale recovery is important, sigma must be known to within ~10%.** + +4. **Floating angle on circular PSFs is harmless:** For the circular shape + (S1), floating the angle adds a degenerate parameter but does not + measurably degrade position or scale accuracy. + +5. **Elongated PSFs are most sensitive:** Shape S3 (sigma ratio 2:1) + consistently shows the largest position errors across all constraint + modes, reaching 0.003 pixels with 75% sigma error. + + +Study 6: Background Conditions +================================ + +**Question:** How do injected background levels and polynomial fitting +degree interact to affect position accuracy? + +Background Parameters +--------------------- + +.. list-table:: + :header-rows: 1 + :widths: 20 50 + + * - Parameter + - Values + * - Background types + - none, constant, linear, quadratic, noisy_constant + * - Background amplitudes + - 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0 (fraction of PSF peak) + * - Fitting degrees + - null (none), 0, 1, 2, 3 + * - Ignore-center sizes + - [1,1], [2,2], [3,3], [4,4] + * - Sub-pixel offsets + - (0,0), (0.25,0.25), (0.5,0.5) + +All 2,100 trials converge, but accuracy varies enormously. + +Background Results +------------------ + +.. figure:: images/background_pos_err_amp3_ic1_oy0p25_ox0p25.png + :width: 100% + :alt: Background study heatmap at amplitude=0.1, ignore=2x2 + + Position error (Euclidean, log10) for amplitude=0.1x peak (``amp3``, + 0-based index 3 into the amplitude list), ignore-center=2x2 (``ic1``, + 0-based index 1 into the ignore-center list), offset=(0.25,0.25). + Rows: injected background type. + Columns: fitting polynomial degree. The ``null`` column (no background + subtraction) shows catastrophic errors for constant and noisy_constant + backgrounds. + +.. list-table:: Background Fitting -- When It Works and When It Fails + :header-rows: 1 + :widths: 20 15 15 15 15 + + * - Background Type + - null + - degree 0 + - degree 1 + - degree >= 2 + * - none + - 1e-7 px + - 4e-6 px + - 8e-5 px + - 2e-5 px + * - constant + - **0.15 px** (FAIL) + - 4e-6 px + - 8e-5 px + - 2e-5 px + * - linear + - **0.03 px** (FAIL) + - 0.03 px + - 8e-5 px + - 2e-5 px + * - quadratic + - 9e-4 px + - 1e-3 px + - 1e-3 px + - 2e-5 px + * - noisy_constant + - **0.15 px** (FAIL) + - 4e-6 px + - 8e-5 px + - 5e-4 px + +(Representative values at amplitude=0.1, ignore-center=2x2, +offset=0.25,0.25.) + +Background Findings +^^^^^^^^^^^^^^^^^^^ + +1. **Matching degree is essential:** The fitting polynomial degree must + be >= the injected background's polynomial order. Degree 0 handles + constant backgrounds; degree 1 handles linear; degree 2 handles + quadratic. + +2. **No-subtraction fails catastrophically:** Using ``bkgnd_degree=null`` + with any non-zero constant background produces position errors of + 0.1--0.3 pixels at amplitude=0.1x peak, and worse at higher amplitudes. + **This is the most common user error.** + +3. **Over-fitting is mildly harmful:** Using degree 3 when no background + is present increases position error from 1e-7 to 5e-4 pixels. Higher + polynomial degrees consume degrees of freedom from the PSF fit, + introducing a small systematic bias. + +4. **Ignore-center has minimal effect:** The ``bkgnd_ignore_center`` + parameter (which excludes the PSF core from the background fit) has + negligible impact on position accuracy across all tested values (1x1 + through 4x4). + +5. **Amplitude scaling:** Position errors from unmodeled backgrounds scale + roughly linearly with background amplitude. At amplitude=1.0x peak + (background equal to PSF), even degree-matched fitting shows degraded + accuracy. + +6. **Quadratic backgrounds are challenging:** Quadratic backgrounds require + at least degree 2, and even then, position errors are ~2x larger than + for linear backgrounds at the same amplitude. + + +Study 7: Noise Sensitivity +============================ + +**Question:** How does fitting accuracy degrade as a function of +signal-to-noise ratio? + +Noise Parameters +---------------- + +.. list-table:: + :header-rows: 1 + :widths: 20 50 + + * - Parameter + - Values + * - SNR range + - 3.2 to 3162 (25 log-spaced points) + * - Sigma values + - 0.5, 1.0, 1.5, 2.0 px + * - Noise samples + - 200 per (SNR, sigma) point + * - Offsets + - Random uniform in [-0.5, +0.5] per trial + +All 20,000 trials converge. + +Noise Results +------------- + +.. figure:: images/noise_sensitivity_pos_err_vs_snr.png + :width: 100% + :alt: Position error vs SNR + + Position error (Euclidean) vs. SNR for four sigma values. Shaded + bands show +/- 1 standard deviation across 200 noise realizations. + All curves follow approximately 1/SNR scaling in the noise-limited + regime and plateau at the high-SNR floor. + +.. list-table:: Noise Sensitivity -- Key Thresholds + :header-rows: 1 + :widths: 15 20 20 25 + + * - Sigma + - High-SNR floor + - SNR for 0.1 px error + - SNR for 0.01 px error + * - 0.5 + - ~0.001 px + - ~10 + - ~100 + * - 1.0 + - ~0.002 px + - ~15 + - ~200 + * - 1.5 + - ~0.005 px + - ~20 + - ~500 + * - 2.0 + - ~0.009 px + - ~25 + - ~1000 + +Noise Findings +^^^^^^^^^^^^^^ + +1. **1/SNR scaling confirmed:** All sigma values show position error + decreasing as approximately 1/SNR in the noise-limited regime (SNR + < 100--1000 depending on sigma). Regression slopes range from 0.88 + to 1.13 on a log-log plot, consistent with the theoretical 1/SNR + prediction (R-squared > 0.97 for all curves). + +2. **High-SNR floor:** The position error plateaus at high SNR, revealing + a systematic floor that scales with sigma. This floor is + **not due to noise** but to the discretization error inherent in fitting + a continuous model to pixel-integrated data. The floor for sigma=0.5 is + ~0.001 pixels; for sigma=2.0, it is ~0.009 pixels. + +3. **Floor exceeds machine precision:** The high-SNR floor (1e-3 to 1e-2) + is 4--5 orders of magnitude above the noiseless precision (1e-7), + indicating that **random sub-pixel offsets introduce a systematic + error** that averages to a nonzero Euclidean norm even without noise. + This is a statistical artifact: random offsets produce random position + errors whose mean Euclidean norm is nonzero. + +4. **Low-SNR saturation:** Below SNR ~10, position errors saturate near + 1.5--1.8 pixels, limited by the ``search_limit`` parameter (1.5 pixels). + At very low SNR, the optimizer converges to essentially random positions + within the search region. + +5. **Sigma=0.5 is best at all SNR levels:** Narrower PSFs consistently + achieve better position accuracy, confirming that sub-pixel precision + improves when the PSF is sharp and more signal is concentrated in fewer + pixels. + +**Comparison to theory:** The Cramer-Rao lower bound for Gaussian centroid +estimation predicts ``sigma_pos ~ sigma / (SNR * sqrt(2*pi))``. For +sigma=1.0 and SNR=100, this gives ~0.004 pixels, which matches the +observed mean position error of ~0.01 pixels to within a factor of ~2. +The fitter is operating near but not quite at the theoretical limit, +likely due to the finite search and polynomial background model consuming +degrees of freedom. + + +Study 8: Hot Pixel Rejection +============================== + +**Question:** How effectively does the ``num_sigma`` bad-pixel rejection +mechanism handle hot pixels? + +Hot-Pixel Parameters +-------------------- + +.. list-table:: + :header-rows: 1 + :widths: 20 50 + + * - Parameter + - Values + * - Hot pixel counts + - 0, 1, 2, 3, 5, 7, 10, 15 + * - Hot pixel amplitudes + - 2x, 5x, 10x, 20x, 50x, 100x PSF peak + * - num_sigma thresholds + - null (disabled), 3.0, 4.0, 5.0, 6.0, 8.0 + * - Noise samples + - 50 per condition (SNR=100) + +This is the **only study with convergence failures**: 2,401 of 14,400 +trials (16.7%) fail to converge, all associated with ``num_sigma=3.0``. + +Hot-Pixel Results +----------------- + +.. figure:: images/hot_pixel_rejection_pos_err_hotamp2.png + :width: 100% + :alt: Hot pixel rejection position error at amplitude=10x + + Position error vs. number of hot pixels at amplitude=10x peak + (``hotamp2``, 0-based index 2 into the amplitude list [2x, 5x, 10x, 20x, 50x, 100x]). + ``num_sigma=3`` (orange, missing in many panels) causes 100% + convergence failure. ``num_sigma=5`` (red) provides the best + balance of rejection and accuracy. + +.. list-table:: Convergence Rate by num_sigma + :header-rows: 1 + :widths: 20 20 40 + + * - num_sigma + - Convergence Rate + - Notes + * - null (disabled) + - 100% + - No rejection; hot pixels corrupt the fit + * - 3.0 + - **0%** + - Masks too many pixels, including PSF core + * - 4.0 + - 100% + - Good rejection; slight over-masking + * - 5.0 + - 100% + - Best balance of rejection and accuracy + * - 6.0 + - 100% + - Under-rejects at high hot-pixel counts + * - 8.0 + - 100% + - Minimal rejection; poor at many hot pixels + +Hot-Pixel Findings +^^^^^^^^^^^^^^^^^^ + +1. **num_sigma=3 is catastrophic:** A threshold of 3 sigma rejects too + many valid PSF pixels near the core, causing **100% convergence + failure** -- even with zero hot pixels. **This value should never + be used with the current implementation.** + +2. **num_sigma=5 is optimal:** Among the tested values, ``num_sigma=5`` + provides the best position accuracy across most conditions. At 10 hot + pixels with 10x amplitude, it achieves ~0.09 px error vs. ~0.12 px + for ``num_sigma=4``. + +3. **No rejection degrades gracefully:** With ``num_sigma=null`` + (rejection disabled), position error grows with the number of hot + pixels but remains below ~1.0 px for up to 10 hot pixels at moderate + amplitudes (5x--10x). At high amplitudes (50x--100x), errors become + severe. + +4. **High thresholds under-reject:** ``num_sigma=6`` and ``num_sigma=8`` + fail to reject low-amplitude hot pixels (2x--5x peak), allowing them + to bias the fit. At 15 hot pixels, errors for ``num_sigma=8`` can + exceed 1.0 pixels. + +5. **Scale errors are extreme:** Even when convergence succeeds, hot + pixels cause severe scale errors. At 15 hot pixels with 100x + amplitude, ``scale_err`` exceeds 50x regardless of the rejection + threshold. Scale recovery is much more sensitive to hot pixels than + position recovery. + +**Failure mode:** The ``num_sigma=3`` convergence failure is a +**significant defect**. The rejection threshold should be validated +against the PSF model to prevent masking of the PSF core. A minimum +effective threshold of 4.0 should be enforced or documented. + + +Summary and Recommendations +============================ + +Overall Performance Assessment +------------------------------ + +The ``rms-psfmodel`` Gaussian PSF fitter demonstrates **excellent +performance** under favorable conditions and **predictable degradation** +under adverse conditions. The key performance characteristics are: + +.. list-table:: Performance Summary + :header-rows: 1 + :widths: 30 20 30 + + * - Metric + - Best Case + - Limiting Factor + * - Position accuracy (noiseless) + - ~1e-14 pixels + - Machine precision + * - Position accuracy (SNR=100) + - ~0.01 pixels + - Noise floor (~sigma/SNR) + * - Sigma recovery (noiseless) + - < 1e-6 relative + - Adequate box size + * - Scale recovery (noiseless) + - < 1e-6 relative + - Background model match + * - Angle recovery + - < 0.001 degrees + - Asymmetry ratio > 1.5:1 + * - Convergence rate + - 100% + - num_sigma >= 4 + +Where It Meets Expectations +--------------------------- + +1. **Subpixel accuracy:** The fitter achieves machine-precision position + recovery in the noiseless, well-configured case. No subpixel bias is + present. + +2. **Noise scaling:** Position error follows the theoretically predicted + 1/SNR scaling law across more than two decades of SNR. + +3. **Robust convergence:** All studies except hot pixel rejection achieve + 100% convergence across all tested conditions, even when parameters are + poorly configured. + +4. **Moderate asymmetry:** Sigma ratios between 0.5 and 2.0 are handled + with negligible accuracy loss. + +Where It Falls Short +-------------------- + +1. **num_sigma=3 causes total convergence failure.** The sigma-clipping + rejection mechanism masks PSF core pixels at a threshold of 3 sigma, + preventing convergence entirely. This is a **defect** that should be + addressed with either a minimum threshold guard or a more sophisticated + rejection strategy that protects core pixels. + +2. **Broad PSFs have poor sub-pixel precision.** For sigma >= 2.0 pixels, + the minimum recoverable offset at SNR=100 is ~0.5 pixels -- essentially + no sub-pixel capability. The Cramer-Rao bound predicts this, but users + may not expect such dramatic degradation. + +3. **Catastrophic fits from undersized boxes are silent.** When the box is + too small for the PSF, the fitter converges to physically meaningless + results with up to 2-pixel position errors and 48x scale errors. No + warning is issued. A box-size validation check would prevent this. + +4. **Background model mismatch is not flagged.** Using + ``bkgnd_degree=null`` with a constant background present produces + 0.15-pixel position errors. The fitter gives no indication that + background subtraction is needed. + +5. **Extreme asymmetry at small sigma degrades position.** Sigma ratios + below 0.33 or above 3.0 with sigma_x=0.5 produce position errors of + 0.01--0.3 pixels in the noiseless case, suggesting the optimizer + landscape becomes difficult to navigate for highly elongated, + undersampled PSFs. + +6. **Hot pixels corrupt scale recovery.** Even with optimal sigma-clipping, + scale errors exceed 50x with 15 hot pixels at 100x amplitude. Position + recovery is more robust but still degrades to ~0.1 pixels. + +7. **Over-fitting background adds bias.** Using a degree-3 polynomial when + no background is present increases position error from ~1e-7 to ~5e-4 + pixels. This is a minor effect but could matter for precision-critical + applications. + +Practical Recommendations +------------------------- + +Based on these findings, the following guidelines will maximize fitting +accuracy: + +1. **Box size:** Use ``box_size >= 4 * sigma + 1`` pixels. When in doubt, + use a larger box. + +2. **Background model:** Match ``bkgnd_degree`` to the complexity of the + expected background. Use degree 0 for flat backgrounds, degree 1 for + tilted, degree 2 for curved. Avoid ``null`` unless you are certain no + background is present. Do not over-fit with higher degrees than needed. + +3. **Hot pixel rejection:** Use ``num_sigma=5`` as the default rejection + threshold. **Never use num_sigma=3**, which causes total convergence + failure. + +4. **SNR requirements:** For 0.01-pixel position accuracy, SNR should + exceed 100 / sigma. For sigma=1.0, this means SNR > 100; for + sigma=2.0, SNR > 200. + +5. **Asymmetric PSFs:** Sigma ratios between 0.5 and 2.0 require no + special handling. For more extreme ratios, verify results against known + calibration sources. + +6. **Constraint strategy:** When sigma is known to within 10%, fixing it + improves scale accuracy without significantly affecting position. When + sigma is uncertain by more than 20%, float it. + +Reproducibility +--------------- + +These results were produced with the following command:: + + characterize_gauss_fit --config src/characterize_gauss_fit/hires_config.yaml --num-workers 32 + +The complete configuration, per-trial data, and generated plots are +archived in the ``gauss_fit_hires/`` directory. Each study's +``summary.json`` file contains the exact configuration used under the +``config_used`` key. diff --git a/docs/psf_gui.md b/docs/psf_gui.md new file mode 100644 index 0000000..bca468a --- /dev/null +++ b/docs/psf_gui.md @@ -0,0 +1,61 @@ +# `psf_gui` -- Interactive PSF Explorer + +## Overview + +`psf_gui` is an optional interactive desktop application for visually exploring +Gaussian PSF models and fitting results. It is built with Python's standard +Tkinter library and ships as part of the `rms-psfmodel` package. + +The GUI lets you interactively adjust PSF parameters (sigma, angle, offset, scale, +background) and immediately see the resulting pixel-integrated image alongside +the fitting residuals. It is intended as a diagnostic and educational tool rather +than a batch analysis tool; for systematic characterization use +`characterize_gauss_fit` instead. + +## Prerequisites + +`psf_gui` requires a working Tcl/Tk installation. On most systems this is +provided by the `python3-tk` package or equivalent: + +| Platform | Command | +|----------|---------| +| Debian / Ubuntu | `sudo apt install python3-tk` | +| Fedora / RHEL | `sudo dnf install python3-tkinter` | +| macOS (Homebrew Python) | Tk is bundled; no extra install needed | +| macOS (python.org installer) | Tk is bundled; no extra install needed | +| Windows | Tk is bundled with the official Python installer | + +Verify your installation by running: + +```sh +python -c "import tkinter; tkinter._test()" +``` + +A small test window should appear. + +## Running + +After installing `rms-psfmodel`, start the GUI with: + +```sh +psf_gui +``` + +Or, if the `src` directory is on `PYTHONPATH` (development mode): + +```sh +python -m psf_gui +``` + +## Features + +- **Real-time PSF rendering** -- adjusting any slider immediately re-renders + the pixel-integrated Gaussian patch. +- **Background subtraction** -- polynomial background fitting with configurable + degree and ignore-center region. +- **Noise simulation** -- add Gaussian noise to the synthetic PSF image and + observe the effect on fitting accuracy. +- **Residual display** -- view the difference between the fitted model and the + data after position optimization. +- **Parameter readout** -- fitted position, sigma, angle, and scale are + displayed numerically alongside their true injected values. diff --git a/programs/psf_gui.py b/programs/psf_gui.py deleted file mode 100755 index f1666c9..0000000 --- a/programs/psf_gui.py +++ /dev/null @@ -1,214 +0,0 @@ -from pathlib import Path -import sys - -import numpy as np -from tkinter import * - - -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) - -from psfmodel.gaussian import GaussianPSF -# from psfmodel.hst import HSTPSF - - -display_size = 64 - -PSF_TYPE = 'gaussian' -# PSF_TYPE = 'acshrc' -# PSF_TYPE = 'wfc3uvis' -# PSF_TYPE = 'wfpc2pc1' - -psfobj = None - -def command_refresh_psf(val): - global psfobj - - if (psfobj is None or - ((PSF_TYPE in ('acshrc', 'wfpc2pc1', 'wfc3uvis') and - psfobj.subsample != var_subsample.get()))): - if PSF_TYPE == 'gaussian': - print((var_motiony.get(), var_motionx.get())) - psfobj = GaussianPSF() - elif PSF_TYPE == 'acshrc': - psfobj = HSTPSF('ACS', 'HRC', 'F660N', 512, 512, - subsample=var_subsample.get()*2+1, - movement=(var_motiony.get(), var_motionx.get())) - elif PSF_TYPE == 'wfc3uvis': - psfobj = HSTPSF('WFC3', 'UVIS', 'F606W', 128, 128, - subsample=var_subsample.get()*2+1, - movement=(var_motiony.get(), var_motionx.get()), - aperture='UVIS2-C512C-SUB') - elif PSF_TYPE == 'wfpc2pc1': - psfobj = HSTPSF('WFPC2', 'PC1', 'F606W', 128, 128, - subsample=var_subsample.get()*2+1, - movement=(var_motiony.get(), var_motionx.get()), - aperture='UVIS2-C512C-SUB') - - psf = psfobj.eval_rect(((var_psf_ysize.get()//2)*2+1, - (var_psf_xsize.get()//2)*2+1), - (var_y.get(), var_x.get()), - movement=(var_motiony.get(), var_motionx.get()), - sigma=(var_sigmay.get(), var_sigmax.get()), - angle=np.radians(var_angle.get())) - print('PSF SUM', np.sum(psf)) - psf = psf**.5 #np.log10(psf+1e-10) - -# bkgnd = astrofit.background_gradient((var_psf_xsize.get(), var_psf_ysize.get()), -# var_bkgnd_bias.get(), var_bkgnd_scale.get(), var_bkgnd_angle.get()) -# psf += bkgnd - - pix_scale = canvas_size // display_size - ctr_x = canvas_size // 2 - ctr_y = canvas_size // 2 - min_val = 0 # np.min(psf) - max_val = np.max(psf) - canvas.delete('rect') - for y in range(psf.shape[0]): - for x in range(psf.shape[1]): - val = int(max((psf[y, x]-min_val) / (max_val-min_val) * 255, 0)) - color = '#%02x%02x%02x' % (val, val, val) - canvas.create_rectangle(((x-psf.shape[1]//2)*pix_scale+ctr_x, - (y-psf.shape[0]//2)*pix_scale+ctr_y, - (x-psf.shape[1]//2+1)*pix_scale+ctr_x, - (y-psf.shape[0]//2+1)*pix_scale+ctr_y), - outline=color, fill=color, tags='rect') - -if __name__ == "__main__": - toplevel_frame = Frame() - canvas_size = 512 - canvas = Canvas(toplevel_frame, width=canvas_size, height=canvas_size, bg='black', - cursor='crosshair') - canvas.grid(row=0, column=0, sticky=NW) - - # Control sliders - control_frame = Frame(toplevel_frame) - - var_x = DoubleVar() - var_x.set(0.) - var_y = DoubleVar() - var_y.set(0.) - var_sigmax = DoubleVar() - var_sigmax.set(2.) - var_sigmay = DoubleVar() - var_sigmay.set(2.) - var_angle = DoubleVar() - var_angle.set(0.) - var_psf_xsize = IntVar() - var_psf_xsize.set(21) - var_psf_ysize = IntVar() - var_psf_ysize.set(21) - var_subsample = IntVar() - var_subsample.set(0) - var_motionx = DoubleVar() - var_motionx.set(0.) - var_motiony = DoubleVar() - var_motiony.set(0.) - var_bkgnd_bias = DoubleVar() - var_bkgnd_bias.set(0.) - var_bkgnd_scale = DoubleVar() - var_bkgnd_scale.set(0.) - var_bkgnd_angle = DoubleVar() - var_bkgnd_angle.set(0.) - - gridrow = 0 - - label = Label(control_frame, text='X') - label.grid(row=gridrow, column=0, sticky=W) - scale_x = Scale(control_frame, orient=HORIZONTAL, from_=-5., to=5., resolution=0.01, - variable=var_x, command=command_refresh_psf) - scale_x.grid(row=gridrow, column=1) - gridrow += 1 - - label = Label(control_frame, text='Y') - label.grid(row=gridrow, column=0, sticky=W) - scale_y = Scale(control_frame, orient=HORIZONTAL, from_=-5., to=5., resolution=0.01, - variable=var_y, command=command_refresh_psf) - scale_y.grid(row=gridrow, column=1) - gridrow += 1 - - if PSF_TYPE == 'gaussian': - label = Label(control_frame, text='SIGMA X') - label.grid(row=gridrow, column=0, sticky=W) - scale_sigmax = Scale(control_frame, orient=HORIZONTAL, from_=0.001, to=5., resolution=0.001, - variable=var_sigmax, command=command_refresh_psf) - scale_sigmax.grid(row=gridrow, column=1) - gridrow += 1 - - label = Label(control_frame, text='SIGMA Y') - label.grid(row=gridrow, column=0, sticky=W) - scale_sigmay = Scale(control_frame, orient=HORIZONTAL, from_=0.001, to=5., resolution=0.001, - variable=var_sigmay, command=command_refresh_psf) - scale_sigmay.grid(row=gridrow, column=1) - gridrow += 1 - - label = Label(control_frame, text='ANGLE') - label.grid(row=gridrow, column=0, sticky=W) - scale_sigmay = Scale(control_frame, orient=HORIZONTAL, from_=0., to=180, resolution=1, - variable=var_angle, command=command_refresh_psf) - scale_sigmay.grid(row=gridrow, column=1) - gridrow += 1 - - # label = Label(control_frame, text='BKGND BIAS') - # label.grid(row=gridrow, column=0, sticky=W) - # scale_bkgnd_bias = Scale(control_frame, orient=HORIZONTAL, from_=0., to=1, resolution=0.001, - # variable=var_bkgnd_bias, command=command_refresh_psf) - # scale_bkgnd_bias.grid(row=gridrow, column=1) - # gridrow += 1 - # - # label = Label(control_frame, text='BKGND SCALE') - # label.grid(row=gridrow, column=0, sticky=W) - # scale_bkgnd_scale = Scale(control_frame, orient=HORIZONTAL, from_=0., to=.05, resolution=0.001, - # variable=var_bkgnd_scale, command=command_refresh_psf) - # scale_bkgnd_scale.grid(row=gridrow, column=1) - # gridrow += 1 - # - # label = Label(control_frame, text='BKGND ANGLE') - # label.grid(row=gridrow, column=0, sticky=W) - # scale_bkgnd_angle = Scale(control_frame, orient=HORIZONTAL, from_=0., to=360., resolution=1., - # variable=var_bkgnd_angle, command=command_refresh_psf) - # scale_bkgnd_angle.grid(row=gridrow, column=1) - # gridrow += 1 - - gridrow = 0 - - label = Label(control_frame, text='PSF X SIZE') - label.grid(row=gridrow, column=2, sticky=W) - scale_psf_xsize = Scale(control_frame, orient=HORIZONTAL, from_=1, to=101., resolution=1, - variable=var_psf_xsize, command=command_refresh_psf) - scale_psf_xsize.grid(row=gridrow, column=3) - gridrow += 1 - - label = Label(control_frame, text='PSF Y SIZE') - label.grid(row=gridrow, column=2, sticky=W) - scale_psf_ysize = Scale(control_frame, orient=HORIZONTAL, from_=1, to=101., resolution=1, - variable=var_psf_ysize, command=command_refresh_psf) - scale_psf_ysize.grid(row=gridrow, column=3) - gridrow += 1 - - label = Label(control_frame, text='SUBSAMPLE (*2+1)') - label.grid(row=gridrow, column=2, sticky=W) - scale_subsample = Scale(control_frame, orient=HORIZONTAL, from_=0, to=4., resolution=1, - variable=var_subsample, command=command_refresh_psf) - scale_subsample.grid(row=gridrow, column=3) - gridrow += 1 - - label = Label(control_frame, text='MOTION X') - label.grid(row=gridrow, column=2, sticky=W) - scale_motionx = Scale(control_frame, orient=HORIZONTAL, from_=-10., to=10., resolution=.1, - variable=var_motionx, command=command_refresh_psf) - scale_motionx.grid(row=gridrow, column=3) - gridrow += 1 - - label = Label(control_frame, text='MOTION Y') - label.grid(row=gridrow, column=2, sticky=W) - scale_motiony = Scale(control_frame, orient=HORIZONTAL, from_=-10., to=10., resolution=.1, - variable=var_motiony, command=command_refresh_psf) - scale_motiony.grid(row=gridrow, column=3) - gridrow += 1 - - control_frame.grid(row=1, column=0, sticky=NW) - toplevel_frame.pack() - - command_refresh_psf(0) - - mainloop() diff --git a/psfmodel/__init__.py b/psfmodel/__init__.py deleted file mode 100644 index 737cc75..0000000 --- a/psfmodel/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .psf import PSF -from .gaussian import GaussianPSF - -__all__ = ['PSF', 'GaussianPSF'] diff --git a/psfmodel/psf.py b/psfmodel/psf.py deleted file mode 100755 index 34be8a7..0000000 --- a/psfmodel/psf.py +++ /dev/null @@ -1,883 +0,0 @@ -################################################################################ -# psfmodel/__init__.py -################################################################################ - -from abc import ABC, abstractmethod -from typing import Any, Optional, cast - -import numpy as np -import numpy.ma as ma -import numpy.typing as npt -import scipy.linalg as linalg -import scipy.optimize as sciopt - -# Version -try: - from ._version import __version__ -except ImportError: # pragma: no cover - __version__ = 'Version unspecified' - - -class PSF(ABC): - """Abstract superclass for classes that model different types of PSFs.""" - - def __init__(self, - **kwargs: Any) -> None: - """Create a PSF object. Only called by subclasses.""" - - self._debug_opt = 0 - self._additional_params: list[Any] = [] - - @abstractmethod - def eval_point(self, - coord: (tuple[float | npt.NDArray[np.floating], - float | npt.NDArray[np.floating]] | - npt.NDArray[np.floating]), - *, - scale: float = 1., - base: float = 0.) -> float | npt.NDArray[np.floating]: - """Evaluate the PSF at a single, fractional, point. - - (0, 0) is the center of the PSF and x and y may be negative. - - Parameters: - coord: The coordinate (y, x) at which to evaluate the PSF. - scale: A scale factor to apply to the resulting PSF. - base: A scalar added to the resulting PSF. - - Other parameters may be available for specific subclasses. - - Returns: - The PSF value at the given coordinate. - """ - ... # pragma: no cover - - # @abstractmethod - # def eval_pixel(self, - # coord: list[int] | tuple[int, int], - # offset: list[float] | tuple[float, float] = (0., 0.), - # *, - # scale: float = 1., - # base: float = 0., - # **kwargs: Any) -> float: - # """Evaluate the PSF integrated over an entire integer pixel. - - # The returned array has the PSF offset from the center by (offset_y,offset_x). An - # offset of (0, 0) places the PSF in the upper left corner of the center pixel - # while - # an offset of (0.5, 0.5) places the PSF in the center of the center pixel. The - # offset should be limited to the range [0, 1). - - # Parameters: - # coord: The integer coordinate (y, x) at which to evaluate the PSF. - # offset: The amount (offset_y, offset_x) to offset the center of the PSF. - # scale: A scale factor to apply to the resulting PSF. - # base: A scalar added to the resulting PSF. - - # Other inputs may be available for specific subclasses. - # """ - # ... - - @abstractmethod - def eval_rect(self, - rect_size: list[int] | tuple[int, int], - offset: list[float] | tuple[float, float] = (0.5, 0.5), - *, - movement: Optional[tuple[float, float]] = None, - movement_granularity: float = 0.1, - scale: float = 1., - base: float = 0., - **kwargs: Any) -> npt.NDArray[np.float64]: - """Create a rectangular pixelated PSF. - - This is done by evaluating the PSF function from - [-rect_size_y//2:rect_size_y//2] to [-rect_size_x//2:rect_size_x//2]. - - The returned array has the PSF offset from the center by - (offset_y, offset_x). An offset of (0, 0) places the PSF in the upper - left corner of the center pixel while an offset of (0.5, 0.5) - places the PSF in the center of the center pixel. The angle is applied - relative to this new origin, so as angle changes the center of the - ellipse does not move. - - Parameters: - rect_size: The size of the rectangle (rect_size_y, rect_size_x) of the - returned PSF. Both dimensions must be odd. - offset: The amount (offset_y, offset_x) to offset the center of the PSF. - movement: The amount of motion blur in the (Y, X) direction. Must be a tuple - of scalars. None means no movement. - movement_granularity: The number of pixels to step for each smear while doing - motion blur. A smaller granularity means that the resulting PSF will be - more precise but also take longer to compute. - scale: A scale factor to apply to the resulting PSF. - base: A scalar added to the resulting PSF. - - Other inputs may be available for specific subclasses. - - Returns: - The integral of the 2-D PSF over each full pixel in the rectangle. - """ - ... # pragma: no cover - - @abstractmethod - def _eval_rect(self, - rect_size: tuple[int, int], - offset: tuple[float, float] = (0.5, 0.5), - *, - scale: float = 1., - base: float = 0.) -> npt.NDArray[np.float64]: - """Internal function to create a rectangular pixelated PSF without other checks. - """ - ... # pragma: no cover - - def _eval_rect_smeared(self, - rect_size: tuple[int, int], - offset: tuple[float, float] = (0.5, 0.5), - *, - movement: Optional[tuple[float, float]] = None, - movement_granularity: float = 0.1, - scale: float = 1., - base: float = 0., - **kwargs: Any) -> npt.NDArray[np.floating]: - """Evaluate and sum a PSF multiple times to simulate motion blur. - - Parameters: - movement: The total amount (my, mx) the PSF moves. The movement is assumed to - be centered on the given offset and exists half on either side. - movement_granularity: The number of pixels to step for each smear while doing - motion blur. - rect_size: The size of the rectangle (rect_size_y, rect_size_x) of the - returned PSF. Both dimensions must be odd. - offset: The amount (offset_y, offset_x) to offset the center of the PSF. A - positive offset effectively moves the PSF down and to the left. XXX - scale: A scale factor to apply to the resulting PSF. - base: A scalar added to the resulting PSF. - - Other inputs may be available for specific subclasses. - """ - - if movement is None or (movement[0] == 0 and movement[1] == 0): - return self._eval_rect(rect_size, offset=offset, - scale=scale, base=base, **kwargs) - - num_steps = int(max(abs(movement[0]) / movement_granularity, - abs(movement[1]) / movement_granularity)) - - if num_steps == 0: - step_y = 0. - step_x = 0. - else: - step_y = movement[0] / num_steps - step_x = movement[1] / num_steps - - total_rect = None - - for step in range(num_steps+1): - y = offset[0] + step_y*(step - num_steps/2.) - x = offset[1] + step_x*(step - num_steps/2.) - - rect = self._eval_rect(rect_size, offset=(y, x), - scale=scale, base=base, **kwargs) - if total_rect is None: - total_rect = rect - else: - total_rect += rect - assert total_rect is not None - - total_rect /= float(num_steps+1) - - return total_rect - - #========================================================================== - # - # Static functions for creating background gradients - # - #========================================================================== - - @staticmethod - def _background_gradient_coeffs(shape: tuple[int, int], - order: int) -> npt.NDArray[np.float64]: - """Internal routine for creating the coefficient matrix. - - Fundamentally this creates a coefficient matrix indicating the powers of different - orders of polynomials. For example, an order 1 polynomial (Ax + B) when performed - in two dimensions becomes (Ax + By + C), which has three free parameters. An order - 2 polynomial (Ax^2 + Bx + C) in two dimensions becomes (Ax^2 + By^2 + Cxy + Dx + - Ey + F), which has six free parameters. The number of free parameters is (order * - (order+1)) / 2. - - To make further computation easy, this coefficient matrix is then multiplied with - a 2-D array that represents the X and Y coordinates, ranging from -N to N such - that the values are (0, 0) at the center of the image. This is the matrix that is - returned to the caller. - - The resulting 3-D matrix has the indicies: - 0: Y - 1: X - 2: parameter number - """ - - if shape[0] < 0 or shape[1] < 0 or shape[0] % 2 != 1 or shape[1] % 2 != 1: - raise ValueError( - f'Image must have odd positive shape in each dimension, got {shape}') - if order < 0: - raise ValueError(f'Order must be non-negative, got {order}') - - # Create arrays of indexes for line and sample with (0, 0) at the center of the - # image - y_values = np.arange(shape[0])[:, np.newaxis] - int(shape[0] / 2) - x_values = np.arange(shape[1])[np.newaxis, :] - int(shape[1] / 2) - - y_powers: list[float | npt.NDArray[np.floating]] = [1.] - x_powers: list[float | npt.NDArray[np.floating]] = [1.] - - nparams = int((order+1) * (order+2) / 2) - a3d = np.empty((shape[0], shape[1], nparams)) - a3d[:, :, 0] = 1. # This is the constant term of the polynomial - - k = 0 # Parameter number - for p in range(1, order+1): - # This creates, sequentially, L, L**2, L**3... and S, S**2, S**3... - y_powers.append(y_powers[-1] * y_values) - x_powers.append(x_powers[-1] * x_values) - - # These nested loops walk through all the combinations of L**N * S**M - # such that N+M == P where P ranges from 1 to . This gives us - # all combinations like: - # 1 - # Y - # X - # X*Y - # Y**2 - # X**2 - for q in range(p+1): - k += 1 - a3d[:, :, k] = y_powers[q] * x_powers[p-q] - - return a3d - - @staticmethod - def background_gradient_fit(image: npt.NDArray[np.floating], - order: int = 2, - ignore_center: Optional[int | tuple[int, int]] = None, - num_sigma: Optional[float] = None, - debug: bool = False - ) -> tuple[npt.NDArray[np.float64] | None, - npt.NDArray[np.float64] | None]: - """Return the polynomial fit to the pixels of an image. - - Parameters: - image: 2D array to fit; must have odd shape in each dimension. - order: Order of the polynomial. - ignore_center: A scalar or tuple (ignore_y, ignore_x) giving the number of - pixels on either side of the center to ignore while fitting. 0 means - ignore the center pixel. None means don't ignore anything. - num_sigma: The number of sigma a pixel needs to be beyond the background - gradient to be ignored. None means don't ignore bad pixels. - debug: Set to debug bad pixel removal. - - Returns: - A tuple of the background coefficient array and the mask of ignored pixels. - """ - - if len(image.shape) != 2: - raise ValueError('Image must be 2-D, got {image.shape}') - if (image.shape[0] < 0 or image.shape[1] < 0 or - image.shape[0] % 2 != 1 or image.shape[1] % 2 != 1): - raise ValueError( - 'Image must have odd positive shape in each dimension, got ' - f'{image.shape}') - if order < 0: - raise ValueError(f'Order must be non-negative, got {order}') - - shape = cast(tuple[int, int], image.shape) - - is_masked = False - - if ignore_center is not None or num_sigma is not None: - if isinstance(image, ma.MaskedArray): - # We're going to change the mask so make a copy first - image = image.copy() - else: - image = image.view(ma.MaskedArray) - - if isinstance(image, ma.MaskedArray): - image.mask = cast(npt.NDArray[np.bool_], - ma.getmaskarray(image)) # type: ignore - is_masked = True - - if ignore_center is not None: - if isinstance(ignore_center, int): - ignore_y = ignore_center - ignore_x = ignore_center - else: - ignore_y, ignore_x = ignore_center - if ignore_y*2+1 >= shape[0] or ignore_x*2+1 >= shape[1]: - if debug: # pragma: no cover - print('BKGND CENTER IGNORED IS ENTIRE IMAGE') # XXX - return None, None - ctr_y = shape[0] // 2 - ctr_x = shape[1] // 2 - image[ctr_y-ignore_y:ctr_y+ignore_y+1, - ctr_x-ignore_x:ctr_x+ignore_x+1] = ma.masked - - nparams = int((order+1) * (order+2) // 2) - - a3d = PSF._background_gradient_coeffs(shape, order) - - if num_sigma is not None: - num_bad_pixels = cast(int, ma.count_masked(image)) # type: ignore - if debug: # pragma: no cover - print('BKGND GRAD INIT # BAD', num_bad_pixels) - - while True: - # Reshape properly for linalg.lstsq - a2d = a3d.reshape((image.size, nparams)) - b1d = image.flatten() - - if is_masked: - # linalg doesn't support masked arrays! - a2d = a2d[~b1d.mask] # type: ignore - b1d = ma.compressed(b1d) # type: ignore - - if a2d.shape[0] < a2d.shape[1]: # Underconstrained - if debug: # pragma: no cover - print('BKGND UNDERCONSTRAINED', a2d.shape) - return None, None - - coeffts = linalg.lstsq(a2d, b1d)[0] - - if num_sigma is None: - break - - # TODO - BITO suggests: - # worst_sigma = np.max(np.abs(delta_img)) - # if worst_sigma >= sigma*num_sigma: - # image[np.abs(delta_img) >= sigma*num_sigma] = ma.masked - gradient = PSF.background_gradient(shape, coeffts) - delta_img = image - gradient - sigma = np.std(delta_img) - worst_sigma = np.max(np.abs(delta_img)) - if worst_sigma >= sigma*num_sigma: - image[np.abs(delta_img) >= worst_sigma] = ma.masked - - new_num_bad_pixels = cast(int, ma.count_masked(image)) # type: ignore - if debug: # pragma: no cover - print('BKGD GRAD NEW # BAD', new_num_bad_pixels) - if new_num_bad_pixels == num_bad_pixels: - break - num_bad_pixels = new_num_bad_pixels - - if is_masked: - return coeffts, ma.getmaskarray(image) # type: ignore - else: - return coeffts, np.zeros(shape, dtype=np.bool_) - - @staticmethod - def background_gradient(rect_size: tuple[int, int], - bkgnd_params: npt.ArrayLike) -> npt.NDArray[np.float64]: - """Create a background gradient. - - Parameters: - size: A tuple (size_y, size_x) indicating the size of the returned array. - bkgnd_params: A tuple indicating the coefficients of the background - polynomial. The order of the polynomial is inferred from the number of - elements in the tuple. - """ - - bkgnd_params = np.array(bkgnd_params) - - order = int(np.sqrt(len(bkgnd_params)*2))-1 - - a3d = PSF._background_gradient_coeffs(rect_size, order) - result = np.sum(bkgnd_params * a3d, axis=-1) - - return cast(npt.NDArray[np.float64], result) - - #========================================================================== - # - # Functions for finding astrometric positions - # - #========================================================================== - - def find_position(self, - image: npt.NDArray[np.floating], - box_size: tuple[int, int], - starting_point: tuple[float, float], - *, - search_limit: float | tuple[float, float] = (1.5, 1.5), - bkgnd_degree: int | None = 2, - bkgnd_ignore_center: tuple[int, int] = (2, 2), - bkgnd_num_sigma: Optional[float] = None, - tolerance: float = 1e-6, - num_sigma: Optional[float] = None, - max_bad_frac: float = 0.2, - allow_nonzero_base: bool = False, - scale_limit: float = 1000., - use_angular_params: bool = True - ) -> None | tuple[float, float, dict[str, Any]]: - """Find the (y, x) coordinates that best fit a 2-D PSF to an image. - - Parameters: - image: The image (2-D). - box_size: A tuple (box_y, box_x) indicating the size of the PSF to use. This - governs both the size of the PSF created at each step as well as the size - of the subimage looked at. Both box_y and box_x must be odd. - starting_point: A tuple (y, x) indicating the best guess for where the object - can be found. Searching is limited to a region around this point - controlled by `search_limit`. - search_limit: A scalar or tuple (y_limit, x_limit) specifying the maximum - distance to search from `starting_point`. If a scalar, both x_limit - and y_limit are the same. - bkgnd_degree: The degree (order) of the background gradient polynomial. None - means no background gradient is fit. - bkgnd_ignore_center: A tuple (ny, nx) giving the number of pixels on each side - of the center point to ignore when fitting the background. The ignored - region is thus ny*2+1 by nx*2+1. - bkgnd_num_sigma: The number of sigma a pixel needs to be beyond the background - gradient to be ignored. None means don't ignore bad pixels while computing - the background gradient. - tolerance: The tolerance (both X and Function) in the Powell optimization - algorithm. - num_sigma: The number of sigma for a pixel to be considered bad during PSF - fitting. None means don't ignore bad pixels while fitting the PSF. - max_bad_frac: The maximum allowable number of pixels masked during PSF - fitting. If more pixels than this fraction are masked, the position fit - fails. - allow_nonzero_base: If True, allow the base of the PSF (constant bias) to - vary. Otherwise the base of the PSF is always at zero and can only scale - in the positive direction. - scale_limit: The maximum PSF scale allowed. - use_angular_params: Use angles to optimize parameter values. - - Returns: - None if no fit found. - - Otherwise returns pos_y, pos_x, metadata. Metadata is a dictionary - containing:: - - 'x' The offset in X. (Same as pos_x) - 'x_err' Uncertainty in X. - 'y' The offset in Y. (Same as pos_y) - 'y_err' Uncertainty in Y. - 'scale' The best fit PSF scale. - 'scale_err' Uncertainty in PSF scale. - 'base' The best fit PSF base. - 'base_err' Uncertainty in PSF base. - 'subimg' The box_size area of the original image - surrounding starting_point masked as - necessary using the num_sigma threshold. - 'bkgnd_params' The tuple of parameters defining the - background gradient. - 'bkgnd_mask' The mask used during background gradient - fitting. - 'gradient' The box_size background gradient. - 'subimg-gradient' The subimg with the background gradient - subtracted. - 'psf' The unit-scale and zero-base PSF. - 'scaled_psf' The fully scaled PSF with the base added. - 'leastsq_cov' The covariance matrix returned by leastsq - as adjusted by the residual variance. - 'leastsq_infodict' The infodict returned by leastsq. - 'leastsq_mesg' The mesg returned by leastsq. - 'leastsq_ier' The ier returned by leastsq. - - In addition, metadata includes two entries for each "additional - parameter" used during optimization: one for the value and one for - the uncertainty ('param' and 'param_err'). - """ - - if (box_size[0] < 0 or box_size[1] < 0 or - box_size[0] % 2 != 1 or box_size[1] % 2 != 1): - raise ValueError( - 'box_size must have odd positive shape in each dimension, ' - f'got {box_size}') - - half_box_size_y = box_size[0] // 2 - half_box_size_x = box_size[1] // 2 - - starting_pix = (int(starting_point[0]), - int(starting_point[1])) - - if self._debug_opt: - print('>> Entering psfmodel:find_position') - print('Image is masked', isinstance(image, ma.MaskedArray)) - print('Image num masked', np.sum(ma.getmaskarray(image))) # type: ignore - print('Image min, max, mean', np.min(image), np.max(image), np.mean(image)) - print('Box size', box_size) - print('Starting point', starting_point) - print('Search limit', search_limit) - print('Bkgnd degree', bkgnd_degree) - print('Bkgnd ignore center', bkgnd_ignore_center) - print('Bkgnd num sigma', bkgnd_num_sigma) - print('Tolerance', tolerance) - print('Num sigma', num_sigma) - print('Max bad frac', max_bad_frac) - print('Allow nonzero base', allow_nonzero_base) - print('Scale limit', scale_limit) - print('Use angular params', use_angular_params) - print('-----') - - # Too close to the edge means we can't search - if (starting_pix[0] - half_box_size_y < 0 or - starting_pix[0] + half_box_size_y >= image.shape[0] or - starting_pix[1] - half_box_size_x < 0 or - starting_pix[1] + half_box_size_x >= image.shape[1]): - if self._debug_opt: - print('Too close to the edge - search impossible') - return None - - sub_img = image[starting_pix[0] - half_box_size_y: - starting_pix[0] + half_box_size_y+1, - starting_pix[1] - half_box_size_x: - starting_pix[1] + half_box_size_x+1] - - if self._debug_opt: - print('Sub img min, max, mean', np.min(sub_img), np.max(sub_img), - np.mean(sub_img)) - - if not isinstance(search_limit, (list, tuple)): - search_limit = (float(search_limit), float(search_limit)) - - if num_sigma: - if isinstance(sub_img, ma.MaskedArray): - # We're going to change the mask so make a copy first - sub_img = sub_img.copy() - else: - sub_img = sub_img.view(ma.MaskedArray) - - num_bad_pixels = 0 - - while True: - if self._debug_opt > 1: - print('MAIN LOOP: FIND POS, # BAD PIXELS', num_bad_pixels) - ret = self._find_position(sub_img, - search_limit, scale_limit, - bkgnd_degree, bkgnd_ignore_center, - bkgnd_num_sigma, tolerance, - allow_nonzero_base, use_angular_params) - if ret is None: - if self._debug_opt: - print('find_position returned None') - return None - - res_y, res_x, details = ret - - if not num_sigma: - break - - resid = np.sqrt((details['subimg-gradient'] - details['scaled_psf'])**2) - resid_std = np.std(resid) - - if self._debug_opt > 1: - print('MAIN LOOP: Resid', resid) - print('resid_std', resid_std) - - if num_sigma is not None: - sub_img[np.where(resid > num_sigma*resid_std)] = ma.masked - - new_num_bad_pixels = ma.count_masked(sub_img) # type: ignore - if new_num_bad_pixels == num_bad_pixels: - break - if new_num_bad_pixels == sub_img.size: - if self._debug_opt: - print('MAIN LOOP: All pixels masked - find_position returning None') - return None # All masked - if new_num_bad_pixels > max_bad_frac*sub_img.size: - if self._debug_opt: - print('MAIN LOOP: Too many pixels masked - ' - 'find_position returning None') - return None # Too many masked - num_bad_pixels = new_num_bad_pixels - - if self._debug_opt: - msg = f'find_position returning Y {res_y+starting_pix[0]:.4f}' - # if details['y_err'] is not None: - # msg += f' +/- {details["y_err"]:.4f}' - msg += f' X {res_x+starting_pix[1]:.4f}' - # if details['x_err'] is not None: - # msg += ' +/- {details["x_err"]:.4f}' - if details['scale'] is not None: - msg += f' Scale {details["scale"]:.4f} Base {details["base"]:.4f}' - if 'sigma_y' in details: - msg += f' SY {details["sigma_y"]:.4f} SX {details["sigma_x"]:.4f}' - print(msg) - - return res_y + starting_pix[0], res_x + starting_pix[1], details - - def _fit_psf_func(self, - params: tuple[float, ...], - sub_img: npt.NDArray[np.floating], - search_limit: tuple[float, float], - scale_limit: float, - allow_nonzero_base: bool, - use_angular_params: bool, - *additional_params: Any) -> float: - - # Make an offset of "0" be the center of the pixel (0.5, 0.5) - if use_angular_params: - # params are (ang_y, ang_x, ang_scale, ...) - offset_y = search_limit[0] * np.cos(params[0]) + 0.5 - offset_x = search_limit[1] * np.cos(params[1]) + 0.5 - scale = scale_limit * (np.cos(params[2]) + 1) / 2 - else: - # params are (y, x, scale, ...) - offset_y = params[0] + 0.5 - offset_x = params[1] + 0.5 - scale = params[2] - # This was only needed when using an optimization func that doesn't support - # bounds. - # fake_resid = None - # if not (-search_limit[0] <= params[0] <= search_limit[0]): - # fake_resid = abs(params[0]) * 1e10 - # elif not (-search_limit[1] <= params[1] <= search_limit[1]): - # fake_resid = abs(params[1]) * 1e10 - # elif not (0.00001 <= scale <= scale_limit): - # fake_resid = abs(scale) * 1e10 - # if fake_resid is not None: - # fake_return = np.zeros(sub_img.shape).flatten() - # fake_return[:] = fake_resid - # if self._debug_opt > 1: - # full_resid = np.sqrt(np.sum(fake_return**2)) - # print('RESID', full_resid) - # return fake_return - - base = 0. - param_end = 3 - if allow_nonzero_base: - base = params[3] - param_end = 4 - - addl_vals_dict = {} - for i, ap in enumerate(additional_params): - if use_angular_params: - val = ((ap[1] - ap[0]) / 2. * - (np.cos(params[param_end+i])+1.) + ap[0]) - else: - val = params[param_end+i] - addl_vals_dict[ap[2]] = val - - psf = self.eval_rect(cast(tuple[int, int], sub_img.shape), - (offset_y, offset_x), - scale=scale, base=base, **addl_vals_dict) - - resid = (sub_img - psf).flatten() - - full_resid = cast(float, np.sqrt(np.sum(resid**2))) - - if self._debug_opt > 1: - msg = f'OFFY {offset_y:8.5f} OFFX {offset_x:8.5f} SCALE {scale:9.5f} ' - msg += f'BASE {base:9.5f}' - for ap in additional_params: - msg += f' {ap[2].upper()} {addl_vals_dict[ap[2]]:8.5f}' - msg += f' RESID {full_resid:f}' - print(msg) - - return full_resid - - def _find_position(self, - sub_img: npt.NDArray[np.floating], - search_limit: tuple[float, float], - scale_limit: float, - bkgnd_degree: int | None, - bkgnd_ignore_center: tuple[int, int], - bkgnd_num_sigma: float | None, - tolerance: float, - allow_nonzero_base: bool, - use_angular_params: bool - ) -> None | tuple[float, float, dict[str, Any]]: - - bkgnd_params = None - bkgnd_mask = None - gradient = np.zeros(sub_img.shape) - - if bkgnd_degree is not None: - bkgnd_params, bkgnd_mask = PSF.background_gradient_fit( - sub_img, - order=bkgnd_degree, - ignore_center=bkgnd_ignore_center, - num_sigma=bkgnd_num_sigma, - debug=self._debug_opt > 2) - if bkgnd_params is None: - return None - - gradient = PSF.background_gradient(cast(tuple[int, int], sub_img.shape), - bkgnd_params) - - sub_img_grad = sub_img - gradient - - # Offset Y, Offset X, Scale, AdditionalParams - if use_angular_params: - bounds = [(0., np.pi), - (0., np.pi), - (0., np.pi)] - starting_guess = [np.pi/2, np.pi/2, np.pi/2] - if allow_nonzero_base: - bounds += [(0., np.pi)] - starting_guess += [np.pi/2] - for _ in range(len(self._additional_params)): - bounds += [(0., np.pi)] - starting_guess += [np.pi/2] - else: - bounds = [(-search_limit[0], search_limit[0]), - (-search_limit[1], search_limit[1]), - (0., scale_limit)] - starting_guess = [0.001, 0.001, scale_limit/2] - if allow_nonzero_base: - bounds += [(-1e38, 1e38)] - starting_guess += [0.001] - for a_min, a_max, a_name in self._additional_params: - bounds += [(a_min, a_max)] - starting_guess = starting_guess + [np.mean([a_min, a_max])] - - extra_args0 = (sub_img_grad, search_limit, scale_limit, - allow_nonzero_base, use_angular_params) - if (self._additional_params is not None and - len(self._additional_params) > 0): - extra_args = extra_args0 + tuple(self._additional_params) - else: - extra_args = extra_args0 + tuple([]) - - if self._debug_opt > 3: - print('-' * 80) - print(f'STARTING GUESS: {starting_guess}') - print(f'BOUNDS: {bounds}') - - full_result = sciopt.minimize(self._fit_psf_func, - starting_guess, - args=extra_args, - bounds=bounds, - tol=tolerance, - method='Powell', - options={'maxiter': len(starting_guess) * 10000}) - - result = full_result.x - success = full_result.success - status = full_result.status - message = full_result.message - - if not success: - print('FAIL', message) - return None - - # if ier < 1 or ier > 4: - # return None - - if use_angular_params: - offset_y = search_limit[0] * np.cos(result[0]) + 0.5 - offset_x = search_limit[1] * np.cos(result[1]) + 0.5 - scale = scale_limit * (np.cos(result[2]) + 1) / 2 - else: - offset_y = result[0] + 0.5 - offset_x = result[1] + 0.5 - scale = result[2] - - base = 0. - result_end = 3 - if allow_nonzero_base: - base = result[3] - result_end = 4 - - addl_vals_dict = {} - for i, ap in enumerate(self._additional_params): - if use_angular_params: - val = ((ap[1] - ap[0]) / 2. * - (np.cos(result[result_end+i])+1.) + ap[0]) - else: - val = result[result_end+i] - addl_vals_dict[ap[2]] = val - - psf = self.eval_rect(cast(tuple[int, int], sub_img.shape), (offset_y, offset_x), - scale=scale, base=base, **addl_vals_dict) - - details = {} - details['x'] = offset_x - details['y'] = offset_y - details['subimg'] = sub_img - details['bkgnd_params'] = bkgnd_params - details['bkgnd_mask'] = bkgnd_mask - details['gradient'] = gradient - details['subimg-gradient'] = sub_img_grad - details['psf'] = psf - details['scale'] = scale - details['base'] = base - details['scaled_psf'] = psf*scale+base - - # if cov_x is None: - # details['leastsq_cov'] = None - # details['x_err'] = None - # details['y_err'] = None - # details['scale_err'] = None - # details['base_err'] = None - # for i, ap in enumerate(self._additional_params): - # details[ap[2]+'_err'] = None - # else: - # # "To obtain the covariance matrix of the parameters x, cov_x must - # # be multiplied by the variance of the residuals" - # dof = psf.shape[0]*psf.shape[1]-len(result) - # resid_var = np.sum(self._fit_psf_func(result, *extra_args)**2) / dof - # cov = cov_x * resid_var # In angle-parameter space!! (if - # use_angular_params) - # details['leastsq_cov'] = cov - # if use_angular_params: - # # Deriv of SL0 * sin(R0) is - # # SL0 * cos(R0) * dR0 - # y_err = np.abs((np.sqrt(cov[0][0]) % (np.pi*2)) * - # search_limit[0] * np.cos(result[0])) - # x_err = np.abs((np.sqrt(cov[1][1]) % (np.pi*2)) * - # search_limit[1] * np.cos(result[1])) - # # Deriv of SC/2 * (sin(R)+1) = SC/2 * sin(R) + SC/2 is - # # SC/2 * cos(R) * dR - # scale_err = np.abs((np.sqrt(cov[2][2]) % (np.pi*2)) * - # scale_limit/2 * np.cos(result[2])) - # details['x_err'] = x_err - # details['y_err'] = y_err - # details['scale_err'] = scale_err - # for i, ap in enumerate(self._additional_params): - # err = np.abs((np.sqrt(cov[i+3][i+3]) % (np.pi*2)) * - # (ap[1]-ap[0])/2 * np.cos(result[i+3])) - # details[ap[2]+'_err'] = err - # else: - # details['x_err'] = np.sqrt(cov[0][0]) - # details['y_err'] = np.sqrt(cov[1][1]) - # details['scale_err'] = np.sqrt(cov[2][2]) - # for i, ap in enumerate(self._additional_params): - # details[ap[2]+'_err'] = np.sqrt(cov[i+result_end][i+result_end]) - # # Note the base is not computed using angles - # details['base_err'] = None - # if allow_nonzero_base: - # details['base_err'] = np.sqrt(cov[3][3]) - - # details['leastsq_infodict'] = infodict - # details['leastsq_mesg'] = mesg - # details['leastsq_ier'] = ier - - for key in addl_vals_dict: - details[key] = addl_vals_dict[key] - - if self._debug_opt > 1: - print('_find_position RETURNING', offset_y, offset_x) - print('Subimg num bad pixels', - np.sum(ma.getmaskarray(sub_img))) # type: ignore - print('Bkgnd params', bkgnd_params) - print('Bkgnd mask bad pixels', - np.sum(ma.getmaskarray(bkgnd_mask))) # type: ignore - print('PSF scale', scale) - print('PSF base', base) - for key in addl_vals_dict: - print(key, details[key]) - # print('LEASTSQ COV') - # cov = details['leastsq_cov'] - # print(cov) - # if cov is not None: - # print('X_ERR', details['x_err']) - # print('Y_ERR', details['y_err']) - # print('SCALE_ERR', details['scale_err']) - # print('BASE_ERR', details['base_err']) - # for key in addl_vals_dict: - # print(key+'_err', details[key+'_err']) - print('MESSAGE', message) - print('STATUS', status) - print('-----') - - return offset_y, offset_x, details diff --git a/pyproject.toml b/pyproject.toml index 8284744..a3428d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,21 +3,24 @@ requires = ["setuptools", "setuptools_scm[toml]"] build-backend = "setuptools.build_meta" [project] -name = "rms-psfmodel" +name = "psfmodel" dynamic = ["version"] -description = "Routines for converting to and from psfmodel dates" +description = "PSF model fitting" readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.10" dependencies = [ - "astropy", - "numpy", - "scipy" + "astropy>=5.3", + "numpy>=1.24", + "scipy>=1.10" ] license = {text = "Apache-2.0"} +authors = [ + {name = "Robert S. French", email = "rfrench@seti.org"} +] maintainers = [ {name = "Robert S. French", email = "rfrench@seti.org"} ] -keywords = ["psfmodel"] +keywords = ["psf", "point-spread-function", "astronomy", "gaussian", "fitting"] classifiers = [ "Development Status :: 5 - Production/Stable", "Natural Language :: English", @@ -26,25 +29,163 @@ classifiers = [ "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Utilities", "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: MacOS :: MacOS X", "Operating System :: POSIX :: Linux", "Operating System :: Microsoft :: Windows" ] [project.urls] -Homepage = "https://github.com/SETI/rms-psfmodel" -Repository = "https://github.com/SETI/rms-psfmodel" -Source = "https://github.com/SETI/rms-psfmodel" -Issues = "https://github.com/SETI/rms-psfmodel/issues" +Homepage = "https://github.com/SETI/psfmodel" +Documentation = "https://psfmodel.readthedocs.io/en/latest" +Repository = "https://github.com/SETI/psfmodel" +Source = "https://github.com/SETI/psfmodel" +Issues = "https://github.com/SETI/psfmodel/issues" + +[tool.pytest.ini_options] +pythonpath = [ + "src" +] +testpaths = ["tests"] +addopts = ["-n", "4", "--cov=psfmodel", "--strict-markers", "--strict-config"] +filterwarnings = [ + # xdist controller process runs no tests; coverage still warns there even + # though workers collect data (see pytest-cov / coverage issues). + "ignore:No data was collected\\.:coverage.exceptions.CoverageWarning", +] +markers = [] [tool.setuptools] -packages = ["psfmodel"] +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +psfmodel = ["py.typed"] +characterize_gauss_fit = ["defaults.yaml", "test_config.yaml", "hires_config.yaml"] [tool.setuptools_scm] local_scheme = "no-local-version" -write_to = "psfmodel/_version.py" +write_to = "src/psfmodel/_version.py" + +[project.optional-dependencies] +dev = [ + "psfmodel", + "coverage>=7.0", + "mypy>=1.0", + "pymarkdownlnt>=0.9.35", + "pytest>=7.0", + "pytest-cov>=4.0", + "pytest-xdist>=3.8.0", + "ruff>=0.8", + "bandit[toml]>=1.8", + "pyroma>=4.2", + "vulture>=2.14", + "psfmodel[docs]", +] +docs = [ + "myst-parser", + "sphinx>=7", + "sphinxcontrib-mermaid", + "sphinx-rtd-theme", +] +characterize = [ + "matplotlib>=3.7", + "pyyaml>=6.0", +] + +[project.scripts] +psf_gui = "psf_gui.main:main" + +# Tool configuration + +[tool.coverage.run] +branch = true +parallel = true +source = ["psfmodel"] +omit = ["tests/*", "**/_version.py", "**/psfmodel/hst.py", "**/psf_gui/**", "**/characterize_gauss_fit/**"] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "raise NotImplementedError", +] +fail_under = 90 + +[tool.pyright] +# Legacy module; same exclusion as mypy/ruff until modernized. +exclude = ["src/psfmodel/hst.py"] + +[tool.mypy] +strict = true +exclude = [ + "src/psfmodel/hst\\.py", +] + +[[tool.mypy.overrides]] +module = "psfmodel._version" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "numpy.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "scipy.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "matplotlib.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "yaml" +ignore_missing_imports = true + +#[[tool.mypy.overrides]] +# module = ["MODULENAME.*"] +# ignore_missing_imports = true + +[tool.ruff] +target-version = "py310" +line-length = 100 +exclude = [ + "src/psfmodel/hst.py", +] + +[tool.ruff.format] +quote-style = "single" + +[tool.ruff.lint] +# Explicit rule set (recommended for library projects). Categories: +# E, W = pycodestyle errors/warnings; F = Pyflakes; I = isort; UP = pyupgrade; +# B = bugbear; SIM = simplify; C4 = comprehensions; A = builtins (no shadowing); +# N = pep8-naming; PT = pytest-style; RUF = Ruff-specific (e.g. unused noqa). +select = ["E", "F", "W", "I", "UP", "B", "SIM", "C4", "A", "N", "PT", "RUF"] +# PT011 - pytest.raises is too broad. +# SIM105 - Use contextlib.suppress for suppressions. +# SIM108 - Use ternary operator for simple if/else. +extend-ignore = ["PT011", "SIM105", "SIM108"] + +[tool.ruff.lint.per-file-ignores] +#"TODO" = ["TODO"] + +[tool.pymarkdown.plugins.md013] +# Line length (disable so README/CONTRIBUTING can use longer lines). +enabled = false + +[tool.pymarkdown.plugins.md033] +# Inline HTML (e.g.
) allowed in Markdown. +enabled = false + +[tool.bandit] +exclude_dirs = ["tests", "venv", ".venv", "src/psfmodel/hst.py"] +targets = ["src"] + +[tool.vulture] +paths = ["src"] +exclude = ["tests/", "src/psfmodel/hst.py"] +min_confidence = 70 diff --git a/requirements.txt b/requirements.txt index acb6349..4f182dd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,5 @@ -astropy -coverage -flake8 -mypy -myst-parser -numpy -Pillow -pytest -scipy -sphinx -sphinxcontrib-napoleon -sphinx-rtd-theme +# Install the package in editable mode. For development (tests, lint, type-check, docs) use: +# pip install -e ".[dev]" +# For documentation build only: +# pip install -e ".[docs]" +-e . diff --git a/scripts/run-all-checks.sh b/scripts/run-all-checks.sh new file mode 100755 index 0000000..3237fda --- /dev/null +++ b/scripts/run-all-checks.sh @@ -0,0 +1,663 @@ +#!/usr/bin/env bash +# +# REPONAME - Run All Checks Script +# +# This script runs linting, type checking, tests, Sphinx build, and +# Markdown lint as separate checks. In parallel mode all requested +# checks run concurrently. +# +# Usage: +# ./scripts/run-all-checks.sh [options] +# +# Options: +# -p, --parallel Run all requested checks in parallel (default) +# -s, --sequential Run all requested checks sequentially +# -w, --pytest-workers N Override pytest -n: auto, 1 (serial), or N (default: +# use [tool.pytest.ini_options] addopts in pyproject.toml) +# -c, --code Run all code checks (sets each RUN_* code flag true) +# -d, --docs Run Sphinx and PyMarkdown (RUN_SPHINX, RUN_PYMARKDOWN) +# -m, --markdown Run only PyMarkdown (RUN_PYMARKDOWN) +# --ruff-check Run ruff check only (may combine with other --* flags) +# --ruff-format Run ruff format --check only +# --mypy Run mypy only +# --pytest Run pytest only +# --pyroma Run pyroma only +# --bandit Run bandit only +# --vulture Run vulture only +# --sphinx Run Sphinx build only +# --pymarkdown Run PyMarkdown scan only +# -h, --help Show this help message +# +# Environment: +# VENV or VENV_PATH Path to virtualenv (default: $PROJECT_ROOT/venv) +# CLEANUP_GRACE_PERIOD Seconds to wait for graceful shutdown (default: 5) +# +# Pytest coverage minimum: configure fail_under in coverage config (e.g. +# pyproject.toml [tool.coverage.report] or .coveragerc [report]). +# +# RUN_* (set by this script from CLI or full-run defaults): RUN_RUFF_CHECK, +# RUN_RUFF_FORMAT, RUN_MYPY, RUN_PYTEST, RUN_PYROMA, RUN_BANDIT, RUN_VULTURE, +# RUN_SPHINX, RUN_PYMARKDOWN +# +# Per-check toggles (true/false). Defaults favor a minimal CI set; export to +# enable more tools in a given repo. Each check runs only if both RUN_* and +# ENABLE_* are true (RUN_* from CLI or defaults below; ENABLE_* from env): +# ENABLE_RUFF_CHECK (default: true) +# ENABLE_RUFF_FORMAT (default: true) [modified from original] +# ENABLE_MYPY (default: true) [modified from original] +# ENABLE_PYTEST (default: true) +# ENABLE_PYROMA (default: true) +# ENABLE_BANDIT (default: true) [modified from original] +# ENABLE_VULTURE (default: true) [modified from original] +# ENABLE_SPHINX (default: true) +# ENABLE_PYMARKDOWN PyMarkdown scan (default: true) +# +# Checks (each run separately; -d runs both Sphinx and Markdown): +# Code: optional: ruff check, ruff format --check, mypy, pytest, pyroma, +# bandit, vulture (see ENABLE_* above) +# Sphinx: make -C docs html SPHINXOPTS="-W" +# Markdown: pymarkdown scan docs/ .cursor/ README.md CONTRIBUTING.md +# +# Exit codes: +# 0 - All requested checks passed +# 1 - One or more checks failed +# + +set -euo pipefail + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +BOLD='\033[1m' +RESET='\033[0m' + +# Default options +PARALLEL=true +PYTEST_WORKERS_SET=false +PYTEST_WORKERS= +RUN_RUFF_CHECK=false +RUN_RUFF_FORMAT=false +RUN_MYPY=false +RUN_PYTEST=false +RUN_PYROMA=false +RUN_BANDIT=false +RUN_VULTURE=false +RUN_SPHINX=false +RUN_PYMARKDOWN=false +SCOPE_SPECIFIED=false + +# Per-check defaults (override by exporting before invoking this script, or +# permanently change here) +: "${ENABLE_RUFF_CHECK:=true}" +: "${ENABLE_RUFF_FORMAT:=true}" +: "${ENABLE_MYPY:=true}" +: "${ENABLE_PYTEST:=true}" +: "${ENABLE_PYROMA:=true}" +: "${ENABLE_BANDIT:=true}" +: "${ENABLE_VULTURE:=true}" +: "${ENABLE_SPHINX:=true}" +: "${ENABLE_PYMARKDOWN:=true}" + +# Get script directory and project root +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +VENV="${VENV:-${VENV_PATH:-$PROJECT_ROOT/venv}}" + +# Track failures and final exit code +FAILED_CHECKS=() +EXIT_CODE=0 + +# Temp directory for parallel output and status files +TEMP_DIR=$(mktemp -d) + +# Grace period (seconds) before SIGKILL after SIGTERM +CLEANUP_GRACE_PERIOD=${CLEANUP_GRACE_PERIOD:-5} +if ! echo "$CLEANUP_GRACE_PERIOD" | grep -qE '^[0-9]+$'; then + echo "Error: CLEANUP_GRACE_PERIOD must be a non-negative integer (got: $CLEANUP_GRACE_PERIOD)" >&2 + exit 1 +fi + +_wait_or_kill() { + local pid=$1 + [ -z "$pid" ] && return 0 + kill -TERM "$pid" 2>/dev/null || true + local waited=0 + while [ "$waited" -lt "$CLEANUP_GRACE_PERIOD" ]; do + kill -0 "$pid" 2>/dev/null || break + sleep 1 + waited=$((waited + 1)) + done + if kill -0 "$pid" 2>/dev/null; then + kill -KILL "$pid" 2>/dev/null || true + fi + wait "$pid" 2>/dev/null || true + return 0 +} + +_cleanup() { + rm -rf "$TEMP_DIR" +} + +# On INT/TERM: kill all background check jobs with grace period, then exit +_cleanup_and_exit() { + local sig_code=$1 + local pids + pids=$(jobs -p) + if [ -n "$pids" ]; then + for pid in $pids; do + _wait_or_kill "$pid" + done + fi + _cleanup + exit "$sig_code" +} +trap '_cleanup_and_exit 130' SIGINT +trap '_cleanup_and_exit 143' SIGTERM +trap _cleanup EXIT + +print_header() { + echo -e "\n${BOLD}${BLUE}===================================================${RESET}" + echo -e "${BOLD}${BLUE} $1${RESET}" + echo -e "${BOLD}${BLUE}===================================================${RESET}\n" +} + +print_section() { + echo -e "\n${BOLD}${YELLOW}>>> $1${RESET}\n" +} + +print_success() { + echo -e "${GREEN}✓${RESET} $1" +} + +print_error() { + echo -e "${RED}✗${RESET} $1" +} + +print_info() { + echo -e "${BLUE}ℹ${RESET} $1" +} + +show_usage() { + sed -n '/^# Usage:/,/^# Exit codes:/p' "$0" | sed 's/^# //g' | sed 's/^#//g' +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -p|--parallel) + PARALLEL=true + shift + ;; + -s|--sequential) + PARALLEL=false + shift + ;; + -w|--pytest-workers) + if [[ -z "${2:-}" || "$2" =~ ^- ]]; then + echo -e "${RED}Error: -w/--pytest-workers requires a value (auto, 1, 2, ...)${RESET}" >&2 + show_usage + exit 1 + fi + PYTEST_WORKERS_SET=true + PYTEST_WORKERS="$2" + shift 2 + ;; + --pytest-workers=*) + PYTEST_WORKERS_SET=true + PYTEST_WORKERS="${1#*=}" + shift + ;; + -c|--code) + RUN_RUFF_CHECK=true + RUN_RUFF_FORMAT=true + RUN_MYPY=true + RUN_PYTEST=true + RUN_PYROMA=true + RUN_BANDIT=true + RUN_VULTURE=true + SCOPE_SPECIFIED=true + shift + ;; + -d|--docs) + RUN_SPHINX=true + RUN_PYMARKDOWN=true + SCOPE_SPECIFIED=true + shift + ;; + -m|--markdown) + RUN_PYMARKDOWN=true + SCOPE_SPECIFIED=true + shift + ;; + --ruff-check) + RUN_RUFF_CHECK=true + SCOPE_SPECIFIED=true + shift + ;; + --ruff-format) + RUN_RUFF_FORMAT=true + SCOPE_SPECIFIED=true + shift + ;; + --mypy) + RUN_MYPY=true + SCOPE_SPECIFIED=true + shift + ;; + --pytest) + RUN_PYTEST=true + SCOPE_SPECIFIED=true + shift + ;; + --pyroma) + RUN_PYROMA=true + SCOPE_SPECIFIED=true + shift + ;; + --bandit) + RUN_BANDIT=true + SCOPE_SPECIFIED=true + shift + ;; + --vulture) + RUN_VULTURE=true + SCOPE_SPECIFIED=true + shift + ;; + --sphinx) + RUN_SPHINX=true + SCOPE_SPECIFIED=true + shift + ;; + --pymarkdown) + RUN_PYMARKDOWN=true + SCOPE_SPECIFIED=true + shift + ;; + -h|--help) + show_usage + exit 0 + ;; + *) + echo -e "${RED}Error: Unknown option: $1${RESET}" >&2 + show_usage + exit 1 + ;; + esac +done + +# Default: run all checks (each RUN_* true; ENABLE_* still filters per repo) +if [ "$SCOPE_SPECIFIED" = false ]; then + RUN_RUFF_CHECK=true + RUN_RUFF_FORMAT=true + RUN_MYPY=true + RUN_PYTEST=true + RUN_PYROMA=true + RUN_BANDIT=true + RUN_VULTURE=true + RUN_SPHINX=true + RUN_PYMARKDOWN=true +fi + +START_TIME=$(date +%s) + +print_header "rms-psfmodel - Running All Checks" + +if [ "$PARALLEL" = true ]; then + print_info "Running checks in PARALLEL mode" +else + print_info "Running checks in SEQUENTIAL mode" +fi +if [ "$RUN_PYTEST" = true ] && [ "$ENABLE_PYTEST" = true ]; then + if [ "$PYTEST_WORKERS_SET" = true ]; then + print_info "Pytest -n override: $PYTEST_WORKERS" + else + print_info "Pytest -n: from pyproject.toml (no -w override)" + fi +fi + +# True if at least one code check is both selected (RUN_*) and enabled (ENABLE_*). +_code_checks_any_scheduled() { + [ "$RUN_RUFF_CHECK" = true ] && [ "$ENABLE_RUFF_CHECK" = true ] && return 0 + [ "$RUN_RUFF_FORMAT" = true ] && [ "$ENABLE_RUFF_FORMAT" = true ] && return 0 + [ "$RUN_MYPY" = true ] && [ "$ENABLE_MYPY" = true ] && return 0 + [ "$RUN_PYTEST" = true ] && [ "$ENABLE_PYTEST" = true ] && return 0 + [ "$RUN_PYROMA" = true ] && [ "$ENABLE_PYROMA" = true ] && return 0 + [ "$RUN_BANDIT" = true ] && [ "$ENABLE_BANDIT" = true ] && return 0 + [ "$RUN_VULTURE" = true ] && [ "$ENABLE_VULTURE" = true ] && return 0 + return 1 +} + +# ---- Code checks (ruff, mypy, pytest, pyroma, bandit, vulture) ---- +run_code_checks() { + local output_file="${1:-}" + local status_file="${2:-}" + + if [ -n "$output_file" ]; then + exec > "$output_file" 2>&1 + fi + + print_section "Code Checks" + + cd "$PROJECT_ROOT" || exit 1 + + if ! _code_checks_any_scheduled; then + print_info "No code checks scheduled (RUN_* and ENABLE_*); skipping code checks" + return 0 + fi + + if [ ! -f "$VENV/bin/activate" ]; then + print_error "Virtual environment not found at $VENV" + [ -n "$status_file" ] && echo "Code - Virtual environment not found" >> "$status_file" + return 1 + fi + + # shellcheck source=/dev/null + source "$VENV/bin/activate" + + local failed=false + local failed_checks="" + + if [ "$RUN_RUFF_CHECK" = true ] && [ "$ENABLE_RUFF_CHECK" = true ]; then + print_info "Running ruff check..." + if python -m ruff check src tests; then + print_success "Ruff check passed" + else + print_error "Ruff check failed" + failed=true + failed_checks="${failed_checks}Code - Ruff check"$'\n' + fi + fi + + if [ "$RUN_RUFF_FORMAT" = true ] && [ "$ENABLE_RUFF_FORMAT" = true ]; then + print_info "Running ruff format --check..." + if python -m ruff format --check src tests; then + print_success "Ruff format check passed" + else + print_error "Ruff format check failed" + failed=true + failed_checks="${failed_checks}Code - Ruff format"$'\n' + fi + fi + + if [ "$RUN_MYPY" = true ] && [ "$ENABLE_MYPY" = true ]; then + print_info "Running mypy..." + if MYPYPATH=src python -m mypy src tests; then + print_success "Mypy passed" + else + print_error "Mypy failed" + failed=true + failed_checks="${failed_checks}Code - Mypy"$'\n' + fi + fi + + # Pass -n only when -w/--pytest-workers is set; otherwise addopts in + # pyproject.toml supply -n. --dist loadscope keeps each test module on one + # worker to avoid time-mocking and fixture-isolation interference. + # Coverage (--cov=psfmodel) and strict options come from pyproject.toml addopts. + if [ "$RUN_PYTEST" = true ] && [ "$ENABLE_PYTEST" = true ]; then + if [ "$PYTEST_WORKERS_SET" = true ]; then + print_info "Running pytest (-n ${PYTEST_WORKERS})..." + if python -m pytest -q -n "$PYTEST_WORKERS" --dist loadscope tests; then + print_success "Pytest passed" + else + print_error "Pytest failed" + failed=true + failed_checks="${failed_checks}Code - Pytest"$'\n' + fi + else + print_info "Running pytest (parallelism from pyproject.toml)..." + if python -m pytest -q --dist loadscope tests; then + print_success "Pytest passed" + else + print_error "Pytest failed" + failed=true + failed_checks="${failed_checks}Code - Pytest"$'\n' + fi + fi + fi + + if [ "$RUN_PYROMA" = true ] && [ "$ENABLE_PYROMA" = true ]; then + print_info "Running pyroma (packaging metadata)..." + if python -m pyroma .; then + print_success "Pyroma passed" + else + print_error "Pyroma failed" + failed=true + failed_checks="${failed_checks}Code - Pyroma"$'\n' + fi + fi + + if [ "$RUN_BANDIT" = true ] && [ "$ENABLE_BANDIT" = true ]; then + print_info "Running bandit..." + if python -m bandit -c pyproject.toml -r src -q; then + print_success "Bandit passed" + else + print_error "Bandit failed" + failed=true + failed_checks="${failed_checks}Code - Bandit"$'\n' + fi + fi + + if [ "$RUN_VULTURE" = true ] && [ "$ENABLE_VULTURE" = true ]; then + print_info "Running vulture..." + if python -m vulture src tests; then + print_success "Vulture passed" + else + print_error "Vulture failed" + failed=true + failed_checks="${failed_checks}Code - Vulture"$'\n' + fi + fi + + deactivate 2>/dev/null || true + + if [ "$failed" = true ]; then + [ -n "$status_file" ] && printf '%s' "$failed_checks" >> "$status_file" + return 1 + fi + return 0 +} + +# ---- Sphinx build only ---- +run_sphinx_build() { + local output_file="${1:-}" + local status_file="${2:-}" + + if [ -n "$output_file" ]; then + exec > "$output_file" 2>&1 + fi + + print_section "Sphinx Build" + + cd "$PROJECT_ROOT" || exit 1 + + if [ ! -f "$VENV/bin/activate" ]; then + print_error "Virtual environment not found at $VENV" + [ -n "$status_file" ] && echo "Sphinx - Virtual environment not found" >> "$status_file" + return 1 + fi + + # shellcheck source=/dev/null + source "$VENV/bin/activate" + + print_info "Building documentation (warnings treated as errors)..." + if (cd docs && make clean && make html SPHINXOPTS="-W"); then + print_success "Sphinx build passed" + deactivate 2>/dev/null || true + return 0 + else + print_error "Sphinx build failed" + [ -n "$status_file" ] && echo "Sphinx - Sphinx build" >> "$status_file" + deactivate 2>/dev/null || true + return 1 + fi +} + +# ---- Markdown lint only (PyMarkdown) ---- +run_markdown_checks() { + local output_file="${1:-}" + local status_file="${2:-}" + + if [ -n "$output_file" ]; then + exec > "$output_file" 2>&1 + fi + + print_section "Markdown Lint (PyMarkdown)" + + cd "$PROJECT_ROOT" || exit 1 + + if [ ! -f "$VENV/bin/activate" ]; then + print_error "Virtual environment not found at $VENV" + [ -n "$status_file" ] && echo "Markdown - Virtual environment not found" >> "$status_file" + return 1 + fi + + # shellcheck source=/dev/null + source "$VENV/bin/activate" + + print_info "Running PyMarkdown scan (docs/, .cursor/, root *.md)..." + local scan_paths=() + [ -d "docs/" ] && scan_paths+=("docs/") + [ -d ".cursor/" ] && scan_paths+=(".cursor/") + [ -f "README.md" ] && scan_paths+=("README.md") + [ -f "CONTRIBUTING.md" ] && scan_paths+=("CONTRIBUTING.md") + if [ ${#scan_paths[@]} -eq 0 ]; then + print_info "No Markdown files/directories found to scan" + deactivate 2>/dev/null || true + return 0 + fi + if python -m pymarkdown scan "${scan_paths[@]}"; then + print_success "PyMarkdown scan passed" + deactivate 2>/dev/null || true + return 0 + else + print_error "PyMarkdown scan failed" + [ -n "$status_file" ] && echo "Markdown - PyMarkdown scan" >> "$status_file" + deactivate 2>/dev/null || true + return 1 + fi +} + +# ---- Collect status from a status file into FAILED_CHECKS ---- +_collect_status() { + local status_file=$1 + if [ -f "$status_file" ]; then + while IFS= read -r line; do + [ -n "$line" ] && FAILED_CHECKS+=("$line") + done < "$status_file" + fi +} + +# ---- Run requested checks ---- +if [ "$PARALLEL" = true ]; then + print_info "Running requested checks in parallel, please wait..." + + pids=() + temp_files=() + status_files=() + + if _code_checks_any_scheduled; then + code_output="$TEMP_DIR/code.log" + code_status="$TEMP_DIR/code.status" + temp_files+=("$code_output") + status_files+=("$code_status") + run_code_checks "$code_output" "$code_status" & + pids+=($!) + fi + + if [ "$RUN_SPHINX" = true ] && [ "$ENABLE_SPHINX" = true ]; then + sphinx_output="$TEMP_DIR/sphinx.log" + sphinx_status="$TEMP_DIR/sphinx.status" + temp_files+=("$sphinx_output") + status_files+=("$sphinx_status") + run_sphinx_build "$sphinx_output" "$sphinx_status" & + pids+=($!) + fi + + if [ "$RUN_PYMARKDOWN" = true ] && [ "$ENABLE_PYMARKDOWN" = true ]; then + markdown_output="$TEMP_DIR/markdown.log" + markdown_status="$TEMP_DIR/markdown.status" + temp_files+=("$markdown_output") + status_files+=("$markdown_status") + run_markdown_checks "$markdown_output" "$markdown_status" & + pids+=($!) + fi + + # Wait for all jobs; any non-zero exit sets EXIT_CODE=1 + for pid in "${pids[@]}"; do + if ! wait "$pid"; then + EXIT_CODE=1 + fi + done + + # Collect named failures from status files + for status_file in "${status_files[@]}"; do + _collect_status "$status_file" + done + + # Safety net: if any status file had content, ensure EXIT_CODE reflects it + [ ${#FAILED_CHECKS[@]} -gt 0 ] && EXIT_CODE=1 + + # Print all outputs in a fixed order + echo "" + for log_file in "${temp_files[@]}"; do + [ -f "$log_file" ] && cat "$log_file" + done +else + # Sequential — pass a status file so FAILED_CHECKS is populated + if _code_checks_any_scheduled; then + code_status="$TEMP_DIR/code.status" + if ! run_code_checks "" "$code_status"; then + EXIT_CODE=1 + fi + _collect_status "$code_status" + fi + + if [ "$RUN_SPHINX" = true ] && [ "$ENABLE_SPHINX" = true ]; then + sphinx_status="$TEMP_DIR/sphinx.status" + if ! run_sphinx_build "" "$sphinx_status"; then + EXIT_CODE=1 + fi + _collect_status "$sphinx_status" + fi + + if [ "$RUN_PYMARKDOWN" = true ] && [ "$ENABLE_PYMARKDOWN" = true ]; then + markdown_status="$TEMP_DIR/markdown.status" + if ! run_markdown_checks "" "$markdown_status"; then + EXIT_CODE=1 + fi + _collect_status "$markdown_status" + fi +fi + +# ---- Summary ---- +END_TIME=$(date +%s) +ELAPSED=$((END_TIME - START_TIME)) +MINUTES=$((ELAPSED / 60)) +ELAPSED_SECONDS=$((ELAPSED % 60)) + +print_header "Summary" + +if [ "$EXIT_CODE" -eq 0 ]; then + print_success "All checks passed!" + echo -e "${GREEN}${BOLD}✓ SUCCESS${RESET} - All checks completed successfully" +else + print_error "Some checks failed:" + if [ ${#FAILED_CHECKS[@]} -eq 0 ]; then + echo -e " ${RED}✗${RESET} One or more checks failed (see output above)" + else + for check in "${FAILED_CHECKS[@]}"; do + echo -e " ${RED}✗${RESET} $check" + done + echo -e "${RED}${BOLD}✗ FAILURE${RESET} - ${#FAILED_CHECKS[@]} check(s) failed" + fi +fi + +echo "" +print_info "Total time: ${MINUTES}m ${ELAPSED_SECONDS}s" +echo "" + +exit "$EXIT_CODE" diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index da79b55..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[metadata] -name = rms-psfmodel diff --git a/src/characterize_gauss_fit/__init__.py b/src/characterize_gauss_fit/__init__.py new file mode 100644 index 0000000..7713746 --- /dev/null +++ b/src/characterize_gauss_fit/__init__.py @@ -0,0 +1,15 @@ +################################################################################ +# characterize_gauss_fit/__init__.py +################################################################################ + +"""Characterization tool for Gaussian PSF fitting accuracy. + +This package systematically explores the input parameter space of the Gaussian +PSF fitter and produces plots and tabular data showing how fitting accuracy +depends on subimage size, subpixel offset, PSF shape, parameter constraints, +background conditions, noise, and bad pixel rejection. + +Entry point: ``characterize_gauss_fit`` command or ``python -m characterize_gauss_fit``. +""" + +__all__: list[str] = [] diff --git a/src/characterize_gauss_fit/__main__.py b/src/characterize_gauss_fit/__main__.py new file mode 100644 index 0000000..77d3816 --- /dev/null +++ b/src/characterize_gauss_fit/__main__.py @@ -0,0 +1,10 @@ +################################################################################ +# characterize_gauss_fit/__main__.py +################################################################################ + +"""Entry point for ``python -m characterize_gauss_fit``.""" + +from characterize_gauss_fit.main import main + +if __name__ == '__main__': + main() diff --git a/src/characterize_gauss_fit/_study_utils.py b/src/characterize_gauss_fit/_study_utils.py new file mode 100644 index 0000000..a250045 --- /dev/null +++ b/src/characterize_gauss_fit/_study_utils.py @@ -0,0 +1,308 @@ +################################################################################ +# characterize_gauss_fit/_study_utils.py +################################################################################ + +"""Shared utilities for study modules in characterize_gauss_fit. + +Not part of the public API; prefixed with ``_`` to indicate internal use. +""" + +from __future__ import annotations + +import math +import pathlib +import sys +from typing import Any + +import numpy as np +import numpy.typing as npt + +from characterize_gauss_fit.config import FittingConfig +from characterize_gauss_fit.trial import ( + BACKGROUND_TYPE_NONE, + TrialResult, + TrialSpec, +) + + +def offset_tag(offset_y: float, offset_x: float) -> str: + """Return a filename-safe tag encoding a (offset_y, offset_x) pair. + + Decimal points are replaced with 'p' and negative signs with 'm', so + ``(0.25, 0.0)`` becomes ``'oy0p25_ox0p00'``. + + Parameters: + offset_y: Y component of the offset (fractional pixels). + offset_x: X component of the offset (fractional pixels). + + Returns: + A short ASCII string safe for use in file and directory names. + """ + + def _fmt(v: float) -> str: + return f'{v:.2f}'.replace('.', 'p').replace('-', 'm') + + return f'oy{_fmt(offset_y)}_ox{_fmt(offset_x)}' + + +def make_spec( + *, + sigma_y: float, + sigma_x: float, + angle: float, + offset_y: float, + offset_x: float, + scale: float, + base: float, + box_size: int, + fitting: FittingConfig, + fit_sigma_y: float | None = None, + fit_sigma_x: float | None = None, + fit_angle: float | None = None, + background_type: str = BACKGROUND_TYPE_NONE, + background_amplitude: float = 0.0, + noise_rms: float = 0.0, + hot_pixel_count: int = 0, + hot_pixel_amplitude: float = 0.0, + rng_seed: int = 0, +) -> TrialSpec: + """Construct a :class:`~trial.TrialSpec` from explicit keyword arguments. + + Fills in all fields, pulling fitting parameters from the provided + :class:`~config.FittingConfig`. Study modules call this helper to avoid + repeating the long field list. + + Parameters: + sigma_y: True PSF sigma in the Y direction (pixels). + sigma_x: True PSF sigma in the X direction (pixels). + angle: True PSF rotation angle (radians). + offset_y: Sub-pixel offset of the PSF centre from the image centre (Y). + offset_x: Sub-pixel offset of the PSF centre from the image centre (X). + scale: PSF amplitude scale factor. + base: Additive base level on the clean PSF image. + box_size: Side length of the square image patch (must be odd). + fitting: Fitting configuration for this trial. + fit_sigma_y: Fixed fitter sigma_y (``None`` = float during fitting). + fit_sigma_x: Fixed fitter sigma_x (``None`` = float during fitting). + fit_angle: Fixed fitter angle (``None`` = float during fitting). + background_type: Type of injected background. + background_amplitude: Background amplitude as fraction of PSF peak. + noise_rms: Additive Gaussian noise standard deviation. + hot_pixel_count: Number of hot pixels to inject. + hot_pixel_amplitude: Hot pixel amplitude as a multiple of PSF peak. + rng_seed: Seed for the NumPy random number generator. + + Returns: + A fully populated :class:`~trial.TrialSpec`. + """ + return TrialSpec( + sigma_y=sigma_y, + sigma_x=sigma_x, + angle=angle, + offset_y=offset_y, + offset_x=offset_x, + scale=scale, + base=base, + box_size=box_size, + fit_sigma_y=fit_sigma_y, + fit_sigma_x=fit_sigma_x, + fit_angle=fit_angle, + background_type=background_type, + background_amplitude=background_amplitude, + noise_rms=noise_rms, + hot_pixel_count=hot_pixel_count, + hot_pixel_amplitude=hot_pixel_amplitude, + bkgnd_degree=fitting.bkgnd_degree, + bkgnd_ignore_center=fitting.bkgnd_ignore_center, + bkgnd_num_sigma=fitting.bkgnd_num_sigma, + num_sigma=fitting.num_sigma, + max_bad_frac=fitting.max_bad_frac, + allow_nonzero_base=fitting.allow_nonzero_base, + use_angular_params=fitting.use_angular_params, + tolerance=fitting.tolerance, + search_limit=fitting.search_limit, + scale_limit=fitting.scale_limit, + rng_seed=rng_seed, + ) + + +def progress_callback(study_name: str) -> Any: + """Return a progress-printing callback for the executor. + + The returned callable prints a single updating line to stderr showing + completed / total trial counts. + + Parameters: + study_name: Name of the running study (included in the output line). + + Returns: + A ``Callable[[int, int], None]`` suitable for + :func:`~executor.run_trials`. + """ + + def _callback(completed: int, total: int) -> None: + if total > 0: + pct = 100 * completed // total + msg = f'\r {study_name}: {completed}/{total} trials ({pct}%) ' + else: + msg = f'\r {study_name}: {completed}/0 trials ' + print(msg, end='', flush=True, file=sys.stderr) + if completed == total: + print(file=sys.stderr) + + return _callback + + +def collect_metric( + results: list[TrialResult], + metric: str, +) -> npt.NDArray[np.float64]: + """Extract a named metric from a list of results as a float array. + + Unconverged trials produce NaN for all metrics. + + Parameters: + results: List of :class:`~trial.TrialResult` objects. + metric: Name of the :class:`~trial.TrialResult` field to extract. + + Returns: + A 1-D float64 array of length ``len(results)``. + + Raises: + AttributeError: If ``metric`` is not a field of :class:`~trial.TrialResult`. + """ + values = [ + float('nan') if (r_val := getattr(r, metric)) is None else float(r_val) for r in results + ] + return np.array(values, dtype=np.float64) + + +def safe_nanmean(arr: npt.NDArray[np.float64]) -> float: + """Return the nanmean of ``arr``, or NaN if all values are NaN. + + Parameters: + arr: Input float array. + + Returns: + The mean of finite values, or ``float('nan')`` if none are finite. + """ + finite = arr[np.isfinite(arr)] + if len(finite) == 0: + return float('nan') + return float(np.mean(finite)) + + +def safe_nanstd(arr: npt.NDArray[np.float64]) -> float: + """Return the nanstd (ddof=1) of ``arr``, or NaN if fewer than 2 finite values. + + Parameters: + arr: Input float array. + + Returns: + The standard deviation of finite values, or ``float('nan')`` if too few. + """ + finite = arr[np.isfinite(arr)] + if len(finite) < 2: + return float('nan') + return float(np.std(finite, ddof=1)) + + +def build_groups_by_keys( + specs: list[TrialSpec], + results: list[TrialResult], + key_funcs: list[Any], +) -> list[dict[str, Any]]: + """Group results by a tuple of key functions, building JSON-summary groups. + + Parameters: + specs: Ordered list of :class:`~trial.TrialSpec` objects. + results: Accepted for API symmetry with + :func:`~output.write_json_summary` but not used here; grouping + is based solely on ``specs`` and ``key_funcs``. + key_funcs: List of ``(label, callable)`` pairs where the callable + takes a :class:`~trial.TrialSpec` and returns the group key value. + + Returns: + A list of group dicts suitable for :func:`~output.write_json_summary`. + """ + from collections import defaultdict + + group_map: dict[tuple[Any, ...], list[int]] = defaultdict(list) + for idx, spec in enumerate(specs): + key = tuple(fn(spec) for _, fn in key_funcs) + group_map[key].append(idx) + + def _sort_key(item: tuple[tuple[Any, ...], list[int]]) -> tuple[Any, ...]: + """Convert a group key to a sortable tuple, replacing None with a sentinel.""" + return tuple((0, v) if v is not None else (1, '') for v in item[0]) + + groups: list[dict[str, Any]] = [] + for key, indices in sorted(group_map.items(), key=_sort_key): + g: dict[str, Any] = {} + for (label, _), val in zip(key_funcs, key, strict=False): + g[label] = val + g['indices'] = indices + groups.append(g) + + return groups + + +def ensure_study_dir(output_dir: pathlib.Path, study_name: str) -> pathlib.Path: + """Create and return the study output subdirectory. + + Parameters: + output_dir: Root output directory. + study_name: Study name (used as subdirectory name). + + Returns: + Path to the created subdirectory. + """ + study_dir = output_dir / study_name + study_dir.mkdir(parents=True, exist_ok=True) + return study_dir + + +def snr_to_noise_rms(snr: float, scale: float) -> float: + """Convert signal-to-noise ratio to noise RMS. + + Parameters: + snr: Signal-to-noise ratio (PSF peak / noise std). Must be positive. + scale: PSF amplitude scale factor (determines peak value). + + Returns: + Corresponding noise standard deviation (``scale / snr``). + + Raises: + ValueError: If ``snr`` is not positive. + """ + if snr <= 0: + raise ValueError(f'snr must be > 0, got {snr}') + return scale / snr + + +def recovery_fraction( + results: list[TrialResult], + *, + delta: float, +) -> float: + """Compute the fraction of trials where the position error is within delta/2. + + A trial is considered a "successful recovery" if the fitter converged and + the Euclidean position error is less than half the injected offset delta. + This metric is more informative than mean error alone for Study 3. + + Parameters: + results: List of :class:`~trial.TrialResult` objects. + delta: The injected offset magnitude used as the success threshold + denominator. + + Returns: + Fraction in [0, 1]. Returns ``float('nan')`` if ``results`` is empty. + """ + if len(results) == 0: + return float('nan') + threshold = delta / 2.0 + successes = sum( + 1 for r in results if r.converged and math.isfinite(r.pos_err) and r.pos_err < threshold + ) + return successes / len(results) diff --git a/src/characterize_gauss_fit/config.py b/src/characterize_gauss_fit/config.py new file mode 100644 index 0000000..1f5be25 --- /dev/null +++ b/src/characterize_gauss_fit/config.py @@ -0,0 +1,651 @@ +################################################################################ +# characterize_gauss_fit/config.py +################################################################################ + +"""YAML configuration loading and typed dataclasses for characterize_gauss_fit. + +Loads ``defaults.yaml`` from the package, deep-merges an optional user-supplied +override file, and exposes a typed ``Config`` object for use by study modules. +""" + +from __future__ import annotations + +import copy +import dataclasses +import importlib.resources +import math +import pathlib +from typing import Any + +import yaml + +# --------------------------------------------------------------------------- +# Typed dataclasses +# --------------------------------------------------------------------------- + + +@dataclasses.dataclass +class FittingConfig: + """Parameters forwarded to :meth:`~psfmodel.PSF.find_position` for every trial.""" + + bkgnd_degree: int | None + bkgnd_ignore_center: tuple[int, int] + bkgnd_num_sigma: float | None + num_sigma: float | None + max_bad_frac: float + allow_nonzero_base: bool + use_angular_params: bool + tolerance: float + search_limit: tuple[float, float] + scale_limit: float + + +@dataclasses.dataclass +class GenerationConfig: + """Parameters controlling synthetic PSF image generation.""" + + scale: float + base: float + + +@dataclasses.dataclass +class PsfShapeConfig: + """A single PSF shape specification for the constraint-modes study.""" + + sigma: tuple[float, float] + angle: float + + +@dataclasses.dataclass +class StudyBoxVsSigmaConfig: + """Configuration for Study 1: box size vs. sigma.""" + + enabled: bool + box_sizes: list[int] + sigmas: list[float] + offsets: list[tuple[float, float]] + angle: float + scale: float + fitting: FittingConfig + + +@dataclasses.dataclass +class StudySubpixelOffsetConfig: + """Configuration for Study 2: subpixel offset bias.""" + + enabled: bool + offset_steps: int + offset_range: tuple[float, float] + sigmas: list[float] + box_size: int + angle: float + fitting: FittingConfig + + +@dataclasses.dataclass +class StudyMinDetectableOffsetConfig: + """Configuration for Study 3: minimum detectable offset.""" + + enabled: bool + delta_offsets: list[float] + sigmas: list[float] + box_size: int + noise_samples: int + snr_values: list[float] + include_noiseless: bool + fitting: FittingConfig + + +@dataclasses.dataclass +class StudySigmaAsymmetryAngleConfig: + """Configuration for Study 4: sigma asymmetry and angle recovery.""" + + enabled: bool + sigma_ratios: list[float] + angle_steps: int + sigma_x_values: list[float] + box_size: int + offset: tuple[float, float] + fitting: FittingConfig + + +@dataclasses.dataclass +class StudyConstraintModesConfig: + """Configuration for Study 5: constraint modes.""" + + enabled: bool + sigma_error_fractions: list[float] + angle_error_rad: float + psf_shapes: list[PsfShapeConfig] + box_size: int + offset: tuple[float, float] + scale: float + fitting: FittingConfig + + +@dataclasses.dataclass +class StudyBackgroundConfig: + """Configuration for Study 6: background conditions.""" + + enabled: bool + background_amplitudes: list[float] + bkgnd_degrees: list[int] + bkgnd_degrees_with_null: bool + bkgnd_ignore_centers: list[tuple[int, int]] + background_types: list[str] + box_size: int + sigma: tuple[float, float] + offsets: list[tuple[float, float]] + fitting: FittingConfig + + +@dataclasses.dataclass +class StudyNoiseSensitivityConfig: + """Configuration for Study 7: noise sensitivity.""" + + enabled: bool + snr_log_range: tuple[float, float] + snr_steps: int + sigmas: list[float] + noise_samples: int + box_size: int + fitting: FittingConfig + + +@dataclasses.dataclass +class StudyHotPixelRejectionConfig: + """Configuration for Study 8: hot pixel rejection.""" + + enabled: bool + num_hot_pixels: list[int] + num_sigma_values: list[float] + num_sigma_with_null: bool + hot_amplitudes: list[float] + noise_samples: int + snr: float + box_size: int + sigma: tuple[float, float] + offset: tuple[float, float] + fitting: FittingConfig + + +@dataclasses.dataclass +class StudiesConfig: + """Container for all per-study configurations.""" + + box_vs_sigma: StudyBoxVsSigmaConfig + subpixel_offset: StudySubpixelOffsetConfig + min_detectable_offset: StudyMinDetectableOffsetConfig + sigma_asymmetry_angle: StudySigmaAsymmetryAngleConfig + constraint_modes: StudyConstraintModesConfig + background: StudyBackgroundConfig + noise_sensitivity: StudyNoiseSensitivityConfig + hot_pixel_rejection: StudyHotPixelRejectionConfig + + +@dataclasses.dataclass +class Config: + """Top-level configuration object for the entire run.""" + + output_dir: pathlib.Path + num_workers: int + noise_samples: int + fitting: FittingConfig + generation: GenerationConfig + studies: StudiesConfig + + +# --------------------------------------------------------------------------- +# YAML loading and merging +# --------------------------------------------------------------------------- + +STUDY_NAMES: list[str] = [ + 'box_vs_sigma', + 'subpixel_offset', + 'min_detectable_offset', + 'sigma_asymmetry_angle', + 'constraint_modes', + 'background', + 'noise_sensitivity', + 'hot_pixel_rejection', +] + + +def _deep_merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]: + """Return a new dict that is ``base`` deep-merged with ``override``. + + Scalar values and lists in ``override`` replace those in ``base`` entirely. + Nested dicts are merged recursively. + + Parameters: + base: The base dictionary (from defaults.yaml). + override: The user-supplied override dictionary. + + Returns: + A new merged dictionary. Neither input is mutated. + """ + result = copy.deepcopy(base) + for key, value in override.items(): + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = _deep_merge(result[key], value) + else: + result[key] = copy.deepcopy(value) + return result + + +def _load_raw(path: pathlib.Path | None) -> dict[str, Any]: + """Load and deep-merge defaults.yaml with an optional user override file. + + Parameters: + path: Optional path to a user-supplied YAML override file. Pass ``None`` + to use only the built-in defaults. + + Returns: + A merged dictionary of all configuration values. + + Raises: + FileNotFoundError: If ``path`` is provided but does not exist. + ValueError: If the YAML file cannot be parsed. + """ + pkg_ref = importlib.resources.files('characterize_gauss_fit').joinpath('defaults.yaml') + with ( + importlib.resources.as_file(pkg_ref) as defaults_path, + defaults_path.open('r', encoding='utf-8') as fh, + ): + try: + raw: dict[str, Any] = yaml.safe_load(fh) + except yaml.YAMLError as exc: + raise ValueError(f'Failed to parse defaults.yaml: {exc}') from exc + if not isinstance(raw, dict): + raise ValueError(f'defaults.yaml must contain a mapping, got {type(raw)!r}') + + if path is not None: + if not path.exists(): + raise FileNotFoundError(f'Config file not found: {path}') + with path.open('r', encoding='utf-8') as fh: + try: + user_raw_loaded: dict[str, Any] | None = yaml.safe_load(fh) + except yaml.YAMLError as exc: + raise ValueError(f'Failed to parse config file {path}: {exc}') from exc + if user_raw_loaded is None: + user_raw: dict[str, Any] = {} + elif not isinstance(user_raw_loaded, dict): + raise ValueError( + f'Config file {path} must contain a mapping, got {type(user_raw_loaded)!r}' + ) + else: + user_raw = user_raw_loaded + raw = _deep_merge(raw, user_raw) + + return raw + + +def _parse_fitting(raw: dict[str, Any]) -> FittingConfig: + """Parse a fitting-section dict into a :class:`FittingConfig`. + + Parameters: + raw: Dict from the ``fitting`` YAML section. + + Returns: + A populated :class:`FittingConfig`. + + Raises: + ValueError: If required keys are missing or values are out of range. + KeyError: If a required YAML key is absent from ``raw``. + """ + bkgnd_ic = raw['bkgnd_ignore_center'] + search_lim = raw['search_limit'] + return FittingConfig( + bkgnd_degree=raw['bkgnd_degree'], + bkgnd_ignore_center=(int(bkgnd_ic[0]), int(bkgnd_ic[1])), + bkgnd_num_sigma=raw['bkgnd_num_sigma'], + num_sigma=raw['num_sigma'], + max_bad_frac=float(raw['max_bad_frac']), + allow_nonzero_base=bool(raw['allow_nonzero_base']), + use_angular_params=bool(raw['use_angular_params']), + tolerance=float(raw['tolerance']), + search_limit=(float(search_lim[0]), float(search_lim[1])), + scale_limit=float(raw['scale_limit']), + ) + + +def _resolve_fitting(global_fitting: dict[str, Any], study_raw: dict[str, Any]) -> FittingConfig: + """Merge the global fitting defaults with optional per-study overrides. + + Parameters: + global_fitting: The top-level ``fitting`` section dict. + study_raw: The study-specific dict (may contain a ``fitting`` sub-dict). + + Returns: + A :class:`FittingConfig` reflecting any per-study overrides. + """ + if 'fitting' in study_raw: + merged = _deep_merge(global_fitting, study_raw['fitting']) + else: + merged = global_fitting + return _parse_fitting(merged) + + +def _parse_two_floats(value: list[Any]) -> tuple[float, float]: + """Parse a two-element list into a tuple of floats. + + Parameters: + value: A list with exactly two numeric elements. + + Returns: + A ``(float, float)`` tuple. + + Raises: + ValueError: If ``value`` does not have exactly two elements. + """ + if len(value) != 2: + raise ValueError(f'Expected a list of 2 values, got {len(value)}') + return (float(value[0]), float(value[1])) + + +def _parse_two_ints(value: list[Any]) -> tuple[int, int]: + """Parse a two-element list into a tuple of ints. + + Parameters: + value: A list with exactly two numeric elements. + + Returns: + An ``(int, int)`` tuple. + + Raises: + ValueError: If ``value`` does not have exactly two elements. + """ + if len(value) != 2: + raise ValueError(f'Expected a list of 2 values, got {len(value)}') + return (int(value[0]), int(value[1])) + + +def _build_config(raw: dict[str, Any]) -> Config: + """Convert a merged raw dict into a fully typed :class:`Config`. + + Parameters: + raw: Merged configuration dictionary. + + Returns: + A populated :class:`Config` instance. + + Raises: + ValueError: If any required field is missing or invalid. + KeyError: If a required YAML key is absent. + """ + global_fitting = raw['fitting'] + gen = raw['generation'] + studies = raw['studies'] + + def study_fitting(name: str) -> FittingConfig: + return _resolve_fitting(global_fitting, studies[name]) + + # Study 4: build angle list from steps + sa_raw = studies['sigma_asymmetry_angle'] + angle_steps: int = int(sa_raw['angle_steps']) + + # Study 6: parse list of ignore-center pairs + bg_raw = studies['background'] + ignore_centers: list[tuple[int, int]] = [ + _parse_two_ints(ic) for ic in bg_raw['bkgnd_ignore_centers'] + ] + + # Study 5: parse psf_shapes list + cm_raw = studies['constraint_modes'] + psf_shapes: list[PsfShapeConfig] = [ + PsfShapeConfig( + sigma=_parse_two_floats(s['sigma']), + angle=float(s['angle']), + ) + for s in cm_raw['psf_shapes'] + ] + + return Config( + output_dir=pathlib.Path(raw['output_dir']), + num_workers=int(raw['num_workers']), + noise_samples=int(raw['noise_samples']), + fitting=_parse_fitting(global_fitting), + generation=GenerationConfig( + scale=float(gen['scale']), + base=float(gen['base']), + ), + studies=StudiesConfig( + box_vs_sigma=StudyBoxVsSigmaConfig( + enabled=bool(studies['box_vs_sigma']['enabled']), + box_sizes=[int(x) for x in studies['box_vs_sigma']['box_sizes']], + sigmas=[float(x) for x in studies['box_vs_sigma']['sigmas']], + offsets=[_parse_two_floats(o) for o in studies['box_vs_sigma']['offsets']], + angle=float(studies['box_vs_sigma']['angle']), + scale=float(studies['box_vs_sigma']['scale']), + fitting=study_fitting('box_vs_sigma'), + ), + subpixel_offset=StudySubpixelOffsetConfig( + enabled=bool(studies['subpixel_offset']['enabled']), + offset_steps=int(studies['subpixel_offset']['offset_steps']), + offset_range=_parse_two_floats(studies['subpixel_offset']['offset_range']), + sigmas=[float(x) for x in studies['subpixel_offset']['sigmas']], + box_size=int(studies['subpixel_offset']['box_size']), + angle=float(studies['subpixel_offset']['angle']), + fitting=study_fitting('subpixel_offset'), + ), + min_detectable_offset=StudyMinDetectableOffsetConfig( + enabled=bool(studies['min_detectable_offset']['enabled']), + delta_offsets=[float(x) for x in studies['min_detectable_offset']['delta_offsets']], + sigmas=[float(x) for x in studies['min_detectable_offset']['sigmas']], + box_size=int(studies['min_detectable_offset']['box_size']), + noise_samples=int(studies['min_detectable_offset']['noise_samples']), + snr_values=[float(x) for x in studies['min_detectable_offset']['snr_values']], + include_noiseless=bool(studies['min_detectable_offset']['include_noiseless']), + fitting=study_fitting('min_detectable_offset'), + ), + sigma_asymmetry_angle=StudySigmaAsymmetryAngleConfig( + enabled=bool(sa_raw['enabled']), + sigma_ratios=[float(x) for x in sa_raw['sigma_ratios']], + angle_steps=angle_steps, + sigma_x_values=[float(x) for x in sa_raw['sigma_x_values']], + box_size=int(sa_raw['box_size']), + offset=_parse_two_floats(sa_raw['offset']), + fitting=study_fitting('sigma_asymmetry_angle'), + ), + constraint_modes=StudyConstraintModesConfig( + enabled=bool(cm_raw['enabled']), + sigma_error_fractions=[float(x) for x in cm_raw['sigma_error_fractions']], + angle_error_rad=float(cm_raw['angle_error_rad']), + psf_shapes=psf_shapes, + box_size=int(cm_raw['box_size']), + offset=_parse_two_floats(cm_raw['offset']), + scale=float(cm_raw['scale']), + fitting=study_fitting('constraint_modes'), + ), + background=StudyBackgroundConfig( + enabled=bool(bg_raw['enabled']), + background_amplitudes=[float(x) for x in bg_raw['background_amplitudes']], + bkgnd_degrees=[int(x) for x in bg_raw['bkgnd_degrees']], + bkgnd_degrees_with_null=bool(bg_raw['bkgnd_degrees_with_null']), + bkgnd_ignore_centers=ignore_centers, + background_types=[str(x) for x in bg_raw['background_types']], + box_size=int(bg_raw['box_size']), + sigma=_parse_two_floats(bg_raw['sigma']), + offsets=[_parse_two_floats(o) for o in bg_raw['offsets']], + fitting=study_fitting('background'), + ), + noise_sensitivity=StudyNoiseSensitivityConfig( + enabled=bool(studies['noise_sensitivity']['enabled']), + snr_log_range=_parse_two_floats(studies['noise_sensitivity']['snr_log_range']), + snr_steps=int(studies['noise_sensitivity']['snr_steps']), + sigmas=[float(x) for x in studies['noise_sensitivity']['sigmas']], + noise_samples=int(studies['noise_sensitivity']['noise_samples']), + box_size=int(studies['noise_sensitivity']['box_size']), + fitting=study_fitting('noise_sensitivity'), + ), + hot_pixel_rejection=StudyHotPixelRejectionConfig( + enabled=bool(studies['hot_pixel_rejection']['enabled']), + num_hot_pixels=[int(x) for x in studies['hot_pixel_rejection']['num_hot_pixels']], + num_sigma_values=[ + float(x) for x in studies['hot_pixel_rejection']['num_sigma_values'] + ], + num_sigma_with_null=bool(studies['hot_pixel_rejection']['num_sigma_with_null']), + hot_amplitudes=[float(x) for x in studies['hot_pixel_rejection']['hot_amplitudes']], + noise_samples=int(studies['hot_pixel_rejection']['noise_samples']), + snr=float(studies['hot_pixel_rejection']['snr']), + box_size=int(studies['hot_pixel_rejection']['box_size']), + sigma=_parse_two_floats(studies['hot_pixel_rejection']['sigma']), + offset=_parse_two_floats(studies['hot_pixel_rejection']['offset']), + fitting=study_fitting('hot_pixel_rejection'), + ), + ), + ) + + +def load_config( + path: pathlib.Path | None = None, + *, + output_dir: pathlib.Path | None = None, + num_workers: int | None = None, +) -> Config: + """Load configuration from defaults and an optional user override file. + + Loads the built-in ``defaults.yaml``, deep-merges the user file if provided, + then applies any CLI-level overrides (``output_dir``, ``num_workers``). + + Parameters: + path: Optional path to a user YAML override file. + output_dir: If given, overrides the ``output_dir`` from YAML. + num_workers: If given, overrides ``num_workers`` from YAML. + + Returns: + A fully validated :class:`Config` instance. + + Raises: + FileNotFoundError: If ``path`` is provided but does not exist. + ValueError: If the configuration is invalid. + KeyError: If a required YAML key is missing. + """ + raw = _load_raw(path) + if output_dir is not None: + raw['output_dir'] = str(output_dir) + if num_workers is not None: + raw['num_workers'] = num_workers + cfg = _build_config(raw) + _validate_config(cfg) + return cfg + + +def _validate_config(cfg: Config) -> None: + """Raise :class:`ValueError` if the config contains invalid values. + + Parameters: + cfg: A :class:`Config` instance to validate. + + Raises: + ValueError: If any value is out of range or logically inconsistent. + """ + if cfg.num_workers < 1: + raise ValueError(f'num_workers must be >= 1, got {cfg.num_workers}') + if cfg.noise_samples < 1: + raise ValueError(f'noise_samples must be >= 1, got {cfg.noise_samples}') + + bvs = cfg.studies.box_vs_sigma + for bs in bvs.box_sizes: + if bs < 5 or bs % 2 == 0: + raise ValueError( + f'box_vs_sigma.box_sizes: each entry must be an odd integer >= 5, got {bs}' + ) + if len(bvs.offsets) == 0: + raise ValueError('box_vs_sigma.offsets must contain at least one entry') + + def _check_box_size(field: str, bs: int) -> None: + if bs < 5 or bs % 2 == 0: + raise ValueError(f'{field}: box_size must be an odd integer >= 5, got {bs}') + + _check_box_size('subpixel_offset.box_size', cfg.studies.subpixel_offset.box_size) + _check_box_size('min_detectable_offset.box_size', cfg.studies.min_detectable_offset.box_size) + _check_box_size('sigma_asymmetry_angle.box_size', cfg.studies.sigma_asymmetry_angle.box_size) + _check_box_size('constraint_modes.box_size', cfg.studies.constraint_modes.box_size) + _check_box_size('background.box_size', cfg.studies.background.box_size) + _check_box_size('noise_sensitivity.box_size', cfg.studies.noise_sensitivity.box_size) + _check_box_size('hot_pixel_rejection.box_size', cfg.studies.hot_pixel_rejection.box_size) + + mdo = cfg.studies.min_detectable_offset + for delta in mdo.delta_offsets: + if delta <= 0: + raise ValueError(f'min_detectable_offset.delta_offsets: must be positive, got {delta}') + + ns = cfg.studies.noise_sensitivity + lo, hi = ns.snr_log_range + if lo >= hi: + raise ValueError(f'noise_sensitivity.snr_log_range: min ({lo}) must be < max ({hi})') + if ns.snr_steps < 1: + raise ValueError(f'noise_sensitivity.snr_steps must be >= 1, got {ns.snr_steps}') + + sa = cfg.studies.sigma_asymmetry_angle + if sa.angle_steps < 2: + raise ValueError(f'angle_steps must be >= 2, got {sa.angle_steps}') + for ratio in sa.sigma_ratios: + if ratio <= 0: + raise ValueError(f'sigma_asymmetry_angle.sigma_ratios: must be positive, got {ratio}') + + cm = cfg.studies.constraint_modes + if len(cm.psf_shapes) == 0: + raise ValueError('constraint_modes.psf_shapes must contain at least one entry') + for shape in cm.psf_shapes: + if shape.sigma[0] <= 0 or shape.sigma[1] <= 0: + raise ValueError( + f'constraint_modes.psf_shapes: sigma values must be positive, got {shape.sigma}' + ) + if not (0.0 <= shape.angle <= math.pi): + raise ValueError( + f'constraint_modes.psf_shapes: angle must be in [0, pi], got {shape.angle}' + ) + + def _validate_fitting_config(prefix: str, fc: FittingConfig) -> None: + if not (0.0 <= fc.max_bad_frac <= 1.0): + raise ValueError(f'{prefix}.max_bad_frac must be in [0, 1], got {fc.max_bad_frac}') + if fc.num_sigma is not None and fc.num_sigma <= 0: + raise ValueError(f'{prefix}.num_sigma must be None or > 0, got {fc.num_sigma}') + if fc.bkgnd_num_sigma is not None and fc.bkgnd_num_sigma <= 0: + raise ValueError( + f'{prefix}.bkgnd_num_sigma must be None or > 0, got {fc.bkgnd_num_sigma}' + ) + if fc.search_limit[0] < 0 or fc.search_limit[1] < 0: + raise ValueError(f'{prefix}.search_limit values must be >= 0, got {fc.search_limit}') + if fc.scale_limit < 0: + raise ValueError(f'{prefix}.scale_limit must be >= 0, got {fc.scale_limit}') + + _validate_fitting_config('box_vs_sigma.fitting', cfg.studies.box_vs_sigma.fitting) + _validate_fitting_config('subpixel_offset.fitting', cfg.studies.subpixel_offset.fitting) + _validate_fitting_config( + 'min_detectable_offset.fitting', cfg.studies.min_detectable_offset.fitting + ) + _validate_fitting_config( + 'sigma_asymmetry_angle.fitting', cfg.studies.sigma_asymmetry_angle.fitting + ) + _validate_fitting_config('constraint_modes.fitting', cm.fitting) + _validate_fitting_config('background.fitting', cfg.studies.background.fitting) + _validate_fitting_config('noise_sensitivity.fitting', cfg.studies.noise_sensitivity.fitting) + _validate_fitting_config('hot_pixel_rejection.fitting', cfg.studies.hot_pixel_rejection.fitting) + + valid_bkgnd_types = {'none', 'constant', 'linear', 'quadratic', 'noisy_constant'} + for bt in cfg.studies.background.background_types: + if bt not in valid_bkgnd_types: + raise ValueError( + f'background.background_types: unknown type "{bt}". ' + f'Valid options: {sorted(valid_bkgnd_types)}' + ) + if len(cfg.studies.background.offsets) == 0: + raise ValueError('background.offsets must contain at least one entry') + + +def config_to_dict(cfg: Config) -> dict[str, Any]: + """Convert a :class:`Config` to a plain dict suitable for JSON serialisation. + + Parameters: + cfg: A :class:`Config` instance. + + Returns: + A dict representation with all Path objects converted to strings. + """ + raw = dataclasses.asdict(cfg) + # Convert Path to string for JSON serialisability. + raw['output_dir'] = str(cfg.output_dir) + return raw diff --git a/src/characterize_gauss_fit/defaults.yaml b/src/characterize_gauss_fit/defaults.yaml new file mode 100644 index 0000000..1c250cc --- /dev/null +++ b/src/characterize_gauss_fit/defaults.yaml @@ -0,0 +1,211 @@ +# defaults.yaml -- Default configuration for characterize_gauss_fit. +# +# All values in this file can be overridden by a user-supplied YAML file +# passed via --config on the command line. Lists replace entirely (not append). +# Per-study fitting overrides: add a "fitting" subsection inside any study to +# override the global fitting defaults for that study only. + +# Directory where all output files (plots, CSV, JSON) are written. +output_dir: ./gauss_fit_results + +# Number of parallel worker processes. 1 = sequential (default). +# Values > 1 use concurrent.futures.ProcessPoolExecutor. +num_workers: 1 + +# Default number of noise realizations for stochastic studies. +# Individual study sections may override this. +noise_samples: 50 + +# --------------------------------------------------------------------------- +# Global fitting defaults (passed to find_position for every trial). +# Any study section may include a "fitting" subsection to override these. +# --------------------------------------------------------------------------- +fitting: + # Polynomial degree for background fitting (null = no background subtraction). + bkgnd_degree: 2 + # Half-size of the central region excluded from background fitting [ny, nx]. + bkgnd_ignore_center: [2, 2] + # Sigma-clipping threshold for background residuals (null = disabled). + bkgnd_num_sigma: null + # Sigma-clipping threshold for PSF residuals to reject bad pixels (null = disabled). + num_sigma: null + # Maximum fraction of pixels that can be masked before fit is abandoned. + max_bad_frac: 0.2 + # Whether to fit a non-zero constant base level in addition to the polynomial background. + allow_nonzero_base: false + # Whether to reparameterize fit variables as angles in [0, pi]. + use_angular_params: true + # Powell optimizer convergence tolerance. + tolerance: 1.0e-6 + # Maximum allowed position offset from starting point [y_limit, x_limit] in pixels. + search_limit: [1.5, 1.5] + # Maximum allowed PSF amplitude scale factor. + scale_limit: 1000.0 + +# --------------------------------------------------------------------------- +# Default PSF generation parameters used when a study does not override them. +# --------------------------------------------------------------------------- +generation: + # Amplitude multiplier applied to the normalized Gaussian pixel integrals. + scale: 1.5 + # Additive base level added to the clean PSF image before background/noise injection. + base: 0.0 + +# --------------------------------------------------------------------------- +# Per-study configuration. Each study can be disabled with "enabled: false". +# --------------------------------------------------------------------------- +studies: + + # Study 1: How box size relative to PSF sigma affects fitting accuracy. + box_vs_sigma: + enabled: true + # Odd box sizes to test. Minimum 5. + box_sizes: [5, 7, 9, 11, 13, 17, 21, 25, 31] + # Symmetric PSF sigma values (pixels) to test. + sigmas: [0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 3.0, 5.0] + # List of [offset_y, offset_x] pairs (fractional pixel offsets from pixel centre). + # Each entry produces a separate set of heatmap plots labelled by its offset. + # Values in [0, 0.5]: 0.0 = exactly centred, 0.5 = on the pixel boundary. + offsets: + - [0.0, 0.0] + - [0.25, 0.25] + - [0.5, 0.0] + - [0.0, 0.5] + - [0.5, 0.5] + # PSF rotation angle (radians). 0 = axis-aligned. + angle: 0.0 + # PSF amplitude scale factor for this study. + scale: 1.0 + fitting: + bkgnd_degree: null + + # Study 2: How subpixel centering offset biases position recovery. + subpixel_offset: + enabled: true + # Number of evenly-spaced offset steps in each axis over offset_range. + offset_steps: 11 + # Range [min, max] of offset values tested. Values outside [0, 0.5] are + # redundant by symmetry. + offset_range: [0.0, 0.5] + # Sigma values to use as panel variable. + sigmas: [0.5, 1.0, 2.0] + box_size: 21 + angle: 0.0 + fitting: + bkgnd_degree: null + + # Study 3: Minimum detectable position offset vs. PSF sigma and SNR. + min_detectable_offset: + enabled: true + # Offset deltas to test (pixels). Covers sub-pixel to multi-pixel range. + delta_offsets: [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5] + # Symmetric sigma values to test. + sigmas: [0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 3.0] + box_size: 21 + # Noise realizations per (delta, sigma, snr) combination. Noiseless uses 1. + noise_samples: 50 + # SNR values (peak / noise_rms) for stochastic conditions. + snr_values: [50.0, 100.0, 500.0] + # If true, also run a noiseless trial (numerical precision floor). + include_noiseless: true + fitting: + bkgnd_degree: null + + # Study 4: How sigma asymmetry and rotation angle affect recovery. + sigma_asymmetry_angle: + enabled: true + # Ratios sigma_y / sigma_x to test. + sigma_ratios: [0.25, 0.5, 0.75, 1.0, 1.5, 2.0, 4.0] + # Number of evenly-spaced angle steps from 0 to pi (inclusive). + angle_steps: 13 + # sigma_x values used as panel variable. + sigma_x_values: [0.5, 1.0, 2.0] + box_size: 25 + offset: [0.25, 0.25] + fitting: + bkgnd_degree: null + + # Study 5: How fixing/floating PSF parameters affects all output metrics. + constraint_modes: + enabled: true + # Fractional errors applied to sigma when sigma is fixed incorrectly. + # 0.0 = fixed at correct value; 0.2 = fixed at 1.2x true value; etc. + sigma_error_fractions: [0.0, 0.2, 0.5] + # Absolute angle error (radians) when angle is fixed incorrectly. + angle_error_rad: 0.3 + # PSF shapes to test as [sigma_y, sigma_x] + angle (radians). + psf_shapes: + - sigma: [1.0, 1.0] + angle: 0.0 + - sigma: [0.5, 1.5] + angle: 0.7854 # approximately pi/4 + - sigma: [1.0, 2.0] + angle: 1.0472 # approximately pi/3 + box_size: 21 + offset: [0.25, 0.25] + scale: 1.5 + + # Study 6: How injected background and fitting model choice affect accuracy. + background: + enabled: true + # Amplitude of injected background as a fraction of the PSF peak. + background_amplitudes: [0.01, 0.1, 0.5] + # Polynomial degrees used when fitting the background. + # The study iterates over these values, overriding fitting.bkgnd_degree for + # each run. When bkgnd_degrees_with_null is true the null (no background + # fitting) case is prepended so every degree in this list is compared against + # the no-background-model baseline. + bkgnd_degrees: [0, 1, 2] + # If true, also test bkgnd_degree=null (no background fitting). + bkgnd_degrees_with_null: true + # ignore_center sizes [ny, nx] to test. + bkgnd_ignore_centers: [[1, 1], [2, 2], [4, 4]] + # Background types to inject. Options: none, constant, linear, quadratic, + # noisy_constant. + background_types: [none, constant, linear, quadratic, noisy_constant] + box_size: 21 + sigma: [1.0, 1.0] + # List of [offset_y, offset_x] pairs (fractional pixel offsets from pixel centre). + # Each entry produces a separate set of heatmap plots labelled by its offset. + offsets: + - [0.0, 0.0] + - [0.25, 0.25] + - [0.5, 0.5] + + # Study 7: How SNR affects position, sigma, and scale recovery accuracy. + noise_sensitivity: + enabled: true + # Log10 range [min, max] for SNR (peak / noise_rms). + snr_log_range: [0.5, 3.5] + # Number of log-spaced SNR points. + snr_steps: 15 + # Sigma values used as panel variable. + sigmas: [0.5, 1.0, 2.0] + # Noise realizations per (snr, sigma) combination. + noise_samples: 50 + box_size: 21 + # Note: offsets are randomized uniformly in [-0.5, 0.5] per trial to + # avoid bias from a fixed sub-pixel position. Angle is always 0.0 + # (axis-aligned PSF) in this study. + + # Study 8: How num_sigma rejection handles hot pixels at various contamination levels. + hot_pixel_rejection: + enabled: true + # Numbers of hot pixels to inject. + num_hot_pixels: [0, 1, 3, 5, 10] + # num_sigma thresholds for PSF-stage bad pixel rejection. + # These values override fitting.num_sigma for the PSF-fit bad-pixel + # rejection stage. The study iterates over num_sigma_values (and over + # null when num_sigma_with_null is true) to test rejection aggressiveness. + num_sigma_values: [3.0, 4.0, 5.0, 6.0] + # If true, also test with num_sigma=null (rejection disabled). + num_sigma_with_null: true + # Hot pixel amplitudes as multiples of the PSF peak. + hot_amplitudes: [5.0, 20.0, 100.0] + # Noise realizations per combination (hot pixel positions randomized). + noise_samples: 20 + # SNR of background Gaussian noise (peak / noise_rms). + snr: 100.0 + box_size: 21 + sigma: [1.0, 1.0] + offset: [0.25, 0.25] diff --git a/src/characterize_gauss_fit/executor.py b/src/characterize_gauss_fit/executor.py new file mode 100644 index 0000000..2d20a00 --- /dev/null +++ b/src/characterize_gauss_fit/executor.py @@ -0,0 +1,176 @@ +################################################################################ +# characterize_gauss_fit/executor.py +################################################################################ + +"""Sequential and multiprocess trial dispatch for characterize_gauss_fit. + +Provides :func:`run_trials`, which executes a list of :class:`~trial.TrialSpec` +objects either sequentially in the calling process or in parallel using +:class:`concurrent.futures.ProcessPoolExecutor`. + +Worker isolation: :class:`~psfmodel.gaussian.GaussianPSF` objects are +constructed inside each worker from the plain-data fields of +:class:`~trial.TrialSpec`. No complex objects cross process boundaries. + +Chunking: to reduce IPC and pickle overhead, specs are grouped into batches +before dispatch. Each worker executes a full batch per ``submit()`` call. +The batch size is ``ceil(total / (num_workers * _CHUNK_MULTIPLIER))``. A +multiplier of 4 gives 4x over-subscription, balancing load across workers +while keeping the number of round-trips low. +""" + +from __future__ import annotations + +import concurrent.futures +import logging +import math +import multiprocessing +from collections.abc import Callable + +from characterize_gauss_fit.trial import TrialResult, TrialSpec, run_trial + +# Number of chunks per worker. Higher values improve load balancing at the +# cost of more (but still far fewer than one-per-trial) IPC round-trips. +_CHUNK_MULTIPLIER = 4 + +_LOG = logging.getLogger(__name__) + + +def _safe_run_trial(spec: TrialSpec) -> TrialResult: + """Run a single trial, converting any exception into a failed result. + + This wrapper is used by worker processes so that an unexpected exception + in one trial does not crash the entire pool. + + Parameters: + spec: The trial to execute. + + Returns: + A :class:`~trial.TrialResult` with ``converged=False`` and NaN errors + if any exception occurs, otherwise the normal result. + """ + try: + return run_trial(spec) + except Exception: + _LOG.exception( + 'Unexpected error in trial (sigma_y=%s, box_size=%s)', + spec.sigma_y, + spec.box_size, + ) + return TrialResult( + converged=False, + sigma_y_true=spec.sigma_y, + sigma_x_true=spec.sigma_x, + angle_true=spec.angle, + scale_true=spec.scale, + offset_y_true=spec.offset_y, + offset_x_true=spec.offset_x, + pos_err_y=float('nan'), + pos_err_x=float('nan'), + pos_err=float('nan'), + sigma_y_fit=None, + sigma_x_fit=None, + angle_fit=None, + scale_fit=float('nan'), + sigma_y_err=None, + sigma_x_err=None, + angle_err=None, + scale_err=float('nan'), + ) + + +def _run_trial_batch(specs: list[TrialSpec]) -> list[TrialResult]: + """Execute a batch of trials inside a single worker call. + + Running multiple trials per ``submit()`` call amortises the per-call + pickle and IPC overhead across the whole batch, which dramatically + reduces the per-trial overhead compared to submitting one trial at a time. + + Parameters: + specs: Ordered list of :class:`~trial.TrialSpec` objects to execute. + + Returns: + Results in the same order as ``specs``. + """ + return [_safe_run_trial(spec) for spec in specs] + + +def run_trials( + trial_specs: list[TrialSpec], + *, + num_workers: int = 1, + progress_callback: Callable[[int, int], None] | None = None, +) -> list[TrialResult]: + """Execute a list of trials sequentially or in parallel. + + Parameters: + trial_specs: List of :class:`~trial.TrialSpec` objects to execute. + num_workers: Number of parallel worker processes. ``1`` runs all trials + sequentially in the calling process with no multiprocessing overhead. + Values ``>1`` use :class:`concurrent.futures.ProcessPoolExecutor` + with the ``spawn`` start method to avoid fork-related crashes. + Trials are grouped into chunks of size + ``ceil(total / (num_workers * _CHUNK_MULTIPLIER))`` so that each + worker executes many trials per ``submit()`` call, reducing IPC and + pickle overhead and making speedup more linear with CPU count. + progress_callback: Optional callable ``(completed, total)`` invoked + after each chunk of results is collected, useful for progress + display. + + Returns: + A list of :class:`~trial.TrialResult` objects in the same order as + ``trial_specs``. + """ + total = len(trial_specs) + results: list[TrialResult] = [] + + if not isinstance(num_workers, int) or num_workers < 1: + raise ValueError(f'num_workers must be a positive integer, got {num_workers!r}') + + if num_workers == 1: + for idx, spec in enumerate(trial_specs): + results.append(_safe_run_trial(spec)) + if progress_callback is not None: + progress_callback(idx + 1, total) + else: + # Use the 'spawn' start method so worker processes begin as fresh + # Python interpreters. The default 'fork' method on Linux copies + # the parent's BLAS/OpenMP thread-pool state into the child without + # actually transferring the threads, causing a segfault during worker + # cleanup when those phantom pools are torn down. + mp_ctx = multiprocessing.get_context('spawn') + + # Group specs into batches so each worker handles multiple trials per + # submit() call. _CHUNK_MULTIPLIER chunks per worker gives 4x + # over-subscription for load balancing. + chunk_size = max(1, math.ceil(total / (num_workers * _CHUNK_MULTIPLIER))) + chunks: list[tuple[int, list[TrialSpec]]] = [] + start = 0 + while start < total: + end = min(start + chunk_size, total) + chunks.append((start, trial_specs[start:end])) + start = end + + ordered: list[TrialResult | None] = [None] * total + n_done = 0 + with concurrent.futures.ProcessPoolExecutor( + max_workers=num_workers, mp_context=mp_ctx + ) as pool: + futures: dict[concurrent.futures.Future[list[TrialResult]], int] = { + pool.submit(_run_trial_batch, chunk): chunk_start for chunk_start, chunk in chunks + } + for future in concurrent.futures.as_completed(futures): + chunk_start = futures[future] + chunk_results = future.result() + for j, result in enumerate(chunk_results): + ordered[chunk_start + j] = result + n_done += len(chunk_results) + if progress_callback is not None: + progress_callback(n_done, total) + + # All futures completed; ordered contains no None entries. + if not all(r is not None for r in ordered): + raise RuntimeError('Internal error: some futures did not produce a result') + results = [r for r in ordered if r is not None] + + return results diff --git a/src/characterize_gauss_fit/hires_config.yaml b/src/characterize_gauss_fit/hires_config.yaml new file mode 100644 index 0000000..0a7b7ab --- /dev/null +++ b/src/characterize_gauss_fit/hires_config.yaml @@ -0,0 +1,146 @@ +# hires_config.yaml -- High-resolution configuration for characterize_gauss_fit. +# +# Runs all eight studies with denser parameter grids and larger noise-sample +# counts compared with the defaults, while staying within the same ranges. +# The goal is smoother heatmaps, less noisy line plots, and more nuanced +# detail at intermediate parameter values. +# +# Estimated runtime: ~10-30x longer than the default config. Use +# --num-workers to parallelise across CPU cores, e.g.: +# +# characterize_gauss_fit --config hires_config.yaml --num-workers 8 +# +# Run with: +# characterize_gauss_fit --config src/characterize_gauss_fit/hires_config.yaml + +output_dir: ./gauss_fit_hires +num_workers: 1 + +# Stochastic studies use 200 noise realisations per point (vs. 50 default). +noise_samples: 200 + +studies: + + # Study 1: 12 box sizes x 13 sigmas x 5 offsets = 780 trials + # Adds intermediate box sizes (15, 19) and extra sigma values (0.4, 0.6, + # 0.7, 1.25, 2.5, 4.0) to fill in the heatmap grid more finely. + box_vs_sigma: + enabled: true + box_sizes: [5, 7, 9, 11, 13, 15, 17, 19, 21, 25, 31, 41] + sigmas: [0.3, 0.4, 0.5, 0.6, 0.8, 1.0, 1.25, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0] + offsets: + - [0.0, 0.0] + - [0.25, 0.25] + - [0.5, 0.0] + - [0.0, 0.5] + - [0.5, 0.5] + angle: 0.0 + scale: 1.0 + fitting: + bkgnd_degree: null + + # Study 2: 21x21 offset grid x 4 sigmas = 1764 trials + # Doubles the offset-step resolution (0.025 px per step) and adds sigma=1.5 + # to provide an intermediate panel between 1.0 and 2.0. + subpixel_offset: + enabled: true + offset_steps: 21 + offset_range: [0.0, 0.5] + sigmas: [0.5, 1.0, 1.5, 2.0] + box_size: 21 + angle: 0.0 + fitting: + bkgnd_degree: null + + # Study 3: 11 deltas x 7 sigmas x (1 noiseless + 4 snr x 200 samples) = ~62k trials + # Adds two extra log-spaced delta values and a low-SNR (20) condition to + # better characterise the detection floor. Raise noise_samples for tighter + # uncertainty bands. + min_detectable_offset: + enabled: true + delta_offsets: [0.001, 0.002, 0.004, 0.007, 0.01, 0.02, 0.04, 0.07, 0.1, 0.2, 0.5] + sigmas: [0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 3.0] + box_size: 21 + noise_samples: 200 + snr_values: [20.0, 50.0, 100.0, 500.0] + include_noiseless: true + fitting: + bkgnd_degree: null + + # Study 4: 10 ratios x 25 angles x 3 sigma_x = 750 trials + # Adds extra asymmetry ratios (0.2, 0.33, 0.67, 3.0) and nearly doubles the + # angular resolution (7.5 deg per step vs. 15 deg in default). + sigma_asymmetry_angle: + enabled: true + sigma_ratios: [0.2, 0.33, 0.5, 0.67, 0.75, 1.0, 1.5, 2.0, 3.0, 5.0] + angle_steps: 25 + sigma_x_values: [0.5, 1.0, 2.0] + box_size: 25 + offset: [0.25, 0.25] + fitting: + bkgnd_degree: null + + # Study 5: 6 sigma-error fractions x 3+ shapes + # Adds intermediate sigma-error values (0.1, 0.3, 0.75) and two extra PSF + # shapes to reveal how the constraint accuracy curve behaves between modes. + constraint_modes: + enabled: true + sigma_error_fractions: [0.0, 0.1, 0.2, 0.3, 0.5, 0.75] + angle_error_rad: 0.3 + psf_shapes: + - sigma: [1.0, 1.0] + angle: 0.0 + - sigma: [0.5, 1.5] + angle: 0.7854 # pi/4 + - sigma: [1.0, 2.0] + angle: 1.0472 # pi/3 + - sigma: [0.7, 1.4] + angle: 0.5236 # pi/6 + box_size: 21 + offset: [0.25, 0.25] + scale: 1.5 + + # Study 6: 7 amplitudes x 5 degrees (incl. null) x 4 ignore_centers x 5 types x 3 offsets + # Adds more background amplitudes (0.005, 0.05, 0.25, 1.0), degree=3, and + # ignore_center=[3,3] for a finer view of the background-model landscape. + background: + enabled: true + background_amplitudes: [0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0] + bkgnd_degrees: [0, 1, 2, 3] + bkgnd_degrees_with_null: true + bkgnd_ignore_centers: [[1, 1], [2, 2], [3, 3], [4, 4]] + background_types: [none, constant, linear, quadratic, noisy_constant] + box_size: 21 + sigma: [1.0, 1.0] + offsets: + - [0.0, 0.0] + - [0.25, 0.25] + - [0.5, 0.5] + + # Study 7: 25 SNR points x 4 sigmas x 200 samples = 20000 trials + # Finer log-spaced SNR grid (25 pts vs. 15), extra sigma=1.5 panel, and + # 200 noise samples per point for tighter error bands. + noise_sensitivity: + enabled: true + snr_log_range: [0.5, 3.5] + snr_steps: 25 + sigmas: [0.5, 1.0, 1.5, 2.0] + noise_samples: 200 + box_size: 21 + + # Study 8: 8 hot-pixel counts x 6 num_sigma x 6 amplitudes x 50 samples + # (num_sigma: 5 values [3.0, 4.0, 5.0, 6.0, 8.0] plus null = 6 options) + # Fills in the hot-pixel count axis more densely (adds 2, 7, 15) and expands + # hot_amplitudes (adds 2x, 10x, 50x) for a more complete rejection map. + # Noise samples raised to 50 for less noisy line plots. + hot_pixel_rejection: + enabled: true + num_hot_pixels: [0, 1, 2, 3, 5, 7, 10, 15] + num_sigma_values: [3.0, 4.0, 5.0, 6.0, 8.0] + num_sigma_with_null: true + hot_amplitudes: [2.0, 5.0, 10.0, 20.0, 50.0, 100.0] + noise_samples: 50 + snr: 100.0 + box_size: 21 + sigma: [1.0, 1.0] + offset: [0.25, 0.25] diff --git a/src/characterize_gauss_fit/main.py b/src/characterize_gauss_fit/main.py new file mode 100644 index 0000000..82a6ee2 --- /dev/null +++ b/src/characterize_gauss_fit/main.py @@ -0,0 +1,269 @@ +################################################################################ +# characterize_gauss_fit/main.py +################################################################################ + +"""Command-line entry point for characterize_gauss_fit. + +Parses arguments, loads configuration, and dispatches requested studies. +Exit code is 0 on success, 1 if any study raises an unhandled exception. +""" + +from __future__ import annotations + +import argparse +import importlib.resources +import logging +import pathlib +import shutil +import sys +import time +from collections.abc import Callable + +from characterize_gauss_fit import ( + study_background, + study_box_sigma, + study_constraints, + study_hot_pixels, + study_min_offset, + study_noise, + study_offset, + study_shape, +) +from characterize_gauss_fit.config import STUDY_NAMES, load_config + +_LOG = logging.getLogger(__name__) + +# Type alias for a study run function. +_StudyRunner = Callable[..., None] + +# Registry: study name -> run function. +_STUDY_REGISTRY: dict[str, _StudyRunner] = { + 'box_vs_sigma': study_box_sigma.run, + 'subpixel_offset': study_offset.run, + 'min_detectable_offset': study_min_offset.run, + 'sigma_asymmetry_angle': study_shape.run, + 'constraint_modes': study_constraints.run, + 'background': study_background.run, + 'noise_sensitivity': study_noise.run, + 'hot_pixel_rejection': study_hot_pixels.run, +} + +# Ensure STUDY_NAMES (from config) and _STUDY_REGISTRY stay in sync. +_diff = set(STUDY_NAMES).symmetric_difference(set(_STUDY_REGISTRY)) +if _diff: + raise RuntimeError(f'STUDY_NAMES and _STUDY_REGISTRY are out of sync: {_diff}') + + +def _build_parser() -> argparse.ArgumentParser: + """Build and return the argument parser. + + Returns: + Configured :class:`argparse.ArgumentParser`. + """ + parser = argparse.ArgumentParser( + prog='characterize_gauss_fit', + description=( + 'Characterize Gaussian PSF fitting accuracy across a configurable ' + 'parameter space. Produces PNG plots, CSV tables, and JSON summaries.' + ), + ) + parser.add_argument( + '--config', + metavar='FILE', + type=pathlib.Path, + default=None, + help='Path to a YAML override file merged onto built-in defaults.', + ) + parser.add_argument( + '--study', + metavar='NAME', + action='append', + dest='studies', + default=None, + help=( + 'Run only this study (repeatable). Default: all enabled studies. ' + f'Available names: {", ".join(STUDY_NAMES)}' + ), + ) + parser.add_argument( + '--output-dir', + metavar='DIR', + type=pathlib.Path, + default=None, + help='Override the output directory from the config file.', + ) + parser.add_argument( + '--num-workers', + metavar='N', + type=int, + default=None, + help=( + 'Number of parallel worker processes. ' + 'Default: None (resolved from config file). ' + '1 = sequential in the main process; ' + '>1 = concurrent.futures.ProcessPoolExecutor.' + ), + ) + parser.add_argument( + '--list-studies', + action='store_true', + help='Print available study names and exit.', + ) + parser.add_argument( + '--copy-default-config-to', + metavar='FILE', + type=pathlib.Path, + default=None, + help=('Write the built-in default configuration to FILE and exit. No studies are run.'), + ) + parser.add_argument( + '--copy-test-config-to', + metavar='FILE', + type=pathlib.Path, + default=None, + help=( + 'Write the built-in reduced-grid test configuration to FILE and exit. ' + 'No studies are run.' + ), + ) + parser.add_argument( + '--copy-hires-config-to', + metavar='FILE', + type=pathlib.Path, + default=None, + help=( + 'Write the built-in high-resolution configuration to FILE and exit. No studies are run.' + ), + ) + parser.add_argument( + '--verbose', + '-v', + action='store_true', + help='Enable DEBUG-level logging.', + ) + return parser + + +def _configure_logging(verbose: bool) -> None: + """Configure the root logger for CLI use. + + Parameters: + verbose: If ``True``, set level to DEBUG; otherwise INFO. + """ + level = logging.DEBUG if verbose else logging.INFO + logging.basicConfig( + level=level, + format='%(asctime)s %(levelname)-8s %(name)s: %(message)s', + datefmt='%H:%M:%S', + stream=sys.stderr, + ) + + +def _copy_bundled_config(source_name: str, dest: pathlib.Path) -> None: + """Copy a bundled YAML config file to a user-supplied path. + + Parameters: + source_name: Filename of the bundled resource (e.g. ``'defaults.yaml'``). + dest: Destination path supplied by the user. + + Raises: + SystemExit: If the destination already exists or the copy fails. + """ + if dest.exists(): + print(f'ERROR: destination already exists: {dest}', file=sys.stderr) + sys.exit(1) + try: + pkg = importlib.resources.files('characterize_gauss_fit') + src_ref = pkg.joinpath(source_name) + with importlib.resources.as_file(src_ref) as src_path: + shutil.copy(src_path, dest) + print(f'Written: {dest}') + except OSError as exc: + print(f'ERROR copying config: {exc}', file=sys.stderr) + sys.exit(1) + + +def main() -> None: + """Entry point for the ``characterize_gauss_fit`` command. + + Parses command-line arguments, loads configuration, and runs all requested + studies in order. Prints timing information to stderr. Exits with code 1 + if any study fails with an unhandled exception. + """ + parser = _build_parser() + args = parser.parse_args() + + if args.list_studies: + print('Available studies:') + for name in STUDY_NAMES: + print(f' {name}') + return + + if args.copy_default_config_to is not None: + _copy_bundled_config('defaults.yaml', args.copy_default_config_to) + return + + if args.copy_test_config_to is not None: + _copy_bundled_config('test_config.yaml', args.copy_test_config_to) + return + + if args.copy_hires_config_to is not None: + _copy_bundled_config('hires_config.yaml', args.copy_hires_config_to) + return + + _configure_logging(args.verbose) + + try: + cfg = load_config( + path=args.config, + output_dir=args.output_dir, + num_workers=args.num_workers, + ) + except (FileNotFoundError, ValueError, KeyError) as exc: + print(f'ERROR loading config: {exc}', file=sys.stderr) + sys.exit(1) + + cfg.output_dir.mkdir(parents=True, exist_ok=True) + + requested_names: list[str] = args.studies if args.studies is not None else STUDY_NAMES + + # Validate requested study names. + for name in requested_names: + if name not in _STUDY_REGISTRY: + print( + f'ERROR: unknown study "{name}". Run with --list-studies to see available names.', + file=sys.stderr, + ) + sys.exit(1) + + num_workers = cfg.num_workers + failed: list[str] = [] + overall_start = time.monotonic() + + for name in requested_names: + run_fn = _STUDY_REGISTRY[name] + print(f'[{name}] Starting...', file=sys.stderr) + t0 = time.monotonic() + try: + run_fn(cfg, num_workers=num_workers) + except Exception: + _LOG.exception('Study %s failed with an unhandled exception.', name) + failed.append(name) + elapsed = time.monotonic() - t0 + status = 'FAILED' if name in failed else 'done' + print(f'[{name}] {status} ({elapsed:.1f}s)', file=sys.stderr) + + total = time.monotonic() - overall_start + if failed: + print( + f'\nCompleted with {len(failed)} failed studies. ' + f'Total time: {total:.1f}s. Output: {cfg.output_dir}', + file=sys.stderr, + ) + print(f'FAILED studies: {", ".join(failed)}', file=sys.stderr) + sys.exit(1) + else: + print( + f'\nAll studies complete. Total time: {total:.1f}s. Output: {cfg.output_dir}', + file=sys.stderr, + ) diff --git a/src/characterize_gauss_fit/output.py b/src/characterize_gauss_fit/output.py new file mode 100644 index 0000000..ab02117 --- /dev/null +++ b/src/characterize_gauss_fit/output.py @@ -0,0 +1,387 @@ +################################################################################ +# characterize_gauss_fit/output.py +################################################################################ + +"""CSV and JSON output writers for characterize_gauss_fit study results. + +Each study writes its results to a subdirectory of the configured output +directory. This module provides two writers: + +- :func:`write_csv` -- one row per trial, all input parameters and all result + metrics, directly loadable by pandas or any data-analysis tool. +- :func:`write_json_summary` -- aggregate statistics per parameter group plus + overall convergence rates and the exact config used, suitable for AI analysis. +""" + +from __future__ import annotations + +import csv +import json +import math +import pathlib +from typing import Any + +import numpy as np + +from characterize_gauss_fit.trial import TrialResult, TrialSpec + +# Column order for the CSV file. Non-applicable columns use empty string. +_CSV_COLUMNS: tuple[str, ...] = ( + # Study-level context + 'study', + 'rng_seed', + # Input: geometry + 'box_size', + 'sigma_y_true', + 'sigma_x_true', + 'angle_true', + 'offset_y_true', + 'offset_x_true', + 'scale_true', + 'base', + # Input: fitter construction + 'fit_sigma_y', + 'fit_sigma_x', + 'fit_angle', + # Input: background / noise + 'background_type', + 'background_amplitude', + 'noise_rms', + # Input: hot pixels + 'num_hot_pixels', + 'hot_pixel_amplitude', + # Input: fitting kwargs + 'bkgnd_degree', + 'num_sigma', + 'bkgnd_num_sigma', + 'bkgnd_ignore_center_y', + 'bkgnd_ignore_center_x', + 'max_bad_frac', + 'allow_nonzero_base', + 'use_angular_params', + 'tolerance', + 'search_limit_lo', + 'search_limit_hi', + 'scale_limit', + # Outcome + 'converged', + # Position errors + 'pos_err_y', + 'pos_err_x', + 'pos_err', + # Fitted values + 'sigma_y_fit', + 'sigma_x_fit', + 'angle_fit', + 'scale_fit', + # Parameter errors + 'sigma_y_err', + 'sigma_x_err', + 'angle_err', + 'scale_err', +) + + +def _float_cell(value: float | None) -> str: + """Format a float value for a CSV cell. + + Parameters: + value: The float to format, or ``None`` for a not-applicable field. + + Returns: + The formatted string. NaN is written as ``"NaN"``, infinities as + ``"Inf"`` / ``"-Inf"``, and None as ``""``. + """ + if value is None: + return '' + if math.isnan(value): + return 'NaN' + if math.isinf(value): + return 'Inf' if value > 0 else '-Inf' + return repr(float(value)) + + +def _result_row( + study: str, + spec: TrialSpec, + result: TrialResult, +) -> dict[str, str]: + """Build a CSV row dict from a spec and its result. + + Parameters: + study: The study name string (e.g. ``'box_vs_sigma'``). + spec: The :class:`~trial.TrialSpec` that was executed. + result: The :class:`~trial.TrialResult` from the trial. + + Returns: + A ``dict[str, str]`` mapping CSV column names to string values. + """ + row: dict[str, str] = dict.fromkeys(_CSV_COLUMNS, '') + row['study'] = study + row['rng_seed'] = str(spec.rng_seed) + row['box_size'] = str(spec.box_size) + row['sigma_y_true'] = repr(spec.sigma_y) + row['sigma_x_true'] = repr(spec.sigma_x) + row['angle_true'] = repr(spec.angle) + row['offset_y_true'] = repr(spec.offset_y) + row['offset_x_true'] = repr(spec.offset_x) + row['scale_true'] = repr(spec.scale) + row['base'] = repr(spec.base) + row['fit_sigma_y'] = _float_cell(spec.fit_sigma_y) + row['fit_sigma_x'] = _float_cell(spec.fit_sigma_x) + row['fit_angle'] = _float_cell(spec.fit_angle) + row['background_type'] = spec.background_type + row['background_amplitude'] = repr(spec.background_amplitude) + row['noise_rms'] = repr(spec.noise_rms) + row['num_hot_pixels'] = str(spec.hot_pixel_count) + row['hot_pixel_amplitude'] = repr(spec.hot_pixel_amplitude) + row['bkgnd_degree'] = '' if spec.bkgnd_degree is None else str(spec.bkgnd_degree) + row['num_sigma'] = _float_cell(spec.num_sigma) + row['bkgnd_num_sigma'] = _float_cell(spec.bkgnd_num_sigma) + row['bkgnd_ignore_center_y'] = str(spec.bkgnd_ignore_center[0]) + row['bkgnd_ignore_center_x'] = str(spec.bkgnd_ignore_center[1]) + row['max_bad_frac'] = repr(spec.max_bad_frac) + row['allow_nonzero_base'] = 'true' if spec.allow_nonzero_base else 'false' + row['use_angular_params'] = 'true' if spec.use_angular_params else 'false' + row['tolerance'] = repr(spec.tolerance) + row['search_limit_lo'] = repr(spec.search_limit[0]) + row['search_limit_hi'] = repr(spec.search_limit[1]) + row['scale_limit'] = repr(spec.scale_limit) + row['converged'] = 'true' if result.converged else 'false' + row['pos_err_y'] = _float_cell(result.pos_err_y) + row['pos_err_x'] = _float_cell(result.pos_err_x) + row['pos_err'] = _float_cell(result.pos_err) + row['sigma_y_fit'] = _float_cell(result.sigma_y_fit) + row['sigma_x_fit'] = _float_cell(result.sigma_x_fit) + row['angle_fit'] = _float_cell(result.angle_fit) + row['scale_fit'] = _float_cell(result.scale_fit) + row['sigma_y_err'] = _float_cell(result.sigma_y_err) + row['sigma_x_err'] = _float_cell(result.sigma_x_err) + row['angle_err'] = _float_cell(result.angle_err) + row['scale_err'] = _float_cell(result.scale_err) + return row + + +def write_csv( + output_dir: pathlib.Path, + study: str, + *, + specs: list[TrialSpec], + results: list[TrialResult], +) -> pathlib.Path: + """Write per-trial results to a CSV file. + + Creates ``{output_dir}/{study}/trials.csv`` with one row per trial. + All input parameters and all result metrics are included. Non-applicable + fields (e.g. ``sigma_y_fit`` when sigma_y was fixed) are written as empty + strings. NaN values are written as ``"NaN"``. + + Parameters: + output_dir: Root output directory. + study: Study name (used as subdirectory). + specs: List of :class:`~trial.TrialSpec` objects (same order as ``results``). + results: List of :class:`~trial.TrialResult` objects. + + Returns: + Path to the written CSV file. + + Raises: + ValueError: If ``specs`` and ``results`` have different lengths. + """ + if len(specs) != len(results): + raise ValueError( + f'specs ({len(specs)}) and results ({len(results)}) must have the same length' + ) + study_dir = output_dir / study + study_dir.mkdir(parents=True, exist_ok=True) + csv_path = study_dir / 'trials.csv' + + with csv_path.open('w', newline='', encoding='utf-8') as fh: + writer = csv.DictWriter(fh, fieldnames=_CSV_COLUMNS) + writer.writeheader() + for spec, result in zip(specs, results, strict=True): + writer.writerow(_result_row(study, spec, result)) + + return csv_path + + +def _safe_mean(values: list[float]) -> float | None: + """Compute the mean of a list of finite floats, or None if the list is empty. + + Parameters: + values: A list of float values. NaN and infinite values are excluded. + + Returns: + The mean of finite values, or ``None`` if no finite values exist. + """ + finite = [v for v in values if math.isfinite(v)] + if len(finite) == 0: + return None + return float(np.mean(finite)) + + +def _safe_std(values: list[float]) -> float | None: + """Compute the std dev of a list of finite floats, or None if too few values. + + Parameters: + values: A list of float values. NaN and infinite values are excluded. + + Returns: + The standard deviation, or ``None`` if fewer than two finite values exist. + """ + finite = [v for v in values if math.isfinite(v)] + if len(finite) < 2: + return None + return float(np.std(finite, ddof=1)) + + +def _aggregate_results(results: list[TrialResult]) -> dict[str, Any]: + """Compute aggregate statistics over a list of results. + + Parameters: + results: List of :class:`~trial.TrialResult` objects to aggregate. + + Returns: + A dict of aggregate statistics including counts and per-metric + mean and std values. + """ + n_total = len(results) + converged = [r for r in results if r.converged] + n_converged = len(converged) + + pos_errs = [r.pos_err for r in converged if math.isfinite(r.pos_err)] + sigma_y_errs = [ + r.sigma_y_err + for r in converged + if r.sigma_y_err is not None and math.isfinite(r.sigma_y_err) + ] + sigma_x_errs = [ + r.sigma_x_err + for r in converged + if r.sigma_x_err is not None and math.isfinite(r.sigma_x_err) + ] + angle_errs = [ + r.angle_err for r in converged if r.angle_err is not None and math.isfinite(r.angle_err) + ] + scale_errs = [r.scale_err for r in converged if math.isfinite(r.scale_err)] + + return { + 'n_trials': n_total, + 'n_converged': n_converged, + 'convergence_rate': n_converged / n_total if n_total > 0 else None, + 'pos_err_mean': _safe_mean(pos_errs), + 'pos_err_std': _safe_std(pos_errs), + 'sigma_y_err_mean': _safe_mean(sigma_y_errs), + 'sigma_y_err_std': _safe_std(sigma_y_errs), + 'sigma_x_err_mean': _safe_mean(sigma_x_errs), + 'sigma_x_err_std': _safe_std(sigma_x_errs), + 'angle_err_mean': _safe_mean(angle_errs), + 'angle_err_std': _safe_std(angle_errs), + 'scale_err_mean': _safe_mean(scale_errs), + 'scale_err_std': _safe_std(scale_errs), + } + + +def _sanitize_json(obj: Any) -> Any: + """Recursively replace non-finite floats with None for JSON safety. + + :func:`json.dump` with ``allow_nan=True`` emits non-standard tokens + (``NaN``, ``Infinity``, ``-Infinity``) that many JSON parsers reject. + This function replaces such values with ``None`` (serialised as ``null``). + + Parameters: + obj: Any JSON-serialisable Python object (dict, list, float, etc.). + + Returns: + A new object with the same structure but with non-finite floats + replaced by ``None``. + """ + if isinstance(obj, float): + return None if not math.isfinite(obj) else obj + if isinstance(obj, dict): + return {k: _sanitize_json(v) for k, v in obj.items()} + if isinstance(obj, list): + return [_sanitize_json(v) for v in obj] + return obj + + +def write_json_summary( + output_dir: pathlib.Path, + study: str, + *, + specs: list[TrialSpec], + results: list[TrialResult], + groups: list[dict[str, Any]], + config_used: dict[str, Any], +) -> pathlib.Path: + """Write an aggregate JSON summary for a study. + + Creates ``{output_dir}/{study}/summary.json`` with overall statistics, + per-group breakdowns, and the exact configuration used for reproducibility. + + Parameters: + output_dir: Root output directory. + study: Study name (used as subdirectory). + specs: All :class:`~trial.TrialSpec` objects for the study. + results: All :class:`~trial.TrialResult` objects (same order as ``specs``). + groups: A list of dicts, each describing one parameter group. Each dict + must contain a ``'indices'`` key with the list of result indices + belonging to that group (these are removed before writing). All other + keys are written verbatim as group labels. + config_used: The serialised configuration dict (from + :func:`~config.config_to_dict`). + + Returns: + Path to the written JSON file. + + Raises: + ValueError: If ``specs`` and ``results`` have different lengths, or if any + group has empty or out-of-range indices. + TypeError: If any group is not a dict with a ``list`` under ``'indices'``, + or if any index value is not an ``int``. + """ + if len(specs) != len(results): + raise ValueError( + f'specs ({len(specs)}) and results ({len(results)}) must have the same length' + ) + study_dir = output_dir / study + study_dir.mkdir(parents=True, exist_ok=True) + json_path = study_dir / 'summary.json' + + overall = _aggregate_results(results) + + group_summaries: list[dict[str, Any]] = [] + for group in groups: + if not isinstance(group, dict) or not isinstance(group.get('indices'), list): + raise TypeError(f"Each group must be a dict with a list under 'indices', got {group!r}") + indices: list[int] = group['indices'] + if len(indices) == 0: + raise ValueError(f"Group 'indices' must not be empty, got {group!r}") + for idx in indices: + if not isinstance(idx, int): + raise TypeError(f"Group 'indices' must be a list of ints, got {idx!r} in {group!r}") + if idx < 0 or idx >= len(results): + raise ValueError( + f'Group index {idx} is out of range for results of length {len(results)}' + ) + group_results = [results[idx] for idx in indices] + agg = _aggregate_results(group_results) + # Write label keys (everything except 'indices'). + summary: dict[str, Any] = {k: v for k, v in group.items() if k != 'indices'} + summary.update(agg) + group_summaries.append(summary) + + payload: dict[str, Any] = { + 'study': study, + 'total_trials': overall['n_trials'], + 'converged_trials': overall['n_converged'], + 'convergence_rate': overall['convergence_rate'], + 'overall': overall, + 'groups': group_summaries, + 'config_used': config_used, + } + + with json_path.open('w', encoding='utf-8') as fh: + json.dump(_sanitize_json(payload), fh, indent=2) + + return json_path diff --git a/src/characterize_gauss_fit/plotting.py b/src/characterize_gauss_fit/plotting.py new file mode 100644 index 0000000..1890028 --- /dev/null +++ b/src/characterize_gauss_fit/plotting.py @@ -0,0 +1,557 @@ +################################################################################ +# characterize_gauss_fit/plotting.py +################################################################################ + +"""Reusable matplotlib plot helpers for characterize_gauss_fit. + +All functions accept pre-computed data arrays (not raw :class:`~trial.TrialResult` +lists) and return a :class:`matplotlib.figure.Figure` which the caller saves via +:func:`save_figure`. This keeps data transformation and rendering cleanly +separated. +""" + +from __future__ import annotations + +import pathlib +from collections.abc import Sequence +from typing import Any + +import matplotlib +import matplotlib.patheffects as _pe +import numpy as np +import numpy.typing as npt + +matplotlib.use('Agg') # non-interactive backend; must be set before other imports +import matplotlib.pyplot as plt +from matplotlib.axes import Axes +from matplotlib.figure import Figure + +# DPI for saved PNG files. +_SAVE_DPI = 150 + +# Colour used to mark cells / points where the fitter did not converge. +_FAIL_COLOUR = '#cccccc' + +# Line styles cycled across series in plot_line_with_bands so that series +# remain distinguishable when they overlap or when colour alone is ambiguous. +_LINE_STYLES = ['-', '--', '-.', ':', (0, (3, 1, 1, 1)), (0, (5, 2))] + +# Path effects applied to heatmap cell annotations so text is readable on any +# background colour. +_HEATMAP_TEXT_EFFECTS = [_pe.withStroke(linewidth=3, foreground='white')] + +# Text automatically appended to every line-with-bands plot explaining the +# shaded confidence region. +_BANDS_NOTE = 'Shaded bands: mean \u00b1 1 std.\u202fdev. across repeated trials' + +# Style used for all figure footnotes. +_NOTE_STYLE: dict[str, Any] = { + 'ha': 'center', + 'va': 'bottom', + 'fontsize': 7, + 'color': '#555555', + 'style': 'italic', + 'transform': None, # overridden per-call with fig.transFigure +} + + +def _add_figure_note(fig: Figure, note: str, *, bottom: float = 0.12) -> None: + """Render ``note`` as a small italic footnote at the bottom of ``fig``. + + Uses ``tight_layout(rect=[0, note_strip, 1, 1])`` so matplotlib positions + the axes, tick labels, and x-axis title entirely *above* the note strip in + a single consistent pass. This prevents the x-axis title from ending up + in the same vertical band as the note text. + + Parameters: + fig: The figure to annotate. + note: Text to display. May contain newlines. + bottom: Minimum fraction of figure height reserved for the note strip. + """ + n_lines = note.count('\n') + 1 + fig_height = fig.get_figheight() + # 7 pt font at 72 pt/inch with 1.15x line spacing plus 0.06 in padding. + note_frac = (n_lines * 7 * 1.15 / 72.0 + 0.06) / fig_height + actual_bottom = max(bottom, note_frac) + kw = dict(_NOTE_STYLE) + kw['transform'] = fig.transFigure + # Place note text just above the figure bottom edge, inside the reserved strip. + fig.text(0.5, 0.005, note, **kw) + # Single tight_layout call: everything (axes + labels) goes in the rect + # above the note strip, so x-axis title never overlaps the note. + fig.tight_layout(rect=(0.0, actual_bottom, 1.0, 1.0)) + + +def save_figure(fig: Figure, output_dir: pathlib.Path, filename: str) -> pathlib.Path: + """Save a figure to a PNG file and close it. + + Parameters: + fig: The :class:`matplotlib.figure.Figure` to save. + output_dir: Directory where the file will be written (must exist). + filename: File name (should end in ``.png``). + + Returns: + Path to the written PNG file. + """ + path = output_dir / filename + fig.savefig(path, dpi=_SAVE_DPI) + plt.close(fig) + return path + + +def plot_heatmap( + data: npt.NDArray[np.float64], + x_labels: list[str], + y_labels: list[str], + *, + title: str, + xlabel: str, + ylabel: str, + cbar_label: str, + log_scale: bool = False, + mask: npt.NDArray[np.bool_] | None = None, + annotate: bool = True, + note: str = '', +) -> Figure: + """Create a 2-D heatmap (imshow) with optional log scaling and a fail mask. + + Parameters: + data: 2-D array of shape ``(len(y_labels), len(x_labels))``. + x_labels: Labels for the X axis (columns). + y_labels: Labels for the Y axis (rows). + title: Figure title. + xlabel: X-axis label. + ylabel: Y-axis label. + cbar_label: Colour-bar label. + log_scale: If ``True``, apply ``log10`` to positive values before display. + mask: Boolean array of the same shape as ``data``. Masked cells (``True``) + are displayed in :data:`_FAIL_COLOUR` to indicate non-convergence. + annotate: If ``True``, write the numeric value in each cell. + note: Optional assumptions/context string rendered as a small italic + footnote at the bottom of the figure. + + Returns: + A :class:`matplotlib.figure.Figure`. + """ + n_rows, n_cols = data.shape + # Extra height (+1.2) reserves room for the title and the footnote without + # either being clipped. tight_layout() further adjusts subplot margins. + fig, ax = plt.subplots(figsize=(max(7, n_cols * 0.9 + 1), max(5.5, n_rows * 0.7 + 1.2))) + + display = data.astype(float) + if log_scale: + with np.errstate(divide='ignore', invalid='ignore'): + display = np.where(display > 0, np.log10(display), np.nan) + + cmap = plt.get_cmap('viridis').copy() + cmap.set_bad(color=_FAIL_COLOUR) + + if mask is not None: + if mask.shape != data.shape: + raise ValueError(f'mask.shape {mask.shape} does not match data.shape {data.shape}') + display = np.where(mask, np.nan, display) + + img = ax.imshow(display, aspect='auto', cmap=cmap, origin='upper') + cbar = fig.colorbar(img, ax=ax) + cbar.set_label(cbar_label) + + ax.set_xticks(range(n_cols)) + ax.set_xticklabels(x_labels, rotation=45, ha='right', fontsize=8) + ax.set_yticks(range(n_rows)) + ax.set_yticklabels(y_labels, fontsize=8) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_title(title) + + if annotate: + for row in range(n_rows): + for col in range(n_cols): + val = display[row, col] + if np.isnan(val): + text = 'N/C' + elif log_scale: + text = f'{10**val:.2e}' + else: + text = f'{val:.3f}' + ax.text( + col, + row, + text, + ha='center', + va='center', + fontsize=6, + path_effects=_HEATMAP_TEXT_EFFECTS, + ) + + if note: + _add_figure_note(fig, note, bottom=0.10) + else: + fig.tight_layout() + return fig + + +def plot_line_with_bands( + x: npt.NDArray[np.float64], + y_means: Sequence[npt.NDArray[np.float64]], + y_stds: Sequence[npt.NDArray[np.float64]], + *, + labels: list[str], + title: str, + xlabel: str, + ylabel: str, + log_x: bool = False, + log_y: bool = False, + note: str = '', +) -> Figure: + """Create a multi-series line plot with shaded mean +/- 1 std bands. + + Parameters: + x: 1-D array of X values (common to all series). + y_means: Sequence of 1-D mean arrays, one per series. + y_stds: Sequence of 1-D std arrays, one per series (same length as ``y_means``). + labels: Legend labels, one per series. + title: Figure title. + xlabel: X-axis label. + ylabel: Y-axis label. + log_x: If ``True``, use a log scale for the X axis. + log_y: If ``True``, use a log scale for the Y axis. + note: Optional study-specific assumptions string. It is combined with + the automatic shaded-band explanation and rendered as a footnote. + + Returns: + A :class:`matplotlib.figure.Figure`. + """ + fig, ax = plt.subplots(figsize=(8, 5)) + + for i, (y_mean, y_std, label) in enumerate(zip(y_means, y_stds, labels, strict=True)): + ls = _LINE_STYLES[i % len(_LINE_STYLES)] + (line,) = ax.plot(x, y_mean, label=label, marker='o', markersize=3, linestyle=ls) + colour = line.get_color() + if log_y: + lower = np.maximum(y_mean - y_std, 1e-15) + else: + lower = y_mean - y_std + upper = y_mean + y_std + ax.fill_between(x, lower, upper, alpha=0.2, color=colour) + + if log_x: + ax.set_xscale('log') + if log_y: + # Only apply log scale if there are positive values; otherwise the + # matplotlib locator raises on tick-generation. + all_values = np.concatenate(list(y_means)) + if np.any(all_values[np.isfinite(all_values)] > 0): + ax.set_yscale('log') + + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_title(title) + ax.legend(fontsize=8) + ax.grid(visible=True, which='both', alpha=0.3) + + full_note = f'{_BANDS_NOTE} | {note}' if note else _BANDS_NOTE + _add_figure_note(fig, full_note, bottom=0.12) + return fig + + +def plot_grouped_bars( + categories: list[str], + group_labels: list[str], + values: npt.NDArray[np.float64], + *, + title: str, + ylabel: str, + log_scale: bool = False, +) -> Figure: + """Create a grouped bar chart. + + Parameters: + categories: Labels for the X-axis groups (outer variable). + group_labels: Labels for the colour-coded bars within each group. + values: 2-D array of shape ``(len(categories), len(group_labels))`` with + the bar heights. + title: Figure title. + ylabel: Y-axis label. + log_scale: If ``True``, use a log scale for the Y axis. + + Returns: + A :class:`matplotlib.figure.Figure`. + """ + n_cat = len(categories) + n_groups = len(group_labels) + fig, ax = plt.subplots(figsize=(max(7, n_cat * 1.2), 5)) + + x = np.arange(n_cat, dtype=float) + width = 0.8 / n_groups + + for g_idx, g_label in enumerate(group_labels): + offsets = x + (g_idx - (n_groups - 1) / 2.0) * width + bar_vals = values[:, g_idx].astype(float) + nan_mask = np.isnan(bar_vals) + display_vals = np.where(nan_mask, 0.0, bar_vals) + ax.bar(offsets, display_vals, width=width * 0.9, label=g_label) + for _cat_i, (off, is_nan) in enumerate(zip(offsets, nan_mask, strict=True)): + if is_nan: + ax.text(off, 0.0, 'NaN', ha='center', va='bottom', fontsize=5, rotation=90) + + ax.set_xticks(x) + ax.set_xticklabels(categories, rotation=30, ha='right', fontsize=8) + ax.set_ylabel(ylabel) + ax.set_title(title) + ax.legend(fontsize=8) + if log_scale: + float_vals = values.astype(float) + if (np.isfinite(float_vals) & (float_vals > 0)).any(): + ax.set_yscale('log') + ax.grid(visible=True, axis='y', alpha=0.3) + + return fig + + +def plot_multi_panel_heatmaps( + data_panels: Sequence[npt.NDArray[np.float64]], + x_labels: list[str], + y_labels: list[str], + panel_titles: list[str], + *, + fig_title: str, + xlabel: str, + ylabel: str, + cbar_label: str, + log_scale: bool = False, + mask_panels: list[npt.NDArray[np.bool_]] | None = None, +) -> Figure: + """Create a row of heatmap subplots sharing the same colour scale. + + Parameters: + data_panels: Sequence of 2-D arrays, one per panel. + x_labels: Shared X-axis labels. + y_labels: Shared Y-axis labels. + panel_titles: Title for each panel (one per element in ``data_panels``). + fig_title: Overall figure title (suptitle). + xlabel: Shared X-axis label. + ylabel: Shared Y-axis label. + cbar_label: Colour-bar label. + log_scale: If ``True``, apply ``log10`` before display. + mask_panels: Optional list of boolean mask arrays, one per panel. + + Returns: + A :class:`matplotlib.figure.Figure`. + """ + n_panels = len(data_panels) + fig, axes = plt.subplots( + 1, n_panels, figsize=(max(5, n_panels * 3.5), max(4, len(y_labels) * 0.6)) + ) + if n_panels == 1: + axes = [axes] + + all_display: list[npt.NDArray[np.float64]] = [] + for panel_data in data_panels: + arr = panel_data.astype(float) + if log_scale: + with np.errstate(divide='ignore', invalid='ignore'): + arr = np.where(arr > 0, np.log10(arr), np.nan) + all_display.append(arr) + + finite_vals = np.concatenate([d.ravel() for d in all_display]) + finite_vals = finite_vals[np.isfinite(finite_vals)] + vmin = float(finite_vals.min()) if len(finite_vals) > 0 else 0.0 + vmax = float(finite_vals.max()) if len(finite_vals) > 0 else 1.0 + + cmap = plt.get_cmap('viridis').copy() + cmap.set_bad(color=_FAIL_COLOUR) + img_ref = None + + if mask_panels is not None and len(mask_panels) != len(data_panels): + raise ValueError( + f'mask_panels length ({len(mask_panels)}) must equal ' + f'data_panels length ({len(data_panels)})' + ) + + for p_idx, (ax, display, ptitle) in enumerate( + zip(axes, all_display, panel_titles, strict=True) + ): + panel_display = display.copy() + if mask_panels is not None: + if mask_panels[p_idx].shape != display.shape: + raise ValueError( + f'mask_panels[{p_idx}].shape {mask_panels[p_idx].shape} does not match ' + f'data_panels[{p_idx}].shape {display.shape}' + ) + panel_display = np.where(mask_panels[p_idx], np.nan, panel_display) + + img = ax.imshow( + panel_display, + aspect='auto', + cmap=cmap, + origin='upper', + vmin=vmin, + vmax=vmax, + ) + img_ref = img + ax.set_title(ptitle, fontsize=8) + ax.set_xticks(range(len(x_labels))) + ax.set_xticklabels(x_labels, rotation=45, ha='right', fontsize=7) + ax.set_yticks(range(len(y_labels))) + if p_idx == 0: + ax.set_yticklabels(y_labels, fontsize=7) + ax.set_ylabel(ylabel) + else: + ax.set_yticklabels([]) + ax.set_xlabel(xlabel) + + if img_ref is not None: + fig.colorbar(img_ref, ax=axes, label=cbar_label, shrink=0.8) + fig.suptitle(fig_title, fontsize=10) + fig.tight_layout() + + return fig + + +def plot_recovery_fraction_heatmap( + recovery_fractions: npt.NDArray[np.float64], + x_labels: list[str], + y_labels: list[str], + *, + title: str, + xlabel: str, + ylabel: str, + note: str = '', +) -> Figure: + """Create a heatmap of recovery fractions in [0, 1] with white-to-green scale. + + Used by Study 3 (minimum detectable offset) to show the fraction of trials + where the offset was successfully recovered. + + Parameters: + recovery_fractions: 2-D array of shape ``(len(y_labels), len(x_labels))`` + with values in [0, 1]. + x_labels: Labels for the X axis (columns). + y_labels: Labels for the Y axis (rows). + title: Figure title. + xlabel: X-axis label. + ylabel: Y-axis label. + note: Optional assumptions/context string rendered as a small italic + footnote at the bottom of the figure. + + Returns: + A :class:`matplotlib.figure.Figure`. + """ + n_rows, n_cols = recovery_fractions.shape + fig, ax = plt.subplots(figsize=(max(7, n_cols * 0.9 + 1), max(5.5, n_rows * 0.7 + 1.2))) + + img = ax.imshow( + recovery_fractions, + aspect='auto', + cmap='Greens', + origin='upper', + vmin=0.0, + vmax=1.0, + ) + cbar = fig.colorbar(img, ax=ax) + cbar.set_label('Recovery fraction') + + ax.set_xticks(range(n_cols)) + ax.set_xticklabels(x_labels, rotation=45, ha='right', fontsize=8) + ax.set_yticks(range(n_rows)) + ax.set_yticklabels(y_labels, fontsize=8) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_title(title) + + for row in range(n_rows): + for col in range(n_cols): + val = recovery_fractions[row, col] + if np.isnan(val): + text = 'N/C' + else: + text = f'{val:.2f}' + ax.text( + col, + row, + text, + ha='center', + va='center', + fontsize=6, + path_effects=_HEATMAP_TEXT_EFFECTS, + ) + + if note: + _add_figure_note(fig, note, bottom=0.10) + else: + fig.tight_layout() + return fig + + +def plot_constraint_summary( + categories: list[str], + group_labels: list[str], + pos_err_vals: npt.NDArray[np.float64], + pos_err_y_vals: npt.NDArray[np.float64], + pos_err_x_vals: npt.NDArray[np.float64], + scale_err_vals: npt.NDArray[np.float64], + sigma_y_err_vals: npt.NDArray[np.float64], + angle_err_vals: npt.NDArray[np.float64], + *, + title: str, + note: str = '', +) -> Figure: + """Create a 6-panel grouped bar chart for Study 5 constraint modes. + + Shows position error (Euclidean, Y-axis, and X-axis), scale error, + sigma_y error, and angle error side by side so that the effect of parameter + constraints on all metrics is visible at once. + + Parameters: + categories: Constraint mode names (X-axis categories). + group_labels: PSF shape labels (bar groups within each category). + pos_err_vals: ``(n_cat, n_groups)`` Euclidean position error array. + pos_err_y_vals: ``(n_cat, n_groups)`` absolute Y-axis position error. + pos_err_x_vals: ``(n_cat, n_groups)`` absolute X-axis position error. + scale_err_vals: ``(n_cat, n_groups)`` relative scale error array. + sigma_y_err_vals: ``(n_cat, n_groups)`` relative sigma_y error array. + angle_err_vals: ``(n_cat, n_groups)`` absolute angle error array (degrees). + title: Overall figure title. + note: Optional assumptions/context string rendered as a small italic + footnote at the bottom of the figure. + + Returns: + A :class:`matplotlib.figure.Figure`. + """ + fig, axes = plt.subplots(2, 3, figsize=(18, 8)) + metrics: list[tuple[Axes, npt.NDArray[np.float64], str]] = [ + (axes[0, 0], pos_err_vals, 'Position error, Euclidean (pixels)'), + (axes[0, 1], pos_err_y_vals, '|pos_err_y| (pixels)'), + (axes[0, 2], pos_err_x_vals, '|pos_err_x| (pixels)'), + (axes[1, 0], scale_err_vals, 'Relative scale error'), + (axes[1, 1], sigma_y_err_vals, 'Relative sigma_y error'), + (axes[1, 2], angle_err_vals, 'Angle error (\u00b0, floating modes only)'), + ] + + n_cat = len(categories) + n_groups = len(group_labels) + x = np.arange(n_cat, dtype=float) + width = 0.8 / n_groups + + for ax, vals, metric_label in metrics: + for g_idx, g_label in enumerate(group_labels): + offsets = x + (g_idx - (n_groups - 1) / 2.0) * width + bar_vals = vals[:, g_idx].astype(float) + nan_mask = np.isnan(bar_vals) + display_vals = np.where(nan_mask, 0.0, bar_vals) + ax.bar(offsets, display_vals, width=width * 0.9, label=g_label) + for off, is_nan in zip(offsets, nan_mask, strict=True): + if is_nan: + ax.text(off, 0.0, 'NaN', ha='center', va='bottom', fontsize=5, rotation=90) + ax.set_xticks(x) + ax.set_xticklabels(categories, rotation=30, ha='right', fontsize=7) + ax.set_ylabel(metric_label, fontsize=8) + ax.legend(fontsize=7) + ax.grid(visible=True, axis='y', alpha=0.3) + + fig.suptitle(title, fontsize=10) + if note: + _add_figure_note(fig, note, bottom=0.08) + else: + fig.tight_layout() + return fig diff --git a/src/characterize_gauss_fit/study_background.py b/src/characterize_gauss_fit/study_background.py new file mode 100644 index 0000000..5704f29 --- /dev/null +++ b/src/characterize_gauss_fit/study_background.py @@ -0,0 +1,272 @@ +################################################################################ +# characterize_gauss_fit/study_background.py +################################################################################ + +"""Study 6: Background conditions and modeling. + +Explores how different injected background types and fitting model choices +interact to affect position, sigma, and scale recovery accuracy. The study +is repeated for each configured subpixel offset so that offset-sensitivity can +be compared visually alongside background effects. +""" + +from __future__ import annotations + +import dataclasses +import logging +import pathlib +from typing import Any + +import numpy as np + +from characterize_gauss_fit import _study_utils as utils +from characterize_gauss_fit.config import ( + Config, + StudyBackgroundConfig, + config_to_dict, +) +from characterize_gauss_fit.executor import run_trials +from characterize_gauss_fit.output import write_csv, write_json_summary +from characterize_gauss_fit.plotting import plot_heatmap, save_figure +from characterize_gauss_fit.trial import BACKGROUND_TYPE_NONE, TrialResult, TrialSpec + +_LOG = logging.getLogger(__name__) +_STUDY_NAME = 'background' + +# Type alias for the lookup dict key used in _write_outputs. +_BkgndKey = tuple[float, float, str, float, int | None, tuple[int, int]] + + +def _compute_fit_degrees(study: StudyBackgroundConfig) -> list[int | None]: + """Build the ordered list of fitting-degree values for Study 6. + + Parameters: + study: The study configuration. + + Returns: + The list of ``bkgnd_degree`` values to iterate over, with ``None`` + prepended when ``bkgnd_degrees_with_null`` is enabled. + """ + degrees: list[int | None] = list(study.bkgnd_degrees) + if study.bkgnd_degrees_with_null: + degrees = [None, *degrees] + return degrees + + +def build_specs(cfg: Config) -> tuple[list[TrialSpec], list[int | None]]: + """Build trial specs for Study 6 and return the fitting-degree list. + + Iterates over offsets, background types, fitting degrees, and ignore-center + sizes. Specs are ordered by offset first so that :func:`_write_outputs` + can slice them by offset. + + Parameters: + cfg: The active :class:`~config.Config` instance. + + Returns: + A tuple ``(specs, fit_degrees)`` where ``fit_degrees`` is the list of + ``bkgnd_degree`` values used per spec (same order as ``specs``). + """ + study = cfg.studies.background + + fit_degrees = _compute_fit_degrees(study) + + specs: list[TrialSpec] = [] + spec_fit_degrees: list[int | None] = [] + seed = 6000 + + for offset_y, offset_x in study.offsets: + for bkgnd_type in study.background_types: + # When there is no background, all amplitude values collapse to 0.0, + # so iterate only once to avoid producing identical duplicate trials. + amp_values = ( + [0.0] if bkgnd_type == BACKGROUND_TYPE_NONE else study.background_amplitudes + ) + for amplitude in amp_values: + for fit_degree in fit_degrees: + for ignore_center in study.bkgnd_ignore_centers: + fitting = dataclasses.replace( + study.fitting, + bkgnd_degree=fit_degree, + bkgnd_ignore_center=ignore_center, + ) + specs.append( + utils.make_spec( + sigma_y=study.sigma[0], + sigma_x=study.sigma[1], + angle=0.0, + offset_y=offset_y, + offset_x=offset_x, + scale=cfg.generation.scale, + base=cfg.generation.base, + box_size=study.box_size, + fitting=fitting, + fit_angle=0.0, + background_type=bkgnd_type, + background_amplitude=amplitude, + rng_seed=seed, + ) + ) + spec_fit_degrees.append(fit_degree) + seed += 1 + + return specs, spec_fit_degrees + + +def run(cfg: Config, *, num_workers: int = 1) -> None: + """Execute Study 6 and write all outputs. + + Parameters: + cfg: The active :class:`~config.Config` instance. + num_workers: Number of parallel worker processes. + """ + study = cfg.studies.background + if not study.enabled: + _LOG.info('Study %s is disabled; skipping.', _STUDY_NAME) + return + + specs, _fit_degrees = build_specs(cfg) + _LOG.info('Study %s: %d trials', _STUDY_NAME, len(specs)) + + results = run_trials( + specs, + num_workers=num_workers, + progress_callback=utils.progress_callback(_STUDY_NAME), + ) + + study_dir = utils.ensure_study_dir(cfg.output_dir, _STUDY_NAME) + _write_outputs(cfg, specs, results, study_dir) + + +def _write_outputs( + cfg: Config, + specs: list[TrialSpec], + results: list[TrialResult], + study_dir: pathlib.Path, +) -> None: + """Write CSV, JSON, and PNG outputs for Study 6. + + One set of heatmaps is produced for each configured (offset, amplitude, + ignore_center) combination. Each filename includes the offset tag so files + from different offsets do not collide. + + Parameters: + cfg: The active :class:`~config.Config` instance. + specs: All trial specifications. + results: All trial results. + study_dir: Output subdirectory. + """ + study = cfg.studies.background + scale = cfg.generation.scale + fit_degrees = _compute_fit_degrees(study) + + bkgnd_types = study.background_types + amplitudes = study.background_amplitudes + ignore_centers = study.bkgnd_ignore_centers + + degree_labels = ['null' if d is None else str(d) for d in fit_degrees] + type_labels = bkgnd_types + + # Build O(1) lookup: (offset_y, offset_x, bkgnd_type, amplitude, + # fit_degree, ignore_center) -> TrialResult. + lookup: dict[_BkgndKey, TrialResult] = {} + for spec, result in zip(specs, results, strict=True): + key: _BkgndKey = ( + spec.offset_y, + spec.offset_x, + spec.background_type, + spec.background_amplitude, + spec.bkgnd_degree, + spec.bkgnd_ignore_center, + ) + lookup[key] = result + + for offset_y, offset_x in study.offsets: + tag = utils.offset_tag(offset_y, offset_x) + offset_str = f'offset ({offset_y:+.2f}, {offset_x:+.2f})' + + for amp_idx, amplitude in enumerate(amplitudes): + for ic_idx, ignore_center in enumerate(ignore_centers): + grid = np.full((len(bkgnd_types), len(fit_degrees)), float('nan')) + grid_y = np.full((len(bkgnd_types), len(fit_degrees)), float('nan')) + grid_x = np.full((len(bkgnd_types), len(fit_degrees)), float('nan')) + fail_mask = np.zeros_like(grid, dtype=bool) + + for bt_idx, bkgnd_type in enumerate(bkgnd_types): + inj_amplitude = 0.0 if bkgnd_type == BACKGROUND_TYPE_NONE else amplitude + for fd_idx, fit_degree in enumerate(fit_degrees): + ic_key: tuple[int, int] = (ignore_center[0], ignore_center[1]) + found: TrialResult | None = lookup.get( + (offset_y, offset_x, bkgnd_type, inj_amplitude, fit_degree, ic_key) + ) + if found is None: + continue + if not found.converged: + fail_mask[bt_idx, fd_idx] = True + else: + grid[bt_idx, fd_idx] = found.pos_err + grid_y[bt_idx, fd_idx] = abs(found.pos_err_y) + grid_x[bt_idx, fd_idx] = abs(found.pos_err_x) + + ic_str = f'{ignore_center[0]}x{ignore_center[1]}' + bkgnd_note = ( + f'PSF: sigma_y = {study.sigma[0]:.1f}, sigma_x = {study.sigma[1]:.1f} px' + f' (fixed); angle = 0\u00b0 (fixed); box_size = {study.box_size} px;' + f' scale = {scale:.2g}; one noiseless trial per cell\n' + f'Offset: Y = {offset_y:+.2f}, X = {offset_x:+.2f} px from pixel centre' + ' (fixed; one heatmap set per offset pair)\n' + f'Background: type on y-axis; amplitude = {amplitude:.2g} \u00d7 PSF peak' + ' (see title); no Gaussian detector noise added\n' + 'Fitting: sigma_y and sigma_x float freely; angle fixed at 0\u00b0;' + ' bkgnd_degree on x-axis (null = no subtraction);' + f' bkgnd_ignore_center = {ic_str} (see title)' + ) + for hmap, metric_label, fsuffix in [ + (grid, 'Position error (Euclidean)', ''), + (grid_y, '|pos_err_y|', '_y'), + (grid_x, '|pos_err_x|', '_x'), + ]: + fig = plot_heatmap( + hmap, + degree_labels, + type_labels, + title=( + f'{metric_label} -- amp={amplitude:.2f}, ' + f'ignore={ic_str} [{offset_str}]' + ), + xlabel='Fitting bkgnd_degree', + ylabel='Injected background type', + cbar_label='log10(pos error)', + log_scale=True, + mask=fail_mask, + note=bkgnd_note, + ) + save_figure( + fig, + study_dir, + f'{_STUDY_NAME}_pos_err{fsuffix}_amp{amp_idx}_ic{ic_idx}_{tag}.png', + ) + + write_csv(cfg.output_dir, _STUDY_NAME, specs=specs, results=results) + + groups: list[dict[str, Any]] = utils.build_groups_by_keys( + specs, + results, + [ + ('offset_y', lambda s: s.offset_y), + ('offset_x', lambda s: s.offset_x), + ('background_type', lambda s: s.background_type), + ('background_amplitude', lambda s: s.background_amplitude), + ('bkgnd_degree', lambda s: s.bkgnd_degree), + ('bkgnd_ignore_center', lambda s: s.bkgnd_ignore_center), + ], + ) + write_json_summary( + cfg.output_dir, + _STUDY_NAME, + specs=specs, + results=results, + groups=groups, + config_used=config_to_dict(cfg), + ) + _LOG.info('Study %s outputs written to %s', _STUDY_NAME, study_dir) diff --git a/src/characterize_gauss_fit/study_box_sigma.py b/src/characterize_gauss_fit/study_box_sigma.py new file mode 100644 index 0000000..c9d51b4 --- /dev/null +++ b/src/characterize_gauss_fit/study_box_sigma.py @@ -0,0 +1,208 @@ +################################################################################ +# characterize_gauss_fit/study_box_sigma.py +################################################################################ + +"""Study 1: Box size vs. PSF sigma. + +Explores how the subimage size relative to the PSF width affects position, +sigma, and scale recovery accuracy. The study is repeated for each configured +subpixel offset so that offset-sensitivity can be compared visually. +""" + +from __future__ import annotations + +import logging +import pathlib +from typing import Any + +import numpy as np + +from characterize_gauss_fit import _study_utils as utils +from characterize_gauss_fit.config import Config, config_to_dict +from characterize_gauss_fit.executor import run_trials +from characterize_gauss_fit.output import write_csv, write_json_summary +from characterize_gauss_fit.plotting import plot_heatmap, save_figure +from characterize_gauss_fit.trial import TrialResult, TrialSpec + +_LOG = logging.getLogger(__name__) +_STUDY_NAME = 'box_vs_sigma' + + +def build_specs(cfg: Config) -> list[TrialSpec]: + """Build the full list of :class:`~trial.TrialSpec` objects for Study 1. + + Each spec represents one (offset, box_size, sigma) combination with no + background or noise, with sigma left to float during fitting. Specs are + ordered by offset first, then box_size, then sigma so that + :func:`_write_outputs` can slice them by offset slice. + + Parameters: + cfg: The active :class:`~config.Config` instance. + + Returns: + A list of :class:`~trial.TrialSpec` objects. + """ + study = cfg.studies.box_vs_sigma + specs: list[TrialSpec] = [] + seed = 1000 + for offset_y, offset_x in study.offsets: + for box_size in study.box_sizes: + for sigma in study.sigmas: + specs.append( + utils.make_spec( + sigma_y=sigma, + sigma_x=sigma, + angle=study.angle, + offset_y=offset_y, + offset_x=offset_x, + scale=study.scale, + base=cfg.generation.base, + box_size=box_size, + fitting=study.fitting, + # sigma left to float (fit_sigma_y/x = None) + fit_angle=0.0, # axis-aligned; no need to fit angle + rng_seed=seed, + ) + ) + seed += 1 + return specs + + +def run(cfg: Config, *, num_workers: int = 1) -> None: + """Execute Study 1 and write all outputs. + + Parameters: + cfg: The active :class:`~config.Config` instance. + num_workers: Number of parallel worker processes. + """ + study = cfg.studies.box_vs_sigma + if not study.enabled: + _LOG.info('Study %s is disabled; skipping.', _STUDY_NAME) + return + + specs = build_specs(cfg) + _LOG.info('Study %s: %d trials', _STUDY_NAME, len(specs)) + + results = run_trials( + specs, + num_workers=num_workers, + progress_callback=utils.progress_callback(_STUDY_NAME), + ) + + study_dir = utils.ensure_study_dir(cfg.output_dir, _STUDY_NAME) + _write_outputs(cfg, specs, results, study_dir) + + +def _write_outputs( + cfg: Config, + specs: list[TrialSpec], + results: list[TrialResult], + study_dir: pathlib.Path, +) -> None: + """Write CSV, JSON, and PNG outputs for Study 1. + + One set of six heatmaps is produced for each configured offset pair. + Each filename includes the offset tag so files from different offsets do + not collide. + + Parameters: + cfg: The active :class:`~config.Config` instance. + specs: All trial specifications. + results: All trial results (same order as ``specs``). + study_dir: Output subdirectory for this study. + """ + study = cfg.studies.box_vs_sigma + box_sizes = study.box_sizes + sigmas = study.sigmas + offsets = study.offsets + + n_box = len(box_sizes) + n_sigma = len(sigmas) + n_per_offset = n_box * n_sigma + + x_labels = [f'{s:.2g}' for s in sigmas] + y_labels = [str(b) for b in box_sizes] + + for off_idx, (offset_y, offset_x) in enumerate(offsets): + tag = utils.offset_tag(offset_y, offset_x) + slice_results = results[off_idx * n_per_offset : (off_idx + 1) * n_per_offset] + + pos_err_grid = np.full((n_box, n_sigma), float('nan')) + pos_err_y_grid = np.full((n_box, n_sigma), float('nan')) + pos_err_x_grid = np.full((n_box, n_sigma), float('nan')) + sigma_y_err_grid = np.full((n_box, n_sigma), float('nan')) + sigma_x_err_grid = np.full((n_box, n_sigma), float('nan')) + scale_err_grid = np.full((n_box, n_sigma), float('nan')) + fail_mask = np.zeros((n_box, n_sigma), dtype=bool) + + idx = 0 + for b_idx in range(n_box): + for s_idx in range(n_sigma): + r = slice_results[idx] + if not r.converged: + fail_mask[b_idx, s_idx] = True + else: + pos_err_grid[b_idx, s_idx] = r.pos_err + pos_err_y_grid[b_idx, s_idx] = abs(r.pos_err_y) + pos_err_x_grid[b_idx, s_idx] = abs(r.pos_err_x) + if r.sigma_y_err is not None and np.isfinite(r.sigma_y_err): + sigma_y_err_grid[b_idx, s_idx] = abs(r.sigma_y_err) + if r.sigma_x_err is not None and np.isfinite(r.sigma_x_err): + sigma_x_err_grid[b_idx, s_idx] = abs(r.sigma_x_err) + if np.isfinite(r.scale_err): + scale_err_grid[b_idx, s_idx] = abs(r.scale_err) + idx += 1 + + offset_str = f'offset ({offset_y:+.2f}, {offset_x:+.2f})' + plot_note = ( + f'PSF: sigma_y = sigma_x = sigma (x-axis); angle = {study.angle:.0f}\u00b0 (fixed,' + f' axis-aligned); scale = {study.scale:.2g}; one noiseless trial per cell\n' + f'Offset: Y = {offset_y:+.2f}, X = {offset_x:+.2f} px from pixel centre' + ' (fixed; one heatmap produced per offset pair)\n' + 'Background / noise: none injected; image is clean Gaussian pixel integrals only\n' + 'Fitting: sigma_y and sigma_x float freely; angle fixed at 0\u00b0;' + ' no background subtraction' + ) + for data, metric_title, metric_key in [ + (pos_err_grid, 'Position error (Euclidean)', 'pos_err'), + (pos_err_y_grid, '|pos_err_y|', 'pos_err_y'), + (pos_err_x_grid, '|pos_err_x|', 'pos_err_x'), + (sigma_y_err_grid, 'Relative |sigma_y| error', 'sigma_y_err'), + (sigma_x_err_grid, 'Relative |sigma_x| error', 'sigma_x_err'), + (scale_err_grid, 'Relative |scale| error', 'scale_err'), + ]: + fig = plot_heatmap( + data, + x_labels, + y_labels, + title=f'{metric_title} vs. box size and sigma [{offset_str}]', + xlabel='Sigma (pixels)', + ylabel='Box size (pixels)', + cbar_label='log10(error)', + log_scale=True, + mask=fail_mask, + note=plot_note, + ) + save_figure(fig, study_dir, f'{_STUDY_NAME}_{metric_key}_{tag}.png') + + # CSV and JSON (all offsets together) + write_csv(cfg.output_dir, _STUDY_NAME, specs=specs, results=results) + groups: list[dict[str, Any]] = utils.build_groups_by_keys( + specs, + results, + [ + ('offset_y', lambda s: s.offset_y), + ('offset_x', lambda s: s.offset_x), + ('box_size', lambda s: s.box_size), + ('sigma', lambda s: s.sigma_y), + ], + ) + write_json_summary( + cfg.output_dir, + _STUDY_NAME, + specs=specs, + results=results, + groups=groups, + config_used=config_to_dict(cfg), + ) + _LOG.info('Study %s outputs written to %s', _STUDY_NAME, study_dir) diff --git a/src/characterize_gauss_fit/study_constraints.py b/src/characterize_gauss_fit/study_constraints.py new file mode 100644 index 0000000..d37a697 --- /dev/null +++ b/src/characterize_gauss_fit/study_constraints.py @@ -0,0 +1,300 @@ +################################################################################ +# characterize_gauss_fit/study_constraints.py +################################################################################ + +"""Study 5: Constraint modes (fixed vs floating PSF parameters). + +Tests how fixing sigma or angle (correctly or with error) affects the accuracy +of position, scale, sigma, and angle recovery. All four metrics are reported. +""" + +from __future__ import annotations + +import dataclasses +import logging +import math +import pathlib +from typing import Any + +import numpy as np + +from characterize_gauss_fit import _study_utils as utils +from characterize_gauss_fit.config import Config, config_to_dict +from characterize_gauss_fit.executor import run_trials +from characterize_gauss_fit.output import write_csv, write_json_summary +from characterize_gauss_fit.plotting import plot_constraint_summary, save_figure +from characterize_gauss_fit.trial import TrialResult, TrialSpec + +_LOG = logging.getLogger(__name__) +_STUDY_NAME = 'constraint_modes' + +_RNG_SEED_BASE = 5000 + + +@dataclasses.dataclass(frozen=True) +class ConstraintMode: + """A single fitter constraint configuration for Study 5. + + Specifies which PSF parameters are fixed and what values they are fixed at. + + When ``sigma_is_fraction`` is ``True``, ``fit_sigma_y`` and ``fit_sigma_x`` + are interpreted as *error fractions* of the true sigma: ``0.0`` means fix + at the true value, positive means fix at ``true_sigma * (1 + fraction)``. + When ``sigma_is_fraction`` is ``False``, ``fit_sigma_y`` and ``fit_sigma_x`` + are actual sigma values (or ``None`` meaning float during fitting). + + For ``fit_angle``: ``None`` = float, ``0.0`` = fix at true angle, other + value = fix at ``true_angle + fit_angle`` radians. + """ + + label: str + fit_sigma_y: float | None + fit_sigma_x: float | None + fit_angle: float | None + sigma_is_fraction: bool = False + + +def _build_modes(cfg: Config) -> list[ConstraintMode]: + """Build the list of constraint modes from the study configuration. + + Modes cover the full spectrum from all-floating to all-fixed-with-error. + + Parameters: + cfg: The active :class:`~config.Config` instance. + + Returns: + A list of :class:`ConstraintMode` objects. + """ + study = cfg.studies.constraint_modes + modes: list[ConstraintMode] = [] + + # All parameters float. + modes.append(ConstraintMode('all_float', None, None, None)) + + # Sigma fixed at true value (error_frac=0) and with each error fraction. + # Angle is left to float in all sigma-fixed modes. + for frac in study.sigma_error_fractions: + if frac == 0.0: + label = 'sigma_fixed_correct' + else: + label = f'sigma_fixed_err{round(frac * 100):d}pct' + modes.append(ConstraintMode(label, frac, frac, None, sigma_is_fraction=True)) + + # Angle fixed at true value, sigma floats. + modes.append(ConstraintMode('angle_fixed_correct', None, None, 0.0)) + modes.append( + ConstraintMode( + 'angle_fixed_error', + None, + None, + study.angle_error_rad, + ) + ) + + # All fixed at correct values. + modes.append(ConstraintMode('all_fixed_correct', 0.0, 0.0, 0.0, sigma_is_fraction=True)) + + # All fixed with combined errors (only when sigma_error_fractions is non-empty). + if study.sigma_error_fractions: + max_frac = max(study.sigma_error_fractions) + modes.append( + ConstraintMode( + 'all_fixed_errors', + max_frac, + max_frac, + study.angle_error_rad, + sigma_is_fraction=True, + ) + ) + + return modes + + +def build_specs(cfg: Config) -> tuple[list[TrialSpec], list[ConstraintMode]]: + """Build trial specs for Study 5 and return the mode list for reference. + + Each (mode, shape) combination produces one trial. The fixed sigma/angle + values are computed from the true shape values and the mode's error fractions. + + Parameters: + cfg: The active :class:`~config.Config` instance. + + Returns: + A tuple of ``(specs, modes)`` where ``specs`` is the ordered trial list + and ``modes`` is the constraint mode list used to construct them. + """ + study = cfg.studies.constraint_modes + modes = _build_modes(cfg) + specs: list[TrialSpec] = [] + seed = _RNG_SEED_BASE + + for mode in modes: + for shape in study.psf_shapes: + true_sigma_y, true_sigma_x = shape.sigma + true_angle = shape.angle + + # Resolve fixed sigma values from mode fractions. + if mode.fit_sigma_y is None: + fit_sigma_y: float | None = None + elif mode.sigma_is_fraction: + fit_sigma_y = true_sigma_y * (1.0 + float(mode.fit_sigma_y)) + else: + fit_sigma_y = mode.fit_sigma_y + + if mode.fit_sigma_x is None: + fit_sigma_x: float | None = None + elif mode.sigma_is_fraction: + fit_sigma_x = true_sigma_x * (1.0 + float(mode.fit_sigma_x)) + else: + fit_sigma_x = mode.fit_sigma_x + + # Resolve fixed angle values from mode. + if mode.fit_angle is None: + fit_angle: float | None = None + elif mode.fit_angle == 0.0: + fit_angle = true_angle + else: + fit_angle = true_angle + float(mode.fit_angle) + + specs.append( + utils.make_spec( + sigma_y=true_sigma_y, + sigma_x=true_sigma_x, + angle=true_angle, + offset_y=study.offset[0], + offset_x=study.offset[1], + scale=study.scale, + base=cfg.generation.base, + box_size=study.box_size, + fitting=study.fitting, + fit_sigma_y=fit_sigma_y, + fit_sigma_x=fit_sigma_x, + fit_angle=fit_angle, + rng_seed=seed, + ) + ) + seed += 1 + + return specs, modes + + +def run(cfg: Config, *, num_workers: int = 1) -> None: + """Execute Study 5 and write all outputs. + + Parameters: + cfg: The active :class:`~config.Config` instance. + num_workers: Number of parallel worker processes. + """ + study = cfg.studies.constraint_modes + if not study.enabled: + _LOG.info('Study %s is disabled; skipping.', _STUDY_NAME) + return + + specs, modes = build_specs(cfg) + _LOG.info('Study %s: %d trials', _STUDY_NAME, len(specs)) + + results = run_trials( + specs, + num_workers=num_workers, + progress_callback=utils.progress_callback(_STUDY_NAME), + ) + + study_dir = utils.ensure_study_dir(cfg.output_dir, _STUDY_NAME) + _write_outputs(cfg, specs, results, study_dir=study_dir, modes=modes) + + +def _write_outputs( + cfg: Config, + specs: list[TrialSpec], + results: list[TrialResult], + *, + study_dir: pathlib.Path, + modes: list[ConstraintMode], +) -> None: + """Write CSV, JSON, and PNG outputs for Study 5. + + Parameters: + cfg: The active :class:`~config.Config` instance. + specs: All trial specifications. + results: All trial results. + study_dir: Output subdirectory. + modes: Constraint modes used to generate specs. + """ + study = cfg.studies.constraint_modes + n_shapes = len(study.psf_shapes) + n_modes = len(modes) + + shape_labels = [ + f's=({s.sigma[0]:.1f},{s.sigma[1]:.1f}),a={math.degrees(s.angle):.0f}\u00b0' + for s in study.psf_shapes + ] + mode_labels = [m.label for m in modes] + + # Build (n_modes, n_shapes) arrays for each metric. + pos_err_vals = np.full((n_modes, n_shapes), float('nan')) + pos_err_y_vals = np.full((n_modes, n_shapes), float('nan')) + pos_err_x_vals = np.full((n_modes, n_shapes), float('nan')) + scale_err_vals = np.full((n_modes, n_shapes), float('nan')) + sigma_y_err_vals = np.full((n_modes, n_shapes), float('nan')) + angle_err_vals = np.full((n_modes, n_shapes), float('nan')) + + for m_idx in range(n_modes): + for s_idx in range(n_shapes): + result = results[m_idx * n_shapes + s_idx] + if result.converged: + pos_err_vals[m_idx, s_idx] = result.pos_err + pos_err_y_vals[m_idx, s_idx] = abs(result.pos_err_y) + pos_err_x_vals[m_idx, s_idx] = abs(result.pos_err_x) + if np.isfinite(result.scale_err): + scale_err_vals[m_idx, s_idx] = abs(result.scale_err) + if result.sigma_y_err is not None: + sigma_y_err_vals[m_idx, s_idx] = abs(result.sigma_y_err) + if result.angle_err is not None and np.isfinite(result.angle_err): + angle_err_vals[m_idx, s_idx] = math.degrees(abs(result.angle_err)) + + fig = plot_constraint_summary( + mode_labels, + shape_labels, + pos_err_vals, + pos_err_y_vals, + pos_err_x_vals, + scale_err_vals, + sigma_y_err_vals, + angle_err_vals, + title='Effect of parameter constraints on fitting accuracy', + note=( + f'PSF: sigma and angle from each shape (see legend); box_size = {study.box_size} px;' + f' offset = ({study.offset[0]:+.2f}, {study.offset[1]:+.2f}) px;' + f' scale = {study.scale:.2g}; one noiseless trial per (mode, shape)\n' + 'Background / noise: none injected\n' + 'Fitting: varies by x-axis mode -- "all_float" = sigma_y, sigma_x, angle all float;' + ' "sigma_fixed_*" = sigmas locked to stated value; "angle_fixed_*" = angle locked;\n' + ' "_correct" = fixed at true value; "_errors" = fixed at true value' + ' + sigma_error_frac or' + f' + {study.angle_error_rad:.2f} rad' + f' ({math.degrees(study.angle_error_rad):.0f}\u00b0)' + ), + ) + save_figure(fig, study_dir, f'{_STUDY_NAME}_summary.png') + + write_csv(cfg.output_dir, _STUDY_NAME, specs=specs, results=results) + + # Each (mode, shape) pair maps to exactly one spec at a known position. + groups: list[dict[str, Any]] = [ + { + 'constraint_mode_idx': m_idx, + 'psf_shape_idx': s_idx, + 'indices': [m_idx * n_shapes + s_idx], + } + for m_idx in range(n_modes) + for s_idx in range(n_shapes) + ] + write_json_summary( + cfg.output_dir, + _STUDY_NAME, + specs=specs, + results=results, + groups=groups, + config_used=config_to_dict(cfg), + ) + _LOG.info('Study %s outputs written to %s', _STUDY_NAME, study_dir) diff --git a/src/characterize_gauss_fit/study_hot_pixels.py b/src/characterize_gauss_fit/study_hot_pixels.py new file mode 100644 index 0000000..f1f843d --- /dev/null +++ b/src/characterize_gauss_fit/study_hot_pixels.py @@ -0,0 +1,286 @@ +################################################################################ +# characterize_gauss_fit/study_hot_pixels.py +################################################################################ + +"""Study 8: Hot pixel rejection. + +Varies the number, amplitude, and sigma-rejection threshold for hot pixels to +measure how effectively the PSF fitter's bad-pixel rejection works. +""" + +from __future__ import annotations + +import dataclasses +import logging +import pathlib +from typing import Any + +import numpy as np +import numpy.typing as npt + +from characterize_gauss_fit import _study_utils as utils +from characterize_gauss_fit.config import Config, config_to_dict +from characterize_gauss_fit.executor import run_trials +from characterize_gauss_fit.output import write_csv, write_json_summary +from characterize_gauss_fit.plotting import plot_line_with_bands, save_figure +from characterize_gauss_fit.trial import TrialResult, TrialSpec + +_LOG = logging.getLogger(__name__) +_STUDY_NAME = 'hot_pixel_rejection' + + +def _make_num_sigma_list( + num_sigma_with_null: bool, num_sigma_values: list[float] +) -> list[float | None]: + """Build the ordered num_sigma list, prepending None when requested. + + Parameters: + num_sigma_with_null: If ``True``, prepend ``None`` (no rejection). + num_sigma_values: The configured threshold values. + + Returns: + Ordered list of thresholds including ``None`` when requested. + """ + result: list[float | None] = [] + if num_sigma_with_null: + result.append(None) + result.extend(num_sigma_values) + return result + + +def build_specs(cfg: Config) -> list[TrialSpec]: + """Build trial specs for Study 8. + + Iterates over (num_hot_pixels, num_sigma, hot_amplitude) combinations. + For each, generates ``noise_samples`` trials with randomised hot-pixel + positions (different RNG seeds). + + Parameters: + cfg: The active :class:`~config.Config` instance. + + Returns: + A list of :class:`~trial.TrialSpec` objects ordered by + [num_hot, num_sigma_val, hot_amp, trial_idx]. + """ + study = cfg.studies.hot_pixel_rejection + scale = cfg.generation.scale + noise_rms = utils.snr_to_noise_rms(study.snr, scale) + + # Build num_sigma list including null if requested. + num_sigma_list = _make_num_sigma_list(study.num_sigma_with_null, study.num_sigma_values) + + specs: list[TrialSpec] = [] + seed = 8000 + + for n_hot in study.num_hot_pixels: + for ns_val in num_sigma_list: + for hot_amp in study.hot_amplitudes: + fitting = dataclasses.replace(study.fitting, num_sigma=ns_val) + for _ in range(study.noise_samples): + specs.append( + utils.make_spec( + sigma_y=study.sigma[0], + sigma_x=study.sigma[1], + angle=0.0, + offset_y=study.offset[0], + offset_x=study.offset[1], + scale=scale, + base=cfg.generation.base, + box_size=study.box_size, + fitting=fitting, + fit_angle=0.0, + noise_rms=noise_rms, + hot_pixel_count=n_hot, + hot_pixel_amplitude=hot_amp, + rng_seed=seed, + ) + ) + seed += 1 + + return specs + + +def run(cfg: Config, *, num_workers: int = 1) -> None: + """Execute Study 8 and write all outputs. + + Parameters: + cfg: The active :class:`~config.Config` instance. + num_workers: Number of parallel worker processes. + """ + study = cfg.studies.hot_pixel_rejection + if not study.enabled: + _LOG.info('Study %s is disabled; skipping.', _STUDY_NAME) + return + + specs = build_specs(cfg) + _LOG.info('Study %s: %d trials', _STUDY_NAME, len(specs)) + + results = run_trials( + specs, + num_workers=num_workers, + progress_callback=utils.progress_callback(_STUDY_NAME), + ) + + study_dir = utils.ensure_study_dir(cfg.output_dir, _STUDY_NAME) + _write_outputs(cfg, specs, results, study_dir) + + +def _convergence_means( + num_sigma_list: list[float | None], + n_hot_list: list[int], + hot_amp: float, + bucket_map: dict[tuple[int, float | None, float], list[TrialResult]], +) -> list[npt.NDArray[np.float64]]: + """Compute per-series convergence fractions for a given hot amplitude. + + Parameters: + num_sigma_list: Ordered list of num_sigma thresholds tested. + n_hot_list: Ordered list of hot-pixel counts tested. + hot_amp: The hot-pixel amplitude being examined. + bucket_map: Pre-built lookup mapping (n_hot, num_sigma, hot_amp) + to a list of :class:`~trial.TrialResult` objects. + + Returns: + A list of 1-D float64 arrays, one per ``num_sigma_list`` entry. + Each element is the mean convergence fraction across the + ``n_hot_list`` axis (1.0 = all converged, 0.0 = all failed). + """ + series: list[npt.NDArray[np.float64]] = [] + for ns_val in num_sigma_list: + fracs: list[float] = [] + for n_hot in n_hot_list: + bucket = bucket_map.get((n_hot, ns_val, round(hot_amp, 9)), []) + if len(bucket) == 0: + fracs.append(float('nan')) + else: + fracs.append(float(np.mean([float(r.converged) for r in bucket]))) + series.append(np.array(fracs, dtype=np.float64)) + return series + + +def _write_outputs( + cfg: Config, + specs: list[TrialSpec], + results: list[TrialResult], + study_dir: pathlib.Path, +) -> None: + """Write CSV, JSON, and PNG outputs for Study 8. + + Parameters: + cfg: The active :class:`~config.Config` instance. + specs: All trial specifications. + results: All trial results. + study_dir: Output subdirectory. + """ + study = cfg.studies.hot_pixel_rejection + scale = cfg.generation.scale + noise_rms_val = utils.snr_to_noise_rms(study.snr, scale) + + num_sigma_list = _make_num_sigma_list(study.num_sigma_with_null, study.num_sigma_values) + + n_hot_list = study.num_hot_pixels + hot_amps = study.hot_amplitudes + + x_arr = np.array(n_hot_list, dtype=float) + ns_labels = ['no_rejection' if ns is None else f'num_sigma={ns:.0f}' for ns in num_sigma_list] + + # Build O(1) lookup: (hot_pixel_count, num_sigma, hot_pixel_amplitude) -> bucket. + bucket_map: dict[tuple[int, float | None, float], list[TrialResult]] = {} + for spec, result in zip(specs, results, strict=True): + key: tuple[int, float | None, float] = ( + spec.hot_pixel_count, + spec.num_sigma, + round(spec.hot_pixel_amplitude, 9), + ) + if key not in bucket_map: + bucket_map[key] = [] + bucket_map[key].append(result) + + for ha_idx, hot_amp in enumerate(hot_amps): + plot_note = ( + f'PSF: sigma_y = {study.sigma[0]:.1f}, sigma_x = {study.sigma[1]:.1f} px (fixed);' + f' angle = 0\u00b0 (fixed); box_size = {study.box_size} px; scale = {scale:.2g}\n' + f'Offset: Y = {study.offset[0]:+.2f}, X = {study.offset[1]:+.2f} px from pixel' + ' centre (fixed for all trials)\n' + f'Noise: Gaussian, noise_rms = {noise_rms_val:.3g} (SNR = {study.snr:.0f});' + f' {study.noise_samples} independent trials per condition;' + ' hot-pixel positions randomized per trial\n' + 'Fitting: sigma_y and sigma_x float freely; angle fixed at 0\u00b0;' + ' num_sigma rejection = series label (see legend);' + f' hot-pixel amplitude = {hot_amp:.0f}\u00d7 PSF peak (see title)' + ) + # --- Convergence-rate plot ------------------------------------------- + conv_means = _convergence_means(num_sigma_list, n_hot_list, hot_amp, bucket_map) + conv_stds = [np.zeros_like(m) for m in conv_means] # deterministic fraction + fig = plot_line_with_bands( + x_arr, + conv_means, + conv_stds, + labels=ns_labels, + title=f'Convergence fraction vs. hot pixels -- amplitude={hot_amp:.0f}x peak', + xlabel='Number of hot pixels', + ylabel='Fraction of trials converged', + log_y=False, + note=plot_note, + ) + save_figure( + fig, + study_dir, + f'{_STUDY_NAME}_convergence_hotamp{ha_idx}.png', + ) + + # --- Position-error plots -------------------------------------------- + for metric_attr, metric_label, fname_prefix in [ + ('pos_err', 'Mean position error, Euclidean (pixels)', 'pos_err'), + ('pos_err_y', 'Mean |pos_err_y| (pixels)', 'pos_err_y'), + ('pos_err_x', 'Mean |pos_err_x| (pixels)', 'pos_err_x'), + ]: + y_means: list[npt.NDArray[np.float64]] = [] + y_stds: list[npt.NDArray[np.float64]] = [] + for ns_val, _ns_label in zip(num_sigma_list, ns_labels, strict=True): + means: list[float] = [] + stds: list[float] = [] + for n_hot_count in n_hot_list: + bucket = bucket_map.get((n_hot_count, ns_val, round(hot_amp, 9)), []) + arr = np.abs(utils.collect_metric(bucket, metric_attr)) + means.append(utils.safe_nanmean(arr)) + stds.append(utils.safe_nanstd(arr)) + y_means.append(np.array(means)) + y_stds.append(np.array(stds)) + + fig = plot_line_with_bands( + x_arr, + y_means, + y_stds, + labels=ns_labels, + title=( + f'{metric_label} vs. hot pixels -- amplitude={hot_amp:.0f}x peak' + '\n(missing lines = 0 % convergence; see companion convergence plot)' + ), + xlabel='Number of hot pixels', + ylabel=metric_label, + log_y=True, + note=plot_note, + ) + save_figure(fig, study_dir, f'{_STUDY_NAME}_{fname_prefix}_hotamp{ha_idx}.png') + + write_csv(cfg.output_dir, _STUDY_NAME, specs=specs, results=results) + + groups: list[dict[str, Any]] = utils.build_groups_by_keys( + specs, + results, + [ + ('num_hot_pixels', lambda s: s.hot_pixel_count), + ('num_sigma', lambda s: s.num_sigma), + ('hot_amplitude', lambda s: s.hot_pixel_amplitude), + ], + ) + write_json_summary( + cfg.output_dir, + _STUDY_NAME, + specs=specs, + results=results, + groups=groups, + config_used=config_to_dict(cfg), + ) + _LOG.info('Study %s outputs written to %s', _STUDY_NAME, study_dir) diff --git a/src/characterize_gauss_fit/study_min_offset.py b/src/characterize_gauss_fit/study_min_offset.py new file mode 100644 index 0000000..ac133ff --- /dev/null +++ b/src/characterize_gauss_fit/study_min_offset.py @@ -0,0 +1,322 @@ +################################################################################ +# characterize_gauss_fit/study_min_offset.py +################################################################################ + +"""Study 3: Minimum detectable position offset. + +Measures how small a sub-pixel offset delta can be reliably recovered as a +function of PSF sigma and noise level. Reports both mean position error and a +recovery fraction metric. +""" + +from __future__ import annotations + +import logging +import pathlib +from typing import Any + +import numpy as np +import numpy.typing as npt + +from characterize_gauss_fit import _study_utils as utils +from characterize_gauss_fit.config import Config, StudyMinDetectableOffsetConfig, config_to_dict +from characterize_gauss_fit.executor import run_trials +from characterize_gauss_fit.output import write_csv, write_json_summary +from characterize_gauss_fit.plotting import ( + plot_line_with_bands, + plot_recovery_fraction_heatmap, + save_figure, +) +from characterize_gauss_fit.trial import TrialResult, TrialSpec + +_LOG = logging.getLogger(__name__) +_STUDY_NAME = 'min_detectable_offset' + +_RNG_SEED_BASE = 3000 + +# A string sentinel used in CSV/JSON to label the noiseless condition. +_NOISELESS_LABEL = 'noiseless' + + +def _build_conditions( + study: StudyMinDetectableOffsetConfig, scale: float +) -> list[tuple[float, str]]: + """Build the list of noise conditions for Study 3. + + Parameters: + study: The study configuration. + scale: PSF amplitude scale factor. + + Returns: + List of ``(noise_rms, label)`` pairs ordered as noiseless-first then + per-SNR. + """ + conditions: list[tuple[float, str]] = [] + if study.include_noiseless: + conditions.append((0.0, _NOISELESS_LABEL)) + for snr_val in study.snr_values: + conditions.append((utils.snr_to_noise_rms(snr_val, scale), f'snr_{snr_val:.0f}')) + return conditions + + +def build_specs(cfg: Config) -> list[TrialSpec]: + """Build trial specs for Study 3. + + For each (delta, sigma) pair and each noise condition (noiseless + each + SNR), generates ``noise_samples`` trials with different random seeds + (1 trial for noiseless). + + Parameters: + cfg: The active :class:`~config.Config` instance. + + Returns: + A flat list of :class:`~trial.TrialSpec` objects. + """ + study = cfg.studies.min_detectable_offset + scale = cfg.generation.scale + specs: list[TrialSpec] = [] + seed = _RNG_SEED_BASE + + # Noise conditions: (noise_rms, snr_label) pairs. + conditions = _build_conditions(study, scale) + + for delta in study.delta_offsets: + for sigma in study.sigmas: + for noise_rms, _snr_label in conditions: + n_trials = 1 if noise_rms == 0.0 else study.noise_samples + for _ in range(n_trials): + specs.append( + utils.make_spec( + sigma_y=sigma, + sigma_x=sigma, + angle=0.0, + # Inject the offset purely in X for simplicity. + offset_y=0.0, + offset_x=delta, + scale=scale, + base=cfg.generation.base, + box_size=study.box_size, + fitting=study.fitting, + fit_angle=0.0, + noise_rms=noise_rms, + rng_seed=seed, + ) + ) + seed += 1 + + return specs + + +def run(cfg: Config, *, num_workers: int = 1) -> None: + """Execute Study 3 and write all outputs. + + Parameters: + cfg: The active :class:`~config.Config` instance. + num_workers: Number of parallel worker processes. + """ + study = cfg.studies.min_detectable_offset + if not study.enabled: + _LOG.info('Study %s is disabled; skipping.', _STUDY_NAME) + return + + specs = build_specs(cfg) + _LOG.info('Study %s: %d trials', _STUDY_NAME, len(specs)) + + results = run_trials( + specs, + num_workers=num_workers, + progress_callback=utils.progress_callback(_STUDY_NAME), + ) + + study_dir = utils.ensure_study_dir(cfg.output_dir, _STUDY_NAME) + _write_outputs(cfg, specs, results, study_dir=study_dir) + + +def _write_outputs( + cfg: Config, + specs: list[TrialSpec], + results: list[TrialResult], + *, + study_dir: pathlib.Path, +) -> None: + """Write CSV, JSON, and PNG outputs for Study 3. + + Parameters: + cfg: The active :class:`~config.Config` instance. + specs: All trial specifications. + results: All trial results. + study_dir: Output subdirectory. + + Raises: + RuntimeError: If the number of result buckets consumed during bucketing + does not match the total number of results returned by + :func:`~executor.run_trials`, indicating a mismatch between the + specs generated by :func:`build_specs` and the results list. + """ + study = cfg.studies.min_detectable_offset + scale = cfg.generation.scale + deltas = study.delta_offsets + sigmas = study.sigmas + + conditions = _build_conditions(study, scale) + + # Map (delta, sigma, condition_label) -> list of TrialResult. + # Slicing by index is safe here because build_specs generates specs in the + # same deterministic order (delta, sigma, condition, trial_idx) and + # run_trials preserves spec ordering. + result_map: dict[tuple[float, float, str], list[TrialResult]] = {} + idx = 0 + for delta in deltas: + for sigma in sigmas: + for noise_rms, cond_label in conditions: + n_trials = 1 if noise_rms == 0.0 else study.noise_samples + bucket_key = (delta, sigma, cond_label) + result_map[bucket_key] = results[idx : idx + n_trials] + idx += n_trials + if idx != len(results): + raise RuntimeError( + f'result_map bucketing consumed {idx} results but {len(results)} were returned' + ) + + x_arr = np.array(deltas) + delta_labels = [f'{d:.3g}' for d in deltas] + sigma_labels = [f'{s:.2g}' for s in sigmas] + + # Line plots: one per condition, three metrics (Euclidean, Y-axis, X-axis). + for noise_rms, cond_label in conditions: + noise_desc = ( + 'noiseless (1 trial per point -- numerical precision floor)' + if noise_rms == 0.0 + else ( + f'Gaussian noise, noise_rms = {noise_rms:.3g}' + f' (SNR = scale / noise_rms = {scale / noise_rms:.0f});' + f' {study.noise_samples} independent trials per point' + ) + ) + for metric_attr, metric_label, fname_prefix in [ + ('pos_err', 'Mean position error (Euclidean, pixels)', 'pos_err'), + ('pos_err_y', 'Mean |pos_err_y| (pixels)', 'pos_err_y'), + ('pos_err_x', 'Mean |pos_err_x| (pixels)', 'pos_err_x'), + ]: + y_means: list[npt.NDArray[np.float64]] = [] + y_stds: list[npt.NDArray[np.float64]] = [] + line_labels: list[str] = [] + + for sigma in sigmas: + means_per_delta: list[float] = [] + stds_per_delta: list[float] = [] + for delta in deltas: + bucket = result_map[(delta, sigma, cond_label)] + errs = np.array( + [abs(float(getattr(r, metric_attr))) for r in bucket if r.converged], + dtype=np.float64, + ) + errs = errs[np.isfinite(errs)] + if len(errs) == 0: + means_per_delta.append(float('nan')) + stds_per_delta.append(float('nan')) + else: + means_per_delta.append(float(np.mean(errs))) + stds_per_delta.append(float(np.std(errs, ddof=1)) if len(errs) > 1 else 0.0) + y_means.append(np.array(means_per_delta)) + y_stds.append(np.array(stds_per_delta)) + line_labels.append(f'sigma={sigma:.2g}') + + fig = plot_line_with_bands( + x_arr, + y_means, + y_stds, + labels=line_labels, + title=f'Min detectable offset ({metric_label}) -- {cond_label}', + xlabel='Injected X offset (pixels)', + ylabel=metric_label, + log_x=True, + log_y=True, + note=( + 'PSF: sigma_y = sigma_x = sigma (see series label); angle = 0\u00b0 (fixed);' + f' box_size = {study.box_size} px; scale = {scale:.2g}\n' + 'Offset: Y = 0 px (fixed); X = delta (x-axis only); all positional offset' + ' is injected in the X direction\n' + f'Noise: {noise_desc}\n' + 'Fitting: sigma_y and sigma_x float freely; angle fixed at 0\u00b0;' + ' no background subtraction' + ), + ) + save_figure(fig, study_dir, f'{_STUDY_NAME}_{fname_prefix}_{cond_label}.png') + + # Recovery fraction heatmap: one per SNR condition (skip noiseless). + for noise_rms, cond_label in conditions: + if noise_rms == 0.0: + continue + rec_grid = np.zeros((len(sigmas), len(deltas))) + for s_idx, sigma in enumerate(sigmas): + for d_idx, delta in enumerate(deltas): + bucket = result_map[(delta, sigma, cond_label)] + rec_grid[s_idx, d_idx] = utils.recovery_fraction(bucket, delta=delta) + + fig = plot_recovery_fraction_heatmap( + rec_grid, + delta_labels, + sigma_labels, + title=(f'Recovery fraction (pos_err < delta/2) -- {cond_label}'), + xlabel='Injected X offset (delta, pixels)', + ylabel='Sigma (pixels)', + note=( + 'PSF: sigma_y = sigma_x = sigma (y-axis); angle = 0\u00b0 (fixed);' + f' box_size = {study.box_size} px; scale = {scale:.2g}\n' + 'Offset: Y = 0 px (fixed); X = delta (x-axis); all offset in X only\n' + f'Noise: Gaussian, noise_rms = {noise_rms:.3g}' + f' (SNR = {scale / noise_rms:.0f}); {study.noise_samples} trials per cell\n' + 'Fitting: sigma_y and sigma_x float freely; angle fixed at 0\u00b0;' + ' no background subtraction\n' + 'Recovery = fraction of trials where Euclidean pos_err < delta / 2' + ), + ) + save_figure(fig, study_dir, f'{_STUDY_NAME}_recovery_{cond_label}.png') + + write_csv(cfg.output_dir, _STUDY_NAME, specs=specs, results=results) + + groups = _build_json_groups(specs, results, conditions) + write_json_summary( + cfg.output_dir, + _STUDY_NAME, + specs=specs, + results=results, + groups=groups, + config_used=config_to_dict(cfg), + ) + _LOG.info('Study %s outputs written to %s', _STUDY_NAME, study_dir) + + +def _build_json_groups( + specs: list[TrialSpec], + results: list[TrialResult], + conditions: list[tuple[float, str]], +) -> list[dict[str, Any]]: + """Build JSON summary groups for Study 3. + + Groups by (delta_offset, sigma, noise_condition). + + Parameters: + specs: Trial specifications. + results: Trial results. + conditions: List of ``(noise_rms, label)`` pairs. + + Returns: + List of group dicts for :func:`~output.write_json_summary`. + """ + condition_map: dict[float, str] = dict(conditions) + + def _cond_label(spec: TrialSpec) -> str: + label = condition_map.get(spec.noise_rms) + return label if label is not None else f'noise_{spec.noise_rms:.3g}' + + return utils.build_groups_by_keys( + specs, + results, + [ + ('delta_offset', lambda s: s.offset_x), + ('sigma', lambda s: s.sigma_y), + ('noise_condition', _cond_label), + ], + ) diff --git a/src/characterize_gauss_fit/study_noise.py b/src/characterize_gauss_fit/study_noise.py new file mode 100644 index 0000000..6464c78 --- /dev/null +++ b/src/characterize_gauss_fit/study_noise.py @@ -0,0 +1,234 @@ +################################################################################ +# characterize_gauss_fit/study_noise.py +################################################################################ + +"""Study 7: Noise sensitivity (SNR sweep). + +Sweeps a log-spaced range of signal-to-noise ratios and measures position, +sigma, and scale recovery accuracy as a function of SNR and PSF sigma. +""" + +from __future__ import annotations + +import logging +import math +import pathlib +from typing import Any + +import numpy as np +import numpy.typing as npt + +from characterize_gauss_fit import _study_utils as utils +from characterize_gauss_fit.config import Config, StudyNoiseSensitivityConfig, config_to_dict +from characterize_gauss_fit.executor import run_trials +from characterize_gauss_fit.output import write_csv, write_json_summary +from characterize_gauss_fit.plotting import plot_line_with_bands, save_figure +from characterize_gauss_fit.trial import TrialResult, TrialSpec + +_LOG = logging.getLogger(__name__) +_STUDY_NAME = 'noise_sensitivity' + + +def _compute_snr_values(study: StudyNoiseSensitivityConfig) -> list[float]: + """Compute the log-spaced SNR values for Study 7. + + Parameters: + study: The study configuration. + + Returns: + A list of SNR values in log-spaced order. + """ + log_lo, log_hi = study.snr_log_range + return list(np.logspace(log_lo, log_hi, study.snr_steps)) + + +def build_specs(cfg: Config) -> list[TrialSpec]: + """Build trial specs for Study 7. + + For each (SNR, sigma) combination, generates ``noise_samples`` trials + with random offsets and different noise realisations. + + Parameters: + cfg: The active :class:`~config.Config` instance. + + Returns: + A list of :class:`~trial.TrialSpec` objects ordered by + [snr, sigma, trial_idx]. + """ + study = cfg.studies.noise_sensitivity + scale = cfg.generation.scale + + snr_values = _compute_snr_values(study) + + specs: list[TrialSpec] = [] + rng = np.random.default_rng(7000) + seed_counter = 7000 + + for snr in snr_values: + noise_rms = utils.snr_to_noise_rms(snr, scale) + for sigma in study.sigmas: + for _ in range(study.noise_samples): + # Randomise offset uniformly in [-0.5, 0.5]. + oy = float(rng.uniform(-0.5, 0.5)) + ox = float(rng.uniform(-0.5, 0.5)) + specs.append( + utils.make_spec( + sigma_y=sigma, + sigma_x=sigma, + angle=0.0, + offset_y=oy, + offset_x=ox, + scale=scale, + base=cfg.generation.base, + box_size=study.box_size, + fitting=study.fitting, + fit_angle=0.0, + noise_rms=noise_rms, + rng_seed=seed_counter, + ) + ) + seed_counter += 1 + + return specs + + +def run(cfg: Config, *, num_workers: int = 1) -> None: + """Execute Study 7 and write all outputs. + + Parameters: + cfg: The active :class:`~config.Config` instance. + num_workers: Number of parallel worker processes. + """ + study = cfg.studies.noise_sensitivity + if not study.enabled: + _LOG.info('Study %s is disabled; skipping.', _STUDY_NAME) + return + + specs = build_specs(cfg) + _LOG.info('Study %s: %d trials', _STUDY_NAME, len(specs)) + + results = run_trials( + specs, + num_workers=num_workers, + progress_callback=utils.progress_callback(_STUDY_NAME), + ) + + study_dir = utils.ensure_study_dir(cfg.output_dir, _STUDY_NAME) + _write_outputs(cfg, specs, results, study_dir) + + +def _write_outputs( + cfg: Config, + specs: list[TrialSpec], + results: list[TrialResult], + study_dir: pathlib.Path, +) -> None: + """Write CSV, JSON, and PNG outputs for Study 7. + + Parameters: + cfg: The active :class:`~config.Config` instance. + specs: All trial specifications. + results: All trial results. + study_dir: Output subdirectory. + """ + study = cfg.studies.noise_sensitivity + scale = cfg.generation.scale + + snr_values = _compute_snr_values(study) + n_snr = len(snr_values) + n_sigma = len(study.sigmas) + n_samples = study.noise_samples + + snr_arr = np.array(snr_values) + trials_per_snr_sigma = n_samples + + def _collect_metric_grid( + metric: str, + *, + abs_values: bool = False, + ) -> tuple[list[npt.NDArray[np.float64]], list[npt.NDArray[np.float64]]]: + """Compute per-SNR mean and std for one metric, one array per sigma. + + Parameters: + metric: Attribute name on :class:`~trial.TrialResult`. + abs_values: If ``True``, apply ``abs`` to each sample before + computing mean and std so that the error bands reflect the + magnitude distribution rather than signed-value distribution. + """ + # Defensive: check the spec ordering assumption holds. + expected_total = n_snr * n_sigma * n_samples + if len(results) != expected_total: + raise RuntimeError( + f'Expected {expected_total} results for noise_sensitivity but got {len(results)}' + ) + all_means: list[npt.NDArray[np.float64]] = [] + all_stds: list[npt.NDArray[np.float64]] = [] + for s_idx in range(n_sigma): + means: list[float] = [] + stds: list[float] = [] + for snr_idx in range(n_snr): + start = snr_idx * n_sigma * n_samples + s_idx * n_samples + bucket = results[start : start + trials_per_snr_sigma] + arr = utils.collect_metric(bucket, metric) + if abs_values: + arr = np.abs(arr) + means.append(utils.safe_nanmean(arr)) + stds.append(utils.safe_nanstd(arr)) + all_means.append(np.array(means)) + all_stds.append(np.array(stds)) + return all_means, all_stds + + sigma_labels = [f'sigma={s:.1f}' for s in study.sigmas] + + for metric, ylabel, fname in [ + ('pos_err', 'Position error, Euclidean (pixels)', 'pos_err_vs_snr.png'), + ('pos_err_y', '|pos_err_y| (pixels)', 'pos_err_y_vs_snr.png'), + ('pos_err_x', '|pos_err_x| (pixels)', 'pos_err_x_vs_snr.png'), + ('sigma_y_err', 'Relative |sigma_y| error', 'sigma_y_err_vs_snr.png'), + ('sigma_x_err', 'Relative |sigma_x| error', 'sigma_x_err_vs_snr.png'), + ('scale_err', 'Relative |scale| error', 'scale_err_vs_snr.png'), + ]: + # pos_err is a Euclidean distance (always non-negative); abs is not needed. + means, stds = _collect_metric_grid(metric, abs_values=(metric != 'pos_err')) + fig = plot_line_with_bands( + snr_arr, + means, + stds, + labels=sigma_labels, + title=f'{ylabel} vs. SNR', + xlabel='SNR (peak / noise RMS)', + ylabel=ylabel, + log_x=True, + log_y=True, + note=( + 'PSF: sigma_y = sigma_x = sigma (see series label); angle = 0\u00b0 (fixed);' + f' box_size = {study.box_size} px; scale = {scale:.2g}\n' + 'Offset: Y and X each drawn independently from Uniform[\u22120.5, +0.5] px' + ' per trial (different subpixel position every trial)\n' + 'Noise: Gaussian, noise_rms = scale / SNR (x-axis);' + f' {study.noise_samples} independent trials per (sigma, SNR) point\n' + 'Fitting: sigma_y and sigma_x float freely; angle fixed at 0\u00b0;' + ' no background subtraction' + ), + ) + save_figure(fig, study_dir, f'{_STUDY_NAME}_{fname}') + + write_csv(cfg.output_dir, _STUDY_NAME, specs=specs, results=results) + + groups: list[dict[str, Any]] = utils.build_groups_by_keys( + specs, + results, + [ + ('snr', lambda s: scale / s.noise_rms if s.noise_rms > 0 else math.inf), + ('sigma', lambda s: s.sigma_y), + ], + ) + write_json_summary( + cfg.output_dir, + _STUDY_NAME, + specs=specs, + results=results, + groups=groups, + config_used=config_to_dict(cfg), + ) + _LOG.info('Study %s outputs written to %s', _STUDY_NAME, study_dir) diff --git a/src/characterize_gauss_fit/study_offset.py b/src/characterize_gauss_fit/study_offset.py new file mode 100644 index 0000000..fc1a860 --- /dev/null +++ b/src/characterize_gauss_fit/study_offset.py @@ -0,0 +1,247 @@ +################################################################################ +# characterize_gauss_fit/study_offset.py +################################################################################ + +"""Study 2: Subpixel offset bias. + +Explores how the fractional pixel position of the PSF centre introduces +systematic bias in the recovered position. +""" + +from __future__ import annotations + +import logging +import pathlib +from collections.abc import Callable + +import numpy as np +import numpy.typing as npt + +from characterize_gauss_fit import _study_utils as utils +from characterize_gauss_fit.config import Config, config_to_dict +from characterize_gauss_fit.executor import run_trials +from characterize_gauss_fit.output import write_csv, write_json_summary +from characterize_gauss_fit.plotting import plot_heatmap, plot_line_with_bands, save_figure +from characterize_gauss_fit.trial import TrialResult, TrialSpec + +_LOG = logging.getLogger(__name__) +_STUDY_NAME = 'subpixel_offset' + +_RNG_SEED_BASE = 2000 # seed range 2000+ reserved for this study to avoid RNG seed collisions + +# Shared grouping key definitions for JSON summary and _write_outputs. +_GROUP_KEYS: list[tuple[str, Callable[[TrialSpec], float]]] = [ + ('sigma', lambda s: s.sigma_y), + ('offset_y', lambda s: round(s.offset_y, 6)), + ('offset_x', lambda s: round(s.offset_x, 6)), +] + + +def build_specs(cfg: Config) -> list[TrialSpec]: + """Build trial specs for Study 2. + + Generates a 2-D grid of (offset_y, offset_x) combinations for each sigma. + Sigma is left to float during fitting. + + Parameters: + cfg: The active :class:`~config.Config` instance. + + Returns: + A list of :class:`~trial.TrialSpec` objects, ordered by + [sigma, offset_y, offset_x]. + """ + study = cfg.studies.subpixel_offset + offsets = np.linspace(study.offset_range[0], study.offset_range[1], study.offset_steps) + specs: list[TrialSpec] = [] + seed = _RNG_SEED_BASE + for sigma in study.sigmas: + for oy in offsets: + for ox in offsets: + specs.append( + utils.make_spec( + sigma_y=sigma, + sigma_x=sigma, + angle=study.angle, + offset_y=float(oy), + offset_x=float(ox), + scale=cfg.generation.scale, + base=cfg.generation.base, + box_size=study.box_size, + fitting=study.fitting, + fit_angle=0.0, + rng_seed=seed, + ) + ) + seed += 1 + return specs + + +def run(cfg: Config, *, num_workers: int = 1) -> None: + """Execute Study 2 and write all outputs. + + Parameters: + cfg: The active :class:`~config.Config` instance. + num_workers: Number of parallel worker processes. + """ + study = cfg.studies.subpixel_offset + if not study.enabled: + _LOG.info('Study %s is disabled; skipping.', _STUDY_NAME) + return + + specs = build_specs(cfg) + _LOG.info('Study %s: %d trials', _STUDY_NAME, len(specs)) + + results = run_trials( + specs, + num_workers=num_workers, + progress_callback=utils.progress_callback(_STUDY_NAME), + ) + + study_dir = utils.ensure_study_dir(cfg.output_dir, _STUDY_NAME) + _write_outputs(cfg, specs, results, study_dir) + + +def _write_outputs( + cfg: Config, + specs: list[TrialSpec], + results: list[TrialResult], + study_dir: pathlib.Path, +) -> None: + """Write CSV, JSON, and PNG outputs for Study 2. + + Parameters: + cfg: The active :class:`~config.Config` instance. + specs: All trial specifications. + results: All trial results. + study_dir: Output subdirectory. + """ + study = cfg.studies.subpixel_offset + scale = cfg.generation.scale + offsets = list(np.linspace(study.offset_range[0], study.offset_range[1], study.offset_steps)) + n_off = study.offset_steps + sigmas = study.sigmas + + offset_labels = [f'{v:.2f}' for v in offsets] + + # Heatmaps: one set per sigma value — Euclidean error plus Y and X axes. + step = n_off * n_off # trials per sigma + for s_idx, sigma in enumerate(sigmas): + slice_results = results[s_idx * step : (s_idx + 1) * step] + grid = np.full((n_off, n_off), float('nan')) + grid_y = np.full((n_off, n_off), float('nan')) + grid_x = np.full((n_off, n_off), float('nan')) + fail_mask = np.zeros((n_off, n_off), dtype=bool) + for oy_idx in range(n_off): + for ox_idx in range(n_off): + r = slice_results[oy_idx * n_off + ox_idx] + if not r.converged: + fail_mask[oy_idx, ox_idx] = True + else: + grid[oy_idx, ox_idx] = r.pos_err + grid_y[oy_idx, ox_idx] = abs(r.pos_err_y) + grid_x[oy_idx, ox_idx] = abs(r.pos_err_x) + + for hmap, metric_label, fname in [ + (grid, 'Position error (Euclidean)', f'pos_err_sigma{sigma:.1f}.png'), + (grid_y, '|pos_err_y|', f'pos_err_y_sigma{sigma:.1f}.png'), + (grid_x, '|pos_err_x|', f'pos_err_x_sigma{sigma:.1f}.png'), + ]: + fig = plot_heatmap( + hmap, + offset_labels, + offset_labels, + title=f'{metric_label} vs. offset (sigma={sigma:.1f})', + xlabel='offset_x (pixels)', + ylabel='offset_y (pixels)', + cbar_label='log10(error)', + log_scale=True, + mask=fail_mask, + note=( + f'PSF: sigma_y = sigma_x = {sigma:.2g} px (fixed for this heatmap);' + f' angle = {study.angle:.0f}\u00b0 (fixed); box_size = {study.box_size} px;' + f' scale = {scale:.2g}; one noiseless trial per cell\n' + 'Offset: Y and X each swept over a' + f' {study.offset_steps}\u00d7{study.offset_steps}' + f' grid from {study.offset_range[0]:.2f} to {study.offset_range[1]:.2f} px' + ' (the two heatmap axes)\n' + 'Background / noise: none injected\n' + 'Fitting: sigma_y and sigma_x float freely; angle fixed at 0\u00b0;' + ' no background subtraction' + ), + ) + save_figure(fig, study_dir, f'{_STUDY_NAME}_{fname}') + + # Line plots: error vs offset_x at fixed offset_y (midpoint row), and + # error vs offset_y at fixed offset_x (midpoint column). + # mid_idx selects the midpoint of the configured offset range (n_off // 2). + mid_idx = n_off // 2 + x_arr = np.array(offsets) + + for metric_attr, metric_label, vary_axis in [ + ('pos_err', 'Position error (Euclidean, pixels)', 'offset_x'), + ('pos_err_y', '|pos_err_y| (pixels)', 'offset_x'), + ('pos_err_x', '|pos_err_x| (pixels)', 'offset_x'), + ('pos_err', 'Position error (Euclidean, pixels)', 'offset_y'), + ('pos_err_y', '|pos_err_y| (pixels)', 'offset_y'), + ('pos_err_x', '|pos_err_x| (pixels)', 'offset_y'), + ]: + y_means: list[npt.NDArray[np.float64]] = [] + y_stds: list[npt.NDArray[np.float64]] = [] + line_labels: list[str] = [] + + for s_idx, sigma in enumerate(sigmas): + slice_results = results[s_idx * step : (s_idx + 1) * step] + row: list[float] = [] + for idx in range(n_off): + if vary_axis == 'offset_x': + r = slice_results[mid_idx * n_off + idx] + else: + r = slice_results[idx * n_off + mid_idx] + val = getattr(r, metric_attr) if r.converged else None + row.append(float('nan') if val is None else abs(float(val))) + y_means.append(np.array(row, dtype=np.float64)) + y_stds.append(np.zeros(n_off, dtype=np.float64)) + line_labels.append(f'sigma={sigma:.1f}') + + fixed_val = offsets[mid_idx] + fixed_axis = 'offset_y' if vary_axis == 'offset_x' else 'offset_x' + safe_metric = metric_attr.replace('_', '') + fname_line = f'{safe_metric}_vs_{vary_axis}.png' + fig = plot_line_with_bands( + x_arr, + y_means, + y_stds, + labels=line_labels, + title=f'{metric_label} vs. {vary_axis} at {fixed_axis}={fixed_val:.2f}', + xlabel=f'{vary_axis} (pixels)', + ylabel=metric_label, + log_y=True, + note=( + f'PSF: sigma_y = sigma_x = sigma (series label); angle = {study.angle:.0f}\u00b0' + f' (fixed); box_size = {study.box_size} px; scale = {scale:.2g};' + ' one noiseless trial per point\n' + f'Offset: the fixed axis is held at its midpoint ({offsets[mid_idx]:.2f} px);' + ' the swept axis is the x-axis\n' + 'Background / noise: none injected\n' + 'Fitting: sigma_y and sigma_x float freely; angle fixed at 0\u00b0;' + ' no background subtraction' + ), + ) + save_figure(fig, study_dir, f'{_STUDY_NAME}_{fname_line}') + + write_csv(cfg.output_dir, _STUDY_NAME, specs=specs, results=results) + + groups = utils.build_groups_by_keys( + specs, + results, + _GROUP_KEYS, + ) + write_json_summary( + cfg.output_dir, + _STUDY_NAME, + specs=specs, + results=results, + groups=groups, + config_used=config_to_dict(cfg), + ) + _LOG.info('Study %s outputs written to %s', _STUDY_NAME, study_dir) diff --git a/src/characterize_gauss_fit/study_shape.py b/src/characterize_gauss_fit/study_shape.py new file mode 100644 index 0000000..f8e4ccd --- /dev/null +++ b/src/characterize_gauss_fit/study_shape.py @@ -0,0 +1,225 @@ +################################################################################ +# characterize_gauss_fit/study_shape.py +################################################################################ + +"""Study 4: Sigma asymmetry and angle recovery. + +Explores how well elongated (asymmetric) and rotated PSFs are recovered across +a grid of sigma ratios and rotation angles. +""" + +from __future__ import annotations + +import logging +import math +import pathlib +from typing import Any + +import numpy as np + +from characterize_gauss_fit import _study_utils as utils +from characterize_gauss_fit.config import Config, config_to_dict +from characterize_gauss_fit.executor import run_trials +from characterize_gauss_fit.output import write_csv, write_json_summary +from characterize_gauss_fit.plotting import plot_heatmap, save_figure +from characterize_gauss_fit.trial import TrialResult, TrialSpec + +_LOG = logging.getLogger(__name__) +_STUDY_NAME = 'sigma_asymmetry_angle' + +# When sigma_ratio is within this tolerance of 1.0, the PSF is effectively +# circular and angle is degenerate. Angle error is not meaningful in this case. +_CIRCULAR_TOLERANCE = 1e-3 + + +def build_specs(cfg: Config) -> list[TrialSpec]: + """Build trial specs for Study 4. + + For each (sigma_ratio, angle, sigma_x) combination, creates a spec with + all PSF parameters left to float. + + Parameters: + cfg: The active :class:`~config.Config` instance. + + Returns: + A list of :class:`~trial.TrialSpec` objects ordered by + [sigma_x, sigma_ratio, angle]. + """ + study = cfg.studies.sigma_asymmetry_angle + angles = np.linspace(0.0, math.pi, study.angle_steps) + specs: list[TrialSpec] = [] + seed = 4000 + for sigma_x in study.sigma_x_values: + for ratio in study.sigma_ratios: + sigma_y = sigma_x * ratio + for angle in angles: + specs.append( + utils.make_spec( + sigma_y=sigma_y, + sigma_x=sigma_x, + angle=float(angle), + offset_y=study.offset[0], + offset_x=study.offset[1], + scale=cfg.generation.scale, + base=cfg.generation.base, + box_size=study.box_size, + fitting=study.fitting, + # All parameters float. + rng_seed=seed, + ) + ) + seed += 1 + return specs + + +def run(cfg: Config, *, num_workers: int = 1) -> None: + """Execute Study 4 and write all outputs. + + Parameters: + cfg: The active :class:`~config.Config` instance. + num_workers: Number of parallel worker processes. + """ + study = cfg.studies.sigma_asymmetry_angle + if not study.enabled: + _LOG.info('Study %s is disabled; skipping.', _STUDY_NAME) + return + + specs = build_specs(cfg) + _LOG.info('Study %s: %d trials', _STUDY_NAME, len(specs)) + + results = run_trials( + specs, + num_workers=num_workers, + progress_callback=utils.progress_callback(_STUDY_NAME), + ) + + study_dir = utils.ensure_study_dir(cfg.output_dir, _STUDY_NAME) + _write_outputs(cfg, specs, results, study_dir) + + +def _write_outputs( + cfg: Config, + specs: list[TrialSpec], + results: list[TrialResult], + study_dir: pathlib.Path, +) -> None: + """Write CSV, JSON, and PNG outputs for Study 4. + + Parameters: + cfg: The active :class:`~config.Config` instance. + specs: All trial specifications. + results: All trial results. + study_dir: Output subdirectory. + """ + study = cfg.studies.sigma_asymmetry_angle + scale = cfg.generation.scale + angles = list(np.linspace(0.0, math.pi, study.angle_steps)) + n_angles = len(angles) + n_ratios = len(study.sigma_ratios) + n_sigma_x = len(study.sigma_x_values) + + angle_labels = [f'{math.degrees(a):.0f}\u00b0' for a in angles] + ratio_labels = [f'{r:.2f}' for r in study.sigma_ratios] + + trials_per_sigma_x = n_ratios * n_angles + + for sx_idx, sigma_x in enumerate(study.sigma_x_values): + base_idx = sx_idx * trials_per_sigma_x + pos_err_grid = np.full((n_ratios, n_angles), float('nan')) + pos_err_y_grid = np.full((n_ratios, n_angles), float('nan')) + pos_err_x_grid = np.full((n_ratios, n_angles), float('nan')) + angle_err_grid = np.full((n_ratios, n_angles), float('nan')) + sigma_y_err_grid = np.full((n_ratios, n_angles), float('nan')) + fail_mask = np.zeros((n_ratios, n_angles), dtype=bool) + + for r_idx, ratio in enumerate(study.sigma_ratios): + is_circular = abs(ratio - 1.0) < _CIRCULAR_TOLERANCE + for a_idx in range(n_angles): + result = results[base_idx + r_idx * n_angles + a_idx] + if not result.converged: + fail_mask[r_idx, a_idx] = True + continue + pos_err_grid[r_idx, a_idx] = result.pos_err + pos_err_y_grid[r_idx, a_idx] = abs(result.pos_err_y) + pos_err_x_grid[r_idx, a_idx] = abs(result.pos_err_x) + if result.sigma_y_err is not None: + sigma_y_err_grid[r_idx, a_idx] = abs(result.sigma_y_err) + if not is_circular and result.angle_err is not None: + err_deg = math.degrees(result.angle_err) % 180.0 + # Reduce modulo 90° to account for the sigma-swap equivalence: + # (sigma_y, sigma_x, theta) == (sigma_x, sigma_y, theta+90°). + # When both sigmas float the optimizer may land on either form, + # making the raw error jump to ~90° for a geometrically perfect + # fit. min(e, 180-e) maps the [0,180) range into [0,90] as it + # should be, and the modulo ensures negative or >180 values are + # handled correctly before the min. + angle_err_grid[r_idx, a_idx] = min(err_deg, 180.0 - err_deg) + + label = f'sigma_x={sigma_x:.1f}' + for data, metric_title, fname_prefix, cbar in [ + (pos_err_grid, f'Position error (Euclidean) -- {label}', 'pos_err', 'log10(pos error)'), + (pos_err_y_grid, f'|pos_err_y| -- {label}', 'pos_err_y', 'log10(|pos_err_y|)'), + (pos_err_x_grid, f'|pos_err_x| -- {label}', 'pos_err_x', 'log10(|pos_err_x|)'), + ( + angle_err_grid, + f'Angle error (\u00b0, mod 90\u00b0) -- {label}', + 'angle_err', + 'Angle error (\u00b0, mod 90\u00b0)', + ), + (sigma_y_err_grid, f'Rel sigma_y error -- {label}', 'sigma_y_err', 'log10(rel error)'), + ]: + use_log = 'pos' in fname_prefix or 'sigma' in fname_prefix + fig = plot_heatmap( + data, + angle_labels, + ratio_labels, + title=metric_title, + xlabel='Angle (degrees)', + ylabel='sigma_y / sigma_x ratio', + cbar_label=cbar, + log_scale=use_log, + mask=fail_mask, + note=( + f'PSF: sigma_x = {sigma_x:.2g} px (this panel); sigma_y = ratio \u00d7 sigma_x' + f' (y-axis); angle rotates 0\u2013180\u00b0 (x-axis); scale = {scale:.2g};' + ' one noiseless trial per cell\n' + f'Offset: Y = {study.offset[0]:+.2f}, X = {study.offset[1]:+.2f} px' + ' from pixel centre (fixed for all cells)\n' + 'Background / noise: none injected\n' + 'Fitting: sigma_y, sigma_x, and angle ALL float freely;' + ' no background subtraction\n' + 'Angle error is reduced mod 90\u00b0: (sigma_y,sigma_x,\u03b8) \u2261' + ' (sigma_x,sigma_y,\u03b8+90\u00b0) so raw error \u2248 90\u00b0 means' + ' a perfect fit in the swapped-sigma form' + ), + ) + save_figure(fig, study_dir, f'{_STUDY_NAME}_{fname_prefix}_sx{sigma_x:.1f}.png') + + write_csv(cfg.output_dir, _STUDY_NAME, specs=specs, results=results) + + groups: list[dict[str, Any]] = utils.build_groups_by_keys( + specs, + results, + [ + ('sigma_x', lambda s: s.sigma_x), + ( + 'sigma_ratio', + lambda s: round(s.sigma_y / s.sigma_x, 4) if s.sigma_x != 0.0 else None, + ), + ('angle_true', lambda s: round(s.angle, 4)), + ], + ) + write_json_summary( + cfg.output_dir, + _STUDY_NAME, + specs=specs, + results=results, + groups=groups, + config_used=config_to_dict(cfg), + ) + _LOG.info( + 'Study %s outputs written to %s (%d sigma_x panels)', + _STUDY_NAME, + study_dir, + n_sigma_x, + ) diff --git a/src/characterize_gauss_fit/test_config.yaml b/src/characterize_gauss_fit/test_config.yaml new file mode 100644 index 0000000..b6cb6dc --- /dev/null +++ b/src/characterize_gauss_fit/test_config.yaml @@ -0,0 +1,113 @@ +# test_config.yaml -- Reduced-grid configuration for rapid smoke-testing. +# +# Runs all eight studies but with the smallest viable parameter grids so +# that the entire suite completes in roughly 30-120 seconds on a single +# core. Use this to verify that all code paths execute correctly after +# code changes before committing. +# +# Run with: +# characterize_gauss_fit --config src/characterize_gauss_fit/test_config.yaml + +output_dir: ./gauss_fit_test +num_workers: 1 +noise_samples: 3 + +studies: + + # Study 1: 2 box sizes x 3 sigmas x 2 offsets = 12 trials + box_vs_sigma: + enabled: true + box_sizes: [9, 21] + sigmas: [0.5, 1.0, 2.0] + offsets: + - [0.0, 0.0] + - [0.25, 0.25] + angle: 0.0 + scale: 1.0 + fitting: + bkgnd_degree: null + + # Study 2: 3x3 offset grid x 1 sigma = 9 trials + subpixel_offset: + enabled: true + offset_steps: 3 + offset_range: [0.0, 0.5] + sigmas: [1.0] + box_size: 13 + angle: 0.0 + fitting: + bkgnd_degree: null + + # Study 3: 3 deltas x 2 sigmas x (1 noiseless + 1 snr) x 3 noise = 24 trials + min_detectable_offset: + enabled: true + delta_offsets: [0.01, 0.05, 0.2] + sigmas: [0.5, 1.5] + box_size: 13 + noise_samples: 3 + snr_values: [100.0] + include_noiseless: true + fitting: + bkgnd_degree: null + + # Study 4: 3 ratios x 3 angles x 1 sigma_x = 9 trials + sigma_asymmetry_angle: + enabled: true + sigma_ratios: [0.5, 1.0, 2.0] + angle_steps: 3 + sigma_x_values: [1.0] + box_size: 13 + offset: [0.25, 0.25] + fitting: + bkgnd_degree: null + + # Study 5: 5 modes x 2 shapes x 2 sigma_error_fractions = 20 trials (modes: all_float, + # sigma_fixed_correct, angle_fixed_correct, all_fixed_correct, all_fixed_errors) + constraint_modes: + enabled: true + sigma_error_fractions: [0.0, 0.3] + angle_error_rad: 0.3 + psf_shapes: + - sigma: [1.0, 1.0] + angle: 0.0 + - sigma: [0.5, 1.5] + angle: 0.7854 + box_size: 13 + offset: [0.25, 0.25] + scale: 1.5 + + # Study 6: 2 background types x 2 fitting degrees x 1 amplitude x 1 ignore_center x 2 offsets = 8 trials + background: + enabled: true + background_amplitudes: [0.1] + bkgnd_degrees: [1] + bkgnd_degrees_with_null: true + bkgnd_ignore_centers: [[2, 2]] + background_types: [none, constant] + box_size: 13 + sigma: [1.0, 1.0] + offsets: + - [0.0, 0.0] + - [0.25, 0.25] + + # Study 7: 4 SNR points x 1 sigma x 3 noise samples = 12 trials + noise_sensitivity: + enabled: true + snr_log_range: [1.0, 3.0] + snr_steps: 4 + sigmas: [1.0] + noise_samples: 3 + box_size: 13 + + # Study 8: 2 num_hot x 2 num_sigma x 1 amplitude x 3 noise samples = 12 trials + hot_pixel_rejection: + enabled: true + num_hot_pixels: [0, 3] + num_sigma_values: [3.0, 5.0] + num_sigma_with_null: false + hot_amplitudes: [20.0] + noise_samples: 3 + snr: 100.0 + box_size: 13 + sigma: [1.0, 1.0] + offset: [0.25, 0.25] diff --git a/src/characterize_gauss_fit/trial.py b/src/characterize_gauss_fit/trial.py new file mode 100644 index 0000000..47d8c10 --- /dev/null +++ b/src/characterize_gauss_fit/trial.py @@ -0,0 +1,413 @@ +################################################################################ +# characterize_gauss_fit/trial.py +################################################################################ + +"""Core trial engine: image synthesis, fitting, and result computation. + +A *trial* is a single run of the PSF fitter on a synthetically generated image +with known ground-truth parameters. This module provides: + +- :class:`TrialSpec` -- plain-data description of one trial (picklable). +- :class:`TrialResult` -- the outcome of one trial (picklable). +- :func:`synthesize_image` -- generate a synthetic image from a :class:`TrialSpec`. +- :func:`run_trial` -- execute a trial and return a :class:`TrialResult`. +""" + +from __future__ import annotations + +import dataclasses +import math +from typing import Any + +import numpy as np +import numpy.typing as npt + +from psfmodel.gaussian import GaussianPSF + +# Sentinel float used for "not applicable" / "fit did not converge". +_NAN = float('nan') + +# Background types supported by synthesize_image. +BACKGROUND_TYPE_NONE = 'none' +BACKGROUND_TYPE_CONSTANT = 'constant' +BACKGROUND_TYPE_LINEAR = 'linear' +BACKGROUND_TYPE_QUADRATIC = 'quadratic' +BACKGROUND_TYPE_NOISY_CONSTANT = 'noisy_constant' + + +# --------------------------------------------------------------------------- +# TrialSpec -- fully describes a single trial (all plain Python types). +# --------------------------------------------------------------------------- + + +@dataclasses.dataclass(frozen=True) +class TrialSpec: + """Complete specification for a single characterization trial. + + All fields use plain Python types so instances are safely picklable for + use with :mod:`concurrent.futures.ProcessPoolExecutor`. + """ + + # --- Ground-truth PSF parameters --- + sigma_y: float + sigma_x: float + angle: float + offset_y: float # fractional pixel offset from center + offset_x: float + scale: float + base: float + box_size: int + + # --- Background injection --- + background_type: str # one of the BACKGROUND_TYPE_* constants + background_amplitude: float # fraction of PSF peak + + # --- Noise --- + noise_rms: float # additive Gaussian noise std; 0 = noiseless + + # --- Hot pixels --- + hot_pixel_count: int + hot_pixel_amplitude: float # multiple of PSF peak + + # --- Fitter configuration --- + # GaussianPSF construction: None means the parameter floats during fitting. + fit_sigma_y: float | None # None = float; float = fixed to this value + fit_sigma_x: float | None + fit_angle: float | None + + bkgnd_degree: int | None + bkgnd_ignore_center: tuple[int, int] + bkgnd_num_sigma: float | None + num_sigma: float | None + max_bad_frac: float + allow_nonzero_base: bool + use_angular_params: bool + tolerance: float + search_limit: tuple[float, float] + scale_limit: float + + # --- Reproducibility --- + rng_seed: int + + +# --------------------------------------------------------------------------- +# TrialResult -- the outcome of a single trial. +# --------------------------------------------------------------------------- + + +@dataclasses.dataclass(frozen=True) +class TrialResult: + """Outcome of a single characterization trial. + + When ``converged`` is ``False``, all error fields contain ``float('nan')`` + and all ``*_fit`` fields are ``None`` (except ``scale_fit`` which is + ``float('nan')``). Failures are recorded as data, not skipped. + """ + + # --- Convergence --- + converged: bool + + # --- True (injected) values --- + sigma_y_true: float + sigma_x_true: float + angle_true: float + scale_true: float + offset_y_true: float + offset_x_true: float + + # --- Position errors (signed and Euclidean) --- + pos_err_y: float # fitted_y - true_y (NaN if not converged) + pos_err_x: float + pos_err: float # sqrt(pos_err_y**2 + pos_err_x**2) + + # --- Fitted parameter values (None if param was not floating) --- + sigma_y_fit: float | None + sigma_x_fit: float | None + angle_fit: float | None + scale_fit: float # NaN if not converged + + # --- Relative / absolute errors (None if param was not floating) --- + # For sigma: (fit - true) / true + sigma_y_err: float | None + sigma_x_err: float | None + # For angle: absolute difference in radians (NaN if degenerate circular PSF) + angle_err: float | None + # For scale: (fit - true) / true + scale_err: float # NaN if not converged + + +# --------------------------------------------------------------------------- +# Image synthesis helpers +# --------------------------------------------------------------------------- + + +def _make_background( + box_size: int, + background_type: str, + amplitude: float, + *, + rng: np.random.Generator, + noise_rms: float, +) -> npt.NDArray[np.float64]: + """Create a background array to add to the clean PSF image. + + Parameters: + box_size: Side length of the square patch. + background_type: One of the ``BACKGROUND_TYPE_*`` constants. + amplitude: Background amplitude as a fraction of the PSF peak. Used for + constant, linear, and quadratic types. + rng: NumPy random number generator (used for noisy_constant type). + noise_rms: Additive Gaussian noise standard deviation (separate from + the background amplitude noise in ``noisy_constant``). + + Returns: + A ``(box_size, box_size)`` float64 array containing the background. + + Raises: + ValueError: If ``background_type`` is not recognised. + """ + if background_type == BACKGROUND_TYPE_NONE: + return np.zeros((box_size, box_size), dtype=np.float64) + + # Normalised coordinate grid in [-1, 1]. + yy, xx = np.meshgrid( + np.linspace(-1.0, 1.0, box_size), + np.linspace(-1.0, 1.0, box_size), + indexing='ij', + ) + + if background_type == BACKGROUND_TYPE_CONSTANT: + return np.full((box_size, box_size), amplitude, dtype=np.float64) + + if background_type == BACKGROUND_TYPE_LINEAR: + # Tilted plane with slope proportional to amplitude. + return (amplitude * (0.6 * yy + 0.4 * xx)).astype(np.float64) + + if background_type == BACKGROUND_TYPE_QUADRATIC: + # Bowl-shaped quadratic surface. + return (amplitude * (0.5 * yy**2 + 0.3 * xx**2 - 0.2 * yy * xx)).astype(np.float64) + + if background_type == BACKGROUND_TYPE_NOISY_CONSTANT: + constant = np.full((box_size, box_size), amplitude, dtype=np.float64) + noise = rng.normal(0.0, noise_rms * 0.5, size=(box_size, box_size)) + return (constant + noise).astype(np.float64) + + raise ValueError( + f'Unknown background_type "{background_type}". ' + f'Valid options: {BACKGROUND_TYPE_NONE}, {BACKGROUND_TYPE_CONSTANT}, ' + f'{BACKGROUND_TYPE_LINEAR}, {BACKGROUND_TYPE_QUADRATIC}, ' + f'{BACKGROUND_TYPE_NOISY_CONSTANT}' + ) + + +def synthesize_image(spec: TrialSpec) -> tuple[npt.NDArray[np.float64], float, float]: + """Generate a synthetic PSF image from a :class:`TrialSpec`. + + Creates a pixel-integrated Gaussian PSF patch of size + ``(box_size, box_size)``, optionally adds a polynomial background, + additive Gaussian noise, and hot pixels. The PSF fills the entire image + (``eval_rect`` size == ``box_size``). + + The true PSF center in the image is ``(box_size//2 + offset_y, + box_size//2 + offset_x)`` in full-image coordinates (i.e. the integer + anchor is the image centre, and ``offset_y/x`` is the sub-pixel shift). + + Parameters: + spec: A fully populated :class:`TrialSpec` describing the trial. + + Returns: + A tuple ``(image, true_y, true_x)`` where ``image`` is the synthesized + float64 array and ``true_y``, ``true_x`` are the ground-truth PSF + centre in full-image pixel coordinates (floating point). + """ + rng = np.random.default_rng(spec.rng_seed) + + # --- Build clean PSF --- + psf_gen = GaussianPSF( + sigma=(spec.sigma_y, spec.sigma_x), + angle=spec.angle, + ) + image: npt.NDArray[np.float64] = psf_gen.eval_rect( + (spec.box_size, spec.box_size), + offset=(spec.offset_y + 0.5, spec.offset_x + 0.5), + scale=spec.scale, + base=spec.base, + angle=spec.angle, + ).astype(np.float64) + + psf_peak = float(np.max(image)) + if psf_peak <= 0.0: + psf_peak = 1.0 # guard against degenerate cases + + # --- Add background --- + background = _make_background( + spec.box_size, + spec.background_type, + spec.background_amplitude * psf_peak, + rng=rng, + noise_rms=spec.noise_rms, + ) + image = image + background + + # --- Add Gaussian noise --- + if spec.noise_rms > 0.0: + image = image + rng.normal(0.0, spec.noise_rms, size=image.shape) + + # --- Inject hot pixels --- + if spec.hot_pixel_count > 0: + total_pixels = spec.box_size * spec.box_size + if spec.hot_pixel_count > total_pixels: + raise ValueError( + f'hot_pixel_count ({spec.hot_pixel_count}) exceeds total pixels ' + f'({total_pixels}) for box_size={spec.box_size}' + ) + hot_amplitude = spec.hot_pixel_amplitude * psf_peak + # Randomise positions uniformly over the full patch. + flat_indices = rng.choice( + spec.box_size * spec.box_size, + size=spec.hot_pixel_count, + replace=False, + ) + rows, cols = np.unravel_index(flat_indices, (spec.box_size, spec.box_size)) + image[rows, cols] += hot_amplitude + + # --- True position in full-image coordinates --- + # find_position returns positions in pixel-left-edge convention: the centre of + # pixel i is at coordinate i + 0.5 (pixel 0 spans [0, 1]). Adding 0.5 to the + # integer centre index converts to this same convention so that error = 0 for + # a perfect fit. + center = spec.box_size // 2 + true_y = float(center) + 0.5 + spec.offset_y + true_x = float(center) + 0.5 + spec.offset_x + + return image.astype(np.float64), true_y, true_x + + +# --------------------------------------------------------------------------- +# Trial execution +# --------------------------------------------------------------------------- + + +def run_trial(spec: TrialSpec) -> TrialResult: + """Execute a single characterization trial. + + Synthesizes an image from ``spec``, fits it with + :meth:`~psfmodel.PSF.find_position`, and computes all error metrics. + If the fitter returns ``None`` (failure or convergence rejection), a + :class:`TrialResult` with ``converged=False`` and NaN error fields is + returned. The result is always valid and never raises. + + Parameters: + spec: A fully populated :class:`TrialSpec`. + + Returns: + A :class:`TrialResult` describing the fit outcome. + """ + image, true_y, true_x = synthesize_image(spec) + + # Build the fitter PSF (parameters are fixed at construction or left None to float). + fitter_psf = GaussianPSF( + sigma=(spec.fit_sigma_y, spec.fit_sigma_x), + angle=spec.fit_angle, + ) + + starting_point = (float(spec.box_size // 2), float(spec.box_size // 2)) + box_size = (spec.box_size, spec.box_size) + + find_position_kwargs: dict[str, Any] = { + 'bkgnd_degree': spec.bkgnd_degree, + 'bkgnd_ignore_center': spec.bkgnd_ignore_center, + 'bkgnd_num_sigma': spec.bkgnd_num_sigma, + 'num_sigma': spec.num_sigma, + 'max_bad_frac': spec.max_bad_frac, + 'allow_nonzero_base': spec.allow_nonzero_base, + 'use_angular_params': spec.use_angular_params, + # Uncertainty estimates (y_err, x_err, …) are not used by TrialResult; + # skipping the Jacobian roughly halves cost in batch runs. + 'compute_uncertainty': False, + 'tolerance': spec.tolerance, + 'search_limit': spec.search_limit, + 'scale_limit': spec.scale_limit, + } + + result = fitter_psf.find_position(image, box_size, starting_point, **find_position_kwargs) + + if result is None: + return TrialResult( + converged=False, + sigma_y_true=spec.sigma_y, + sigma_x_true=spec.sigma_x, + angle_true=spec.angle, + scale_true=spec.scale, + offset_y_true=spec.offset_y, + offset_x_true=spec.offset_x, + pos_err_y=_NAN, + pos_err_x=_NAN, + pos_err=_NAN, + sigma_y_fit=None, + sigma_x_fit=None, + angle_fit=None, + scale_fit=_NAN, + sigma_y_err=None, + sigma_x_err=None, + angle_err=None, + scale_err=_NAN, + ) + + fit_y, fit_x, details = result + + pos_err_y = fit_y - true_y + pos_err_x = fit_x - true_x + pos_err = math.hypot(pos_err_y, pos_err_x) + + scale_fit = float(details['scale']) + scale_err = (scale_fit - spec.scale) / spec.scale if spec.scale != 0.0 else _NAN + + # Retrieve fitted sigma values if they were floating. + sigma_y_fit: float | None = None + sigma_x_fit: float | None = None + sigma_y_err: float | None = None + sigma_x_err: float | None = None + + if 'sigma_y' in details: + sigma_y_fit = float(details['sigma_y']) + sigma_y_err = (sigma_y_fit - spec.sigma_y) / spec.sigma_y if spec.sigma_y != 0.0 else _NAN + if 'sigma_x' in details: + sigma_x_fit = float(details['sigma_x']) + sigma_x_err = (sigma_x_fit - spec.sigma_x) / spec.sigma_x if spec.sigma_x != 0.0 else _NAN + + # Retrieve fitted angle if it was floating. + angle_fit: float | None = None + angle_err: float | None = None + + if 'angle' in details: + angle_fit = float(details['angle']) + # For circular PSFs (sigma_y == sigma_x), angle is degenerate -- mark as NaN. + if abs(spec.sigma_y - spec.sigma_x) < 1e-9: + angle_err = _NAN + else: + # Wrap angular difference to [-pi/2, pi/2] because angle has pi symmetry. + raw_diff = angle_fit - spec.angle + wrapped = (raw_diff + math.pi / 2) % math.pi - math.pi / 2 + angle_err = abs(wrapped) + + return TrialResult( + converged=True, + sigma_y_true=spec.sigma_y, + sigma_x_true=spec.sigma_x, + angle_true=spec.angle, + scale_true=spec.scale, + offset_y_true=spec.offset_y, + offset_x_true=spec.offset_x, + pos_err_y=pos_err_y, + pos_err_x=pos_err_x, + pos_err=pos_err, + sigma_y_fit=sigma_y_fit, + sigma_x_fit=sigma_x_fit, + angle_fit=angle_fit, + scale_fit=scale_fit, + sigma_y_err=sigma_y_err, + sigma_x_err=sigma_x_err, + angle_err=angle_err, + scale_err=scale_err, + ) diff --git a/src/psf_gui/__init__.py b/src/psf_gui/__init__.py new file mode 100644 index 0000000..1069088 --- /dev/null +++ b/src/psf_gui/__init__.py @@ -0,0 +1,6 @@ +"""Tkinter demo for exploring :class:`psfmodel.gaussian.GaussianPSF` (and optional HST PSFs). + +Run with ``python -m psf_gui`` or the ``psf-gui`` console script after install. +""" + +__all__: list[str] = [] diff --git a/src/psf_gui/__main__.py b/src/psf_gui/__main__.py new file mode 100644 index 0000000..98d336f --- /dev/null +++ b/src/psf_gui/__main__.py @@ -0,0 +1,6 @@ +"""Allow ``python -m psf_gui``.""" + +from psf_gui.main import main + +if __name__ == '__main__': + main() diff --git a/src/psf_gui/main.py b/src/psf_gui/main.py new file mode 100644 index 0000000..ef232b9 --- /dev/null +++ b/src/psf_gui/main.py @@ -0,0 +1,255 @@ +################################################################################ +# psf_gui/main.py — interactive PSF visualization (Tkinter). +################################################################################ + +import logging +import tkinter as tk +from typing import Any, Literal, cast + +import numpy as np +import numpy.typing as npt + +from psfmodel.gaussian import GaussianPSF + +logger = logging.getLogger(__name__) + +DISPLAY_SIZE = 64 +DEFAULT_CANVAS_SIZE = 512 + +PsfTypeLiteral = Literal['gaussian', 'acshrc', 'wfc3uvis', 'wfpc2pc1'] + + +def _hst_psf(*args: Any, **kwargs: Any) -> Any: + """Return an ``HSTPSF`` instance. + + Mypy treats ``HSTPSF`` as abstract because ``_eval_rect`` is not annotated on + the subclass; runtime construction is valid. + """ + from psfmodel.hst import HSTPSF + + return HSTPSF(*args, **kwargs) # type: ignore[abstract, no-untyped-call] + + +class PsfGuiApp: + """Build sliders and a canvas to render a PSF patch from ``psfmodel``.""" + + def __init__(self, *, psf_type: PsfTypeLiteral = 'gaussian') -> None: + self._psf_type: PsfTypeLiteral = psf_type + self._psfobj: Any = None + self.canvas_size = DEFAULT_CANVAS_SIZE + self._root = tk.Tk() + self._root.title('PSF GUI') + + self._toplevel = tk.Frame(self._root) + self.canvas = tk.Canvas( + self._toplevel, + width=self.canvas_size, + height=self.canvas_size, + bg='black', + cursor='crosshair', + ) + self.canvas.grid(row=0, column=0, sticky=tk.NW) + + self.var_x = tk.DoubleVar(value=0.0) + self.var_y = tk.DoubleVar(value=0.0) + self.var_sigmax = tk.DoubleVar(value=2.0) + self.var_sigmay = tk.DoubleVar(value=2.0) + self.var_angle = tk.DoubleVar(value=0.0) + self.var_psf_xsize = tk.IntVar(value=21) + self.var_psf_ysize = tk.IntVar(value=21) + self.var_subsample = tk.IntVar(value=0) + self.var_motionx = tk.DoubleVar(value=0.0) + self.var_motiony = tk.DoubleVar(value=0.0) + + self._build_controls() + self._toplevel.pack() + + def _build_controls(self) -> None: + control = tk.Frame(self._toplevel) + gridrow = 0 + + def add_scale_row_left( + label_text: str, + variable: tk.DoubleVar | tk.IntVar, + *, + from_: float, + to: float, + resolution: float, + ) -> None: + nonlocal gridrow + lbl = tk.Label(control, text=label_text) + lbl.grid(row=gridrow, column=0, sticky=tk.W) + scale = tk.Scale( + control, + orient=tk.HORIZONTAL, + from_=from_, + to=to, + resolution=resolution, + variable=variable, + command=lambda _s: self.refresh_psf(), + ) + scale.grid(row=gridrow, column=1) + gridrow += 1 + + add_scale_row_left('X', self.var_x, from_=-5.0, to=5.0, resolution=0.01) + add_scale_row_left('Y', self.var_y, from_=-5.0, to=5.0, resolution=0.01) + + if self._psf_type == 'gaussian': + add_scale_row_left('SIGMA X', self.var_sigmax, from_=0.001, to=5.0, resolution=0.001) + add_scale_row_left('SIGMA Y', self.var_sigmay, from_=0.001, to=5.0, resolution=0.001) + add_scale_row_left('ANGLE', self.var_angle, from_=0.0, to=180.0, resolution=1.0) + + gridrow_right = 0 + col = 2 + + def add_scale_row_right( + label_text: str, + variable: tk.DoubleVar | tk.IntVar, + *, + from_: float, + to: float, + resolution: float, + ) -> None: + nonlocal gridrow_right + lbl = tk.Label(control, text=label_text) + lbl.grid(row=gridrow_right, column=col, sticky=tk.W) + scale = tk.Scale( + control, + orient=tk.HORIZONTAL, + from_=from_, + to=to, + resolution=resolution, + variable=variable, + command=lambda _s: self.refresh_psf(), + ) + scale.grid(row=gridrow_right, column=col + 1) + gridrow_right += 1 + + add_scale_row_right('PSF X SIZE', self.var_psf_xsize, from_=1.0, to=101.0, resolution=1.0) + add_scale_row_right('PSF Y SIZE', self.var_psf_ysize, from_=1.0, to=101.0, resolution=1.0) + add_scale_row_right( + 'SUBSAMPLE (*2+1)', self.var_subsample, from_=0.0, to=4.0, resolution=1.0 + ) + add_scale_row_right('MOTION X', self.var_motionx, from_=-10.0, to=10.0, resolution=0.1) + add_scale_row_right('MOTION Y', self.var_motiony, from_=-10.0, to=10.0, resolution=0.1) + + control.grid(row=1, column=0, sticky=tk.NW) + + def _ensure_psf_object(self) -> None: + subsample_val = int(self.var_subsample.get()) + motion_y = float(self.var_motiony.get()) + motion_x = float(self.var_motionx.get()) + expected_sub = subsample_val * 2 + 1 + + need_rebuild = self._psfobj is None + if not need_rebuild and self._psf_type in ('acshrc', 'wfpc2pc1', 'wfc3uvis'): + need_rebuild = getattr(self._psfobj, 'subsample', None) != expected_sub + + if not need_rebuild: + return + + if self._psf_type == 'gaussian': + logger.debug('motion=(%s, %s)', motion_y, motion_x) + self._psfobj = GaussianPSF() + return + + if self._psf_type == 'acshrc': + self._psfobj = _hst_psf( + 'ACS', + 'HRC', + 'F660N', + 512, + 512, + subsample=expected_sub, + movement=(motion_y, motion_x), + ) + elif self._psf_type == 'wfc3uvis': + self._psfobj = _hst_psf( + 'WFC3', + 'UVIS', + 'F606W', + 128, + 128, + subsample=expected_sub, + movement=(motion_y, motion_x), + aperture='UVIS2-C512C-SUB', + ) + elif self._psf_type == 'wfpc2pc1': + self._psfobj = _hst_psf( + 'WFPC2', + 'PC1', + 'F606W', + 128, + 128, + subsample=expected_sub, + movement=(motion_y, motion_x), + ) + + def refresh_psf(self) -> None: + """Redraw the PSF patch on the canvas from current slider values.""" + + self._ensure_psf_object() + if self._psfobj is None: + return + + ysize = int(self.var_psf_ysize.get()) + xsize = int(self.var_psf_xsize.get()) + rect_h = (ysize // 2) * 2 + 1 + rect_w = (xsize // 2) * 2 + 1 + + offset_y = float(self.var_y.get()) + offset_x = float(self.var_x.get()) + motion_y = float(self.var_motiony.get()) + motion_x = float(self.var_motionx.get()) + + kwargs: dict[str, Any] = {'movement': (motion_y, motion_x)} + if self._psf_type == 'gaussian': + kwargs['sigma'] = (float(self.var_sigmay.get()), float(self.var_sigmax.get())) + kwargs['angle'] = np.radians(float(self.var_angle.get())) + + raw = self._psfobj.eval_rect( + (rect_h, rect_w), + (offset_y, offset_x), + **kwargs, + ) + psf = cast(npt.NDArray[np.floating], np.asarray(raw, dtype=np.float64)) + logger.info('PSF sum=%s', float(np.sum(psf))) + psf = np.sqrt(psf) + + pix_scale = self.canvas_size // DISPLAY_SIZE + ctr_x = self.canvas_size // 2 + ctr_y = self.canvas_size // 2 + min_val = 0.0 + max_val = float(np.max(psf)) + denom = max_val - min_val + if denom <= 0.0: + denom = 1.0 + + self.canvas.delete('rect') + for y in range(psf.shape[0]): + for x in range(psf.shape[1]): + raw_px = (float(psf[y, x]) - min_val) / denom * 255.0 + val = int(max(raw_px, 0.0)) + color = f'#{val:02x}{val:02x}{val:02x}' + self.canvas.create_rectangle( + (x - psf.shape[1] // 2) * pix_scale + ctr_x, + (y - psf.shape[0] // 2) * pix_scale + ctr_y, + (x - psf.shape[1] // 2 + 1) * pix_scale + ctr_x, + (y - psf.shape[0] // 2 + 1) * pix_scale + ctr_y, + outline=color, + fill=color, + tags='rect', + ) + + def run(self) -> None: + """Start the Tk main loop.""" + + self.refresh_psf() + self._root.mainloop() + + +def main() -> None: + """Entry point for ``psf-gui`` and ``python -m psf_gui``.""" + + logging.basicConfig(level=logging.INFO) + PsfGuiApp(psf_type='gaussian').run() diff --git a/src/psfmodel/__init__.py b/src/psfmodel/__init__.py new file mode 100644 index 0000000..e69466a --- /dev/null +++ b/src/psfmodel/__init__.py @@ -0,0 +1,20 @@ +"""Point-spread function (PSF) models and fitting for image data. + +This package defines an abstract :class:`~psfmodel.psf.PSF` API and concrete +implementations such as :class:`~psfmodel.gaussian.GaussianPSF` for analytic +Gaussians, pixel integration, background handling, and astrometric fitting. + +The public surface re-exports ``PSF`` and ``GaussianPSF`` (see ``__all__``). +Additional modules (for example instrument-specific PSFs) are imported from +their submodules when needed. A :class:`logging.NullHandler` is attached to the +package logger so library logging is opt-in for applications. +""" + +import logging + +from .gaussian import GaussianPSF +from .psf import PSF + +__all__ = ['PSF', 'GaussianPSF'] + +logging.getLogger(__name__).addHandler(logging.NullHandler()) diff --git a/psfmodel/gaussian.py b/src/psfmodel/gaussian.py similarity index 51% rename from psfmodel/gaussian.py rename to src/psfmodel/gaussian.py index cb338db..c8f1b8e 100755 --- a/psfmodel/gaussian.py +++ b/src/psfmodel/gaussian.py @@ -2,16 +2,24 @@ # psfmodel/gaussian.py ################################################################################ -from typing import Optional, cast +"""Analytic Gaussian PSF models and integrals for :class:`~psfmodel.psf.PSF`. + +:class:`GaussianPSF` subclasses :class:`~psfmodel.psf.PSF` for pixel-integrated fitting +and rendering. The module exposes helpers such as :class:`GaussianPSF` and the constant +``INV_SQRT_2`` (used in error-function integrals). Depends on NumPy and +``scipy.special.erf``. +""" + +import logging +from typing import Any, cast import numpy as np import numpy.typing as npt from scipy.special import erf -from psfmodel import PSF +from .psf import PSF - -INV_SQRT_2 = 2**(-0.5) +INV_SQRT_2 = 2 ** (-0.5) class GaussianPSF(PSF): @@ -25,14 +33,25 @@ class GaussianPSF(PSF): Because these are so fast and easy to compute, we don't cache any results. """ - def __init__(self, - *, - sigma: Optional[float | tuple[float | None, float | None]] = None, - mean: float | tuple[float, float] = 0., - angle: float = 0., - sigma_x_range: tuple[float, float] = (0.01, 10.), - sigma_y_range: tuple[float, float] = (0.01, 10.), - angle_subsample: int = 13) -> None: + _sigma_y: float | None + _sigma_x: float | None + _mean_y: float + _mean_x: float + _angle: float | None + _angle_subsample: int + + def __init__( + self, + *, + sigma: float | tuple[float | None, float | None] | None = None, + mean: float | tuple[float, float] = 0.0, + angle: float | None = 0.0, + sigma_x_range: tuple[float, float] = (0.01, 10.0), + sigma_y_range: tuple[float, float] = (0.01, 10.0), + angle_subsample: int = 13, + logger: logging.Logger | None = None, + detailed_logging: bool = False, + ) -> None: """Create a GaussianPSF object describing a 2-D Gaussian PSF. Parameters: @@ -42,20 +61,24 @@ def __init__(self, sigma will be supplied later. mean: The mean of the Gaussian. May be a scalar in which case the value applies to both X and Y, or a tuple (mean_y, mean_x). - angle: The angle of the Gaussian. angle ranges from 0 to pi, with 0 being - "3 o'clock" (+X) assuming that (0, 0) is in the top left corner. None - means that the angle will be supplied later. + angle: Rotation angle from 0 to ``pi`` (0 is +X). Default ``0.0`` fixes an + unrotated Gaussian and keeps pixel integration on the fast axis-aligned + path. Pass ``None`` to include ``angle`` in ``_additional_params`` so it + can be fitted in :meth:`~psfmodel.psf.PSF.find_position`; per-call + overrides are still accepted by :meth:`eval_rect`. sigma_x_range: The valid range for sigma_x if it is not specified otherwise. This is used during PSF fitting to let sigma_x float to its optimal value. sigma_y_range: The valid range for sigma_y if it is not specified otherwise. This is used during PSF fitting to let sigma_y float to its optimal value. angle_subsample: The amount of subsampling to do in X and Y when computing a 2-D Gaussian pixel with a non-zero angle. + logger: Optional logger passed to :class:`PSF`; see :meth:`PSF.__init__`. + detailed_logging: Passed to :class:`PSF`; see :meth:`PSF.__init__`. """ - PSF.__init__(self) + super().__init__(logger=logger, detailed_logging=detailed_logging) - if not isinstance(sigma, (tuple, list)): + if not isinstance(sigma, tuple): self._sigma_y = self._sigma_x = float(sigma) if sigma is not None else None else: self._sigma_y = float(sigma[0]) if sigma[0] is not None else None @@ -64,49 +87,71 @@ def __init__(self, self._mean_y = self._mean_x = float(mean) else: self._mean_y, self._mean_x = float(mean[0]), float(mean[1]) - self._angle = float(angle) - if (not isinstance(angle_subsample, int) or - not (0 < angle_subsample <= 99)): + self._angle = None if angle is None else float(angle) + if not isinstance(angle_subsample, int) or not (0 < angle_subsample <= 99): raise ValueError( - f'angle_subsample must be an int between 1 and 99, got {angle_subsample}') + f'angle_subsample must be an int between 1 and 99, got {angle_subsample}' + ) self._angle_subsample = int(angle_subsample) - if self._sigma_y is None: - if sigma_y_range is not None: - self._additional_params.append((float(sigma_y_range[0]), - float(sigma_y_range[1])) + - ('sigma_y',)) - if self._sigma_x is None: - if sigma_x_range is not None: - self._additional_params.append((float(sigma_x_range[0]), - float(sigma_x_range[1])) + - ('sigma_x',)) - if self._angle is None: + if self._sigma_y is None and sigma_y_range is not None: + self._additional_params.append( + (float(sigma_y_range[0]), float(sigma_y_range[1]), 'sigma_y') + ) + if self._sigma_x is None and sigma_x_range is not None: + self._additional_params.append( + (float(sigma_x_range[0]), float(sigma_x_range[1]), 'sigma_x') + ) + if self._angle is None and angle_subsample > 1: self._additional_params.append((0, np.pi, 'angle')) @property def sigma_y(self) -> float | None: + """Standard deviation of the Gaussian along the y-axis (pixels), if fixed. + + Returns: + The locked ``sigma_y`` from construction, or ``None`` when it is left free + for fitting or per-call overrides. + """ return self._sigma_y @property def sigma_x(self) -> float | None: + """Standard deviation of the Gaussian along the x-axis (pixels), if fixed. + + Returns: + The locked ``sigma_x`` from construction, or ``None`` when it is left free + for fitting or per-call overrides. + """ return self._sigma_x @property def mean_y(self) -> float: + """Center of the Gaussian along the y-axis in pixel coordinates. + + Returns: + The mean ``y`` used when evaluating the model (scalar). + """ return self._mean_y @property def mean_x(self) -> float: + """Center of the Gaussian along the x-axis in pixel coordinates. + + Returns: + The mean ``x`` used when evaluating the model (scalar). + """ return self._mean_x @staticmethod - def gaussian_1d(x: float | npt.NDArray[np.floating], - *, - sigma: float = 1., - mean: float = 0., - scale: float = 1.0, - base: float = 0.) -> float | npt.NDArray[np.floating]: + def gaussian_1d( + x: float | npt.NDArray[np.floating], + *, + sigma: float = 1.0, + mean: float = 0.0, + scale: float = 1.0, + base: float = 0.0, + ) -> float | npt.NDArray[np.floating]: """Return a 1-D Gaussian. This simply returns the value of the Gaussian at a point or series of points. @@ -123,8 +168,9 @@ def gaussian_1d(x: float | npt.NDArray[np.floating], Returns: The value of the Gaussian function at the point(s) defined by x. """ - ret = (scale / np.sqrt(2*np.pi * sigma**2) * - np.exp(-(x-mean)**2 / (2 * sigma**2))) + base + ret = ( + scale / np.sqrt(2 * np.pi * sigma**2) * np.exp(-((x - mean) ** 2) / (2 * sigma**2)) + ) + base return cast(float | npt.NDArray[np.floating], ret) # @staticmethod @@ -178,16 +224,18 @@ def gaussian_1d(x: float | npt.NDArray[np.floating], # return scale * norm_fact * np.exp(-0.5 * expon) + base @staticmethod - def gaussian_2d(y: float | npt.NDArray[np.floating], - x: float | npt.NDArray[np.floating], - *, - sigma_y: float = 1., - sigma_x: float = 1., - mean_y: float = 0., - mean_x: float = 0., - scale: float = 1., - base: float = 0., - angle: float = 0.) -> float | npt.NDArray[np.floating]: + def gaussian_2d( + y: float | npt.NDArray[np.floating], + x: float | npt.NDArray[np.floating], + *, + sigma_y: float = 1.0, + sigma_x: float = 1.0, + mean_y: float = 0.0, + mean_x: float = 0.0, + scale: float = 1.0, + base: float = 0.0, + angle: float = 0.0, + ) -> float | npt.NDArray[np.floating]: """Return a 2-D Gaussian using angle (angle 0-pi, 0 at 3 o'clock, CW). This simply returns the value of the 2-D Gaussian at a series of points. @@ -219,91 +267,103 @@ def gaussian_2d(y: float | npt.NDArray[np.floating], x = x - mean_x y = y - mean_y - # Convert x and y (ellipse coordinates) to X and Y (Cartesian + # Convert x and y (ellipse coordinates) to xc and yc (Cartesian # coordinates) c = np.cos(angle) s = np.sin(angle) - X = c*x + s*y - Y = -s*x + c*y + xc = c * x + s * y + yc = -s * x + c * y - return (GaussianPSF.gaussian_1d(X, sigma=sigma_x) * - GaussianPSF.gaussian_1d(Y, sigma=sigma_y) * - scale + base) + return ( + GaussianPSF.gaussian_1d(xc, sigma=sigma_x) + * GaussianPSF.gaussian_1d(yc, sigma=sigma_y) + * scale + + base + ) @staticmethod - def gaussian_integral_1d(x_min: float | npt.NDArray[np.floating], - x_max: float | npt.NDArray[np.floating], - *, - sigma: float = 1., - mean: float = 0., - scale: float = 1., - base: float = 0.) -> float | npt.NDArray[np.floating]: + def gaussian_integral_1d( + x_min: float | npt.NDArray[np.floating], + x_max: float | npt.NDArray[np.floating], + *, + sigma: float = 1.0, + mean: float = 0.0, + scale: float = 1.0, + base: float = 0.0, + ) -> float | npt.NDArray[np.floating]: """Return the integral of a Gaussian. The integral is over the limits [xmin, xmax]. Values are generated via the error function, where the integral from - -inf to x is equal to + ``-inf`` to ``x`` is proportional to - (1 + erf((x - mean_x) / (sqrt(2)*sigma_x)) / 2 + ``1 + erf((x - mean) / (sqrt(2) * sigma))`` This function works for both scalar and array values of xmin and xmax. Parameters: x_min: The lower bound of the integral. x_max: The upper bound of the integral. - sigma_x: The standard deviation of the Gaussian. - mean_x: The mean of the Gaussian. + sigma: The standard deviation of the Gaussian. + mean: The mean of the Gaussian. scale: The scale of the Gaussian; the area under the complete curve (excluding the base). base: The base of the Gaussian; a scalar added to the curve. Returns: - The integral of the Gaussian between xmin and xmax. + The integral of the Gaussian between ``x_min`` and ``x_max``. + + Raises: + ValueError: If ``sigma`` is not positive. """ - # Normalize xmin and xmax - assert sigma > 0. + if not sigma > 0.0: + raise ValueError(f'sigma must be positive, got {sigma}') xmin_div_sqrt_2 = (x_min - mean) * (INV_SQRT_2 / sigma) xmax_div_sqrt_2 = (x_max - mean) * (INV_SQRT_2 / sigma) # Handle the scalar case if np.shape(x_min) == () and np.shape(x_max) == (): - ret = (0.5 * (erf(xmax_div_sqrt_2) - - erf(xmin_div_sqrt_2)) * scale) + base + ret = (0.5 * (erf(xmax_div_sqrt_2) - erf(xmin_div_sqrt_2)) * scale) + base return cast(float | npt.NDArray[np.floating], ret) # If either value is an array, broadcast to a common shape - (xmin_div_sqrt_2, - xmax_div_sqrt_2) = np.broadcast_arrays(xmin_div_sqrt_2, - xmax_div_sqrt_2) + (xmin_div_sqrt_2, xmax_div_sqrt_2) = np.broadcast_arrays(xmin_div_sqrt_2, xmax_div_sqrt_2) - result = np.abs(erf(xmax_div_sqrt_2) - erf(xmin_div_sqrt_2)) + result = erf(xmax_div_sqrt_2) - erf(xmin_div_sqrt_2) - return cast(float | npt.NDArray[np.floating], - result * 0.5 * scale + base) + return cast(float | npt.NDArray[np.floating], result * 0.5 * scale + base) @staticmethod - def gaussian_integral_2d(y_min: float | npt.NDArray[np.floating], - y_max: float | npt.NDArray[np.floating], - x_min: float | npt.NDArray[np.floating], - x_max: float | npt.NDArray[np.floating], - *, - sigma_y: float = 1., - sigma_x: float = 1., - mean_y: float = 0., - mean_x: float = 0., - scale: float = 1., - base: float = 0., - angle: float = 0., - angle_subsample: int = 13 - ) -> float | npt.NDArray[np.floating]: + def gaussian_integral_2d( + y_min: float | npt.NDArray[np.floating], + y_max: float | npt.NDArray[np.floating], + x_min: float | npt.NDArray[np.floating], + x_max: float | npt.NDArray[np.floating], + *, + sigma_y: float = 1.0, + sigma_x: float = 1.0, + mean_y: float = 0.0, + mean_x: float = 0.0, + scale: float = 1.0, + base: float = 0.0, + angle: float = 0.0, + angle_subsample: int = 13, + ) -> float | npt.NDArray[np.floating]: """Return the double integral of a 2-D Gaussian. The integral is over the limits [y_min, y_max] and [x_min, x_max]. - This function works for both scalar and array values of - x_min/x_max/y_min/y_max. + For ``angle == 0`` the integral is computed analytically using the + error function (fast, exact). For a non-zero ``angle``, the integral + is approximated by uniform subsampling within each pixel using + ``angle_subsample`` points per axis. When the bounds are arrays (e.g. + a full image patch), all pixels are evaluated simultaneously in a + single vectorised NumPy call on an ``(N, S, S)`` grid, where *N* is + the number of pixels and *S* is ``angle_subsample``; this avoids a + per-pixel Python loop and is typically 10-20x faster than an + equivalent element-wise approach. Parameters: y_min: The lower bound of the integral in the Y dimension. @@ -330,54 +390,86 @@ def gaussian_integral_2d(y_min: float | npt.NDArray[np.floating], The integral of the 2-D Gaussian between y_min and y_max, and x_min and x_max. """ - if angle == 0.: - return (GaussianPSF.gaussian_integral_1d(y_min, y_max, - sigma=sigma_y, mean=mean_y) * - GaussianPSF.gaussian_integral_1d(x_min, x_max, - sigma=sigma_x, mean=mean_x) * - scale + base) + if angle == 0.0: + return ( + GaussianPSF.gaussian_integral_1d(y_min, y_max, sigma=sigma_y, mean=mean_y) + * GaussianPSF.gaussian_integral_1d(x_min, x_max, sigma=sigma_x, mean=mean_x) + * scale + + base + ) # Handle the scalar case - if (np.shape(x_min) == () and np.shape(x_max) == () and - np.shape(y_min) == () and np.shape(y_max) == ()): + if ( + np.shape(x_min) == () + and np.shape(x_max) == () + and np.shape(y_min) == () + and np.shape(y_max) == () + ): ys = np.linspace(y_min, y_max, angle_subsample) xs = np.linspace(x_min, x_max, angle_subsample) xindex, yindex = np.meshgrid(xs, ys) - ret = GaussianPSF.gaussian_2d(yindex, xindex, - sigma_y=sigma_y, sigma_x=sigma_x, - mean_y=mean_y, mean_x=mean_x, - scale=scale, base=base, - angle=angle) + ret = GaussianPSF.gaussian_2d( + yindex, + xindex, + sigma_y=sigma_y, + sigma_x=sigma_x, + mean_y=mean_y, + mean_x=mean_x, + scale=scale, + base=base, + angle=angle, + ) return cast(float, np.mean(ret)) x_min, x_max, y_min, y_max = np.broadcast_arrays(x_min, x_max, y_min, y_max) - res = np.empty(x_min.shape) - for x in range(x_min.shape[0]): - ys = np.linspace(y_min[x], y_max[x], angle_subsample) - xs = np.linspace(x_min[x], x_max[x], angle_subsample) - xindex, yindex = np.meshgrid(xs, ys) - - ret = GaussianPSF.gaussian_2d(yindex, xindex, - sigma_y=sigma_y, sigma_x=sigma_x, - mean_y=mean_y, mean_x=mean_x, - scale=scale, base=base, - angle=angle) - res[x] = np.mean(ret) - - return res - - def eval_point(self, - coord: (tuple[float | npt.NDArray[np.floating], - float | npt.NDArray[np.floating]] | - npt.NDArray[np.floating]), - *, - sigma: Optional[float] = None, - scale: float = 1., - base: float = 0., - sigma_y: Optional[float] = None, - sigma_x: Optional[float] = None, - angle: Optional[float] = None) -> float | npt.NDArray[np.floating]: + orig_shape = x_min.shape + y_lo = y_min.ravel() + y_hi = y_max.ravel() + x_lo = x_min.ravel() + x_hi = x_max.ravel() + + # Uniform parameter in [0, 1] with angle_subsample steps; shape (S,). + t = np.linspace(0.0, 1.0, angle_subsample) + + # Sample coordinates for every pixel simultaneously. + # ys, xs: shape (N, S) where N = number of pixels, S = angle_subsample. + ys = y_lo[:, None] + (y_hi - y_lo)[:, None] * t + xs = x_lo[:, None] + (x_hi - x_lo)[:, None] * t + + # Build the full (N, S, S) evaluation grid by broadcasting: + # ys[:, :, None] -- y varies along axis 1, x axis held + # xs[:, None, :] -- x varies along axis 2, y axis held + # One call to gaussian_2d replaces the entire per-pixel Python loop. + vals = GaussianPSF.gaussian_2d( + ys[:, :, None], + xs[:, None, :], + sigma_y=sigma_y, + sigma_x=sigma_x, + mean_y=mean_y, + mean_x=mean_x, + scale=scale, + base=base, + angle=angle, + ) + res = np.asarray(vals, dtype=np.float64).mean(axis=(1, 2)).reshape(orig_shape) + + return cast(float | npt.NDArray[np.floating], res) + + def eval_point( + self, + coord: ( + tuple[float | npt.NDArray[np.floating], float | npt.NDArray[np.floating]] + | npt.NDArray[np.floating] + ), + *, + sigma: float | None = None, + scale: float = 1.0, + base: float = 0.0, + sigma_y: float | None = None, + sigma_x: float | None = None, + angle: float | None = None, + ) -> float | npt.NDArray[np.floating]: """Evaluate the 2-D Gaussian PSF at a single, fractional, point. (0, 0) is the center of the PSF and x and y may be negative. @@ -386,10 +478,9 @@ def eval_point(self, coord: The coordinate (y, x) at which to evaluate the PSF. scale: A scale factor to apply to the resulting PSF. base: A scalar added to the resulting PSF. - sigma: The standard deviation of the Gaussian. It may be specified here or - during the creation of the GaussianPSF object, but not both. May be a - scalar or a tuple (sigma_y, sigma_x), or None if sigma was specified at - creation time. + sigma: Standard deviations: a scalar (both axes) or a ``(sigma_y, sigma_x)`` + tuple. Must not duplicate values fixed at construction; ``None`` if both + are already set on the instance. sigma_y: An alternative way to specify sigma_y. Used primarily for letting sigma_y float during PSF fitting. sigma_x: An alternative way to specify sigma_x. Used primarily for letting @@ -404,15 +495,16 @@ def eval_point(self, sy = self._sigma_y sx = self._sigma_x - if ((sx is not None and (sigma is not None or sigma_x is not None)) or - (sy is not None and (sigma is not None or sigma_y is not None))): + if (sx is not None and (sigma is not None or sigma_x is not None)) or ( + sy is not None and (sigma is not None or sigma_y is not None) + ): raise ValueError('Cannot specify both sigma during init and sigma_y/x') if sigma is not None: - if not isinstance(sigma, (list, tuple)): - sy = sx = sigma + if not isinstance(sigma, tuple): + sy = sx = float(sigma) else: - sy, sx = sigma + sy, sx = sigma[0], sigma[1] if sigma_y is not None: sy = sigma_y @@ -420,33 +512,45 @@ def eval_point(self, sx = sigma_x if sx is None or sy is None: - raise ValueError('Sigma X and Y must be specified either at object creation ' - 'or in the call to eval_point') + raise ValueError( + 'Sigma X and Y must be specified either at object creation ' + 'or in the call to eval_point' + ) - r = self._angle + r: float = 0.0 if self._angle is None else float(self._angle) if angle is not None: - r = angle - - ret = GaussianPSF.gaussian_2d(coord[0], coord[1], - sigma_y=sy, sigma_x=sx, - mean_y=self._mean_y, mean_x=self._mean_x, - scale=scale, base=base, angle=r) + r = float(angle) + + ret = GaussianPSF.gaussian_2d( + coord[0], + coord[1], + sigma_y=sy, + sigma_x=sx, + mean_y=self._mean_y, + mean_x=self._mean_x, + scale=scale, + base=base, + angle=r, + ) return cast(float, ret) - def eval_pixel(self, - coord: (tuple[int | npt.NDArray[np.int_], - int | npt.NDArray[np.int_]] | - npt.NDArray[np.floating]), - offset: (tuple[float | npt.NDArray[np.floating], - float | npt.NDArray[np.floating]] | - npt.NDArray[np.floating]) = (0.5, 0.5), - *, - scale: float = 1., - base: float = 0., - sigma: Optional[tuple[float, float]] = None, - sigma_y: Optional[float] = None, - sigma_x: Optional[float] = None, - angle: Optional[float] = None) -> float | npt.NDArray[np.floating]: + def eval_pixel( + self, + coord: ( + tuple[int | npt.NDArray[np.int_], int | npt.NDArray[np.int_]] | npt.NDArray[np.floating] + ), + offset: ( + tuple[float | npt.NDArray[np.floating], float | npt.NDArray[np.floating]] + | npt.NDArray[np.floating] + ) = (0.5, 0.5), + *, + scale: float = 1.0, + base: float = 0.0, + sigma: tuple[float, float] | None = None, + sigma_y: float | None = None, + sigma_x: float | None = None, + angle: float | None = None, + ) -> float | npt.NDArray[np.floating]: """Evaluate the Gaussian PSF integrated over an entire integer pixel. The returned array has the PSF offset from the center by (offset_y, offset_x). An @@ -463,10 +567,9 @@ def eval_pixel(self, offset: The amount (offset_y, offset_x) to offset the center of the PSF. scale: A scale factor to apply to the resulting PSF. base: A scalar added to the resulting PSF. - sigma: The standard deviation of the Gaussian. It may be specified here or - during the creation of the GaussianPSF object, but not both. May be a - scalar or a tuple (sigma_y, sigma_x), or None if sigma was specified at - creation time. + sigma: Standard deviations: a scalar (both axes) or a ``(sigma_y, sigma_x)`` + tuple. Must not duplicate values fixed at construction; ``None`` if both + are already set on the instance. sigma_y: An alternative way to specify sigma_y. Used primarily for letting sigma_y float during PSF fitting. sigma_x: An alternative way to specify sigma_x. Used primarily for letting @@ -481,82 +584,123 @@ def eval_pixel(self, sy = self._sigma_y sx = self._sigma_x - if ((sx is not None and (sigma is not None or sigma_x is not None)) or - (sy is not None and (sigma is not None or sigma_y is not None))): + if (sx is not None and (sigma is not None or sigma_x is not None)) or ( + sy is not None and (sigma is not None or sigma_y is not None) + ): raise ValueError('Cannot specify both sigma during init and sigma_y/x') if sigma is not None: - if not isinstance(sigma, (list, tuple)): - sy = sx = sigma + if not isinstance(sigma, tuple): + sy = sx = float(sigma) else: - sy, sx = sigma + sy, sx = sigma[0], sigma[1] if sigma_y is not None: sy = sigma_y if sigma_x is not None: sx = sigma_x - r = self._angle + r_angle: float = 0.0 if self._angle is None else float(self._angle) if angle is not None: - r = angle + r_angle = float(angle) if sx is None or sy is None: - raise ValueError('Sigma X and Y must be specified either at object creation ' - 'or in the call to eval_pixel') - - # There is a bug in type checking below? - ret = GaussianPSF.gaussian_integral_2d(coord[0]-offset[0], - coord[0]-offset[0]+1., - coord[1]-offset[1], - coord[1]-offset[1]+1, # type: ignore - sigma_y=sy, sigma_x=sx, - mean_y=self._mean_y, mean_x=self._mean_x, - scale=scale, base=base, angle=r, - angle_subsample=self._angle_subsample) + raise ValueError( + 'Sigma X and Y must be specified either at object creation ' + 'or in the call to eval_pixel' + ) + + ret = GaussianPSF.gaussian_integral_2d( + coord[0] - offset[0], + coord[0] - offset[0] + 1.0, + coord[1] - offset[1], + coord[1] - offset[1] + 1.0, + sigma_y=sy, + sigma_x=sx, + mean_y=self._mean_y, + mean_x=self._mean_x, + scale=scale, + base=base, + angle=r_angle, + angle_subsample=self._angle_subsample, + ) return ret - def _eval_rect(self, # type: ignore - rect_size: tuple[int, int], - offset: tuple[float, float] = (0.5, 0.5), - *, - scale: float = 1., - base: float = 0., - sigma: Optional[tuple[float, float]] = None, - sigma_y: Optional[float] = None, - sigma_x: Optional[float] = None, - angle: Optional[float] = None - ) -> npt.NDArray[np.floating]: + # Intentional override of :meth:`PSF._eval_rect`: Gaussian-specific keyword-only + # arguments are added while the parameter and return types match the base class. + def _eval_rect( + self, + rect_size: tuple[int, int], + offset: tuple[float, float] = (0.5, 0.5), + *, + scale: float = 1.0, + base: float = 0.0, + sigma: tuple[float, float] | None = None, + sigma_y: float | None = None, + sigma_x: float | None = None, + angle: float | None = None, + **kwargs: Any, + ) -> npt.NDArray[np.float64]: + """Pixel-integrated Gaussian on a rectangle (same grid as :meth:`eval_rect`). + + Parameters: + rect_size: ``(size_y, size_x)`` patch shape (odd counts). + offset: Subpixel shift ``(y, x)`` passed to :meth:`eval_pixel`. + scale: Multiplicative scale for the Gaussian flux. + base: Additive constant per pixel after scaling. + sigma: Optional ``(sigma_y, sigma_x)`` pair overriding instance sigmas. + sigma_y: Override for ``sigma_y`` when fitting. + sigma_x: Override for ``sigma_x`` when fitting. + angle: Override for rotation angle (radians); instance default if ``None``. + + Returns: + A 2-D :class:`numpy.ndarray` of floats with shape ``rect_size``, each entry + the integral of the Gaussian over that pixel. + """ rect_size_y, rect_size_x = rect_size - y_coords = np.repeat(np.arange(-(rect_size_y//2), rect_size_y//2+1, - dtype=np.float64), - rect_size_x) - x_coords = np.tile(np.arange(-(rect_size_x//2), rect_size_x//2+1, - dtype=np.float64), - rect_size_y) + y_coords = np.repeat( + np.arange(-(rect_size_y // 2), rect_size_y // 2 + 1, dtype=np.float64), rect_size_x + ) + x_coords = np.tile( + np.arange(-(rect_size_x // 2), rect_size_x // 2 + 1, dtype=np.float64), rect_size_y + ) coords = np.empty((2, rect_size_y * rect_size_x)) coords[0] = y_coords coords[1] = x_coords - rect = self.eval_pixel(coords, offset, - scale=scale, base=base, sigma=sigma, - sigma_y=sigma_y, sigma_x=sigma_x, angle=angle) - rect = cast(npt.NDArray[np.floating], rect) + rect = self.eval_pixel( + coords, + offset, + scale=scale, + base=base, + sigma=sigma, + sigma_y=sigma_y, + sigma_x=sigma_x, + angle=angle, + ) + rect = cast(npt.NDArray[np.float64], rect) rect = rect.reshape(rect_size) return rect - def eval_rect(self, # type: ignore - rect_size: tuple[int, int], - offset: tuple[float, float] = (0.5, 0.5), - *, - movement: Optional[tuple[float, float]] = None, - movement_granularity: float = 0.1, - scale: float = 1., - base: float = 0., - sigma: Optional[tuple[float, float]] = None, - sigma_y: Optional[float] = None, - sigma_x: Optional[float] = None, - angle: Optional[float] = None) -> npt.NDArray[np.floating]: + # Same rationale as :meth:`_eval_rect` above: extends :meth:`PSF.eval_rect` with + # Gaussian kwargs while accepting the same ``rect_size`` / ``offset`` types as the + # base and delegating to :meth:`PSF._eval_rect_smeared`. + def eval_rect( + self, + rect_size: list[int] | tuple[int, int], + offset: list[float] | tuple[float, float] = (0.5, 0.5), + *, + movement: tuple[float, float] | None = None, + movement_granularity: float = 0.1, + scale: float = 1.0, + base: float = 0.0, + sigma: tuple[float, float] | None = None, + sigma_y: float | None = None, + sigma_x: float | None = None, + angle: float | None = None, + **kwargs: Any, + ) -> npt.NDArray[np.float64]: """Create a rectangular pixelated Gaussian PSF. This is done by evaluating the PSF function from @@ -580,10 +724,9 @@ def eval_rect(self, # type: ignore more precise but also take longer to compute. scale: A scale factor to apply to the resulting PSF. base: A scalar added to the resulting PSF. - sigma: The standard deviation of the Gaussian. It may be specified here or - during the creation of the GaussianPSF object, but not both. May be a - scalar or a tuple (sigma_y, sigma_x), or None if sigma was specified at - creation time. + sigma: Standard deviations: a scalar (both axes) or a ``(sigma_y, sigma_x)`` + tuple. Must not duplicate values fixed at construction; ``None`` if both + are already set on the instance. sigma_y: An alternative way to specify sigma_y. Used primarily for letting sigma_y float during PSF fitting. sigma_x: An alternative way to specify sigma_x. Used primarily for letting @@ -597,15 +740,20 @@ def eval_rect(self, # type: ignore rect_size_y, rect_size_x = rect_size - if (rect_size_y < 0 or rect_size_x < 0 - or rect_size_y % 2 != 1 or rect_size_x % 2 != 1): + if rect_size_y < 0 or rect_size_x < 0 or rect_size_y % 2 != 1 or rect_size_x % 2 != 1: raise ValueError( - 'Rectangle must have odd positive shape in each dimension, ' - f'got {rect_size}') - - return self._eval_rect_smeared(rect_size, offset=offset, - movement=movement, - movement_granularity=movement_granularity, - scale=scale, base=base, - sigma=sigma, sigma_y=sigma_y, sigma_x=sigma_x, - angle=angle) + f'Rectangle must have odd positive shape in each dimension, got {rect_size}' + ) + + return self._eval_rect_smeared( + (rect_size_y, rect_size_x), + offset=(offset[0], offset[1]), + movement=movement, + movement_granularity=movement_granularity, + scale=scale, + base=base, + sigma=sigma, + sigma_y=sigma_y, + sigma_x=sigma_x, + angle=angle, + ) diff --git a/psfmodel/hst.py b/src/psfmodel/hst.py similarity index 90% rename from psfmodel/hst.py rename to src/psfmodel/hst.py index 7a0408a..bf55148 100755 --- a/psfmodel/hst.py +++ b/src/psfmodel/hst.py @@ -6,11 +6,10 @@ import astropy.io.fits as pyfits import numpy as np -from scipy.interpolate import RectBivariateSpline import scipy.signal as scisig +from scipy.interpolate import RectBivariateSpline -from psfmodel import PSF - +from .psf import PSF #=============================================================================== # @@ -268,9 +267,17 @@ def __init__(self, instrument, detector, filter, line=None, sample=None, jitter_x/y/z The amount of jitter (in mas) to apply to the PSF; z is the angle; None means use the default for the instrument/detector + + Additional keyword arguments are passed to :class:`PSF`: ``logger`` and + ``detailed_logging`` (see :meth:`PSF.__init__`). """ - PSF.__init__(self, movement, movement_granularity) + _logger = kwargs.pop('logger', None) + _detailed_logging = bool(kwargs.pop('detailed_logging', False)) + PSF.__init__(self, logger=_logger, detailed_logging=_detailed_logging) + + self.movement = movement if movement is not None else (0.0, 0.0) + self.movement_granularity = movement_granularity self.instrument = instrument self.detector = detector @@ -298,7 +305,8 @@ def __init__(self, instrument, detector, filter, line=None, sample=None, if self.subsample is None: self.subsample = DEFAULT_SUBSAMPLE[(self.instrument, self.detector)] - assert self.subsample % 2 == 1 # Must be odd + if self.subsample % 2 != 1: + raise ValueError(f'subsample must be odd, got {self.subsample}') if self.jitter_x is None: self.jitter_x = DEFAULT_JITTER[(self.instrument, self.detector)] if self.jitter_y is None: @@ -319,19 +327,23 @@ def __init__(self, instrument, detector, filter, line=None, sample=None, elif instrument == 'WFC3': self._init_WFC3(**kwargs) else: - print('UNKNOWN INSTRUMENT', instrument) - assert False + raise ValueError(f'Unknown HST instrument: {instrument!r}') def _init_ACS(self, **kwargs): - assert self.detector == 'HRC' + if self.detector != 'HRC': + raise ValueError(f"ACS requires detector 'HRC', got {self.detector!r}") self.do_subsample_diffusion = True def _init_WFPC2(self, **kwargs): - assert self.detector == 'PC1' + if self.detector != 'PC1': + raise ValueError(f"WFPC2 requires detector 'PC1', got {self.detector!r}") self.do_subsample_diffusion = False def _init_WFC3(self, **kwargs): - assert self.detector == 'UVIS' or self.detector == 'IR' + if self.detector not in ('UVIS', 'IR'): + raise ValueError( + f"WFC3 requires detector 'UVIS' or 'IR', got {self.detector!r}" + ) self.do_subsample_diffusion = True # See http://www.stsci.edu/hst/observatory/apertures/wfc3.html @@ -366,12 +378,10 @@ def _init_WFC3(self, **kwargs): if self.sample is None: self.sample = 256 else: - print('UNKNOWN WFC3 UVIS APERTURE', self.aperture) - assert False + raise ValueError(f'Unknown WFC3 UVIS aperture: {self.aperture!r}') else: if self.aperture != 'IRSUB256': - print('UNKNOWN WFC3 IR APERTURE', self.aperture) - assert False + raise ValueError(f'Unknown WFC3 IR aperture: {self.aperture!r}') x_offset = 1014//2-256//2 y_offset = 1014//2-256//2 if self.line is None: @@ -434,20 +444,29 @@ def run_tinytim(instrument, detector, y_ctr, x_ctr, filter, """ if instrument == 'ACS': - assert detector == 'HRC' + if detector != 'HRC': + raise ValueError(f"ACS requires detector 'HRC', got {detector!r}") elif instrument == 'WFPC2': - assert detector == 'PC1' + if detector != 'PC1': + raise ValueError(f"WFPC2 requires detector 'PC1', got {detector!r}") elif instrument == 'WFC3': - assert detector == 'UVIS' or detector == 'IR' + if detector not in ('UVIS', 'IR'): + raise ValueError( + f"WFC3 requires detector 'UVIS' or 'IR', got {detector!r}" + ) else: - assert False + raise ValueError(f'Unknown HST instrument for TinyTim: {instrument!r}') min_fov = None for fov_size, pixel_size in RETURNED_SIZES[(instrument, detector)]: if pixel_size > psf_size_pixels: min_fov = fov_size break - assert min_fov is not None + if min_fov is None: + raise ValueError( + f'No TinyTim FOV supports psf_size_pixels={psf_size_pixels} for ' + f'instrument={instrument!r}, detector={detector!r}' + ) if min_fov*.7 > fov: # Give a little slack for the distortion print('WARNING: OVERRIDING SPECIFIED FOV', fov, end=' ') @@ -471,7 +490,10 @@ def run_tinytim(instrument, detector, y_ctr, x_ctr, filter, fov, subsample_amt)) fits_filename = path_join(PSF_CACHE_DIR, fits_base) if not os.path.exists(fits_filename) or force_run_tinytim: - assert psf_size_pixels % 2 == 1 # Must be odd + if psf_size_pixels % 2 != 1: + raise ValueError( + f'psf_size_pixels must be odd for TinyTim, got {psf_size_pixels}' + ) orig_cwd = os.getcwd() os.chdir(TINY_TIM_DIR) psf_filename = 'temp_psf' + str(os.getpid()) + '_' @@ -547,8 +569,9 @@ def run_tinytim(instrument, detector, y_ctr, x_ctr, filter, os.unlink(full_psf_filename) if not os.path.exists(temp_fits_filename): - print('RUN_TINYTIM: ERROR CREATING FITS FILE', psf_filename) - assert False + raise RuntimeError( + f'TinyTim did not create FITS file (expected {temp_fits_filename!r})' + ) psf_file = pyfits.open(temp_fits_filename) psf_file.writeto(fits_filename, overwrite=True) @@ -583,8 +606,9 @@ def run_tinytim(instrument, detector, y_ctr, x_ctr, filter, psf_halfsize = psf_size_pixels//2 if (psf_halfsize > psf_size[0]//2 or psf_halfsize > psf_size[1]//2): - print('FATAL ERROR: TINYTIM returned a PSF smaller', psf_size, 'than we wanted', psf_halfsize) - assert False + raise RuntimeError( + f'TinyTim PSF shape {psf_size} is too small for half-size {psf_halfsize}' + ) psf_data = psf_data[psf_size[1]//2-psf_halfsize: psf_size[1]//2+psf_halfsize+1, psf_size[0]//2-psf_halfsize: @@ -613,8 +637,10 @@ def _cache_psf(self, min_size, **kwargs): self.cached_psf_size = psf.shape[0] self.cached_psf = psf self.cached_diffusion_matrix = diffusion_matrix - assert psf.shape[0] % 2 == 1 # Odd - assert psf.shape[1] % 2 == 1 # Odd + if psf.shape[0] % 2 != 1 or psf.shape[1] % 2 != 1: + raise RuntimeError( + f'Cached TinyTim PSF must have odd side lengths, got shape {psf.shape}' + ) self.psf_zero_offset = psf.shape[0]//2 self.cached_pixelated_psf = None @@ -717,7 +743,7 @@ def eval_point(self, point, scale=1., base=0.): base a scalar added to the resulting PSF. """ - assert False + raise NotImplementedError('HSTPSF.eval_point is not implemented') def eval_pixel(self, coord, offset=(0.,0.), scale=1., base=0., **kwargs): """Evaluate the PSF integrated over an entire integer pixel. @@ -770,8 +796,10 @@ def eval_rect(self, rect_size, offset=(0.,0.), scale=1., base=0., **kwargs): rect_size_y, rect_size_x = rect_size - assert rect_size_y % 2 == 1 # Odd - assert rect_size_x % 2 == 1 # Odd + if rect_size_y % 2 != 1 or rect_size_x % 2 != 1: + raise ValueError( + f'rect_size must have odd positive dimensions, got {(rect_size_y, rect_size_x)}' + ) half_rect_size_y = rect_size_y // 2 half_rect_size_x = rect_size_x // 2 diff --git a/src/psfmodel/psf.py b/src/psfmodel/psf.py new file mode 100755 index 0000000..e79238e --- /dev/null +++ b/src/psfmodel/psf.py @@ -0,0 +1,1320 @@ +"""Core PSF abstraction and fitting machinery for psfmodel. + +This module defines the abstract :class:`PSF` base class and all shared fitting +infrastructure: background gradient estimation, sigma-clipping, Jacobian and +covariance computation, and bounded Powell optimisation for astrometric position +recovery. Concrete PSF subclasses (e.g. :class:`~psfmodel.gaussian.GaussianPSF`) +inherit from :class:`PSF` and implement the :meth:`PSF.eval` family of methods. + +Module-level constants: + _BKGND_SIGMA_FLOOR: Numerical noise floor for sigma-clipping in + :meth:`PSF.background_gradient_fit`. Convergence is declared when the + residual standard deviation falls below this fraction of the gradient + scale, preventing spurious masking driven by floating-point noise. +""" + +import logging +from abc import ABC, abstractmethod +from typing import Any, cast + +import numpy as np +import numpy.ma as ma +import numpy.typing as npt +import scipy.linalg as linalg +import scipy.optimize as sciopt + +# Version +try: + from ._version import __version__ +except ImportError: # pragma: no cover + __version__ = 'Version unspecified' + +# Unbounded ends for the additive PSF ``base`` in :meth:`PSF._find_position` when +# ``allow_nonzero_base`` is True and ``use_angular_params`` is False (Powell box +# constraints). +_FIT_PSF_BASE_BOUND_MIN = float('-inf') +_FIT_PSF_BASE_BOUND_MAX = float('inf') + +# Relative noise floor for background_gradient_fit sigma-clipping: sigma values +# below sqrt(eps) * gradient_scale are floating-point noise, not a real residual +# distribution width, and should not be used to mask pixels. +_BKGND_SIGMA_FLOOR = float(np.sqrt(np.finfo(np.float64).eps)) + +# Finite-difference step sizes used by :meth:`PSF._find_position` when computing +# the Jacobian of the residual vector for covariance estimation. The step for +# parameter ``p`` is ``max(|p| * _JACO_REL_EPS, _JACO_ABS_EPS)`` so the step is +# proportional to the parameter magnitude but never smaller than the absolute floor. +_JACO_REL_EPS: float = 1e-5 +_JACO_ABS_EPS: float = 1e-7 + + +class PSF(ABC): + """Abstract base for 2-D point-spread models used in fitting and rendering. + + Subclass :class:`PSF` to provide a concrete model. The base class supplies shared + utilities (for example :meth:`find_position`, background helpers, and motion smear + via :meth:`_eval_rect_smeared`); evaluation itself is defined by subclasses through + the abstract API below. + + **Abstract methods (must implement)** + + * :meth:`eval_point` -- Evaluate the continuous PSF at fractional ``(y, x)`` + (scalar or broadcast arrays). Signature includes keyword-only ``scale`` and + ``base``. Returns a float or a :class:`numpy.ndarray` of floats matching the + broadcast shape of ``coord``. Origin ``(0, 0)`` is the PSF center; coordinates + may be negative. Subclasses may add keyword-only parameters. + + * :meth:`eval_rect` -- Build a rectangular, pixel-integrated patch. Signature: + ``rect_size``, ``offset``, then keyword-only ``movement``, + ``movement_granularity``, ``scale``, ``base``, and subclass-specific + ``**kwargs``. Must return :class:`numpy.ndarray` with ``dtype`` ``float64`` and + shape ``(height, width)`` matching ``rect_size`` as ``(size_y, size_x)``. Should + validate inputs (for example odd ``rect_size``) and raise :exc:`ValueError` for + invalid arguments with a clear message. + + * :meth:`_eval_rect` -- Internal hook for the same patch without the checks in + :meth:`eval_rect`; same core keyword-only ``scale`` and ``base``. Must return a + ``float64`` array of shape ``(height, width)``. Subclasses often add + keyword-only model parameters. Callers must pass consistent arguments; + implementations may omit validation. + + **Protected attributes** + + * ``_logger`` -- :class:`logging.Logger` for this instance (set in + :meth:`__init__`). Subclasses log warnings and diagnostics through it. + + * ``_additional_params`` -- :class:`list` (initialized empty), each entry a + ``(lower_bound, upper_bound, name)`` tuple of two floats and a :class:`str` + keyword name. Used by :meth:`find_position` / :meth:`_find_position` to append + extra optimized parameters (bounds and :meth:`eval_rect` keyword). Subclasses + append one tuple per fittable quantity in construction order; leave the list + empty if there are no extra parameters (for example fixed-width models). + + **Errors and return conventions** + + Public evaluators should reject bad arguments with :exc:`ValueError` (or + :exc:`TypeError` for wrong types) where feasible. Some higher-level routines + (notably :meth:`find_position`) signal failure by returning ``None`` instead of + raising. Numeric outputs are real floating point; ``scale`` multiplies the model + amplitude and ``base`` adds a constant offset in the same units as the evaluated + PSF values unless a subclass documents physical units. + """ + + def __init__( + self, + *, + logger: logging.Logger | None = None, + detailed_logging: bool = False, + **kwargs: Any, + ) -> None: + """Create a PSF object. Only called by subclasses. + + Parameters: + logger: Logger for diagnostic messages from this instance. If omitted, + uses :func:`logging.getLogger` with this module's ``__name__``. + detailed_logging: If True, emit INFO and DEBUG messages during fitting + (for example from :meth:`find_position`). Optimizer failure still logs + at WARNING when this is False. + **kwargs: Reserved for subclass forward-compatibility; unknown names are + ignored. + """ + + self._logger = logger if logger is not None else logging.getLogger(__name__) + self.detailed_logging = detailed_logging + self._additional_params: list[Any] = [] + + @abstractmethod + def eval_point( + self, + coord: ( + tuple[float | npt.NDArray[np.floating], float | npt.NDArray[np.floating]] + | npt.NDArray[np.floating] + ), + *, + scale: float = 1.0, + base: float = 0.0, + ) -> float | npt.NDArray[np.floating]: + """Evaluate the PSF at a single, fractional, point. + + (0, 0) is the center of the PSF and x and y may be negative. + + Parameters: + coord: The coordinate (y, x) at which to evaluate the PSF. + scale: A scale factor to apply to the resulting PSF. + base: A scalar added to the resulting PSF. + + Other parameters may be available for specific subclasses. + + Returns: + The PSF value at the given coordinate. + """ + ... # pragma: no cover + + # @abstractmethod + # def eval_pixel(self, + # coord: list[int] | tuple[int, int], + # offset: list[float] | tuple[float, float] = (0., 0.), + # *, + # scale: float = 1., + # base: float = 0., + # **kwargs: Any) -> float: + # """Evaluate the PSF integrated over an entire integer pixel. + + # The returned array has the PSF offset from the center by (offset_y,offset_x). An + # offset of (0, 0) places the PSF in the upper left corner of the center pixel + # while + # an offset of (0.5, 0.5) places the PSF in the center of the center pixel. The + # offset should be limited to the range [0, 1). + + # Parameters: + # coord: The integer coordinate (y, x) at which to evaluate the PSF. + # offset: The amount (offset_y, offset_x) to offset the center of the PSF. + # scale: A scale factor to apply to the resulting PSF. + # base: A scalar added to the resulting PSF. + + # Other inputs may be available for specific subclasses. + # """ + # ... + + @abstractmethod + def eval_rect( + self, + rect_size: list[int] | tuple[int, int], + offset: list[float] | tuple[float, float] = (0.5, 0.5), + *, + movement: tuple[float, float] | None = None, + movement_granularity: float = 0.1, + scale: float = 1.0, + base: float = 0.0, + **kwargs: Any, + ) -> npt.NDArray[np.float64]: + """Create a rectangular pixelated PSF. + + This is done by evaluating the PSF function from + [-rect_size_y//2:rect_size_y//2] to [-rect_size_x//2:rect_size_x//2]. + + The returned array has the PSF offset from the center by + (offset_y, offset_x). An offset of (0, 0) places the PSF in the upper + left corner of the center pixel while an offset of (0.5, 0.5) + places the PSF in the center of the center pixel. The angle is applied + relative to this new origin, so as angle changes the center of the + ellipse does not move. + + Parameters: + rect_size: The size of the rectangle (rect_size_y, rect_size_x) of the + returned PSF. Both dimensions must be odd. + offset: The amount (offset_y, offset_x) to offset the center of the PSF. + movement: The amount of motion blur in the (Y, X) direction. Must be a tuple + of scalars. None means no movement. + movement_granularity: The number of pixels to step for each smear while doing + motion blur. A smaller granularity means that the resulting PSF will be + more precise but also take longer to compute. + scale: A scale factor to apply to the resulting PSF. + base: A scalar added to the resulting PSF. + + Other inputs may be available for specific subclasses. + + Returns: + The integral of the 2-D PSF over each full pixel in the rectangle. + """ + ... # pragma: no cover + + @abstractmethod + def _eval_rect( + self, + rect_size: tuple[int, int], + offset: tuple[float, float] = (0.5, 0.5), + *, + scale: float = 1.0, + base: float = 0.0, + **kwargs: Any, + ) -> npt.NDArray[np.float64]: + """Pixel-integrated rectangular PSF; internal counterpart to :meth:`eval_rect`. + + Used by :meth:`_eval_rect_smeared` and subclass implementations. Unlike + :meth:`eval_rect`, this hook performs no input validation, bounds checking, or + clipping; callers must supply consistent arguments. + + Parameters: + rect_size: ``(height, width)`` in pixels, i.e. ``(size_y, size_x)`` (row and + column counts). This matches the shape of the returned array. + offset: ``(offset_y, offset_x)`` subpixel shift of the PSF reference in + fractional pixel coordinates. Default ``(0.5, 0.5)`` centers the model in + the middle pixel; ``(0.0, 0.0)`` uses the top-left corner of that pixel + as the reference (same convention as :meth:`eval_rect`). + scale: Multiplier applied to the PSF amplitude before ``base`` is added. + base: Additive baseline added to every output pixel after ``scale``. + + Returns: + A :class:`numpy.ndarray` of dtype ``float64`` with shape ``(height, width)`` + containing the rectangular, pixel-sampled PSF. + + Note: + Concrete subclasses may add keyword-only parameters for model-specific + quantities (for example width or angle on a Gaussian). + """ + ... # pragma: no cover + + def _eval_rect_smeared( + self, + rect_size: tuple[int, int], + offset: tuple[float, float] = (0.5, 0.5), + *, + movement: tuple[float, float] | None = None, + movement_granularity: float = 0.1, + scale: float = 1.0, + base: float = 0.0, + **kwargs: Any, + ) -> npt.NDArray[np.float64]: + """Evaluate and sum a PSF multiple times to simulate motion blur. + + Parameters: + rect_size: The size of the rectangle (rect_size_y, rect_size_x) of the + returned PSF. Both dimensions must be odd. + offset: The amount (offset_y, offset_x) to offset the center of the PSF. A + positive offset effectively moves the PSF down and to the left in image + coordinates. + movement: The total amount (my, mx) the PSF moves. The movement is assumed to + be centered on the given offset and exists half on either side. + movement_granularity: The number of pixels to step for each smear while doing + motion blur. + scale: A scale factor to apply to the resulting PSF. + base: A scalar added to the resulting PSF. + + Other inputs may be available for specific subclasses. + """ + + if movement is None or (movement[0] == 0 and movement[1] == 0): + return self._eval_rect(rect_size, offset=offset, scale=scale, base=base, **kwargs) + + num_steps = int( + max(abs(movement[0]) / movement_granularity, abs(movement[1]) / movement_granularity) + ) + + if num_steps == 0: + step_y = 0.0 + step_x = 0.0 + else: + step_y = movement[0] / num_steps + step_x = movement[1] / num_steps + + total_rect = None + + for step in range(num_steps + 1): + y = offset[0] + step_y * (step - num_steps / 2.0) + x = offset[1] + step_x * (step - num_steps / 2.0) + + rect = self._eval_rect(rect_size, offset=(y, x), scale=scale, base=base, **kwargs) + if total_rect is None: + total_rect = rect + else: + total_rect += rect + if total_rect is None: + raise RuntimeError('Motion smear loop produced no PSF rectangles') + + total_rect /= float(num_steps + 1) + + return total_rect + + # ========================================================================== + # + # Static functions for creating background gradients + # + # ========================================================================== + + @staticmethod + def _background_gradient_coeffs(shape: tuple[int, int], order: int) -> npt.NDArray[np.float64]: + """Internal routine for creating the coefficient matrix. + + Fundamentally this creates a coefficient matrix indicating the powers of different + orders of polynomials. For example, an order 1 polynomial (Ax + B) when performed + in two dimensions becomes (Ax + By + C), which has three free parameters. An order + 2 polynomial (Ax^2 + Bx + C) in two dimensions becomes (Ax^2 + By^2 + Cxy + Dx + + Ey + F), which has six free parameters. The number of free parameters is (order * + (order+1)) / 2. + + To make further computation easy, this coefficient matrix is then multiplied with + a 2-D array that represents the X and Y coordinates, ranging from -N to N such + that the values are (0, 0) at the center of the image. This is the matrix that is + returned to the caller. + + The resulting 3-D matrix has the indicies: + 0: Y + 1: X + 2: parameter number + """ + + if shape[0] < 0 or shape[1] < 0 or shape[0] % 2 != 1 or shape[1] % 2 != 1: + raise ValueError(f'Image must have odd positive shape in each dimension, got {shape}') + if order < 0: + raise ValueError(f'Order must be non-negative, got {order}') + + # Create arrays of indexes for line and sample with (0, 0) at the center of the + # image + y_values = np.arange(shape[0])[:, np.newaxis] - int(shape[0] / 2) + x_values = np.arange(shape[1])[np.newaxis, :] - int(shape[1] / 2) + + y_powers: list[float | npt.NDArray[np.floating]] = [1.0] + x_powers: list[float | npt.NDArray[np.floating]] = [1.0] + + nparams = int((order + 1) * (order + 2) / 2) + a3d = np.empty((shape[0], shape[1], nparams)) + a3d[:, :, 0] = 1.0 # This is the constant term of the polynomial + + k = 0 # Parameter number + for p in range(1, order + 1): + # This creates, sequentially, L, L**2, L**3... and S, S**2, S**3... + y_powers.append(y_powers[-1] * y_values) + x_powers.append(x_powers[-1] * x_values) + + # These nested loops walk through all the combinations of L**N * S**M + # such that N+M == P where P ranges from 1 to . This gives us + # all combinations like: + # 1 + # Y + # X + # X*Y + # Y**2 + # X**2 + for q in range(p + 1): + k += 1 + a3d[:, :, k] = y_powers[q] * x_powers[p - q] + + return a3d + + @staticmethod + def background_gradient_fit( + image: npt.NDArray[np.floating], + order: int = 2, + ignore_center: int | tuple[int, int] | None = None, + num_sigma: float | None = None, + debug: bool = False, + *, + logger: logging.Logger | None = None, + ) -> tuple[npt.NDArray[np.float64] | None, npt.NDArray[np.bool_] | None]: + """Return the polynomial fit to the pixels of an image. + + Parameters: + image: 2D array to fit; must have odd shape in each dimension. + order: Order of the polynomial. + ignore_center: A scalar or tuple (ignore_y, ignore_x) giving the number of + pixels on either side of the center to ignore while fitting. 0 means + ignore the center pixel. None means don't ignore anything. + num_sigma: Outlier rejection uses the fit residual ``image - gradient``: + unmasked pixels with absolute residual at least ``num_sigma`` times the + standard deviation of that residual (mask-aware) are masked and the fit + is repeated until convergence or until sigma falls below the numerical + noise floor. None disables this. Non-positive values disable masking + after the initial least-squares fit. + debug: Set to debug bad pixel removal. + logger: Logger for debug messages; defaults to this module's logger. + + Returns: + A tuple of the background coefficient array and the mask of ignored pixels. + """ + + fit_logger = logger if logger is not None else logging.getLogger(__name__) + + if len(image.shape) != 2: + raise ValueError(f'Image must be 2-D, got {image.shape}') + if ( + image.shape[0] < 0 + or image.shape[1] < 0 + or image.shape[0] % 2 != 1 + or image.shape[1] % 2 != 1 + ): + raise ValueError( + f'Image must have odd positive shape in each dimension, got {image.shape}' + ) + if order < 0: + raise ValueError(f'Order must be non-negative, got {order}') + + shape = cast(tuple[int, int], image.shape) + + is_masked = False + + if ignore_center is not None or num_sigma is not None: + if isinstance(image, ma.MaskedArray): + # We're going to change the mask so make a copy first + image = image.copy() + else: + image = image.view(ma.MaskedArray) + + if isinstance(image, ma.MaskedArray): + image.mask = cast(npt.NDArray[np.bool_], ma.getmaskarray(image)) + is_masked = True + + if ignore_center is not None: + if isinstance(ignore_center, int): + ignore_y = ignore_center + ignore_x = ignore_center + else: + ignore_y, ignore_x = ignore_center + if ignore_y * 2 + 1 >= shape[0] or ignore_x * 2 + 1 >= shape[1]: + if debug: # pragma: no cover + fit_logger.debug('Background fit: ignore_center covers entire image') + return None, None + ctr_y = shape[0] // 2 + ctr_x = shape[1] // 2 + image[ + ctr_y - ignore_y : ctr_y + ignore_y + 1, ctr_x - ignore_x : ctr_x + ignore_x + 1 + ] = ma.masked + + nparams = int((order + 1) * (order + 2) // 2) + + a3d = PSF._background_gradient_coeffs(shape, order) + + num_bad_pixels = 0 + if num_sigma is not None: + num_bad_pixels = cast(int, ma.count_masked(image)) # type: ignore[no-untyped-call] + if debug: # pragma: no cover + fit_logger.debug( + 'Background gradient fit: initial masked pixel count %s', num_bad_pixels + ) + + while True: + # Reshape properly for linalg.lstsq + a2d = a3d.reshape((image.size, nparams)) + b1d = image.flatten() + + if is_masked: + # linalg doesn't support masked arrays! + a2d = a2d[~cast(npt.NDArray[np.bool_], ma.getmaskarray(b1d))] + b1d = ma.compressed(b1d) + + if a2d.shape[0] < a2d.shape[1]: # Underconstrained + if debug: # pragma: no cover + fit_logger.debug( + 'Background gradient fit: underconstrained system %s', a2d.shape + ) + return None, None + + coeffts, _, _, _ = cast( + tuple[ + npt.NDArray[np.float64], + npt.NDArray[np.float64], + int, + npt.NDArray[np.float64] | None, + ], + linalg.lstsq(a2d, b1d), + ) + + if num_sigma is None: + break + num_sigma_f = float(num_sigma) + if num_sigma_f <= 0: + break + + gradient = PSF.background_gradient(shape, coeffts) + delta_img = image - gradient + sigma = ma.std(delta_img) + if ma.is_masked(sigma): + break + sigma_f = float(sigma) + if not np.isfinite(sigma_f) or sigma_f <= 0: + break + # Break when sigma is at the floating-point noise level (i.e., the + # fit is already exact up to machine precision); using such a sigma + # as a threshold would mask pixels based on numerical noise, not + # real outliers. + gradient_scale = float(np.max(np.abs(gradient))) + if gradient_scale > 0 and sigma_f <= _BKGND_SIGMA_FLOOR * gradient_scale: + break + threshold = num_sigma_f * sigma_f + if debug: # pragma: no cover + fit_logger.debug( + 'Background gradient fit: residual std=%s max_abs=%s threshold=%s', + sigma_f, + float(ma.max(ma.abs(delta_img))), + threshold, + ) + outlier_mask = ma.filled(ma.abs(delta_img) >= threshold, False) + image[outlier_mask] = ma.masked + + new_num_bad_pixels = cast(int, ma.count_masked(image)) # type: ignore[no-untyped-call] + if debug: # pragma: no cover + fit_logger.debug( + 'Background gradient fit: masked pixel count now %s', new_num_bad_pixels + ) + if new_num_bad_pixels == num_bad_pixels: + break + num_bad_pixels = new_num_bad_pixels + + if is_masked: + return coeffts, ma.getmaskarray(image) + else: + return coeffts, np.zeros(shape, dtype=np.bool_) + + @staticmethod + def background_gradient( + rect_size: tuple[int, int], bkgnd_params: npt.ArrayLike + ) -> npt.NDArray[np.float64]: + """Create a background gradient. + + Parameters: + rect_size: ``(size_y, size_x)``, the shape of the output grid (height, width) + in pixels; must match the image shape used when the coefficients were fit. + bkgnd_params: Coefficients of the background polynomial (1-D array-like). The + polynomial order is inferred from the number of elements. + + Returns: + A :class:`numpy.ndarray` of ``dtype`` ``float64`` with shape ``rect_size`` + (i.e. ``(size_y, size_x)``): the evaluated 2-D background polynomial at each + pixel center of the grid. + """ + + bkgnd_params = np.array(bkgnd_params) + + order = int(np.sqrt(len(bkgnd_params) * 2)) - 1 + + a3d = PSF._background_gradient_coeffs(rect_size, order) + result = np.sum(bkgnd_params * a3d, axis=-1) + + return cast(npt.NDArray[np.float64], result) + + # ========================================================================== + # + # Functions for finding astrometric positions + # + # ========================================================================== + + def find_position( + self, + image: npt.NDArray[np.floating], + box_size: tuple[int, int], + starting_point: tuple[float, float], + *, + search_limit: float | tuple[float, float] = (1.5, 1.5), + bkgnd_degree: int | None = 2, + bkgnd_ignore_center: tuple[int, int] = (2, 2), + bkgnd_num_sigma: float | None = None, + tolerance: float = 1e-6, + num_sigma: float | None = None, + max_bad_frac: float = 0.2, + allow_nonzero_base: bool = False, + scale_limit: float = 1000.0, + use_angular_params: bool = True, + compute_uncertainty: bool = True, + ) -> None | tuple[float, float, dict[str, Any]]: + """Find the (y, x) coordinates that best fit a 2-D PSF to an image. + + Parameters: + image: The image (2-D). + box_size: A tuple (box_y, box_x) indicating the size of the PSF to use. This + governs both the size of the PSF created at each step as well as the size + of the subimage looked at. Both box_y and box_x must be odd. + starting_point: A tuple (y, x) indicating the best guess for where the object + can be found. Searching is limited to a region around this point + controlled by `search_limit`. + search_limit: A scalar or tuple (y_limit, x_limit) specifying the maximum + distance to search from `starting_point`. If a scalar, both x_limit + and y_limit are the same. + bkgnd_degree: The degree (order) of the background gradient polynomial. None + means no background gradient is fit. + bkgnd_ignore_center: A tuple (ny, nx) giving the number of pixels on each side + of the center point to ignore when fitting the background. The ignored + region is thus ny*2+1 by nx*2+1. + bkgnd_num_sigma: The number of sigma a pixel needs to be beyond the background + gradient to be ignored. None means don't ignore bad pixels while computing + the background gradient. + tolerance: The tolerance (both X and Function) in the Powell optimization + algorithm. + num_sigma: The number of sigma for a pixel to be considered bad during PSF + fitting. None means don't ignore bad pixels while fitting the PSF. + max_bad_frac: The maximum allowable number of pixels masked during PSF + fitting. If more pixels than this fraction are masked, the position fit + fails. + allow_nonzero_base: If True, allow the base of the PSF (constant bias) to + vary. Otherwise the base of the PSF is always at zero and can only scale + in the positive direction. + scale_limit: The maximum PSF scale allowed. + use_angular_params: Use angles to optimize parameter values. + compute_uncertainty: If True (default), compute 1-sigma parameter + uncertainties via a finite-difference Jacobian after the fit. This + requires one additional forward-model evaluation per free parameter, + roughly doubling total cost for a typical 3-parameter fit. Set to + False in hot loops (e.g. batch characterization runs) when the + ``x_err``, ``y_err``, ``scale_err``, and ``base_err`` metadata + entries are not needed; they will be ``NaN`` when skipped. + + Returns: + None if no fit found. + + Otherwise returns pos_y, pos_x, metadata. Metadata is a dictionary + containing:: + + 'x' Full-image X coordinate of fitted position + (same as pos_x). + 'x_err' 1-sigma uncertainty in X (pixels). + 'y' Full-image Y coordinate of fitted position + (same as pos_y). + 'y_err' 1-sigma uncertainty in Y (pixels). + '_local_x' Subimage-relative X offset (offset from the + center of the cropped subimage). + '_local_y' Subimage-relative Y offset. + 'scale' The best fit PSF scale. + 'scale_err' 1-sigma uncertainty in PSF scale. + 'base' The best fit PSF base. + 'base_err' 1-sigma uncertainty in PSF base; 0.0 + when allow_nonzero_base is False. + 'residual_rss' Sum of squared residuals over unmasked + pixels. + 'reduced_chi2' residual_rss divided by degrees of + freedom (n_valid - n_params); near 1.0 + for a noise-limited fit. + 'noise_rms' Per-pixel noise estimate from residuals: + sqrt(rss / n_valid). + 'peak_snr' Amplitude signal-to-noise ratio: + scale / noise_rms. + 'subimg' The box_size area of the original image + surrounding starting_point masked as + necessary using the num_sigma threshold. + 'bkgnd_params' The tuple of parameters defining the + background gradient. + 'bkgnd_mask' The mask used during background gradient + fitting. + 'gradient' The box_size background gradient. + 'subimg-gradient' The subimg with the background gradient + subtracted. + 'psf' The PSF model from eval_rect with the + fitted scale and base (same array as + scaled_psf). + 'scaled_psf' Same as psf; model to compare to + subimg-gradient during outlier rejection. + + In addition, metadata includes two entries for each "additional + parameter" used during optimization: one for the value and one for + the 1-sigma uncertainty (``'param'`` and ``'param_err'``). + + Raises: + ValueError: If ``box_size`` is not a tuple of odd positive integers. + TypeError: If ``num_sigma`` is not a number or None. + ValueError: If ``num_sigma`` is not greater than 0. + ValueError: If the starting point is too close to the edge of the image. + ValueError: If the subimage has too many pixels masked. + """ + + if box_size[0] < 0 or box_size[1] < 0 or box_size[0] % 2 != 1 or box_size[1] % 2 != 1: + raise ValueError( + f'box_size must have odd positive shape in each dimension, got {box_size}' + ) + + if num_sigma is not None: + if not isinstance(num_sigma, (int, float)): + raise TypeError( + f'num_sigma must be a number or None, got {type(num_sigma).__name__}' + ) + if num_sigma <= 0: + raise ValueError(f'num_sigma must be > 0, got {num_sigma}') + + half_box_size_y = box_size[0] // 2 + half_box_size_x = box_size[1] // 2 + + starting_pix = (int(starting_point[0]), int(starting_point[1])) + + if self.detailed_logging: + self._logger.info('find_position: entering') + self._logger.info( + 'find_position: image masked=%s num_masked=%s', + isinstance(image, ma.MaskedArray), + int(np.sum(ma.getmaskarray(image))), + ) + self._logger.info( + 'find_position: image min=%s max=%s mean=%s', + float(np.min(image)), + float(np.max(image)), + float(np.mean(image)), + ) + self._logger.info( + 'find_position: box_size=%s starting_point=%s search_limit=%s', + box_size, + starting_point, + search_limit, + ) + self._logger.info( + 'find_position: bkgnd_degree=%s bkgnd_ignore_center=%s ' + 'bkgnd_num_sigma=%s tolerance=%s num_sigma=%s max_bad_frac=%s ' + 'allow_nonzero_base=%s scale_limit=%s use_angular_params=%s ' + 'compute_uncertainty=%s', + bkgnd_degree, + bkgnd_ignore_center, + bkgnd_num_sigma, + tolerance, + num_sigma, + max_bad_frac, + allow_nonzero_base, + scale_limit, + use_angular_params, + compute_uncertainty, + ) + + # Too close to the edge means we can't search + if ( + starting_pix[0] - half_box_size_y < 0 + or starting_pix[0] + half_box_size_y >= image.shape[0] + or starting_pix[1] - half_box_size_x < 0 + or starting_pix[1] + half_box_size_x >= image.shape[1] + ): + if self.detailed_logging: + self._logger.info('find_position: too close to image edge, aborting') + return None + + sub_img = image[ + starting_pix[0] - half_box_size_y : starting_pix[0] + half_box_size_y + 1, + starting_pix[1] - half_box_size_x : starting_pix[1] + half_box_size_x + 1, + ] + + if self.detailed_logging: + self._logger.info( + 'find_position: subimage min=%s max=%s mean=%s', + float(np.min(sub_img)), + float(np.max(sub_img)), + float(np.mean(sub_img)), + ) + + if not isinstance(search_limit, (list, tuple)): + search_limit = (float(search_limit), float(search_limit)) + + if num_sigma is not None: + if isinstance(sub_img, ma.MaskedArray): + # We're going to change the mask so make a copy first + sub_img = sub_img.copy() + else: + sub_img = sub_img.view(ma.MaskedArray) + + num_bad_pixels = 0 + + while True: + if self.detailed_logging: + self._logger.debug('find_position: outer loop bad_pixel_count=%s', num_bad_pixels) + ret = self._find_position( + sub_img, + search_limit, + scale_limit, + bkgnd_degree, + bkgnd_ignore_center, + bkgnd_num_sigma, + tolerance, + allow_nonzero_base, + use_angular_params, + compute_uncertainty, + ) + if ret is None: + if self.detailed_logging: + self._logger.info('find_position: inner fit returned None') + return None + + res_y, res_x, details = ret + + if num_sigma is None: + break + + resid = details['subimg-gradient'] - details['scaled_psf'] + resid_std = np.std(resid) + + if self.detailed_logging: + self._logger.debug('find_position: residual per pixel=%s', resid) + self._logger.debug('find_position: resid_std=%s', resid_std) + + if num_sigma is not None: + sub_img[np.where(np.abs(resid) > num_sigma * resid_std)] = ma.masked + + new_num_bad_pixels = cast(int, ma.count_masked(sub_img)) # type: ignore[no-untyped-call] + if new_num_bad_pixels == num_bad_pixels: + break + if new_num_bad_pixels == sub_img.size: + if self.detailed_logging: + self._logger.info('find_position: all pixels masked, returning None') + return None # All masked + if new_num_bad_pixels > max_bad_frac * sub_img.size: + if self.detailed_logging: + self._logger.info('find_position: too many pixels masked, returning None') + return None # Too many masked + num_bad_pixels = new_num_bad_pixels + + # Promote subimage-relative offsets to full-image coordinates so that + # details['x']/details['y'] match the returned pos_x/pos_y values, as + # documented. Preserve the local offsets for any internal diagnostics. + details['_local_y'] = details['y'] + details['_local_x'] = details['x'] + details['y'] = res_y + starting_pix[0] + details['x'] = res_x + starting_pix[1] + + if self.detailed_logging: + msg = f'find_position returning Y {details["y"]:.4f}' + msg += f' +/- {details["y_err"]:.4f}' + msg += f' X {details["x"]:.4f}' + msg += f' +/- {details["x_err"]:.4f}' + if details['scale'] is not None: + msg += f' Scale {details["scale"]:.4f} Base {details["base"]:.4f}' + if 'sigma_y' in details: + msg += f' SY {details["sigma_y"]:.4f} SX {details["sigma_x"]:.4f}' + self._logger.info(msg) + + return details['y'], details['x'], details + + def _fit_psf_func( + self, + params: tuple[float, ...], + sub_img: npt.NDArray[np.floating], + search_limit: tuple[float, float], + scale_limit: float, + allow_nonzero_base: bool, + use_angular_params: bool, + *additional_params: Any, + ) -> float: + """Scalar objective for PSF fitting; minimized in :meth:`_find_position`. + + Evaluates :meth:`eval_rect` at the candidate parameters, subtracts the model from + ``sub_img``, and returns the Euclidean norm of the flattened residual (root sum + of squared differences). + + Parameters: + params: Optimizer vector: offset(s), scale, optional ``base`` (if + ``allow_nonzero_base``), then one value per extra PSF parameter. Meaning + depends on ``use_angular_params`` (angles vs direct values); see the + implementation. + sub_img: 2-D patch (same shape as the PSF grid); background should already be + subtracted by the caller when applicable. + search_limit: ``(limit_y, limit_x)`` centroid search half-ranges in pixels, + used when mapping ``params`` to offsets (see ``use_angular_params``). + scale_limit: Upper bound on PSF ``scale`` for :meth:`eval_rect`. + allow_nonzero_base: If ``True``, ``params`` includes a fitted constant + ``base`` passed to :meth:`eval_rect`; if ``False``, ``base`` is zero. + use_angular_params: If ``True``, map bounded angles to offsets, scale, and + extra PSF parameters; ``base`` (when ``allow_nonzero_base`` is ``True``) + always uses direct physical bounds regardless of this flag, because its + physical range is unbounded and cannot be cosine-mapped. If ``False``, + all ``params`` are physical values within their respective bounds. + additional_params: Zero or more ``(lo, hi, name)`` tuples giving bounds and + keyword names for subclass-specific :meth:`eval_rect` arguments. + + Returns: + Non-negative float cost (lower is better). + """ + + # Make an offset of "0" be the center of the pixel (0.5, 0.5) + if use_angular_params: + # params are (ang_y, ang_x, ang_scale, ...) + offset_y = search_limit[0] * np.cos(params[0]) + 0.5 + offset_x = search_limit[1] * np.cos(params[1]) + 0.5 + scale = scale_limit * (np.cos(params[2]) + 1) / 2 + else: + # params are (y, x, scale, ...) + offset_y = params[0] + 0.5 + offset_x = params[1] + 0.5 + scale = params[2] + # This was only needed when using an optimization func that doesn't support + # bounds. + # fake_resid = None + # if not (-search_limit[0] <= params[0] <= search_limit[0]): + # fake_resid = abs(params[0]) * 1e10 + # elif not (-search_limit[1] <= params[1] <= search_limit[1]): + # fake_resid = abs(params[1]) * 1e10 + # elif not (0.00001 <= scale <= scale_limit): + # fake_resid = abs(scale) * 1e10 + # if fake_resid is not None: + # fake_return = np.zeros(sub_img.shape).flatten() + # fake_return[:] = fake_resid + # if self.detailed_logging: + # full_resid = np.sqrt(np.sum(fake_return**2)) + # print('RESID', full_resid) + # return fake_return + + base = 0.0 + param_end = 3 + if allow_nonzero_base: + # Direct physical decode regardless of use_angular_params: base uses + # physical optimizer bounds in both modes (not [0, pi]), so params[3] + # is a physical baseline value and needs no cosine remapping. + base = params[3] + param_end = 4 + + addl_vals_dict = {} + for i, ap in enumerate(additional_params): + if use_angular_params: + val = (ap[1] - ap[0]) / 2.0 * (np.cos(params[param_end + i]) + 1.0) + ap[0] + else: + val = params[param_end + i] + addl_vals_dict[ap[2]] = val + + psf = self.eval_rect( + cast(tuple[int, int], sub_img.shape), + (offset_y, offset_x), + scale=scale, + base=base, + **addl_vals_dict, + ) + + resid = (sub_img - psf).flatten() + + full_resid = cast(float, np.sqrt(np.sum(resid**2))) + + if self.detailed_logging: + msg = f'OFFY {offset_y:8.5f} OFFX {offset_x:8.5f} SCALE {scale:9.5f} ' + msg += f'BASE {base:9.5f}' + for ap in additional_params: + msg += f' {ap[2].upper()} {addl_vals_dict[ap[2]]:8.5f}' + msg += f' RESID {full_resid:f}' + self._logger.debug(msg) + + return full_resid + + def _find_position( + self, + sub_img: npt.NDArray[np.floating], + search_limit: tuple[float, float], + scale_limit: float, + bkgnd_degree: int | None, + bkgnd_ignore_center: tuple[int, int], + bkgnd_num_sigma: float | None, + tolerance: float, + allow_nonzero_base: bool, + use_angular_params: bool, + compute_uncertainty: bool, + ) -> None | tuple[float, float, dict[str, Any]]: + """Fit PSF position and shape on a fixed subimage via bounded Powell optimization. + + This is the inner numerical core for :meth:`find_position`: it subtracts an + optional polynomial background, then runs :func:`scipy.optimize.minimize` (Powell) + on the scalar objective from :meth:`_fit_psf_func` (root-sum-square residual + between data and model). + + Parameters: + sub_img: Cropped 2-D image (float), same shape as the PSF evaluation patch. + May be a :class:`numpy.ma.MaskedArray`; the background fit omits masked + pixels, and masked entries do not contribute to the scalar objective in + :meth:`_fit_psf_func` (masked squared residuals are excluded from the + sum). + search_limit: ``(limit_y, limit_x)`` maximum search half-range for subpixel + offsets, in **pixels**, relative to the subimage. With + ``use_angular_params`` True, offsets map from bounded angles via cosine + (see implementation); with False, ``offset_*`` are bounded directly by + these limits. + scale_limit: Upper bound on PSF ``scale`` passed to :meth:`eval_rect` (same + units as that method). The lower bound is a small positive value enforced + inside :meth:`_fit_psf_func`. + bkgnd_degree: If ``None``, no background is fit and ``gradient`` is all zeros. + If an int, polynomial order for :meth:`background_gradient_fit` on + ``sub_img`` before PSF optimization. + bkgnd_ignore_center: ``(ny, nx)`` passed to :meth:`background_gradient_fit` as + ``ignore_center``: a centered block of size ``(2*ny+1, 2*nx+1)`` is masked + out of the background fit. Ignored when ``bkgnd_degree`` is ``None``. + bkgnd_num_sigma: Optional outlier rejection for the background fit (sigma + threshold); ``None`` disables. Only used when ``bkgnd_degree`` is not + ``None``. + tolerance: Passed to :func:`scipy.optimize.minimize` as ``tol`` (Powell + stopping tolerance for both parameter and objective changes, per SciPy). + allow_nonzero_base: If ``True``, the PSF constant ``base`` in + :meth:`eval_rect` is a free parameter; if ``False``, ``base`` is fixed at + zero and only amplitude scaling applies. + use_angular_params: If ``True``, optimize offsets, scale, optional base, and + additional parameters via angles in ``[0, pi]`` so box constraints map to + physical ranges. If ``False``, use direct bounded parameters (offsets + within ``search_limit``, etc.). + compute_uncertainty: If ``True``, compute 1-sigma uncertainties via a + finite-difference Jacobian (one extra forward-model call per free + parameter). If ``False``, all ``*_err`` entries in ``details`` are + ``NaN`` and the Jacobian is skipped. + + Returns: + ``None`` if the background fit fails (:meth:`background_gradient_fit` returns + ``None``) or if the optimizer reports failure (``success`` is False). + + Otherwise ``(offset_y, offset_x, details)``: subpixel offsets within the + subimage in pixel units (**y first, then x**), matching ``details['y']`` and + ``details['x']``. The caller adds integer slice origins to map to full-image + coordinates. + + ``details`` is a :class:`dict` that always includes at least: + + - ``'y'``, ``'x'``: fitted offsets (float). + - ``'scale'``, ``'base'``: fitted PSF scale and baseline. + - ``'subimg'``: reference to the input ``sub_img``. + - ``'bkgnd_params'``: 1-D coefficient array from the background fit, or + ``None`` if ``bkgnd_degree`` was ``None``. + - ``'bkgnd_mask'``: boolean mask from background fitting, or ``None`` if no + background fit. + - ``'gradient'``: evaluated background surface (zeros if no background fit). + - ``'subimg-gradient'``: ``sub_img - gradient`` (used for residuals). + - ``'psf'``, ``'scaled_psf'``: model patch from :meth:`eval_rect` at the + solution; identical arrays (``scaled_psf`` supports comparison to + ``subimg-gradient`` in the outer :meth:`find_position` loop). + + - ``'residual_rss'``: sum of squared residuals over unmasked pixels (float). + - ``'reduced_chi2'``: ``residual_rss / max(n_valid - n_params, 1)``; near 1 + for a well-matched noise model (float). + - ``'noise_rms'``: per-pixel noise estimate ``sqrt(rss / n_valid)`` (float). + - ``'peak_snr'``: ``scale / noise_rms``; amplitude signal-to-noise ratio + (float; 0.0 if ``noise_rms`` is zero). + - ``'y_err'``, ``'x_err'``: 1-sigma position uncertainties in pixels (float). + - ``'scale_err'``: 1-sigma uncertainty on the fitted ``scale`` (float). + - ``'base_err'``: 1-sigma uncertainty on ``base``; 0.0 when + ``allow_nonzero_base`` is False (float). + + Subclasses append one entry per additional PSF parameter (for example + ``'sigma_y'`` and ``'sigma_x'`` for :class:`~psfmodel.gaussian.GaussianPSF`), + using the internal names from ``_additional_params``, plus a corresponding + ``'_err'`` uncertainty key for each. + """ + + bkgnd_params = None + bkgnd_mask = None + gradient = np.zeros(sub_img.shape) + + if bkgnd_degree is not None: + bkgnd_params, bkgnd_mask = PSF.background_gradient_fit( + sub_img, + order=bkgnd_degree, + ignore_center=bkgnd_ignore_center, + num_sigma=bkgnd_num_sigma, + debug=self.detailed_logging, + logger=self._logger, + ) + if bkgnd_params is None: + return None + + gradient = PSF.background_gradient(cast(tuple[int, int], sub_img.shape), bkgnd_params) + + sub_img_grad = sub_img - gradient + + # Offset Y, Offset X, Scale, AdditionalParams + if use_angular_params: + bounds = [(0.0, np.pi), (0.0, np.pi), (0.0, np.pi)] + starting_guess = [np.pi / 2, np.pi / 2, np.pi / 2] + if allow_nonzero_base: + # base has no finite physical range, so it cannot be cosine-mapped + # like the other angular parameters. Use direct physical bounds in + # both modes so params[3] always holds a physical base value. + bounds += [(_FIT_PSF_BASE_BOUND_MIN, _FIT_PSF_BASE_BOUND_MAX)] + starting_guess += [0.001] + for _ in range(len(self._additional_params)): + bounds += [(0.0, np.pi)] + starting_guess += [np.pi / 2] + else: + bounds = [ + (-search_limit[0], search_limit[0]), + (-search_limit[1], search_limit[1]), + (0.0, scale_limit), + ] + starting_guess = [0.001, 0.001, scale_limit / 2] + if allow_nonzero_base: + bounds += [(_FIT_PSF_BASE_BOUND_MIN, _FIT_PSF_BASE_BOUND_MAX)] + starting_guess += [0.001] + for a_min, a_max, _a_name in self._additional_params: + bounds += [(a_min, a_max)] + starting_guess.append(cast(float, np.mean([a_min, a_max]))) + + extra_args0 = ( + sub_img_grad, + search_limit, + scale_limit, + allow_nonzero_base, + use_angular_params, + ) + if self._additional_params is not None and len(self._additional_params) > 0: + extra_args = extra_args0 + tuple(self._additional_params) + else: + extra_args = extra_args0 + + if self.detailed_logging: + self._logger.debug('-' * 80) + self._logger.debug('_find_position: starting_guess=%s', starting_guess) + self._logger.debug('_find_position: bounds=%s', bounds) + + full_result = sciopt.minimize( + self._fit_psf_func, + starting_guess, + args=extra_args, + bounds=bounds, + tol=tolerance, + method='Powell', + options={'maxiter': len(starting_guess) * 10000}, + ) + + result = full_result.x + success = full_result.success + status = full_result.status + message = full_result.message + + if not success: + self._logger.warning('find_position: optimizer did not succeed: %s', message) + return None + + # if ier < 1 or ier > 4: + # return None + + if use_angular_params: + offset_y = search_limit[0] * np.cos(result[0]) + 0.5 + offset_x = search_limit[1] * np.cos(result[1]) + 0.5 + scale = scale_limit * (np.cos(result[2]) + 1) / 2 + else: + offset_y = result[0] + 0.5 + offset_x = result[1] + 0.5 + scale = result[2] + + base = 0.0 + result_end = 3 + if allow_nonzero_base: + # Direct physical decode regardless of use_angular_params: base uses + # physical bounds in both modes (see bounds setup above), so result[3] + # is already a physical baseline value, not an angular parameter. + base = result[3] + result_end = 4 + + addl_vals_dict = {} + for i, ap in enumerate(self._additional_params): + if use_angular_params: + val = (ap[1] - ap[0]) / 2.0 * (np.cos(result[result_end + i]) + 1.0) + ap[0] + else: + val = result[result_end + i] + addl_vals_dict[ap[2]] = val + + psf = self.eval_rect( + cast(tuple[int, int], sub_img.shape), + (offset_y, offset_x), + scale=scale, + base=base, + **addl_vals_dict, + ) + + details = {} + details['x'] = offset_x + details['y'] = offset_y + details['subimg'] = sub_img + details['bkgnd_params'] = bkgnd_params + details['bkgnd_mask'] = bkgnd_mask + details['gradient'] = gradient + details['subimg-gradient'] = sub_img_grad + details['psf'] = psf + details['scale'] = scale + details['base'] = base + details['scaled_psf'] = psf + + # --- Quality metrics --- + # Residuals between the background-subtracted data and the fitted model, + # restricted to unmasked pixels, are the basis for all quality metrics and + # uncertainty estimates. + diff = sub_img_grad - psf + if isinstance(diff, ma.MaskedArray): + resid_flat = ma.compressed(diff).astype(np.float64) + else: + resid_flat = diff.flatten().astype(np.float64) + + n_valid = int(resid_flat.size) + n_params_fit = 3 + int(allow_nonzero_base) + len(self._additional_params) + rss = float(np.dot(resid_flat, resid_flat)) + if n_valid <= n_params_fit: + self._logger.warning( + 'find_position: underconstrained fit (%d valid pixels, %d fitted parameters);' + ' reduced_chi2 set to NaN', + n_valid, + n_params_fit, + ) + reduced_chi2 = float('nan') + else: + dof = n_valid - n_params_fit + reduced_chi2 = rss / dof + noise_rms = float(np.sqrt(rss / n_valid)) if n_valid > 0 else 0.0 + peak_snr = float(scale / noise_rms) if noise_rms > 0.0 else 0.0 + + details['residual_rss'] = rss + details['reduced_chi2'] = reduced_chi2 + details['noise_rms'] = noise_rms + details['peak_snr'] = peak_snr + + # --- Parameter uncertainties via finite-difference Jacobian --- + # Physical parameter vector in canonical order: + # [offset_y, offset_x, scale, (base if allow_nonzero_base), *additional...] + phys: list[float] = [float(offset_y), float(offset_x), float(scale)] + if allow_nonzero_base: + phys.append(float(base)) + for ap in self._additional_params: + phys.append(float(addl_vals_dict[ap[2]])) + + n_phys = len(phys) + + if not compute_uncertainty: + uncertainties = np.full(n_phys, np.nan) + else: + # Build the residual vector at an arbitrary physical-parameter point, + # applying the same masking as the final fit so the Jacobian is consistent. + def residuals_at_phys(params: list[float]) -> npt.NDArray[np.float64]: + oy = params[0] + ox = params[1] + sc = params[2] + bs = params[3] if allow_nonzero_base else 0.0 + start = 4 if allow_nonzero_base else 3 + extra: dict[str, Any] = { + ap2[2]: params[start + i2] for i2, ap2 in enumerate(self._additional_params) + } + model = self.eval_rect( + cast(tuple[int, int], sub_img_grad.shape), + (oy, ox), + scale=sc, + base=bs, + **extra, + ) + d = sub_img_grad - model + if isinstance(d, ma.MaskedArray): + return ma.compressed(d).astype(np.float64) + return d.flatten().astype(np.float64) + + # Forward-difference Jacobian of the residual vector in physical space. + if n_valid == 0: + # No valid pixels: uncertainties are undefined. + uncertainties = np.full(n_phys, np.nan) + else: + jac = np.zeros((n_valid, n_phys), dtype=np.float64) + for col in range(n_phys): + val = phys[col] + eps = max(abs(val) * _JACO_REL_EPS, _JACO_ABS_EPS) + phys_plus = list(phys) + phys_plus[col] += eps + r_plus = residuals_at_phys(phys_plus) + jac[:, col] = (r_plus - resid_flat) / eps + + # Covariance = reduced_chi2 * (J^T J)^{-1}; use lstsq for robustness + # when J^T J is ill-conditioned (e.g. underconstrained fits). + jtj = jac.T @ jac + cov_raw, _, _, _ = np.linalg.lstsq(jtj, np.eye(n_phys), rcond=None) + cov = cov_raw * reduced_chi2 + + # Diagonal 1-sigma uncertainties; negative variances (numerical noise) + # are clamped to zero before taking the square root. + uncertainties = np.sqrt(np.maximum(np.diag(cov), 0.0)) + + details['y_err'] = float(uncertainties[0]) + details['x_err'] = float(uncertainties[1]) + details['scale_err'] = float(uncertainties[2]) + u_start = 3 + if allow_nonzero_base: + details['base_err'] = float(uncertainties[3]) + u_start = 4 + else: + details['base_err'] = 0.0 + for i, ap in enumerate(self._additional_params): + details[ap[2] + '_err'] = float(uncertainties[u_start + i]) + + for key in addl_vals_dict: + details[key] = addl_vals_dict[key] + + if self.detailed_logging: + self._logger.debug( + '_find_position: returning offset_y=%s offset_x=%s', offset_y, offset_x + ) + self._logger.debug( + '_find_position: subimage masked pixels=%s', int(np.sum(ma.getmaskarray(sub_img))) + ) + self._logger.debug('_find_position: bkgnd_params=%s', bkgnd_params) + if bkgnd_mask is not None: + self._logger.debug( + '_find_position: bkgnd_mask bad pixels=%s', + int(np.sum(bkgnd_mask)), + ) + self._logger.debug('_find_position: PSF scale=%s base=%s', scale, base) + for key in addl_vals_dict: + self._logger.debug('_find_position: %s=%s', key, details[key]) + self._logger.debug('_find_position: optimizer message=%s status=%s', message, status) + + return offset_y, offset_x, details diff --git a/psfmodel/py.typed b/src/psfmodel/py.typed similarity index 100% rename from psfmodel/py.typed rename to src/psfmodel/py.typed diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..d21a2c9 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,30 @@ +################################################################################ +# tests/conftest.py +################################################################################ + +"""Shared pytest fixtures for psfmodel tests.""" + +import pytest + +from psfmodel.gaussian import GaussianPSF + + +@pytest.fixture +def default_psf() -> GaussianPSF: + """Return a :class:`GaussianPSF` built with default constructor arguments.""" + + return GaussianPSF() + + +@pytest.fixture +def symmetric_psf() -> GaussianPSF: + """Return a :class:`GaussianPSF` with equal ``sigma_y`` and ``sigma_x``.""" + + return GaussianPSF(sigma=(1.0, 1.0)) + + +@pytest.fixture +def asymmetric_psf() -> GaussianPSF: + """Return a :class:`GaussianPSF` with distinct ``sigma_y`` and ``sigma_x``.""" + + return GaussianPSF(sigma=(2.0, 3.0)) diff --git a/tests/test_find_position.py b/tests/test_find_position.py new file mode 100644 index 0000000..95a6be8 --- /dev/null +++ b/tests/test_find_position.py @@ -0,0 +1,434 @@ +################################################################################ +# tests/test_find_position.py +################################################################################ + +"""Tests for :meth:`PSF.find_position` validation, edge cases, motion smear, and logging.""" + +from __future__ import annotations + +import logging +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + +from psfmodel.gaussian import GaussianPSF + + +def test_find_position_invalid_box_size_raises() -> None: + """``find_position`` raises when ``box_size`` is not odd and positive in each axis.""" + + psf = GaussianPSF() + img = np.zeros((21, 21)) + for box in ((4, 5), (5, 4), (-1, 5), (5, -1)): + with pytest.raises(ValueError) as exc_info: + psf.find_position(img, box, (10, 10), bkgnd_degree=None, num_sigma=None) + assert str(exc_info.value) == ( + f'box_size must have odd positive shape in each dimension, got {box}' + ) + + +def test_find_position_invalid_num_sigma_raises() -> None: + """``find_position`` raises for non-positive or non-numeric ``num_sigma``.""" + + psf = GaussianPSF() + img = np.zeros((21, 21)) + + with pytest.raises(ValueError) as exc_info: + psf.find_position(img, (5, 5), (10, 10), bkgnd_degree=None, num_sigma=0.0) + assert 'num_sigma must be > 0' in str(exc_info.value) + + with pytest.raises(ValueError) as exc_info: + psf.find_position(img, (5, 5), (10, 10), bkgnd_degree=None, num_sigma=-1.0) + assert 'num_sigma must be > 0' in str(exc_info.value) + + with pytest.raises(TypeError) as exc_info_type: + psf.find_position(img, (5, 5), (10, 10), bkgnd_degree=None, num_sigma='bad') # type: ignore[arg-type] + assert 'num_sigma must be a number or None' in str(exc_info_type.value) + + +def test_find_position_returns_none_when_starting_point_near_edge() -> None: + """``find_position`` returns ``None`` when the box does not fit inside the image.""" + + psf = GaussianPSF() + img = np.zeros((11, 11)) + ret = psf.find_position( + img, + (7, 7), + (0, 5), + bkgnd_degree=None, + num_sigma=None, + ) + assert ret is None + + +def test_find_position_optimizer_failure_warns( + caplog: pytest.LogCaptureFixture, +) -> None: + """A failed SciPy minimizer triggers a WARNING containing ``did not succeed``.""" + + psf = GaussianPSF() + gauss2d = psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + fake_result = MagicMock() + fake_result.success = False + fake_result.message = 'mocked optimizer failure' + fake_result.x = np.array([0.0, 0.0, 1.0]) + fake_result.status = 2 + + with ( + caplog.at_level(logging.WARNING, logger='psfmodel.psf'), + patch('psfmodel.psf.sciopt.minimize', return_value=fake_result), + ): + ret = psf.find_position( + gauss2d, + gauss2d.shape, + (gauss2d.shape[0] // 2, gauss2d.shape[1] // 2), + bkgnd_degree=None, + num_sigma=None, + ) + + assert ret is None + assert any('did not succeed' in r.message for r in caplog.records) + assert any(r.levelno == logging.WARNING for r in caplog.records) + + +def test_find_position_num_sigma_all_pixels_masked_returns_none( + caplog: pytest.LogCaptureFixture, +) -> None: + """When outlier masking removes every pixel, ``find_position`` returns ``None``.""" + + psf = GaussianPSF(logger=logging.getLogger('psfmodel.psf'), detailed_logging=True) + img = np.ones((21, 21)) + with caplog.at_level(logging.INFO, logger='psfmodel.psf'): + ret = psf.find_position( + img, + img.shape, + (10, 10), + bkgnd_degree=None, + num_sigma=0.01, + max_bad_frac=0.99, + ) + assert ret is None + assert any('all pixels masked' in r.message for r in caplog.records) + + +def test_find_position_num_sigma_too_many_masked_returns_none( + caplog: pytest.LogCaptureFixture, +) -> None: + """When too large a fraction of pixels is masked, ``find_position`` returns ``None``.""" + + psf = GaussianPSF(logger=logging.getLogger('psfmodel.psf'), detailed_logging=True) + img = np.ones((21, 21)) + with caplog.at_level(logging.INFO, logger='psfmodel.psf'): + ret = psf.find_position( + img, + img.shape, + (10, 10), + bkgnd_degree=None, + num_sigma=0.05, + max_bad_frac=0.2, + ) + assert ret is None + assert any('too many pixels masked' in r.message for r in caplog.records) + + +def test_find_position_detailed_logging_emits_info(caplog: pytest.LogCaptureFixture) -> None: + """With ``detailed_logging=True``, ``find_position`` logs at INFO for key steps.""" + + psf = GaussianPSF(logger=logging.getLogger('psfmodel.psf'), detailed_logging=True) + gauss2d = psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + with caplog.at_level(logging.INFO, logger='psfmodel.psf'): + ret = psf.find_position( + gauss2d, + gauss2d.shape, + (gauss2d.shape[0] // 2, gauss2d.shape[1] // 2), + bkgnd_degree=None, + num_sigma=None, + ) + assert ret is not None + messages = [r.message for r in caplog.records if r.levelno == logging.INFO] + assert any('find_position: entering' in m for m in messages) + assert any('find_position returning' in m for m in messages) + + +def test_gaussian_eval_rect_with_movement_smears(symmetric_psf: GaussianPSF) -> None: + """Non-zero ``movement`` exercises motion smearing in :meth:`GaussianPSF.eval_rect`.""" + + still = symmetric_psf.eval_rect((19, 19), movement=None) + smeared = symmetric_psf.eval_rect((19, 19), movement=(0.5, 0.3)) + assert smeared.shape == (19, 19) + assert np.sum(smeared) == pytest.approx(1.0) + assert not np.allclose(smeared, still) + + +def test_gaussian_eval_rect_movement_small_num_steps_branch(symmetric_psf: GaussianPSF) -> None: + """A small movement relative to ``movement_granularity`` uses the ``num_steps == 0`` path.""" + + rect = symmetric_psf.eval_rect( + (19, 19), + movement=(0.05, 0.05), + movement_granularity=0.1, + ) + assert rect.shape == (19, 19) + assert np.sum(rect) == pytest.approx(1.0) + + +def test_find_position_num_sigma_rejects_outlier_pixel(default_psf: GaussianPSF) -> None: + """``num_sigma`` can mask a bright outlier while still returning a good centroid.""" + + gauss2d = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + contaminated = gauss2d.copy() + contaminated[3, 3] += 200.0 + ret = default_psf.find_position( + contaminated, + contaminated.shape, + (10, 10), + bkgnd_degree=None, + num_sigma=5.0, + max_bad_frac=0.99, + ) + assert ret is not None + assert ret[0] == pytest.approx(10.5, abs=0.5) + assert ret[1] == pytest.approx(10.5, abs=0.5) + + +# --------------------------------------------------------------------------- +# Quality-metric tests (reduced_chi2, noise_rms, peak_snr, residual_rss) +# --------------------------------------------------------------------------- + + +def test_find_position_quality_metrics_noise_free(default_psf: GaussianPSF) -> None: + """Quality metrics are near-zero for a noise-free Gaussian image.""" + + img = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret = default_psf.find_position(img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None) + assert ret is not None + _, _, details = ret + assert details['residual_rss'] < 1e-10 + assert details['reduced_chi2'] < 1e-10 + assert details['noise_rms'] < 1e-5 + assert details['peak_snr'] > 1e6 + + +def test_find_position_quality_metrics_noisy() -> None: + """``reduced_chi2`` approximates per-pixel noise variance for a noisy fit.""" + + psf = GaussianPSF(sigma=(1.0, 1.0)) + rng = np.random.default_rng(42) + noise_std = 0.05 + img = psf.eval_rect((21, 21), scale=1.0) + rng.normal(0, noise_std, (21, 21)) + ret = psf.find_position(img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None) + assert ret is not None + _, _, details = ret + assert details['reduced_chi2'] == pytest.approx(noise_std**2, rel=0.3) + assert details['noise_rms'] == pytest.approx(noise_std, rel=0.2) + assert details['peak_snr'] == pytest.approx(1.0 / noise_std, rel=0.3) + + +def test_find_position_quality_metrics_keys_present(default_psf: GaussianPSF) -> None: + """All four quality-metric keys are present in the returned details dict.""" + + img = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret = default_psf.find_position(img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None) + assert ret is not None + _, _, details = ret + for key in ('residual_rss', 'reduced_chi2', 'noise_rms', 'peak_snr'): + assert key in details + assert details[key] >= 0.0 + + +# --------------------------------------------------------------------------- +# Parameter-uncertainty tests (x_err, y_err, scale_err, base_err, *_err) +# --------------------------------------------------------------------------- + + +def test_find_position_position_uncertainties_non_negative(default_psf: GaussianPSF) -> None: + """Position and scale uncertainties are non-negative for a clean Gaussian fit.""" + + img = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret = default_psf.find_position(img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None) + assert ret is not None + _, _, details = ret + assert details['x_err'] >= 0.0 + assert details['y_err'] >= 0.0 + assert details['scale_err'] >= 0.0 + + +def test_find_position_position_uncertainties_small_noise_free(default_psf: GaussianPSF) -> None: + """Position uncertainties are negligible for a noise-free image.""" + + img = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret = default_psf.find_position(img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None) + assert ret is not None + _, _, details = ret + assert details['x_err'] < 1e-3 + assert details['y_err'] < 1e-3 + + +def test_find_position_base_err_zero_when_base_fixed(default_psf: GaussianPSF) -> None: + """``base_err`` is exactly 0.0 when ``allow_nonzero_base=False``.""" + + img = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret = default_psf.find_position( + img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None, allow_nonzero_base=False + ) + assert ret is not None + assert ret[2]['base_err'] == 0.0 + + +def test_find_position_base_err_non_negative_when_base_free(default_psf: GaussianPSF) -> None: + """``base_err`` is non-negative when ``allow_nonzero_base=True``.""" + + img = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret = default_psf.find_position( + img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None, allow_nonzero_base=True + ) + assert ret is not None + assert ret[2]['base_err'] >= 0.0 + + +def test_find_position_additional_param_err_keys_present() -> None: + """GaussianPSF with floating sigma includes ``sigma_y_err`` and ``sigma_x_err`` in details.""" + + psf = GaussianPSF( + sigma=(None, None), + sigma_y_range=(0.5, 3.0), + sigma_x_range=(0.5, 3.0), + ) + img = psf.eval_rect((21, 21), scale=2.0, sigma=(1.5, 1.5)) + ret = psf.find_position(img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None) + assert ret is not None + _, _, details = ret + assert 'sigma_y_err' in details + assert 'sigma_x_err' in details + assert details['sigma_y_err'] >= 0.0 + assert details['sigma_x_err'] >= 0.0 + + +def test_find_position_uncertainty_decreases_with_snr() -> None: + """Higher-SNR images produce smaller position uncertainties.""" + + psf = GaussianPSF(sigma=(1.0, 1.0)) + rng = np.random.default_rng(99) + noise = rng.normal(0, 0.1, (21, 21)) + img_low = psf.eval_rect((21, 21), scale=0.5) + noise + img_high = psf.eval_rect((21, 21), scale=5.0) + noise + + ret_low = psf.find_position(img_low, (21, 21), (10, 10), bkgnd_degree=None, num_sigma=None) + ret_high = psf.find_position(img_high, (21, 21), (10, 10), bkgnd_degree=None, num_sigma=None) + assert ret_low is not None + assert ret_high is not None + assert ret_low[2]['x_err'] > ret_high[2]['x_err'] + assert ret_low[2]['y_err'] > ret_high[2]['y_err'] + + +# --------------------------------------------------------------------------- +# compute_uncertainty flag tests +# --------------------------------------------------------------------------- + + +def test_find_position_compute_uncertainty_false_err_keys_are_nan( + default_psf: GaussianPSF, +) -> None: + """With ``compute_uncertainty=False``, ``x_err``, ``y_err``, and ``scale_err`` are NaN.""" + + img = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret = default_psf.find_position( + img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None, compute_uncertainty=False + ) + assert ret is not None + _, _, details = ret + assert np.isnan(details['x_err']) + assert np.isnan(details['y_err']) + assert np.isnan(details['scale_err']) + + +def test_find_position_compute_uncertainty_false_position_and_metrics_unchanged( + default_psf: GaussianPSF, +) -> None: + """Skipping uncertainty does not affect the fitted position or quality metrics.""" + + img = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret_with = default_psf.find_position( + img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None, compute_uncertainty=True + ) + ret_without = default_psf.find_position( + img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None, compute_uncertainty=False + ) + assert ret_with is not None + assert ret_without is not None + + y_with, x_with, d_with = ret_with + y_without, x_without, d_without = ret_without + + assert y_without == pytest.approx(y_with) + assert x_without == pytest.approx(x_with) + for key in ('residual_rss', 'reduced_chi2', 'noise_rms', 'peak_snr'): + assert d_without[key] == pytest.approx(d_with[key]) + + +def test_find_position_compute_uncertainty_false_base_fixed_base_err_zero( + default_psf: GaussianPSF, +) -> None: + """``base_err`` is 0.0 when base is not a free parameter, regardless of the flag.""" + + img = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret = default_psf.find_position( + img, + img.shape, + (10, 10), + bkgnd_degree=None, + num_sigma=None, + allow_nonzero_base=False, + compute_uncertainty=False, + ) + assert ret is not None + assert ret[2]['base_err'] == 0.0 + + +def test_find_position_compute_uncertainty_false_free_base_err_nan() -> None: + """``base_err`` is NaN when ``allow_nonzero_base=True`` and ``compute_uncertainty=False``.""" + + psf = GaussianPSF(sigma=(1.0, 1.0)) + img = psf.eval_rect((21, 21), scale=2.0) + ret = psf.find_position( + img, + img.shape, + (10, 10), + bkgnd_degree=None, + num_sigma=None, + allow_nonzero_base=True, + compute_uncertainty=False, + ) + assert ret is not None + assert np.isnan(ret[2]['base_err']) + + +def test_find_position_compute_uncertainty_false_additional_param_errs_nan() -> None: + """``sigma_y_err`` and ``sigma_x_err`` are NaN when ``compute_uncertainty=False``.""" + + psf = GaussianPSF( + sigma=(None, None), + sigma_y_range=(0.5, 3.0), + sigma_x_range=(0.5, 3.0), + ) + img = psf.eval_rect((21, 21), scale=2.0, sigma=(1.5, 1.5)) + ret = psf.find_position( + img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None, compute_uncertainty=False + ) + assert ret is not None + _, _, details = ret + assert np.isnan(details['sigma_y_err']) + assert np.isnan(details['sigma_x_err']) + + +def test_find_position_compute_uncertainty_default_is_true(default_psf: GaussianPSF) -> None: + """The default (no ``compute_uncertainty`` kwarg) produces finite ``x_err`` and ``y_err``.""" + + img = default_psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret = default_psf.find_position(img, img.shape, (10, 10), bkgnd_degree=None, num_sigma=None) + assert ret is not None + _, _, details = ret + assert np.isfinite(details['x_err']) + assert np.isfinite(details['y_err']) + assert np.isfinite(details['scale_err']) diff --git a/tests/test_gaussian.py b/tests/test_gaussian.py index 0aea9b5..d4ef287 100644 --- a/tests/test_gaussian.py +++ b/tests/test_gaussian.py @@ -2,6 +2,13 @@ # tests/test_gaussian.py ################################################################################ +"""Tests for :mod:`psfmodel.gaussian`.""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any, cast + import numpy as np import numpy.testing as npt import pytest @@ -9,373 +16,612 @@ from psfmodel.gaussian import GaussianPSF - -def test_gaussian_1d(): - assert GaussianPSF.gaussian_1d(0.) == pytest.approx(0.39894228) - assert GaussianPSF.gaussian_1d(0., scale=2.) == pytest.approx(0.39894228 * 2) - assert GaussianPSF.gaussian_1d(0., base=-5.) == pytest.approx(0.39894228 - 5.) - assert GaussianPSF.gaussian_1d(0., scale=2., base=-5.) == \ - pytest.approx(0.39894228 * 2 - 5.) - assert GaussianPSF.gaussian_1d(1., mean=1.) == pytest.approx(0.39894228) - assert GaussianPSF.gaussian_1d(1., mean=1., scale=2.) == pytest.approx(0.39894228 * 2) - assert GaussianPSF.gaussian_1d(1.) == pytest.approx(0.24197072) - assert GaussianPSF.gaussian_1d(2., sigma=2.) == pytest.approx(0.24197072 / 2) - npt.assert_array_almost_equal(GaussianPSF.gaussian_1d(np.array([0.])), - np.array([0.39894228])) - npt.assert_array_almost_equal(GaussianPSF.gaussian_1d(np.array([0., 1.])), - np.array([0.39894228, 0.24197072])) - npt.assert_array_almost_equal(GaussianPSF.gaussian_1d(np.array([[0., 1.], [1., 0.]])), - np.array([[0.39894228, 0.24197072], - [0.24197072, 0.39894228]])) - assert integrate.quad(GaussianPSF.gaussian_1d, -10, 10)[0] == pytest.approx(1.) - assert integrate.quad(lambda x: GaussianPSF.gaussian_1d(x, scale=2.), -10, 10)[0] == \ - pytest.approx(2.) - - -def test_gaussian_2d(): - assert GaussianPSF.gaussian_2d(0, 0) == pytest.approx(0.15915494309) - assert GaussianPSF.gaussian_2d(0, 0, scale=2.) == pytest.approx(0.15915494309 * 2) - assert GaussianPSF.gaussian_2d(0, 0, base=10.) == pytest.approx(0.15915494309 + 10) - assert GaussianPSF.gaussian_2d(0, 0, scale=2., base=10.) == \ - pytest.approx(0.15915494309 * 2 + 10) - assert GaussianPSF.gaussian_2d(1., 0., mean_y=1.) == pytest.approx(0.15915494309) - assert GaussianPSF.gaussian_2d(0., 1., mean_x=1.) == pytest.approx(0.15915494309) - assert integrate.dblquad(GaussianPSF.gaussian_2d, -10, 10, -10, 10)[0] == \ - pytest.approx(1.) - assert integrate.dblquad(lambda x, y: GaussianPSF.gaussian_2d(x, y, scale=2.), - -10, 10, -10, 10)[0] == pytest.approx(2.) - assert GaussianPSF.gaussian_2d(1., 0.) == pytest.approx(0.09653235263005391) - assert GaussianPSF.gaussian_2d(1., 0., sigma_x=2) == pytest.approx(0.048266176315027) - assert GaussianPSF.gaussian_2d(0., -1., sigma_x=2) != pytest.approx(0.048266176315027) - assert GaussianPSF.gaussian_2d(0., -1., angle=np.pi/2) == pytest.approx(0.09653235263) - npt.assert_array_almost_equal(GaussianPSF.gaussian_2d(np.array([0.]), np.array([0.])), - np.array([0.15915494309])) - npt.assert_array_almost_equal(GaussianPSF.gaussian_2d(np.array([[0.], [1.]]), - np.array([[0.], [0.]])), - np.array([[0.15915494309], [0.09653235263005391]])) - npt.assert_array_almost_equal(GaussianPSF.gaussian_2d(np.array([[[0.], [1.]], - [[1.], [0.]]]), - np.array([[[0.], [0.]], - [[0.], [0.]]])), - np.array([[[0.15915494309], [0.09653235263005391]], - [[0.09653235263005391], [0.15915494309]]])) - - y_coords = np.tile(np.arange(-10., 11.)/2, 21) - x_coords = np.repeat(np.arange(-10., 11.)/2, 21) - - gauss2d2 = GaussianPSF.gaussian_2d(y_coords, x_coords, scale=2., - sigma_x=.25, sigma_y=.5, - base=1.) / 4 - gauss2d3 = GaussianPSF.gaussian_2d(y_coords, x_coords, scale=2., - sigma_x=.5, sigma_y=.25, - base=1.) / 4 - gauss2d2 = gauss2d2.reshape(21, 21) - gauss2d3 = gauss2d3.reshape(21, 21) +_MSG_BOTH_SIGMA = 'Cannot specify both sigma during init and sigma_y/x' +_MSG_EVAL_POINT_SIGMA = ( + 'Sigma X and Y must be specified either at object creation or in the call to eval_point' +) +_MSG_EVAL_PIXEL_SIGMA = ( + 'Sigma X and Y must be specified either at object creation or in the call to eval_pixel' +) + + +def _require_position_fit( + ret: tuple[float, float, dict[str, Any]] | None, +) -> tuple[float, float, dict[str, Any]]: + """Return ``ret`` after asserting :meth:`PSF.find_position` did not return ``None``.""" + + assert ret is not None + return ret + + +@pytest.mark.parametrize( + ('x', 'kwargs', 'expected'), + [ + (0.0, {}, 0.39894228), + (0.0, {'scale': 2.0}, 0.39894228 * 2), + (0.0, {'base': -5.0}, 0.39894228 - 5.0), + (0.0, {'scale': 2.0, 'base': -5.0}, 0.39894228 * 2 - 5.0), + (1.0, {'mean': 1.0}, 0.39894228), + (1.0, {'mean': 1.0, 'scale': 2.0}, 0.39894228 * 2), + (1.0, {}, 0.24197072), + (2.0, {'sigma': 2.0}, 0.24197072 / 2), + ], +) +def test_gaussian_1d_scalar_cases(x: float, kwargs: dict[str, float], expected: float) -> None: + """``gaussian_1d`` matches reference values for scalar inputs and keyword overrides.""" + + assert GaussianPSF.gaussian_1d(x, **kwargs) == pytest.approx(expected) + + +def test_gaussian_1d_array_broadcast() -> None: + """``gaussian_1d`` broadcasts correctly over array coordinates.""" + + npt.assert_array_almost_equal(GaussianPSF.gaussian_1d(np.array([0.0])), np.array([0.39894228])) + npt.assert_array_almost_equal( + GaussianPSF.gaussian_1d(np.array([0.0, 1.0])), np.array([0.39894228, 0.24197072]) + ) + npt.assert_array_almost_equal( + GaussianPSF.gaussian_1d(np.array([[0.0, 1.0], [1.0, 0.0]])), + np.array([[0.39894228, 0.24197072], [0.24197072, 0.39894228]]), + ) + + +def test_gaussian_1d_quadrature_normalization() -> None: + """The analytic 1-D Gaussian integrates to ``scale`` over the real line.""" + + assert integrate.quad(GaussianPSF.gaussian_1d, -10, 10)[0] == pytest.approx(1.0) + assert integrate.quad(lambda x: GaussianPSF.gaussian_1d(x, scale=2.0), -10, 10)[ + 0 + ] == pytest.approx(2.0) + + +@pytest.mark.parametrize( + ('y', 'x', 'kwargs', 'expected'), + [ + (0, 0, {}, 0.15915494309), + (0, 0, {'scale': 2.0}, 0.15915494309 * 2), + (0, 0, {'base': 10.0}, 0.15915494309 + 10), + (0, 0, {'scale': 2.0, 'base': 10.0}, 0.15915494309 * 2 + 10), + (1.0, 0.0, {'mean_y': 1.0}, 0.15915494309), + (0.0, 1.0, {'mean_x': 1.0}, 0.15915494309), + (1.0, 0.0, {}, 0.09653235263005391), + (1.0, 0.0, {'sigma_x': 2}, 0.048266176315027), + ], +) +def test_gaussian_2d_scalar_cases( + y: float, + x: float, + kwargs: dict[str, float], + expected: float, +) -> None: + """``gaussian_2d`` matches reference values for representative scalar inputs.""" + + assert GaussianPSF.gaussian_2d(y, x, **kwargs) == pytest.approx(expected) + + +def test_gaussian_2d_angle_and_asymmetry() -> None: + """Rotation and distinct ``sigma_x`` change the 2-D Gaussian as expected.""" + + assert GaussianPSF.gaussian_2d(1.0, 0.0, sigma_x=2) == pytest.approx(0.048266176315027) + assert GaussianPSF.gaussian_2d(0.0, -1.0, sigma_x=2) != pytest.approx(0.048266176315027) + assert GaussianPSF.gaussian_2d(0.0, -1.0, angle=np.pi / 2) == pytest.approx(0.09653235263) + + +def test_gaussian_2d_quadrature_normalization() -> None: + """The 2-D Gaussian integrates to ``scale`` over the plane (numerically).""" + + assert integrate.dblquad(GaussianPSF.gaussian_2d, -10, 10, -10, 10)[0] == pytest.approx(1.0) + assert integrate.dblquad( + lambda x, y: GaussianPSF.gaussian_2d(x, y, scale=2.0), -10, 10, -10, 10 + )[0] == pytest.approx(2.0) + + +def test_gaussian_2d_array_broadcast() -> None: + """``gaussian_2d`` broadcasts over array coordinates.""" + + npt.assert_array_almost_equal( + GaussianPSF.gaussian_2d(np.array([0.0]), np.array([0.0])), np.array([0.15915494309]) + ) + npt.assert_array_almost_equal( + GaussianPSF.gaussian_2d(np.array([[0.0], [1.0]]), np.array([[0.0], [0.0]])), + np.array([[0.15915494309], [0.09653235263005391]]), + ) + npt.assert_array_almost_equal( + GaussianPSF.gaussian_2d( + np.array([[[0.0], [1.0]], [[1.0], [0.0]]]), np.array([[[0.0], [0.0]], [[0.0], [0.0]]]) + ), + np.array( + [[[0.15915494309], [0.09653235263005391]], [[0.09653235263005391], [0.15915494309]]] + ), + ) + + +def test_gaussian_2d_rotated_ellipses_differ() -> None: + """Swapping ``sigma_x`` and ``sigma_y`` on a grid yields distinct surfaces.""" + + y_coords = np.tile(np.arange(-10.0, 11.0) / 2, 21) + x_coords = np.repeat(np.arange(-10.0, 11.0) / 2, 21) + + gauss2d2 = np.asarray( + GaussianPSF.gaussian_2d(y_coords, x_coords, scale=2.0, sigma_x=0.25, sigma_y=0.5, base=1.0) + / 4 + ).reshape(21, 21) + gauss2d3 = np.asarray( + GaussianPSF.gaussian_2d(y_coords, x_coords, scale=2.0, sigma_x=0.5, sigma_y=0.25, base=1.0) + / 4 + ).reshape(21, 21) with pytest.raises(AssertionError): npt.assert_array_almost_equal(gauss2d2, gauss2d3) npt.assert_array_almost_equal(np.transpose(gauss2d2), gauss2d3) -def test_gaussian_integral_1d(): - g_0_1 = integrate.quad(GaussianPSF.gaussian_1d, 0., 1.)[0] - g_n1_1 = integrate.quad(GaussianPSF.gaussian_1d, -1., 1.)[0] - assert GaussianPSF.gaussian_integral_1d(0., 1.) == \ - pytest.approx(integrate.quad(GaussianPSF.gaussian_1d, 0., 1.)[0]) - assert GaussianPSF.gaussian_integral_1d(-1., 1.) == \ - pytest.approx(g_n1_1) - assert GaussianPSF.gaussian_integral_1d(-1., 1., mean=2.) == \ - pytest.approx(integrate.quad(GaussianPSF.gaussian_1d, 1., 3.)[0]) - assert GaussianPSF.gaussian_integral_1d(-1., 1., scale=2.) == \ - pytest.approx(g_n1_1 * 2) - assert GaussianPSF.gaussian_integral_1d(-1., 1., base=5.) == \ - pytest.approx(g_n1_1 + 5) - assert GaussianPSF.gaussian_integral_1d(-1., 1., scale=2., base=5.) == \ - pytest.approx(g_n1_1 * 2 + 5) - assert GaussianPSF.gaussian_integral_1d(0., 1.) == \ - pytest.approx(integrate.quad(GaussianPSF.gaussian_1d, 0., 1.)[0]) - assert GaussianPSF.gaussian_integral_1d(np.array([0., -1.]), np.array([1., 1.])) == \ - pytest.approx(np.array([g_0_1, g_n1_1])) - ret = GaussianPSF.gaussian_integral_1d(np.array([[0., -1.], [-1., 0.]]), - np.array([[1., 1.], [1., 1.]])) +def test_gaussian_integral_1d() -> None: + """``gaussian_integral_1d`` matches numerical quadrature for scalars and arrays.""" + + g_0_1 = integrate.quad(GaussianPSF.gaussian_1d, 0.0, 1.0)[0] + g_n1_1 = integrate.quad(GaussianPSF.gaussian_1d, -1.0, 1.0)[0] + assert GaussianPSF.gaussian_integral_1d(0.0, 1.0) == pytest.approx(g_0_1) + assert GaussianPSF.gaussian_integral_1d(-1.0, 1.0) == pytest.approx(g_n1_1) + assert GaussianPSF.gaussian_integral_1d(-1.0, 1.0, mean=2.0) == pytest.approx( + integrate.quad(GaussianPSF.gaussian_1d, 1.0, 3.0)[0] + ) + assert GaussianPSF.gaussian_integral_1d(-1.0, 1.0, scale=2.0) == pytest.approx(g_n1_1 * 2) + assert GaussianPSF.gaussian_integral_1d(-1.0, 1.0, base=5.0) == pytest.approx(g_n1_1 + 5) + assert GaussianPSF.gaussian_integral_1d(-1.0, 1.0, scale=2.0, base=5.0) == pytest.approx( + g_n1_1 * 2 + 5 + ) + assert GaussianPSF.gaussian_integral_1d( + np.array([0.0, -1.0]), np.array([1.0, 1.0]) + ) == pytest.approx(np.array([g_0_1, g_n1_1])) + ret = GaussianPSF.gaussian_integral_1d( + np.array([[0.0, -1.0], [-1.0, 0.0]]), np.array([[1.0, 1.0], [1.0, 1.0]]) + ) npt.assert_array_almost_equal(ret, np.array([[g_0_1, g_n1_1], [g_n1_1, g_0_1]])) -def test_gaussian_integral_2d(): - integ1 = integrate.dblquad(lambda y, x: GaussianPSF.gaussian_2d(y, x), - 0., 3., -2., 1.)[0] - integ2 = integrate.dblquad(lambda y, x: GaussianPSF.gaussian_2d(y, x), - -1., 3., -3., 2.)[0] - assert GaussianPSF.gaussian_integral_2d(0., 3., -2., 1.) == pytest.approx(integ1) - assert GaussianPSF.gaussian_integral_2d(0., 3., -2., 1., scale=2., base=5.) == \ - pytest.approx(integ1 * 2 + 5) - ret = GaussianPSF.gaussian_integral_2d(np.array([0., -1.]), np.array([3., 3.]), - np.array([-2., -3.]), np.array([1., 2.])) +def test_gaussian_integral_1d_reversed_limits() -> None: + """``gaussian_integral_1d`` negates the Gaussian part when limits are reversed.""" + + g_0_1 = integrate.quad(GaussianPSF.gaussian_1d, 0.0, 1.0)[0] + g_n1_1 = integrate.quad(GaussianPSF.gaussian_1d, -1.0, 1.0)[0] + + # Scalar reversed limits: Gaussian part is negated. + assert GaussianPSF.gaussian_integral_1d(1.0, 0.0) == pytest.approx(-g_0_1) + assert GaussianPSF.gaussian_integral_1d(1.0, -1.0) == pytest.approx(-g_n1_1) + + # mean shifts the window; reversed limits still negate the result. + assert GaussianPSF.gaussian_integral_1d(1.0, -1.0, mean=2.0) == pytest.approx( + -integrate.quad(GaussianPSF.gaussian_1d, 1.0, 3.0)[0] + ) + + # scale multiplies the Gaussian part; negation applies to the scaled integral. + assert GaussianPSF.gaussian_integral_1d(1.0, -1.0, scale=2.0) == pytest.approx(-g_n1_1 * 2) + + # base is an additive offset and is NOT negated with the limits. + assert GaussianPSF.gaussian_integral_1d(1.0, 0.0, base=5.0) == pytest.approx(-g_0_1 + 5.0) + assert GaussianPSF.gaussian_integral_1d(1.0, 0.0, scale=2.0, base=5.0) == pytest.approx( + -g_0_1 * 2 + 5.0 + ) + + # 1-D array: first element reversed, second forward. + npt.assert_array_almost_equal( + GaussianPSF.gaussian_integral_1d(np.array([1.0, 0.0]), np.array([0.0, 1.0])), + np.array([-g_0_1, g_0_1]), + ) + + # 2-D array: swapped and unswapped elements in the same call. + npt.assert_array_almost_equal( + GaussianPSF.gaussian_integral_1d( + np.array([[1.0, 0.0], [0.0, 1.0]]), + np.array([[0.0, 1.0], [1.0, 0.0]]), + ), + np.array([[-g_0_1, g_0_1], [g_0_1, -g_0_1]]), + ) + + +def test_gaussian_integral_1d_nonpositive_sigma_raises() -> None: + """``gaussian_integral_1d`` requires a positive ``sigma``.""" + + with pytest.raises(ValueError) as exc_info: + GaussianPSF.gaussian_integral_1d(0.0, 1.0, sigma=0.0) + assert str(exc_info.value) == 'sigma must be positive, got 0.0' + with pytest.raises(ValueError) as exc_info: + GaussianPSF.gaussian_integral_1d(0.0, 1.0, sigma=-1.0) + assert str(exc_info.value) == 'sigma must be positive, got -1.0' + + +def test_gaussian_integral_2d() -> None: + """``gaussian_integral_2d`` matches ``dblquad`` for sample regions.""" + + integ1 = integrate.dblquad(lambda y, x: GaussianPSF.gaussian_2d(y, x), 0.0, 3.0, -2.0, 1.0)[0] + integ2 = integrate.dblquad(lambda y, x: GaussianPSF.gaussian_2d(y, x), -1.0, 3.0, -3.0, 2.0)[0] + assert GaussianPSF.gaussian_integral_2d(0.0, 3.0, -2.0, 1.0) == pytest.approx(integ1) + assert GaussianPSF.gaussian_integral_2d( + 0.0, 3.0, -2.0, 1.0, scale=2.0, base=5.0 + ) == pytest.approx(integ1 * 2 + 5) + ret = GaussianPSF.gaussian_integral_2d( + np.array([0.0, -1.0]), np.array([3.0, 3.0]), np.array([-2.0, -3.0]), np.array([1.0, 2.0]) + ) npt.assert_array_almost_equal(ret, np.array([integ1, integ2])) - # def rot(y, x): - # ang = -np.pi/8 - # c = np.cos(ang) - # s = np.sin(ang) - # x2 = c*x + s*y - # y2 = -s*x + c*y - # return y2, x2 - - # y1, x1 = rot(-1, -1) - # y2, x2 = rot(1, 1) - # assert GaussianPSF.gaussian_integral_2d(y1+2, y2+2, x1-1, x2-1, - # mean_y=2., mean_x=-1., - # sigma_x=2., angle=np.pi/8) == \ - # pytest.approx(integrate.dblquad( - # lambda y, x: GaussianPSF.gaussian_2d(y, x, sigma_x=2), - # -1., 1., -1., 1.)[0]) - - -def test_gaussian_eval_point(): - with pytest.raises(ValueError): - GaussianPSF(sigma=(1, 1)).eval_point((0, 0), sigma=5) - with pytest.raises(ValueError): - GaussianPSF(sigma=(1, 1)).eval_point((0, 0), sigma_x=5) - with pytest.raises(ValueError): - GaussianPSF(sigma=(1, 1)).eval_point((0, 0), sigma_y=5) - with pytest.raises(ValueError): - GaussianPSF(sigma=None).eval_point((0, 0)) - with pytest.raises(ValueError): - GaussianPSF(sigma=(None, 1)).eval_point((0, 0)) - with pytest.raises(ValueError): - GaussianPSF(sigma=(1, None)).eval_point((0, 0)) + +@pytest.mark.parametrize( + ('sigma_init', 'call_kwargs'), + [ + ((1, 1), {'sigma': 5}), + ((1, 1), {'sigma_x': 5}), + ((1, 1), {'sigma_y': 5}), + (None, {}), + ((None, 1), {}), + ((1, None), {}), + ], +) +def test_gaussian_eval_point_value_errors( + sigma_init: tuple[int | None, int | None] | None, + call_kwargs: Mapping[str, Any], +) -> None: + """``eval_point`` rejects ambiguous or incomplete sigma configuration.""" + + with pytest.raises(ValueError) as exc_info: + GaussianPSF(sigma=sigma_init).eval_point((0, 0), **call_kwargs) + if sigma_init == (1, 1): + assert str(exc_info.value) == _MSG_BOTH_SIGMA + else: + assert str(exc_info.value) == _MSG_EVAL_POINT_SIGMA + + +def test_gaussian_eval_point_success_cases() -> None: + """``eval_point`` agrees with :meth:`GaussianPSF.gaussian_2d` for valid configurations.""" psf1 = GaussianPSF() - psf2 = GaussianPSF(sigma=(1., None)) - psf3 = GaussianPSF(sigma=(None, 2.)) - psf4 = GaussianPSF(sigma=(1., 2.)) - psf5 = GaussianPSF(sigma=(1., 1.)) - - assert psf1.eval_point((2, 3), sigma=(1., 2.)) == psf4.eval_point((2, 3)) - assert psf1.eval_point((2, 3), sigma=1.) == psf5.eval_point((2, 3)) - assert psf1.eval_point((2, 3), sigma_y=1., sigma_x=2.) == psf4.eval_point((2, 3)) - assert psf2.eval_point((2, 3), sigma_x=2.) == psf4.eval_point((2, 3)) - assert psf3.eval_point((2, 3), sigma_y=1.) == psf4.eval_point((2, 3)) - assert psf1.eval_point((2, 3), sigma=(1., 2.), base=1, scale=2, angle=np.pi/4) == \ - psf4.eval_point((2, 3), base=1, scale=2, angle=np.pi/4) - - assert psf1.eval_point((2, 3), sigma=(1., 2.), base=1, scale=2, angle=np.pi/4) == \ - pytest.approx(GaussianPSF.gaussian_2d(2, 3, sigma_y=1., sigma_x=2., - base=1, scale=2, angle=np.pi/4)) - - ret = psf1.eval_point((np.array([1, 2]), np.array([2, 3])), sigma=(1., 2.)) - npt.assert_array_almost_equal(ret, np.array([psf4.eval_point((1, 2)), - psf4.eval_point((2, 3))])) - - -def test_gaussian_eval_pixel(): - with pytest.raises(ValueError): - GaussianPSF(sigma=(1, 1)).eval_pixel((0, 0), sigma=5) - with pytest.raises(ValueError): - GaussianPSF(sigma=(1, 1)).eval_pixel((0, 0), sigma_x=5) - with pytest.raises(ValueError): - GaussianPSF(sigma=(1, 1)).eval_pixel((0, 0), sigma_y=5) - with pytest.raises(ValueError): - GaussianPSF().eval_pixel((0, 0)) - with pytest.raises(ValueError): - GaussianPSF().eval_pixel((0, 0), sigma_x=5) - with pytest.raises(ValueError): - GaussianPSF().eval_pixel((0, 0), sigma_y=5) - with pytest.raises(ValueError): - GaussianPSF(sigma=None).eval_pixel((0, 0)) - with pytest.raises(ValueError): - GaussianPSF(sigma=(1, None)).eval_pixel((0, 0)) - with pytest.raises(ValueError): - GaussianPSF(sigma=(None, 1)).eval_pixel((0, 0)) - - integ = GaussianPSF.gaussian_integral_2d(-.5, .5, -.5, .5, sigma_y=2., sigma_x=3.) - integ2 = GaussianPSF.gaussian_integral_2d(-.5, .5, -.5, .5, sigma_y=2., sigma_x=2.) - assert GaussianPSF(sigma=(2., 3.)).eval_pixel((0, 0)) == pytest.approx(integ) - assert GaussianPSF(sigma=2.).eval_pixel((0, 0)) == pytest.approx(integ2) - assert GaussianPSF().eval_pixel((0, 0), sigma=2.) == pytest.approx(integ2) - assert GaussianPSF(sigma=(2., 3.)).eval_pixel((0, 0), offset=(0, .25)) == \ - pytest.approx(GaussianPSF.gaussian_integral_2d(0, 1, -.25, .75, - sigma_y=2., sigma_x=3.)) - assert GaussianPSF(sigma=(2., 3.)).eval_pixel((0, 0), scale=11.) == \ - pytest.approx(integ * 11) - assert GaussianPSF(sigma=(2., 3.)).eval_pixel((0, 0), base=3.) == \ - pytest.approx(integ + 3) - assert GaussianPSF().eval_pixel((0, 0), sigma=(2., 3.)) == \ - pytest.approx(integ) - assert GaussianPSF(sigma=(2., None)).eval_pixel((0, 0), sigma_x=3.) == \ - pytest.approx(integ) - assert GaussianPSF(sigma=(None, 3.)).eval_pixel((0, 0), sigma_y=2.) == \ - pytest.approx(integ) - ret = GaussianPSF(sigma=(2., 3.)).eval_pixel((np.array([0, 0]), np.array([0, 0]))) + psf2 = GaussianPSF(sigma=(1.0, None)) + psf3 = GaussianPSF(sigma=(None, 2.0)) + psf4 = GaussianPSF(sigma=(1.0, 2.0)) + psf5 = GaussianPSF(sigma=(1.0, 1.0)) + + assert psf1.eval_point((2, 3), sigma=cast(Any, (1.0, 2.0))) == psf4.eval_point((2, 3)) + assert psf1.eval_point((2, 3), sigma=1.0) == psf5.eval_point((2, 3)) + assert psf1.eval_point((2, 3), sigma_y=1.0, sigma_x=2.0) == psf4.eval_point((2, 3)) + assert psf2.eval_point((2, 3), sigma_x=2.0) == psf4.eval_point((2, 3)) + assert psf3.eval_point((2, 3), sigma_y=1.0) == psf4.eval_point((2, 3)) + assert psf1.eval_point( + (2, 3), sigma=cast(Any, (1.0, 2.0)), base=1, scale=2, angle=np.pi / 4 + ) == psf4.eval_point((2, 3), base=1, scale=2, angle=np.pi / 4) + + assert psf1.eval_point( + (2, 3), sigma=cast(Any, (1.0, 2.0)), base=1, scale=2, angle=np.pi / 4 + ) == pytest.approx( + GaussianPSF.gaussian_2d(2, 3, sigma_y=1.0, sigma_x=2.0, base=1, scale=2, angle=np.pi / 4) + ) + + ret = psf1.eval_point( + (np.array([1, 2]), np.array([2, 3])), + sigma=cast(Any, (1.0, 2.0)), + ) + npt.assert_array_almost_equal(ret, np.array([psf4.eval_point((1, 2)), psf4.eval_point((2, 3))])) + + +@pytest.mark.parametrize( + ('sigma_init', 'call_kwargs'), + [ + ((1, 1), {'sigma': 5}), + ((1, 1), {'sigma_x': 5}), + ((1, 1), {'sigma_y': 5}), + (None, {}), + (None, {'sigma_x': 5}), + (None, {'sigma_y': 5}), + ((1, None), {}), + ((None, 1), {}), + ], +) +def test_gaussian_eval_pixel_value_errors( + sigma_init: tuple[int | None, int | None] | None, + call_kwargs: Mapping[str, Any], +) -> None: + """``eval_pixel`` rejects ambiguous or incomplete sigma configuration.""" + + with pytest.raises(ValueError) as exc_info: + GaussianPSF(sigma=sigma_init).eval_pixel((0, 0), **call_kwargs) + if sigma_init == (1, 1): + assert str(exc_info.value) == _MSG_BOTH_SIGMA + else: + assert str(exc_info.value) == _MSG_EVAL_PIXEL_SIGMA + + +def test_gaussian_eval_pixel_success_cases() -> None: + """``eval_pixel`` integrates the Gaussian over unit pixels as documented.""" + + integ = GaussianPSF.gaussian_integral_2d(-0.5, 0.5, -0.5, 0.5, sigma_y=2.0, sigma_x=3.0) + integ2 = GaussianPSF.gaussian_integral_2d(-0.5, 0.5, -0.5, 0.5, sigma_y=2.0, sigma_x=2.0) + assert GaussianPSF(sigma=(2.0, 3.0)).eval_pixel((0, 0)) == pytest.approx(integ) + assert GaussianPSF(sigma=2.0).eval_pixel((0, 0)) == pytest.approx(integ2) + assert GaussianPSF().eval_pixel((0, 0), sigma=(2.0, 2.0)) == pytest.approx(integ2) + assert GaussianPSF(sigma=(2.0, 3.0)).eval_pixel((0, 0), offset=(0, 0.25)) == pytest.approx( + GaussianPSF.gaussian_integral_2d(0, 1, -0.25, 0.75, sigma_y=2.0, sigma_x=3.0) + ) + assert GaussianPSF(sigma=(2.0, 3.0)).eval_pixel((0, 0), scale=11.0) == pytest.approx(integ * 11) + assert GaussianPSF(sigma=(2.0, 3.0)).eval_pixel((0, 0), base=3.0) == pytest.approx(integ + 3) + assert GaussianPSF().eval_pixel((0, 0), sigma=(2.0, 3.0)) == pytest.approx(integ) + assert GaussianPSF(sigma=(2.0, None)).eval_pixel((0, 0), sigma_x=3.0) == pytest.approx(integ) + assert GaussianPSF(sigma=(None, 3.0)).eval_pixel((0, 0), sigma_y=2.0) == pytest.approx(integ) + ret = GaussianPSF(sigma=(2.0, 3.0)).eval_pixel((np.array([0, 0]), np.array([0, 0]))) npt.assert_array_almost_equal(ret, np.array([integ, integ])) - assert GaussianPSF(sigma=(2., 3.)).eval_pixel((0, 0), angle=np.pi/8) == \ - pytest.approx(GaussianPSF.gaussian_integral_2d(-.5, .5, -.5, .5, - sigma_y=2., sigma_x=3., - angle=np.pi/8)) + assert GaussianPSF(sigma=(2.0, 3.0)).eval_pixel((0, 0), angle=np.pi / 8) == pytest.approx( + GaussianPSF.gaussian_integral_2d( + -0.5, 0.5, -0.5, 0.5, sigma_y=2.0, sigma_x=3.0, angle=np.pi / 8 + ) + ) + + +@pytest.mark.parametrize( + ('rect_size', 'expected_msg'), + [ + ((20, 19), 'Rectangle must have odd positive shape in each dimension, got (20, 19)'), + ((19, 18), 'Rectangle must have odd positive shape in each dimension, got (19, 18)'), + ((-1, 5), 'Rectangle must have odd positive shape in each dimension, got (-1, 5)'), + ((5, -3), 'Rectangle must have odd positive shape in each dimension, got (5, -3)'), + ], +) +def test_gaussian_eval_rect_invalid_shape(rect_size: tuple[int, int], expected_msg: str) -> None: + """``eval_rect`` requires odd, positive side lengths.""" + + with pytest.raises(ValueError) as exc_info: + GaussianPSF(sigma=(1.0, 1.0)).eval_rect(rect_size) + assert str(exc_info.value) == expected_msg + + +@pytest.mark.parametrize( + ('rect_size', 'sigma', 'scale', 'base', 'offset'), + [ + ((19, 19), (1.0, 1.0), 1.0, 0.0, (0.5, 0.5)), + ((15, 15), (0.8, 1.2), 2.5, 0.1, (0.25, 0.75)), + ((21, 21), (1.5, 1.5), 0.5, 0.02, (0.0, 0.5)), + ], +) +def test_gaussian_eval_rect_shape_peak_and_nonneg( + rect_size: tuple[int, int], + sigma: tuple[float, float], + scale: float, + base: float, + offset: tuple[float, float], +) -> None: + """``eval_rect`` returns a non-negative patch peaked at the center with expected shape.""" + + psf = GaussianPSF(sigma=sigma) + rect = psf.eval_rect(rect_size, offset=offset, scale=scale, base=base) + assert rect.shape == rect_size + assert np.all(rect >= 0) + cy, cx = rect_size[0] // 2, rect_size[1] // 2 + assert rect[cy, cx] == pytest.approx(np.max(rect)) + + +def test_gaussian_eval_rect_rotated_matches_sum(symmetric_psf: GaussianPSF) -> None: + """A rotated Gaussian patch remains normalized to unit flux (plus base).""" + + assert np.sum(symmetric_psf.eval_rect((19, 19))) == pytest.approx(1.0) + assert np.sum( + GaussianPSF(sigma=(1.0, 1.0), angle=np.pi / 4).eval_rect((19, 19)) + ) == pytest.approx(1.0) + + +@pytest.mark.parametrize('angle_subsample', [0, 100]) +def test_gaussian_init_angle_subsample_invalid_int(angle_subsample: int) -> None: + """``angle_subsample`` must be an int strictly between 0 and 100.""" + + with pytest.raises(ValueError) as exc_info: + GaussianPSF(angle_subsample=angle_subsample) + assert str(exc_info.value) == ( + f'angle_subsample must be an int between 1 and 99, got {angle_subsample}' + ) + + +def test_gaussian_init_angle_subsample_rejects_non_int() -> None: + """``angle_subsample`` must be an ``int``, not a non-integral type.""" + + with pytest.raises(ValueError) as exc_info: + GaussianPSF(angle_subsample=1.5) # type: ignore[arg-type] + assert str(exc_info.value) == 'angle_subsample must be an int between 1 and 99, got 1.5' + +def test_gaussian_init_angle_none_registers_floating_angle() -> None: + """Passing ``angle=None`` leaves rotation unset and adds it to the fit parameter list.""" -def test_gaussian_eval_rect(): - assert np.sum(GaussianPSF(sigma=(1, 1)).eval_rect((19, 19))) == pytest.approx(1) - assert np.sum(GaussianPSF(sigma=(1, 1), angle=np.pi/4).eval_rect((19, 19))) == \ - pytest.approx(1) + psf = GaussianPSF(sigma=(1.0, 1.0), angle=None, angle_subsample=3) + assert psf._angle is None + assert ('angle',) in {(t[2],) for t in psf._additional_params} @pytest.mark.parametrize('use_angular_params', [True, False]) @pytest.mark.parametrize('bkgnd_degree', [None, 0, 1, 2]) -def test_gaussian_find_position(use_angular_params, bkgnd_degree): - allow_nonzero_base = (bkgnd_degree is not None) - - # centered symmetric PSF, float sigma - psf = GaussianPSF() - gauss2d = psf.eval_rect((21, 21), scale=2., sigma=1.) - # if allow_nonzero_base: - # psf._debug_opt = 10 - ret = psf.find_position(gauss2d, gauss2d.shape, - starting_point=((gauss2d.shape[0]//2, - gauss2d.shape[1]//2)), - bkgnd_degree=bkgnd_degree, - allow_nonzero_base=allow_nonzero_base, - num_sigma=0, - use_angular_params=use_angular_params) +def test_gaussian_find_position( + use_angular_params: bool, + bkgnd_degree: int | None, + default_psf: GaussianPSF, +) -> None: + """End-to-end centroid and width recovery for synthetic Gaussian patches.""" + + allow_nonzero_base = bkgnd_degree is not None + + psf = default_psf + gauss2d = psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + ret = _require_position_fit( + psf.find_position( + gauss2d, + gauss2d.shape, + starting_point=((gauss2d.shape[0] // 2, gauss2d.shape[1] // 2)), + bkgnd_degree=bkgnd_degree, + allow_nonzero_base=allow_nonzero_base, + num_sigma=None, + use_angular_params=use_angular_params, + ) + ) assert ret[0] == pytest.approx(gauss2d.shape[0] / 2) assert ret[1] == pytest.approx(gauss2d.shape[1] / 2) - assert ret[2]['sigma_y'] == pytest.approx(1., abs=5e-2) - assert ret[2]['sigma_x'] == pytest.approx(1., abs=5e-2) - assert ret[2]['scale'] == pytest.approx(2., abs=5e-2) - - # asymmetric PSF, float sigma - gauss2d = psf.eval_rect((21, 21), scale=2., sigma=(2., 0.5)) - ret = psf.find_position(gauss2d, gauss2d.shape, - starting_point=((gauss2d.shape[0]//2, - gauss2d.shape[1]//2)), - bkgnd_degree=bkgnd_degree, - bkgnd_ignore_center=(4, 4), - allow_nonzero_base=allow_nonzero_base, - num_sigma=0, - use_angular_params=use_angular_params) + assert ret[2]['sigma_y'] == pytest.approx(1.0, abs=5e-2) + assert ret[2]['sigma_x'] == pytest.approx(1.0, abs=5e-2) + assert ret[2]['scale'] == pytest.approx(2.0, abs=5e-2) + + gauss2d = psf.eval_rect((21, 21), scale=2.0, sigma=(2.0, 0.5)) + ret = _require_position_fit( + psf.find_position( + gauss2d, + gauss2d.shape, + starting_point=((gauss2d.shape[0] // 2, gauss2d.shape[1] // 2)), + bkgnd_degree=bkgnd_degree, + bkgnd_ignore_center=(4, 4), + allow_nonzero_base=allow_nonzero_base, + num_sigma=None, + use_angular_params=use_angular_params, + ) + ) assert ret[0] == pytest.approx(gauss2d.shape[0] / 2, abs=1e-4) assert ret[1] == pytest.approx(gauss2d.shape[1] / 2, abs=1e-4) - assert ret[2]['sigma_y'] == pytest.approx(2., abs=5e-2) + assert ret[2]['sigma_y'] == pytest.approx(2.0, abs=5e-2) assert ret[2]['sigma_x'] == pytest.approx(0.5, abs=5e-2) - assert ret[2]['scale'] == pytest.approx(2., abs=7e-2) + assert ret[2]['scale'] == pytest.approx(2.0, abs=7e-2) - # offset PSF created through mean, float sigma psf2 = GaussianPSF(mean=(0.5, 0.75)) gauss2d = psf2.eval_rect((21, 21), scale=0.5, sigma=(0.5, 1.3)) - # find using non-offset PSF - ret = psf.find_position(gauss2d, gauss2d.shape, - starting_point=((gauss2d.shape[0]//2, - gauss2d.shape[1]//2)), - bkgnd_degree=bkgnd_degree, - bkgnd_ignore_center=(4, 4), - allow_nonzero_base=allow_nonzero_base, - num_sigma=0, - use_angular_params=use_angular_params) + ret = _require_position_fit( + psf.find_position( + gauss2d, + gauss2d.shape, + starting_point=((gauss2d.shape[0] // 2, gauss2d.shape[1] // 2)), + bkgnd_degree=bkgnd_degree, + bkgnd_ignore_center=(4, 4), + allow_nonzero_base=allow_nonzero_base, + num_sigma=None, + use_angular_params=use_angular_params, + ) + ) assert ret[0] == pytest.approx(gauss2d.shape[0] / 2 + 0.5, abs=1e-1) assert ret[1] == pytest.approx(gauss2d.shape[1] / 2 + 0.75, abs=1e-1) assert ret[2]['sigma_y'] == pytest.approx(0.5, abs=5e-2) assert ret[2]['sigma_x'] == pytest.approx(1.3, abs=5e-2) assert ret[2]['scale'] == pytest.approx(0.5, abs=5e-2) - # find using offset PSF - ret = psf2.find_position(gauss2d, gauss2d.shape, - starting_point=((gauss2d.shape[0]//2, - gauss2d.shape[1]//2)), - bkgnd_degree=bkgnd_degree, - bkgnd_ignore_center=(4, 4), - allow_nonzero_base=allow_nonzero_base, - num_sigma=0, - use_angular_params=use_angular_params) + ret = _require_position_fit( + psf2.find_position( + gauss2d, + gauss2d.shape, + starting_point=((gauss2d.shape[0] // 2, gauss2d.shape[1] // 2)), + bkgnd_degree=bkgnd_degree, + bkgnd_ignore_center=(4, 4), + allow_nonzero_base=allow_nonzero_base, + num_sigma=None, + use_angular_params=use_angular_params, + ) + ) assert ret[0] == pytest.approx(gauss2d.shape[0] / 2, abs=1e-1) assert ret[1] == pytest.approx(gauss2d.shape[1] / 2, abs=1e-1) assert ret[2]['sigma_y'] == pytest.approx(0.5, abs=5e-2) assert ret[2]['sigma_x'] == pytest.approx(1.3, abs=5e-2) assert ret[2]['scale'] == pytest.approx(0.5, abs=5e-2) - # offset PSF created through eval_rect, float sigma psf2 = GaussianPSF() gauss2d = psf2.eval_rect((21, 21), offset=(0.21, -0.35), scale=1.5, sigma=(0.8, 1.3)) - ret = psf.find_position(gauss2d, gauss2d.shape, - starting_point=((gauss2d.shape[0]//2, - gauss2d.shape[1]//2)), - bkgnd_degree=bkgnd_degree, - bkgnd_ignore_center=(4, 4), - allow_nonzero_base=allow_nonzero_base, - num_sigma=0, - use_angular_params=use_angular_params) + ret = _require_position_fit( + psf2.find_position( + gauss2d, + gauss2d.shape, + starting_point=((gauss2d.shape[0] // 2, gauss2d.shape[1] // 2)), + bkgnd_degree=bkgnd_degree, + bkgnd_ignore_center=(4, 4), + allow_nonzero_base=allow_nonzero_base, + num_sigma=None, + use_angular_params=use_angular_params, + ) + ) assert ret[0] == pytest.approx(gauss2d.shape[0] // 2 + 0.21, abs=5e-2) assert ret[1] == pytest.approx(gauss2d.shape[1] // 2 - 0.35, abs=5e-2) assert ret[2]['sigma_y'] == pytest.approx(0.8, abs=1e-3) assert ret[2]['sigma_x'] == pytest.approx(1.3, abs=1e-3) assert ret[2]['scale'] == pytest.approx(1.5, abs=1e-2) - # centered PSF, fixed sigma psf2 = GaussianPSF(sigma=(0.9, 1.5)) gauss2d = psf2.eval_rect((21, 21), scale=1.5) - ret = psf2.find_position(gauss2d, gauss2d.shape, - starting_point=((gauss2d.shape[0]//2, - gauss2d.shape[1]//2)), - bkgnd_degree=bkgnd_degree, - allow_nonzero_base=allow_nonzero_base, - num_sigma=0, - use_angular_params=use_angular_params) + ret = _require_position_fit( + psf2.find_position( + gauss2d, + gauss2d.shape, + starting_point=((gauss2d.shape[0] // 2, gauss2d.shape[1] // 2)), + bkgnd_degree=bkgnd_degree, + allow_nonzero_base=allow_nonzero_base, + num_sigma=None, + use_angular_params=use_angular_params, + ) + ) assert ret[0] == pytest.approx(gauss2d.shape[0] / 2, abs=1e-2) assert ret[1] == pytest.approx(gauss2d.shape[1] / 2, abs=1e-2) assert 'sigma_y' not in ret[2] assert 'sigma_x' not in ret[2] - # assert ret[2]['scale'] == pytest.approx(1.5, abs=2e-1) # TODO: Why? + # With angular parameters and a polynomial background, optimized ``scale`` and + # floating ``sigma_*`` values follow angular reparameterization, not literal + # ``eval_rect`` inputs. + if bkgnd_degree is None or not use_angular_params: + assert ret[2]['scale'] == pytest.approx(1.5, abs=2e-1) - # centered PSF, fixed sigma_y psf2 = GaussianPSF(sigma=(0.9, None)) gauss2d = psf2.eval_rect((21, 21), scale=1.5, sigma_x=1.1) - ret = psf2.find_position(gauss2d, gauss2d.shape, - starting_point=((gauss2d.shape[0]//2, - gauss2d.shape[1]//2)), - bkgnd_degree=bkgnd_degree, - allow_nonzero_base=allow_nonzero_base, - num_sigma=0, - use_angular_params=use_angular_params) + ret = _require_position_fit( + psf2.find_position( + gauss2d, + gauss2d.shape, + starting_point=((gauss2d.shape[0] // 2, gauss2d.shape[1] // 2)), + bkgnd_degree=bkgnd_degree, + allow_nonzero_base=allow_nonzero_base, + num_sigma=None, + use_angular_params=use_angular_params, + ) + ) assert ret[0] == pytest.approx(gauss2d.shape[0] / 2, abs=1e-2) assert ret[1] == pytest.approx(gauss2d.shape[1] / 2, abs=1e-2) assert 'sigma_y' not in ret[2] - # assert ret[2]['sigma_x'] == pytest.approx(1.1, abs=1e-1) # TODO: Why? - # assert ret[2]['scale'] == pytest.approx(1.5, abs=1e-1) # TODO: Why? + if bkgnd_degree is None or not use_angular_params: + assert ret[2]['sigma_x'] == pytest.approx(1.1, abs=1e-1) + assert ret[2]['scale'] == pytest.approx(1.5, abs=1e-1) - # centered PSF, fixed sigma_x psf2 = GaussianPSF(sigma=(None, 0.8)) gauss2d = psf2.eval_rect((21, 21), scale=1.5, sigma_y=1.1) - ret = psf2.find_position(gauss2d, gauss2d.shape, - starting_point=((gauss2d.shape[0]//2, - gauss2d.shape[1]//2)), - bkgnd_degree=bkgnd_degree, - allow_nonzero_base=allow_nonzero_base, - num_sigma=0, - use_angular_params=use_angular_params) + ret = _require_position_fit( + psf2.find_position( + gauss2d, + gauss2d.shape, + starting_point=((gauss2d.shape[0] // 2, gauss2d.shape[1] // 2)), + bkgnd_degree=bkgnd_degree, + allow_nonzero_base=allow_nonzero_base, + num_sigma=None, + use_angular_params=use_angular_params, + ) + ) assert ret[0] == pytest.approx(gauss2d.shape[0] / 2, abs=1e-2) assert ret[1] == pytest.approx(gauss2d.shape[1] / 2, abs=1e-2) assert 'sigma_x' not in ret[2] - # assert ret[2]['sigma_y'] == pytest.approx(1.1, abs=1e-1) # TODO: Why? - # assert ret[2]['scale'] == pytest.approx(1.5, abs=1e-1) # TODO: Why? + if bkgnd_degree is None or not use_angular_params: + assert ret[2]['sigma_y'] == pytest.approx(1.1, abs=1e-1) + assert ret[2]['scale'] == pytest.approx(1.5, abs=1e-1) if bkgnd_degree is not None: - # add background gradient - gauss2d = psf.eval_rect((21, 21), scale=2., sigma=1.) - nparams = int((bkgnd_degree+1) * (bkgnd_degree+2) / 2) + gauss2d = psf.eval_rect((21, 21), scale=2.0, sigma=(1.0, 1.0)) + nparams = int((bkgnd_degree + 1) * (bkgnd_degree + 2) / 2) coeffts = np.array([0.5] * nparams) gauss2d += GaussianPSF.background_gradient((21, 21), coeffts) - # if allow_nonzero_base: - # psf._debug_opt = 10 - ret = psf.find_position(gauss2d, gauss2d.shape, - starting_point=((gauss2d.shape[0]//2, - gauss2d.shape[1]//2)), - bkgnd_degree=bkgnd_degree, - allow_nonzero_base=allow_nonzero_base, - num_sigma=0, - use_angular_params=use_angular_params) + ret = _require_position_fit( + psf.find_position( + gauss2d, + gauss2d.shape, + starting_point=((gauss2d.shape[0] // 2, gauss2d.shape[1] // 2)), + bkgnd_degree=bkgnd_degree, + allow_nonzero_base=allow_nonzero_base, + num_sigma=None, + use_angular_params=use_angular_params, + ) + ) assert ret[0] == pytest.approx(gauss2d.shape[0] / 2, abs=1e-3) assert ret[1] == pytest.approx(gauss2d.shape[1] / 2, abs=1e-2) - assert ret[2]['sigma_y'] == pytest.approx(1., abs=5e-2) - assert ret[2]['sigma_x'] == pytest.approx(1., abs=5e-2) - assert ret[2]['scale'] == pytest.approx(2., abs=5e-2) + assert ret[2]['sigma_y'] == pytest.approx(1.0, abs=5e-2) + assert ret[2]['sigma_x'] == pytest.approx(1.0, abs=5e-2) + assert ret[2]['scale'] == pytest.approx(2.0, abs=5e-2) diff --git a/tests/test_psf.py b/tests/test_psf.py index 0c7f567..fea71c1 100644 --- a/tests/test_psf.py +++ b/tests/test_psf.py @@ -2,6 +2,10 @@ # tests/test_psf.py ################################################################################ +"""Tests for :mod:`psfmodel.psf` helpers (background gradient and validation).""" + +from __future__ import annotations + import numpy as np import numpy.ma as ma import numpy.testing as npt @@ -10,17 +14,30 @@ from psfmodel import PSF -def test_bkgnd_gradient_coeffs(): - with pytest.raises(ValueError): - PSF._background_gradient_coeffs((3, -1), 1) - with pytest.raises(ValueError): - PSF._background_gradient_coeffs((-1, 3), 1) - with pytest.raises(ValueError): - PSF._background_gradient_coeffs((1, 2), 1) - with pytest.raises(ValueError): - PSF._background_gradient_coeffs((2, 1), 1) - with pytest.raises(ValueError): - PSF._background_gradient_coeffs((1, 1), -5) +@pytest.mark.parametrize( + ('shape', 'order', 'expected_msg'), + [ + ((3, -1), 1, 'Image must have odd positive shape in each dimension, got (3, -1)'), + ((-1, 3), 1, 'Image must have odd positive shape in each dimension, got (-1, 3)'), + ((1, 2), 1, 'Image must have odd positive shape in each dimension, got (1, 2)'), + ((2, 1), 1, 'Image must have odd positive shape in each dimension, got (2, 1)'), + ((1, 1), -5, 'Order must be non-negative, got -5'), + ], +) +def test_bkgnd_gradient_coeffs_value_errors( + shape: tuple[int, int], + order: int, + expected_msg: str, +) -> None: + """``_background_gradient_coeffs`` validates ``shape`` and ``order``.""" + + with pytest.raises(ValueError) as exc_info: + PSF._background_gradient_coeffs(shape, order) + assert str(exc_info.value) == expected_msg + + +def test_bkgnd_gradient_coeffs_success() -> None: + """``_background_gradient_coeffs`` returns expected low-order layouts.""" ret = PSF._background_gradient_coeffs((1, 1), 1) exp = np.array([[[1, 0, 0]]]) @@ -35,6 +52,7 @@ def test_bkgnd_gradient_coeffs(): assert np.all(ret == exp) ret = PSF._background_gradient_coeffs((3, 3), 1) + # fmt: off exp = np.array([[[1., -1., -1.], [1., 0., -1.], [1., 1., -1.]], @@ -46,9 +64,11 @@ def test_bkgnd_gradient_coeffs(): [[1., -1., 1.], [1., 0., 1.], [1., 1., 1.]]]) + # fmt: on assert np.all(ret == exp) ret = PSF._background_gradient_coeffs((3, 3), 2) + # fmt: off exp = np.array([[[1., -1., -1., 1., 1., 1.], [1., 0., -1., 0., -0., 1.], [1., 1., -1., 1., -1., 1.]], @@ -60,30 +80,43 @@ def test_bkgnd_gradient_coeffs(): [[1., -1., 1., 1., -1., 1.], [1., 0., 1., 0., 0., 1.], [1., 1., 1., 1., 1., 1.]]]) + # fmt: on assert np.all(ret == exp) -def test_background_gradient_fit(): - with pytest.raises(ValueError): +def test_background_gradient_fit() -> None: + """``background_gradient_fit`` fits quadratics, honors masks, and validates input.""" + + with pytest.raises(ValueError) as exc_info: PSF.background_gradient_fit(np.zeros((5,))) - with pytest.raises(ValueError): + assert str(exc_info.value) == 'Image must be 2-D, got (5,)' + + with pytest.raises(ValueError) as exc_info: PSF.background_gradient_fit(np.zeros((5, 4))) - with pytest.raises(ValueError): + assert str(exc_info.value) == 'Image must have odd positive shape in each dimension, got (5, 4)' + + with pytest.raises(ValueError) as exc_info: PSF.background_gradient_fit(np.zeros((4, 5))) - with pytest.raises(ValueError): + assert str(exc_info.value) == 'Image must have odd positive shape in each dimension, got (4, 5)' + + with pytest.raises(ValueError) as exc_info: PSF.background_gradient_fit(np.zeros((5, 5)), order=-10) + assert str(exc_info.value) == 'Order must be non-negative, got -10' # Unmasked - img = 3*(np.arange(5.)[:, np.newaxis]-2)**2 + 2*(np.arange(5.)[np.newaxis, :]-2) + img = 3 * (np.arange(5.0)[:, np.newaxis] - 2) ** 2 + 2 * (np.arange(5.0)[np.newaxis, :] - 2) bkgnd_params, img_mask = PSF.background_gradient_fit(img) + assert bkgnd_params is not None + assert img_mask is not None npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3])) assert np.sum(img_mask) == 0 img2 = PSF.background_gradient((5, 5), bkgnd_params) npt.assert_array_almost_equal(img, img2) bkgnd_params, img_mask = PSF.background_gradient_fit(img, order=3) - npt.assert_array_almost_equal(np.array(bkgnd_params), - np.array([0, 2, 0, 0, 0, 3, 0, 0, 0, 0])) + assert bkgnd_params is not None + assert img_mask is not None + npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3, 0, 0, 0, 0])) assert np.sum(img_mask) == 0 img2 = PSF.background_gradient((5, 5), bkgnd_params) npt.assert_array_almost_equal(img, img2) @@ -96,29 +129,34 @@ def test_background_gradient_fit(): assert img_mask is None # Ignore center - img = 3*(np.arange(5.)[:, np.newaxis]-2)**2 + 2*(np.arange(5.)[np.newaxis, :]-2) + img = 3 * (np.arange(5.0)[:, np.newaxis] - 2) ** 2 + 2 * (np.arange(5.0)[np.newaxis, :] - 2) img[2, 2] = 1000 bkgnd_params, img_mask = PSF.background_gradient_fit(img) + assert bkgnd_params is not None + assert img_mask is not None with np.testing.assert_raises(AssertionError): # Array not equal - npt.assert_array_almost_equal(np.array(bkgnd_params), - np.array([0, 2, 0, 0, 0, 3])) + npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3])) assert np.sum(img_mask) == 0 bkgnd_params, img_mask = PSF.background_gradient_fit(img, ignore_center=0) - npt.assert_array_almost_equal(np.array(bkgnd_params), - np.array([0, 2, 0, 0, 0, 3])) + assert bkgnd_params is not None + assert img_mask is not None + npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3])) assert np.sum(img_mask) == 1 bkgnd_params, img_mask = PSF.background_gradient_fit(img, ignore_center=1) - npt.assert_array_almost_equal(np.array(bkgnd_params), - np.array([0, 2, 0, 0, 0, 3])) + assert bkgnd_params is not None + assert img_mask is not None + npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3])) assert np.sum(img_mask) == 9 bkgnd_params, img_mask = PSF.background_gradient_fit(img, ignore_center=(1, 1)) - npt.assert_array_almost_equal(np.array(bkgnd_params), - np.array([0, 2, 0, 0, 0, 3])) + assert bkgnd_params is not None + assert img_mask is not None + npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3])) assert np.sum(img_mask) == 9 img = img.view(ma.MaskedArray) bkgnd_params, img_mask = PSF.background_gradient_fit(img, ignore_center=(0, 1)) - npt.assert_array_almost_equal(np.array(bkgnd_params), - np.array([0, 2, 0, 0, 0, 3])) + assert bkgnd_params is not None + assert img_mask is not None + npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3])) assert np.sum(img_mask) == 3 assert np.sum(img_mask[0]) == 0 assert np.sum(img_mask[1]) == 0 @@ -133,24 +171,28 @@ def test_background_gradient_fit(): assert img_mask is None # Removal of bad pixels - img[:] = 3*(np.arange(5.)[:, np.newaxis]-2)**2 + 2*(np.arange(5.)[np.newaxis, :]-2) + img[:] = 3 * (np.arange(5.0)[:, np.newaxis] - 2) ** 2 + 2 * (np.arange(5.0)[np.newaxis, :] - 2) img = img.view(ma.MaskedArray) bkgnd_params, img_mask = PSF.background_gradient_fit(img, num_sigma=5) - npt.assert_array_almost_equal(np.array(bkgnd_params), - np.array([0, 2, 0, 0, 0, 3])) + assert bkgnd_params is not None + assert img_mask is not None + npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3])) assert np.sum(img_mask) == 0 img[2, 2] = 10000 bkgnd_params, img_mask = PSF.background_gradient_fit(img, num_sigma=4) - npt.assert_array_almost_equal(np.array(bkgnd_params), - np.array([0, 2, 0, 0, 0, 3])) + assert bkgnd_params is not None + assert img_mask is not None + npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3])) assert np.sum(img_mask) == 1 img[0, 0] = 100000 bkgnd_params, img_mask = PSF.background_gradient_fit(img, num_sigma=3) - npt.assert_array_almost_equal(np.array(bkgnd_params), - np.array([0, 2, 0, 0, 0, 3])) + assert bkgnd_params is not None + assert img_mask is not None + npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3])) assert np.sum(img_mask) == 2 img[0, 4] = 10000 bkgnd_params, img_mask = PSF.background_gradient_fit(img, num_sigma=3) - npt.assert_array_almost_equal(np.array(bkgnd_params), - np.array([0, 2, 0, 0, 0, 3])) + assert bkgnd_params is not None + assert img_mask is not None + npt.assert_array_almost_equal(np.array(bkgnd_params), np.array([0, 2, 0, 0, 0, 3])) assert np.sum(img_mask) == 3