Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Exclude non-essential content from the Docker build context.
# Keep only what ``pip install .`` needs: pyproject.toml, README.md, LICENSE, qpx/.
#
# NOTE: unlike .gitignore, .dockerignore patterns are anchored at the root of
# the build context. A bare ``*.pyc`` or ``__pycache__/`` therefore matches
# only top-level entries; anything that can show up inside the copied ``qpx/``
# package needs a leading ``**/`` to be excluded at every depth.

.git/
.github/
.pytest_cache/
.hypothesis/
.ruff_cache/
.tmp/
.vscode/
.windsurf/
.idea/
**/__pycache__/
**/*.pyc
**/*.pyo
**/*.egg-info/
build/
dist/
.coverage
coverage.xml
**/.DS_Store
.venv/
venv/
env/

# Project-specific large/non-essential dirs.
# Deliberately root-anchored (no ``**/``) so that same-named directories
# inside the ``qpx/`` package are still shipped.
tests/
docs/
site/
benchmarks/
scripts/
data/

# Docker-related files are not needed inside the image
Dockerfile
.dockerignore
88 changes: 88 additions & 0 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Build and publish the QPX container image to GitHub Container Registry (GHCR).
#
# Triggers:
#   - push to main         -> publishes the `dev` tag (floating)
#   - push tag v*.*.*      -> publishes semantic version tags + `latest`
#                             (pre-release tags such as v1.2.3-rc.1 do NOT move `latest`)
#   - pull_request to main -> build only (smoke-test the Dockerfile)
#   - workflow_dispatch    -> manual run
#
# Image:    ghcr.io/${owner}/qpx   (expands to ghcr.io/bigbio/qpx)
# Platform: linux/amd64            (multi-arch can be added later)
#
# Repo settings required:
#   Settings -> Actions -> General -> Workflow permissions -> "Read and write permissions"
#   (the default GITHUB_TOKEN is used to authenticate against GHCR)

name: Publish Docker image

on:
  push:
    branches: [main]
    tags: ["v*.*.*"]
  pull_request:
    branches: [main]
  workflow_dispatch:

# Least privilege: only what checkout and the GHCR push need. No step requests
# an OIDC token, so `id-token: write` is intentionally not granted.
permissions:
  contents: read
  packages: write

jobs:
  docker:
    name: Build and push to GHCR
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          fetch-tags: true

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Pull requests only build the image, so no registry login is needed.
      - name: Log in to GHCR
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # The build context excludes .git, so the package version cannot be
      # derived from history inside the image build; pass it in explicitly.
      - name: Resolve QPX version for build-arg
        id: ver
        run: |
          if [[ "${GITHUB_REF}" == refs/tags/v* ]]; then
            echo "qpx_version=${GITHUB_REF#refs/tags/v}" >> "$GITHUB_OUTPUT"
          else
            echo "qpx_version=0.0.0.dev0+$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"
          fi

      - name: Extract image metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository_owner }}/qpx
          # `latest` is only moved by final releases: the tag trigger glob
          # (v*.*.*) also matches pre-releases like v1.2.3-rc.1, which carry
          # a '-' in the ref and must not overwrite `latest`.
          tags: |
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=ref,event=pr
            type=raw,value=dev,enable=${{ github.ref == 'refs/heads/main' && github.event_name == 'push' }}
            type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/v') && !contains(github.ref, '-') }}
            type=sha,format=short

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          platforms: linux/amd64
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          build-args: |
            QPX_VERSION=${{ steps.ver.outputs.qpx_version }}
          provenance: true
          cache-from: type=gha
          cache-to: type=gha,mode=max
39 changes: 39 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# syntax=docker/dockerfile:1.6
#
# QPX container image
# -------------------
# Provides the ``qpxc`` CLI built from this repository's source, with the
# optional ``[mudata]`` extras pre-installed for MuData export.
#
# Build:
#   docker build -t ghcr.io/bigbio/qpx:dev .
#
# Run (the image defines no ENTRYPOINT, so name the ``qpxc`` executable):
#   docker run --rm -v "$(pwd)":/data ghcr.io/bigbio/qpx:dev qpxc convert diann --help

FROM python:3.11-slim-bookworm

# Runtime system deps required by pyOpenMS (see environment.yml).
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libglib2.0-0=2.74.6-2+deb12u8 \
        procps=2:4.0.2-3 \
    && rm -rf /var/lib/apt/lists/*

# hatch-vcs derives the version from git history; when the build context lacks
# a .git directory (typical Docker builds), fall back to this placeholder.
# Kept as a build-time ARG only: the value is handed to pip below and is not
# persisted into the runtime environment of the image.
ARG QPX_VERSION=0.0.0.dev0

# Minimal build inputs required by ``pip install .`` under hatchling.
WORKDIR /src
COPY pyproject.toml README.md LICENSE ./
COPY qpx ./qpx

# SETUPTOOLS_SCM_PRETEND_VERSION is scoped to the install command so it does
# not leak into containers (where it would override the version of any other
# setuptools-scm based package built later). The sources are removed in the
# same layer so they do not add to the image size.
RUN pip install --no-cache-dir --upgrade pip==24.0 \
    && SETUPTOOLS_SCM_PRETEND_VERSION="${QPX_VERSION}" pip install --no-cache-dir ".[mudata]" \
    && rm -rf /src

LABEL org.opencontainers.image.source="https://github.com/bigbio/qpx" \
      org.opencontainers.image.description="QPX: Quantitative Proteomics Parquet toolkit (qpxc CLI) with MuData export" \
      org.opencontainers.image.licenses="Apache-2.0"

# Default working directory; mount input/output data here (see "Run" above).
WORKDIR /data
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ mzidentml = ["lxml>=4.9.0"]
transforms = ["biopython", "mygene>=1.0.0", "anndata>=0.9.0"]
plotting = ["plotly>=5.0.0", "scikit-learn>=1.5.0"]
quantify = ["mokume>=0.1.0", "directlfq"]
all = ["lxml>=4.9.0", "biopython", "mygene>=1.0.0", "anndata>=0.9.0", "plotly>=5.0.0", "scikit-learn>=1.5.0", "mokume>=0.1.0", "directlfq"]
mudata = ["mudata>=0.2.4", "anndata>=0.9.0"]
all = ["lxml>=4.9.0", "biopython", "mygene>=1.0.0", "anndata>=0.9.0", "plotly>=5.0.0", "scikit-learn>=1.5.0", "mokume>=0.1.0", "directlfq", "mudata>=0.2.4"]
dev = [
"pytest",
"pytest-timeout",
"hypothesis",
"lxml>=4.9.0",
"anndata",
"mudata>=0.2.4",
"scikit-learn",
"plotly>=5.0.0",
"pre-commit",
Expand Down
107 changes: 107 additions & 0 deletions qpx/cli/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
qpxc convert maxquant — MaxQuant output to QPX
qpxc convert fragpipe — FragPipe output to QPX
qpxc convert mzidentml — mzIdentML (incl. XL-MS 1.3) to QPX
qpxc convert openms — OpenMS -out_qpx enrichment to full QPX
qpxc convert sdrf — SDRF to sample.parquet + run.parquet
"""

Expand All @@ -20,6 +21,8 @@

import click

from qpx.converters.openms import OpenMSConverter

logger = logging.getLogger("qpx.cli.convert")


Expand Down Expand Up @@ -263,6 +266,11 @@
show_default=True,
help="Parquet compression codec.",
)
@click.option(
"--diann-log",
help="DIA-NN summary log file (version auto-detected from first line)",
type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option("--verbose", help="Enable verbose logging", is_flag=True)
def convert_diann_cmd(
report_path: Path,
Expand All @@ -281,6 +289,7 @@
project_accession: Optional[str],
enrich_pride: bool,
compression: str,
diann_log: Optional[Path],
verbose: bool,
):
"""Convert DIA-NN report to QPX format.
Expand Down Expand Up @@ -321,6 +330,7 @@
duckdb_max_memory=duckdb_max_memory,
duckdb_threads=duckdb_threads,
compression=compression,
diann_log=str(diann_log) if diann_log else None,
)
converter.convert_features(
mzml_info_folder=mzml_info_folder,
Expand Down Expand Up @@ -779,6 +789,103 @@
click.echo(f"mzIdentML conversion complete. Output: {output_folder}")


# ---------------------------------------------------------------------------
# OpenMS
# ---------------------------------------------------------------------------


@convert.command("openms")
@click.option(
    "--qpx-dir",
    help="Directory containing OpenMS -out_qpx parquet files (*.psm.parquet, *.feature.parquet, *.pg.parquet)",
    required=True,
    type=click.Path(exists=True, file_okay=False, path_type=Path),
)
@click.option(
    "--sdrf-file",
    help="SDRF metadata file path (for sample/run generation)",
    required=True,
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option(
    "--output-folder",
    help="Output directory for the full QPX dataset",
    required=True,
    type=click.Path(file_okay=False, path_type=Path),
)
@click.option(
    "--output-prefix",
    help="Prefix for output file names",
    default="openms",
)
@click.option(
    "--project-accession",
    help="PRIDE / ProteomeXchange accession (e.g. PXD001819)",
)
@click.option(
    "--enrich-pride",
    help="Fetch project metadata from PRIDE API after conversion",
    is_flag=True,
    default=False,
)
@click.option(
    "--compression",
    type=click.Choice(["zstd", "snappy", "gzip", "none"], case_sensitive=False),
    default="zstd",
    show_default=True,
    help="Parquet compression codec.",
)
@click.option("--verbose", help="Enable verbose logging", is_flag=True)
def convert_openms_cmd(
    qpx_dir: Path,
    sdrf_file: Path,
    output_folder: Path,
    output_prefix: str,
    project_accession: Optional[str],
    enrich_pride: bool,
    compression: str,
    verbose: bool,
):
    """Enrich OpenMS ProteomicsLFQ -out_qpx output into a full QPX dataset.

    Validates the existing psm/feature/pg parquet files, copies them to the
    output folder, and generates the missing metadata tables (run, sample,
    ontology, provenance, dataset) from the SDRF file.

    \b
    Examples:
        # Enrich OpenMS QPX output
        qpxc convert openms \\
            --qpx-dir ./openms_qpx_output \\
            --sdrf-file metadata.sdrf.tsv \\
            --output-folder ./qpx_full

        # With project accession
        qpxc convert openms \\
            --qpx-dir ./openms_qpx_output \\
            --sdrf-file metadata.sdrf.tsv \\
            --output-folder ./qpx_full \\
            --project-accession PXD001819
    """
    # NOTE: the docstring above is intentionally NOT a raw string. click only
    # disables re-wrapping of the examples when it finds the real backspace
    # character produced by ``\b``; in a raw docstring the help output would
    # show a literal "\b" and doubled backslashes instead.

    # --verbose raises the root logger to DEBUG for this invocation.
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    converter = OpenMSConverter(
        qpx_dir=qpx_dir,
        sdrf_path=sdrf_file,
        compression=compression,
    )
    converter.convert(
        output_folder=output_folder,
        output_prefix=output_prefix,
        project_accession=project_accession,
    )

    # Shared post-conversion hook, driven by the --enrich-pride flag and the
    # (optional) project accession.
    _maybe_enrich_pride(output_folder, project_accession, enrich_pride)

    click.echo(f"OpenMS QPX enrichment complete. Output: {output_folder}")


# ---------------------------------------------------------------------------
# SDRF
# ---------------------------------------------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions qpx/converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from qpx.converters.maxquant.pg_adapter import MaxQuantPgAdapter
from qpx.converters.maxquant.psm_adapter import MaxQuantPsmAdapter
from qpx.converters.mzidentml.psm_adapter import MzIdentMLPsmAdapter
from qpx.converters.openms.converter import OpenMSConverter
from qpx.converters.orchestrator import BaseOrchestrator, build_dataset_record
from qpx.converters.quantms.converter import QuantMSConverter
from qpx.converters.quantms.feature_adapter import QuantmsFeatureAdapter
Expand Down Expand Up @@ -55,4 +56,6 @@
"FragPipeConverter",
# mzIdentML
"MzIdentMLPsmAdapter",
# OpenMS
"OpenMSConverter",
]
Loading
Loading