diff --git a/ALIGNMENT_INTEGRATION_WORKFLOW.md b/ALIGNMENT_INTEGRATION_WORKFLOW.md new file mode 100644 index 0000000..04f152d --- /dev/null +++ b/ALIGNMENT_INTEGRATION_WORKFLOW.md @@ -0,0 +1,416 @@ +# SCORE_ALIGNMENT Integration Workflow + +## Overview + +This diagram illustrates how the SCORE_ALIGNMENT integration works to recover peaks with weak MS2 signals but good alignment scores. + +## High-Level Workflow + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ PyProphet Export Command │ +│ pyprophet export tsv --in data.osw --out results.tsv │ +│ (use_alignment=True by default) │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ 1. Configuration Check │ +│ • use_alignment = True (default) │ +│ • max_alignment_pep = 0.7 (default) │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ 2. Auto-Detection Phase │ +│ │ +│ OSW Files: │ +│ ├─ Check FEATURE_MS2_ALIGNMENT table exists? │ +│ └─ Check SCORE_ALIGNMENT table exists? │ +│ │ +│ Parquet Files: │ +│ └─ Check for {basename}_feature_alignment.parquet? │ +│ │ +│ Split Parquet Files: │ +│ └─ Check for {infile}/feature_alignment.parquet? │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ + ┌─────────────┴─────────────┐ + │ │ + ┌─────────▼──────────┐ ┌──────────▼─────────┐ + │ Alignment Present │ │ Alignment Missing │ + │ use_alignment=T │ │ use_alignment=T │ + └─────────┬──────────┘ └──────────┬─────────┘ + │ │ + │ ↓ + │ ┌────────────────────────┐ + │ │ Standard Export Only │ + │ │ (no alignment used) │ + │ └────────────────────────┘ + │ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ 3. 
Data Reading Phase │ +│ │ +│ Step A: Fetch Base Features (MS2 QVALUE filter) │ +│ ┌───────────────────────────────────────────────────────────────┐ │ +│ │ SELECT CAST(FEATURE.ID AS INTEGER) AS id, │ │ +│ │ ... (other columns) │ │ +│ │ FROM FEATURES │ │ +│ │ WHERE SCORE_MS2.QVALUE < max_rs_peakgroup_qvalue (e.g., 0.05)│ │ +│ │ → Base Features (passed MS2 threshold) │ │ +│ │ → Mark with from_alignment=0 │ │ +│ │ → CAST preserves precision for large feature IDs │ │ +│ └───────────────────────────────────────────────────────────────┘ │ +│ │ +│ Step B: Fetch Aligned Features (Alignment PEP filter) │ +│ ┌───────────────────────────────────────────────────────────────┐ │ +│ │ SELECT DENSE_RANK() OVER (...) AS alignment_group_id, │ │ +│ │ ALIGNED_FEATURE_ID AS id, │ │ +│ │ CAST(REFERENCE_FEATURE_ID AS INTEGER) │ │ +│ │ AS alignment_reference_feature_id, │ │ +│ │ REFERENCE_RT AS alignment_reference_rt │ │ +│ │ FROM FEATURE_MS2_ALIGNMENT │ │ +│ │ JOIN SCORE_ALIGNMENT │ │ +│ │ WHERE LABEL = 1 (target) │ │ +│ │ AND SCORE_ALIGNMENT.PEP < max_alignment_pep (e.g., 0.7) │ │ +│ │ AND REF FEATURE passes MS2 QVALUE threshold │ │ +│ │ → Aligned Features (good alignment scores) │ │ +│ │ → Includes alignment_group_id and reference info │ │ +│ │ → CAST preserves precision for large feature IDs │ │ +│ └───────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ 4. 
Feature Recovery Logic │ +│ │ +│ ┌─────────────────┐ ┌──────────────────┐ │ +│ │ Base Features │ │ Aligned Features │ │ +│ │ (MS2 passed) │ │ (Alignment good) │ │ +│ │ IDs: 1,2,3,4,5 │ │ IDs: 3,4,6,7,8 │ │ +│ └────────┬────────┘ └────────┬─────────┘ │ +│ │ │ │ +│ └──────────┬───────────────┘ │ +│ ↓ │ +│ ┌──────────────────────┐ │ +│ │ Find NEW features: │ │ +│ │ aligned - base │ │ +│ │ = {6, 7, 8} │ │ +│ └──────────┬───────────┘ │ +│ ↓ │ +│ ┌──────────────────────────────────────┐ │ +│ │ Fetch full data for recovered │ │ +│ │ features: 6, 7, 8 │ │ +│ │ Mark: from_alignment=1 │ │ +│ │ Add: alignment_pep │ │ +│ │ Add: alignment_qvalue │ │ +│ │ Add: alignment_group_id │ │ +│ │ Add: alignment_reference_feature_id │ │ +│ │ Add: alignment_reference_rt │ │ +│ └──────────┬───────────────────────────┘ │ +│ ↓ │ +│ ┌──────────────────────────────────────┐ │ +│ │ Assign alignment_group_id to │ │ +│ │ reference features │ │ +│ │ (features pointed to by aligned IDs) │ │ +│ └──────────┬───────────────────────────┘ │ +│ ↓ │ +│ ┌──────────────────────┐ │ +│ │ Combine: │ │ +│ │ Base (1,2,3,4,5) + │ │ +│ │ Recovered (6,7,8) │ │ +│ │ = Final (1-8) │ │ +│ └──────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ 5. Export Results │ +│ │ +│ Final TSV/Matrix includes: │ +│ • Original features (from_alignment=0) │ +│ • Recovered features (from_alignment=1, with alignment scores) │ +│ • More complete quantification with fewer missing values │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## Detailed Component Workflow + +### A. 
Reader Classes (OSW, Parquet, Split Parquet) + +``` +┌──────────────────────────────────────────────────────────────┐ +│ Reader.__init__() │ +│ │ +│ OSWReader: │ +│ N/A - checks at read time │ +│ │ +│ ParquetReader: │ +│ self._has_alignment = _check_alignment_file_exists() │ +│ • Checks: {basename}_feature_alignment.parquet │ +│ │ +│ SplitParquetReader: │ +│ self._has_alignment = _check_alignment_file_exists() │ +│ • Checks: {infile}/feature_alignment.parquet │ +└──────────────────────────────────────────────────────────────┘ + ↓ +┌──────────────────────────────────────────────────────────────┐ +│ Reader.read() │ +│ │ +│ → _read_standard_data() │ +│ if config.use_alignment AND alignment_present: │ +│ → _fetch_alignment_features() │ +│ → Merge with base features │ +└──────────────────────────────────────────────────────────────┘ +``` + +### B. Alignment Detection Methods + +``` +OSW Files (.osw): +┌─────────────────────────────────────────┐ +│ _check_alignment_presence(con) │ +│ │ +│ return: │ +│ check_sqlite_table( │ +│ con, "FEATURE_MS2_ALIGNMENT" │ +│ ) AND │ +│ check_sqlite_table( │ +│ con, "SCORE_ALIGNMENT" │ +│ ) │ +└─────────────────────────────────────────┘ + +Parquet Files (.parquet): +┌─────────────────────────────────────────┐ +│ _check_alignment_file_exists() │ +│ │ +│ if infile.endswith('.parquet'): │ +│ base = infile[:-8] │ +│ alignment_file = │ +│ f"{base}_feature_alignment.parquet"│ +│ return os.path.exists(alignment_file)│ +└─────────────────────────────────────────┘ + +Split Parquet Files (directory with .oswpq): +┌─────────────────────────────────────────┐ +│ _check_alignment_file_exists() │ +│ │ +│ if os.path.isdir(infile): │ +│ alignment_file = os.path.join( │ +│ infile, "feature_alignment.parquet"│ +│ ) │ +│ return os.path.exists(alignment_file)│ +└─────────────────────────────────────────┘ +``` + +### C. Feature Recovery Decision Tree + +``` + Start Export + │ + ↓ + ┌──────────────────────┐ + │ use_alignment=True? 
│ + └──────────┬───────────┘ + │ + ┌─────────────┴─────────────┐ + │ │ + YES NO + │ │ + ↓ ↓ + ┌──────────────┐ ┌──────────────┐ + │ Alignment │ │ Standard │ + │ data exists? │ │ Export Only │ + └──────┬───────┘ └──────────────┘ + │ + ┌─────┴─────┐ + │ │ + YES NO + │ │ + ↓ ↓ +┌─────────┐ ┌─────────┐ +│ Use │ │Standard │ +│Alignment│ │Export │ +└─────────┘ └─────────┘ + │ │ + └─────┬─────┘ + ↓ + Export Results +``` + +## Example Scenario + +### Before Alignment Integration: + +``` +Run 1: Feature detected with MS2 QVALUE = 0.02 ✓ (exported) +Run 2: Feature detected with MS2 QVALUE = 0.08 ✗ (not exported - weak signal) +Run 3: Feature detected with MS2 QVALUE = 0.03 ✓ (exported) + +Result: Missing quantification in Run 2 +``` + +### After Alignment Integration: + +``` +Run 1: Feature detected with MS2 QVALUE = 0.02 ✓ (exported, from_alignment=0) +Run 2: Feature detected with MS2 QVALUE = 0.08 ✗ (weak MS2) + BUT: Alignment PEP = 0.4 ✓ (good alignment!) + → Recovered via alignment (exported, from_alignment=1) +Run 3: Feature detected with MS2 QVALUE = 0.03 ✓ (exported, from_alignment=0) + +Result: Complete quantification across all runs +``` + +## File Structure Examples + +### OSW Format: +``` +data.osw (SQLite database) +├─ FEATURE_MS2_ALIGNMENT table +└─ SCORE_ALIGNMENT table +``` + +### Parquet Format: +``` +data.parquet ← Main file +data_feature_alignment.parquet ← Alignment file +``` + +### Split Parquet Format: +``` +experiment/ +├─ run1.oswpq/ +│ ├─ precursors_features.parquet +│ └─ transition_features.parquet +├─ run2.oswpq/ +│ ├─ precursors_features.parquet +│ └─ transition_features.parquet +└─ feature_alignment.parquet ← Alignment file (parent level) +``` + +## Key Benefits + +1. **Increased Coverage**: Recovers peaks with weak MS2 but good alignment +2. **Better Quantification**: Fewer missing values in matrices +3. **Quality Control**: Uses alignment PEP/QVALUE thresholds +4. **Backwards Compatible**: Enabled by default, but auto-detection falls back to the standard export when no alignment data is present +5. 
**Transparent**: Features marked with `from_alignment` flag + +## Configuration Options + +```bash +# Use default (enabled with auto-detection) +pyprophet export tsv --in data.osw --out results.tsv + +# Customize threshold +pyprophet export tsv --in data.osw --out results.tsv \ + --max_alignment_pep 0.5 + +# Explicitly disable +pyprophet export tsv --in data.osw --out results.tsv \ + --no-use_alignment +``` + +## Output Columns + +When alignment is used, exported features include additional columns: + +- `from_alignment`: 0 (base) or 1 (recovered) +- `alignment_pep`: Alignment posterior error probability +- `alignment_qvalue`: Alignment q-value +- `alignment_group_id`: Group identifier linking aligned features together +- `alignment_reference_feature_id`: ID of the reference feature used for alignment +- `alignment_reference_rt`: Retention time of the reference feature + +These allow users to: +- Identify which features were recovered +- Assess alignment quality +- Track which features are aligned together via `alignment_group_id` +- Find the reference feature that was used for alignment +- Filter or analyze separately if needed + +## Technical Implementation Details + +### Precision Preservation for Large Feature IDs + +Large integer feature IDs (e.g., `5,405,272,318,039,692,409`) require special handling to prevent precision loss during database operations and pandas DataFrame creation. + +#### The Problem +- Feature IDs can exceed 2^53, the limit beyond which float64 can no longer represent every integer exactly +- When pandas reads INTEGER columns from databases without explicit typing, it may infer float64 dtype +- This causes precision loss: `5,405,272,318,039,692,409` → `5,405,272,318,039,692,288` + +#### The Solution +SQL queries use explicit CAST operations in SELECT clauses (but NOT in JOIN conditions): + +```sql +-- OSW (SQLite) +SELECT CAST(FEATURE.ID AS INTEGER) AS id, + CAST(FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS INTEGER) AS alignment_reference_feature_id +FROM ... 
+ +-- Parquet (DuckDB) +SELECT CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID +FROM ... +``` + +**Key Design Principles:** +1. **CAST in SELECT**: Ensures pandas reads columns as integers, preserving precision +2. **No CAST in JOIN**: Database can use indexes for fast lookups (~16 seconds vs 50 minutes) +3. **Post-query conversion**: After reading, convert to pandas Int64 dtype for nullable integer support + +```python +# After reading from database +if "alignment_reference_feature_id" in df.columns: + df["alignment_reference_feature_id"] = df["alignment_reference_feature_id"].astype("Int64") +if "id" in data.columns: + data["id"] = data["id"].astype("Int64") +``` + +### Alignment Group ID Assignment + +The `alignment_group_id` is computed using `DENSE_RANK()` to assign a unique identifier to each alignment group: + +```sql +SELECT DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS alignment_group_id, + ALIGNED_FEATURE_ID AS id, + REFERENCE_FEATURE_ID AS alignment_reference_feature_id +FROM FEATURE_MS2_ALIGNMENT +``` + +#### Assigning Group IDs to Reference Features + +Reference features (those that aligned features point to) also need to receive their `alignment_group_id`. This is handled in post-processing: + +```python +# 1. Extract mapping: reference_feature_id -> alignment_group_id +ref_mapping = data[ + data["alignment_reference_feature_id"].notna() +][["alignment_reference_feature_id", "alignment_group_id"]].drop_duplicates() + +# 2. Create reverse mapping: id -> alignment_group_id for references +ref_group_mapping = ref_mapping.rename( + columns={"alignment_reference_feature_id": "id", + "alignment_group_id": "ref_alignment_group_id"} +) + +# 3. Merge to assign group IDs to reference features +data = pd.merge(data, ref_group_mapping, on="id", how="left") + +# 4. 
Fill in alignment_group_id where it's null but ref_alignment_group_id exists +mask = data["alignment_group_id"].isna() & data["ref_alignment_group_id"].notna() +data.loc[mask, "alignment_group_id"] = data.loc[mask, "ref_alignment_group_id"] +``` + +**Result:** All features in an alignment group (both aligned and reference features) share the same `alignment_group_id`, enabling: +- Tracking which features are aligned together +- Identifying the reference feature for each alignment group +- Analyzing alignment quality across related features + +### Performance Considerations + +| Approach | Query Time | Precision | Index Usage | +|----------|-----------|-----------|-------------| +| No CAST | ~16 sec | ❌ Lost | ✅ Yes | +| CAST in JOIN | ~50 min | ✅ Preserved | ❌ No | +| CAST in SELECT | ~16 sec | ✅ Preserved | ✅ Yes | + +**Conclusion:** CAST in SELECT clause provides both precision preservation and optimal performance. diff --git a/pyprophet/_config.py b/pyprophet/_config.py index e5b71f5..4814fe7 100644 --- a/pyprophet/_config.py +++ b/pyprophet/_config.py @@ -649,6 +649,8 @@ class ExportIOConfig(BaseIOConfig): max_global_peptide_qvalue (float): Filter results to maximum global peptide-level q-value. protein (bool): Append protein-level error-rate estimates if available. max_global_protein_qvalue (float): Filter results to maximum global protein-level q-value. + use_alignment (bool): Use alignment results to recover peaks with good alignment scores if alignment data is present (default: True). + max_alignment_pep (float): Maximum PEP to consider for good alignments when use_alignment is True (default: 0.7). 
# Quantification matrix options top_n (int): Number of top intense features to use for summarization @@ -688,6 +690,10 @@ class ExportIOConfig(BaseIOConfig): protein: bool = True max_global_protein_qvalue: float = 0.01 test: bool = False + + # Alignment options + use_alignment: bool = True + max_alignment_pep: float = 0.7 # Quantification matrix options top_n: int = 3 diff --git a/pyprophet/cli/export.py b/pyprophet/cli/export.py index 2dde75c..e80c300 100644 --- a/pyprophet/cli/export.py +++ b/pyprophet/cli/export.py @@ -146,6 +146,19 @@ def export(): type=float, help="[format: matrix/legacy] Filter results to maximum global protein-level q-value.", ) +@click.option( + "--use_alignment/--no-use_alignment", + default=True, + show_default=True, + help="Use alignment results to recover peaks with good alignment scores if alignment data is present in the input file.", +) +@click.option( + "--max_alignment_pep", + default=0.7, + show_default=True, + type=float, + help="[format: matrix/legacy] Maximum PEP to consider for good alignments when use_alignment is enabled.", +) @measure_memory_usage_and_time def export_tsv( infile, @@ -161,6 +174,8 @@ def export_tsv( max_global_peptide_qvalue, protein, max_global_protein_qvalue, + use_alignment, + max_alignment_pep, ): """ Export Proteomics/Peptidoform TSV/CSV tables @@ -190,6 +205,8 @@ def export_tsv( max_global_peptide_qvalue=max_global_peptide_qvalue, protein=protein, max_global_protein_qvalue=max_global_protein_qvalue, + use_alignment=use_alignment, + max_alignment_pep=max_alignment_pep, ) reader = ReaderDispatcher.get_reader(config) @@ -278,6 +295,19 @@ def export_tsv( type=float, help="[format: matrix/legacy] Filter results to maximum global protein-level q-value.", ) +@click.option( + "--use_alignment/--no-use_alignment", + default=True, + show_default=True, + help="Use alignment results to recover peaks with good alignment scores if alignment data is present in the input file.", +) +@click.option( + 
"--max_alignment_pep", + default=0.7, + show_default=True, + type=float, + help="[format: matrix/legacy] Maximum PEP to consider for good alignments when use_alignment is enabled.", +) @click.option( "--top_n", default=3, @@ -312,6 +342,8 @@ def export_matrix( max_rs_peakgroup_qvalue, max_global_peptide_qvalue, max_global_protein_qvalue, + use_alignment, + max_alignment_pep, top_n, consistent_top, normalization, @@ -344,6 +376,8 @@ def export_matrix( max_global_peptide_qvalue=max_global_peptide_qvalue, protein=True, max_global_protein_qvalue=max_global_protein_qvalue, + use_alignment=use_alignment, + max_alignment_pep=max_alignment_pep, top_n=top_n, consistent_top=consistent_top, normalization=normalization, diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index b25c882..4dc2256 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -161,6 +161,12 @@ def _check_ipf_presence(self, con, cfg): """Check if IPF data is present and should be used.""" return cfg.ipf != "disable" and check_sqlite_table(con, "SCORE_IPF") + def _check_alignment_presence(self, con): + """Check if alignment data is present.""" + return check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT") and check_sqlite_table( + con, "SCORE_ALIGNMENT" + ) + def _read_unscored_data(self, con): """Read data from unscored files.""" score_sql = self._build_score_sql(con) @@ -316,7 +322,11 @@ def _read_augmented_data(self, con, cfg): return pd.merge(data, ipf_data, how="left", on="id") def _read_standard_data(self, con, cfg): - """Read standard OpenSWATH data without IPF.""" + """Read standard OpenSWATH data without IPF, optionally including aligned features.""" + # Check if we should attempt alignment integration + use_alignment = cfg.use_alignment and self._check_alignment_presence(con) + + # First, get features that pass MS2 QVALUE threshold query = f""" SELECT RUN.ID AS id_run, PEPTIDE.ID AS id_peptide, @@ -330,7 +340,7 @@ def _read_standard_data(self, con, cfg): 
FEATURE.NORM_RT AS iRT, PRECURSOR.LIBRARY_RT AS assay_iRT, FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, - FEATURE.ID AS id, + CAST(FEATURE.ID AS INTEGER) AS id, PEPTIDE.UNMODIFIED_SEQUENCE AS Sequence, PEPTIDE.MODIFIED_SEQUENCE AS FullPeptideName, PRECURSOR.CHARGE AS Charge, @@ -342,7 +352,8 @@ def _read_standard_data(self, con, cfg): FEATURE.RIGHT_WIDTH AS rightWidth, SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS m_score + SCORE_MS2.QVALUE AS m_score, + SCORE_MS2.PEP AS pep FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -354,7 +365,171 @@ def _read_standard_data(self, con, cfg): WHERE SCORE_MS2.QVALUE < {cfg.max_rs_peakgroup_qvalue} ORDER BY transition_group_id, peak_group_rank; """ - return pd.read_sql_query(query, con) + data = pd.read_sql_query(query, con) + + # Ensure id column is Int64 to preserve precision for large feature IDs + if "id" in data.columns: + data["id"] = data["id"].astype("Int64") + + # If alignment is enabled and alignment data is present, fetch and merge aligned features + if use_alignment: + aligned_features = self._fetch_alignment_features(con, cfg) + + if not aligned_features.empty: + # Get full feature data for aligned features that are NOT already in base results + # We only want to add features that didn't pass MS2 threshold but have good alignment + aligned_ids = aligned_features["id"].unique() + existing_ids = data["id"].unique() + new_aligned_ids = [ + aid for aid in aligned_ids if aid not in existing_ids + ] + + # First, merge alignment info into existing features (those that passed MS2) + # Mark them with from_alignment=0 + data = pd.merge( + data, + aligned_features[ + [ + "id", + "alignment_group_id", + "alignment_reference_feature_id", + "alignment_reference_rt", + "alignment_pep", + "alignment_qvalue", + ] + ], + on="id", + how="left", + ) + 
data["from_alignment"] = 0 + + # Now add features that didn't pass MS2 but have good alignment (recovered features) + if new_aligned_ids: + # Fetch full data for these new aligned features + aligned_ids_str = ",".join(map(str, new_aligned_ids)) + aligned_query = f""" + SELECT RUN.ID AS id_run, + PEPTIDE.ID AS id_peptide, + PRECURSOR.ID AS transition_group_id, + PRECURSOR.DECOY AS decoy, + RUN.ID AS run_id, + RUN.FILENAME AS filename, + FEATURE.EXP_RT AS RT, + FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, + FEATURE.DELTA_RT AS delta_rt, + FEATURE.NORM_RT AS iRT, + PRECURSOR.LIBRARY_RT AS assay_iRT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, + CAST(FEATURE.ID AS INTEGER) AS id, + PEPTIDE.UNMODIFIED_SEQUENCE AS Sequence, + PEPTIDE.MODIFIED_SEQUENCE AS FullPeptideName, + PRECURSOR.CHARGE AS Charge, + PRECURSOR.PRECURSOR_MZ AS mz, + FEATURE_MS2.AREA_INTENSITY AS Intensity, + FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area, + FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex, + FEATURE.LEFT_WIDTH AS leftWidth, + FEATURE.RIGHT_WIDTH AS rightWidth, + SCORE_MS2.RANK AS peak_group_rank, + SCORE_MS2.SCORE AS d_score, + SCORE_MS2.QVALUE AS m_score, + SCORE_MS2.PEP AS pep + FROM PRECURSOR + INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID + INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID + INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID + LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID + LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID + LEFT JOIN SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID + WHERE FEATURE.ID IN ({aligned_ids_str}) + """ + aligned_data = pd.read_sql_query(aligned_query, con) + + # Ensure id column is Int64 to preserve precision + if "id" in aligned_data.columns: + aligned_data["id"] = aligned_data["id"].astype("Int64") + + # Merge alignment scores and reference info into the aligned data + 
aligned_data = pd.merge( + aligned_data, + aligned_features[ + [ + "id", + "alignment_group_id", + "alignment_reference_feature_id", + "alignment_reference_rt", + "alignment_pep", + "alignment_qvalue", + ] + ], + on="id", + how="left", + ) + + # Mark as recovered through alignment + aligned_data["from_alignment"] = 1 + + logger.info( + f"Adding {len(aligned_data)} features recovered through alignment" + ) + + # Combine with base data + data = pd.concat([data, aligned_data], ignore_index=True) + + # Convert alignment_reference_feature_id to int64 to avoid scientific notation + if "alignment_reference_feature_id" in data.columns: + data["alignment_reference_feature_id"] = data[ + "alignment_reference_feature_id" + ].astype("Int64") + if "alignment_group_id" in data.columns: + data["alignment_group_id"] = data["alignment_group_id"].astype( + "Int64" + ) + + # Assign alignment_group_id to reference features + # Create a mapping from reference feature IDs to their alignment_group_ids + if ( + "alignment_reference_feature_id" in data.columns + and "alignment_group_id" in data.columns + ): + # Get all reference feature IDs and their corresponding alignment_group_ids + ref_mapping = data[data["alignment_reference_feature_id"].notna()][ + ["alignment_reference_feature_id", "alignment_group_id"] + ].drop_duplicates() + + # For each reference feature ID, we need to assign the alignment_group_id + # to the feature row where id == alignment_reference_feature_id + if not ref_mapping.empty: + # Merge the alignment_group_id for reference features + # First create a DataFrame mapping id -> alignment_group_id for references + ref_group_mapping = ref_mapping.rename( + columns={ + "alignment_reference_feature_id": "id", + "alignment_group_id": "ref_alignment_group_id", + } + ) + + # Merge this mapping to assign alignment_group_id to reference features + data = pd.merge(data, ref_group_mapping, on="id", how="left") + + # Fill in alignment_group_id for reference features (where it's 
currently null but ref_alignment_group_id is not) + mask = ( + data["alignment_group_id"].isna() + & data["ref_alignment_group_id"].notna() + ) + data.loc[mask, "alignment_group_id"] = data.loc[ + mask, "ref_alignment_group_id" + ] + + # Drop the temporary column + data = data.drop(columns=["ref_alignment_group_id"]) + + logger.debug( + f"Assigned alignment_group_id to {mask.sum()} reference features" + ) + + return data def _augment_data(self, data, con, cfg): """Apply common data augmentations to the base dataset.""" @@ -451,7 +626,8 @@ def _get_base_openswath_data(self, con, cfg): FEATURE.RIGHT_WIDTH AS rightWidth, SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS m_score + SCORE_MS2.QVALUE AS m_score, + SCORE_MS2.PEP AS pep FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -633,6 +809,69 @@ def _add_protein_error_data(self, data, con, cfg): return data + def _fetch_alignment_features(self, con, cfg): + """ + Fetch aligned features with good alignment scores. + + This method retrieves features that have been aligned across runs + and pass the alignment quality threshold. Only features whose reference + feature passes the MS2 QVALUE threshold are included, ensuring that + recovered peaks are aligned to high-quality reference features. 
+ + Args: + con: Database connection + cfg: Configuration object with max_alignment_pep threshold + + Returns: + DataFrame with aligned feature IDs that pass quality threshold + """ + max_alignment_pep = cfg.max_alignment_pep + max_rs_peakgroup_qvalue = cfg.max_rs_peakgroup_qvalue + + query = f""" + SELECT + DENSE_RANK() OVER (ORDER BY FEATURE_MS2_ALIGNMENT.PRECURSOR_ID, FEATURE_MS2_ALIGNMENT.ALIGNMENT_ID) AS alignment_group_id, + FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID AS id, + FEATURE_MS2_ALIGNMENT.PRECURSOR_ID AS transition_group_id, + FEATURE_MS2_ALIGNMENT.RUN_ID AS run_id, + CAST(FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS INTEGER) AS alignment_reference_feature_id, + FEATURE_MS2_ALIGNMENT.REFERENCE_RT AS alignment_reference_rt, + SCORE_ALIGNMENT.PEP AS alignment_pep, + SCORE_ALIGNMENT.QVALUE AS alignment_qvalue + FROM ( + SELECT DISTINCT * FROM FEATURE_MS2_ALIGNMENT + ) AS FEATURE_MS2_ALIGNMENT + INNER JOIN ( + SELECT DISTINCT *, MIN(QVALUE) + FROM SCORE_ALIGNMENT + GROUP BY FEATURE_ID + ) AS SCORE_ALIGNMENT + ON SCORE_ALIGNMENT.FEATURE_ID = FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID + INNER JOIN ( + SELECT FEATURE_ID, QVALUE + FROM SCORE_MS2 + ) AS REF_SCORE_MS2 + ON REF_SCORE_MS2.FEATURE_ID = FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID + WHERE FEATURE_MS2_ALIGNMENT.LABEL = 1 + AND SCORE_ALIGNMENT.PEP < {max_alignment_pep} + AND REF_SCORE_MS2.QVALUE < {max_rs_peakgroup_qvalue} + """ + + df = pd.read_sql_query(query, con) + + # Ensure Int64 dtype for large integer IDs (pandas nullable integer type) + if "alignment_reference_feature_id" in df.columns: + df["alignment_reference_feature_id"] = df[ + "alignment_reference_feature_id" + ].astype("Int64") + if "alignment_group_id" in df.columns: + df["alignment_group_id"] = df["alignment_group_id"].astype("Int64") + + logger.info( + f"Found {len(df)} aligned features passing alignment PEP < {max_alignment_pep} with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}" + ) + return df + 
################################## # Export-specific readers below ################################## @@ -863,11 +1102,11 @@ def _get_peptide_protein_score_table_sqlite(self, con, level: str) -> str: def export_feature_scores(self, outfile: str, plot_callback): """ Export feature scores from OSW file for plotting. - + Detects if SCORE tables exist and adjusts behavior: - If SCORE tables exist: applies RANK==1 filtering and plots SCORE + VAR_ columns - If SCORE tables don't exist: plots only VAR_ columns - + Parameters ---------- outfile : str @@ -877,22 +1116,24 @@ def export_feature_scores(self, outfile: str, plot_callback): Signature: plot_callback(df, outfile, level, append) """ con = sqlite3.connect(self.infile) - + try: # Check for SCORE tables has_score_ms1 = check_sqlite_table(con, "SCORE_MS1") has_score_ms2 = check_sqlite_table(con, "SCORE_MS2") has_score_transition = check_sqlite_table(con, "SCORE_TRANSITION") - + if has_score_ms1 or has_score_ms2 or has_score_transition: - logger.info("SCORE tables detected - applying RANK==1 filter and plotting SCORE + VAR_ columns") + logger.info( + "SCORE tables detected - applying RANK==1 filter and plotting SCORE + VAR_ columns" + ) else: logger.info("No SCORE tables detected - plotting only VAR_ columns") - + # Process MS1 level if available if check_sqlite_table(con, "FEATURE_MS1"): logger.info("Processing MS1 level feature scores") - + if has_score_ms1: # Scored mode: Include SCORE columns and apply RANK==1 filter ms1_query = """ @@ -922,9 +1163,11 @@ def export_feature_scores(self, outfile: str, plot_callback): cursor.execute("PRAGMA table_info(FEATURE_MS1)") all_cols = [row[1] for row in cursor.fetchall()] var_cols = [col for col in all_cols if "VAR_" in col.upper()] - + if var_cols: - var_cols_sql = ", ".join([f"FEATURE_MS1.{col}" for col in var_cols]) + var_cols_sql = ", ".join( + [f"FEATURE_MS1.{col}" for col in var_cols] + ) ms1_query = f""" SELECT {var_cols_sql}, @@ -936,16 +1179,16 @@ def 
export_feature_scores(self, outfile: str, plot_callback): else: logger.warning("No VAR_ columns found in FEATURE_MS1 table") ms1_query = None - + if ms1_query: df_ms1 = pd.read_sql_query(ms1_query, con) if not df_ms1.empty: plot_callback(df_ms1, outfile, "ms1", append=False) - + # Process MS2 level if available if check_sqlite_table(con, "FEATURE_MS2"): logger.info("Processing MS2 level feature scores") - + if has_score_ms2: # Scored mode: Include SCORE columns and apply RANK==1 filter ms2_query = """ @@ -982,9 +1225,11 @@ def export_feature_scores(self, outfile: str, plot_callback): cursor.execute("PRAGMA table_info(FEATURE_MS2)") all_cols = [row[1] for row in cursor.fetchall()] var_cols = [col for col in all_cols if "VAR_" in col.upper()] - + if var_cols: - var_cols_sql = ", ".join([f"FEATURE_MS2.{col}" for col in var_cols]) + var_cols_sql = ", ".join( + [f"FEATURE_MS2.{col}" for col in var_cols] + ) ms2_query = f""" SELECT {var_cols_sql}, @@ -996,17 +1241,17 @@ def export_feature_scores(self, outfile: str, plot_callback): else: logger.warning("No VAR_ columns found in FEATURE_MS2 table") ms2_query = None - + if ms2_query: df_ms2 = pd.read_sql_query(ms2_query, con) if not df_ms2.empty: append = check_sqlite_table(con, "FEATURE_MS1") plot_callback(df_ms2, outfile, "ms2", append=append) - + # Process transition level if available if check_sqlite_table(con, "FEATURE_TRANSITION"): logger.info("Processing transition level feature scores") - + if has_score_transition: # Scored mode: Include SCORE columns and apply RANK==1 filter transition_query = """ @@ -1042,9 +1287,11 @@ def export_feature_scores(self, outfile: str, plot_callback): cursor.execute("PRAGMA table_info(FEATURE_TRANSITION)") all_cols = [row[1] for row in cursor.fetchall()] var_cols = [col for col in all_cols if "VAR_" in col.upper()] - + if var_cols: - var_cols_sql = ", ".join([f"FEATURE_TRANSITION.{col}" for col in var_cols]) + var_cols_sql = ", ".join( + [f"FEATURE_TRANSITION.{col}" for col in 
var_cols] + ) transition_query = f""" SELECT {var_cols_sql}, @@ -1054,15 +1301,21 @@ def export_feature_scores(self, outfile: str, plot_callback): INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID """ else: - logger.warning("No VAR_ columns found in FEATURE_TRANSITION table") + logger.warning( + "No VAR_ columns found in FEATURE_TRANSITION table" + ) transition_query = None - + if transition_query: df_transition = pd.read_sql_query(transition_query, con) if not df_transition.empty: - append = check_sqlite_table(con, "FEATURE_MS1") or check_sqlite_table(con, "FEATURE_MS2") - plot_callback(df_transition, outfile, "transition", append=append) - + append = check_sqlite_table( + con, "FEATURE_MS1" + ) or check_sqlite_table(con, "FEATURE_MS2") + plot_callback( + df_transition, outfile, "transition", append=append + ) + # Process alignment level if available (no SCORE tables for alignment) if check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT"): logger.info("Processing alignment level feature scores") @@ -1071,7 +1324,7 @@ def export_feature_scores(self, outfile: str, plot_callback): cursor.execute("PRAGMA table_info(FEATURE_MS2_ALIGNMENT)") all_cols = [row[1] for row in cursor.fetchall()] var_cols = [col for col in all_cols if "VAR_" in col.upper()] - + if var_cols: var_cols_sql = ", ".join(var_cols) alignment_query = f""" @@ -1082,13 +1335,17 @@ def export_feature_scores(self, outfile: str, plot_callback): """ df_alignment = pd.read_sql_query(alignment_query, con) if not df_alignment.empty: - append = (check_sqlite_table(con, "FEATURE_MS1") or - check_sqlite_table(con, "FEATURE_MS2") or - check_sqlite_table(con, "FEATURE_TRANSITION")) + append = ( + check_sqlite_table(con, "FEATURE_MS1") + or check_sqlite_table(con, "FEATURE_MS2") + or check_sqlite_table(con, "FEATURE_TRANSITION") + ) plot_callback(df_alignment, outfile, "alignment", append=append) else: - logger.warning("No VAR_ columns found in FEATURE_MS2_ALIGNMENT table") - + logger.warning( + 
"No VAR_ columns found in FEATURE_MS2_ALIGNMENT table" + ) + finally: con.close() @@ -1177,6 +1434,10 @@ def _prepare_column_info(self, conn) -> dict: "has_annotation": "ANNOTATION" in get_table_columns(self.config.infile, "TRANSITION"), "has_im": "EXP_IM" in get_table_columns(self.config.infile, "FEATURE"), + "has_im_boundaries": all( + col in get_table_columns(self.config.infile, "FEATURE") + for col in ["EXP_IM_LEFTWIDTH", "EXP_IM_RIGHTWIDTH"] + ), "feature_ms1_cols": [ col for col in get_table_columns_with_types( @@ -1196,7 +1457,8 @@ def _prepare_column_info(self, conn) -> dict: for col in get_table_columns_with_types( self.config.infile, "FEATURE_TRANSITION" ) - if col[0] not in ["FEATURE_ID", "TRANSITION_ID"] and col[1] # Ensure column has a type + if col[0] not in ["FEATURE_ID", "TRANSITION_ID"] + and col[1] # Ensure column has a type ], "score_ms1_exists": {"SCORE_MS1"}.issubset(table_names), "score_ms2_exists": {"SCORE_MS2"}.issubset(table_names), @@ -1266,9 +1528,13 @@ def _export_split_by_run(self, conn, column_info: dict) -> None: f"{transition_query_run}\nUNION ALL\n{transition_query_null}" ) logger.info(f"Exporting transition data to {transition_path}") - self._execute_copy_query(conn, combined_transition_query, transition_path) + self._execute_copy_query( + conn, combined_transition_query, transition_path + ) else: - logger.info("Skipping transition data export (include_transition_data=False)") + logger.info( + "Skipping transition data export (include_transition_data=False)" + ) # Export alignment data if exists if column_info["feature_ms2_alignment_exists"]: @@ -1296,7 +1562,9 @@ def _export_combined(self, conn, column_info: dict) -> None: transition_query = self._build_transition_query(column_info) self._execute_copy_query(conn, transition_query, transition_path) else: - logger.info("Skipping transition data export (include_transition_data=False)") + logger.info( + "Skipping transition data export (include_transition_data=False)" + ) # Export 
alignment data if exists if column_info["feature_ms2_alignment_exists"]: @@ -1312,6 +1580,7 @@ def _export_single_file(self, conn, column_info: dict) -> None: # Insert precursor data logger.debug("Inserting precursor data into temp table") precursor_query = self._build_combined_precursor_query(conn, column_info) + # print(precursor_query) conn.execute(f"INSERT INTO temp_table {precursor_query}") # Insert transition data if requested @@ -1320,7 +1589,9 @@ def _export_single_file(self, conn, column_info: dict) -> None: transition_query = self._build_combined_transition_query(column_info) conn.execute(f"INSERT INTO temp_table {transition_query}") else: - logger.info("Skipping transition data export (include_transition_data=False)") + logger.info( + "Skipping transition data export (include_transition_data=False)" + ) # Export to parquet logger.info(f"Exporting combined data to {self.config.outfile}") @@ -1334,6 +1605,183 @@ def _export_single_file(self, conn, column_info: dict) -> None: logger.info(f"Exporting alignment data to {alignment_path}") self._export_alignment_data(conn, alignment_path) + def _register_peptide_ipf_map(self, conn: duckdb.DuckDBPyConnection) -> None: + """Create or refresh peptide ↔ IPF peptide mapping inside DuckDB.""" + logger.info("Preparing peptide unimod to codename mapping view") + conn.create_function("unimod_to_codename", unimod_to_codename, [str], str) + conn.execute( + f""" + CREATE OR REPLACE TEMP TABLE UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING AS + WITH peptides AS ( + SELECT + ID, + MODIFIED_SEQUENCE, + unimod_to_codename(MODIFIED_SEQUENCE) AS CODENAME, + MODIFIED_SEQUENCE LIKE '%%UniMod%%' AS HAS_UNIMOD + FROM sqlite_scan('{self.config.infile}', 'PEPTIDE') + ), + unimod_peptides AS ( + SELECT CODENAME, ID AS PEPTIDE_ID + FROM peptides + WHERE HAS_UNIMOD + ), + codename_peptides AS ( + SELECT CODENAME, ID AS IPF_PEPTIDE_ID + FROM peptides + WHERE NOT HAS_UNIMOD + ) + SELECT DISTINCT + COALESCE(unimod_peptides.PEPTIDE_ID, 
codename_peptides.IPF_PEPTIDE_ID) AS PEPTIDE_ID, + COALESCE(codename_peptides.IPF_PEPTIDE_ID, unimod_peptides.PEPTIDE_ID) AS IPF_PEPTIDE_ID, + COALESCE(unimod_peptides.CODENAME, codename_peptides.CODENAME) AS CODENAME + FROM unimod_peptides + FULL OUTER JOIN codename_peptides USING (CODENAME) + """ + ) + + def _create_unimod_to_codename_peptide_id_mapping_table(self) -> None: + """Create peptide unimod to codename mapping table in SQLite database.""" + logger.info( + "Generating peptide unimod to codename mapping and storing in SQLite" + ) + + with sqlite3.connect(self.config.infile) as sql_conn: + # First get the peptide table and process it with pyopenms + peptide_df = pd.read_sql_query( + "SELECT ID, MODIFIED_SEQUENCE FROM PEPTIDE", sql_conn + ) + + peptide_df["codename"] = peptide_df["MODIFIED_SEQUENCE"].apply( + unimod_to_codename + ) + + # Create the merged mapping + unimod_mask = peptide_df["MODIFIED_SEQUENCE"].str.contains("UniMod") + merged_df = pd.merge( + peptide_df[unimod_mask][["codename", "ID"]], + peptide_df[~unimod_mask][["codename", "ID"]], + on="codename", + suffixes=("_unimod", "_codename"), + how="outer", + ) + + # Fill NaN values in the 'ID_codename' column with the 'ID_unimod' values + merged_df["ID_codename"] = merged_df["ID_codename"].fillna( + merged_df["ID_unimod"] + ) + # Fill NaN values in the 'ID_unimod' column with the 'ID_codename' values + merged_df["ID_unimod"] = merged_df["ID_unimod"].fillna( + merged_df["ID_codename"] + ) + + merged_df["ID_unimod"] = merged_df["ID_unimod"].astype(int) + merged_df["ID_codename"] = merged_df["ID_codename"].astype(int) + + # Create the UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING table in SQLite + sql_conn.execute( + """ + CREATE TABLE IF NOT EXISTS UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING ( + ID_unimod INTEGER, + ID_codename INTEGER, + codename TEXT, + PRIMARY KEY (ID_unimod, ID_codename) + ) + """ + ) + sql_conn.execute("DELETE FROM UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING") + + # Insert the data into SQLite 
table + merged_df[["ID_unimod", "ID_codename", "codename"]].to_sql( + "UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING", + sql_conn, + if_exists="append", + index=False, + ) + + # Create indices for better performance + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_peptide_ipf_unimod ON UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING(ID_unimod)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_peptide_ipf_codename ON UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING(ID_codename)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_peptide_ipf_codename_text ON UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING(codename)" + ) + + sql_conn.commit() + logger.info( + f"Successfully created UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING table with {len(merged_df)} mappings" + ) + + def _insert_precursor_peptide_ipf_map(self) -> None: + """Insert precursor-peptide-IPF table into the input sqlite OSW file.""" + logger.info("Inserting precursor-peptide-IPF mapping into OSW file") + with sqlite3.connect(self.config.infile) as sql_conn: + # Create the main mapping table + sql_conn.execute( + """ + CREATE TABLE IF NOT EXISTS PRECURSOR_PEPTIDE_IPF_MAPPING ( + PRECURSOR_ID INTEGER, + ID_unimod INTEGER, + ID_codename INTEGER, + MODIFIED_SEQUENCE TEXT, + CODENAME TEXT, + FEATURE_ID INTEGER, + PRECURSOR_PEAKGROUP_PEP REAL, + QVALUE REAL, + PEP REAL + ) + """ + ) + sql_conn.execute("DELETE FROM PRECURSOR_PEPTIDE_IPF_MAPPING") + + # Insert the data using your join logic + sql_conn.execute( + """ + INSERT INTO PRECURSOR_PEPTIDE_IPF_MAPPING ( + PRECURSOR_ID, ID_unimod, ID_codename, MODIFIED_SEQUENCE, + CODENAME, FEATURE_ID, PRECURSOR_PEAKGROUP_PEP, QVALUE, PEP + ) + SELECT + ppm.PRECURSOR_ID, + pim.ID_unimod, + pim.ID_codename, + p.MODIFIED_SEQUENCE, + pim.codename, + si.FEATURE_ID, + si.PRECURSOR_PEAKGROUP_PEP, + si.QVALUE, + si.PEP + FROM PEPTIDE p + INNER JOIN UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING pim ON pim.ID_unimod = p.ID + INNER JOIN PRECURSOR_PEPTIDE_MAPPING ppm ON ppm.PEPTIDE_ID = p.ID + INNER JOIN SCORE_IPF 
si ON si.PEPTIDE_ID = pim.ID_codename + """ + ) + + # Create indices for better query performance + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_ppim_precursor_id ON PRECURSOR_PEPTIDE_IPF_MAPPING(PRECURSOR_ID)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_ppim_feature_id ON PRECURSOR_PEPTIDE_IPF_MAPPING(FEATURE_ID)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_ppim_id_unimod ON PRECURSOR_PEPTIDE_IPF_MAPPING(ID_unimod)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_ppim_id_codename ON PRECURSOR_PEPTIDE_IPF_MAPPING(ID_codename)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_ppim_precursor_feature ON PRECURSOR_PEPTIDE_IPF_MAPPING(PRECURSOR_ID, FEATURE_ID)" + ) + + sql_conn.commit() + logger.info( + "Successfully created PRECURSOR_PEPTIDE_IPF_MAPPING table with indices" + ) + def _build_precursor_query(self, conn, column_info: dict) -> str: """Build SQL query for precursor data""" feature_ms1_cols_sql = ", ".join( @@ -1346,49 +1794,15 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: for col in column_info["feature_ms2_cols"] ) + # First get the peptide table and process it with pyopenms + # self._register_peptide_ipf_map(conn) + self._create_unimod_to_codename_peptide_id_mapping_table() + # Check if score tables exist and build score SQLs score_cols_selct, score_table_joins, score_column_views = ( self._build_score_column_selection_and_joins(column_info) ) - # First get the peptide table and process it with pyopenms - logger.info("Generating peptide unimod to codename mapping") - with sqlite3.connect(self.config.infile) as sql_conn: - peptide_df = pd.read_sql_query( - "SELECT ID, MODIFIED_SEQUENCE FROM PEPTIDE", sql_conn - ) - peptide_df["codename"] = peptide_df["MODIFIED_SEQUENCE"].apply( - unimod_to_codename - ) - - # Create the merged mapping - unimod_mask = peptide_df["MODIFIED_SEQUENCE"].str.contains("UniMod") - merged_df = pd.merge( - peptide_df[unimod_mask][["codename", "ID"]], - 
peptide_df[~unimod_mask][["codename", "ID"]], - on="codename", - suffixes=("_unimod", "_codename"), - how="outer", - ) - - # Fill NaN values in the 'ID_codename' column with the 'ID_unimod' values - merged_df["ID_codename"] = merged_df["ID_codename"].fillna( - merged_df["ID_unimod"] - ) - # Fill NaN values in the 'ID_unimod' column with the 'ID_codename' values - merged_df["ID_unimod"] = merged_df["ID_unimod"].fillna(merged_df["ID_codename"]) - - merged_df["ID_unimod"] = merged_df["ID_unimod"].astype(int) - merged_df["ID_codename"] = merged_df["ID_codename"].astype(int) - - # Register peptide_ipf_map - conn.register( - "peptide_ipf_map", - merged_df.rename( - columns={"ID_unimod": "PEPTIDE_ID", "ID_codename": "IPF_PEPTIDE_ID"} - ), - ) - return f""" -- Need to map the unimod peptide ids to the ipf codename peptide ids. The section below is commented out, since it's limited to only the 4 common modifications. Have replaced it above with a more general approach that handles all modifications using pyopenms --WITH normalized_peptides AS ( @@ -1410,7 +1824,7 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: -- FROM normalized_peptides -- GROUP BY NORMALIZED_SEQUENCE --), - --peptide_ipf_map AS ( + --UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING AS ( -- SELECT -- np.PEPTIDE_ID, -- g.IPF_PEPTIDE_ID @@ -1421,12 +1835,12 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: {score_column_views} SELECT PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID AS PROTEIN_ID, - PEPTIDE.ID AS PEPTIDE_ID, - pipf.IPF_PEPTIDE_ID AS IPF_PEPTIDE_ID, - PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AS PRECURSOR_ID, + {"SCORE_IPF.ID_unimod AS PEPTIDE_ID," if column_info["score_ipf_exists"] else "PEPTIDE.ID AS PEPTIDE_ID,"} + {"SCORE_IPF.ID_codename AS IPF_PEPTIDE_ID," if column_info["score_ipf_exists"] else "pipf.ID_codename AS IPF_PEPTIDE_ID,"} + {"SCORE_IPF.PRECURSOR_ID AS PRECURSOR_ID," if column_info["score_ipf_exists"] else "PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AS PRECURSOR_ID,"} 
PROTEIN.PROTEIN_ACCESSION AS PROTEIN_ACCESSION, PEPTIDE.UNMODIFIED_SEQUENCE, - PEPTIDE.MODIFIED_SEQUENCE, + {"SCORE_IPF.MODIFIED_SEQUENCE AS MODIFIED_SEQUENCE," if column_info["score_ipf_exists"] else "PEPTIDE.MODIFIED_SEQUENCE,"} PRECURSOR.TRAML_ID AS PRECURSOR_TRAML_ID, PRECURSOR.GROUP_LABEL AS PRECURSOR_GROUP_LABEL, PRECURSOR.PRECURSOR_MZ AS PRECURSOR_MZ, @@ -1449,6 +1863,8 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: FEATURE.DELTA_RT, FEATURE.LEFT_WIDTH, FEATURE.RIGHT_WIDTH, + {"FEATURE.EXP_IM_LEFTWIDTH" if column_info.get("has_im_boundaries", False) else "NULL"} AS IM_leftWidth, + {"FEATURE.EXP_IM_RIGHTWIDTH" if column_info.get("has_im_boundaries", False) else "NULL"} AS IM_rightWidth, {feature_ms1_cols_sql}, {feature_ms2_cols_sql}, {score_cols_selct} @@ -1457,8 +1873,8 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN sqlite_scan('{self.config.infile}', 'PEPTIDE') AS PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID - INNER JOIN peptide_ipf_map AS pipf - ON PEPTIDE.ID = pipf.PEPTIDE_ID + INNER JOIN sqlite_scan('{self.config.infile}', 'UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING') AS pipf + ON PEPTIDE.ID = pipf.ID_unimod INNER JOIN sqlite_scan('{self.config.infile}', 'PEPTIDE_PROTEIN_MAPPING') AS PEPTIDE_PROTEIN_MAPPING ON PEPTIDE.ID = PEPTIDE_PROTEIN_MAPPING.PEPTIDE_ID INNER JOIN sqlite_scan('{self.config.infile}', 'PROTEIN') AS PROTEIN @@ -1475,12 +1891,16 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: {score_table_joins} """ - def _build_transition_score_columns_and_join(self, column_info: dict) -> Tuple[str, str]: + def _build_transition_score_columns_and_join( + self, column_info: dict + ) -> Tuple[str, str]: """Build score columns and join clause for transition scores""" score_transition_cols = "" score_transition_join = "" if column_info.get("score_transition_exists", False): - logger.debug("SCORE_TRANSITION table 
exists, adding score columns to transition query") + logger.debug( + "SCORE_TRANSITION table exists, adding score columns to transition query" + ) score_cols = [ "SCORE_TRANSITION.SCORE AS SCORE_TRANSITION_SCORE", "SCORE_TRANSITION.RANK AS SCORE_TRANSITION_RANK", @@ -1510,7 +1930,9 @@ def _build_transition_query(self, column_info: dict) -> str: ) # Add transition score columns if they exist - score_transition_cols, score_transition_join = self._build_transition_score_columns_and_join(column_info) + score_transition_cols, score_transition_join = ( + self._build_transition_score_columns_and_join(column_info) + ) return f""" SELECT @@ -1562,6 +1984,10 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: for col in column_info["feature_transition_cols"] ) + # First get the peptide table and process it with pyopenms + # self._register_peptide_ipf_map(conn) + self._create_unimod_to_codename_peptide_id_mapping_table() + # Get score columns for precursor level score_cols_select, score_table_joins, score_column_views = ( self._build_score_column_selection_and_joins(column_info) @@ -1572,44 +1998,6 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: if column_info.get("score_transition_exists", False): as_null_transition_score_cols = ", NULL AS SCORE_TRANSITION_SCORE, NULL AS SCORE_TRANSITION_RANK, NULL AS SCORE_TRANSITION_P_VALUE, NULL AS SCORE_TRANSITION_Q_VALUE, NULL AS SCORE_TRANSITION_PEP" - # First get the peptide table and process it with pyopenms - logger.info("Generating peptide unimod to codename mapping") - with sqlite3.connect(self.config.infile) as sql_conn: - peptide_df = pd.read_sql_query( - "SELECT ID, MODIFIED_SEQUENCE FROM PEPTIDE", sql_conn - ) - peptide_df["codename"] = peptide_df["MODIFIED_SEQUENCE"].apply( - unimod_to_codename - ) - - # Create the merged mapping as you did in your example - unimod_mask = peptide_df["MODIFIED_SEQUENCE"].str.contains("UniMod") - merged_df = pd.merge( - 
peptide_df[unimod_mask][["codename", "ID"]], - peptide_df[~unimod_mask][["codename", "ID"]], - on="codename", - suffixes=("_unimod", "_codename"), - how="outer", - ) - - # Fill NaN values in the 'ID_codename' column with the 'ID_unimod' values - merged_df["ID_codename"] = merged_df["ID_codename"].fillna( - merged_df["ID_unimod"] - ) - # Fill NaN values in the 'ID_unimod' column with the 'ID_codename' values - merged_df["ID_unimod"] = merged_df["ID_unimod"].fillna(merged_df["ID_codename"]) - - merged_df["ID_unimod"] = merged_df["ID_unimod"].astype(int) - merged_df["ID_codename"] = merged_df["ID_codename"].astype(int) - - # Register peptide_ipf_map - conn.register( - "peptide_ipf_map", - merged_df.rename( - columns={"ID_unimod": "PEPTIDE_ID", "ID_codename": "IPF_PEPTIDE_ID"} - ), - ) - return f""" -- Need to map the unimod peptide ids to the ipf codename peptide ids. The section below is commented out, since it's limited to only the 4 common modifications. Have replaced it above with a more general approach that handles all modifications using pyopenms --WITH normalized_peptides AS ( @@ -1631,7 +2019,7 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: -- FROM normalized_peptides -- GROUP BY NORMALIZED_SEQUENCE --), - --peptide_ipf_map AS ( + --UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING AS ( -- SELECT -- np.PEPTIDE_ID, -- g.IPF_PEPTIDE_ID @@ -1642,12 +2030,12 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: {score_column_views} SELECT PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID AS PROTEIN_ID, - PEPTIDE.ID AS PEPTIDE_ID, - pipf.IPF_PEPTIDE_ID AS IPF_PEPTIDE_ID, - PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AS PRECURSOR_ID, + {"SCORE_IPF.ID_unimod AS PEPTIDE_ID," if column_info["score_ipf_exists"] else "PEPTIDE.ID AS PEPTIDE_ID,"} + {"SCORE_IPF.ID_codename AS IPF_PEPTIDE_ID," if column_info["score_ipf_exists"] else "pipf.ID_codename AS IPF_PEPTIDE_ID,"} + {"SCORE_IPF.PRECURSOR_ID AS PRECURSOR_ID," if column_info["score_ipf_exists"] 
else "PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AS PRECURSOR_ID,"} PROTEIN.PROTEIN_ACCESSION AS PROTEIN_ACCESSION, PEPTIDE.UNMODIFIED_SEQUENCE, - PEPTIDE.MODIFIED_SEQUENCE, + {"SCORE_IPF.MODIFIED_SEQUENCE AS MODIFIED_SEQUENCE," if column_info["score_ipf_exists"] else "PEPTIDE.MODIFIED_SEQUENCE,"} PRECURSOR.TRAML_ID AS PRECURSOR_TRAML_ID, PRECURSOR.GROUP_LABEL AS PRECURSOR_GROUP_LABEL, PRECURSOR.PRECURSOR_MZ AS PRECURSOR_MZ, @@ -1670,6 +2058,8 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: FEATURE.DELTA_RT, FEATURE.LEFT_WIDTH, FEATURE.RIGHT_WIDTH, + {"FEATURE.EXP_IM_LEFTWIDTH" if column_info.get("has_im_boundaries", False) else "NULL"} AS IM_leftWidth, + {"FEATURE.EXP_IM_RIGHTWIDTH" if column_info.get("has_im_boundaries", False) else "NULL"} AS IM_rightWidth, {feature_ms1_cols_sql}, {feature_ms2_cols_sql}, NULL AS TRANSITION_ID, @@ -1690,8 +2080,8 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN sqlite_scan('{self.config.infile}', 'PEPTIDE') AS PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID - INNER JOIN peptide_ipf_map AS pipf - ON PEPTIDE.ID = pipf.PEPTIDE_ID + INNER JOIN sqlite_scan('{self.config.infile}', 'UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING') AS pipf + ON PEPTIDE.ID = pipf.ID_unimod INNER JOIN sqlite_scan('{self.config.infile}', 'PEPTIDE_PROTEIN_MAPPING') AS PEPTIDE_PROTEIN_MAPPING ON PEPTIDE.ID = PEPTIDE_PROTEIN_MAPPING.PEPTIDE_ID INNER JOIN sqlite_scan('{self.config.infile}', 'PROTEIN') AS PROTEIN @@ -1730,7 +2120,9 @@ def _build_combined_transition_query(self, column_info: dict) -> str: ) # Add transition score columns if they exist - score_transition_cols, score_transition_join = self._build_transition_score_columns_and_join(column_info) + score_transition_cols, score_transition_join = ( + self._build_transition_score_columns_and_join(column_info) + ) # Also need to add NULL columns for score columns that appear in 
precursor query as_null_score_cols = "" @@ -1740,7 +2132,7 @@ def _build_combined_transition_query(self, column_info: dict) -> str: as_null_score_cols += ", NULL AS SCORE_MS2_SCORE, NULL AS SCORE_MS2_PEAK_GROUP_RANK, NULL AS SCORE_MS2_P_VALUE, NULL AS SCORE_MS2_Q_VALUE, NULL AS SCORE_MS2_PEP" if column_info.get("score_ipf_exists", False): as_null_score_cols += ", NULL AS SCORE_IPF_PRECURSOR_PEAKGROUP_PEP, NULL AS SCORE_IPF_PEP, NULL AS SCORE_IPF_QVALUE" - + # Add NULL columns for peptide and protein score contexts for table in ["peptide", "protein"]: if column_info.get(f"score_{table}_exists", False): @@ -1779,6 +2171,8 @@ def _build_combined_transition_query(self, column_info: dict) -> str: NULL AS DELTA_RT, NULL AS LEFT_WIDTH, NULL AS RIGHT_WIDTH, + NULL AS IM_leftWidth, + NULL AS IM_rightWidth, {as_null_feature_ms1_cols_sql}, {as_null_feature_ms2_cols_sql}, TRANSITION.ID AS TRANSITION_ID, @@ -1822,52 +2216,64 @@ def _create_temp_table(self, conn, column_info: dict) -> None: # Build score column types score_cols_types = [] if column_info.get("score_ms1_exists", False): - score_cols_types.extend([ - "SCORE_MS1_SCORE DOUBLE", - "SCORE_MS1_RANK INTEGER", - "SCORE_MS1_P_VALUE DOUBLE", - "SCORE_MS1_Q_VALUE DOUBLE", - "SCORE_MS1_PEP DOUBLE" - ]) + score_cols_types.extend( + [ + "SCORE_MS1_SCORE DOUBLE", + "SCORE_MS1_RANK INTEGER", + "SCORE_MS1_P_VALUE DOUBLE", + "SCORE_MS1_Q_VALUE DOUBLE", + "SCORE_MS1_PEP DOUBLE", + ] + ) if column_info.get("score_ms2_exists", False): - score_cols_types.extend([ - "SCORE_MS2_SCORE DOUBLE", - "SCORE_MS2_PEAK_GROUP_RANK INTEGER", - "SCORE_MS2_P_VALUE DOUBLE", - "SCORE_MS2_Q_VALUE DOUBLE", - "SCORE_MS2_PEP DOUBLE" - ]) + score_cols_types.extend( + [ + "SCORE_MS2_SCORE DOUBLE", + "SCORE_MS2_PEAK_GROUP_RANK INTEGER", + "SCORE_MS2_P_VALUE DOUBLE", + "SCORE_MS2_Q_VALUE DOUBLE", + "SCORE_MS2_PEP DOUBLE", + ] + ) if column_info.get("score_ipf_exists", False): - score_cols_types.extend([ - "SCORE_IPF_PRECURSOR_PEAKGROUP_PEP DOUBLE", - 
"SCORE_IPF_PEP DOUBLE", - "SCORE_IPF_QVALUE DOUBLE" - ]) - + score_cols_types.extend( + [ + "SCORE_IPF_PRECURSOR_PEAKGROUP_PEP DOUBLE", + "SCORE_IPF_PEP DOUBLE", + "SCORE_IPF_QVALUE DOUBLE", + ] + ) + # Add peptide and protein score columns for each context for table in ["peptide", "protein"]: if column_info.get(f"score_{table}_exists", False): for context in column_info.get(f"score_{table}_contexts", []): safe_context = context.upper().replace("-", "_") - score_cols_types.extend([ - f"SCORE_{table.upper()}_{safe_context}_SCORE DOUBLE", - f"SCORE_{table.upper()}_{safe_context}_P_VALUE DOUBLE", - f"SCORE_{table.upper()}_{safe_context}_Q_VALUE DOUBLE", - f"SCORE_{table.upper()}_{safe_context}_PEP DOUBLE" - ]) - + score_cols_types.extend( + [ + f"SCORE_{table.upper()}_{safe_context}_SCORE DOUBLE", + f"SCORE_{table.upper()}_{safe_context}_P_VALUE DOUBLE", + f"SCORE_{table.upper()}_{safe_context}_Q_VALUE DOUBLE", + f"SCORE_{table.upper()}_{safe_context}_PEP DOUBLE", + ] + ) + # Add transition score columns if column_info.get("score_transition_exists", False): - score_cols_types.extend([ - "SCORE_TRANSITION_SCORE DOUBLE", - "SCORE_TRANSITION_RANK INTEGER", - "SCORE_TRANSITION_P_VALUE DOUBLE", - "SCORE_TRANSITION_Q_VALUE DOUBLE", - "SCORE_TRANSITION_PEP DOUBLE" - ]) + score_cols_types.extend( + [ + "SCORE_TRANSITION_SCORE DOUBLE", + "SCORE_TRANSITION_RANK INTEGER", + "SCORE_TRANSITION_P_VALUE DOUBLE", + "SCORE_TRANSITION_Q_VALUE DOUBLE", + "SCORE_TRANSITION_PEP DOUBLE", + ] + ) # Prepend comma and space to score columns if there are any - score_cols_types_sql = (", " + ", ".join(score_cols_types)) if score_cols_types else "" + score_cols_types_sql = ( + (", " + ", ".join(score_cols_types)) if score_cols_types else "" + ) create_temp_table_query = f""" CREATE TABLE temp_table ( @@ -1900,6 +2306,8 @@ def _create_temp_table(self, conn, column_info: dict) -> None: DELTA_RT DOUBLE, LEFT_WIDTH DOUBLE, RIGHT_WIDTH DOUBLE, + IM_leftWidth DOUBLE, + IM_rightWidth DOUBLE, 
{feature_ms1_cols_types}, {feature_ms2_cols_types}, TRANSITION_ID BIGINT, @@ -1920,30 +2328,67 @@ def _create_temp_table(self, conn, column_info: dict) -> None: conn.execute(create_temp_table_query) def _export_alignment_data(self, conn, path: str = None) -> None: - """Export feature alignment data""" + """Export feature alignment data with scores if available""" if path is None: path = os.path.join(self.config.outfile, "feature_alignment.parquet") - query = f""" - SELECT - ALIGNMENT_ID, - RUN_ID, - PRECURSOR_ID, - ALIGNED_FEATURE_ID AS FEATURE_ID, - REFERENCE_FEATURE_ID, - ALIGNED_RT, - REFERENCE_RT, - XCORR_COELUTION_TO_REFERENCE AS VAR_XCORR_COELUTION_TO_REFERENCE, - XCORR_SHAPE_TO_REFERENCE AS VAR_XCORR_SHAPE_TO_REFERENCE, - MI_TO_REFERENCE AS VAR_MI_TO_REFERENCE, - XCORR_COELUTION_TO_ALL AS VAR_XCORR_COELUTION_TO_ALL, - XCORR_SHAPE_TO_ALL AS VAR_XCORR_SHAPE, - MI_TO_ALL AS VAR_MI_TO_ALL, - RETENTION_TIME_DEVIATION AS VAR_RETENTION_TIME_DEVIATION, - PEAK_INTENSITY_RATIO AS VAR_PEAK_INTENSITY_RATIO, - LABEL AS DECOY - FROM sqlite_scan('{self.config.infile}', 'FEATURE_MS2_ALIGNMENT') - """ + # Check if SCORE_ALIGNMENT table exists + with sqlite3.connect(self.config.infile) as sql_conn: + has_score_alignment = check_sqlite_table(sql_conn, "SCORE_ALIGNMENT") + + if has_score_alignment: + # Export with alignment scores + query = f""" + SELECT + FEATURE_MS2_ALIGNMENT.ALIGNMENT_ID, + FEATURE_MS2_ALIGNMENT.RUN_ID, + FEATURE_MS2_ALIGNMENT.PRECURSOR_ID, + FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID AS FEATURE_ID, + FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID, + FEATURE_MS2_ALIGNMENT.ALIGNED_RT, + FEATURE_MS2_ALIGNMENT.REFERENCE_RT, + FEATURE_MS2_ALIGNMENT.XCORR_COELUTION_TO_REFERENCE AS VAR_XCORR_COELUTION_TO_REFERENCE, + FEATURE_MS2_ALIGNMENT.XCORR_SHAPE_TO_REFERENCE AS VAR_XCORR_SHAPE_TO_REFERENCE, + FEATURE_MS2_ALIGNMENT.MI_TO_REFERENCE AS VAR_MI_TO_REFERENCE, + FEATURE_MS2_ALIGNMENT.XCORR_COELUTION_TO_ALL AS VAR_XCORR_COELUTION_TO_ALL, + 
FEATURE_MS2_ALIGNMENT.XCORR_SHAPE_TO_ALL AS VAR_XCORR_SHAPE, + FEATURE_MS2_ALIGNMENT.MI_TO_ALL AS VAR_MI_TO_ALL, + FEATURE_MS2_ALIGNMENT.RETENTION_TIME_DEVIATION AS VAR_RETENTION_TIME_DEVIATION, + FEATURE_MS2_ALIGNMENT.PEAK_INTENSITY_RATIO AS VAR_PEAK_INTENSITY_RATIO, + FEATURE_MS2_ALIGNMENT.LABEL AS DECOY, + SCORE_ALIGNMENT.SCORE AS SCORE, + SCORE_ALIGNMENT.PEP AS PEP, + SCORE_ALIGNMENT.QVALUE AS QVALUE + FROM sqlite_scan('{self.config.infile}', 'FEATURE_MS2_ALIGNMENT') AS FEATURE_MS2_ALIGNMENT + LEFT JOIN ( + SELECT FEATURE_ID, SCORE, PEP, QVALUE, MIN(QVALUE) as MIN_QVALUE + FROM sqlite_scan('{self.config.infile}', 'SCORE_ALIGNMENT') + GROUP BY FEATURE_ID + ) AS SCORE_ALIGNMENT + ON FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID = SCORE_ALIGNMENT.FEATURE_ID + """ + else: + # Export without scores (original behavior) + query = f""" + SELECT + ALIGNMENT_ID, + RUN_ID, + PRECURSOR_ID, + ALIGNED_FEATURE_ID AS FEATURE_ID, + REFERENCE_FEATURE_ID, + ALIGNED_RT, + REFERENCE_RT, + XCORR_COELUTION_TO_REFERENCE AS VAR_XCORR_COELUTION_TO_REFERENCE, + XCORR_SHAPE_TO_REFERENCE AS VAR_XCORR_SHAPE_TO_REFERENCE, + MI_TO_REFERENCE AS VAR_MI_TO_REFERENCE, + XCORR_COELUTION_TO_ALL AS VAR_XCORR_COELUTION_TO_ALL, + XCORR_SHAPE_TO_ALL AS VAR_XCORR_SHAPE, + MI_TO_ALL AS VAR_MI_TO_ALL, + RETENTION_TIME_DEVIATION AS VAR_RETENTION_TIME_DEVIATION, + PEAK_INTENSITY_RATIO AS VAR_PEAK_INTENSITY_RATIO, + LABEL AS DECOY + FROM sqlite_scan('{self.config.infile}', 'FEATURE_MS2_ALIGNMENT') + """ self._execute_copy_query(conn, query, path) @@ -2084,8 +2529,13 @@ def _build_score_column_selection_and_joins( score_columns_to_select.append( "SCORE_IPF.PRECURSOR_PEAKGROUP_PEP AS SCORE_IPF_PRECURSOR_PEAKGROUP_PEP, SCORE_IPF.PEP AS SCORE_IPF_PEP, SCORE_IPF.QVALUE AS SCORE_IPF_QVALUE" ) + # NOTE: UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING needs to be created before this join is actually executed. This is done by registering the table in DuckDB in the precursor query builder. 
+ # TODO: We should maybe add the UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING during OpenSwathWorkflow execution to avoid doing it here? + self._insert_precursor_peptide_ipf_map() score_tables_to_join.append( - f"LEFT JOIN sqlite_scan('{self.config.infile}', 'SCORE_IPF') AS SCORE_IPF ON FEATURE.ID = SCORE_IPF.FEATURE_ID" + f""" + LEFT JOIN sqlite_scan('{self.config.infile}', 'PRECURSOR_PEPTIDE_IPF_MAPPING') AS SCORE_IPF ON SCORE_IPF.FEATURE_ID = FEATURE.ID + """ ) # Create views for peptide and protein score tables if they exist diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 3bcd887..23d9751 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -1,3 +1,4 @@ +import os import duckdb import pandas as pd from loguru import logger @@ -28,6 +29,9 @@ def __init__(self, config: ExportIOConfig): col.startswith("SCORE_TRANSITION_") for col in self._columns ) + # Check for alignment file + self._has_alignment = self._check_alignment_file_exists() + def read(self) -> pd.DataFrame: """ Main entry point for reading Parquet data. @@ -69,6 +73,22 @@ def _is_unscored_file(self) -> bool: all_cols = get_parquet_column_names(self.infile) return all(not col.startswith("SCORE_") for col in all_cols) + def _check_alignment_file_exists(self) -> bool: + """ + Check if alignment parquet file exists. + """ + import os + + alignment_file = None + if self.infile.endswith(".parquet"): + base_name = self.infile[:-8] # Remove .parquet + alignment_file = f"{base_name}_feature_alignment.parquet" + + if alignment_file and os.path.exists(alignment_file): + logger.debug(f"Alignment file found: {alignment_file}") + return True + return False + def _read_unscored_data(self, con) -> pd.DataFrame: """ Read unscored data from Parquet files. @@ -236,8 +256,12 @@ def _read_augmented_data(self, con) -> pd.DataFrame: def _read_standard_data(self, con) -> pd.DataFrame: """ - Read standard OpenSWATH data without IPF. 
+ Read standard OpenSWATH data without IPF, optionally including aligned features. """ + # Check if we should attempt alignment integration + use_alignment = self.config.use_alignment and self._has_alignment + + # First, get features that pass MS2 QVALUE threshold query = f""" SELECT RUN_ID AS id_run, @@ -264,13 +288,157 @@ def _read_standard_data(self, con) -> pd.DataFrame: RIGHT_WIDTH AS rightWidth, SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, SCORE_MS2_SCORE AS d_score, - SCORE_MS2_Q_VALUE AS m_score + SCORE_MS2_Q_VALUE AS m_score, + SCORE_MS2_PEP AS pep FROM data WHERE PROTEIN_ID IS NOT NULL AND SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} ORDER BY transition_group_id, peak_group_rank """ - return con.execute(query).fetchdf() + data = con.execute(query).fetchdf() + + # If alignment is enabled and alignment data is present, fetch and merge aligned features + if use_alignment: + aligned_features = self._fetch_alignment_features(con) + + if not aligned_features.empty: + # Get full feature data for aligned features that are NOT already in base results + # We only want to add features that didn't pass MS2 threshold but have good alignment + aligned_ids = aligned_features["id"].unique() + existing_ids = data["id"].unique() + new_aligned_ids = [ + aid for aid in aligned_ids if aid not in existing_ids + ] + + # First, merge alignment info into existing features (those that passed MS2) + # Mark them with from_alignment=0 + if "alignment_pep" in aligned_features.columns: + # Build list of columns to merge + merge_cols = ["id", "alignment_pep", "alignment_qvalue"] + if "alignment_group_id" in aligned_features.columns: + merge_cols.append("alignment_group_id") + if "alignment_reference_feature_id" in aligned_features.columns: + merge_cols.append("alignment_reference_feature_id") + if "alignment_reference_rt" in aligned_features.columns: + merge_cols.append("alignment_reference_rt") + + data = pd.merge( + data, aligned_features[merge_cols], on="id", how="left" 
+ ) + data["from_alignment"] = 0 + + # Now add features that didn't pass MS2 but have good alignment (recovered features) + if new_aligned_ids: + # Fetch full data for these new aligned features from the main data view + # Register aligned IDs as a temp table for the query + aligned_ids_df = pd.DataFrame({"id": new_aligned_ids}) + con.register("aligned_ids_temp", aligned_ids_df) + + aligned_query = f""" + SELECT + RUN_ID AS id_run, + PEPTIDE_ID AS id_peptide, + PRECURSOR_ID AS transition_group_id, + PRECURSOR_DECOY AS decoy, + RUN_ID AS run_id, + FILENAME AS filename, + EXP_RT AS RT, + EXP_RT - DELTA_RT AS assay_rt, + DELTA_RT AS delta_rt, + NORM_RT AS iRT, + PRECURSOR_LIBRARY_RT AS assay_iRT, + NORM_RT - PRECURSOR_LIBRARY_RT AS delta_iRT, + FEATURE_ID AS id, + UNMODIFIED_SEQUENCE AS Sequence, + MODIFIED_SEQUENCE AS FullPeptideName, + PRECURSOR_CHARGE AS Charge, + PRECURSOR_MZ AS mz, + FEATURE_MS2_AREA_INTENSITY AS Intensity, + FEATURE_MS1_AREA_INTENSITY AS aggr_prec_Peak_Area, + FEATURE_MS1_APEX_INTENSITY AS aggr_prec_Peak_Apex, + LEFT_WIDTH AS leftWidth, + RIGHT_WIDTH AS rightWidth, + SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, + SCORE_MS2_SCORE AS d_score, + SCORE_MS2_Q_VALUE AS m_score + FROM data + WHERE PROTEIN_ID IS NOT NULL + AND FEATURE_ID IN (SELECT id FROM aligned_ids_temp) + """ + aligned_data = con.execute(aligned_query).fetchdf() + + # Merge alignment scores and reference info into the aligned data + if "alignment_pep" in aligned_features.columns: + aligned_data = pd.merge( + aligned_data, + aligned_features[merge_cols], + on="id", + how="left", + ) + + # Mark as recovered through alignment + aligned_data["from_alignment"] = 1 + + logger.info( + f"Adding {len(aligned_data)} features recovered through alignment" + ) + + # Combine with base data + data = pd.concat([data, aligned_data], ignore_index=True) + + # Convert alignment_reference_feature_id to int64 to avoid scientific notation + if "alignment_reference_feature_id" in data.columns: + 
data["alignment_reference_feature_id"] = data[ + "alignment_reference_feature_id" + ].astype("Int64") + if "alignment_group_id" in data.columns: + data["alignment_group_id"] = data["alignment_group_id"].astype( + "Int64" + ) + + # Assign alignment_group_id to reference features + # Create a mapping from reference feature IDs to their alignment_group_ids + if ( + "alignment_reference_feature_id" in data.columns + and "alignment_group_id" in data.columns + ): + # Get all reference feature IDs and their corresponding alignment_group_ids + ref_mapping = data[data["alignment_reference_feature_id"].notna()][ + ["alignment_reference_feature_id", "alignment_group_id"] + ].drop_duplicates() + + # For each reference feature ID, we need to assign the alignment_group_id + # to the feature row where id == alignment_reference_feature_id + if not ref_mapping.empty: + # Merge the alignment_group_id for reference features + # First create a DataFrame mapping id -> alignment_group_id for references + ref_group_mapping = ref_mapping.rename( + columns={ + "alignment_reference_feature_id": "id", + "alignment_group_id": "ref_alignment_group_id", + } + ) + + # Merge this mapping to assign alignment_group_id to reference features + data = pd.merge(data, ref_group_mapping, on="id", how="left") + + # Fill in alignment_group_id for reference features (where it's currently null but ref_alignment_group_id is not) + mask = ( + data["alignment_group_id"].isna() + & data["ref_alignment_group_id"].notna() + ) + data.loc[mask, "alignment_group_id"] = data.loc[ + mask, "ref_alignment_group_id" + ] + + # Drop the temporary column + data = data.drop(columns=["ref_alignment_group_id"]) + + logger.debug( + f"Assigned alignment_group_id to {mask.sum()} reference features" + ) + + return data def _augment_data(self, data, con) -> pd.DataFrame: """ @@ -559,6 +727,165 @@ def _build_feature_vars_sql(self) -> str: return ", " + ", ".join(feature_vars) if feature_vars else "" + def 
_fetch_alignment_features(self, con) -> pd.DataFrame: + """ + Fetch aligned features with good alignment scores from alignment parquet file. + + This method checks for an alignment parquet file and retrieves features + that have been aligned across runs and pass the alignment quality threshold. + Only features whose reference feature passes the MS2 QVALUE threshold are included. + + Args: + con: DuckDB connection + + Returns: + DataFrame with aligned feature IDs that pass quality threshold + """ + + # Check for alignment file - it should be named with _feature_alignment.parquet suffix + alignment_file = None + if self.infile.endswith(".parquet"): + base_name = self.infile[:-8] # Remove .parquet + alignment_file = f"{base_name}_feature_alignment.parquet" + + if not alignment_file or not os.path.exists(alignment_file): + logger.debug( + "Alignment parquet file not found, skipping alignment integration" + ) + return pd.DataFrame() + + logger.debug(f"Loading alignment data from {alignment_file}") + max_alignment_pep = self.config.max_alignment_pep + max_rs_peakgroup_qvalue = self.config.max_rs_peakgroup_qvalue + + try: + # Load alignment data + alignment_df = pd.read_parquet(alignment_file) + + # Filter to target (non-decoy) features with good alignment scores + # Note: DECOY column in parquet alignment file comes from LABEL in SQLite + # where LABEL=1 (DECOY=1 in parquet) means target, not decoy + if ( + "DECOY" in alignment_df.columns + and "VAR_XCORR_SHAPE" in alignment_df.columns + ): + # This looks like the feature_alignment table structure + + # Check if we have alignment scores (PEP/QVALUE) in the file + # If not, we'll need to rely on the base MS2 scores and just use alignment to identify features + has_alignment_scores = ( + "SCORE_ALIGNMENT_PEP" in alignment_df.columns + or "SCORE_ALIGNMENT_Q_VALUE" in alignment_df.columns + ) + + if has_alignment_scores: + # Filter by alignment PEP threshold + pep_col = ( + "SCORE_ALIGNMENT_PEP" + if "SCORE_ALIGNMENT_PEP" in 
alignment_df.columns + else None + ) + qvalue_col = ( + "SCORE_ALIGNMENT_Q_VALUE" + if "SCORE_ALIGNMENT_Q_VALUE" in alignment_df.columns + else None + ) + + if pep_col: + filtered_df = alignment_df[ + ( + alignment_df["DECOY"] == 1 + ) # DECOY=1 means target (from LABEL=1 in SQLite) + & (alignment_df[pep_col] < max_alignment_pep) + ].copy() + else: + # Use QVALUE if PEP not available (less ideal but workable) + filtered_df = alignment_df[ + (alignment_df["DECOY"] == 1) + & (alignment_df[qvalue_col] < max_alignment_pep) + ].copy() + else: + # No alignment scores in file - just filter by target status + # In this case, we can't apply alignment quality threshold + logger.warning( + "Alignment file found but no PEP/QVALUE scores present. Cannot filter by alignment quality." + ) + filtered_df = alignment_df[alignment_df["DECOY"] == 1].copy() + + # Now filter by reference feature MS2 QVALUE + # Need to join with main data to check reference feature QVALUE + if "REFERENCE_FEATURE_ID" in filtered_df.columns: + # Register filtered alignment data for SQL query + con.register("filtered_alignment", filtered_df) + + # Query to get aligned features where reference passes MS2 QVALUE threshold + # Also compute alignment_group_id using DENSE_RANK + # CAST in SELECT preserves precision, but not in JOIN (for performance) + ref_check_query = f""" + SELECT + DENSE_RANK() OVER (ORDER BY fa.PRECURSOR_ID, fa.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, + fa.FEATURE_ID, + fa.PRECURSOR_ID, + fa.RUN_ID, + CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID, + fa.REFERENCE_RT, + fa.SCORE_ALIGNMENT_PEP, + fa.SCORE_ALIGNMENT_Q_VALUE + FROM filtered_alignment fa + INNER JOIN data d ON d.FEATURE_ID = fa.REFERENCE_FEATURE_ID + WHERE d.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} + """ + filtered_df = con.execute(ref_check_query).fetchdf() + + # Rename columns to match expected format + if "FEATURE_ID" in filtered_df.columns: + # Start with base columns + base_cols = ["FEATURE_ID", 
"PRECURSOR_ID", "RUN_ID"] + result = filtered_df[base_cols].rename(columns={"FEATURE_ID": "id"}) + + # Add alignment group ID if available + if "ALIGNMENT_GROUP_ID" in filtered_df.columns: + result["alignment_group_id"] = filtered_df[ + "ALIGNMENT_GROUP_ID" + ].values + + # Add reference feature ID and RT if available + # Ensure Int64 dtype to preserve precision for large IDs + if "REFERENCE_FEATURE_ID" in filtered_df.columns: + result["alignment_reference_feature_id"] = ( + filtered_df["REFERENCE_FEATURE_ID"].astype("Int64").values + ) + if "REFERENCE_RT" in filtered_df.columns: + result["alignment_reference_rt"] = filtered_df[ + "REFERENCE_RT" + ].values + + # Add alignment scores if available + if "SCORE_ALIGNMENT_PEP" in filtered_df.columns: + result["alignment_pep"] = filtered_df[ + "SCORE_ALIGNMENT_PEP" + ].values + if "SCORE_ALIGNMENT_Q_VALUE" in filtered_df.columns: + result["alignment_qvalue"] = filtered_df[ + "SCORE_ALIGNMENT_Q_VALUE" + ].values + + # Convert alignment_group_id to int64 + if "alignment_group_id" in result.columns: + result["alignment_group_id"] = result[ + "alignment_group_id" + ].astype("Int64") + + logger.info( + f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep} " + + f"with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}" + ) + return result + except Exception as e: + logger.warning(f"Could not load alignment data: {e}") + + return pd.DataFrame() + ################################## # Export-specific readers below ################################## @@ -604,11 +931,11 @@ def _read_for_export_scored_report(self, con) -> pd.DataFrame: def export_feature_scores(self, outfile: str, plot_callback): """ Export feature scores from Parquet file for plotting. 
- + Detects if SCORE columns exist and adjusts behavior: - If SCORE columns exist: applies RANK==1 filtering and plots SCORE + VAR_ columns - If SCORE columns don't exist: plots only VAR_ columns - + Parameters ---------- outfile : str @@ -620,28 +947,32 @@ def export_feature_scores(self, outfile: str, plot_callback): logger.info(f"Reading parquet file: {self.infile}") # Ensure pyarrow is available pa, _, _ = _ensure_pyarrow() - + # First, read only column names to identify what to load parquet_file = pa.parquet.ParquetFile(self.infile) all_columns = parquet_file.schema.names - + # Check for SCORE columns score_cols = [col for col in all_columns if col.startswith("SCORE_")] has_scores = len(score_cols) > 0 - + if has_scores: - logger.info("SCORE columns detected - applying RANK==1 filter and plotting SCORE + VAR_ columns") + logger.info( + "SCORE columns detected - applying RANK==1 filter and plotting SCORE + VAR_ columns" + ) else: logger.info("No SCORE columns detected - plotting only VAR_ columns") - + # Identify columns to read for each level ms1_cols = [col for col in all_columns if col.startswith("FEATURE_MS1_VAR_")] ms2_cols = [col for col in all_columns if col.startswith("FEATURE_MS2_VAR_")] - transition_cols = [col for col in all_columns if col.startswith("FEATURE_TRANSITION_VAR_")] - + transition_cols = [ + col for col in all_columns if col.startswith("FEATURE_TRANSITION_VAR_") + ] + # Determine which columns to read (only what we need) cols_to_read = set() - + # Add SCORE columns if they exist if has_scores: cols_to_read.update(score_cols) @@ -653,7 +984,7 @@ def export_feature_scores(self, outfile: str, plot_callback): cols_to_read.add("RUN_ID") if "PRECURSOR_ID" in all_columns: cols_to_read.add("PRECURSOR_ID") - + if ms1_cols and "PRECURSOR_DECOY" in all_columns: cols_to_read.update(ms1_cols) cols_to_read.add("PRECURSOR_DECOY") @@ -663,67 +994,75 @@ def export_feature_scores(self, outfile: str, plot_callback): if transition_cols and "TRANSITION_DECOY" 
in all_columns: cols_to_read.update(transition_cols) cols_to_read.add("TRANSITION_DECOY") - + if not cols_to_read: logger.warning("No VAR_ columns found in parquet file") return - + # Read only the columns we need logger.info(f"Reading {len(cols_to_read)} columns from parquet file") df = pd.read_parquet(self.infile, columns=list(cols_to_read)) - + # Apply RANK==1 filter if SCORE columns exist - if has_scores and 'SCORE_MS2_PEAK_GROUP_RANK' in df.columns: + if has_scores and "SCORE_MS2_PEAK_GROUP_RANK" in df.columns: logger.info(f"Filtering to RANK==1: {len(df)} -> ", end="") - df = df[df['SCORE_MS2_PEAK_GROUP_RANK'] == 1].copy() + df = df[df["SCORE_MS2_PEAK_GROUP_RANK"] == 1].copy() logger.info(f"{len(df)} rows") - + # Generate GROUP_ID if needed - if has_scores and 'GROUP_ID' not in df.columns: - if 'RUN_ID' in df.columns and 'PRECURSOR_ID' in df.columns: - df['GROUP_ID'] = df['RUN_ID'].astype(str) + '_' + df['PRECURSOR_ID'].astype(str) - + if has_scores and "GROUP_ID" not in df.columns: + if "RUN_ID" in df.columns and "PRECURSOR_ID" in df.columns: + df["GROUP_ID"] = ( + df["RUN_ID"].astype(str) + "_" + df["PRECURSOR_ID"].astype(str) + ) + # Process MS1 level if ms1_cols and "PRECURSOR_DECOY" in df.columns: logger.info("Processing MS1 level feature scores") select_cols = ms1_cols + ["PRECURSOR_DECOY"] # Add SCORE columns if present if has_scores: - score_ms1_cols = [col for col in score_cols if 'MS1' in col.upper()] + score_ms1_cols = [col for col in score_cols if "MS1" in col.upper()] select_cols.extend(score_ms1_cols) - if 'GROUP_ID' in df.columns: - select_cols.append('GROUP_ID') + if "GROUP_ID" in df.columns: + select_cols.append("GROUP_ID") ms1_df = df[select_cols].copy() ms1_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) plot_callback(ms1_df, outfile, "ms1", append=False) del ms1_df # Free memory - + # Process MS2 level if ms2_cols and "PRECURSOR_DECOY" in df.columns: logger.info("Processing MS2 level feature scores") select_cols = ms2_cols + 
["PRECURSOR_DECOY"] # Add SCORE columns if present if has_scores: - score_ms2_cols = [col for col in score_cols if 'MS2' in col.upper() or 'MS1' not in col.upper()] + score_ms2_cols = [ + col + for col in score_cols + if "MS2" in col.upper() or "MS1" not in col.upper() + ] select_cols.extend(score_ms2_cols) - if 'GROUP_ID' in df.columns: - select_cols.append('GROUP_ID') + if "GROUP_ID" in df.columns: + select_cols.append("GROUP_ID") ms2_df = df[select_cols].copy() ms2_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) append = bool(ms1_cols) plot_callback(ms2_df, outfile, "ms2", append=append) del ms2_df # Free memory - + # Process transition level if transition_cols and "TRANSITION_DECOY" in df.columns: logger.info("Processing transition level feature scores") select_cols = transition_cols + ["TRANSITION_DECOY"] # Add SCORE columns if present if has_scores: - score_transition_cols = [col for col in score_cols if 'TRANSITION' in col.upper()] + score_transition_cols = [ + col for col in score_cols if "TRANSITION" in col.upper() + ] select_cols.extend(score_transition_cols) - if 'GROUP_ID' in df.columns: - select_cols.append('GROUP_ID') + if "GROUP_ID" in df.columns: + select_cols.append("GROUP_ID") transition_df = df[select_cols].copy() transition_df.rename(columns={"TRANSITION_DECOY": "DECOY"}, inplace=True) append = bool(ms1_cols or ms2_cols) diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index 8eb4a69..0aebef6 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -43,6 +43,9 @@ def __init__(self, config: ExportIOConfig): col.startswith("SCORE_TRANSITION_") for col in self._columns ) + # Check for alignment file + self._has_alignment = self._check_alignment_file_exists() + def _get_precursor_files(self): """Helper to get precursor files based on structure""" if os.path.isdir(self.infile) and any( @@ -68,16 +71,21 @@ def read(self) -> pd.DataFrame: try: 
self._init_duckdb_views(con) + if self.config.context == "export_scored_report": + return self._read_for_export_scored_report(con) + if self.config.export_format == "library": if self._is_unscored_file(): - descr= "Files must be scored for library generation." + descr = "Files must be scored for library generation." logger.exception(descr) raise ValueError(descr) if not self._has_peptide_protein_global_scores(): - descr= "Files must have peptide and protein level global scores for library generation." + descr = "Files must have peptide and protein level global scores for library generation." logger.exception(descr) raise ValueError(descr) - logger.info("Reading standard OpenSWATH data for library from split Parquet files.") + logger.info( + "Reading standard OpenSWATH data for library from split Parquet files." + ) return self._read_library_data(con) if self._is_unscored_file(): @@ -97,13 +105,17 @@ def read(self) -> pd.DataFrame: return self._augment_data(data, con) finally: con.close() - + def _has_peptide_protein_global_scores(self) -> bool: """ Check if files contain peptide and protein global scores """ - has_peptide = any(col.startswith("SCORE_PEPTIDE_GLOBAL") for col in self._columns) - has_protein = any(col.startswith("SCORE_PROTEIN_GLOBAL") for col in self._columns) + has_peptide = any( + col.startswith("SCORE_PEPTIDE_GLOBAL") for col in self._columns + ) + has_protein = any( + col.startswith("SCORE_PROTEIN_GLOBAL") for col in self._columns + ) return has_peptide and has_protein def _is_unscored_file(self) -> bool: @@ -112,6 +124,26 @@ def _is_unscored_file(self) -> bool: """ return all(not col.startswith("SCORE_") for col in self._columns) + def _check_alignment_file_exists(self) -> bool: + """ + Check if alignment parquet file exists for split parquet format. 
+ + For split parquet, alignment file is at the parent directory level: + - infile is a directory containing *.oswpq subdirectories + - alignment file is at infile/feature_alignment.parquet + """ + import os + + alignment_file = None + if os.path.isdir(self.infile): + # Split parquet format: alignment file is in the parent directory + alignment_file = os.path.join(self.infile, "feature_alignment.parquet") + + if alignment_file and os.path.exists(alignment_file): + logger.debug(f"Alignment file found: {alignment_file}") + return True + return False + def _read_unscored_data(self, con) -> pd.DataFrame: """ Read unscored data from split Parquet files. @@ -292,14 +324,16 @@ def _read_library_data(self, con) -> pd.DataFrame: im_col = "p.PRECURSOR_LIBRARY_DRIFT_TIME" if self.config.intensity_calibration: - intensity_col = 't.FEATURE_TRANSITION_AREA_INTENSITY' + intensity_col = "t.FEATURE_TRANSITION_AREA_INTENSITY" else: - intensity_col = 't.TRANSITION_LIBRARY_INTENSITY' - + intensity_col = "t.TRANSITION_LIBRARY_INTENSITY" + if self.config.keep_decoys: decoy_query = "" else: - decoy_query ="p.PRECURSOR_DECOY is false and t.TRANSITION_DECOY is false and" + decoy_query = ( + "p.PRECURSOR_DECOY is false and t.TRANSITION_DECOY is false and" + ) query = f""" SELECT @@ -336,11 +370,15 @@ def _read_library_data(self, con) -> pd.DataFrame: t.TRANSITION_CHARGE, t.TRANSITION_TYPE, t.TRANSITION_ORDINAL, t.TRANSITION_ID, p.PRECURSOR_DECOY, p.RUN_ID, p.FEATURE_MS2_AREA_INTENSITY """ return con.execute(query).fetchdf() - + def _read_standard_data(self, con) -> pd.DataFrame: """ - Read standard OpenSWATH data without IPF from split files. + Read standard OpenSWATH data without IPF from split files, optionally including aligned features. 
""" + # Check if we should attempt alignment integration + use_alignment = self.config.use_alignment and self._has_alignment + + # First, get features that pass MS2 QVALUE threshold query = f""" SELECT p.RUN_ID AS id_run, @@ -367,13 +405,157 @@ def _read_standard_data(self, con) -> pd.DataFrame: p.RIGHT_WIDTH AS rightWidth, p.SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, p.SCORE_MS2_SCORE AS d_score, - p.SCORE_MS2_Q_VALUE AS m_score + p.SCORE_MS2_Q_VALUE AS m_score, + p.SCORE_MS2_PEP AS pep FROM precursors p WHERE p.PROTEIN_ID IS NOT NULL AND p.SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} ORDER BY transition_group_id, peak_group_rank """ - return con.execute(query).fetchdf() + data = con.execute(query).fetchdf() + + # If alignment is enabled and alignment data is present, fetch and merge aligned features + if use_alignment: + aligned_features = self._fetch_alignment_features(con) + + if not aligned_features.empty: + # Get full feature data for aligned features that are NOT already in base results + # We only want to add features that didn't pass MS2 threshold but have good alignment + aligned_ids = aligned_features["id"].unique() + existing_ids = data["id"].unique() + new_aligned_ids = [ + aid for aid in aligned_ids if aid not in existing_ids + ] + + # First, merge alignment info into existing features (those that passed MS2) + # Mark them with from_alignment=0 + if "alignment_pep" in aligned_features.columns: + # Build list of columns to merge + merge_cols = ["id", "alignment_pep", "alignment_qvalue"] + if "alignment_group_id" in aligned_features.columns: + merge_cols.append("alignment_group_id") + if "alignment_reference_feature_id" in aligned_features.columns: + merge_cols.append("alignment_reference_feature_id") + if "alignment_reference_rt" in aligned_features.columns: + merge_cols.append("alignment_reference_rt") + + data = pd.merge( + data, aligned_features[merge_cols], on="id", how="left" + ) + data["from_alignment"] = 0 + + # Now add features 
that didn't pass MS2 but have good alignment (recovered features) + if new_aligned_ids: + # Fetch full data for these new aligned features from the main data view + # Register aligned IDs as a temp table for the query + aligned_ids_df = pd.DataFrame({"id": new_aligned_ids}) + con.register("aligned_ids_temp", aligned_ids_df) + + aligned_query = f""" + SELECT + p.RUN_ID AS id_run, + p.PEPTIDE_ID AS id_peptide, + p.PRECURSOR_ID AS transition_group_id, + p.PRECURSOR_DECOY AS decoy, + p.RUN_ID AS run_id, + p.FILENAME AS filename, + p.EXP_RT AS RT, + p.EXP_RT - p.DELTA_RT AS assay_rt, + p.DELTA_RT AS delta_rt, + p.NORM_RT AS iRT, + p.PRECURSOR_LIBRARY_RT AS assay_iRT, + p.NORM_RT - p.PRECURSOR_LIBRARY_RT AS delta_iRT, + p.FEATURE_ID AS id, + p.UNMODIFIED_SEQUENCE AS Sequence, + p.MODIFIED_SEQUENCE AS FullPeptideName, + p.PRECURSOR_CHARGE AS Charge, + p.PRECURSOR_MZ AS mz, + p.FEATURE_MS2_AREA_INTENSITY AS Intensity, + p.FEATURE_MS1_AREA_INTENSITY AS aggr_prec_Peak_Area, + p.FEATURE_MS1_APEX_INTENSITY AS aggr_prec_Peak_Apex, + p.LEFT_WIDTH AS leftWidth, + p.RIGHT_WIDTH AS rightWidth, + p.SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, + p.SCORE_MS2_SCORE AS d_score, + p.SCORE_MS2_Q_VALUE AS m_score + FROM precursors p + WHERE p.PROTEIN_ID IS NOT NULL + AND p.FEATURE_ID IN (SELECT id FROM aligned_ids_temp) + """ + aligned_data = con.execute(aligned_query).fetchdf() + + # Merge alignment scores and reference info into the aligned data + if "alignment_pep" in aligned_features.columns: + aligned_data = pd.merge( + aligned_data, + aligned_features[merge_cols], + on="id", + how="left", + ) + + # Mark as recovered through alignment + aligned_data["from_alignment"] = 1 + + logger.info( + f"Adding {len(aligned_data)} features recovered through alignment" + ) + + # Combine with base data + data = pd.concat([data, aligned_data], ignore_index=True) + + # Convert alignment_reference_feature_id to int64 to avoid scientific notation + if "alignment_reference_feature_id" in data.columns: + 
data["alignment_reference_feature_id"] = data[ + "alignment_reference_feature_id" + ].astype("Int64") + if "alignment_group_id" in data.columns: + data["alignment_group_id"] = data["alignment_group_id"].astype( + "Int64" + ) + + # Assign alignment_group_id to reference features + # Create a mapping from reference feature IDs to their alignment_group_ids + if ( + "alignment_reference_feature_id" in data.columns + and "alignment_group_id" in data.columns + ): + # Get all reference feature IDs and their corresponding alignment_group_ids + ref_mapping = data[data["alignment_reference_feature_id"].notna()][ + ["alignment_reference_feature_id", "alignment_group_id"] + ].drop_duplicates() + + # For each reference feature ID, we need to assign the alignment_group_id + # to the feature row where id == alignment_reference_feature_id + if not ref_mapping.empty: + # Merge the alignment_group_id for reference features + # First create a DataFrame mapping id -> alignment_group_id for references + ref_group_mapping = ref_mapping.rename( + columns={ + "alignment_reference_feature_id": "id", + "alignment_group_id": "ref_alignment_group_id", + } + ) + + # Merge this mapping to assign alignment_group_id to reference features + data = pd.merge(data, ref_group_mapping, on="id", how="left") + + # Fill in alignment_group_id for reference features (where it's currently null but ref_alignment_group_id is not) + mask = ( + data["alignment_group_id"].isna() + & data["ref_alignment_group_id"].notna() + ) + data.loc[mask, "alignment_group_id"] = data.loc[ + mask, "ref_alignment_group_id" + ] + + # Drop the temporary column + data = data.drop(columns=["ref_alignment_group_id"]) + + logger.debug( + f"Assigned alignment_group_id to {mask.sum()} reference features" + ) + + return data def _augment_data(self, data, con) -> pd.DataFrame: """ @@ -643,6 +825,160 @@ def _add_protein_error_data(self, data, con) -> pd.DataFrame: return data + def _fetch_alignment_features(self, con) -> pd.DataFrame: + 
""" + Fetch aligned features with good alignment scores from alignment parquet file. + + This method checks for an alignment parquet file and retrieves features + that have been aligned across runs and pass the alignment quality threshold. + Only features whose reference feature passes the MS2 QVALUE threshold are included. + + Args: + con: DuckDB connection + + Returns: + DataFrame with aligned feature IDs that pass quality threshold + """ + + # For split parquet, alignment file is at parent directory level + alignment_file = os.path.join(self.infile, "feature_alignment.parquet") + + if not os.path.exists(alignment_file): + logger.debug( + "Alignment parquet file not found, skipping alignment integration" + ) + return pd.DataFrame() + + logger.debug(f"Loading alignment data from {alignment_file}") + max_alignment_pep = self.config.max_alignment_pep + max_rs_peakgroup_qvalue = self.config.max_rs_peakgroup_qvalue + + try: + # Load alignment data + alignment_df = pd.read_parquet(alignment_file) + + # Filter to target (non-decoy) features with good alignment scores + # Note: DECOY column in parquet alignment file comes from LABEL in SQLite + # where LABEL=1 (DECOY=1 in parquet) means target, not decoy + if ( + "DECOY" in alignment_df.columns + and "VAR_XCORR_SHAPE" in alignment_df.columns + ): + # Check if we have alignment scores (PEP/QVALUE) in the file + # If not, we'll need to rely on the base MS2 scores and just use alignment to identify features + has_alignment_scores = ( + "SCORE_ALIGNMENT_PEP" in alignment_df.columns + or "SCORE_ALIGNMENT_Q_VALUE" in alignment_df.columns + ) + + if has_alignment_scores: + # Filter by alignment PEP threshold + pep_col = ( + "SCORE_ALIGNMENT_PEP" + if "SCORE_ALIGNMENT_PEP" in alignment_df.columns + else None + ) + qvalue_col = ( + "SCORE_ALIGNMENT_Q_VALUE" + if "SCORE_ALIGNMENT_Q_VALUE" in alignment_df.columns + else None + ) + + if pep_col: + filtered_df = alignment_df[ + ( + alignment_df["DECOY"] == 1 + ) # DECOY=1 means 
target (from LABEL=1 in SQLite) + & (alignment_df[pep_col] < max_alignment_pep) + ].copy() + else: + # Use QVALUE if SCORE_ALIGNMENT_PEP not available (less ideal but workable) + filtered_df = alignment_df[ + (alignment_df["DECOY"] == 1) + & (alignment_df[qvalue_col] < max_alignment_pep) + ].copy() + else: + # No alignment scores in file - just filter by target status + # In this case, we can't apply alignment quality threshold + logger.warning( + "Alignment file found but no PEP/QVALUE scores present. Cannot filter by alignment quality." + ) + filtered_df = alignment_df[alignment_df["DECOY"] == 1].copy() + + # Now filter by reference feature MS2 QVALUE + # Need to join with precursors data to check reference feature QVALUE + if "REFERENCE_FEATURE_ID" in filtered_df.columns: + # Register filtered alignment data for SQL query + con.register("filtered_alignment", filtered_df) + + # Query to get aligned features where reference passes MS2 QVALUE threshold + # Also compute alignment_group_id using DENSE_RANK + # CAST in SELECT preserves precision, but not in JOIN (for performance) + ref_check_query = f""" + SELECT + DENSE_RANK() OVER (ORDER BY fa.PRECURSOR_ID, fa.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, + fa.FEATURE_ID, + fa.PRECURSOR_ID, + fa.RUN_ID, + CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID, + fa.REFERENCE_RT, + fa.SCORE_ALIGNMENT_PEP, + fa.SCORE_ALIGNMENT_Q_VALUE + FROM filtered_alignment fa + INNER JOIN precursors p ON p.FEATURE_ID = fa.REFERENCE_FEATURE_ID + WHERE p.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} + """ + filtered_df = con.execute(ref_check_query).fetchdf() + + # Rename columns to match expected format + if "FEATURE_ID" in filtered_df.columns: + # Start with base columns + base_cols = ["FEATURE_ID", "PRECURSOR_ID", "RUN_ID"] + result = filtered_df[base_cols].rename(columns={"FEATURE_ID": "id"}) + + # Add alignment group ID if available + if "ALIGNMENT_GROUP_ID" in filtered_df.columns: + result["alignment_group_id"] = filtered_df[ 
+ "ALIGNMENT_GROUP_ID" + ].values + + # Add reference feature ID and RT if available + # Ensure Int64 dtype to preserve precision for large IDs + if "REFERENCE_FEATURE_ID" in filtered_df.columns: + result["alignment_reference_feature_id"] = ( + filtered_df["REFERENCE_FEATURE_ID"].astype("Int64").values + ) + if "REFERENCE_RT" in filtered_df.columns: + result["alignment_reference_rt"] = filtered_df[ + "REFERENCE_RT" + ].values + + # Add alignment scores if available + if "SCORE_ALIGNMENT_PEP" in filtered_df.columns: + result["alignment_pep"] = filtered_df[ + "SCORE_ALIGNMENT_PEP" + ].values + if "SCORE_ALIGNMENT_Q_VALUE" in filtered_df.columns: + result["alignment_qvalue"] = filtered_df[ + "SCORE_ALIGNMENT_Q_VALUE" + ].values + + # Convert alignment_group_id to int64 + if "alignment_group_id" in result.columns: + result["alignment_group_id"] = result[ + "alignment_group_id" + ].astype("Int64") + + logger.info( + f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep} " + + f"with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}" + ) + return result + except Exception as e: + logger.warning(f"Could not load alignment data: {e}") + + return pd.DataFrame() + def _get_ms1_score_info(self) -> tuple[str, str]: """ Get MS1 score information if available. @@ -666,14 +1002,60 @@ def _build_feature_vars_sql(self) -> str: return ", " + ", ".join(feature_vars) if feature_vars else "" + ################################## + # Export-specific readers below + ################################## + + def _read_for_export_scored_report(self, con) -> pd.DataFrame: + """ + Lightweight reader that returns the minimal scored-report columns from split Parquet files. 
+ """ + select_cols = [ + "RUN_ID", + "PROTEIN_ID", + "PEPTIDE_ID", + "PRECURSOR_ID", + "PRECURSOR_DECOY", + "FEATURE_MS2_AREA_INTENSITY", + "SCORE_MS2_SCORE", + "SCORE_MS2_PEAK_GROUP_RANK", + "SCORE_MS2_Q_VALUE", + "SCORE_PEPTIDE_GLOBAL_SCORE", + "SCORE_PEPTIDE_GLOBAL_Q_VALUE", + "SCORE_PEPTIDE_EXPERIMENT_WIDE_SCORE", + "SCORE_PEPTIDE_EXPERIMENT_WIDE_Q_VALUE", + "SCORE_PEPTIDE_RUN_SPECIFIC_SCORE", + "SCORE_PEPTIDE_RUN_SPECIFIC_Q_VALUE", + "SCORE_PROTEIN_GLOBAL_SCORE", + "SCORE_PROTEIN_GLOBAL_Q_VALUE", + "SCORE_PROTEIN_EXPERIMENT_WIDE_SCORE", + "SCORE_PROTEIN_EXPERIMENT_WIDE_Q_VALUE", + "SCORE_IPF_QVALUE", + ] + + # Filter select cols based on available columns in the precursor files + select_cols = [col for col in select_cols if col in self._columns] + + # Build query to select only the needed columns from precursors view + cols_str = ", ".join([f"p.{col}" for col in select_cols]) + + query = f""" + SELECT {cols_str} + FROM precursors p + WHERE p.PROTEIN_ID IS NOT NULL + """ + + df = con.execute(query).fetchdf() + return df + def export_feature_scores(self, outfile: str, plot_callback): """ Export feature scores from split Parquet directory for plotting. 
- + Detects if SCORE columns exist and adjusts behavior: - If SCORE columns exist: applies RANK==1 filtering and plots SCORE + VAR_ columns - If SCORE columns don't exist: plots only VAR_ columns - + Parameters ---------- outfile : str @@ -684,30 +1066,32 @@ def export_feature_scores(self, outfile: str, plot_callback): """ # Ensure pyarrow is available pa, _, _ = _ensure_pyarrow() - + # Read precursor features - only necessary columns precursor_file = os.path.join(self.infile, "precursors_features.parquet") logger.info(f"Reading precursor features from: {precursor_file}") - + # First check what columns are available precursor_parquet = pa.parquet.ParquetFile(precursor_file) all_columns = precursor_parquet.schema.names - + # Check for SCORE columns score_cols = [col for col in all_columns if col.startswith("SCORE_")] has_scores = len(score_cols) > 0 - + if has_scores: - logger.info("SCORE columns detected - applying RANK==1 filter and plotting SCORE + VAR_ columns") + logger.info( + "SCORE columns detected - applying RANK==1 filter and plotting SCORE + VAR_ columns" + ) else: logger.info("No SCORE columns detected - plotting only VAR_ columns") - + # Identify columns to read ms1_cols = [col for col in all_columns if col.startswith("FEATURE_MS1_VAR_")] ms2_cols = [col for col in all_columns if col.startswith("FEATURE_MS2_VAR_")] - + cols_to_read = set() - + # Add SCORE columns if they exist if has_scores: cols_to_read.update(score_cols) @@ -719,113 +1103,144 @@ def export_feature_scores(self, outfile: str, plot_callback): cols_to_read.add("RUN_ID") if "PRECURSOR_ID" in all_columns: cols_to_read.add("PRECURSOR_ID") - + if ms1_cols and "PRECURSOR_DECOY" in all_columns: cols_to_read.update(ms1_cols) cols_to_read.add("PRECURSOR_DECOY") if ms2_cols and "PRECURSOR_DECOY" in all_columns: cols_to_read.update(ms2_cols) cols_to_read.add("PRECURSOR_DECOY") - + if cols_to_read: logger.info(f"Reading {len(cols_to_read)} columns from precursor features") df_precursor = 
pd.read_parquet(precursor_file, columns=list(cols_to_read)) - + # Apply RANK==1 filter if SCORE columns exist - if has_scores and 'SCORE_MS2_PEAK_GROUP_RANK' in df_precursor.columns: + if has_scores and "SCORE_MS2_PEAK_GROUP_RANK" in df_precursor.columns: logger.info(f"Filtering to RANK==1: {len(df_precursor)} -> ", end="") - df_precursor = df_precursor[df_precursor['SCORE_MS2_PEAK_GROUP_RANK'] == 1].copy() + df_precursor = df_precursor[ + df_precursor["SCORE_MS2_PEAK_GROUP_RANK"] == 1 + ].copy() logger.info(f"{len(df_precursor)} rows") - + # Generate GROUP_ID if needed - if has_scores and 'GROUP_ID' not in df_precursor.columns: - if 'RUN_ID' in df_precursor.columns and 'PRECURSOR_ID' in df_precursor.columns: - df_precursor['GROUP_ID'] = df_precursor['RUN_ID'].astype(str) + '_' + df_precursor['PRECURSOR_ID'].astype(str) - + if has_scores and "GROUP_ID" not in df_precursor.columns: + if ( + "RUN_ID" in df_precursor.columns + and "PRECURSOR_ID" in df_precursor.columns + ): + df_precursor["GROUP_ID"] = ( + df_precursor["RUN_ID"].astype(str) + + "_" + + df_precursor["PRECURSOR_ID"].astype(str) + ) + # Process MS1 level if ms1_cols and "PRECURSOR_DECOY" in df_precursor.columns: logger.info("Processing MS1 level feature scores") select_cols = ms1_cols + ["PRECURSOR_DECOY"] # Add SCORE columns if present if has_scores: - score_ms1_cols = [col for col in score_cols if 'MS1' in col.upper()] + score_ms1_cols = [col for col in score_cols if "MS1" in col.upper()] select_cols.extend(score_ms1_cols) - if 'GROUP_ID' in df_precursor.columns: - select_cols.append('GROUP_ID') + if "GROUP_ID" in df_precursor.columns: + select_cols.append("GROUP_ID") ms1_df = df_precursor[select_cols].copy() ms1_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) plot_callback(ms1_df, outfile, "ms1", append=False) del ms1_df # Free memory - + # Process MS2 level if ms2_cols and "PRECURSOR_DECOY" in df_precursor.columns: logger.info("Processing MS2 level feature scores") select_cols = 
ms2_cols + ["PRECURSOR_DECOY"] # Add SCORE columns if present if has_scores: - score_ms2_cols = [col for col in score_cols if 'MS2' in col.upper() or 'MS1' not in col.upper()] + score_ms2_cols = [ + col + for col in score_cols + if "MS2" in col.upper() or "MS1" not in col.upper() + ] select_cols.extend(score_ms2_cols) - if 'GROUP_ID' in df_precursor.columns: - select_cols.append('GROUP_ID') + if "GROUP_ID" in df_precursor.columns: + select_cols.append("GROUP_ID") ms2_df = df_precursor[select_cols].copy() ms2_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) append = bool(ms1_cols) plot_callback(ms2_df, outfile, "ms2", append=append) del ms2_df # Free memory - + del df_precursor # Free memory - + # Read transition features if available transition_file = os.path.join(self.infile, "transition_features.parquet") if os.path.exists(transition_file): logger.info(f"Reading transition features from: {transition_file}") - + # Check what columns are available transition_parquet = pa.parquet.ParquetFile(transition_file) transition_all_columns = transition_parquet.schema.names - transition_cols = [col for col in transition_all_columns if col.startswith("FEATURE_TRANSITION_VAR_")] - + transition_cols = [ + col + for col in transition_all_columns + if col.startswith("FEATURE_TRANSITION_VAR_") + ] + # Check for SCORE columns in transition file - transition_score_cols = [col for col in transition_all_columns if col.startswith("SCORE_") and 'TRANSITION' in col.upper()] + transition_score_cols = [ + col + for col in transition_all_columns + if col.startswith("SCORE_") and "TRANSITION" in col.upper() + ] has_transition_scores = len(transition_score_cols) > 0 - + if transition_cols and "TRANSITION_DECOY" in transition_all_columns: # Read only necessary columns cols_to_read = transition_cols + ["TRANSITION_DECOY"] if has_transition_scores: cols_to_read.extend(transition_score_cols) - if 'GROUP_ID' in transition_all_columns: - cols_to_read.append('GROUP_ID') - - 
logger.info(f"Reading {len(cols_to_read)} columns from transition features") + if "GROUP_ID" in transition_all_columns: + cols_to_read.append("GROUP_ID") + + logger.info( + f"Reading {len(cols_to_read)} columns from transition features" + ) df_transition = pd.read_parquet(transition_file, columns=cols_to_read) - + logger.info("Processing transition level feature scores") transition_df = df_transition.copy() - transition_df.rename(columns={"TRANSITION_DECOY": "DECOY"}, inplace=True) + transition_df.rename( + columns={"TRANSITION_DECOY": "DECOY"}, inplace=True + ) append = bool(ms1_cols or ms2_cols) plot_callback(transition_df, outfile, "transition", append=append) del transition_df, df_transition # Free memory - + # Read alignment features if available alignment_file = os.path.join(self.infile, "feature_alignment.parquet") if os.path.exists(alignment_file): logger.info(f"Reading alignment features from: {alignment_file}") - + # Check what columns are available alignment_parquet = pa.parquet.ParquetFile(alignment_file) alignment_all_columns = alignment_parquet.schema.names var_cols = [col for col in alignment_all_columns if col.startswith("VAR_")] - + if var_cols and "DECOY" in alignment_all_columns: # Read only necessary columns cols_to_read = var_cols + ["DECOY"] - logger.info(f"Reading {len(cols_to_read)} columns from alignment features") + logger.info( + f"Reading {len(cols_to_read)} columns from alignment features" + ) df_alignment = pd.read_parquet(alignment_file, columns=cols_to_read) - + logger.info("Processing alignment level feature scores") alignment_df = df_alignment[var_cols + ["DECOY"]].copy() - append = bool(ms1_cols or ms2_cols or (os.path.exists(transition_file) and transition_cols)) + append = bool( + ms1_cols + or ms2_cols + or (os.path.exists(transition_file) and transition_cols) + ) plot_callback(alignment_df, outfile, "alignment", append=append) del alignment_df, df_alignment # Free memory diff --git a/pyprophet/report.py b/pyprophet/report.py 
index 681e75a..b7f0220 100644 --- a/pyprophet/report.py +++ b/pyprophet/report.py @@ -918,14 +918,19 @@ def plot_score_distributions(pdf, plotter, df, score_mapping): n_rows = (n_scores + n_cols - 1) // n_cols # Calculate needed rows fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows)) - fig.suptitle("Score Distributions", y=1.02, fontsize=14) - plt.subplots_adjust(hspace=0.4, wspace=0.3) - # Flatten axes array for easy iteration - if n_scores > 1: + # Ensure axes is always a flat array for consistent indexing + # plt.subplots returns different types based on grid size: + # - Single subplot (1,1): returns single Axes object + # - Single row/col (1,n) or (n,1): returns 1D array + # - Grid (m,n): returns 2D array + if not isinstance(axes, np.ndarray): + axes = np.array([axes]) + elif axes.ndim > 1: axes = axes.flatten() - else: - axes = [axes] # Make it iterable even for single plot + + fig.suptitle("Score Distributions", y=1.02, fontsize=14) + plt.subplots_adjust(hspace=0.4, wspace=0.3) for i, (base_key, base_dict) in enumerate(score_mapping.items()): score_col = base_dict["score"] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[False-disable].out b/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[False-disable].out index 5431328..c59ca7c 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[False-disable].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[False-disable].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 
-8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 
-95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 
260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 
0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 24 columns] +[100 rows x 25 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[True-disable].out b/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[True-disable].out index 87ebae1..8f4829a 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[True-disable].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[True-disable].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 1343_b4_1;1359_y3_1;1344_b5_1;1360_y4_1;1345_b... 969.0;36907.0;1426.0;6131.0;2071.0;11984.0;559... 10322.0;251772.0;9915.0;43365.0;15040.0;80527.... 
117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR NaN NaN NaN 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR NaN NaN NaN 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR NaN NaN NaN 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR NaN NaN NaN 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 1803_b3_1;1813_y3_1;1821_y8_2;1804_b4_1;1814_y... 20367.0;19321.0;4323.0;3974.0;17424.0;5191.0;1... 89094.0;85016.0;20487.0;17689.0;74968.0;25322.... 
269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR NaN NaN NaN 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR NaN NaN NaN 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR NaN NaN NaN 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR NaN NaN NaN 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 1343_b4_1;1359_y3_1;1344_b5_1;1360_y4_1;1345_b... 969.0;36907.0;1426.0;6131.0;2071.0;11984.0;559... 10322.0;251772.0;9915.0;43365.0;15040.0;80527.... 
117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR NaN NaN NaN 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR NaN NaN NaN 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR NaN NaN NaN 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR NaN NaN NaN 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 1803_b3_1;1813_y3_1;1821_y8_2;1804_b4_1;1814_y... 20367.0;19321.0;4323.0;3974.0;17424.0;5191.0;1... 89094.0;85016.0;20487.0;17689.0;74968.0;25322.... 
269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR NaN NaN NaN 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR NaN NaN NaN 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR NaN NaN NaN 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR NaN NaN NaN 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-False].out index 5431328..c59ca7c 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth 
run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 
-95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 
260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 
0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 24 columns] +[100 rows x 25 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-True].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-True].out index 3c532e1..9d5ced6 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-True].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-True].out @@ -1,3 +1,3 @@ Empty DataFrame -Columns: [Charge, FullPeptideName, Intensity, ProteinName, RT, Sequence, aggr_prec_Peak_Apex, aggr_prec_Peak_Area, assay_iRT, assay_rt, d_score, decoy, delta_iRT, delta_rt, filename, iRT, id, leftWidth, m_score, m_score_protein_experiment_wide, m_score_protein_global, m_score_protein_run_specific, mz, peak_group_rank, rightWidth, run_id, transition_group_id] +Columns: [Charge, FullPeptideName, Intensity, ProteinName, RT, Sequence, aggr_prec_Peak_Apex, aggr_prec_Peak_Area, assay_iRT, assay_rt, d_score, decoy, delta_iRT, delta_rt, filename, iRT, id, leftWidth, m_score, m_score_protein_experiment_wide, m_score_protein_global, m_score_protein_run_specific, mz, peak_group_rank, pep, rightWidth, run_id, transition_group_id] Index: [] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-True-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-True-False].out index b7ec461..b3014b4 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-True-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-True-False].out @@ -1,14 +1,14 @@ - Charge 
FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.00 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.00 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.20 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.30 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.00 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 
-95 2 YTSDPDVTSVGPSK(UniMod:259) 230755.0 AQUA4SWATH_HMLangeA 1802.63 YTSDPDVTSVGPSK 194786.2681 924585.00 1.5 1736.8945 5.6930 0 2.0327 65.7355 napedro_L120420_010_SW.mzXML.gz 3.5327 6243564357659176748 1787.0300 0.0033 0.0033 0.0033 0.0033 730.8534 1 1821.1700 -8670811102654834151 20 -96 2 YTSDPDVTSVGPSK(UniMod:259) 4683.0 AQUA4SWATH_HMLangeA 1727.20 YTSDPDVTSVGPSK 2721.4443 21561.90 1.5 1736.8978 3.6145 0 -0.1546 -9.6978 napedro_L120420_010_SW.mzXML.gz 1.3454 8621961886436053858 1711.9200 0.0091 0.0033 0.0033 0.0033 730.8534 2 1749.4700 -8670811102654834151 20 -97 2 YTSDPDVTSVGPSK(UniMod:259) 2042.0 AQUA4SWATH_HMLangeA 1846.77 YTSDPDVTSVGPSK 670.7683 6339.12 1.5 1736.8956 0.4186 0 3.3126 109.8744 napedro_L120420_010_SW.mzXML.gz 4.8126 -5596989166542619604 1834.8199 0.2507 0.0033 0.0033 0.0033 730.8534 3 1858.7200 -8670811102654834151 20 -98 2 YTSDPDVTSVGPSK(UniMod:259) 1801.0 AQUA4SWATH_HMLangeA 1657.80 YTSDPDVTSVGPSK 1076.2317 4819.60 1.5 1736.8914 0.2832 0 -2.1667 -79.0914 napedro_L120420_010_SW.mzXML.gz -0.6667 -2650714328790198942 1653.8800 0.4000 0.0033 0.0033 0.0033 730.8534 4 1681.2000 -8670811102654834151 20 -99 2 YTSDPDVTSVGPSK(UniMod:259) 32774.0 AQUA4SWATH_HMLangeA 1930.69 YTSDPDVTSVGPSK 76935.7696 571655.00 1.5 1736.8936 -5.7742 0 5.7460 193.7964 napedro_L120420_010_SW.mzXML.gz 7.2460 9040480247797844482 1906.5100 0.4692 0.0033 0.0033 0.0033 730.8534 5 1971.3800 -8670811102654834151 20 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.00 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 
728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.00 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.20 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.30 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.00 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 
+95 2 YTSDPDVTSVGPSK(UniMod:259) 230755.0 AQUA4SWATH_HMLangeA 1802.63 YTSDPDVTSVGPSK 194786.2681 924585.00 1.5 1736.8945 5.6930 0 2.0327 65.7355 napedro_L120420_010_SW.mzXML.gz 3.5327 6243564357659176748 1787.0300 0.0033 0.0033 0.0033 0.0033 730.8534 1 0.0031 1821.1700 -8670811102654834151 20 +96 2 YTSDPDVTSVGPSK(UniMod:259) 4683.0 AQUA4SWATH_HMLangeA 1727.20 YTSDPDVTSVGPSK 2721.4443 21561.90 1.5 1736.8978 3.6145 0 -0.1546 -9.6978 napedro_L120420_010_SW.mzXML.gz 1.3454 8621961886436053858 1711.9200 0.0091 0.0033 0.0033 0.0033 730.8534 2 0.1143 1749.4700 -8670811102654834151 20 +97 2 YTSDPDVTSVGPSK(UniMod:259) 2042.0 AQUA4SWATH_HMLangeA 1846.77 YTSDPDVTSVGPSK 670.7683 6339.12 1.5 1736.8956 0.4186 0 3.3126 109.8744 napedro_L120420_010_SW.mzXML.gz 4.8126 -5596989166542619604 1834.8199 0.2507 0.0033 0.0033 0.0033 730.8534 3 1.0000 1858.7200 -8670811102654834151 20 +98 2 YTSDPDVTSVGPSK(UniMod:259) 1801.0 AQUA4SWATH_HMLangeA 1657.80 YTSDPDVTSVGPSK 1076.2317 4819.60 1.5 1736.8914 0.2832 0 -2.1667 -79.0914 napedro_L120420_010_SW.mzXML.gz -0.6667 -2650714328790198942 1653.8800 0.4000 0.0033 0.0033 0.0033 730.8534 4 1.0000 1681.2000 -8670811102654834151 20 +99 2 YTSDPDVTSVGPSK(UniMod:259) 32774.0 AQUA4SWATH_HMLangeA 1930.69 YTSDPDVTSVGPSK 76935.7696 571655.00 1.5 1736.8936 -5.7742 0 5.7460 193.7964 napedro_L120420_010_SW.mzXML.gz 7.2460 9040480247797844482 1906.5100 0.4692 0.0033 0.0033 0.0033 730.8534 5 1.0000 1971.3800 -8670811102654834151 20 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-True-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-True-False-False].out index 90381cb..fa31d5c 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-True-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-True-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence 
aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 
4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 
10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 
8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 
269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 
10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-False].out index b77b45e..f117c36 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1.0 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2.0 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3.0 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4.0 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5.0 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1.0 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2.0 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3.0 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4.0 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5.0 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1.0 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2.0 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3.0 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4.0 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5.0 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... 
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1.0 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2.0 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3.0 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4.0 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5.0 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 24 columns] +[100 rows x 25 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-True].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-True].out index 85769b2..2520c48 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-True].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-True].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_protein_experiment_wide m_score_protein_run_specific mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0625 0.0625 728.8795 1.0 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0625 0.0625 728.8795 2.0 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0625 0.0625 728.8795 3.0 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0625 0.0625 728.8795 4.0 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0625 0.0625 728.8795 5.0 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0625 0.0625 612.3184 1.0 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0625 0.0625 612.3184 2.0 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0625 0.0625 612.3184 3.0 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0625 0.0625 612.3184 4.0 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0625 0.0625 612.3184 5.0 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_protein_experiment_wide m_score_protein_run_specific mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0625 0.0625 728.8795 1.0 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0625 0.0625 728.8795 2.0 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0625 0.0625 728.8795 3.0 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0625 0.0625 728.8795 4.0 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0625 0.0625 728.8795 5.0 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0625 0.0625 612.3184 1.0 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0625 0.0625 612.3184 2.0 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0625 0.0625 612.3184 3.0 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0625 0.0625 612.3184 4.0 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0625 0.0625 612.3184 5.0 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 26 columns] +[100 rows x 27 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-True-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-True-False].out index e74a991..20406b3 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-True-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-True-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 728.8795 1.0 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2.0 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3.0 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4.0 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5.0 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0033 0.0033 0.0033 612.3184 1.0 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0033 0.0033 0.0033 612.3184 2.0 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0033 0.0033 0.0033 612.3184 3.0 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0033 0.0033 0.0033 612.3184 4.0 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0033 0.0033 0.0033 612.3184 5.0 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 728.8795 1.0 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2.0 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3.0 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4.0 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5.0 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0033 0.0033 0.0033 612.3184 1.0 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0033 0.0033 0.0033 612.3184 2.0 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0033 0.0033 0.0033 612.3184 3.0 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0033 0.0033 0.0033 612.3184 4.0 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0033 0.0033 0.0033 612.3184 5.0 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-True-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-True-False-False].out index 6a1fed7..f8f9d27 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-True-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-True-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 
ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1.0 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2.0 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3.0 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4.0 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 
30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5.0 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1.0 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2.0 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3.0 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 
6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4.0 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5.0 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1.0 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2.0 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3.0 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4.0 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5.0 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1.0 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2.0 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3.0 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4.0 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 
10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5.0 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-False].out index 6a3c091..7540ce7 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... 
... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 24 columns] +[100 rows x 25 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-True].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-True].out index dffb2b5..13284eb 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-True].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-True].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_protein_experiment_wide m_score_protein_run_specific mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0625 0.0625 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0625 0.0625 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0625 0.0625 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0625 0.0625 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0625 0.0625 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0625 0.0625 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0625 0.0625 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0625 0.0625 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0625 0.0625 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0625 0.0625 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_protein_experiment_wide m_score_protein_run_specific mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0625 0.0625 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0625 0.0625 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0625 0.0625 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0625 0.0625 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0625 0.0625 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0625 0.0625 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0625 0.0625 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0625 0.0625 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0625 0.0625 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0625 0.0625 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 26 columns] +[100 rows x 27 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-True-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-True-False].out index fe1be09..e81a8b3 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-True-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-True-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0033 0.0033 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0033 0.0033 0.0033 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0033 0.0033 0.0033 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0033 0.0033 0.0033 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0033 0.0033 0.0033 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0033 0.0033 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0033 0.0033 0.0033 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0033 0.0033 0.0033 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0033 0.0033 0.0033 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0033 0.0033 0.0033 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-True-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-True-False-False].out index d201015..7d752c0 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-True-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-True-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 
0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 
269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 
10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 
0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 
10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out index d26f2c0..795ea38 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out @@ -1,13 +1,13 @@ -Exported 3410 rows with 98 columns (no transition data) +Exported 3410 rows with 100 columns (no transition data) Score columns found: ['SCORE_MS2_PEAK_GROUP_RANK', 'SCORE_MS2_PEP', 'SCORE_MS2_P_VALUE', 'SCORE_MS2_Q_VALUE', 'SCORE_MS2_SCORE', 'SCORE_PEPTIDE_GLOBAL_PEP', 'SCORE_PEPTIDE_GLOBAL_P_VALUE', 'SCORE_PEPTIDE_GLOBAL_Q_VALUE', 'SCORE_PEPTIDE_GLOBAL_SCORE', 'SCORE_PROTEIN_GLOBAL_PEP', 'SCORE_PROTEIN_GLOBAL_P_VALUE', 
'SCORE_PROTEIN_GLOBAL_Q_VALUE', 'SCORE_PROTEIN_GLOBAL_SCORE'] - ANNOTATION DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FEATURE_TRANSITION_APEX_INTENSITY FEATURE_TRANSITION_AREA_INTENSITY FEATURE_TRANSITION_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_TRANSITION_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_TRANSITION_VAR_LOG_INTENSITY FEATURE_TRANSITION_VAR_LOG_SN_SCORE FEATURE_TRANSITION_VAR_MASSDEV_SCORE FEATURE_TRANSITION_VAR_XCORR_COELUTION FEATURE_TRANSITION_VAR_XCORR_SHAPE FILENAME GENE_DECOY GENE_ID GENE_NAME IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PRODUCT_MZ PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH 
RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE TRANSITION_CHARGE TRANSITION_DECOY TRANSITION_DETECTING TRANSITION_ID TRANSITION_LIBRARY_INTENSITY TRANSITION_ORDINAL TRANSITION_TRAML_ID TRANSITION_TYPE UNMODIFIED_SEQUENCE -0 None -85.0733 NaN 1923.17 483971408708572459 192394.8906 935372.0000 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1898.6700 GIGDWSDSK(UniMod:259) 7.0277 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1946.4600 -8670811102654834151 1 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -1 None -55.2126 NaN 1953.03 6854889104354289238 5696.7271 45882.6016 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1946.4600 GIGDWSDSK(UniMod:259) 7.8936 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 1977.1899 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -2 None 9.7944 NaN 2018.03 2696300170322160855 17401.1816 95751.7969 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 2001.0900 GIGDWSDSK(UniMod:259) 9.7785 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 2024.9800 -8670811102654834151 2 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -3 None -130.8641 NaN 1877.37 8207933629855485114 6239.5195 48788.5000 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1857.7000 GIGDWSDSK(UniMod:259) 5.7000 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 1898.6700 -8670811102654834151 3 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -4 None -268.6805 NaN 1739.56 745237666153652118 6493.7773 66798.3984 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1717.7300 GIGDWSDSK(UniMod:259) 1.7038 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1762.1100 -8670811102654834151 5 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -5 None -6.0218 NaN 3084.15 1082368609638691369 88839.6328 604041.0000 0.9885 0.1408 0.7967 0.0000 0.9766 37375.0 195175.0 4.0 0.8376 NaN 0.7351 0.9983 0.0000 0.9806 0.9979 0.0536 0.0300 0.0412 0.1268 4.8405 0.6753 2.2770 1.9680 0.0021 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9976 0.9985 8.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 3066.2200 ESDILAVVK(UniMod:259) 40.6918 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 3110.6101 -8670811102654834151 1 0.0031 0.0029 0.0033 5.5702 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -6 None 228.9700 NaN 3319.15 -1344271892660954750 9115.7090 102078.0000 0.7000 1.0399 16.1263 5.5000 0.4975 4782.0 23890.0 2.0 0.4271 NaN 0.0900 0.9602 1.0000 0.7442 0.9239 0.3826 0.1244 0.1502 0.4641 3.2047 1.1861 14.1720 12.7184 0.0661 NaN NaN NaN NaN NaN NaN 0.7830 0.1486 0.8549 0.8773 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 3301.7800 ESDILAVVK(UniMod:259) 47.5056 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 3339.3301 -8670811102654834151 5 1.0000 0.4692 0.4692 -1.8443 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -7 None -124.5660 NaN 2965.61 -4515618252120499488 9268.4902 109284.0000 0.6217 1.1110 22.4222 1.5000 0.7138 793.0 6371.0 4.0 0.8481 NaN 0.0240 0.9434 0.0000 0.8820 0.9766 0.2001 0.0990 0.1152 0.3558 1.0582 0.5825 6.0852 4.5232 0.0365 NaN NaN NaN NaN NaN NaN 0.9749 0.0676 0.7227 0.8143 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 2946.7400 ESDILAVVK(UniMod:259) 37.2545 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 2994.5300 -8670811102654834151 3 1.0000 0.0205 0.0210 1.9398 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -8 None 28.5831 NaN 3118.76 -4044853666210028406 9989.7793 33949.8008 0.2103 0.6817 10.4054 1.0774 0.8894 865.0 2909.0 4.0 0.8179 NaN 0.0110 0.4917 0.0000 0.6890 0.9634 0.2912 0.1309 0.1515 0.4848 1.3179 0.6547 5.8786 5.3254 0.0080 NaN NaN NaN NaN NaN NaN 0.7830 0.3902 0.8943 0.8531 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 3114.0200 ESDILAVVK(UniMod:259) 41.6952 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 3131.0901 -8670811102654834151 2 1.0000 0.0176 0.0180 2.2958 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -9 None -184.3073 NaN 2905.87 7439833196907350500 15059.3516 143080.0000 -0.2753 3.5529 3.1948 3.9578 0.6528 1527.0 8281.0 5.0 0.5666 NaN 0.0312 0.8691 0.0000 -0.3982 0.7375 0.6314 0.2844 0.3777 1.1866 2.5854 0.9876 9.8422 8.8909 0.0538 NaN NaN NaN NaN NaN NaN 3.7575 2.5131 0.6924 0.6775 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 2881.8701 ESDILAVVK(UniMod:259) 35.5222 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 2929.6699 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.8184 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK + ANNOTATION DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FEATURE_TRANSITION_APEX_INTENSITY FEATURE_TRANSITION_AREA_INTENSITY FEATURE_TRANSITION_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_TRANSITION_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_TRANSITION_VAR_LOG_INTENSITY FEATURE_TRANSITION_VAR_LOG_SN_SCORE FEATURE_TRANSITION_VAR_MASSDEV_SCORE FEATURE_TRANSITION_VAR_XCORR_COELUTION FEATURE_TRANSITION_VAR_XCORR_SHAPE FILENAME GENE_DECOY GENE_ID GENE_NAME IM_leftWidth IM_rightWidth IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME 
PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PRODUCT_MZ PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE TRANSITION_CHARGE TRANSITION_DECOY TRANSITION_DETECTING TRANSITION_ID TRANSITION_LIBRARY_INTENSITY TRANSITION_ORDINAL TRANSITION_TRAML_ID TRANSITION_TYPE UNMODIFIED_SEQUENCE +0 None -85.0733 NaN 1923.17 483971408708572459 192394.8906 935372.0000 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522 1898.6700 GIGDWSDSK(UniMod:259) 7.0277 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1946.4600 -8670811102654834151 1 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +1 None -55.2126 NaN 1953.03 6854889104354289238 5696.7271 45882.6016 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522 1946.4600 GIGDWSDSK(UniMod:259) 7.8936 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 1977.1899 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +2 None 9.7944 NaN 2018.03 2696300170322160855 17401.1816 95751.7969 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522 2001.0900 GIGDWSDSK(UniMod:259) 9.7785 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 2024.9800 -8670811102654834151 2 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +3 None -130.8641 NaN 1877.37 8207933629855485114 6239.5195 48788.5000 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522 1857.7000 GIGDWSDSK(UniMod:259) 5.7000 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 1898.6700 -8670811102654834151 3 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +4 None -268.6805 NaN 1739.56 745237666153652118 6493.7773 66798.3984 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522 1717.7300 GIGDWSDSK(UniMod:259) 1.7038 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1762.1100 -8670811102654834151 5 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +5 None -6.0218 NaN 3084.15 1082368609638691369 88839.6328 604041.0000 0.9885 0.1408 0.7967 0.0000 0.9766 37375.0 195175.0 4.0 0.8376 NaN 0.7351 0.9983 0.0000 0.9806 0.9979 0.0536 0.0300 0.0412 0.1268 4.8405 0.6753 2.2770 1.9680 0.0021 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9976 0.9985 8.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385 3066.2200 ESDILAVVK(UniMod:259) 40.6918 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 3110.6101 -8670811102654834151 1 0.0031 0.0029 0.0033 5.5702 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +6 None 228.9700 NaN 3319.15 -1344271892660954750 9115.7090 102078.0000 0.7000 1.0399 16.1263 5.5000 0.4975 4782.0 23890.0 2.0 0.4271 NaN 0.0900 0.9602 1.0000 0.7442 0.9239 0.3826 0.1244 0.1502 0.4641 3.2047 1.1861 14.1720 12.7184 0.0661 NaN NaN NaN NaN NaN NaN 0.7830 0.1486 0.8549 0.8773 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385 3301.7800 ESDILAVVK(UniMod:259) 47.5056 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 3339.3301 -8670811102654834151 5 1.0000 0.4692 0.4692 -1.8443 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +7 None -124.5660 NaN 2965.61 -4515618252120499488 9268.4902 109284.0000 0.6217 1.1110 22.4222 1.5000 0.7138 793.0 6371.0 4.0 0.8481 NaN 0.0240 0.9434 0.0000 0.8820 0.9766 0.2001 0.0990 0.1152 0.3558 1.0582 0.5825 6.0852 4.5232 0.0365 NaN NaN NaN NaN NaN NaN 0.9749 0.0676 0.7227 0.8143 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385 2946.7400 ESDILAVVK(UniMod:259) 37.2545 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 2994.5300 -8670811102654834151 3 1.0000 0.0205 0.0210 1.9398 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +8 None 28.5831 NaN 3118.76 -4044853666210028406 9989.7793 33949.8008 0.2103 0.6817 10.4054 1.0774 0.8894 865.0 2909.0 4.0 0.8179 NaN 0.0110 0.4917 0.0000 0.6890 0.9634 0.2912 0.1309 0.1515 0.4848 1.3179 0.6547 5.8786 5.3254 0.0080 NaN NaN NaN NaN NaN NaN 0.7830 0.3902 0.8943 0.8531 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385 3114.0200 ESDILAVVK(UniMod:259) 41.6952 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 3131.0901 -8670811102654834151 2 1.0000 0.0176 0.0180 2.2958 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +9 None -184.3073 NaN 2905.87 7439833196907350500 15059.3516 143080.0000 -0.2753 3.5529 3.1948 3.9578 0.6528 1527.0 8281.0 5.0 0.5666 NaN 0.0312 0.8691 0.0000 -0.3982 0.7375 0.6314 0.2844 0.3777 1.1866 2.5854 0.9876 9.8422 8.8909 0.0538 NaN NaN NaN NaN NaN NaN 3.7575 2.5131 0.6924 0.6775 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385 2881.8701 ESDILAVVK(UniMod:259) 35.5222 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 2929.6699 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.8184 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out index 100164f..67a0288 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out @@ -1,13 +1,13 @@ -Exported 97964 rows with 98 columns +Exported 97964 rows with 100 columns Score columns found: ['SCORE_MS2_PEAK_GROUP_RANK', 'SCORE_MS2_PEP', 'SCORE_MS2_P_VALUE', 'SCORE_MS2_Q_VALUE', 'SCORE_MS2_SCORE', 'SCORE_PEPTIDE_GLOBAL_PEP', 'SCORE_PEPTIDE_GLOBAL_P_VALUE', 'SCORE_PEPTIDE_GLOBAL_Q_VALUE', 'SCORE_PEPTIDE_GLOBAL_SCORE', 'SCORE_PROTEIN_GLOBAL_PEP', 'SCORE_PROTEIN_GLOBAL_P_VALUE', 'SCORE_PROTEIN_GLOBAL_Q_VALUE', 'SCORE_PROTEIN_GLOBAL_SCORE'] - ANNOTATION DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN 
FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FEATURE_TRANSITION_APEX_INTENSITY FEATURE_TRANSITION_AREA_INTENSITY FEATURE_TRANSITION_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_TRANSITION_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_TRANSITION_VAR_LOG_INTENSITY FEATURE_TRANSITION_VAR_LOG_SN_SCORE FEATURE_TRANSITION_VAR_MASSDEV_SCORE FEATURE_TRANSITION_VAR_XCORR_COELUTION FEATURE_TRANSITION_VAR_XCORR_SHAPE FILENAME GENE_DECOY GENE_ID GENE_NAME IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PRODUCT_MZ PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE TRANSITION_CHARGE TRANSITION_DECOY TRANSITION_DETECTING TRANSITION_ID TRANSITION_LIBRARY_INTENSITY TRANSITION_ORDINAL TRANSITION_TRAML_ID TRANSITION_TYPE UNMODIFIED_SEQUENCE -0 None -85.0733 NaN 1923.17 4.8397e+17 192394.8906 935372.0000 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1898.6700 GIGDWSDSK(UniMod:259) 7.0277 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 
AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1946.4600 -8.6708e+18 1.0 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -1 None -55.2126 NaN 1953.03 6.8549e+18 5696.7271 45882.6016 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1946.4600 GIGDWSDSK(UniMod:259) 7.8936 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1977.1899 -8.6708e+18 4.0 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -2 None 9.7944 NaN 2018.03 2.6963e+18 17401.1816 95751.7969 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 2001.0900 GIGDWSDSK(UniMod:259) 9.7785 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 2024.9800 -8.6708e+18 2.0 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -3 None -130.8641 NaN 1877.37 8.2079e+18 6239.5195 48788.5000 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1857.7000 GIGDWSDSK(UniMod:259) 5.7000 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1898.6700 -8.6708e+18 3.0 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -4 None -268.6805 NaN 1739.56 7.4524e+17 6493.7773 66798.3984 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1717.7300 GIGDWSDSK(UniMod:259) 1.7038 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 1762.1100 -8.6708e+18 5.0 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -5 None -6.0218 NaN 3084.15 1.0824e+18 88839.6328 604041.0000 0.9885 0.1408 0.7967 0.0000 0.9766 37375.0 195175.0 4.0 0.8376 NaN 0.7351 0.9983 0.0000 0.9806 0.9979 0.0536 0.0300 0.0412 0.1268 4.8405 0.6753 2.2770 1.9680 0.0021 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9976 0.9985 8.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 3066.2200 ESDILAVVK(UniMod:259) 40.6918 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 3110.6101 -8.6708e+18 1.0 0.0031 0.0029 0.0033 5.5702 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -6 None 228.9700 NaN 3319.15 -1.3443e+18 9115.7090 102078.0000 0.7000 1.0399 16.1263 5.5000 0.4975 4782.0 23890.0 2.0 0.4271 NaN 0.0900 0.9602 1.0000 0.7442 0.9239 0.3826 0.1244 0.1502 0.4641 3.2047 1.1861 14.1720 12.7184 0.0661 NaN NaN NaN NaN NaN NaN 0.7830 0.1486 0.8549 0.8773 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 3301.7800 ESDILAVVK(UniMod:259) 47.5056 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 3339.3301 -8.6708e+18 5.0 1.0000 0.4692 0.4692 -1.8443 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -7 None -124.5660 NaN 2965.61 -4.5156e+18 9268.4902 109284.0000 0.6217 1.1110 22.4222 1.5000 0.7138 793.0 6371.0 4.0 0.8481 NaN 0.0240 0.9434 0.0000 0.8820 0.9766 0.2001 0.0990 0.1152 0.3558 1.0582 0.5825 6.0852 4.5232 0.0365 NaN NaN NaN NaN NaN NaN 0.9749 0.0676 0.7227 0.8143 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 2946.7400 ESDILAVVK(UniMod:259) 37.2545 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 2994.5300 -8.6708e+18 3.0 1.0000 0.0205 0.0210 1.9398 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -8 None 28.5831 NaN 3118.76 -4.0449e+18 9989.7793 33949.8008 0.2103 0.6817 10.4054 1.0774 0.8894 865.0 2909.0 4.0 0.8179 NaN 0.0110 0.4917 0.0000 0.6890 0.9634 0.2912 0.1309 0.1515 0.4848 1.3179 0.6547 5.8786 5.3254 0.0080 NaN NaN NaN NaN NaN NaN 0.7830 0.3902 0.8943 0.8531 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 3114.0200 ESDILAVVK(UniMod:259) 41.6952 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 3131.0901 -8.6708e+18 2.0 1.0000 0.0176 0.0180 2.2958 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -9 None -184.3073 NaN 2905.87 7.4398e+18 15059.3516 143080.0000 -0.2753 3.5529 3.1948 3.9578 0.6528 1527.0 8281.0 5.0 0.5666 NaN 0.0312 0.8691 0.0000 -0.3982 0.7375 0.6314 0.2844 0.3777 1.1866 2.5854 0.9876 9.8422 8.8909 0.0538 NaN NaN NaN NaN NaN NaN 3.7575 2.5131 0.6924 0.6775 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 2881.8701 ESDILAVVK(UniMod:259) 35.5222 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 2929.6699 -8.6708e+18 4.0 1.0000 0.4692 0.4692 -1.8184 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK + ANNOTATION DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION 
FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FEATURE_TRANSITION_APEX_INTENSITY FEATURE_TRANSITION_AREA_INTENSITY FEATURE_TRANSITION_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_TRANSITION_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_TRANSITION_VAR_LOG_INTENSITY FEATURE_TRANSITION_VAR_LOG_SN_SCORE FEATURE_TRANSITION_VAR_MASSDEV_SCORE FEATURE_TRANSITION_VAR_XCORR_COELUTION FEATURE_TRANSITION_VAR_XCORR_SHAPE FILENAME GENE_DECOY GENE_ID GENE_NAME IM_leftWidth IM_rightWidth IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PRODUCT_MZ PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE TRANSITION_CHARGE TRANSITION_DECOY TRANSITION_DETECTING TRANSITION_ID TRANSITION_LIBRARY_INTENSITY TRANSITION_ORDINAL TRANSITION_TRAML_ID TRANSITION_TYPE UNMODIFIED_SEQUENCE +0 None -85.0733 NaN 1923.17 4.8397e+17 192394.8906 935372.0000 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522.0 1898.6700 GIGDWSDSK(UniMod:259) 7.0277 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 1946.4600 -8.6708e+18 1.0 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +1 None -55.2126 NaN 1953.03 6.8549e+18 5696.7271 45882.6016 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522.0 1946.4600 GIGDWSDSK(UniMod:259) 7.8936 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1977.1899 -8.6708e+18 4.0 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +2 None 9.7944 NaN 2018.03 2.6963e+18 17401.1816 95751.7969 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522.0 2001.0900 GIGDWSDSK(UniMod:259) 9.7785 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 2024.9800 -8.6708e+18 2.0 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +3 None -130.8641 NaN 1877.37 8.2079e+18 6239.5195 48788.5000 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522.0 1857.7000 GIGDWSDSK(UniMod:259) 5.7000 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1898.6700 -8.6708e+18 3.0 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +4 None -268.6805 NaN 1739.56 7.4524e+17 6493.7773 66798.3984 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522.0 1717.7300 GIGDWSDSK(UniMod:259) 1.7038 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 1762.1100 -8.6708e+18 5.0 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +5 None -6.0218 NaN 3084.15 1.0824e+18 88839.6328 604041.0000 0.9885 0.1408 0.7967 0.0000 0.9766 37375.0 195175.0 4.0 0.8376 NaN 0.7351 0.9983 0.0000 0.9806 0.9979 0.0536 0.0300 0.0412 0.1268 4.8405 0.6753 2.2770 1.9680 0.0021 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9976 0.9985 8.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385.0 3066.2200 ESDILAVVK(UniMod:259) 40.6918 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 3110.6101 -8.6708e+18 1.0 0.0031 0.0029 0.0033 5.5702 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +6 None 228.9700 NaN 3319.15 -1.3443e+18 9115.7090 102078.0000 0.7000 1.0399 16.1263 5.5000 0.4975 4782.0 23890.0 2.0 0.4271 NaN 0.0900 0.9602 1.0000 0.7442 0.9239 0.3826 0.1244 0.1502 0.4641 3.2047 1.1861 14.1720 12.7184 0.0661 NaN NaN NaN NaN NaN NaN 0.7830 0.1486 0.8549 0.8773 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385.0 3301.7800 ESDILAVVK(UniMod:259) 47.5056 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 3339.3301 -8.6708e+18 5.0 1.0000 0.4692 0.4692 -1.8443 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +7 None -124.5660 NaN 2965.61 -4.5156e+18 9268.4902 109284.0000 0.6217 1.1110 22.4222 1.5000 0.7138 793.0 6371.0 4.0 0.8481 NaN 0.0240 0.9434 0.0000 0.8820 0.9766 0.2001 0.0990 0.1152 0.3558 1.0582 0.5825 6.0852 4.5232 0.0365 NaN NaN NaN NaN NaN NaN 0.9749 0.0676 0.7227 0.8143 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385.0 2946.7400 ESDILAVVK(UniMod:259) 37.2545 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 2994.5300 -8.6708e+18 3.0 1.0000 0.0205 0.0210 1.9398 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +8 None 28.5831 NaN 3118.76 -4.0449e+18 9989.7793 33949.8008 0.2103 0.6817 10.4054 1.0774 0.8894 865.0 2909.0 4.0 0.8179 NaN 0.0110 0.4917 0.0000 0.6890 0.9634 0.2912 0.1309 0.1515 0.4848 1.3179 0.6547 5.8786 5.3254 0.0080 NaN NaN NaN NaN NaN NaN 0.7830 0.3902 0.8943 0.8531 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385.0 3114.0200 ESDILAVVK(UniMod:259) 41.6952 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 3131.0901 -8.6708e+18 2.0 1.0000 0.0176 0.0180 2.2958 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +9 None -184.3073 NaN 2905.87 7.4398e+18 15059.3516 143080.0000 -0.2753 3.5529 3.1948 3.9578 0.6528 1527.0 8281.0 5.0 0.5666 NaN 0.0312 0.8691 0.0000 -0.3982 0.7375 0.6314 0.2844 0.3777 1.1866 2.5854 0.9876 9.8422 8.8909 0.0538 NaN NaN NaN NaN NaN NaN 3.7575 2.5131 0.6924 0.6775 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385.0 2881.8701 ESDILAVVK(UniMod:259) 35.5222 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 2929.6699 -8.6708e+18 4.0 1.0000 0.4692 0.4692 -1.8184 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out index 0d886bc..7515c06 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out @@ -1,10 +1,10 @@ -Precursor data: 3410 rows with 79 columns +Precursor data: 3410 rows with 81 columns Transition data: 96259 rows with 23 columns Precursor score columns: ['SCORE_MS2_PEAK_GROUP_RANK', 'SCORE_MS2_PEP', 'SCORE_MS2_P_VALUE', 'SCORE_MS2_Q_VALUE', 'SCORE_MS2_SCORE', 'SCORE_PEPTIDE_GLOBAL_PEP', 'SCORE_PEPTIDE_GLOBAL_P_VALUE', 'SCORE_PEPTIDE_GLOBAL_Q_VALUE', 'SCORE_PEPTIDE_GLOBAL_SCORE', 'SCORE_PROTEIN_GLOBAL_PEP', 'SCORE_PROTEIN_GLOBAL_P_VALUE', 'SCORE_PROTEIN_GLOBAL_Q_VALUE', 'SCORE_PROTEIN_GLOBAL_SCORE'] Precursor data sample: - DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE 
FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FILENAME GENE_DECOY GENE_ID GENE_NAME IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE UNMODIFIED_SEQUENCE -0 -85.0733 NaN 1923.17 483971408708572459 192394.8869 935372.0 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 
0.0000 0.9660 0.9924 6.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1898.67 GIGDWSDSK(UniMod:259) 7.0277 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1946.4600 -8670811102654834151 1 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK -1 -55.2126 NaN 1953.03 6854889104354289238 5696.7273 45882.6 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1946.46 GIGDWSDSK(UniMod:259) 7.8936 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1977.1899 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK -2 9.7944 NaN 2018.03 2696300170322160855 17401.1825 95751.8 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 2001.09 GIGDWSDSK(UniMod:259) 9.7785 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
AQUA4SWATH_HMLangeF NaN 5 2024.9800 -8670811102654834151 2 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK -3 -130.8641 NaN 1877.37 8207933629855485114 6239.5198 48788.5 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1857.70 GIGDWSDSK(UniMod:259) 5.7000 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1898.6700 -8670811102654834151 3 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK -4 -268.6805 NaN 1739.56 745237666153652118 6493.7774 66798.4 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1717.73 GIGDWSDSK(UniMod:259) 1.7038 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
AQUA4SWATH_HMLangeF NaN 5 1762.1100 -8670811102654834151 5 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK + DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FILENAME GENE_DECOY GENE_ID GENE_NAME IM_leftWidth IM_rightWidth IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE 
SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE UNMODIFIED_SEQUENCE +0 -85.0733 NaN 1923.17 483971408708572459 192394.8869 935372.0 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN NaN NaN 522 1898.67 GIGDWSDSK(UniMod:259) 7.0277 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1946.4600 -8670811102654834151 1 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +1 -55.2126 NaN 1953.03 6854889104354289238 5696.7273 45882.6 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN NaN NaN 522 1946.46 GIGDWSDSK(UniMod:259) 7.8936 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1977.1899 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +2 9.7944 NaN 2018.03 2696300170322160855 17401.1825 95751.8 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN NaN NaN 522 2001.09 GIGDWSDSK(UniMod:259) 9.7785 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
AQUA4SWATH_HMLangeF NaN 5 2024.9800 -8670811102654834151 2 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +3 -130.8641 NaN 1877.37 8207933629855485114 6239.5198 48788.5 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN NaN NaN 522 1857.70 GIGDWSDSK(UniMod:259) 5.7000 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1898.6700 -8670811102654834151 3 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +4 -268.6805 NaN 1739.56 745237666153652118 6493.7774 66798.4 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN NaN NaN 522 1717.73 GIGDWSDSK(UniMod:259) 1.7038 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
AQUA4SWATH_HMLangeF NaN 5 1762.1100 -8670811102654834151 5 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out index fe59cae..5ed4c2d 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out @@ -1,4 +1,4 @@ -Exported 97965 rows with 103 columns +Exported 97964 rows with 105 columns SCORE_IPF columns found: ['SCORE_IPF_PEP', 'SCORE_IPF_PRECURSOR_PEAKGROUP_PEP', 'SCORE_IPF_QVALUE'] Sample data with IPF scores: FEATURE_ID SCORE_IPF_PEP SCORE_IPF_PRECURSOR_PEAKGROUP_PEP SCORE_IPF_QVALUE