diff --git a/README.md b/README.md index 9039cb9..7484fc1 100644 --- a/README.md +++ b/README.md @@ -154,15 +154,15 @@ E.g. http://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/absolute #### Features to peptides ```asciidoc -ibaqpy features2peptides -p tests/PXD003947/PXD003947-feature.parquet -s tests/PXD003947/PXD003947.sdrf.tsv --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output tests/PXD003947/PXD003947-peptides-norm.csv +ibaqpyc features2peptides -p PXD000000.ibaq.parquet -s PXD000000.sdrf.tsv --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output PXD000000-peptides-norm.csv ``` ```asciidoc Usage: features2peptides.py [OPTIONS] Options: - -p, --parquet TEXT Parquet file import generated by quantms.io - -s, --sdrf TEXT SDRF file import generated by quantms + -p, --parquet TEXT iBAQ view generated by quantms.io + -s, --sdrf TEXT SDRF file for the experiment --min_aa INTEGER Minimum number of amino acids to filter peptides --min_unique INTEGER Minimum number of unique peptides to filter @@ -192,7 +192,7 @@ Options: #### Compute IBAQ/TPA ```asciidoc -ibaqpy peptides2protein -f Homo-sapiens-uniprot-reviewed-contaminants-decoy-202210.fasta -p PXD017834-peptides.csv -e Trypsin -n -t -r --ploidy 2 --cpc 200 --organism human --output PXD003947.tsv --verbose +ibaqpyc peptides2protein -f Homo-sapiens-uniprot-reviewed-contaminants-decoy-202210.fasta -p PXD017834-peptides.csv -e Trypsin -n -t -r --ploidy 2 --cpc 200 --organism human --output PXD003947.tsv --verbose ``` ```asciidoc @@ -223,7 +223,7 @@ Options: ### Citation -> Zheng P, Audain E, Webel H, Dai C, Klein J, Hitz MP, Sachsenberg T, Bai M, Perez-Riverol Y. ibaqpy: A scalable Python package for baseline quantification in proteomics leveraging SDRF metadata. bioRxiv 2025.02.08.637208; doi: https://doi.org/10.1101/2025.02.08.637208 +> Zheng P, Audain E, Webel H, Dai C, Klein J, Hitz MP, Sachsenberg T, Bai M, Perez-Riverol Y. Ibaqpy: A scalable Python package for baseline quantification in proteomics leveraging SDRF metadata. J Proteomics. 2025 Jun 15;317:105440. doi: https://doi.org/10.1016/j.jprot.2025.105440. Epub 2025 Apr 21. PMID: 40268243. Other relevant publications: diff --git a/ibaqpy/ibaq/peptide_normalization.py b/ibaqpy/ibaq/peptide_normalization.py index cd89668..ee0470b 100644 --- a/ibaqpy/ibaq/peptide_normalization.py +++ b/ibaqpy/ibaq/peptide_normalization.py @@ -208,6 +208,18 @@ def apply_initial_filtering(data_df: pd.DataFrame, min_aa: int) -> pd.DataFrame: data_df = data_df[(data_df["Condition"] != "Empty") | (data_df["Condition"].isnull())] + # "Run" is NA for reference files not found in the SDRF file. + if data_df[RUN].isna().any(): + + missing_files = data_df.loc[ + data_df[RUN].isna(), "Reference" + ].drop_duplicates().tolist() + + logger.warning( + f"Reference files {missing_files} are not present in the SDRF file. Skipping calculation." + ) + data_df.dropna(subset=[RUN], inplace=True) + # Filter peptides with less amino acids than min_aa (default: 7) data_df.loc[:, "len"] = data_df[PEPTIDE_CANONICAL].apply(len) data_df = data_df[data_df["len"] >= min_aa] diff --git a/ibaqpy/model/quantification_type.py b/ibaqpy/model/quantification_type.py index 4edf1d4..a14ea97 100644 --- a/ibaqpy/model/quantification_type.py +++ b/ibaqpy/model/quantification_type.py @@ -66,7 +66,9 @@ def classify( """ label_scheme = None - if len(labels) == 1 and any("label free" in s.lower() for s in labels): + if len(labels) == 1 and any( + keyword in s.lower() for s in labels for keyword in ["lfq", "label free"] + ): label_category = cls.LFQ elif any("tmt" in s.lower() for s in labels): @@ -96,7 +98,7 @@ def classify( else: raise ValueError( - f"Cannot infer labeling scheme from {labels}, only support label free, TMT and ITRAQ experiment!" + f"Cannot infer labeling scheme from {labels}, only support label free (or lfq), TMT and ITRAQ experiment!" ) return label_category, label_scheme