bigbio · ypriverol · Sep 25, 2025 · May 14, 2025 · Aug 27, 2025 · Aug 27, 2025
diff --git a/README.md b/README.md
@@ -154,15 +154,15 @@ E.g. http://ftp.pride.ebi.ac.uk/pub/databases/pride/resources/proteomes/absolute
 #### Features to peptides
 
 ```asciidoc
-ibaqpy features2peptides -p tests/PXD003947/PXD003947-feature.parquet -s tests/PXD003947/PXD003947.sdrf.tsv --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output tests/PXD003947/PXD003947-peptides-norm.csv
+ibaqpyc features2peptides -p PXD000000.ibaq.parquet -s PXD000000.sdrf.tsv --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output PXD000000-peptides-norm.csv
-ibaqpyc features2peptides -p PXD000000.ibaq.parquet -s PXD000000.sdrf.tsv --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output PXD000000-peptides-norm.csv
+ibaqpy features2peptides -p PXD000000.ibaq.parquet -s PXD000000.sdrf.tsv --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output PXD000000-peptides-norm.csv
-ibaqpyc features2peptides -p PXD000000.ibaq.parquet -s PXD000000.sdrf.tsv --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output PXD000000-peptides-norm.csv
+ibaqpy features2peptides -p PXD000000.ibaq.parquet -s PXD000000.sdrf.tsv --remove_ids data/contaminants_ids.tsv --remove_decoy_contaminants --remove_low_frequency_peptides --output PXD000000-peptides-norm.csv
 ```
 
 ```asciidoc
 Usage: features2peptides.py [OPTIONS]
 
 Options:
-  -p, --parquet TEXT              Parquet file import generated by quantms.io
-  -s, --sdrf TEXT                 SDRF file import generated by quantms
+  -p, --parquet TEXT              iBAQ view generated by quantms.io
+  -s, --sdrf TEXT                 SDRF file for the experiment
   --min_aa INTEGER                Minimum number of amino acids to filter
                                   peptides
   --min_unique INTEGER            Minimum number of unique peptides to filter
@@ -192,7 +192,7 @@ Options:
 #### Compute IBAQ/TPA
 
 ```asciidoc
-ibaqpy peptides2protein -f Homo-sapiens-uniprot-reviewed-contaminants-decoy-202210.fasta -p PXD017834-peptides.csv -e Trypsin -n -t -r --ploidy 2 --cpc 200 --organism human --output PXD003947.tsv --verbose
+ibaqpyc peptides2protein -f Homo-sapiens-uniprot-reviewed-contaminants-decoy-202210.fasta -p PXD017834-peptides.csv -e Trypsin -n -t -r --ploidy 2 --cpc 200 --organism human --output PXD003947.tsv --verbose
 ```
 
 ```asciidoc
@@ -223,7 +223,7 @@ Options:
 
 ### Citation
 
-> Zheng P, Audain E, Webel H, Dai C, Klein J, Hitz MP, Sachsenberg T, Bai M, Perez-Riverol Y. ibaqpy: A scalable Python package for baseline quantification in proteomics leveraging SDRF metadata. bioRxiv 2025.02.08.637208; doi: https://doi.org/10.1101/2025.02.08.637208
+> Zheng P, Audain E, Webel H, Dai C, Klein J, Hitz MP, Sachsenberg T, Bai M, Perez-Riverol Y. Ibaqpy: A scalable Python package for baseline quantification in proteomics leveraging SDRF metadata. J Proteomics. 2025 Jun 15;317:105440. doi: https://doi.org/10.1016/j.jprot.2025.105440. Epub 2025 Apr 21. PMID: 40268243.
 
 Other relevant publications:
 

diff --git a/ibaqpy/ibaq/peptide_normalization.py b/ibaqpy/ibaq/peptide_normalization.py
@@ -208,6 +208,18 @@ def apply_initial_filtering(data_df: pd.DataFrame, min_aa: int) -> pd.DataFrame:
 
     data_df = data_df[(data_df["Condition"] != "Empty") | (data_df["Condition"].isnull())]
 
+    # Rows whose reference file has no entry in the SDRF file carry NA in the RUN column; report those files, then drop the rows.
+    if data_df[RUN].isna().any():
+
+        missing_files = data_df.loc[
+            data_df[RUN].isna(), "Reference"
+        ].drop_duplicates().tolist()
+
+        logger.warning(
+            f"Reference files {missing_files} are not present in the SDRF file. Skipping calculation."
+        )
+        data_df.dropna(subset=[RUN], inplace=True)
+
     # Filter peptides with less amino acids than min_aa (default: 7)
     data_df.loc[:, "len"] = data_df[PEPTIDE_CANONICAL].apply(len)
     data_df = data_df[data_df["len"] >= min_aa]

diff --git a/ibaqpy/model/quantification_type.py b/ibaqpy/model/quantification_type.py
@@ -66,7 +66,9 @@ def classify(
         """
         label_scheme = None
 
-        if len(labels) == 1 and any("label free" in s.lower() for s in labels):
+        if len(labels) == 1 and any(
+            keyword in s.lower() for s in labels for keyword in ["lfq", "label free"]
+        ):
             label_category = cls.LFQ
 
         elif any("tmt" in s.lower() for s in labels):
@@ -96,7 +98,7 @@ def classify(
 
         else:
             raise ValueError(
-                f"Cannot infer labeling scheme from {labels}, only support label free, TMT and ITRAQ experiment!"
+                f"Cannot infer labeling scheme from {labels}, only support label free (or lfq), TMT and ITRAQ experiment!"
             )
         return label_category, label_scheme