From b4f02f5c5bc6cf1f89c79b15f4d1bec23a7676a3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 20:53:40 +0000 Subject: [PATCH 01/30] Initial plan From 499c5063f6856ca0d40bf55dba75b4a0a9be5424 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 20:56:55 +0000 Subject: [PATCH 02/30] Initial exploration of alignment integration requirement Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/scoring/_optimized.c | 726 +++++++++++++++++---------------- 1 file changed, 383 insertions(+), 343 deletions(-) diff --git a/pyprophet/scoring/_optimized.c b/pyprophet/scoring/_optimized.c index 91448a4e..e1df89f0 100644 --- a/pyprophet/scoring/_optimized.c +++ b/pyprophet/scoring/_optimized.c @@ -1,4 +1,4 @@ -/* Generated by Cython 3.1.2 */ +/* Generated by Cython 3.1.6 */ /* BEGIN: Cython Metadata { @@ -17,8 +17,16 @@ END: Cython Metadata */ #define PY_SSIZE_T_CLEAN #endif /* PY_SSIZE_T_CLEAN */ /* InitLimitedAPI */ -#if defined(Py_LIMITED_API) && !defined(CYTHON_LIMITED_API) +#if defined(Py_LIMITED_API) + #if !defined(CYTHON_LIMITED_API) #define CYTHON_LIMITED_API 1 + #endif +#elif defined(CYTHON_LIMITED_API) + #ifdef _MSC_VER + #pragma message ("Limited API usage is enabled with 'CYTHON_LIMITED_API' but 'Py_LIMITED_API' does not define a Python target version. Consider setting 'Py_LIMITED_API' instead.") + #else + #warning Limited API usage is enabled with 'CYTHON_LIMITED_API' but 'Py_LIMITED_API' does not define a Python target version. Consider setting 'Py_LIMITED_API' instead. + #endif #endif #include "Python.h" @@ -27,8 +35,8 @@ END: Cython Metadata */ #elif PY_VERSION_HEX < 0x03080000 #error Cython requires Python 3.8+. 
#else -#define __PYX_ABI_VERSION "3_1_2" -#define CYTHON_HEX_VERSION 0x030102F0 +#define __PYX_ABI_VERSION "3_1_6" +#define CYTHON_HEX_VERSION 0x030106F0 #define CYTHON_FUTURE_DIVISION 1 /* CModulePreamble */ #include @@ -391,6 +399,9 @@ END: Cython Metadata */ enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; #endif #endif +#ifndef CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME + #define CYTHON_LOCK_AND_GIL_DEADLOCK_AVOIDANCE_TIME 100 +#endif #ifndef __has_attribute #define __has_attribute(x) 0 #endif @@ -1295,6 +1306,7 @@ static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); typedef sdigit __Pyx_compact_pylong; typedef digit __Pyx_compact_upylong; #endif + static CYTHON_INLINE int __Pyx_PyLong_CompactAsLong(PyObject *x, long *return_value); #if PY_VERSION_HEX >= 0x030C00A5 #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) #else @@ -1371,7 +1383,7 @@ static const char *__pyx_filename; static const char* const __pyx_f[] = { "pyprophet/scoring/_optimized.pyx", "", - "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd", + "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd", "cpython/type.pxd", }; /* #### Code section: utility_code_proto_before_types ### */ @@ -1594,7 +1606,7 @@ typedef struct { /* #### Code section: numeric_typedefs ### */ -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":787 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":743 * # in Cython to enable them only on the right systems. 
* * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< @@ -1603,7 +1615,7 @@ typedef struct { */ typedef npy_int8 __pyx_t_5numpy_int8_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":788 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":744 * * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< @@ -1612,26 +1624,26 @@ typedef npy_int8 __pyx_t_5numpy_int8_t; */ typedef npy_int16 __pyx_t_5numpy_int16_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":789 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":745 * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< * ctypedef npy_int64 int64_t - * #ctypedef npy_int96 int96_t + * */ typedef npy_int32 __pyx_t_5numpy_int32_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":790 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":746 * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< - * #ctypedef npy_int96 int96_t - * #ctypedef npy_int128 int128_t + * + * ctypedef npy_uint8 uint8_t */ typedef npy_int64 __pyx_t_5numpy_int64_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":794 - * #ctypedef npy_int128 int128_t +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":748 + * ctypedef npy_int64 int64_t * * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< * ctypedef npy_uint16 uint16_t @@ -1639,7 +1651,7 @@ typedef npy_int64 __pyx_t_5numpy_int64_t; */ typedef npy_uint8 __pyx_t_5numpy_uint8_t; -/* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":795 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":749 * * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< @@ -1648,26 +1660,26 @@ typedef npy_uint8 __pyx_t_5numpy_uint8_t; */ typedef npy_uint16 __pyx_t_5numpy_uint16_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":796 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":750 * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< * ctypedef npy_uint64 uint64_t - * #ctypedef npy_uint96 uint96_t + * */ typedef npy_uint32 __pyx_t_5numpy_uint32_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":797 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":751 * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< - * #ctypedef npy_uint96 uint96_t - * #ctypedef npy_uint128 uint128_t + * + * ctypedef npy_float32 float32_t */ typedef npy_uint64 __pyx_t_5numpy_uint64_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":801 - * #ctypedef npy_uint128 uint128_t +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":753 + * ctypedef npy_uint64 uint64_t * * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< * ctypedef npy_float64 float64_t @@ -1675,7 +1687,7 @@ typedef npy_uint64 __pyx_t_5numpy_uint64_t; */ typedef npy_float32 __pyx_t_5numpy_float32_t; -/* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":802 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":754 * * ctypedef npy_float32 float32_t * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< @@ -1684,7 +1696,7 @@ typedef npy_float32 __pyx_t_5numpy_float32_t; */ typedef npy_float64 __pyx_t_5numpy_float64_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":809 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":761 * ctypedef double complex complex128_t * * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< @@ -1693,7 +1705,7 @@ typedef npy_float64 __pyx_t_5numpy_float64_t; */ typedef npy_longlong __pyx_t_5numpy_longlong_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":810 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":762 * * ctypedef npy_longlong longlong_t * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< @@ -1702,7 +1714,7 @@ typedef npy_longlong __pyx_t_5numpy_longlong_t; */ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":812 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":764 * ctypedef npy_ulonglong ulonglong_t * * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< @@ -1711,7 +1723,7 @@ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; */ typedef npy_intp __pyx_t_5numpy_intp_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":813 +/* 
"../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":765 * * ctypedef npy_intp intp_t * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< @@ -1720,7 +1732,7 @@ typedef npy_intp __pyx_t_5numpy_intp_t; */ typedef npy_uintp __pyx_t_5numpy_uintp_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":815 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":767 * ctypedef npy_uintp uintp_t * * ctypedef npy_double float_t # <<<<<<<<<<<<<< @@ -1729,7 +1741,7 @@ typedef npy_uintp __pyx_t_5numpy_uintp_t; */ typedef npy_double __pyx_t_5numpy_float_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":816 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":768 * * ctypedef npy_double float_t * ctypedef npy_double double_t # <<<<<<<<<<<<<< @@ -1738,7 +1750,7 @@ typedef npy_double __pyx_t_5numpy_float_t; */ typedef npy_double __pyx_t_5numpy_double_t; -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":817 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":769 * ctypedef npy_double float_t * ctypedef npy_double double_t * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< @@ -2635,22 +2647,22 @@ static int __Pyx__DelItemOnTypeDict(PyTypeObject *tp, PyObject *k); static int __Pyx_setup_reduce(PyObject* type_obj); /* TypeImport.proto */ -#ifndef __PYX_HAVE_RT_ImportType_proto_3_1_2 -#define __PYX_HAVE_RT_ImportType_proto_3_1_2 +#ifndef __PYX_HAVE_RT_ImportType_proto_3_1_6 +#define __PYX_HAVE_RT_ImportType_proto_3_1_6 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L #include #endif #if (defined (__STDC_VERSION__) && 
__STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L -#define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) alignof(s) +#define __PYX_GET_STRUCT_ALIGNMENT_3_1_6(s) alignof(s) #else -#define __PYX_GET_STRUCT_ALIGNMENT_3_1_2(s) sizeof(void*) +#define __PYX_GET_STRUCT_ALIGNMENT_3_1_6(s) sizeof(void*) #endif -enum __Pyx_ImportType_CheckSize_3_1_2 { - __Pyx_ImportType_CheckSize_Error_3_1_2 = 0, - __Pyx_ImportType_CheckSize_Warn_3_1_2 = 1, - __Pyx_ImportType_CheckSize_Ignore_3_1_2 = 2 +enum __Pyx_ImportType_CheckSize_3_1_6 { + __Pyx_ImportType_CheckSize_Error_3_1_6 = 0, + __Pyx_ImportType_CheckSize_Warn_3_1_6 = 1, + __Pyx_ImportType_CheckSize_Ignore_3_1_6 = 2 }; -static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size); +static PyTypeObject *__Pyx_ImportType_3_1_6(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_6 check_size); #endif /* FetchSharedCythonModule.proto */ @@ -3418,7 +3430,7 @@ static const char __pyx_k_F_A_R_86_1_a_Rq_c_2T_c_AV1_Q_a[] = "\200\001\360\006\0 static const char __pyx_k_strided_and_direct_or_indirect[] = ""; static const char __pyx_k_8_q_Rq_F_RvRuF_A_q_r_uBa_V1A_e2[] = "\200\001\360\010\000\005\035\320\0348\270\007\270q\300\005\300R\300q\330\004\024\220F\230&\240\001\240\021\330\004\r\210R\210v\220R\220u\230F\240\"\240A\330\004 \240\001\330\004\027\220q\360\006\000\005\013\210%\210r\220\021\330\010\017\210u\220B\220a\330\010\r\210V\2201\220A\330\010\016\210e\2202\220R\220t\2306\240\021\240&\250\003\2501\330\014\024\220A\330\010\024\220A\220X\230V\2406\250\027\260\001\330\010\017\210q\330\017\024\220A\220Q\330\004\013\2101"; static const char __pyx_k_F_BfBe6_1_a_q_q_aq_U_6_q_fAQ_V1[] = 
"\200\001\360\006\000\005\025\220F\230&\240\001\240\021\330\004\014\210B\210f\220B\220e\2306\240\022\2401\330\004\036\230a\330\004!\240\026\240q\250\001\330\004\037\230q\330\004 \240\006\240a\240q\330\004\"\240!\360\010\000\005\t\210\005\210U\220!\2206\230\026\230q\240\001\330\010\016\210f\220A\220Q\330\010\r\210V\2201\220A\330\010\013\2104\210s\220!\330\014\034\230A\330\014\020\220\001\320\021!\240\021\330\014\037\230q\330\014\032\230!\330\014\033\2301\330\014\r\330\010\013\2103\210b\220\001\330\014\032\230!\330\014\033\2301\330\004\010\210\001\320\t\031\230\021\330\004\013\2101"; -static const char __pyx_k_IJ_E_q_m6_RvR_fBa_a_t1_E_aq_Q_1[] = "\200\001\340IJ\330\004\034\230E\240\026\240q\250\001\330\004\036\230m\2506\260\021\260!\330\004\r\210R\210v\220R\220\240f\250B\250a\330\004\036\230a\360\014\000\005\010\200t\2101\330\010\014\210E\220\025\220a\220q\330\014\023\220=\240\001\240\021\330\014\025\220Q\330\014\030\230\003\2301\230E\240\021\240#\240R\240q\330\014\020\220\005\220U\230!\2303\230a\330\020\027\220s\230!\2305\240\001\240\023\240B\240a\330\020\023\2205\230\002\230!\330\024 \240\001\330\024\035\230Q\330\014\020\220\001\220\025\220a\330\010\017\210q\340\004\021\220\037\240\001\240\021\330\004\010\210\005\210U\220!\2201\330\010\017\210}\230A\230Q\330\010\013\210;\220c\230\021\330\014\025\220Q\330\014\030\230\003\2301\230E\240\021\240#\240R\240q\330\014\020\220\005\220U\230!\2303\230a\330\020\027\220s\230!\2305\240\001\240\023\240B\240a\330\020\023\2205\230\002\230!\330\024 
\240\001\330\024\035\230Q\330\r\030\230\003\2301\330\014\022\220!\330\014\023\220:\230R\230q\330\014\026\220a\330\014\017\210u\220A\220U\230#\230Q\330\020\031\230\021\330\021\026\220a\220v\230S\240\001\330\020\031\230\021\340\020\026\220d\230\"\230E\240\022\2401\330\024\033\2304\230r\240\026\240s\250!\330\024\027\220u\230A\230U\240#\240Q\330\030!\240\021\330\024\027\220u\230A\230U\240\"\240A\330\030\036\230a\340\030\037\230q\330\020\023\2207\230$\230a\330\024\027\220s\230!\2305\240\001\240\025\240b\250\006\250b\260\003\2601\260E\270\021\270&\300\002\300!\330\030!\240\021\340\030!\240\021\340\014\022\220'\230\022\2301\330\020\023\2205\230\001\230\027\240\002\240#\240S\250\005\250Q\250a\330\024\035\230W\240B\240a\340\024\025\340\014\022\220!\330\014\023\220:\230R\230q\330\014\026\220a\330\014\017\210u\220A\220U\230#\230Q\330\020\031\230\021\330\021\026\220a\220v\230S\240\001\330\020\031\230\021\340\020\026\220d\230\"\230E\240\022\2401\330\024\033\2304\230r\240\026\240s\250!\330\024\027\220u\230A\230U\240#\240Q\330\030!\240\021\330\030\031\330\024\027\220u\230A\230U\240\"\240A\330\030\036\230a\340\030\037\230q\330\020\023\2207\230$""\230a\330\024\027\220s\230!\2305\240\001\240\025\240b\250\006\250b\260\003\2601\260E\270\021\270&\300\002\300!\330\030!\240\021\340\030!\240\021\340\014\022\220'\230\022\2301\330\020\023\2205\230\001\230\027\240\002\240#\240S\250\005\250Q\250a\330\024\035\230W\240B\240a\340\024\025\340\010\014\210A\210U\220!\330\004\013\2101"; +static const char __pyx_k_IJ_E_q_m6_RvR_fBa_a_t1_E_aq_Q_1[] = "\200\001\340IJ\330\004\034\230E\240\026\240q\250\001\330\004\036\230m\2506\260\021\260!\330\004\r\210R\210v\220R\220\177\240f\250B\250a\330\004\036\230a\360\014\000\005\010\200t\2101\330\010\014\210E\220\025\220a\220q\330\014\023\220=\240\001\240\021\330\014\025\220Q\330\014\030\230\003\2301\230E\240\021\240#\240R\240q\330\014\020\220\005\220U\230!\2303\230a\330\020\027\220s\230!\2305\240\001\240\023\240B\240a\330\020\023\2205\230\002\230!\330\024 
\240\001\330\024\035\230Q\330\014\020\220\001\220\025\220a\330\010\017\210q\340\004\021\220\037\240\001\240\021\330\004\010\210\005\210U\220!\2201\330\010\017\210}\230A\230Q\330\010\013\210;\220c\230\021\330\014\025\220Q\330\014\030\230\003\2301\230E\240\021\240#\240R\240q\330\014\020\220\005\220U\230!\2303\230a\330\020\027\220s\230!\2305\240\001\240\023\240B\240a\330\020\023\2205\230\002\230!\330\024 \240\001\330\024\035\230Q\330\r\030\230\003\2301\330\014\022\220!\330\014\023\220:\230R\230q\330\014\026\220a\330\014\017\210u\220A\220U\230#\230Q\330\020\031\230\021\330\021\026\220a\220v\230S\240\001\330\020\031\230\021\340\020\026\220d\230\"\230E\240\022\2401\330\024\033\2304\230r\240\026\240s\250!\330\024\027\220u\230A\230U\240#\240Q\330\030!\240\021\330\024\027\220u\230A\230U\240\"\240A\330\030\036\230a\340\030\037\230q\330\020\023\2207\230$\230a\330\024\027\220s\230!\2305\240\001\240\025\240b\250\006\250b\260\003\2601\260E\270\021\270&\300\002\300!\330\030!\240\021\340\030!\240\021\340\014\022\220'\230\022\2301\330\020\023\2205\230\001\230\027\240\002\240#\240S\250\005\250Q\250a\330\024\035\230W\240B\240a\340\024\025\340\014\022\220!\330\014\023\220:\230R\230q\330\014\026\220a\330\014\017\210u\220A\220U\230#\230Q\330\020\031\230\021\330\021\026\220a\220v\230S\240\001\330\020\031\230\021\340\020\026\220d\230\"\230E\240\022\2401\330\024\033\2304\230r\240\026\240s\250!\330\024\027\220u\230A\230U\240#\240Q\330\030!\240\021\330\030\031\330\024\027\220u\230A\230U\240\"\240A\330\030\036\230a\340\030\037\230q\330\020\023\2207""\230$\230a\330\024\027\220s\230!\2305\240\001\240\025\240b\250\006\250b\260\003\2601\260E\270\021\270&\300\002\300!\330\030!\240\021\340\030!\240\021\340\014\022\220'\230\022\2301\330\020\023\2205\230\001\230\027\240\002\240#\240S\250\005\250Q\250a\330\024\035\230W\240B\240a\340\024\025\340\010\014\210A\210U\220!\330\004\013\2101"; static const char __pyx_k_N_RvRq_fBa_1_Q_Ba_nAQ_Q_1E_Ba_B[] = 
"\200\001\360\014\000\005\025\220N\240&\250\001\250\021\360\006\000\005!\240\001\330\004\r\210R\210v\220R\220q\230\001\230\025\230f\240B\240a\330\004#\2401\360\010\000\005\026\220Q\360\006\000\005\t\210\001\330\004\n\210\"\210B\210a\330\010\016\210n\230A\230Q\330\010\r\210Q\340\004\013\2101\210E\220\023\220B\220a\360\010\000\005\t\210\001\330\004\n\210\"\210B\210a\330\010\016\210n\230A\230Q\330\010\014\210A\330\010\r\210Q\340\010\016\210b\220\002\220!\330\014\017\210r\220\023\220A\330\020\026\220n\240A\240Q\330\014\021\220\021\340\010\024\320\024%\240Q\240a\240u\250B\250a\340\010\017\210q\220\001\220\021\220%\220z\240\022\2401\330\010\r\210Q\360\006\000\005\t\210\001\330\004\n\210\"\210B\210a\210q\220\001\330\010\023\2207\230!\2301\330\010\r\210Q\360\006\000\005\t\210\001\330\004\n\210\"\210B\210a\210q\220\001\330\010\017\210q\220\005\220W\230A\230S\240\002\240!\330\010\r\210Q\340\004\013\2101"; static const char __pyx_k_All_dimensions_preceding_dimensi[] = "All dimensions preceding dimension %d must be indexed and not sliced"; static const char __pyx_k_Buffer_view_does_not_expose_stri[] = "Buffer view does not expose strides"; @@ -3958,7 +3970,7 @@ static int __pyx_array___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, P default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__cinit__", 0) < 0) __PYX_ERR(1, 129, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__cinit__", 0) < (0)) __PYX_ERR(1, 129, __pyx_L3_error) if (!values[3]) values[3] = __Pyx_NewRef(((PyObject *)__pyx_mstate_global->__pyx_n_u_c)); for (Py_ssize_t i = __pyx_nargs; i < 3; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 3, 5, i); __PYX_ERR(1, 129, __pyx_L3_error) } @@ -5572,7 +5584,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds 
default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__setstate_cython__", 0) < 0) __PYX_ERR(1, 3, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__setstate_cython__", 0) < (0)) __PYX_ERR(1, 3, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 1; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, i); __PYX_ERR(1, 3, __pyx_L3_error) } } @@ -5917,7 +5929,7 @@ static struct __pyx_array_obj *__pyx_array_new(PyObject *__pyx_v_shape, Py_ssize __pyx_t_4 = 0; __pyx_t_4 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 273, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_t_4, __pyx_mstate_global->__pyx_n_u_allocate_buffer, Py_False) < 0) __PYX_ERR(1, 273, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_4, __pyx_mstate_global->__pyx_n_u_allocate_buffer, Py_False) < (0)) __PYX_ERR(1, 273, __pyx_L1_error) __pyx_t_3 = ((PyObject *)__pyx_tp_new_array(((PyTypeObject *)__pyx_mstate_global->__pyx_array_type), __pyx_t_1, __pyx_t_4)); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 273, __pyx_L1_error) __Pyx_GOTREF((PyObject *)__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -6012,7 +6024,7 @@ static int __pyx_MemviewEnum___init__(PyObject *__pyx_v_self, PyObject *__pyx_ar default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__init__", 0) < 0) __PYX_ERR(1, 302, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__init__", 0) < (0)) __PYX_ERR(1, 302, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 1; i++) { if (unlikely(!values[i])) { 
__Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, i); __PYX_ERR(1, 302, __pyx_L3_error) } } @@ -6449,7 +6461,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__setstate_cython__", 0) < 0) __PYX_ERR(1, 16, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__setstate_cython__", 0) < (0)) __PYX_ERR(1, 16, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 1; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, i); __PYX_ERR(1, 16, __pyx_L3_error) } } @@ -6574,7 +6586,7 @@ static int __pyx_memoryview___cinit__(PyObject *__pyx_v_self, PyObject *__pyx_ar default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__cinit__", 0) < 0) __PYX_ERR(1, 347, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__cinit__", 0) < (0)) __PYX_ERR(1, 347, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 2; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, i); __PYX_ERR(1, 347, __pyx_L3_error) } } @@ -10891,7 +10903,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__setstate_cython__", 0) < 0) __PYX_ERR(1, 3, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__setstate_cython__", 0) < (0)) __PYX_ERR(1, 3, __pyx_L3_error) for (Py_ssize_t i = 
__pyx_nargs; i < 1; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, i); __PYX_ERR(1, 3, __pyx_L3_error) } } @@ -13778,7 +13790,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__setstate_cython__", 0) < 0) __PYX_ERR(1, 3, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__setstate_cython__", 0) < (0)) __PYX_ERR(1, 3, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 1; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("__setstate_cython__", 1, 1, 1, i); __PYX_ERR(1, 3, __pyx_L3_error) } } @@ -16937,7 +16949,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__pyx_unpickle_Enum", 0) < 0) __PYX_ERR(1, 1, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "__pyx_unpickle_Enum", 0) < (0)) __PYX_ERR(1, 1, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 3; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("__pyx_unpickle_Enum", 1, 3, 3, i); __PYX_ERR(1, 1, __pyx_L3_error) } } @@ -17259,7 +17271,7 @@ static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":286 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":242 * cdef int type_num * * @property # <<<<<<<<<<<<<< @@ -17270,7 +17282,7 @@ static PyObject 
*__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *__ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_Descr *__pyx_v_self) { npy_intp __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":288 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":244 * @property * cdef inline npy_intp itemsize(self) noexcept nogil: * return PyDataType_ELSIZE(self) # <<<<<<<<<<<<<< @@ -17280,7 +17292,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_D __pyx_r = PyDataType_ELSIZE(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":286 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":242 * cdef int type_num * * @property # <<<<<<<<<<<<<< @@ -17293,7 +17305,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_D return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":290 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":246 * return PyDataType_ELSIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17304,7 +17316,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_D static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_9alignment_alignment(PyArray_Descr *__pyx_v_self) { npy_intp __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":292 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":248 * @property * cdef inline npy_intp alignment(self) noexcept nogil: * return 
PyDataType_ALIGNMENT(self) # <<<<<<<<<<<<<< @@ -17314,7 +17326,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_9alignment_alignment(PyArray __pyx_r = PyDataType_ALIGNMENT(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":290 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":246 * return PyDataType_ELSIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17327,7 +17339,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_9alignment_alignment(PyArray return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":296 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":252 * # Use fields/names with care as they may be NULL. You must check * # for this using PyDataType_HASFIELDS. * @property # <<<<<<<<<<<<<< @@ -17341,7 +17353,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_6fields_fields(PyArray_Desc PyObject *__pyx_t_1; __Pyx_RefNannySetupContext("fields", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":298 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":254 * @property * cdef inline object fields(self): * return PyDataType_FIELDS(self) # <<<<<<<<<<<<<< @@ -17354,7 +17366,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_6fields_fields(PyArray_Desc __pyx_r = ((PyObject *)__pyx_t_1); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":296 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":252 * # Use fields/names with care as they may be NULL. 
You must check * # for this using PyDataType_HASFIELDS. * @property # <<<<<<<<<<<<<< @@ -17369,7 +17381,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_6fields_fields(PyArray_Desc return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":300 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":256 * return PyDataType_FIELDS(self) * * @property # <<<<<<<<<<<<<< @@ -17383,7 +17395,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr PyObject *__pyx_t_1; __Pyx_RefNannySetupContext("names", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":302 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":258 * @property * cdef inline tuple names(self): * return PyDataType_NAMES(self) # <<<<<<<<<<<<<< @@ -17396,7 +17408,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr __pyx_r = ((PyObject*)__pyx_t_1); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":300 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":256 * return PyDataType_FIELDS(self) * * @property # <<<<<<<<<<<<<< @@ -17411,7 +17423,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":307 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":263 * # valid (the pointer can be NULL). Most users should access * # this field via the inline helper method PyDataType_SHAPE. 
* @property # <<<<<<<<<<<<<< @@ -17422,7 +17434,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarray(PyArray_Descr *__pyx_v_self) { PyArray_ArrayDescr *__pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":309 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":265 * @property * cdef inline PyArray_ArrayDescr* subarray(self) noexcept nogil: * return PyDataType_SUBARRAY(self) # <<<<<<<<<<<<<< @@ -17432,7 +17444,7 @@ static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarra __pyx_r = PyDataType_SUBARRAY(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":307 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":263 * # valid (the pointer can be NULL). Most users should access * # this field via the inline helper method PyDataType_SHAPE. 
* @property # <<<<<<<<<<<<<< @@ -17445,7 +17457,7 @@ static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarra return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":311 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":267 * return PyDataType_SUBARRAY(self) * * @property # <<<<<<<<<<<<<< @@ -17456,7 +17468,7 @@ static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarra static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr *__pyx_v_self) { npy_uint64 __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":314 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":270 * cdef inline npy_uint64 flags(self) noexcept nogil: * """The data types flags.""" * return PyDataType_FLAGS(self) # <<<<<<<<<<<<<< @@ -17466,7 +17478,7 @@ static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr __pyx_r = PyDataType_FLAGS(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":311 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":267 * return PyDataType_SUBARRAY(self) * * @property # <<<<<<<<<<<<<< @@ -17479,7 +17491,7 @@ static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":323 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":279 * ctypedef class numpy.broadcast [object PyArrayMultiIterObject, check_size ignore]: * * @property # 
<<<<<<<<<<<<<< @@ -17490,7 +17502,7 @@ static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMultiIterObject *__pyx_v_self) { int __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":326 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":282 * cdef inline int numiter(self) noexcept nogil: * """The number of arrays that need to be broadcast to the same shape.""" * return PyArray_MultiIter_NUMITER(self) # <<<<<<<<<<<<<< @@ -17500,7 +17512,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMulti __pyx_r = PyArray_MultiIter_NUMITER(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":323 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":279 * ctypedef class numpy.broadcast [object PyArrayMultiIterObject, check_size ignore]: * * @property # <<<<<<<<<<<<<< @@ -17513,7 +17525,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMulti return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":328 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":284 * return PyArray_MultiIter_NUMITER(self) * * @property # <<<<<<<<<<<<<< @@ -17524,7 +17536,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMulti static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiIterObject *__pyx_v_self) { npy_intp __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":331 + /* 
"../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":287 * cdef inline npy_intp size(self) noexcept nogil: * """The total broadcasted size.""" * return PyArray_MultiIter_SIZE(self) # <<<<<<<<<<<<<< @@ -17534,7 +17546,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiI __pyx_r = PyArray_MultiIter_SIZE(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":328 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":284 * return PyArray_MultiIter_NUMITER(self) * * @property # <<<<<<<<<<<<<< @@ -17547,7 +17559,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiI return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":333 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":289 * return PyArray_MultiIter_SIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17558,7 +17570,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiI static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMultiIterObject *__pyx_v_self) { npy_intp __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":336 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":292 * cdef inline npy_intp index(self) noexcept nogil: * """The current (1-d) index into the broadcasted result.""" * return PyArray_MultiIter_INDEX(self) # <<<<<<<<<<<<<< @@ -17568,7 +17580,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMult __pyx_r = PyArray_MultiIter_INDEX(__pyx_v_self); goto __pyx_L0; - /* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":333 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":289 * return PyArray_MultiIter_SIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17581,7 +17593,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMult return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":338 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":294 * return PyArray_MultiIter_INDEX(self) * * @property # <<<<<<<<<<<<<< @@ -17592,7 +17604,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMult static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject *__pyx_v_self) { int __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":341 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":297 * cdef inline int nd(self) noexcept nogil: * """The number of dimensions in the broadcasted result.""" * return PyArray_MultiIter_NDIM(self) # <<<<<<<<<<<<<< @@ -17602,7 +17614,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject __pyx_r = PyArray_MultiIter_NDIM(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":338 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":294 * return PyArray_MultiIter_INDEX(self) * * @property # <<<<<<<<<<<<<< @@ -17615,7 +17627,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject return __pyx_r; } -/* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":343 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":299 * return PyArray_MultiIter_NDIM(self) * * @property # <<<<<<<<<<<<<< @@ -17626,7 +17638,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions(PyArrayMultiIterObject *__pyx_v_self) { npy_intp *__pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":346 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":302 * cdef inline npy_intp* dimensions(self) noexcept nogil: * """The shape of the broadcasted result.""" * return PyArray_MultiIter_DIMS(self) # <<<<<<<<<<<<<< @@ -17636,7 +17648,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions __pyx_r = PyArray_MultiIter_DIMS(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":343 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":299 * return PyArray_MultiIter_NDIM(self) * * @property # <<<<<<<<<<<<<< @@ -17649,7 +17661,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":348 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":304 * return PyArray_MultiIter_DIMS(self) * * @property # <<<<<<<<<<<<<< @@ -17660,7 +17672,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions static CYTHON_INLINE void 
**__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiIterObject *__pyx_v_self) { void **__pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":352 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":308 * """An array of iterator objects that holds the iterators for the arrays to be broadcast together. * On return, the iterators are adjusted for broadcasting.""" * return PyArray_MultiIter_ITERS(self) # <<<<<<<<<<<<<< @@ -17670,7 +17682,7 @@ static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiI __pyx_r = PyArray_MultiIter_ITERS(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":348 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":304 * return PyArray_MultiIter_DIMS(self) * * @property # <<<<<<<<<<<<<< @@ -17683,7 +17695,7 @@ static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiI return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":366 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":322 * # Instead, we use properties that map to the corresponding C-API functions. 
* * @property # <<<<<<<<<<<<<< @@ -17694,7 +17706,7 @@ static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiI static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject *__pyx_v_self) { PyObject *__pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":370 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":326 * """Returns a borrowed reference to the object owning the data/memory. * """ * return PyArray_BASE(self) # <<<<<<<<<<<<<< @@ -17704,7 +17716,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject __pyx_r = PyArray_BASE(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":366 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":322 * # Instead, we use properties that map to the corresponding C-API functions. 
* * @property # <<<<<<<<<<<<<< @@ -17717,7 +17729,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":372 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":328 * return PyArray_BASE(self) * * @property # <<<<<<<<<<<<<< @@ -17731,7 +17743,7 @@ static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArray PyArray_Descr *__pyx_t_1; __Pyx_RefNannySetupContext("descr", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":376 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":332 * """Returns an owned reference to the dtype of the array. * """ * return PyArray_DESCR(self) # <<<<<<<<<<<<<< @@ -17744,7 +17756,7 @@ static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArray __pyx_r = ((PyArray_Descr *)__pyx_t_1); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":372 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":328 * return PyArray_BASE(self) * * @property # <<<<<<<<<<<<<< @@ -17759,7 +17771,7 @@ static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArray return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":378 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":334 * return PyArray_DESCR(self) * * @property # <<<<<<<<<<<<<< @@ -17770,7 +17782,7 @@ static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArray static CYTHON_INLINE int 
__pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx_v_self) { int __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":382 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":338 * """Returns the number of dimensions in the array. * """ * return PyArray_NDIM(self) # <<<<<<<<<<<<<< @@ -17780,7 +17792,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx __pyx_r = PyArray_NDIM(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":378 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":334 * return PyArray_DESCR(self) * * @property # <<<<<<<<<<<<<< @@ -17793,7 +17805,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":384 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":340 * return PyArray_NDIM(self) * * @property # <<<<<<<<<<<<<< @@ -17804,7 +17816,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObject *__pyx_v_self) { npy_intp *__pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":390 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":346 * Can return NULL for 0-dimensional arrays. 
* """ * return PyArray_DIMS(self) # <<<<<<<<<<<<<< @@ -17814,7 +17826,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObjec __pyx_r = PyArray_DIMS(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":384 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":340 * return PyArray_NDIM(self) * * @property # <<<<<<<<<<<<<< @@ -17827,7 +17839,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObjec return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":392 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":348 * return PyArray_DIMS(self) * * @property # <<<<<<<<<<<<<< @@ -17838,7 +17850,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObjec static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayObject *__pyx_v_self) { npy_intp *__pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":397 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":353 * The number of elements matches the number of dimensions of the array (ndim). 
* """ * return PyArray_STRIDES(self) # <<<<<<<<<<<<<< @@ -17848,7 +17860,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayO __pyx_r = PyArray_STRIDES(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":392 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":348 * return PyArray_DIMS(self) * * @property # <<<<<<<<<<<<<< @@ -17861,7 +17873,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayO return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":399 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":355 * return PyArray_STRIDES(self) * * @property # <<<<<<<<<<<<<< @@ -17872,7 +17884,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayO static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject *__pyx_v_self) { npy_intp __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":403 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":359 * """Returns the total size (in number of elements) of the array. 
* """ * return PyArray_SIZE(self) # <<<<<<<<<<<<<< @@ -17882,7 +17894,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject * __pyx_r = PyArray_SIZE(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":399 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":355 * return PyArray_STRIDES(self) * * @property # <<<<<<<<<<<<<< @@ -17895,7 +17907,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject * return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":405 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":361 * return PyArray_SIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17906,7 +17918,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject * static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__pyx_v_self) { char *__pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":412 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":368 * of `PyArray_DATA()` instead, which returns a 'void*'. 
* """ * return PyArray_BYTES(self) # <<<<<<<<<<<<<< @@ -17916,7 +17928,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__p __pyx_r = PyArray_BYTES(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":405 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":361 * return PyArray_SIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17929,7 +17941,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__p return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":824 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":776 * ctypedef long double complex clongdouble_t * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -17946,7 +17958,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":825 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":777 * * cdef inline object PyArray_MultiIterNew1(a): * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< @@ -17954,13 +17966,13 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ * cdef inline object PyArray_MultiIterNew2(a, b): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 825, __pyx_L1_error) + __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 777, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 
= 0; goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":824 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":776 * ctypedef long double complex clongdouble_t * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -17979,7 +17991,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":827 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":779 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -17996,7 +18008,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":828 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":780 * * cdef inline object PyArray_MultiIterNew2(a, b): * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< @@ -18004,13 +18016,13 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ * cdef inline object PyArray_MultiIterNew3(a, b, c): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 828, __pyx_L1_error) + __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 780, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":827 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":779 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -18029,7 +18041,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":830 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":782 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -18046,7 +18058,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":831 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":783 * * cdef inline object PyArray_MultiIterNew3(a, b, c): * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< @@ -18054,13 +18066,13 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ * cdef inline object PyArray_MultiIterNew4(a, b, c, d): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 831, __pyx_L1_error) + __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 783, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":830 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":782 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -18079,7 +18091,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":833 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":785 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -18096,7 +18108,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":834 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":786 * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< @@ -18104,13 +18116,13 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 834, __pyx_L1_error) + __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 786, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; 
goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":833 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":785 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -18129,7 +18141,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":836 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":788 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -18146,7 +18158,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":837 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":789 * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< @@ -18154,13 +18166,13 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ * cdef inline tuple PyDataType_SHAPE(dtype d): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 837, __pyx_L1_error) + __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 789, 
__pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":836 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":788 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -18179,7 +18191,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":839 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":791 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< @@ -18194,7 +18206,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ PyObject *__pyx_t_2; __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":840 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":792 * * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< @@ -18204,7 +18216,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ __pyx_t_1 = PyDataType_HASSUBARRAY(__pyx_v_d); if (__pyx_t_1) { - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":841 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":793 * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): * return d.subarray.shape # <<<<<<<<<<<<<< @@ 
-18217,7 +18229,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ __pyx_r = ((PyObject*)__pyx_t_2); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":840 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":792 * * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< @@ -18226,7 +18238,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ */ } - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":843 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":795 * return d.subarray.shape * else: * return () # <<<<<<<<<<<<<< @@ -18240,7 +18252,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ goto __pyx_L0; } - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":839 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":791 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< @@ -18255,7 +18267,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1035 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":994 * int _import_umath() except -1 * * cdef inline void set_array_base(ndarray arr, object base) except *: # <<<<<<<<<<<<<< @@ -18269,7 +18281,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a const char 
*__pyx_filename = NULL; int __pyx_clineno = 0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1036 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":995 * * cdef inline void set_array_base(ndarray arr, object base) except *: * Py_INCREF(base) # important to do this before stealing the reference below! # <<<<<<<<<<<<<< @@ -18278,16 +18290,16 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ Py_INCREF(__pyx_v_base); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1037 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":996 * cdef inline void set_array_base(ndarray arr, object base) except *: * Py_INCREF(base) # important to do this before stealing the reference below! * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<< * * cdef inline object get_array_base(ndarray arr): */ - __pyx_t_1 = PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(2, 1037, __pyx_L1_error) + __pyx_t_1 = PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(2, 996, __pyx_L1_error) - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1035 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":994 * int _import_umath() except -1 * * cdef inline void set_array_base(ndarray arr, object base) except *: # <<<<<<<<<<<<<< @@ -18302,7 +18314,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __pyx_L0:; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1039 +/* 
"../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":998 * PyArray_SetBaseObject(arr, base) * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -18317,7 +18329,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py int __pyx_t_1; __Pyx_RefNannySetupContext("get_array_base", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1040 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":999 * * cdef inline object get_array_base(ndarray arr): * base = PyArray_BASE(arr) # <<<<<<<<<<<<<< @@ -18326,7 +18338,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py */ __pyx_v_base = PyArray_BASE(__pyx_v_arr); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1041 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1000 * cdef inline object get_array_base(ndarray arr): * base = PyArray_BASE(arr) * if base is NULL: # <<<<<<<<<<<<<< @@ -18336,7 +18348,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_t_1 = (__pyx_v_base == NULL); if (__pyx_t_1) { - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1042 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1001 * base = PyArray_BASE(arr) * if base is NULL: * return None # <<<<<<<<<<<<<< @@ -18347,7 +18359,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; - /* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1041 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1000 * cdef inline object get_array_base(ndarray arr): * base = PyArray_BASE(arr) * if base is NULL: # <<<<<<<<<<<<<< @@ -18356,7 +18368,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py */ } - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1043 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1002 * if base is NULL: * return None * return base # <<<<<<<<<<<<<< @@ -18368,7 +18380,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_r = ((PyObject *)__pyx_v_base); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1039 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":998 * PyArray_SetBaseObject(arr, base) * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -18383,7 +18395,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1047 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1006 * # Versions of the import_* functions which are more suitable for * # Cython code. 
* cdef inline int import_array() except -1: # <<<<<<<<<<<<<< @@ -18410,7 +18422,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("import_array", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1048 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1007 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -18426,16 +18438,16 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1049 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1008 * cdef inline int import_array() except -1: * try: * __pyx_import_array() # <<<<<<<<<<<<<< * except Exception: * raise ImportError("numpy._core.multiarray failed to import") */ - __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 1049, __pyx_L3_error) + __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 1008, __pyx_L3_error) - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1048 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1007 * # Cython code. 
* cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -18449,7 +18461,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1050 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1009 * try: * __pyx_import_array() * except Exception: # <<<<<<<<<<<<<< @@ -18459,12 +18471,12 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(((PyTypeObject*)PyExc_Exception)))); if (__pyx_t_4) { __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(2, 1050, __pyx_L5_except_error) + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(2, 1009, __pyx_L5_except_error) __Pyx_XGOTREF(__pyx_t_5); __Pyx_XGOTREF(__pyx_t_6); __Pyx_XGOTREF(__pyx_t_7); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1051 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1010 * __pyx_import_array() * except Exception: * raise ImportError("numpy._core.multiarray failed to import") # <<<<<<<<<<<<<< @@ -18480,16 +18492,16 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __pyx_t_8 = __Pyx_PyObject_FastCall(__pyx_t_10, __pyx_callargs+__pyx_t_11, (2-__pyx_t_11) | (__pyx_t_11*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)); __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - if (unlikely(!__pyx_t_8)) __PYX_ERR(2, 1051, __pyx_L5_except_error) + if (unlikely(!__pyx_t_8)) __PYX_ERR(2, 1010, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_8); } __Pyx_Raise(__pyx_t_8, 0, 0, 0); __Pyx_DECREF(__pyx_t_8); 
__pyx_t_8 = 0; - __PYX_ERR(2, 1051, __pyx_L5_except_error) + __PYX_ERR(2, 1010, __pyx_L5_except_error) } goto __pyx_L5_except_error; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1048 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1007 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -18505,7 +18517,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __pyx_L8_try_end:; } - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1047 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1006 * # Versions of the import_* functions which are more suitable for * # Cython code. * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< @@ -18530,7 +18542,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1053 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1012 * raise ImportError("numpy._core.multiarray failed to import") * * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< @@ -18557,7 +18569,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("import_umath", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1054 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1013 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -18573,16 +18585,16 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { 
__Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1055 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1014 * cdef inline int import_umath() except -1: * try: * _import_umath() # <<<<<<<<<<<<<< * except Exception: * raise ImportError("numpy._core.umath failed to import") */ - __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 1055, __pyx_L3_error) + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 1014, __pyx_L3_error) - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1054 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1013 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -18596,7 +18608,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1056 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1015 * try: * _import_umath() * except Exception: # <<<<<<<<<<<<<< @@ -18606,12 +18618,12 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(((PyTypeObject*)PyExc_Exception)))); if (__pyx_t_4) { __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(2, 1056, __pyx_L5_except_error) + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(2, 1015, __pyx_L5_except_error) __Pyx_XGOTREF(__pyx_t_5); __Pyx_XGOTREF(__pyx_t_6); __Pyx_XGOTREF(__pyx_t_7); - /* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1057 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1016 * _import_umath() * except Exception: * raise ImportError("numpy._core.umath failed to import") # <<<<<<<<<<<<<< @@ -18627,16 +18639,16 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __pyx_t_8 = __Pyx_PyObject_FastCall(__pyx_t_10, __pyx_callargs+__pyx_t_11, (2-__pyx_t_11) | (__pyx_t_11*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)); __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - if (unlikely(!__pyx_t_8)) __PYX_ERR(2, 1057, __pyx_L5_except_error) + if (unlikely(!__pyx_t_8)) __PYX_ERR(2, 1016, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_8); } __Pyx_Raise(__pyx_t_8, 0, 0, 0); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __PYX_ERR(2, 1057, __pyx_L5_except_error) + __PYX_ERR(2, 1016, __pyx_L5_except_error) } goto __pyx_L5_except_error; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1054 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1013 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -18652,7 +18664,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __pyx_L8_try_end:; } - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1053 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1012 * raise ImportError("numpy._core.multiarray failed to import") * * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< @@ -18677,7 +18689,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { return __pyx_r; } -/* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1059 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1018 * raise ImportError("numpy._core.umath failed to import") * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< @@ -18704,7 +18716,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("import_ufunc", 0); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1060 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1019 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -18720,16 +18732,16 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1061 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1020 * cdef inline int import_ufunc() except -1: * try: * _import_umath() # <<<<<<<<<<<<<< * except Exception: * raise ImportError("numpy._core.umath failed to import") */ - __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 1061, __pyx_L3_error) + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 1020, __pyx_L3_error) - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1060 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1019 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -18743,7 +18755,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { goto 
__pyx_L8_try_end; __pyx_L3_error:; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1062 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1021 * try: * _import_umath() * except Exception: # <<<<<<<<<<<<<< @@ -18753,12 +18765,12 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(((PyTypeObject*)PyExc_Exception)))); if (__pyx_t_4) { __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); - if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(2, 1062, __pyx_L5_except_error) + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(2, 1021, __pyx_L5_except_error) __Pyx_XGOTREF(__pyx_t_5); __Pyx_XGOTREF(__pyx_t_6); __Pyx_XGOTREF(__pyx_t_7); - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1063 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1022 * _import_umath() * except Exception: * raise ImportError("numpy._core.umath failed to import") # <<<<<<<<<<<<<< @@ -18774,16 +18786,16 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __pyx_t_8 = __Pyx_PyObject_FastCall(__pyx_t_10, __pyx_callargs+__pyx_t_11, (2-__pyx_t_11) | (__pyx_t_11*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)); __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - if (unlikely(!__pyx_t_8)) __PYX_ERR(2, 1063, __pyx_L5_except_error) + if (unlikely(!__pyx_t_8)) __PYX_ERR(2, 1022, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_8); } __Pyx_Raise(__pyx_t_8, 0, 0, 0); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __PYX_ERR(2, 1063, __pyx_L5_except_error) + __PYX_ERR(2, 1022, __pyx_L5_except_error) } goto __pyx_L5_except_error; - /* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1060 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1019 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -18799,7 +18811,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __pyx_L8_try_end:; } - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1059 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1018 * raise ImportError("numpy._core.umath failed to import") * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< @@ -18824,7 +18836,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1066 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1025 * * * cdef inline bint is_timedelta64_object(object obj) noexcept: # <<<<<<<<<<<<<< @@ -18835,7 +18847,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_obj) { int __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1078 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1037 * bool * """ * return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) # <<<<<<<<<<<<<< @@ -18845,7 +18857,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_ __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyTimedeltaArrType_Type)); goto __pyx_L0; - /* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1066 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1025 * * * cdef inline bint is_timedelta64_object(object obj) noexcept: # <<<<<<<<<<<<<< @@ -18858,7 +18870,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1081 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1040 * * * cdef inline bint is_datetime64_object(object obj) noexcept: # <<<<<<<<<<<<<< @@ -18869,7 +18881,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_ static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_obj) { int __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1093 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1052 * bool * """ * return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) # <<<<<<<<<<<<<< @@ -18879,7 +18891,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_o __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyDatetimeArrType_Type)); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1081 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1040 * * * cdef inline bint is_datetime64_object(object obj) noexcept: # <<<<<<<<<<<<<< @@ -18892,7 +18904,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_o return __pyx_r; } -/* 
"../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1096 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1055 * * * cdef inline npy_datetime get_datetime64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -18903,7 +18915,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_o static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject *__pyx_v_obj) { npy_datetime __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1103 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1062 * also needed. That can be found using `get_datetime64_unit`. * """ * return (obj).obval # <<<<<<<<<<<<<< @@ -18913,7 +18925,7 @@ static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject * __pyx_r = ((PyDatetimeScalarObject *)__pyx_v_obj)->obval; goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1096 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1055 * * * cdef inline npy_datetime get_datetime64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -18926,7 +18938,7 @@ static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject * return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1106 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1065 * * * cdef inline npy_timedelta get_timedelta64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -18937,7 +18949,7 @@ static CYTHON_INLINE npy_datetime 
__pyx_f_5numpy_get_datetime64_value(PyObject * static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject *__pyx_v_obj) { npy_timedelta __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1110 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1069 * returns the int64 value underlying scalar numpy timedelta64 object * """ * return (obj).obval # <<<<<<<<<<<<<< @@ -18947,7 +18959,7 @@ static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject __pyx_r = ((PyTimedeltaScalarObject *)__pyx_v_obj)->obval; goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1106 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1065 * * * cdef inline npy_timedelta get_timedelta64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -18960,7 +18972,7 @@ static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject return __pyx_r; } -/* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1113 +/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1072 * * * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -18971,7 +18983,7 @@ static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObject *__pyx_v_obj) { NPY_DATETIMEUNIT __pyx_r; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1117 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1076 * 
returns the unit part of the dtype for a numpy datetime64 object. * """ * return (obj).obmeta.base # <<<<<<<<<<<<<< @@ -18981,7 +18993,7 @@ static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObjec __pyx_r = ((NPY_DATETIMEUNIT)((PyDatetimeScalarObject *)__pyx_v_obj)->obmeta.base); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-0mzx3goa/overlay/lib/python3.10/site-packages/numpy/__init__.cython-30.pxd":1113 + /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1072 * * * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -19062,7 +19074,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "find_nearest_matches", 0) < 0) __PYX_ERR(0, 12, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "find_nearest_matches", 0) < (0)) __PYX_ERR(0, 12, __pyx_L3_error) if (!values[2]) values[2] = __Pyx_NewRef(((PyObject *)((PyObject*)__pyx_mstate_global->__pyx_int_1))); for (Py_ssize_t i = __pyx_nargs; i < 2; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("find_nearest_matches", 0, 2, 3, i); __PYX_ERR(0, 12, __pyx_L3_error) } @@ -19215,7 +19227,7 @@ static PyObject *__pyx_pf_9pyprophet_7scoring_10_optimized_find_nearest_matches( PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 
1 : 0)] = {__pyx_t_2, __pyx_t_5}; __pyx_t_3 = __Pyx_MakeVectorcallBuilderKwds(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 17, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_3, __pyx_callargs+2, 0) < 0) __PYX_ERR(0, 17, __pyx_L1_error) + if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_3, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 17, __pyx_L1_error) __pyx_t_1 = __Pyx_Object_Vectorcall_CallFromBuilder(__pyx_t_4, __pyx_callargs+__pyx_t_7, (2-__pyx_t_7) | (__pyx_t_7*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_3); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; @@ -20579,7 +20591,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "count_num_positives", 0) < 0) __PYX_ERR(0, 135, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "count_num_positives", 0) < (0)) __PYX_ERR(0, 135, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 1; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("count_num_positives", 1, 1, 1, i); __PYX_ERR(0, 135, __pyx_L3_error) } } @@ -20697,7 +20709,7 @@ static PyObject *__pyx_pf_9pyprophet_7scoring_10_optimized_2count_num_positives( PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 
1 : 0)] = {__pyx_t_2, __pyx_t_3}; __pyx_t_5 = __Pyx_MakeVectorcallBuilderKwds(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 141, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_5, __pyx_callargs+2, 0) < 0) __PYX_ERR(0, 141, __pyx_L1_error) + if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_5, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 141, __pyx_L1_error) __pyx_t_1 = __Pyx_Object_Vectorcall_CallFromBuilder(__pyx_t_4, __pyx_callargs+__pyx_t_7, (2-__pyx_t_7) | (__pyx_t_7*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_5); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -20915,7 +20927,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "find_top_ranked", 0) < 0) __PYX_ERR(0, 154, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "find_top_ranked", 0) < (0)) __PYX_ERR(0, 154, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 2; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("find_top_ranked", 1, 2, 2, i); __PYX_ERR(0, 154, __pyx_L3_error) } } @@ -21038,7 +21050,7 @@ static PyObject *__pyx_pf_9pyprophet_7scoring_10_optimized_4find_top_ranked(CYTH PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 
1 : 0)] = {__pyx_t_2, __pyx_t_5}; __pyx_t_3 = __Pyx_MakeVectorcallBuilderKwds(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 158, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_3, __pyx_callargs+2, 0) < 0) __PYX_ERR(0, 158, __pyx_L1_error) + if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_3, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 158, __pyx_L1_error) __pyx_t_1 = __Pyx_Object_Vectorcall_CallFromBuilder(__pyx_t_4, __pyx_callargs+__pyx_t_7, (2-__pyx_t_7) | (__pyx_t_7*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_3); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; @@ -21548,7 +21560,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "rank", 0) < 0) __PYX_ERR(0, 201, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "rank", 0) < (0)) __PYX_ERR(0, 201, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 2; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("rank", 1, 2, 2, i); __PYX_ERR(0, 201, __pyx_L3_error) } } @@ -21674,7 +21686,7 @@ static PyObject *__pyx_pf_9pyprophet_7scoring_10_optimized_6rank(CYTHON_UNUSED P PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 
1 : 0)] = {__pyx_t_2, __pyx_t_5}; __pyx_t_3 = __Pyx_MakeVectorcallBuilderKwds(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 207, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_3, __pyx_callargs+2, 0) < 0) __PYX_ERR(0, 207, __pyx_L1_error) + if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_3, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 207, __pyx_L1_error) __pyx_t_1 = __Pyx_Object_Vectorcall_CallFromBuilder(__pyx_t_4, __pyx_callargs+__pyx_t_7, (2-__pyx_t_7) | (__pyx_t_7*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_3); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; @@ -21900,7 +21912,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "rank32", 0) < 0) __PYX_ERR(0, 223, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "rank32", 0) < (0)) __PYX_ERR(0, 223, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 2; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("rank32", 1, 2, 2, i); __PYX_ERR(0, 223, __pyx_L3_error) } } @@ -22026,7 +22038,7 @@ static PyObject *__pyx_pf_9pyprophet_7scoring_10_optimized_8rank32(CYTHON_UNUSED PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 
1 : 0)] = {__pyx_t_2, __pyx_t_5}; __pyx_t_3 = __Pyx_MakeVectorcallBuilderKwds(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 229, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_3, __pyx_callargs+2, 0) < 0) __PYX_ERR(0, 229, __pyx_L1_error) + if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_3, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 229, __pyx_L1_error) __pyx_t_1 = __Pyx_Object_Vectorcall_CallFromBuilder(__pyx_t_4, __pyx_callargs+__pyx_t_7, (2-__pyx_t_7) | (__pyx_t_7*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_3); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; @@ -22257,7 +22269,7 @@ PyObject *__pyx_args, PyObject *__pyx_kwds default: goto __pyx_L5_argtuple_error; } const Py_ssize_t kwd_pos_args = __pyx_nargs; - if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "single_chromatogram_hypothesis_fast", 0) < 0) __PYX_ERR(0, 245, __pyx_L3_error) + if (__Pyx_ParseKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values, kwd_pos_args, __pyx_kwds_len, "single_chromatogram_hypothesis_fast", 0) < (0)) __PYX_ERR(0, 245, __pyx_L3_error) for (Py_ssize_t i = __pyx_nargs; i < 3; i++) { if (unlikely(!values[i])) { __Pyx_RaiseArgtupleInvalid("single_chromatogram_hypothesis_fast", 1, 3, 3, i); __PYX_ERR(0, 245, __pyx_L3_error) } } @@ -22387,7 +22399,7 @@ static PyObject *__pyx_pf_9pyprophet_7scoring_10_optimized_10single_chromatogram PyObject *__pyx_callargs[2 + ((CYTHON_VECTORCALL) ? 
1 : 0)] = {__pyx_t_2, __pyx_t_5}; __pyx_t_3 = __Pyx_MakeVectorcallBuilderKwds(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 255, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_3, __pyx_callargs+2, 0) < 0) __PYX_ERR(0, 255, __pyx_L1_error) + if (__Pyx_VectorcallBuilder_AddArg(__pyx_mstate_global->__pyx_n_u_dtype, __pyx_t_6, __pyx_t_3, __pyx_callargs+2, 0) < (0)) __PYX_ERR(0, 255, __pyx_L1_error) __pyx_t_1 = __Pyx_Object_Vectorcall_CallFromBuilder(__pyx_t_4, __pyx_callargs+__pyx_t_7, (2-__pyx_t_7) | (__pyx_t_7*__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET), __pyx_t_3); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; @@ -23688,35 +23700,35 @@ static int __Pyx_modinit_type_init_code(__pyx_mstatetype *__pyx_mstate) { #else #warning "The buffer protocol is not supported in the Limited C-API < 3.11." #endif - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_array_spec, __pyx_mstate->__pyx_array_type) < 0) __PYX_ERR(1, 110, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_array_spec, __pyx_mstate->__pyx_array_type) < (0)) __PYX_ERR(1, 110, __pyx_L1_error) #else __pyx_mstate->__pyx_array_type = &__pyx_type___pyx_array; #endif #if !CYTHON_COMPILING_IN_LIMITED_API #endif #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_mstate->__pyx_array_type) < 0) __PYX_ERR(1, 110, __pyx_L1_error) + if (__Pyx_PyType_Ready(__pyx_mstate->__pyx_array_type) < (0)) __PYX_ERR(1, 110, __pyx_L1_error) #endif - if (__Pyx_SetVtable(__pyx_mstate->__pyx_array_type, __pyx_vtabptr_array) < 0) __PYX_ERR(1, 110, __pyx_L1_error) - if (__Pyx_MergeVtables(__pyx_mstate->__pyx_array_type) < 0) __PYX_ERR(1, 110, __pyx_L1_error) - if (__Pyx_setup_reduce((PyObject *) __pyx_mstate->__pyx_array_type) < 0) __PYX_ERR(1, 110, __pyx_L1_error) + if (__Pyx_SetVtable(__pyx_mstate->__pyx_array_type, __pyx_vtabptr_array) < (0)) __PYX_ERR(1, 110, __pyx_L1_error) + if 
(__Pyx_MergeVtables(__pyx_mstate->__pyx_array_type) < (0)) __PYX_ERR(1, 110, __pyx_L1_error) + if (__Pyx_setup_reduce((PyObject *) __pyx_mstate->__pyx_array_type) < (0)) __PYX_ERR(1, 110, __pyx_L1_error) #if CYTHON_USE_TYPE_SPECS __pyx_mstate->__pyx_MemviewEnum_type = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type___pyx_MemviewEnum_spec, NULL); if (unlikely(!__pyx_mstate->__pyx_MemviewEnum_type)) __PYX_ERR(1, 299, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_MemviewEnum_spec, __pyx_mstate->__pyx_MemviewEnum_type) < 0) __PYX_ERR(1, 299, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_MemviewEnum_spec, __pyx_mstate->__pyx_MemviewEnum_type) < (0)) __PYX_ERR(1, 299, __pyx_L1_error) #else __pyx_mstate->__pyx_MemviewEnum_type = &__pyx_type___pyx_MemviewEnum; #endif #if !CYTHON_COMPILING_IN_LIMITED_API #endif #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_mstate->__pyx_MemviewEnum_type) < 0) __PYX_ERR(1, 299, __pyx_L1_error) + if (__Pyx_PyType_Ready(__pyx_mstate->__pyx_MemviewEnum_type) < (0)) __PYX_ERR(1, 299, __pyx_L1_error) #endif #if !CYTHON_COMPILING_IN_LIMITED_API if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_mstate->__pyx_MemviewEnum_type->tp_dictoffset && __pyx_mstate->__pyx_MemviewEnum_type->tp_getattro == PyObject_GenericGetAttr)) { __pyx_mstate->__pyx_MemviewEnum_type->tp_getattro = PyObject_GenericGetAttr; } #endif - if (__Pyx_setup_reduce((PyObject *) __pyx_mstate->__pyx_MemviewEnum_type) < 0) __PYX_ERR(1, 299, __pyx_L1_error) + if (__Pyx_setup_reduce((PyObject *) __pyx_mstate->__pyx_MemviewEnum_type) < (0)) __PYX_ERR(1, 299, __pyx_L1_error) __pyx_vtabptr_memoryview = &__pyx_vtable_memoryview; __pyx_vtable_memoryview.get_item_pointer = (char *(*)(struct __pyx_memoryview_obj *, PyObject *))__pyx_memoryview_get_item_pointer; __pyx_vtable_memoryview.is_slice = (PyObject *(*)(struct __pyx_memoryview_obj *, PyObject 
*))__pyx_memoryview_is_slice; @@ -23740,23 +23752,23 @@ static int __Pyx_modinit_type_init_code(__pyx_mstatetype *__pyx_mstate) { #else #warning "The buffer protocol is not supported in the Limited C-API < 3.11." #endif - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_memoryview_spec, __pyx_mstate->__pyx_memoryview_type) < 0) __PYX_ERR(1, 334, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_memoryview_spec, __pyx_mstate->__pyx_memoryview_type) < (0)) __PYX_ERR(1, 334, __pyx_L1_error) #else __pyx_mstate->__pyx_memoryview_type = &__pyx_type___pyx_memoryview; #endif #if !CYTHON_COMPILING_IN_LIMITED_API #endif #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_mstate->__pyx_memoryview_type) < 0) __PYX_ERR(1, 334, __pyx_L1_error) + if (__Pyx_PyType_Ready(__pyx_mstate->__pyx_memoryview_type) < (0)) __PYX_ERR(1, 334, __pyx_L1_error) #endif #if !CYTHON_COMPILING_IN_LIMITED_API if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_mstate->__pyx_memoryview_type->tp_dictoffset && __pyx_mstate->__pyx_memoryview_type->tp_getattro == PyObject_GenericGetAttr)) { __pyx_mstate->__pyx_memoryview_type->tp_getattro = PyObject_GenericGetAttr; } #endif - if (__Pyx_SetVtable(__pyx_mstate->__pyx_memoryview_type, __pyx_vtabptr_memoryview) < 0) __PYX_ERR(1, 334, __pyx_L1_error) - if (__Pyx_MergeVtables(__pyx_mstate->__pyx_memoryview_type) < 0) __PYX_ERR(1, 334, __pyx_L1_error) - if (__Pyx_setup_reduce((PyObject *) __pyx_mstate->__pyx_memoryview_type) < 0) __PYX_ERR(1, 334, __pyx_L1_error) + if (__Pyx_SetVtable(__pyx_mstate->__pyx_memoryview_type, __pyx_vtabptr_memoryview) < (0)) __PYX_ERR(1, 334, __pyx_L1_error) + if (__Pyx_MergeVtables(__pyx_mstate->__pyx_memoryview_type) < (0)) __PYX_ERR(1, 334, __pyx_L1_error) + if (__Pyx_setup_reduce((PyObject *) __pyx_mstate->__pyx_memoryview_type) < (0)) __PYX_ERR(1, 334, __pyx_L1_error) __pyx_vtabptr__memoryviewslice = &__pyx_vtable__memoryviewslice; 
__pyx_vtable__memoryviewslice.__pyx_base = *__pyx_vtabptr_memoryview; __pyx_vtable__memoryviewslice.__pyx_base.convert_item_to_object = (PyObject *(*)(struct __pyx_memoryview_obj *, char *))__pyx_memoryviewslice_convert_item_to_object; @@ -23768,7 +23780,7 @@ static int __Pyx_modinit_type_init_code(__pyx_mstatetype *__pyx_mstate) { __pyx_mstate->__pyx_memoryviewslice_type = (PyTypeObject *) __Pyx_PyType_FromModuleAndSpec(__pyx_m, &__pyx_type___pyx_memoryviewslice_spec, __pyx_t_1); __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; if (unlikely(!__pyx_mstate->__pyx_memoryviewslice_type)) __PYX_ERR(1, 950, __pyx_L1_error) - if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_memoryviewslice_spec, __pyx_mstate->__pyx_memoryviewslice_type) < 0) __PYX_ERR(1, 950, __pyx_L1_error) + if (__Pyx_fix_up_extension_type_from_spec(&__pyx_type___pyx_memoryviewslice_spec, __pyx_mstate->__pyx_memoryviewslice_type) < (0)) __PYX_ERR(1, 950, __pyx_L1_error) #else __pyx_mstate->__pyx_memoryviewslice_type = &__pyx_type___pyx_memoryviewslice; #endif @@ -23776,16 +23788,16 @@ static int __Pyx_modinit_type_init_code(__pyx_mstatetype *__pyx_mstate) { __pyx_mstate_global->__pyx_memoryviewslice_type->tp_base = __pyx_mstate_global->__pyx_memoryview_type; #endif #if !CYTHON_USE_TYPE_SPECS - if (__Pyx_PyType_Ready(__pyx_mstate->__pyx_memoryviewslice_type) < 0) __PYX_ERR(1, 950, __pyx_L1_error) + if (__Pyx_PyType_Ready(__pyx_mstate->__pyx_memoryviewslice_type) < (0)) __PYX_ERR(1, 950, __pyx_L1_error) #endif #if !CYTHON_COMPILING_IN_LIMITED_API if ((CYTHON_USE_TYPE_SLOTS && CYTHON_USE_PYTYPE_LOOKUP) && likely(!__pyx_mstate->__pyx_memoryviewslice_type->tp_dictoffset && __pyx_mstate->__pyx_memoryviewslice_type->tp_getattro == PyObject_GenericGetAttr)) { __pyx_mstate->__pyx_memoryviewslice_type->tp_getattro = PyObject_GenericGetAttr; } #endif - if (__Pyx_SetVtable(__pyx_mstate->__pyx_memoryviewslice_type, __pyx_vtabptr__memoryviewslice) < 0) __PYX_ERR(1, 950, __pyx_L1_error) - if 
(__Pyx_MergeVtables(__pyx_mstate->__pyx_memoryviewslice_type) < 0) __PYX_ERR(1, 950, __pyx_L1_error) - if (__Pyx_setup_reduce((PyObject *) __pyx_mstate->__pyx_memoryviewslice_type) < 0) __PYX_ERR(1, 950, __pyx_L1_error) + if (__Pyx_SetVtable(__pyx_mstate->__pyx_memoryviewslice_type, __pyx_vtabptr__memoryviewslice) < (0)) __PYX_ERR(1, 950, __pyx_L1_error) + if (__Pyx_MergeVtables(__pyx_mstate->__pyx_memoryviewslice_type) < (0)) __PYX_ERR(1, 950, __pyx_L1_error) + if (__Pyx_setup_reduce((PyObject *) __pyx_mstate->__pyx_memoryviewslice_type) < (0)) __PYX_ERR(1, 950, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -23805,153 +23817,153 @@ static int __Pyx_modinit_type_import_code(__pyx_mstatetype *__pyx_mstate) { /*--- Type import code ---*/ __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(3, 9, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_2(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", + __pyx_mstate->__pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_1_6(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyTypeObject), + sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyTypeObject), #elif CYTHON_COMPILING_IN_LIMITED_API 0, 0, #else - sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyHeapTypeObject), + sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyHeapTypeObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_7cpython_4type_type) __PYX_ERR(3, 9, __pyx_L1_error) + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_7cpython_4type_type) __PYX_ERR(3, 9, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyImport_ImportModule("numpy"); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 272, __pyx_L1_error) + __pyx_t_1 = 
PyImport_ImportModule("numpy"); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 228, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_mstate->__pyx_ptype_5numpy_dtype = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "dtype", + __pyx_mstate->__pyx_ptype_5numpy_dtype = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "dtype", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyArray_Descr), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArray_Descr), + sizeof(PyArray_Descr), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArray_Descr), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyArray_Descr), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArray_Descr), + sizeof(PyArray_Descr), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArray_Descr), #else - sizeof(PyArray_Descr), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArray_Descr), + sizeof(PyArray_Descr), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArray_Descr), #endif - __Pyx_ImportType_CheckSize_Ignore_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_dtype) __PYX_ERR(2, 272, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_flatiter = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "flatiter", + __Pyx_ImportType_CheckSize_Ignore_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_dtype) __PYX_ERR(2, 228, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_flatiter = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "flatiter", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyArrayIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArrayIterObject), + sizeof(PyArrayIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArrayIterObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyArrayIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArrayIterObject), + sizeof(PyArrayIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArrayIterObject), #else - sizeof(PyArrayIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArrayIterObject), + sizeof(PyArrayIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArrayIterObject), #endif - __Pyx_ImportType_CheckSize_Ignore_3_1_2); if 
(!__pyx_mstate->__pyx_ptype_5numpy_flatiter) __PYX_ERR(2, 317, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_broadcast = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "broadcast", + __Pyx_ImportType_CheckSize_Ignore_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_flatiter) __PYX_ERR(2, 273, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_broadcast = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "broadcast", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyArrayMultiIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArrayMultiIterObject), + sizeof(PyArrayMultiIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArrayMultiIterObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyArrayMultiIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArrayMultiIterObject), + sizeof(PyArrayMultiIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArrayMultiIterObject), #else - sizeof(PyArrayMultiIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArrayMultiIterObject), + sizeof(PyArrayMultiIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArrayMultiIterObject), #endif - __Pyx_ImportType_CheckSize_Ignore_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_broadcast) __PYX_ERR(2, 321, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_ndarray = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "ndarray", + __Pyx_ImportType_CheckSize_Ignore_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_broadcast) __PYX_ERR(2, 277, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_ndarray = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "ndarray", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyArrayObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArrayObject), + sizeof(PyArrayObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArrayObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyArrayObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArrayObject), + sizeof(PyArrayObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArrayObject), #else - sizeof(PyArrayObject), 
__PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyArrayObject), + sizeof(PyArrayObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyArrayObject), #endif - __Pyx_ImportType_CheckSize_Ignore_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_ndarray) __PYX_ERR(2, 360, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_generic = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "generic", + __Pyx_ImportType_CheckSize_Ignore_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_ndarray) __PYX_ERR(2, 316, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_generic = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "generic", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #else - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_generic) __PYX_ERR(2, 873, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_number = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "number", + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_generic) __PYX_ERR(2, 825, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_number = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "number", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #else - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), 
__PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_number) __PYX_ERR(2, 875, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_integer = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "integer", + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_number) __PYX_ERR(2, 827, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_integer = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "integer", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #else - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_integer) __PYX_ERR(2, 877, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_signedinteger = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "signedinteger", + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_integer) __PYX_ERR(2, 829, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_signedinteger = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "signedinteger", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #else - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); 
if (!__pyx_mstate->__pyx_ptype_5numpy_signedinteger) __PYX_ERR(2, 879, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_unsignedinteger = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "unsignedinteger", + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_signedinteger) __PYX_ERR(2, 831, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_unsignedinteger = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "unsignedinteger", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #else - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_unsignedinteger) __PYX_ERR(2, 881, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_inexact = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "inexact", + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_unsignedinteger) __PYX_ERR(2, 833, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_inexact = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "inexact", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #else - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_inexact) 
__PYX_ERR(2, 883, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_floating = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "floating", + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_inexact) __PYX_ERR(2, 835, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_floating = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "floating", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #else - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_floating) __PYX_ERR(2, 885, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_complexfloating = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "complexfloating", + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_floating) __PYX_ERR(2, 837, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_complexfloating = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "complexfloating", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #else - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_complexfloating) __PYX_ERR(2, 887, __pyx_L1_error) - 
__pyx_mstate->__pyx_ptype_5numpy_flexible = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "flexible", + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_complexfloating) __PYX_ERR(2, 839, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_flexible = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "flexible", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #else - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_flexible) __PYX_ERR(2, 889, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_character = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", "character", + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_flexible) __PYX_ERR(2, 841, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_character = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "character", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #else - sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyObject), + sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyObject), #endif - __Pyx_ImportType_CheckSize_Warn_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_character) __PYX_ERR(2, 891, __pyx_L1_error) - __pyx_mstate->__pyx_ptype_5numpy_ufunc = __Pyx_ImportType_3_1_2(__pyx_t_1, "numpy", 
"ufunc", + __Pyx_ImportType_CheckSize_Warn_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_character) __PYX_ERR(2, 843, __pyx_L1_error) + __pyx_mstate->__pyx_ptype_5numpy_ufunc = __Pyx_ImportType_3_1_6(__pyx_t_1, "numpy", "ufunc", #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 - sizeof(PyUFuncObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyUFuncObject), + sizeof(PyUFuncObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyUFuncObject), #elif CYTHON_COMPILING_IN_LIMITED_API - sizeof(PyUFuncObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyUFuncObject), + sizeof(PyUFuncObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyUFuncObject), #else - sizeof(PyUFuncObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_2(PyUFuncObject), + sizeof(PyUFuncObject), __PYX_GET_STRUCT_ALIGNMENT_3_1_6(PyUFuncObject), #endif - __Pyx_ImportType_CheckSize_Ignore_3_1_2); if (!__pyx_mstate->__pyx_ptype_5numpy_ufunc) __PYX_ERR(2, 955, __pyx_L1_error) + __Pyx_ImportType_CheckSize_Ignore_3_1_6); if (!__pyx_mstate->__pyx_ptype_5numpy_ufunc) __PYX_ERR(2, 907, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_RefNannyFinishContext(); return 0; @@ -24214,7 +24226,7 @@ if (!__Pyx_RefNanny) { #endif __Pyx_RefNannySetupContext("PyInit__optimized", 0); - if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, __Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, __Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) #ifdef __Pxy_PyFrame_Initialize_Offsets __Pxy_PyFrame_Initialize_Offsets(); #endif @@ -24222,30 +24234,30 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); __pyx_mstate->__pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_mstate->__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) __pyx_mstate->__pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_mstate->__pyx_empty_unicode)) __PYX_ERR(0, 1, 
__pyx_L1_error) /*--- Initialize various global constants etc. ---*/ - if (__Pyx_InitConstants(__pyx_mstate) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_InitConstants(__pyx_mstate) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) stringtab_initialized = 1; - if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_InitGlobals() < (0)) __PYX_ERR(0, 1, __pyx_L1_error) #if 0 || defined(__Pyx_CyFunction_USED) || defined(__Pyx_FusedFunction_USED) || defined(__Pyx_Coroutine_USED) || defined(__Pyx_Generator_USED) || defined(__Pyx_AsyncGen_USED) - if (__pyx_CommonTypesMetaclass_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__pyx_CommonTypesMetaclass_init(__pyx_m) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) #endif #ifdef __Pyx_CyFunction_USED - if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__pyx_CyFunction_init(__pyx_m) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) #endif #ifdef __Pyx_FusedFunction_USED - if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__pyx_FusedFunction_init(__pyx_m) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) #endif #ifdef __Pyx_Coroutine_USED - if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__pyx_Coroutine_init(__pyx_m) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) #endif #ifdef __Pyx_Generator_USED - if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__pyx_Generator_init(__pyx_m) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) #endif #ifdef __Pyx_AsyncGen_USED - if (__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__pyx_AsyncGen_init(__pyx_m) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) #endif /*--- Library function declarations ---*/ if (__pyx_module_is_main_pyprophet__scoring___optimized) { - if (PyObject_SetAttr(__pyx_m, __pyx_mstate_global->__pyx_n_u_name_2, __pyx_mstate_global->__pyx_n_u_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (PyObject_SetAttr(__pyx_m, __pyx_mstate_global->__pyx_n_u_name_2, 
__pyx_mstate_global->__pyx_n_u_main) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) } { PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) @@ -24254,10 +24266,10 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); } } /*--- Builtin init code ---*/ - if (__Pyx_InitCachedBuiltins(__pyx_mstate) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_InitCachedBuiltins(__pyx_mstate) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) /*--- Constants init code ---*/ - if (__Pyx_InitCachedConstants(__pyx_mstate) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - if (__Pyx_CreateCodeObjects(__pyx_mstate) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_InitCachedConstants(__pyx_mstate) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) + if (__Pyx_CreateCodeObjects(__pyx_mstate) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) /*--- Global type/function init code ---*/ (void)__Pyx_modinit_global_init_code(__pyx_mstate); (void)__Pyx_modinit_variable_export_code(__pyx_mstate); @@ -24408,7 +24420,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_collections_abc_Sequence, __pyx_mstate_global->__pyx_n_u_count); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 240, __pyx_L10_error) __Pyx_GOTREF(__pyx_t_5); - if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_array_type, __pyx_mstate_global->__pyx_n_u_count, __pyx_t_5) < 0) __PYX_ERR(1, 240, __pyx_L10_error) + if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_array_type, __pyx_mstate_global->__pyx_n_u_count, __pyx_t_5) < (0)) __PYX_ERR(1, 240, __pyx_L10_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "View.MemoryView":241 @@ -24420,7 +24432,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_collections_abc_Sequence, __pyx_mstate_global->__pyx_n_u_index); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 241, __pyx_L10_error) __Pyx_GOTREF(__pyx_t_5); - if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_array_type, 
__pyx_mstate_global->__pyx_n_u_index, __pyx_t_5) < 0) __PYX_ERR(1, 241, __pyx_L10_error) + if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_array_type, __pyx_mstate_global->__pyx_n_u_index, __pyx_t_5) < (0)) __PYX_ERR(1, 241, __pyx_L10_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "View.MemoryView":239 @@ -24630,7 +24642,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_collections_abc_Sequence, __pyx_mstate_global->__pyx_n_u_count); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 983, __pyx_L18_error) __Pyx_GOTREF(__pyx_t_5); - if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_memoryviewslice_type, __pyx_mstate_global->__pyx_n_u_count, __pyx_t_5) < 0) __PYX_ERR(1, 983, __pyx_L18_error) + if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_memoryviewslice_type, __pyx_mstate_global->__pyx_n_u_count, __pyx_t_5) < (0)) __PYX_ERR(1, 983, __pyx_L18_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "View.MemoryView":984 @@ -24642,7 +24654,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_collections_abc_Sequence, __pyx_mstate_global->__pyx_n_u_index); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 984, __pyx_L18_error) __Pyx_GOTREF(__pyx_t_5); - if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_memoryviewslice_type, __pyx_mstate_global->__pyx_n_u_index, __pyx_t_5) < 0) __PYX_ERR(1, 984, __pyx_L18_error) + if (__Pyx_SetItemOnTypeDict(__pyx_mstate_global->__pyx_memoryviewslice_type, __pyx_mstate_global->__pyx_n_u_index, __pyx_t_5) < (0)) __PYX_ERR(1, 984, __pyx_L18_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "View.MemoryView":982 @@ -24797,7 +24809,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = PyCFunction_NewEx(&__pyx_mdef_15View_dot_MemoryView_1__pyx_unpickle_Enum, NULL, __pyx_mstate_global->__pyx_n_u_View_MemoryView); if (unlikely(!__pyx_t_5)) __PYX_ERR(1, 1, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if 
(PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_pyx_unpickle_Enum, __pyx_t_5) < 0) __PYX_ERR(1, 1, __pyx_L1_error) + if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_pyx_unpickle_Enum, __pyx_t_5) < (0)) __PYX_ERR(1, 1, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "pyprophet/scoring/_optimized.pyx":5 @@ -24809,7 +24821,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_ImportDottedModule(__pyx_mstate_global->__pyx_n_u_numpy, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_np, __pyx_t_5) < 0) __PYX_ERR(0, 5, __pyx_L1_error) + if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_np, __pyx_t_5) < (0)) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "pyprophet/scoring/_optimized.pyx":6 @@ -24821,7 +24833,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_ImportDottedModule(__pyx_mstate_global->__pyx_n_u_operator, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_operator, __pyx_t_5) < 0) __PYX_ERR(0, 6, __pyx_L1_error) + if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_operator, __pyx_t_5) < (0)) __PYX_ERR(0, 6, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "pyprophet/scoring/_optimized.pyx":12 @@ -24834,7 +24846,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); __pyx_t_5 = __Pyx_CyFunction_New(&__pyx_mdef_9pyprophet_7scoring_10_optimized_1find_nearest_matches, 0, __pyx_mstate_global->__pyx_n_u_find_nearest_matches, NULL, __pyx_mstate_global->__pyx_n_u_pyprophet_scoring__optimized, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[0])); if (unlikely(!__pyx_t_5)) 
__PYX_ERR(0, 12, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_5, __pyx_mstate_global->__pyx_tuple[2]); - if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_find_nearest_matches, __pyx_t_5) < 0) __PYX_ERR(0, 12, __pyx_L1_error) + if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_find_nearest_matches, __pyx_t_5) < (0)) __PYX_ERR(0, 12, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "pyprophet/scoring/_optimized.pyx":135 @@ -24846,7 +24858,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_CyFunction_New(&__pyx_mdef_9pyprophet_7scoring_10_optimized_3count_num_positives, 0, __pyx_mstate_global->__pyx_n_u_count_num_positives, NULL, __pyx_mstate_global->__pyx_n_u_pyprophet_scoring__optimized, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[1])); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 135, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_count_num_positives, __pyx_t_5) < 0) __PYX_ERR(0, 135, __pyx_L1_error) + if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_count_num_positives, __pyx_t_5) < (0)) __PYX_ERR(0, 135, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "pyprophet/scoring/_optimized.pyx":154 @@ -24858,7 +24870,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_CyFunction_New(&__pyx_mdef_9pyprophet_7scoring_10_optimized_5find_top_ranked, 0, __pyx_mstate_global->__pyx_n_u_find_top_ranked, NULL, __pyx_mstate_global->__pyx_n_u_pyprophet_scoring__optimized, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[2])); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 154, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_find_top_ranked, __pyx_t_5) < 0) __PYX_ERR(0, 154, 
__pyx_L1_error) + if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_find_top_ranked, __pyx_t_5) < (0)) __PYX_ERR(0, 154, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "pyprophet/scoring/_optimized.pyx":201 @@ -24870,7 +24882,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_CyFunction_New(&__pyx_mdef_9pyprophet_7scoring_10_optimized_7rank, 0, __pyx_mstate_global->__pyx_n_u_rank, NULL, __pyx_mstate_global->__pyx_n_u_pyprophet_scoring__optimized, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[3])); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 201, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_rank, __pyx_t_5) < 0) __PYX_ERR(0, 201, __pyx_L1_error) + if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_rank, __pyx_t_5) < (0)) __PYX_ERR(0, 201, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "pyprophet/scoring/_optimized.pyx":223 @@ -24882,7 +24894,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_CyFunction_New(&__pyx_mdef_9pyprophet_7scoring_10_optimized_9rank32, 0, __pyx_mstate_global->__pyx_n_u_rank32, NULL, __pyx_mstate_global->__pyx_n_u_pyprophet_scoring__optimized, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[4])); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 223, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_rank32, __pyx_t_5) < 0) __PYX_ERR(0, 223, __pyx_L1_error) + if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_rank32, __pyx_t_5) < (0)) __PYX_ERR(0, 223, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "pyprophet/scoring/_optimized.pyx":245 @@ -24894,7 +24906,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = 
__Pyx_CyFunction_New(&__pyx_mdef_9pyprophet_7scoring_10_optimized_11single_chromatogram_hypothesis_fast, 0, __pyx_mstate_global->__pyx_n_u_single_chromatogram_hypothesis_f, NULL, __pyx_mstate_global->__pyx_n_u_pyprophet_scoring__optimized, __pyx_mstate_global->__pyx_d, ((PyObject *)__pyx_mstate_global->__pyx_codeobj_tab[5])); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 245, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_single_chromatogram_hypothesis_f, __pyx_t_5) < 0) __PYX_ERR(0, 245, __pyx_L1_error) + if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_single_chromatogram_hypothesis_f, __pyx_t_5) < (0)) __PYX_ERR(0, 245, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /* "pyprophet/scoring/_optimized.pyx":1 @@ -24904,7 +24916,7 @@ __Pyx_RefNannySetupContext("PyInit__optimized", 0); */ __pyx_t_5 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_test, __pyx_t_5) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (PyDict_SetItem(__pyx_mstate_global->__pyx_d, __pyx_mstate_global->__pyx_n_u_test, __pyx_t_5) < (0)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; /*--- Wrapped vars code ---*/ @@ -25168,7 +25180,7 @@ static int __Pyx_InitCachedBuiltins(__pyx_mstatetype *__pyx_mstate) { __pyx_builtin_Ellipsis = __Pyx_GetBuiltinName(__pyx_mstate->__pyx_n_u_Ellipsis); if (!__pyx_builtin_Ellipsis) __PYX_ERR(1, 408, __pyx_L1_error) __pyx_builtin_id = __Pyx_GetBuiltinName(__pyx_mstate->__pyx_n_u_id); if (!__pyx_builtin_id) __PYX_ERR(1, 618, __pyx_L1_error) __pyx_builtin_IndexError = __Pyx_GetBuiltinName(__pyx_mstate->__pyx_n_u_IndexError); if (!__pyx_builtin_IndexError) __PYX_ERR(1, 914, __pyx_L1_error) - __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_mstate->__pyx_n_u_ImportError); if 
(!__pyx_builtin_ImportError) __PYX_ERR(2, 1051, __pyx_L1_error) + __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_mstate->__pyx_n_u_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(2, 1010, __pyx_L1_error) return 0; __pyx_L1_error:; return -1; @@ -25557,7 +25569,7 @@ __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) res = PyTuple_New(n); if (unlikely(res == NULL)) return NULL; for (i = 0; i < n; i++) { - if (unlikely(__Pyx_PyTuple_SET_ITEM(res, i, src[i]) < 0)) { + if (unlikely(__Pyx_PyTuple_SET_ITEM(res, i, src[i]) < (0))) { Py_DECREF(res); return NULL; } @@ -28297,6 +28309,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject changed = 1; } #endif // CYTHON_METH_FASTCALL +#if !CYTHON_COMPILING_IN_PYPY else if (strcmp(memb->name, "__module__") == 0) { PyObject *descr; assert(memb->type == T_OBJECT); @@ -28311,11 +28324,13 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject } changed = 1; } +#endif // !CYTHON_COMPILING_IN_PYPY } memb++; } } #endif // !CYTHON_COMPILING_IN_LIMITED_API +#if !CYTHON_COMPILING_IN_PYPY slot = spec->slots; while (slot && slot->slot && slot->slot != Py_tp_getset) slot++; @@ -28347,6 +28362,7 @@ static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject ++getset; } } +#endif // !CYTHON_COMPILING_IN_PYPY if (changed) PyType_Modified(type); #endif // PY_VERSION_HEX > 0x030900B1 @@ -28451,6 +28467,13 @@ static int __Pyx_PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **me /* PyObjectCallMethod0 */ static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name) { +#if CYTHON_VECTORCALL && (__PYX_LIMITED_VERSION_HEX >= 0x030C0000 || (!CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x03090000)) + PyObject *args[1] = {obj}; + (void) __Pyx_PyObject_GetMethod; + (void) __Pyx_PyObject_CallOneArg; + (void) __Pyx_PyObject_CallNoArg; + return PyObject_VectorcallMethod(method_name, args, 1 | 
PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); +#else PyObject *method = NULL, *result = NULL; int is_method = __Pyx_PyObject_GetMethod(obj, method_name, &method); if (likely(is_method)) { @@ -28463,6 +28486,7 @@ static PyObject* __Pyx_PyObject_CallMethod0(PyObject* obj, PyObject* method_name Py_DECREF(method); bad: return result; +#endif } /* ValidateBasesTuple */ @@ -28892,15 +28916,15 @@ static int __Pyx_setup_reduce(PyObject* type_obj) { } /* TypeImport */ -#ifndef __PYX_HAVE_RT_ImportType_3_1_2 -#define __PYX_HAVE_RT_ImportType_3_1_2 -static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module_name, const char *class_name, - size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_2 check_size) +#ifndef __PYX_HAVE_RT_ImportType_3_1_6 +#define __PYX_HAVE_RT_ImportType_3_1_6 +static PyTypeObject *__Pyx_ImportType_3_1_6(PyObject *module, const char *module_name, const char *class_name, + size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_1_6 check_size) { PyObject *result = 0; Py_ssize_t basicsize; Py_ssize_t itemsize; -#if CYTHON_COMPILING_IN_LIMITED_API +#if defined(Py_LIMITED_API) || (defined(CYTHON_COMPILING_IN_LIMITED_API) && CYTHON_COMPILING_IN_LIMITED_API) PyObject *py_basicsize; PyObject *py_itemsize; #endif @@ -28913,7 +28937,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module module_name, class_name); goto bad; } -#if !CYTHON_COMPILING_IN_LIMITED_API +#if !( defined(Py_LIMITED_API) || (defined(CYTHON_COMPILING_IN_LIMITED_API) && CYTHON_COMPILING_IN_LIMITED_API) ) basicsize = ((PyTypeObject *)result)->tp_basicsize; itemsize = ((PyTypeObject *)result)->tp_itemsize; #else @@ -28951,7 +28975,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module module_name, class_name, size, basicsize+itemsize); goto bad; } - if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_2 && + if (check_size == __Pyx_ImportType_CheckSize_Error_3_1_6 && ((size_t)basicsize > size 
|| (size_t)(basicsize + itemsize) < size)) { PyErr_Format(PyExc_ValueError, "%.200s.%.200s size changed, may indicate binary incompatibility. " @@ -28959,7 +28983,7 @@ static PyTypeObject *__Pyx_ImportType_3_1_2(PyObject *module, const char *module module_name, class_name, size, basicsize, basicsize+itemsize); goto bad; } - else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_2 && (size_t)basicsize > size) { + else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_1_6 && (size_t)basicsize > size) { if (PyErr_WarnFormat(NULL, 0, "%.200s.%.200s size changed, may indicate binary incompatibility. " "Expected %zd from C header, got %zd from PyObject", @@ -29100,7 +29124,7 @@ static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyTypeObject *metaclass, PyOb } /* CommonTypesMetaclass */ -PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) { +static PyObject* __pyx_CommonTypesMetaclass_get_module(CYTHON_UNUSED PyObject *self, CYTHON_UNUSED void* context) { return PyUnicode_FromString(__PYX_ABI_MODULE_NAME); } static PyGetSetDef __pyx_CommonTypesMetaclass_getset[] = { @@ -29129,6 +29153,7 @@ static int __pyx_CommonTypesMetaclass_init(PyObject *module) { return -1; } mstate->__pyx_CommonTypesMetaclassType = __Pyx_FetchCommonTypeFromSpec(NULL, module, &__pyx_CommonTypesMetaclass_spec, bases); + Py_DECREF(bases); if (unlikely(mstate->__pyx_CommonTypesMetaclassType == NULL)) { return -1; } @@ -33739,6 +33764,10 @@ __Pyx_PyType_GetFullyQualifiedName(PyTypeObject* tp) PyCode_NewWithPosOnlyArgs #endif (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, __pyx_mstate_global->__pyx_empty_bytes); + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030c00A1 + if (likely(result)) + result->_co_firsttraceable = 0; + #endif return result; } #elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY @@ -34066,6 +34095,17 @@ static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { static 
CYTHON_INLINE PyObject * __Pyx_PyLong_FromSize_t(size_t ival) { return PyLong_FromSize_t(ival); } +#if CYTHON_USE_PYLONG_INTERNALS +static CYTHON_INLINE int __Pyx_PyLong_CompactAsLong(PyObject *x, long *return_value) { + if (unlikely(!__Pyx_PyLong_IsCompact(x))) + return 0; + Py_ssize_t value = __Pyx_PyLong_CompactValue(x); + if ((sizeof(long) < sizeof(Py_ssize_t)) && unlikely(value != (long) value)) + return 0; + *return_value = (long) value; + return 1; +} +#endif /* MultiPhaseInitModuleState */ From 7dd1426c17f54811991ccdd2679b9481db0ee0b7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 21:02:19 +0000 Subject: [PATCH 03/30] Add alignment integration to export functionality Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/_config.py | 6 ++ pyprophet/cli/export.py | 34 +++++++++ pyprophet/io/export/osw.py | 123 +++++++++++++++++++++++++++++- pyprophet/io/export/parquet.py | 134 ++++++++++++++++++++++++++++++++- 4 files changed, 291 insertions(+), 6 deletions(-) diff --git a/pyprophet/_config.py b/pyprophet/_config.py index 7a9edfa3..993711e4 100644 --- a/pyprophet/_config.py +++ b/pyprophet/_config.py @@ -649,6 +649,8 @@ class ExportIOConfig(BaseIOConfig): max_global_peptide_qvalue (float): Filter results to maximum global peptide-level q-value. protein (bool): Append protein-level error-rate estimates if available. max_global_protein_qvalue (float): Filter results to maximum global protein-level q-value. + use_alignment (bool): Use alignment results to recover peaks with good alignment scores. + max_alignment_pep (float): Maximum PEP to consider for good alignments when use_alignment is True. 
# Quantification matrix options top_n (int): Number of top intense features to use for summarization @@ -688,6 +690,10 @@ class ExportIOConfig(BaseIOConfig): protein: bool = True max_global_protein_qvalue: float = 0.01 test: bool = False + + # Alignment options + use_alignment: bool = False + max_alignment_pep: float = 0.7 # Quantification matrix options top_n: int = 3 diff --git a/pyprophet/cli/export.py b/pyprophet/cli/export.py index e094e924..a488b7b3 100644 --- a/pyprophet/cli/export.py +++ b/pyprophet/cli/export.py @@ -141,6 +141,19 @@ def export(): type=float, help="[format: matrix/legacy] Filter results to maximum global protein-level q-value.", ) +@click.option( + "--use_alignment/--no-use_alignment", + default=False, + show_default=True, + help="Use alignment results to recover peaks with good alignment scores (requires FEATURE_MS2_ALIGNMENT and SCORE_ALIGNMENT tables).", +) +@click.option( + "--max_alignment_pep", + default=0.7, + show_default=True, + type=float, + help="[format: matrix/legacy] Maximum PEP to consider for good alignments when use_alignment is enabled.", +) @measure_memory_usage_and_time def export_tsv( infile, @@ -156,6 +169,8 @@ def export_tsv( max_global_peptide_qvalue, protein, max_global_protein_qvalue, + use_alignment, + max_alignment_pep, ): """ Export Proteomics/Peptidoform TSV/CSV tables @@ -185,6 +200,8 @@ def export_tsv( max_global_peptide_qvalue=max_global_peptide_qvalue, protein=protein, max_global_protein_qvalue=max_global_protein_qvalue, + use_alignment=use_alignment, + max_alignment_pep=max_alignment_pep, ) reader = ReaderDispatcher.get_reader(config) @@ -273,6 +290,19 @@ def export_tsv( type=float, help="[format: matrix/legacy] Filter results to maximum global protein-level q-value.", ) +@click.option( + "--use_alignment/--no-use_alignment", + default=False, + show_default=True, + help="Use alignment results to recover peaks with good alignment scores (requires FEATURE_MS2_ALIGNMENT and SCORE_ALIGNMENT tables).", +) 
+@click.option( + "--max_alignment_pep", + default=0.7, + show_default=True, + type=float, + help="[format: matrix/legacy] Maximum PEP to consider for good alignments when use_alignment is enabled.", +) @click.option( "--top_n", default=3, @@ -307,6 +337,8 @@ def export_matrix( max_rs_peakgroup_qvalue, max_global_peptide_qvalue, max_global_protein_qvalue, + use_alignment, + max_alignment_pep, top_n, consistent_top, normalization, @@ -339,6 +371,8 @@ def export_matrix( max_global_peptide_qvalue=max_global_peptide_qvalue, protein=True, max_global_protein_qvalue=max_global_protein_qvalue, + use_alignment=use_alignment, + max_alignment_pep=max_alignment_pep, top_n=top_n, consistent_top=consistent_top, normalization=normalization, diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 4e3ef6ab..adba96d2 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -316,7 +316,8 @@ def _read_augmented_data(self, con, cfg): return pd.merge(data, ipf_data, how="left", on="id") def _read_standard_data(self, con, cfg): - """Read standard OpenSWATH data without IPF.""" + """Read standard OpenSWATH data without IPF, optionally including aligned features.""" + # First, get features that pass MS2 QVALUE threshold query = f""" SELECT RUN.ID AS id_run, PEPTIDE.ID AS id_peptide, @@ -342,7 +343,8 @@ def _read_standard_data(self, con, cfg): FEATURE.RIGHT_WIDTH AS rightWidth, SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS m_score + SCORE_MS2.QVALUE AS m_score, + 0 AS from_alignment FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -354,7 +356,75 @@ def _read_standard_data(self, con, cfg): WHERE SCORE_MS2.QVALUE < {cfg.max_rs_peakgroup_qvalue} ORDER BY transition_group_id, peak_group_rank; """ - return pd.read_sql_query(query, con) + data = pd.read_sql_query(query, con) + + # If 
alignment is enabled, fetch and merge aligned features + if cfg.use_alignment: + aligned_features = self._fetch_alignment_features(con, cfg) + + if not aligned_features.empty: + # Get full feature data for aligned features that are NOT already in base results + # We only want to add features that didn't pass MS2 threshold but have good alignment + aligned_ids = aligned_features['id'].unique() + existing_ids = data['id'].unique() + new_aligned_ids = [aid for aid in aligned_ids if aid not in existing_ids] + + if new_aligned_ids: + # Fetch full data for these new aligned features + aligned_ids_str = ','.join(map(str, new_aligned_ids)) + aligned_query = f""" + SELECT RUN.ID AS id_run, + PEPTIDE.ID AS id_peptide, + PRECURSOR.ID AS transition_group_id, + PRECURSOR.DECOY AS decoy, + RUN.ID AS run_id, + RUN.FILENAME AS filename, + FEATURE.EXP_RT AS RT, + FEATURE.EXP_RT - FEATURE.DELTA_RT AS assay_rt, + FEATURE.DELTA_RT AS delta_rt, + FEATURE.NORM_RT AS iRT, + PRECURSOR.LIBRARY_RT AS assay_iRT, + FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, + FEATURE.ID AS id, + PEPTIDE.UNMODIFIED_SEQUENCE AS Sequence, + PEPTIDE.MODIFIED_SEQUENCE AS FullPeptideName, + PRECURSOR.CHARGE AS Charge, + PRECURSOR.PRECURSOR_MZ AS mz, + FEATURE_MS2.AREA_INTENSITY AS Intensity, + FEATURE_MS1.AREA_INTENSITY AS aggr_prec_Peak_Area, + FEATURE_MS1.APEX_INTENSITY AS aggr_prec_Peak_Apex, + FEATURE.LEFT_WIDTH AS leftWidth, + FEATURE.RIGHT_WIDTH AS rightWidth, + SCORE_MS2.RANK AS peak_group_rank, + SCORE_MS2.SCORE AS d_score, + SCORE_MS2.QVALUE AS m_score, + 1 AS from_alignment + FROM PRECURSOR + INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID + INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID + INNER JOIN FEATURE ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + INNER JOIN RUN ON RUN.ID = FEATURE.RUN_ID + LEFT JOIN FEATURE_MS1 ON FEATURE_MS1.FEATURE_ID = FEATURE.ID + LEFT JOIN FEATURE_MS2 ON FEATURE_MS2.FEATURE_ID = FEATURE.ID + LEFT JOIN 
SCORE_MS2 ON SCORE_MS2.FEATURE_ID = FEATURE.ID + WHERE FEATURE.ID IN ({aligned_ids_str}) + """ + aligned_data = pd.read_sql_query(aligned_query, con) + + # Merge alignment scores into the aligned data + aligned_data = pd.merge( + aligned_data, + aligned_features[['id', 'alignment_pep', 'alignment_qvalue']], + on='id', + how='left' + ) + + logger.info(f"Adding {len(aligned_data)} features recovered through alignment") + + # Combine with base data + data = pd.concat([data, aligned_data], ignore_index=True) + + return data def _augment_data(self, data, con, cfg): """Apply common data augmentations to the base dataset.""" @@ -633,6 +703,53 @@ def _add_protein_error_data(self, data, con, cfg): return data + def _fetch_alignment_features(self, con, cfg): + """ + Fetch aligned features with good alignment scores. + + This method retrieves features that have been aligned across runs + and pass the alignment quality threshold. These features can be used + to recover peaks in runs where the MS2 signal might be weak but the + alignment score is good. 
+ + Args: + con: Database connection + cfg: Configuration object with max_alignment_pep threshold + + Returns: + DataFrame with aligned feature IDs that pass quality threshold + """ + # Check if alignment tables exist + if not check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT") or not check_sqlite_table(con, "SCORE_ALIGNMENT"): + logger.debug("Alignment tables not found, skipping alignment integration") + return pd.DataFrame() + + max_alignment_pep = cfg.max_alignment_pep + + query = f""" + SELECT + ALIGNED_FEATURE_ID AS id, + PRECURSOR_ID AS transition_group_id, + RUN_ID AS run_id, + SCORE_ALIGNMENT.PEP AS alignment_pep, + SCORE_ALIGNMENT.QVALUE AS alignment_qvalue + FROM ( + SELECT DISTINCT * FROM FEATURE_MS2_ALIGNMENT + ) AS FEATURE_MS2_ALIGNMENT + INNER JOIN ( + SELECT DISTINCT *, MIN(QVALUE) + FROM SCORE_ALIGNMENT + GROUP BY FEATURE_ID + ) AS SCORE_ALIGNMENT + ON SCORE_ALIGNMENT.FEATURE_ID = FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID + WHERE LABEL = 1 + AND SCORE_ALIGNMENT.PEP < {max_alignment_pep} + """ + + df = pd.read_sql_query(query, con) + logger.info(f"Found {len(df)} aligned features passing alignment PEP < {max_alignment_pep}") + return df + ################################## # Export-specific readers below ################################## diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index f9cc2e19..8bc31885 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -236,8 +236,9 @@ def _read_augmented_data(self, con) -> pd.DataFrame: def _read_standard_data(self, con) -> pd.DataFrame: """ - Read standard OpenSWATH data without IPF. + Read standard OpenSWATH data without IPF, optionally including aligned features. 
""" + # First, get features that pass MS2 QVALUE threshold query = f""" SELECT RUN_ID AS id_run, @@ -264,13 +265,81 @@ def _read_standard_data(self, con) -> pd.DataFrame: RIGHT_WIDTH AS rightWidth, SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, SCORE_MS2_SCORE AS d_score, - SCORE_MS2_Q_VALUE AS m_score + SCORE_MS2_Q_VALUE AS m_score, + 0 AS from_alignment FROM data WHERE PROTEIN_ID IS NOT NULL AND SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} ORDER BY transition_group_id, peak_group_rank """ - return con.execute(query).fetchdf() + data = con.execute(query).fetchdf() + + # If alignment is enabled, fetch and merge aligned features + if self.config.use_alignment: + aligned_features = self._fetch_alignment_features(con) + + if not aligned_features.empty: + # Get full feature data for aligned features that are NOT already in base results + # We only want to add features that didn't pass MS2 threshold but have good alignment + aligned_ids = aligned_features['id'].unique() + existing_ids = data['id'].unique() + new_aligned_ids = [aid for aid in aligned_ids if aid not in existing_ids] + + if new_aligned_ids: + # Fetch full data for these new aligned features from the main data view + # Register aligned IDs as a temp table for the query + aligned_ids_df = pd.DataFrame({'id': new_aligned_ids}) + con.register('aligned_ids_temp', aligned_ids_df) + + aligned_query = f""" + SELECT + RUN_ID AS id_run, + PEPTIDE_ID AS id_peptide, + PRECURSOR_ID AS transition_group_id, + PRECURSOR_DECOY AS decoy, + RUN_ID AS run_id, + FILENAME AS filename, + EXP_RT AS RT, + EXP_RT - DELTA_RT AS assay_rt, + DELTA_RT AS delta_rt, + NORM_RT AS iRT, + PRECURSOR_LIBRARY_RT AS assay_iRT, + NORM_RT - PRECURSOR_LIBRARY_RT AS delta_iRT, + FEATURE_ID AS id, + UNMODIFIED_SEQUENCE AS Sequence, + MODIFIED_SEQUENCE AS FullPeptideName, + PRECURSOR_CHARGE AS Charge, + PRECURSOR_MZ AS mz, + FEATURE_MS2_AREA_INTENSITY AS Intensity, + FEATURE_MS1_AREA_INTENSITY AS aggr_prec_Peak_Area, + 
FEATURE_MS1_APEX_INTENSITY AS aggr_prec_Peak_Apex, + LEFT_WIDTH AS leftWidth, + RIGHT_WIDTH AS rightWidth, + SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, + SCORE_MS2_SCORE AS d_score, + SCORE_MS2_Q_VALUE AS m_score, + 1 AS from_alignment + FROM data + WHERE PROTEIN_ID IS NOT NULL + AND FEATURE_ID IN (SELECT id FROM aligned_ids_temp) + """ + aligned_data = con.execute(aligned_query).fetchdf() + + # Merge alignment scores into the aligned data + if 'alignment_pep' in aligned_features.columns: + aligned_data = pd.merge( + aligned_data, + aligned_features[['id', 'alignment_pep', 'alignment_qvalue']], + on='id', + how='left' + ) + + logger.info(f"Adding {len(aligned_data)} features recovered through alignment") + + # Combine with base data + data = pd.concat([data, aligned_data], ignore_index=True) + + return data def _augment_data(self, data, con) -> pd.DataFrame: """ @@ -559,6 +628,65 @@ def _build_feature_vars_sql(self) -> str: return ", " + ", ".join(feature_vars) if feature_vars else "" + def _fetch_alignment_features(self, con) -> pd.DataFrame: + """ + Fetch aligned features with good alignment scores from alignment parquet file. + + This method checks for an alignment parquet file and retrieves features + that have been aligned across runs and pass the alignment quality threshold. 
+ + Args: + con: DuckDB connection + + Returns: + DataFrame with aligned feature IDs that pass quality threshold + """ + import os + + # Check for alignment file - it should be named with _feature_alignment.parquet suffix + alignment_file = None + if self.infile.endswith('.parquet'): + base_name = self.infile[:-8] # Remove .parquet + alignment_file = f"{base_name}_feature_alignment.parquet" + + if not alignment_file or not os.path.exists(alignment_file): + logger.debug("Alignment parquet file not found, skipping alignment integration") + return pd.DataFrame() + + logger.debug(f"Loading alignment data from {alignment_file}") + max_alignment_pep = self.config.max_alignment_pep + + try: + # Load alignment data + alignment_df = pd.read_parquet(alignment_file) + + # Filter to target (non-decoy) features with good alignment scores + if 'DECOY' in alignment_df.columns and 'VAR_XCORR_SHAPE' in alignment_df.columns: + # This looks like the feature_alignment table structure + filtered_df = alignment_df[ + (alignment_df['DECOY'] == 1) & # LABEL=1 means target + (alignment_df.get('alignment_pep', alignment_df.get('PEP', 1.0)) < max_alignment_pep) + ].copy() + + # Rename columns to match expected format + if 'FEATURE_ID' in filtered_df.columns: + result = filtered_df[['FEATURE_ID', 'PRECURSOR_ID', 'RUN_ID']].rename( + columns={'FEATURE_ID': 'id'} + ) + + # Add alignment scores if available + if 'PEP' in filtered_df.columns: + result['alignment_pep'] = filtered_df['PEP'] + if 'QVALUE' in filtered_df.columns: + result['alignment_qvalue'] = filtered_df['QVALUE'] + + logger.info(f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep}") + return result + except Exception as e: + logger.warning(f"Could not load alignment data: {e}") + + return pd.DataFrame() + ################################## # Export-specific readers below ################################## From 3df1c34aa5304b98380ebba0a801bea273008346 Mon Sep 17 00:00:00 2001 From: 
"copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 21:07:57 +0000 Subject: [PATCH 04/30] Enhance alignment export to include SCORE_ALIGNMENT data and improve parquet handling Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 79 +++++++++++++++++++++++++--------- pyprophet/io/export/parquet.py | 39 ++++++++++++++--- 2 files changed, 90 insertions(+), 28 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index adba96d2..11492a46 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -1683,30 +1683,67 @@ def _create_temp_table(self, conn, column_info: dict) -> None: conn.execute(create_temp_table_query) def _export_alignment_data(self, conn, path: str = None) -> None: - """Export feature alignment data""" + """Export feature alignment data with scores if available""" if path is None: path = os.path.join(self.config.outfile, "feature_alignment.parquet") - query = f""" - SELECT - ALIGNMENT_ID, - RUN_ID, - PRECURSOR_ID, - ALIGNED_FEATURE_ID AS FEATURE_ID, - REFERENCE_FEATURE_ID, - ALIGNED_RT, - REFERENCE_RT, - XCORR_COELUTION_TO_REFERENCE AS VAR_XCORR_COELUTION_TO_REFERENCE, - XCORR_SHAPE_TO_REFERENCE AS VAR_XCORR_SHAPE_TO_REFERENCE, - MI_TO_REFERENCE AS VAR_MI_TO_REFERENCE, - XCORR_COELUTION_TO_ALL AS VAR_XCORR_COELUTION_TO_ALL, - XCORR_SHAPE_TO_ALL AS VAR_XCORR_SHAPE, - MI_TO_ALL AS VAR_MI_TO_ALL, - RETENTION_TIME_DEVIATION AS VAR_RETENTION_TIME_DEVIATION, - PEAK_INTENSITY_RATIO AS VAR_PEAK_INTENSITY_RATIO, - LABEL AS DECOY - FROM sqlite_scan('{self.config.infile}', 'FEATURE_MS2_ALIGNMENT') - """ + # Check if SCORE_ALIGNMENT table exists + with sqlite3.connect(self.config.infile) as sql_conn: + has_score_alignment = check_sqlite_table(sql_conn, "SCORE_ALIGNMENT") + + if has_score_alignment: + # Export with alignment scores + query = f""" + SELECT + FEATURE_MS2_ALIGNMENT.ALIGNMENT_ID, + FEATURE_MS2_ALIGNMENT.RUN_ID, + 
FEATURE_MS2_ALIGNMENT.PRECURSOR_ID, + FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID AS FEATURE_ID, + FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID, + FEATURE_MS2_ALIGNMENT.ALIGNED_RT, + FEATURE_MS2_ALIGNMENT.REFERENCE_RT, + FEATURE_MS2_ALIGNMENT.XCORR_COELUTION_TO_REFERENCE AS VAR_XCORR_COELUTION_TO_REFERENCE, + FEATURE_MS2_ALIGNMENT.XCORR_SHAPE_TO_REFERENCE AS VAR_XCORR_SHAPE_TO_REFERENCE, + FEATURE_MS2_ALIGNMENT.MI_TO_REFERENCE AS VAR_MI_TO_REFERENCE, + FEATURE_MS2_ALIGNMENT.XCORR_COELUTION_TO_ALL AS VAR_XCORR_COELUTION_TO_ALL, + FEATURE_MS2_ALIGNMENT.XCORR_SHAPE_TO_ALL AS VAR_XCORR_SHAPE, + FEATURE_MS2_ALIGNMENT.MI_TO_ALL AS VAR_MI_TO_ALL, + FEATURE_MS2_ALIGNMENT.RETENTION_TIME_DEVIATION AS VAR_RETENTION_TIME_DEVIATION, + FEATURE_MS2_ALIGNMENT.PEAK_INTENSITY_RATIO AS VAR_PEAK_INTENSITY_RATIO, + FEATURE_MS2_ALIGNMENT.LABEL AS DECOY, + SCORE_ALIGNMENT.SCORE AS SCORE, + SCORE_ALIGNMENT.PEP AS PEP, + SCORE_ALIGNMENT.QVALUE AS QVALUE + FROM sqlite_scan('{self.config.infile}', 'FEATURE_MS2_ALIGNMENT') AS FEATURE_MS2_ALIGNMENT + LEFT JOIN ( + SELECT FEATURE_ID, SCORE, PEP, QVALUE, MIN(QVALUE) as MIN_QVALUE + FROM sqlite_scan('{self.config.infile}', 'SCORE_ALIGNMENT') + GROUP BY FEATURE_ID + ) AS SCORE_ALIGNMENT + ON FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID = SCORE_ALIGNMENT.FEATURE_ID + """ + else: + # Export without scores (original behavior) + query = f""" + SELECT + ALIGNMENT_ID, + RUN_ID, + PRECURSOR_ID, + ALIGNED_FEATURE_ID AS FEATURE_ID, + REFERENCE_FEATURE_ID, + ALIGNED_RT, + REFERENCE_RT, + XCORR_COELUTION_TO_REFERENCE AS VAR_XCORR_COELUTION_TO_REFERENCE, + XCORR_SHAPE_TO_REFERENCE AS VAR_XCORR_SHAPE_TO_REFERENCE, + MI_TO_REFERENCE AS VAR_MI_TO_REFERENCE, + XCORR_COELUTION_TO_ALL AS VAR_XCORR_COELUTION_TO_ALL, + XCORR_SHAPE_TO_ALL AS VAR_XCORR_SHAPE, + MI_TO_ALL AS VAR_MI_TO_ALL, + RETENTION_TIME_DEVIATION AS VAR_RETENTION_TIME_DEVIATION, + PEAK_INTENSITY_RATIO AS VAR_PEAK_INTENSITY_RATIO, + LABEL AS DECOY + FROM sqlite_scan('{self.config.infile}', 
'FEATURE_MS2_ALIGNMENT') + """ self._execute_copy_query(conn, query, path) diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 8bc31885..1475e8a7 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -661,12 +661,36 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: alignment_df = pd.read_parquet(alignment_file) # Filter to target (non-decoy) features with good alignment scores + # Note: DECOY column in parquet alignment file comes from LABEL in SQLite + # where LABEL=1 (DECOY=1 in parquet) means target, not decoy if 'DECOY' in alignment_df.columns and 'VAR_XCORR_SHAPE' in alignment_df.columns: # This looks like the feature_alignment table structure - filtered_df = alignment_df[ - (alignment_df['DECOY'] == 1) & # LABEL=1 means target - (alignment_df.get('alignment_pep', alignment_df.get('PEP', 1.0)) < max_alignment_pep) - ].copy() + + # Check if we have alignment scores (PEP/QVALUE) in the file + # If not, we'll need to rely on the base MS2 scores and just use alignment to identify features + has_alignment_scores = 'PEP' in alignment_df.columns or 'QVALUE' in alignment_df.columns + + if has_alignment_scores: + # Filter by alignment PEP threshold + pep_col = 'PEP' if 'PEP' in alignment_df.columns else None + qvalue_col = 'QVALUE' if 'QVALUE' in alignment_df.columns else None + + if pep_col: + filtered_df = alignment_df[ + (alignment_df['DECOY'] == 1) & # DECOY=1 means target (from LABEL=1 in SQLite) + (alignment_df[pep_col] < max_alignment_pep) + ].copy() + else: + # Use QVALUE if PEP not available (less ideal but workable) + filtered_df = alignment_df[ + (alignment_df['DECOY'] == 1) & + (alignment_df[qvalue_col] < max_alignment_pep) + ].copy() + else: + # No alignment scores in file - just filter by target status + # In this case, we can't apply alignment quality threshold + logger.warning("Alignment file found but no PEP/QVALUE scores present. 
Cannot filter by alignment quality.") + filtered_df = alignment_df[alignment_df['DECOY'] == 1].copy() # Rename columns to match expected format if 'FEATURE_ID' in filtered_df.columns: @@ -676,11 +700,12 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Add alignment scores if available if 'PEP' in filtered_df.columns: - result['alignment_pep'] = filtered_df['PEP'] + result['alignment_pep'] = filtered_df['PEP'].values if 'QVALUE' in filtered_df.columns: - result['alignment_qvalue'] = filtered_df['QVALUE'] + result['alignment_qvalue'] = filtered_df['QVALUE'].values - logger.info(f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep}") + logger.info(f"Found {len(result)} aligned features" + + (f" passing alignment PEP < {max_alignment_pep}" if has_alignment_scores else "")) return result except Exception as e: logger.warning(f"Could not load alignment data: {e}") From 0132d06185ef6f136a331c18ed02a3662d43ab05 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 21:24:01 +0000 Subject: [PATCH 05/30] Change use_alignment default to True with auto-detection of alignment data Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/_config.py | 6 +++--- pyprophet/cli/export.py | 8 ++++---- pyprophet/io/export/osw.py | 16 +++++++++------- pyprophet/io/export/parquet.py | 23 +++++++++++++++++++++-- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/pyprophet/_config.py b/pyprophet/_config.py index 993711e4..8ee78651 100644 --- a/pyprophet/_config.py +++ b/pyprophet/_config.py @@ -649,8 +649,8 @@ class ExportIOConfig(BaseIOConfig): max_global_peptide_qvalue (float): Filter results to maximum global peptide-level q-value. protein (bool): Append protein-level error-rate estimates if available. max_global_protein_qvalue (float): Filter results to maximum global protein-level q-value. 
- use_alignment (bool): Use alignment results to recover peaks with good alignment scores. - max_alignment_pep (float): Maximum PEP to consider for good alignments when use_alignment is True. + use_alignment (bool): Use alignment results to recover peaks with good alignment scores if alignment data is present (default: True). + max_alignment_pep (float): Maximum PEP to consider for good alignments when use_alignment is True (default: 0.7). # Quantification matrix options top_n (int): Number of top intense features to use for summarization @@ -692,7 +692,7 @@ class ExportIOConfig(BaseIOConfig): test: bool = False # Alignment options - use_alignment: bool = False + use_alignment: bool = True max_alignment_pep: float = 0.7 # Quantification matrix options diff --git a/pyprophet/cli/export.py b/pyprophet/cli/export.py index a488b7b3..32373e08 100644 --- a/pyprophet/cli/export.py +++ b/pyprophet/cli/export.py @@ -143,9 +143,9 @@ def export(): ) @click.option( "--use_alignment/--no-use_alignment", - default=False, + default=True, show_default=True, - help="Use alignment results to recover peaks with good alignment scores (requires FEATURE_MS2_ALIGNMENT and SCORE_ALIGNMENT tables).", + help="Use alignment results to recover peaks with good alignment scores if alignment data is present in the input file.", ) @click.option( "--max_alignment_pep", @@ -292,9 +292,9 @@ def export_tsv( ) @click.option( "--use_alignment/--no-use_alignment", - default=False, + default=True, show_default=True, - help="Use alignment results to recover peaks with good alignment scores (requires FEATURE_MS2_ALIGNMENT and SCORE_ALIGNMENT tables).", + help="Use alignment results to recover peaks with good alignment scores if alignment data is present in the input file.", ) @click.option( "--max_alignment_pep", diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 11492a46..47129587 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -161,6 +161,13 @@ def 
_check_ipf_presence(self, con, cfg): """Check if IPF data is present and should be used.""" return cfg.ipf != "disable" and check_sqlite_table(con, "SCORE_IPF") + def _check_alignment_presence(self, con): + """Check if alignment data is present.""" + return ( + check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT") + and check_sqlite_table(con, "SCORE_ALIGNMENT") + ) + def _read_unscored_data(self, con): """Read data from unscored files.""" score_sql = self._build_score_sql(con) @@ -358,8 +365,8 @@ def _read_standard_data(self, con, cfg): """ data = pd.read_sql_query(query, con) - # If alignment is enabled, fetch and merge aligned features - if cfg.use_alignment: + # If alignment is enabled and alignment data is present, fetch and merge aligned features + if cfg.use_alignment and self._check_alignment_presence(con): aligned_features = self._fetch_alignment_features(con, cfg) if not aligned_features.empty: @@ -719,11 +726,6 @@ def _fetch_alignment_features(self, con, cfg): Returns: DataFrame with aligned feature IDs that pass quality threshold """ - # Check if alignment tables exist - if not check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT") or not check_sqlite_table(con, "SCORE_ALIGNMENT"): - logger.debug("Alignment tables not found, skipping alignment integration") - return pd.DataFrame() - max_alignment_pep = cfg.max_alignment_pep query = f""" diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 1475e8a7..89d29b7b 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -27,6 +27,9 @@ def __init__(self, config: ExportIOConfig): self._has_transition_scores = any( col.startswith("SCORE_TRANSITION_") for col in self._columns ) + + # Check for alignment file + self._has_alignment = self._check_alignment_file_exists() def read(self) -> pd.DataFrame: """ @@ -69,6 +72,22 @@ def _is_unscored_file(self) -> bool: all_cols = get_parquet_column_names(self.infile) return all(not col.startswith("SCORE_") for col in all_cols) + 
def _check_alignment_file_exists(self) -> bool: + """ + Check if alignment parquet file exists. + """ + import os + + alignment_file = None + if self.infile.endswith('.parquet'): + base_name = self.infile[:-8] # Remove .parquet + alignment_file = f"{base_name}_feature_alignment.parquet" + + if alignment_file and os.path.exists(alignment_file): + logger.debug(f"Alignment file found: {alignment_file}") + return True + return False + def _read_unscored_data(self, con) -> pd.DataFrame: """ Read unscored data from Parquet files. @@ -274,8 +293,8 @@ def _read_standard_data(self, con) -> pd.DataFrame: """ data = con.execute(query).fetchdf() - # If alignment is enabled, fetch and merge aligned features - if self.config.use_alignment: + # If alignment is enabled and alignment data is present, fetch and merge aligned features + if self.config.use_alignment and self._has_alignment: aligned_features = self._fetch_alignment_features(con) if not aligned_features.empty: From 6c545eadd698917aed70db59a2186340ebb9fbf8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 21:43:36 +0000 Subject: [PATCH 06/30] Add alignment integration to split_parquet reader Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/split_parquet.py | 179 ++++++++++++++++++++++++++- 1 file changed, 176 insertions(+), 3 deletions(-) diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index 265130a8..76ea0609 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -42,6 +42,9 @@ def __init__(self, config: ExportIOConfig): self._has_transition_scores = any( col.startswith("SCORE_TRANSITION_") for col in self._columns ) + + # Check for alignment file + self._has_alignment = self._check_alignment_file_exists() def _get_precursor_files(self): """Helper to get precursor files based on structure""" @@ -112,6 +115,26 @@ def 
_is_unscored_file(self) -> bool: """ return all(not col.startswith("SCORE_") for col in self._columns) + def _check_alignment_file_exists(self) -> bool: + """ + Check if alignment parquet file exists for split parquet format. + + For split parquet, alignment file is at the parent directory level: + - infile is a directory containing *.oswpq subdirectories + - alignment file is at infile/feature_alignment.parquet + """ + import os + + alignment_file = None + if os.path.isdir(self.infile): + # Split parquet format: alignment file is in the parent directory + alignment_file = os.path.join(self.infile, "feature_alignment.parquet") + + if alignment_file and os.path.exists(alignment_file): + logger.debug(f"Alignment file found: {alignment_file}") + return True + return False + def _read_unscored_data(self, con) -> pd.DataFrame: """ Read unscored data from split Parquet files. @@ -339,8 +362,9 @@ def _read_library_data(self, con) -> pd.DataFrame: def _read_standard_data(self, con) -> pd.DataFrame: """ - Read standard OpenSWATH data without IPF from split files. + Read standard OpenSWATH data without IPF from split files, optionally including aligned features. 
""" + # First, get features that pass MS2 QVALUE threshold query = f""" SELECT p.RUN_ID AS id_run, @@ -367,13 +391,81 @@ def _read_standard_data(self, con) -> pd.DataFrame: p.RIGHT_WIDTH AS rightWidth, p.SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, p.SCORE_MS2_SCORE AS d_score, - p.SCORE_MS2_Q_VALUE AS m_score + p.SCORE_MS2_Q_VALUE AS m_score, + 0 AS from_alignment FROM precursors p WHERE p.PROTEIN_ID IS NOT NULL AND p.SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} ORDER BY transition_group_id, peak_group_rank """ - return con.execute(query).fetchdf() + data = con.execute(query).fetchdf() + + # If alignment is enabled and alignment data is present, fetch and merge aligned features + if self.config.use_alignment and self._has_alignment: + aligned_features = self._fetch_alignment_features(con) + + if not aligned_features.empty: + # Get full feature data for aligned features that are NOT already in base results + # We only want to add features that didn't pass MS2 threshold but have good alignment + aligned_ids = aligned_features['id'].unique() + existing_ids = data['id'].unique() + new_aligned_ids = [aid for aid in aligned_ids if aid not in existing_ids] + + if new_aligned_ids: + # Fetch full data for these new aligned features from the main data view + # Register aligned IDs as a temp table for the query + aligned_ids_df = pd.DataFrame({'id': new_aligned_ids}) + con.register('aligned_ids_temp', aligned_ids_df) + + aligned_query = f""" + SELECT + p.RUN_ID AS id_run, + p.PEPTIDE_ID AS id_peptide, + p.PRECURSOR_ID AS transition_group_id, + p.PRECURSOR_DECOY AS decoy, + p.RUN_ID AS run_id, + p.FILENAME AS filename, + p.EXP_RT AS RT, + p.EXP_RT - p.DELTA_RT AS assay_rt, + p.DELTA_RT AS delta_rt, + p.NORM_RT AS iRT, + p.PRECURSOR_LIBRARY_RT AS assay_iRT, + p.NORM_RT - p.PRECURSOR_LIBRARY_RT AS delta_iRT, + p.FEATURE_ID AS id, + p.UNMODIFIED_SEQUENCE AS Sequence, + p.MODIFIED_SEQUENCE AS FullPeptideName, + p.PRECURSOR_CHARGE AS Charge, + p.PRECURSOR_MZ AS mz, + 
p.FEATURE_MS2_AREA_INTENSITY AS Intensity, + p.FEATURE_MS1_AREA_INTENSITY AS aggr_prec_Peak_Area, + p.FEATURE_MS1_APEX_INTENSITY AS aggr_prec_Peak_Apex, + p.LEFT_WIDTH AS leftWidth, + p.RIGHT_WIDTH AS rightWidth, + p.SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, + p.SCORE_MS2_SCORE AS d_score, + p.SCORE_MS2_Q_VALUE AS m_score, + 1 AS from_alignment + FROM precursors p + WHERE p.PROTEIN_ID IS NOT NULL + AND p.FEATURE_ID IN (SELECT id FROM aligned_ids_temp) + """ + aligned_data = con.execute(aligned_query).fetchdf() + + # Merge alignment scores into the aligned data + if 'alignment_pep' in aligned_features.columns: + aligned_data = pd.merge( + aligned_data, + aligned_features[['id', 'alignment_pep', 'alignment_qvalue']], + on='id', + how='left' + ) + + logger.info(f"Adding {len(aligned_data)} features recovered through alignment") + + # Combine with base data + data = pd.concat([data, aligned_data], ignore_index=True) + + return data def _augment_data(self, data, con) -> pd.DataFrame: """ @@ -643,6 +735,87 @@ def _add_protein_error_data(self, data, con) -> pd.DataFrame: return data + def _fetch_alignment_features(self, con) -> pd.DataFrame: + """ + Fetch aligned features with good alignment scores from alignment parquet file. + + This method checks for an alignment parquet file and retrieves features + that have been aligned across runs and pass the alignment quality threshold. 
+ + Args: + con: DuckDB connection + + Returns: + DataFrame with aligned feature IDs that pass quality threshold + """ + import os + + # For split parquet, alignment file is at parent directory level + alignment_file = os.path.join(self.infile, "feature_alignment.parquet") + + if not os.path.exists(alignment_file): + logger.debug("Alignment parquet file not found, skipping alignment integration") + return pd.DataFrame() + + logger.debug(f"Loading alignment data from {alignment_file}") + max_alignment_pep = self.config.max_alignment_pep + + try: + # Load alignment data + alignment_df = pd.read_parquet(alignment_file) + + # Filter to target (non-decoy) features with good alignment scores + # Note: DECOY column in parquet alignment file comes from LABEL in SQLite + # where LABEL=1 (DECOY=1 in parquet) means target, not decoy + if 'DECOY' in alignment_df.columns and 'VAR_XCORR_SHAPE' in alignment_df.columns: + # This looks like the feature_alignment table structure + + # Check if we have alignment scores (PEP/QVALUE) in the file + # If not, we'll need to rely on the base MS2 scores and just use alignment to identify features + has_alignment_scores = 'PEP' in alignment_df.columns or 'QVALUE' in alignment_df.columns + + if has_alignment_scores: + # Filter by alignment PEP threshold + pep_col = 'PEP' if 'PEP' in alignment_df.columns else None + qvalue_col = 'QVALUE' if 'QVALUE' in alignment_df.columns else None + + if pep_col: + filtered_df = alignment_df[ + (alignment_df['DECOY'] == 1) & # DECOY=1 means target (from LABEL=1 in SQLite) + (alignment_df[pep_col] < max_alignment_pep) + ].copy() + else: + # Use QVALUE if PEP not available (less ideal but workable) + filtered_df = alignment_df[ + (alignment_df['DECOY'] == 1) & + (alignment_df[qvalue_col] < max_alignment_pep) + ].copy() + else: + # No alignment scores in file - just filter by target status + # In this case, we can't apply alignment quality threshold + logger.warning("Alignment file found but no PEP/QVALUE 
scores present. Cannot filter by alignment quality.") + filtered_df = alignment_df[alignment_df['DECOY'] == 1].copy() + + # Rename columns to match expected format + if 'FEATURE_ID' in filtered_df.columns: + result = filtered_df[['FEATURE_ID', 'PRECURSOR_ID', 'RUN_ID']].rename( + columns={'FEATURE_ID': 'id'} + ) + + # Add alignment scores if available + if 'PEP' in filtered_df.columns: + result['alignment_pep'] = filtered_df['PEP'].values + if 'QVALUE' in filtered_df.columns: + result['alignment_qvalue'] = filtered_df['QVALUE'].values + + logger.info(f"Found {len(result)} aligned features" + + (f" passing alignment PEP < {max_alignment_pep}" if has_alignment_scores else "")) + return result + except Exception as e: + logger.warning(f"Could not load alignment data: {e}") + + return pd.DataFrame() + def _get_ms1_score_info(self) -> tuple[str, str]: """ Get MS1 score information if available. From 901f25b848186da555349e1659a3264826c2dfea Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 24 Oct 2025 23:48:58 +0000 Subject: [PATCH 07/30] Add workflow diagram documentation for alignment integration Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- ALIGNMENT_INTEGRATION_WORKFLOW.md | 304 ++++++++++++++++++++++++++++++ 1 file changed, 304 insertions(+) create mode 100644 ALIGNMENT_INTEGRATION_WORKFLOW.md diff --git a/ALIGNMENT_INTEGRATION_WORKFLOW.md b/ALIGNMENT_INTEGRATION_WORKFLOW.md new file mode 100644 index 00000000..203b3d4a --- /dev/null +++ b/ALIGNMENT_INTEGRATION_WORKFLOW.md @@ -0,0 +1,304 @@ +# SCORE_ALIGNMENT Integration Workflow + +## Overview + +This diagram illustrates how the SCORE_ALIGNMENT integration works to recover peaks with weak MS2 signals but good alignment scores. 
+ +## High-Level Workflow + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ PyProphet Export Command │ +│ pyprophet export tsv --in data.osw --out results.tsv │ +│ (use_alignment=True by default) │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ 1. Configuration Check │ +│ • use_alignment = True (default) │ +│ • max_alignment_pep = 0.7 (default) │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ 2. Auto-Detection Phase │ +│ │ +│ OSW Files: │ +│ ├─ Check FEATURE_MS2_ALIGNMENT table exists? │ +│ └─ Check SCORE_ALIGNMENT table exists? │ +│ │ +│ Parquet Files: │ +│ └─ Check for {basename}_feature_alignment.parquet? │ +│ │ +│ Split Parquet Files: │ +│ └─ Check for {infile}/feature_alignment.parquet? │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ + ┌─────────────┴─────────────┐ + │ │ + ┌─────────▼──────────┐ ┌──────────▼─────────┐ + │ Alignment Present │ │ Alignment Missing │ + │ use_alignment=T │ │ use_alignment=T │ + └─────────┬──────────┘ └──────────┬─────────┘ + │ │ + │ ↓ + │ ┌────────────────────────┐ + │ │ Standard Export Only │ + │ │ (no alignment used) │ + │ └────────────────────────┘ + │ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ 3. 
Data Reading Phase │ +│ │ +│ Step A: Fetch Base Features (MS2 QVALUE filter) │ +│ ┌───────────────────────────────────────────────────────────────┐ │ +│ │ SELECT * FROM FEATURES │ │ +│ │ WHERE SCORE_MS2.QVALUE < max_rs_peakgroup_qvalue (e.g., 0.05)│ │ +│ │ → Base Features (passed MS2 threshold) │ │ +│ │ → Mark with from_alignment=0 │ │ +│ └───────────────────────────────────────────────────────────────┘ │ +│ │ +│ Step B: Fetch Aligned Features (Alignment PEP filter) │ +│ ┌───────────────────────────────────────────────────────────────┐ │ +│ │ SELECT ALIGNED_FEATURE_ID FROM FEATURE_MS2_ALIGNMENT │ │ +│ │ JOIN SCORE_ALIGNMENT │ │ +│ │ WHERE LABEL = 1 (target) │ │ +│ │ AND SCORE_ALIGNMENT.PEP < max_alignment_pep (e.g., 0.7) │ │ +│ │ → Aligned Features (good alignment scores) │ │ +│ └───────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ 4. Feature Recovery Logic │ +│ │ +│ ┌─────────────────┐ ┌──────────────────┐ │ +│ │ Base Features │ │ Aligned Features │ │ +│ │ (MS2 passed) │ │ (Alignment good) │ │ +│ │ IDs: 1,2,3,4,5 │ │ IDs: 3,4,6,7,8 │ │ +│ └────────┬────────┘ └────────┬─────────┘ │ +│ │ │ │ +│ └──────────┬───────────────┘ │ +│ ↓ │ +│ ┌──────────────────────┐ │ +│ │ Find NEW features: │ │ +│ │ aligned - base │ │ +│ │ = {6, 7, 8} │ │ +│ └──────────┬───────────┘ │ +│ ↓ │ +│ ┌──────────────────────┐ │ +│ │ Fetch full data for │ │ +│ │ recovered features │ │ +│ │ 6, 7, 8 │ │ +│ │ Mark: from_alignment=1│ │ +│ │ Add: alignment_pep │ │ +│ │ Add: alignment_qvalue│ │ +│ └──────────┬───────────┘ │ +│ ↓ │ +│ ┌──────────────────────┐ │ +│ │ Combine: │ │ +│ │ Base (1,2,3,4,5) + │ │ +│ │ Recovered (6,7,8) │ │ +│ │ = Final (1-8) │ │ +│ └──────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────────────────────┐ +│ 5. 
Export Results │ +│ │ +│ Final TSV/Matrix includes: │ +│ • Original features (from_alignment=0) │ +│ • Recovered features (from_alignment=1, with alignment scores) │ +│ • More complete quantification with fewer missing values │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## Detailed Component Workflow + +### A. Reader Classes (OSW, Parquet, Split Parquet) + +``` +┌──────────────────────────────────────────────────────────────┐ +│ Reader.__init__() │ +│ │ +│ OSWReader: │ +│ N/A - checks at read time │ +│ │ +│ ParquetReader: │ +│ self._has_alignment = _check_alignment_file_exists() │ +│ • Checks: {basename}_feature_alignment.parquet │ +│ │ +│ SplitParquetReader: │ +│ self._has_alignment = _check_alignment_file_exists() │ +│ • Checks: {infile}/feature_alignment.parquet │ +└──────────────────────────────────────────────────────────────┘ + ↓ +┌──────────────────────────────────────────────────────────────┐ +│ Reader.read() │ +│ │ +│ → _read_standard_data() │ +│ if config.use_alignment AND alignment_present: │ +│ → _fetch_alignment_features() │ +│ → Merge with base features │ +└──────────────────────────────────────────────────────────────┘ +``` + +### B. 
Alignment Detection Methods + +``` +OSW Files (.osw): +┌─────────────────────────────────────────┐ +│ _check_alignment_presence(con) │ +│ │ +│ return: │ +│ check_sqlite_table( │ +│ con, "FEATURE_MS2_ALIGNMENT" │ +│ ) AND │ +│ check_sqlite_table( │ +│ con, "SCORE_ALIGNMENT" │ +│ ) │ +└─────────────────────────────────────────┘ + +Parquet Files (.parquet): +┌─────────────────────────────────────────┐ +│ _check_alignment_file_exists() │ +│ │ +│ if infile.endswith('.parquet'): │ +│ base = infile[:-8] │ +│ alignment_file = │ +│ f"{base}_feature_alignment.parquet"│ +│ return os.path.exists(alignment_file)│ +└─────────────────────────────────────────┘ + +Split Parquet Files (directory with .oswpq): +┌─────────────────────────────────────────┐ +│ _check_alignment_file_exists() │ +│ │ +│ if os.path.isdir(infile): │ +│ alignment_file = os.path.join( │ +│ infile, "feature_alignment.parquet"│ +│ ) │ +│ return os.path.exists(alignment_file)│ +└─────────────────────────────────────────┘ +``` + +### C. Feature Recovery Decision Tree + +``` + Start Export + │ + ↓ + ┌──────────────────────┐ + │ use_alignment=True? │ + └──────────┬───────────┘ + │ + ┌─────────────┴─────────────┐ + │ │ + YES NO + │ │ + ↓ ↓ + ┌──────────────┐ ┌──────────────┐ + │ Alignment │ │ Standard │ + │ data exists? 
│ │ Export Only │ + └──────┬───────┘ └──────────────┘ + │ + ┌─────┴─────┐ + │ │ + YES NO + │ │ + ↓ ↓ +┌─────────┐ ┌─────────┐ +│ Use │ │Standard │ +│Alignment│ │Export │ +└─────────┘ └─────────┘ + │ │ + └─────┬─────┘ + ↓ + Export Results +``` + +## Example Scenario + +### Before Alignment Integration: + +``` +Run 1: Feature detected with MS2 QVALUE = 0.02 ✓ (exported) +Run 2: Feature detected with MS2 QVALUE = 0.08 ✗ (not exported - weak signal) +Run 3: Feature detected with MS2 QVALUE = 0.03 ✓ (exported) + +Result: Missing quantification in Run 2 +``` + +### After Alignment Integration: + +``` +Run 1: Feature detected with MS2 QVALUE = 0.02 ✓ (exported, from_alignment=0) +Run 2: Feature detected with MS2 QVALUE = 0.08 ✗ (weak MS2) + BUT: Alignment PEP = 0.4 ✓ (good alignment!) + → Recovered via alignment (exported, from_alignment=1) +Run 3: Feature detected with MS2 QVALUE = 0.03 ✓ (exported, from_alignment=0) + +Result: Complete quantification across all runs +``` + +## File Structure Examples + +### OSW Format: +``` +data.osw (SQLite database) +├─ FEATURE_MS2_ALIGNMENT table +└─ SCORE_ALIGNMENT table +``` + +### Parquet Format: +``` +data.parquet ← Main file +data_feature_alignment.parquet ← Alignment file +``` + +### Split Parquet Format: +``` +experiment/ +├─ run1.oswpq/ +│ ├─ precursors_features.parquet +│ └─ transition_features.parquet +├─ run2.oswpq/ +│ ├─ precursors_features.parquet +│ └─ transition_features.parquet +└─ feature_alignment.parquet ← Alignment file (parent level) +``` + +## Key Benefits + +1. **Increased Coverage**: Recovers peaks with weak MS2 but good alignment +2. **Better Quantification**: Fewer missing values in matrices +3. **Quality Control**: Uses alignment PEP/QVALUE thresholds +4. **Backwards Compatible**: Disabled by default via auto-detection +5. 
**Transparent**: Features marked with `from_alignment` flag + +## Configuration Options + +```bash +# Use default (enabled with auto-detection) +pyprophet export tsv --in data.osw --out results.tsv + +# Customize threshold +pyprophet export tsv --in data.osw --out results.tsv \ + --max_alignment_pep 0.5 + +# Explicitly disable +pyprophet export tsv --in data.osw --out results.tsv \ + --no-use_alignment +``` + +## Output Columns + +Recovered features include additional columns: + +- `from_alignment`: 0 (base) or 1 (recovered) +- `alignment_pep`: Alignment posterior error probability +- `alignment_qvalue`: Alignment q-value + +These allow users to: +- Identify which features were recovered +- Assess alignment quality +- Filter or analyze separately if needed From 2e7f217ab43b3ea08f1a2d5e80466ee43f136fb7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 04:32:59 +0000 Subject: [PATCH 08/30] Merge master branch into alignment integration feature Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- CHANGELOG.md | 15 + docs/cli.rst | 23 +- pyprophet/_config.py | 1 + pyprophet/cli/export.py | 78 ++- pyprophet/export/export_report.py | 453 ++++++++++++++++++ pyprophet/io/export/osw.py | 430 +++++++++++++++-- pyprophet/io/export/parquet.py | 131 ++++- pyprophet/io/export/split_parquet.py | 165 ++++++- pyprophet/report.py | 11 +- pyprophet/scoring/_optimized.c | 292 +++++------ requirements.txt | 2 +- ...test_feature_scores_ms1_ms2_transition.out | 8 + ...est_feature_scores_parquet_with_scores.out | 1 + ..._export.test_feature_scores_scored_osw.out | 3 + ...ature_scores_split_parquet_with_scores.out | 1 + ...xport.test_feature_scores_unscored_osw.out | 2 + ...test_parquet_export_no_transition_data.out | 13 + ..._export.test_parquet_export_scored_osw.out | 13 + ...xport.test_parquet_export_split_format.out | 10 + ...et_export.test_parquet_export_with_ipf.out | 14 + 
tests/test_pyprophet_export.py | 300 +++++++++++- 21 files changed, 1766 insertions(+), 200 deletions(-) create mode 100644 tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_ms1_ms2_transition.out create mode 100644 tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_parquet_with_scores.out create mode 100644 tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_scored_osw.out create mode 100644 tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_split_parquet_with_scores.out create mode 100644 tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_unscored_osw.out create mode 100644 tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out create mode 100644 tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out create mode 100644 tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out create mode 100644 tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b1178e5..a3ca57f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,21 @@ All notable changes to this project will be documented in this file. 
+## [Unreleased] + +### 🚀 Features + +- Add unified `pyprophet export feature-scores` command that works with all file formats (OSW, Parquet, Split Parquet) + - Auto-detects SCORE tables and adjusts behavior intelligently + - Applies RANK==1 filtering when SCORE tables exist + - Plots only VAR_ columns for unscored files + - Supports MS1, MS2, and transition level features + +### 🔧 Deprecated + +- Deprecate `pyprophet export score-plots` command in favor of `pyprophet export feature-scores` + - Old command still works with deprecation warning for backward compatibility + ## [3.0.4] - 2025-10-21 ### 🚀 Features diff --git a/docs/cli.rst b/docs/cli.rst index 85453d89..a2ff1a1c 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -144,10 +144,27 @@ To convert OpenSwath's *.osw* / *.sqMass* format to a parquet format, you can us :prog: pyprophet export parquet :nested: none -Export Score Plots -^^^^^^^^^^^^^^^^^^ +Export Feature Score Plots +^^^^^^^^^^^^^^^^^^^^^^^^^^^ -It may be useful to export the distribution of scores for the different input features. This can help you investigate the distribution and quality of scores for target-decoy separation. +To export the distribution of feature scores (VAR_ columns) and, if available, scorer scores (SCORE columns), you can use the :program:`export feature-scores` subcommand. This command works with all file formats (OSW, Parquet, and Split Parquet): + +- **For unscored files**: Plots only VAR_ columns (feature variables) +- **For scored files**: Applies RANK==1 filtering and plots both SCORE and VAR_ columns + +This is useful for investigating the distribution and quality of scores for target-decoy separation. + +.. click:: pyprophet.cli.export:export_feature_scores + :prog: pyprophet export feature-scores + :nested: none + +Export Score Plots (Deprecated) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. deprecated:: 3.1 + Use :program:`pyprophet export feature-scores` instead. 
+ +The :program:`export score-plots` command is deprecated and will be removed in a future version. It has been replaced by the more flexible :program:`export feature-scores` command which works with all file formats. .. click:: pyprophet.cli.export:export_score_plots :prog: pyprophet export score-plots diff --git a/pyprophet/_config.py b/pyprophet/_config.py index 8ee78651..4814fe7d 100644 --- a/pyprophet/_config.py +++ b/pyprophet/_config.py @@ -705,6 +705,7 @@ class ExportIOConfig(BaseIOConfig): compression_level: int = 11 split_transition_data: bool = True split_runs: bool = False + include_transition_data: bool = True # Whether to include transition data in parquet export # SqMass: Export to parquet pqp_file: Optional[str] = None # Path to PQP file for precursor/transition mapping diff --git a/pyprophet/cli/export.py b/pyprophet/cli/export.py index 32373e08..e80c3001 100644 --- a/pyprophet/cli/export.py +++ b/pyprophet/cli/export.py @@ -14,6 +14,9 @@ from ..export.export_report import ( export_scored_report as _export_scored_report, ) +from ..export.export_report import ( + export_feature_scores as _export_feature_scores, +) from ..export.calibration_report import generate_report as generate_calibration_report from ..glyco.export import ( export_score_plots as export_glyco_score_plots, @@ -43,8 +46,10 @@ def export(): export.add_command(export_parquet, name="parquet") export.add_command(export_compound, name="compound") export.add_command(export_glyco, name="glyco") - export.add_command(export_score_plots, name="score-plots") + export.add_command(export_feature_scores, name="feature-scores") + export.add_command(export_score_plots, name="score-plots") # Deprecated export.add_command(export_scored_report, name="score-report") + export.add_command(export_feature_scores, name="feature-scores") export.add_command(export_calibration_report, name="calibration-report") return export @@ -587,6 +592,13 @@ def export_library( type=int, help="Compression level to use for 
parquet file.", ) +@click.option( + "--include_transition_data/--no-include_transition_data", + "include_transition_data", + default=True, + show_default=True, + help="Include transition data in the exported parquet file(s). When disabled, only precursor-level data is exported.", +) @measure_memory_usage_and_time def export_parquet( infile, @@ -599,6 +611,7 @@ def export_parquet( split_runs, compression, compression_level, + include_transition_data, ): """ Export OSW or sqMass to parquet format @@ -634,6 +647,7 @@ def export_parquet( split_runs=split_runs, compression_method=compression, compression_level=compression_level, + include_transition_data=include_transition_data, ) writer = WriterDispatcher.get_writer(config) @@ -854,7 +868,35 @@ def export_glyco( ) -# Export score plots +# Export feature scores (unified command) +@click.command(name="feature-scores", cls=AdvancedHelpCommand) +@click.option( + "--in", + "infile", + required=True, + type=click.Path(exists=True), + help="PyProphet input file (OSW, Parquet, or Split Parquet directory).", +) +@click.option( + "--out", + "outfile", + type=click.Path(exists=False), + help="Output PDF file path. If not specified, will be derived from input filename.", +) +@measure_memory_usage_and_time +def export_feature_scores(infile, outfile): + """ + Export feature score plots + + Works with OSW, Parquet, and Split Parquet formats. 
+ - If SCORE tables exist: applies RANK==1 filtering and plots SCORE + VAR_ columns + - If SCORE tables don't exist: plots only VAR_ columns + """ + from ..export.export_report import export_feature_scores as _export_feature_scores + _export_feature_scores(infile, outfile) + + +# Export score plots (deprecated - use feature-scores instead) @click.command(name="score-plots", cls=AdvancedHelpCommand) @click.option( "--in", @@ -874,8 +916,11 @@ def export_glyco( @measure_memory_usage_and_time def export_score_plots(infile, glycoform): """ - Export score plots + Export score plots (DEPRECATED - use 'feature-scores' instead) + + This command is deprecated. Please use 'pyprophet export feature-scores' instead. """ + logger.warning("DEPRECATED: 'pyprophet export score-plots' is deprecated. Use 'pyprophet export feature-scores' instead.") if infile.endswith(".osw"): if not glycoform: _export_score_plots(infile) @@ -905,6 +950,33 @@ def export_scored_report(infile): _export_scored_report(infile, outfile) +# Export feature scores +@click.command(name="feature-scores", cls=AdvancedHelpCommand) +@click.option( + "--in", + "infile", + required=True, + type=click.Path(exists=True), + help="PyProphet input file (OSW, Parquet, or Split Parquet directory).", +) +@click.option( + "--out", + "outfile", + type=click.Path(exists=False), + help="Output PDF file. If not provided, will be auto-generated based on input filename.", +) +@measure_memory_usage_and_time +def export_feature_scores(infile, outfile): + """ + Export feature score plots from a PyProphet input file. + + Creates plots showing the distribution of feature scores (var_* columns) + at different levels (ms1, ms2, transition, alignment) colored by target/decoy status. + Works with OSW, Parquet, and Split Parquet files (scored or unscored). 
+ """ + _export_feature_scores(infile, outfile) + + # Export OpenSwath Calibration debug plots @click.command(name="calibration-report", cls=AdvancedHelpCommand) @click.option( diff --git a/pyprophet/export/export_report.py b/pyprophet/export/export_report.py index ea5042da..4a105c6b 100644 --- a/pyprophet/export/export_report.py +++ b/pyprophet/export/export_report.py @@ -1,5 +1,8 @@ import sqlite3 import pandas as pd +import os +from pathlib import Path +from loguru import logger from .._config import ExportIOConfig @@ -7,9 +10,234 @@ from ..io.dispatcher import ReaderDispatcher from ..io.util import get_parquet_column_names from ..io.util import check_sqlite_table +from ..io.util import _ensure_pyarrow from ..report import plot_scores +def export_feature_scores(infile, outfile=None): + """ + Export feature score plots from a PyProphet input file. + + This function works with OSW, Parquet, and Split Parquet formats. + - If SCORE tables exist: applies RANK==1 filtering and plots SCORE + VAR_ columns + - If SCORE tables don't exist: plots only VAR_ columns + + Parameters + ---------- + infile : str + Path to input file (OSW, Parquet, or Split Parquet directory) + outfile : str, optional + Path for output PDF files. If None, derives from infile. 
+ """ + # Detect file type based on extension and existence + if infile.endswith(".osw"): + file_type = "osw" + elif infile.endswith(".parquet"): + file_type = "parquet" + elif os.path.isdir(infile): + # Check if it's a split parquet directory + precursor_file = os.path.join(infile, "precursors_features.parquet") + if os.path.exists(precursor_file): + file_type = "parquet_split" + else: + raise ValueError(f"Directory {infile} does not appear to be a valid split parquet directory") + else: + raise ValueError(f"Unsupported file type for {infile}") + + logger.info(f"Detected file type: {file_type}") + + # Generate output filename if not provided + if outfile is None: + if file_type == "osw": + outfile = infile.replace(".osw", "_feature_scores.pdf") + elif file_type == "parquet": + outfile = infile.replace(".parquet", "_feature_scores.pdf") + else: # parquet_split + outfile = infile.rstrip("/") + "_feature_scores.pdf" + + logger.info(f"Output file: {outfile}") + + # Create config and reader based on file type + config = ExportIOConfig( + infile=infile, + outfile=outfile, + subsample_ratio=1.0, + level="export", + context="export_feature_scores", + ) + + # Get appropriate reader + reader = ReaderDispatcher.get_reader(config) + + # Export feature scores using the reader's method + reader.export_feature_scores(outfile, _plot_feature_scores) + + logger.info(f"Feature score plots exported to {outfile}") + + +def _plot_feature_scores(df: pd.DataFrame, outfile: str, level: str, append: bool = False, sample_size: int = 100000): + """ + Create plots for feature scores at a specific level. + + Parameters + ---------- + df : pd.DataFrame + DataFrame containing feature scores and DECOY column. + outfile : str + Path to the output PDF file. + level : str + Level name (ms1, ms2, transition, or alignment). + append : bool + If True, append to existing PDF. If False, create new PDF. + sample_size : int + Maximum number of rows to use for plotting. 
If df has more rows, + a stratified sample (by DECOY) will be taken to reduce memory usage. + """ + # Get all columns that contain feature scores (VAR_ or SCORE columns) + score_cols = [col for col in df.columns if ("VAR_" in col.upper() or col.upper().startswith("SCORE")) and col != "DECOY"] + + if not score_cols: + logger.warning(f"No feature score columns found for {level} level") + return + + logger.info(f"Found {len(score_cols)} feature score columns for {level} level") + + # Prepare data for plotting - ensure DECOY column exists + if "DECOY" not in df.columns: + logger.warning(f"No DECOY column found for {level} level, skipping") + return + + # Only select the columns we need for plotting + plot_df = df[score_cols + ["DECOY"]].dropna(subset=["DECOY"]).copy() + + # Check if we have any data left after dropping NAs + if len(plot_df) == 0: + logger.warning(f"No valid data rows found for {level} level after removing rows with null DECOY values") + return + + # Memory optimization: Sample data if it's too large + if len(plot_df) > sample_size: + logger.info(f"Dataset has {len(plot_df)} rows, sampling {sample_size} rows (stratified by DECOY) to reduce memory usage") + # Stratified sampling to maintain target/decoy ratio + target_df = plot_df[plot_df["DECOY"] == 0] + decoy_df = plot_df[plot_df["DECOY"] == 1] + + # Calculate sample sizes proportional to original distribution + n_targets = len(target_df) + n_decoys = len(decoy_df) + total = n_targets + n_decoys + + # Handle edge cases where one group might be empty + if total == 0: + logger.warning(f"No data with valid DECOY values for {level} level") + return + + target_sample_size = int(sample_size * n_targets / total) if n_targets > 0 else 0 + decoy_sample_size = int(sample_size * n_decoys / total) if n_decoys > 0 else 0 + + # Sample from each group + samples = [] + if n_targets > 0: + if n_targets > target_sample_size and target_sample_size > 0: + target_sample = target_df.sample(n=target_sample_size, 
random_state=42) + else: + target_sample = target_df + samples.append(target_sample) + + if n_decoys > 0: + if n_decoys > decoy_sample_size and decoy_sample_size > 0: + decoy_sample = decoy_df.sample(n=decoy_sample_size, random_state=42) + else: + decoy_sample = decoy_df + samples.append(decoy_sample) + + # Combine samples + plot_df = pd.concat(samples, ignore_index=True) + logger.info(f"Sampled {len(plot_df)} rows") + + # Ensure DECOY is 0 or 1 + if plot_df["DECOY"].dtype == bool: + plot_df["DECOY"] = plot_df["DECOY"].astype(int) + + # Generate a temporary output file for this level + temp_outfile = outfile.replace(".pdf", f"_{level}_temp.pdf") + + # Rename columns to match plot_scores expectations + # plot_scores expects columns named "SCORE", "MAIN_VAR_*", or "VAR_*" + rename_dict = {} + for col in score_cols: + # Ensure column names start with VAR_ or SCORE + if not col.upper().startswith("VAR_") and not col.upper().startswith("SCORE"): + # Extract the var part from column names like FEATURE_MS1_VAR_XXX + parts = col.split("VAR_") + if len(parts) > 1: + new_name = "VAR_" + parts[-1] + else: + new_name = "VAR_" + col + rename_dict[col] = new_name + + if rename_dict: + plot_df.rename(columns=rename_dict, inplace=True) + + # Call plot_scores with the formatted dataframe + plot_scores(plot_df, temp_outfile) + + # Check if the temporary file was created and has content + if not os.path.exists(temp_outfile): + logger.warning(f"plot_scores did not create output file for {level} level, skipping") + return + + if os.path.getsize(temp_outfile) == 0: + logger.warning(f"plot_scores created empty output file for {level} level, skipping") + os.remove(temp_outfile) + return + + # If appending, merge PDFs, otherwise just rename + if append and os.path.exists(outfile): + from pypdf import PdfReader, PdfWriter + + try: + # Merge the PDFs + writer = PdfWriter() + + # Add pages from existing PDF + with open(outfile, "rb") as f: + existing_pdf = PdfReader(f) + for page in 
existing_pdf.pages: + writer.add_page(page) + + # Add pages from new PDF + with open(temp_outfile, "rb") as f: + new_pdf = PdfReader(f) + for page in new_pdf.pages: + writer.add_page(page) + + # Write merged PDF + with open(outfile, "wb") as f: + writer.write(f) + + # Remove temporary file + os.remove(temp_outfile) + except Exception as e: + logger.warning(f"Failed to merge PDF for {level} level: {e}. Skipping this level.") + # Clean up temporary file if it exists + if os.path.exists(temp_outfile): + os.remove(temp_outfile) + return + else: + # Just rename temporary file to output file + try: + if os.path.exists(outfile): + os.remove(outfile) + os.rename(temp_outfile, outfile) + except Exception as e: + logger.warning(f"Failed to save PDF for {level} level: {e}. Skipping this level.") + # Clean up temporary file if it exists + if os.path.exists(temp_outfile): + os.remove(temp_outfile) + return + + def export_score_plots(infile): """ Export score plots from a PyProphet input file. @@ -149,3 +377,228 @@ def export_scored_report( df = reader.read() post_scoring_report(df, outfile) + + +def export_feature_scores(infile: str, outfile: str = None): + """ + Export feature score plots from a PyProphet input file. + + This function creates plots showing the distribution of feature scores + (var_* columns) at different levels (ms1, ms2, transition, alignment) + colored by target/decoy status. Works with OSW, Parquet, and Split Parquet files. + + Parameters + ---------- + infile : str + Path to the input file (OSW, Parquet, or Split Parquet format). + outfile : str, optional + Path to the output PDF file. If None, will be auto-generated based on input filename. 
+ """ + # Detect file type based on extension and existence + if infile.endswith(".osw"): + file_type = "osw" + elif infile.endswith(".parquet"): + file_type = "parquet" + elif os.path.isdir(infile): + # Check if it's a split parquet directory + precursor_file = os.path.join(infile, "precursors_features.parquet") + if os.path.exists(precursor_file): + file_type = "parquet_split" + else: + raise ValueError(f"Directory {infile} does not appear to be a valid split parquet directory") + else: + raise ValueError(f"Unsupported file type for {infile}") + + logger.info(f"Detected file type: {file_type}") + + # Generate output filename if not provided + if outfile is None: + if file_type == "osw": + outfile = infile.replace(".osw", "_feature_scores.pdf") + elif file_type == "parquet": + outfile = infile.replace(".parquet", "_feature_scores.pdf") + else: # parquet_split + outfile = infile.rstrip("/") + "_feature_scores.pdf" + + logger.info(f"Output file: {outfile}") + + # Create config and reader based on file type + config = ExportIOConfig( + infile=infile, + outfile=outfile, + subsample_ratio=1.0, + level="export", + context="export_feature_scores", + ) + + # Get appropriate reader + reader = ReaderDispatcher.get_reader(config) + + # Export feature scores using the reader's method + reader.export_feature_scores(outfile, _plot_feature_scores) + + logger.info(f"Feature score plots exported to {outfile}") + + + +def _plot_feature_scores(df: pd.DataFrame, outfile: str, level: str, append: bool = False, sample_size: int = 100000): + """ + Create plots for feature scores at a specific level. + + Parameters + ---------- + df : pd.DataFrame + DataFrame containing feature scores and DECOY column. + outfile : str + Path to the output PDF file. + level : str + Level name (ms1, ms2, transition, or alignment). + append : bool + If True, append to existing PDF. If False, create new PDF. + sample_size : int + Maximum number of rows to use for plotting. 
If df has more rows, + a stratified sample (by DECOY) will be taken to reduce memory usage. + """ + # Get all columns that contain feature scores (VAR_ columns or columns with _VAR_ in name) + score_cols = [col for col in df.columns if "VAR_" in col.upper() and col != "DECOY"] + + if not score_cols: + logger.warning(f"No feature score columns found for {level} level") + return + + logger.info(f"Found {len(score_cols)} feature score columns for {level} level: {score_cols}") + + # Prepare data for plotting - ensure DECOY column exists + if "DECOY" not in df.columns: + logger.warning(f"No DECOY column found for {level} level, skipping") + return + + # Only select the columns we need for plotting + plot_df = df[score_cols + ["DECOY"]].dropna(subset=["DECOY"]).copy() + + # Check if we have any data left after dropping NAs + if len(plot_df) == 0: + logger.warning(f"No valid data rows found for {level} level after removing rows with null DECOY values") + return + + # Memory optimization: Sample data if it's too large + if len(plot_df) > sample_size: + logger.info(f"Dataset has {len(plot_df)} rows, sampling {sample_size} rows (stratified by DECOY) to reduce memory usage") + # Stratified sampling to maintain target/decoy ratio + target_df = plot_df[plot_df["DECOY"] == 0] + decoy_df = plot_df[plot_df["DECOY"] == 1] + + # Calculate sample sizes proportional to original distribution + n_targets = len(target_df) + n_decoys = len(decoy_df) + total = n_targets + n_decoys + + # Handle edge cases where one group might be empty + if total == 0: + logger.warning(f"No data with valid DECOY values for {level} level") + return + + target_sample_size = int(sample_size * n_targets / total) if n_targets > 0 else 0 + decoy_sample_size = int(sample_size * n_decoys / total) if n_decoys > 0 else 0 + + # Sample from each group + samples = [] + if n_targets > 0: + if n_targets > target_sample_size and target_sample_size > 0: + target_sample = target_df.sample(n=target_sample_size, 
random_state=42) + else: + target_sample = target_df + samples.append(target_sample) + + if n_decoys > 0: + if n_decoys > decoy_sample_size and decoy_sample_size > 0: + decoy_sample = decoy_df.sample(n=decoy_sample_size, random_state=42) + else: + decoy_sample = decoy_df + samples.append(decoy_sample) + + # Combine samples + plot_df = pd.concat(samples, ignore_index=True) + logger.info(f"Sampled {len(plot_df)} rows ({len(samples[0]) if len(samples) > 0 and n_targets > 0 else 0} targets, {len(samples[-1]) if len(samples) > 0 and n_decoys > 0 else 0} decoys)") + + # Ensure DECOY is 0 or 1 + if plot_df["DECOY"].dtype == bool: + plot_df["DECOY"] = plot_df["DECOY"].astype(int) + + # Generate a temporary output file for this level + temp_outfile = outfile.replace(".pdf", f"_{level}_temp.pdf") + + # Rename columns to match plot_scores expectations + # plot_scores expects columns named "SCORE", "MAIN_VAR_*", or "VAR_*" + rename_dict = {} + for i, col in enumerate(score_cols): + # Ensure column names start with VAR_ + if not col.upper().startswith("VAR_"): + # Extract the var part from column names like FEATURE_MS1_VAR_XXX + parts = col.split("VAR_") + if len(parts) > 1: + new_name = "VAR_" + parts[-1] + else: + new_name = "VAR_" + col + rename_dict[col] = new_name + + if rename_dict: + plot_df.rename(columns=rename_dict, inplace=True) + + # Call plot_scores with the formatted dataframe + plot_scores(plot_df, temp_outfile) + + # Check if the temporary file was created and has content + if not os.path.exists(temp_outfile): + logger.warning(f"plot_scores did not create output file for {level} level, skipping") + return + + if os.path.getsize(temp_outfile) == 0: + logger.warning(f"plot_scores created empty output file for {level} level, skipping") + os.remove(temp_outfile) + return + + # If appending, merge PDFs, otherwise just rename + if append and os.path.exists(outfile): + from pypdf import PdfReader, PdfWriter + + try: + # Merge the PDFs + writer = PdfWriter() + + # Add 
pages from existing PDF + with open(outfile, "rb") as f: + existing_pdf = PdfReader(f) + for page in existing_pdf.pages: + writer.add_page(page) + + # Add pages from new PDF + with open(temp_outfile, "rb") as f: + new_pdf = PdfReader(f) + for page in new_pdf.pages: + writer.add_page(page) + + # Write merged PDF + with open(outfile, "wb") as f: + writer.write(f) + + # Remove temporary file + os.remove(temp_outfile) + except Exception as e: + logger.warning(f"Failed to merge PDF for {level} level: {e}. Skipping this level.") + # Clean up temporary file if it exists + if os.path.exists(temp_outfile): + os.remove(temp_outfile) + return + else: + # Just rename temporary file to output file + try: + if os.path.exists(outfile): + os.remove(outfile) + os.rename(temp_outfile, outfile) + except Exception as e: + logger.warning(f"Failed to save PDF for {level} level: {e}. Skipping this level.") + # Clean up temporary file if it exists + if os.path.exists(temp_outfile): + os.remove(temp_outfile) + return diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 47129587..d3a94c7f 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -979,6 +979,238 @@ def _get_peptide_protein_score_table_sqlite(self, con, level: str) -> str: return f"{view_name} AS ({merged})" + def export_feature_scores(self, outfile: str, plot_callback): + """ + Export feature scores from OSW file for plotting. + + Detects if SCORE tables exist and adjusts behavior: + - If SCORE tables exist: applies RANK==1 filtering and plots SCORE + VAR_ columns + - If SCORE tables don't exist: plots only VAR_ columns + + Parameters + ---------- + outfile : str + Path to the output PDF file. + plot_callback : callable + Function to call for plotting each level's data. 
+ Signature: plot_callback(df, outfile, level, append) + """ + con = sqlite3.connect(self.infile) + + try: + # Check for SCORE tables + has_score_ms1 = check_sqlite_table(con, "SCORE_MS1") + has_score_ms2 = check_sqlite_table(con, "SCORE_MS2") + has_score_transition = check_sqlite_table(con, "SCORE_TRANSITION") + + if has_score_ms1 or has_score_ms2 or has_score_transition: + logger.info("SCORE tables detected - applying RANK==1 filter and plotting SCORE + VAR_ columns") + else: + logger.info("No SCORE tables detected - plotting only VAR_ columns") + + # Process MS1 level if available + if check_sqlite_table(con, "FEATURE_MS1"): + logger.info("Processing MS1 level feature scores") + + if has_score_ms1: + # Scored mode: Include SCORE columns and apply RANK==1 filter + ms1_query = """ + SELECT *, + RUN_ID || '_' || PRECURSOR_ID AS GROUP_ID + FROM FEATURE_MS1 + INNER JOIN + (SELECT RUN_ID, + ID, + PRECURSOR_ID, + EXP_RT + FROM FEATURE) AS FEATURE ON FEATURE_MS1.FEATURE_ID = FEATURE.ID + INNER JOIN + (SELECT ID, + CHARGE AS VAR_PRECURSOR_CHARGE, + DECOY + FROM PRECURSOR) AS PRECURSOR ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + INNER JOIN SCORE_MS1 ON FEATURE.ID = SCORE_MS1.FEATURE_ID + WHERE RANK == 1 + ORDER BY RUN_ID, + PRECURSOR.ID ASC, + FEATURE.EXP_RT ASC + """ + else: + # Unscored mode: Only VAR_ columns + cursor = con.cursor() + cursor.execute("PRAGMA table_info(FEATURE_MS1)") + all_cols = [row[1] for row in cursor.fetchall()] + var_cols = [col for col in all_cols if "VAR_" in col.upper()] + + if var_cols: + var_cols_sql = ", ".join([f"FEATURE_MS1.{col}" for col in var_cols]) + ms1_query = f""" + SELECT + {var_cols_sql}, + PRECURSOR.DECOY + FROM FEATURE_MS1 + INNER JOIN FEATURE ON FEATURE_MS1.FEATURE_ID = FEATURE.ID + INNER JOIN PRECURSOR ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + """ + else: + logger.warning("No VAR_ columns found in FEATURE_MS1 table") + ms1_query = None + + if ms1_query: + df_ms1 = pd.read_sql_query(ms1_query, con) + if not df_ms1.empty: + 
plot_callback(df_ms1, outfile, "ms1", append=False) + + # Process MS2 level if available + if check_sqlite_table(con, "FEATURE_MS2"): + logger.info("Processing MS2 level feature scores") + + if has_score_ms2: + # Scored mode: Include SCORE columns and apply RANK==1 filter + ms2_query = """ + SELECT *, + RUN_ID || '_' || PRECURSOR_ID AS GROUP_ID + FROM FEATURE_MS2 + INNER JOIN + (SELECT RUN_ID, + ID, + PRECURSOR_ID, + EXP_RT + FROM FEATURE) AS FEATURE ON FEATURE_MS2.FEATURE_ID = FEATURE.ID + INNER JOIN + (SELECT ID, + CHARGE AS VAR_PRECURSOR_CHARGE, + DECOY + FROM PRECURSOR) AS PRECURSOR ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + INNER JOIN + (SELECT PRECURSOR_ID AS ID, + COUNT(*) AS VAR_TRANSITION_NUM_SCORE + FROM TRANSITION_PRECURSOR_MAPPING + INNER JOIN TRANSITION ON TRANSITION_PRECURSOR_MAPPING.TRANSITION_ID = TRANSITION.ID + WHERE DETECTING==1 + GROUP BY PRECURSOR_ID) AS VAR_TRANSITION_SCORE ON FEATURE.PRECURSOR_ID = VAR_TRANSITION_SCORE.ID + INNER JOIN SCORE_MS2 ON FEATURE.ID = SCORE_MS2.FEATURE_ID + WHERE RANK == 1 + ORDER BY RUN_ID, + PRECURSOR.ID ASC, + FEATURE.EXP_RT ASC + """ + else: + # Unscored mode: Only VAR_ columns + cursor = con.cursor() + cursor.execute("PRAGMA table_info(FEATURE_MS2)") + all_cols = [row[1] for row in cursor.fetchall()] + var_cols = [col for col in all_cols if "VAR_" in col.upper()] + + if var_cols: + var_cols_sql = ", ".join([f"FEATURE_MS2.{col}" for col in var_cols]) + ms2_query = f""" + SELECT + {var_cols_sql}, + PRECURSOR.DECOY + FROM FEATURE_MS2 + INNER JOIN FEATURE ON FEATURE_MS2.FEATURE_ID = FEATURE.ID + INNER JOIN PRECURSOR ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + """ + else: + logger.warning("No VAR_ columns found in FEATURE_MS2 table") + ms2_query = None + + if ms2_query: + df_ms2 = pd.read_sql_query(ms2_query, con) + if not df_ms2.empty: + append = check_sqlite_table(con, "FEATURE_MS1") + plot_callback(df_ms2, outfile, "ms2", append=append) + + # Process transition level if available + if check_sqlite_table(con, 
"FEATURE_TRANSITION"): + logger.info("Processing transition level feature scores") + + if has_score_transition: + # Scored mode: Include SCORE columns and apply RANK==1 filter + transition_query = """ + SELECT TRANSITION.DECOY AS DECOY, + FEATURE_TRANSITION.*, + PRECURSOR.CHARGE AS VAR_PRECURSOR_CHARGE, + TRANSITION.VAR_PRODUCT_CHARGE AS VAR_PRODUCT_CHARGE, + SCORE_TRANSITION.*, + RUN_ID || '_' || FEATURE_TRANSITION.FEATURE_ID || '_' || PRECURSOR_ID || '_' || FEATURE_TRANSITION.TRANSITION_ID AS GROUP_ID + FROM FEATURE_TRANSITION + INNER JOIN + (SELECT RUN_ID, + ID, + PRECURSOR_ID, + EXP_RT + FROM FEATURE) AS FEATURE ON FEATURE_TRANSITION.FEATURE_ID = FEATURE.ID + INNER JOIN PRECURSOR ON FEATURE.PRECURSOR_ID = PRECURSOR.ID + INNER JOIN SCORE_TRANSITION ON FEATURE_TRANSITION.FEATURE_ID = SCORE_TRANSITION.FEATURE_ID + AND FEATURE_TRANSITION.TRANSITION_ID = SCORE_TRANSITION.TRANSITION_ID + INNER JOIN + (SELECT ID, + CHARGE AS VAR_PRODUCT_CHARGE, + DECOY + FROM TRANSITION) AS TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID + ORDER BY RUN_ID, + PRECURSOR.ID, + FEATURE.EXP_RT, + TRANSITION.ID + """ + else: + # Unscored mode: Only VAR_ columns + cursor = con.cursor() + cursor.execute("PRAGMA table_info(FEATURE_TRANSITION)") + all_cols = [row[1] for row in cursor.fetchall()] + var_cols = [col for col in all_cols if "VAR_" in col.upper()] + + if var_cols: + var_cols_sql = ", ".join([f"FEATURE_TRANSITION.{col}" for col in var_cols]) + transition_query = f""" + SELECT + {var_cols_sql}, + TRANSITION.DECOY + FROM FEATURE_TRANSITION + INNER JOIN FEATURE ON FEATURE_TRANSITION.FEATURE_ID = FEATURE.ID + INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID + """ + else: + logger.warning("No VAR_ columns found in FEATURE_TRANSITION table") + transition_query = None + + if transition_query: + df_transition = pd.read_sql_query(transition_query, con) + if not df_transition.empty: + append = check_sqlite_table(con, "FEATURE_MS1") or 
check_sqlite_table(con, "FEATURE_MS2") + plot_callback(df_transition, outfile, "transition", append=append) + + # Process alignment level if available (no SCORE tables for alignment) + if check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT"): + logger.info("Processing alignment level feature scores") + # Get only VAR_ columns to reduce memory usage + cursor = con.cursor() + cursor.execute("PRAGMA table_info(FEATURE_MS2_ALIGNMENT)") + all_cols = [row[1] for row in cursor.fetchall()] + var_cols = [col for col in all_cols if "VAR_" in col.upper()] + + if var_cols: + var_cols_sql = ", ".join(var_cols) + alignment_query = f""" + SELECT + {var_cols_sql}, + LABEL AS DECOY + FROM FEATURE_MS2_ALIGNMENT + """ + df_alignment = pd.read_sql_query(alignment_query, con) + if not df_alignment.empty: + append = (check_sqlite_table(con, "FEATURE_MS1") or + check_sqlite_table(con, "FEATURE_MS2") or + check_sqlite_table(con, "FEATURE_TRANSITION")) + plot_callback(df_alignment, outfile, "alignment", append=append) + else: + logger.warning("No VAR_ columns found in FEATURE_MS2_ALIGNMENT table") + + finally: + con.close() + class OSWWriter(BaseOSWWriter): """ @@ -1069,21 +1301,21 @@ def _prepare_column_info(self, conn) -> dict: for col in get_table_columns_with_types( self.config.infile, "FEATURE_MS1" ) - if col[0] != "FEATURE_ID" + if col[0] != "FEATURE_ID" and col[1] # Ensure column has a type ], "feature_ms2_cols": [ col for col in get_table_columns_with_types( self.config.infile, "FEATURE_MS2" ) - if col[0] != "FEATURE_ID" + if col[0] != "FEATURE_ID" and col[1] # Ensure column has a type ], "feature_transition_cols": [ col for col in get_table_columns_with_types( self.config.infile, "FEATURE_TRANSITION" ) - if col[0] not in ["FEATURE_ID", "TRANSITION_ID"] + if col[0] not in ["FEATURE_ID", "TRANSITION_ID"] and col[1] # Ensure column has a type ], "score_ms1_exists": {"SCORE_MS1"}.issubset(table_names), "score_ms2_exists": {"SCORE_MS2"}.issubset(table_names), @@ -1138,21 +1370,24 @@ def 
_export_split_by_run(self, conn, column_info: dict) -> None: logger.info(f"Exporting precursor data to {precursor_path}") self._execute_copy_query(conn, precursor_query, precursor_path) - # Export transition data - transition_path = os.path.join(run_dir, "transition_features.parquet") - transition_query_run = ( - self._build_transition_query(column_info) - + f"\nWHERE FEATURE.RUN_ID = {run_id}" - ) - transition_query_null = ( - self._build_transition_query(column_info) - + "\nWHERE FEATURE.RUN_ID IS NULL" - ) - combined_transition_query = ( - f"{transition_query_run}\nUNION ALL\n{transition_query_null}" - ) - logger.info(f"Exporting transition data to {transition_path}") - self._execute_copy_query(conn, combined_transition_query, transition_path) + # Export transition data if requested + if self.config.include_transition_data: + transition_path = os.path.join(run_dir, "transition_features.parquet") + transition_query_run = ( + self._build_transition_query(column_info) + + f"\nWHERE FEATURE.RUN_ID = {run_id}" + ) + transition_query_null = ( + self._build_transition_query(column_info) + + "\nWHERE FEATURE.RUN_ID IS NULL" + ) + combined_transition_query = ( + f"{transition_query_run}\nUNION ALL\n{transition_query_null}" + ) + logger.info(f"Exporting transition data to {transition_path}") + self._execute_copy_query(conn, combined_transition_query, transition_path) + else: + logger.info("Skipping transition data export (include_transition_data=False)") # Export alignment data if exists if column_info["feature_ms2_alignment_exists"]: @@ -1171,13 +1406,16 @@ def _export_combined(self, conn, column_info: dict) -> None: precursor_query = self._build_precursor_query(conn, column_info) self._execute_copy_query(conn, precursor_query, precursor_path) - # Export transition data - transition_path = os.path.join( - self.config.outfile, "transition_features.parquet" - ) - logger.info(f"Exporting transition data to {transition_path}") - transition_query = 
self._build_transition_query(column_info) - self._execute_copy_query(conn, transition_query, transition_path) + # Export transition data if requested + if self.config.include_transition_data: + transition_path = os.path.join( + self.config.outfile, "transition_features.parquet" + ) + logger.info(f"Exporting transition data to {transition_path}") + transition_query = self._build_transition_query(column_info) + self._execute_copy_query(conn, transition_query, transition_path) + else: + logger.info("Skipping transition data export (include_transition_data=False)") # Export alignment data if exists if column_info["feature_ms2_alignment_exists"]: @@ -1195,10 +1433,13 @@ def _export_single_file(self, conn, column_info: dict) -> None: precursor_query = self._build_combined_precursor_query(conn, column_info) conn.execute(f"INSERT INTO temp_table {precursor_query}") - # Insert transition data - logger.debug("Inserting transition data into temp table") - transition_query = self._build_combined_transition_query(column_info) - conn.execute(f"INSERT INTO temp_table {transition_query}") + # Insert transition data if requested + if self.config.include_transition_data: + logger.debug("Inserting transition data into temp table") + transition_query = self._build_combined_transition_query(column_info) + conn.execute(f"INSERT INTO temp_table {transition_query}") + else: + logger.info("Skipping transition data export (include_transition_data=False)") # Export to parquet logger.info(f"Exporting combined data to {self.config.outfile}") @@ -1353,6 +1594,27 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: {score_table_joins} """ + def _build_transition_score_columns_and_join(self, column_info: dict) -> Tuple[str, str]: + """Build score columns and join clause for transition scores""" + score_transition_cols = "" + score_transition_join = "" + if column_info.get("score_transition_exists", False): + logger.debug("SCORE_TRANSITION table exists, adding score columns to 
transition query") + score_cols = [ + "SCORE_TRANSITION.SCORE AS SCORE_TRANSITION_SCORE", + "SCORE_TRANSITION.RANK AS SCORE_TRANSITION_RANK", + "SCORE_TRANSITION.PVALUE AS SCORE_TRANSITION_P_VALUE", + "SCORE_TRANSITION.QVALUE AS SCORE_TRANSITION_Q_VALUE", + "SCORE_TRANSITION.PEP AS SCORE_TRANSITION_PEP", + ] + score_transition_cols = ", " + ", ".join(score_cols) + score_transition_join = ( + f"LEFT JOIN sqlite_scan('{self.config.infile}', 'SCORE_TRANSITION') AS SCORE_TRANSITION " + f"ON FEATURE_TRANSITION.TRANSITION_ID = SCORE_TRANSITION.TRANSITION_ID " + f"AND FEATURE_TRANSITION.FEATURE_ID = SCORE_TRANSITION.FEATURE_ID" + ) + return score_transition_cols, score_transition_join + def _build_transition_query(self, column_info: dict) -> str: """Build SQL query for transition data""" feature_transition_cols_sql = ", ".join( @@ -1366,6 +1628,9 @@ def _build_transition_query(self, column_info: dict) -> str: else "TRANSITION.TYPE || CAST(TRANSITION.ORDINAL AS VARCHAR) || '^' || CAST(TRANSITION.CHARGE AS VARCHAR)" ) + # Add transition score columns if they exist + score_transition_cols, score_transition_join = self._build_transition_score_columns_and_join(column_info) + return f""" SELECT FEATURE.RUN_ID AS RUN_ID, @@ -1383,6 +1648,7 @@ def _build_transition_query(self, column_info: dict) -> str: TRANSITION.DECOY AS TRANSITION_DECOY, FEATURE.ID AS FEATURE_ID, {feature_transition_cols_sql} + {score_transition_cols} FROM sqlite_scan('{self.config.infile}', 'TRANSITION') AS TRANSITION FULL JOIN sqlite_scan('{self.config.infile}', 'TRANSITION_PRECURSOR_MAPPING') AS TRANSITION_PRECURSOR_MAPPING ON TRANSITION.ID = TRANSITION_PRECURSOR_MAPPING.TRANSITION_ID @@ -1395,6 +1661,7 @@ def _build_transition_query(self, column_info: dict) -> str: FROM sqlite_scan('{self.config.infile}', 'FEATURE') ) AS FEATURE ON FEATURE_TRANSITION.FEATURE_ID = FEATURE.ID + {score_transition_join} """ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: @@ -1414,6 +1681,16 @@ def 
_build_combined_precursor_query(self, conn, column_info: dict) -> str: for col in column_info["feature_transition_cols"] ) + # Get score columns for precursor level + score_cols_select, score_table_joins, score_column_views = ( + self._build_score_column_selection_and_joins(column_info) + ) + + # Add NULL columns for transition score columns + as_null_transition_score_cols = "" + if column_info.get("score_transition_exists", False): + as_null_transition_score_cols = ", NULL AS SCORE_TRANSITION_SCORE, NULL AS SCORE_TRANSITION_RANK, NULL AS SCORE_TRANSITION_P_VALUE, NULL AS SCORE_TRANSITION_Q_VALUE, NULL AS SCORE_TRANSITION_PEP" + # First get the peptide table and process it with pyopenms logger.info("Generating peptide unimod to codename mapping") with sqlite3.connect(self.config.infile) as sql_conn: @@ -1481,6 +1758,7 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: -- JOIN ipf_groups g USING (NORMALIZED_SEQUENCE) --) + {score_column_views} SELECT PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID AS PROTEIN_ID, PEPTIDE.ID AS PEPTIDE_ID, @@ -1523,7 +1801,9 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: NULL AS TRANSITION_DETECTING, NULL AS TRANSITION_LIBRARY_INTENSITY, NULL AS TRANSITION_DECOY, - {as_null_feature_transition_cols_sql} + {as_null_feature_transition_cols_sql}, + {score_cols_select} + {as_null_transition_score_cols} FROM sqlite_scan('{self.config.infile}', 'PRECURSOR') AS PRECURSOR INNER JOIN sqlite_scan('{self.config.infile}', 'PRECURSOR_PEPTIDE_MAPPING') AS PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID @@ -1544,6 +1824,7 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: ON FEATURE.ID = FEATURE_MS2.FEATURE_ID INNER JOIN sqlite_scan('{self.config.infile}', 'RUN') AS RUN ON FEATURE.RUN_ID = RUN.ID + {score_table_joins} """ def _build_combined_transition_query(self, column_info: dict) -> str: @@ -1567,6 +1848,25 @@ def 
_build_combined_transition_query(self, column_info: dict) -> str: else "TRANSITION.TYPE || CAST(TRANSITION.ORDINAL AS VARCHAR) || '^' || CAST(TRANSITION.CHARGE AS VARCHAR)" ) + # Add transition score columns if they exist + score_transition_cols, score_transition_join = self._build_transition_score_columns_and_join(column_info) + + # Also need to add NULL columns for score columns that appear in precursor query + as_null_score_cols = "" + if column_info.get("score_ms1_exists", False): + as_null_score_cols += ", NULL AS SCORE_MS1_SCORE, NULL AS SCORE_MS1_RANK, NULL AS SCORE_MS1_P_VALUE, NULL AS SCORE_MS1_Q_VALUE, NULL AS SCORE_MS1_PEP" + if column_info.get("score_ms2_exists", False): + as_null_score_cols += ", NULL AS SCORE_MS2_SCORE, NULL AS SCORE_MS2_PEAK_GROUP_RANK, NULL AS SCORE_MS2_P_VALUE, NULL AS SCORE_MS2_Q_VALUE, NULL AS SCORE_MS2_PEP" + if column_info.get("score_ipf_exists", False): + as_null_score_cols += ", NULL AS SCORE_IPF_PRECURSOR_PEAKGROUP_PEP, NULL AS SCORE_IPF_PEP, NULL AS SCORE_IPF_QVALUE" + + # Add NULL columns for peptide and protein score contexts + for table in ["peptide", "protein"]: + if column_info.get(f"score_{table}_exists", False): + for context in column_info.get(f"score_{table}_contexts", []): + safe_context = context.upper().replace("-", "_") + as_null_score_cols += f", NULL AS SCORE_{table.upper()}_{safe_context}_SCORE, NULL AS SCORE_{table.upper()}_{safe_context}_P_VALUE, NULL AS SCORE_{table.upper()}_{safe_context}_Q_VALUE, NULL AS SCORE_{table.upper()}_{safe_context}_PEP" + return f""" SELECT NULL AS PROTEIN_ID, @@ -1611,6 +1911,8 @@ def _build_combined_transition_query(self, column_info: dict) -> str: TRANSITION.LIBRARY_INTENSITY AS TRANSITION_LIBRARY_INTENSITY, TRANSITION.DECOY AS TRANSITION_DECOY, {feature_transition_cols_sql} + {as_null_score_cols} + {score_transition_cols} FROM sqlite_scan('{self.config.infile}', 'TRANSITION_PRECURSOR_MAPPING') AS TRANSITION_PRECURSOR_MAPPING INNER JOIN sqlite_scan('{self.config.infile}', 
'TRANSITION') AS TRANSITION ON TRANSITION_PRECURSOR_MAPPING.TRANSITION_ID = TRANSITION.ID @@ -1618,6 +1920,7 @@ def _build_combined_transition_query(self, column_info: dict) -> str: ON TRANSITION.ID = TRANSITION_PEPTIDE_MAPPING.TRANSITION_ID FULL JOIN sqlite_scan('{self.config.infile}', 'FEATURE_TRANSITION') AS FEATURE_TRANSITION ON TRANSITION.ID = FEATURE_TRANSITION.TRANSITION_ID + {score_transition_join} """ def _create_temp_table(self, conn, column_info: dict) -> None: @@ -1635,6 +1938,56 @@ def _create_temp_table(self, conn, column_info: dict) -> None: for col in column_info["feature_transition_cols"] ) + # Build score column types + score_cols_types = [] + if column_info.get("score_ms1_exists", False): + score_cols_types.extend([ + "SCORE_MS1_SCORE DOUBLE", + "SCORE_MS1_RANK INTEGER", + "SCORE_MS1_P_VALUE DOUBLE", + "SCORE_MS1_Q_VALUE DOUBLE", + "SCORE_MS1_PEP DOUBLE" + ]) + if column_info.get("score_ms2_exists", False): + score_cols_types.extend([ + "SCORE_MS2_SCORE DOUBLE", + "SCORE_MS2_PEAK_GROUP_RANK INTEGER", + "SCORE_MS2_P_VALUE DOUBLE", + "SCORE_MS2_Q_VALUE DOUBLE", + "SCORE_MS2_PEP DOUBLE" + ]) + if column_info.get("score_ipf_exists", False): + score_cols_types.extend([ + "SCORE_IPF_PRECURSOR_PEAKGROUP_PEP DOUBLE", + "SCORE_IPF_PEP DOUBLE", + "SCORE_IPF_QVALUE DOUBLE" + ]) + + # Add peptide and protein score columns for each context + for table in ["peptide", "protein"]: + if column_info.get(f"score_{table}_exists", False): + for context in column_info.get(f"score_{table}_contexts", []): + safe_context = context.upper().replace("-", "_") + score_cols_types.extend([ + f"SCORE_{table.upper()}_{safe_context}_SCORE DOUBLE", + f"SCORE_{table.upper()}_{safe_context}_P_VALUE DOUBLE", + f"SCORE_{table.upper()}_{safe_context}_Q_VALUE DOUBLE", + f"SCORE_{table.upper()}_{safe_context}_PEP DOUBLE" + ]) + + # Add transition score columns + if column_info.get("score_transition_exists", False): + score_cols_types.extend([ + "SCORE_TRANSITION_SCORE DOUBLE", + 
"SCORE_TRANSITION_RANK INTEGER", + "SCORE_TRANSITION_P_VALUE DOUBLE", + "SCORE_TRANSITION_Q_VALUE DOUBLE", + "SCORE_TRANSITION_PEP DOUBLE" + ]) + + # Prepend comma and space to score columns if there are any + score_cols_types_sql = (", " + ", ".join(score_cols_types)) if score_cols_types else "" + create_temp_table_query = f""" CREATE TABLE temp_table ( PROTEIN_ID BIGINT, @@ -1679,6 +2032,7 @@ def _create_temp_table(self, conn, column_info: dict) -> None: TRANSITION_LIBRARY_INTENSITY DOUBLE, TRANSITION_DECOY BOOLEAN, {feature_transition_cols_types} + {score_cols_types_sql} ); """ @@ -1817,6 +2171,7 @@ def _get_peptide_protein_score_table(self, level, contexts: list) -> str: if global_exists: glob_query = f""" SELECT {id_col}, + RUN_ID, SCORE as {score_table}_GLOBAL_SCORE, PVALUE as {score_table}_GLOBAL_PVALUE, QVALUE as {score_table}_GLOBAL_QVALUE, @@ -1835,7 +2190,7 @@ def _get_peptide_protein_score_table(self, level, contexts: list) -> str: g.{score_table}_GLOBAL_QVALUE, g.{score_table}_GLOBAL_PEP FROM ({non_global_query}) ng - LEFT JOIN ({glob_query}) g ON ng.{id_col} = g.{id_col} + LEFT JOIN ({glob_query}) g ON ng.{id_col} = g.{id_col} AND ng.RUN_ID = g.RUN_ID """ elif pivot_cols_str and not global_exists: # Only non-global contexts exist @@ -1880,6 +2235,15 @@ def _build_score_column_selection_and_joins( f"INNER JOIN sqlite_scan('{self.config.infile}', 'SCORE_MS2') AS SCORE_MS2 ON FEATURE.ID = SCORE_MS2.FEATURE_ID" ) + if column_info["score_ipf_exists"]: + logger.debug("SCORE_IPF table exists, adding score columns to selection") + score_columns_to_select.append( + "SCORE_IPF.PRECURSOR_PEAKGROUP_PEP AS SCORE_IPF_PRECURSOR_PEAKGROUP_PEP, SCORE_IPF.PEP AS SCORE_IPF_PEP, SCORE_IPF.QVALUE AS SCORE_IPF_QVALUE" + ) + score_tables_to_join.append( + f"LEFT JOIN sqlite_scan('{self.config.infile}', 'SCORE_IPF') AS SCORE_IPF ON FEATURE.ID = SCORE_IPF.FEATURE_ID" + ) + # Create views for peptide and protein score tables if they exist if 
column_info["score_peptide_exists"]: logger.debug("SCORE_PEPTIDE table exists, adding score table view to query") @@ -1890,7 +2254,7 @@ def _build_score_column_selection_and_joins( ) # Add JOIN for peptide score view score_tables_to_join.append( - "INNER JOIN score_peptide_view ON PEPTIDE.ID = score_peptide_view.PEPTIDE_ID AND FEATURE.RUN_ID = score_peptide_view.RUN_ID" + "LEFT JOIN score_peptide_view ON PEPTIDE.ID = score_peptide_view.PEPTIDE_ID AND FEATURE.RUN_ID = score_peptide_view.RUN_ID" ) if column_info["score_protein_exists"]: logger.debug("SCORE_PROTEIN table exists, adding score table view to query") @@ -1901,7 +2265,7 @@ def _build_score_column_selection_and_joins( ) # Add JOIN for protein score view score_tables_to_join.append( - "INNER JOIN score_protein_view ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = score_protein_view.PROTEIN_ID AND FEATURE.RUN_ID = score_protein_view.RUN_ID" + "LEFT JOIN score_protein_view ON PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID = score_protein_view.PROTEIN_ID AND FEATURE.RUN_ID = score_protein_view.RUN_ID" ) # Add score columns for peptide and protein contexts diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 89d29b7b..bc33c7d0 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -4,7 +4,7 @@ from ..._config import ExportIOConfig from .._base import BaseParquetReader, BaseParquetWriter -from ..util import get_parquet_column_names +from ..util import get_parquet_column_names, _ensure_pyarrow class ParquetReader(BaseParquetReader): @@ -773,6 +773,135 @@ def _read_for_export_scored_report(self, con) -> pd.DataFrame: return df + def export_feature_scores(self, outfile: str, plot_callback): + """ + Export feature scores from Parquet file for plotting. 
+ + Detects if SCORE columns exist and adjusts behavior: + - If SCORE columns exist: applies RANK==1 filtering and plots SCORE + VAR_ columns + - If SCORE columns don't exist: plots only VAR_ columns + + Parameters + ---------- + outfile : str + Path to the output PDF file. + plot_callback : callable + Function to call for plotting each level's data. + Signature: plot_callback(df, outfile, level, append) + """ + logger.info(f"Reading parquet file: {self.infile}") + # Ensure pyarrow is available + pa, _, _ = _ensure_pyarrow() + + # First, read only column names to identify what to load + parquet_file = pa.parquet.ParquetFile(self.infile) + all_columns = parquet_file.schema.names + + # Check for SCORE columns + score_cols = [col for col in all_columns if col.startswith("SCORE_")] + has_scores = len(score_cols) > 0 + + if has_scores: + logger.info("SCORE columns detected - applying RANK==1 filter and plotting SCORE + VAR_ columns") + else: + logger.info("No SCORE columns detected - plotting only VAR_ columns") + + # Identify columns to read for each level + ms1_cols = [col for col in all_columns if col.startswith("FEATURE_MS1_VAR_")] + ms2_cols = [col for col in all_columns if col.startswith("FEATURE_MS2_VAR_")] + transition_cols = [col for col in all_columns if col.startswith("FEATURE_TRANSITION_VAR_")] + + # Determine which columns to read (only what we need) + cols_to_read = set() + + # Add SCORE columns if they exist + if has_scores: + cols_to_read.update(score_cols) + # Add RANK column for filtering + if "SCORE_MS2_PEAK_GROUP_RANK" in all_columns: + cols_to_read.add("SCORE_MS2_PEAK_GROUP_RANK") + # Add ID columns for grouping + if "RUN_ID" in all_columns: + cols_to_read.add("RUN_ID") + if "PRECURSOR_ID" in all_columns: + cols_to_read.add("PRECURSOR_ID") + + if ms1_cols and "PRECURSOR_DECOY" in all_columns: + cols_to_read.update(ms1_cols) + cols_to_read.add("PRECURSOR_DECOY") + if ms2_cols and "PRECURSOR_DECOY" in all_columns: + cols_to_read.update(ms2_cols) + 
cols_to_read.add("PRECURSOR_DECOY") + if transition_cols and "TRANSITION_DECOY" in all_columns: + cols_to_read.update(transition_cols) + cols_to_read.add("TRANSITION_DECOY") + + if not cols_to_read: + logger.warning("No VAR_ columns found in parquet file") + return + + # Read only the columns we need + logger.info(f"Reading {len(cols_to_read)} columns from parquet file") + df = pd.read_parquet(self.infile, columns=list(cols_to_read)) + + # Apply RANK==1 filter if SCORE columns exist + if has_scores and 'SCORE_MS2_PEAK_GROUP_RANK' in df.columns: + logger.info(f"Filtering to RANK==1: {len(df)} -> ", end="") + df = df[df['SCORE_MS2_PEAK_GROUP_RANK'] == 1].copy() + logger.info(f"{len(df)} rows") + + # Generate GROUP_ID if needed + if has_scores and 'GROUP_ID' not in df.columns: + if 'RUN_ID' in df.columns and 'PRECURSOR_ID' in df.columns: + df['GROUP_ID'] = df['RUN_ID'].astype(str) + '_' + df['PRECURSOR_ID'].astype(str) + + # Process MS1 level + if ms1_cols and "PRECURSOR_DECOY" in df.columns: + logger.info("Processing MS1 level feature scores") + select_cols = ms1_cols + ["PRECURSOR_DECOY"] + # Add SCORE columns if present + if has_scores: + score_ms1_cols = [col for col in score_cols if 'MS1' in col.upper()] + select_cols.extend(score_ms1_cols) + if 'GROUP_ID' in df.columns: + select_cols.append('GROUP_ID') + ms1_df = df[select_cols].copy() + ms1_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) + plot_callback(ms1_df, outfile, "ms1", append=False) + del ms1_df # Free memory + + # Process MS2 level + if ms2_cols and "PRECURSOR_DECOY" in df.columns: + logger.info("Processing MS2 level feature scores") + select_cols = ms2_cols + ["PRECURSOR_DECOY"] + # Add SCORE columns if present + if has_scores: + score_ms2_cols = [col for col in score_cols if 'MS2' in col.upper() or 'MS1' not in col.upper()] + select_cols.extend(score_ms2_cols) + if 'GROUP_ID' in df.columns: + select_cols.append('GROUP_ID') + ms2_df = df[select_cols].copy() + 
ms2_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) + append = bool(ms1_cols) + plot_callback(ms2_df, outfile, "ms2", append=append) + del ms2_df # Free memory + + # Process transition level + if transition_cols and "TRANSITION_DECOY" in df.columns: + logger.info("Processing transition level feature scores") + select_cols = transition_cols + ["TRANSITION_DECOY"] + # Add SCORE columns if present + if has_scores: + score_transition_cols = [col for col in score_cols if 'TRANSITION' in col.upper()] + select_cols.extend(score_transition_cols) + if 'GROUP_ID' in df.columns: + select_cols.append('GROUP_ID') + transition_df = df[select_cols].copy() + transition_df.rename(columns={"TRANSITION_DECOY": "DECOY"}, inplace=True) + append = bool(ms1_cols or ms2_cols) + plot_callback(transition_df, outfile, "transition", append=append) + del transition_df # Free memory + class ParquetWriter(BaseParquetWriter): """ diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index 76ea0609..aad02c66 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -4,7 +4,7 @@ import duckdb from loguru import logger -from ..util import get_parquet_column_names +from ..util import get_parquet_column_names, _ensure_pyarrow from .._base import BaseSplitParquetReader, BaseSplitParquetWriter from ..._config import ExportIOConfig @@ -839,6 +839,169 @@ def _build_feature_vars_sql(self) -> str: return ", " + ", ".join(feature_vars) if feature_vars else "" + def export_feature_scores(self, outfile: str, plot_callback): + """ + Export feature scores from split Parquet directory for plotting. + + Detects if SCORE columns exist and adjusts behavior: + - If SCORE columns exist: applies RANK==1 filtering and plots SCORE + VAR_ columns + - If SCORE columns don't exist: plots only VAR_ columns + + Parameters + ---------- + outfile : str + Path to the output PDF file. 
+ plot_callback : callable + Function to call for plotting each level's data. + Signature: plot_callback(df, outfile, level, append) + """ + # Ensure pyarrow is available + pa, _, _ = _ensure_pyarrow() + + # Read precursor features - only necessary columns + precursor_file = os.path.join(self.infile, "precursors_features.parquet") + logger.info(f"Reading precursor features from: {precursor_file}") + + # First check what columns are available + precursor_parquet = pa.parquet.ParquetFile(precursor_file) + all_columns = precursor_parquet.schema.names + + # Check for SCORE columns + score_cols = [col for col in all_columns if col.startswith("SCORE_")] + has_scores = len(score_cols) > 0 + + if has_scores: + logger.info("SCORE columns detected - applying RANK==1 filter and plotting SCORE + VAR_ columns") + else: + logger.info("No SCORE columns detected - plotting only VAR_ columns") + + # Identify columns to read + ms1_cols = [col for col in all_columns if col.startswith("FEATURE_MS1_VAR_")] + ms2_cols = [col for col in all_columns if col.startswith("FEATURE_MS2_VAR_")] + + cols_to_read = set() + + # Add SCORE columns if they exist + if has_scores: + cols_to_read.update(score_cols) + # Add RANK column for filtering + if "SCORE_MS2_PEAK_GROUP_RANK" in all_columns: + cols_to_read.add("SCORE_MS2_PEAK_GROUP_RANK") + # Add ID columns for grouping + if "RUN_ID" in all_columns: + cols_to_read.add("RUN_ID") + if "PRECURSOR_ID" in all_columns: + cols_to_read.add("PRECURSOR_ID") + + if ms1_cols and "PRECURSOR_DECOY" in all_columns: + cols_to_read.update(ms1_cols) + cols_to_read.add("PRECURSOR_DECOY") + if ms2_cols and "PRECURSOR_DECOY" in all_columns: + cols_to_read.update(ms2_cols) + cols_to_read.add("PRECURSOR_DECOY") + + if cols_to_read: + logger.info(f"Reading {len(cols_to_read)} columns from precursor features") + df_precursor = pd.read_parquet(precursor_file, columns=list(cols_to_read)) + + # Apply RANK==1 filter if SCORE columns exist + if has_scores and 
'SCORE_MS2_PEAK_GROUP_RANK' in df_precursor.columns: + n_before = len(df_precursor) + df_precursor = df_precursor[df_precursor['SCORE_MS2_PEAK_GROUP_RANK'] == 1].copy() + logger.info(f"Filtering to RANK==1: {n_before} -> {len(df_precursor)} rows") + + # Generate GROUP_ID if needed + if has_scores and 'GROUP_ID' not in df_precursor.columns: + if 'RUN_ID' in df_precursor.columns and 'PRECURSOR_ID' in df_precursor.columns: + df_precursor['GROUP_ID'] = df_precursor['RUN_ID'].astype(str) + '_' + df_precursor['PRECURSOR_ID'].astype(str) + + # Process MS1 level + if ms1_cols and "PRECURSOR_DECOY" in df_precursor.columns: + logger.info("Processing MS1 level feature scores") + select_cols = ms1_cols + ["PRECURSOR_DECOY"] + # Add SCORE columns if present + if has_scores: + score_ms1_cols = [col for col in score_cols if 'MS1' in col.upper()] + select_cols.extend(score_ms1_cols) + if 'GROUP_ID' in df_precursor.columns: + select_cols.append('GROUP_ID') + ms1_df = df_precursor[select_cols].copy() + ms1_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) + plot_callback(ms1_df, outfile, "ms1", append=False) + del ms1_df # Free memory + + # Process MS2 level + if ms2_cols and "PRECURSOR_DECOY" in df_precursor.columns: + logger.info("Processing MS2 level feature scores") + select_cols = ms2_cols + ["PRECURSOR_DECOY"] + # Add SCORE columns if present + if has_scores: + score_ms2_cols = [col for col in score_cols if 'MS2' in col.upper() or 'MS1' not in col.upper()] + select_cols.extend(score_ms2_cols) + if 'GROUP_ID' in df_precursor.columns: + select_cols.append('GROUP_ID') + ms2_df = df_precursor[select_cols].copy() + ms2_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) + append = bool(ms1_cols) + plot_callback(ms2_df, outfile, "ms2", append=append) + del ms2_df # Free memory + + del df_precursor # Free memory + + # Read transition features if available + transition_file = os.path.join(self.infile, "transition_features.parquet") + if 
os.path.exists(transition_file): + logger.info(f"Reading transition features from: {transition_file}") + + # Check what columns are available + transition_parquet = pa.parquet.ParquetFile(transition_file) + transition_all_columns = transition_parquet.schema.names + transition_cols = [col for col in transition_all_columns if col.startswith("FEATURE_TRANSITION_VAR_")] + + # Check for SCORE columns in transition file + transition_score_cols = [col for col in transition_all_columns if col.startswith("SCORE_") and 'TRANSITION' in col.upper()] + has_transition_scores = len(transition_score_cols) > 0 + + if transition_cols and "TRANSITION_DECOY" in transition_all_columns: + # Read only necessary columns + cols_to_read = transition_cols + ["TRANSITION_DECOY"] + if has_transition_scores: + cols_to_read.extend(transition_score_cols) + if 'GROUP_ID' in transition_all_columns: + cols_to_read.append('GROUP_ID') + + logger.info(f"Reading {len(cols_to_read)} columns from transition features") + df_transition = pd.read_parquet(transition_file, columns=cols_to_read) + + logger.info("Processing transition level feature scores") + transition_df = df_transition.copy() + transition_df.rename(columns={"TRANSITION_DECOY": "DECOY"}, inplace=True) + append = bool(ms1_cols or ms2_cols) + plot_callback(transition_df, outfile, "transition", append=append) + del transition_df, df_transition # Free memory + + # Read alignment features if available + alignment_file = os.path.join(self.infile, "feature_alignment.parquet") + if os.path.exists(alignment_file): + logger.info(f"Reading alignment features from: {alignment_file}") + + # Check what columns are available + alignment_parquet = pa.parquet.ParquetFile(alignment_file) + alignment_all_columns = alignment_parquet.schema.names + var_cols = [col for col in alignment_all_columns if col.startswith("VAR_")] + + if var_cols and "DECOY" in alignment_all_columns: + # Read only necessary columns + cols_to_read = var_cols + ["DECOY"] + 
logger.info(f"Reading {len(cols_to_read)} columns from alignment features") + df_alignment = pd.read_parquet(alignment_file, columns=cols_to_read) + + logger.info("Processing alignment level feature scores") + alignment_df = df_alignment[var_cols + ["DECOY"]].copy() + append = bool(ms1_cols or ms2_cols or (os.path.exists(transition_file) and transition_cols)) + plot_callback(alignment_df, outfile, "alignment", append=append) + del alignment_df, df_alignment # Free memory + class SplitParquetWriter(BaseSplitParquetWriter): """ diff --git a/pyprophet/report.py b/pyprophet/report.py index ecd2534e..681e75a0 100644 --- a/pyprophet/report.py +++ b/pyprophet/report.py @@ -853,11 +853,12 @@ def plot_scores(df, out, color_palette="normal"): "Error: The matplotlib package is required to create a report." ) - score_columns = ( - ["SCORE"] - + [c for c in df.columns if c.startswith("MAIN_VAR_")] - + [c for c in df.columns if c.startswith("VAR_")] - ) + # Build score_columns list, only including SCORE if it exists + score_columns = [] + if "SCORE" in df.columns: + score_columns.append("SCORE") + score_columns += [c for c in df.columns if c.startswith("MAIN_VAR_")] + score_columns += [c for c in df.columns if c.startswith("VAR_")] t_col, d_col = color_blind_friendly(color_palette) diff --git a/pyprophet/scoring/_optimized.c b/pyprophet/scoring/_optimized.c index e1df89f0..70c9b4f9 100644 --- a/pyprophet/scoring/_optimized.c +++ b/pyprophet/scoring/_optimized.c @@ -1383,7 +1383,7 @@ static const char *__pyx_filename; static const char* const __pyx_f[] = { "pyprophet/scoring/_optimized.pyx", "", - "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd", + "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd", "cpython/type.pxd", }; /* #### Code section: utility_code_proto_before_types ### */ @@ -1606,7 +1606,7 @@ typedef struct { /* #### Code section: 
numeric_typedefs ### */ -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":743 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":743 * # in Cython to enable them only on the right systems. * * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< @@ -1615,7 +1615,7 @@ typedef struct { */ typedef npy_int8 __pyx_t_5numpy_int8_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":744 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":744 * * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< @@ -1624,7 +1624,7 @@ typedef npy_int8 __pyx_t_5numpy_int8_t; */ typedef npy_int16 __pyx_t_5numpy_int16_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":745 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":745 * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< @@ -1633,7 +1633,7 @@ typedef npy_int16 __pyx_t_5numpy_int16_t; */ typedef npy_int32 __pyx_t_5numpy_int32_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":746 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":746 * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< @@ -1642,7 +1642,7 @@ typedef npy_int32 __pyx_t_5numpy_int32_t; */ typedef npy_int64 __pyx_t_5numpy_int64_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":748 +/* 
"../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":748 * ctypedef npy_int64 int64_t * * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< @@ -1651,7 +1651,7 @@ typedef npy_int64 __pyx_t_5numpy_int64_t; */ typedef npy_uint8 __pyx_t_5numpy_uint8_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":749 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":749 * * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< @@ -1660,7 +1660,7 @@ typedef npy_uint8 __pyx_t_5numpy_uint8_t; */ typedef npy_uint16 __pyx_t_5numpy_uint16_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":750 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":750 * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< @@ -1669,7 +1669,7 @@ typedef npy_uint16 __pyx_t_5numpy_uint16_t; */ typedef npy_uint32 __pyx_t_5numpy_uint32_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":751 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":751 * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< @@ -1678,7 +1678,7 @@ typedef npy_uint32 __pyx_t_5numpy_uint32_t; */ typedef npy_uint64 __pyx_t_5numpy_uint64_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":753 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":753 * ctypedef npy_uint64 uint64_t * * ctypedef npy_float32 float32_t # 
<<<<<<<<<<<<<< @@ -1687,7 +1687,7 @@ typedef npy_uint64 __pyx_t_5numpy_uint64_t; */ typedef npy_float32 __pyx_t_5numpy_float32_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":754 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":754 * * ctypedef npy_float32 float32_t * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< @@ -1696,7 +1696,7 @@ typedef npy_float32 __pyx_t_5numpy_float32_t; */ typedef npy_float64 __pyx_t_5numpy_float64_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":761 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":761 * ctypedef double complex complex128_t * * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< @@ -1705,7 +1705,7 @@ typedef npy_float64 __pyx_t_5numpy_float64_t; */ typedef npy_longlong __pyx_t_5numpy_longlong_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":762 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":762 * * ctypedef npy_longlong longlong_t * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< @@ -1714,7 +1714,7 @@ typedef npy_longlong __pyx_t_5numpy_longlong_t; */ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":764 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":764 * ctypedef npy_ulonglong ulonglong_t * * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< @@ -1723,7 +1723,7 @@ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; */ typedef npy_intp __pyx_t_5numpy_intp_t; -/* 
"../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":765 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":765 * * ctypedef npy_intp intp_t * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< @@ -1732,7 +1732,7 @@ typedef npy_intp __pyx_t_5numpy_intp_t; */ typedef npy_uintp __pyx_t_5numpy_uintp_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":767 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":767 * ctypedef npy_uintp uintp_t * * ctypedef npy_double float_t # <<<<<<<<<<<<<< @@ -1741,7 +1741,7 @@ typedef npy_uintp __pyx_t_5numpy_uintp_t; */ typedef npy_double __pyx_t_5numpy_float_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":768 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":768 * * ctypedef npy_double float_t * ctypedef npy_double double_t # <<<<<<<<<<<<<< @@ -1750,7 +1750,7 @@ typedef npy_double __pyx_t_5numpy_float_t; */ typedef npy_double __pyx_t_5numpy_double_t; -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":769 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":769 * ctypedef npy_double float_t * ctypedef npy_double double_t * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< @@ -17271,7 +17271,7 @@ static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":242 +/* 
"../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":242 * cdef int type_num * * @property # <<<<<<<<<<<<<< @@ -17282,7 +17282,7 @@ static PyObject *__pyx_unpickle_Enum__set_state(struct __pyx_MemviewEnum_obj *__ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_Descr *__pyx_v_self) { npy_intp __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":244 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":244 * @property * cdef inline npy_intp itemsize(self) noexcept nogil: * return PyDataType_ELSIZE(self) # <<<<<<<<<<<<<< @@ -17292,7 +17292,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_D __pyx_r = PyDataType_ELSIZE(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":242 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":242 * cdef int type_num * * @property # <<<<<<<<<<<<<< @@ -17305,7 +17305,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_D return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":246 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":246 * return PyDataType_ELSIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17316,7 +17316,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_8itemsize_itemsize(PyArray_D static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_9alignment_alignment(PyArray_Descr *__pyx_v_self) { npy_intp __pyx_r; - /* 
"../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":248 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":248 * @property * cdef inline npy_intp alignment(self) noexcept nogil: * return PyDataType_ALIGNMENT(self) # <<<<<<<<<<<<<< @@ -17326,7 +17326,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_9alignment_alignment(PyArray __pyx_r = PyDataType_ALIGNMENT(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":246 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":246 * return PyDataType_ELSIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17339,7 +17339,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_5dtype_9alignment_alignment(PyArray return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":252 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":252 * # Use fields/names with care as they may be NULL. You must check * # for this using PyDataType_HASFIELDS. 
* @property # <<<<<<<<<<<<<< @@ -17353,7 +17353,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_6fields_fields(PyArray_Desc PyObject *__pyx_t_1; __Pyx_RefNannySetupContext("fields", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":254 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":254 * @property * cdef inline object fields(self): * return PyDataType_FIELDS(self) # <<<<<<<<<<<<<< @@ -17366,7 +17366,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_6fields_fields(PyArray_Desc __pyx_r = ((PyObject *)__pyx_t_1); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":252 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":252 * # Use fields/names with care as they may be NULL. You must check * # for this using PyDataType_HASFIELDS. 
* @property # <<<<<<<<<<<<<< @@ -17381,7 +17381,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_6fields_fields(PyArray_Desc return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":256 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":256 * return PyDataType_FIELDS(self) * * @property # <<<<<<<<<<<<<< @@ -17395,7 +17395,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr PyObject *__pyx_t_1; __Pyx_RefNannySetupContext("names", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":258 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":258 * @property * cdef inline tuple names(self): * return PyDataType_NAMES(self) # <<<<<<<<<<<<<< @@ -17408,7 +17408,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr __pyx_r = ((PyObject*)__pyx_t_1); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":256 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":256 * return PyDataType_FIELDS(self) * * @property # <<<<<<<<<<<<<< @@ -17423,7 +17423,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":263 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":263 * # valid (the pointer can be NULL). Most users should access * # this field via the inline helper method PyDataType_SHAPE. 
* @property # <<<<<<<<<<<<<< @@ -17434,7 +17434,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_5dtype_5names_names(PyArray_Descr static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarray(PyArray_Descr *__pyx_v_self) { PyArray_ArrayDescr *__pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":265 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":265 * @property * cdef inline PyArray_ArrayDescr* subarray(self) noexcept nogil: * return PyDataType_SUBARRAY(self) # <<<<<<<<<<<<<< @@ -17444,7 +17444,7 @@ static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarra __pyx_r = PyDataType_SUBARRAY(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":263 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":263 * # valid (the pointer can be NULL). Most users should access * # this field via the inline helper method PyDataType_SHAPE. 
* @property # <<<<<<<<<<<<<< @@ -17457,7 +17457,7 @@ static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarra return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":267 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":267 * return PyDataType_SUBARRAY(self) * * @property # <<<<<<<<<<<<<< @@ -17468,7 +17468,7 @@ static CYTHON_INLINE PyArray_ArrayDescr *__pyx_f_5numpy_5dtype_8subarray_subarra static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr *__pyx_v_self) { npy_uint64 __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":270 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":270 * cdef inline npy_uint64 flags(self) noexcept nogil: * """The data types flags.""" * return PyDataType_FLAGS(self) # <<<<<<<<<<<<<< @@ -17478,7 +17478,7 @@ static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr __pyx_r = PyDataType_FLAGS(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":267 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":267 * return PyDataType_SUBARRAY(self) * * @property # <<<<<<<<<<<<<< @@ -17491,7 +17491,7 @@ static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":279 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":279 * ctypedef class numpy.broadcast [object PyArrayMultiIterObject, check_size 
ignore]: * * @property # <<<<<<<<<<<<<< @@ -17502,7 +17502,7 @@ static CYTHON_INLINE npy_uint64 __pyx_f_5numpy_5dtype_5flags_flags(PyArray_Descr static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMultiIterObject *__pyx_v_self) { int __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":282 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":282 * cdef inline int numiter(self) noexcept nogil: * """The number of arrays that need to be broadcast to the same shape.""" * return PyArray_MultiIter_NUMITER(self) # <<<<<<<<<<<<<< @@ -17512,7 +17512,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMulti __pyx_r = PyArray_MultiIter_NUMITER(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":279 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":279 * ctypedef class numpy.broadcast [object PyArrayMultiIterObject, check_size ignore]: * * @property # <<<<<<<<<<<<<< @@ -17525,7 +17525,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMulti return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":284 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":284 * return PyArray_MultiIter_NUMITER(self) * * @property # <<<<<<<<<<<<<< @@ -17536,7 +17536,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_7numiter_numiter(PyArrayMulti static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiIterObject *__pyx_v_self) { npy_intp __pyx_r; - /* 
"../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":287 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":287 * cdef inline npy_intp size(self) noexcept nogil: * """The total broadcasted size.""" * return PyArray_MultiIter_SIZE(self) # <<<<<<<<<<<<<< @@ -17546,7 +17546,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiI __pyx_r = PyArray_MultiIter_SIZE(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":284 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":284 * return PyArray_MultiIter_NUMITER(self) * * @property # <<<<<<<<<<<<<< @@ -17559,7 +17559,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiI return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":289 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":289 * return PyArray_MultiIter_SIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17570,7 +17570,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_4size_size(PyArrayMultiI static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMultiIterObject *__pyx_v_self) { npy_intp __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":292 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":292 * cdef inline npy_intp index(self) noexcept nogil: * """The current (1-d) index into the broadcasted result.""" * return PyArray_MultiIter_INDEX(self) # <<<<<<<<<<<<<< @@ -17580,7 +17580,7 @@ static 
CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMult __pyx_r = PyArray_MultiIter_INDEX(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":289 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":289 * return PyArray_MultiIter_SIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17593,7 +17593,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMult return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":294 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":294 * return PyArray_MultiIter_INDEX(self) * * @property # <<<<<<<<<<<<<< @@ -17604,7 +17604,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_9broadcast_5index_index(PyArrayMult static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject *__pyx_v_self) { int __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":297 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":297 * cdef inline int nd(self) noexcept nogil: * """The number of dimensions in the broadcasted result.""" * return PyArray_MultiIter_NDIM(self) # <<<<<<<<<<<<<< @@ -17614,7 +17614,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject __pyx_r = PyArray_MultiIter_NDIM(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":294 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":294 * return PyArray_MultiIter_INDEX(self) * * @property # 
<<<<<<<<<<<<<< @@ -17627,7 +17627,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":299 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":299 * return PyArray_MultiIter_NDIM(self) * * @property # <<<<<<<<<<<<<< @@ -17638,7 +17638,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_9broadcast_2nd_nd(PyArrayMultiIterObject static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions(PyArrayMultiIterObject *__pyx_v_self) { npy_intp *__pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":302 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":302 * cdef inline npy_intp* dimensions(self) noexcept nogil: * """The shape of the broadcasted result.""" * return PyArray_MultiIter_DIMS(self) # <<<<<<<<<<<<<< @@ -17648,7 +17648,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions __pyx_r = PyArray_MultiIter_DIMS(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":299 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":299 * return PyArray_MultiIter_NDIM(self) * * @property # <<<<<<<<<<<<<< @@ -17661,7 +17661,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":304 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":304 * return 
PyArray_MultiIter_DIMS(self) * * @property # <<<<<<<<<<<<<< @@ -17672,7 +17672,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_9broadcast_10dimensions_dimensions static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiIterObject *__pyx_v_self) { void **__pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":308 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":308 * """An array of iterator objects that holds the iterators for the arrays to be broadcast together. * On return, the iterators are adjusted for broadcasting.""" * return PyArray_MultiIter_ITERS(self) # <<<<<<<<<<<<<< @@ -17682,7 +17682,7 @@ static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiI __pyx_r = PyArray_MultiIter_ITERS(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":304 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":304 * return PyArray_MultiIter_DIMS(self) * * @property # <<<<<<<<<<<<<< @@ -17695,7 +17695,7 @@ static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiI return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":322 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":322 * # Instead, we use properties that map to the corresponding C-API functions. 
* * @property # <<<<<<<<<<<<<< @@ -17706,7 +17706,7 @@ static CYTHON_INLINE void **__pyx_f_5numpy_9broadcast_5iters_iters(PyArrayMultiI static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject *__pyx_v_self) { PyObject *__pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":326 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":326 * """Returns a borrowed reference to the object owning the data/memory. * """ * return PyArray_BASE(self) # <<<<<<<<<<<<<< @@ -17716,7 +17716,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject __pyx_r = PyArray_BASE(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":322 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":322 * # Instead, we use properties that map to the corresponding C-API functions. 
* * @property # <<<<<<<<<<<<<< @@ -17729,7 +17729,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":328 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":328 * return PyArray_BASE(self) * * @property # <<<<<<<<<<<<<< @@ -17743,7 +17743,7 @@ static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArray PyArray_Descr *__pyx_t_1; __Pyx_RefNannySetupContext("descr", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":332 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":332 * """Returns an owned reference to the dtype of the array. * """ * return PyArray_DESCR(self) # <<<<<<<<<<<<<< @@ -17756,7 +17756,7 @@ static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArray __pyx_r = ((PyArray_Descr *)__pyx_t_1); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":328 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":328 * return PyArray_BASE(self) * * @property # <<<<<<<<<<<<<< @@ -17771,7 +17771,7 @@ static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArray return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":334 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":334 * return PyArray_DESCR(self) * * @property # <<<<<<<<<<<<<< @@ -17782,7 +17782,7 @@ static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArray 
static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx_v_self) { int __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":338 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":338 * """Returns the number of dimensions in the array. * """ * return PyArray_NDIM(self) # <<<<<<<<<<<<<< @@ -17792,7 +17792,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx __pyx_r = PyArray_NDIM(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":334 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":334 * return PyArray_DESCR(self) * * @property # <<<<<<<<<<<<<< @@ -17805,7 +17805,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":340 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":340 * return PyArray_NDIM(self) * * @property # <<<<<<<<<<<<<< @@ -17816,7 +17816,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObject *__pyx_v_self) { npy_intp *__pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":346 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":346 * Can return NULL for 0-dimensional arrays. 
* """ * return PyArray_DIMS(self) # <<<<<<<<<<<<<< @@ -17826,7 +17826,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObjec __pyx_r = PyArray_DIMS(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":340 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":340 * return PyArray_NDIM(self) * * @property # <<<<<<<<<<<<<< @@ -17839,7 +17839,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObjec return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":348 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":348 * return PyArray_DIMS(self) * * @property # <<<<<<<<<<<<<< @@ -17850,7 +17850,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObjec static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayObject *__pyx_v_self) { npy_intp *__pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":353 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":353 * The number of elements matches the number of dimensions of the array (ndim). 
* """ * return PyArray_STRIDES(self) # <<<<<<<<<<<<<< @@ -17860,7 +17860,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayO __pyx_r = PyArray_STRIDES(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":348 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":348 * return PyArray_DIMS(self) * * @property # <<<<<<<<<<<<<< @@ -17873,7 +17873,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayO return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":355 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":355 * return PyArray_STRIDES(self) * * @property # <<<<<<<<<<<<<< @@ -17884,7 +17884,7 @@ static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayO static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject *__pyx_v_self) { npy_intp __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":359 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":359 * """Returns the total size (in number of elements) of the array. 
* """ * return PyArray_SIZE(self) # <<<<<<<<<<<<<< @@ -17894,7 +17894,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject * __pyx_r = PyArray_SIZE(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":355 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":355 * return PyArray_STRIDES(self) * * @property # <<<<<<<<<<<<<< @@ -17907,7 +17907,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject * return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":361 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":361 * return PyArray_SIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17918,7 +17918,7 @@ static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject * static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__pyx_v_self) { char *__pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":368 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":368 * of `PyArray_DATA()` instead, which returns a 'void*'. 
* """ * return PyArray_BYTES(self) # <<<<<<<<<<<<<< @@ -17928,7 +17928,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__p __pyx_r = PyArray_BYTES(__pyx_v_self); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":361 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":361 * return PyArray_SIZE(self) * * @property # <<<<<<<<<<<<<< @@ -17941,7 +17941,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__p return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":776 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":776 * ctypedef long double complex clongdouble_t * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -17958,7 +17958,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":777 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":777 * * cdef inline object PyArray_MultiIterNew1(a): * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< @@ -17972,7 +17972,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":776 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":776 * ctypedef long double complex clongdouble_t * * cdef inline object 
PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -17991,7 +17991,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":779 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":779 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -18008,7 +18008,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":780 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":780 * * cdef inline object PyArray_MultiIterNew2(a, b): * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< @@ -18022,7 +18022,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":779 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":779 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -18041,7 +18041,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":782 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":782 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object 
PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -18058,7 +18058,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":783 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":783 * * cdef inline object PyArray_MultiIterNew3(a, b, c): * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< @@ -18072,7 +18072,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":782 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":782 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -18091,7 +18091,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":785 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":785 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -18108,7 +18108,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":786 + /* 
"../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":786 * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< @@ -18122,7 +18122,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":785 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":785 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -18141,7 +18141,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":788 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":788 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -18158,7 +18158,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":789 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":789 * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< @@ -18172,7 +18172,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* 
"../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":788 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":788 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -18191,7 +18191,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":791 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":791 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< @@ -18206,7 +18206,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ PyObject *__pyx_t_2; __Pyx_RefNannySetupContext("PyDataType_SHAPE", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":792 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":792 * * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< @@ -18216,7 +18216,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ __pyx_t_1 = PyDataType_HASSUBARRAY(__pyx_v_d); if (__pyx_t_1) { - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":793 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":793 * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): * return d.subarray.shape # <<<<<<<<<<<<<< @@ -18229,7 +18229,7 @@ static CYTHON_INLINE PyObject 
*__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ __pyx_r = ((PyObject*)__pyx_t_2); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":792 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":792 * * cdef inline tuple PyDataType_SHAPE(dtype d): * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< @@ -18238,7 +18238,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ */ } - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":795 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":795 * return d.subarray.shape * else: * return () # <<<<<<<<<<<<<< @@ -18252,7 +18252,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ goto __pyx_L0; } - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":791 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":791 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< @@ -18267,7 +18267,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":994 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":994 * int _import_umath() except -1 * * cdef inline void set_array_base(ndarray arr, object base) except *: # <<<<<<<<<<<<<< @@ -18281,7 +18281,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a const char *__pyx_filename = NULL; int 
__pyx_clineno = 0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":995 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":995 * * cdef inline void set_array_base(ndarray arr, object base) except *: * Py_INCREF(base) # important to do this before stealing the reference below! # <<<<<<<<<<<<<< @@ -18290,7 +18290,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ Py_INCREF(__pyx_v_base); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":996 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":996 * cdef inline void set_array_base(ndarray arr, object base) except *: * Py_INCREF(base) # important to do this before stealing the reference below! * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<< @@ -18299,7 +18299,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ __pyx_t_1 = PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(2, 996, __pyx_L1_error) - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":994 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":994 * int _import_umath() except -1 * * cdef inline void set_array_base(ndarray arr, object base) except *: # <<<<<<<<<<<<<< @@ -18314,7 +18314,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __pyx_L0:; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":998 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":998 * 
PyArray_SetBaseObject(arr, base) * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -18329,7 +18329,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py int __pyx_t_1; __Pyx_RefNannySetupContext("get_array_base", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":999 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":999 * * cdef inline object get_array_base(ndarray arr): * base = PyArray_BASE(arr) # <<<<<<<<<<<<<< @@ -18338,7 +18338,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py */ __pyx_v_base = PyArray_BASE(__pyx_v_arr); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1000 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1000 * cdef inline object get_array_base(ndarray arr): * base = PyArray_BASE(arr) * if base is NULL: # <<<<<<<<<<<<<< @@ -18348,7 +18348,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_t_1 = (__pyx_v_base == NULL); if (__pyx_t_1) { - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1001 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1001 * base = PyArray_BASE(arr) * if base is NULL: * return None # <<<<<<<<<<<<<< @@ -18359,7 +18359,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_r = Py_None; __Pyx_INCREF(Py_None); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1000 + /* 
"../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1000 * cdef inline object get_array_base(ndarray arr): * base = PyArray_BASE(arr) * if base is NULL: # <<<<<<<<<<<<<< @@ -18368,7 +18368,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py */ } - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1002 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1002 * if base is NULL: * return None * return base # <<<<<<<<<<<<<< @@ -18380,7 +18380,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_r = ((PyObject *)__pyx_v_base); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":998 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":998 * PyArray_SetBaseObject(arr, base) * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -18395,7 +18395,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1006 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1006 * # Versions of the import_* functions which are more suitable for * # Cython code. 
* cdef inline int import_array() except -1: # <<<<<<<<<<<<<< @@ -18422,7 +18422,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("import_array", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1007 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1007 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -18438,7 +18438,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1008 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1008 * cdef inline int import_array() except -1: * try: * __pyx_import_array() # <<<<<<<<<<<<<< @@ -18447,7 +18447,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { */ __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 1008, __pyx_L3_error) - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1007 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1007 * # Cython code. 
* cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -18461,7 +18461,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1009 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1009 * try: * __pyx_import_array() * except Exception: # <<<<<<<<<<<<<< @@ -18476,7 +18476,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __Pyx_XGOTREF(__pyx_t_6); __Pyx_XGOTREF(__pyx_t_7); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1010 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1010 * __pyx_import_array() * except Exception: * raise ImportError("numpy._core.multiarray failed to import") # <<<<<<<<<<<<<< @@ -18501,7 +18501,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { } goto __pyx_L5_except_error; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1007 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1007 * # Cython code. * cdef inline int import_array() except -1: * try: # <<<<<<<<<<<<<< @@ -18517,7 +18517,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { __pyx_L8_try_end:; } - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1006 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1006 * # Versions of the import_* functions which are more suitable for * # Cython code. 
* cdef inline int import_array() except -1: # <<<<<<<<<<<<<< @@ -18542,7 +18542,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1012 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1012 * raise ImportError("numpy._core.multiarray failed to import") * * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< @@ -18569,7 +18569,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("import_umath", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1013 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1013 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -18585,7 +18585,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1014 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1014 * cdef inline int import_umath() except -1: * try: * _import_umath() # <<<<<<<<<<<<<< @@ -18594,7 +18594,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { */ __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 1014, __pyx_L3_error) - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1013 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1013 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ 
-18608,7 +18608,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1015 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1015 * try: * _import_umath() * except Exception: # <<<<<<<<<<<<<< @@ -18623,7 +18623,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __Pyx_XGOTREF(__pyx_t_6); __Pyx_XGOTREF(__pyx_t_7); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1016 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1016 * _import_umath() * except Exception: * raise ImportError("numpy._core.umath failed to import") # <<<<<<<<<<<<<< @@ -18648,7 +18648,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { } goto __pyx_L5_except_error; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1013 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1013 * * cdef inline int import_umath() except -1: * try: # <<<<<<<<<<<<<< @@ -18664,7 +18664,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { __pyx_L8_try_end:; } - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1012 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1012 * raise ImportError("numpy._core.multiarray failed to import") * * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< @@ -18689,7 +18689,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { return __pyx_r; } -/* 
"../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1018 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1018 * raise ImportError("numpy._core.umath failed to import") * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< @@ -18716,7 +18716,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { int __pyx_clineno = 0; __Pyx_RefNannySetupContext("import_ufunc", 0); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1019 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1019 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -18732,7 +18732,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1020 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1020 * cdef inline int import_ufunc() except -1: * try: * _import_umath() # <<<<<<<<<<<<<< @@ -18741,7 +18741,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { */ __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(2, 1020, __pyx_L3_error) - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1019 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1019 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -18755,7 +18755,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { goto __pyx_L8_try_end; __pyx_L3_error:; - /* 
"../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1021 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1021 * try: * _import_umath() * except Exception: # <<<<<<<<<<<<<< @@ -18770,7 +18770,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __Pyx_XGOTREF(__pyx_t_6); __Pyx_XGOTREF(__pyx_t_7); - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1022 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1022 * _import_umath() * except Exception: * raise ImportError("numpy._core.umath failed to import") # <<<<<<<<<<<<<< @@ -18795,7 +18795,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { } goto __pyx_L5_except_error; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1019 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1019 * * cdef inline int import_ufunc() except -1: * try: # <<<<<<<<<<<<<< @@ -18811,7 +18811,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { __pyx_L8_try_end:; } - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1018 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1018 * raise ImportError("numpy._core.umath failed to import") * * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< @@ -18836,7 +18836,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1025 +/* 
"../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1025 * * * cdef inline bint is_timedelta64_object(object obj) noexcept: # <<<<<<<<<<<<<< @@ -18847,7 +18847,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_obj) { int __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1037 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1037 * bool * """ * return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) # <<<<<<<<<<<<<< @@ -18857,7 +18857,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_ __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyTimedeltaArrType_Type)); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1025 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1025 * * * cdef inline bint is_timedelta64_object(object obj) noexcept: # <<<<<<<<<<<<<< @@ -18870,7 +18870,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_ return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1040 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1040 * * * cdef inline bint is_datetime64_object(object obj) noexcept: # <<<<<<<<<<<<<< @@ -18881,7 +18881,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_ static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_obj) { int __pyx_r; - /* 
"../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1052 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1052 * bool * """ * return PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) # <<<<<<<<<<<<<< @@ -18891,7 +18891,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_o __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyDatetimeArrType_Type)); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1040 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1040 * * * cdef inline bint is_datetime64_object(object obj) noexcept: # <<<<<<<<<<<<<< @@ -18904,7 +18904,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_o return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1055 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1055 * * * cdef inline npy_datetime get_datetime64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -18915,7 +18915,7 @@ static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_o static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject *__pyx_v_obj) { npy_datetime __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1062 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1062 * also needed. That can be found using `get_datetime64_unit`. 
* """ * return (obj).obval # <<<<<<<<<<<<<< @@ -18925,7 +18925,7 @@ static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject * __pyx_r = ((PyDatetimeScalarObject *)__pyx_v_obj)->obval; goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1055 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1055 * * * cdef inline npy_datetime get_datetime64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -18938,7 +18938,7 @@ static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject * return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1065 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1065 * * * cdef inline npy_timedelta get_timedelta64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -18949,7 +18949,7 @@ static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject * static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject *__pyx_v_obj) { npy_timedelta __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1069 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1069 * returns the int64 value underlying scalar numpy timedelta64 object * """ * return (obj).obval # <<<<<<<<<<<<<< @@ -18959,7 +18959,7 @@ static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject __pyx_r = ((PyTimedeltaScalarObject *)__pyx_v_obj)->obval; goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1065 + /* 
"../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1065 * * * cdef inline npy_timedelta get_timedelta64_value(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -18972,7 +18972,7 @@ static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject return __pyx_r; } -/* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1072 +/* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1072 * * * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) noexcept nogil: # <<<<<<<<<<<<<< @@ -18983,7 +18983,7 @@ static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObject *__pyx_v_obj) { NPY_DATETIMEUNIT __pyx_r; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1076 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1076 * returns the unit part of the dtype for a numpy datetime64 object. 
* """ * return (obj).obmeta.base # <<<<<<<<<<<<<< @@ -18993,7 +18993,7 @@ static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObjec __pyx_r = ((NPY_DATETIMEUNIT)((PyDatetimeScalarObject *)__pyx_v_obj)->obmeta.base); goto __pyx_L0; - /* "../../../../../tmp/pip-build-env-7ngr7o2t/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1072 + /* "../../../../../tmp/pip-build-env-x3g6sxmd/overlay/local/lib/python3.12/dist-packages/numpy/__init__.cython-30.pxd":1072 * * * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) noexcept nogil: # <<<<<<<<<<<<<< diff --git a/requirements.txt b/requirements.txt index b0db78ab..14b8dc83 100644 --- a/requirements.txt +++ b/requirements.txt @@ -159,7 +159,7 @@ pyopenms==3.4.0 # pyprophet (setup.py) pyparsing==3.2.3 # via matplotlib -pypdf==6.0.0 +pypdf==6.1.3 # via # pyprophet # pyprophet (setup.py) diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_ms1_ms2_transition.out b/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_ms1_ms2_transition.out new file mode 100644 index 00000000..8235141f --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_ms1_ms2_transition.out @@ -0,0 +1,8 @@ +Created 4 PDF file(s) from multi-level scoring: + MS1 files: 1 + MS2 files: 1 + Transition files: 1 + - feature_scores.pdf + - test_data_ms1_report.pdf + - test_data_ms2_report.pdf + - test_data_transition_report.pdf diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_parquet_with_scores.out b/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_parquet_with_scores.out new file mode 100644 index 00000000..9d69ad74 --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_parquet_with_scores.out @@ -0,0 +1 @@ +Successfully created feature scores from Parquet with SCORE columns diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_scored_osw.out 
b/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_scored_osw.out new file mode 100644 index 00000000..14806759 --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_scored_osw.out @@ -0,0 +1,3 @@ +Created 2 PDF file(s) from scored OSW: + - feature_scores.pdf + - test_data_ms2_report.pdf diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_split_parquet_with_scores.out b/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_split_parquet_with_scores.out new file mode 100644 index 00000000..30ac013a --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_split_parquet_with_scores.out @@ -0,0 +1 @@ +Successfully created feature scores from split Parquet with SCORE columns diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_unscored_osw.out b/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_unscored_osw.out new file mode 100644 index 00000000..84d09e4d --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_feature_scores_unscored_osw.out @@ -0,0 +1,2 @@ +Created 1 PDF file(s): + - feature_scores.pdf diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out new file mode 100644 index 00000000..d26f2c01 --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out @@ -0,0 +1,13 @@ +Exported 3410 rows with 98 columns (no transition data) +Score columns found: ['SCORE_MS2_PEAK_GROUP_RANK', 'SCORE_MS2_PEP', 'SCORE_MS2_P_VALUE', 'SCORE_MS2_Q_VALUE', 'SCORE_MS2_SCORE', 'SCORE_PEPTIDE_GLOBAL_PEP', 'SCORE_PEPTIDE_GLOBAL_P_VALUE', 'SCORE_PEPTIDE_GLOBAL_Q_VALUE', 'SCORE_PEPTIDE_GLOBAL_SCORE', 'SCORE_PROTEIN_GLOBAL_PEP', 'SCORE_PROTEIN_GLOBAL_P_VALUE', 'SCORE_PROTEIN_GLOBAL_Q_VALUE', 'SCORE_PROTEIN_GLOBAL_SCORE'] + ANNOTATION DELTA_RT 
EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FEATURE_TRANSITION_APEX_INTENSITY FEATURE_TRANSITION_AREA_INTENSITY FEATURE_TRANSITION_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_TRANSITION_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_TRANSITION_VAR_LOG_INTENSITY FEATURE_TRANSITION_VAR_LOG_SN_SCORE FEATURE_TRANSITION_VAR_MASSDEV_SCORE FEATURE_TRANSITION_VAR_XCORR_COELUTION FEATURE_TRANSITION_VAR_XCORR_SHAPE FILENAME GENE_DECOY GENE_ID GENE_NAME IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PRODUCT_MZ PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE 
SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE TRANSITION_CHARGE TRANSITION_DECOY TRANSITION_DETECTING TRANSITION_ID TRANSITION_LIBRARY_INTENSITY TRANSITION_ORDINAL TRANSITION_TRAML_ID TRANSITION_TYPE UNMODIFIED_SEQUENCE +0 None -85.0733 NaN 1923.17 483971408708572459 192394.8906 935372.0000 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1898.6700 GIGDWSDSK(UniMod:259) 7.0277 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1946.4600 -8670811102654834151 1 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +1 None -55.2126 NaN 1953.03 6854889104354289238 5696.7271 45882.6016 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1946.4600 GIGDWSDSK(UniMod:259) 7.8936 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 1977.1899 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +2 None 9.7944 NaN 2018.03 2696300170322160855 17401.1816 95751.7969 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 2001.0900 GIGDWSDSK(UniMod:259) 9.7785 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 2024.9800 -8670811102654834151 2 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +3 None -130.8641 NaN 1877.37 8207933629855485114 6239.5195 48788.5000 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1857.7000 GIGDWSDSK(UniMod:259) 5.7000 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 1898.6700 -8670811102654834151 3 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +4 None -268.6805 NaN 1739.56 745237666153652118 6493.7773 66798.3984 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1717.7300 GIGDWSDSK(UniMod:259) 1.7038 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1762.1100 -8670811102654834151 5 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +5 None -6.0218 NaN 3084.15 1082368609638691369 88839.6328 604041.0000 0.9885 0.1408 0.7967 0.0000 0.9766 37375.0 195175.0 4.0 0.8376 NaN 0.7351 0.9983 0.0000 0.9806 0.9979 0.0536 0.0300 0.0412 0.1268 4.8405 0.6753 2.2770 1.9680 0.0021 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9976 0.9985 8.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 3066.2200 ESDILAVVK(UniMod:259) 40.6918 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 3110.6101 -8670811102654834151 1 0.0031 0.0029 0.0033 5.5702 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +6 None 228.9700 NaN 3319.15 -1344271892660954750 9115.7090 102078.0000 0.7000 1.0399 16.1263 5.5000 0.4975 4782.0 23890.0 2.0 0.4271 NaN 0.0900 0.9602 1.0000 0.7442 0.9239 0.3826 0.1244 0.1502 0.4641 3.2047 1.1861 14.1720 12.7184 0.0661 NaN NaN NaN NaN NaN NaN 0.7830 0.1486 0.8549 0.8773 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 3301.7800 ESDILAVVK(UniMod:259) 47.5056 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 3339.3301 -8670811102654834151 5 1.0000 0.4692 0.4692 -1.8443 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +7 None -124.5660 NaN 2965.61 -4515618252120499488 9268.4902 109284.0000 0.6217 1.1110 22.4222 1.5000 0.7138 793.0 6371.0 4.0 0.8481 NaN 0.0240 0.9434 0.0000 0.8820 0.9766 0.2001 0.0990 0.1152 0.3558 1.0582 0.5825 6.0852 4.5232 0.0365 NaN NaN NaN NaN NaN NaN 0.9749 0.0676 0.7227 0.8143 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 2946.7400 ESDILAVVK(UniMod:259) 37.2545 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 2994.5300 -8670811102654834151 3 1.0000 0.0205 0.0210 1.9398 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +8 None 28.5831 NaN 3118.76 -4044853666210028406 9989.7793 33949.8008 0.2103 0.6817 10.4054 1.0774 0.8894 865.0 2909.0 4.0 0.8179 NaN 0.0110 0.4917 0.0000 0.6890 0.9634 0.2912 0.1309 0.1515 0.4848 1.3179 0.6547 5.8786 5.3254 0.0080 NaN NaN NaN NaN NaN NaN 0.7830 0.3902 0.8943 0.8531 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 3114.0200 ESDILAVVK(UniMod:259) 41.6952 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 3131.0901 -8670811102654834151 2 1.0000 0.0176 0.0180 2.2958 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +9 None -184.3073 NaN 2905.87 7439833196907350500 15059.3516 143080.0000 -0.2753 3.5529 3.1948 3.9578 0.6528 1527.0 8281.0 5.0 0.5666 NaN 0.0312 0.8691 0.0000 -0.3982 0.7375 0.6314 0.2844 0.3777 1.1866 2.5854 0.9876 9.8422 8.8909 0.0538 NaN NaN NaN NaN NaN NaN 3.7575 2.5131 0.6924 0.6775 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 2881.8701 ESDILAVVK(UniMod:259) 35.5222 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 2929.6699 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.8184 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out new file mode 100644 index 00000000..100164fd --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out @@ -0,0 +1,13 @@ +Exported 97964 rows with 98 columns +Score columns found: ['SCORE_MS2_PEAK_GROUP_RANK', 'SCORE_MS2_PEP', 'SCORE_MS2_P_VALUE', 'SCORE_MS2_Q_VALUE', 'SCORE_MS2_SCORE', 'SCORE_PEPTIDE_GLOBAL_PEP', 'SCORE_PEPTIDE_GLOBAL_P_VALUE', 'SCORE_PEPTIDE_GLOBAL_Q_VALUE', 'SCORE_PEPTIDE_GLOBAL_SCORE', 'SCORE_PROTEIN_GLOBAL_PEP', 'SCORE_PROTEIN_GLOBAL_P_VALUE', 'SCORE_PROTEIN_GLOBAL_Q_VALUE', 'SCORE_PROTEIN_GLOBAL_SCORE'] + ANNOTATION DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE 
FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FEATURE_TRANSITION_APEX_INTENSITY FEATURE_TRANSITION_AREA_INTENSITY FEATURE_TRANSITION_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_TRANSITION_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_TRANSITION_VAR_LOG_INTENSITY FEATURE_TRANSITION_VAR_LOG_SN_SCORE FEATURE_TRANSITION_VAR_MASSDEV_SCORE FEATURE_TRANSITION_VAR_XCORR_COELUTION FEATURE_TRANSITION_VAR_XCORR_SHAPE FILENAME GENE_DECOY GENE_ID GENE_NAME IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PRODUCT_MZ PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE TRANSITION_CHARGE TRANSITION_DECOY TRANSITION_DETECTING TRANSITION_ID TRANSITION_LIBRARY_INTENSITY TRANSITION_ORDINAL TRANSITION_TRAML_ID TRANSITION_TYPE UNMODIFIED_SEQUENCE +0 None -85.0733 NaN 1923.17 4.8397e+17 192394.8906 935372.0000 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1898.6700 GIGDWSDSK(UniMod:259) 7.0277 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 1946.4600 -8.6708e+18 1.0 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +1 None -55.2126 NaN 1953.03 6.8549e+18 5696.7271 45882.6016 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1946.4600 GIGDWSDSK(UniMod:259) 7.8936 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1977.1899 -8.6708e+18 4.0 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +2 None 9.7944 NaN 2018.03 2.6963e+18 17401.1816 95751.7969 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 2001.0900 GIGDWSDSK(UniMod:259) 9.7785 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 2024.9800 -8.6708e+18 2.0 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +3 None -130.8641 NaN 1877.37 8.2079e+18 6239.5195 48788.5000 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1857.7000 GIGDWSDSK(UniMod:259) 5.7000 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1898.6700 -8.6708e+18 3.0 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +4 None -268.6805 NaN 1739.56 7.4524e+17 6493.7773 66798.3984 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1717.7300 GIGDWSDSK(UniMod:259) 1.7038 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 1762.1100 -8.6708e+18 5.0 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +5 None -6.0218 NaN 3084.15 1.0824e+18 88839.6328 604041.0000 0.9885 0.1408 0.7967 0.0000 0.9766 37375.0 195175.0 4.0 0.8376 NaN 0.7351 0.9983 0.0000 0.9806 0.9979 0.0536 0.0300 0.0412 0.1268 4.8405 0.6753 2.2770 1.9680 0.0021 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9976 0.9985 8.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 3066.2200 ESDILAVVK(UniMod:259) 40.6918 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 3110.6101 -8.6708e+18 1.0 0.0031 0.0029 0.0033 5.5702 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +6 None 228.9700 NaN 3319.15 -1.3443e+18 9115.7090 102078.0000 0.7000 1.0399 16.1263 5.5000 0.4975 4782.0 23890.0 2.0 0.4271 NaN 0.0900 0.9602 1.0000 0.7442 0.9239 0.3826 0.1244 0.1502 0.4641 3.2047 1.1861 14.1720 12.7184 0.0661 NaN NaN NaN NaN NaN NaN 0.7830 0.1486 0.8549 0.8773 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 3301.7800 ESDILAVVK(UniMod:259) 47.5056 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 3339.3301 -8.6708e+18 5.0 1.0000 0.4692 0.4692 -1.8443 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +7 None -124.5660 NaN 2965.61 -4.5156e+18 9268.4902 109284.0000 0.6217 1.1110 22.4222 1.5000 0.7138 793.0 6371.0 4.0 0.8481 NaN 0.0240 0.9434 0.0000 0.8820 0.9766 0.2001 0.0990 0.1152 0.3558 1.0582 0.5825 6.0852 4.5232 0.0365 NaN NaN NaN NaN NaN NaN 0.9749 0.0676 0.7227 0.8143 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 2946.7400 ESDILAVVK(UniMod:259) 37.2545 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 2994.5300 -8.6708e+18 3.0 1.0000 0.0205 0.0210 1.9398 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +8 None 28.5831 NaN 3118.76 -4.0449e+18 9989.7793 33949.8008 0.2103 0.6817 10.4054 1.0774 0.8894 865.0 2909.0 4.0 0.8179 NaN 0.0110 0.4917 0.0000 0.6890 0.9634 0.2912 0.1309 0.1515 0.4848 1.3179 0.6547 5.8786 5.3254 0.0080 NaN NaN NaN NaN NaN NaN 0.7830 0.3902 0.8943 0.8531 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 3114.0200 ESDILAVVK(UniMod:259) 41.6952 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 3131.0901 -8.6708e+18 2.0 1.0000 0.0176 0.0180 2.2958 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +9 None -184.3073 NaN 2905.87 7.4398e+18 15059.3516 143080.0000 -0.2753 3.5529 3.1948 3.9578 0.6528 1527.0 8281.0 5.0 0.5666 NaN 0.0312 0.8691 0.0000 -0.3982 0.7375 0.6314 0.2844 0.3777 1.1866 2.5854 0.9876 9.8422 8.8909 0.0538 NaN NaN NaN NaN NaN NaN 3.7575 2.5131 0.6924 0.6775 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 2881.8701 ESDILAVVK(UniMod:259) 35.5222 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 2929.6699 -8.6708e+18 4.0 1.0000 0.4692 0.4692 -1.8184 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out new file mode 100644 index 00000000..0d886bc2 --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out @@ -0,0 +1,10 @@ +Precursor data: 3410 rows with 79 columns +Transition data: 96259 rows with 23 columns +Precursor score columns: ['SCORE_MS2_PEAK_GROUP_RANK', 'SCORE_MS2_PEP', 'SCORE_MS2_P_VALUE', 'SCORE_MS2_Q_VALUE', 'SCORE_MS2_SCORE', 'SCORE_PEPTIDE_GLOBAL_PEP', 'SCORE_PEPTIDE_GLOBAL_P_VALUE', 'SCORE_PEPTIDE_GLOBAL_Q_VALUE', 'SCORE_PEPTIDE_GLOBAL_SCORE', 'SCORE_PROTEIN_GLOBAL_PEP', 'SCORE_PROTEIN_GLOBAL_P_VALUE', 'SCORE_PROTEIN_GLOBAL_Q_VALUE', 'SCORE_PROTEIN_GLOBAL_SCORE'] +Precursor data sample: + DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY 
FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FILENAME GENE_DECOY GENE_ID GENE_NAME IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE UNMODIFIED_SEQUENCE +0 -85.0733 NaN 1923.17 483971408708572459 192394.8869 935372.0 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1898.67 GIGDWSDSK(UniMod:259) 7.0277 0 523 2 0 
AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1946.4600 -8670811102654834151 1 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +1 -55.2126 NaN 1953.03 6854889104354289238 5696.7273 45882.6 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1946.46 GIGDWSDSK(UniMod:259) 7.8936 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1977.1899 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +2 9.7944 NaN 2018.03 2696300170322160855 17401.1825 95751.8 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 2001.09 GIGDWSDSK(UniMod:259) 9.7785 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 2024.9800 -8670811102654834151 2 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +3 -130.8641 NaN 1877.37 8207933629855485114 6239.5198 48788.5 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1857.70 GIGDWSDSK(UniMod:259) 5.7000 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
AQUA4SWATH_HMLangeF NaN 5 1898.6700 -8670811102654834151 3 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +4 -268.6805 NaN 1739.56 745237666153652118 6493.7774 66798.4 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1717.73 GIGDWSDSK(UniMod:259) 1.7038 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1762.1100 -8670811102654834151 5 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out new file mode 100644 index 00000000..fe59caec --- /dev/null +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out @@ -0,0 +1,14 @@ +Exported 97965 rows with 103 columns +SCORE_IPF columns found: ['SCORE_IPF_PEP', 'SCORE_IPF_PRECURSOR_PEAKGROUP_PEP', 'SCORE_IPF_QVALUE'] +Sample data with IPF scores: + FEATURE_ID SCORE_IPF_PEP SCORE_IPF_PRECURSOR_PEAKGROUP_PEP SCORE_IPF_QVALUE +0 4.8397e+17 0.0000e+00 2.1927e-07 0.0000e+00 +1 1.0824e+18 0.0000e+00 9.9998e-08 0.0000e+00 +2 -1.1854e+18 0.0000e+00 4.5323e-08 0.0000e+00 +3 6.8070e+18 0.0000e+00 1.5978e-09 0.0000e+00 +4 7.1486e+18 0.0000e+00 1.1662e-08 0.0000e+00 +5 9.0780e+17 2.5734e-10 1.6434e-04 9.0990e-13 +6 2.4200e+18 0.0000e+00 1.7179e-08 0.0000e+00 +7 -1.4753e+18 0.0000e+00 1.5978e-09 0.0000e+00 +8 5.4169e+18 0.0000e+00 4.0794e-08 0.0000e+00 +9 -3.0355e+17 0.0000e+00 2.0475e-08 0.0000e+00 diff --git a/tests/test_pyprophet_export.py b/tests/test_pyprophet_export.py index 6e05f8c3..cbfff358 100644 --- a/tests/test_pyprophet_export.py +++ b/tests/test_pyprophet_export.py @@ 
-9,6 +9,8 @@ import pandas as pd import pytest +from pyprophet.export.export_report import export_feature_scores + pd.options.display.expand_frame_repr = False pd.options.display.precision = 4 pd.options.display.max_columns = None @@ -91,10 +93,12 @@ def run_pyprophet_command(cmd, temp_folder): ).decode() except subprocess.CalledProcessError as error: print(f"Command failed: {cmd}\n{error.output.decode()}", file=sys.stderr) - if "NotImplementedError" in error.output.decode(): # attempt to catch the specific error rather than the CalledProcessError + if ( + "NotImplementedError" in error.output.decode() + ): # attempt to catch the specific error rather than the CalledProcessError raise NotImplementedError else: - raise + raise def validate_export_results( @@ -150,12 +154,11 @@ def test_osw_analysis( f"{temp_folder}/test_data.tsv", ) + @pytest.mark.parametrize( - "calib, rt_unit", - [ (True, 'iRT'), (False, 'iRT'), (True, 'RT'), (False, 'RT')] + "calib, rt_unit", [(True, "iRT"), (False, "iRT"), (True, "RT"), (False, "RT")] ) -def test_osw_analysis_libExport(input_strategy, temp_folder, regtest, calib, rt_unit -): +def test_osw_analysis_libExport(input_strategy, temp_folder, regtest, calib, rt_unit): cmd = f"pyprophet score {input_strategy['cmd_prefix']} --level=ms2 --test --pi0_lambda=0.001 0 0 --ss_iteration_fdr=0.02 && " # peptide-level @@ -164,7 +167,6 @@ def test_osw_analysis_libExport(input_strategy, temp_folder, regtest, calib, rt_ # protein-level cmd += f"pyprophet infer protein --pi0_lambda=0 0 0 {input_strategy['cmd_prefix']} --context=global && " - # export if calib: cmd += f"pyprophet export library {input_strategy['cmd_prefix']} --out={temp_folder}/test_lib.tsv --test --max_peakgroup_qvalue=1 --max_global_peptide_qvalue=1 --max_global_protein_qvalue=1 --rt_unit={rt_unit}" @@ -183,6 +185,7 @@ def test_osw_analysis_libExport(input_strategy, temp_folder, regtest, calib, rt_ f"{temp_folder}/test_lib.tsv", ) + def test_osw_unscored(input_strategy, temp_folder, 
regtest): """Test export of unscored OSW data""" cmd = f"pyprophet export tsv {input_strategy['cmd_prefix']} --out={temp_folder}/test_data.tsv --format=legacy_merged" @@ -268,3 +271,286 @@ def test_compound_ms2(test_data_compound_osw, temp_folder, regtest): df = pd.read_csv(f"{temp_folder}/test_data_compound_ms2.tsv", sep="\t", nrows=100) print(df.sort_index(axis=1), file=regtest) + + +# ================== PARQUET EXPORT TESTS ================== +def test_parquet_export_scored_osw(test_data_osw, temp_folder, regtest): + """Test exporting scored OSW with SCORE_ tables to parquet format""" + # Score at MS2 level + cmd = f"pyprophet score --in={test_data_osw} --level=ms2 --test --pi0_lambda=0.001 0 0 --ss_iteration_fdr=0.02 && " + + # Infer peptide level with global context + cmd += f"pyprophet infer peptide --pi0_lambda=0.001 0 0 --in={test_data_osw} --context=global && " + + # Infer protein level with global context + cmd += f"pyprophet infer protein --pi0_lambda=0 0 0 --in={test_data_osw} --context=global && " + + # Export to parquet (should include SCORE_ tables) + cmd += f"pyprophet export parquet --in={test_data_osw} --out={temp_folder}/test_data_scored.parquet" + + run_pyprophet_command(cmd, temp_folder) + + # Verify the parquet file exists and has data + import pyarrow.parquet as pq + + table = pq.read_table(f"{temp_folder}/test_data_scored.parquet") + df = table.to_pandas() + + # Check that we have data + assert len(df) > 0, "Exported parquet file should not be empty" + + # Check that score columns are present + score_columns = [col for col in df.columns if col.startswith("SCORE_")] + assert len(score_columns) > 0, "Exported parquet should contain SCORE_ columns" + + print(f"Exported {len(df)} rows with {len(df.columns)} columns", file=regtest) + print(f"Score columns found: {sorted(score_columns)}", file=regtest) + print(df.head(10).sort_index(axis=1), file=regtest) + + +def test_parquet_export_no_transition_data(test_data_osw, temp_folder, regtest): + 
"""Test exporting parquet without transition data using --no-include_transition_data flag""" + # Score at MS2 level + cmd = f"pyprophet score --in={test_data_osw} --level=ms2 --test --pi0_lambda=0.001 0 0 --ss_iteration_fdr=0.02 && " + + # Infer peptide level with global context + cmd += f"pyprophet infer peptide --pi0_lambda=0.001 0 0 --in={test_data_osw} --context=global && " + + # Infer protein level with global context + cmd += f"pyprophet infer protein --pi0_lambda=0 0 0 --in={test_data_osw} --context=global && " + + # Export to parquet without transition data + cmd += f"pyprophet export parquet --in={test_data_osw} --out={temp_folder}/test_data_no_transition.parquet --no-include_transition_data" + + run_pyprophet_command(cmd, temp_folder) + + # Verify the parquet file exists and has data + import pyarrow.parquet as pq + + table = pq.read_table(f"{temp_folder}/test_data_no_transition.parquet") + df = table.to_pandas() + + # Check that we have data + assert len(df) > 0, "Exported parquet file should not be empty" + + # Check that transition-specific columns are NOT present + # transition_columns = [col for col in df.columns if 'TRANSITION' in col.upper()] + # assert len(transition_columns) == 0, "Exported parquet should not contain TRANSITION columns when --no-include_transition_data is used" + assert df["TRANSITION_ID"].isnull().all(), ( + "TRANSITION_ID column should be empty when --no-include_transition_data is used" + ) + + # Check that score columns are present + score_columns = [col for col in df.columns if col.startswith("SCORE_")] + assert len(score_columns) > 0, "Exported parquet should contain SCORE_ columns" + + print( + f"Exported {len(df)} rows with {len(df.columns)} columns (no transition data)", + file=regtest, + ) + print(f"Score columns found: {sorted(score_columns)}", file=regtest) + print(df.head(10).sort_index(axis=1), file=regtest) + + +def test_parquet_export_split_format(test_data_osw, temp_folder, regtest): + """Test exporting to split 
parquet format with score data""" + # Score at MS2 level + cmd = f"pyprophet score --in={test_data_osw} --level=ms2 --test --pi0_lambda=0.001 0 0 --ss_iteration_fdr=0.02 && " + + # Infer peptide level with global context + cmd += f"pyprophet infer peptide --pi0_lambda=0.001 0 0 --in={test_data_osw} --context=global && " + + # Infer protein level with global context + cmd += f"pyprophet infer protein --pi0_lambda=0 0 0 --in={test_data_osw} --context=global && " + + # Export to split parquet format + cmd += f"pyprophet export parquet --in={test_data_osw} --out={temp_folder}/test_data_split --split_transition_data" + + run_pyprophet_command(cmd, temp_folder) + + # Verify the directory exists and contains parquet files + import pyarrow.parquet as pq + + split_dir = Path(temp_folder) / "test_data_split" + assert split_dir.exists(), "Split parquet directory should exist" + + precursor_file = split_dir / "precursors_features.parquet" + transition_file = split_dir / "transition_features.parquet" + + assert precursor_file.exists(), "precursors_features.parquet should exist" + assert transition_file.exists(), "transition_features.parquet should exist" + + # Read precursor data + precursor_table = pq.read_table(str(precursor_file)) + precursor_df = precursor_table.to_pandas() + + # Read transition data + transition_table = pq.read_table(str(transition_file)) + transition_df = transition_table.to_pandas() + + # Check that we have data in both files + assert len(precursor_df) > 0, "Precursor parquet file should not be empty" + assert len(transition_df) > 0, "Transition parquet file should not be empty" + + # Check that score columns are present in precursor file + precursor_score_columns = [ + col for col in precursor_df.columns if col.startswith("SCORE_") + ] + assert len(precursor_score_columns) > 0, ( + "Precursor parquet should contain SCORE_ columns" + ) + + print( + f"Precursor data: {len(precursor_df)} rows with {len(precursor_df.columns)} columns", + file=regtest, + ) + 
print( + f"Transition data: {len(transition_df)} rows with {len(transition_df.columns)} columns", + file=regtest, + ) + print(f"Precursor score columns: {sorted(precursor_score_columns)}", file=regtest) + print("Precursor data sample:", file=regtest) + print(precursor_df.head(5).sort_index(axis=1), file=regtest) + + +def test_parquet_export_with_ipf(test_data_osw, temp_folder, regtest): + """Test exporting parquet with IPF (Inference of Peptidoforms) scoring""" + # Score at MS1 level + cmd = f"pyprophet score --in={test_data_osw} --level=ms1 --test --pi0_lambda=0.1 0 0 --ss_iteration_fdr=0.02 && " + + # Score at MS2 level + cmd += f"pyprophet score --in={test_data_osw} --level=ms2 --test --pi0_lambda=0.001 0 0 --ss_iteration_fdr=0.02 && " + + # Score at transition level + cmd += f"pyprophet score --in={test_data_osw} --level=transition --test --pi0_lambda=0.1 0 0 --ss_iteration_fdr=0.02 && " + + # Run IPF (Inference of Peptidoforms) + cmd += f"pyprophet infer peptidoform --in={test_data_osw} && " + + # Export to parquet (should include SCORE_IPF columns) + cmd += f"pyprophet export parquet --in={test_data_osw} --out={temp_folder}/test_data_ipf.parquet" + + run_pyprophet_command(cmd, temp_folder) + + # Verify the parquet file exists and has data + import pyarrow.parquet as pq + table = pq.read_table(f"{temp_folder}/test_data_ipf.parquet") + df = table.to_pandas() + + # Check that we have data + assert len(df) > 0, "Exported parquet file should not be empty" + + # Check that SCORE_IPF columns are present + ipf_columns = [col for col in df.columns if col.startswith('SCORE_IPF')] + assert len(ipf_columns) > 0, "Exported parquet should contain SCORE_IPF columns" + + # Check for specific SCORE_IPF columns + expected_ipf_columns = ['SCORE_IPF_PRECURSOR_PEAKGROUP_PEP', 'SCORE_IPF_PEP', 'SCORE_IPF_QVALUE'] + for col in expected_ipf_columns: + assert col in df.columns, f"Expected column {col} not found in exported parquet" + + print(f"Exported {len(df)} rows with 
{len(df.columns)} columns", file=regtest) + print(f"SCORE_IPF columns found: {sorted(ipf_columns)}", file=regtest) + print("Sample data with IPF scores:", file=regtest) + print(df[['FEATURE_ID'] + ipf_columns].head(10).sort_index(axis=1), file=regtest) + + +# ================== FEATURE SCORES EXPORT TESTS ================== +def test_feature_scores_unscored_osw(test_data_osw, temp_folder, regtest): + """Test exporting feature scores from unscored OSW file""" + cmd = f"pyprophet export feature-scores --in={test_data_osw} --out={temp_folder}/feature_scores.pdf" + + run_pyprophet_command(cmd, temp_folder) + + # Check that output PDF files were created + output_files = list(temp_folder.glob("*.pdf")) + assert len(output_files) > 0, "Expected at least one PDF file to be created" + + print(f"Created {len(output_files)} PDF file(s):", file=regtest) + for f in sorted(output_files): + print(f" - {f.name}", file=regtest) + + +def test_feature_scores_scored_osw(test_data_osw, temp_folder, regtest): + """Test exporting feature scores from scored OSW file with SCORE tables""" + # Score at MS2 level first + cmd = f"pyprophet score --in={test_data_osw} --level=ms2 --test --pi0_lambda=0.001 0 0 --ss_iteration_fdr=0.02 && " + + # Export feature scores (should detect SCORE_MS2 table) + cmd += f"pyprophet export feature-scores --in={test_data_osw} --out={temp_folder}/feature_scores.pdf" + + run_pyprophet_command(cmd, temp_folder) + + # Check that output PDF files were created + output_files = list(temp_folder.glob("*.pdf")) + assert len(output_files) > 0, "Expected at least one PDF file to be created" + + print(f"Created {len(output_files)} PDF file(s) from scored OSW:", file=regtest) + for f in sorted(output_files): + print(f" - {f.name}", file=regtest) + + +def test_feature_scores_parquet_with_scores(test_data_osw, temp_folder, regtest): + """Test exporting feature scores from Parquet file with SCORE columns""" + # Score and export to parquet + cmd = f"pyprophet score 
--in={test_data_osw} --level=ms2 --test --pi0_lambda=0.001 0 0 --ss_iteration_fdr=0.02 && " + cmd += f"pyprophet export parquet --in={test_data_osw} --out={temp_folder}/test_data_scored.parquet && " + + # Export feature scores from parquet + cmd += f"pyprophet export feature-scores --in={temp_folder}/test_data_scored.parquet --out={temp_folder}/feature_scores.pdf" + + run_pyprophet_command(cmd, temp_folder) + + # Check that output PDF was created + pdf_file = temp_folder / "feature_scores.pdf" + assert pdf_file.exists(), "Expected feature_scores.pdf to be created" + + print(f"Successfully created feature scores from Parquet with SCORE columns", file=regtest) + + +def test_feature_scores_split_parquet_with_scores(test_data_osw, temp_folder, regtest): + """Test exporting feature scores from split Parquet directory with SCORE columns""" + # Score and export to split parquet + cmd = f"pyprophet score --in={test_data_osw} --level=ms2 --test --pi0_lambda=0.001 0 0 --ss_iteration_fdr=0.02 && " + cmd += f"pyprophet export parquet --in={test_data_osw} --out={temp_folder}/test_data_split --split_transition_data && " + + # Export feature scores from split parquet + cmd += f"pyprophet export feature-scores --in={temp_folder}/test_data_split --out={temp_folder}/feature_scores.pdf" + + run_pyprophet_command(cmd, temp_folder) + + # Check that output PDF was created + pdf_file = temp_folder / "feature_scores.pdf" + assert pdf_file.exists(), "Expected feature_scores.pdf to be created" + + print(f"Successfully created feature scores from split Parquet with SCORE columns", file=regtest) + + +def test_feature_scores_ms1_ms2_transition(test_data_osw, temp_folder, regtest): + """Test exporting feature scores with MS1, MS2, and transition level scoring""" + # Score at all levels + cmd = f"pyprophet score --in={test_data_osw} --level=ms1 --test --pi0_lambda=0.1 0 0 --ss_iteration_fdr=0.02 && " + cmd += f"pyprophet score --in={test_data_osw} --level=ms2 --test --pi0_lambda=0.001 0 0 
--ss_iteration_fdr=0.02 && " + cmd += f"pyprophet score --in={test_data_osw} --level=transition --test --pi0_lambda=0.1 0 0 --ss_iteration_fdr=0.02 && " + + # Export feature scores (should create ms1, ms2, and transition PDFs) + cmd += f"pyprophet export feature-scores --in={test_data_osw} --out={temp_folder}/feature_scores.pdf" + + run_pyprophet_command(cmd, temp_folder) + + # Check that output PDF files were created for all levels + output_files = list(temp_folder.glob("*.pdf")) + assert len(output_files) >= 3, "Expected at least 3 PDF files (ms1, ms2, transition)" + + # Check for specific files + ms1_files = [f for f in output_files if 'ms1' in f.name.lower()] + ms2_files = [f for f in output_files if 'ms2' in f.name.lower()] + transition_files = [f for f in output_files if 'transition' in f.name.lower()] + + print(f"Created {len(output_files)} PDF file(s) from multi-level scoring:", file=regtest) + print(f" MS1 files: {len(ms1_files)}", file=regtest) + print(f" MS2 files: {len(ms2_files)}", file=regtest) + print(f" Transition files: {len(transition_files)}", file=regtest) + + for f in sorted(output_files): + print(f" - {f.name}", file=regtest) From 79e110a2df04616d2a013a5953af9c3e7f33db18 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 05:30:05 +0000 Subject: [PATCH 09/30] Fix: Only add from_alignment column when alignment is actually used Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 10 +++++++--- pyprophet/io/export/parquet.py | 10 +++++++--- pyprophet/io/export/split_parquet.py | 10 +++++++--- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index d3a94c7f..95c4f8ec 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -324,7 +324,12 @@ def _read_augmented_data(self, con, cfg): def _read_standard_data(self, con, cfg): """Read standard 
OpenSWATH data without IPF, optionally including aligned features.""" + # Check if we should attempt alignment integration + use_alignment = cfg.use_alignment and self._check_alignment_presence(con) + # First, get features that pass MS2 QVALUE threshold + # Only add from_alignment column if we're using alignment + from_alignment_col = ", 0 AS from_alignment" if use_alignment else "" query = f""" SELECT RUN.ID AS id_run, PEPTIDE.ID AS id_peptide, @@ -350,8 +355,7 @@ def _read_standard_data(self, con, cfg): FEATURE.RIGHT_WIDTH AS rightWidth, SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS m_score, - 0 AS from_alignment + SCORE_MS2.QVALUE AS m_score{from_alignment_col} FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -366,7 +370,7 @@ def _read_standard_data(self, con, cfg): data = pd.read_sql_query(query, con) # If alignment is enabled and alignment data is present, fetch and merge aligned features - if cfg.use_alignment and self._check_alignment_presence(con): + if use_alignment: aligned_features = self._fetch_alignment_features(con, cfg) if not aligned_features.empty: diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index bc33c7d0..21182244 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -257,7 +257,12 @@ def _read_standard_data(self, con) -> pd.DataFrame: """ Read standard OpenSWATH data without IPF, optionally including aligned features. 
""" + # Check if we should attempt alignment integration + use_alignment = self.config.use_alignment and self._has_alignment + # First, get features that pass MS2 QVALUE threshold + # Only add from_alignment column if we're using alignment + from_alignment_col = ", 0 AS from_alignment" if use_alignment else "" query = f""" SELECT RUN_ID AS id_run, @@ -284,8 +289,7 @@ def _read_standard_data(self, con) -> pd.DataFrame: RIGHT_WIDTH AS rightWidth, SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, SCORE_MS2_SCORE AS d_score, - SCORE_MS2_Q_VALUE AS m_score, - 0 AS from_alignment + SCORE_MS2_Q_VALUE AS m_score{from_alignment_col} FROM data WHERE PROTEIN_ID IS NOT NULL AND SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} @@ -294,7 +298,7 @@ def _read_standard_data(self, con) -> pd.DataFrame: data = con.execute(query).fetchdf() # If alignment is enabled and alignment data is present, fetch and merge aligned features - if self.config.use_alignment and self._has_alignment: + if use_alignment: aligned_features = self._fetch_alignment_features(con) if not aligned_features.empty: diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index aad02c66..ea0e2a61 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -364,7 +364,12 @@ def _read_standard_data(self, con) -> pd.DataFrame: """ Read standard OpenSWATH data without IPF from split files, optionally including aligned features. 
""" + # Check if we should attempt alignment integration + use_alignment = self.config.use_alignment and self._has_alignment + # First, get features that pass MS2 QVALUE threshold + # Only add from_alignment column if we're using alignment + from_alignment_col = ", 0 AS from_alignment" if use_alignment else "" query = f""" SELECT p.RUN_ID AS id_run, @@ -391,8 +396,7 @@ def _read_standard_data(self, con) -> pd.DataFrame: p.RIGHT_WIDTH AS rightWidth, p.SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, p.SCORE_MS2_SCORE AS d_score, - p.SCORE_MS2_Q_VALUE AS m_score, - 0 AS from_alignment + p.SCORE_MS2_Q_VALUE AS m_score{from_alignment_col} FROM precursors p WHERE p.PROTEIN_ID IS NOT NULL AND p.SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} @@ -401,7 +405,7 @@ def _read_standard_data(self, con) -> pd.DataFrame: data = con.execute(query).fetchdf() # If alignment is enabled and alignment data is present, fetch and merge aligned features - if self.config.use_alignment and self._has_alignment: + if use_alignment: aligned_features = self._fetch_alignment_features(con) if not aligned_features.empty: From 247e458dc5fdd014f9090cbb674d164978a51166 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 05:41:40 +0000 Subject: [PATCH 10/30] Add alignment reference feature ID and RT to recovered features Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 7 +++++-- pyprophet/io/export/parquet.py | 21 ++++++++++++++++++--- pyprophet/io/export/split_parquet.py | 21 ++++++++++++++++++--- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 95c4f8ec..cf39c90b 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -422,10 +422,11 @@ def _read_standard_data(self, con, cfg): """ aligned_data = pd.read_sql_query(aligned_query, con) - # Merge alignment scores into the aligned 
data + # Merge alignment scores and reference info into the aligned data aligned_data = pd.merge( aligned_data, - aligned_features[['id', 'alignment_pep', 'alignment_qvalue']], + aligned_features[['id', 'alignment_reference_feature_id', 'alignment_reference_rt', + 'alignment_pep', 'alignment_qvalue']], on='id', how='left' ) @@ -737,6 +738,8 @@ def _fetch_alignment_features(self, con, cfg): ALIGNED_FEATURE_ID AS id, PRECURSOR_ID AS transition_group_id, RUN_ID AS run_id, + REFERENCE_FEATURE_ID AS alignment_reference_feature_id, + REFERENCE_RT AS alignment_reference_rt, SCORE_ALIGNMENT.PEP AS alignment_pep, SCORE_ALIGNMENT.QVALUE AS alignment_qvalue FROM ( diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 21182244..35ce0819 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -348,11 +348,18 @@ def _read_standard_data(self, con) -> pd.DataFrame: """ aligned_data = con.execute(aligned_query).fetchdf() - # Merge alignment scores into the aligned data + # Merge alignment scores and reference info into the aligned data if 'alignment_pep' in aligned_features.columns: + # Build list of columns to merge + merge_cols = ['id', 'alignment_pep', 'alignment_qvalue'] + if 'alignment_reference_feature_id' in aligned_features.columns: + merge_cols.append('alignment_reference_feature_id') + if 'alignment_reference_rt' in aligned_features.columns: + merge_cols.append('alignment_reference_rt') + aligned_data = pd.merge( aligned_data, - aligned_features[['id', 'alignment_pep', 'alignment_qvalue']], + aligned_features[merge_cols], on='id', how='left' ) @@ -717,10 +724,18 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Rename columns to match expected format if 'FEATURE_ID' in filtered_df.columns: - result = filtered_df[['FEATURE_ID', 'PRECURSOR_ID', 'RUN_ID']].rename( + # Start with base columns + base_cols = ['FEATURE_ID', 'PRECURSOR_ID', 'RUN_ID'] + result = filtered_df[base_cols].rename( 
columns={'FEATURE_ID': 'id'} ) + # Add reference feature ID and RT if available + if 'REFERENCE_FEATURE_ID' in filtered_df.columns: + result['alignment_reference_feature_id'] = filtered_df['REFERENCE_FEATURE_ID'].values + if 'REFERENCE_RT' in filtered_df.columns: + result['alignment_reference_rt'] = filtered_df['REFERENCE_RT'].values + # Add alignment scores if available if 'PEP' in filtered_df.columns: result['alignment_pep'] = filtered_df['PEP'].values diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index ea0e2a61..b20e70bf 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -455,11 +455,18 @@ def _read_standard_data(self, con) -> pd.DataFrame: """ aligned_data = con.execute(aligned_query).fetchdf() - # Merge alignment scores into the aligned data + # Merge alignment scores and reference info into the aligned data if 'alignment_pep' in aligned_features.columns: + # Build list of columns to merge + merge_cols = ['id', 'alignment_pep', 'alignment_qvalue'] + if 'alignment_reference_feature_id' in aligned_features.columns: + merge_cols.append('alignment_reference_feature_id') + if 'alignment_reference_rt' in aligned_features.columns: + merge_cols.append('alignment_reference_rt') + aligned_data = pd.merge( aligned_data, - aligned_features[['id', 'alignment_pep', 'alignment_qvalue']], + aligned_features[merge_cols], on='id', how='left' ) @@ -802,10 +809,18 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Rename columns to match expected format if 'FEATURE_ID' in filtered_df.columns: - result = filtered_df[['FEATURE_ID', 'PRECURSOR_ID', 'RUN_ID']].rename( + # Start with base columns + base_cols = ['FEATURE_ID', 'PRECURSOR_ID', 'RUN_ID'] + result = filtered_df[base_cols].rename( columns={'FEATURE_ID': 'id'} ) + # Add reference feature ID and RT if available + if 'REFERENCE_FEATURE_ID' in filtered_df.columns: + result['alignment_reference_feature_id'] = 
filtered_df['REFERENCE_FEATURE_ID'].values + if 'REFERENCE_RT' in filtered_df.columns: + result['alignment_reference_rt'] = filtered_df['REFERENCE_RT'].values + # Add alignment scores if available if 'PEP' in filtered_df.columns: result['alignment_pep'] = filtered_df['PEP'].values From 376399fd693a9e0240045e58e311eb86dbc1b54f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 06:31:08 +0000 Subject: [PATCH 11/30] Ensure reference features pass MS2 QVALUE threshold for alignment recovery Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 15 +++++++++++---- pyprophet/io/export/parquet.py | 28 ++++++++++++++++++++++++++-- pyprophet/io/export/split_parquet.py | 28 ++++++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index cf39c90b..d2e4b99f 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -720,9 +720,9 @@ def _fetch_alignment_features(self, con, cfg): Fetch aligned features with good alignment scores. This method retrieves features that have been aligned across runs - and pass the alignment quality threshold. These features can be used - to recover peaks in runs where the MS2 signal might be weak but the - alignment score is good. + and pass the alignment quality threshold. Only features whose reference + feature passes the MS2 QVALUE threshold are included, ensuring that + recovered peaks are aligned to high-quality reference features. 
Args: con: Database connection @@ -732,6 +732,7 @@ def _fetch_alignment_features(self, con, cfg): DataFrame with aligned feature IDs that pass quality threshold """ max_alignment_pep = cfg.max_alignment_pep + max_rs_peakgroup_qvalue = cfg.max_rs_peakgroup_qvalue query = f""" SELECT @@ -751,12 +752,18 @@ def _fetch_alignment_features(self, con, cfg): GROUP BY FEATURE_ID ) AS SCORE_ALIGNMENT ON SCORE_ALIGNMENT.FEATURE_ID = FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID + INNER JOIN ( + SELECT FEATURE_ID, QVALUE + FROM SCORE_MS2 + ) AS REF_SCORE_MS2 + ON REF_SCORE_MS2.FEATURE_ID = FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID WHERE LABEL = 1 AND SCORE_ALIGNMENT.PEP < {max_alignment_pep} + AND REF_SCORE_MS2.QVALUE < {max_rs_peakgroup_qvalue} """ df = pd.read_sql_query(query, con) - logger.info(f"Found {len(df)} aligned features passing alignment PEP < {max_alignment_pep}") + logger.info(f"Found {len(df)} aligned features passing alignment PEP < {max_alignment_pep} with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}") return df ################################## diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 35ce0819..558e92dc 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -664,6 +664,7 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: This method checks for an alignment parquet file and retrieves features that have been aligned across runs and pass the alignment quality threshold. + Only features whose reference feature passes the MS2 QVALUE threshold are included. 
Args: con: DuckDB connection @@ -685,6 +686,7 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: logger.debug(f"Loading alignment data from {alignment_file}") max_alignment_pep = self.config.max_alignment_pep + max_rs_peakgroup_qvalue = self.config.max_rs_peakgroup_qvalue try: # Load alignment data @@ -722,6 +724,28 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: logger.warning("Alignment file found but no PEP/QVALUE scores present. Cannot filter by alignment quality.") filtered_df = alignment_df[alignment_df['DECOY'] == 1].copy() + # Now filter by reference feature MS2 QVALUE + # Need to join with main data to check reference feature QVALUE + if 'REFERENCE_FEATURE_ID' in filtered_df.columns: + # Register filtered alignment data for SQL query + con.register('filtered_alignment', filtered_df) + + # Query to get aligned features where reference passes MS2 QVALUE threshold + ref_check_query = f""" + SELECT + fa.FEATURE_ID, + fa.PRECURSOR_ID, + fa.RUN_ID, + fa.REFERENCE_FEATURE_ID, + fa.REFERENCE_RT, + fa.PEP, + fa.QVALUE + FROM filtered_alignment fa + INNER JOIN data d ON d.FEATURE_ID = fa.REFERENCE_FEATURE_ID + WHERE d.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} + """ + filtered_df = con.execute(ref_check_query).fetchdf() + # Rename columns to match expected format if 'FEATURE_ID' in filtered_df.columns: # Start with base columns @@ -742,8 +766,8 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: if 'QVALUE' in filtered_df.columns: result['alignment_qvalue'] = filtered_df['QVALUE'].values - logger.info(f"Found {len(result)} aligned features" + - (f" passing alignment PEP < {max_alignment_pep}" if has_alignment_scores else "")) + logger.info(f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep} " + + f"with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}") return result except Exception as e: logger.warning(f"Could not load alignment data: {e}") diff --git 
a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index b20e70bf..6e9833b6 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -752,6 +752,7 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: This method checks for an alignment parquet file and retrieves features that have been aligned across runs and pass the alignment quality threshold. + Only features whose reference feature passes the MS2 QVALUE threshold are included. Args: con: DuckDB connection @@ -770,6 +771,7 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: logger.debug(f"Loading alignment data from {alignment_file}") max_alignment_pep = self.config.max_alignment_pep + max_rs_peakgroup_qvalue = self.config.max_rs_peakgroup_qvalue try: # Load alignment data @@ -807,6 +809,28 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: logger.warning("Alignment file found but no PEP/QVALUE scores present. Cannot filter by alignment quality.") filtered_df = alignment_df[alignment_df['DECOY'] == 1].copy() + # Now filter by reference feature MS2 QVALUE + # Need to join with precursors data to check reference feature QVALUE + if 'REFERENCE_FEATURE_ID' in filtered_df.columns: + # Register filtered alignment data for SQL query + con.register('filtered_alignment', filtered_df) + + # Query to get aligned features where reference passes MS2 QVALUE threshold + ref_check_query = f""" + SELECT + fa.FEATURE_ID, + fa.PRECURSOR_ID, + fa.RUN_ID, + fa.REFERENCE_FEATURE_ID, + fa.REFERENCE_RT, + fa.PEP, + fa.QVALUE + FROM filtered_alignment fa + INNER JOIN precursors p ON p.FEATURE_ID = fa.REFERENCE_FEATURE_ID + WHERE p.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} + """ + filtered_df = con.execute(ref_check_query).fetchdf() + # Rename columns to match expected format if 'FEATURE_ID' in filtered_df.columns: # Start with base columns @@ -827,8 +851,8 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: if 'QVALUE' in 
filtered_df.columns: result['alignment_qvalue'] = filtered_df['QVALUE'].values - logger.info(f"Found {len(result)} aligned features" + - (f" passing alignment PEP < {max_alignment_pep}" if has_alignment_scores else "")) + logger.info(f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep} " + + f"with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}") return result except Exception as e: logger.warning(f"Could not load alignment data: {e}") From 5ee818aec1234ad686ceeafc8aa1b1e391345b18 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 13:27:17 +0000 Subject: [PATCH 12/30] Add alignment_group_id to recovered alignment features Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 5 +++-- pyprophet/io/export/parquet.py | 8 ++++++++ pyprophet/io/export/split_parquet.py | 8 ++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index d2e4b99f..be0611d5 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -425,8 +425,8 @@ def _read_standard_data(self, con, cfg): # Merge alignment scores and reference info into the aligned data aligned_data = pd.merge( aligned_data, - aligned_features[['id', 'alignment_reference_feature_id', 'alignment_reference_rt', - 'alignment_pep', 'alignment_qvalue']], + aligned_features[['id', 'alignment_group_id', 'alignment_reference_feature_id', + 'alignment_reference_rt', 'alignment_pep', 'alignment_qvalue']], on='id', how='left' ) @@ -736,6 +736,7 @@ def _fetch_alignment_features(self, con, cfg): query = f""" SELECT + DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS alignment_group_id, ALIGNED_FEATURE_ID AS id, PRECURSOR_ID AS transition_group_id, RUN_ID AS run_id, diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 558e92dc..a61e6da5 100644 --- 
a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -352,6 +352,8 @@ def _read_standard_data(self, con) -> pd.DataFrame: if 'alignment_pep' in aligned_features.columns: # Build list of columns to merge merge_cols = ['id', 'alignment_pep', 'alignment_qvalue'] + if 'alignment_group_id' in aligned_features.columns: + merge_cols.append('alignment_group_id') if 'alignment_reference_feature_id' in aligned_features.columns: merge_cols.append('alignment_reference_feature_id') if 'alignment_reference_rt' in aligned_features.columns: @@ -731,8 +733,10 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: con.register('filtered_alignment', filtered_df) # Query to get aligned features where reference passes MS2 QVALUE threshold + # Also compute alignment_group_id using DENSE_RANK ref_check_query = f""" SELECT + DENSE_RANK() OVER (ORDER BY fa.PRECURSOR_ID, fa.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, fa.FEATURE_ID, fa.PRECURSOR_ID, fa.RUN_ID, @@ -754,6 +758,10 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: columns={'FEATURE_ID': 'id'} ) + # Add alignment group ID if available + if 'ALIGNMENT_GROUP_ID' in filtered_df.columns: + result['alignment_group_id'] = filtered_df['ALIGNMENT_GROUP_ID'].values + # Add reference feature ID and RT if available if 'REFERENCE_FEATURE_ID' in filtered_df.columns: result['alignment_reference_feature_id'] = filtered_df['REFERENCE_FEATURE_ID'].values diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index 6e9833b6..9f473b25 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -459,6 +459,8 @@ def _read_standard_data(self, con) -> pd.DataFrame: if 'alignment_pep' in aligned_features.columns: # Build list of columns to merge merge_cols = ['id', 'alignment_pep', 'alignment_qvalue'] + if 'alignment_group_id' in aligned_features.columns: + merge_cols.append('alignment_group_id') if 'alignment_reference_feature_id' in 
aligned_features.columns: merge_cols.append('alignment_reference_feature_id') if 'alignment_reference_rt' in aligned_features.columns: @@ -816,8 +818,10 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: con.register('filtered_alignment', filtered_df) # Query to get aligned features where reference passes MS2 QVALUE threshold + # Also compute alignment_group_id using DENSE_RANK ref_check_query = f""" SELECT + DENSE_RANK() OVER (ORDER BY fa.PRECURSOR_ID, fa.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, fa.FEATURE_ID, fa.PRECURSOR_ID, fa.RUN_ID, @@ -839,6 +843,10 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: columns={'FEATURE_ID': 'id'} ) + # Add alignment group ID if available + if 'ALIGNMENT_GROUP_ID' in filtered_df.columns: + result['alignment_group_id'] = filtered_df['ALIGNMENT_GROUP_ID'].values + # Add reference feature ID and RT if available if 'REFERENCE_FEATURE_ID' in filtered_df.columns: result['alignment_reference_feature_id'] = filtered_df['REFERENCE_FEATURE_ID'].values From 6bd270fba758cf2f624ec6f79df445121b1e82a7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 13:46:43 +0000 Subject: [PATCH 13/30] Fix ambiguous ALIGNMENT_ID column error in OSW alignment query Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index be0611d5..ef0a1ab0 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -736,12 +736,12 @@ def _fetch_alignment_features(self, con, cfg): query = f""" SELECT - DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS alignment_group_id, - ALIGNED_FEATURE_ID AS id, - PRECURSOR_ID AS transition_group_id, - RUN_ID AS run_id, - REFERENCE_FEATURE_ID AS alignment_reference_feature_id, - REFERENCE_RT AS alignment_reference_rt, + DENSE_RANK() OVER (ORDER BY 
FEATURE_MS2_ALIGNMENT.PRECURSOR_ID, FEATURE_MS2_ALIGNMENT.ALIGNMENT_ID) AS alignment_group_id, + FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID AS id, + FEATURE_MS2_ALIGNMENT.PRECURSOR_ID AS transition_group_id, + FEATURE_MS2_ALIGNMENT.RUN_ID AS run_id, + FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS alignment_reference_feature_id, + FEATURE_MS2_ALIGNMENT.REFERENCE_RT AS alignment_reference_rt, SCORE_ALIGNMENT.PEP AS alignment_pep, SCORE_ALIGNMENT.QVALUE AS alignment_qvalue FROM ( @@ -758,7 +758,7 @@ def _fetch_alignment_features(self, con, cfg): FROM SCORE_MS2 ) AS REF_SCORE_MS2 ON REF_SCORE_MS2.FEATURE_ID = FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID - WHERE LABEL = 1 + WHERE FEATURE_MS2_ALIGNMENT.LABEL = 1 AND SCORE_ALIGNMENT.PEP < {max_alignment_pep} AND REF_SCORE_MS2.QVALUE < {max_rs_peakgroup_qvalue} """ From 42bf44196d807927331aff0fd1c6a48b29a005f9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 14:10:38 +0000 Subject: [PATCH 14/30] Fix alignment_reference_feature_id displaying as scientific notation by converting to Int64 Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 6 ++++++ pyprophet/io/export/parquet.py | 6 +++++- pyprophet/io/export/split_parquet.py | 6 +++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index ef0a1ab0..564193c4 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -431,6 +431,12 @@ def _read_standard_data(self, con, cfg): how='left' ) + # Convert alignment_reference_feature_id to int64 to avoid scientific notation + if 'alignment_reference_feature_id' in aligned_data.columns: + aligned_data['alignment_reference_feature_id'] = aligned_data['alignment_reference_feature_id'].astype('Int64') + if 'alignment_group_id' in aligned_data.columns: + aligned_data['alignment_group_id'] = 
aligned_data['alignment_group_id'].astype('Int64') + logger.info(f"Adding {len(aligned_data)} features recovered through alignment") # Combine with base data diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index a61e6da5..26551985 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -764,7 +764,7 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Add reference feature ID and RT if available if 'REFERENCE_FEATURE_ID' in filtered_df.columns: - result['alignment_reference_feature_id'] = filtered_df['REFERENCE_FEATURE_ID'].values + result['alignment_reference_feature_id'] = filtered_df['REFERENCE_FEATURE_ID'].astype('Int64').values if 'REFERENCE_RT' in filtered_df.columns: result['alignment_reference_rt'] = filtered_df['REFERENCE_RT'].values @@ -774,6 +774,10 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: if 'QVALUE' in filtered_df.columns: result['alignment_qvalue'] = filtered_df['QVALUE'].values + # Convert alignment_group_id to int64 + if 'alignment_group_id' in result.columns: + result['alignment_group_id'] = result['alignment_group_id'].astype('Int64') + logger.info(f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep} " + f"with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}") return result diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index 9f473b25..4c959115 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -849,7 +849,7 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Add reference feature ID and RT if available if 'REFERENCE_FEATURE_ID' in filtered_df.columns: - result['alignment_reference_feature_id'] = filtered_df['REFERENCE_FEATURE_ID'].values + result['alignment_reference_feature_id'] = filtered_df['REFERENCE_FEATURE_ID'].astype('Int64').values if 'REFERENCE_RT' in filtered_df.columns: result['alignment_reference_rt'] = 
filtered_df['REFERENCE_RT'].values @@ -859,6 +859,10 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: if 'QVALUE' in filtered_df.columns: result['alignment_qvalue'] = filtered_df['QVALUE'].values + # Convert alignment_group_id to int64 + if 'alignment_group_id' in result.columns: + result['alignment_group_id'] = result['alignment_group_id'].astype('Int64') + logger.info(f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep} " + f"with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}") return result From e0c444548d966b6197703f8b71ee79e100c44aa5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 14:26:55 +0000 Subject: [PATCH 15/30] Add alignment info to all features, not just recovered ones Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 32 +++++++++++++------ pyprophet/io/export/parquet.py | 46 +++++++++++++++++++--------- pyprophet/io/export/split_parquet.py | 46 +++++++++++++++++++--------- 3 files changed, 86 insertions(+), 38 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 564193c4..5e718c66 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -328,8 +328,6 @@ def _read_standard_data(self, con, cfg): use_alignment = cfg.use_alignment and self._check_alignment_presence(con) # First, get features that pass MS2 QVALUE threshold - # Only add from_alignment column if we're using alignment - from_alignment_col = ", 0 AS from_alignment" if use_alignment else "" query = f""" SELECT RUN.ID AS id_run, PEPTIDE.ID AS id_peptide, @@ -355,7 +353,7 @@ def _read_standard_data(self, con, cfg): FEATURE.RIGHT_WIDTH AS rightWidth, SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS m_score{from_alignment_col} + SCORE_MS2.QVALUE AS m_score FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = 
PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -380,6 +378,18 @@ def _read_standard_data(self, con, cfg): existing_ids = data['id'].unique() new_aligned_ids = [aid for aid in aligned_ids if aid not in existing_ids] + # First, merge alignment info into existing features (those that passed MS2) + # Mark them with from_alignment=0 + data = pd.merge( + data, + aligned_features[['id', 'alignment_group_id', 'alignment_reference_feature_id', + 'alignment_reference_rt', 'alignment_pep', 'alignment_qvalue']], + on='id', + how='left' + ) + data['from_alignment'] = 0 + + # Now add features that didn't pass MS2 but have good alignment (recovered features) if new_aligned_ids: # Fetch full data for these new aligned features aligned_ids_str = ','.join(map(str, new_aligned_ids)) @@ -408,8 +418,7 @@ def _read_standard_data(self, con, cfg): FEATURE.RIGHT_WIDTH AS rightWidth, SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS m_score, - 1 AS from_alignment + SCORE_MS2.QVALUE AS m_score FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -431,16 +440,19 @@ def _read_standard_data(self, con, cfg): how='left' ) - # Convert alignment_reference_feature_id to int64 to avoid scientific notation - if 'alignment_reference_feature_id' in aligned_data.columns: - aligned_data['alignment_reference_feature_id'] = aligned_data['alignment_reference_feature_id'].astype('Int64') - if 'alignment_group_id' in aligned_data.columns: - aligned_data['alignment_group_id'] = aligned_data['alignment_group_id'].astype('Int64') + # Mark as recovered through alignment + aligned_data['from_alignment'] = 1 logger.info(f"Adding {len(aligned_data)} features recovered through alignment") # Combine with base data data = pd.concat([data, aligned_data], ignore_index=True) + + # Convert 
alignment_reference_feature_id to int64 to avoid scientific notation + if 'alignment_reference_feature_id' in data.columns: + data['alignment_reference_feature_id'] = data['alignment_reference_feature_id'].astype('Int64') + if 'alignment_group_id' in data.columns: + data['alignment_group_id'] = data['alignment_group_id'].astype('Int64') return data diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 26551985..940de587 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -261,8 +261,6 @@ def _read_standard_data(self, con) -> pd.DataFrame: use_alignment = self.config.use_alignment and self._has_alignment # First, get features that pass MS2 QVALUE threshold - # Only add from_alignment column if we're using alignment - from_alignment_col = ", 0 AS from_alignment" if use_alignment else "" query = f""" SELECT RUN_ID AS id_run, @@ -289,7 +287,7 @@ def _read_standard_data(self, con) -> pd.DataFrame: RIGHT_WIDTH AS rightWidth, SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, SCORE_MS2_SCORE AS d_score, - SCORE_MS2_Q_VALUE AS m_score{from_alignment_col} + SCORE_MS2_Q_VALUE AS m_score FROM data WHERE PROTEIN_ID IS NOT NULL AND SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} @@ -308,6 +306,27 @@ def _read_standard_data(self, con) -> pd.DataFrame: existing_ids = data['id'].unique() new_aligned_ids = [aid for aid in aligned_ids if aid not in existing_ids] + # First, merge alignment info into existing features (those that passed MS2) + # Mark them with from_alignment=0 + if 'alignment_pep' in aligned_features.columns: + # Build list of columns to merge + merge_cols = ['id', 'alignment_pep', 'alignment_qvalue'] + if 'alignment_group_id' in aligned_features.columns: + merge_cols.append('alignment_group_id') + if 'alignment_reference_feature_id' in aligned_features.columns: + merge_cols.append('alignment_reference_feature_id') + if 'alignment_reference_rt' in aligned_features.columns: + 
merge_cols.append('alignment_reference_rt') + + data = pd.merge( + data, + aligned_features[merge_cols], + on='id', + how='left' + ) + data['from_alignment'] = 0 + + # Now add features that didn't pass MS2 but have good alignment (recovered features) if new_aligned_ids: # Fetch full data for these new aligned features from the main data view # Register aligned IDs as a temp table for the query @@ -340,8 +359,7 @@ def _read_standard_data(self, con) -> pd.DataFrame: RIGHT_WIDTH AS rightWidth, SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, SCORE_MS2_SCORE AS d_score, - SCORE_MS2_Q_VALUE AS m_score, - 1 AS from_alignment + SCORE_MS2_Q_VALUE AS m_score FROM data WHERE PROTEIN_ID IS NOT NULL AND FEATURE_ID IN (SELECT id FROM aligned_ids_temp) @@ -350,15 +368,6 @@ def _read_standard_data(self, con) -> pd.DataFrame: # Merge alignment scores and reference info into the aligned data if 'alignment_pep' in aligned_features.columns: - # Build list of columns to merge - merge_cols = ['id', 'alignment_pep', 'alignment_qvalue'] - if 'alignment_group_id' in aligned_features.columns: - merge_cols.append('alignment_group_id') - if 'alignment_reference_feature_id' in aligned_features.columns: - merge_cols.append('alignment_reference_feature_id') - if 'alignment_reference_rt' in aligned_features.columns: - merge_cols.append('alignment_reference_rt') - aligned_data = pd.merge( aligned_data, aligned_features[merge_cols], @@ -366,10 +375,19 @@ def _read_standard_data(self, con) -> pd.DataFrame: how='left' ) + # Mark as recovered through alignment + aligned_data['from_alignment'] = 1 + logger.info(f"Adding {len(aligned_data)} features recovered through alignment") # Combine with base data data = pd.concat([data, aligned_data], ignore_index=True) + + # Convert alignment_reference_feature_id to int64 to avoid scientific notation + if 'alignment_reference_feature_id' in data.columns: + data['alignment_reference_feature_id'] = data['alignment_reference_feature_id'].astype('Int64') + if 
'alignment_group_id' in data.columns: + data['alignment_group_id'] = data['alignment_group_id'].astype('Int64') return data diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index 4c959115..391b18fa 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -368,8 +368,6 @@ def _read_standard_data(self, con) -> pd.DataFrame: use_alignment = self.config.use_alignment and self._has_alignment # First, get features that pass MS2 QVALUE threshold - # Only add from_alignment column if we're using alignment - from_alignment_col = ", 0 AS from_alignment" if use_alignment else "" query = f""" SELECT p.RUN_ID AS id_run, @@ -396,7 +394,7 @@ def _read_standard_data(self, con) -> pd.DataFrame: p.RIGHT_WIDTH AS rightWidth, p.SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, p.SCORE_MS2_SCORE AS d_score, - p.SCORE_MS2_Q_VALUE AS m_score{from_alignment_col} + p.SCORE_MS2_Q_VALUE AS m_score FROM precursors p WHERE p.PROTEIN_ID IS NOT NULL AND p.SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} @@ -415,6 +413,27 @@ def _read_standard_data(self, con) -> pd.DataFrame: existing_ids = data['id'].unique() new_aligned_ids = [aid for aid in aligned_ids if aid not in existing_ids] + # First, merge alignment info into existing features (those that passed MS2) + # Mark them with from_alignment=0 + if 'alignment_pep' in aligned_features.columns: + # Build list of columns to merge + merge_cols = ['id', 'alignment_pep', 'alignment_qvalue'] + if 'alignment_group_id' in aligned_features.columns: + merge_cols.append('alignment_group_id') + if 'alignment_reference_feature_id' in aligned_features.columns: + merge_cols.append('alignment_reference_feature_id') + if 'alignment_reference_rt' in aligned_features.columns: + merge_cols.append('alignment_reference_rt') + + data = pd.merge( + data, + aligned_features[merge_cols], + on='id', + how='left' + ) + data['from_alignment'] = 0 + + # Now add features that didn't pass MS2 
but have good alignment (recovered features) if new_aligned_ids: # Fetch full data for these new aligned features from the main data view # Register aligned IDs as a temp table for the query @@ -447,8 +466,7 @@ def _read_standard_data(self, con) -> pd.DataFrame: p.RIGHT_WIDTH AS rightWidth, p.SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, p.SCORE_MS2_SCORE AS d_score, - p.SCORE_MS2_Q_VALUE AS m_score, - 1 AS from_alignment + p.SCORE_MS2_Q_VALUE AS m_score FROM precursors p WHERE p.PROTEIN_ID IS NOT NULL AND p.FEATURE_ID IN (SELECT id FROM aligned_ids_temp) @@ -457,15 +475,6 @@ def _read_standard_data(self, con) -> pd.DataFrame: # Merge alignment scores and reference info into the aligned data if 'alignment_pep' in aligned_features.columns: - # Build list of columns to merge - merge_cols = ['id', 'alignment_pep', 'alignment_qvalue'] - if 'alignment_group_id' in aligned_features.columns: - merge_cols.append('alignment_group_id') - if 'alignment_reference_feature_id' in aligned_features.columns: - merge_cols.append('alignment_reference_feature_id') - if 'alignment_reference_rt' in aligned_features.columns: - merge_cols.append('alignment_reference_rt') - aligned_data = pd.merge( aligned_data, aligned_features[merge_cols], @@ -473,10 +482,19 @@ def _read_standard_data(self, con) -> pd.DataFrame: how='left' ) + # Mark as recovered through alignment + aligned_data['from_alignment'] = 1 + logger.info(f"Adding {len(aligned_data)} features recovered through alignment") # Combine with base data data = pd.concat([data, aligned_data], ignore_index=True) + + # Convert alignment_reference_feature_id to int64 to avoid scientific notation + if 'alignment_reference_feature_id' in data.columns: + data['alignment_reference_feature_id'] = data['alignment_reference_feature_id'].astype('Int64') + if 'alignment_group_id' in data.columns: + data['alignment_group_id'] = data['alignment_group_id'].astype('Int64') return data From bc78c823066e3ba41d0dc54300eb307f7e48937d Mon Sep 17 00:00:00 
2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 14:59:44 +0000 Subject: [PATCH 16/30] Add SCORE_MS2.PEP (MS2_PEAKGROUP_PEP) to standard OpenSWATH exports Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 9 ++++++--- pyprophet/io/export/parquet.py | 3 ++- pyprophet/io/export/split_parquet.py | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 5e718c66..7d59093a 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -353,7 +353,8 @@ def _read_standard_data(self, con, cfg): FEATURE.RIGHT_WIDTH AS rightWidth, SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS m_score + SCORE_MS2.QVALUE AS m_score, + SCORE_MS2.PEP AS MS2_PEAKGROUP_PEP FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -418,7 +419,8 @@ def _read_standard_data(self, con, cfg): FEATURE.RIGHT_WIDTH AS rightWidth, SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS m_score + SCORE_MS2.QVALUE AS m_score, + SCORE_MS2.PEP AS MS2_PEAKGROUP_PEP FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -551,7 +553,8 @@ def _get_base_openswath_data(self, con, cfg): FEATURE.RIGHT_WIDTH AS rightWidth, SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, - SCORE_MS2.QVALUE AS m_score + SCORE_MS2.QVALUE AS m_score, + SCORE_MS2.PEP AS MS2_PEAKGROUP_PEP FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID diff --git a/pyprophet/io/export/parquet.py 
b/pyprophet/io/export/parquet.py index 940de587..a6fdb858 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -287,7 +287,8 @@ def _read_standard_data(self, con) -> pd.DataFrame: RIGHT_WIDTH AS rightWidth, SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, SCORE_MS2_SCORE AS d_score, - SCORE_MS2_Q_VALUE AS m_score + SCORE_MS2_Q_VALUE AS m_score, + SCORE_MS2_PEP AS MS2_PEAKGROUP_PEP FROM data WHERE PROTEIN_ID IS NOT NULL AND SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index 391b18fa..cafb525b 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -394,7 +394,8 @@ def _read_standard_data(self, con) -> pd.DataFrame: p.RIGHT_WIDTH AS rightWidth, p.SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, p.SCORE_MS2_SCORE AS d_score, - p.SCORE_MS2_Q_VALUE AS m_score + p.SCORE_MS2_Q_VALUE AS m_score, + p.SCORE_MS2_PEP AS MS2_PEAKGROUP_PEP FROM precursors p WHERE p.PROTEIN_ID IS NOT NULL AND p.SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} From 2300d1642cc1f56f4ac874ed5c1bc508ee81672b Mon Sep 17 00:00:00 2001 From: singjc Date: Thu, 30 Oct 2025 18:12:21 -0400 Subject: [PATCH 17/30] Update output files for pyprophet export tests to include new 'pep' column - Added 'pep' column to the output of test_pyprophet_export.test_osw_analysis with split_parquet set to False. - Updated output of test_pyprophet_export.test_osw_analysis with split_parquet set to True to reflect the addition of the 'pep' column. 
--- pyprophet/io/export/osw.py | 308 +++++++++------ pyprophet/io/export/parquet.py | 296 ++++++++------ pyprophet/io/export/split_parquet.py | 373 ++++++++++-------- ...xport.test_ipf_analysis[False-disable].out | 26 +- ...export.test_ipf_analysis[True-disable].out | 26 +- ...st_osw_analysis[osw-False-False-False].out | 26 +- ...est_osw_analysis[osw-False-False-True].out | 2 +- ...est_osw_analysis[osw-False-True-False].out | 26 +- ...est_osw_analysis[osw-True-False-False].out | 26 +- ...sw_analysis[parquet-False-False-False].out | 26 +- ...osw_analysis[parquet-False-False-True].out | 26 +- ...osw_analysis[parquet-False-True-False].out | 26 +- ...osw_analysis[parquet-True-False-False].out | 26 +- ...lysis[split_parquet-False-False-False].out | 26 +- ...alysis[split_parquet-False-False-True].out | 26 +- ...alysis[split_parquet-False-True-False].out | 26 +- ...alysis[split_parquet-True-False-False].out | 26 +- 17 files changed, 745 insertions(+), 572 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 7d59093a..6472551a 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -163,9 +163,8 @@ def _check_ipf_presence(self, con, cfg): def _check_alignment_presence(self, con): """Check if alignment data is present.""" - return ( - check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT") - and check_sqlite_table(con, "SCORE_ALIGNMENT") + return check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT") and check_sqlite_table( + con, "SCORE_ALIGNMENT" ) def _read_unscored_data(self, con): @@ -326,7 +325,7 @@ def _read_standard_data(self, con, cfg): """Read standard OpenSWATH data without IPF, optionally including aligned features.""" # Check if we should attempt alignment integration use_alignment = cfg.use_alignment and self._check_alignment_presence(con) - + # First, get features that pass MS2 QVALUE threshold query = f""" SELECT RUN.ID AS id_run, @@ -354,7 +353,7 @@ def _read_standard_data(self, con, cfg): SCORE_MS2.RANK AS 
peak_group_rank, SCORE_MS2.SCORE AS d_score, SCORE_MS2.QVALUE AS m_score, - SCORE_MS2.PEP AS MS2_PEAKGROUP_PEP + SCORE_MS2.PEP AS pep FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -367,33 +366,43 @@ def _read_standard_data(self, con, cfg): ORDER BY transition_group_id, peak_group_rank; """ data = pd.read_sql_query(query, con) - + # If alignment is enabled and alignment data is present, fetch and merge aligned features if use_alignment: aligned_features = self._fetch_alignment_features(con, cfg) - + if not aligned_features.empty: # Get full feature data for aligned features that are NOT already in base results # We only want to add features that didn't pass MS2 threshold but have good alignment - aligned_ids = aligned_features['id'].unique() - existing_ids = data['id'].unique() - new_aligned_ids = [aid for aid in aligned_ids if aid not in existing_ids] - + aligned_ids = aligned_features["id"].unique() + existing_ids = data["id"].unique() + new_aligned_ids = [ + aid for aid in aligned_ids if aid not in existing_ids + ] + # First, merge alignment info into existing features (those that passed MS2) # Mark them with from_alignment=0 data = pd.merge( data, - aligned_features[['id', 'alignment_group_id', 'alignment_reference_feature_id', - 'alignment_reference_rt', 'alignment_pep', 'alignment_qvalue']], - on='id', - how='left' + aligned_features[ + [ + "id", + "alignment_group_id", + "alignment_reference_feature_id", + "alignment_reference_rt", + "alignment_pep", + "alignment_qvalue", + ] + ], + on="id", + how="left", ) - data['from_alignment'] = 0 - + data["from_alignment"] = 0 + # Now add features that didn't pass MS2 but have good alignment (recovered features) if new_aligned_ids: # Fetch full data for these new aligned features - aligned_ids_str = ','.join(map(str, new_aligned_ids)) + aligned_ids_str = ",".join(map(str, 
new_aligned_ids)) aligned_query = f""" SELECT RUN.ID AS id_run, PEPTIDE.ID AS id_peptide, @@ -420,7 +429,7 @@ def _read_standard_data(self, con, cfg): SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, SCORE_MS2.QVALUE AS m_score, - SCORE_MS2.PEP AS MS2_PEAKGROUP_PEP + SCORE_MS2.PEP AS pep FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -432,30 +441,44 @@ def _read_standard_data(self, con, cfg): WHERE FEATURE.ID IN ({aligned_ids_str}) """ aligned_data = pd.read_sql_query(aligned_query, con) - + # Merge alignment scores and reference info into the aligned data aligned_data = pd.merge( aligned_data, - aligned_features[['id', 'alignment_group_id', 'alignment_reference_feature_id', - 'alignment_reference_rt', 'alignment_pep', 'alignment_qvalue']], - on='id', - how='left' + aligned_features[ + [ + "id", + "alignment_group_id", + "alignment_reference_feature_id", + "alignment_reference_rt", + "alignment_pep", + "alignment_qvalue", + ] + ], + on="id", + how="left", ) - + # Mark as recovered through alignment - aligned_data['from_alignment'] = 1 - - logger.info(f"Adding {len(aligned_data)} features recovered through alignment") - + aligned_data["from_alignment"] = 1 + + logger.info( + f"Adding {len(aligned_data)} features recovered through alignment" + ) + # Combine with base data data = pd.concat([data, aligned_data], ignore_index=True) - + # Convert alignment_reference_feature_id to int64 to avoid scientific notation - if 'alignment_reference_feature_id' in data.columns: - data['alignment_reference_feature_id'] = data['alignment_reference_feature_id'].astype('Int64') - if 'alignment_group_id' in data.columns: - data['alignment_group_id'] = data['alignment_group_id'].astype('Int64') - + if "alignment_reference_feature_id" in data.columns: + data["alignment_reference_feature_id"] = data[ + "alignment_reference_feature_id" + 
].astype("Int64") + if "alignment_group_id" in data.columns: + data["alignment_group_id"] = data["alignment_group_id"].astype( + "Int64" + ) + return data def _augment_data(self, data, con, cfg): @@ -554,7 +577,7 @@ def _get_base_openswath_data(self, con, cfg): SCORE_MS2.RANK AS peak_group_rank, SCORE_MS2.SCORE AS d_score, SCORE_MS2.QVALUE AS m_score, - SCORE_MS2.PEP AS MS2_PEAKGROUP_PEP + SCORE_MS2.PEP AS pep FROM PRECURSOR INNER JOIN PRECURSOR_PEPTIDE_MAPPING ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID @@ -739,22 +762,22 @@ def _add_protein_error_data(self, data, con, cfg): def _fetch_alignment_features(self, con, cfg): """ Fetch aligned features with good alignment scores. - + This method retrieves features that have been aligned across runs and pass the alignment quality threshold. Only features whose reference feature passes the MS2 QVALUE threshold are included, ensuring that recovered peaks are aligned to high-quality reference features. 
- + Args: con: Database connection cfg: Configuration object with max_alignment_pep threshold - + Returns: DataFrame with aligned feature IDs that pass quality threshold """ max_alignment_pep = cfg.max_alignment_pep max_rs_peakgroup_qvalue = cfg.max_rs_peakgroup_qvalue - + query = f""" SELECT DENSE_RANK() OVER (ORDER BY FEATURE_MS2_ALIGNMENT.PRECURSOR_ID, FEATURE_MS2_ALIGNMENT.ALIGNMENT_ID) AS alignment_group_id, @@ -783,9 +806,11 @@ def _fetch_alignment_features(self, con, cfg): AND SCORE_ALIGNMENT.PEP < {max_alignment_pep} AND REF_SCORE_MS2.QVALUE < {max_rs_peakgroup_qvalue} """ - + df = pd.read_sql_query(query, con) - logger.info(f"Found {len(df)} aligned features passing alignment PEP < {max_alignment_pep} with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}") + logger.info( + f"Found {len(df)} aligned features passing alignment PEP < {max_alignment_pep} with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}" + ) return df ################################## @@ -1018,11 +1043,11 @@ def _get_peptide_protein_score_table_sqlite(self, con, level: str) -> str: def export_feature_scores(self, outfile: str, plot_callback): """ Export feature scores from OSW file for plotting. 
- + Detects if SCORE tables exist and adjusts behavior: - If SCORE tables exist: applies RANK==1 filtering and plots SCORE + VAR_ columns - If SCORE tables don't exist: plots only VAR_ columns - + Parameters ---------- outfile : str @@ -1032,22 +1057,24 @@ def export_feature_scores(self, outfile: str, plot_callback): Signature: plot_callback(df, outfile, level, append) """ con = sqlite3.connect(self.infile) - + try: # Check for SCORE tables has_score_ms1 = check_sqlite_table(con, "SCORE_MS1") has_score_ms2 = check_sqlite_table(con, "SCORE_MS2") has_score_transition = check_sqlite_table(con, "SCORE_TRANSITION") - + if has_score_ms1 or has_score_ms2 or has_score_transition: - logger.info("SCORE tables detected - applying RANK==1 filter and plotting SCORE + VAR_ columns") + logger.info( + "SCORE tables detected - applying RANK==1 filter and plotting SCORE + VAR_ columns" + ) else: logger.info("No SCORE tables detected - plotting only VAR_ columns") - + # Process MS1 level if available if check_sqlite_table(con, "FEATURE_MS1"): logger.info("Processing MS1 level feature scores") - + if has_score_ms1: # Scored mode: Include SCORE columns and apply RANK==1 filter ms1_query = """ @@ -1077,9 +1104,11 @@ def export_feature_scores(self, outfile: str, plot_callback): cursor.execute("PRAGMA table_info(FEATURE_MS1)") all_cols = [row[1] for row in cursor.fetchall()] var_cols = [col for col in all_cols if "VAR_" in col.upper()] - + if var_cols: - var_cols_sql = ", ".join([f"FEATURE_MS1.{col}" for col in var_cols]) + var_cols_sql = ", ".join( + [f"FEATURE_MS1.{col}" for col in var_cols] + ) ms1_query = f""" SELECT {var_cols_sql}, @@ -1091,16 +1120,16 @@ def export_feature_scores(self, outfile: str, plot_callback): else: logger.warning("No VAR_ columns found in FEATURE_MS1 table") ms1_query = None - + if ms1_query: df_ms1 = pd.read_sql_query(ms1_query, con) if not df_ms1.empty: plot_callback(df_ms1, outfile, "ms1", append=False) - + # Process MS2 level if available if 
check_sqlite_table(con, "FEATURE_MS2"): logger.info("Processing MS2 level feature scores") - + if has_score_ms2: # Scored mode: Include SCORE columns and apply RANK==1 filter ms2_query = """ @@ -1137,9 +1166,11 @@ def export_feature_scores(self, outfile: str, plot_callback): cursor.execute("PRAGMA table_info(FEATURE_MS2)") all_cols = [row[1] for row in cursor.fetchall()] var_cols = [col for col in all_cols if "VAR_" in col.upper()] - + if var_cols: - var_cols_sql = ", ".join([f"FEATURE_MS2.{col}" for col in var_cols]) + var_cols_sql = ", ".join( + [f"FEATURE_MS2.{col}" for col in var_cols] + ) ms2_query = f""" SELECT {var_cols_sql}, @@ -1151,17 +1182,17 @@ def export_feature_scores(self, outfile: str, plot_callback): else: logger.warning("No VAR_ columns found in FEATURE_MS2 table") ms2_query = None - + if ms2_query: df_ms2 = pd.read_sql_query(ms2_query, con) if not df_ms2.empty: append = check_sqlite_table(con, "FEATURE_MS1") plot_callback(df_ms2, outfile, "ms2", append=append) - + # Process transition level if available if check_sqlite_table(con, "FEATURE_TRANSITION"): logger.info("Processing transition level feature scores") - + if has_score_transition: # Scored mode: Include SCORE columns and apply RANK==1 filter transition_query = """ @@ -1197,9 +1228,11 @@ def export_feature_scores(self, outfile: str, plot_callback): cursor.execute("PRAGMA table_info(FEATURE_TRANSITION)") all_cols = [row[1] for row in cursor.fetchall()] var_cols = [col for col in all_cols if "VAR_" in col.upper()] - + if var_cols: - var_cols_sql = ", ".join([f"FEATURE_TRANSITION.{col}" for col in var_cols]) + var_cols_sql = ", ".join( + [f"FEATURE_TRANSITION.{col}" for col in var_cols] + ) transition_query = f""" SELECT {var_cols_sql}, @@ -1209,15 +1242,21 @@ def export_feature_scores(self, outfile: str, plot_callback): INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID """ else: - logger.warning("No VAR_ columns found in FEATURE_TRANSITION table") + logger.warning( + 
"No VAR_ columns found in FEATURE_TRANSITION table" + ) transition_query = None - + if transition_query: df_transition = pd.read_sql_query(transition_query, con) if not df_transition.empty: - append = check_sqlite_table(con, "FEATURE_MS1") or check_sqlite_table(con, "FEATURE_MS2") - plot_callback(df_transition, outfile, "transition", append=append) - + append = check_sqlite_table( + con, "FEATURE_MS1" + ) or check_sqlite_table(con, "FEATURE_MS2") + plot_callback( + df_transition, outfile, "transition", append=append + ) + # Process alignment level if available (no SCORE tables for alignment) if check_sqlite_table(con, "FEATURE_MS2_ALIGNMENT"): logger.info("Processing alignment level feature scores") @@ -1226,7 +1265,7 @@ def export_feature_scores(self, outfile: str, plot_callback): cursor.execute("PRAGMA table_info(FEATURE_MS2_ALIGNMENT)") all_cols = [row[1] for row in cursor.fetchall()] var_cols = [col for col in all_cols if "VAR_" in col.upper()] - + if var_cols: var_cols_sql = ", ".join(var_cols) alignment_query = f""" @@ -1237,13 +1276,17 @@ def export_feature_scores(self, outfile: str, plot_callback): """ df_alignment = pd.read_sql_query(alignment_query, con) if not df_alignment.empty: - append = (check_sqlite_table(con, "FEATURE_MS1") or - check_sqlite_table(con, "FEATURE_MS2") or - check_sqlite_table(con, "FEATURE_TRANSITION")) + append = ( + check_sqlite_table(con, "FEATURE_MS1") + or check_sqlite_table(con, "FEATURE_MS2") + or check_sqlite_table(con, "FEATURE_TRANSITION") + ) plot_callback(df_alignment, outfile, "alignment", append=append) else: - logger.warning("No VAR_ columns found in FEATURE_MS2_ALIGNMENT table") - + logger.warning( + "No VAR_ columns found in FEATURE_MS2_ALIGNMENT table" + ) + finally: con.close() @@ -1351,7 +1394,8 @@ def _prepare_column_info(self, conn) -> dict: for col in get_table_columns_with_types( self.config.infile, "FEATURE_TRANSITION" ) - if col[0] not in ["FEATURE_ID", "TRANSITION_ID"] and col[1] # Ensure column has a type 
+ if col[0] not in ["FEATURE_ID", "TRANSITION_ID"] + and col[1] # Ensure column has a type ], "score_ms1_exists": {"SCORE_MS1"}.issubset(table_names), "score_ms2_exists": {"SCORE_MS2"}.issubset(table_names), @@ -1421,9 +1465,13 @@ def _export_split_by_run(self, conn, column_info: dict) -> None: f"{transition_query_run}\nUNION ALL\n{transition_query_null}" ) logger.info(f"Exporting transition data to {transition_path}") - self._execute_copy_query(conn, combined_transition_query, transition_path) + self._execute_copy_query( + conn, combined_transition_query, transition_path + ) else: - logger.info("Skipping transition data export (include_transition_data=False)") + logger.info( + "Skipping transition data export (include_transition_data=False)" + ) # Export alignment data if exists if column_info["feature_ms2_alignment_exists"]: @@ -1451,7 +1499,9 @@ def _export_combined(self, conn, column_info: dict) -> None: transition_query = self._build_transition_query(column_info) self._execute_copy_query(conn, transition_query, transition_path) else: - logger.info("Skipping transition data export (include_transition_data=False)") + logger.info( + "Skipping transition data export (include_transition_data=False)" + ) # Export alignment data if exists if column_info["feature_ms2_alignment_exists"]: @@ -1475,7 +1525,9 @@ def _export_single_file(self, conn, column_info: dict) -> None: transition_query = self._build_combined_transition_query(column_info) conn.execute(f"INSERT INTO temp_table {transition_query}") else: - logger.info("Skipping transition data export (include_transition_data=False)") + logger.info( + "Skipping transition data export (include_transition_data=False)" + ) # Export to parquet logger.info(f"Exporting combined data to {self.config.outfile}") @@ -1630,12 +1682,16 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: {score_table_joins} """ - def _build_transition_score_columns_and_join(self, column_info: dict) -> Tuple[str, str]: + def 
_build_transition_score_columns_and_join( + self, column_info: dict + ) -> Tuple[str, str]: """Build score columns and join clause for transition scores""" score_transition_cols = "" score_transition_join = "" if column_info.get("score_transition_exists", False): - logger.debug("SCORE_TRANSITION table exists, adding score columns to transition query") + logger.debug( + "SCORE_TRANSITION table exists, adding score columns to transition query" + ) score_cols = [ "SCORE_TRANSITION.SCORE AS SCORE_TRANSITION_SCORE", "SCORE_TRANSITION.RANK AS SCORE_TRANSITION_RANK", @@ -1665,7 +1721,9 @@ def _build_transition_query(self, column_info: dict) -> str: ) # Add transition score columns if they exist - score_transition_cols, score_transition_join = self._build_transition_score_columns_and_join(column_info) + score_transition_cols, score_transition_join = ( + self._build_transition_score_columns_and_join(column_info) + ) return f""" SELECT @@ -1885,7 +1943,9 @@ def _build_combined_transition_query(self, column_info: dict) -> str: ) # Add transition score columns if they exist - score_transition_cols, score_transition_join = self._build_transition_score_columns_and_join(column_info) + score_transition_cols, score_transition_join = ( + self._build_transition_score_columns_and_join(column_info) + ) # Also need to add NULL columns for score columns that appear in precursor query as_null_score_cols = "" @@ -1895,7 +1955,7 @@ def _build_combined_transition_query(self, column_info: dict) -> str: as_null_score_cols += ", NULL AS SCORE_MS2_SCORE, NULL AS SCORE_MS2_PEAK_GROUP_RANK, NULL AS SCORE_MS2_P_VALUE, NULL AS SCORE_MS2_Q_VALUE, NULL AS SCORE_MS2_PEP" if column_info.get("score_ipf_exists", False): as_null_score_cols += ", NULL AS SCORE_IPF_PRECURSOR_PEAKGROUP_PEP, NULL AS SCORE_IPF_PEP, NULL AS SCORE_IPF_QVALUE" - + # Add NULL columns for peptide and protein score contexts for table in ["peptide", "protein"]: if column_info.get(f"score_{table}_exists", False): @@ -1977,52 +2037,64 
@@ def _create_temp_table(self, conn, column_info: dict) -> None: # Build score column types score_cols_types = [] if column_info.get("score_ms1_exists", False): - score_cols_types.extend([ - "SCORE_MS1_SCORE DOUBLE", - "SCORE_MS1_RANK INTEGER", - "SCORE_MS1_P_VALUE DOUBLE", - "SCORE_MS1_Q_VALUE DOUBLE", - "SCORE_MS1_PEP DOUBLE" - ]) + score_cols_types.extend( + [ + "SCORE_MS1_SCORE DOUBLE", + "SCORE_MS1_RANK INTEGER", + "SCORE_MS1_P_VALUE DOUBLE", + "SCORE_MS1_Q_VALUE DOUBLE", + "SCORE_MS1_PEP DOUBLE", + ] + ) if column_info.get("score_ms2_exists", False): - score_cols_types.extend([ - "SCORE_MS2_SCORE DOUBLE", - "SCORE_MS2_PEAK_GROUP_RANK INTEGER", - "SCORE_MS2_P_VALUE DOUBLE", - "SCORE_MS2_Q_VALUE DOUBLE", - "SCORE_MS2_PEP DOUBLE" - ]) + score_cols_types.extend( + [ + "SCORE_MS2_SCORE DOUBLE", + "SCORE_MS2_PEAK_GROUP_RANK INTEGER", + "SCORE_MS2_P_VALUE DOUBLE", + "SCORE_MS2_Q_VALUE DOUBLE", + "SCORE_MS2_PEP DOUBLE", + ] + ) if column_info.get("score_ipf_exists", False): - score_cols_types.extend([ - "SCORE_IPF_PRECURSOR_PEAKGROUP_PEP DOUBLE", - "SCORE_IPF_PEP DOUBLE", - "SCORE_IPF_QVALUE DOUBLE" - ]) - + score_cols_types.extend( + [ + "SCORE_IPF_PRECURSOR_PEAKGROUP_PEP DOUBLE", + "SCORE_IPF_PEP DOUBLE", + "SCORE_IPF_QVALUE DOUBLE", + ] + ) + # Add peptide and protein score columns for each context for table in ["peptide", "protein"]: if column_info.get(f"score_{table}_exists", False): for context in column_info.get(f"score_{table}_contexts", []): safe_context = context.upper().replace("-", "_") - score_cols_types.extend([ - f"SCORE_{table.upper()}_{safe_context}_SCORE DOUBLE", - f"SCORE_{table.upper()}_{safe_context}_P_VALUE DOUBLE", - f"SCORE_{table.upper()}_{safe_context}_Q_VALUE DOUBLE", - f"SCORE_{table.upper()}_{safe_context}_PEP DOUBLE" - ]) - + score_cols_types.extend( + [ + f"SCORE_{table.upper()}_{safe_context}_SCORE DOUBLE", + f"SCORE_{table.upper()}_{safe_context}_P_VALUE DOUBLE", + f"SCORE_{table.upper()}_{safe_context}_Q_VALUE DOUBLE", + 
f"SCORE_{table.upper()}_{safe_context}_PEP DOUBLE", + ] + ) + # Add transition score columns if column_info.get("score_transition_exists", False): - score_cols_types.extend([ - "SCORE_TRANSITION_SCORE DOUBLE", - "SCORE_TRANSITION_RANK INTEGER", - "SCORE_TRANSITION_P_VALUE DOUBLE", - "SCORE_TRANSITION_Q_VALUE DOUBLE", - "SCORE_TRANSITION_PEP DOUBLE" - ]) + score_cols_types.extend( + [ + "SCORE_TRANSITION_SCORE DOUBLE", + "SCORE_TRANSITION_RANK INTEGER", + "SCORE_TRANSITION_P_VALUE DOUBLE", + "SCORE_TRANSITION_Q_VALUE DOUBLE", + "SCORE_TRANSITION_PEP DOUBLE", + ] + ) # Prepend comma and space to score columns if there are any - score_cols_types_sql = (", " + ", ".join(score_cols_types)) if score_cols_types else "" + score_cols_types_sql = ( + (", " + ", ".join(score_cols_types)) if score_cols_types else "" + ) create_temp_table_query = f""" CREATE TABLE temp_table ( @@ -2082,7 +2154,7 @@ def _export_alignment_data(self, conn, path: str = None) -> None: # Check if SCORE_ALIGNMENT table exists with sqlite3.connect(self.config.infile) as sql_conn: has_score_alignment = check_sqlite_table(sql_conn, "SCORE_ALIGNMENT") - + if has_score_alignment: # Export with alignment scores query = f""" diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index a6fdb858..2d186d67 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -27,7 +27,7 @@ def __init__(self, config: ExportIOConfig): self._has_transition_scores = any( col.startswith("SCORE_TRANSITION_") for col in self._columns ) - + # Check for alignment file self._has_alignment = self._check_alignment_file_exists() @@ -77,12 +77,12 @@ def _check_alignment_file_exists(self) -> bool: Check if alignment parquet file exists. 
""" import os - + alignment_file = None - if self.infile.endswith('.parquet'): + if self.infile.endswith(".parquet"): base_name = self.infile[:-8] # Remove .parquet alignment_file = f"{base_name}_feature_alignment.parquet" - + if alignment_file and os.path.exists(alignment_file): logger.debug(f"Alignment file found: {alignment_file}") return True @@ -259,7 +259,7 @@ def _read_standard_data(self, con) -> pd.DataFrame: """ # Check if we should attempt alignment integration use_alignment = self.config.use_alignment and self._has_alignment - + # First, get features that pass MS2 QVALUE threshold query = f""" SELECT @@ -288,52 +288,51 @@ def _read_standard_data(self, con) -> pd.DataFrame: SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, SCORE_MS2_SCORE AS d_score, SCORE_MS2_Q_VALUE AS m_score, - SCORE_MS2_PEP AS MS2_PEAKGROUP_PEP + SCORE_MS2_PEP AS pep FROM data WHERE PROTEIN_ID IS NOT NULL AND SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} ORDER BY transition_group_id, peak_group_rank """ data = con.execute(query).fetchdf() - + # If alignment is enabled and alignment data is present, fetch and merge aligned features if use_alignment: aligned_features = self._fetch_alignment_features(con) - + if not aligned_features.empty: # Get full feature data for aligned features that are NOT already in base results # We only want to add features that didn't pass MS2 threshold but have good alignment - aligned_ids = aligned_features['id'].unique() - existing_ids = data['id'].unique() - new_aligned_ids = [aid for aid in aligned_ids if aid not in existing_ids] - + aligned_ids = aligned_features["id"].unique() + existing_ids = data["id"].unique() + new_aligned_ids = [ + aid for aid in aligned_ids if aid not in existing_ids + ] + # First, merge alignment info into existing features (those that passed MS2) # Mark them with from_alignment=0 - if 'alignment_pep' in aligned_features.columns: + if "alignment_pep" in aligned_features.columns: # Build list of columns to merge - 
merge_cols = ['id', 'alignment_pep', 'alignment_qvalue'] - if 'alignment_group_id' in aligned_features.columns: - merge_cols.append('alignment_group_id') - if 'alignment_reference_feature_id' in aligned_features.columns: - merge_cols.append('alignment_reference_feature_id') - if 'alignment_reference_rt' in aligned_features.columns: - merge_cols.append('alignment_reference_rt') - + merge_cols = ["id", "alignment_pep", "alignment_qvalue"] + if "alignment_group_id" in aligned_features.columns: + merge_cols.append("alignment_group_id") + if "alignment_reference_feature_id" in aligned_features.columns: + merge_cols.append("alignment_reference_feature_id") + if "alignment_reference_rt" in aligned_features.columns: + merge_cols.append("alignment_reference_rt") + data = pd.merge( - data, - aligned_features[merge_cols], - on='id', - how='left' + data, aligned_features[merge_cols], on="id", how="left" ) - data['from_alignment'] = 0 - + data["from_alignment"] = 0 + # Now add features that didn't pass MS2 but have good alignment (recovered features) if new_aligned_ids: # Fetch full data for these new aligned features from the main data view # Register aligned IDs as a temp table for the query - aligned_ids_df = pd.DataFrame({'id': new_aligned_ids}) - con.register('aligned_ids_temp', aligned_ids_df) - + aligned_ids_df = pd.DataFrame({"id": new_aligned_ids}) + con.register("aligned_ids_temp", aligned_ids_df) + aligned_query = f""" SELECT RUN_ID AS id_run, @@ -366,30 +365,36 @@ def _read_standard_data(self, con) -> pd.DataFrame: AND FEATURE_ID IN (SELECT id FROM aligned_ids_temp) """ aligned_data = con.execute(aligned_query).fetchdf() - + # Merge alignment scores and reference info into the aligned data - if 'alignment_pep' in aligned_features.columns: + if "alignment_pep" in aligned_features.columns: aligned_data = pd.merge( aligned_data, aligned_features[merge_cols], - on='id', - how='left' + on="id", + how="left", ) - + # Mark as recovered through alignment - 
aligned_data['from_alignment'] = 1 - - logger.info(f"Adding {len(aligned_data)} features recovered through alignment") - + aligned_data["from_alignment"] = 1 + + logger.info( + f"Adding {len(aligned_data)} features recovered through alignment" + ) + # Combine with base data data = pd.concat([data, aligned_data], ignore_index=True) - + # Convert alignment_reference_feature_id to int64 to avoid scientific notation - if 'alignment_reference_feature_id' in data.columns: - data['alignment_reference_feature_id'] = data['alignment_reference_feature_id'].astype('Int64') - if 'alignment_group_id' in data.columns: - data['alignment_group_id'] = data['alignment_group_id'].astype('Int64') - + if "alignment_reference_feature_id" in data.columns: + data["alignment_reference_feature_id"] = data[ + "alignment_reference_feature_id" + ].astype("Int64") + if "alignment_group_id" in data.columns: + data["alignment_group_id"] = data["alignment_group_id"].astype( + "Int64" + ) + return data def _augment_data(self, data, con) -> pd.DataFrame: @@ -682,75 +687,86 @@ def _build_feature_vars_sql(self) -> str: def _fetch_alignment_features(self, con) -> pd.DataFrame: """ Fetch aligned features with good alignment scores from alignment parquet file. - + This method checks for an alignment parquet file and retrieves features that have been aligned across runs and pass the alignment quality threshold. Only features whose reference feature passes the MS2 QVALUE threshold are included. 
- + Args: con: DuckDB connection - + Returns: DataFrame with aligned feature IDs that pass quality threshold """ import os - + # Check for alignment file - it should be named with _feature_alignment.parquet suffix alignment_file = None - if self.infile.endswith('.parquet'): + if self.infile.endswith(".parquet"): base_name = self.infile[:-8] # Remove .parquet alignment_file = f"{base_name}_feature_alignment.parquet" - + if not alignment_file or not os.path.exists(alignment_file): - logger.debug("Alignment parquet file not found, skipping alignment integration") + logger.debug( + "Alignment parquet file not found, skipping alignment integration" + ) return pd.DataFrame() - + logger.debug(f"Loading alignment data from {alignment_file}") max_alignment_pep = self.config.max_alignment_pep max_rs_peakgroup_qvalue = self.config.max_rs_peakgroup_qvalue - + try: # Load alignment data alignment_df = pd.read_parquet(alignment_file) - + # Filter to target (non-decoy) features with good alignment scores # Note: DECOY column in parquet alignment file comes from LABEL in SQLite # where LABEL=1 (DECOY=1 in parquet) means target, not decoy - if 'DECOY' in alignment_df.columns and 'VAR_XCORR_SHAPE' in alignment_df.columns: + if ( + "DECOY" in alignment_df.columns + and "VAR_XCORR_SHAPE" in alignment_df.columns + ): # This looks like the feature_alignment table structure - + # Check if we have alignment scores (PEP/QVALUE) in the file # If not, we'll need to rely on the base MS2 scores and just use alignment to identify features - has_alignment_scores = 'PEP' in alignment_df.columns or 'QVALUE' in alignment_df.columns - + has_alignment_scores = ( + "PEP" in alignment_df.columns or "QVALUE" in alignment_df.columns + ) + if has_alignment_scores: # Filter by alignment PEP threshold - pep_col = 'PEP' if 'PEP' in alignment_df.columns else None - qvalue_col = 'QVALUE' if 'QVALUE' in alignment_df.columns else None - + pep_col = "PEP" if "PEP" in alignment_df.columns else None + qvalue_col = 
"QVALUE" if "QVALUE" in alignment_df.columns else None + if pep_col: filtered_df = alignment_df[ - (alignment_df['DECOY'] == 1) & # DECOY=1 means target (from LABEL=1 in SQLite) - (alignment_df[pep_col] < max_alignment_pep) + ( + alignment_df["DECOY"] == 1 + ) # DECOY=1 means target (from LABEL=1 in SQLite) + & (alignment_df[pep_col] < max_alignment_pep) ].copy() else: # Use QVALUE if PEP not available (less ideal but workable) filtered_df = alignment_df[ - (alignment_df['DECOY'] == 1) & - (alignment_df[qvalue_col] < max_alignment_pep) + (alignment_df["DECOY"] == 1) + & (alignment_df[qvalue_col] < max_alignment_pep) ].copy() else: # No alignment scores in file - just filter by target status # In this case, we can't apply alignment quality threshold - logger.warning("Alignment file found but no PEP/QVALUE scores present. Cannot filter by alignment quality.") - filtered_df = alignment_df[alignment_df['DECOY'] == 1].copy() - + logger.warning( + "Alignment file found but no PEP/QVALUE scores present. Cannot filter by alignment quality." 
+ ) + filtered_df = alignment_df[alignment_df["DECOY"] == 1].copy() + # Now filter by reference feature MS2 QVALUE # Need to join with main data to check reference feature QVALUE - if 'REFERENCE_FEATURE_ID' in filtered_df.columns: + if "REFERENCE_FEATURE_ID" in filtered_df.columns: # Register filtered alignment data for SQL query - con.register('filtered_alignment', filtered_df) - + con.register("filtered_alignment", filtered_df) + # Query to get aligned features where reference passes MS2 QVALUE threshold # Also compute alignment_group_id using DENSE_RANK ref_check_query = f""" @@ -768,41 +784,49 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: WHERE d.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} """ filtered_df = con.execute(ref_check_query).fetchdf() - + # Rename columns to match expected format - if 'FEATURE_ID' in filtered_df.columns: + if "FEATURE_ID" in filtered_df.columns: # Start with base columns - base_cols = ['FEATURE_ID', 'PRECURSOR_ID', 'RUN_ID'] - result = filtered_df[base_cols].rename( - columns={'FEATURE_ID': 'id'} - ) - + base_cols = ["FEATURE_ID", "PRECURSOR_ID", "RUN_ID"] + result = filtered_df[base_cols].rename(columns={"FEATURE_ID": "id"}) + # Add alignment group ID if available - if 'ALIGNMENT_GROUP_ID' in filtered_df.columns: - result['alignment_group_id'] = filtered_df['ALIGNMENT_GROUP_ID'].values - + if "ALIGNMENT_GROUP_ID" in filtered_df.columns: + result["alignment_group_id"] = filtered_df[ + "ALIGNMENT_GROUP_ID" + ].values + # Add reference feature ID and RT if available - if 'REFERENCE_FEATURE_ID' in filtered_df.columns: - result['alignment_reference_feature_id'] = filtered_df['REFERENCE_FEATURE_ID'].astype('Int64').values - if 'REFERENCE_RT' in filtered_df.columns: - result['alignment_reference_rt'] = filtered_df['REFERENCE_RT'].values - + if "REFERENCE_FEATURE_ID" in filtered_df.columns: + result["alignment_reference_feature_id"] = ( + filtered_df["REFERENCE_FEATURE_ID"].astype("Int64").values + ) + if "REFERENCE_RT" in 
filtered_df.columns: + result["alignment_reference_rt"] = filtered_df[ + "REFERENCE_RT" + ].values + # Add alignment scores if available - if 'PEP' in filtered_df.columns: - result['alignment_pep'] = filtered_df['PEP'].values - if 'QVALUE' in filtered_df.columns: - result['alignment_qvalue'] = filtered_df['QVALUE'].values - + if "PEP" in filtered_df.columns: + result["alignment_pep"] = filtered_df["PEP"].values + if "QVALUE" in filtered_df.columns: + result["alignment_qvalue"] = filtered_df["QVALUE"].values + # Convert alignment_group_id to int64 - if 'alignment_group_id' in result.columns: - result['alignment_group_id'] = result['alignment_group_id'].astype('Int64') - - logger.info(f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep} " + - f"with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}") + if "alignment_group_id" in result.columns: + result["alignment_group_id"] = result[ + "alignment_group_id" + ].astype("Int64") + + logger.info( + f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep} " + + f"with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}" + ) return result except Exception as e: logger.warning(f"Could not load alignment data: {e}") - + return pd.DataFrame() ################################## @@ -850,11 +874,11 @@ def _read_for_export_scored_report(self, con) -> pd.DataFrame: def export_feature_scores(self, outfile: str, plot_callback): """ Export feature scores from Parquet file for plotting. 
- + Detects if SCORE columns exist and adjusts behavior: - If SCORE columns exist: applies RANK==1 filtering and plots SCORE + VAR_ columns - If SCORE columns don't exist: plots only VAR_ columns - + Parameters ---------- outfile : str @@ -866,28 +890,32 @@ def export_feature_scores(self, outfile: str, plot_callback): logger.info(f"Reading parquet file: {self.infile}") # Ensure pyarrow is available pa, _, _ = _ensure_pyarrow() - + # First, read only column names to identify what to load parquet_file = pa.parquet.ParquetFile(self.infile) all_columns = parquet_file.schema.names - + # Check for SCORE columns score_cols = [col for col in all_columns if col.startswith("SCORE_")] has_scores = len(score_cols) > 0 - + if has_scores: - logger.info("SCORE columns detected - applying RANK==1 filter and plotting SCORE + VAR_ columns") + logger.info( + "SCORE columns detected - applying RANK==1 filter and plotting SCORE + VAR_ columns" + ) else: logger.info("No SCORE columns detected - plotting only VAR_ columns") - + # Identify columns to read for each level ms1_cols = [col for col in all_columns if col.startswith("FEATURE_MS1_VAR_")] ms2_cols = [col for col in all_columns if col.startswith("FEATURE_MS2_VAR_")] - transition_cols = [col for col in all_columns if col.startswith("FEATURE_TRANSITION_VAR_")] - + transition_cols = [ + col for col in all_columns if col.startswith("FEATURE_TRANSITION_VAR_") + ] + # Determine which columns to read (only what we need) cols_to_read = set() - + # Add SCORE columns if they exist if has_scores: cols_to_read.update(score_cols) @@ -899,7 +927,7 @@ def export_feature_scores(self, outfile: str, plot_callback): cols_to_read.add("RUN_ID") if "PRECURSOR_ID" in all_columns: cols_to_read.add("PRECURSOR_ID") - + if ms1_cols and "PRECURSOR_DECOY" in all_columns: cols_to_read.update(ms1_cols) cols_to_read.add("PRECURSOR_DECOY") @@ -909,67 +937,75 @@ def export_feature_scores(self, outfile: str, plot_callback): if transition_cols and "TRANSITION_DECOY" 
in all_columns: cols_to_read.update(transition_cols) cols_to_read.add("TRANSITION_DECOY") - + if not cols_to_read: logger.warning("No VAR_ columns found in parquet file") return - + # Read only the columns we need logger.info(f"Reading {len(cols_to_read)} columns from parquet file") df = pd.read_parquet(self.infile, columns=list(cols_to_read)) - + # Apply RANK==1 filter if SCORE columns exist - if has_scores and 'SCORE_MS2_PEAK_GROUP_RANK' in df.columns: + if has_scores and "SCORE_MS2_PEAK_GROUP_RANK" in df.columns: logger.info(f"Filtering to RANK==1: {len(df)} -> ", end="") - df = df[df['SCORE_MS2_PEAK_GROUP_RANK'] == 1].copy() + df = df[df["SCORE_MS2_PEAK_GROUP_RANK"] == 1].copy() logger.info(f"{len(df)} rows") - + # Generate GROUP_ID if needed - if has_scores and 'GROUP_ID' not in df.columns: - if 'RUN_ID' in df.columns and 'PRECURSOR_ID' in df.columns: - df['GROUP_ID'] = df['RUN_ID'].astype(str) + '_' + df['PRECURSOR_ID'].astype(str) - + if has_scores and "GROUP_ID" not in df.columns: + if "RUN_ID" in df.columns and "PRECURSOR_ID" in df.columns: + df["GROUP_ID"] = ( + df["RUN_ID"].astype(str) + "_" + df["PRECURSOR_ID"].astype(str) + ) + # Process MS1 level if ms1_cols and "PRECURSOR_DECOY" in df.columns: logger.info("Processing MS1 level feature scores") select_cols = ms1_cols + ["PRECURSOR_DECOY"] # Add SCORE columns if present if has_scores: - score_ms1_cols = [col for col in score_cols if 'MS1' in col.upper()] + score_ms1_cols = [col for col in score_cols if "MS1" in col.upper()] select_cols.extend(score_ms1_cols) - if 'GROUP_ID' in df.columns: - select_cols.append('GROUP_ID') + if "GROUP_ID" in df.columns: + select_cols.append("GROUP_ID") ms1_df = df[select_cols].copy() ms1_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) plot_callback(ms1_df, outfile, "ms1", append=False) del ms1_df # Free memory - + # Process MS2 level if ms2_cols and "PRECURSOR_DECOY" in df.columns: logger.info("Processing MS2 level feature scores") select_cols = ms2_cols + 
["PRECURSOR_DECOY"] # Add SCORE columns if present if has_scores: - score_ms2_cols = [col for col in score_cols if 'MS2' in col.upper() or 'MS1' not in col.upper()] + score_ms2_cols = [ + col + for col in score_cols + if "MS2" in col.upper() or "MS1" not in col.upper() + ] select_cols.extend(score_ms2_cols) - if 'GROUP_ID' in df.columns: - select_cols.append('GROUP_ID') + if "GROUP_ID" in df.columns: + select_cols.append("GROUP_ID") ms2_df = df[select_cols].copy() ms2_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) append = bool(ms1_cols) plot_callback(ms2_df, outfile, "ms2", append=append) del ms2_df # Free memory - + # Process transition level if transition_cols and "TRANSITION_DECOY" in df.columns: logger.info("Processing transition level feature scores") select_cols = transition_cols + ["TRANSITION_DECOY"] # Add SCORE columns if present if has_scores: - score_transition_cols = [col for col in score_cols if 'TRANSITION' in col.upper()] + score_transition_cols = [ + col for col in score_cols if "TRANSITION" in col.upper() + ] select_cols.extend(score_transition_cols) - if 'GROUP_ID' in df.columns: - select_cols.append('GROUP_ID') + if "GROUP_ID" in df.columns: + select_cols.append("GROUP_ID") transition_df = df[select_cols].copy() transition_df.rename(columns={"TRANSITION_DECOY": "DECOY"}, inplace=True) append = bool(ms1_cols or ms2_cols) diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index cafb525b..509b8633 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -42,7 +42,7 @@ def __init__(self, config: ExportIOConfig): self._has_transition_scores = any( col.startswith("SCORE_TRANSITION_") for col in self._columns ) - + # Check for alignment file self._has_alignment = self._check_alignment_file_exists() @@ -73,14 +73,16 @@ def read(self) -> pd.DataFrame: if self.config.export_format == "library": if self._is_unscored_file(): - descr= "Files must be scored for library 
generation." + descr = "Files must be scored for library generation." logger.exception(descr) raise ValueError(descr) if not self._has_peptide_protein_global_scores(): - descr= "Files must have peptide and protein level global scores for library generation." + descr = "Files must have peptide and protein level global scores for library generation." logger.exception(descr) raise ValueError(descr) - logger.info("Reading standard OpenSWATH data for library from split Parquet files.") + logger.info( + "Reading standard OpenSWATH data for library from split Parquet files." + ) return self._read_library_data(con) if self._is_unscored_file(): @@ -100,13 +102,17 @@ def read(self) -> pd.DataFrame: return self._augment_data(data, con) finally: con.close() - + def _has_peptide_protein_global_scores(self) -> bool: """ Check if files contain peptide and protein global scores """ - has_peptide = any(col.startswith("SCORE_PEPTIDE_GLOBAL") for col in self._columns) - has_protein = any(col.startswith("SCORE_PROTEIN_GLOBAL") for col in self._columns) + has_peptide = any( + col.startswith("SCORE_PEPTIDE_GLOBAL") for col in self._columns + ) + has_protein = any( + col.startswith("SCORE_PROTEIN_GLOBAL") for col in self._columns + ) return has_peptide and has_protein def _is_unscored_file(self) -> bool: @@ -118,18 +124,18 @@ def _is_unscored_file(self) -> bool: def _check_alignment_file_exists(self) -> bool: """ Check if alignment parquet file exists for split parquet format. 
- + For split parquet, alignment file is at the parent directory level: - infile is a directory containing *.oswpq subdirectories - alignment file is at infile/feature_alignment.parquet """ import os - + alignment_file = None if os.path.isdir(self.infile): # Split parquet format: alignment file is in the parent directory alignment_file = os.path.join(self.infile, "feature_alignment.parquet") - + if alignment_file and os.path.exists(alignment_file): logger.debug(f"Alignment file found: {alignment_file}") return True @@ -315,14 +321,16 @@ def _read_library_data(self, con) -> pd.DataFrame: im_col = "p.PRECURSOR_LIBRARY_DRIFT_TIME" if self.config.intensity_calibration: - intensity_col = 't.FEATURE_TRANSITION_AREA_INTENSITY' + intensity_col = "t.FEATURE_TRANSITION_AREA_INTENSITY" else: - intensity_col = 't.TRANSITION_LIBRARY_INTENSITY' - + intensity_col = "t.TRANSITION_LIBRARY_INTENSITY" + if self.config.keep_decoys: decoy_query = "" else: - decoy_query ="p.PRECURSOR_DECOY is false and t.TRANSITION_DECOY is false and" + decoy_query = ( + "p.PRECURSOR_DECOY is false and t.TRANSITION_DECOY is false and" + ) query = f""" SELECT @@ -359,14 +367,14 @@ def _read_library_data(self, con) -> pd.DataFrame: t.TRANSITION_CHARGE, t.TRANSITION_TYPE, t.TRANSITION_ORDINAL, t.TRANSITION_ID, p.PRECURSOR_DECOY, p.RUN_ID, p.FEATURE_MS2_AREA_INTENSITY """ return con.execute(query).fetchdf() - + def _read_standard_data(self, con) -> pd.DataFrame: """ Read standard OpenSWATH data without IPF from split files, optionally including aligned features. 
""" # Check if we should attempt alignment integration use_alignment = self.config.use_alignment and self._has_alignment - + # First, get features that pass MS2 QVALUE threshold query = f""" SELECT @@ -395,52 +403,51 @@ def _read_standard_data(self, con) -> pd.DataFrame: p.SCORE_MS2_PEAK_GROUP_RANK AS peak_group_rank, p.SCORE_MS2_SCORE AS d_score, p.SCORE_MS2_Q_VALUE AS m_score, - p.SCORE_MS2_PEP AS MS2_PEAKGROUP_PEP + p.SCORE_MS2_PEP AS pep FROM precursors p WHERE p.PROTEIN_ID IS NOT NULL AND p.SCORE_MS2_Q_VALUE < {self.config.max_rs_peakgroup_qvalue} ORDER BY transition_group_id, peak_group_rank """ data = con.execute(query).fetchdf() - + # If alignment is enabled and alignment data is present, fetch and merge aligned features if use_alignment: aligned_features = self._fetch_alignment_features(con) - + if not aligned_features.empty: # Get full feature data for aligned features that are NOT already in base results # We only want to add features that didn't pass MS2 threshold but have good alignment - aligned_ids = aligned_features['id'].unique() - existing_ids = data['id'].unique() - new_aligned_ids = [aid for aid in aligned_ids if aid not in existing_ids] - + aligned_ids = aligned_features["id"].unique() + existing_ids = data["id"].unique() + new_aligned_ids = [ + aid for aid in aligned_ids if aid not in existing_ids + ] + # First, merge alignment info into existing features (those that passed MS2) # Mark them with from_alignment=0 - if 'alignment_pep' in aligned_features.columns: + if "alignment_pep" in aligned_features.columns: # Build list of columns to merge - merge_cols = ['id', 'alignment_pep', 'alignment_qvalue'] - if 'alignment_group_id' in aligned_features.columns: - merge_cols.append('alignment_group_id') - if 'alignment_reference_feature_id' in aligned_features.columns: - merge_cols.append('alignment_reference_feature_id') - if 'alignment_reference_rt' in aligned_features.columns: - merge_cols.append('alignment_reference_rt') - + merge_cols = ["id", 
"alignment_pep", "alignment_qvalue"] + if "alignment_group_id" in aligned_features.columns: + merge_cols.append("alignment_group_id") + if "alignment_reference_feature_id" in aligned_features.columns: + merge_cols.append("alignment_reference_feature_id") + if "alignment_reference_rt" in aligned_features.columns: + merge_cols.append("alignment_reference_rt") + data = pd.merge( - data, - aligned_features[merge_cols], - on='id', - how='left' + data, aligned_features[merge_cols], on="id", how="left" ) - data['from_alignment'] = 0 - + data["from_alignment"] = 0 + # Now add features that didn't pass MS2 but have good alignment (recovered features) if new_aligned_ids: # Fetch full data for these new aligned features from the main data view # Register aligned IDs as a temp table for the query - aligned_ids_df = pd.DataFrame({'id': new_aligned_ids}) - con.register('aligned_ids_temp', aligned_ids_df) - + aligned_ids_df = pd.DataFrame({"id": new_aligned_ids}) + con.register("aligned_ids_temp", aligned_ids_df) + aligned_query = f""" SELECT p.RUN_ID AS id_run, @@ -473,30 +480,36 @@ def _read_standard_data(self, con) -> pd.DataFrame: AND p.FEATURE_ID IN (SELECT id FROM aligned_ids_temp) """ aligned_data = con.execute(aligned_query).fetchdf() - + # Merge alignment scores and reference info into the aligned data - if 'alignment_pep' in aligned_features.columns: + if "alignment_pep" in aligned_features.columns: aligned_data = pd.merge( aligned_data, aligned_features[merge_cols], - on='id', - how='left' + on="id", + how="left", ) - + # Mark as recovered through alignment - aligned_data['from_alignment'] = 1 - - logger.info(f"Adding {len(aligned_data)} features recovered through alignment") - + aligned_data["from_alignment"] = 1 + + logger.info( + f"Adding {len(aligned_data)} features recovered through alignment" + ) + # Combine with base data data = pd.concat([data, aligned_data], ignore_index=True) - + # Convert alignment_reference_feature_id to int64 to avoid scientific notation - 
if 'alignment_reference_feature_id' in data.columns: - data['alignment_reference_feature_id'] = data['alignment_reference_feature_id'].astype('Int64') - if 'alignment_group_id' in data.columns: - data['alignment_group_id'] = data['alignment_group_id'].astype('Int64') - + if "alignment_reference_feature_id" in data.columns: + data["alignment_reference_feature_id"] = data[ + "alignment_reference_feature_id" + ].astype("Int64") + if "alignment_group_id" in data.columns: + data["alignment_group_id"] = data["alignment_group_id"].astype( + "Int64" + ) + return data def _augment_data(self, data, con) -> pd.DataFrame: @@ -770,72 +783,83 @@ def _add_protein_error_data(self, data, con) -> pd.DataFrame: def _fetch_alignment_features(self, con) -> pd.DataFrame: """ Fetch aligned features with good alignment scores from alignment parquet file. - + This method checks for an alignment parquet file and retrieves features that have been aligned across runs and pass the alignment quality threshold. Only features whose reference feature passes the MS2 QVALUE threshold are included. 
- + Args: con: DuckDB connection - + Returns: DataFrame with aligned feature IDs that pass quality threshold """ import os - + # For split parquet, alignment file is at parent directory level alignment_file = os.path.join(self.infile, "feature_alignment.parquet") - + if not os.path.exists(alignment_file): - logger.debug("Alignment parquet file not found, skipping alignment integration") + logger.debug( + "Alignment parquet file not found, skipping alignment integration" + ) return pd.DataFrame() - + logger.debug(f"Loading alignment data from {alignment_file}") max_alignment_pep = self.config.max_alignment_pep max_rs_peakgroup_qvalue = self.config.max_rs_peakgroup_qvalue - + try: # Load alignment data alignment_df = pd.read_parquet(alignment_file) - + # Filter to target (non-decoy) features with good alignment scores # Note: DECOY column in parquet alignment file comes from LABEL in SQLite # where LABEL=1 (DECOY=1 in parquet) means target, not decoy - if 'DECOY' in alignment_df.columns and 'VAR_XCORR_SHAPE' in alignment_df.columns: + if ( + "DECOY" in alignment_df.columns + and "VAR_XCORR_SHAPE" in alignment_df.columns + ): # This looks like the feature_alignment table structure - + # Check if we have alignment scores (PEP/QVALUE) in the file # If not, we'll need to rely on the base MS2 scores and just use alignment to identify features - has_alignment_scores = 'PEP' in alignment_df.columns or 'QVALUE' in alignment_df.columns - + has_alignment_scores = ( + "PEP" in alignment_df.columns or "QVALUE" in alignment_df.columns + ) + if has_alignment_scores: # Filter by alignment PEP threshold - pep_col = 'PEP' if 'PEP' in alignment_df.columns else None - qvalue_col = 'QVALUE' if 'QVALUE' in alignment_df.columns else None - + pep_col = "PEP" if "PEP" in alignment_df.columns else None + qvalue_col = "QVALUE" if "QVALUE" in alignment_df.columns else None + if pep_col: filtered_df = alignment_df[ - (alignment_df['DECOY'] == 1) & # DECOY=1 means target (from LABEL=1 in SQLite) 
- (alignment_df[pep_col] < max_alignment_pep) + ( + alignment_df["DECOY"] == 1 + ) # DECOY=1 means target (from LABEL=1 in SQLite) + & (alignment_df[pep_col] < max_alignment_pep) ].copy() else: # Use QVALUE if PEP not available (less ideal but workable) filtered_df = alignment_df[ - (alignment_df['DECOY'] == 1) & - (alignment_df[qvalue_col] < max_alignment_pep) + (alignment_df["DECOY"] == 1) + & (alignment_df[qvalue_col] < max_alignment_pep) ].copy() else: # No alignment scores in file - just filter by target status # In this case, we can't apply alignment quality threshold - logger.warning("Alignment file found but no PEP/QVALUE scores present. Cannot filter by alignment quality.") - filtered_df = alignment_df[alignment_df['DECOY'] == 1].copy() - + logger.warning( + "Alignment file found but no PEP/QVALUE scores present. Cannot filter by alignment quality." + ) + filtered_df = alignment_df[alignment_df["DECOY"] == 1].copy() + # Now filter by reference feature MS2 QVALUE # Need to join with precursors data to check reference feature QVALUE - if 'REFERENCE_FEATURE_ID' in filtered_df.columns: + if "REFERENCE_FEATURE_ID" in filtered_df.columns: # Register filtered alignment data for SQL query - con.register('filtered_alignment', filtered_df) - + con.register("filtered_alignment", filtered_df) + # Query to get aligned features where reference passes MS2 QVALUE threshold # Also compute alignment_group_id using DENSE_RANK ref_check_query = f""" @@ -853,41 +877,49 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: WHERE p.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} """ filtered_df = con.execute(ref_check_query).fetchdf() - + # Rename columns to match expected format - if 'FEATURE_ID' in filtered_df.columns: + if "FEATURE_ID" in filtered_df.columns: # Start with base columns - base_cols = ['FEATURE_ID', 'PRECURSOR_ID', 'RUN_ID'] - result = filtered_df[base_cols].rename( - columns={'FEATURE_ID': 'id'} - ) - + base_cols = ["FEATURE_ID", "PRECURSOR_ID", "RUN_ID"] 
+ result = filtered_df[base_cols].rename(columns={"FEATURE_ID": "id"}) + # Add alignment group ID if available - if 'ALIGNMENT_GROUP_ID' in filtered_df.columns: - result['alignment_group_id'] = filtered_df['ALIGNMENT_GROUP_ID'].values - + if "ALIGNMENT_GROUP_ID" in filtered_df.columns: + result["alignment_group_id"] = filtered_df[ + "ALIGNMENT_GROUP_ID" + ].values + # Add reference feature ID and RT if available - if 'REFERENCE_FEATURE_ID' in filtered_df.columns: - result['alignment_reference_feature_id'] = filtered_df['REFERENCE_FEATURE_ID'].astype('Int64').values - if 'REFERENCE_RT' in filtered_df.columns: - result['alignment_reference_rt'] = filtered_df['REFERENCE_RT'].values - + if "REFERENCE_FEATURE_ID" in filtered_df.columns: + result["alignment_reference_feature_id"] = ( + filtered_df["REFERENCE_FEATURE_ID"].astype("Int64").values + ) + if "REFERENCE_RT" in filtered_df.columns: + result["alignment_reference_rt"] = filtered_df[ + "REFERENCE_RT" + ].values + # Add alignment scores if available - if 'PEP' in filtered_df.columns: - result['alignment_pep'] = filtered_df['PEP'].values - if 'QVALUE' in filtered_df.columns: - result['alignment_qvalue'] = filtered_df['QVALUE'].values - + if "PEP" in filtered_df.columns: + result["alignment_pep"] = filtered_df["PEP"].values + if "QVALUE" in filtered_df.columns: + result["alignment_qvalue"] = filtered_df["QVALUE"].values + # Convert alignment_group_id to int64 - if 'alignment_group_id' in result.columns: - result['alignment_group_id'] = result['alignment_group_id'].astype('Int64') - - logger.info(f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep} " + - f"with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}") + if "alignment_group_id" in result.columns: + result["alignment_group_id"] = result[ + "alignment_group_id" + ].astype("Int64") + + logger.info( + f"Found {len(result)} aligned features passing alignment PEP < {max_alignment_pep} " + + f"with reference features 
passing MS2 QVALUE < {max_rs_peakgroup_qvalue}" + ) return result except Exception as e: logger.warning(f"Could not load alignment data: {e}") - + return pd.DataFrame() def _get_ms1_score_info(self) -> tuple[str, str]: @@ -916,11 +948,11 @@ def _build_feature_vars_sql(self) -> str: def export_feature_scores(self, outfile: str, plot_callback): """ Export feature scores from split Parquet directory for plotting. - + Detects if SCORE columns exist and adjusts behavior: - If SCORE columns exist: applies RANK==1 filtering and plots SCORE + VAR_ columns - If SCORE columns don't exist: plots only VAR_ columns - + Parameters ---------- outfile : str @@ -931,30 +963,32 @@ def export_feature_scores(self, outfile: str, plot_callback): """ # Ensure pyarrow is available pa, _, _ = _ensure_pyarrow() - + # Read precursor features - only necessary columns precursor_file = os.path.join(self.infile, "precursors_features.parquet") logger.info(f"Reading precursor features from: {precursor_file}") - + # First check what columns are available precursor_parquet = pa.parquet.ParquetFile(precursor_file) all_columns = precursor_parquet.schema.names - + # Check for SCORE columns score_cols = [col for col in all_columns if col.startswith("SCORE_")] has_scores = len(score_cols) > 0 - + if has_scores: - logger.info("SCORE columns detected - applying RANK==1 filter and plotting SCORE + VAR_ columns") + logger.info( + "SCORE columns detected - applying RANK==1 filter and plotting SCORE + VAR_ columns" + ) else: logger.info("No SCORE columns detected - plotting only VAR_ columns") - + # Identify columns to read ms1_cols = [col for col in all_columns if col.startswith("FEATURE_MS1_VAR_")] ms2_cols = [col for col in all_columns if col.startswith("FEATURE_MS2_VAR_")] - + cols_to_read = set() - + # Add SCORE columns if they exist if has_scores: cols_to_read.update(score_cols) @@ -966,113 +1000,144 @@ def export_feature_scores(self, outfile: str, plot_callback): cols_to_read.add("RUN_ID") if 
"PRECURSOR_ID" in all_columns: cols_to_read.add("PRECURSOR_ID") - + if ms1_cols and "PRECURSOR_DECOY" in all_columns: cols_to_read.update(ms1_cols) cols_to_read.add("PRECURSOR_DECOY") if ms2_cols and "PRECURSOR_DECOY" in all_columns: cols_to_read.update(ms2_cols) cols_to_read.add("PRECURSOR_DECOY") - + if cols_to_read: logger.info(f"Reading {len(cols_to_read)} columns from precursor features") df_precursor = pd.read_parquet(precursor_file, columns=list(cols_to_read)) - + # Apply RANK==1 filter if SCORE columns exist - if has_scores and 'SCORE_MS2_PEAK_GROUP_RANK' in df_precursor.columns: + if has_scores and "SCORE_MS2_PEAK_GROUP_RANK" in df_precursor.columns: logger.info(f"Filtering to RANK==1: {len(df_precursor)} -> ", end="") - df_precursor = df_precursor[df_precursor['SCORE_MS2_PEAK_GROUP_RANK'] == 1].copy() + df_precursor = df_precursor[ + df_precursor["SCORE_MS2_PEAK_GROUP_RANK"] == 1 + ].copy() logger.info(f"{len(df_precursor)} rows") - + # Generate GROUP_ID if needed - if has_scores and 'GROUP_ID' not in df_precursor.columns: - if 'RUN_ID' in df_precursor.columns and 'PRECURSOR_ID' in df_precursor.columns: - df_precursor['GROUP_ID'] = df_precursor['RUN_ID'].astype(str) + '_' + df_precursor['PRECURSOR_ID'].astype(str) - + if has_scores and "GROUP_ID" not in df_precursor.columns: + if ( + "RUN_ID" in df_precursor.columns + and "PRECURSOR_ID" in df_precursor.columns + ): + df_precursor["GROUP_ID"] = ( + df_precursor["RUN_ID"].astype(str) + + "_" + + df_precursor["PRECURSOR_ID"].astype(str) + ) + # Process MS1 level if ms1_cols and "PRECURSOR_DECOY" in df_precursor.columns: logger.info("Processing MS1 level feature scores") select_cols = ms1_cols + ["PRECURSOR_DECOY"] # Add SCORE columns if present if has_scores: - score_ms1_cols = [col for col in score_cols if 'MS1' in col.upper()] + score_ms1_cols = [col for col in score_cols if "MS1" in col.upper()] select_cols.extend(score_ms1_cols) - if 'GROUP_ID' in df_precursor.columns: - select_cols.append('GROUP_ID') + 
if "GROUP_ID" in df_precursor.columns: + select_cols.append("GROUP_ID") ms1_df = df_precursor[select_cols].copy() ms1_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) plot_callback(ms1_df, outfile, "ms1", append=False) del ms1_df # Free memory - + # Process MS2 level if ms2_cols and "PRECURSOR_DECOY" in df_precursor.columns: logger.info("Processing MS2 level feature scores") select_cols = ms2_cols + ["PRECURSOR_DECOY"] # Add SCORE columns if present if has_scores: - score_ms2_cols = [col for col in score_cols if 'MS2' in col.upper() or 'MS1' not in col.upper()] + score_ms2_cols = [ + col + for col in score_cols + if "MS2" in col.upper() or "MS1" not in col.upper() + ] select_cols.extend(score_ms2_cols) - if 'GROUP_ID' in df_precursor.columns: - select_cols.append('GROUP_ID') + if "GROUP_ID" in df_precursor.columns: + select_cols.append("GROUP_ID") ms2_df = df_precursor[select_cols].copy() ms2_df.rename(columns={"PRECURSOR_DECOY": "DECOY"}, inplace=True) append = bool(ms1_cols) plot_callback(ms2_df, outfile, "ms2", append=append) del ms2_df # Free memory - + del df_precursor # Free memory - + # Read transition features if available transition_file = os.path.join(self.infile, "transition_features.parquet") if os.path.exists(transition_file): logger.info(f"Reading transition features from: {transition_file}") - + # Check what columns are available transition_parquet = pa.parquet.ParquetFile(transition_file) transition_all_columns = transition_parquet.schema.names - transition_cols = [col for col in transition_all_columns if col.startswith("FEATURE_TRANSITION_VAR_")] - + transition_cols = [ + col + for col in transition_all_columns + if col.startswith("FEATURE_TRANSITION_VAR_") + ] + # Check for SCORE columns in transition file - transition_score_cols = [col for col in transition_all_columns if col.startswith("SCORE_") and 'TRANSITION' in col.upper()] + transition_score_cols = [ + col + for col in transition_all_columns + if col.startswith("SCORE_") and 
"TRANSITION" in col.upper() + ] has_transition_scores = len(transition_score_cols) > 0 - + if transition_cols and "TRANSITION_DECOY" in transition_all_columns: # Read only necessary columns cols_to_read = transition_cols + ["TRANSITION_DECOY"] if has_transition_scores: cols_to_read.extend(transition_score_cols) - if 'GROUP_ID' in transition_all_columns: - cols_to_read.append('GROUP_ID') - - logger.info(f"Reading {len(cols_to_read)} columns from transition features") + if "GROUP_ID" in transition_all_columns: + cols_to_read.append("GROUP_ID") + + logger.info( + f"Reading {len(cols_to_read)} columns from transition features" + ) df_transition = pd.read_parquet(transition_file, columns=cols_to_read) - + logger.info("Processing transition level feature scores") transition_df = df_transition.copy() - transition_df.rename(columns={"TRANSITION_DECOY": "DECOY"}, inplace=True) + transition_df.rename( + columns={"TRANSITION_DECOY": "DECOY"}, inplace=True + ) append = bool(ms1_cols or ms2_cols) plot_callback(transition_df, outfile, "transition", append=append) del transition_df, df_transition # Free memory - + # Read alignment features if available alignment_file = os.path.join(self.infile, "feature_alignment.parquet") if os.path.exists(alignment_file): logger.info(f"Reading alignment features from: {alignment_file}") - + # Check what columns are available alignment_parquet = pa.parquet.ParquetFile(alignment_file) alignment_all_columns = alignment_parquet.schema.names var_cols = [col for col in alignment_all_columns if col.startswith("VAR_")] - + if var_cols and "DECOY" in alignment_all_columns: # Read only necessary columns cols_to_read = var_cols + ["DECOY"] - logger.info(f"Reading {len(cols_to_read)} columns from alignment features") + logger.info( + f"Reading {len(cols_to_read)} columns from alignment features" + ) df_alignment = pd.read_parquet(alignment_file, columns=cols_to_read) - + logger.info("Processing alignment level feature scores") alignment_df = 
df_alignment[var_cols + ["DECOY"]].copy() - append = bool(ms1_cols or ms2_cols or (os.path.exists(transition_file) and transition_cols)) + append = bool( + ms1_cols + or ms2_cols + or (os.path.exists(transition_file) and transition_cols) + ) plot_callback(alignment_df, outfile, "alignment", append=append) del alignment_df, df_alignment # Free memory diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[False-disable].out b/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[False-disable].out index 54313281..c59ca7cd 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[False-disable].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[False-disable].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 
0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 
AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 
+95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 24 columns] +[100 rows x 25 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[True-disable].out b/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[True-disable].out index 87ebae17..8f4829a2 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[True-disable].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_ipf_analysis[True-disable].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id 
leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 1343_b4_1;1359_y3_1;1344_b5_1;1360_y4_1;1345_b... 969.0;36907.0;1426.0;6131.0;2071.0;11984.0;559... 10322.0;251772.0;9915.0;43365.0;15040.0;80527.... 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR NaN NaN NaN 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR NaN NaN NaN 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR NaN NaN NaN 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR NaN NaN NaN 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 1803_b3_1;1813_y3_1;1821_y8_2;1804_b4_1;1814_y... 20367.0;19321.0;4323.0;3974.0;17424.0;5191.0;1... 89094.0;85016.0;20487.0;17689.0;74968.0;25322.... 
269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR NaN NaN NaN 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR NaN NaN NaN 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR NaN NaN NaN 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR NaN NaN NaN 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 1343_b4_1;1359_y3_1;1344_b5_1;1360_y4_1;1345_b... 969.0;36907.0;1426.0;6131.0;2071.0;11984.0;559... 10322.0;251772.0;9915.0;43365.0;15040.0;80527.... 
117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR NaN NaN NaN 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR NaN NaN NaN 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR NaN NaN NaN 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR NaN NaN NaN 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 1803_b3_1;1813_y3_1;1821_y8_2;1804_b4_1;1814_y... 20367.0;19321.0;4323.0;3974.0;17424.0;5191.0;1... 89094.0;85016.0;20487.0;17689.0;74968.0;25322.... 
269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR NaN NaN NaN 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR NaN NaN NaN 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR NaN NaN NaN 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR NaN NaN NaN 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-False].out index 54313281..c59ca7cd 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth 
run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 
-95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 
260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 
0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 24 columns] +[100 rows x 25 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-True].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-True].out index 3c532e11..9d5ced62 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-True].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-False-True].out @@ -1,3 +1,3 @@ Empty DataFrame -Columns: [Charge, FullPeptideName, Intensity, ProteinName, RT, Sequence, aggr_prec_Peak_Apex, aggr_prec_Peak_Area, assay_iRT, assay_rt, d_score, decoy, delta_iRT, delta_rt, filename, iRT, id, leftWidth, m_score, m_score_protein_experiment_wide, m_score_protein_global, m_score_protein_run_specific, mz, peak_group_rank, rightWidth, run_id, transition_group_id] +Columns: [Charge, FullPeptideName, Intensity, ProteinName, RT, Sequence, aggr_prec_Peak_Apex, aggr_prec_Peak_Area, assay_iRT, assay_rt, d_score, decoy, delta_iRT, delta_rt, filename, iRT, id, leftWidth, m_score, m_score_protein_experiment_wide, m_score_protein_global, m_score_protein_run_specific, mz, peak_group_rank, pep, rightWidth, run_id, transition_group_id] Index: [] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-True-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-True-False].out index b7ec4618..b3014b40 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-True-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-False-True-False].out @@ -1,14 +1,14 @@ - 
Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.00 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.00 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.20 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.30 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.00 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 
-95 2 YTSDPDVTSVGPSK(UniMod:259) 230755.0 AQUA4SWATH_HMLangeA 1802.63 YTSDPDVTSVGPSK 194786.2681 924585.00 1.5 1736.8945 5.6930 0 2.0327 65.7355 napedro_L120420_010_SW.mzXML.gz 3.5327 6243564357659176748 1787.0300 0.0033 0.0033 0.0033 0.0033 730.8534 1 1821.1700 -8670811102654834151 20 -96 2 YTSDPDVTSVGPSK(UniMod:259) 4683.0 AQUA4SWATH_HMLangeA 1727.20 YTSDPDVTSVGPSK 2721.4443 21561.90 1.5 1736.8978 3.6145 0 -0.1546 -9.6978 napedro_L120420_010_SW.mzXML.gz 1.3454 8621961886436053858 1711.9200 0.0091 0.0033 0.0033 0.0033 730.8534 2 1749.4700 -8670811102654834151 20 -97 2 YTSDPDVTSVGPSK(UniMod:259) 2042.0 AQUA4SWATH_HMLangeA 1846.77 YTSDPDVTSVGPSK 670.7683 6339.12 1.5 1736.8956 0.4186 0 3.3126 109.8744 napedro_L120420_010_SW.mzXML.gz 4.8126 -5596989166542619604 1834.8199 0.2507 0.0033 0.0033 0.0033 730.8534 3 1858.7200 -8670811102654834151 20 -98 2 YTSDPDVTSVGPSK(UniMod:259) 1801.0 AQUA4SWATH_HMLangeA 1657.80 YTSDPDVTSVGPSK 1076.2317 4819.60 1.5 1736.8914 0.2832 0 -2.1667 -79.0914 napedro_L120420_010_SW.mzXML.gz -0.6667 -2650714328790198942 1653.8800 0.4000 0.0033 0.0033 0.0033 730.8534 4 1681.2000 -8670811102654834151 20 -99 2 YTSDPDVTSVGPSK(UniMod:259) 32774.0 AQUA4SWATH_HMLangeA 1930.69 YTSDPDVTSVGPSK 76935.7696 571655.00 1.5 1736.8936 -5.7742 0 5.7460 193.7964 napedro_L120420_010_SW.mzXML.gz 7.2460 9040480247797844482 1906.5100 0.4692 0.0033 0.0033 0.0033 730.8534 5 1971.3800 -8670811102654834151 20 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 117220.7482 854645.00 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 
728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 8790.7812 104006.00 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 5750.4716 73215.20 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 4036.5600 25862.30 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 10419.7435 241873.00 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 
+95 2 YTSDPDVTSVGPSK(UniMod:259) 230755.0 AQUA4SWATH_HMLangeA 1802.63 YTSDPDVTSVGPSK 194786.2681 924585.00 1.5 1736.8945 5.6930 0 2.0327 65.7355 napedro_L120420_010_SW.mzXML.gz 3.5327 6243564357659176748 1787.0300 0.0033 0.0033 0.0033 0.0033 730.8534 1 0.0031 1821.1700 -8670811102654834151 20 +96 2 YTSDPDVTSVGPSK(UniMod:259) 4683.0 AQUA4SWATH_HMLangeA 1727.20 YTSDPDVTSVGPSK 2721.4443 21561.90 1.5 1736.8978 3.6145 0 -0.1546 -9.6978 napedro_L120420_010_SW.mzXML.gz 1.3454 8621961886436053858 1711.9200 0.0091 0.0033 0.0033 0.0033 730.8534 2 0.1143 1749.4700 -8670811102654834151 20 +97 2 YTSDPDVTSVGPSK(UniMod:259) 2042.0 AQUA4SWATH_HMLangeA 1846.77 YTSDPDVTSVGPSK 670.7683 6339.12 1.5 1736.8956 0.4186 0 3.3126 109.8744 napedro_L120420_010_SW.mzXML.gz 4.8126 -5596989166542619604 1834.8199 0.2507 0.0033 0.0033 0.0033 730.8534 3 1.0000 1858.7200 -8670811102654834151 20 +98 2 YTSDPDVTSVGPSK(UniMod:259) 1801.0 AQUA4SWATH_HMLangeA 1657.80 YTSDPDVTSVGPSK 1076.2317 4819.60 1.5 1736.8914 0.2832 0 -2.1667 -79.0914 napedro_L120420_010_SW.mzXML.gz -0.6667 -2650714328790198942 1653.8800 0.4000 0.0033 0.0033 0.0033 730.8534 4 1.0000 1681.2000 -8670811102654834151 20 +99 2 YTSDPDVTSVGPSK(UniMod:259) 32774.0 AQUA4SWATH_HMLangeA 1930.69 YTSDPDVTSVGPSK 76935.7696 571655.00 1.5 1736.8936 -5.7742 0 5.7460 193.7964 napedro_L120420_010_SW.mzXML.gz 7.2460 9040480247797844482 1906.5100 0.4692 0.0033 0.0033 0.0033 730.8534 5 1.0000 1971.3800 -8670811102654834151 20 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-True-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-True-False-False].out index 90381cbe..fa31d5cb 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-True-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[osw-True-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT 
Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 
4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 
10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 
8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA 2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 
269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA 2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 
10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-False].out index b77b45eb..f117c36d 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1.0 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2.0 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3.0 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4.0 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5.0 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1.0 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2.0 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3.0 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4.0 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5.0 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1.0 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2.0 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3.0 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4.0 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5.0 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... 
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1.0 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2.0 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3.0 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4.0 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5.0 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 24 columns] +[100 rows x 25 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-True].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-True].out index 85769b21..2520c48f 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-True].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-False-True].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_protein_experiment_wide m_score_protein_run_specific mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0625 0.0625 728.8795 1.0 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0625 0.0625 728.8795 2.0 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0625 0.0625 728.8795 3.0 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0625 0.0625 728.8795 4.0 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0625 0.0625 728.8795 5.0 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0625 0.0625 612.3184 1.0 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0625 0.0625 612.3184 2.0 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0625 0.0625 612.3184 3.0 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0625 0.0625 612.3184 4.0 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0625 0.0625 612.3184 5.0 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_protein_experiment_wide m_score_protein_run_specific mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0625 0.0625 728.8795 1.0 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0625 0.0625 728.8795 2.0 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0625 0.0625 728.8795 3.0 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0625 0.0625 728.8795 4.0 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0625 0.0625 728.8795 5.0 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0625 0.0625 612.3184 1.0 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0625 0.0625 612.3184 2.0 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0625 0.0625 612.3184 3.0 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0625 0.0625 612.3184 4.0 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0625 0.0625 612.3184 5.0 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 26 columns] +[100 rows x 27 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-True-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-True-False].out index e74a9910..20406b3f 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-True-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-False-True-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 728.8795 1.0 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2.0 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3.0 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4.0 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5.0 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0033 0.0033 0.0033 612.3184 1.0 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0033 0.0033 0.0033 612.3184 2.0 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0033 0.0033 0.0033 612.3184 3.0 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0033 0.0033 0.0033 612.3184 4.0 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0033 0.0033 0.0033 612.3184 5.0 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 728.8795 1.0 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2605.74 ADSTGTLVITDPTR 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2.0 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3.0 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4.0 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5.0 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0033 0.0033 0.0033 612.3184 1.0 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2317.38 VYVYAVDQTR 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0033 0.0033 0.0033 612.3184 2.0 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0033 0.0033 0.0033 612.3184 3.0 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0033 0.0033 0.0033 612.3184 4.0 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0033 0.0033 0.0033 612.3184 5.0 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-True-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-True-False-False].out index 6a1fed75..f8f9d27e 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-True-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[parquet-True-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 
ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1.0 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2.0 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3.0 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4.0 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 
30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5.0 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1.0 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2.0 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3.0 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 
6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4.0 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5.0 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7500 854645.0 26.5 2595.5788 5.7301 False 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1.0 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 8790.7810 104006.0 26.5 2595.5733 1.2404 False 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2.0 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4717 73215.2 26.5 2595.5750 0.8151 False 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3.0 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 4036.5600 25862.3 26.5 2595.5754 0.6777 False 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4.0 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7430 241873.0 26.5 2595.5778 -0.1013 False 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5.0 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 269150.5600 1192530.0 16.3 2245.2318 5.6532 False -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1.0 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7600 89588.0 16.3 2245.2345 0.3846 False 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2.0 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 10568.0810 85676.6 16.3 2245.2373 0.3343 False -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3.0 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 6480.8610 75465.1 16.3 2245.2345 -0.0155 False 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4.0 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 
10810.2700 56553.9 16.3 2245.2307 -1.8782 False 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5.0 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-False].out index 6a3c0911..7540ce7c 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... 
... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 24 columns] +[100 rows x 25 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-True].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-True].out index dffb2b54..13284eb7 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-True].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-False-True].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_protein_experiment_wide m_score_protein_run_specific mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0625 0.0625 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0625 0.0625 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0625 0.0625 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0625 0.0625 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0625 0.0625 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0625 0.0625 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0625 0.0625 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0625 0.0625 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0625 0.0625 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0625 0.0625 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_protein_experiment_wide m_score_protein_run_specific mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0625 0.0625 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0625 0.0625 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0625 0.0625 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0625 0.0625 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0625 0.0625 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0625 0.0625 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0625 0.0625 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0625 0.0625 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0625 0.0625 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0625 0.0625 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 26 columns] +[100 rows x 27 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-True-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-True-False].out index fe1be09e..e81a8b32 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-True-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-False-True-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0033 0.0033 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0033 0.0033 0.0033 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0033 0.0033 0.0033 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0033 0.0033 0.0033 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0033 0.0033 0.0033 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score m_score_peptide_experiment_wide m_score_peptide_global m_score_peptide_run_specific mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 0.0033 0.0033 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 0.0033 0.0033 0.0033 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 0.0033 0.0033 0.0033 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2795.06 ADSTGTLVITDPTR 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 0.0033 0.0033 0.0033 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 0.0033 0.0033 0.0033 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 0.0033 0.0033 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 0.0033 0.0033 0.0033 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 0.0033 0.0033 0.0033 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 
2291.53 VYVYAVDQTR 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 0.0033 0.0033 0.0033 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 0.0033 0.0033 0.0033 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-True-False-False].out b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-True-False-False].out index d2010153..7d752c0c 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-True-False-False].out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_osw_analysis[split_parquet-True-False-False].out @@ -1,14 +1,14 @@ - Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank rightWidth run_id transition_group_id -0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 2705.3701 -8670811102654834151 0 -1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 
0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 2623.4399 -8670811102654834151 0 -2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 2736.0901 -8670811102654834151 0 -3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 2811.2000 -8670811102654834151 0 -4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 2855.5801 -8670811102654834151 0 -.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... -95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 
269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 2247.3999 -8670811102654834151 19 -96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 2332.7400 -8670811102654834151 19 -97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 2141.5701 -8670811102654834151 19 -98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 2298.6101 -8670811102654834151 19 -99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 
10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 2544.4099 -8670811102654834151 19 + Charge FullPeptideName Intensity ProteinName RT Sequence aggr_Fragment_Annotation aggr_Peak_Apex aggr_Peak_Area aggr_prec_Peak_Apex aggr_prec_Peak_Area assay_iRT assay_rt d_score decoy delta_iRT delta_rt filename iRT id leftWidth m_score mz peak_group_rank pep rightWidth run_id transition_group_id +0 2 ADSTGTLVITDPTR(UniMod:267) 207283.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2661.55 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 41.0;787.0;969.0;36907.0;479.0;60.0;1426.0;30.... 223.0;12510.0;10322.0;251772.0;5557.0;500.0;99... 117220.7482 854645.0 26.5 2595.5788 5.7301 0 1.9379 65.9712 napedro_L120420_010_SW.mzXML.gz 28.4379 -4409520928686189639 2640.5100 0.0033 728.8795 1 0.0031 2705.3701 -8670811102654834151 0 +1 2 ADSTGTLVITDPTR(UniMod:267) 6385.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2605.74 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;520.0;181.0;1338.0;294.0;20.0;232.0;10.0;1... 150.0;13418.0;3123.0;9325.0;2329.0;100.0;4328.... 8790.7812 104006.0 26.5 2595.5733 1.2404 0 0.3198 10.1667 napedro_L120420_010_SW.mzXML.gz 26.8198 260819276075322832 2575.6399 0.0685 728.8795 2 1.0000 2623.4399 -8670811102654834151 0 +2 2 ADSTGTLVITDPTR(UniMod:267) 3838.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2708.53 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;541.0;294.0;1060.0;90.0;0.0;190.0;30.0;429... 50.0;5415.0;1984.0;6992.0;1344.0;60.0;1748.0;1... 5750.4716 73215.2 26.5 2595.5750 0.8151 0 3.3002 112.9550 napedro_L120420_010_SW.mzXML.gz 29.8002 8534214264242363560 2705.3701 0.2018 728.8795 3 1.0000 2736.0901 -8670811102654834151 0 +3 2 ADSTGTLVITDPTR(UniMod:267) 2693.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2795.06 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 
0.0;702.0;334.0;386.0;241.0;10.0;212.0;20.0;29... 10.0;4454.0;1675.0;2411.0;1323.0;60.0;1121.0;1... 4036.5600 25862.3 26.5 2595.5754 0.6777 0 5.8092 199.4846 napedro_L120420_010_SW.mzXML.gz 32.3092 6932937885234622359 2790.7200 0.2018 728.8795 4 1.0000 2811.2000 -8670811102654834151 0 +4 2 ADSTGTLVITDPTR(UniMod:267) 5180.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2832.77 ADSTGTLVITDPTR 1363_y6_2;1348_b8_2;1343_b4_1;1359_y3_1;1365_y... 0.0;592.0;397.0;743.0;427.0;20.0;222.0;0.0;348... 30.0;10869.0;5111.0;8116.0;33319.0;271.0;6193.... 10419.7435 241873.0 26.5 2595.5778 -0.1013 0 6.9026 237.1922 napedro_L120420_010_SW.mzXML.gz 33.4026 5163914660633416481 2811.2000 0.4692 728.8795 5 1.0000 2855.5801 -8670811102654834151 0 +.. ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... +95 2 VYVYAVDQTR(UniMod:267) 597887.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2230.18 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 439.0;20367.0;40.0;19321.0;968.0;120.0;4323.0;... 3589.0;89094.0;571.0;85016.0;4040.0;965.0;2048... 269150.5777 1192530.0 16.3 2245.2318 5.6532 0 -0.3700 -15.0518 napedro_L120420_010_SW.mzXML.gz 15.9300 6870255268859409918 2213.2600 0.0033 612.3184 1 0.0031 2247.3999 -8670811102654834151 19 +96 2 VYVYAVDQTR(UniMod:267) 16553.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2317.38 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 224.0;847.0;10.0;582.0;30.0;50.0;295.0;531.0;5... 2071.0;5859.0;141.0;4805.0;321.0;503.0;2775.0;... 9656.7598 89588.0 16.3 2245.2345 0.3846 0 2.1584 72.1455 napedro_L120420_010_SW.mzXML.gz 18.4584 6262215160571261022 2302.0200 0.2507 612.3184 2 1.0000 2332.7400 -8670811102654834151 19 +97 2 VYVYAVDQTR(UniMod:267) 20746.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2120.97 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 283.0;913.0;50.0;1597.0;60.0;50.0;643.0;937.0;... 4611.0;12157.0;330.0;11363.0;594.0;1098.0;5460... 
10568.0806 85676.6 16.3 2245.2373 0.3343 0 -3.5368 -124.2673 napedro_L120420_010_SW.mzXML.gz 12.7632 -610141049182829192 2100.6001 0.4000 612.3184 3 1.0000 2141.5701 -8670811102654834151 19 +98 2 VYVYAVDQTR(UniMod:267) 48058.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2291.53 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 397.0;263.0;50.0;520.0;162.0;80.0;441.0;296.0;... 4032.0;2661.0;580.0;6989.0;1362.0;732.0;6854.0... 6480.8607 75465.1 16.3 2245.2345 -0.0155 0 1.4088 46.2955 napedro_L120420_010_SW.mzXML.gz 17.7088 2043199813358518344 2267.8799 0.4692 612.3184 4 1.0000 2298.6101 -8670811102654834151 19 +99 2 VYVYAVDQTR(UniMod:267) 10959.0 AQUA4SWATH_HMLangeA;AQUA4SWATH_HMLangeA;AQUA4S... 2538.15 VYVYAVDQTR 1817_y6_2;1803_b3_1;1808_b7_2;1813_y3_1;1819_y... 306.0;233.0;30.0;510.0;40.0;213.0;406.0;562.0;... 1334.0;1410.0;140.0;2288.0;140.0;1194.0;1952.0... 10810.2698 56553.9 16.3 2245.2307 -1.8782 0 8.5599 292.9193 napedro_L120420_010_SW.mzXML.gz 24.8599 -5430403952310232561 2520.5100 0.4692 612.3184 5 1.0000 2544.4099 -8670811102654834151 19 -[100 rows x 27 columns] +[100 rows x 28 columns] From 48f603b7b3034b5fc457dd767407f3b0904e9541 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 22:48:20 +0000 Subject: [PATCH 18/30] Initial plan From 3558e6ef81d22db04a9ea3dc010e88ee4a59be66 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 22:57:16 +0000 Subject: [PATCH 19/30] Fix alignment reference feature ID precision and assign alignment_group_id to reference features - Cast REFERENCE_FEATURE_ID to BIGINT/INTEGER in SQL queries to prevent precision loss - Add logic to assign alignment_group_id to reference features - Applied fixes to split_parquet.py, parquet.py, and osw.py export modules Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 35 
+++++++++++++++++++++++++-- pyprophet/io/export/parquet.py | 36 ++++++++++++++++++++++++++-- pyprophet/io/export/split_parquet.py | 36 ++++++++++++++++++++++++++-- 3 files changed, 101 insertions(+), 6 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 6472551a..1974378e 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -479,6 +479,37 @@ def _read_standard_data(self, con, cfg): "Int64" ) + # Assign alignment_group_id to reference features + # Create a mapping from reference feature IDs to their alignment_group_ids + if "alignment_reference_feature_id" in data.columns and "alignment_group_id" in data.columns: + # Get all reference feature IDs and their corresponding alignment_group_ids + ref_mapping = data[ + data["alignment_reference_feature_id"].notna() + ][["alignment_reference_feature_id", "alignment_group_id"]].drop_duplicates() + + # For each reference feature ID, we need to assign the alignment_group_id + # to the feature row where id == alignment_reference_feature_id + if not ref_mapping.empty: + # Merge the alignment_group_id for reference features + # First create a DataFrame mapping id -> alignment_group_id for references + ref_group_mapping = ref_mapping.rename( + columns={"alignment_reference_feature_id": "id", "alignment_group_id": "ref_alignment_group_id"} + ) + + # Merge this mapping to assign alignment_group_id to reference features + data = pd.merge(data, ref_group_mapping, on="id", how="left") + + # Fill in alignment_group_id for reference features (where it's currently null but ref_alignment_group_id is not) + mask = data["alignment_group_id"].isna() & data["ref_alignment_group_id"].notna() + data.loc[mask, "alignment_group_id"] = data.loc[mask, "ref_alignment_group_id"] + + # Drop the temporary column + data = data.drop(columns=["ref_alignment_group_id"]) + + logger.debug( + f"Assigned alignment_group_id to {mask.sum()} reference features" + ) + return data def _augment_data(self, 
data, con, cfg): @@ -784,7 +815,7 @@ def _fetch_alignment_features(self, con, cfg): FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID AS id, FEATURE_MS2_ALIGNMENT.PRECURSOR_ID AS transition_group_id, FEATURE_MS2_ALIGNMENT.RUN_ID AS run_id, - FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS alignment_reference_feature_id, + CAST(FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS INTEGER) AS alignment_reference_feature_id, FEATURE_MS2_ALIGNMENT.REFERENCE_RT AS alignment_reference_rt, SCORE_ALIGNMENT.PEP AS alignment_pep, SCORE_ALIGNMENT.QVALUE AS alignment_qvalue @@ -801,7 +832,7 @@ def _fetch_alignment_features(self, con, cfg): SELECT FEATURE_ID, QVALUE FROM SCORE_MS2 ) AS REF_SCORE_MS2 - ON REF_SCORE_MS2.FEATURE_ID = FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID + ON CAST(REF_SCORE_MS2.FEATURE_ID AS INTEGER) = CAST(FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS INTEGER) WHERE FEATURE_MS2_ALIGNMENT.LABEL = 1 AND SCORE_ALIGNMENT.PEP < {max_alignment_pep} AND REF_SCORE_MS2.QVALUE < {max_rs_peakgroup_qvalue} diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 2d186d67..57dbb0a9 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -395,6 +395,37 @@ def _read_standard_data(self, con) -> pd.DataFrame: "Int64" ) + # Assign alignment_group_id to reference features + # Create a mapping from reference feature IDs to their alignment_group_ids + if "alignment_reference_feature_id" in data.columns and "alignment_group_id" in data.columns: + # Get all reference feature IDs and their corresponding alignment_group_ids + ref_mapping = data[ + data["alignment_reference_feature_id"].notna() + ][["alignment_reference_feature_id", "alignment_group_id"]].drop_duplicates() + + # For each reference feature ID, we need to assign the alignment_group_id + # to the feature row where id == alignment_reference_feature_id + if not ref_mapping.empty: + # Merge the alignment_group_id for reference features + # First create a DataFrame mapping id -> 
alignment_group_id for references + ref_group_mapping = ref_mapping.rename( + columns={"alignment_reference_feature_id": "id", "alignment_group_id": "ref_alignment_group_id"} + ) + + # Merge this mapping to assign alignment_group_id to reference features + data = pd.merge(data, ref_group_mapping, on="id", how="left") + + # Fill in alignment_group_id for reference features (where it's currently null but ref_alignment_group_id is not) + mask = data["alignment_group_id"].isna() & data["ref_alignment_group_id"].notna() + data.loc[mask, "alignment_group_id"] = data.loc[mask, "ref_alignment_group_id"] + + # Drop the temporary column + data = data.drop(columns=["ref_alignment_group_id"]) + + logger.debug( + f"Assigned alignment_group_id to {mask.sum()} reference features" + ) + return data def _augment_data(self, data, con) -> pd.DataFrame: @@ -769,18 +800,19 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Query to get aligned features where reference passes MS2 QVALUE threshold # Also compute alignment_group_id using DENSE_RANK + # Cast REFERENCE_FEATURE_ID to BIGINT to preserve precision and avoid float conversion ref_check_query = f""" SELECT DENSE_RANK() OVER (ORDER BY fa.PRECURSOR_ID, fa.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, fa.FEATURE_ID, fa.PRECURSOR_ID, fa.RUN_ID, - fa.REFERENCE_FEATURE_ID, + CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID, fa.REFERENCE_RT, fa.PEP, fa.QVALUE FROM filtered_alignment fa - INNER JOIN data d ON d.FEATURE_ID = fa.REFERENCE_FEATURE_ID + INNER JOIN data d ON CAST(d.FEATURE_ID AS BIGINT) = CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) WHERE d.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} """ filtered_df = con.execute(ref_check_query).fetchdf() diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index 509b8633..69ee97b9 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -510,6 +510,37 @@ def _read_standard_data(self, con) -> 
pd.DataFrame: "Int64" ) + # Assign alignment_group_id to reference features + # Create a mapping from reference feature IDs to their alignment_group_ids + if "alignment_reference_feature_id" in data.columns and "alignment_group_id" in data.columns: + # Get all reference feature IDs and their corresponding alignment_group_ids + ref_mapping = data[ + data["alignment_reference_feature_id"].notna() + ][["alignment_reference_feature_id", "alignment_group_id"]].drop_duplicates() + + # For each reference feature ID, we need to assign the alignment_group_id + # to the feature row where id == alignment_reference_feature_id + if not ref_mapping.empty: + # Merge the alignment_group_id for reference features + # First create a DataFrame mapping id -> alignment_group_id for references + ref_group_mapping = ref_mapping.rename( + columns={"alignment_reference_feature_id": "id", "alignment_group_id": "ref_alignment_group_id"} + ) + + # Merge this mapping to assign alignment_group_id to reference features + data = pd.merge(data, ref_group_mapping, on="id", how="left") + + # Fill in alignment_group_id for reference features (where it's currently null but ref_alignment_group_id is not) + mask = data["alignment_group_id"].isna() & data["ref_alignment_group_id"].notna() + data.loc[mask, "alignment_group_id"] = data.loc[mask, "ref_alignment_group_id"] + + # Drop the temporary column + data = data.drop(columns=["ref_alignment_group_id"]) + + logger.debug( + f"Assigned alignment_group_id to {mask.sum()} reference features" + ) + return data def _augment_data(self, data, con) -> pd.DataFrame: @@ -862,18 +893,19 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Query to get aligned features where reference passes MS2 QVALUE threshold # Also compute alignment_group_id using DENSE_RANK + # Cast REFERENCE_FEATURE_ID to BIGINT to preserve precision and avoid float conversion ref_check_query = f""" SELECT DENSE_RANK() OVER (ORDER BY fa.PRECURSOR_ID, fa.ALIGNMENT_ID) AS 
ALIGNMENT_GROUP_ID, fa.FEATURE_ID, fa.PRECURSOR_ID, fa.RUN_ID, - fa.REFERENCE_FEATURE_ID, + CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID, fa.REFERENCE_RT, fa.PEP, fa.QVALUE FROM filtered_alignment fa - INNER JOIN precursors p ON p.FEATURE_ID = fa.REFERENCE_FEATURE_ID + INNER JOIN precursors p ON CAST(p.FEATURE_ID AS BIGINT) = CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) WHERE p.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} """ filtered_df = con.execute(ref_check_query).fetchdf() From bbe0149c73d4bcf8940c142685894301665885bc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Oct 2025 23:03:12 +0000 Subject: [PATCH 20/30] Add comment clarifying SQLite INTEGER type equivalence Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 1974378e..3390fa63 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -837,6 +837,7 @@ def _fetch_alignment_features(self, con, cfg): AND SCORE_ALIGNMENT.PEP < {max_alignment_pep} AND REF_SCORE_MS2.QVALUE < {max_rs_peakgroup_qvalue} """ + # Note: SQLite INTEGER is 8 bytes (equivalent to BIGINT) and can store values up to 2^63-1 df = pd.read_sql_query(query, con) logger.info( From c7fc186483373abe6ac175125288099f76e5dde7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 31 Oct 2025 01:15:15 +0000 Subject: [PATCH 21/30] Fix performance regression: remove CAST from JOIN conditions The CAST operations in JOIN conditions prevented database indexes from being used, causing ~50 minute performance regression. Solution: let database use native integer types for fast joins, then cast result columns in pandas to preserve precision. 
Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 13 ++++++++++--- pyprophet/io/export/parquet.py | 6 +++--- pyprophet/io/export/split_parquet.py | 6 +++--- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 3390fa63..62c16a47 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -815,7 +815,7 @@ def _fetch_alignment_features(self, con, cfg): FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID AS id, FEATURE_MS2_ALIGNMENT.PRECURSOR_ID AS transition_group_id, FEATURE_MS2_ALIGNMENT.RUN_ID AS run_id, - CAST(FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS INTEGER) AS alignment_reference_feature_id, + FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS alignment_reference_feature_id, FEATURE_MS2_ALIGNMENT.REFERENCE_RT AS alignment_reference_rt, SCORE_ALIGNMENT.PEP AS alignment_pep, SCORE_ALIGNMENT.QVALUE AS alignment_qvalue @@ -832,14 +832,21 @@ def _fetch_alignment_features(self, con, cfg): SELECT FEATURE_ID, QVALUE FROM SCORE_MS2 ) AS REF_SCORE_MS2 - ON CAST(REF_SCORE_MS2.FEATURE_ID AS INTEGER) = CAST(FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS INTEGER) + ON REF_SCORE_MS2.FEATURE_ID = FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID WHERE FEATURE_MS2_ALIGNMENT.LABEL = 1 AND SCORE_ALIGNMENT.PEP < {max_alignment_pep} AND REF_SCORE_MS2.QVALUE < {max_rs_peakgroup_qvalue} """ - # Note: SQLite INTEGER is 8 bytes (equivalent to BIGINT) and can store values up to 2^63-1 df = pd.read_sql_query(query, con) + + # Ensure alignment_reference_feature_id is read as Int64 to preserve precision for large IDs + # SQLite stores these as INTEGER (8 bytes), but pandas may infer float64 which loses precision + if "alignment_reference_feature_id" in df.columns: + df["alignment_reference_feature_id"] = df["alignment_reference_feature_id"].astype("Int64") + if "alignment_group_id" in df.columns: + df["alignment_group_id"] = df["alignment_group_id"].astype("Int64") + 
logger.info( f"Found {len(df)} aligned features passing alignment PEP < {max_alignment_pep} with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}" ) diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 57dbb0a9..a47f1c88 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -800,19 +800,18 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Query to get aligned features where reference passes MS2 QVALUE threshold # Also compute alignment_group_id using DENSE_RANK - # Cast REFERENCE_FEATURE_ID to BIGINT to preserve precision and avoid float conversion ref_check_query = f""" SELECT DENSE_RANK() OVER (ORDER BY fa.PRECURSOR_ID, fa.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, fa.FEATURE_ID, fa.PRECURSOR_ID, fa.RUN_ID, - CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID, + fa.REFERENCE_FEATURE_ID, fa.REFERENCE_RT, fa.PEP, fa.QVALUE FROM filtered_alignment fa - INNER JOIN data d ON CAST(d.FEATURE_ID AS BIGINT) = CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) + INNER JOIN data d ON d.FEATURE_ID = fa.REFERENCE_FEATURE_ID WHERE d.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} """ filtered_df = con.execute(ref_check_query).fetchdf() @@ -830,6 +829,7 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: ].values # Add reference feature ID and RT if available + # Ensure Int64 dtype to preserve precision for large IDs if "REFERENCE_FEATURE_ID" in filtered_df.columns: result["alignment_reference_feature_id"] = ( filtered_df["REFERENCE_FEATURE_ID"].astype("Int64").values diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index 69ee97b9..a8139a40 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -893,19 +893,18 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Query to get aligned features where reference passes MS2 QVALUE threshold # Also compute alignment_group_id using DENSE_RANK - # Cast 
REFERENCE_FEATURE_ID to BIGINT to preserve precision and avoid float conversion ref_check_query = f""" SELECT DENSE_RANK() OVER (ORDER BY fa.PRECURSOR_ID, fa.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, fa.FEATURE_ID, fa.PRECURSOR_ID, fa.RUN_ID, - CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID, + fa.REFERENCE_FEATURE_ID, fa.REFERENCE_RT, fa.PEP, fa.QVALUE FROM filtered_alignment fa - INNER JOIN precursors p ON CAST(p.FEATURE_ID AS BIGINT) = CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) + INNER JOIN precursors p ON p.FEATURE_ID = fa.REFERENCE_FEATURE_ID WHERE p.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} """ filtered_df = con.execute(ref_check_query).fetchdf() @@ -923,6 +922,7 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: ].values # Add reference feature ID and RT if available + # Ensure Int64 dtype to preserve precision for large IDs if "REFERENCE_FEATURE_ID" in filtered_df.columns: result["alignment_reference_feature_id"] = ( filtered_df["REFERENCE_FEATURE_ID"].astype("Int64").values From 71d2c31d2c834a405fe01fba45fa84fbe828465d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 31 Oct 2025 01:36:34 +0000 Subject: [PATCH 22/30] Fix precision loss by adding CAST in SELECT clause (not JOIN) Added CAST(FEATURE.ID AS INTEGER) in SELECT clauses to ensure pandas reads large feature IDs correctly. CAST in SELECT preserves precision without the performance penalty of CAST in JOIN conditions. 
Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- pyprophet/io/export/osw.py | 17 ++++++++++++----- pyprophet/io/export/parquet.py | 3 ++- pyprophet/io/export/split_parquet.py | 3 ++- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 62c16a47..54762ea2 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -340,7 +340,7 @@ def _read_standard_data(self, con, cfg): FEATURE.NORM_RT AS iRT, PRECURSOR.LIBRARY_RT AS assay_iRT, FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, - FEATURE.ID AS id, + CAST(FEATURE.ID AS INTEGER) AS id, PEPTIDE.UNMODIFIED_SEQUENCE AS Sequence, PEPTIDE.MODIFIED_SEQUENCE AS FullPeptideName, PRECURSOR.CHARGE AS Charge, @@ -366,6 +366,10 @@ def _read_standard_data(self, con, cfg): ORDER BY transition_group_id, peak_group_rank; """ data = pd.read_sql_query(query, con) + + # Ensure id column is Int64 to preserve precision for large feature IDs + if "id" in data.columns: + data["id"] = data["id"].astype("Int64") # If alignment is enabled and alignment data is present, fetch and merge aligned features if use_alignment: @@ -416,7 +420,7 @@ def _read_standard_data(self, con, cfg): FEATURE.NORM_RT AS iRT, PRECURSOR.LIBRARY_RT AS assay_iRT, FEATURE.NORM_RT - PRECURSOR.LIBRARY_RT AS delta_iRT, - FEATURE.ID AS id, + CAST(FEATURE.ID AS INTEGER) AS id, PEPTIDE.UNMODIFIED_SEQUENCE AS Sequence, PEPTIDE.MODIFIED_SEQUENCE AS FullPeptideName, PRECURSOR.CHARGE AS Charge, @@ -441,6 +445,10 @@ def _read_standard_data(self, con, cfg): WHERE FEATURE.ID IN ({aligned_ids_str}) """ aligned_data = pd.read_sql_query(aligned_query, con) + + # Ensure id column is Int64 to preserve precision + if "id" in aligned_data.columns: + aligned_data["id"] = aligned_data["id"].astype("Int64") # Merge alignment scores and reference info into the aligned data aligned_data = pd.merge( @@ -815,7 +823,7 @@ def _fetch_alignment_features(self, con, cfg): 
FEATURE_MS2_ALIGNMENT.ALIGNED_FEATURE_ID AS id, FEATURE_MS2_ALIGNMENT.PRECURSOR_ID AS transition_group_id, FEATURE_MS2_ALIGNMENT.RUN_ID AS run_id, - FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS alignment_reference_feature_id, + CAST(FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS INTEGER) AS alignment_reference_feature_id, FEATURE_MS2_ALIGNMENT.REFERENCE_RT AS alignment_reference_rt, SCORE_ALIGNMENT.PEP AS alignment_pep, SCORE_ALIGNMENT.QVALUE AS alignment_qvalue @@ -840,8 +848,7 @@ def _fetch_alignment_features(self, con, cfg): df = pd.read_sql_query(query, con) - # Ensure alignment_reference_feature_id is read as Int64 to preserve precision for large IDs - # SQLite stores these as INTEGER (8 bytes), but pandas may infer float64 which loses precision + # Ensure Int64 dtype for large integer IDs (pandas nullable integer type) if "alignment_reference_feature_id" in df.columns: df["alignment_reference_feature_id"] = df["alignment_reference_feature_id"].astype("Int64") if "alignment_group_id" in df.columns: diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index a47f1c88..7d3b48c4 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -800,13 +800,14 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Query to get aligned features where reference passes MS2 QVALUE threshold # Also compute alignment_group_id using DENSE_RANK + # CAST in SELECT preserves precision, but not in JOIN (for performance) ref_check_query = f""" SELECT DENSE_RANK() OVER (ORDER BY fa.PRECURSOR_ID, fa.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, fa.FEATURE_ID, fa.PRECURSOR_ID, fa.RUN_ID, - fa.REFERENCE_FEATURE_ID, + CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID, fa.REFERENCE_RT, fa.PEP, fa.QVALUE diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index a8139a40..eaaee5a6 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -893,13 +893,14 @@ 
def _fetch_alignment_features(self, con) -> pd.DataFrame: # Query to get aligned features where reference passes MS2 QVALUE threshold # Also compute alignment_group_id using DENSE_RANK + # CAST in SELECT preserves precision, but not in JOIN (for performance) ref_check_query = f""" SELECT DENSE_RANK() OVER (ORDER BY fa.PRECURSOR_ID, fa.ALIGNMENT_ID) AS ALIGNMENT_GROUP_ID, fa.FEATURE_ID, fa.PRECURSOR_ID, fa.RUN_ID, - fa.REFERENCE_FEATURE_ID, + CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID, fa.REFERENCE_RT, fa.PEP, fa.QVALUE From 75388914992a8f4c1188ddd2eb8dfb562cca524d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 31 Oct 2025 03:45:11 +0000 Subject: [PATCH 23/30] Update ALIGNMENT_INTEGRATION_WORKFLOW.md with precision fixes and group ID assignment Added comprehensive documentation on: - Precision preservation for large feature IDs using CAST in SELECT - alignment_group_id assignment to both aligned and reference features - Performance comparison table showing CAST placement impact - Updated workflow diagrams to show new columns and processing steps Co-authored-by: singjc <32938975+singjc@users.noreply.github.com> --- ALIGNMENT_INTEGRATION_WORKFLOW.md | 132 +++++++++++++++++++++++++++--- 1 file changed, 122 insertions(+), 10 deletions(-) diff --git a/ALIGNMENT_INTEGRATION_WORKFLOW.md b/ALIGNMENT_INTEGRATION_WORKFLOW.md index 203b3d4a..04f152d8 100644 --- a/ALIGNMENT_INTEGRATION_WORKFLOW.md +++ b/ALIGNMENT_INTEGRATION_WORKFLOW.md @@ -52,19 +52,30 @@ This diagram illustrates how the SCORE_ALIGNMENT integration works to recover pe │ │ │ Step A: Fetch Base Features (MS2 QVALUE filter) │ │ ┌───────────────────────────────────────────────────────────────┐ │ -│ │ SELECT * FROM FEATURES │ │ +│ │ SELECT CAST(FEATURE.ID AS INTEGER) AS id, │ │ +│ │ ... 
(other columns) │ │ +│ │ FROM FEATURES │ │ │ │ WHERE SCORE_MS2.QVALUE < max_rs_peakgroup_qvalue (e.g., 0.05)│ │ │ │ → Base Features (passed MS2 threshold) │ │ │ │ → Mark with from_alignment=0 │ │ +│ │ → CAST preserves precision for large feature IDs │ │ │ └───────────────────────────────────────────────────────────────┘ │ │ │ │ Step B: Fetch Aligned Features (Alignment PEP filter) │ │ ┌───────────────────────────────────────────────────────────────┐ │ -│ │ SELECT ALIGNED_FEATURE_ID FROM FEATURE_MS2_ALIGNMENT │ │ +│ │ SELECT DENSE_RANK() OVER (...) AS alignment_group_id, │ │ +│ │ ALIGNED_FEATURE_ID AS id, │ │ +│ │ CAST(REFERENCE_FEATURE_ID AS INTEGER) │ │ +│ │ AS alignment_reference_feature_id, │ │ +│ │ REFERENCE_RT AS alignment_reference_rt │ │ +│ │ FROM FEATURE_MS2_ALIGNMENT │ │ │ │ JOIN SCORE_ALIGNMENT │ │ │ │ WHERE LABEL = 1 (target) │ │ │ │ AND SCORE_ALIGNMENT.PEP < max_alignment_pep (e.g., 0.7) │ │ +│ │ AND REF FEATURE passes MS2 QVALUE threshold │ │ │ │ → Aligned Features (good alignment scores) │ │ +│ │ → Includes alignment_group_id and reference info │ │ +│ │ → CAST preserves precision for large feature IDs │ │ │ └───────────────────────────────────────────────────────────────┘ │ └─────────────────────────────────────────────────────────────────────┘ ↓ @@ -85,14 +96,22 @@ This diagram illustrates how the SCORE_ALIGNMENT integration works to recover pe │ │ = {6, 7, 8} │ │ │ └──────────┬───────────┘ │ │ ↓ │ -│ ┌──────────────────────┐ │ -│ │ Fetch full data for │ │ -│ │ recovered features │ │ -│ │ 6, 7, 8 │ │ -│ │ Mark: from_alignment=1│ │ -│ │ Add: alignment_pep │ │ -│ │ Add: alignment_qvalue│ │ -│ └──────────┬───────────┘ │ +│ ┌──────────────────────────────────────┐ │ +│ │ Fetch full data for recovered │ │ +│ │ features: 6, 7, 8 │ │ +│ │ Mark: from_alignment=1 │ │ +│ │ Add: alignment_pep │ │ +│ │ Add: alignment_qvalue │ │ +│ │ Add: alignment_group_id │ │ +│ │ Add: alignment_reference_feature_id │ │ +│ │ Add: alignment_reference_rt │ │ +│ 
└──────────┬───────────────────────────┘ │ +│ ↓ │ +│ ┌──────────────────────────────────────┐ │ +│ │ Assign alignment_group_id to │ │ +│ │ reference features │ │ +│ │ (features pointed to by aligned IDs) │ │ +│ └──────────┬───────────────────────────┘ │ │ ↓ │ │ ┌──────────────────────┐ │ │ │ Combine: │ │ @@ -297,8 +316,101 @@ Recovered features include additional columns: - `from_alignment`: 0 (base) or 1 (recovered) - `alignment_pep`: Alignment posterior error probability - `alignment_qvalue`: Alignment q-value +- `alignment_group_id`: Group identifier linking aligned features together +- `alignment_reference_feature_id`: ID of the reference feature used for alignment +- `alignment_reference_rt`: Retention time of the reference feature These allow users to: - Identify which features were recovered - Assess alignment quality +- Track which features are aligned together via `alignment_group_id` +- Find the reference feature that was used for alignment - Filter or analyze separately if needed + +## Technical Implementation Details + +### Precision Preservation for Large Feature IDs + +Large integer feature IDs (e.g., `5,405,272,318,039,692,409`) require special handling to prevent precision loss during database operations and pandas DataFrame creation. + +#### The Problem +- Feature IDs can exceed 2^53, the maximum integer that float64 can represent precisely +- When pandas reads INTEGER columns from databases without explicit typing, it may infer float64 dtype +- This causes precision loss: `5,405,272,318,039,692,409` → `5,405,272,318,039,692,288` + +#### The Solution +SQL queries use explicit CAST operations in SELECT clauses (but NOT in JOIN conditions): + +```sql +-- OSW (SQLite) +SELECT CAST(FEATURE.ID AS INTEGER) AS id, + CAST(FEATURE_MS2_ALIGNMENT.REFERENCE_FEATURE_ID AS INTEGER) AS alignment_reference_feature_id +FROM ... + +-- Parquet (DuckDB) +SELECT CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID +FROM ... 
+``` + +**Key Design Principles:** +1. **CAST in SELECT**: Ensures pandas reads columns as integers, preserving precision +2. **No CAST in JOIN**: Database can use indexes for fast lookups (~16 seconds vs 50 minutes) +3. **Post-query conversion**: After reading, convert to pandas Int64 dtype for nullable integer support + +```python +# After reading from database +if "alignment_reference_feature_id" in df.columns: + df["alignment_reference_feature_id"] = df["alignment_reference_feature_id"].astype("Int64") +if "id" in data.columns: + data["id"] = data["id"].astype("Int64") +``` + +### Alignment Group ID Assignment + +The `alignment_group_id` is computed using `DENSE_RANK()` to assign a unique identifier to each alignment group: + +```sql +SELECT DENSE_RANK() OVER (ORDER BY PRECURSOR_ID, ALIGNMENT_ID) AS alignment_group_id, + ALIGNED_FEATURE_ID AS id, + REFERENCE_FEATURE_ID AS alignment_reference_feature_id +FROM FEATURE_MS2_ALIGNMENT +``` + +#### Assigning Group IDs to Reference Features + +Reference features (those that aligned features point to) also need to receive their `alignment_group_id`. This is handled in post-processing: + +```python +# 1. Extract mapping: reference_feature_id -> alignment_group_id +ref_mapping = data[ + data["alignment_reference_feature_id"].notna() +][["alignment_reference_feature_id", "alignment_group_id"]].drop_duplicates() + +# 2. Create reverse mapping: id -> alignment_group_id for references +ref_group_mapping = ref_mapping.rename( + columns={"alignment_reference_feature_id": "id", + "alignment_group_id": "ref_alignment_group_id"} +) + +# 3. Merge to assign group IDs to reference features +data = pd.merge(data, ref_group_mapping, on="id", how="left") + +# 4. 
Fill in alignment_group_id where it's null but ref_alignment_group_id exists +mask = data["alignment_group_id"].isna() & data["ref_alignment_group_id"].notna() +data.loc[mask, "alignment_group_id"] = data.loc[mask, "ref_alignment_group_id"] +``` + +**Result:** All features in an alignment group (both aligned and reference features) share the same `alignment_group_id`, enabling: +- Tracking which features are aligned together +- Identifying the reference feature for each alignment group +- Analyzing alignment quality across related features + +### Performance Considerations + +| Approach | Query Time | Precision | Index Usage | +|----------|-----------|-----------|-------------| +| No CAST | ~16 sec | ❌ Lost | ✅ Yes | +| CAST in JOIN | ~50 min | ✅ Preserved | ❌ No | +| CAST in SELECT | ~16 sec | ✅ Preserved | ✅ Yes | + +**Conclusion:** CAST in SELECT clause provides both precision preservation and optimal performance. From ccf5f2589fc7ea327ddde6986b3f328f16fb5bd0 Mon Sep 17 00:00:00 2001 From: singjc Date: Sat, 1 Nov 2025 12:03:00 -0400 Subject: [PATCH 24/30] Refactor OSWReader and OSWWriter to enhance peptide mapping and alignment handling; improve precision for ID columns and streamline SQL queries. 
--- pyprophet/io/export/osw.py | 344 +++++++++++++++++++++++++------------ 1 file changed, 236 insertions(+), 108 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index 54762ea2..d3d29bcb 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -366,7 +366,7 @@ def _read_standard_data(self, con, cfg): ORDER BY transition_group_id, peak_group_rank; """ data = pd.read_sql_query(query, con) - + # Ensure id column is Int64 to preserve precision for large feature IDs if "id" in data.columns: data["id"] = data["id"].astype("Int64") @@ -445,7 +445,7 @@ def _read_standard_data(self, con, cfg): WHERE FEATURE.ID IN ({aligned_ids_str}) """ aligned_data = pd.read_sql_query(aligned_query, con) - + # Ensure id column is Int64 to preserve precision if "id" in aligned_data.columns: aligned_data["id"] = aligned_data["id"].astype("Int64") @@ -489,31 +489,42 @@ def _read_standard_data(self, con, cfg): # Assign alignment_group_id to reference features # Create a mapping from reference feature IDs to their alignment_group_ids - if "alignment_reference_feature_id" in data.columns and "alignment_group_id" in data.columns: + if ( + "alignment_reference_feature_id" in data.columns + and "alignment_group_id" in data.columns + ): # Get all reference feature IDs and their corresponding alignment_group_ids - ref_mapping = data[ - data["alignment_reference_feature_id"].notna() - ][["alignment_reference_feature_id", "alignment_group_id"]].drop_duplicates() - + ref_mapping = data[data["alignment_reference_feature_id"].notna()][ + ["alignment_reference_feature_id", "alignment_group_id"] + ].drop_duplicates() + # For each reference feature ID, we need to assign the alignment_group_id # to the feature row where id == alignment_reference_feature_id if not ref_mapping.empty: # Merge the alignment_group_id for reference features # First create a DataFrame mapping id -> alignment_group_id for references ref_group_mapping = ref_mapping.rename( - 
columns={"alignment_reference_feature_id": "id", "alignment_group_id": "ref_alignment_group_id"} + columns={ + "alignment_reference_feature_id": "id", + "alignment_group_id": "ref_alignment_group_id", + } ) - + # Merge this mapping to assign alignment_group_id to reference features data = pd.merge(data, ref_group_mapping, on="id", how="left") - + # Fill in alignment_group_id for reference features (where it's currently null but ref_alignment_group_id is not) - mask = data["alignment_group_id"].isna() & data["ref_alignment_group_id"].notna() - data.loc[mask, "alignment_group_id"] = data.loc[mask, "ref_alignment_group_id"] - + mask = ( + data["alignment_group_id"].isna() + & data["ref_alignment_group_id"].notna() + ) + data.loc[mask, "alignment_group_id"] = data.loc[ + mask, "ref_alignment_group_id" + ] + # Drop the temporary column data = data.drop(columns=["ref_alignment_group_id"]) - + logger.debug( f"Assigned alignment_group_id to {mask.sum()} reference features" ) @@ -847,13 +858,15 @@ def _fetch_alignment_features(self, con, cfg): """ df = pd.read_sql_query(query, con) - + # Ensure Int64 dtype for large integer IDs (pandas nullable integer type) if "alignment_reference_feature_id" in df.columns: - df["alignment_reference_feature_id"] = df["alignment_reference_feature_id"].astype("Int64") + df["alignment_reference_feature_id"] = df[ + "alignment_reference_feature_id" + ].astype("Int64") if "alignment_group_id" in df.columns: df["alignment_group_id"] = df["alignment_group_id"].astype("Int64") - + logger.info( f"Found {len(df)} aligned features passing alignment PEP < {max_alignment_pep} with reference features passing MS2 QVALUE < {max_rs_peakgroup_qvalue}" ) @@ -1563,6 +1576,7 @@ def _export_single_file(self, conn, column_info: dict) -> None: # Insert precursor data logger.debug("Inserting precursor data into temp table") precursor_query = self._build_combined_precursor_query(conn, column_info) + # print(precursor_query) conn.execute(f"INSERT INTO temp_table 
{precursor_query}") # Insert transition data if requested @@ -1587,6 +1601,183 @@ def _export_single_file(self, conn, column_info: dict) -> None: logger.info(f"Exporting alignment data to {alignment_path}") self._export_alignment_data(conn, alignment_path) + def _register_peptide_ipf_map(self, conn: duckdb.DuckDBPyConnection) -> None: + """Create or refresh peptide ↔ IPF peptide mapping inside DuckDB.""" + logger.info("Preparing peptide unimod to codename mapping view") + conn.create_function("unimod_to_codename", unimod_to_codename, [str], str) + conn.execute( + f""" + CREATE OR REPLACE TEMP TABLE UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING AS + WITH peptides AS ( + SELECT + ID, + MODIFIED_SEQUENCE, + unimod_to_codename(MODIFIED_SEQUENCE) AS CODENAME, + MODIFIED_SEQUENCE LIKE '%%UniMod%%' AS HAS_UNIMOD + FROM sqlite_scan('{self.config.infile}', 'PEPTIDE') + ), + unimod_peptides AS ( + SELECT CODENAME, ID AS PEPTIDE_ID + FROM peptides + WHERE HAS_UNIMOD + ), + codename_peptides AS ( + SELECT CODENAME, ID AS IPF_PEPTIDE_ID + FROM peptides + WHERE NOT HAS_UNIMOD + ) + SELECT DISTINCT + COALESCE(unimod_peptides.PEPTIDE_ID, codename_peptides.IPF_PEPTIDE_ID) AS PEPTIDE_ID, + COALESCE(codename_peptides.IPF_PEPTIDE_ID, unimod_peptides.PEPTIDE_ID) AS IPF_PEPTIDE_ID, + COALESCE(unimod_peptides.CODENAME, codename_peptides.CODENAME) AS CODENAME + FROM unimod_peptides + FULL OUTER JOIN codename_peptides USING (CODENAME) + """ + ) + + def _create_unimod_to_codename_peptide_id_mapping_table(self) -> None: + """Create peptide unimod to codename mapping table in SQLite database.""" + logger.info( + "Generating peptide unimod to codename mapping and storing in SQLite" + ) + + with sqlite3.connect(self.config.infile) as sql_conn: + # First get the peptide table and process it with pyopenms + peptide_df = pd.read_sql_query( + "SELECT ID, MODIFIED_SEQUENCE FROM PEPTIDE", sql_conn + ) + + peptide_df["codename"] = peptide_df["MODIFIED_SEQUENCE"].apply( + unimod_to_codename + ) + + # Create 
the merged mapping + unimod_mask = peptide_df["MODIFIED_SEQUENCE"].str.contains("UniMod") + merged_df = pd.merge( + peptide_df[unimod_mask][["codename", "ID"]], + peptide_df[~unimod_mask][["codename", "ID"]], + on="codename", + suffixes=("_unimod", "_codename"), + how="outer", + ) + + # Fill NaN values in the 'ID_codename' column with the 'ID_unimod' values + merged_df["ID_codename"] = merged_df["ID_codename"].fillna( + merged_df["ID_unimod"] + ) + # Fill NaN values in the 'ID_unimod' column with the 'ID_codename' values + merged_df["ID_unimod"] = merged_df["ID_unimod"].fillna( + merged_df["ID_codename"] + ) + + merged_df["ID_unimod"] = merged_df["ID_unimod"].astype(int) + merged_df["ID_codename"] = merged_df["ID_codename"].astype(int) + + # Create the UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING table in SQLite + sql_conn.execute( + """ + CREATE TABLE IF NOT EXISTS UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING ( + ID_unimod INTEGER, + ID_codename INTEGER, + codename TEXT, + PRIMARY KEY (ID_unimod, ID_codename) + ) + """ + ) + sql_conn.execute("DELETE FROM UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING") + + # Insert the data into SQLite table + merged_df[["ID_unimod", "ID_codename", "codename"]].to_sql( + "UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING", + sql_conn, + if_exists="append", + index=False, + ) + + # Create indices for better performance + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_peptide_ipf_unimod ON UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING(ID_unimod)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_peptide_ipf_codename ON UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING(ID_codename)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_peptide_ipf_codename_text ON UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING(codename)" + ) + + sql_conn.commit() + logger.info( + f"Successfully created UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING table with {len(merged_df)} mappings" + ) + + def _insert_precursor_peptide_ipf_map(self) -> None: + """Insert precursor-peptide-IPF table into the input 
sqlite OSW file.""" + logger.info("Inserting precursor-peptide-IPF mapping into OSW file") + with sqlite3.connect(self.config.infile) as sql_conn: + # Create the main mapping table + sql_conn.execute( + """ + CREATE TABLE IF NOT EXISTS PRECURSOR_PEPTIDE_IPF_MAPPING ( + PRECURSOR_ID INTEGER, + ID_unimod INTEGER, + ID_codename INTEGER, + MODIFIED_SEQUENCE TEXT, + CODENAME TEXT, + FEATURE_ID INTEGER, + PRECURSOR_PEAKGROUP_PEP REAL, + QVALUE REAL, + PEP REAL + ) + """ + ) + sql_conn.execute("DELETE FROM PRECURSOR_PEPTIDE_IPF_MAPPING") + + # Insert the data using your join logic + sql_conn.execute( + """ + INSERT INTO PRECURSOR_PEPTIDE_IPF_MAPPING ( + PRECURSOR_ID, ID_unimod, ID_codename, MODIFIED_SEQUENCE, + CODENAME, FEATURE_ID, PRECURSOR_PEAKGROUP_PEP, QVALUE, PEP + ) + SELECT + ppm.PRECURSOR_ID, + pim.ID_unimod, + pim.ID_codename, + p.MODIFIED_SEQUENCE, + pim.codename, + si.FEATURE_ID, + si.PRECURSOR_PEAKGROUP_PEP, + si.QVALUE, + si.PEP + FROM PEPTIDE p + INNER JOIN UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING pim ON pim.ID_unimod = p.ID + INNER JOIN PRECURSOR_PEPTIDE_MAPPING ppm ON ppm.PEPTIDE_ID = p.ID + INNER JOIN SCORE_IPF si ON si.PEPTIDE_ID = pim.ID_codename + """ + ) + + # Create indices for better query performance + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_ppim_precursor_id ON PRECURSOR_PEPTIDE_IPF_MAPPING(PRECURSOR_ID)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_ppim_feature_id ON PRECURSOR_PEPTIDE_IPF_MAPPING(FEATURE_ID)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_ppim_id_unimod ON PRECURSOR_PEPTIDE_IPF_MAPPING(ID_unimod)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_ppim_id_codename ON PRECURSOR_PEPTIDE_IPF_MAPPING(ID_codename)" + ) + sql_conn.execute( + "CREATE INDEX IF NOT EXISTS idx_ppim_precursor_feature ON PRECURSOR_PEPTIDE_IPF_MAPPING(PRECURSOR_ID, FEATURE_ID)" + ) + + sql_conn.commit() + logger.info( + "Successfully created PRECURSOR_PEPTIDE_IPF_MAPPING table with indices" + ) + def 
_build_precursor_query(self, conn, column_info: dict) -> str: """Build SQL query for precursor data""" feature_ms1_cols_sql = ", ".join( @@ -1599,49 +1790,15 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: for col in column_info["feature_ms2_cols"] ) + # First get the peptide table and process it with pyopenms + # self._register_peptide_ipf_map(conn) + self._create_unimod_to_codename_peptide_id_mapping_table() + # Check if score tables exist and build score SQLs score_cols_selct, score_table_joins, score_column_views = ( self._build_score_column_selection_and_joins(column_info) ) - # First get the peptide table and process it with pyopenms - logger.info("Generating peptide unimod to codename mapping") - with sqlite3.connect(self.config.infile) as sql_conn: - peptide_df = pd.read_sql_query( - "SELECT ID, MODIFIED_SEQUENCE FROM PEPTIDE", sql_conn - ) - peptide_df["codename"] = peptide_df["MODIFIED_SEQUENCE"].apply( - unimod_to_codename - ) - - # Create the merged mapping - unimod_mask = peptide_df["MODIFIED_SEQUENCE"].str.contains("UniMod") - merged_df = pd.merge( - peptide_df[unimod_mask][["codename", "ID"]], - peptide_df[~unimod_mask][["codename", "ID"]], - on="codename", - suffixes=("_unimod", "_codename"), - how="outer", - ) - - # Fill NaN values in the 'ID_codename' column with the 'ID_unimod' values - merged_df["ID_codename"] = merged_df["ID_codename"].fillna( - merged_df["ID_unimod"] - ) - # Fill NaN values in the 'ID_unimod' column with the 'ID_codename' values - merged_df["ID_unimod"] = merged_df["ID_unimod"].fillna(merged_df["ID_codename"]) - - merged_df["ID_unimod"] = merged_df["ID_unimod"].astype(int) - merged_df["ID_codename"] = merged_df["ID_codename"].astype(int) - - # Register peptide_ipf_map - conn.register( - "peptide_ipf_map", - merged_df.rename( - columns={"ID_unimod": "PEPTIDE_ID", "ID_codename": "IPF_PEPTIDE_ID"} - ), - ) - return f""" -- Need to map the unimod peptide ids to the ipf codename peptide ids. 
The section below is commented out, since it's limited to only the 4 common modifications. Have replaced it above with a more general approach that handles all modifications using pyopenms --WITH normalized_peptides AS ( @@ -1663,7 +1820,7 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: -- FROM normalized_peptides -- GROUP BY NORMALIZED_SEQUENCE --), - --peptide_ipf_map AS ( + --UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING AS ( -- SELECT -- np.PEPTIDE_ID, -- g.IPF_PEPTIDE_ID @@ -1674,12 +1831,12 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: {score_column_views} SELECT PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID AS PROTEIN_ID, - PEPTIDE.ID AS PEPTIDE_ID, - pipf.IPF_PEPTIDE_ID AS IPF_PEPTIDE_ID, - PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AS PRECURSOR_ID, + {"SCORE_IPF.ID_unimod AS PEPTIDE_ID," if column_info["score_ipf_exists"] else "PEPTIDE.ID AS PEPTIDE_ID,"} + {"SCORE_IPF.ID_codename AS IPF_PEPTIDE_ID," if column_info["score_ipf_exists"] else "pipf.IPF_PEPTIDE_ID AS IPF_PEPTIDE_ID,"} + {"SCORE_IPF.PRECURSOR_ID AS PRECURSOR_ID," if column_info["score_ipf_exists"] else "PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AS PRECURSOR_ID,"} PROTEIN.PROTEIN_ACCESSION AS PROTEIN_ACCESSION, PEPTIDE.UNMODIFIED_SEQUENCE, - PEPTIDE.MODIFIED_SEQUENCE, + {"SCORE_IPF.MODIFIED_SEQUENCE AS MODIFIED_SEQUENCE," if column_info["score_ipf_exists"] else "PEPTIDE.MODIFIED_SEQUENCE,"} PRECURSOR.TRAML_ID AS PRECURSOR_TRAML_ID, PRECURSOR.GROUP_LABEL AS PRECURSOR_GROUP_LABEL, PRECURSOR.PRECURSOR_MZ AS PRECURSOR_MZ, @@ -1710,8 +1867,8 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN sqlite_scan('{self.config.infile}', 'PEPTIDE') AS PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID - INNER JOIN peptide_ipf_map AS pipf - ON PEPTIDE.ID = pipf.PEPTIDE_ID + INNER JOIN sqlite_scan('{self.config.infile}', 'UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING') AS pipf + ON PEPTIDE.ID = pipf.ID_unimod 
INNER JOIN sqlite_scan('{self.config.infile}', 'PEPTIDE_PROTEIN_MAPPING') AS PEPTIDE_PROTEIN_MAPPING ON PEPTIDE.ID = PEPTIDE_PROTEIN_MAPPING.PEPTIDE_ID INNER JOIN sqlite_scan('{self.config.infile}', 'PROTEIN') AS PROTEIN @@ -1821,6 +1978,10 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: for col in column_info["feature_transition_cols"] ) + # First get the peptide table and process it with pyopenms + # self._register_peptide_ipf_map(conn) + self._create_unimod_to_codename_peptide_id_mapping_table() + # Get score columns for precursor level score_cols_select, score_table_joins, score_column_views = ( self._build_score_column_selection_and_joins(column_info) @@ -1831,44 +1992,6 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: if column_info.get("score_transition_exists", False): as_null_transition_score_cols = ", NULL AS SCORE_TRANSITION_SCORE, NULL AS SCORE_TRANSITION_RANK, NULL AS SCORE_TRANSITION_P_VALUE, NULL AS SCORE_TRANSITION_Q_VALUE, NULL AS SCORE_TRANSITION_PEP" - # First get the peptide table and process it with pyopenms - logger.info("Generating peptide unimod to codename mapping") - with sqlite3.connect(self.config.infile) as sql_conn: - peptide_df = pd.read_sql_query( - "SELECT ID, MODIFIED_SEQUENCE FROM PEPTIDE", sql_conn - ) - peptide_df["codename"] = peptide_df["MODIFIED_SEQUENCE"].apply( - unimod_to_codename - ) - - # Create the merged mapping as you did in your example - unimod_mask = peptide_df["MODIFIED_SEQUENCE"].str.contains("UniMod") - merged_df = pd.merge( - peptide_df[unimod_mask][["codename", "ID"]], - peptide_df[~unimod_mask][["codename", "ID"]], - on="codename", - suffixes=("_unimod", "_codename"), - how="outer", - ) - - # Fill NaN values in the 'ID_codename' column with the 'ID_unimod' values - merged_df["ID_codename"] = merged_df["ID_codename"].fillna( - merged_df["ID_unimod"] - ) - # Fill NaN values in the 'ID_unimod' column with the 'ID_codename' values - merged_df["ID_unimod"] 
= merged_df["ID_unimod"].fillna(merged_df["ID_codename"]) - - merged_df["ID_unimod"] = merged_df["ID_unimod"].astype(int) - merged_df["ID_codename"] = merged_df["ID_codename"].astype(int) - - # Register peptide_ipf_map - conn.register( - "peptide_ipf_map", - merged_df.rename( - columns={"ID_unimod": "PEPTIDE_ID", "ID_codename": "IPF_PEPTIDE_ID"} - ), - ) - return f""" -- Need to map the unimod peptide ids to the ipf codename peptide ids. The section below is commented out, since it's limited to only the 4 common modifications. Have replaced it above with a more general approach that handles all modifications using pyopenms --WITH normalized_peptides AS ( @@ -1890,7 +2013,7 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: -- FROM normalized_peptides -- GROUP BY NORMALIZED_SEQUENCE --), - --peptide_ipf_map AS ( + --UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING AS ( -- SELECT -- np.PEPTIDE_ID, -- g.IPF_PEPTIDE_ID @@ -1901,12 +2024,12 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: {score_column_views} SELECT PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID AS PROTEIN_ID, - PEPTIDE.ID AS PEPTIDE_ID, - pipf.IPF_PEPTIDE_ID AS IPF_PEPTIDE_ID, - PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AS PRECURSOR_ID, + {"SCORE_IPF.ID_unimod AS PEPTIDE_ID," if column_info["score_ipf_exists"] else "PEPTIDE.ID AS PEPTIDE_ID,"} + {"SCORE_IPF.ID_codename AS IPF_PEPTIDE_ID," if column_info["score_ipf_exists"] else "pipf.IPF_PEPTIDE_ID AS IPF_PEPTIDE_ID,"} + {"SCORE_IPF.PRECURSOR_ID AS PRECURSOR_ID," if column_info["score_ipf_exists"] else "PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AS PRECURSOR_ID,"} PROTEIN.PROTEIN_ACCESSION AS PROTEIN_ACCESSION, PEPTIDE.UNMODIFIED_SEQUENCE, - PEPTIDE.MODIFIED_SEQUENCE, + {"SCORE_IPF.MODIFIED_SEQUENCE AS MODIFIED_SEQUENCE," if column_info["score_ipf_exists"] else "PEPTIDE.MODIFIED_SEQUENCE,"} PRECURSOR.TRAML_ID AS PRECURSOR_TRAML_ID, PRECURSOR.GROUP_LABEL AS PRECURSOR_GROUP_LABEL, PRECURSOR.PRECURSOR_MZ AS PRECURSOR_MZ, @@ 
-1949,8 +2072,8 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: ON PRECURSOR.ID = PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID INNER JOIN sqlite_scan('{self.config.infile}', 'PEPTIDE') AS PEPTIDE ON PRECURSOR_PEPTIDE_MAPPING.PEPTIDE_ID = PEPTIDE.ID - INNER JOIN peptide_ipf_map AS pipf - ON PEPTIDE.ID = pipf.PEPTIDE_ID + INNER JOIN sqlite_scan('{self.config.infile}', 'UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING') AS pipf + ON PEPTIDE.ID = pipf.ID_unimod INNER JOIN sqlite_scan('{self.config.infile}', 'PEPTIDE_PROTEIN_MAPPING') AS PEPTIDE_PROTEIN_MAPPING ON PEPTIDE.ID = PEPTIDE_PROTEIN_MAPPING.PEPTIDE_ID INNER JOIN sqlite_scan('{self.config.infile}', 'PROTEIN') AS PROTEIN @@ -2394,8 +2517,13 @@ def _build_score_column_selection_and_joins( score_columns_to_select.append( "SCORE_IPF.PRECURSOR_PEAKGROUP_PEP AS SCORE_IPF_PRECURSOR_PEAKGROUP_PEP, SCORE_IPF.PEP AS SCORE_IPF_PEP, SCORE_IPF.QVALUE AS SCORE_IPF_QVALUE" ) + # NOTE: UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING needs to be created before this join is actually executed. This is done by registering the table in DuckDB in the precursor query builder. + # TODO: We should maybe add the UNIMOD_TO_CODENAME_PEPTIDE_ID_MAPPING during OpenSwathWorkflow execution to avoid doing it here? 
+ self._insert_precursor_peptide_ipf_map() score_tables_to_join.append( - f"LEFT JOIN sqlite_scan('{self.config.infile}', 'SCORE_IPF') AS SCORE_IPF ON FEATURE.ID = SCORE_IPF.FEATURE_ID" + f""" + LEFT JOIN sqlite_scan('{self.config.infile}', 'PRECURSOR_PEPTIDE_IPF_MAPPING') AS SCORE_IPF ON SCORE_IPF.FEATURE_ID = FEATURE.ID + """ ) # Create views for peptide and protein score tables if they exist From cada53bf20fc3ab5036e41d845ad8152995037a3 Mon Sep 17 00:00:00 2001 From: singjc Date: Sun, 2 Nov 2025 00:04:42 -0400 Subject: [PATCH 25/30] Add support for IM boundaries in OSWWriter: include EXP_IM_LEFTWIDTH and EXP_IM_RIGHTWIDTH columns --- pyprophet/io/export/osw.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index d3d29bcb..b5bd648c 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -1434,6 +1434,10 @@ def _prepare_column_info(self, conn) -> dict: "has_annotation": "ANNOTATION" in get_table_columns(self.config.infile, "TRANSITION"), "has_im": "EXP_IM" in get_table_columns(self.config.infile, "FEATURE"), + "has_im_boundaries": all( + col in get_table_columns(self.config.infile, "FEATURE") + for col in ["EXP_IM_LEFTWIDTH", "EXP_IM_RIGHTWIDTH"] + ), "feature_ms1_cols": [ col for col in get_table_columns_with_types( @@ -1859,6 +1863,8 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: FEATURE.DELTA_RT, FEATURE.LEFT_WIDTH, FEATURE.RIGHT_WIDTH, + {"FEATURE.EXP_IM_LEFTWIDTH" if column_info["has_im_boundaries"] else "NULL"} AS IM_leftWidth, + {"FEATURE.EXP_IM_RIGHTWIDTH" if column_info["has_im_boundaries"] else "NULL"} AS IM_rightWidth, {feature_ms1_cols_sql}, {feature_ms2_cols_sql}, {score_cols_selct} @@ -2052,6 +2058,8 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: FEATURE.DELTA_RT, FEATURE.LEFT_WIDTH, FEATURE.RIGHT_WIDTH, + {"FEATURE.EXP_IM_LEFTWIDTH" if column_info["has_im_boundaries"] else "NULL"} AS IM_leftWidth, + 
{"FEATURE.EXP_IM_RIGHTWIDTH" if column_info["has_im_boundaries"] else "NULL"} AS IM_rightWidth, {feature_ms1_cols_sql}, {feature_ms2_cols_sql}, NULL AS TRANSITION_ID, @@ -2296,6 +2304,8 @@ def _create_temp_table(self, conn, column_info: dict) -> None: DELTA_RT DOUBLE, LEFT_WIDTH DOUBLE, RIGHT_WIDTH DOUBLE, + IM_leftWidth DOUBLE, + IM_rightWidth DOUBLE, {feature_ms1_cols_types}, {feature_ms2_cols_types}, TRANSITION_ID BIGINT, From 2477343e79c5117dec5bc9d79808f052273992a2 Mon Sep 17 00:00:00 2001 From: singjc Date: Sun, 2 Nov 2025 23:52:48 -0500 Subject: [PATCH 26/30] fix(tests): fix bug with restructuring of ipf score to precursor mapping and IM boundaries --- pyprophet/io/export/osw.py | 15 +++++++----- ...test_parquet_export_no_transition_data.out | 24 +++++++++---------- ..._export.test_parquet_export_scored_osw.out | 24 +++++++++---------- ...xport.test_parquet_export_split_format.out | 14 +++++------ ...et_export.test_parquet_export_with_ipf.out | 2 +- 5 files changed, 41 insertions(+), 38 deletions(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index b5bd648c..c5392d90 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -1836,7 +1836,7 @@ def _build_precursor_query(self, conn, column_info: dict) -> str: SELECT PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID AS PROTEIN_ID, {"SCORE_IPF.ID_unimod AS PEPTIDE_ID," if column_info["score_ipf_exists"] else "PEPTIDE.ID AS PEPTIDE_ID,"} - {"SCORE_IPF.ID_codename AS IPF_PEPTIDE_ID," if column_info["score_ipf_exists"] else "pipf.IPF_PEPTIDE_ID AS IPF_PEPTIDE_ID,"} + {"SCORE_IPF.ID_codename AS IPF_PEPTIDE_ID," if column_info["score_ipf_exists"] else "pipf.ID_codename AS IPF_PEPTIDE_ID,"} {"SCORE_IPF.PRECURSOR_ID AS PRECURSOR_ID," if column_info["score_ipf_exists"] else "PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AS PRECURSOR_ID,"} PROTEIN.PROTEIN_ACCESSION AS PROTEIN_ACCESSION, PEPTIDE.UNMODIFIED_SEQUENCE, @@ -1863,8 +1863,8 @@ def _build_precursor_query(self, conn, column_info: dict) -> 
str: FEATURE.DELTA_RT, FEATURE.LEFT_WIDTH, FEATURE.RIGHT_WIDTH, - {"FEATURE.EXP_IM_LEFTWIDTH" if column_info["has_im_boundaries"] else "NULL"} AS IM_leftWidth, - {"FEATURE.EXP_IM_RIGHTWIDTH" if column_info["has_im_boundaries"] else "NULL"} AS IM_rightWidth, + {"FEATURE.EXP_IM_LEFTWIDTH" if column_info.get("has_im_boundaries", False) else "NULL"} AS IM_leftWidth, + {"FEATURE.EXP_IM_RIGHTWIDTH" if column_info.get("has_im_boundaries", False) else "NULL"} AS IM_rightWidth, {feature_ms1_cols_sql}, {feature_ms2_cols_sql}, {score_cols_selct} @@ -2031,7 +2031,7 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: SELECT PEPTIDE_PROTEIN_MAPPING.PROTEIN_ID AS PROTEIN_ID, {"SCORE_IPF.ID_unimod AS PEPTIDE_ID," if column_info["score_ipf_exists"] else "PEPTIDE.ID AS PEPTIDE_ID,"} - {"SCORE_IPF.ID_codename AS IPF_PEPTIDE_ID," if column_info["score_ipf_exists"] else "pipf.IPF_PEPTIDE_ID AS IPF_PEPTIDE_ID,"} + {"SCORE_IPF.ID_codename AS IPF_PEPTIDE_ID," if column_info["score_ipf_exists"] else "pipf.ID_codename AS IPF_PEPTIDE_ID,"} {"SCORE_IPF.PRECURSOR_ID AS PRECURSOR_ID," if column_info["score_ipf_exists"] else "PRECURSOR_PEPTIDE_MAPPING.PRECURSOR_ID AS PRECURSOR_ID,"} PROTEIN.PROTEIN_ACCESSION AS PROTEIN_ACCESSION, PEPTIDE.UNMODIFIED_SEQUENCE, @@ -2058,8 +2058,8 @@ def _build_combined_precursor_query(self, conn, column_info: dict) -> str: FEATURE.DELTA_RT, FEATURE.LEFT_WIDTH, FEATURE.RIGHT_WIDTH, - {"FEATURE.EXP_IM_LEFTWIDTH" if column_info["has_im_boundaries"] else "NULL"} AS IM_leftWidth, - {"FEATURE.EXP_IM_RIGHTWIDTH" if column_info["has_im_boundaries"] else "NULL"} AS IM_rightWidth, + {"FEATURE.EXP_IM_LEFTWIDTH" if column_info.get("has_im_boundaries", False) else "NULL"} AS IM_leftWidth, + {"FEATURE.EXP_IM_RIGHTWIDTH" if column_info.get("has_im_boundaries", False) else "NULL"} AS IM_rightWidth, {feature_ms1_cols_sql}, {feature_ms2_cols_sql}, NULL AS TRANSITION_ID, @@ -2171,6 +2171,8 @@ def _build_combined_transition_query(self, column_info: dict) -> 
str: NULL AS DELTA_RT, NULL AS LEFT_WIDTH, NULL AS RIGHT_WIDTH, + NULL AS IM_leftWidth, + NULL AS IM_rightWidth, {as_null_feature_ms1_cols_sql}, {as_null_feature_ms2_cols_sql}, TRANSITION.ID AS TRANSITION_ID, @@ -2322,6 +2324,7 @@ def _create_temp_table(self, conn, column_info: dict) -> None: {score_cols_types_sql} ); """ + print(create_temp_table_query) conn.execute(create_temp_table_query) diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out index d26f2c01..795ea384 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_no_transition_data.out @@ -1,13 +1,13 @@ -Exported 3410 rows with 98 columns (no transition data) +Exported 3410 rows with 100 columns (no transition data) Score columns found: ['SCORE_MS2_PEAK_GROUP_RANK', 'SCORE_MS2_PEP', 'SCORE_MS2_P_VALUE', 'SCORE_MS2_Q_VALUE', 'SCORE_MS2_SCORE', 'SCORE_PEPTIDE_GLOBAL_PEP', 'SCORE_PEPTIDE_GLOBAL_P_VALUE', 'SCORE_PEPTIDE_GLOBAL_Q_VALUE', 'SCORE_PEPTIDE_GLOBAL_SCORE', 'SCORE_PROTEIN_GLOBAL_PEP', 'SCORE_PROTEIN_GLOBAL_P_VALUE', 'SCORE_PROTEIN_GLOBAL_Q_VALUE', 'SCORE_PROTEIN_GLOBAL_SCORE'] - ANNOTATION DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD 
FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FEATURE_TRANSITION_APEX_INTENSITY FEATURE_TRANSITION_AREA_INTENSITY FEATURE_TRANSITION_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_TRANSITION_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_TRANSITION_VAR_LOG_INTENSITY FEATURE_TRANSITION_VAR_LOG_SN_SCORE FEATURE_TRANSITION_VAR_MASSDEV_SCORE FEATURE_TRANSITION_VAR_XCORR_COELUTION FEATURE_TRANSITION_VAR_XCORR_SHAPE FILENAME GENE_DECOY GENE_ID GENE_NAME IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PRODUCT_MZ PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE TRANSITION_CHARGE TRANSITION_DECOY TRANSITION_DETECTING TRANSITION_ID TRANSITION_LIBRARY_INTENSITY TRANSITION_ORDINAL TRANSITION_TRAML_ID TRANSITION_TYPE UNMODIFIED_SEQUENCE -0 None -85.0733 NaN 1923.17 483971408708572459 192394.8906 935372.0000 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 
0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1898.6700 GIGDWSDSK(UniMod:259) 7.0277 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1946.4600 -8670811102654834151 1 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -1 None -55.2126 NaN 1953.03 6854889104354289238 5696.7271 45882.6016 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1946.4600 GIGDWSDSK(UniMod:259) 7.8936 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1977.1899 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -2 None 9.7944 NaN 2018.03 2696300170322160855 17401.1816 95751.7969 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 2001.0900 GIGDWSDSK(UniMod:259) 9.7785 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 2024.9800 -8670811102654834151 2 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -3 None -130.8641 NaN 1877.37 8207933629855485114 6239.5195 48788.5000 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1857.7000 GIGDWSDSK(UniMod:259) 5.7000 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1898.6700 -8670811102654834151 3 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -4 None -268.6805 NaN 1739.56 745237666153652118 6493.7773 66798.3984 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522 1717.7300 GIGDWSDSK(UniMod:259) 1.7038 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 1762.1100 -8670811102654834151 5 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -5 None -6.0218 NaN 3084.15 1082368609638691369 88839.6328 604041.0000 0.9885 0.1408 0.7967 0.0000 0.9766 37375.0 195175.0 4.0 0.8376 NaN 0.7351 0.9983 0.0000 0.9806 0.9979 0.0536 0.0300 0.0412 0.1268 4.8405 0.6753 2.2770 1.9680 0.0021 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9976 0.9985 8.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 3066.2200 ESDILAVVK(UniMod:259) 40.6918 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 3110.6101 -8670811102654834151 1 0.0031 0.0029 0.0033 5.5702 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -6 None 228.9700 NaN 3319.15 -1344271892660954750 9115.7090 102078.0000 0.7000 1.0399 16.1263 5.5000 0.4975 4782.0 23890.0 2.0 0.4271 NaN 0.0900 0.9602 1.0000 0.7442 0.9239 0.3826 0.1244 0.1502 0.4641 3.2047 1.1861 14.1720 12.7184 0.0661 NaN NaN NaN NaN NaN NaN 0.7830 0.1486 0.8549 0.8773 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 3301.7800 ESDILAVVK(UniMod:259) 47.5056 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 3339.3301 -8670811102654834151 5 1.0000 0.4692 0.4692 -1.8443 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -7 None -124.5660 NaN 2965.61 -4515618252120499488 9268.4902 109284.0000 0.6217 1.1110 22.4222 1.5000 0.7138 793.0 6371.0 4.0 0.8481 NaN 0.0240 0.9434 0.0000 0.8820 0.9766 0.2001 0.0990 0.1152 0.3558 1.0582 0.5825 6.0852 4.5232 0.0365 NaN NaN NaN NaN NaN NaN 0.9749 0.0676 0.7227 0.8143 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 2946.7400 ESDILAVVK(UniMod:259) 37.2545 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 2994.5300 -8670811102654834151 3 1.0000 0.0205 0.0210 1.9398 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -8 None 28.5831 NaN 3118.76 -4044853666210028406 9989.7793 33949.8008 0.2103 0.6817 10.4054 1.0774 0.8894 865.0 2909.0 4.0 0.8179 NaN 0.0110 0.4917 0.0000 0.6890 0.9634 0.2912 0.1309 0.1515 0.4848 1.3179 0.6547 5.8786 5.3254 0.0080 NaN NaN NaN NaN NaN NaN 0.7830 0.3902 0.8943 0.8531 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 3114.0200 ESDILAVVK(UniMod:259) 41.6952 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 3131.0901 -8670811102654834151 2 1.0000 0.0176 0.0180 2.2958 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -9 None -184.3073 NaN 2905.87 7439833196907350500 15059.3516 143080.0000 -0.2753 3.5529 3.1948 3.9578 0.6528 1527.0 8281.0 5.0 0.5666 NaN 0.0312 0.8691 0.0000 -0.3982 0.7375 0.6314 0.2844 0.3777 1.1866 2.5854 0.9876 9.8422 8.8909 0.0538 NaN NaN NaN NaN NaN NaN 3.7575 2.5131 0.6924 0.6775 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385 2881.8701 ESDILAVVK(UniMod:259) 35.5222 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 2929.6699 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.8184 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK + ANNOTATION DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION 
FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FEATURE_TRANSITION_APEX_INTENSITY FEATURE_TRANSITION_AREA_INTENSITY FEATURE_TRANSITION_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_TRANSITION_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_TRANSITION_VAR_LOG_INTENSITY FEATURE_TRANSITION_VAR_LOG_SN_SCORE FEATURE_TRANSITION_VAR_MASSDEV_SCORE FEATURE_TRANSITION_VAR_XCORR_COELUTION FEATURE_TRANSITION_VAR_XCORR_SHAPE FILENAME GENE_DECOY GENE_ID GENE_NAME IM_leftWidth IM_rightWidth IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PRODUCT_MZ PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE TRANSITION_CHARGE TRANSITION_DECOY TRANSITION_DETECTING TRANSITION_ID TRANSITION_LIBRARY_INTENSITY TRANSITION_ORDINAL TRANSITION_TRAML_ID TRANSITION_TYPE UNMODIFIED_SEQUENCE +0 None -85.0733 NaN 1923.17 483971408708572459 192394.8906 935372.0000 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522 1898.6700 GIGDWSDSK(UniMod:259) 7.0277 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 1946.4600 -8670811102654834151 1 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +1 None -55.2126 NaN 1953.03 6854889104354289238 5696.7271 45882.6016 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522 1946.4600 GIGDWSDSK(UniMod:259) 7.8936 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1977.1899 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +2 None 9.7944 NaN 2018.03 2696300170322160855 17401.1816 95751.7969 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522 2001.0900 GIGDWSDSK(UniMod:259) 9.7785 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 2024.9800 -8670811102654834151 2 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +3 None -130.8641 NaN 1877.37 8207933629855485114 6239.5195 48788.5000 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522 1857.7000 GIGDWSDSK(UniMod:259) 5.7000 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5 1898.6700 -8670811102654834151 3 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +4 None -268.6805 NaN 1739.56 745237666153652118 6493.7773 66798.3984 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522 1717.7300 GIGDWSDSK(UniMod:259) 1.7038 False 523 2 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5 1762.1100 -8670811102654834151 5 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +5 None -6.0218 NaN 3084.15 1082368609638691369 88839.6328 604041.0000 0.9885 0.1408 0.7967 0.0000 0.9766 37375.0 195175.0 4.0 0.8376 NaN 0.7351 0.9983 0.0000 0.9806 0.9979 0.0536 0.0300 0.0412 0.1268 4.8405 0.6753 2.2770 1.9680 0.0021 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9976 0.9985 8.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385 3066.2200 ESDILAVVK(UniMod:259) 40.6918 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 3110.6101 -8670811102654834151 1 0.0031 0.0029 0.0033 5.5702 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +6 None 228.9700 NaN 3319.15 -1344271892660954750 9115.7090 102078.0000 0.7000 1.0399 16.1263 5.5000 0.4975 4782.0 23890.0 2.0 0.4271 NaN 0.0900 0.9602 1.0000 0.7442 0.9239 0.3826 0.1244 0.1502 0.4641 3.2047 1.1861 14.1720 12.7184 0.0661 NaN NaN NaN NaN NaN NaN 0.7830 0.1486 0.8549 0.8773 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385 3301.7800 ESDILAVVK(UniMod:259) 47.5056 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 3339.3301 -8670811102654834151 5 1.0000 0.4692 0.4692 -1.8443 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +7 None -124.5660 NaN 2965.61 -4515618252120499488 9268.4902 109284.0000 0.6217 1.1110 22.4222 1.5000 0.7138 793.0 6371.0 4.0 0.8481 NaN 0.0240 0.9434 0.0000 0.8820 0.9766 0.2001 0.0990 0.1152 0.3558 1.0582 0.5825 6.0852 4.5232 0.0365 NaN NaN NaN NaN NaN NaN 0.9749 0.0676 0.7227 0.8143 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385 2946.7400 ESDILAVVK(UniMod:259) 37.2545 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 2994.5300 -8670811102654834151 3 1.0000 0.0205 0.0210 1.9398 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +8 None 28.5831 NaN 3118.76 -4044853666210028406 9989.7793 33949.8008 0.2103 0.6817 10.4054 1.0774 0.8894 865.0 2909.0 4.0 0.8179 NaN 0.0110 0.4917 0.0000 0.6890 0.9634 0.2912 0.1309 0.1515 0.4848 1.3179 0.6547 5.8786 5.3254 0.0080 NaN NaN NaN NaN NaN NaN 0.7830 0.3902 0.8943 0.8531 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385 3114.0200 ESDILAVVK(UniMod:259) 41.6952 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8 3131.0901 -8670811102654834151 2 1.0000 0.0176 0.0180 2.2958 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +9 None -184.3073 NaN 2905.87 7439833196907350500 15059.3516 143080.0000 -0.2753 3.5529 3.1948 3.9578 0.6528 1527.0 8281.0 5.0 0.5666 NaN 0.0312 0.8691 0.0000 -0.3982 0.7375 0.6314 0.2844 0.3777 1.1866 2.5854 0.9876 9.8422 8.8909 0.0538 NaN NaN NaN NaN NaN NaN 3.7575 2.5131 0.6924 0.6775 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385 2881.8701 ESDILAVVK(UniMod:259) 35.5222 False 386 2 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8 2929.6699 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.8184 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out index 100164fd..67a02880 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_scored_osw.out @@ -1,13 +1,13 @@ -Exported 97964 rows with 98 columns +Exported 97964 rows with 100 columns Score columns found: ['SCORE_MS2_PEAK_GROUP_RANK', 'SCORE_MS2_PEP', 'SCORE_MS2_P_VALUE', 'SCORE_MS2_Q_VALUE', 'SCORE_MS2_SCORE', 'SCORE_PEPTIDE_GLOBAL_PEP', 'SCORE_PEPTIDE_GLOBAL_P_VALUE', 'SCORE_PEPTIDE_GLOBAL_Q_VALUE', 'SCORE_PEPTIDE_GLOBAL_SCORE', 'SCORE_PROTEIN_GLOBAL_PEP', 'SCORE_PROTEIN_GLOBAL_P_VALUE', 'SCORE_PROTEIN_GLOBAL_Q_VALUE', 'SCORE_PROTEIN_GLOBAL_SCORE'] - ANNOTATION DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION 
FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FEATURE_TRANSITION_APEX_INTENSITY FEATURE_TRANSITION_AREA_INTENSITY FEATURE_TRANSITION_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_TRANSITION_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_TRANSITION_VAR_LOG_INTENSITY FEATURE_TRANSITION_VAR_LOG_SN_SCORE FEATURE_TRANSITION_VAR_MASSDEV_SCORE FEATURE_TRANSITION_VAR_XCORR_COELUTION FEATURE_TRANSITION_VAR_XCORR_SHAPE FILENAME GENE_DECOY GENE_ID GENE_NAME IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PRODUCT_MZ PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE 
SCORE_PROTEIN_GLOBAL_SCORE TRANSITION_CHARGE TRANSITION_DECOY TRANSITION_DETECTING TRANSITION_ID TRANSITION_LIBRARY_INTENSITY TRANSITION_ORDINAL TRANSITION_TRAML_ID TRANSITION_TYPE UNMODIFIED_SEQUENCE -0 None -85.0733 NaN 1923.17 4.8397e+17 192394.8906 935372.0000 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1898.6700 GIGDWSDSK(UniMod:259) 7.0277 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1946.4600 -8.6708e+18 1.0 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -1 None -55.2126 NaN 1953.03 6.8549e+18 5696.7271 45882.6016 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1946.4600 GIGDWSDSK(UniMod:259) 7.8936 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 1977.1899 -8.6708e+18 4.0 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -2 None 9.7944 NaN 2018.03 2.6963e+18 17401.1816 95751.7969 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 2001.0900 GIGDWSDSK(UniMod:259) 9.7785 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 2024.9800 -8.6708e+18 2.0 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -3 None -130.8641 NaN 1877.37 8.2079e+18 6239.5195 48788.5000 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1857.7000 GIGDWSDSK(UniMod:259) 5.7000 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 1898.6700 -8.6708e+18 3.0 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -4 None -268.6805 NaN 1739.56 7.4524e+17 6493.7773 66798.3984 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 522.0 1717.7300 GIGDWSDSK(UniMod:259) 1.7038 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1762.1100 -8.6708e+18 5.0 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK -5 None -6.0218 NaN 3084.15 1.0824e+18 88839.6328 604041.0000 0.9885 0.1408 0.7967 0.0000 0.9766 37375.0 195175.0 4.0 0.8376 NaN 0.7351 0.9983 0.0000 0.9806 0.9979 0.0536 0.0300 0.0412 0.1268 4.8405 0.6753 2.2770 1.9680 0.0021 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9976 0.9985 8.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 3066.2200 ESDILAVVK(UniMod:259) 40.6918 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 3110.6101 -8.6708e+18 1.0 0.0031 0.0029 0.0033 5.5702 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -6 None 228.9700 NaN 3319.15 -1.3443e+18 9115.7090 102078.0000 0.7000 1.0399 16.1263 5.5000 0.4975 4782.0 23890.0 2.0 0.4271 NaN 0.0900 0.9602 1.0000 0.7442 0.9239 0.3826 0.1244 0.1502 0.4641 3.2047 1.1861 14.1720 12.7184 0.0661 NaN NaN NaN NaN NaN NaN 0.7830 0.1486 0.8549 0.8773 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 3301.7800 ESDILAVVK(UniMod:259) 47.5056 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 3339.3301 -8.6708e+18 5.0 1.0000 0.4692 0.4692 -1.8443 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -7 None -124.5660 NaN 2965.61 -4.5156e+18 9268.4902 109284.0000 0.6217 1.1110 22.4222 1.5000 0.7138 793.0 6371.0 4.0 0.8481 NaN 0.0240 0.9434 0.0000 0.8820 0.9766 0.2001 0.0990 0.1152 0.3558 1.0582 0.5825 6.0852 4.5232 0.0365 NaN NaN NaN NaN NaN NaN 0.9749 0.0676 0.7227 0.8143 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 2946.7400 ESDILAVVK(UniMod:259) 37.2545 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 2994.5300 -8.6708e+18 3.0 1.0000 0.0205 0.0210 1.9398 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -8 None 28.5831 NaN 3118.76 -4.0449e+18 9989.7793 33949.8008 0.2103 0.6817 10.4054 1.0774 0.8894 865.0 2909.0 4.0 0.8179 NaN 0.0110 0.4917 0.0000 0.6890 0.9634 0.2912 0.1309 0.1515 0.4848 1.3179 0.6547 5.8786 5.3254 0.0080 NaN NaN NaN NaN NaN NaN 0.7830 0.3902 0.8943 0.8531 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 3114.0200 ESDILAVVK(UniMod:259) 41.6952 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 3131.0901 -8.6708e+18 2.0 1.0000 0.0176 0.0180 2.2958 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK -9 None -184.3073 NaN 2905.87 7.4398e+18 15059.3516 143080.0000 -0.2753 3.5529 3.1948 3.9578 0.6528 1527.0 8281.0 5.0 0.5666 NaN 0.0312 0.8691 0.0000 -0.3982 0.7375 0.6314 0.2844 0.3777 1.1866 2.5854 0.9876 9.8422 8.8909 0.0538 NaN NaN NaN NaN NaN NaN 3.7575 2.5131 0.6924 0.6775 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None 385.0 2881.8701 ESDILAVVK(UniMod:259) 35.5222 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 2929.6699 -8.6708e+18 4.0 1.0000 0.4692 0.4692 -1.8184 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK + ANNOTATION DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FEATURE_TRANSITION_APEX_INTENSITY FEATURE_TRANSITION_AREA_INTENSITY FEATURE_TRANSITION_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_TRANSITION_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_TRANSITION_VAR_LOG_INTENSITY FEATURE_TRANSITION_VAR_LOG_SN_SCORE FEATURE_TRANSITION_VAR_MASSDEV_SCORE FEATURE_TRANSITION_VAR_XCORR_COELUTION FEATURE_TRANSITION_VAR_XCORR_SHAPE FILENAME GENE_DECOY GENE_ID GENE_NAME IM_leftWidth IM_rightWidth IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME 
PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PRODUCT_MZ PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE TRANSITION_CHARGE TRANSITION_DECOY TRANSITION_DETECTING TRANSITION_ID TRANSITION_LIBRARY_INTENSITY TRANSITION_ORDINAL TRANSITION_TRAML_ID TRANSITION_TYPE UNMODIFIED_SEQUENCE +0 None -85.0733 NaN 1923.17 4.8397e+17 192394.8906 935372.0000 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522.0 1898.6700 GIGDWSDSK(UniMod:259) 7.0277 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1946.4600 -8.6708e+18 1.0 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +1 None -55.2126 NaN 1953.03 6.8549e+18 5696.7271 45882.6016 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522.0 1946.4600 GIGDWSDSK(UniMod:259) 7.8936 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 1977.1899 -8.6708e+18 4.0 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +2 None 9.7944 NaN 2018.03 2.6963e+18 17401.1816 95751.7969 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522.0 2001.0900 GIGDWSDSK(UniMod:259) 9.7785 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 2024.9800 -8.6708e+18 2.0 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +3 None -130.8641 NaN 1877.37 8.2079e+18 6239.5195 48788.5000 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522.0 1857.7000 GIGDWSDSK(UniMod:259) 5.7000 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
NaN AQUA4SWATH_HMLangeF None 5.0 1898.6700 -8.6708e+18 3.0 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +4 None -268.6805 NaN 1739.56 7.4524e+17 6493.7773 66798.3984 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 522.0 1717.7300 GIGDWSDSK(UniMod:259) 1.7038 False 523.0 2.0 False AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... NaN AQUA4SWATH_HMLangeF None 5.0 1762.1100 -8.6708e+18 5.0 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None GIGDWSDSK +5 None -6.0218 NaN 3084.15 1.0824e+18 88839.6328 604041.0000 0.9885 0.1408 0.7967 0.0000 0.9766 37375.0 195175.0 4.0 0.8376 NaN 0.7351 0.9983 0.0000 0.9806 0.9979 0.0536 0.0300 0.0412 0.1268 4.8405 0.6753 2.2770 1.9680 0.0021 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9976 0.9985 8.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385.0 3066.2200 ESDILAVVK(UniMod:259) 40.6918 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 3110.6101 -8.6708e+18 1.0 0.0031 0.0029 0.0033 5.5702 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +6 None 228.9700 NaN 3319.15 -1.3443e+18 9115.7090 102078.0000 0.7000 1.0399 16.1263 5.5000 0.4975 4782.0 23890.0 2.0 0.4271 NaN 0.0900 0.9602 1.0000 0.7442 0.9239 0.3826 0.1244 0.1502 0.4641 3.2047 1.1861 14.1720 12.7184 0.0661 NaN NaN NaN NaN NaN NaN 0.7830 0.1486 0.8549 0.8773 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385.0 3301.7800 ESDILAVVK(UniMod:259) 47.5056 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 3339.3301 -8.6708e+18 5.0 1.0000 0.4692 0.4692 -1.8443 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +7 None -124.5660 NaN 2965.61 -4.5156e+18 9268.4902 109284.0000 0.6217 1.1110 22.4222 1.5000 0.7138 793.0 6371.0 4.0 0.8481 NaN 0.0240 0.9434 0.0000 0.8820 0.9766 0.2001 0.0990 0.1152 0.3558 1.0582 0.5825 6.0852 4.5232 0.0365 NaN NaN NaN NaN NaN NaN 0.9749 0.0676 0.7227 0.8143 2.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385.0 2946.7400 ESDILAVVK(UniMod:259) 37.2545 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 2994.5300 -8.6708e+18 3.0 1.0000 0.0205 0.0210 1.9398 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +8 None 28.5831 NaN 3118.76 -4.0449e+18 9989.7793 33949.8008 0.2103 0.6817 10.4054 1.0774 0.8894 865.0 2909.0 4.0 0.8179 NaN 0.0110 0.4917 0.0000 0.6890 0.9634 0.2912 0.1309 0.1515 0.4848 1.3179 0.6547 5.8786 5.3254 0.0080 NaN NaN NaN NaN NaN NaN 0.7830 0.3902 0.8943 0.8531 1.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385.0 3114.0200 ESDILAVVK(UniMod:259) 41.6952 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... NaN AQUA4SWATH_Lepto None 8.0 3131.0901 -8.6708e+18 2.0 1.0000 0.0176 0.0180 2.2958 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK +9 None -184.3073 NaN 2905.87 7.4398e+18 15059.3516 143080.0000 -0.2753 3.5529 3.1948 3.9578 0.6528 1527.0 8281.0 5.0 0.5666 NaN 0.0312 0.8691 0.0000 -0.3982 0.7375 0.6314 0.2844 0.3777 1.1866 2.5854 0.9876 9.8422 8.8909 0.0538 NaN NaN NaN NaN NaN NaN 3.7575 2.5131 0.6924 0.6775 0.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN napedro_L120420_010_SW.mzXML.gz None NaN None NaN NaN 385.0 2881.8701 ESDILAVVK(UniMod:259) 35.5222 False 386.0 2.0 False AQUA4SWATH_Lepto_ESDILAVVK(UniMod:259)/2 166 NaN NaN 40.9 491.2890 AQUA4SWATH_Lepto_ESDILAVVK(Label:13C(6)15N(2))... 
NaN AQUA4SWATH_Lepto None 8.0 2929.6699 -8.6708e+18 4.0 1.0000 0.4692 0.4692 -1.8184 NaN NaN NaN NaN NaN NaN NaN NaN NaN None None NaN NaN NaN None None ESDILAVVK diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out index 0d886bc2..7515c069 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_split_format.out @@ -1,10 +1,10 @@ -Precursor data: 3410 rows with 79 columns +Precursor data: 3410 rows with 81 columns Transition data: 96259 rows with 23 columns Precursor score columns: ['SCORE_MS2_PEAK_GROUP_RANK', 'SCORE_MS2_PEP', 'SCORE_MS2_P_VALUE', 'SCORE_MS2_Q_VALUE', 'SCORE_MS2_SCORE', 'SCORE_PEPTIDE_GLOBAL_PEP', 'SCORE_PEPTIDE_GLOBAL_P_VALUE', 'SCORE_PEPTIDE_GLOBAL_Q_VALUE', 'SCORE_PEPTIDE_GLOBAL_SCORE', 'SCORE_PROTEIN_GLOBAL_PEP', 'SCORE_PROTEIN_GLOBAL_P_VALUE', 'SCORE_PROTEIN_GLOBAL_Q_VALUE', 'SCORE_PROTEIN_GLOBAL_SCORE'] Precursor data sample: - DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE 
FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FILENAME GENE_DECOY GENE_ID GENE_NAME IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE UNMODIFIED_SEQUENCE -0 -85.0733 NaN 1923.17 483971408708572459 192394.8869 935372.0 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1898.67 GIGDWSDSK(UniMod:259) 7.0277 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
AQUA4SWATH_HMLangeF NaN 5 1946.4600 -8670811102654834151 1 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK -1 -55.2126 NaN 1953.03 6854889104354289238 5696.7273 45882.6 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1946.46 GIGDWSDSK(UniMod:259) 7.8936 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1977.1899 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK -2 9.7944 NaN 2018.03 2696300170322160855 17401.1825 95751.8 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 2001.09 GIGDWSDSK(UniMod:259) 9.7785 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 2024.9800 -8670811102654834151 2 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK -3 -130.8641 NaN 1877.37 8207933629855485114 6239.5198 48788.5 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1857.70 GIGDWSDSK(UniMod:259) 5.7000 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
AQUA4SWATH_HMLangeF NaN 5 1898.6700 -8670811102654834151 3 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK -4 -268.6805 NaN 1739.56 745237666153652118 6493.7774 66798.4 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN 522 1717.73 GIGDWSDSK(UniMod:259) 1.7038 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1762.1100 -8670811102654834151 5 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK + DELTA_RT EXP_IM EXP_RT FEATURE_ID FEATURE_MS1_APEX_INTENSITY FEATURE_MS1_AREA_INTENSITY FEATURE_MS1_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS1_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS1_VAR_MASSDEV_SCORE FEATURE_MS1_VAR_XCORR_COELUTION FEATURE_MS1_VAR_XCORR_SHAPE FEATURE_MS2_APEX_INTENSITY FEATURE_MS2_AREA_INTENSITY FEATURE_MS2_VAR_BSERIES_SCORE FEATURE_MS2_VAR_DOTPROD_SCORE FEATURE_MS2_VAR_ELUTION_MODEL_FIT_SCORE FEATURE_MS2_VAR_INTENSITY_SCORE FEATURE_MS2_VAR_ISOTOPE_CORRELATION_SCORE FEATURE_MS2_VAR_ISOTOPE_OVERLAP_SCORE FEATURE_MS2_VAR_LIBRARY_CORR FEATURE_MS2_VAR_LIBRARY_DOTPROD FEATURE_MS2_VAR_LIBRARY_MANHATTAN FEATURE_MS2_VAR_LIBRARY_RMSD FEATURE_MS2_VAR_LIBRARY_ROOTMEANSQUARE FEATURE_MS2_VAR_LIBRARY_SANGLE FEATURE_MS2_VAR_LOG_SN_SCORE FEATURE_MS2_VAR_MANHATTAN_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE FEATURE_MS2_VAR_MASSDEV_SCORE_WEIGHTED FEATURE_MS2_VAR_NORM_RT_SCORE FEATURE_MS2_VAR_SONAR_LAG FEATURE_MS2_VAR_SONAR_LOG_DIFF FEATURE_MS2_VAR_SONAR_LOG_SN FEATURE_MS2_VAR_SONAR_LOG_TREND FEATURE_MS2_VAR_SONAR_RSQ FEATURE_MS2_VAR_SONAR_SHAPE FEATURE_MS2_VAR_XCORR_COELUTION FEATURE_MS2_VAR_XCORR_COELUTION_WEIGHTED FEATURE_MS2_VAR_XCORR_SHAPE FEATURE_MS2_VAR_XCORR_SHAPE_WEIGHTED FEATURE_MS2_VAR_YSERIES_SCORE FILENAME GENE_DECOY 
GENE_ID GENE_NAME IM_leftWidth IM_rightWidth IPF_PEPTIDE_ID LEFT_WIDTH MODIFIED_SEQUENCE NORM_RT PEPTIDE_DECOY PEPTIDE_ID PRECURSOR_CHARGE PRECURSOR_DECOY PRECURSOR_GROUP_LABEL PRECURSOR_ID PRECURSOR_LIBRARY_DRIFT_TIME PRECURSOR_LIBRARY_INTENSITY PRECURSOR_LIBRARY_RT PRECURSOR_MZ PRECURSOR_TRAML_ID PROTEIN_ACCESSION PROTEIN_DECOY PROTEIN_ID RIGHT_WIDTH RUN_ID SCORE_MS2_PEAK_GROUP_RANK SCORE_MS2_PEP SCORE_MS2_P_VALUE SCORE_MS2_Q_VALUE SCORE_MS2_SCORE SCORE_PEPTIDE_GLOBAL_PEP SCORE_PEPTIDE_GLOBAL_P_VALUE SCORE_PEPTIDE_GLOBAL_Q_VALUE SCORE_PEPTIDE_GLOBAL_SCORE SCORE_PROTEIN_GLOBAL_PEP SCORE_PROTEIN_GLOBAL_P_VALUE SCORE_PROTEIN_GLOBAL_Q_VALUE SCORE_PROTEIN_GLOBAL_SCORE UNMODIFIED_SEQUENCE +0 -85.0733 NaN 1923.17 483971408708572459 192394.8869 935372.0 0.9919 0.1179 3.1724 0.0000 0.9713 61269.0 321681.0 5.0 0.7829 NaN 0.3415 0.9953 0.0000 0.9944 0.9786 0.2234 0.0893 0.1044 0.2119 3.7524 0.7034 2.8770 1.0938 0.0237 NaN NaN NaN NaN NaN NaN 0.0000 0.0000 0.9660 0.9924 6.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN NaN NaN 522 1898.67 GIGDWSDSK(UniMod:259) 7.0277 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1946.4600 -8670811102654834151 1 0.0031 0.0029 0.0033 4.6997 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +1 -55.2126 NaN 1953.03 6854889104354289238 5696.7273 45882.6 0.2886 1.6923 20.0342 5.1458 0.5950 7999.0 45147.0 3.0 0.5879 NaN 0.0479 0.6040 0.6485 0.9860 0.9809 0.2040 0.0854 0.1024 0.2154 1.8184 0.8426 7.5457 10.7048 0.0151 NaN NaN NaN NaN NaN NaN 3.4670 1.4506 0.6956 0.8139 3.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN NaN NaN 522 1946.46 GIGDWSDSK(UniMod:259) 7.8936 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
AQUA4SWATH_HMLangeF NaN 5 1977.1899 -8670811102654834151 4 1.0000 0.4692 0.4692 -1.7930 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +2 9.7944 NaN 2018.03 2696300170322160855 17401.1825 95751.8 0.9301 0.5084 13.5151 0.7500 0.6966 2243.0 13809.0 4.0 0.5880 NaN 0.0147 -0.0614 0.1439 -0.4161 0.7216 0.6824 0.3240 0.4081 1.2232 0.0221 0.9147 2.4447 2.0283 0.0038 NaN NaN NaN NaN NaN NaN 1.3498 0.4384 0.8111 0.8839 4.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN NaN NaN 522 2001.09 GIGDWSDSK(UniMod:259) 9.7785 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 2024.9800 -8670811102654834151 2 1.0000 0.4692 0.4692 -0.3786 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +3 -130.8641 NaN 1877.37 8207933629855485114 6239.5198 48788.5 -0.5293 1.7491 3.3984 3.2500 0.5946 3336.0 36324.0 3.0 0.4316 NaN 0.0386 0.1794 0.2909 -0.3937 0.8019 0.6135 0.2909 0.3399 1.0151 0.6018 1.1139 5.2642 1.9825 0.0370 NaN NaN NaN NaN NaN NaN 2.2472 0.8549 0.7655 0.8558 4.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN NaN NaN 522 1857.70 GIGDWSDSK(UniMod:259) 5.7000 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... AQUA4SWATH_HMLangeF NaN 5 1898.6700 -8670811102654834151 3 1.0000 0.4692 0.4692 -1.5525 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK +4 -268.6805 NaN 1739.56 745237666153652118 6493.7774 66798.4 -0.4011 0.7500 16.0151 4.5817 0.6404 7539.0 53232.0 3.0 0.5633 NaN 0.0565 0.3089 0.5266 -0.7130 0.6301 0.8349 0.3552 0.4363 1.3366 1.4128 0.9671 6.3286 10.9637 0.0770 NaN NaN NaN NaN NaN NaN 2.8670 1.1513 0.7267 0.8314 3.0 napedro_L120420_010_SW.mzXML.gz NaN NaN NaN NaN NaN 522 1717.73 GIGDWSDSK(UniMod:259) 1.7038 0 523 2 0 AQUA4SWATH_HMLangeF_GIGDWSDSK(UniMod:259)/2 119 NaN NaN 9.4 486.7293 AQUA4SWATH_HMLangeF_GIGDWSDSK(Label:13C(6)15N(... 
AQUA4SWATH_HMLangeF NaN 5 1762.1100 -8670811102654834151 5 1.0000 0.4692 0.4692 -3.2559 NaN NaN NaN NaN NaN NaN NaN NaN GIGDWSDSK diff --git a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out index fe59caec..5ed4c2d2 100644 --- a/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out +++ b/tests/_regtest_outputs/test_pyprophet_export.test_parquet_export_with_ipf.out @@ -1,4 +1,4 @@ -Exported 97965 rows with 103 columns +Exported 97964 rows with 105 columns SCORE_IPF columns found: ['SCORE_IPF_PEP', 'SCORE_IPF_PRECURSOR_PEAKGROUP_PEP', 'SCORE_IPF_QVALUE'] Sample data with IPF scores: FEATURE_ID SCORE_IPF_PEP SCORE_IPF_PRECURSOR_PEAKGROUP_PEP SCORE_IPF_QVALUE From ef86895b056c9197cdc2cf14d80f444d4a3b2bea Mon Sep 17 00:00:00 2001 From: singjc Date: Sun, 16 Nov 2025 14:38:23 -0500 Subject: [PATCH 27/30] refactor(parquet): improve alignment group ID assignment and update score column references --- pyprophet/io/export/parquet.py | 68 +++++++++++++++++--------- pyprophet/io/export/split_parquet.py | 71 ++++++++++++++++++---------- 2 files changed, 92 insertions(+), 47 deletions(-) diff --git a/pyprophet/io/export/parquet.py b/pyprophet/io/export/parquet.py index 7d3b48c4..23d97514 100644 --- a/pyprophet/io/export/parquet.py +++ b/pyprophet/io/export/parquet.py @@ -1,3 +1,4 @@ +import os import duckdb import pandas as pd from loguru import logger @@ -397,31 +398,42 @@ def _read_standard_data(self, con) -> pd.DataFrame: # Assign alignment_group_id to reference features # Create a mapping from reference feature IDs to their alignment_group_ids - if "alignment_reference_feature_id" in data.columns and "alignment_group_id" in data.columns: + if ( + "alignment_reference_feature_id" in data.columns + and "alignment_group_id" in data.columns + ): # Get all reference feature IDs and their corresponding alignment_group_ids - ref_mapping = data[ - 
data["alignment_reference_feature_id"].notna() - ][["alignment_reference_feature_id", "alignment_group_id"]].drop_duplicates() - + ref_mapping = data[data["alignment_reference_feature_id"].notna()][ + ["alignment_reference_feature_id", "alignment_group_id"] + ].drop_duplicates() + # For each reference feature ID, we need to assign the alignment_group_id # to the feature row where id == alignment_reference_feature_id if not ref_mapping.empty: # Merge the alignment_group_id for reference features # First create a DataFrame mapping id -> alignment_group_id for references ref_group_mapping = ref_mapping.rename( - columns={"alignment_reference_feature_id": "id", "alignment_group_id": "ref_alignment_group_id"} + columns={ + "alignment_reference_feature_id": "id", + "alignment_group_id": "ref_alignment_group_id", + } ) - + # Merge this mapping to assign alignment_group_id to reference features data = pd.merge(data, ref_group_mapping, on="id", how="left") - + # Fill in alignment_group_id for reference features (where it's currently null but ref_alignment_group_id is not) - mask = data["alignment_group_id"].isna() & data["ref_alignment_group_id"].notna() - data.loc[mask, "alignment_group_id"] = data.loc[mask, "ref_alignment_group_id"] - + mask = ( + data["alignment_group_id"].isna() + & data["ref_alignment_group_id"].notna() + ) + data.loc[mask, "alignment_group_id"] = data.loc[ + mask, "ref_alignment_group_id" + ] + # Drop the temporary column data = data.drop(columns=["ref_alignment_group_id"]) - + logger.debug( f"Assigned alignment_group_id to {mask.sum()} reference features" ) @@ -729,7 +741,6 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: Returns: DataFrame with aligned feature IDs that pass quality threshold """ - import os # Check for alignment file - it should be named with _feature_alignment.parquet suffix alignment_file = None @@ -763,13 +774,22 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: # Check if we have alignment scores 
(PEP/QVALUE) in the file # If not, we'll need to rely on the base MS2 scores and just use alignment to identify features has_alignment_scores = ( - "PEP" in alignment_df.columns or "QVALUE" in alignment_df.columns + "SCORE_ALIGNMENT_PEP" in alignment_df.columns + or "SCORE_ALIGNMENT_Q_VALUE" in alignment_df.columns ) if has_alignment_scores: # Filter by alignment PEP threshold - pep_col = "PEP" if "PEP" in alignment_df.columns else None - qvalue_col = "QVALUE" if "QVALUE" in alignment_df.columns else None + pep_col = ( + "SCORE_ALIGNMENT_PEP" + if "SCORE_ALIGNMENT_PEP" in alignment_df.columns + else None + ) + qvalue_col = ( + "SCORE_ALIGNMENT_Q_VALUE" + if "SCORE_ALIGNMENT_Q_VALUE" in alignment_df.columns + else None + ) if pep_col: filtered_df = alignment_df[ @@ -809,8 +829,8 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: fa.RUN_ID, CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS REFERENCE_FEATURE_ID, fa.REFERENCE_RT, - fa.PEP, - fa.QVALUE + fa.SCORE_ALIGNMENT_PEP, + fa.SCORE_ALIGNMENT_Q_VALUE FROM filtered_alignment fa INNER JOIN data d ON d.FEATURE_ID = fa.REFERENCE_FEATURE_ID WHERE d.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} @@ -841,10 +861,14 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: ].values # Add alignment scores if available - if "PEP" in filtered_df.columns: - result["alignment_pep"] = filtered_df["PEP"].values - if "QVALUE" in filtered_df.columns: - result["alignment_qvalue"] = filtered_df["QVALUE"].values + if "SCORE_ALIGNMENT_PEP" in filtered_df.columns: + result["alignment_pep"] = filtered_df[ + "SCORE_ALIGNMENT_PEP" + ].values + if "SCORE_ALIGNMENT_Q_VALUE" in filtered_df.columns: + result["alignment_qvalue"] = filtered_df[ + "SCORE_ALIGNMENT_Q_VALUE" + ].values # Convert alignment_group_id to int64 if "alignment_group_id" in result.columns: diff --git a/pyprophet/io/export/split_parquet.py b/pyprophet/io/export/split_parquet.py index eaaee5a6..8514441d 100644 --- a/pyprophet/io/export/split_parquet.py +++ 
b/pyprophet/io/export/split_parquet.py @@ -512,31 +512,42 @@ def _read_standard_data(self, con) -> pd.DataFrame: # Assign alignment_group_id to reference features # Create a mapping from reference feature IDs to their alignment_group_ids - if "alignment_reference_feature_id" in data.columns and "alignment_group_id" in data.columns: + if ( + "alignment_reference_feature_id" in data.columns + and "alignment_group_id" in data.columns + ): # Get all reference feature IDs and their corresponding alignment_group_ids - ref_mapping = data[ - data["alignment_reference_feature_id"].notna() - ][["alignment_reference_feature_id", "alignment_group_id"]].drop_duplicates() - + ref_mapping = data[data["alignment_reference_feature_id"].notna()][ + ["alignment_reference_feature_id", "alignment_group_id"] + ].drop_duplicates() + # For each reference feature ID, we need to assign the alignment_group_id # to the feature row where id == alignment_reference_feature_id if not ref_mapping.empty: # Merge the alignment_group_id for reference features # First create a DataFrame mapping id -> alignment_group_id for references ref_group_mapping = ref_mapping.rename( - columns={"alignment_reference_feature_id": "id", "alignment_group_id": "ref_alignment_group_id"} + columns={ + "alignment_reference_feature_id": "id", + "alignment_group_id": "ref_alignment_group_id", + } ) - + # Merge this mapping to assign alignment_group_id to reference features data = pd.merge(data, ref_group_mapping, on="id", how="left") - + # Fill in alignment_group_id for reference features (where it's currently null but ref_alignment_group_id is not) - mask = data["alignment_group_id"].isna() & data["ref_alignment_group_id"].notna() - data.loc[mask, "alignment_group_id"] = data.loc[mask, "ref_alignment_group_id"] - + mask = ( + data["alignment_group_id"].isna() + & data["ref_alignment_group_id"].notna() + ) + data.loc[mask, "alignment_group_id"] = data.loc[ + mask, "ref_alignment_group_id" + ] + # Drop the temporary column 
data = data.drop(columns=["ref_alignment_group_id"]) - + logger.debug( f"Assigned alignment_group_id to {mask.sum()} reference features" ) @@ -825,7 +836,6 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: Returns: DataFrame with aligned feature IDs that pass quality threshold """ - import os # For split parquet, alignment file is at parent directory level alignment_file = os.path.join(self.infile, "feature_alignment.parquet") @@ -851,18 +861,25 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: "DECOY" in alignment_df.columns and "VAR_XCORR_SHAPE" in alignment_df.columns ): - # This looks like the feature_alignment table structure - # Check if we have alignment scores (PEP/QVALUE) in the file # If not, we'll need to rely on the base MS2 scores and just use alignment to identify features has_alignment_scores = ( - "PEP" in alignment_df.columns or "QVALUE" in alignment_df.columns + "SCORE_ALIGNMENT_PEP" in alignment_df.columns + or "SCORE_ALIGNMENT_Q_VALUE" in alignment_df.columns ) if has_alignment_scores: # Filter by alignment PEP threshold - pep_col = "PEP" if "PEP" in alignment_df.columns else None - qvalue_col = "QVALUE" if "QVALUE" in alignment_df.columns else None + pep_col = ( + "SCORE_ALIGNMENT_PEP" + if "SCORE_ALIGNMENT_PEP" in alignment_df.columns + else None + ) + qvalue_col = ( + "SCORE_ALIGNMENT_Q_VALUE" + if "SCORE_ALIGNMENT_Q_VALUE" in alignment_df.columns + else None + ) if pep_col: filtered_df = alignment_df[ @@ -872,7 +889,7 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: & (alignment_df[pep_col] < max_alignment_pep) ].copy() else: - # Use QVALUE if PEP not available (less ideal but workable) + # Use QVALUE if SCORE_ALIGNMENT_PEP not available (less ideal but workable) filtered_df = alignment_df[ (alignment_df["DECOY"] == 1) & (alignment_df[qvalue_col] < max_alignment_pep) @@ -902,8 +919,8 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: fa.RUN_ID, CAST(fa.REFERENCE_FEATURE_ID AS BIGINT) AS 
REFERENCE_FEATURE_ID, fa.REFERENCE_RT, - fa.PEP, - fa.QVALUE + fa.SCORE_ALIGNMENT_PEP, + fa.SCORE_ALIGNMENT_Q_VALUE FROM filtered_alignment fa INNER JOIN precursors p ON p.FEATURE_ID = fa.REFERENCE_FEATURE_ID WHERE p.SCORE_MS2_Q_VALUE < {max_rs_peakgroup_qvalue} @@ -934,10 +951,14 @@ def _fetch_alignment_features(self, con) -> pd.DataFrame: ].values # Add alignment scores if available - if "PEP" in filtered_df.columns: - result["alignment_pep"] = filtered_df["PEP"].values - if "QVALUE" in filtered_df.columns: - result["alignment_qvalue"] = filtered_df["QVALUE"].values + if "SCORE_ALIGNMENT_PEP" in filtered_df.columns: + result["alignment_pep"] = filtered_df[ + "SCORE_ALIGNMENT_PEP" + ].values + if "SCORE_ALIGNMENT_Q_VALUE" in filtered_df.columns: + result["alignment_qvalue"] = filtered_df[ + "SCORE_ALIGNMENT_Q_VALUE" + ].values # Convert alignment_group_id to int64 if "alignment_group_id" in result.columns: From bf96ddb9b6ce22c351c2a6b1eec50b640b1e3be5 Mon Sep 17 00:00:00 2001 From: singjc Date: Tue, 18 Nov 2025 01:41:15 -0500 Subject: [PATCH 28/30] fix(osw): remove debug print statement for temporary table query in OSWWriter --- pyprophet/io/export/osw.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyprophet/io/export/osw.py b/pyprophet/io/export/osw.py index c5392d90..4dc22568 100644 --- a/pyprophet/io/export/osw.py +++ b/pyprophet/io/export/osw.py @@ -2324,7 +2324,6 @@ def _create_temp_table(self, conn, column_info: dict) -> None: {score_cols_types_sql} ); """ - print(create_temp_table_query) conn.execute(create_temp_table_query) From 048686b571cee7ac1dd71d4a19d822236800fdf7 Mon Sep 17 00:00:00 2001 From: singjc Date: Tue, 18 Nov 2025 01:41:24 -0500 Subject: [PATCH 29/30] feat(export): add support for exporting minimal scored-report columns from split Parquet files --- pyprophet/io/export/split_parquet.py | 49 ++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/pyprophet/io/export/split_parquet.py 
b/pyprophet/io/export/split_parquet.py index 8514441d..0aebef66 100644 --- a/pyprophet/io/export/split_parquet.py +++ b/pyprophet/io/export/split_parquet.py @@ -71,6 +71,9 @@ def read(self) -> pd.DataFrame: try: self._init_duckdb_views(con) + if self.config.context == "export_scored_report": + return self._read_for_export_scored_report(con) + if self.config.export_format == "library": if self._is_unscored_file(): descr = "Files must be scored for library generation." @@ -999,6 +1002,52 @@ def _build_feature_vars_sql(self) -> str: return ", " + ", ".join(feature_vars) if feature_vars else "" + ################################## + # Export-specific readers below + ################################## + + def _read_for_export_scored_report(self, con) -> pd.DataFrame: + """ + Lightweight reader that returns the minimal scored-report columns from split Parquet files. + """ + select_cols = [ + "RUN_ID", + "PROTEIN_ID", + "PEPTIDE_ID", + "PRECURSOR_ID", + "PRECURSOR_DECOY", + "FEATURE_MS2_AREA_INTENSITY", + "SCORE_MS2_SCORE", + "SCORE_MS2_PEAK_GROUP_RANK", + "SCORE_MS2_Q_VALUE", + "SCORE_PEPTIDE_GLOBAL_SCORE", + "SCORE_PEPTIDE_GLOBAL_Q_VALUE", + "SCORE_PEPTIDE_EXPERIMENT_WIDE_SCORE", + "SCORE_PEPTIDE_EXPERIMENT_WIDE_Q_VALUE", + "SCORE_PEPTIDE_RUN_SPECIFIC_SCORE", + "SCORE_PEPTIDE_RUN_SPECIFIC_Q_VALUE", + "SCORE_PROTEIN_GLOBAL_SCORE", + "SCORE_PROTEIN_GLOBAL_Q_VALUE", + "SCORE_PROTEIN_EXPERIMENT_WIDE_SCORE", + "SCORE_PROTEIN_EXPERIMENT_WIDE_Q_VALUE", + "SCORE_IPF_QVALUE", + ] + + # Filter select cols based on available columns in the precursor files + select_cols = [col for col in select_cols if col in self._columns] + + # Build query to select only the needed columns from precursors view + cols_str = ", ".join([f"p.{col}" for col in select_cols]) + + query = f""" + SELECT {cols_str} + FROM precursors p + WHERE p.PROTEIN_ID IS NOT NULL + """ + + df = con.execute(query).fetchdf() + return df + def export_feature_scores(self, outfile: str, plot_callback): """ Export feature 
scores from split Parquet directory for plotting. From e5510b6d3f6288e06e6830b3d50f2af67a912c3b Mon Sep 17 00:00:00 2001 From: singjc Date: Tue, 18 Nov 2025 02:05:05 -0500 Subject: [PATCH 30/30] fix(report): ensure axes are consistently indexed in plot_score_distributions function --- pyprophet/report.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pyprophet/report.py b/pyprophet/report.py index 681e75a0..b7f02201 100644 --- a/pyprophet/report.py +++ b/pyprophet/report.py @@ -918,14 +918,19 @@ def plot_score_distributions(pdf, plotter, df, score_mapping): n_rows = (n_scores + n_cols - 1) // n_cols # Calculate needed rows fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows)) - fig.suptitle("Score Distributions", y=1.02, fontsize=14) - plt.subplots_adjust(hspace=0.4, wspace=0.3) - # Flatten axes array for easy iteration - if n_scores > 1: + # Ensure axes is always a flat array for consistent indexing + # plt.subplots returns different types based on grid size: + # - Single subplot (1,1): returns single Axes object + # - Single row/col (1,n) or (n,1): returns 1D array + # - Grid (m,n): returns 2D array + if not isinstance(axes, np.ndarray): + axes = np.array([axes]) + elif axes.ndim > 1: axes = axes.flatten() - else: - axes = [axes] # Make it iterable even for single plot + + fig.suptitle("Score Distributions", y=1.02, fontsize=14) + plt.subplots_adjust(hspace=0.4, wspace=0.3) for i, (base_key, base_dict) in enumerate(score_mapping.items()): score_col = base_dict["score"]