From e9d2f3a83f6291b650b2929f4b94ac045e961aac Mon Sep 17 00:00:00 2001 From: nicolai Date: Wed, 31 Dec 2025 22:43:22 +0100 Subject: [PATCH] #161 fix: Standardize deconvolution model outputs --- biolearn/model.py | 8 ++++--- .../DeconvoluteBlood450K.csv | 18 ++++++++------ .../DeconvoluteBloodEPIC.csv | 18 ++++++++------ .../TwelveCellDeconvoluteBloodEPIC.csv | 24 +++++++++---------- .../plot_blood_deconvolution.py | 14 +++++------ 5 files changed, 45 insertions(+), 37 deletions(-) diff --git a/biolearn/model.py b/biolearn/model.py index 2f88893..f2a4d90 100644 --- a/biolearn/model.py +++ b/biolearn/model.py @@ -1135,10 +1135,12 @@ def solve_qp(meth_vector, deconv_reference): ) # convert cell proportion ndarray to dataframe - cell_prop_df = pd.DataFrame(cell_prop, columns=sample_names) - cell_prop_df.index = cell_types + cell_prop_df = pd.DataFrame( + cell_prop, columns=sample_names, index=cell_types + ) - return cell_prop_df + # Return samples as rows to match other model outputs + return cell_prop_df.T # returns required methylation sites def methylation_sites(self): diff --git a/biolearn/test/data/expected_model_outputs/DeconvoluteBlood450K.csv b/biolearn/test/data/expected_model_outputs/DeconvoluteBlood450K.csv index 97ee6a3..bd26943 100644 --- a/biolearn/test/data/expected_model_outputs/DeconvoluteBlood450K.csv +++ b/biolearn/test/data/expected_model_outputs/DeconvoluteBlood450K.csv @@ -1,7 +1,11 @@ -,GSM1009660,GSM1009661,GSM1009662,GSM1009663,GSM1009664,GSM1009665,GSM1009666,GSM1009667,GSM1009668,GSM1009669 -neutrophil,0.6349404196111952,0.6508816892019127,0.8734479313597019,0.5345835794981797,0.6846656996989239,0.9193866560113504,0.733484595214932,0.6894988085955095,0.5809550997357867,0.6429969122436895 -nk_cell,0.0043060663676117205,1.5908792613140287e-08,1.376417166071119e-09,7.754155428229958e-11,0.009291560768646049,5.403848495993537e-10,0.07483585127259323,0.06039050699878193,0.05045283893060284,0.003917385235080766 -b_cell,0.05237890267247137,0.016338804516337777,0.01165072429580731,0.018664085780459073,0.059201905690304917,0.009118742747831756,0.03334218801587912,0.024823385633772545,0.04045901077253031,0.02100463554900024 -cd4_t_cell,0.14534499800700326,0.215989817971748,0.07994504673855138,0.25103291174557724,0.1402233074156675,0.018528288644538933,0.09653316635721651,0.05965615549297141,0.19020688708127698,0.12647693664158982 -cd8_t_cell,0.0988151472291763,0.08928728141551068,1.2319302626736653e-09,0.19571942281133378,0.07189483091106882,5.110461002149227e-10,0.011743312572393775,0.04319271563047626,0.09179476947540229,0.13361702228133973 -monocyte,0.06421446611249824,0.027502390985500117,0.034956294997574223,8.749470859538934e-11,0.03472269551531783,0.05296631154572634,0.05006088656696152,0.12243842764841756,0.046131394004318586,0.0719871080492142 +,neutrophil,nk_cell,b_cell,cd4_t_cell,cd8_t_cell,monocyte +GSM1009660,0.6349404196111952,0.0043060663676117205,0.05237890267247137,0.14534499800700326,0.0988151472291763,0.06421446611249824 +GSM1009661,0.6508816892019127,1.5908792613140287e-08,0.016338804516337777,0.215989817971748,0.08928728141551068,0.027502390985500117 +GSM1009662,0.8734479313597019,1.376417166071119e-09,0.01165072429580731,0.07994504673855138,1.2319302626736653e-09,0.034956294997574223 +GSM1009663,0.5345835794981797,7.754155428229958e-11,0.018664085780459073,0.25103291174557724,0.19571942281133378,8.749470859538934e-11 +GSM1009664,0.6846656996989239,0.009291560768646049,0.059201905690304917,0.1402233074156675,0.07189483091106882,0.03472269551531783 +GSM1009665,0.9193866560113504,5.403848495993537e-10,0.009118742747831756,0.018528288644538933,5.110461002149227e-10,0.05296631154572634 +GSM1009666,0.733484595214932,0.07483585127259323,0.03334218801587912,0.09653316635721651,0.011743312572393775,0.05006088656696152 +GSM1009667,0.6894988085955095,0.06039050699878193,0.024823385633772545,0.05965615549297141,0.04319271563047626,0.12243842764841756 +GSM1009668,0.5809550997357867,0.05045283893060284,0.04045901077253031,0.19020688708127698,0.09179476947540229,0.046131394004318586 +GSM1009669,0.6429969122436895,0.003917385235080766,0.02100463554900024,0.12647693664158982,0.13361702228133973,0.0719871080492142 diff --git a/biolearn/test/data/expected_model_outputs/DeconvoluteBloodEPIC.csv b/biolearn/test/data/expected_model_outputs/DeconvoluteBloodEPIC.csv index 3e49e4a..aa4b99d 100644 --- a/biolearn/test/data/expected_model_outputs/DeconvoluteBloodEPIC.csv +++ b/biolearn/test/data/expected_model_outputs/DeconvoluteBloodEPIC.csv @@ -1,7 +1,11 @@ -,GSM1009660,GSM1009661,GSM1009662,GSM1009663,GSM1009664,GSM1009665,GSM1009666,GSM1009667,GSM1009668,GSM1009669 -neutrophil,0.5995409772045025,0.6226525091230747,0.84252152303835,0.4979730385676798,0.6685986653996869,0.8789151431899137,0.7059700794136665,0.6550150275574722,0.5380434826449068,0.6049417858298329 -nk_cell,1.0980721922114526e-09,2.0552146772293756e-10,5.485742298867206e-10,3.456256164557197e-10,0.0026859826821248223,7.581285712022034e-10,0.031300882743745304,0.03022215948701802,0.005183397768582708,1.5519440138901376e-09 -b_cell,0.04555911629867091,0.016638439485816336,0.03169050456378148,0.00890659548648567,0.044730541412660904,0.010146566659514586,0.05263606974198976,0.027938290543815577,0.05506125619178801,0.029034010316705914 -cd4_t_cell,0.18361716881478707,0.24175092741471843,0.09480040032243682,0.28337842963307897,0.16979838959693808,0.054545939928084014,0.14543751916260883,0.10558674250545809,0.23386874052499473,0.16017260112055023 -cd8_t_cell,0.10743680193390208,0.06697124570955697,2.172196171394358e-09,0.20331008030886052,0.07601564285732162,1.624884308075234e-09,0.01604956453849704,0.04973566712734309,0.08625382767774786,0.11468998951209286 -monocyte,0.06384593465066371,0.051986878062945496,0.030987569357083605,0.006431855659272403,0.03817077805121787,0.05639234784089877,0.04860588439939173,0.13150211277888932,0.08158929519195578,0.0911616116700725 +,neutrophil,nk_cell,b_cell,cd4_t_cell,cd8_t_cell,monocyte +GSM1009660,0.5995409772045025,1.0980721922114526e-09,0.04555911629867091,0.18361716881478707,0.10743680193390208,0.06384593465066371 +GSM1009661,0.6226525091230747,2.0552146772293756e-10,0.016638439485816336,0.24175092741471843,0.06697124570955697,0.051986878062945496 +GSM1009662,0.84252152303835,5.485742298867206e-10,0.03169050456378148,0.09480040032243682,2.172196171394358e-09,0.030987569357083605 +GSM1009663,0.4979730385676798,3.456256164557197e-10,0.00890659548648567,0.28337842963307897,0.20331008030886052,0.006431855659272403 +GSM1009664,0.6685986653996869,0.0026859826821248223,0.044730541412660904,0.16979838959693808,0.07601564285732162,0.03817077805121787 +GSM1009665,0.8789151431899137,7.581285712022034e-10,0.010146566659514586,0.054545939928084014,1.624884308075234e-09,0.05639234784089877 +GSM1009666,0.7059700794136665,0.031300882743745304,0.05263606974198976,0.14543751916260883,0.01604956453849704,0.04860588439939173 +GSM1009667,0.6550150275574722,0.03022215948701802,0.027938290543815577,0.10558674250545809,0.04973566712734309,0.13150211277888932 +GSM1009668,0.5380434826449068,0.005183397768582708,0.05506125619178801,0.23386874052499473,0.08625382767774786,0.08158929519195578 +GSM1009669,0.6049417858298329,1.5519440138901376e-09,0.029034010316705914,0.16017260112055023,0.11468998951209286,0.0911616116700725 diff --git a/biolearn/test/data/expected_model_outputs/TwelveCellDeconvoluteBloodEPIC.csv b/biolearn/test/data/expected_model_outputs/TwelveCellDeconvoluteBloodEPIC.csv index 36f000a..e2c98bb 100644 --- a/biolearn/test/data/expected_model_outputs/TwelveCellDeconvoluteBloodEPIC.csv +++ b/biolearn/test/data/expected_model_outputs/TwelveCellDeconvoluteBloodEPIC.csv @@ -1,13 +1,11 @@ -,GSM1009660,GSM1009661,GSM1009662,GSM1009663,GSM1009664,GSM1009665,GSM1009666,GSM1009667,GSM1009668,GSM1009669 -Bas,3.2789682648917985e-09,8.705271138470831e-10,1.9764865708401622e-10,1.2339398270409715e-09,1.3469214334012805e-09,1.1955161663871156e-10,2.0190181285821964e-10,1.2669198260212735e-09,4.198455030547216e-10,9.860027892890191e-11 -Bmem,1.7297026927528448e-09,1.7835119848085344e-10,9.079629255005708e-11,4.1845289443425174e-10,1.4306342047149068e-10,6.828853760065042e-11,1.5121316942514734e-11,4.004824713229142e-10,1.5068054649565586e-10,4.017987649404789e-11 -Bnv,6.71013398244451e-10,2.203890733917914e-10,9.492106427131602e-11,5.298147710889297e-10,9.06329762708373e-11,6.835067294547944e-11,1.9462116903382933e-11,4.68452253495693e-10,1.6654653546331108e-10,5.7885651871317804e-11 -CD4mem,0.1709531840532782,0.06119063789142382,0.04605351829155211,0.1400032019573191,0.11013551711518016,0.05821095398464105,0.046830242274542695,0.024237430716575407,0.0973257567618453,0.069859579964118 -CD4nv,5.7968170714523114e-09,0.1842720466800555,0.03396270203235185,0.136375886881937,0.06424154929018575,0.00929647848650827,0.081463400291921,0.06971274402341605,0.10783175192756948,0.048335663071975586 -CD8mem,0.07573158302472116,0.0007243664623098055,0.022419840346393473,0.03183665575961475,0.035932239646732644,0.0024204301236150183,0.012735268103428019,0.012245049401668038,0.035570624351121564,0.03058386846468522 -CD8nv,0.056074525176947526,0.08689129854815413,0.01904061947863435,0.21640116944024657,0.07248643021343407,0.014246603161849402,0.04273230157273525,0.05474966616713472,0.11986229248975262,0.16675707835856768 -Eos,0.02952121924561713,0.009581151237589687,0.010465971332375845,0.03681369472522632,0.02444601808704777,0.03221106232216474,0.04755377462963703,0.04499894888359418,0.038072161076901326,0.035703565310730996 -Mono,0.06379278881735143,0.041946563775761794,0.04087815804942192,8.320950300855526e-10,0.03987649314014124,0.08007170904083212,0.02487711080111019,0.12995395713345678,0.05448455503271336,0.0740309346874228 -NK,0.03006273583257728,0.0037003321233253983,0.015986759096612776,2.006733247843768e-09,0.022355520488204335,1.0996397436014186e-08,0.07861859145042013,0.04844733796468107,0.04443307276967812,0.02574052783766326 -Neu,0.5738639492692444,0.6116935979550414,0.7929139821396891,0.4385693840013352,0.6305262280300464,0.8035427510303458,0.654124472023623,0.6156548530628706,0.5024197843427861,0.5489887743651668 -Treg,3.10114803834215e-09,4.057334501110015e-09,0.01827844884876494,2.219508752801648e-09,2.4085436107949175e-09,5.969378088822599e-10,0.011064838617406796,1.0512307283587367e-08,5.074351894079943e-10,7.742681405324148e-09 +,Bas,Bmem,Bnv,CD4mem,CD4nv,CD8mem,CD8nv,Eos,Mono,NK,Neu,Treg +GSM1009660,3.2789682648917985e-09,1.7297026927528448e-09,6.71013398244451e-10,0.1709531840532782,5.7968170714523114e-09,0.07573158302472116,0.056074525176947526,0.02952121924561713,0.06379278881735143,0.03006273583257728,0.5738639492692444,3.10114803834215e-09 +GSM1009661,8.705271138470831e-10,1.7835119848085344e-10,2.203890733917914e-10,0.06119063789142382,0.1842720466800555,0.0007243664623098055,0.08689129854815413,0.009581151237589687,0.041946563775761794,0.0037003321233253983,0.6116935979550414,4.057334501110015e-09 +GSM1009662,1.9764865708401622e-10,9.079629255005708e-11,9.492106427131602e-11,0.04605351829155211,0.03396270203235185,0.022419840346393473,0.01904061947863435,0.010465971332375845,0.04087815804942192,0.015986759096612776,0.7929139821396891,0.01827844884876494 +GSM1009663,1.2339398270409715e-09,4.1845289443425174e-10,5.298147710889297e-10,0.1400032019573191,0.136375886881937,0.03183665575961475,0.21640116944024657,0.03681369472522632,8.320950300855526e-10,2.006733247843768e-09,0.4385693840013352,2.219508752801648e-09 +GSM1009664,1.3469214334012805e-09,1.4306342047149068e-10,9.06329762708373e-11,0.11013551711518016,0.06424154929018575,0.035932239646732644,0.07248643021343407,0.02444601808704777,0.03987649314014124,0.022355520488204335,0.6305262280300464,2.4085436107949175e-09 +GSM1009665,1.1955161663871156e-10,6.828853760065042e-11,6.835067294547944e-11,0.05821095398464105,0.00929647848650827,0.0024204301236150183,0.014246603161849402,0.03221106232216474,0.08007170904083212,1.0996397436014186e-08,0.8035427510303458,5.969378088822599e-10 +GSM1009666,2.0190181285821964e-10,1.5121316942514734e-11,1.9462116903382933e-11,0.046830242274542695,0.081463400291921,0.012735268103428019,0.04273230157273525,0.04755377462963703,0.02487711080111019,0.07861859145042013,0.654124472023623,0.011064838617406796 +GSM1009667,1.2669198260212735e-09,4.004824713229142e-10,4.68452253495693e-10,0.024237430716575407,0.06971274402341605,0.012245049401668038,0.05474966616713472,0.04499894888359418,0.12995395713345678,0.04844733796468107,0.6156548530628706,1.0512307283587367e-08 +GSM1009668,4.198455030547216e-10,1.5068054649565586e-10,1.6654653546331108e-10,0.0973257567618453,0.10783175192756948,0.035570624351121564,0.11986229248975262,0.038072161076901326,0.05448455503271336,0.04443307276967812,0.5024197843427861,5.074351894079943e-10 +GSM1009669,9.860027892890191e-11,4.017987649404789e-11,5.7885651871317804e-11,0.069859579964118,0.048335663071975586,0.03058386846468522,0.16675707835856768,0.035703565310730996,0.0740309346874228,0.02574052783766326,0.5489887743651668,7.742681405324148e-09 diff --git a/examples/03_deconvolution/plot_blood_deconvolution.py b/examples/03_deconvolution/plot_blood_deconvolution.py index 5bd92c8..5506983 100644 --- a/examples/03_deconvolution/plot_blood_deconvolution.py +++ b/examples/03_deconvolution/plot_blood_deconvolution.py @@ -29,14 +29,14 @@ sns.set_style("ticks") plt.figure(figsize = (5, 5)) -sns.scatterplot(x = epic_facs_salas_18.metadata.neutrophils_proportion.astype(float), y = epic_facs_cell_prop.loc["neutrophil"]) -sns.scatterplot(x = epic_facs_salas_18.metadata.monocytes_proportion.astype(float), y = epic_facs_cell_prop.loc["monocyte"]) -sns.scatterplot(x = epic_facs_salas_18.metadata.nk_proportion.astype(float), y = epic_facs_cell_prop.loc["nk_cell"]) -sns.scatterplot(x = epic_facs_salas_18.metadata.bcell_proportion.astype(float), y = epic_facs_cell_prop.loc["b_cell"]) -sns.scatterplot(x = epic_facs_salas_18.metadata.cd4t_proportion.astype(float), y = epic_facs_cell_prop.loc["cd4_t_cell"]) -sns.scatterplot(x = epic_facs_salas_18.metadata.cd8t_proportion.astype(float), y = epic_facs_cell_prop.loc["cd8_t_cell"]) +sns.scatterplot(x = epic_facs_salas_18.metadata.neutrophils_proportion.astype(float), y = epic_facs_cell_prop["neutrophil"]) +sns.scatterplot(x = epic_facs_salas_18.metadata.monocytes_proportion.astype(float), y = epic_facs_cell_prop["monocyte"]) +sns.scatterplot(x = epic_facs_salas_18.metadata.nk_proportion.astype(float), y = epic_facs_cell_prop["nk_cell"]) +sns.scatterplot(x = epic_facs_salas_18.metadata.bcell_proportion.astype(float), y = epic_facs_cell_prop["b_cell"]) +sns.scatterplot(x = epic_facs_salas_18.metadata.cd4t_proportion.astype(float), y = epic_facs_cell_prop["cd4_t_cell"]) +sns.scatterplot(x = epic_facs_salas_18.metadata.cd8t_proportion.astype(float), y = epic_facs_cell_prop["cd8_t_cell"]) plt.legend(["Neutrophils", "Monocytes", "NK Cells", "B Cells", "CD4 T Cells", "CD8 T Cells"]) plt.xlabel("Known cell proportion") plt.ylabel("Predicted cell proportion") -plt.title("Deconvolution Validation - FACS") \ No newline at end of file +plt.title("Deconvolution Validation - FACS")