From 3fb2fd9bb520369dec9c00099b109733a93b641c Mon Sep 17 00:00:00 2001 From: Brook Santangelo <70932395+bsantan@users.noreply.github.com> Date: Sun, 5 Nov 2023 13:02:10 -0700 Subject: [PATCH 1/9] Update integrated_db_plotting.R --- scripts/integrated_db_plotting.R | 74 +++++++++++++++++--------------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/scripts/integrated_db_plotting.R b/scripts/integrated_db_plotting.R index 91f20fa..bf0bc10 100644 --- a/scripts/integrated_db_plotting.R +++ b/scripts/integrated_db_plotting.R @@ -3,64 +3,70 @@ library(readxl) library(stringr) library(tidyverse) -full_edges <- read_csv("./data/expanded_edge_list.csv") -orig_edges <- read_xlsx("./data/Resource Interaction Table.xlsx", sheet = 1) -orig_edges %>% - filter(predicate != "has construction method") -> orig_edges +setwd('./') +full_edges <- read_csv('../data/expanded_edge_list.csv') +orig_edges <- read_xlsx('../data/Resource Interaction Table.xlsx', sheet = 1) +orig_edges %>% + filter(predicate != 'has construction method') -> orig_edges fill_deg <- max(full_edges$distance) + 1 -nodes <- read_xlsx("./data/Resource Interaction Table.xlsx", sheet = 2) +nodes <- read_xlsx('../data/Resource Interaction Table.xlsx', sheet = 2) #nodes %>% -# mutate(category = str_replace(category, "\\/| ", "\n")) -> nodes +# mutate(category = str_replace(category, '\\/| ', '\n')) -> nodes -nodes %>% - filter(category == "Aggregated\nDB") %>% +nodes %>% + filter(category == 'General\nAggregate\nDB') %>% pull(node) -> reffed_idbs -full_edges %>% - pull(distance) %>% - unique() %>% +full_edges %>% + pull(distance) %>% + unique() %>% factor() -> distance_factor levels(distance_factor) <- rev(levels(distance_factor)) -category_sorted <- c("Microbe", "Protein", - "Metabolites", "Disease", - "Aggregated DB") +category_sorted <- c('Microbe', 'Protein', + 'Metabolites','Pathway','Disease', + 'General Aggregate DB') -full_edges %>% - select(source, target, distance) %>% +full_edges %>% + 
select(source, target, distance) %>% mutate(distance = as.numeric(distance), - distance = abs(distance - fill_deg)) %>% + distance = abs(distance - fill_deg)) %>% spread(target, distance, fill = 0) -> edge_mat -source_order <- hclust(dist(edge_mat[, -1]))$order -source_sorted <- edge_mat[source_order, 1]$source +source_order <- hclust(dist(edge_mat[,-1]))$order +source_sorted <- edge_mat[source_order,1]$source -full_edges %>% - merge(nodes, by.x = "target", by.y = "node") %>% +full_edges %>% + merge(nodes, by.x = 'target', by.y = 'node') %>% mutate(distance = factor(distance, levels = levels(distance_factor)), category = factor(category, levels = category_sorted), reffed_idbs = source %in% reffed_idbs, - source_f = factor(source, - levels = source_sorted)) -> plot_dat + source_f = factor(source, + levels = source_sorted)) -> plot_dat + +# New facet label names for category variable +category_labels <- c('Microbe'='Microbe', 'Protein'='Protein', + 'Metabolites'='Metabolite','Pathway'='PW','Disease'='Disease', + 'General Aggregate DB'='General Aggregate DB') plot_dat %>% - ggplot(aes(x = target, y = source_f, + ggplot(aes(x = target, y = source_f, fill = distance)) + - geom_tile(color = "black") + - facet_grid(~category, scales = "free", space = "free") + - theme_bw(base_size = 11) + + geom_tile(color = 'black') + + facet_grid(~category, scales = 'free', space = 'free',labeller = labeller(category = category_labels)) + + theme_bw(base_size = 11) + theme(axis.text.x = element_text(angle = 270 + 45, hjust = 0, vjust = 0.5)) + scale_fill_brewer(guide = guide_legend(reverse = TRUE)) + - labs(x = "Target DB", - y = "Source DB", - fill = "Reference Degree", - title = "Integrated Databases Links") -> db_viz_final + labs(x = 'Target DB', + y = 'Source DB', + fill = 'Reference Degree', + title = 'Aggregate Databases Links') -> db_viz_final -ggsave("./plots/db_edge_matrix_children.png", +ggsave('../db_viz_final.png', plot = db_viz_final, - width = 8, - height = 4) + width = 
12, + height = 5) From 0a3fe385702cf759a45273ebf90114fe7c350c83 Mon Sep 17 00:00:00 2001 From: Brook Santangelo <70932395+bsantan@users.noreply.github.com> Date: Sun, 5 Nov 2023 13:02:59 -0700 Subject: [PATCH 2/9] Update db_expansion.py --- scripts/db_expansion.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/db_expansion.py b/scripts/db_expansion.py index 5c0494c..76757b6 100644 --- a/scripts/db_expansion.py +++ b/scripts/db_expansion.py @@ -1,8 +1,7 @@ import networkx as nx import pandas as pd -myedges = pd.read_excel('./data/Resource Interaction Table.xlsx') -myedges = myedges[myedges['predicate'] != 'has construction method'] +myedges = pd.read_excel('../data/Resource Interaction Table.xlsx') G = nx.from_pandas_edgelist(myedges, create_using=nx.DiGraph()) @@ -25,5 +24,5 @@ outdf.append(tmp_df) outdf = pd.concat(outdf) -outdf.to_csv('./data/expanded_edge_list.csv', +outdf.to_csv('../data/expanded_edge_list.csv', index=False) From e1cf83e4b849033dc985194ac84fc646e8b1a669 Mon Sep 17 00:00:00 2001 From: Brook Santangelo <70932395+bsantan@users.noreply.github.com> Date: Sun, 5 Nov 2023 13:03:47 -0700 Subject: [PATCH 3/9] Update integrated_db_graph.R --- scripts/integrated_db_graph.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/integrated_db_graph.R b/scripts/integrated_db_graph.R index cfef781..48cdcd1 100644 --- a/scripts/integrated_db_graph.R +++ b/scripts/integrated_db_graph.R @@ -19,7 +19,7 @@ mydat %>% mynodes %>% merge(inDegree, all = T) %>% mutate(inDegree = if_else(is.na(inDegree), 0, inDegree), - ISDB = as.character(category == 'Integrated DB')) -> mynodes + ISDB = as.character(category == 'Aggregate DB')) -> mynodes G <- graph_from_data_frame(mydat, directed = T, mynodes) From 9d98ae9b83fe5b81b29dcea9461663d6f4a7c241 Mon Sep 17 00:00:00 2001 From: Brook Santangelo <70932395+bsantan@users.noreply.github.com> Date: Sun, 5 Nov 2023 13:04:28 -0700 Subject: [PATCH 4/9] Update collapse_categories.R 
--- scripts/collapse_categories.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/collapse_categories.R b/scripts/collapse_categories.R index 70167a0..3a65716 100644 --- a/scripts/collapse_categories.R +++ b/scripts/collapse_categories.R @@ -1,8 +1,13 @@ -myedges <- read_xlsx('./data/Resource Interaction Table.xlsx') -mynodes <- read_xlsx('./data/Resource Interaction Table.xlsx', sheet = 2) +library(readxl) +library(tidyverse) + + +setwd('./') +myedges <- read_xlsx('../data/Resource Interaction Table.xlsx') +mynodes <- read_xlsx('../data/Resource Interaction Table.xlsx', sheet = 2) mynodes %>% - filter(category == 'Integrated DB') %>% + filter(category == 'Aggregate DB') %>% pull(node) -> idbs myedges %>% @@ -13,4 +18,4 @@ myedges %>% rename(target = category) %>% arrange(desc(source), desc(target)) -> category_edges -write_tsv(category_edges, './data/category_edges.tsv') +write_tsv(category_edges, '../data/category_edges.tsv') From d07fcb811a58961aa9b6640c8b75314ed301bd0f Mon Sep 17 00:00:00 2001 From: Brook Santangelo <70932395+bsantan@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:32:19 -0700 Subject: [PATCH 5/9] Update db_expansion.py --- scripts/db_expansion.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/db_expansion.py b/scripts/db_expansion.py index 76757b6..02b2c25 100644 --- a/scripts/db_expansion.py +++ b/scripts/db_expansion.py @@ -14,7 +14,6 @@ for node in G.nodes(): if node not in leaves: tmp_desc = set(nx.descendants(G, node)) - #tmp_desc = leaves.intersection(tmp_desc) tmp_desc = list(tmp_desc) desc_dist = [nx.shortest_path_length(G, source=node, target=desc) for desc in tmp_desc] From 3c34cfa4195cbfb2774c155baa76c584f7424d2b Mon Sep 17 00:00:00 2001 From: Brook Santangelo <70932395+bsantan@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:33:28 -0700 Subject: [PATCH 6/9] Update integrated_db_graph.R --- scripts/integrated_db_graph.R | 4 ---- 1 file changed, 4 deletions(-) diff --git 
a/scripts/integrated_db_graph.R b/scripts/integrated_db_graph.R index 48cdcd1..6e2ecbf 100644 --- a/scripts/integrated_db_graph.R +++ b/scripts/integrated_db_graph.R @@ -7,10 +7,6 @@ library(readxl) mydat <- read_xlsx('./data/Resource Interaction Table.xlsx') mynodes <- read_xlsx('./data/Resource Interaction Table.xlsx', sheet = 2) -mydat %>% - filter(predicate != 'has construction method') %>% - select(-predicate) -> mydat - mydat %>% group_by(target) %>% summarise(inDegree = n()) %>% From 70f1ad8e62324f7dce41b7ed93ef8a84024cea10 Mon Sep 17 00:00:00 2001 From: Brook Santangelo <70932395+bsantan@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:35:36 -0700 Subject: [PATCH 7/9] Update integrated_db_plotting.R --- scripts/integrated_db_plotting.R | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/scripts/integrated_db_plotting.R b/scripts/integrated_db_plotting.R index bf0bc10..4d8a701 100644 --- a/scripts/integrated_db_plotting.R +++ b/scripts/integrated_db_plotting.R @@ -6,14 +6,10 @@ library(tidyverse) setwd('./') full_edges <- read_csv('../data/expanded_edge_list.csv') orig_edges <- read_xlsx('../data/Resource Interaction Table.xlsx', sheet = 1) -orig_edges %>% - filter(predicate != 'has construction method') -> orig_edges fill_deg <- max(full_edges$distance) + 1 nodes <- read_xlsx('../data/Resource Interaction Table.xlsx', sheet = 2) -#nodes %>% -# mutate(category = str_replace(category, '\\/| ', '\n')) -> nodes nodes %>% filter(category == 'General\nAggregate\nDB') %>% @@ -61,10 +57,10 @@ plot_dat %>% hjust = 0, vjust = 0.5)) + scale_fill_brewer(guide = guide_legend(reverse = TRUE)) + - labs(x = 'Target DB', - y = 'Source DB', + labs(x = 'Integrated Resource', + y = 'Primary Source', fill = 'Reference Degree', - title = 'Aggregate Databases Links') -> db_viz_final + title = 'Primary Source Mappings of all Integrated Resources') -> db_viz_final ggsave('../db_viz_final.png', plot = db_viz_final, From 
32b0d729f5f5fc0bf97d2b15c98a29f46eb095d9 Mon Sep 17 00:00:00 2001 From: Brook Santangelo <70932395+bsantan@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:42:59 -0700 Subject: [PATCH 8/9] Add files via upload --- data/Resource Interaction Table.xlsx | Bin 8724 -> 12434 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/data/Resource Interaction Table.xlsx b/data/Resource Interaction Table.xlsx index 6d1448e51822610597c5f93c5d5d4f9eb6c3b58f..9ceda9fb0c448ef593e30e88c9010b9a0f09c59b 100644 GIT binary patch literal 12434 zcmeHN1zTOovOTzK2qCxzm*7Ez!@=F%-Q6KrfZz_n-Q6t^++BhmJi#3TyhG;QJ2ScS ze!;u@``DZ6-nF{BPF44=?j&h}QqT&xAJAtc_Nrb1w{8YrEe5 zT%k=i^PD?|gt{={5*1htfd!hpoV9B~@Oy2_O(!Cab(G&*5-h5}0mZqRa=SpaInS9S zpp3)@_ipdf#(0~+KQ~!JKGQ6YBu7#*b^90%*|!l-bx<5yqxV^D$oX(@H6# zS<|iO`}$(??r4?5-$jzT?tO*uu^`%9zvm9)BX)K8FkM<@JIl_EMh6{(?8RX@pa0~t zMQ!J@n4Q6b>lD%`>%~Ty3(Jg=1oIlQp=O2PfUxJHyeuaYz)kFWczFaGAZFy-E=Sx>LEd zN70lv6=h2fu91mOp9)u_k29#^BD~1Q560$A@z>~+R$n!^seqUjR6HyXu4?4SI*1!j z^PNvD*hk?HeJAx}D&^&Xqk-vSh1Y-;$>lAsvZC2L^D2YS4qT+}x<*!A=fY{7FCM(; zWzq)}NLev&n8(HXDY7noHR@PThcn!JIS_hE2Tle9!ikDj!Ryq21_`jZK>rSG5iuwL z02crc;bz77N1V9YI#?Rm+FCxZVgHI52=GD%M*07JwZ)EF^fDoTI1cIvoNTv`eUuyG z@}v%GZ&-$7s>TeFwlXZ{cfU`KKVq-SOPa_fXFY#AVsiiPVv~T(d6k-~1|Pa*DQ&DC zQRiA#^_n=eo~7j)^<{*3RXGDA`}l&TjI2@Xd(vV~1-+D>z<4E1d{$$=EXkz4egQ{Z zL)yR!v=$Ndo!g9vPPVL6^N$(w=&n1sy*|i;x3<=6tlE)L9X@yyr>~I)!;s>#KGD|% znE%wlR)>Uf|D3HAx6#uNr7p~8kFQEzDA^3H_>*PCfVt7~OY|(pC{n)?A6Gt$CL@Jo zEe&#%_rYB?E$b-%m4nLD0n$Ht&WF0W^m_2rssjrEpn+Wi>^Xn+nLOoXn@uKkZ(Z}J z?laGWWXT~>6lMP$ktOYH9ikX}|*W%OTp+WiRwAOb@Xu!}LPREJ} zgle7_Ftw&FScp7G)`&ZL=83(DqE#~{f=#59 zl0XJ>eo#JWOH{Mn&J-Z+k|(=aNRoX)1(P<7?}c0}6rm=e+Y_7+#y3b~QI(_0gzNJ< zGN8_)yf?Z{HpdyyhW=Zp+JF|%kGya-^+)}nN~KToV`Z%}`7g`^#`TNmHwW}e9OMS^ z+psh~NOZ=W&vZdr_$F}cV>ucWB|sG)7W*ozuw5}t6B{^@J*rH>H*ph_z{7d;8L~Gj z?l3E*@N{pj3VIpYtmmi~tP?VbR169{C4PIq_|;HaeGGGeMKyosTa-1t)r z2hg~1ev`j$s7dyv|HTi%h0(P_MAEmX0#S;l=p&T5x^i)v4FQP$s8bIxpKmbhOh2Y2 zk9`&cogev$_NF74Ao7Gu%L_cS4wM&SJxA=!RpR!>(xDCWt<;sZE 
zvNWQ^XOlMP_*($l%LjReMh3pYDS0#fZZ&wTjV2r;X)EO#kRd{x!?!cU{B8?XE`)q2 z558dk{m;4ah_PQ{73|^VZ~y?xA0F=L&Thl7 zSE&N2Vh-W)D%xE!!a`D6AV`I-+fxNJd;tn+YVhnn;)qa(MlX>$>07dp5znwq&bth% zNDko0XOGobNM$Rg?J<*Yl~jnHxTX|VsIUuRO1jm2PtF##JTzQiRW-54R5fFwTZ-^?R@tuV~c2{)1rS6rVq?7ttI&UF28H?cg3ryn75G>(d zw9MYsWR{q|-YNpuvkg{;`Wm*OOO-|FD8C}f?w|7eL^O_-?>8QmaOe3aKOt7#B5lZ|+H3(TAz@mg%e;`~^H5buECVw-wVls<9W`w!bS;2`!Vo>D zk6T$so%Ieamg|yB5f@2CW1wy0d!(m^cJ~ud6A32052nzvyvS||-{t}dp3+qCTPy6N z4qvwoFt33*?y5nYDFU+aKp(;XVi>h@ne!)!MhLGT0mSxoPfh{PW~U1`z=tu8YgM$a zq<1yJN7tiLKgd+C-d-p#(1}lC=Pmt!vwT2jlLS>3_5H+z#cN`x2%jDe`9*p6uRCnv7^nl88W4dc-RZd2Ey>F;f}6 zOlSd)VS-yTn`Wbc(m)lspkNhrga%)P0Q`93!egsDu&e)PAei8%lVyTG5W-NO)7)QE zy_1=-wK3zb``hPm7*QRxS!P0Sf3?RC-@?&gFkBtUDX`hQmo4JoA~GH!OphRfKwkUc z9U?@ZN5L@)+e>NwlhdpA1jX6wPLApL6Jkn|*ESrS;aV;a4Jf^k_UXEEG^zPmLc~o< zZE%0cH*boc_x9(0GM%z0(m}TeLz2<9DNuQMQe0H;%~@6%RmfAU0+w)hskm4*@%4J6 zUT)8+8#|5on0HhhX9ulJ-6Gu5sU5)@qS8M{K{!}0?&M-j>eynp<8B(N+|Gp z_*NQ?09wCEXMPVmHvY%==@V3#SzB%~+n9K*!|%BPT1f^_02CwP?Qe=vFQvty+uh$d zd|}0_$un9Y936xrfn3T5p%};F%kg zXc;_V^OchFYo0j@d%I%droI>XcX6dwX)%bg*z2gpW!i;Pr9gy`7o$ZOPW>DOj}13E zeLMJY{(J*(j$@0)_)s;uv4F2HeiX3_LTX%7K~R4b7kXdmjf!B&lGJPD*DG)66#y-I z^@a``if(7gTd}0zqcx46hUQC78Nx|HBAG}t4`$Mcz7`o#{V&>na3p=04SpSFzLIejf2-QV+c+0qk%!8g@o|UMiS^UY zvAiWcDfd!WQ#n?I=QXV_JHkc_dW^+3C2{JBn$iS!Y+;9(8sc@BID+H4ZVp{S^g+PD z2syq;e~~~8Ee%iND_Y%yJd3LK4XL z8<8r=kKx0*sB^T#npD^9;m7T?F0?+FX4NND69^HG#dN{K07%-~~B zU*nx&m)eAKl|?v#A|+ik8qMlZah>geZ9W&8GcBX|7OD2S!SyEdDoIOtI`CyuBUfAH z85n^nZu?sy&aK*|aKet2y@q~k0as$0?#?Ki_7JLjngw~yJp5Dw>r`HCA?)S-suD^(6sCY4U6O@cWd0i?@6~Y=^>X#KxwvpEW;szDwB9>l+qb5uqnv{ zG?E49meYU!JY`jn9&p!rrImu~-~cT~s6xa~cinfS1+z{jT%;{SbDS2TKuI(4?Q4NQ zE?a(es^`yo%>!Jdw!QSa9P`+b;+>i|yn-gS-Au;RFlCWuY%?8j)mcqqj}@_zcoTI^ zFfxvfDKa+u~ZytbBxx1##1aO~TpTxwp5KaYZ$GJev;CVs&+1rm`l zAHLHq^p!oPenmFr{K(N=MO}aLrU&M8_SCh!K`jO?Z{SM%3oE{WAcw`W1QD6XH-)AF z+(?oRIKPa(&q%gCwL%?W$;UQ>5jK1udFg-yfEGTAu*QdH=Ei8 zY@g^+0?!J=vBt>a*14)$kZ)&DVBgelpb|#j(Ie?@KpUvQ#J4Fw3MTiR-?F9@*2>>n zTFE3yX(=$ 
z>&ES?o$Ya1T^&^4yW-CF+n*jcJvTPotE)T@muGjEQM#XfpGF_gu68n+y*%#r=Y9G4 zI(c|~-$C1~CY)YvJnk2rpUV$yp+22B+$dA__{ZxF4z)wX$5sWv)6Dg4$)hlHY5wjzkB`HHRd_e@F{DEq=~*Xt>PPheAYS z=7+fx&dazY4J03|Wz`#^1m~?SKP=2e6D+TqEb<}JTOXIIDv;&bT85qjk7}%lf-08h z;LWX3sFUQK-a;PLdVx#s!6i1}5&?&eR;!wbw5cig;=Ye!p%};Lpo@f9I7z*1w|Sg> z#+@RIK%}ljl+3=7oVMv;IIB6Rcl&PinWAGkGqkUMyw)cq5!EDgN>{DVJDrX%$Kl_6}hL4L;L>yl?ts_#yWQJRvo01l_=9JybiG9}!W_i-v##0D!WTByNg z4GovP)rpc2vVbs{v-}895-4z6ovzaGORaZ>k1>I(;d)WU5Ls5>)IKaHen?<2WK7ej zHJ%?+4`ne&+X7G%Fmbq(N@~P`F`HY3(qa=D&a`z99bkwoEvRcVSEU3Hi#7qd#*}I# zK_%4nBT%Xo7+G(}ZdUX4XX7=%3NQqfJ%#%9ii8%02y_Y({lobA{`HEaCAR=|fquM= z>47C`_E}Frc@`+UUQ(!(y57UfuRKdJJ@I&Y!2*bKj!p(5|H>)R&)RSmomoDekDePQ zHftX|$QM~&pj6-LhSv4cDP6y2eE8}>qxQvd^hr6T5!kR$rpZ5^0m_5X%AOmtxMOWf zRT%aHRAhlh2L2E{0Tp>5Bd&kf+`mL^m-bBF!$UlEpfHQ29U#ljU_EU@>gf0&?XX`+S+ZvYWk-%^h8~>`)y z?#cp1oGIdJOBZkRR(XJJT$D1k_p54#X^&GqkUA1<-7~^0+C4YF)L%8{F{9m8nS+Zy zoEX|@4`+ATkgX-m9=GPQpm)6k-!Kf?adbGrK#11p=d2(V~0s}%>uQbm1fl`C!2Y~WOA8^=sJ6Ix2iEhbr#%@KbWR#wm6Ii;RnERcV1VM~Wmf^LVFH%@(w`E1==R2Ye;{To+}w$9Yi_ z(LUeMlyv7+pO-~HN)ji#?hu?BRtFcAvzDNW@qI=P1xKhV&DO_IsyM=^0aM{%#LR#U1W3Z+I$a9pSxl+UXtac2gQ%mPW@T$ifI zxhR*4_)A`LB66Yf{~-^X`bp-2UTfC}EI}n7SW}?q?%pW(bH}u+*9XwJsL$rrDufJnnsd0x0&Q9=fX4|aClc+jfFPhvPsPw_0t#Km0Vxp{bjJGx z*khbWlTXDx&ei!&)x#w^Std|fq!ic0XC}%Ol?%qwwGe7Cu^jT$Y09@9)6b$_Ozgc+ zIjPDb)u&(&YoLIf4Rn0^9ZZ_?42{cTjdr)CpJv{cYk=|=4c_}t>{i70V!&FV$YBF2 zwcvsj6DN%)m8R~V* zSE-Vc>M9LDXoKxgeObf+x?XU;37}!pV~<9p%VHg7RLcFq>q)i8<=BDP(PoYjCtK_aT0p)Ko}DTTdl*IPM2 zvM74rBdVgCw{;az=j{)JSRA3y_bR}t0w7>BF23xho0W)ogJy&+F-UK2t}N&9kxEG5 zrJEo8o%o<6XlvGlT22Xukob#`oP#2_`=vS}mdb16uvf(s0l+cSX#-P0fngYUetf3O+8QG-vjAek##iThHTcocPXulf!JJz(V^PiIJQk;hOZ_AdrSH}tW5f?2$qS*ypY7P1V zG^Q&&L&jgKSJ(tm%Qe6du0wxcfRamf*@j9-Z*3~4^C@l%?|>@VLdkKz)E;VPlI-z? 
z?#bH9h@y69(~(ga9#x8a1jOJUH;)XrQMVbyeJv^3l3Ulx%0ERCW+H2KGFsU9;t2~7R5wf5^Xk|#m{@e32WNG0cjB!QoIjtt`58 zD)O9Evd>GN_F{C`r*^bTMv@_Fx0$vHx)-pkHmFh+Z|hNL`CCm}aP*=?CB8^|VkiDlz(;C?j=#DRWQsdn*oLtiL2otLGt%y0Y_R)=n?_>Pc`tQes zi6c$o=HT5uJMfkV7WfVD{G!f8%Jqt~hO9 z&-A7W!P$tVPlmhQ&1k_7HyU3e&)(NtWGW83QEav2!H4!|T2h1kX91cwH#6;RIIXBB zg7Fcmimr{?5f$2E=0lQ8@TiK{&_nGh?Y_IT`2?Wh*_rZsKA%$_<(UZuXOd4{k=p_T zi5BP7w6X*g6AH6{7V9=N&}pwFHK;-{^wam^dvj*{j8Cz*Dvp1fzlsi73KMWV3M#xP3-su^z(?0R)X^jq>71C~+cdC?T{Mp}f7QtLJ}F+WuP|9Cj1Z=-!i-ge;`vFvBGC&_rRs_@c-mXhteE|oSM^v?4bqxHba9X<;}>t0 zRDxS|;^NmYQbh~D1aq83_q-6a%z{#;=h7)VQAs){uU@>~aw&oYJ*@PN!h4SDJk+5K z8wsnCe1$;a6euN*01!g!4{v?Ug77trqRVZ06p=+l`+)7CSI9r5Y~(2PCX^~$KtNEV z_s8o5=>V!wsJc{-shl&?hd4aSI@%b6>GAIW6xz>SY~el$tL){Pb+`i z-mHtL9=Ai%@%X~oZ0ZQ;No{<~E7G$M+eLYdhHmo~vY1y$TuvF2rj{8FZjb1_1m3Q0 zdgrblBR5h*8rD7^B~+9Qk^Y{yhW98VrOUT>^O!!|LbB)Sv}S0Lhw_Ts@D7>6RY1{0Z!y!N;_h z1kuNV{^WJxf+{O36)mo%hxZMUj?v4nA1WC8wXb2;}_UyRzWzqR|oDk8V zm%-FRZk`?u&&rW7@jVutp7Y#Fk?y8cB(vfI9ROlSN#OL{rO!qSYMqSSOf|Oez9DPo z6c)>|+FSr6WnE@K__W04Tn?jhDFK|KlW-_Wo-iWDy21`Fw(Q;*JdyFgg6a)0BD`!B ztveD{{E3}z;4)qR!bT}PG$Hy6kq1>67NgGGl3GLTo@eG5PSh3lF|zNpVhW#+vIK%n zBujl%j1=7H{MGRufw334_I)Qp+*naiIXuxx#Ly}wbw*U#59G9t7l~0-RoV@E!pgCo zgQuzMEGRrfCT2BKGlh=c#+6+>=`4cA=Wb@QN^+MpHO;j*rEDPc7&zBwi5w!gPI$cWBz%|0-k;R7anZ(o*pPPJ$RB>) z&)^ges$BVA{j^Ww5}u^^B-GqYzhdxo@8)rTdwBx#d@ydj!8@~gIyqcB-)Q1*yXu~m zrdn~SB|7kWjX3M@K=kRsmcQYhZ6eX~r~|*qXYo8Q(L~+}N@dxcY+S^8r48&NEAq;B z$_nx*K^i4EenX->C1p9dh}~)wSv43@CAP$)$f!YCm4J`r=?Ge-S;2zzGv5X-?cHj^ zcsO*XzcM4q4mHUzaX5+K(wHOmaY%Dt;L8s&=Fz73V>sROo((-($D`=lAuw+ z?X-8If5J18QcxW*b~yL#l(0K=g_kL173!3mop;a(P0Hj(?}1=M3A5i*k9iQzz`a)H z#pw9-%2o2n`Gd-rWXM+{v4e!kBbVJJJjpwhE!SN}9cU`MlTsAwSUV!^8NihDKsFEI z{kV}Os5ov@lNv)wxeO^mh!42qP_~lp8V({v#*xKA7pl@8vfy?$}~8+ClSF zFMPqTuw3yCb2fxBpUwjT!JdS%_Js;>9?iaHp0o%4p&HWyLH4?`-)J*Rz?yL_}xh)H!TC97gN^8coWo39yJiIDvOX}?Zu=_9(gp{pTziW`4ABkjlN+_XY5Ap*mjMCE=C}|Veus_ zU-iq@ljwtqOW}`m^mc?{HW}M!XDOm%0h{O*D%AqDkXzuS}@NlqiYv9 
zshLLKzSE`L4!ilS`E8KOFoTEn)A-vthh7tT%rE3>&&L4CsrYvx*G}?AJCzB9tvf>U zRdg)Z`sbuN&jsr==t+6hj-!g|PD#ykKTRK4bQI=fEyZRJ--2A+Pt;6Ds7ox{l1P|m^D&XLj3*1`CB$pAB-{>$P3o6ag`MY4wp>+2fKQ%J1oGHdw| zyfvKS)zG!R>pOvQD4`R{mba1uFVz;RvMkAwJ z$|r{{KFi2$LQMxaiwZ@eu20rAca${cf8%s>=?g%@^um-_QnazGL+MtC;9Uza;0s43 z_!?>dG3dsLz9HgR4XJ71+_Q|BsT;CKOwU4$l#2|8DCTO#8S1?qG6B^@w_z;C&qI{; z_$OCAIfis#a9|`y21us{hCeLV^LX3r$35ToVG_|1*^}>^=I_!|f!;qpLRoxza2g2_ zDRUqb7(pG4-KT1pNJ%r=iC9#=sVv6y3YD03X9`Q(r`}DfV;~|_b)3O+moLZ@cdVg7 z^pMv51zh{&~?W#b2fIz$Td!R%~U?tS-?jLVM$Q#mPgF8to6u3@z{XhEwxam(y9_ZKT z@WGlG9ruBq=^9?h3-2VQY~Ot)7Zi_l)=A!#K;r|s0Z=I>tO*%_V`08|k=~RYu+28ivE_d9g zQUudad<7)vzWDc}{iYs2T=r3MbPe0_Q>~F+Te^oi{@C?zP7-#+k+RVY^(@n6^JNS^ zK4E=uZ*9|wCjz>!EspFpu37tnee{n>5(1JQoaOy9Z~I@r*uS3tz~YvZ{5!zkxuyS7 ze12wtoBRvY^moO-^X>ks_yzpI|9`XZe#iNpef1~OPcSw8H!jxi%D)p^{#0HCQ@x%O z;eXRyepmgS3h<}uE|?quR{fnA@H@coXODjZG-Cd{J;P+Ji zcZA<_tUnP{N&m4QzcQ}hQU1Q({)qtq#83eM|FQc1uKxGA^snj{G=EY5-F(a=Ma+84HD8YlF~>?!$@~GA~lqBmxPky0Md;hAtl|7G}6-Fpy!;& z$M1cgbI$kox90bo+4tW2zUIEyUU99pRTUAD2;tDs(BL@rF}2}-6H?f*r!A+svx$e5 zy@ktv9&mGb+Sz6e8`yVn0Z_ja>|#G#u+k$XkR~)7R_4N9dT37`_qq=|f6jkIf#1pp z{()V;_{G71y=%=6kST#L-F78DvNuunp#dHCfkRailVH%K@vF}=`mtU1SJl8r!-&NxIMRn>aXo)^v;sI=qR%5MEzm0!fj6Nvut~FS?{p3gjE#2o+T@J&_b~|D^ z;_C6O(qwSWBaCy&EQ(7(nqXqnOXTwxOJ&8V@6|+Aui?7`f_Nhk!uXR}>UNl?<)m&z z&;@VFPUm~nTl&V0dgci~!&9nsveip__k=+i3|kVwG9LRcYb_BeU-tLZ3I14}hqCnY ze`I2{g&HoDf2pFaHBwbXMME{UdW8%R2ZxFX2dDbKRVB~=SLOe!Dm@&WZCotDU{@E; zpFh8>GGo?$R*>*XtMP67cOSF322}Y7t4PyJEx^W6*^C11SS5md{mFr>yFO)zPfmPl z3WHPaU>?{xEwQNdoO93rRP$VVl=A)L2CZJjc4y7(qp-%E)5QQkf9*2uP8~}3@fP8+ z$sMZG5B>qiv*Rw40aW`t-ug<&*qx`B{uERk0vRSgMz!OPBcfwrTLJcsE#=)?L$~(4 zA}4Mig(=7O1^6Q>m7YHeJGBYv7b}qQ@R_yUQJFu3mgB72#I-DFuZ~?Ww|J5IYuOm( z>d)F<*>Bo^jsRJvfoaz^Rrexd@H%fU+~0kJRrc=Qy|GVRqwETT443HM&kyL z%H+=HTSu?=hxYc}hpGLLfQzyHfKa2S_!IVmY3px&M=wbdZd(-?W=K~P6<9<_mIq2| zTnhQizFb2x8+!)Ei`OP^H%fdc&IjJy&Qyecc7J>!E;e?xf8j0aHg}8}!0&OsdKfY= zF#bYp|D<1B=|+Uj--xpDu=f3x|MhDB{(xlv?zd4?G49&eol_qJ#wyUN4EMHYx1GfO 
zchBzl8xQ@x2Mo;*uP<}vk13Qnk4|*_XZONJTfbS0=`WUZ_b!(1 zg8T3y)l;l{29qFX;S1~j3BTAgGdGuUkD=0A&t1*$Kl%?el9n}Xtk=1kFI@=??6PCR zvI(fTOcof#Ogjc41E5T(d$*n-z59n$ISMug^_XZ2+ubfxT9*&0Jwp6xPzXsRI!PoX z$ZQmt;SEi7gr+hH@n=BOV5e748eY?z3JT`53q3HSHrICfg`Dss5F`8b5JE7e)UaL-eXpxtYy-dqcM1;;a%Zh0o!O( zq%W&t@XA{uV*m{FVN!TLnd(6XL*7k#2?D$J&(XlsRpMIU7=9b19xeu0I-vZ?p7`Th zhdffZV6P51Rd*!g;c@Y&!mHEnJ|_E*kr1FY`f{q%6*+{-U_N4wO%HwAzdkPf>tuP| z06ru9thk~XmD zuh}CEBknWC@{s9YC&&9>z@1-3gT~?*{1tl%-#aefl4F)d@ubK-ab)HuXyZn7RF@&Q zrmZ_kdGWXtHi}}}eR`dY$6aq5?4Mb+? zXPRdxXa|3KD#0j#6R5n+)G0#H`EwZRB23ZysqbU%Q$PydqxLMtJ4zJ%5$~&&Y-T)) z4>v({c;&T`i#$95i0*Aff?Bd^3g)zRQD2tSQ}#(HAeI&e*U53{up0>k@zn;sIsE&3 zuLr;hQT+^fd#vmv?d#^ZD2}a5a-Uctlk@>83Fs7U_qUEPsMVu!F!BlpURgNO=ViH# zJXfwpvb{S>1XU++IQK*5=4L185v{>)gdaEZYBM*nHvOC9!mn<=Z)HN?-KaRES4}ZV zJw+a%Nt?49;$r~B^VEAT& z4o~Sk#Il0A0iiBnT!9wClg0k?Qd@8IGmkQ8R1LEdy*BPk`1t_c7*I8|W(rSs4hi;5 zkQI3#6N_C=$hz?zp(1ycFpH=%j96Bv{DyV5hCJtG3|!AjS*gQcPps4MN_JW%9a*Rs zd`~az{*rPDrfTR@<`~uyC5Uh>EpQGXzs;1++~xGJ4}A+hWkXKpIe!Xmef$*qCj6|d zoJ_`GqG>(;Ua)KYJ#7K?Q6QTR&pb1ObCyt6L0Ua& z|F&-9_>smoQwK-TTqqXW)Ptuy(8@CI5z{b75Pdk-_vf)-&0Et1N03OkZ!4Ov%y`=o zH`-YgB43~=8~m%-;4!9ofMqP}Y!M48>7>@gK^pGM( z2Ny2N5`kwJmrue(NI2h~Ot5|717o?jIJOVy4@YJ*qVtwI*1rt-b{ajx;jJPM5foy5 zvxdx7*|*h;jXyZ=)Qm_>pch?LMQ`#3O{uv=na`|Gz$w$$90*nGNR3~_#2c4Gq4UVL zRrV$UycG$R^wcDwkh6_2W$kzef11*($4*n-fHc9LeG4^MREfY=Ho%%bSq&M6Kd9GX z;07epJKoMTcg9-~gA5=Q5vK(AIw&vNQL2LNkOjtu|63;fn+|@`w_i*+b=E$g3pX^! zcoE)d5tph*UcO{cYy9ke$(f^4yOk=uEWgmn-XzVKvgoQowSbb;-9_r-W0TFJGfnLg zNhX877eomof-#_k{ybYl?ZY7|9DypquXYoJx(~^r5sM))4wFX7vc(H0TH*XWfie{< zcC-7$@_EPQMZBW%!Xv4Rwgn|s6TX@ikOk&AA_Fyphpr;o^ln=Ghan$SrAeKum|dJW zkNw!w&k)q|`t(Ly7LtW;BuiD`v{UNi_3UTVICiqlANLSA^*xy!-qnzH6D!Dntl@Aq zA1%56l!O!ftmeSLh}cD^%V(U;yF1!$Rlkbsn-RtKHk~i6ug;<|_qX0_jdRBMTPjEt zfs_d0(2^d&fwiB#_(3RiQBIt82OQ?BzEe0?`)oR#k!Z`$j`;3IS@WnaVqobEHM@V( z&POb^ECo`&vvAGQrDoLpdeH2ec767u)9kQ#^YPWN-||N0g<);wEeFRtYA4NbrJ9Nv zhssL>;=Kuq{CxDP&pGQOp`#;wqN^^tTl*F&S4>g)4YnK=)5SibhW^sR=0_=HGn1;? 
zotNor&JFuRbwe|@&6-D~#V+OW*^9o925!tEd?OYPGvhk^z=hut4^1!uT%CRGbKOm4Y#MMJdURBxL6G}|H>!f-iK#NcvvXSBUw2bBT-7K zgY~Hice+?MS9h!ysTZkNSsYIZi72GbMU|3?s=ymMV@GqG!0n^AgBNDJR zYQ#z+Lt4McG03@N@PH)mYS3V7)UM4`<=j{#piZNnX70%~{E~x3Pe3n>4AhK$d1Y=+ z0*K2B0;u7BL8-(H?NOFQIW@fnBi`>#66I#4h4~%~n`e^*b#ciNwcaIGy0xxVmnLmk zl*0qtd6VS)8*CpOd2>QNZ;a-09@7)(V|}BRt&#j5*nR7_tnxlP7&D2of8&vQ8y)F-%49PhsF6L*dV0@UKE)4JMw|Of=fOf@oXpQ>rNj z8p!K~1zSQ)4*kLFF9O%#n@mP`Mk*N_`ZWatDNw#sWhgxxVeWhW z^)!a4Ee1e7CL0Zd9I2@*DoqfXWp`UST|)feCD4q_kCUQ=^^87(Z5I8`PU0hvZvMj` zGT+)}x<XGwd34*MaWcm#14S%nM5cpG~ z>i;$t!ZdW>U~#ns+kt<5|EyvMv}f(-sBv$CZV^|S+b5fu8`UZ^o>A#N+pgK>g&r2R zPa*JI-L{bW*!e2vy`TddA*}L-@$$CC_LsH-k#T|}`7CTKjdXBviPuc}RvulSy4-oe zzl=={N!^GlTPDE!ZhgFkUtuoAgODgpz}gfFN$kR)!DHejCh?L?y(C^2=~P$pijs<< zBC?Z-Gt=cwTuyAC$!YBtf1Sri-6$gXBDgmqm>{Aso+G{*7nduOIXWyDmw9P}{wO*A zt8`*$;E{B063&O^W`kQik(OC?><3&1(u#M~Gx~Wb66?6NldWXhh#fxP9*VtNAz$(K zB@28d@q}A{)`j?Xp%jWzM221OP$4>bD(v}ck1*KqUNo4w!JXh+ zI?$@@0prUuf*WxG#rBXnlQ>PTo8ky@#fQs*Z_{RWUI3YFK28tLUDmB6PkipD*d|E> z;i0bu7iN_>5cO_O3Iz4b`)lUm$p1jw-;BF!V^O=E;~*e?E5$zz(6G1N=Z|dODG`gH z;}p3GeDfq|C&tupgGm21xsVKno|QNYUvfvIXF$yRt4fW7ubGzA$Gy*YZ_=CI^igWB zzg}?b4>|Qs@$`}ADq!y}tY7{5*84M%3aQ(<3eqH$Fz#z6uz8?o-o$!ZcCIqpTX}(c z?`YusmnKZGXj6{;Ptk_(m!r8@nmB{aHC&zV+nM*RH@C!o`&lmBC#|`6)Rs9tR`aw> z=E&ZkB9^}L+cL5@ z(;9u($R8IUn(ANE*%euCQ6sahU~c}Pxk|LQqa|lKq1}%b3Q)lx@Fp#8NwUB_TcaPB zi@wn`$Q#12EcJoo#qRw!z5AR;ons}uS3dQL$elBZY{HTQL8A82so0?UK-HMJo^pg_ zdMcX|MZJ0kbUlj9)wpQ8?MHP6pja|@R&+e>4V8LNT$Sb0%6Kc3BXVQOK3HnLrzVny zRQK%llhw{WS2nxY*Ew>}f-y1~+M_;hh(FYKc>|rRBbrqIn0sBNpkyHF#G(CuovbL- zxZL^`+N3Optft@MwP&xNTyD90s+QM!l|_540xjlV=9`*YojNQlG!6$#FB7qab}Jl* z@G}WS1NL&DP1Ezz&kbkOm|hre#`yska|)Ximba*=yK3>z{G16|aiYNlZ7aAf zCEt)B`*Y$Nqj!ZVxmV&Btc!+2kBH-V2&P^w4m6zTwm*zx2QbM?>UJ%vVaKO!NCkhTJ4G@vWjYtZ5C$uVN zQ6<*dgnUQBi|V)B@$(o+E!BHd|6Gz0AVc;c-G^^4rhX8^uu-*uXqLVg^=&;LAFclR zj-p}KlBKS$9pMQkY29lA?QL9wl`5L=1@r2~-CE28oRMtVAv?OY&;IFq}_e4Cj)9`YGt$t@R_G;on0QGjj z=$NkiV7%IRo$j);H6OIP6?r+uXBsIfPIk>;vx;LFg};?eqSg< 
z@-T?8A73T;;VegPGLF?QjumTX<02};h?kTIOQBzmnAPIm`w^Csb}uU>g((pRJW>-E zs~F~s3;&#Q`87lVA?*nX1&*a&4xi~;J&(saJ`r3Fz6yP~CBf>P3d3_lcE~8&7(0A8 zf^M?gtf(UA4<&{pI97fnS#x03=f}({KJ0oNII1qV zV~4lVv7H@^5+p^JOQ5%iZpGat=i>2Ln-`>I=@hFFs`%}Z>2_jfmb`bj?U*GmdUnp3 z9##fk>(vQ>7U&u^w` zc#XMjw$>%#A%IE28|z6@_gczxA*~D$N8FF)vy1p)^sO!>Z>I#qk5-T6)>P#XFHko` zZb|eZ%JJBJ(^U6Poz>Imr3NcR&0)FW-xs4~zn;dgD*X3n@{@cv$17QPa^Z%k3=4-( zPgZ8tB;pHY;P=!h;ZT=B!KR8*>9;@5viSMjs8czfYOjcLANW~TFZcqQJv&n$9}p7i zVj~#rDSgvJ=DgZF6%7Z?0Ox(eFl3&7D1JYoGG()pX5JW!6joQzh982;tYPOg%zO+_ zxyUDmovzcZg?}SQz*YsG0?i$Wp`gcnb%mLcD!AE1;yknqOgr>gxJN3V(NZ{RdB%#17)ZMIMH= zEMAxxJ)82twg3v2EaAD{$+|r9L zgm<&OX)BEn5YcJ=>9Pe5Z>TF_8)5p#j>SDoe=hA{?+UheHPZBQ1iKjAyKR@|tTh7n zJxlM5pES)AOH|kQ+gyjTkNLN`^6sFx&=EKUEZS;eCb4U(`u6Ub4+^CLc+>2nIVMQu zgq~O8QB@+?trt{A#`_H#p`4fHqG<)W6D!t8n^tWNn716uJ;A8atYnb6qegm$MMk|Z zG7~(;K^ksFPTV2=F&H`G(9x5T#e!}|Fq|DVLpl3HXi`jIc!e>3@s#x;VtmhN_<@l(m}<5EA{Par?=`7`xnuFmw)$KT2=9XilPYn-+zJfv-JEE<@fI1 z-`A213-~aUKf75!%eX&Le(#Unm%o3B8qAyij`A=2f`0=3UTOb*Sibbf84eX From b9c129fa3c5e51720d01b895e0346e689822e7ce Mon Sep 17 00:00:00 2001 From: Brook Santangelo <70932395+bsantan@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:44:18 -0700 Subject: [PATCH 9/9] Add files via upload --- README.md | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 8d52345..0a34979 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,70 @@ -# Environment Installation +## Visualize the integrated resources relationships as network diagram + +The following steps will create the output necessary to visualize the relationships among integrated resources and primary sources as a network diagram. In order to finish the figure, Cytoscape must be installed. Manual instructions to create the figure are included. + +### Create network where size of primary sources and aggregated DBs represent number of integrated resources that use them as mappings + +In Cytoscape: +1. 
Import network from file: Resource Interaction Table.xlsx, Sheet 1 (set as source, interaction, target) +2. Import table from file: Resource Interaction Table.xlsx, Sheet 2 (set as node, category) +3. Set node style, fill color, discrete mapping to unique colors for each category +5. Position integrated DB nodes in following order: mdad, gutmgene, gutmdisorder, disbiome, amadis, gimica, bugsigdb, dbbact, mikg4md, preprobiotickg, kg-microbe, biochem4j, unifuncnet +6. Remove labels of edges +7. Change label size to circle, inDegree, continuous mapping + a. Go to tools, analyze network, analyze as directed graph to change node size + b. Toggle with Continuous Mapping Editor for node size to make peak ~10 up to ~70-80 + c. Select integrated databases, set bypass for shape (rectangle) and size (15) +8. Only include edges between integrated DBs, aggregated DBs, and primary sources + a. Select all integrated db nodes, select - edges - select all edges, then select - nodes - deselect all nodes to remove edges + b. Select all aggregated db nodes, select - edges - select all edges, then select - nodes - deselect all nodes to remove edges +9. Save figure as Network_sizeByDegree.svg + +### Create network where size of primary sources and aggregated DBs represent number of integrated resources that use them as mappings + +1. Run the following: +``` +cd ./scripts/ + +python db_expansion.py +Rscript integrated_db_plotting.R +Rscript collapse_categories.R +``` + +In Cytoscape: +2. Import network from file: ~/data/category_edges.tsv, Sheet 1 (set as source, interaction, target) +3. Import table from file: Resource Interaction Table.xlsx, Sheet 2 (set as node, category) +4. Align integrated db’s in order above categories + a. Select integrated db’s only, Layout Tools, align and distribute +5. Select all categories, set size to 100 +6. Change line width to 1, ensure no arrowhead is there (arrowhead will be added in Adobe Illustrator) +7. 
Save as Network_categories.svg + +### Create network with edges +In Adobe Illustrator: +1. Open Network_sizeByDegree.svg +2. Open Network_categories.svg + a. Update colors to chosen palette + b. For large category circles, make 50% opacity + c. Rotate rectangle, text for integrated DB rectangles +3. Change arrowhead to shape to edit colors + a. Add target arrowhead + b. Select same, fill & stroke + c. Object, path, outline stroke + +## Visualize the Reference Matrix + +The following code will create Figure 2b, the matrix of integrated resource relationships. + +### Environment Installation ```bash mamba env create -f db_review.yml ``` -# Generate the Reference Matrix Visualization +### Generate the Reference Matrix Visualization ```bash snakemake --cores 1 ``` -## Child Database Expansion +#### Child Database Expansion The `db_expansion.py` script generates the edge distance between a given database `i` and all child databases that it references. An example case for WikiPathways is given below. ```mermaid @@ -37,8 +94,6 @@ erDiagram | WikiPathways | PubChem | 2 | | WikiPathways | GenBank | 2 | -## Reference Matrix Visualization +### Reference Matrix Visualization We then use our expanded reference table to hierarchically cluster the Source Databases (plotted along the y-axis) based off edge distance to the child nodes. ![alt text](./plots/db_edge_matrix_children.png "Database Links with Children") - -