@@ -4,13 +4,26 @@ conf <- BioDataScience::config()
44library(shiny )
55library(learndown )
66library(BioDataScience2 )
7+ library(dplyr )
8+ library(tidyr )
9+ library(flow )
10+ library(chart )
711
812# add news functions ----
913# # This function move to a package
1014
15+ # CAH for SciViews, version 1.1.1
16+ # Copyright (c) 2021, Philippe Grosjean (phgrsojean@sciviews.org)
17+
18+ # dist is really a dissimilarity matrix => we use dissimilarity() as in the
19+ # {cluster} package, i.e., class is c("dissimilarity", "dist")
20+ # TODO: also make a similarity object and convert between the two
21+ # fun can be stats::dist, vegan::vegdist, vegan::designdist, cluster::daisy
22+ # factoextra::get_dist and probably other dist-compatible functions
23+ # Depending on method =, use either vegan::vegdist or stats::dist as default fun
1124dissimilarity <- function (data , formula = ~ . , subset = NULL ,
12- method = " euclidean" , scale = FALSE , rownames.col = " rowname" ,
13- transpose = FALSE , fun = NULL , ... ) {
25+ method = " euclidean" , scale = FALSE , rownames.col = " rowname" ,
26+ transpose = FALSE , fun = NULL , ... ) {
1427 # TODO: get more meaningful warnings and errors by replacing fun by actual
1528 # name of the function
1629 if (is.null(fun )) {# Default function depends on the chosen method
@@ -104,7 +117,7 @@ as.dissimilarity.matrix <- function(x, ...) {
104117
105118# We want to print only the first few rows and columns
106119print.dissimilarity <- function (x , digits.d = 3L , rownames.lab = " labels" ,
107- ... ) {
120+ ... ) {
108121 mat <- as.matrix(x )
109122 mat <- format(round(mat , digits.d ))
110123 diag(mat ) <- " "
@@ -128,7 +141,7 @@ print.dissimilarity <- function(x, digits.d = 3L, rownames.lab = "labels",
128141 more_info <- " (transposed data)"
129142 }
130143 cat(" Dissimilarity matrix with metric: " , attr(x , " metric" ),
131- more_info , " \n " , sep = " " )
144+ more_info , " \n " , sep = " " )
132145 print(tbl )
133146 invisible (x )
134147}
@@ -144,14 +157,14 @@ nobs.dissimilarity <- function(object, ...)
144157# TODO: `[` by first transforming into a matrix with as.matrix()
145158
146159autoplot.dissimilarity <- function (object , order = TRUE , show.labels = TRUE ,
147- lab.size = NULL , gradient = list (low = " red" , mid = " white" , high = " blue" ),
148- ... ) {
160+ lab.size = NULL , gradient = list (low = " red" , mid = " white" , high = " blue" ),
161+ ... ) {
149162 factoextra :: fviz_dist(object , order = order , show_labels = show.labels ,
150- lab_size = lab.size , gradient = gradient )
163+ lab_size = lab.size , gradient = gradient )
151164}
152165
153166chart.dissimilarity <- function (data , ... ,
154- type = NULL , env = parent.frame())
167+ type = NULL , env = parent.frame())
155168 autoplot(data , type = type , ... )
156169
157170# cluster object (inheriting from hclust)
@@ -234,7 +247,7 @@ augment.cluster <- function(x, data, k = NULL, h = NULL, ...) {
234247 clst <- predict(x , k = k , h = h , ... )
235248 if (nrow(data ) != length(clst )) {
236249 stop(" Different number of items in " , msg , " (" ,nrow(data ) ,
237- " ) and in the clusters (" , length(clst ), " )" )
250+ " ) and in the clusters (" , length(clst ), " )" )
238251 }
239252 tibble :: add_column(data , .fitted = clst )
240253}
@@ -245,7 +258,7 @@ augment.cluster <- function(x, data, k = NULL, h = NULL, ...) {
245258# /beautiful-dendrogram-visualizations-in-r-5-must-known-methods
246259# -unsupervised-machine-learning
247260plot.cluster <- function (x , y , hang = - 1 , check = TRUE , type = " vertical" ,
248- lab = " Height" , ... ) {
261+ lab = " Height" , ... ) {
249262 type <- match.arg(type [1 ], c(" vertical" , " horizontal" , " circular" ))
250263 # type == "circular" is special because we need to transform as ape::phylo
251264 if (type == " circular" ) {
@@ -269,11 +282,11 @@ plot.cluster <- function(x, y, hang = -1, check = TRUE, type = "vertical",
269282# TODO: should be nice to do similar function for other symbols too in SciViews
270283circle <- function (x = 0 , y = 0 , d = 1 , col = 0 , lwd = 1 , lty = 1 , ... )
271284 symbols(x = x , y = y , circles = d / 2 , fg = col , lwd = lwd , lty = lty ,
272- inches = FALSE , add = TRUE , ... )
285+ inches = FALSE , add = TRUE , ... )
273286
274287# TODO: make sure the dendrogram is correct with different ggplot themes
275288autoplot.cluster <- function (object , type = " vertical" , circ.text.size = 3 ,
276- theme = theme_sciviews(), xlab = " " , ylab = " Height" , ... ) {
289+ theme = theme_sciviews(), xlab = " " , ylab = " Height" , ... ) {
277290 if (is.null(type ))
278291 type <- " vertical"
279292 type <- match.arg(type [1 ], c(" vertical" , " horizontal" , " circular" ))
@@ -301,49 +314,55 @@ autoplot.cluster <- function(object, type = "vertical", circ.text.size = 3,
301314 scale_y_reverse() +
302315 coord_polar(start = pi / 2 ) +
303316 geom_text(data = label_df ,
304- aes(x = id , y = - 0.02 , label = labels , hjust = hjust ),
305- size = circ.text.size , angle = label_df $ angle , inherit.aes = FALSE ) +
317+ aes(x = id , y = - 0.02 , label = labels , hjust = hjust ),
318+ size = circ.text.size , angle = label_df $ angle , inherit.aes = FALSE ) +
306319 theme(panel.border = element_blank(),
307- axis.text = element_blank(),
308- axis.line = element_blank(),
309- axis.ticks.y = element_blank()) +
320+ axis.text = element_blank(),
321+ axis.line = element_blank(),
322+ axis.ticks.y = element_blank()) +
310323 ylab(" " )
311324
312325 } else if (type == " vertical" ) {# Vertical dendrogram
313326 dendro <- dendro +
314327 scale_x_continuous(breaks = seq_along(ddata $ labels $ label ),
315- labels = ddata $ labels $ label ) +
328+ labels = ddata $ labels $ label ) +
316329 scale_y_continuous(expand = expansion(mult = c(0 , 0.02 ))) +
317330 theme(panel.border = element_blank(),
318- axis.text.x = element_text(angle = 90 , hjust = 1 , vjust = 0.5 ),
319- axis.line.x = element_blank(),
320- axis.ticks.x = element_blank(),
321- axis.text.y = element_text(angle = 90 , hjust = 0.5 ))
331+ axis.text.x = element_text(angle = 90 , hjust = 1 , vjust = 0.5 ),
332+ axis.line.x = element_blank(),
333+ axis.ticks.x = element_blank(),
334+ axis.text.y = element_text(angle = 90 , hjust = 0.5 ))
322335
323336 } else {# Horizontal dendrogram
324337 dendro <- dendro +
325338 scale_x_continuous(breaks = seq_along(ddata $ labels $ label ),
326- labels = ddata $ labels $ label , position = " top" ) +
339+ labels = ddata $ labels $ label , position = " top" ) +
327340 scale_y_reverse(expand = expansion(mult = c(0.05 , 0 ))) +
328341 coord_flip() +
329342 theme(panel.border = element_blank(),
330- axis.line.y = element_blank(),
331- axis.ticks.y = element_blank())
343+ axis.line.y = element_blank(),
344+ axis.ticks.y = element_blank())
332345 }
333346 dendro
334347}
335348
336349chart.cluster <- function (data , ... ,
337- type = NULL , env = parent.frame())
350+ type = NULL , env = parent.frame())
338351 autoplot(data , type = type , ... )
339352
353+ # To indicate where to cut in the dendrogram, one could use `geom_hline()`,
354+ # but when the dendrogram is horizontal or circular, this is suprizing. So,
355+ # I define geom_dendroline(h = ....)
356+ geom_dendroline <- function (h , ... )
357+ geom_hline(yintercept = h , ... )
340358
341359# data ----
342- penguins <- read(" penguins" , package = " palmerpenguins" )
360+ penguins <- data.io :: read(" penguins" , package = " palmerpenguins" )
343361
344362penguins %> . %
345363 # filter(., sex == "male") %>.%
346- select(. , species , bill_length_mm , bill_depth_mm , flipper_length_mm , body_mass_g ) %> . %
364+ select(. , species , bill_length_mm , bill_depth_mm , flipper_length_mm ,
365+ body_mass_g ) %> . %
347366 drop_na(. ) - > peng
348367
349368peng %> . %
@@ -356,10 +375,10 @@ score_cah <- function(x, reference = peng$species, digits = 5) {
356375 max_gr <- apply(tab , 1 , which.max )
357376 tab [ , ]
358377
359- if (length(unique(max_gr )) < 3 )
378+ if (length(unique(max_gr )) < 3 )
360379 res <- " Votre CAH ne permet pas de retrouver les 3 groupes. Un ou plusieurs groupes sont confondus."
361380
362- if (length(unique(max_gr )) == 3 ) {
381+ if (length(unique(max_gr )) == 3 ) {
363382 tot <- apply(tab , 1 , max ) / rowSums(tab )
364383 res <- paste0(" Votre CAH permet de discerner 3 groupes avec une précision de " , round((100 * sum(tot )/ nlevels(reference )),digits = digits ), " %." )
365384 }
@@ -369,15 +388,16 @@ score_cah <- function(x, reference = peng$species, digits = 5) {
369388# UI -----
370389
371390ui <- fluidPage(
372- learndownShiny(" Regroupement d'espèces de manchôts avec la classification hiérarchique ascendante ." ),
391+ learndownShiny(" Classification hiérarchique ascendante sur des mesures de manchots d'antarctique ." ),
373392
374393 sidebarLayout(
375394 sidebarPanel(
376- p(" Vous avez à disposition 342 manchôts de 3 espèces différentes. Trouvez les meilleurs paramètres afin d'obtenir la plus haute similitude entre votre CAH et les observations de terrain ." ),
377- p(" Les variables monitorées sont les suivante : la longueur du bec (mm), la profondeur du bec (mm), la longueur de la nageoire (mm), la masse (g)." ),
378- selectInput(" method_dist" , " Indice de distance" , choices = c(" euclidian" , " bray" , " canberra" , " manhattan" )),
395+ p(" Vous avez à disposition des mesures sur 342 manchots de 3 espèces différentes. Trouvez les meilleurs paramètres pour votre CAH afin d'optimiser votre regroupement ." ),
396+ p(" Les variables mesurées sont les suivantes : la longueur du bec (mm), la largeur du bec (mm), la longueur de la nageoire (mm) et la masse (g)." ),
397+ selectInput(" method_dist" , " Métrique de distance" , choices = c(" euclidian" , " bray" , " canberra" , " manhattan" )),
379398 selectInput(" scale" , " Standardisation" , choices = c(FALSE , TRUE )),
380- selectInput(" method_clust" , " Méthode de CAH" , choices = c(" complete" , " single" ," average" , " ward.D2" )),
399+ selectInput(" method_clust" , " Méthode de CAH" ,
400+ choices = c(" complete" , " single" , " average" , " ward.D2" )),
381401 hr(),
382402 submitQuitButtons()
383403 ),
@@ -399,11 +419,11 @@ ui <- fluidPage(
399419 )
400420)
401421
402-
403422server <- function (input , output , session ) {
404423
405424 cah <- reactive({
406- peng_dist <- dissimilarity(data = peng_red , scale = as.logical(input $ scale ), method = input $ method_dist )
425+ peng_dist <- dissimilarity(data = peng_red , scale = as.logical(input $ scale ),
426+ method = input $ method_dist )
407427 peng_clust <- cluster(peng_dist , method = input $ method_clust )
408428 peng_clust
409429 })
@@ -429,14 +449,14 @@ server <- function(input, output, session) {
429449 })
430450
431451
432- trackEvents(session , input , output ,
433- sign_in.fun = BioDataScience :: sign_in , config = conf )
434- trackSubmit(session , input , output , max_score = 3 , solution =
435- list (method_dist = " euclidian" , scale = " TRUE" , method_clust = " ward.D2" ),
436- comment = " " ,
437- message.success = " Correct, c'est la meilleur solution. La CAH obtient un score très bon de plus de 94 % de correspondace " ,
438- message.error = " Incorrect, un meilleur choix des paramètres est possible." )
439- trackQuit(session , input , output , delay = 20 )
452+ trackEvents(session , input , output ,
453+ sign_in.fun = BioDataScience :: sign_in , config = conf )
454+ trackSubmit(session , input , output , max_score = 3 , solution =
455+ list (method_dist = " euclidian" , scale = " TRUE" , method_clust = " ward.D2" ),
456+ comment = " " ,
457+ message.success = " Correct, c'est la meilleur solution. La CAH obtient un score très bon de plus de 94 % de correspondance " ,
458+ message.error = " Incorrect, un meilleur choix des paramètres est possible." )
459+ trackQuit(session , input , output , delay = 20 )
440460}
441461
442462shinyApp(ui , server )
0 commit comments