@@ -4,7 +4,7 @@ author: "Guyliann Engels, Raphael Conotte & Philippe Grosjean"
44description : " **SDD II Module 5** Matrices de distances et CAH."
55tutorial :
66 id : " B05La_cah"
7- version : 2.0.0 /10
7+ version : 2.0.1 /10
88output :
99 learnr::tutorial :
1010 progressive : true
@@ -14,10 +14,10 @@ runtime: shiny_prerendered
1414``` {r setup, include=FALSE}
1515BioDataScience2::learnr_setup()
1616
17- # CAH for SciViews, version 1.1.0
17+ # CAH for SciViews, version 1.1.1
1818# Copyright (c) 2021, Philippe Grosjean (phgrsojean@sciviews.org)
1919
20- SciViews::R
20+ SciViews::R()
2121
2222# dist is really a dissimilarity matrix => we use dissimilarity() as in the
2323# {cluster} package, i.e., class is c("dissimilarity", "dist")
@@ -26,8 +26,8 @@ SciViews::R
2626# factoextra::get_dist and probably other dist-compatible functions
2727# Depending on method =, use either vegan::vegdist or stats::dist as default fun
2828dissimilarity <- function(data, formula = ~ ., subset = NULL,
29- method = "euclidean", scale = FALSE, rownames.col = "rowname",
30- transpose = FALSE, fun = NULL, ...) {
29+ method = "euclidean", scale = FALSE, rownames.col = "rowname",
30+ transpose = FALSE, fun = NULL, ...) {
3131 # TODO: get more meaningful warnings and errors by replacing fun by actual
3232 # name of the function
3333 if (is.null(fun)) {# Default function depends on the chosen method
@@ -121,7 +121,7 @@ as.dissimilarity.matrix <- function(x, ...) {
121121
122122# We want to print only the first few rows and columns
123123print.dissimilarity <- function(x, digits.d = 3L, rownames.lab = "labels",
124- ...) {
124+ ...) {
125125 mat <- as.matrix(x)
126126 mat <- format(round(mat, digits.d))
127127 diag(mat) <- ""
@@ -161,8 +161,8 @@ nobs.dissimilarity <- function(object, ...)
161161# TODO: `[` by first transforming into a matrix with as.matrix()
162162
163163autoplot.dissimilarity <- function(object, order = TRUE, show.labels = TRUE,
164- lab.size = NULL, gradient = list(low = "red", mid = "white", high = "blue"),
165- ...) {
164+ lab.size = NULL, gradient = list(low = "red", mid = "white", high = "blue"),
165+ ...) {
166166 factoextra::fviz_dist(object, order = order, show_labels = show.labels,
167167 lab_size = lab.size, gradient = gradient)
168168}
@@ -262,7 +262,7 @@ augment.cluster <- function(x, data, k = NULL, h = NULL, ...) {
262262# /beautiful-dendrogram-visualizations-in-r-5-must-known-methods
263263# -unsupervised-machine-learning
264264plot.cluster <- function(x, y, hang = -1, check = TRUE, type = "vertical",
265- lab = "Height", ...) {
265+ lab = "Height", ...) {
266266 type <- match.arg(type[1], c("vertical", "horizontal", "circular"))
267267 # type == "circular" is special because we need to transform as ape::phylo
268268 if (type == "circular") {
@@ -290,7 +290,7 @@ circle <- function(x = 0, y = 0, d = 1, col = 0, lwd = 1, lty = 1, ...)
290290
291291# TODO: make sure the dendrogram is correct with different ggplot themes
292292autoplot.cluster <- function(object, type = "vertical", circ.text.size = 3,
293- theme = theme_sciviews(), xlab = "", ylab = "Height", ...) {
293+ theme = theme_sciviews(), xlab = "", ylab = "Height", ...) {
294294 if (is.null(type))
295295 type <- "vertical"
296296 type <- match.arg(type[1], c("vertical", "horizontal", "circular"))
@@ -327,33 +327,46 @@ theme = theme_sciviews(), xlab = "", ylab = "Height", ...) {
327327 ylab("")
328328
329329 } else if (type == "vertical") {# Vertical dendrogram
330- dendro <- dendro +
331- scale_x_continuous(breaks = seq_along(ddata$labels$label),
332- labels = ddata$labels$label) +
333- scale_y_continuous(expand = expansion(mult = c(0, 0.02))) +
334- theme(panel.border = element_blank(),
335- axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
336- axis.line.x = element_blank(),
337- axis.ticks.x = element_blank(),
338- axis.text.y = element_text(angle = 90, hjust = 0.5))
330+ dendro <- dendro +
331+ scale_x_continuous(breaks = seq_along(ddata$labels$label),
332+ labels = ddata$labels$label) +
333+ scale_y_continuous(expand = expansion(mult = c(0, 0.02))) +
334+ theme(panel.border = element_blank(),
335+ axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
336+ axis.line.x = element_blank(),
337+ axis.ticks.x = element_blank(),
338+ axis.text.y = element_text(angle = 90, hjust = 0.5))
339339
340340 } else {# Horizontal dendrogram
341- dendro <- dendro +
342- scale_x_continuous(breaks = seq_along(ddata$labels$label),
343- labels = ddata$labels$label, position = "top") +
344- scale_y_reverse(expand = expansion(mult = c(0.05, 0))) +
345- coord_flip() +
346- theme(panel.border = element_blank(),
347- axis.line.y = element_blank(),
348- axis.ticks.y = element_blank())
341+ dendro <- dendro +
342+ scale_x_continuous(breaks = seq_along(ddata$labels$label),
343+ labels = ddata$labels$label, position = "top") +
344+ scale_y_reverse(expand = expansion(mult = c(0.05, 0))) +
345+ coord_flip() +
346+ theme(panel.border = element_blank(),
347+ axis.line.y = element_blank(),
348+ axis.ticks.y = element_blank())
349349 }
350350 dendro
351351}
352352
353353chart.cluster <- function(data, ...,
354- type = NULL, env = parent.frame())
354+ type = NULL, env = parent.frame())
355355 autoplot(data, type = type, ...)
356356
357+ # To indicate where to cut in the dendrogram, one could use `geom_hline()`,
358+ # but when the dendrogram is horizontal or circular, this is suprizing. So,
359+ # I define geom_dendroline(h = ....)
360+ geom_dendroline <- function(h, ...)
361+ geom_hline(yintercept = h, ...)
362+
363+ # A hack to get fun$type() working in learnr
364+ chart <- list(
365+ vertical = function(data, type, ...) chart(data, type = "vertical", ...),
366+ horizontal = function(data, type, ...) chart(data, type = "horizontal", ...),
367+ circular = function(data, type, ...) chart(data, type = "circular", ...)
368+ )
369+
357370# Loading datasets
358371data("doubs", package = "ade4")
359372envir <- doubs$env
@@ -677,7 +690,7 @@ envir_dist <- dissimilarity(envir, method = "euclidean", scale = TRUE)
677690envir_clust <- cluster(envir_dist, method = "ward.D2")
678691```
679692
680- En partant de ` envir_clust ` calculé par la méthode Ward D2 ci-dessus qui est déjà en mémoire, tracez maintenant un dendrogramme ** horizontal** , et indiquez-y un ** niveau de coupure à hauteur de 7 en rouge** (inspirez-vous des notes du cours ainsi que de l'aide en ligne des fonctions ` geom_...() ` ).
693+ En partant de ` envir_clust ` calculé par la méthode Ward D2 ci-dessus qui est déjà en mémoire, tracez maintenant un dendrogramme ** horizontal** , et indiquez-y un ** niveau de coupure à hauteur de 7 en rouge** (inspirez-vous des notes du cours pour voir quel ` geom_...() ` utiliser ).
681694
682695``` {r hclust3_h2, exercise=TRUE, exercise.setup="group_prep"}
683696chart$___(___) +
@@ -686,14 +699,14 @@ chart$___(___) +
686699
687700``` {r hclust3_h2-hint}
688701chart$___(___) +
689- geom_hline (___ = 7, ___ = "red")
702+ geom_dendroline (___ = 7, ___ = "red")
690703
691704#### ATTENTION: Hint suivant = solution !####
692705```
693706
694707``` {r hclust3_h2-solution}
695708chart$horizontal(envir_clust) +
696- geom_hline(yintercept = 7, color = "red")
709+ geom_dendroline(h = 7, color = "red")
697710```
698711
699712``` {r hclust3_h2-check}
@@ -739,7 +752,7 @@ Un second critère utilisable dans notre cas consiste à comparer des regroupeme
739752fish_dist <- dissimilarity(fish, method = "bray")
740753fish_clust <- cluster(fish_dist, method = "ward.D2")
741754chart$horizontal(fish_clust) +
742- geom_hline(yintercept = 1.05, color = "red") # Hauteur choisie pour avoir 5 groupes
755+ geom_dendroline(h = 1.05, color = "red") # Hauteur choisie pour avoir 5 groupes
743756(fish_groups <- predict(fish_clust, h = 1.05)) # Extrait les 5 groupes
744757```
745758
0 commit comments