-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcodeOriginalDraft
More file actions
57 lines (32 loc) · 1.2 KB
/
codeOriginalDraft
File metadata and controls
57 lines (32 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Load the breast cancer data set. read.csv() already defaults to
# header = TRUE and sep = ",", so only the path needs to be given.
canData <- read.csv("/Users/carleebettler/Downloads/breastCancerData.csv")

# Drop the `id` and `x` columns (assigning NULL removes a column), then keep
# only the first 31 of the remaining columns.
canData$id <- NULL
canData$x <- NULL
canData <- canData[, seq_len(31)]

# Exploratory plots, currently disabled:
# boxplot(canData$radius_mean)
# boxplot(canData$area_mean)

# Recode diagnosis as a numeric indicator: 1 where the value is "M", 0 for any
# other value (missing values stay NA).
canData$diagnosis <- ifelse(canData$diagnosis == "M", 1, 0)
# Log-odds transform of a probability p.
# NOTE(review): binomial() appears to resolve its link from the *name* of the
# argument ("logit"), so the built-in logit link is probably what is fitted and
# this helper is not itself called by glm() -- confirm before relying on it.
logit <- function(p) {
  log(p / (1 - p))
}

# Logistic regression of diagnosis on every other column of canData.
modelCan <- glm(
  diagnosis ~ .,
  data = canData,
  family = binomial(logit)
)
summary(modelCan)

# Fitted probabilities on the same data used for fitting, cross-tabulated
# against the recorded diagnosis using a 0.5 cutoff.
pr <- predict(modelCan, newdata = canData, type = "response")
table(actual = canData$diagnosis, predicted = pr > 0.5)
#---------------------------- Discriminant function analysis (LDA)
library(MASS)

# Linear discriminant analysis of diagnosis on all other columns. With
# CV = TRUE, lda() returns leave-one-out cross-validated class assignments;
# rows containing missing values are omitted.
fit <- lda(
  diagnosis ~ .,
  data = canData,
  CV = TRUE,
  na.action = "na.omit"
)

# Confusion table: rows are the recorded diagnosis, columns are the
# cross-validated class assignments.
ct <- table(canData$diagnosis, fit$class)

# Fraction classified correctly within each recorded class.
diag(prop.table(ct, margin = 1))

# Overall fraction classified correctly.
sum(diag(prop.table(ct)))
#--------------------------- Random forest
# TODO(Paul): add the random forest model here.
#--------------------------- PCA
library(ggbiplot)

# Log-transform columns 2 to 31 of canData.
# NOTE(review): assumes column 1 is the diagnosis label and columns 2:31 are
# the numeric features -- confirm against the CSV layout.
dataRefined <- log(canData[, 2:31])

# log(0) yields -Inf, which prcomp() cannot process. Clamp infinite entries to
# the most extreme *finite* value of the same column and on the same side.
# (The previous sentinel of +9999999 turned the smallest observations into
# huge positive outliers, which dominated the centred and scaled PCA.)
dataRefined[] <- lapply(dataRefined, function(x) {
  finite_vals <- x[is.finite(x)]
  if (length(finite_vals) > 0) {
    x[is.infinite(x) & x < 0] <- min(finite_vals)
    x[is.infinite(x) & x > 0] <- max(finite_vals)
  }
  x
})

# Principal components on centred, unit-variance columns.
pca <- prcomp(dataRefined, center = TRUE, scale. = TRUE)

# Importance of components. summary() for prcomp objects has no `loadings`
# argument; the loadings themselves are available as pca$rotation.
summary(pca)

# NOTE(review): devtools is not used by the code visible here; presumably it
# was only needed to install ggbiplot -- confirm before removing.
library(devtools)

# Biplot coloured by diagnosis. `groups` is passed as a factor so the two
# classes get discrete colours and one ellipse each instead of a continuous
# colour scale.
gPlot <- ggbiplot(
  pca,
  obs.scale = 1,
  var.scale = 1,
  groups = factor(canData$diagnosis),
  ellipse = TRUE,
  circle = TRUE
)
gPlot