72 changes: 72 additions & 0 deletions machine learning/assignment 1.R
@@ -0,0 +1,72 @@
exam1 = read.table('C:\\work\\BaiduNetdiskWorkspace\\coursera\\machine learning\\machine-learning-ex1\\ex1\\ex1data1.txt', sep = ",")
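# ex1data1.txt: V1 = city population in 10,000s, V2 = profit in $10,000s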
# identity matrix
diag(5)

# Plotting the Data
plot(exam1$V1,exam1$V2,pch = 4 ,col='red', xlab='Population of City in 10,000s',ylab='Profit in $10,000s')

# Implementation
m = length(exam1$V1)
X = cbind(rep(1,m),exam1$V1)
theta = matrix(c(0,0),2,1)
iter = 1500
alpha = 0.01

# computing the cost
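# J(theta) = (1/(2m)) * sum((X %*% theta - y)^2)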
h=X%*%theta
J = 1/(2*m)*sum((h-exam1$V2)^2)

# Gradient descent
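# update rule: theta <- theta - (alpha/m) * t(X) %*% (X %*% theta - y), repeated for iter iterations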
for (i in 1:iter) {
  theta = theta-alpha/m*t(X)%*%(h-exam1$V2)
  h = X%*%theta
  J = 1/(2*m)*sum((h-exam1$V2)^2)
}

# predict
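# predicted profit for city populations of 35,000 and 70,000 (inputs are in units of 10,000 people)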
h=X%*%theta
p1 = matrix(c(1,3.5),1,2)
p2 = matrix(c(1,7),1,2)
h1 = p1%*%theta
h2 = p2%*%theta
plot(exam1$V1,exam1$V2,pch = 4 ,col='red', xlab='Population of City in 10,000s',ylab='Profit in $10,000s')
lines(X[,2],h,col='blue',type='l')

### multivariable
exam2 = read.table('C:\\work\\BaiduNetdiskWorkspace\\coursera\\machine learning\\machine-learning-ex1\\ex1\\ex1data2.txt', sep = ",")

# Implementation
m = length(exam2$V1)
X = cbind(rep(1,m),exam2$V1,exam2$V2)
theta = matrix(c(0,0,0),3,1)
iter = 1500
alpha = 0.01

# standardized
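# the two features (house size in square feet, number of bedrooms) are on very different scales,
# so each column is z-scored before running gradient descent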
mean1=mean(X[,2])
std1 = sd(X[,2])
X[,2] = (X[,2]-mean1)/std1

mean2=mean(X[,3])
std2 = sd(X[,3])
X[,3] = (X[,3]-mean2)/std2

# computing the cost
h=X%*%theta
J = rep(0,iter)

# Gradient descent
for (i in 1:iter) {
  theta = theta-alpha/m*t(X)%*%(h-exam2$V3)
  h = X%*%theta
  J[i] = 1/(2*m)*sum((h-exam2$V3)^2)
}

plot(seq(1,iter),J,type = 'l',col='green')
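# the cost curve should decrease on every iteration; if it rises or oscillates, reduce alpha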

# theta Normal Equations
library(matlib)
theta_n = inv(t(X)%*%X)%*%t(X)%*%exam2$V3
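
# sketch: the gradient-descent theta and the normal-equation theta_n were both fitted on the
# normalized X, so they should give roughly the same prediction. The 1650 sq ft, 3 bedroom house
# below is used purely as an illustrative input.
p = c(1, (1650-mean1)/std1, (3-mean2)/std2)
price_gd = p%*%theta
price_ne = p%*%theta_n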
112 changes: 112 additions & 0 deletions machine learning/exam 2.R
@@ -0,0 +1,112 @@
# read data
data1 = read.table('C:\\work\\BaiduNetdiskWorkspace\\coursera\\machine learning\\machine-learning-ex2\\ex2\\ex2data1.txt',sep = ',')

# Visualizing the data (the decision boundary is drawn after theta has been fitted, below)
colors = c('yellow','black')
pch = c(16,3)
plot(data1$V1, data1$V2, col=colors[factor(data1$V3, levels = c("0", "1"))],pch = pch[factor(data1$V3, levels = c("0", "1"))],xlab='Exam 1 score',ylab='Exam 2 score')

iter = 400
alpha = 0.001
#sigmoid function
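# g(z) = 1/(1+exp(-z)); h(x) = g(t(theta) %*% x) is read as P(y = 1 | x)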
m = length(data1$V1)
X = cbind(rep(1,m),data1$V1,data1$V2)
theta = matrix(c(0.1,0.1,0.1),3,1)
z = X%*%theta
sig = 1/(1+exp(-z))


# Cost function
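# logistic cost: J(theta) = (1/m) * sum(-y*log(h) - (1-y)*log(1-h))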
J = rep(0,iter)
for (i in 1:iter) {
  theta = theta-alpha/m*t(X)%*%(sig-data1$V3)
  z = X%*%theta
  sig = 1/(1+exp(-z))
  J[i] = 1/m*sum(-data1$V3*log(sig)-(1-data1$V3)*log(1-sig))
}

plot(seq(1,iter),J,type = 'l')


# cost function
J <- function(theta){
z = X%*%theta
sig = 1/(1+exp(-z))
C=1/m*sum(-data1$V3*log(sig)-(1-data1$V3)*log(1-sig))
return(C)
}
# visualize the cost surface over theta[2] and theta[3], holding theta[1] at its current value
# (the grid ranges below are chosen ad hoc; J expects the full parameter vector)
theta2_grid = seq(-0.1, 0.1, length.out = 101)
theta3_grid = seq(-0.1, 0.1, length.out = 101)
z = matrix(0, 101, 101)
for (i in 1:101) {
  for (j in 1:101) {
    z[i,j] = J(c(theta[1], theta2_grid[i], theta3_grid[j]))
  }
}
persp(theta2_grid, theta3_grid, z)
library(rgl)
persp3d(theta2_grid, theta3_grid, z, col = "green4")

theta_optim <- optim(par=theta,fn=J)
#set theta
theta <- theta_optim$par
#cost at optimal value of the theta
theta_optim$value
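
# with the optimized theta, re-plot the data and overlay the linear decision boundary
# theta[1] + theta[2]*x1 + theta[3]*x2 = 0
plot(data1$V1, data1$V2, col=colors[factor(data1$V3, levels = c("0", "1"))],pch = pch[factor(data1$V3, levels = c("0", "1"))],xlab='Exam 1 score',ylab='Exam 2 score')
X1 = seq(30,100,1)
X2 = -theta[1]/theta[3]-theta[2]*X1/theta[3]
lines(X1,X2,col='blue')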


# Predict
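# probability of admission for a student with exam scores 45 and 85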
p1 = c(1,45,85)
z = p1%*%theta
sig = 1/(1+exp(-z))

# Regularized logistic regression (ex2data2: microchip test results)
data2 = read.table('C:\\work\\BaiduNetdiskWorkspace\\coursera\\machine learning\\machine-learning-ex2\\ex2\\ex2data2.txt',sep = ',')

#Visualizing the data
colors = c('yellow','black')
plot(data2$V1, data2$V2, col=colors[factor(data2$V3, levels = c("0", "1"))],pch = pch[factor(data2$V3, levels = c("0", "1"))],xlab='Microchip test 1',ylab='Microchip test 2')

# Cost function
m = length(data2$V1)
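# map the two input features into all polynomial terms up to degree 6 (28 columns including the intercept)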
mapfeature = cbind(rep(1,length(data2$V1)),
                   data2$V1, data2$V2,
                   data2$V1^2, data2$V1*data2$V2, data2$V2^2,
                   data2$V1^3, data2$V1^2*data2$V2, data2$V1*data2$V2^2, data2$V2^3,
                   data2$V1^4, data2$V1^3*data2$V2, data2$V1^2*data2$V2^2, data2$V1*data2$V2^3, data2$V2^4,
                   data2$V1^5, data2$V1^4*data2$V2, data2$V1^3*data2$V2^2, data2$V1^2*data2$V2^3, data2$V1*data2$V2^4, data2$V2^5,
                   data2$V1^6, data2$V1^5*data2$V2, data2$V1^4*data2$V2^2, data2$V1^3*data2$V2^3, data2$V1^2*data2$V2^4, data2$V1*data2$V2^5, data2$V2^6)
theta2 = matrix(rep(0,28),28,1)
z = mapfeature%*%theta2
sig = 1/(1+exp(-z))
lambda = 1
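# regularized cost: J(theta) = (1/m)*sum(-y*log(h)-(1-y)*log(1-h)) + (lambda/(2m))*sum(theta[2:28]^2);
# the intercept term theta[1] is not penalized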
J = 1/m*sum(-data2$V3*log(sig)-(1-data2$V3)*log(1-sig))+lambda*sum(theta2[2:28]^2)/(2*m)

# regularized cost function (the intercept term theta[1] is not penalized)
J <- function(theta){
  z = mapfeature%*%theta
  sig = 1/(1+exp(-z))
  C = 1/m*sum(-data2$V3*log(sig)-(1-data2$V3)*log(1-sig))+lambda*sum(theta[2:28]^2)/(2*m)
  return(C)
}
theta_optim <- optim(par=theta2,fn=J)
#set theta
theta <- theta_optim$par
#cost at optimal value of the theta
theta_optim$value
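
# sketch: training accuracy of the regularized classifier, predicting 1 whenever the
# fitted probability is at least 0.5
pred = ifelse(1/(1+exp(-mapfeature%*%theta)) >= 0.5, 1, 0)
mean(pred == data2$V3)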

# plot the decision boundary: the curve where the degree-6 feature mapping times theta equals 0
u = seq(-1, 1.5, length.out = 50)
v = seq(-1, 1.5, length.out = 50)
map_point = function(x1, x2){
  # same column order as mapfeature above: 1, then x1^(d-k)*x2^k for d = 1..6, k = 0..d
  f = 1
  for (d in 1:6) for (k in 0:d) f = c(f, x1^(d-k)*x2^k)
  return(f)
}
z = matrix(0, length(u), length(v))
for (i in seq_along(u)) {
  for (j in seq_along(v)) {
    z[i,j] = sum(map_point(u[i], v[j])*theta)
  }
}
plot(data2$V1, data2$V2, col=colors[factor(data2$V3, levels = c("0", "1"))],pch = pch[factor(data2$V3, levels = c("0", "1"))],xlab='Microchip test 1',ylab='Microchip test 2')
contour(u, v, z, levels=0, col='blue', lwd=2, drawlabels=FALSE, add=TRUE)
102 changes: 102 additions & 0 deletions machine learning/exam 3.R
@@ -0,0 +1,102 @@
library(R.matlab)
data1 = readMat('C:\\work\\BaiduNetdiskWorkspace\\coursera\\machine learning\\machine-learning-ex3\\ex3\\ex3data1.mat')
m = length(data1$y)
rand_indices = sample(1:m, 100, replace = F)
X = data1$X[rand_indices,]
y= data1$y[rand_indices,]
m = 100
n = 400
example_width = 20
example_height = 20
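# each training example is a 20 x 20 pixel grayscale image; 100 random examples are tiled into a 10 x 10 grid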

# Compute number of items to display
display_rows = 10
display_cols = 10

# Between images padding
pad = 1

# Setup blank display
display_array = matrix(rep(1),pad + display_rows * (example_height + pad),pad + display_cols * (example_width + pad))

# Copy each example into a patch on the display array
curr_ex = 1;
for (j in 1:display_rows){
for (i in 1:display_cols){
display_array[pad + (j - 1) * (example_height + pad) + (1:example_height),pad + (i - 1) * (example_width + pad) + (1:example_width)] = matrix(X[curr_ex, ], example_height, example_width)
curr_ex = curr_ex + 1

}
}


# image() accepts a custom palette via col (e.g. gray.colors, topo.colors, hcl.colors);
# the default is hcl.colors(12, "YlOrRd", rev = TRUE). More colors give a smoother image.

image(display_array, col = gray.colors(50))

# Part 2a: Vectorize Logistic Regression
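# small hand-made test case for the regularized cost and gradient (lambda_t = 3)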
theta = matrix(c(-2,-1,1,2),4,1)
X_t = cbind(rep(1,5),matrix(seq(1,15,1)/10,5,3))
y_t = matrix(c(1,0,1,0,1),5,1)
m= length(y_t)
lambda_t = 3
g = 1/(1+exp(-X_t%*%theta))
reg = lambda_t/(2*m)*t(theta[2:4])%*%theta[2:4]
C = 1/m*sum(-y_t*log(g)-(1-y_t)*log(1-g)) + reg
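# gradient: (1/m) * t(X) %*% (g - y); the non-intercept entries get an extra (lambda/m) * theta term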
grad = matrix(rep(0,4),4,1)
grad[1]=1/m*t(g-y_t)%*%X_t[,1]
grad[2:4]=1/m*t(g-y_t)%*%X_t[,2:4]+lambda_t/m*theta[2:4]

all_theta_t = matrix(rep(0,40),10,4)

cost_function = function(theta, X, Y, lambda){
  m = length(Y)
  n = length(theta)
  g = 1/(1+exp(-X%*%theta))
  reg = lambda/(2*m)*t(theta[2:n])%*%theta[2:n]
  C = 1/m*sum(-Y*log(g)-(1-Y)*log(1-g)) + reg
  return(C)
}


a = cost_function(theta, X_t, y_t, lambda_t)

all_theta = matrix(rep(0.05,4010),401,10)
cost = rep(0,10)
X = cbind(matrix(rep(1,5000),5000,1), data1$X)
lambda_t = 0.1
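# one-vs-all: fit one regularized classifier per class; column i of all_theta separates
# digit i from all other digits (label 10 stands for the digit 0)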
for (i in 1:10) {
  Y = ifelse(data1$y==i,1,0)
  fit = optim(par=all_theta[,i], fn=cost_function, X=X, Y=Y, lambda=lambda_t)
  all_theta[,i] = fit$par
  cost[i] = fit$value
}

G = 1/(1+exp(-X%*%all_theta))

# predicted label = the one-vs-all classifier with the largest output for each row
pred = apply(G, 1, which.max)
# training-set accuracy
mean(pred == data1$y)

# Neural Networks: load the pre-trained weights
data2 = readMat('C:\\work\\BaiduNetdiskWorkspace\\coursera\\machine learning\\machine-learning-ex3\\ex3\\ex3weights.mat')
theta1 = data2$Theta1
theta2 = data2$Theta2
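# architecture from the course hand-out: 400 inputs (20x20 pixels), 25 hidden units, 10 outputs;
# Theta1 is 25 x 401 and Theta2 is 10 x 26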
predict_NN = function(theta1, theta2, X){
  g1 = 1/(1+exp(-X%*%t(theta1)))                      # hidden-layer activations (5000 x 25)
  g1 = cbind(matrix(rep(1,nrow(X)),nrow(X),1), g1)    # add the bias unit
  g2 = 1/(1+exp(-g1%*%t(theta2)))                     # output-layer activations (5000 x 10)
  return(g2)
}
g2 = predict_NN(theta1, theta2, X)

colnames(g2) = 1:10
Y_pred = as.numeric(colnames(g2)[apply(g2,1,which.max)])
# fraction of training examples classified correctly by the feed-forward network
mean(Y_pred == data1$y)