-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathlinear_reg.py
More file actions
51 lines (41 loc) · 1.43 KB
/
linear_reg.py
File metadata and controls
51 lines (41 loc) · 1.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Linear regression on one-dimensional data using a closed form solution.
# Please make sure matplotlib is included in the conda_dependencies.yml file.
import os

import numpy as np
import matplotlib
# The backend must be selected before pyplot is imported.
matplotlib.use('agg')
import matplotlib.pyplot as plot
# Create the figure that the plotting code further down draws on and saves.
fig = plot.figure()
# Load the (x, y) samples from data.csv: one comma-separated pair per line.
X = []
Y = []
# The context manager guarantees the file handle is closed, even if a line
# fails to parse.
with open('data.csv') as data_file:
    for line in data_file:
        # Tolerate blank lines (e.g. a trailing newline at the end of the file)
        # instead of crashing on the two-value unpack below.
        if not line.strip():
            continue
        x, y = line.split(',')
        X.append(float(x))
        Y.append(float(y))
# Turn them into numpy arrays so we can apply vector operations.
X = np.array(X)
Y = np.array(Y)
# Closed-form least-squares fit of the line y = a * x + b.
#
# The fit is undefined with fewer than two samples or when every x is the same
# value: the denominator below is then zero (up to rounding) or nan, and the
# script would silently report nan/inf coefficients. Fail loudly instead.
if X.size < 2 or X.min() == X.max():
    raise ValueError(
        "Need at least two samples with distinct x values to fit a line."
    )
# Common denominator of both coefficients: sum(x^2) - mean(x) * sum(x).
denominator = X.dot(X) - X.mean() * X.sum()
# Slope of the fitted line.
a = (X.dot(Y) - Y.mean() * X.sum()) / denominator
# Intercept of the fitted line.
b = (Y.mean() * X.dot(X) - X.mean() * X.dot(Y)) / denominator
# Yhat is simply aX + b: the prediction for every observed x.
Yhat = a * X + b
print("Coefficient: {0}, intercept: {1}".format(a, b))
# Plot the data and the fitted line, then save it into a png file in the output directory.
ax = fig.gca()
ax.scatter(X, Y)
ax.plot(X, Yhat, color='magenta')
# savefig does not create missing directories, so make sure ./outputs exists
# before writing the image (a no-op when the directory is already there).
os.makedirs('./outputs', exist_ok=True)
fig.savefig('./outputs/lin.png')
### compute r-squared ###
mean_y = Y.mean()
# d1: residual error of the prediction; d2: intrinsic error relative to the mean.
d1 = Y - Yhat
d2 = Y - mean_y
# Sums of squared errors for the fitted line and for the mean-only baseline.
ss_residual = np.dot(d1, d1)
ss_total = np.dot(d2, d2)
# r2 == 1 (d1 is all zeros): a perfect model with no errors.
# r2 == 0 (d1 equals d2): a useless model, no better than predicting the mean.
# r2 < 0 (d1 is larger than d2): worse than simply predicting the mean!
r2 = 1 - ss_residual / ss_total
print("R-squared: {}.".format(r2))