-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathoptimizer.py
More file actions
129 lines (105 loc) · 5.13 KB
/
optimizer.py
File metadata and controls
129 lines (105 loc) · 5.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import numpy as np
class Optimizer:
    '''
    Base class for parameter update rules.

    An optimizer wraps an update function that is called with the current
    value of a parameter, a key identifying that parameter, and the
    parameter's gradient, and that returns the parameter's new value.
    '''

    def __init__(self, func, learningRate):
        '''
        Args:
            func: callable invoked as func(input, position, gradient) that
                returns the updated parameter value
            learningRate: step size; stored on the instance, not used by the
                base class itself
        '''
        self.learningRate = learningRate
        self.func = func

    def evaluate(self, input, position, gradient):
        '''
        Compute the new value of a single parameter.

        Args:
            input: the current value of the parameter being updated
            position: key identifying the parameter, as a string of the form
                "parameterType: layer, neuron"
                -> "parameterType" denotes weight or bias, or g or v when
                   weight normalizing
                -> ex. "weight: 0, 1" denotes the weight at layer 0,
                   neuron 1, both indexed from 0
            gradient: the gradient of the parameter being updated

        Returns:
            whatever the wrapped update function returns for these arguments
        '''
        update_rule = self.func
        return update_rule(input, position, gradient)

    def reset(self):
        '''
        Clear any state accumulated by the optimizer.

        Raises:
            NotImplementedError: always; the base class does not implement it
        '''
        raise NotImplementedError()
class SGD(Optimizer):
    '''
    Plain gradient descent: new value = X - learningRate * gradient.

    The update ignores the position key and keeps no per-parameter state.
    '''

    def __init__(self, learningRate):
        '''
        Args:
            learningRate: step size applied to every gradient
        '''
        # Read the rate from the instance on every call, so that assigning a
        # new value to self.learningRate after construction takes effect.
        func = lambda X, position, nablaX: X - self.learningRate * nablaX
        super().__init__(func, learningRate)

    def reset(self):
        '''
        Do nothing: this rule has no accumulated state to clear.

        Defined so that reset() can be called on this optimizer without
        raising NotImplementedError.
        '''
        return None
class SGD_Momentum(Optimizer):
    '''
    Gradient descent on an exponential moving average of the gradient.

    One running average ("velocity") is kept per position key:
        velocity = beta * velocity + (1 - beta) * gradient
        new value = X - learningRate * velocity
    '''

    def __init__(self, learningRate, beta = 0.9):
        '''
        Args:
            learningRate: step size applied to the velocity
            beta: decay factor of the running average of the gradient
        '''
        self.beta = beta
        # position key -> running average of the gradient for that parameter
        self.velocities = {}
        # Read the rate from the instance on every call, so that assigning a
        # new value to self.learningRate after construction takes effect.
        func = lambda X, position, nablaX: X - self.learningRate * self.updateVelocity(position, nablaX)
        super().__init__(func, learningRate)

    def updateVelocity(self, position, gradient):
        '''
        Fold a new gradient into the velocity stored for a parameter.

        Args:
            position: key identifying the parameter
            gradient: the gradient of the parameter being updated

        Returns:
            the updated velocity, which is also stored under the position key
        '''
        # A parameter seen for the first time starts from a velocity of 0.
        previous = self.velocities.get(position, 0)
        newVelocity = self.beta * previous + (1 - self.beta) * gradient
        self.velocities[position] = newVelocity
        return newVelocity

    def evaluate(self, input, position, gradient):
        '''
        Compute the new value of a single parameter and update its velocity.

        Args:
            input: the current value of the parameter being updated
            position: key identifying the parameter
            gradient: the gradient of the parameter being updated

        Returns:
            the updated parameter value
        '''
        return super().evaluate(input, position, gradient)

    def reset(self):
        '''Forget the velocities accumulated for every parameter.'''
        self.velocities = {}
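# NOTE: disabled draft. It predates the position-keyed interface used by the
# other optimizers (no `position` argument, one velocity shared by every
# parameter), and its velocity update is the same as SGD_Momentum's, with no
# look-ahead step.
# class Nesterov(Optimizer):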
# def __init__(self, learningRate, beta):
# self.beta = beta
# func = lambda X, nablaX, rate : X - rate * self.updateVelocity(nablaX)
# super().__init__(func, learningRate)
# self.previousVelocity = 0
# def updateVelocity(self, gradient):
# newVelocity = self.beta * self.previousVelocity + (1-self.beta) * gradient
# self.previousVelocity = newVelocity
# return newVelocity
# def evaluate(self, input, gradient):
# output = super().evaluate(input, gradient)
# return output
class RMSProp(Optimizer):
    '''
    Gradient descent scaled by a running average of the squared gradient.

    One running average ("velocity") is kept per position key:
        velocity = beta * velocity + (1 - beta) * gradient ** 2
        new value = X - learningRate * gradient / sqrt(velocity + 1e-8)
    '''

    def __init__(self, learningRate, beta = 0.9):
        '''
        Args:
            learningRate: step size applied to the scaled gradient
            beta: decay factor of the running average of the squared gradient
        '''
        self.beta = beta
        # position key -> running average of the squared gradient
        self.velocities = {}
        # Read the rate from the instance on every call, so that assigning a
        # new value to self.learningRate after construction takes effect.
        # The 1e-8 term sits under the square root and keeps the divisor
        # away from zero.
        func = lambda X, position, nablaX: X - self.learningRate * (
            nablaX / np.sqrt(self.updateVelocity(position, nablaX) + 1e-8)
        )
        super().__init__(func, learningRate)

    def updateVelocity(self, position, gradient):
        '''
        Fold a new squared gradient into the velocity stored for a parameter.

        Args:
            position: key identifying the parameter
            gradient: the gradient of the parameter being updated

        Returns:
            the updated velocity, which is also stored under the position key
        '''
        # A parameter seen for the first time starts from a velocity of 0.
        previous = self.velocities.get(position, 0)
        newVelocity = self.beta * previous + (1 - self.beta) * gradient * gradient
        self.velocities[position] = newVelocity
        return newVelocity

    def evaluate(self, input, position, gradient):
        '''
        Compute the new value of a single parameter and update its velocity.

        Args:
            input: the current value of the parameter being updated
            position: key identifying the parameter
            gradient: the gradient of the parameter being updated

        Returns:
            the updated parameter value
        '''
        return super().evaluate(input, position, gradient)

    def reset(self):
        '''Forget the velocities accumulated for every parameter.'''
        self.velocities = {}
class AdaGrad(Optimizer):
    '''
    Gradient descent scaled by an accumulator of squared gradients.

    One accumulator ("velocity") is kept per position key:
        velocity = beta * velocity + gradient ** 2
        new value = X - learningRate * gradient / sqrt(velocity + 1e-8)

    NOTE: with beta < 1 the accumulator decays between steps; beta = 1 gives
    an undecayed sum of squared gradients.
    '''

    def __init__(self, learningRate, beta = 0.9):
        '''
        Args:
            learningRate: step size applied to the scaled gradient
            beta: factor applied to the previous accumulator value before the
                new squared gradient is added
        '''
        self.beta = beta
        # position key -> accumulated squared gradient for that parameter
        self.velocities = {}
        # Read the rate from the instance on every call, so that assigning a
        # new value to self.learningRate after construction takes effect.
        # The 1e-8 term sits under the square root and keeps the divisor
        # away from zero.
        func = lambda X, position, nablaX: X - self.learningRate * (
            nablaX / np.sqrt(self.updateVelocity(position, nablaX) + 1e-8)
        )
        super().__init__(func, learningRate)

    def updateVelocity(self, position, gradient):
        '''
        Add a new squared gradient to the accumulator of a parameter.

        Args:
            position: key identifying the parameter
            gradient: the gradient of the parameter being updated

        Returns:
            the updated accumulator, which is also stored under the position
            key
        '''
        # A parameter seen for the first time starts from an accumulator of 0.
        previous = self.velocities.get(position, 0)
        newVelocity = self.beta * previous + gradient * gradient
        self.velocities[position] = newVelocity
        return newVelocity

    def evaluate(self, input, position, gradient):
        '''
        Compute the new value of a single parameter and update its
        accumulator.

        Args:
            input: the current value of the parameter being updated
            position: key identifying the parameter
            gradient: the gradient of the parameter being updated

        Returns:
            the updated parameter value
        '''
        return super().evaluate(input, position, gradient)

    def reset(self):
        '''Forget the accumulators stored for every parameter.'''
        self.velocities = {}
class Adam(Optimizer):
    '''
    Adam update rule with per-parameter state.

    For every position key the optimizer keeps a running average of the
    gradient ("moment"), a running average of the squared gradient
    ("velocity") and a step counter used for bias correction:
        moment   = beta_1 * moment   + (1 - beta_1) * gradient
        velocity = beta_2 * velocity + (1 - beta_2) * gradient ** 2
        moment_hat   = moment   / (1 - beta_1 ** step)
        velocity_hat = velocity / (1 - beta_2 ** step)
        new value = X - learningRate * moment_hat / sqrt(velocity_hat + 1e-8)
    '''

    def __init__(self, learningRate, beta_1 = 0.9, beta_2 = 0.99):
        '''
        Args:
            learningRate: step size applied to the corrected update
            beta_1: decay factor of the running average of the gradient
            beta_2: decay factor of the running average of the squared
                gradient
        '''
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        # position key -> running average of the gradient
        self.moments = {}
        # position key -> running average of the squared gradient
        self.velocities = {}
        # position key -> 1-based count of the step being evaluated
        self.iterations = {}
        super().__init__(self._step, learningRate)

    def _step(self, X, position, nablaX):
        '''Update both running averages and return the new parameter value.'''
        moment = self.correctMoment(position, self.updateMoment(position, nablaX))
        velocity = self.correctVelocity(position, self.updateVelocity(position, nablaX))
        # The rate is read from the instance on every call, so assigning a
        # new value to self.learningRate after construction takes effect.
        # The 1e-8 term sits under the square root, matching RMSProp and
        # AdaGrad in this file.
        return X - self.learningRate * (moment / np.sqrt(velocity + 1e-8))

    def updateMoment(self, position, gradient):
        '''
        Fold a new gradient into the moment stored for a parameter.

        Args:
            position: key identifying the parameter
            gradient: the gradient of the parameter being updated

        Returns:
            the updated moment, which is also stored under the position key
        '''
        # The estimate starts at 0; the bias correction in correctMoment
        # compensates for exactly that starting value.
        previous = self.moments.get(position, 0)
        newMoment = self.beta_1 * previous + (1 - self.beta_1) * gradient
        # Store per position so the average accumulates across steps.
        self.moments[position] = newMoment
        return newMoment

    def updateVelocity(self, position, gradient):
        '''
        Fold a new squared gradient into the velocity stored for a parameter.

        Args:
            position: key identifying the parameter
            gradient: the gradient of the parameter being updated

        Returns:
            the updated velocity, which is also stored under the position key
        '''
        # The estimate starts at 0; the bias correction in correctVelocity
        # compensates for exactly that starting value.
        previous = self.velocities.get(position, 0)
        newVelocity = self.beta_2 * previous + (1 - self.beta_2) * gradient * gradient
        # Store per position so the average accumulates across steps.
        self.velocities[position] = newVelocity
        return newVelocity

    def correctMoment(self, position, moment):
        '''
        Apply bias correction to a moment.

        Args:
            position: key identifying the parameter; selects the step counter
            moment: the uncorrected moment

        Returns:
            moment / (1 - beta_1 ** step), with step taken as 1 when the
            position has no counter yet
        '''
        return moment / (1 - self.beta_1 ** self.iterations.setdefault(position, 1))

    def correctVelocity(self, position, velocity):
        '''
        Apply bias correction to a velocity.

        Args:
            position: key identifying the parameter; selects the step counter
            velocity: the uncorrected velocity

        Returns:
            velocity / (1 - beta_2 ** step), with step taken as 1 when the
            position has no counter yet
        '''
        return velocity / (1 - self.beta_2 ** self.iterations.setdefault(position, 1))

    def evaluate(self, input, position, gradient):
        '''
        Compute the new value of a single parameter and advance its step
        counter.

        Args:
            input: the current value of the parameter being updated
            position: key identifying the parameter
            gradient: the gradient of the parameter being updated

        Returns:
            the updated parameter value
        '''
        self.iterations.setdefault(position, 1)
        output = super().evaluate(input, position, gradient)
        # Advance only after the update, so the first step is corrected with
        # an exponent of 1.
        self.iterations[position] += 1
        return output

    def reset(self):
        '''Forget the moments, velocities and step counters of every parameter.'''
        self.moments = {}
        self.velocities = {}
        self.iterations = {}