From e20cfb1fc730adf10fd6c2685521f87aca2ed379 Mon Sep 17 00:00:00 2001
From: ItayXD
Date: Wed, 24 Aug 2022 09:54:12 +0300
Subject: [PATCH] Add support for Apple Silicon and switch to Aesara

---
 network3.py | 87 +++++++++++++++++++++++++++--------------------------
 1 file changed, 45 insertions(+), 42 deletions(-)

diff --git a/network3.py b/network3.py
index ff8afa8..87b3ce3 100644
--- a/network3.py
+++ b/network3.py
@@ -1,7 +1,7 @@
 """network3.py
 ~~~~~~~~~~~~~~
 
-A Theano-based program for training and running simple neural
+An Aesara-based program for training and running simple neural
 networks.
 
 Supports several layer types (fully connected, convolutional, max
@@ -12,14 +12,14 @@
 network2.py.  However, unlike network.py and network2.py it can also
 be run on a GPU, which makes it faster still.
 
-Because the code is based on Theano, the code is different in many
+Because the code is based on Aesara, the code is different in many
 ways from network.py and network2.py.  However, where possible I have
 tried to maintain consistency with the earlier programs.  In
 particular, the API is similar to network2.py.  Note that I have
 focused on making the code simple, easily readable, and easily
 modifiable.  It is not optimized, and omits many desirable features.
 
-This program incorporates ideas from the Theano documentation on
+This program incorporates ideas from the Aesara documentation on
 convolutional neural nets (notably,
 http://deeplearning.net/tutorial/lenet.html ), from Misha Denil's
 implementation of dropout (https://github.com/mdenil/dropout ), and
@@ -34,18 +34,18 @@
 
 # Third-party libraries
 import numpy as np
-import theano
-import theano.tensor as T
-from theano.tensor.nnet import conv
-from theano.tensor.nnet import softmax
-from theano.tensor import shared_randomstreams
-from theano.tensor.signal.pool import pool_2d
+import aesara
+import aesara.tensor as T
+from aesara.tensor.nnet import conv2d
+from aesara.tensor.nnet import softmax
+from aesara.tensor.random.utils import RandomStream
+from aesara.tensor.signal.pool import pool_2d
 
 # Activation functions for neurons
 def linear(z): return z
-def ReLU(z): return T.maximum(0.0, z)
-from theano.tensor.nnet import sigmoid
-from theano.tensor import tanh
+from aesara.tensor.nnet import relu as ReLU
+from aesara.tensor import sigmoid
+from aesara.tensor import tanh
 
 
 #### Constants
@@ -53,27 +53,30 @@ def ReLU(z): return T.maximum(0.0, z)
 if GPU:
     print("Trying to run under a GPU.  If this is not desired, then modify "+\
         "network3.py\nto set the GPU flag to False.")
-    try: theano.config.device = 'gpu'
+    try: aesara.config.device = 'gpu'
     except: pass # it's already set
-    theano.config.floatX = 'float32'
+    aesara.config.floatX = 'float32'
 else:
     print("Running with a CPU.  If this is not desired, then the modify "+\
         "network3.py to set\nthe GPU flag to True.")
 
+aesara.config.gcc__cxxflags = "-Wno-c++11-narrowing" # Required for Apple Silicon
+
+
 #### Load the MNIST data
 def load_data_shared(filename="mnist.pkl.gz"):
     f = gzip.open(filename, 'rb')
     training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
     f.close()
     def shared(data):
-        """Place the data into shared variables.  This allows Theano to copy
+        """Place the data into shared variables.  This allows Aesara to copy
         the data to the GPU, if one is available.
 
         """
-        shared_x = theano.shared(
-            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
-        shared_y = theano.shared(
-            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
+        shared_x = aesara.shared(
+            np.asarray(data[0], dtype=aesara.config.floatX), borrow=True)
+        shared_y = aesara.shared(
+            np.asarray(data[1], dtype=aesara.config.floatX), borrow=True)
         return shared_x, T.cast(shared_y, "int32")
     return [shared(training_data), shared(validation_data), shared(test_data)]
 
@@ -123,7 +126,7 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
         # define functions to train a mini-batch, and to compute the
         # accuracy in validation and test mini-batches.
         i = T.lscalar() # mini-batch index
-        train_mb = theano.function(
+        train_mb = aesara.function(
            [i], cost, updates=updates,
            givens={
                self.x:
@@ -131,7 +134,7 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
-        validate_mb_accuracy = theano.function(
+        validate_mb_accuracy = aesara.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
@@ -139,7 +142,7 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
-        test_mb_accuracy = theano.function(
+        test_mb_accuracy = aesara.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
@@ -147,7 +150,7 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
-        self.test_mb_predictions = theano.function(
+        self.test_mb_predictions = aesara.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
@@ -210,23 +213,23 @@ def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
         self.activation_fn=activation_fn
         # initialize weights and biases
         n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
-        self.w = theano.shared(
+        self.w = aesara.shared(
             np.asarray(
                 np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
-                dtype=theano.config.floatX),
+                dtype=aesara.config.floatX),
             borrow=True)
-        self.b = theano.shared(
+        self.b = aesara.shared(
             np.asarray(
                 np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
-                dtype=theano.config.floatX),
+                dtype=aesara.config.floatX),
             borrow=True)
         self.params = [self.w, self.b]
 
     def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
         self.inpt = inpt.reshape(self.image_shape)
-        conv_out = conv.conv2d(
+        conv_out = conv2d(
             input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
-            image_shape=self.image_shape)
+            input_shape=self.image_shape)
         pooled_out = pool_2d(
             input=conv_out, ws=self.poolsize, ignore_border=True)
         self.output = self.activation_fn(
@@ -241,15 +244,15 @@ def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
         self.activation_fn = activation_fn
         self.p_dropout = p_dropout
         # Initialize weights and biases
-        self.w = theano.shared(
+        self.w = aesara.shared(
             np.asarray(
                 np.random.normal(
                     loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
-                dtype=theano.config.floatX),
+                dtype=aesara.config.floatX),
             name='w', borrow=True)
-        self.b = theano.shared(
+        self.b = aesara.shared(
             np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
-                       dtype=theano.config.floatX),
+                       dtype=aesara.config.floatX),
             name='b', borrow=True)
         self.params = [self.w, self.b]
 
@@ -274,21 +277,21 @@ def __init__(self, n_in, n_out, p_dropout=0.0):
         self.n_out = n_out
         self.p_dropout = p_dropout
         # Initialize weights and biases
-        self.w = theano.shared(
-            np.zeros((n_in, n_out), dtype=theano.config.floatX),
+        self.w = aesara.shared(
+            np.zeros((n_in, n_out), dtype=aesara.config.floatX),
             name='w', borrow=True)
-        self.b = theano.shared(
-            np.zeros((n_out,), dtype=theano.config.floatX),
+        self.b = aesara.shared(
+            np.zeros((n_out,), dtype=aesara.config.floatX),
             name='b', borrow=True)
         self.params = [self.w, self.b]
 
     def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
         self.inpt = inpt.reshape((mini_batch_size, self.n_in))
-        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
+        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b, axis=-1)
         self.y_out = T.argmax(self.output, axis=1)
         self.inpt_dropout = dropout_layer(
             inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
-        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
+        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b, axis=-1)
 
     def cost(self, net):
         "Return the log-likelihood cost."
@@ -305,7 +308,7 @@ def size(data):
     return data[0].get_value(borrow=True).shape[0]
 
 def dropout_layer(layer, p_dropout):
-    srng = shared_randomstreams.RandomStreams(
+    srng = RandomStream(
         np.random.RandomState(0).randint(999999))
-    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
-    return layer*T.cast(mask, theano.config.floatX)
+    mask = srng.binomial(1, 1-p_dropout, size=layer.shape)
+    return layer*T.cast(mask, aesara.config.floatX)
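
Not part of the diff: a quick standalone check that the Aesara C++ toolchain builds on an Apple Silicon machine once the narrowing warning is silenced. It reuses only the aesara.config.gcc__cxxflags setting introduced above; the tiny squaring function is illustrative and simply forces a compile of the backend.

    import aesara
    import aesara.tensor as T

    # Same workaround the patch adds near the top of network3.py; set it
    # before the first aesara.function() call so the generated C++ compiles.
    aesara.config.gcc__cxxflags = "-Wno-c++11-narrowing"

    x = T.dscalar("x")
    square = aesara.function([x], x ** 2)  # triggers compilation of the graph
    print(square(3.0))                     # expected: 9.0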
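For reviewers who want to exercise the migrated module end to end, a minimal training sketch follows. It assumes the parts of network3.py not shown in this diff keep the book's API (the Network, FullyConnectedLayer, and SoftmaxLayer classes and the remaining SGD arguments) and that mnist.pkl.gz sits in the working directory; treat it as an illustration, not part of the change.

    import network3
    from network3 import Network, FullyConnectedLayer, SoftmaxLayer

    # Load the MNIST splits as Aesara shared variables (load_data_shared above).
    training_data, validation_data, test_data = network3.load_data_shared()

    mini_batch_size = 10
    net = Network([
        FullyConnectedLayer(n_in=784, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
    # 60 epochs at learning rate 0.1, matching the SGD signature in the hunks above.
    net.SGD(training_data, 60, mini_batch_size, 0.1,
            validation_data, test_data)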