From 660c96686d6a3071b1b1fdf30eeff2a6e8c7f799 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Vahala?= Date: Fri, 11 Sep 2015 21:53:29 +0200 Subject: [PATCH 1/2] added classification switcher into DBN --- code/lib/deepbelief.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/code/lib/deepbelief.py b/code/lib/deepbelief.py index 90b304f..825a347 100644 --- a/code/lib/deepbelief.py +++ b/code/lib/deepbelief.py @@ -20,7 +20,8 @@ class MiniBatchTrainer(BatchTrainer): def __init__(self, input, inputLabels, nrLayers, initialWeights, initialBiases, activationFunction, classificationActivationFunction, visibleDropout, hiddenDropout, - adversarial_training, adversarial_epsilon, adversarial_coefficient): + adversarial_training, adversarial_epsilon, adversarial_coefficient, + classification=True): self.input = input self.inputLabels = inputLabels # If we should use adversarial training or not @@ -33,6 +34,8 @@ def __init__(self, input, inputLabels, nrLayers, initialWeights, initialBiases, self.activationFunction = activationFunction self.classificationActivationFunction = classificationActivationFunction + self.classification = classification + # Let's initialize the fields # The weights and biases, make them shared variables nrWeights = nrLayers - 1 @@ -104,7 +107,10 @@ def forwardPass(self, x): return currentLayerValues def costFun(self, x, y): - return T.nnet.categorical_crossentropy(x, y) + if(self.classification): + return T.nnet.categorical_crossentropy(x, y) + else: + return (x - y) * (x - y) # TODO: do I still need to pass the y? def cost(self, y): @@ -120,7 +126,7 @@ class ClassifierBatch(object): def __init__(self, input, nrLayers, weights, biases, visibleDropout, hiddenDropout, - activationFunction, classificationActivationFunction): + activationFunction, classificationActivationFunction, classification=True): self.input = input @@ -128,6 +134,8 @@ def __init__(self, input, nrLayers, weights, biases, visibleDropout=visibleDropout, hiddenDropout=hiddenDropout) + self.classification = classification + nrWeights = nrLayers - 1 currentLayerValues = input @@ -148,8 +156,10 @@ def __init__(self, input, nrLayers, weights, biases, self.output = currentLayerValues def cost(self, y): - return T.nnet.categorical_crossentropy(self.output, y) - + if(self.classification): + return T.nnet.categorical_crossentropy(self.output, y) + else: + return (self.output - y) * (self.output - y) """ Class that implements a deep belief network, for classification """ class DBN(object): @@ -284,7 +294,8 @@ def __init__(self, nrLayers, layerSizes, adversarial_epsilon=1.0/255, preTrainEpochs=1, initialInputShape=None, - nameDataset=''): + nameDataset='', + classification=True): self.nrLayers = nrLayers self.layerSizes = layerSizes @@ -332,6 +343,8 @@ def __init__(self, nrLayers, layerSizes, self.nameDataset = nameDataset + self.classification = classification + print "hidden dropout in DBN", hiddenDropout print "visible dropout in DBN", visibleDropout @@ -583,7 +596,8 @@ def fineTune(self, data, labels, validation, validationData, validationLabels, hiddenDropout=self.hiddenDropout, adversarial_training=self.adversarial_training, adversarial_coefficient=self.adversarial_coefficient, - adversarial_epsilon=self.adversarial_epsilon) + adversarial_epsilon=self.adversarial_epsilon, + classification=self.classification) classifier = ClassifierBatch(input=x, nrLayers=self.nrLayers, activationFunction=self.activationFunction, @@ -591,7 +605,8 @@ def fineTune(self, data, labels, validation, validationData, validationLabels, visibleDropout=self.visibleDropout, hiddenDropout=self.hiddenDropout, weights=batchTrainer.weights, - biases=batchTrainer.biases) + biases=batchTrainer.biases, + classification=self.classification) trainModel = batchTrainer.makeTrainFunction(x, y, data, labels, self.trainingOptions) From e90dce52cd4a1d1db458566dd0f0f75961c371aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ji=C5=99=C3=AD=20Vahala?= Date: Sun, 27 Sep 2015 18:53:03 +0200 Subject: [PATCH 2/2] added least squares and categoricalCrossEntropy as cost functions into DBN --- code/lib/costfunctions.py | 45 ++++++++++++++++++++++++++++++++++++++ code/lib/deepbelief.py | 46 +++++++++++++++------------------------ 2 files changed, 63 insertions(+), 28 deletions(-) create mode 100644 code/lib/costfunctions.py diff --git a/code/lib/costfunctions.py b/code/lib/costfunctions.py new file mode 100644 index 0000000..18c2e9e --- /dev/null +++ b/code/lib/costfunctions.py @@ -0,0 +1,45 @@ +__author__ = 'snurkabill' + +from theano import tensor as T +from theano.tensor.shared_randomstreams import RandomStreams + +import theano +import numpy as np + +theanoFloat = theano.config.floatX + +class CostFunction(object): + + def __getstate__(self): + odict = self.__dict__.copy() + if 'theanoGenerator' in odict: + del odict['theanoGenerator'] + return odict + + def __setstate__(self, dict): + self.__dict__.update(dict) + + def __getinitargs__(): + return None + +class LeastSquares(CostFunction): + + def __init__(self): + pass + + def cost(self, x, y): + return (x - y) * (x - y) + + def __call__(self, *args, **kwargs): + return self.cost(args[1], args[2]) + +class CategoricalCrossEntropy(CostFunction): + + def __init__(self): + pass + + def __call__(self, *args, **kwargs): + return self.cost(args[1], args[2]) + + def cost(self, x, y): + return T.nnet.categorical_crossentropy(x, y) diff --git a/code/lib/deepbelief.py b/code/lib/deepbelief.py index 825a347..3d83735 100644 --- a/code/lib/deepbelief.py +++ b/code/lib/deepbelief.py @@ -3,6 +3,8 @@ import restrictedBoltzmannMachine as rbm from batchtrainer import * from activationfunctions import * +from costfunctions import CategoricalCrossEntropy +from costfunctions import LeastSquares from common import * from debug import * from trainingoptions import * @@ -18,10 +20,9 @@ class MiniBatchTrainer(BatchTrainer): def __init__(self, input, inputLabels, nrLayers, initialWeights, initialBiases, - activationFunction, classificationActivationFunction, + activationFunction, classificationActivationFunction, costFunction, visibleDropout, hiddenDropout, - adversarial_training, adversarial_epsilon, adversarial_coefficient, - classification=True): + adversarial_training, adversarial_epsilon, adversarial_coefficient): self.input = input self.inputLabels = inputLabels # If we should use adversarial training or not @@ -33,8 +34,7 @@ def __init__(self, input, inputLabels, nrLayers, initialWeights, initialBiases, self.hiddenDropout = hiddenDropout self.activationFunction = activationFunction self.classificationActivationFunction = classificationActivationFunction - - self.classification = classification + self.costFun = costFunction # Let's initialize the fields # The weights and biases, make them shared variables @@ -106,17 +106,11 @@ def forwardPass(self, x): return currentLayerValues - def costFun(self, x, y): - if(self.classification): - return T.nnet.categorical_crossentropy(x, y) - else: - return (x - y) * (x - y) - # TODO: do I still need to pass the y? def cost(self, y): - output_error = self.costFun(self.output, y) + output_error = self.costFun(self, self.output, y) if self.adversarial_training: - adversarial_error = self.costFun(self.adversarial_output, y) + adversarial_error = self.costFun(self, self.adversarial_output, y) alpha = self.adversarial_coefficient return alpha * output_error + (1.0 - alpha) * adversarial_error else: @@ -126,16 +120,16 @@ class ClassifierBatch(object): def __init__(self, input, nrLayers, weights, biases, visibleDropout, hiddenDropout, - activationFunction, classificationActivationFunction, classification=True): + activationFunction, classificationActivationFunction, costFunction): self.input = input + self.costFun = costFunction + self.classificationWeights = classificationWeightsFromTestWeights(weights, visibleDropout=visibleDropout, hiddenDropout=hiddenDropout) - self.classification = classification - nrWeights = nrLayers - 1 currentLayerValues = input @@ -156,10 +150,7 @@ def __init__(self, input, nrLayers, weights, biases, self.output = currentLayerValues def cost(self, y): - if(self.classification): - return T.nnet.categorical_crossentropy(self.output, y) - else: - return (self.output - y) * (self.output - y) + return self.costFun(self, self.output, y) """ Class that implements a deep belief network, for classification """ class DBN(object): @@ -267,6 +258,7 @@ def __init__(self, nrLayers, layerSizes, rbmActivationFunctionVisible=Sigmoid(), rbmActivationFunctionHidden=Sigmoid(), classificationActivationFunction=Softmax(), + costFunction=CategoricalCrossEntropy(), unsupervisedLearningRate=0.01, supervisedLearningRate=0.05, nesterovMomentum=True, @@ -294,8 +286,7 @@ def __init__(self, nrLayers, layerSizes, adversarial_epsilon=1.0/255, preTrainEpochs=1, initialInputShape=None, - nameDataset='', - classification=True): + nameDataset=''): self.nrLayers = nrLayers self.layerSizes = layerSizes @@ -343,7 +334,7 @@ def __init__(self, nrLayers, layerSizes, self.nameDataset = nameDataset - self.classification = classification + self.costFunction = costFunction print "hidden dropout in DBN", hiddenDropout print "visible dropout in DBN", visibleDropout @@ -473,7 +464,6 @@ def fit(self, data, labels, maxEpochs, validation=True, percentValidation=0.05, unsupervisedData=None, trainingIndices=None): return self.train(data, labels, maxEpochs, validation, percentValidation, unsupervisedData, trainingIndices) - """ Choose a percentage (percentValidation) of the data given to be validation data, used for early stopping of the model. @@ -590,23 +580,23 @@ def fineTune(self, data, labels, validation, validationData, validationLabels, batchTrainer = MiniBatchTrainer(input=x, inputLabels=y, nrLayers=self.nrLayers, activationFunction=self.activationFunction, classificationActivationFunction=self.classificationActivationFunction, + costFunction=self.costFunction, initialWeights=self.weights, initialBiases=self.biases, visibleDropout=self.visibleDropout, hiddenDropout=self.hiddenDropout, adversarial_training=self.adversarial_training, adversarial_coefficient=self.adversarial_coefficient, - adversarial_epsilon=self.adversarial_epsilon, - classification=self.classification) + adversarial_epsilon=self.adversarial_epsilon) classifier = ClassifierBatch(input=x, nrLayers=self.nrLayers, activationFunction=self.activationFunction, classificationActivationFunction=self.classificationActivationFunction, + costFunction=self.costFunction, visibleDropout=self.visibleDropout, hiddenDropout=self.hiddenDropout, weights=batchTrainer.weights, - biases=batchTrainer.biases, - classification=self.classification) + biases=batchTrainer.biases) trainModel = batchTrainer.makeTrainFunction(x, y, data, labels, self.trainingOptions)