__author__ = 'snurkabill'


class CostFunction(object):
    """Base class for cost functions plugged into theano training graphs.

    Subclasses implement ``cost(x, y)`` where ``x`` is the network output
    and ``y`` the target.  Instances are *called* as
    ``costFun(owner, outputs, targets)`` -- the first positional argument
    (the owning trainer/classifier) is deliberately ignored; see
    ``__call__``.  This matches the call sites in deepbelief.py, e.g.
    ``self.costFun(self, self.output, y)``.

    Pickling: a theano random generator held in ``theanoGenerator`` is not
    picklable, so it is stripped from the serialized state.
    """

    def __getstate__(self):
        # Copy so the live instance keeps its generator after pickling.
        odict = self.__dict__.copy()
        if 'theanoGenerator' in odict:
            del odict['theanoGenerator']
        return odict

    def __setstate__(self, state):
        self.__dict__.update(state)

    def __getinitargs__(self):
        # Fixed: original definition was missing ``self`` and would raise
        # TypeError if pickle ever invoked it.
        return None

    def __call__(self, *args, **kwargs):
        # Hoisted here from the subclasses (it was duplicated verbatim).
        # Callers pass (owner, outputs, targets); args[0] is skipped so
        # subclasses only have to implement cost(x, y).
        return self.cost(args[1], args[2])


class LeastSquares(CostFunction):
    """Elementwise squared error ``(x - y) ** 2`` (no summing/averaging)."""

    def cost(self, x, y):
        return (x - y) * (x - y)


class CategoricalCrossEntropy(CostFunction):
    """Categorical cross-entropy of predicted distribution x against y."""

    def cost(self, x, y):
        # Deferred import keeps this module importable when theano is not
        # installed (e.g. unpickling model metadata on another machine).
        from theano import tensor as T
        return T.nnet.categorical_crossentropy(x, y)
self.hiddenDropout = hiddenDropout self.activationFunction = activationFunction self.classificationActivationFunction = classificationActivationFunction + self.costFun = costFunction # Let's initialize the fields # The weights and biases, make them shared variables @@ -103,14 +106,11 @@ def forwardPass(self, x): return currentLayerValues - def costFun(self, x, y): - return T.nnet.categorical_crossentropy(x, y) - # TODO: do I still need to pass the y? def cost(self, y): - output_error = self.costFun(self.output, y) + output_error = self.costFun(self, self.output, y) if self.adversarial_training: - adversarial_error = self.costFun(self.adversarial_output, y) + adversarial_error = self.costFun(self, self.adversarial_output, y) alpha = self.adversarial_coefficient return alpha * output_error + (1.0 - alpha) * adversarial_error else: @@ -120,10 +120,12 @@ class ClassifierBatch(object): def __init__(self, input, nrLayers, weights, biases, visibleDropout, hiddenDropout, - activationFunction, classificationActivationFunction): + activationFunction, classificationActivationFunction, costFunction): self.input = input + self.costFun = costFunction + self.classificationWeights = classificationWeightsFromTestWeights(weights, visibleDropout=visibleDropout, hiddenDropout=hiddenDropout) @@ -148,8 +150,7 @@ def __init__(self, input, nrLayers, weights, biases, self.output = currentLayerValues def cost(self, y): - return T.nnet.categorical_crossentropy(self.output, y) - + return self.costFun(self, self.output, y) """ Class that implements a deep belief network, for classification """ class DBN(object): @@ -257,6 +258,7 @@ def __init__(self, nrLayers, layerSizes, rbmActivationFunctionVisible=Sigmoid(), rbmActivationFunctionHidden=Sigmoid(), classificationActivationFunction=Softmax(), + costFunction=CategoricalCrossEntropy(), unsupervisedLearningRate=0.01, supervisedLearningRate=0.05, nesterovMomentum=True, @@ -332,6 +334,8 @@ def __init__(self, nrLayers, layerSizes, self.nameDataset = 
nameDataset + self.costFunction = costFunction + print "hidden dropout in DBN", hiddenDropout print "visible dropout in DBN", visibleDropout @@ -460,7 +464,6 @@ def fit(self, data, labels, maxEpochs, validation=True, percentValidation=0.05, unsupervisedData=None, trainingIndices=None): return self.train(data, labels, maxEpochs, validation, percentValidation, unsupervisedData, trainingIndices) - """ Choose a percentage (percentValidation) of the data given to be validation data, used for early stopping of the model. @@ -577,6 +580,7 @@ def fineTune(self, data, labels, validation, validationData, validationLabels, batchTrainer = MiniBatchTrainer(input=x, inputLabels=y, nrLayers=self.nrLayers, activationFunction=self.activationFunction, classificationActivationFunction=self.classificationActivationFunction, + costFunction=self.costFunction, initialWeights=self.weights, initialBiases=self.biases, visibleDropout=self.visibleDropout, @@ -588,6 +592,7 @@ def fineTune(self, data, labels, validation, validationData, validationLabels, classifier = ClassifierBatch(input=x, nrLayers=self.nrLayers, activationFunction=self.activationFunction, classificationActivationFunction=self.classificationActivationFunction, + costFunction=self.costFunction, visibleDropout=self.visibleDropout, hiddenDropout=self.hiddenDropout, weights=batchTrainer.weights,