From b1c44e91612b00b553ac14c602431c57a2cd7e72 Mon Sep 17 00:00:00 2001
From: Maxim Nikitin
Date: Fri, 15 Dec 2023 16:57:55 +0300
Subject: [PATCH] GRA-191: implement pool and crossEntropyLoss

---
 server/api/server.cpp     |   6 +-
 server/core/Layer.cpp     |  39 +++++-
 server/core/Layer.h       |  25 ++++
 server/core/LazyBlob.cpp  | 146 +++++++++++++++
 server/core/LazyBlob.h    |   4 +
 server/core/Operation.cpp |  46 +++++++
 server/core/Operation.h   |  21 +++
 server/core/Parameters.h  |   4 +
 server/core/main.cpp      | 263 +++++++++++++++++++++++++-------------
 9 files changed, 464 insertions(+), 90 deletions(-)

diff --git a/server/api/server.cpp b/server/api/server.cpp
index 151044ca..6048c47e 100644
--- a/server/api/server.cpp
+++ b/server/api/server.cpp
@@ -36,7 +36,8 @@ void train(json::rvalue& json, Graph** graph, int model_id) {
     Allocator::endVirtualMode();

     for (int j = 0; j < 1000; ++j) {
-        auto& result = lastNode.forward();
+        lastNode.forward();
+        auto& result = lastNode.output.value();
         printf("%d: %f\n", j, result(0, 0, 0, 0));
         // lastNode.gradient = result;
         lastNode.gradient = Blob::ones({{1}});
@@ -52,7 +53,8 @@ void predict(int model_id, Graph* graph, std::vector& answer) {
     auto& lastNode = graph->getLastPredictLayers()[0]->result.value();
     // For now we do not handle multiple outputs (!) Hard-coded
     lastNode.clear();
-    const Blob& result = lastNode.forward();
+    lastNode.forward();
+    const Blob& result = lastNode.output.value();

     answer.reserve(result.shape.rows() * result.shape.cols());
     for (size_t j = 0; j < result.shape.rows(); ++j) {
diff --git a/server/core/Layer.cpp b/server/core/Layer.cpp
index b83ec091..a6a307de 100644
--- a/server/core/Layer.cpp
+++ b/server/core/Layer.cpp
@@ -1,5 +1,6 @@
 #include
 #include
+#include
 #include

 #include "Layer.h"
@@ -113,4 +114,40 @@ LayerNorm::LayerNorm(const AxisParameters& params,
     pipeline.push_back(std::move(_fill));

     result = Tensor(div, {pipeline[2], pipeline[5]});
-    }
+}
+
+SoftMax::SoftMax(const AxisParameters& params,
+    const std::vector<TensorRef>& args)
+    : sum(params.axis) {
+    assert(args.size() == 1);
+    pipeline.reserve(3);
+    TensorRef tensor = args[0];
+
+    Tensor exp_(exp, {tensor});
+    pipeline.push_back(std::move(exp_));
+
+    Tensor sum_(sum, {pipeline[0]});
+    pipeline.push_back(std::move(sum_));
+
+    Tensor fill_(fill, {pipeline[0], pipeline[1]});
+    pipeline.push_back(std::move(fill_));
+
+    result = Tensor(div, {pipeline[0], pipeline[2]});
+}
+
+EntropyLoss::EntropyLoss(const CrossEntropyLossParameters& params,
+    const std::vector<TensorRef>& args)
+    : softmax({{3}}, {args[0]}), mean({0, 1, 2, 3}), entropy(params.classCount) {
+    assert(args.size() == 2);
+    pipeline.reserve(1);
+
+    Tensor entropy_(entropy, {softmax.result.value(), args[1]});
+    pipeline.push_back(std::move(entropy_));
+
+    result = Tensor(mean, {pipeline[0]});
+}
+
+MaxPool::MaxPool(const std::vector<TensorRef>& args) {
+    assert(args.size() == 1);
+    result = Tensor(maxPool, {args[0]});
+}
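Editor's note: the SoftMax layer above chains four existing operations, exp -> sum over the given axis -> fill (broadcast the sums back to the input shape) -> divide, i.e. the usual softmax(x)_i = exp(x_i) / sum_j exp(x_j). The pipeline exponentiates the raw inputs directly. Below is a minimal standalone sketch of the same computation over a std::vector<float>, with the conventional max-subtraction added for numerical stability; the function name is illustrative and the code is not part of this patch.

// Standalone sketch (not from this patch): softmax over a non-empty vector,
// written directly instead of through the exp -> sum -> fill -> divide pipeline.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> softmaxSketch(const std::vector<float>& x) {
    const float m = *std::max_element(x.begin(), x.end()); // stability shift, extra vs. the layer
    std::vector<float> out(x.size());
    float sum = 0.0f;
    for (std::size_t i = 0; i < x.size(); ++i) {
        out[i] = std::exp(x[i] - m);   // "exp" step
        sum += out[i];                 // "sum" step
    }
    for (float& v : out) v /= sum;     // "fill" + "divide" steps
    return out;
}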
diff --git a/server/core/Layer.h b/server/core/Layer.h
index 6c4842b1..f08ed983 100644
--- a/server/core/Layer.h
+++ b/server/core/Layer.h
@@ -87,3 +87,28 @@ class LayerNorm: public Layer {
     LayerNorm(const AxisParameters& params,
         const std::vector<TensorRef>& args);
 };
+
+class SoftMax: public Layer {
+public:
+    Exp exp;
+    SumAxis sum;
+    Fill fill;
+    Divide div;
+    SoftMax(const AxisParameters& params,
+        const std::vector<TensorRef>& args);
+};
+
+class EntropyLoss: public Layer {
+public:
+    SoftMax softmax;
+    Mean mean;
+    Entropy entropy;
+    EntropyLoss(const CrossEntropyLossParameters& params,
+        const std::vector<TensorRef>& args);
+};
+
+class MaxPool: public Layer {
+public:
+    MaxPoolOp maxPool;
+    MaxPool(const std::vector<TensorRef>& args);
+};
diff --git a/server/core/LazyBlob.cpp b/server/core/LazyBlob.cpp
index 0085afb3..983b1e1d 100644
--- a/server/core/LazyBlob.cpp
+++ b/server/core/LazyBlob.cpp
@@ -1,6 +1,9 @@
 #include
 #include
 #include
+#include <cmath>
+#include <algorithm>
+
 #include "LazyBlob.h"
 #include "Iterations.h"
@@ -8,6 +11,7 @@
 #include "Blob.h"

 #define MAX_DIMS_COUNT 4
+#define EPS 1e-9

 const Shape& LazyBlob::shape() const {
     if (shape_.has_value()) {
@@ -568,3 +572,145 @@ std::ostream& operator<<(std::ostream& os, const LazyBlob &b) {
     }
     return os;
 }
+
+class LazyBlobEntropy: public LazyBlob {
+public:
+    const LazyBlob &a, &b;
+    const int classCount;
+    LazyBlobEntropy(const LazyBlob &a, const LazyBlob &b, int classCount):
+        a(a), b(b), classCount(classCount) {};
+
+    void initShape() const final override {
+        shape_ = b.shape();
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        assert(b(k, l, i, j) < classCount);
+        // WARNING: if this causes problems, switch to the EPS variant
+        return std::log(a(k, l, i, (int) b(k, l, i, j)));
+        // return std::log(a(k, l, i, (int) b(k, l, i, j)) + EPS);
+    }
+};
+
+class LazyBlobEntropyDerivative: public LazyBlob {
+public:
+    const LazyBlob &a, &b;
+    const int classCount;
+    LazyBlobEntropyDerivative(const LazyBlob &a, const LazyBlob &b, int classCount):
+        a(a), b(b), classCount(classCount) {};
+
+    void initShape() const final override {
+        shape_ = a.shape();
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        assert(b(k, l, i, j) < classCount);
+        if (j != (int) b(k, 0, 0, 0)) {
+            return 0;
+        }
+        // WARNING: if this causes problems, switch to the EPS variant
+        return - 1.0f / (a(k, l, i, j));
+        // return - 1.0f / (a(k, l, i, j) + EPS);
+    }
+};
+
+const LazyBlob& LazyBlob::entropy(const LazyBlob& a, int classCount) const {
+    assert(shape().cols() == classCount);
+    assert(shape().dim4() == a.shape().dim4());
+    assert(shape().dim3() == 1);
+    assert(shape().rows() == 1);
+    assert(a.shape().dim3() == 1);
+    assert(a.shape().rows() == 1);
+    assert(a.shape().cols() == 1);
+
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobEntropy));
+    return *(new(location) LazyBlobEntropy(*this, a, classCount));
+}
+
+const LazyBlob& LazyBlob::entropyDerivative(const LazyBlob& a, int classCount) const {
+    assert(shape().cols() == classCount);
+    assert(shape().dim4() == a.shape().dim4());
+    assert(shape().dim3() == 1);
+    assert(shape().rows() == 1);
+    assert(a.shape().dim3() == 1);
+    assert(a.shape().rows() == 1);
+    assert(a.shape().cols() == 1);
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobEntropyDerivative));
+    return *(new(location) LazyBlobEntropyDerivative(*this, a, classCount));
+}
+
+class LazyBlobMaxPool: public LazyBlob {
+public:
+    const LazyBlob &a;
+    LazyBlobMaxPool(const LazyBlob &a): a(a) {};
+
+    void initShape() const final override {
+        shape_ = {
+            {
+                a.shape().dim4(), a.shape().dim3(), a.shape().rows() / 2, a.shape().cols() / 2
+            },
+            a.shape().dimsCount
+        };
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        return std::max(
+            std::max(a(k, l, i * 2, j * 2), a(k, l, i * 2 + 1, j * 2)),
+            std::max(a(k, l, i * 2, j * 2 + 1), a(k, l, i * 2 + 1, j * 2 + 1))
+        );
+    }
+};
+
+class LazyBlobMaxPoolDerivative: public LazyBlob {
+public:
+    const LazyBlob &a, &b;
+    LazyBlobMaxPoolDerivative(const LazyBlob &a, const LazyBlob& b): a(a), b(b) {};
+
+    void initShape() const final override {
+        shape_ = a.shape();
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        size_t start_i = (i / 2) * 2;
+        size_t start_j = (j / 2) * 2;
+        size_t indexOfMax_i = start_i;
+        size_t indexOfMax_j = start_j;
+        float max = a(k, l, indexOfMax_i, indexOfMax_j);
+        if (max < a(k, l, start_i, start_j + 1)) {
+            indexOfMax_i = start_i;
+            indexOfMax_j = start_j + 1;
+            max = a(k, l, indexOfMax_i, indexOfMax_j);
+        }
+
+        if (max < a(k, l, start_i + 1, start_j)) {
+            indexOfMax_i = start_i + 1;
+            indexOfMax_j = start_j;
+            max = a(k, l, indexOfMax_i, indexOfMax_j);
+        }
+
+        if (max < a(k, l, start_i + 1, start_j + 1)) {
+            indexOfMax_i = start_i + 1;
+            indexOfMax_j = start_j + 1;
+            max = a(k, l, indexOfMax_i, indexOfMax_j);
+        }
+
+        if (indexOfMax_i == i && indexOfMax_j == j)
+            return b(k, l, i / 2, j / 2);
+
+        return 0.0f;
+    }
+};
+
+const LazyBlob& LazyBlob::maxPool() const {
+    assert(shape().cols() % 2 == 0);
+    assert(shape().rows() % 2 == 0);
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobMaxPool));
+    return *(new(location) LazyBlobMaxPool(*this));
+}
+
+const LazyBlob& LazyBlob::maxPoolDerivative(const LazyBlob& b) const {
+    assert(shape().cols() % 2 == 0);
+    assert(shape().rows() % 2 == 0);
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobMaxPoolDerivative));
+    return *(new(location) LazyBlobMaxPoolDerivative(*this, b));
+}
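Editor's note: the two pooling blobs above implement a fixed 2x2, stride-2 max pool. The forward blob takes the maximum of each 2x2 window; the derivative blob routes the upstream gradient to the arg-max cell of its window and returns zero everywhere else. A minimal standalone sketch of the same idea on a plain row-major array follows; the names (Pooled, maxPool2x2, maxPool2x2Backward) are illustrative and not part of the patch.

// Standalone sketch (not from this patch): 2x2 / stride-2 max pooling and its
// gradient routing on a row-major H x W float array with even H and W.
#include <cstddef>
#include <vector>

struct Pooled {
    std::vector<float> out;        // pooled values, (H/2) x (W/2), row-major
    std::vector<std::size_t> idx;  // flat input index of the max in each 2x2 window
};

Pooled maxPool2x2(const std::vector<float>& in, std::size_t H, std::size_t W) {
    std::size_t count = (H / 2) * (W / 2);
    Pooled p{std::vector<float>(count), std::vector<std::size_t>(count)};
    for (std::size_t i = 0; i < H / 2; ++i)
        for (std::size_t j = 0; j < W / 2; ++j) {
            std::size_t best = (2 * i) * W + 2 * j;
            for (std::size_t di = 0; di < 2; ++di)
                for (std::size_t dj = 0; dj < 2; ++dj) {
                    std::size_t cur = (2 * i + di) * W + (2 * j + dj);
                    if (in[cur] > in[best]) best = cur;   // ties keep the earliest cell
                }
            p.out[i * (W / 2) + j] = in[best];
            p.idx[i * (W / 2) + j] = best;
        }
    return p;
}

// Backward pass: each pooled cell's upstream gradient flows only to the
// arg-max input cell of its window; every other input cell receives zero.
std::vector<float> maxPool2x2Backward(const Pooled& p, const std::vector<float>& gradOut,
                                      std::size_t H, std::size_t W) {
    std::vector<float> gradIn(H * W, 0.0f);
    for (std::size_t n = 0; n < p.idx.size(); ++n)
        gradIn[p.idx[n]] += gradOut[n];
    return gradIn;
}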
diff --git a/server/core/LazyBlob.h b/server/core/LazyBlob.h
index 349e8302..6ab59a64 100644
--- a/server/core/LazyBlob.h
+++ b/server/core/LazyBlob.h
@@ -31,6 +31,10 @@ class LazyBlob {
     const LazyBlob& mean(std::vector axis, bool minusOne = false) const;
     const LazyBlob& reverseLast2Dims() const;
     const LazyBlob& transposeFirst2Dims() const;
+    const LazyBlob& entropy(const LazyBlob& b, int classCount) const;
+    const LazyBlob& entropyDerivative(const LazyBlob& b, int classCount) const;
+    const LazyBlob& maxPool() const;
+    const LazyBlob& maxPoolDerivative(const LazyBlob& b) const;

     /// To repeat some dimensions several times
     /// - Parameter shape: the size we want to get
diff --git a/server/core/Operation.cpp b/server/core/Operation.cpp
index af707dac..f1a27701 100644
--- a/server/core/Operation.cpp
+++ b/server/core/Operation.cpp
@@ -273,3 +273,49 @@ Shape EPS::computeDim(const vector& args) const {
     args1(a);
     return a.shape();
 }
+
+Blob Exp::compute(const vector& args) const {
+    args1(a);
+    return a.applying([](float x) { return std::exp(x); });
+}
+
+vector Exp::grad(const Blob& grad, const vector& args) const {
+    args1(a);
+    return {grad * a.applying([](float x) { return std::exp(x); })};
+}
+
+Shape Exp::computeDim(const vector& args) const {
+    args1(a);
+    return a.shape();
+}
+
+Blob Entropy::compute(const vector& args) const {
+    args2(a, b);
+    return -a.entropy(b, classCount);
+}
+
+vector Entropy::grad(const Blob& grad, const vector& args) const {
+    args2(a, b);
+    return {grad.lazy().fill(a.shape()) * a.entropyDerivative(b, classCount), zeroBlob(b.shape())};
+}
+
+Shape Entropy::computeDim(const vector& args) const {
+    args2(a, b);
+    (void)a;
+    return b.shape();
+}
+
+Blob MaxPoolOp::compute(const vector& args) const {
+    args1(a);
+    return a.maxPool();
+}
+
+vector MaxPoolOp::grad(const Blob& grad, const vector& args) const {
+    args1(a);
+    return {a.maxPoolDerivative(grad.lazy())};
+}
+
+Shape MaxPoolOp::computeDim(const vector& args) const {
+    args1(a);
+    return {{a.shape().dim4(), a.shape().dim3(), a.shape().rows() / 2, a.shape().cols() / 2}, a.shape().dimsCount};
+}
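Editor's note: combined with the LazyBlob pieces, Entropy::compute returns minus the log of the (softmaxed) probability at the target class index stored in the second argument, and Entropy::grad places -1/p in the target column and zero elsewhere, scaled by the incoming gradient. A small standalone sketch of that scalar math for one two-class sample follows; the logits mirror the first row of inputL used by testEntropyLoss further down, and everything here is illustrative rather than code from the patch.

// Standalone sketch (not from this patch): the scalar math behind the
// cross-entropy loss and its gradient for a single two-class sample.
#include <cmath>
#include <cstdio>

int main() {
    // Logits {10, 30} with target class 0, as in the first sample of testEntropyLoss.
    double z0 = 10.0, z1 = 30.0;
    double m  = std::fmax(z0, z1);                  // stabilise before exponentiating
    double e0 = std::exp(z0 - m), e1 = std::exp(z1 - m);
    double p0 = e0 / (e0 + e1), p1 = e1 / (e0 + e1);

    int target = 0;                                 // true class index, as stored in the labels blob
    double loss = -std::log(target == 0 ? p0 : p1); // -log p_target

    // Gradient w.r.t. the softmax output: -1/p at the target column, 0 elsewhere.
    double dL_dp0 = (target == 0) ? -1.0 / p0 : 0.0;
    double dL_dp1 = (target == 1) ? -1.0 / p1 : 0.0;

    std::printf("p = (%g, %g), loss = %g, dL/dp = (%g, %g)\n", p0, p1, loss, dL_dp0, dL_dp1);
    return 0;
}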
diff --git a/server/core/Operation.h b/server/core/Operation.h
index c4192592..eb01170b 100644
--- a/server/core/Operation.h
+++ b/server/core/Operation.h
@@ -131,3 +131,24 @@ struct EPS: Operation {
     std::vector grad(const Blob& gradient, const std::vector& args) const override;
     Shape computeDim(const std::vector& args) const override;
 };
+
+struct Exp: Operation {
+    std::string name = "Exp";
+    Blob compute(const std::vector& args) const override;
+    std::vector grad(const Blob& gradient, const std::vector& args) const override;
+    Shape computeDim(const std::vector& args) const override;
+};
+
+struct Entropy: Operation {
+    int classCount;
+    Entropy(int classCount): classCount(classCount) {};
+    Blob compute(const std::vector& args) const override;
+    std::vector grad(const Blob& gradient, const std::vector& args) const override;
+    Shape computeDim(const std::vector& args) const override;
+};
+
+struct MaxPoolOp: Operation {
+    Blob compute(const std::vector& args) const override;
+    std::vector grad(const Blob& gradient, const std::vector& args) const override;
+    Shape computeDim(const std::vector& args) const override;
+};
diff --git a/server/core/Parameters.h b/server/core/Parameters.h
index 4c3e82b5..b1fab57d 100644
--- a/server/core/Parameters.h
+++ b/server/core/Parameters.h
@@ -24,3 +24,7 @@ struct AxisParameters
 {
     std::vector axis;
 };
+
+struct CrossEntropyLossParameters
+{
+    std::size_t classCount;
+};
diff --git a/server/core/main.cpp b/server/core/main.cpp
index 19b21c2a..2fb0a56e 100644
--- a/server/core/main.cpp
+++ b/server/core/main.cpp
@@ -57,120 +57,119 @@ static const ReLU reluOperation;

 thread_local int a = 0;

-// int main() {
-//     Allocator::startVirtualMode();
-//     {
-//         LinearLayerParameters params1{2ull, 2ull, true};
-//         LinearLayerParameters params2{2ull, 1ull, true};
-//         Conv2DLayerParameters paramsConv{7, 3, 1};
-
-//         auto inputNode = Tensor(Blob::constBlob(Shape {{4, 2}}, input));
+void testNN() {
+    Allocator::startVirtualMode();
+    {
+        LinearLayerParameters params1{2ull, 2ull, true};
+        LinearLayerParameters params2{2ull, 1ull, true};
+        // Conv2DLayerParameters paramsConv{7, 3, 1};

-//         auto trueNode = Tensor(Blob::constBlob(Shape {{4, 1}}, output));
+        auto inputNode = Tensor(Blob::constBlob(Shape {{4, 2}}, input));

-//         RandomObject initObject(0, 1, 17);
-//         OptimizerBase SGD = OptimizerBase(0.1);
-//         LinearLayer layer1 {params1, {inputNode}, &initObject};
-//         SGD.append(layer1.layerOperationParams);
+        auto trueNode = Tensor(Blob::constBlob(Shape {{4, 1}}, output));

-//         TensorRef res = layer1.result.value();
-//         ReLULayer reluLayer1 {{res}};
+        RandomObject initObject(0, 1, 17);
+        OptimizerBase SGD = OptimizerBase(0.1);
+        LinearLayer layer1 {params1, {inputNode}, &initObject};
+        SGD.append(layer1.layerOperationParams);

-//         res = reluLayer1.result.value();
-//         LinearLayer layer2 {params2, {res}, &initObject};
-//         res = layer2.result.value();
-//         SGD.append(layer2.layerOperationParams);
+        TensorRef res = layer1.result.value();
+        ReLULayer reluLayer1 {{res}};

-//         MSELoss mseLoss {{res, trueNode}};
+        res = reluLayer1.result.value();
+        LinearLayer layer2 {params2, {res}, &initObject};
+        res = layer2.result.value();
+        SGD.append(layer2.layerOperationParams);

-//         auto &lastNode = mseLoss.result.value();
-//         lastNode.forward();
-//         lastNode.gradient = Blob::ones(Shape {{1}});
-//         lastNode.backward();
-//         Allocator::endSession();
-//         lastNode.clear();
-//         Allocator::endVirtualMode();
+        MSELoss mseLoss {{res, trueNode}};

-//         for (int j = 0; j < 500; ++j) {
-//             auto &result = lastNode.forward();
-//             lastNode.gradient = Blob::ones(Shape {{1}});
-//             printf("%d: %f\n", j, result(0, 0));
-//             lastNode.backward();
-//             SGD.step();
-//             Allocator::endSession();
-//             lastNode.clear();
-//         }
-//         auto &result2 = res.get().forward();
-//         print(result2);
-//         Allocator::endSession();
-//     }
+        auto &lastNode = mseLoss.result.value();
+        lastNode.forward();
+        lastNode.gradient = Blob::ones(Shape {{1}});
+        lastNode.backward();
+        Allocator::endSession();
+        lastNode.clear();
+        Allocator::endVirtualMode();
+
+        for (int j = 0; j < 500; ++j) {
+            lastNode.forward();
+            auto &result = lastNode.output.value();
+            lastNode.gradient = Blob::ones(Shape {{1}});
+            printf("%d: %f\n", j, result(0, 0));
+            lastNode.backward();
+            SGD.step();
+            Allocator::endSession();
+            lastNode.clear();
+        }
+        res.get().forward();
+        auto &result2 = res.get().output.value();
+        print(result2);
+        Allocator::endSession();
+    }

-//     Allocator::end();
-//     return 0;
-// }
+    Allocator::end();
+}

-// int main() {
-//     Allocator::startVirtualMode();
-//     {
-//         Conv2DLayerParameters paramsConv{3, 3, 1};
+void testConv() {
+    Allocator::startVirtualMode();
+    {
+        Conv2DLayerParameters paramsConv{3, 3, 1};

-//         auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));
+        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));

-//         auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));
+        auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));

-//         RandomObject initObject(0, 1, 17);
-//         OptimizerBase SGD = OptimizerBase(0.1);
-//         Conv2DLayer convL = {paramsConv, {inputNode}, &initObject};
-//         SGD.append(convL.layerOperationParams);
+        RandomObject initObject(0, 1, 17);
+        OptimizerBase SGD = OptimizerBase(0.1);
+        Conv2DLayer convL = {paramsConv, {inputNode}, &initObject};
+        SGD.append(convL.layerOperationParams);

-//         TensorRef res = convL.result.value();
-//         res.get().forward();
-//         print(res.get().output);
+        TensorRef res = convL.result.value();
+        res.get().forward();
+        print(res.get().output);

-//         print(convL.kernel.output);
+        print(convL.kernel.output);

-//         res.get().gradient = Blob::ones(Shape {{1, 1, 5, 5}});
-//         res.get().backward();
+        res.get().gradient = Blob::ones(Shape {{1, 1, 5, 5}});
+        res.get().backward();

-//         print(convL.kernel.gradient);
+        print(convL.kernel.gradient);

-//         print(inputNode.gradient);
-//         Allocator::endSession();
-//     }
+        print(inputNode.gradient);
+        Allocator::endSession();
+    }

-//     Allocator::end();
-//     return 0;
-// }
+    Allocator::end();
+}

-// int main() {
-//     Allocator::startVirtualMode();
-//     {
-//         AxisParameters paramsConv{{1, 2, 3}};
+void testVar() {
+    Allocator::startVirtualMode();
+    {
+        AxisParameters paramsConv{{1, 2, 3}};

-//         auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));
+        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));

-//         auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));
-//         OptimizerBase SGD = OptimizerBase(0.1);
-//         VarLayer var = {paramsConv, {inputNode}};
+        auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));
+        OptimizerBase SGD = OptimizerBase(0.1);
+        VarLayer var = {paramsConv, {inputNode}};

-//         TensorRef res = var.result.value();
-//         res.get().forward();
-//         print(res.get().output);
+        TensorRef res = var.result.value();
+        res.get().forward();
+        print(res.get().output);

-//         res.get().gradient = Blob::ones(res.get().output.value().shape);
-//         res.get().backward();
+        res.get().gradient = Blob::ones(res.get().output.value().shape);
+        res.get().backward();

-//         // print(convL.kernel.gradient);
+        // print(convL.kernel.gradient);

-//         print(inputNode.gradient);
-//         Allocator::endSession();
-//     }
+        print(inputNode.gradient);
+        Allocator::endSession();
+    }

-//     Allocator::end();
-//     return 0;
-// }
+    Allocator::end();
+}

-int main() {
+void testNorm() {
     Allocator::startVirtualMode();
     {
         AxisParameters paramsConv{{2, 3}};
@@ -198,5 +197,95 @@
     }

     Allocator::end();
+}
+
+float inputL[] = {
+    10.f, 30.f,
+    25.f, 25.f,
+    1.f, 6.f
+};
+
+float outputL[] = { 0, 0, 1};
+
+void testEntropyLoss() {
+    Allocator::startVirtualMode();
+    {
+        CrossEntropyLossParameters params{2};
+
+        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 1, 1, 2}}, inputL));
+
+        auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1, 1, 1}}, outputL));
+        OptimizerBase SGD = OptimizerBase(0.1);
+        EntropyLoss loss = {params, {inputNode, trueNode}};
+
+        TensorRef res = loss.result.value();
+        res.get().forward();
+        print(res.get().output);
+
+        // res.get().gradient = Blob::ones(res.get().output.value().shape);
+        // res.get().backward();
+
+        res.get().gradient = Blob::fill(res.get().output.value().shape, 1.f);
+        res.get().backward();
+
+        // print(convL.kernel.gradient);
+
+        print(inputNode.gradient);
+        Allocator::endSession();
+    }
+
+    Allocator::end();
+}
+
+float inputM[] = {
+    10.f, 30.f, 3.f, 4.f,
+    10.f, 30.f, 3.f, 4.f,
+    10.f, 30.f, 3.f, 4.f,
+    10.f, 30.f, 3.f, 4.f,
+
+    25.f, 25.f, 5.f, 6.f,
+    10.f, 30.f, 3.f, 4.f,
+    10.f, 30.f, 3.f, 4.f,
+    40.f, 30.f, 3.f, 4.f,
+
+    1.f, 6.f, 7.f, 8.f,
+    27.f, 25.f, 5.f, 6.f,
+    10.f, 30.f, 3.f, 4.f,
+    10.f, 30.f, 3.f, -1.f
+};
+
+float outputM[] = { 0, 0, 1};
+
+void testMaxPoolLoss() {
+    Allocator::startVirtualMode();
+    {
+
+        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 1, 4, 4}}, inputM));
+
+        auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1, 1, 1}}, outputM));
+        OptimizerBase SGD = OptimizerBase(0.1);
+        MaxPool max = {{inputNode}};
+
+        TensorRef res = max.result.value();
+        res.get().forward();
+        print(res.get().output);
+
+        // res.get().gradient = Blob::ones(res.get().output.value().shape);
+        // res.get().backward();
+
+        res.get().gradient = Blob::fill(res.get().output.value().shape, 1.f);
+        res.get().backward();
+
+        // print(convL.kernel.gradient);
+
+        print(inputNode.gradient);
+        Allocator::endSession();
+    }
+
+    Allocator::end();
+}
+
+int main() {
+    testMaxPoolLoss();
     return 0;
 }
\ No newline at end of file
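Editor's note: a natural follow-up to testMaxPoolLoss would be chaining the new MaxPool layer behind the existing Conv2DLayer, reusing the constructor patterns from testConv and testMaxPoolLoss. The sketch below is only an illustration of such a wiring and is not part of this patch; in particular, it assumes (as testConv's 5x5 case suggests) that Conv2DLayer preserves the 4x4 spatial size, so the pooled output is 2x2 per channel.

// Standalone sketch (not from this patch): Conv2DLayer followed by MaxPool,
// under the assumption that the conv output keeps the 4x4 spatial size.
void testConvWithPool() {
    Allocator::startVirtualMode();
    {
        Conv2DLayerParameters paramsConv{3, 3, 1};

        // Reuse the 3 x 4 x 4 data from testMaxPoolLoss as illustrative input.
        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 4, 4}}, inputM));

        RandomObject initObject(0, 1, 17);
        Conv2DLayer convL = {paramsConv, {inputNode}, &initObject};

        TensorRef res = convL.result.value();
        MaxPool pool = {{res}};

        res = pool.result.value();
        res.get().forward();
        print(res.get().output);

        // Ones as the upstream gradient, routed back through the pool and the conv.
        res.get().gradient = Blob::fill(res.get().output.value().shape, 1.f);
        res.get().backward();

        print(inputNode.gradient);
        Allocator::endSession();
    }

    Allocator::end();
}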