Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions server/api/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ void train(json::rvalue& json, Graph** graph, int model_id) {
Allocator::endVirtualMode();

for (int j = 0; j < 1000; ++j) {
auto& result = lastNode.forward();
lastNode.forward();
auto& result = lastNode.output.value();
printf("%d: %f\n", j, result(0, 0, 0, 0));
// lastNode.gradient = result;
lastNode.gradient = Blob::ones({{1}});
Expand All @@ -52,7 +53,8 @@ void predict(int model_id, Graph* graph, std::vector<float>& answer) {

auto& lastNode = graph->getLastPredictLayers()[0]->result.value(); // Пока не думаем о нескольких выходах (!) Hard-coded
lastNode.clear();
const Blob& result = lastNode.forward();
lastNode.forward();
const Blob& result = lastNode.output.value();

answer.reserve(result.shape.rows() * result.shape.cols());
for (size_t j = 0; j < result.shape.rows(); ++j) {
Expand Down
39 changes: 38 additions & 1 deletion server/core/Layer.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <unordered_map>
#include <functional>
#include <cassert>
#include <string>

#include "Layer.h"
Expand Down Expand Up @@ -113,4 +114,40 @@ LayerNorm::LayerNorm(const AxisParameters& params,
pipeline.push_back(std::move(_fill));

result = Tensor(div, {pipeline[2], pipeline[5]});
}
}

// Builds softmax(x) = exp(x) / sum(exp(x)) along the axis from `params`,
// assembled as a pipeline of elementary tensor operations.
// NOTE: pipeline.reserve(3) is load-bearing — later Tensors hold references
// to elements of `pipeline`, so the vector must never reallocate.
SoftMax::SoftMax(const AxisParameters& params,
                 const std::vector<TensorRef>& args)
    : sum(params.axis) {
    assert(args.size() == 1);
    pipeline.reserve(3);
    TensorRef input = args[0];

    // pipeline[0] = exp(x)
    pipeline.push_back(Tensor(exp, {input}));
    // pipeline[1] = sum(exp(x)) along the requested axis
    pipeline.push_back(Tensor(sum, {pipeline[0]}));
    // pipeline[2] = the sum broadcast back to the shape of exp(x)
    pipeline.push_back(Tensor(fill, {pipeline[0], pipeline[1]}));

    // exp(x) / broadcast(sum(exp(x)))
    result = Tensor(div, {pipeline[0], pipeline[2]});
}

// Cross-entropy loss: softmax over the class axis (axis 3), then the
// per-sample entropy against the integer labels, reduced to a scalar mean.
// args[0] = logits, args[1] = class labels.
// NOTE(review): args[0]/args[1] are dereferenced in the member-init list
// before the assert below runs — a too-short `args` would fault earlier;
// verify callers always pass exactly two tensors.
EntropyLoss::EntropyLoss(const CrossEntropyLossParameters& params,
                         const std::vector<TensorRef>& args)
    : softmax({{3}}, {args[0]}), mean({0, 1, 2, 3}), entropy(params.classCount) {
    assert(args.size() == 2);
    pipeline.reserve(1);

    // pipeline[0] = entropy(softmax(logits), labels)
    pipeline.push_back(Tensor(entropy, {softmax.result.value(), args[1]}));

    // Scalar loss: mean over every axis.
    result = Tensor(mean, {pipeline[0]});
}

// 2x2 max-pooling layer; a thin wrapper over the MaxPoolOp operation.
MaxPool::MaxPool(const std::vector<TensorRef>& args) {
    assert(args.size() == 1);
    TensorRef input = args[0];
    result = Tensor(maxPool, {input});
}
25 changes: 25 additions & 0 deletions server/core/Layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,28 @@ class LayerNorm: public Layer {
LayerNorm(const AxisParameters& params,
const std::vector<TensorRef>& args);
};

// Softmax activation along a configurable axis, built from elementary
// operations: exp -> sum -> broadcast -> divide (see Layer.cpp).
class SoftMax: public Layer {
public:
// Element-wise exponent.
Exp exp;
// Sum along the axis given by AxisParameters.
SumAxis sum;
// Broadcasts the axis sum back to the input shape.
Fill fill;
// exp(x) divided by the broadcast sum.
Divide div;
SoftMax(const AxisParameters& params,
const std::vector<TensorRef>& args);
};

// Cross-entropy loss layer: softmax over the class axis followed by the
// entropy of the predicted probabilities against integer labels, averaged
// over all axes into a scalar (see Layer.cpp).
class EntropyLoss: public Layer {
public:
// Softmax over the class axis (hard-coded to axis 3 in the ctor).
SoftMax softmax;
// Reduces the per-sample entropy to a scalar mean.
Mean mean;
// Entropy of predictions vs. labels, parameterized by class count.
Entropy entropy;
EntropyLoss(const CrossEntropyLossParameters& params,
const std::vector<TensorRef>& args);
};

// 2x2 max-pooling layer over the last two dimensions (see Layer.cpp).
class MaxPool: public Layer {
public:
// The underlying pooling operation.
MaxPoolOp maxPool;
MaxPool(const std::vector<TensorRef>& args);
};
146 changes: 146 additions & 0 deletions server/core/LazyBlob.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
#include <cassert>
#include <vector>
#include <optional>
#include <algorithm>
#include <cmath>


#include "LazyBlob.h"
#include "Iterations.h"
#include "Allocator.h"
#include "Blob.h"

#define MAX_DIMS_COUNT 4
#define EPS 1e-9

const Shape& LazyBlob::shape() const {
if (shape_.has_value()) {
Expand Down Expand Up @@ -568,3 +572,145 @@ std::ostream& operator<<(std::ostream& os, const LazyBlob &b) {
}
return os;
}

// Lazy element-wise log-likelihood lookup for cross-entropy:
// result(k,l,i,j) = log(a[k, l, i, label]) where `label` = b(k,l,i,j).
// `a` holds per-class probabilities, `b` holds integer class labels;
// output shape follows the label blob.
class LazyBlobEntropy: public LazyBlob {
public:
    const LazyBlob &a, &b;
    const int classCount;
    LazyBlobEntropy(const LazyBlob &a, const LazyBlob &b, int classCount):
        a(a), b(b), classCount(classCount) {};

    void initShape() const final override {
        shape_ = b.shape();
    }

    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
        const float label = b(k, l, i, j);
        // A negative label would wrap around when converted to size_t,
        // so guard both bounds (the original only checked the upper one).
        assert(label >= 0);
        assert(label < classCount);
        // WARNING: if log(0) becomes a problem, switch to the EPS variant below.
        return std::log(a(k, l, i, static_cast<std::size_t>(label)));
        // return std::log(a(k, l, i, static_cast<std::size_t>(label)) + EPS);
    }
};

// Derivative of -log(p_label) w.r.t. the probabilities `a`:
// d/da_j = -1/a_j when j is the true class for batch item k, 0 otherwise.
// Output shape follows the prediction blob `a` (batch x 1 x 1 x classCount);
// the label blob `b` is (batch x 1 x 1 x 1) — one class index per item
// (asserted by LazyBlob::entropyDerivative before construction).
class LazyBlobEntropyDerivative: public LazyBlob {
public:
    const LazyBlob &a, &b;
    const int classCount;
    LazyBlobEntropyDerivative(const LazyBlob &a, const LazyBlob &b, int classCount):
        a(a), b(b), classCount(classCount) {};

    void initShape() const final override {
        shape_ = a.shape();
    }

    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
        // BUG FIX: the original asserted on b(k, l, i, j), but j ranges over
        // the class axis of `a` while `b` has a single column — that indexed
        // the label blob out of range. Read the one label per batch item,
        // matching the lookup actually used below.
        const float label = b(k, 0, 0, 0);
        assert(label >= 0);
        assert(label < classCount);
        // Cast on the size_t side to avoid the signed/unsigned comparison
        // of the original `j != (int)...`.
        if (j != static_cast<std::size_t>(label)) {
            return 0;
        }
        // WARNING: if division by zero becomes a problem, switch to the EPS variant.
        return - 1.0f / (a(k, l, i, j));
        // return - 1.0f / (a(k, l, i, j) + EPS);
    }
};

// Lazy per-sample log-likelihood: `this` holds probabilities shaped
// (batch x 1 x 1 x classCount); `a` holds one integer label per batch item,
// shaped (batch x 1 x 1 x 1). Allocation goes through the arena Allocator.
const LazyBlob& LazyBlob::entropy(const LazyBlob& a, int classCount) const {
    // Predictions: one row of classCount probabilities per batch item.
    assert(shape().cols() == classCount);
    assert(shape().dim3() == 1);
    assert(shape().rows() == 1);
    // Labels: a single scalar per batch item, batch sizes must agree.
    assert(shape().dim4() == a.shape().dim4());
    assert(a.shape().dim3() == 1);
    assert(a.shape().rows() == 1);
    assert(a.shape().cols() == 1);

    void* memory = Allocator::allocateBytes(sizeof(LazyBlobEntropy));
    return *(new (memory) LazyBlobEntropy(*this, a, classCount));
}

// Lazy gradient of the cross-entropy lookup; same shape contract as
// LazyBlob::entropy — `this` = probabilities, `a` = integer labels.
const LazyBlob& LazyBlob::entropyDerivative(const LazyBlob& a, int classCount) const {
    // Predictions: one row of classCount probabilities per batch item.
    assert(shape().cols() == classCount);
    assert(shape().dim3() == 1);
    assert(shape().rows() == 1);
    // Labels: a single scalar per batch item, batch sizes must agree.
    assert(shape().dim4() == a.shape().dim4());
    assert(a.shape().dim3() == 1);
    assert(a.shape().rows() == 1);
    assert(a.shape().cols() == 1);

    void* memory = Allocator::allocateBytes(sizeof(LazyBlobEntropyDerivative));
    return *(new (memory) LazyBlobEntropyDerivative(*this, a, classCount));
}

class LazyBlobMaxPool: public LazyBlob {
public:
const LazyBlob &a;
LazyBlobMaxPool(const LazyBlob &a): a(a) {};

void initShape() const final override {
shape_ = {
{
a.shape().dim4(), a.shape().dim3(), a.shape().rows() / 2, a.shape().cols() / 2
},
a.shape().dimsCount
};
}

float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
return std::max(
std::max(a(k, l, i * 2, j * 2), a(k, l, i * 2 + 1, j * 2)),
std::max(a(k, l, i * 2, j * 2 + 1), a(k, l, i * 2 + 1, j * 2 + 1))
);
}
};

// Backward pass of 2x2 max pooling: routes the half-size upstream gradient
// `b` to the argmax position of each 2x2 window of the forward input `a`;
// every other position receives 0. Ties resolve to the first maximum in
// window scan order (row-major), matching the forward selection.
class LazyBlobMaxPoolDerivative: public LazyBlob {
public:
    const LazyBlob &a, &b;
    LazyBlobMaxPoolDerivative(const LazyBlob &a, const LazyBlob& b): a(a), b(b) {};

    void initShape() const final override {
        shape_ = a.shape();
    }

    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
        // Top-left corner of the 2x2 window containing (i, j).
        const std::size_t si = (i / 2) * 2;
        const std::size_t sj = (j / 2) * 2;

        // Find the window argmax; strict '<' keeps the earliest maximum,
        // in the same scan order as the original unrolled comparisons.
        std::size_t bestI = si;
        std::size_t bestJ = sj;
        float best = a(k, l, bestI, bestJ);
        for (std::size_t di = 0; di < 2; ++di) {
            for (std::size_t dj = 0; dj < 2; ++dj) {
                const float candidate = a(k, l, si + di, sj + dj);
                if (best < candidate) {
                    best = candidate;
                    bestI = si + di;
                    bestJ = sj + dj;
                }
            }
        }

        // Only the argmax cell receives the upstream gradient.
        if (bestI == i && bestJ == j)
            return b(k, l, i / 2, j / 2);
        return 0.0f;
    }
};

// Lazily builds a 2x2 max-pool view of this blob; requires even spatial dims.
const LazyBlob& LazyBlob::maxPool() const {
    assert(shape().cols() % 2 == 0);
    assert(shape().rows() % 2 == 0);
    // BUG FIX: the original allocated sizeof(LazyBlobEntropyDerivative) — a
    // copy-paste slip. Allocate the size of the object actually constructed,
    // otherwise the arena slot may be under- (or over-) sized.
    void* location = Allocator::allocateBytes(sizeof(LazyBlobMaxPool));
    return *(new(location) LazyBlobMaxPool(*this));
}

// Lazily builds the max-pool backward view: `b` is the half-size upstream
// gradient routed back to each window's argmax. Requires even spatial dims.
const LazyBlob& LazyBlob::maxPoolDerivative(const LazyBlob& b) const {
    assert(shape().cols() % 2 == 0);
    assert(shape().rows() % 2 == 0);
    void* memory = Allocator::allocateBytes(sizeof(LazyBlobMaxPoolDerivative));
    return *(new (memory) LazyBlobMaxPoolDerivative(*this, b));
}
4 changes: 4 additions & 0 deletions server/core/LazyBlob.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ class LazyBlob {
const LazyBlob& mean(std::vector<short> axis, bool minusOne = false) const;
const LazyBlob& reverseLast2Dims() const;
const LazyBlob& transposeFirst2Dims() const;
// Cross-entropy helpers: `this` holds per-class probabilities
// (batch x 1 x 1 x classCount), `b` holds one integer label per batch item.
const LazyBlob& entropy(const LazyBlob& b, int classCount) const;
const LazyBlob& entropyDerivative(const LazyBlob& b, int classCount) const;
// 2x2 max pooling over the last two dims (rows/cols must be even) and its
// backward pass (`b` is the half-size upstream gradient).
const LazyBlob& maxPool() const;
const LazyBlob& maxPoolDerivative(const LazyBlob& b) const;

/// To repeat some dimensions several times
/// - Parameter shape: the size we want to get
Expand Down
46 changes: 46 additions & 0 deletions server/core/Operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,49 @@ Shape EPS::computeDim(const vector<LazyBlobRef>& args) const {
args1(a);
return a.shape();
}

// Forward pass: element-wise e^x over the single argument.
Blob Exp::compute(const vector<LazyBlobRef>& args) const {
    args1(a);
    auto expOf = [](float x) { return std::exp(x); };
    return a.applying(expOf);
}
Comment on lines +277 to +280
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

а зато вот экспоната это теперь операция....
капец совсем бедную архитектуру разворатили

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

а что это еще должно быть, как не операция?


// Backward pass: d/dx e^x = e^x, scaled by the upstream gradient.
vector<LazyBlobRef> Exp::grad(const Blob& grad, const vector<LazyBlobRef>& args) const {
    args1(a);
    auto expOf = [](float x) { return std::exp(x); };
    return {grad * a.applying(expOf)};
}

// e^x is element-wise, so the output shape equals the input shape.
Shape Exp::computeDim(const vector<LazyBlobRef>& args) const {
    args1(a);
    const LazyBlob& input = a;
    return input.shape();
}

// Forward pass: negative log-likelihood of each label under the predicted
// probabilities `a` (labels are in `b`); one value per batch item.
Blob Entropy::compute(const vector<LazyBlobRef>& args) const {
    args2(a, b);
    const LazyBlob& logLikelihood = a.entropy(b, classCount);
    return -logLikelihood;
}

// Backward pass: broadcast the upstream gradient over the prediction shape
// and multiply by d(-log p_label)/dp (entropyDerivative already carries the
// minus sign). Integer labels receive a zero gradient.
vector<LazyBlobRef> Entropy::grad(const Blob& grad, const vector<LazyBlobRef>& args) const {
    args2(a, b);
    const LazyBlob& upstream = grad.lazy().fill(a.shape());
    return {upstream * a.entropyDerivative(b, classCount), zeroBlob(b.shape())};
}

// One loss value per label: the output shape follows the label blob.
Shape Entropy::computeDim(const vector<LazyBlobRef>& args) const {
    args2(a, b);
    (void)a; // predictions do not affect the output shape
    const LazyBlob& labels = b;
    return labels.shape();
}

// Forward pass: 2x2 max pooling over the last two dimensions.
Blob MaxPoolOp::compute(const vector<LazyBlobRef>& args) const {
    args1(a);
    const LazyBlob& pooled = a.maxPool();
    return pooled;
}

// Backward pass: each upstream gradient cell flows to the argmax of its
// 2x2 window in the forward input; other positions get zero.
vector<LazyBlobRef> MaxPoolOp::grad(const Blob& grad, const vector<LazyBlobRef>& args) const {
    args1(a);
    const LazyBlob& upstream = grad.lazy();
    return {a.maxPoolDerivative(upstream)};
}

// 2x2 pooling halves both spatial dimensions; batch/channel dims are kept.
Shape MaxPoolOp::computeDim(const vector<LazyBlobRef>& args) const {
    args1(a);
    const Shape& s = a.shape();
    return {{s.dim4(), s.dim3(), s.rows() / 2, s.cols() / 2}, s.dimsCount};
}
21 changes: 21 additions & 0 deletions server/core/Operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,24 @@ struct EPS: Operation {
std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<LazyBlobRef>& args) const override;
Shape computeDim(const std::vector<LazyBlobRef>& args) const override;
};

// Element-wise exponent operation (kernels in Operation.cpp).
struct Exp: Operation {
// NOTE(review): presumably a debug/display label; the other new operations
// (Entropy, MaxPoolOp) don't define one — verify whether it is still used.
std::string name = "Exp";
Blob compute(const std::vector<LazyBlobRef>& args) const override;
std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<LazyBlobRef>& args) const override;
Shape computeDim(const std::vector<LazyBlobRef>& args) const override;
};

// Cross-entropy of predicted class probabilities against integer labels
// (kernels in Operation.cpp). args: [predictions, labels].
struct Entropy: Operation {
    // Number of classes along the last axis of the predictions.
    int classCount;
    // Fixed parameter-name typo ("classCouont" -> "classCount").
    Entropy(int classCount): classCount(classCount) {};
    Blob compute(const std::vector<LazyBlobRef>& args) const override;
    std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<LazyBlobRef>& args) const override;
    Shape computeDim(const std::vector<LazyBlobRef>& args) const override;
};

// 2x2 max pooling over the last two dimensions (kernels in Operation.cpp).
// Spatial dims are halved; rows/cols of the input must be even.
struct MaxPoolOp: Operation {
Blob compute(const std::vector<LazyBlobRef>& args) const override;
std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<LazyBlobRef>& args) const override;
Shape computeDim(const std::vector<LazyBlobRef>& args) const override;
};
4 changes: 4 additions & 0 deletions server/core/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@ struct AxisParameters
std::vector<short> axis;
};

// Configuration for the EntropyLoss layer.
struct CrossEntropyLossParameters
{
// Number of classes along the prediction's class axis.
std::size_t classCount;
};
Loading